POJ 2050 Searching the Web

浏览数：43 / 时间：2015年06月09日

题意简述：做一个极其简单的搜索系统，对以下四种输入进行分析与搜索：

　　　　1. 只有一个单词：如 term，只需找到含有这个单词的document，然后把这个document的含有这个单词term的那些行输出。

　　　　2.term1 AND term2，找到同时含有term1 和 term2 的document，然后把这个document的含有这个单词term1 或 term2 的那些行输出。

　　　　3.term1 OR term2，找到含有term1 或 term2 的document，然后把这个document的含有这个单词term1 或 term2 的那些行输出。

　　　　4.NOT term，将不含有 term的document全部输出

思路简述：

　　　　做一个set集合 Src，记录了一个单词出现在哪些文件的哪些行；用一个map做映射，指出一个单词出现在哪些文件中，这样子，如果是两个单词，可以分别求出各个单词属于哪些文件，根据“AND”则进行set_intersection运算，根据“OR”进行set_union运算。

/*
	Poj 2050
	Emerald
	10 May 2015
*/
#include <iostream>
#include <cstring>
#include <cstdio>
#include <cctype>
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <algorithm>

using namespace std;

class Figure{ // meaning : a word can be found in the lineOrder‘th line of the docOrder‘th Doc
public:
	int docOrder, lineOrder;
	Figure() {}
	Figure( int d, int l ) {
		docOrder = d;
		lineOrder = l;
	}
};

bool operator < ( const Figure f1, const Figure f2 ) { // the comparisions used in set
	if( f1.docOrder!=f2.docOrder ) {
		return f1.docOrder < f2.docOrder;
	} else {
		return f1.lineOrder < f2.lineOrder;
	}
}

class Doc{ // meaning : the order of a Doc, and how many lines the Doc contains
public :
	int docOrder, lineLimit;
	Doc() {}
	Doc( int d, int l ) {
		this->docOrder = d;
		this->lineLimit = l;
	}
};

// set < Figure > src; // this defination defines an accurate instant
typedef set < Figure > Src; // a src contains the figures of a word
typedef set < int > docSrc;
map < string, docSrc > docMap; // referring to a word, wordLine get a line
map < string, Src > dict; 
map < Figure, string > wordLine; // referring to a word, wordLine get a line
vector < Doc > docs;

const string DOC_END = "**********";
#define ALL(x) x.begin(),x.end()
#define INS(x) inserter(x,x.begin())

void Standard( string &line ); // make words tolower and other chars to whitespace
void WordRecord( string &line, int docOrder, int lineOrder ); // transfer words into src
void PrintSrc( Src &src ); // print as the problem commands
void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 ); // know the docs and words, get target figures
void NotWord( string& word, Src &src ); // not the word w

int main() {
	int allDocs, queries;
	
	// input docs
	scanf( "%d", &allDocs );
	cin.get();
	for( int i=0; i<allDocs; i++ ) {
		string line;
		int lineCounter = 0;
		while( getline( cin, line ) && line != DOC_END ) { // read until end
			wordLine[ Figure( i, lineCounter ) ] = line;
			Standard( line );
			WordRecord( line, i, lineCounter ++ );
		}
		docs.push_back( Doc( i, lineCounter ) );
	}

	// input queries
	string command;
	scanf( "%d", &queries );
	cin.get();
	while( queries -- ) {
		getline( cin, command );
		Standard( command );
		Src src;
		if( command.find_last_of( ‘ ‘ ) == string::npos ) { // no whitespace
			Standard( command );
			src = dict[ command ];
		} else if( command.find_last_of( ‘ ‘ ) != command.find_first_of( ‘ ‘ ) ) { // if there‘re two different whitespaces
			stringstream ss( command );                                          // xxx AND/OR xxx
			string w1, w2, connected;
			ss >> w1 >> connected >> w2;
			docSrc dSrc1 = docMap[ w1 ];
			docSrc dSrc2 = docMap[ w2 ];

			docSrc dsrc;
			if( connected == "and" ) {
				set_intersection( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // intersection
			} else {
				set_union( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // union
			}

			WordsToFigure( src, dsrc, w1, w2 );
		} else { // only one whitespace -> Not xxx
			stringstream ss( command );
			string w1;
			ss >> w1 >> w1;
			NotWord( w1, src );
		}
		PrintSrc( src );
	}
	return 0;
}

void Standard( string &line ) {
	int length = line.length();
	for( int i=0; i<length; i++ ) {
		if( isalpha( line[i] ) ) {
			line[i] = tolower( line[i] ); // tolower, such as ‘A‘ to ‘a‘
		} else {
			line[i] = ‘ ‘; // if c isn‘t a alpha, c will be transferred to a whitespace
		}
	}
}

void WordRecord( string &line, int docOrder, int lineOrder ) {
	stringstream ss( line );
	string word;
	while( ss >> word ) {
		if( dict.count( word ) ) { // whether the word has been found in the total input
			if( !dict[word].count( Figure( docOrder, lineOrder ) ) ) { // whether the word has been found in this line
				dict[word].insert( Figure( docOrder, lineOrder ) ) ;
			}
		} else {
			Src src;
			src.insert( Figure( docOrder, lineOrder ) );
			dict[ word ] = src;
		}
		if( docMap.count( word ) ) { // whether the word has been found in this document
			docMap[word].insert( docOrder );
		} else {
			docSrc ds;
			docMap[word] = ds;
			docMap[word].insert( docOrder );
		}
	}
}

void PrintSrc( Src &src ) { // print the result
	if( src.size() == 0 ) {
		printf("Sorry, I found nothing.\n");
		printf( "==========\n" );
		return ;
	}
	Src :: iterator it = src.begin(), bef; // bef represents the former one 
	printf( "%s\n", wordLine[ *it ].c_str() );
	bef = it++;
	while( it != src.end() ) {
		if( it->docOrder != bef->docOrder ) {
			printf( "----------\n" );
		}
		printf( "%s\n", wordLine[ *it ].c_str() );
		bef = it;
		it ++;
	}
	printf( "==========\n" );
}

void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 ) {
	docSrc :: iterator it;
	for( it=dsrc.begin(); it != dsrc.end(); it ++ ) {
		Src :: iterator it2 ;
		for( it2 = dict[ w1 ].begin(); it2 !=dict[w1].end(); it2 ++ ) {
			if( *it == it2 -> docOrder ) {
				src.insert( *it2 ); // the w1 appears in this line of this document
			}
		}
		for( it2 = dict[ w2 ].begin(); it2 !=dict[w2].end(); it2 ++ ) {
			if( *it == it2 -> docOrder ) {
				src.insert( *it2 );	// the w1 appears in this line of this document
			}
		}
	}
}

void NotWord( string& word, Src &src ) { // not this word
	docSrc dsrc = docMap[ word ];
	vector< Doc > :: iterator it;
	for( it = docs.begin(); it != docs.end(); it ++ ) {
		if( !dsrc.count( it->docOrder ) ) {
			for( int i=0; i< it->lineLimit; i ++ ) {
				src.insert( Figure( it->docOrder, i ) );
			}
		}
	}
}

郑重声明：本站内容如果来自互联网及其他传播媒体，其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享，并不代表本站赞同其观点和对其真实性负责，也不构成任何其他建议。

POJ 2050 Searching the Web