// Word-frequency statistics for a text file.
//
// Usage: <program> <filename>
//
// Reads the whole file, splits it into words with the regular expression
// \w+, and prints several statistics about how often each word occurs.
// The standard <regex> header (std::regex, std::sregex_iterator) is used in
// place of the former std::tr1 facilities.

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using std::cout;
using std::cerr;
using std::endl;
using std::ifstream;
using std::stringstream;
using std::pair;
using std::string;
using std::map;
using std::unordered_map;
using std::vector;
using std::advance;
using std::find_if;
using std::regex;
using std::sregex_iterator;

// word -> number of occurrences
using str2Int = unordered_map<string, std::size_t>;
// (frequency, all words having that frequency)
using intAndWords = pair<std::size_t, vector<string>>;
// frequency -> all words having that frequency, ordered by ascending frequency
using int2Words = map<std::size_t, vector<string>>;

// Count the frequency of each word in `text`.
// A word is a maximal match of the regular expression \w+.
// Returns a table mapping each distinct word to its number of occurrences.
str2Int wordCount(const string& text){
    const regex wordReg(R"(\w+)");
    const sregex_iterator wordItEnd;  // default-constructed == end-of-sequence

    str2Int allWords;
    for (sregex_iterator wordIt(text.begin(), text.end(), wordReg);
         wordIt != wordItEnd; ++wordIt){
        ++allWords[wordIt->str()];
    }
    return allWords;
}

// Invert a word -> frequency table into a frequency -> words table.
// Every word of `wordCount` appears in exactly one bucket of the result:
// the bucket keyed by its frequency.
int2Words frequencyOfWords(const str2Int& wordCount){
    int2Words freq2Words;
    for (const auto& wordAndFreq : wordCount){
        // operator[] creates the (empty) bucket on first use.
        freq2Words[wordAndFreq.second].push_back(wordAndFreq.first);
    }
    return freq2Words;
}

// Program entry point: expects exactly one argument, the file to analyse.
// Returns EXIT_FAILURE when the argument is missing, the file cannot be
// opened, or no word occurs more often than the reporting threshold.
int main(int argc, char* argv[]){
    constexpr std::size_t previewCount = 20;          // pairs shown in the preview
    constexpr std::size_t frequencyThreshold = 1000;  // "more than" limit of the last report

    cout << endl;

    // get the filename
    if (argc != 2){
        cerr << "Filename missing !" << endl;
        return EXIT_FAILURE;
    }
    const string myFile{argv[1]};

    // open the file
    ifstream file(myFile, std::ios::in);
    if (!file){
        cerr << "Can't open file " + myFile + "!" << endl;
        return EXIT_FAILURE;
    }

    // read the file
    stringstream buffer;
    buffer << file.rdbuf();
    const string text(buffer.str());

    // get the frequency of each word
    const auto allWords = wordCount(text);

    cout << "The first 20 (key, value)-pairs: " << endl;
    // Clamp the preview length: advancing past end() is undefined behaviour
    // when the text contains fewer distinct words than previewCount.
    auto previewEnd = allWords.begin();
    advance(previewEnd, static_cast<std::ptrdiff_t>(
                            std::min<std::size_t>(previewCount, allWords.size())));
    for (auto entry = allWords.begin(); entry != previewEnd; ++entry){
        cout << "(" << entry->first << ": " << entry->second << ")";
    }
    cout << "\n\n";

    // Look a word up WITHOUT inserting it: operator[] would add a zero-count
    // entry for a missing word and distort every statistic printed below.
    const auto countOf = [&allWords](const string& word) -> std::size_t {
        const auto found = allWords.find(word);
        return found == allWords.end() ? 0 : found->second;
    };
    cout << "allWords[Web]: " << countOf("Web") << endl;
    cout << "allWords[The]: " << countOf("The") << "\n\n";

    cout << "Number of unique words: ";
    cout << allWords.size() << "\n\n";

    std::size_t sumWords = 0;
    for (const auto& wordAndFreq : allWords) sumWords += wordAndFreq.second;
    cout << "Total number of words: " << sumWords << "\n\n";

    const auto allFreq = frequencyOfWords(allWords);

    cout << "Number of different frequencies: " << allFreq.size() << "\n\n";

    cout << "All frequencies: ";
    for (const auto& freqAndWords : allFreq) cout << freqAndWords.first << " ";
    cout << "\n\n";

    cout << "The most frequently occurring word(s): " << endl;
    // The map is ordered by frequency, so its last entry holds the maximum.
    // Guard against an input without any word, where there is no last entry.
    if (!allFreq.empty()){
        const auto& mostFrequent = *allFreq.rbegin();
        cout << mostFrequent.first << " :";
        for (const auto& word : mostFrequent.second) cout << word << " ";
    }
    cout << "\n\n";

    cout << "All word which appears more then 1000 times:" << endl;
    // First bucket whose frequency is strictly greater than the threshold;
    // every later bucket qualifies as well because the map is ordered.
    const auto biggerIt = allFreq.upper_bound(frequencyThreshold);
    if (biggerIt == allFreq.end()){
        cerr << "No word appears more then 1000 times !" << endl;
        return EXIT_FAILURE;
    }
    for (auto allFreqIt = biggerIt; allFreqIt != allFreq.end(); ++allFreqIt){
        cout << allFreqIt->first << " :";
        for (const auto& word : allFreqIt->second) cout << word << " ";
        cout << endl;
    }

    cout << endl;
}