其他语言

本类阅读TOP10

·基于Solaris 开发环境的整体构思
·使用AutoMake轻松生成Makefile
·BCB数据库图像保存技术
·GNU中的Makefile
·射频芯片nRF401天线设计的分析
·iframe 的自适应高度
·BCB之Socket通信
·软件企业如何实施CMM
·入门系列--OpenGL最简单的入门
·WIN95中日志钩子(JournalRecord Hook)的使用

分类导航
VC语言 Delphi
VB语言 ASP
Perl Java
Script 数据库
其他语言 游戏开发
文件格式 网站制作
软件工程 .NET开发
std::map初体验

作者:未知 来源:月光软件站 加入时间:2005-2-28 月光软件站

/*

This exercise is in the field of bibliometric (words and text) analysis. You will be provided with a text

file, which consists of several paragraphs of English text. Your task is to write a program which will

analyse the text, and output a range of statistics about the text.

Your program should do the following:

Print a list of all the words that occur, in alphabetical order. You must not print the same word twice.

Print out the 20 most common words in the text, together with the number of instances of each word,

with the most common word at the top.

*/

//////////////////////////////////////////////////////////////////////////

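//     ReadTxt.cpp : word statistics for a text file.

//     by Mythma

//     Lists the words of the text in alphabetical order, without duplicates,

//     and prints the 20 most frequent ones.

//   compiler: g++

//     Does not compile with VC6.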

//////////////////////////////////////////////////////////////////////////

#include <iostream>

#include <fstream.h>

#include <string>

#include <vector>

#include <map>

#include <algorithm>

using namespace std;

 

// Separator line used by the report functions; it starts with a newline.
const std::string STR_INTERVAL = "\n-----------------------------------------------------------";

// Word -> number of occurrences. std::map keeps the keys sorted, which is
// what provides the alphabetical listing.
std::map<std::string, int>  gMap;

// NOTE(review): not referenced by any code visible in this file.
std::vector<std::string>    gMMStr;

// Normalizes one whitespace-delimited token and counts it in gMap.
//
// The token is lower-cased, then every leading and trailing character that
// is not a letter 'a'..'z' (punctuation, digits, ...) is stripped.
// Characters inside the word, such as the apostrophe in "don't", are kept.
//
// strWord : raw token as read from the input.
// returns : true if a word was counted; false if the token is empty or
//           contains no letter at all.
bool AddWordToList(std::string strWord)
{
    // Lower-case in place; the by-value parameter is already a private copy.
    for (std::string::size_type i = 0; i < strWord.size(); ++i)
    {
        // Go through unsigned char: tolower() is undefined for negative values.
        strWord[i] = static_cast<char>(tolower(static_cast<unsigned char>(strWord[i])));
    }

    // Skip leading punctuation and digits.
    std::string::size_type first = 0;
    while (first < strWord.size() && (strWord[first] < 'a' || strWord[first] > 'z'))
        ++first;

    if (first == strWord.size())
        return false;               // empty token, or no letter in it

    // Skip trailing punctuation and digits, one character at a time.
    // The scan cannot run past `first` because strWord[first] is a letter.
    std::string::size_type last = strWord.size();
    while (strWord[last - 1] < 'a' || strWord[last - 1] > 'z')
        --last;

    // operator[] inserts a zero count for a new word, so one increment
    // covers both the "new word" and the "seen before" case.
    ++gMap[strWord.substr(first, last - first)];

    return true;
}

 

void OutPutWordsList()

{

      

      

       cout << STR_INTERVAL

               << "\n--文件中单词的总数为: "

               << gMap.size()

               << " 按字母排列如下"

               << STR_INTERVAL << endl;

 

       int n = 0;

       for(map<string, int>::iterator it = gMap.begin(); it != gMap.end(); ++it)

       {

              ++n;

              cout.width(15);

              cout.flags(ios::left);

              cout << it->first.c_str();

              if( 5 == n)

              {

                     cout << endl;

                     n = 0;

              }

       }

 

       cout << STR_INTERVAL << endl;

}

 

// Ordering predicate for (word, count) pairs: true when p1's count is
// strictly greater than p2's, so sorting with it yields descending counts.
// The word itself is not compared; pairs with equal counts are equivalent.
bool Cmp(const std::pair<std::string, int> &p1, const std::pair<std::string, int> &p2)
{
    return p2.second < p1.second;
}

 

void OutPutCount()

{    

      

       vector< pair<string,int> > wd(gMap.begin(), gMap.end());

       sort(wd.begin(), wd.end(), Cmp);

      

       cout << STR_INTERVAL

            << "\n--出现频率最多的几个单词是:"

               << STR_INTERVAL;

 

       int i = 0;

      

       for(vector< pair<string, int> >::iterator it=wd.begin();

              it != wd.end() && i < 20; ++it, ++i)

       {

              cout.width(15);

              cout.flags(ios::left);

              cout << endl

                      << it->first.c_str()

                      << " ----       "

                      << it->second;

       }

       cout << STR_INTERVAL;

}

 

 

int main(int argc, char* argv[])

{

       char*      strPath;

       if(argc == 2)

              strPath = argv[1];

       else

              strPath = "c:\\words.txt";

       //read file

       ifstream  inFile(strPath);

       while( !inFile.eof())

       {

              string strWord;

              inFile >> strWord;

              AddWordToList(strWord);

       }

 

       OutPutWordsList();

       OutPutCount();

 

       return 0;

}

 

 




相关文章

相关软件