姗姗来迟的词频统计代码 BUG 的发现

1. 此前提交的第一次代码作业总结博客

http://www.cnblogs.com/ustczwq/p/8680704.html

2. BUG 本天成，妙手偶得之

虽然代码已经提交，但总是感觉哪个地方不太对，bug 存在得过于莫名其妙。然后，随手打开代码，稍微调试了一下，当我发现 bug 的时候，不知道该说些什么好，只想讲脏话。

出现 bug 的地方：

改过之后：

看出来了吧：三目运算符的结果没有赋值。原来的写法只是在两个分支里做减法（`temp - …`），表达式的值被直接丢弃，temp 本身并没有改变；把两个分支都改成 `temp -= …` 之后，输出结果立马正确。怪不得用 unordered_map 的时候哈希表的查询出问题了——是我自己定义的哈希函数有问题。虽然迟了，但那种优化思路是对的，这里简单补一篇，算是对原博客的完善。

3. 加了几个等于号之后的源代码

 #include "io.h"

 #include "math.h"

 #include "stdio.h"

 #include "string.h"

 #include "stdlib.h"

 #include "unordered_map"

 using namespace std;

 // Value compared against wordcmp()'s result while walking a bucket list in
 // getwords(). NOTE(review): presumably means "first argument sorts earlier";
 // the literal return values of wordcmp() are missing from this listing -- confirm.
 #define small 2

 // Running totals updated by getwords() and written out by gettop().
 // NOTE(review): the initializers are empty in this listing (numeric literals
 // appear to have been stripped); restore them before compiling.
 int wordnum = ;

 int charnum = ;

 int linenum = ;

 // Singly linked list node holding one distinct word and its count.
 struct wordsdata                // stores per-word information

 {

     char words[];           // the word's text (NOTE(review): array size is missing in this listing)

     int number;                 // occurrence count

     wordsdata *next;            // next node in the same list

 };

 // A two-word phrase. getwords() points `one` and `two` at the `words`
 // buffers of existing wordsdata nodes, so the strings are not owned here.
 struct phrases

 {

     char *one;                  // first word of the phrase

     char *two;                  // second word of the phrase

     int num;                    // phrase count; only filled in by gettop()

 };

 // Forward declarations; the definitions follow main().

 // Compares two words, ignoring letter case while scanning.
 int wordcmp(char *str1, char *str2);

 // Writes the totals and the top words/phrases to result.txt.
 int gettop(struct wordsdata **word);

 // Reads one file and records its words and adjacent word pairs.
 int getwords(char *path, struct wordsdata **word);

 // Walks `path` (recursing into sub-directories) and calls getwords() on files.
 int getfiles(char *path, struct _finddata_t *fileinfo, long handle);

 // Key-equality predicate for the phrase hash map (fourth template argument
 // of Char_Phrase). Two phrases match when wordcmp() on both their first and
 // second words is below a threshold.
 // NOTE(review): the threshold literals are missing from this listing;
 // presumably they select the "equivalent words" results of wordcmp() -- confirm.
 struct phrase_cmp

 {

     bool operator()(const phrases &p1, const phrases &p2) const

     {

         return ((wordcmp(p1.one, p2.one) < ) && (wordcmp(p1.two, p2.two) < ));

     }

 };

 // Hash functor for the phrase hash map (third template argument of
 // Char_Phrase). It folds the characters of both words into one running value.
 // NOTE(review): every numeric constant (thresholds, offsets, multiplier,
 // modulus) is missing from this listing. Presumably characters at or below a
 // threshold (e.g. digits) are skipped and upper/lower-case letters are mapped
 // to the same value, so the hash agrees with the case-insensitive wordcmp()
 // used by phrase_cmp -- confirm against the original source.
 // NOTE(review): `__h` uses a double underscore, which is reserved for the
 // implementation.
 struct phrase_hash

 {

     size_t operator()(const phrases &ph) const

     {

         unsigned long __h = ;

         int temp;

         size_t i;

         // Fold in the characters of the first word.
         for (i = ; ph.one[i]; i++)

         {

             temp = ph.one[i];

             if (temp > )

             {

                 // Both branches must assign to temp (`-=`): the accompanying
                 // article says the earlier version left the ternary's result
                 // unassigned, which broke hash-map lookups.
                 (temp > ) ? (temp -= ) : (temp -= );

                 __h += ( * __h + temp);

                 __h %= ;

             }

         }

         // Continue the same running hash over the second word.
         for (i = ; ph.two[i]; i++)

         {

             temp = ph.two[i];

             if (temp > )

             {

                 (temp > ) ? (temp -= ) : (temp -= );

                 __h += ( * __h + temp);

                 __h %= ;

             }

         }

         return size_t(__h);

     }

 };

 // Map from a two-word phrase to its count, using the custom hash and
 // equality functors defined above.
 typedef unordered_map<phrases, int, phrase_hash, phrase_cmp> Char_Phrase;

 // Global phrase table filled by getwords() and scanned by gettop().
 Char_Phrase phrasemap;

 // Global table of word-list heads, indexed by a value computed from a word's
 // first four letters (see getwords()).
 // NOTE(review): the array dimension literals are missing from this listing.
 struct wordsdata *fourletter[ *  *  * ] = {}; // bucketed by the first four letters

 // Entry point: scans the path taken from the MSVC global __argv, then
 // writes the statistics via gettop() and waits for a key press.
 // NOTE(review): the __argv index and the return value are missing from this
 // listing; there is also no check that an argument was supplied.
 int main()

 {

     int j = ;                                  // not used anywhere in main()

     long handle = ;                           // handle used for the directory search

     struct _finddata_t fileinfo;               // file-information structure

     char *path = __argv[];

     getfiles(path, &fileinfo, handle);

     gettop(fourletter);

     system("pause");

     return ;

 }

 // Walks `path` with _findfirst/_findnext. For an entry that is a
 // sub-directory (other than "." and ".."), it lists "path/*" and recurses on
 // each child; for any other entry it calls getwords(path, fourletter).
 //
 // path:     file, directory or search pattern handed to _findfirst.
 // fileinfo: caller-provided buffer that receives each found entry.
 // handle:   overwritten immediately by _findfirst, so the incoming value is
 //           never read.
 // Returns early when a _findfirst call fails.
 // NOTE(review): the comparison and return literals are missing from this
 // listing; presumably -1 is the failure sentinel -- confirm.
 // NOTE(review): the MSVC documentation declares the _findfirst return type
 // as intptr_t; storing it in `long` may truncate on 64-bit builds -- confirm.
 int getfiles(char *path, struct _finddata_t *fileinfo, long handle)

 {

     handle = _findfirst(path, fileinfo);            // first open of the parent path

     if (handle == -)

         return -;

     do

     {

         //printf("> %s\n", path);           // show the directory name

         if (fileinfo->attrib & _A_SUBDIR)           // the entry is a sub-directory

         {

             if (strcmp(fileinfo->name, ".") !=  && strcmp(fileinfo->name, "..") != )

             {

                 char temppath[] = "";              // holds the sub-directory path (size missing in this listing)

                 long temphandle = ;

                 struct _finddata_t tempfileinfo;

                 strcpy(temppath, path);

                 strcat(temppath, "/*");

                 temphandle = _findfirst(temppath, &tempfileinfo);  // first open of the sub-directory

                 // NOTE(review): this early return leaves `handle` open
                 // (no _findclose(handle) on this path).
                 if (temphandle == -)

                     return -;

                 do                              // recurse over every entry of the sub-directory

                 {

                     if (strcmp(tempfileinfo.name, ".") !=  && strcmp(tempfileinfo.name, "..") != )

                     {

                         // Rebuild "path/<child name>" and recurse on it.
                         strcpy(temppath, path);

                         strcat(temppath, "/");

                         strcat(temppath, tempfileinfo.name);

                         getfiles(temppath, &tempfileinfo, temphandle);

                     }

                 } while (_findnext(temphandle, &tempfileinfo) != -);

                 _findclose(temphandle);

             }// recursion finished

         } // sub-directory handled

         else

             // Not a directory: count the words of `path` itself (the path
             // that was searched, not fileinfo->name).
             getwords(path, fourletter);

     } while (_findnext(handle, fileinfo) != -);

     _findclose(handle);       // close the handle

     return ;

 }

 // Reads the file at `path` character by character, updates the global
 // charnum / linenum / wordnum counters, inserts each recognised word into
 // the list table `word`, and records every pair of consecutive words in the
 // global phrasemap.
 //
 // path: file to open in text mode ("r").
 // word: table of list heads indexed by a value computed from the word's
 //       first four letters; each non-empty slot holds a dummy head node
 //       whose `next` is the first real word.
 // Returns without counting anything when the file cannot be opened.
 // NOTE(review): nearly all numeric literals (buffer sizes, thresholds,
 // return values) are missing from this listing; the comments below describe
 // only what the remaining code shows.
 int getwords(char *path, struct wordsdata **word)

 {

     FILE *fp;

     int j = ;

     int cmp = ;

     int num = ;               // table index computed from the first four letters

     // NOTE(review): fgetc() returns int; storing it in a char can make a
     // valid byte indistinguishable from the end-of-file value -- confirm.
     char temp = ;             // ASCII value of the character just read

     int length = ;

     char present[] = "";  // holds the current word

     char address[] = "";  // letter offsets (from 'a'/'A') of the leading letters

     struct wordsdata *q = NULL;

     struct wordsdata *pre = NULL;

     struct wordsdata *neword = NULL;

     struct wordsdata *now = NULL;        // node of the word just processed

     struct wordsdata *previous = NULL;   // node of the word before it

     struct phrases *newphrase = NULL;

     if ((fp = fopen(path, "r")) == NULL)

     {

         //printf("error!!! \n", path);

         return ;

     }

     // Each successfully opened file adds one line; every '\n' or '\r' read
     // below adds another.
     linenum++;

     // Loop until the end-of-file value is read (literal missing; presumably -1).
     while (temp != -)

     {

         // read characters

         temp = fgetc(fp);

         if (temp >  && temp < )

             charnum++;

         if (temp == '\n' || temp == '\r')

             linenum++;

         // Consume one run of letters/digits (the first range's character
         // literals are empty in this listing; presumably '0'..'9').
         while ((temp >= '' && temp <= '') || (temp >= 'a' && temp <= 'z') || (temp >= 'A' && temp <= 'Z'))

         {

             // Leading part of the token: only letters are accepted here.
             if (length != - && length < )

             {

                 if (temp >= 'A')  // is a letter

                 {

                     present[length] = temp;

                     // Case-insensitive letter offset, used for the table index.
                     address[length] = (temp >= 'a' ? (temp - 'a') : (temp - 'A'));

                     length++;

                 }

                 else            // not a letter

                     length = -;

             }

             // Past the leading part: letters and digits are both kept.
             else if (length >= )

             {

                 present[length] = temp;

                 length++;

             }

             temp = fgetc(fp);

             if (temp >  && temp < )

                 charnum++;

             if (temp == '\n' || temp == '\r')

                 linenum++;

         } // end while

           // decide whether the token counts as a word

         if (length >= )

         {

             wordnum++;

             // compute the table index from the first four letters

             num = address[] *  + address[] *  + address[] *  + address[];

             // insert the current word

             if (word[num] == NULL)

             {

                 // Empty slot: create the dummy head node, then the first
                 // real node. The head's words/number are left unset.
                 word[num] = new wordsdata;

                 neword = new wordsdata;

                 neword->number = ;

                 neword->next = NULL;

                 strcpy(neword->words, present);

                 word[num]->next = neword;

                 now = neword;

             }

             else

             {

                 // Walk the list while wordcmp() reports `small` for the
                 // stored word versus the new one.
                 pre = word[num];

                 q = pre->next;

                 cmp = wordcmp(q->words, present);

                 while (cmp == small)

                 {

                     pre = q;

                     q = q->next;

                     if (q != NULL)

                         cmp = wordcmp(q->words, present);

                     else

                         break;

                 }

                 // Existing entry found: bump its count and, for one specific
                 // cmp value (literal missing), replace the stored spelling.
                 if (q != NULL && cmp <= )

                 {

                     now = q;

                     q->number++;

                     if (cmp == )

                         strcpy(q->words, present);

                 }

                 else

                 {

                     // No match: link a new node between pre and q.
                     neword = new wordsdata;

                     neword->number = ;

                     strcpy(neword->words, present);

                     pre->next = neword;

                     neword->next = q;

                     now = neword;

                 }

             }

             // Record the (previous word, current word) pair as a phrase.
             if (previous != NULL)

             {

                 // NOTE(review): newphrase is allocated for every pair and
                 // never deleted in this function; the map stores a copy.
                 newphrase = new phrases;

                 newphrase->one = previous->words;

                 newphrase->two = now->words;

                 // NOTE(review): this iterator type names
                 // unordered_map<phrases, int> with default hash/equality,
                 // not Char_Phrase as declared for phrasemap -- confirm it
                 // compiles as intended.
                 unordered_map<phrases, int>::const_iterator got = phrasemap.find( *newphrase);

                 if (got != phrasemap.end())

                 {

                     phrasemap[*newphrase]++;

                 }

                 else

                 {

                     phrasemap.insert(pair<phrases, int>(*newphrase, ));

                 }

             }

             previous = now;

             // clear the current-word buffer

             for (int j = ; present[j] && j < ; j++)

                 present[j] = ;

         }

         // Start a fresh token.
         length = ;

     }

     fclose(fp);

     return ;

 }

 // Compares two words. Lower-case letters are adjusted by a constant before
 // comparison (literal missing; presumably folding to upper case), so the
 // scan is case-insensitive. After the common prefix, a trailing run of
 // characters in one range is skipped on both sides (character literals are
 // empty in this listing; presumably the digits '0'..'9').
 //
 // Returns strcmp(str1, str2) when both words are exhausted after that skip
 // (the words are "equivalent"); otherwise one of several constants depending
 // on whether the first differing character of str1 is smaller or larger.
 // NOTE(review): those constants are missing here; getwords() compares the
 // result against `small` -- confirm the mapping against the original source.
 int wordcmp(char *str1, char *str2)

 {

     char *p1 = str1;

     char *p2 = str2;

     char q1 = *p1;

     char q2 = *p2;

     // Case-fold the first characters.
     if (q1 >= 'a' && q1 <= 'z')

         q1 -= ;

     if (q2 >= 'a' && q2 <= 'z')

         q2 -= ;

     // Advance while the case-folded characters match.
     while (q1 && q2 && q1 == q2)

     {

         p1++;

         p2++;

         q1 = *p1;

         q2 = *p2;

         if (q1 >= 'a' && q1 <= 'z')

             q1 -= ;

         if (q2 >= 'a' && q2 <= 'z')

             q2 -= ;

     }

     // Skip the trailing run on each side (q1/q2 keep the characters at the
     // first point of difference).
     while (*p1 >= '' && *p1 <= '')

         p1++;

     while (*p2 >= '' && *p2 <= '')

         p2++;

     if (*p1 ==  && *p2 == )           // the two words are equivalent

         return strcmp(str1, str2);       // original note: -1 if the former sorts earlier, 1 if later, 0 if identical (strcmp only guarantees the sign)

     if (q1 < q2)                   // the former is smaller

         return ;

     if (q1 > q2)                   // the latter is smaller

         return ;

     return ;

 }

 // Writes the character/word/line totals to "result.txt", followed by the
 // most frequent words and then the most frequent two-word phrases.
 //
 // word: table of word-list heads built by getwords(); each non-NULL slot is
 //       a dummy head whose `next` starts the real list.
 // Both rankings use the same scheme: the candidate is written into one
 // scratch slot of the top array and then swapped toward the front while its
 // count exceeds its neighbour's.
 // NOTE(review): the array sizes, slot indices and loop bounds are missing
 // from this listing, so the number of entries kept cannot be read from here.
 // NOTE(review): the fopen() result is used without a NULL check.
 int gettop(struct wordsdata **word)

 {

     int i = , j = ;

     struct wordsdata *topw[] = {};

     struct phrases *toph[] = {};

     struct wordsdata *w = NULL;

     FILE *fp;

     fp = fopen("result.txt", "w");

     fprintf(fp,"characters:%d \nwords:%d \nlines:%d\n",  charnum,wordnum, linenum);

     // Allocate the ranking slots and initialise their counts.
     for (j = ; j < ; j++)

     {

         toph[j] = new struct phrases;

         toph[j]->num = ;

         topw[j] = new struct wordsdata;

         topw[j]->number = ;

     }

     // Rank the words: visit every node of every non-empty list.
     for (i = ; i < ; i++)

     {

         if (word[i] != NULL)

         {

             w = word[i]->next;

             while (w != NULL)

             {

                 // Load the candidate into the scratch slot; `next` is used
                 // here to point at the real word node, not as a list link.
                 topw[]->number = w->number;

                 topw[]->next = w;

                 j = ;

                 // Swap the candidate forward while it outranks its neighbour.
                 while (j >  && topw[j]->number > topw[j - ]->number)

                 {

                     topw[] = topw[j];

                     topw[j] = topw[j - ];

                     topw[j - ] = topw[];

                     j--;

                 }

                 w = w->next;

             }

         }

     }

     // Print the ranked words, skipping slots whose count is zero.
     for (j = ; j < ; j++)

     {

         if (topw[j]->number)

             fprintf(fp,"\n%s :%d", topw[j]->next->words, topw[j]->number);

     }

     // Rank the phrases the same way, iterating over the global phrasemap.
     for (Char_Phrase::iterator it = phrasemap.begin(); it != phrasemap.end(); it++)

     {

         toph[]->one = it->first.one;

         toph[]->two = it->first.two;

         toph[]->num = it->second;

         j = ;

         while (j >  && toph[j]->num > toph[j - ]->num)

         {

             toph[] = toph[j];

             toph[j] = toph[j - ];

             toph[j - ] = toph[];

             j--;

         }

     }

     fprintf(fp, "\n");

     // Print the ranked phrases, skipping slots whose count is zero.
     for (j = ; j < ; j++)

     {

         if (toph[j]->num)

             fprintf(fp,"\n%s %s :%d", toph[j]->one, toph[j]->two, toph[j]->num);

     }

     fclose(fp);

     return ;

 }

记一个男默女泪的 BUG的更多相关文章

salesforce零基础学习（一百一十五）记一个有趣的bug
本篇参考:https://help.salesforce.com/s/articleView?language=en_US&type=1&id=000319486 page layou ...
记一个神奇的Bug
多年以后,当Abraham凝视着一行行新时代的代码在屏幕上川流不息的时候,他会想起2019年4月17日那个不平凡夜晚,以及在那个夜晚他发现的那个不可思议的Bug. 虽然像无数个普普通通的夜晚一样,我在 ...
【bug】记一个有趣的“bug”
产品经理在使用我们用户功能的是,需要查询一个用户,知道这个用户的id,我说支持模糊查询的. 他输入"余XX",点击查询,怎么都查不出这个用户. 我到用户表里确认,确实有这个ID的用 ...
记一个深层的bug
1. 业务场景产品需要每隔几天进行一次组件的更新,在自动化测试中,每隔30s检测一次更新源上的某个文件MD5值是否与本地一致,不一致代表有更新的版本,开始更新. 2. 问题出现一个再平常不过的繁忙 ...
记一个社交APP的开发过程——基础架构选型（转自一位大哥）
记一个社交APP的开发过程——基础架构选型目录[-] 基本产品形态技术选型最近两周在忙于开发一个社交App,因为之前做过一点儿社交方面的东西,就被拉去做API后端了,一个人头一次完整的去搭这么一 ...
一个iOS6系统bug＋一个iOS7系统bug
先看实际工作中遇到的两个bug:(1)iPhone Qzone有一个导航栏背景随着页面滑动而渐变的体验,当页面滑动到一定距离时,会改变导航栏上title文本的颜色,但是有一个莫名其妙的bug,如下:
FIREDAC（DELPHI10 or 10.1）提交数据给ORACLE数据库的一个不是BUG的BUG
发现FIREDAC(DELPHI10 or 10.1)提交数据给ORACLE数据库的一个不是BUG的BUG,提交的表名大小写是敏感的. 只要有一个表名字母的大小写不匹配,ORACLE就会认为是一个不认 ...
pycharm下： conda installation is not found ----一个公开的bug的解决方案
pycharm conda installation is not found ----一个公开的bug的解决方案 pycharm+anaconda 是当前的主流的搭建方案,但是常出现上述问题. ...
一个神奇的bug：OOM？优雅终止线程？系统内存占用较高？
摘要:该项目是DAYU平台的数据开发(DLF),数据开发中一个重要的功能就是ETL(数据清洗).ETL由源端到目的端,中间的业务逻辑一般由用户自己编写的SQL模板实现,velocity是其中涉及的一种 ...

随机推荐

Robot Framework（五）使用测试库
使用测试库测试库包含那些最低级别的关键字,通常称为库关键字,实际上与被测系统交互.所有测试用例总是使用某些库中的关键字,通常是通过更高级别的用户关键字.本节介绍如何使用测试库以及如何使用它们提供的 ...
Python爬虫：HTTP协议、Requests库（爬虫学习第一天）
HTTP协议: HTTP(Hypertext Transfer Protocol):即超文本传输协议.URL是通过HTTP协议存取资源的Internet路径,一个URL对应一个数据资源. HTTP协议 ...
Vue push() pop() shift() unshift() splice() sort() reverse() ...
Vue 变异方法 push() 方法可向数组的末尾添加一个或多个元素,并返回新的长度. pop() 方法用于删除并返回数组的最后一个元素. shift() 方法用于把数组的第一个元素从其中删除,并返回 ...
[jzoj5786]【NOIP2008模拟】观察 (dfs序+lca)
传送门 Description infleaking十分愉快地走在路上, 因为经过10^9^9^9年后, 他得到了一个新技能--观察大法. 刚出来的infleaking就想要挑战自我. 为什么infl ...
爬虫框架Scrapy初步使用
本文转载自: Scrapy 爬取并分析酷安 6000 款 App,找到良心佳软(抓取篇) https://www.makcyun.top/web_scraping_withpython10.html ...
多种方法爬取猫眼电影Top100排行榜,保存到csv文件,下载封面图
参考链接: https://blog.csdn.net/BF02jgtRS00XKtCx/article/details/83663400 https://www.makcyun.top/web_sc ...
python-if判断
1. python 条件语句 Python条件语句是通过一条或多条语句的执行结果(True或者False)来决定执行的代码块. 可以通过下图来简单了解条件语句的执行过程: Python程序语言指定任何 ...
15.Hibernate一对多双向关联映射+分页
1.创建如下数据库脚本 --创建用户信息表 --编号,用户名,密码,年龄,性别,昵称,手机,地址,管理员,图像地址 create table users ( id ) primary key, use ...
OpenCV实现USM锐化与测试
OpenCV实现USM锐化 [转]http://www.programdevelop.com/4964391/ USM (Unsharp masking) is a common operation ...
eclipse重置页面恢复到最初布局状态
eclipse重置页面恢复到最初布局状态 window->perspective->reset perspective