字典是针对非序列集合而提供的一种数据类型，通过键（key）而不是位置来存取数据。Python 3.7 之前不保证字典中数据的顺序；自 Python 3.7 起，字典保证保持键的插入顺序。

字典的操作

为字典增加一项

dict[key] = value

students = {"Z004":"John","T002":"Peter"}
students
Out[23]: {'T002': 'Peter', 'Z004': 'John'}
students["S007"] = "Susan"
students
Out[25]: {'S007': 'Susan', 'T002': 'Peter', 'Z004': 'John'}

访问字典中的值

dict[key] 返回key对应的值value；若key不存在，则抛出KeyError异常

dict.get(key,default)--返回字典中key对应的值，若未找到key，则返回default值；default可以省略，省略时返回None

删除字典中的一项

del dict[key]

字典的遍历

遍历字典的键key

for key in dict.keys():print(key)

遍历字典的值value

for value in dict.values():print(value)

遍历字典的项

for item in dict.items():print(item)

判断一个键是否在字典中

注：in / not in 只检查键，不检查值；若要判断某个值是否存在，可以使用 value in dict.values()

in 或者 not in

删除字典项目

dict.clear()--删除字典中的所有项目

dict.pop(key)--删除并返回字典中key对应的值

直接赋值、浅拷贝、深拷贝

直接赋值：其实就是对象的引用（别名）。

浅拷贝(copy)：拷贝父对象，不会拷贝对象的内部的子对象。

深拷贝(deepcopy)： copy 模块的 deepcopy 方法，完全拷贝了父对象及其子对象。

字典浅拷贝实例：

>>> a = {1: [1,2,3]}
>>> b = a.copy()
>>> a, b
({1: [1, 2, 3]}, {1: [1, 2, 3]})
>>> a[1].append(4)
>>> a, b
({1: [1, 2, 3, 4]}, {1: [1, 2, 3, 4]})

深度拷贝需要引入 copy 模块：

>>> import copy
>>> c = copy.deepcopy(a)
>>> a, c
({1: [1, 2, 3, 4]}, {1: [1, 2, 3, 4]})
>>> a[1].append(5)
>>> a, c
({1: [1, 2, 3, 4, 5]}, {1: [1, 2, 3, 4]})

http://www.runoob.com/w3cnote/python-understanding-dict-copy-shallow-or-deep.html

示例：词频统计

第一步：输入文章

第二步：建立用于词频计算的空字典

第三步：对文本计算词频。如果文章不太长，则不需要逐行读取，一次性读入全部内容即可。

第四步：从字典中获取数据对到列表中

第五步：对列表中的数据对交换位置，并从大到小进行排序

第六步：输出结果

下图所示为程序输出结果及输出的统计结果

汉字的词频统计、排除特定词集合的程序后续更新...

普通版本

def getText(path='hamlet.txt'):
    """Return the text of *path*, lower-cased, with punctuation blanked out.

    Args:
        path: File to read. Defaults to ``'hamlet.txt'`` (relative to the
            current working directory), so a bare ``getText()`` call reads
            the same file as before.

    Returns:
        The file contents as a single lower-case string in which every
        character listed in ``punctuation`` below has been replaced by one
        space, ready to be split on whitespace.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Both quote characters are kept inside this one literal on purpose:
    # writing them as adjacent literals (for example '...\'' followed by "")
    # silently drops the double quote.  '!' is listed so that "lord!" and
    # "lord" are counted as the same word.
    punctuation = '~@#$%^&*()_-+=<>?/,.:;{}[]|\'"!'
    # The with-block closes the file even if read() fails.
    with open(path, 'r') as src:
        txt = src.read().lower()
    # One pass over the text instead of one replace() call per character.
    return txt.translate(str.maketrans(punctuation, ' ' * len(punctuation)))
hamletTxt = getText()
words = hamletTxt.split()

# Tally how many times each distinct word occurs.
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
sumcount = len(words)  # total number of word tokens in the text

# (word, count) pairs, most frequent first; the sort is stable, so words with
# equal counts stay in first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Print the ten most frequent words.  Slicing (rather than indexing 0..9)
# keeps this working for texts with fewer than ten distinct words.
for word, count in items[:10]:
    print('{0:<10}{1:>5}'.format(word, count))

# Build the report: a summary header followed by one "word<TAB>count" line
# per distinct word.
lines = [
    '单词种类：' + str(len(items)) + '\n',
    '单词总数：' + str(sumcount) + '\n',
    '词频排序如下:\n',
    'word\tcounts\n',
]
lines.extend('\t'.join([str(word), str(count)]) + '\n' for word, count in items)
print('\n统计完成！\n')

# Write the report; the with-block closes the file even if writing fails.
with open('词频统计结果.txt', "w") as outfile:
    outfile.writelines(lines)

排除特定词库

# Exclusion list: the words that are to be left out of the result.
excludes = (
    'the and to of i a in it that is '
    'you my with not his this but for '
    'me s he be as so him your'
).split()
def getText(path='hamlet.txt'):
    """Read *path* and return it lower-cased with punctuation turned into spaces.

    Args:
        path: File to read. Defaults to ``'hamlet.txt'`` in the current
            working directory, which is what a bare ``getText()`` call uses.

    Returns:
        One lower-case string; each character from ``punctuation`` below is
        replaced by a single space so the result can be split on whitespace.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The single and the double quote are both part of this one literal;
    # spelling them as adjacent string literals concatenates them and can
    # silently lose the double quote.  '!' is included so that a word
    # followed by an exclamation mark is not counted as a different word.
    punctuation = '~@#$%^&*()_-+=<>?/,.:;{}[]|\'"!'
    # Context manager: the handle is released even when read() raises.
    with open(path, 'r') as src:
        txt = src.read().lower()
    # Single translation pass instead of a chain of replace() calls.
    return txt.translate(str.maketrans(punctuation, ' ' * len(punctuation)))
hamletTxt = getText()
words = hamletTxt.split()

# Tally how many times each distinct word occurs.
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
sumcount = len(words)  # total number of word tokens, excluded words included

# Same tallies without the words listed in `excludes`; `counts` itself is
# left untouched.
counts_ex = {word: n for word, n in counts.items() if word not in excludes}

# (word, count) pairs, most frequent first; ties keep first-seen order.
items = sorted(counts_ex.items(), key=lambda x: x[1], reverse=True)

# Print the ten most frequent remaining words.  Slicing keeps this working
# when fewer than ten distinct words are left after the exclusion.
for word, count in items[:10]:
    print('{0:<10}{1:>5}'.format(word, count))

# Build the report: a summary header followed by one "word<TAB>count" line
# per remaining word.
lines = [
    '单词种类：' + str(len(items)) + '\n',
    '单词总数：' + str(sumcount) + '\n',
    '词频排序如下:\n',
    'word\tcounts\n',
]
lines.extend('\t'.join([str(word), str(count)]) + '\n' for word, count in items)
print('\n统计完成！\n')

# Write the report; the with-block closes the file even if writing fails.
with open('词频统计结果.txt', "w") as outfile:
    outfile.writelines(lines)

GUI版本

import os
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox as mBox
from tkinter import scrolledtext
from tkinter import ttk
def getText(DIR):
    """Return the full contents of the text file at path *DIR*.

    Args:
        DIR: Path of the file to read (despite the name, a file path, as it
            is passed straight to ``open``).

    Returns:
        The file contents as one string, unmodified.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The with-block closes the handle on every exit path; a close() placed
    # after the return statement would never run.
    with open(DIR, 'r') as src:
        return src.read()
def __opendir():
    """Let the user pick a file and show its contents in the source pane.

    Clears ``srcText``, opens a file-selection dialog, stores the chosen path
    in ``entryvar`` (and therefore in the entry widget bound to it), then
    loads the file with ``getText`` and displays it.  If the dialog is
    cancelled, a warning is shown and nothing is loaded.
    """
    srcText.delete('1.0', tk.END)  # drop whatever was displayed before
    fname = filedialog.askopenfilename(
        filetypes=(("Text file", "*.txt*"), ("HTML files", "*.html;*.htm")))
    entryvar.set(fname)  # setting the variable updates the bound Entry
    if not fname:
        mBox.showwarning('警告', message='未选择文件夹！')
        # Stop here: an empty path cannot be opened and would only raise.
        return
    # Display the text that is going to be analysed.
    srcText.insert(tk.END, getText(fname))
    srcText.update()
def srcEnter(event=None):
    """Load the file whose path was typed into ``DirEntry`` (Return-key handler).

    Args:
        event: The Tk event passed by the key binding; unused.

    Shows a warning and does nothing else when the entry is empty.
    Otherwise replaces the contents of ``srcText`` with the text of the file.
    """
    fname = DirEntry.get()
    if not fname:
        mBox.showwarning('警告', message='请选择文件！')
        # Stop here: an empty path cannot be opened and would only raise.
        return
    Txt = getText(fname)
    # Replace, rather than append to, whatever is currently displayed so the
    # pane always shows exactly the file named in the entry.
    srcText.delete('1.0', tk.END)
    srcText.insert(tk.END, Txt)
    srcText.update()
def wordFrequence():
    """Count word frequencies in the selected file and show them in ``dstText``.

    Reads the file named in ``DirEntry``, lower-cases it, replaces the
    punctuation characters listed below with spaces and splits the result on
    whitespace.  The result pane then receives the number of distinct words,
    the total number of words, and one line per word with its count, most
    frequent first.  Shows a warning and does nothing else when no file name
    has been entered.
    """
    fname = DirEntry.get()
    if not fname:
        mBox.showwarning('警告', message='请选择文件！')
        # Stop here: an empty path cannot be opened and would only raise.
        return

    # Normalise the text: lower case, punctuation turned into spaces.
    # The backslash is written as '\\' because '\^' is not a valid escape.
    txt = getText(fname).lower()
    for ch in '!"#$%&*()+,.-;:<=>?@[]\\^_{}|`':
        txt = txt.replace(ch, ' ')

    # Tally the words in a dict keyed by word.
    words = txt.split()
    counts = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1

    # Most frequent first; the sort is stable, so ties keep first-seen order.
    items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

    # The total is simply the number of tokens (the sum of all counts).
    num = len(words)

    # Replace any earlier result instead of appending a second report to it.
    dstText.delete('1.0', tk.END)
    dstText.insert(tk.END, '单词种类：' + str(len(items)) + '\n')
    dstText.insert(tk.END, '单词总数：' + str(num) + '\n')
    dstText.insert(tk.END, '词频排序如下:\n')
    dstText.insert(tk.END, '#word:\t\t#counts:\n')
    for word, count in items:
        dstText.insert(tk.END, word + '\t\t' + str(count) + '\n')
def savefile():
    """Ask for a directory and save the displayed statistics into it.

    Opens a directory-selection dialog, stores the chosen directory in
    ``outvar`` (and therefore in the entry widget bound to it) and writes the
    current contents of ``dstText`` to ``词频统计结果.txt`` inside that
    directory.  If the dialog is cancelled, a warning is shown and nothing is
    written.

    Raises:
        OSError: If the output file cannot be created or written.
    """
    dirname = filedialog.askdirectory()
    outvar.set(dirname)  # setting the variable updates the bound Entry
    if not dirname:
        mBox.showwarning('警告', message='请选择保存位置！')
        # Stop here: without a directory there is nowhere sensible to write.
        return
    # os.path.join picks the right separator on every platform; a literal
    # backslash in front of the name only works on Windows and is an invalid
    # string escape.
    fname = os.path.join(dirname, '词频统计结果.txt')
    # The with-block closes the file even if writing fails.
    with open(fname, "w") as outfile:
        outfile.write(dstText.get('1.0', tk.END))
    mBox.showinfo('词频统计', '统计结果保存成功！')
def dstEnter(event=None):
    """Save the displayed statistics into the directory typed in the entry.

    Return-key handler for the save-location entry.

    Args:
        event: The Tk event passed by the key binding; unused.

    Reads the directory from ``outvar`` and writes the current contents of
    ``dstText`` to ``词频统计结果.txt`` inside it.  Shows a warning and writes
    nothing when the entry is empty.

    Raises:
        OSError: If the output file cannot be created or written.
    """
    dirname = outvar.get()
    if not dirname:
        mBox.showwarning('警告', message='请选择保存位置！')
        # Stop here: without a directory there is nowhere sensible to write.
        return
    # os.path.join picks the right separator on every platform; a literal
    # backslash in front of the name only works on Windows and is an invalid
    # string escape.
    fname = os.path.join(dirname, '词频统计结果.txt')
    # The with-block closes the file even if writing fails.
    with open(fname, "w") as outfile:
        outfile.write(dstText.get('1.0', tk.END))
    mBox.showinfo('词频统计', '统计结果保存成功！')
# ---- Main window ---------------------------------------------------------
win = tk.Tk()
win.title("词频统计GUI")
win.resizable(0, 0)  # fixed-size window

# ---- Widgets (created in this order; names are used by the callbacks) -----

# Button that opens the file-selection dialog.
SelDirButton = ttk.Button(win, text='选择文件目录：', command=__opendir)
SelDirButton.grid(column=0, row=0, padx=3, pady=3, sticky=tk.W)

# Entry showing the selected file path; pressing Return loads that file.
entryvar = tk.StringVar()
DirEntry = ttk.Entry(win, textvariable=entryvar, width=30)
DirEntry.grid(column=0, row=1, padx=3, pady=3, sticky=tk.W)
DirEntry.bind('<Return>', srcEnter)

# Scrolled text pane showing the contents of the source file.
srcText = scrolledtext.ScrolledText(win, height=30, width=30)
srcText.grid(column=0, row=2, columnspan=1, padx=3, pady=3, sticky=tk.W)

# Button that runs the word-frequency count.
CalcuButton = ttk.Button(win, text='词频统计', command=wordFrequence)
CalcuButton.grid(column=1, row=0, padx=3, pady=3, sticky=tk.W)

# Scrolled text pane showing the statistics.
dstText = scrolledtext.ScrolledText(win, height=30, width=30)
dstText.grid(column=1, row=2, columnspan=2, padx=3, pady=3, sticky=tk.W)

# Button that saves the statistics to a file.
SavefileButton = ttk.Button(win, text='统计结果保存到：', command=savefile)
SavefileButton.grid(column=2, row=0, padx=3, pady=3, sticky=tk.W)

# Entry showing the save directory; pressing Return saves there.
outvar = tk.StringVar()
saveEntry = ttk.Entry(win, textvariable=outvar, width=30)
saveEntry.grid(column=1, row=1, columnspan=2, padx=3, pady=3, sticky=tk.W)
saveEntry.bind('<Return>', dstEnter)

# ---- Start the GUI event loop ----------------------------------------------
win.mainloop()

<pre code_snippet_id="2297514" snippet_file_name="blog_20170328_1_7839256" name="code" class="python"><pre code_snippet_id="2297514" snippet_file_name="blog_20170328_1_7839256"></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
<pre></pre>
</pre>

Python字典使用--词频统计的GUI实现的更多相关文章

用Python实现一个词频统计(词云+图）
第一步:首先需要安装工具python 第二步:在电脑cmd后台下载安装如下工具: (有一些是安装好python电脑自带有哦) 有一些会出现一种情况就是安装不了词云展示库有下面解决方法,需看请复制链接 ...
Python3.7 练习题(二) 使用Python进行文本词频统计
# 使用Python进行词频统计 mytext = """Background Industrial Light & Magic (ILM) was starte ...
利用python实现简单词频统计、构建词云
1.利用jieba分词,排除停用词stopword之后,对文章中的词进行词频统计,并用matplotlib进行直方图展示 # coding: utf-8 import codecs import ma ...
（改进）Python语言实现词频统计
需求: 1.设计一个词频统计的程序. 2.英语文章中包含的英语标点符号不计入统计. 3.将统计结果按照单词的出现频率由大到小进行排序. 设计: 1.基本功能和用法会在程序中进行提示. 2.原理是利用分 ...
python实现简易词频统计-源码
需求:给瓦尔登湖文章统计单词出现的频率思路:首先读取文件并以空格分割得到列表,然后利用for循环遍历列表中的元素并把去掉列表元素中的符号,第三步去掉相同的元素,将列表转换为一个字典,最后按照键值对升 ...
python:Hamlet英文词频统计
#CalHamletV1.py def getText(): #定义函数读取文件 txt = open("hamlet.txt","r").read() txt ...
用Python来进行词频统计
# 把语料中的单词全部抽取出来, 转成小写, 并且去除单词中间的特殊符号 def words(text): return re.findall('[a-z]+', text.lower()) def ...
Python 词频统计
利用Python做一个词频统计 GitHub地址:FightingBob [Give me a star , thanks.] 词频统计对纯英语的文本文件[Eg: 瓦尔登湖(英文版).txt]的英文 ...
spark ---词频统计(二)
利用python来操作spark的词频统计,现将过程分享如下: 1.新建项目:(这里是在已有的项目中创建的,可单独创建wordcount项目) ①新建txt文件: wordcount.txt (文件内 ...

随机推荐

58、synchronized同步方法
线程安全问题先看下面代码出现的问题: 定义一个Task类,里面有一个成员变量和一个有boolean类型参数的方法,方法内部会根据传入参数修改成员变量的值. package com.sutaoyu.T ...
Python概念-禁锢术之__slots__
之所以给它起名为禁锢术,并非空穴来风,下面我们来了解一下__slots__ __slots__:其实就是将类中的名称锁定,实例化对象,只可以赋值和调用,不可以删除名字和增加新的名字代码示例:(实例化 ...
nginx+tomat https ssl 部署完美解决方案
关于nginx+tomcat https的部署之前网上一直有2种说法: 1.nginx和tomcat都要部署ssl证书 2.nginx部署ssl证书,tomcat增加ssl支持在实际的部署过程中ng ...
[转]ROS(Robot Operating System)常用环境变量介绍
本文简单介绍ROS系统中常用的环境变量用途及设置方式.ROS系统环境中除了必须配置的环境变量以外,其他的也是十分有用,通过修改变量路径,可以设置ROS系统中log文件存放路径,单元测试结果存放路径等. ...
用Nginx分流绕开Github反爬机制
用Nginx分流绕开Github反爬机制 0x00 前言如果哪天有hacker进入到了公司内网为所欲为,你一定激动地以为这是一次蓄谋已久的APT,事实上,还有可能只是某位粗线条的员工把VPN信息泄露 ...
pymongo的几个操作
# -*- coding: utf-8 -*- # @Time : 2018/9/11 17:16 # @Author : cxa # @File : mongotest.py # @Software ...
微信web开发者工具无法打开的解决方法
参考网址:https://blog.csdn.net/gz506840597/article/details/77915488 我试了上面兄弟说的方法还是无效下面说说我的方法: 我打开文件所在位置, ...
08 Packages 包
Packages Standard library Other packages Sub-repositories Community Standard library Name Synopsis ...
Nginx - upstream 模块及参数测试
目录 - 1. 前言- 2. 配置示例及指令说明 - 2.1 配置示例 - 2.2 指令 - 2.3 upstream相关变量- 3. 参数配置及测试 - 3.1 max_fa ...
jQuery选择器详解及实例---《转载》
选择器是jQuery最基础的东西,本文中列举的选择器基本上囊括了所有的jQuery选择器,也许各位通过这篇文章能够加深对jQuery选择器的理解,它们本身用法就非常简单,我更希望的是它能够提升个人编写 ...

Python字典使用--词频统计的GUI实现