# mongodb.py
from chatterbot.storage import StorageAdapter
class Query(object):
    """
    Builder for MongoDB query documents.

    Every builder method returns a new ``Query`` and leaves the instance it
    was called on untouched, so one base query can be extended in several
    independent directions.
    """

    def __init__(self, query=None):
        """
        :param query: Initial query document. Defaults to an empty query.
        :type query: dict
        """
        # A None default avoids sharing a single dict between all instances
        self.query = {} if query is None else query

    @staticmethod
    def _clause_copy(query, key):
        """
        Return a copy of the operator document stored under ``key``.

        An empty dict is returned when the key is absent.

        :raises TypeError: If the key already holds a literal value (for
            example an exact-match string or list) rather than a dict of
            operators, because operators cannot be added to it.
        """
        clause = query.get(key, {})
        if not isinstance(clause, dict):
            raise TypeError(
                "Cannot add query operators to '%s': it already holds the "
                "literal value %r" % (key, clause)
            )
        return dict(clause)

    def value(self):
        """
        Return a (shallow) copy of the query document.
        """
        return self.query.copy()

    def raw(self, data):
        """
        Return a new query with the key/value pairs of ``data`` merged in.
        """
        query = self.query.copy()
        query.update(data)
        return Query(query)

    def statement_text_equals(self, statement_text):
        """
        Return a new query that matches statements whose text is
        exactly ``statement_text``.
        """
        query = self.query.copy()
        query['text'] = statement_text
        return Query(query)

    def statement_text_not_in(self, statements):
        """
        Return a new query that excludes statements whose text is one of
        ``statements``. Texts already excluded by this query are kept.
        """
        query = self.query.copy()

        # Copy the nested clause and list before extending them; a shallow
        # copy of the query alone would share them with this instance.
        text_clause = self._clause_copy(query, 'text')
        excluded = list(text_clause.get('$nin', []))
        excluded.extend(statements)
        text_clause['$nin'] = excluded

        query['text'] = text_clause
        return Query(query)

    def statement_response_list_contains(self, statement_text):
        """
        Return a new query that matches statements whose ``in_response_to``
        list has an element with the text ``statement_text``.
        """
        query = self.query.copy()

        # Copy each nested level before changing it so that the query this
        # one is derived from is not modified.
        response_clause = self._clause_copy(query, 'in_response_to')
        element_match = dict(response_clause.get('$elemMatch', {}))
        element_match['text'] = statement_text
        response_clause['$elemMatch'] = element_match

        query['in_response_to'] = response_clause
        return Query(query)

    def statement_response_list_equals(self, response_list):
        """
        Return a new query that matches statements whose ``in_response_to``
        value is equal to ``response_list``.
        """
        query = self.query.copy()
        query['in_response_to'] = response_list
        return Query(query)
class MongoDatabaseAdapter(StorageAdapter):
    """
    The MongoDatabaseAdapter is an interface that allows
    ChatterBot to store statements in a MongoDB database.

    :keyword database: The name of the database you wish to connect to.
    :type database: str

    .. code-block:: python

       database='chatterbot-database'

    :keyword database_uri: The URI of a remote instance of MongoDB.
    :type database_uri: str

    .. code-block:: python

       database_uri='mongodb://example.com:8100/'
    """

    def __init__(self, **kwargs):
        """
        Connect to MongoDB and prepare the collections used by the adapter.

        Reads the ``database`` and ``database_uri`` keyword arguments,
        falling back to ``'chatterbot-database'`` and
        ``'mongodb://localhost:27017/'`` respectively.
        """
        super(MongoDatabaseAdapter, self).__init__(**kwargs)
        from pymongo import MongoClient
        from pymongo.errors import OperationFailure

        self.database_name = self.kwargs.get(
            'database', 'chatterbot-database'
        )
        self.database_uri = self.kwargs.get(
            'database_uri', 'mongodb://localhost:27017/'
        )

        # Connect using the configured URI (a local instance by default)
        self.client = MongoClient(self.database_uri)

        # Increase the sort buffer to 42M if possible
        try:
            self.client.admin.command({
                'setParameter': 1,
                'internalQueryExecMaxBlockingSortBytes': 44040192
            })
        except OperationFailure:
            # Best effort only: carry on with the server's current setting
            # when the parameter cannot be changed.
            pass

        # Specify the name of the database
        self.database = self.client[self.database_name]

        # The mongo collection of statement documents
        self.statements = self.database['statements']

        # The mongo collection of conversation documents
        self.conversations = self.database['conversations']

        # Set a requirement for the text attribute to be unique
        self.statements.create_index('text', unique=True)

        self.base_query = Query()

    def get_statement_model(self):
        """
        Return the class for the statement model.

        The ``storage`` attribute of the class is set to this adapter.
        """
        from chatterbot.conversation import Statement

        # Create a storage-aware statement
        statement = Statement
        statement.storage = self

        return statement

    def get_response_model(self):
        """
        Return the class for the response model.

        The ``storage`` attribute of the class is set to this adapter.
        """
        from chatterbot.conversation import Response

        # Create a storage-aware response
        response = Response
        response.storage = self

        return response

    def count(self):
        """
        Return the number of documents in the statements collection.
        """
        # NOTE(review): Collection.count() is deprecated in newer PyMongo
        # releases in favour of count_documents(); confirm the supported
        # PyMongo version before switching.
        return self.statements.count()

    def find(self, statement_text):
        """
        Return the statement whose text equals ``statement_text``,
        or None if no such statement is stored.
        """
        query = self.base_query.statement_text_equals(statement_text)
        values = self.statements.find_one(query.value())

        if not values:
            return None

        # The document was matched on its text, so the shared conversion
        # helper builds the same object that was previously built inline.
        return self.mongo_to_object(values)

    def deserialize_responses(self, response_list):
        """
        Takes the list of response items and returns
        the list converted to Response objects.

        The items of ``response_list`` are not modified.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')
        proxy_statement = Statement('')

        for response in response_list:
            # Work on a copy so the caller's dictionaries keep their text
            response_data = dict(response)
            text = response_data.pop('text')

            proxy_statement.add_response(
                Response(text, **response_data)
            )

        return proxy_statement.in_response_to

    def mongo_to_object(self, statement_data):
        """
        Return Statement object when given data
        returned from Mongo DB.

        ``statement_data`` is not modified.

        :raises KeyError: If the data has no ``text`` entry.
        """
        Statement = self.get_model('statement')

        # Work on a copy so the caller's document is left intact
        data = dict(statement_data)
        statement_text = data.pop('text')

        data['in_response_to'] = self.deserialize_responses(
            data.get('in_response_to', [])
        )

        return Statement(statement_text, **data)

    def filter(self, **kwargs):
        """
        Returns a list of statements in the database
        that match the parameters specified.

        Recognised keyword arguments:

        * ``order_by``: name of the field to sort on. Sorting is ascending,
          except for ``created_at`` which is sorted descending.
        * ``in_response_to``: iterable of response texts that the
          statement's response list must equal.
        * ``in_response_to__contains``: a response text that must appear in
          the statement's response list.

        Any other keyword arguments are added to the query unchanged.
        """
        import pymongo

        query = self.base_query

        order_by = kwargs.pop('order_by', None)

        # Wrap each response text in the document form used for storage
        if 'in_response_to' in kwargs:
            serialized_responses = [
                {'text': response}
                for response in kwargs.pop('in_response_to')
            ]
            query = query.statement_response_list_equals(serialized_responses)

        if 'in_response_to__contains' in kwargs:
            query = query.statement_response_list_contains(
                kwargs.pop('in_response_to__contains')
            )

        query = query.raw(kwargs)

        matches = self.statements.find(query.value())

        if order_by:
            direction = pymongo.ASCENDING

            # Sort so that newer datetimes appear first
            if order_by == 'created_at':
                direction = pymongo.DESCENDING

            matches = matches.sort(order_by, direction)

        return [self.mongo_to_object(match) for match in matches]

    def update(self, statement):
        """
        Save ``statement``, creating its document if it does not exist,
        and make sure a document exists for each of its responses.

        Bulk write errors are logged rather than raised.

        :returns: The statement that was passed in.
        """
        from pymongo import UpdateOne
        from pymongo.errors import BulkWriteError

        data = statement.serialize()

        operations = [
            UpdateOne(
                {'text': statement.text},
                {'$set': data},
                upsert=True
            )
        ]

        # Make sure that an entry for each response is saved
        for response_dict in data.get('in_response_to', []):
            response_text = response_dict.get('text')

            # NOTE(review): '$set' also writes the response fields onto a
            # document that already exists for this text. If existing
            # documents should be left untouched, '$setOnInsert' may be
            # what was intended; confirm before changing.
            operations.append(
                UpdateOne(
                    {'text': response_text},
                    {'$set': response_dict},
                    upsert=True
                )
            )

        try:
            self.statements.bulk_write(operations, ordered=False)
        except BulkWriteError as bwe:
            # Log the details of a bulk write error
            self.logger.error(str(bwe.details))

        return statement

    def create_conversation(self):
        """
        Create a new conversation.

        :returns: The id of the inserted conversation document.
        """
        conversation_id = self.conversations.insert_one({}).inserted_id
        return conversation_id

    def get_latest_response(self, conversation_id):
        """
        Returns the latest response in a conversation if it exists.
        Returns None if a matching conversation cannot be found.
        """
        from pymongo import DESCENDING

        statements = list(self.statements.find({
            'conversations.id': conversation_id
        }).sort('conversations.created_at', DESCENDING))

        if not statements:
            return None

        # Only one document matches when, for example, the statement and
        # its response share the same text. Indexing [-2] would then raise
        # an IndexError, so fall back to the only document available.
        if len(statements) < 2:
            return self.mongo_to_object(statements[-1])

        # NOTE(review): the results are sorted newest first, so [-2] picks
        # the second-oldest document; kept as-is, confirm this is intended.
        return self.mongo_to_object(statements[-2])

    def add_to_conversation(self, conversation_id, statement, response):
        """
        Add the statement and response to the conversation.

        A conversation entry holding the conversation id and a creation
        time is pushed onto the stored documents matching the text of
        ``statement`` and of ``response``.
        """
        from datetime import datetime, timedelta

        self.statements.update_one(
            {
                'text': statement.text
            },
            {
                '$push': {
                    'conversations': {
                        'id': conversation_id,
                        'created_at': datetime.utcnow()
                    }
                }
            }
        )
        self.statements.update_one(
            {
                'text': response.text
            },
            {
                '$push': {
                    'conversations': {
                        'id': conversation_id,
                        # Force the response to be at least one millisecond
                        # after the input statement
                        'created_at': datetime.utcnow() + timedelta(milliseconds=1)
                    }
                }
            }
        )

    def get_random(self):
        """
        Returns a random statement from the database

        :raises EmptyDatabaseException: If no statements are stored.
        """
        from random import randint

        count = self.count()

        if count < 1:
            raise self.EmptyDatabaseException()

        # Skip a random number of documents and take the next one
        random_integer = randint(0, count - 1)

        statements = self.statements.find().limit(1).skip(random_integer)

        return self.mongo_to_object(list(statements)[0])

    def remove(self, statement_text):
        """
        Removes the statement that matches the input text.
        Removes any responses from statements if the response text matches the
        input text.
        """
        for statement in self.filter(in_response_to__contains=statement_text):
            statement.remove_response(statement_text)
            self.update(statement)

        self.statements.delete_one({'text': statement_text})

    def get_response_statements(self):
        """
        Return only statements that are in response to another statement.
        A statement must exist which lists the closest matching statement in the
        in_response_to field. Otherwise, the logic adapter may find a closest
        matching statement that does not have a known response.
        """
        response_query = self.statements.aggregate(
            [{'$group': {'_id': '$in_response_to.text'}}]
        )

        responses = []
        for group in response_query:
            try:
                responses.extend(group['_id'])
            except TypeError:
                # Skip groups whose id is not iterable (e.g. None)
                pass

        _statement_query = {
            'text': {
                '$in': responses
            }
        }

        # Entries of the base query take precedence over the '$in' filter
        _statement_query.update(self.base_query.value())

        statement_query = self.statements.find(_statement_query)

        return [
            self.mongo_to_object(statement)
            for statement in statement_query
        ]

    def drop(self):
        """
        Remove the database.
        """
        self.client.drop_database(self.database_name)
mongodb.py的更多相关文章
- Python mongoDB 的简单操作
#!/usr/bin/env python # coding:utf-8 # Filename:mongodb.py from pymongo import MongoClient,ASCENDING ...
- Python连接MongoDB数据库并执行操作
原文:https://blog.51cto.com/1767340368/2092813 环境设置: [root@mongodb ~]# cat /etc/redhat-release CentOS ...
- MongoDB与python 交互
一.安装pymongo 注意 :当同时安装了python2和python3,为区分两者的pip,分别取名为pip2和pip3. 推荐:https://www.cnblogs.com/thunderLL ...
- 2019-05-25 Python之Mongodb的使用
Mongodb学习总结: one.插入数据pass two.查看数据pass three.修改数据pass four.数据排序pass five.删除数据pass 一.安装Mongodb https: ...
- ansible的lookup
lookup路径: /usr/lib/python2.7/site-packages/ansible/plugins/lookup 所有的lookup插件列表cartesian.py dnstxt.p ...
- SaltStack介绍及简单配置-第一篇
SaltStack介绍 一种全新的基础设施管理方式,部署轻松,在几分钟内可运行起来,扩展性好,很容易管理上万台服务器,速度够快,服务器之间秒级通讯. salt底层采用动态的连接总线, 使其可以用于编配 ...
- appium 爬取抖音
1.MongoDB.py import pymongo from pymongo.collection import Collection client = pymongo.MongoClient(h ...
- Scrapy的piplines.py存储文件和存储mongodb
一.将数据保存到文件 1.piplines.py文件 import json class TencentPipeline(object): def open_spider(self,spider): ...
- py操作mongodb总结
python使用的版本 python3. python操作mongodb使用的是pymongo,安装方法: pip install pymongo 测试 PyMongo 接下来我们可以创建一个测试文件 ...
随机推荐
- hdu 2685(数论相关定理+欧几里德定理+快速取模)
I won't tell you this is about number theory Time Limit: 2000/1000 MS (Java/Others) Memory Limit: ...
- 四、Ubuntu 一些常用命令
1.锁定root用户 :sudo passwd -l root 2.解锁root用户 :sudo passwd -u root 3.切换身份:su root 或者 su 其他用户名,然后输入密码, ...
- 洛谷 P1478 陶陶摘苹果(升级版)【贪心/结构体排序/可用01背包待补】
[链接]:https://www.luogu.org/problemnew/show/P1478 题目描述 又是一年秋季时,陶陶家的苹果树结了n个果子.陶陶又跑去摘苹果,这次她有一个a公分的椅子.当他 ...
- Topcoder SRM 664 DIV 1
BearPlays 快速幂 题意: 给你两个数A,B,有种操作是将大的数减去小的数,并将小的数乘以2.反复k次,问你最后的小的数回是多少. 题解: 由于整个过程$A+B$的值是不会改变的.现在令$S= ...
- Java基础教程---JDK的安装和环境变量的配置
一.Java的安装和环境变量配置 1.Java的安装: 第一步,从Oracle官网下载安装包,当然也可以从其他安全可靠的地方下载(PS:根据不同电脑系统下载相应的安装包,注意电脑的位数.如x64,x3 ...
- 单源最短路Dijkstra算法
Dijkstra算法是寻找从某一顶点i出发到达其他顶点的最短路径.Dijkstra算法的思想与Prim算法很像,它收录顶点的规则是按照路径长度递增的顺序收录的.设v0是源顶点,我们要寻找从v0出发到其他任意 ...
- GLSL 基础量定义 【转】
转载:http://blog.csdn.net/misol/article/details/7658949 GLSL语法跟C语言非常相似: 1.数据类型: GLSL包含下面几种简单的数据类型 fl ...
- docker入门小结(一)
入职需要学习docker,记录学习随笔.争取两天大致看完docker学习.博客也算是迁移到cnblogs. 学习的链接参考<docker从入门到实践>http://dockerpool.c ...
- css样式表可以被嵌入网页里面吗?
我们一般听说的是:javascript可以被嵌入到网页任何地方? 而我们一直忽略了css也可以嵌入到网页任何地方 不过,建议这种方式少写,为了浏览器的渲染速度,但对于行内样式来说,这种方式还是比较有效 ...
- [C++设计模式] state 状态模式
<head first 设计模式>中的样例非常不错,想制造一个自己主动交易的糖果机,糖果机有四个状态:投入钱不足,投入钱足够,出售糖果,糖果售罄. 糖果机的当前状态处于当中不同的状态时,它 ...