Deep Learning: English-to-French Translation with seq2seq

import os
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable

source_path = "data/small_vocab_en"
target_path = "data/small_vocab_fr"

MAX_LENGTH = 100
SOS_token = 0
EOS_token = 1


def load_data(path):
    input_file = os.path.join(path)
    with open(input_file, 'r', encoding='utf-8') as f:
        data = f.read()
    return data


source_text = load_data(source_path)
target_text = load_data(target_path)
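As a quick sanity check (this snippet is not part of the original post), each file is expected to hold one sentence per line, so splitting on '\n' shows how many sentence pairs are available:

# Optional sanity check: one sentence per line is assumed.
print(len(source_text.split('\n')), len(target_text.split('\n')))
print(source_text.split('\n')[0])   # first English sentence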
class Dictionary(object):
    def __init__(self):
        self.word2idx = {'<SOS>': 0, '<EOS>': 1}
        self.idx2word = {0: '<SOS>', 1: '<EOS>'}
        self.count = 2

    def add_word(self, word):
        # Register a new word at the next free id; the original indexed with
        # self.count - 1, which would have overwritten the <EOS> entry.
        if word not in self.word2idx:
            self.idx2word[self.count] = word
            self.word2idx[word] = self.count
            self.count += 1
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)
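A tiny illustration of how the Dictionary hands out ids (my snippet, not from the original post): ids 0 and 1 are reserved for <SOS> and <EOS>, so the first real word gets id 2, and a repeated word keeps its id.

d = Dictionary()
print(d.add_word('hello'))   # 2 -- first free id after <SOS>/<EOS>
print(d.add_word('hello'))   # 2 -- already registered, same id
print(len(d))                # 3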
class Lang(object):
    def __init__(self, name):
        self.name = name
        self.dictionary = Dictionary()

    def addSentence(self, sentence):
        return [self.addWord(w) for w in sentence.split()]

    def addWord(self, word):
        return self.dictionary.add_word(word)

    def __len__(self):
        return len(self.dictionary)
def readLangs(source_name, source_lang_text, target_name, target_lang_text):
    source_lang = Lang(source_name)
    source_data = [source_lang.addSentence(s) for s in source_lang_text.lower().split('\n')]

    target_lang = Lang(target_name)
    # Append <EOS> to every target sentence so the decoder learns when to stop.
    target_sentences = [s + ' <EOS>' for s in target_lang_text.lower().split('\n')]
    target_data = [target_lang.addSentence(s) for s in target_sentences]

    pairs = list(zip(source_data, target_data))
    return source_lang, target_lang, pairs


source_lang, target_lang, pairs_data = readLangs('en', source_text, 'fr', target_text)
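An optional look at what readLangs produced (not in the original post): pairs_data is a list of (source id list, target id list) pairs, and the Lang objects give the vocabulary sizes used to size the models below.

print(len(pairs_data))                     # number of sentence pairs
print(pairs_data[0])                       # first pair of id sequences
print(len(source_lang), len(target_lang))  # vocabulary sizes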
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        return result
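A small shape check for a single encoder step, assuming a hidden size of 256 as used later; this is a sketch I added, not code from the post:

enc = EncoderRNN(len(source_lang), 256)
h = enc.initHidden()
word_id = Variable(torch.LongTensor([0]))   # any valid word id
out, h = enc(word_id, h)
print(out.size(), h.size())                 # both torch.Size([1, 1, 256])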
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Attention weights over the (padded) encoder outputs, computed from
        # the current embedding and the previous hidden state.
        attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        return result
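Similarly, one decoder step can be exercised on its own with zero-filled placeholder inputs (again my own sketch, not from the post); the output is a row of log-probabilities over the target vocabulary, and the attention weights span MAX_LENGTH positions:

dec = DecoderRNN(256, len(target_lang), dropout_p=0.1)
dec_hidden = Variable(torch.zeros(1, 1, 256))
enc_outs = Variable(torch.zeros(MAX_LENGTH, 256))   # placeholder encoder outputs
dec_in = Variable(torch.LongTensor([[SOS_token]]))
out, dec_hidden, attn = dec(dec_in, dec_hidden, enc_outs)
print(out.size(), attn.size())   # (1, len(target_lang)) and (1, MAX_LENGTH)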
epochs = 10
print_every = 2
hidden_size = 256
teacher_forcing_ratio = 0.5

encoder_model = EncoderRNN(len(source_lang), hidden_size)
att_decoder_model = DecoderRNN(hidden_size, len(target_lang), dropout_p=0.1)


def variablesFromIds(ids):
    return Variable(torch.LongTensor(ids).view(-1, 1))


def variablesFromPair(pair):
    input_var = variablesFromIds(pair[0])
    output_var = variablesFromIds(pair[1])
    return (input_var, output_var)
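For reference (my addition, not from the post), wrapping one pair shows the column-vector shapes the training loop expects:

inp_var, tgt_var = variablesFromPair(pairs_data[0])
print(inp_var.size(), tgt_var.size())   # (source_len, 1) and (target_len, 1)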
def train(input, target, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input.size()[0]
    target_length = target.size()[0]

    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    loss = 0

    # Run the encoder over the source sentence, one token at a time.
    for i in range(input_length):
        encoder_output, encoder_hidden = encoder(input[i], encoder_hidden)
        encoder_outputs[i] = encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([[SOS_token]]))
    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: feed the ground-truth target token as the next input.
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target[di])
            decoder_input = target[di]
    else:
        # No teacher forcing: feed back the decoder's own prediction.
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            decoder_input = Variable(torch.LongTensor([[ni]]))
            loss += criterion(decoder_output, target[di])
            if ni == EOS_token:
                break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.data[0] / target_length
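A single training step can also be run in isolation, which is roughly what trainIters below does in a loop; this usage sketch (mine, not from the post) assumes the same SGD optimizers and NLLLoss that trainIters sets up:

enc_opt = optim.SGD(encoder_model.parameters(), lr=0.01)
dec_opt = optim.SGD(att_decoder_model.parameters(), lr=0.01)
nll = nn.NLLLoss()
inp_var, tgt_var = variablesFromPair(random.choice(pairs_data))
step_loss = train(inp_var, tgt_var, encoder_model, att_decoder_model, enc_opt, dec_opt, nll)
print(step_loss)   # average per-token loss for this pair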
def trainIters(encoder, decoder, n_iters, print_every=10, learning_rate=0.01):
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [variablesFromPair(random.choice(pairs_data)) for i in range(n_iters)]
    criterion = nn.NLLLoss()

    total_loss = 0
    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_variable = training_pair[0]
        target_variable = training_pair[1]

        loss = train(input_variable, target_variable, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        total_loss += loss

        if iter % print_every == 0:
            print('(%d %d%%) loss %.4f avg-loss %.4f' % (iter, iter / n_iters * 100, loss, total_loss / iter))


trainIters(encoder_model, att_decoder_model, 5000)
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    input_variable = variablesFromIds(sentence)
    input_length = input_variable.size()[0]
    encoder_hidden = encoder.initHidden()

    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_variable[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_outputs[ei] + encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS
    decoder_hidden = encoder_hidden

    decoded_words = []
    decoder_attentions = torch.zeros(max_length, max_length)

    # Decode greedily until <EOS> or max_length is reached.
    for di in range(max_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
        decoder_attentions[di] = decoder_attention.data
        topv, topi = decoder_output.data.topk(1)
        ni = topi[0][0]
        if ni == EOS_token:
            decoded_words.append('<EOS>')
            break
        else:
            decoded_words.append(target_lang.dictionary.idx2word[ni])
        decoder_input = Variable(torch.LongTensor([[ni]]))

    return decoded_words, decoder_attentions[:di + 1]
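The post ends by calling evaluateRandomly, but its definition is not included. A minimal sketch of such a helper, modeled on the evaluate function above and on the structure of pairs_data (so the function body is my assumption, not the author's code), might look like this, followed by the original call:

def evaluateRandomly(encoder, decoder, n=5):
    # Pick a few random training pairs and compare source, reference and prediction.
    for _ in range(n):
        pair = random.choice(pairs_data)
        source_words = [source_lang.dictionary.idx2word[i] for i in pair[0]]
        target_words = [target_lang.dictionary.idx2word[i] for i in pair[1]]
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        print('>', ' '.join(source_words))
        print('=', ' '.join(target_words))
        print('<', ' '.join(output_words))
        print('')


evaluateRandomly(encoder_model, att_decoder_model)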

Conclusion

With this small amount of training the accuracy is still fairly low. A follow-up post will implement a chatbot on top of the same seq2seq approach.
