Deep Learning: English-to-French Translation with seq2seq

import os
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable

source_path = "data/small_vocab_en"
target_path = "data/small_vocab_fr"

MAX_LENGTH = 100
SOS_token = 0
EOS_token = 1


def load_data(path):
    input_file = os.path.join(path)
    with open(input_file, 'r', encoding='utf-8') as f:
        data = f.read()
    return data


source_text = load_data(source_path)
target_text = load_data(target_path)
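As a quick sanity check (this snippet is not part of the original post), each file is expected to hold one sentence per line, so splitting on '\n' shows how many sentence pairs are available:

# Optional sanity check: one sentence per line is assumed.
print(len(source_text.split('\n')), len(target_text.split('\n')))
print(source_text.split('\n')[0])   # first English sentence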
class Dictionary(object):
    def __init__(self):
        self.word2idx = {'<SOS>': 0, '<EOS>': 1}
        self.idx2word = {0: '<SOS>', 1: '<EOS>'}
        self.count = 2

    def add_word(self, word):
        # Register a new word at the next free id; the original indexed with
        # self.count - 1, which would have overwritten the <EOS> entry.
        if word not in self.word2idx:
            self.idx2word[self.count] = word
            self.word2idx[word] = self.count
            self.count += 1
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)
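A tiny illustration of how the Dictionary hands out ids (my snippet, not from the original post): ids 0 and 1 are reserved for <SOS> and <EOS>, so the first real word gets id 2, and a repeated word keeps its id.

d = Dictionary()
print(d.add_word('hello'))   # 2 -- first free id after <SOS>/<EOS>
print(d.add_word('hello'))   # 2 -- already registered, same id
print(len(d))                # 3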
class Lang(object):
    def __init__(self, name):
        self.name = name
        self.dictionary = Dictionary()

    def addSentence(self, sentence):
        return [self.addWord(w) for w in sentence.split()]

    def addWord(self, word):
        return self.dictionary.add_word(word)

    def __len__(self):
        return len(self.dictionary)
def readLangs(source_name, source_lang_text, target_name, target_lang_text):
    source_lang = Lang(source_name)
    source_data = [source_lang.addSentence(s) for s in source_lang_text.lower().split('\n')]

    target_lang = Lang(target_name)
    # Append <EOS> to every target sentence so the decoder learns when to stop.
    target_sentences = [s + ' <EOS>' for s in target_lang_text.lower().split('\n')]
    target_data = [target_lang.addSentence(s) for s in target_sentences]

    pairs = list(zip(source_data, target_data))
    return source_lang, target_lang, pairs


source_lang, target_lang, pairs_data = readLangs('en', source_text, 'fr', target_text)
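An optional look at what readLangs produced (not in the original post): pairs_data is a list of (source id list, target id list) pairs, and the Lang objects give the vocabulary sizes used to size the models below.

print(len(pairs_data))                     # number of sentence pairs
print(pairs_data[0])                       # first pair of id sequences
print(len(source_lang), len(target_lang))  # vocabulary sizes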
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        return result
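A small shape check for a single encoder step, assuming a hidden size of 256 as used later; this is a sketch I added, not code from the post:

enc = EncoderRNN(len(source_lang), 256)
h = enc.initHidden()
word_id = Variable(torch.LongTensor([0]))   # any valid word id
out, h = enc(word_id, h)
print(out.size(), h.size())                 # both torch.Size([1, 1, 256])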
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Attention weights over the (padded) encoder outputs, computed from
        # the current embedding and the previous hidden state.
        attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        return result
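Similarly, one decoder step can be exercised on its own with zero-filled placeholder inputs (again my own sketch, not from the post); the output is a row of log-probabilities over the target vocabulary, and the attention weights span MAX_LENGTH positions:

dec = DecoderRNN(256, len(target_lang), dropout_p=0.1)
dec_hidden = Variable(torch.zeros(1, 1, 256))
enc_outs = Variable(torch.zeros(MAX_LENGTH, 256))   # placeholder encoder outputs
dec_in = Variable(torch.LongTensor([[SOS_token]]))
out, dec_hidden, attn = dec(dec_in, dec_hidden, enc_outs)
print(out.size(), attn.size())   # (1, len(target_lang)) and (1, MAX_LENGTH)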
epochs = 10
print_every = 2
hidden_size = 256
teacher_forcing_ratio = 0.5

encoder_model = EncoderRNN(len(source_lang), hidden_size)
att_decoder_model = DecoderRNN(hidden_size, len(target_lang), dropout_p=0.1)


def variablesFromIds(ids):
    return Variable(torch.LongTensor(ids).view(-1, 1))


def variablesFromPair(pair):
    input_var = variablesFromIds(pair[0])
    output_var = variablesFromIds(pair[1])
    return (input_var, output_var)
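For reference (my addition, not from the post), wrapping one pair shows the column-vector shapes the training loop expects:

inp_var, tgt_var = variablesFromPair(pairs_data[0])
print(inp_var.size(), tgt_var.size())   # (source_len, 1) and (target_len, 1)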
def train(input, target, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input.size()[0]
    target_length = target.size()[0]

    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    loss = 0

    # Run the encoder over the source sentence, one token at a time.
    for i in range(input_length):
        encoder_output, encoder_hidden = encoder(input[i], encoder_hidden)
        encoder_outputs[i] = encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([[SOS_token]]))
    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: feed the ground-truth target token as the next input.
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target[di])
            decoder_input = target[di]
    else:
        # No teacher forcing: feed back the decoder's own prediction.
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            decoder_input = Variable(torch.LongTensor([[ni]]))
            loss += criterion(decoder_output, target[di])
            if ni == EOS_token:
                break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.data[0] / target_length
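A single training step can also be run in isolation, which is roughly what trainIters below does in a loop; this usage sketch (mine, not from the post) assumes the same SGD optimizers and NLLLoss that trainIters sets up:

enc_opt = optim.SGD(encoder_model.parameters(), lr=0.01)
dec_opt = optim.SGD(att_decoder_model.parameters(), lr=0.01)
nll = nn.NLLLoss()
inp_var, tgt_var = variablesFromPair(random.choice(pairs_data))
step_loss = train(inp_var, tgt_var, encoder_model, att_decoder_model, enc_opt, dec_opt, nll)
print(step_loss)   # average per-token loss for this pair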
def trainIters(encoder, decoder, n_iters, print_every=10, learning_rate=0.01):
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [variablesFromPair(random.choice(pairs_data)) for i in range(n_iters)]
    criterion = nn.NLLLoss()

    total_loss = 0
    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_variable = training_pair[0]
        target_variable = training_pair[1]

        loss = train(input_variable, target_variable, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        total_loss += loss

        if iter % print_every == 0:
            print('(%d %d%%) loss %.4f avg-loss %.4f' % (iter, iter / n_iters * 100, loss, total_loss / iter))


trainIters(encoder_model, att_decoder_model, 5000)
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    input_variable = variablesFromIds(sentence)
    input_length = input_variable.size()[0]
    encoder_hidden = encoder.initHidden()

    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_variable[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_outputs[ei] + encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS
    decoder_hidden = encoder_hidden

    decoded_words = []
    decoder_attentions = torch.zeros(max_length, max_length)

    # Decode greedily until <EOS> or max_length is reached.
    for di in range(max_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
        decoder_attentions[di] = decoder_attention.data
        topv, topi = decoder_output.data.topk(1)
        ni = topi[0][0]
        if ni == EOS_token:
            decoded_words.append('<EOS>')
            break
        else:
            decoded_words.append(target_lang.dictionary.idx2word[ni])
        decoder_input = Variable(torch.LongTensor([[ni]]))

    return decoded_words, decoder_attentions[:di + 1]
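The post ends by calling evaluateRandomly, but its definition is not included. A minimal sketch of such a helper, modeled on the evaluate function above and on the structure of pairs_data (so the function body is my assumption, not the author's code), might look like this, followed by the original call:

def evaluateRandomly(encoder, decoder, n=5):
    # Pick a few random training pairs and compare source, reference and prediction.
    for _ in range(n):
        pair = random.choice(pairs_data)
        source_words = [source_lang.dictionary.idx2word[i] for i in pair[0]]
        target_words = [target_lang.dictionary.idx2word[i] for i in pair[1]]
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        print('>', ' '.join(source_words))
        print('=', ' '.join(target_words))
        print('<', ' '.join(output_words))
        print('')


evaluateRandomly(encoder_model, att_decoder_model)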

Conclusion

With this small amount of training the accuracy is still fairly low. A follow-up post will implement a chatbot on top of the same seq2seq approach.
