pytorch --Rnn语言模型(LSTM,BiLSTM) -- 《Recurrent neural network based language model》
论文通过实现RNN来完成了文本分类。
论文地址:88888888
模型结构图:
原理自行参考论文,code and comment(https://github.com/graykode/nlp-tutorial):
# -*- coding: utf-8 -*-
# @time : 2019/11/9 15:12 import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable dtype = torch.FloatTensor sentences = [ "i like dog", "i love coffee", "i hate milk"] word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict) # TextRNN Parameter
batch_size = len(sentences)
n_step = 2 # number of cells(= number of Step)
n_hidden = 5 # number of hidden units in one cell def make_batch(sentences):
input_batch = []
target_batch = [] for sen in sentences:
word = sen.split()
input = [word_dict[n] for n in word[:-1]]
target = word_dict[word[-1]] input_batch.append(np.eye(n_class)[input])
target_batch.append(target) return input_batch, target_batch # to Torch.Tensor
input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.Tensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch)) class TextRNN(nn.Module):
def __init__(self):
super(TextRNN, self).__init__() self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden,batch_first=True)
self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
self.b = nn.Parameter(torch.randn([n_class]).type(dtype)) def forward(self, hidden, X):
if self.rnn.batch_first == True:
# X [batch_size,time_step,word_vector]
outputs, hidden = self.rnn(X, hidden) # outputs [batch_size, time_step, hidden_size*num_directions]
output = outputs[:, -1, :] # [batch_size, num_directions(=1) * n_hidden]
model = torch.mm(output, self.W) + self.b # model : [batch_size, n_class]
return model
else:
X = X.transpose(0, 1) # X : [n_step, batch_size, n_class]
outputs, hidden = self.rnn(X, hidden)
# outputs : [n_step, batch_size, num_directions(=1) * n_hidden]
# hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden] output = outputs[-1,:,:] # [batch_size, num_directions(=1) * n_hidden]
model = torch.mm(output, self.W) + self.b # model : [batch_size, n_class]
return model model = TextRNN() criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001) # Training
for epoch in range(5000):
optimizer.zero_grad() # hidden : [num_layers * num_directions, batch, hidden_size]
hidden = Variable(torch.zeros(1, batch_size, n_hidden))
# input_batch : [batch_size, n_step, n_class]
output = model(hidden, input_batch) # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
loss = criterion(output, target_batch)
if (epoch + 1) % 1000 == 0:
print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) loss.backward()
optimizer.step() # Predict
hidden_initial = Variable(torch.zeros(1, batch_size, n_hidden))
predict = model(hidden_initial, input_batch).data.max(1, keepdim=True)[1]
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])
LSTM unit的RNN模型:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable dtype = torch.FloatTensor char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
word_dict = {n: i for i, n in enumerate(char_arr)}
number_dict = {i: w for i, w in enumerate(char_arr)}
n_class = len(word_dict) # number of class(=number of vocab) seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star'] # TextLSTM Parameters
n_step = 3
n_hidden = 128 def make_batch(seq_data):
input_batch, target_batch = [], [] for seq in seq_data:
input = [word_dict[n] for n in seq[:-1]] # 'm', 'a' , 'k' is input
target = word_dict[seq[-1]] # 'e' is target
input_batch.append(np.eye(n_class)[input])
target_batch.append(target) return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch)) class TextLSTM(nn.Module):
def __init__(self):
super(TextLSTM, self).__init__() self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
self.b = nn.Parameter(torch.randn([n_class]).type(dtype)) def forward(self, X):
input = X.transpose(0, 1) # X : [n_step, batch_size, n_class] hidden_state = Variable(
torch.zeros(1, len(X), n_hidden)) # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
cell_state = Variable(
torch.zeros(1, len(X), n_hidden)) # [num_layers(=1) * num_directions(=1), batch_size, n_hidden] outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
outputs = outputs[-1] # [batch_size, n_hidden]
model = torch.mm(outputs, self.W) + self.b # model : [batch_size, n_class]
return model input_batch, target_batch = make_batch(seq_data) model = TextLSTM() criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001) # Training
for epoch in range(1000): output = model(input_batch)
loss = criterion(output, target_batch)
if (epoch + 1) % 100 == 0:
print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
optimizer.zero_grad()
loss.backward()
optimizer.step() inputs = [sen[:3] for sen in seq_data] predict = model(input_batch).data.max(1, keepdim=True)[1]
print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze()])
BiLSTM RNN model:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F dtype = torch.FloatTensor sentence = (
'Lorem ipsum dolor sit amet consectetur adipisicing elit '
'sed do eiusmod tempor incididunt ut labore et dolore magna '
'aliqua Ut enim ad minim veniam quis nostrud exercitation'
) word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}
number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}
n_class = len(word_dict)
max_len = len(sentence.split())
n_hidden = 5 def make_batch(sentence):
input_batch = []
target_batch = [] words = sentence.split()
for i, word in enumerate(words[:-1]):
input = [word_dict[n] for n in words[:(i + 1)]]
input = input + [0] * (max_len - len(input))
target = word_dict[words[i + 1]]
input_batch.append(np.eye(n_class)[input])
target_batch.append(target) return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch)) class BiLSTM(nn.Module):
def __init__(self):
super(BiLSTM, self).__init__() self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
self.W = nn.Parameter(torch.randn([n_hidden * 2, n_class]).type(dtype))
self.b = nn.Parameter(torch.randn([n_class]).type(dtype)) def forward(self, X):
input = X.transpose(0, 1) # input : [n_step, batch_size, n_class] hidden_state = Variable(torch.zeros(1*2, len(X), n_hidden)) # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
cell_state = Variable(torch.zeros(1*2, len(X), n_hidden)) # [num_layers(=1) * num_directions(=1), batch_size, n_hidden] outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
outputs = outputs[-1] # [batch_size, n_hidden]
model = torch.mm(outputs, self.W) + self.b # model : [batch_size, n_class]
return model input_batch, target_batch = make_batch(sentence) model = BiLSTM() criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001) # Training
for epoch in range(10000):
output = model(input_batch)
loss = criterion(output, target_batch)
if (epoch + 1) % 1000 == 0:
print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) optimizer.zero_grad()
loss.backward()
optimizer.step() predict = model(input_batch).data.max(1, keepdim=True)[1]
print(sentence)
print([number_dict[n.item()] for n in predict.squeeze()])
pytorch --Rnn语言模型(LSTM,BiLSTM) -- 《Recurrent neural network based language model》的更多相关文章
- 4.5 RNN循环神经网络(recurrent neural network)
自己开发了一个股票智能分析软件,功能很强大,需要的点击下面的链接获取: https://www.cnblogs.com/bclshuai/p/11380657.html 1.1 RNN循环神经网络 ...
- 论文笔记:ReNet: A Recurrent Neural Network Based Alternative to Convolutional Networks
ReNet: A Recurrent Neural Network Based Alternative to Convolutional Networks2018-03-05 11:13:05 ...
- 【NLP】Recurrent Neural Network and Language Models
0. Overview What is language models? A time series prediction problem. It assigns a probility to a s ...
- RNN循环神经网络(Recurrent Neural Network)学习
一.RNN简介 1.)什么是RNN? RNN是一种特殊的神经网络结构,考虑前一时刻的输入,且赋予了网络对前面的内容的一种'记忆'功能. 2.)RNN可以解决什么问题? 时间先后顺序的问题都可以使用RN ...
- Recurrent Neural Network系列1--RNN(循环神经网络)概述
作者:zhbzz2007 出处:http://www.cnblogs.com/zhbzz2007 欢迎转载,也请保留这段声明.谢谢! 本文翻译自 RECURRENT NEURAL NETWORKS T ...
- (zhuan) Recurrent Neural Network
Recurrent Neural Network 2016年07月01日 Deep learning Deep learning 字数:24235 this blog from: http:/ ...
- Recurrent neural network (RNN) - Pytorch版
import torch import torch.nn as nn import torchvision import torchvision.transforms as transforms # ...
- Recurrent Neural Network系列2--利用Python,Theano实现RNN
作者:zhbzz2007 出处:http://www.cnblogs.com/zhbzz2007 欢迎转载,也请保留这段声明.谢谢! 本文翻译自 RECURRENT NEURAL NETWORKS T ...
- Recurrent Neural Network系列4--利用Python,Theano实现GRU或LSTM
yi作者:zhbzz2007 出处:http://www.cnblogs.com/zhbzz2007 欢迎转载,也请保留这段声明.谢谢! 本文翻译自 RECURRENT NEURAL NETWORK ...
随机推荐
- 【xinsir】webpack实践
webpack现在是前端必会的技能了,也是在工作中必定用到的.所以,如果我们现在还不会webpack,那么在将来面试中肯定会被扣分的. webpack中文官网:https://www.webpackj ...
- 线段相交的异或值 (线段树 or 优先队列)
VVQ 最近迷上了线段这种东西 现在他手上有 n 条线段,他希望在其中找到两条有公共点的线段,使得他们的异或值最大. 定义线段的异或值为它们并的长度减他们交的长度 输入描述: 第一行包括一个正整数 n ...
- 对标Eureka的AP一致性,Nacos如何实现Raft算法
一.快速了解Raft算法 Raft 适用于一个管理日志一致性的协议,相比于 Paxos 协议 Raft 更易于理解和去实现它. 为了提高理解性,Raft 将一致性算法分为了几个部分,包括领导选取(le ...
- java项目Jenkins部署
假设背景: Nginx跳板机服务器:192.168.10.1 Tomcat应用服务器:192.168.10.3 端口:10083 应用名称:appXXX 1.配置跳板机的转发路径 如:192.168. ...
- Jenkins-k8s-helm-harbor-githab-mysql-nfs微服务发布平台实战
基于 K8S 构建 Jenkins 微服务发布平台 实现汇总: 发布流程设计讲解 准备基础环境 K8s环境(部署Ingress Controller,CoreDNS,Calico/Flannel) 部 ...
- idea实现svn拉分支和合并分支的教程
原文地址:https://blog.csdn.net/qq_27471405/article/details/78498260 今天测试了一下svn拉分支和合并分支的教程,决定分享给大家 拉分支教程: ...
- 【VBA】EXCEL通过VBA生成SQL,自动生成创建表结构SQL
原文:https://blog.csdn.net/zutsoft/article/details/45441343 编程往往与数据库密不可分,一个项目往往有很多的表,很多时候通过excel来维护表结构 ...
- 团队项目-Beta冲刺1
博客介绍 这个作业属于哪个课程 https://edu.cnblogs.com/campus/xnsy/GeographicInformationScience 这个作业要求在哪里 https://w ...
- generic
是什么 算法实现时保有待定类型的参数. 为什么 一份代码用于多个算法(当算法中只数个类型不同的时候) 可重新性 很多常用算法和容器数据结构都可以type-generic的方式实现 why not 许多 ...
- Java基础系列1:深入理解Java数据类型
Java基础系列1:深入理解Java数据类型 当初学习计算机的时候,教科书中对程序的定义是:程序=数据结构+算法,Java基础系列第一篇就聊聊Java中的数据类型. 本篇聊Java数据类型主要包括四个 ...