吴裕雄--天生自然 PYTHON数据分析：所有美国股票和etf的历史日价格和成交量分析

# This Python 3 environment comes with many helpful analytics libraries installed

# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python

# For example, here's several helpful packages to load in 

import matplotlib.pyplot as plt

import statsmodels.tsa.seasonal as smt

import numpy as np # linear algebra

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import random

import datetime as dt

from sklearn import linear_model

from sklearn.metrics import mean_absolute_error

import plotly

# import the relevant Keras modules

from keras.models import Sequential

from keras.layers import Activation, Dense

from keras.layers import LSTM

from keras.layers import Dropout

# Input data files are available in the "../input/" directory.

# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output

import os

os.chdir('F:\\kaggleDataSet\\price-volume\\Stocks')

#read data

# kernels let us navigate through the zipfile as if it were a directory

# trying to read a file of size zero will throw an error, so skip them

# filenames = [x for x in os.listdir() if x.endswith('.txt') and os.path.getsize(x) > 0]

# filenames = random.sample(filenames,1)

filenames = ['prk.us.txt', 'bgr.us.txt', 'jci.us.txt', 'aa.us.txt', 'fr.us.txt', 'star.us.txt', 'sons.us.txt', 'ipl_d.us.txt', 'sna.us.txt', 'utg.us.txt']

filenames = [filenames[1]]

print(filenames)

data = []

for filename in filenames:

    df = pd.read_csv(filename, sep=',')

    label, _, _ = filename.split(sep='.')

    df['Label'] = filename

    df['Date'] = pd.to_datetime(df['Date'])

    data.append(df)

traces = []

for df in data:

    clr = str(r()) + str(r()) + str(r())

    df = df.sort_values('Date')

    label = df['Label'].iloc[0]

    trace = plotly.graph_objs.Scattergl(x=df['Date'],y=df['Close'])

    traces.append(trace)

layout = plotly.graph_objs.Layout(title='Plot',)

fig = plotly.graph_objs.Figure(data=traces, layout=layout)

plotly.offline.init_notebook_mode(connected=True)

plotly.offline.iplot(fig, filename='dataplot')

df = data[0]

window_len = 10

#Create a data point (i.e. a date) which splits the training and testing set

split_date = list(data[0]["Date"][-(2*window_len+1):])[0]

#Split the training and test set

training_set, test_set = df[df['Date'] < split_date], df[df['Date'] >= split_date]

training_set = training_set.drop(['Date','Label', 'OpenInt'], 1)

test_set = test_set.drop(['Date','Label','OpenInt'], 1)

#Create windows for training

LSTM_training_inputs = []

for i in range(len(training_set)-window_len):

    temp_set = training_set[i:(i+window_len)].copy()

    for col in list(temp_set):

        temp_set[col] = temp_set[col]/temp_set[col].iloc[0] - 1

    LSTM_training_inputs.append(temp_set)

LSTM_training_outputs = (training_set['Close'][window_len:].values/training_set['Close'][:-window_len].values)-1

LSTM_training_inputs = [np.array(LSTM_training_input) for LSTM_training_input in LSTM_training_inputs]

LSTM_training_inputs = np.array(LSTM_training_inputs)

#Create windows for testing

LSTM_test_inputs = []

for i in range(len(test_set)-window_len):

    temp_set = test_set[i:(i+window_len)].copy()

    for col in list(temp_set):

        temp_set[col] = temp_set[col]/temp_set[col].iloc[0] - 1

    LSTM_test_inputs.append(temp_set)

LSTM_test_outputs = (test_set['Close'][window_len:].values/test_set['Close'][:-window_len].values)-1

LSTM_test_inputs = [np.array(LSTM_test_inputs) for LSTM_test_inputs in LSTM_test_inputs]

LSTM_test_inputs = np.array(LSTM_test_inputs)

def build_model(inputs, output_size, neurons, activ_func="linear",dropout=0.10, loss="mae", optimizer="adam"):

    model = Sequential()

    model.add(LSTM(neurons, input_shape=(inputs.shape[1], inputs.shape[2])))

    model.add(Dropout(dropout))

    model.add(Dense(units=output_size))

    model.add(Activation(activ_func))

    model.compile(loss=loss, optimizer=optimizer)

    return model

# initialise model architecture

nn_model = build_model(LSTM_training_inputs, output_size=1, neurons = 32)

# model output is next price normalised to 10th previous closing price

# train model on data

# note: eth_history contains information on the training error per epoch

nn_history = nn_model.fit(LSTM_training_inputs, LSTM_training_outputs, epochs=5, batch_size=1, verbose=2, shuffle=True)

plt.plot(LSTM_test_outputs, label = "actual")

plt.plot(nn_model.predict(LSTM_test_inputs), label = "predicted")

plt.legend()

plt.show()

MAE = mean_absolute_error(LSTM_test_outputs, nn_model.predict(LSTM_test_inputs))

print('The Mean Absolute Error is: {}'.format(MAE))

#https://github.com/llSourcell/How-to-Predict-Stock-Prices-Easily-Demo/blob/master/lstm.py

def predict_sequence_full(model, data, window_size):

    #Shift the window by 1 new prediction each time, re-run predictions on new window

    curr_frame = data[0]

    predicted = []

    for i in range(len(data)):

        predicted.append(model.predict(curr_frame[np.newaxis,:,:])[0,0])

        curr_frame = curr_frame[1:]

        curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)

    return predicted

predictions = predict_sequence_full(nn_model, LSTM_test_inputs, 10)

plt.plot(LSTM_test_outputs, label="actual")

plt.plot(predictions, label="predicted")

plt.legend()

plt.show()

MAE = mean_absolute_error(LSTM_test_outputs, predictions)

print('The Mean Absolute Error is: {}'.format(MAE))

结论

LSTM不能解决时间序列预测问题。对一个时间步长的预测并不比滞后模型好多少。如果我们增加预测的时间步长，性能下降的速度就不会像其他更传统的方法那么快。然而，在这种情况下，我们的误差增加了大约4.5倍。它随着我们试图预测的时间步长呈超线性增长。

吴裕雄--天生自然 PYTHON数据分析：所有美国股票和etf的历史日价格和成交量分析的更多相关文章

吴裕雄--天生自然 PYTHON数据分析：糖尿病视网膜病变数据分析（完整版）
# This Python 3 environment comes with many helpful analytics libraries installed # It is defined by ...
吴裕雄--天生自然 python数据分析：健康指标聚集分析（健康分析）
# This Python 3 environment comes with many helpful analytics libraries installed # It is defined by ...
吴裕雄--天生自然 python数据分析：葡萄酒分析
# import pandas import pandas as pd # creating a DataFrame pd.DataFrame({'Yes': [50, 31], 'No': [101 ...
吴裕雄--天生自然 PYTHON数据分析：人类发展报告——HDI, GDI，健康，全球人口数据数据分析
import pandas as pd # Data analysis import numpy as np #Data analysis import seaborn as sns # Data v ...
吴裕雄--天生自然 python数据分析：医疗费数据分析
import numpy as np import pandas as pd import os import matplotlib.pyplot as pl import seaborn as sn ...
吴裕雄--天生自然 PYTHON数据分析：基于Keras的CNN分析太空深处寻找系外行星数据
#We import libraries for linear algebra, graphs, and evaluation of results import numpy as np import ...
吴裕雄--天生自然 python数据分析：基于Keras使用CNN神经网络处理手写数据集
import pandas as pd import numpy as np import matplotlib.pyplot as plt import matplotlib.image as mp ...
吴裕雄--天生自然 PYTHON数据分析：钦奈水资源管理分析
df = pd.read_csv("F:\\kaggleDataSet\\chennai-water\\chennai_reservoir_levels.csv") df[&quo ...
吴裕雄--天生自然 PYTHON数据分析：医疗数据分析
import numpy as np # linear algebra import pandas as pd # data processing, CSV file I/O (e.g. pd.rea ...

随机推荐

Ubuntu解决 MariaDB无密码就可以登录的问题
使用apt-get来安装mysql,安装好之后发现安装的是 MariaDB,如下,无需密码既可以登录了.即使使用mysqladmin或mysql_secure_installation 设置好密码,用 ...
（数据科学学习手札75）基于geopandas的空间数据分析——坐标参考系篇
本文对应代码已上传至我的Github仓库https://github.com/CNFeffery/DataScienceStudyNotes 1 简介在上一篇文章中我们对geopandas中的数据结 ...
一次DB故障引起的反思和MySQL Operator选型
前言在一次数据库故障后,我们发现业务库会根据业务的等级会划分多个 MySQL 实例,许多业务库会同时属于一个 MySQL 实例,当一个库引发问题后整个实例的状态是不可控的.从而导致这个实例上的所有业 ...
Zabbix监控实现跨区域跨网络监控数据
Zabbix监控实现跨区域跨网络监控数据环境: 公司现有服务器10台,其中5台服务器有一台安装了zabbix,并且这5台服务器处于一个网络,只有一台服务器有公网ip, 另外的5台处于另一个网络,仅有 ...
T1飞跃树林 && 【最长等差子序列】
solution by Mr.gtf 一道简单的递推首先我们对树高从大到小排序很容易得到递推式 ans[i]=Σans[j] (j<i && h[j]-h[i]<=K) ...
大话IDL编程之函数功能调用（envi_doit、ENVIRaster、ENVITask）
2020年2月1日.好长时间没更新博客,还真有点不习惯.受新型冠性病毒的影响,平时街上熙熙攘攘的人流了无踪影,2020的春节竟然来的如此冷清.为响应“呆在家里就是做贡献的号召”,在家一宅就是十多天.闲 ...
使用docker19.03.6部署zabbix
可参考官方文档:https://www.zabbix.com/documentation/4.0/zh/manual/installation/containers 1)启动一个空的mysql服务器实 ...
php 页面分页样式示例
<?php class SubPages { private $each_disNums; //每页显示的条目数 private $nums; //总条目数 private $current_p ...
《自拍教程6》打开Windows文件后缀
如果你用的是Windows操作系统, 请把文件后缀名打开,千万别隐藏后缀名, 后续有各类 .py, .sh, .bat, .exe等不同文件, 需要你时刻关注文件后缀名. 如果是我看到我手下的测试人员 ...
SpringCloud微服务：阿里开源组件Nacos，服务和配置管理
源码地址:GitHub·点这里||GitEE·点这里一.阿里微服务简介 1.基础描述 Alibaba-Cloud致力于提供微服务开发的一站式解决方案.此项目包含开发分布式应用微服务的必需组件,方便开 ...

吴裕雄--天生自然 PYTHON数据分析：所有美国股票和etf的历史日价格和成交量分析

吴裕雄--天生自然 PYTHON数据分析：所有美国股票和etf的历史日价格和成交量分析的更多相关文章

随机推荐

热门专题