import datetime
import pandas as pd
import pandas.io.data
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
from matplotlib import style
import numpy as np
import time
import math
style.use('ggplot')
def outlier_fixing(stock_name, ma1=100, ma2=250, ma3=500, ma4=5000,
                   std_window=25, max_std=17):
    """Plot one stock's data after dropping rows with a large rolling deviation.

    Reads 'X:/stocks_sentdex_dates_short.csv', keeps the rows whose ``type``
    column equals ``stock_name`` lower-cased, computes a rolling standard
    deviation of the ``close`` column and discards every row whose deviation
    is not below ``max_std``. Three stacked panels sharing one x axis are then
    shown: the ``close`` column, four rolling means of the ``value`` column,
    and the rolling deviation.

    Args:
        stock_name: Symbol to select; compared case-insensitively by
            lower-casing it (the file is assumed to store lower-case symbols).
        ma1, ma2, ma3, ma4: Window lengths, in rows, of the four rolling means.
        std_window: Window length, in rows, of the rolling standard deviation.
        max_std: Rows are kept only while their rolling deviation is strictly
            below this value.

    Returns:
        None. The head of the raw frame and the deviation series are printed,
        and the figure is displayed with ``plt.show()``.
    """
    df = pd.read_csv('X:/stocks_sentdex_dates_short.csv',
                     index_col='time', parse_dates=True)
    print(df.head())

    # Work on an independent copy: a column is added below, and assigning
    # into a filtered slice of the original frame is a chained assignment.
    df = df[df.type == stock_name.lower()].copy()

    # Computed once and reused for both the printout and the new column.
    # NOTE: pd.rolling_std / pd.rolling_mean are the legacy top-level helpers
    # of the pandas generation this file targets (it imports pandas.io.data).
    std = pd.rolling_std(df['close'], std_window, min_periods=1)
    print(std)
    df['std'] = std

    # Rows whose deviation is NaN fail this comparison and are dropped too.
    df = df[df['std'] < max_std]

    windows = (ma1, ma2, ma3, ma4)
    averages = [pd.rolling_mean(df['value'], window) for window in windows]

    ax1 = plt.subplot(3, 1, 1)
    df['close'].plot(ax=ax1, label='Price')

    ax2 = plt.subplot(3, 1, 2, sharex=ax1)
    for window, average in zip(windows, averages):
        average.plot(ax=ax2, label=str(window) + 'MA')

    ax3 = plt.subplot(3, 1, 3, sharex=ax1)
    df['std'].plot(ax=ax3, label='Deviation')

    # A legend only covers the axes it is called on, so every panel needs its
    # own call; otherwise the moving-average labels are never displayed.
    for ax in (ax1, ax2, ax3):
        ax.legend()
    plt.show()
#outlier_fixing('btcusd',ma1=100,ma2=2500,ma3=5000,ma4=50000)
def single_stock(stock_name, ma1=100, ma2=250, ma3=500, ma4=5000):
    """Show the close column and four rolling means of ``value`` for one stock.

    Loads 'X:/stocks_sentdex_dates_full.csv', prints the head of the raw
    frame, keeps the rows whose ``type`` equals ``stock_name`` lower-cased,
    and displays two stacked panels sharing an x axis: the ``close`` column on
    top and the rolling means (window lengths ``ma1`` .. ``ma4``, in rows)
    underneath, with a legend on the lower panel.
    """
    frame = pd.read_csv('X:/stocks_sentdex_dates_full.csv',
                        index_col='time', parse_dates=True)
    print(frame.head())

    wanted = stock_name.lower()
    frame = frame[frame.type == wanted]

    # Build every (legend label, averaged series) pair before plotting.
    values = frame['value']
    curves = [(str(span) + 'MA', pd.rolling_mean(values, span))
              for span in (ma1, ma2, ma3, ma4)]

    price_axis = plt.subplot(2, 1, 1)
    frame['close'].plot(label='Price')

    # Creating the second subplot makes it the current axes, so the plots
    # and the legend below all land on it.
    plt.subplot(2, 1, 2, sharex=price_axis)
    for caption, curve in curves:
        curve.plot(label=caption)

    plt.legend()
    plt.show()
# at the very end... #
# Now for some quick housekeeping. We can see that the expression "count
# divided by some number" already appears in 3 spots. We intend to leave
# those numbers as they are, but we may want to change them later to try
# longer-term and shorter-term strategies. Even if we think we won't change
# them... we should realize that we probably will. Let's clean up that mess.
# So now we put the divisors in the params, and we can choose to change them
# later; that will only require 1 change, not a bunch.
def single_stock_auto_MA(stock_name, div1=275, div2=110, div3=55, div4=5.5):
    """Plot one stock with rolling-mean windows scaled to its row count.

    Reads 'X:/stocks_sentdex_dates_full.csv' and keeps the rows whose ``type``
    column equals ``stock_name`` lower-cased. Each rolling mean of the
    ``value`` column uses a window of ``row_count // divN`` rows (at least 1),
    so the windows grow with the amount of data available. The means are
    stored as columns ``MA1`` .. ``MA4``, the leading rows are trimmed, and
    two stacked panels sharing an x axis are shown: ``close`` on top and the
    four means underneath.

    Args:
        stock_name: Symbol to select; matched case-insensitively by
            lower-casing it (the file is assumed to store lower-case symbols).
        div1, div2, div3, div4: Divisors applied to the row count to obtain
            the four window lengths. ``div4`` also decides how many leading
            rows are dropped, so it is expected to be the smallest divisor
            (i.e. to give the longest window).

    Returns:
        None. The head of the frame is printed before and after trimming and
        the figure is displayed with ``plt.show()``.

    Raises:
        KeyError: If the file has no rows for the requested symbol.
    """
    symbol = stock_name.lower()

    df = pd.read_csv('X:/stocks_sentdex_dates_full.csv',
                     index_col='time', parse_dates=True)
    # Independent copy: the MA columns are added to this frame below, and
    # assigning into a filtered slice is a chained assignment.
    df = df[df.type == symbol].copy()

    # Only rows of the requested type remain, so the frame length is the
    # per-symbol count. Use the lower-cased symbol throughout so 'AAPL' and
    # 'aapl' behave the same.
    count = len(df)
    if count == 0:
        raise KeyError('no rows of type %r in the data file' % symbol)

    # A rolling window must be a whole number of rows. Floor division keeps
    # the result an int on both Python 2 and 3, and max(1, ...) guards against
    # a divisor larger than the row count.
    columns = ('MA1', 'MA2', 'MA3', 'MA4')
    windows = [max(1, int(count // div)) for div in (div1, div2, div3, div4)]

    # Store the averages on the frame so they are trimmed together with it.
    # NOTE: pd.rolling_mean is the legacy top-level helper of the pandas
    # generation this file targets (it imports pandas.io.data).
    for column, window in zip(columns, windows):
        df[column] = pd.rolling_mean(df['value'], window)
    print(df.head())

    # Starting point: skip the leading rows, where the div4-based average has
    # no value yet. float() keeps the ceiling meaningful under Python 2
    # integer division.
    SP = int(math.ceil(count / float(div4)))
    df = df[SP:]
    print(df.head())

    ax1 = plt.subplot(2, 1, 1)
    df['close'].plot(ax=ax1, label='Price')

    ax2 = plt.subplot(2, 1, 2, sharex=ax1)
    for column, window in zip(columns, windows):
        # Label each line with the window that was actually used.
        df[column].plot(ax=ax2, label=str(window) + 'MA')

    ax2.legend()
    plt.show()
# Script entry: draw the auto-scaled moving-average chart for the 'aapl' rows.
single_stock_auto_MA(stock_name='aapl')
# The next tutorial:
# Python and Pandas with Sentiment Analysis Database