It is bad practice to remove outliers that genuinely belong to the data. Sometimes, though, a data set contains readings that are simply wrong, and you want to be able to find and remove them.
We're going to use a rolling standard deviation to find the bad data points.
def outlier_fixing(stock_name, ma1=100, ma2=250, ma3=500, ma4=5000,
                   std_window=25, std_threshold=17):
    """Drop glitchy rows for one ticker and plot the cleaned series.

    Reads the CSV, keeps the rows whose ``type`` column equals
    ``stock_name`` (lower-cased), computes a rolling standard deviation of
    the ``close`` column and discards every row whose deviation is not
    below ``std_threshold``. Three stacked subplots sharing one x axis are
    then shown: ``close``, four moving averages of ``value``, and the
    rolling deviation.

    Args:
        stock_name: Ticker to select; compared lower-cased against the
            ``type`` column.
        ma1: Window (in rows) of the first moving average of ``value``.
        ma2: Window of the second moving average.
        ma3: Window of the third moving average.
        ma4: Window of the fourth moving average.
        std_window: Window (in rows) of the rolling standard deviation.
        std_threshold: Rows whose rolling deviation is greater than or
            equal to this value are treated as glitches and removed.

    Returns:
        None. The function prints intermediate data and opens a plot window.
    """
    # NOTE(review): the input path is hard-coded; the file must exist there.
    df = pd.read_csv('X:/stocks_sentdex_dates_short.csv',
                     index_col='time', parse_dates=True)
    print(df.head())

    # Copy the filtered rows so that adding the 'std' column below writes to
    # an independent frame rather than to a slice of the full data set.
    df = df[df.type == stock_name.lower()].copy()

    # Compute the rolling deviation once and reuse it for both the debug
    # print and the new column.
    std = df['close'].rolling(std_window, min_periods=1).std()
    print(std)
    df['std'] = std

    # A spike in the rolling deviation marks a glitch in the price feed.
    # Keep only rows below the cutoff; rows whose deviation is NaN fail the
    # comparison as well and are therefore dropped.
    df = df[df['std'] < std_threshold]

    ax1 = plt.subplot(3, 1, 1)
    df['close'].plot(ax=ax1, label='Price')
    ax1.legend()

    ax2 = plt.subplot(3, 1, 2, sharex=ax1)
    for window in (ma1, ma2, ma3, ma4):
        # Averages are taken over the already-cleaned rows.
        moving_average = df['value'].rolling(window).mean()
        moving_average.plot(ax=ax2, label=(str(window) + 'MA'))
    ax2.legend()

    ax3 = plt.subplot(3, 1, 3, sharex=ax1)
    df['std'].plot(ax=ax3, label='Deviation')
    ax3.legend()

    plt.show()
# Clean and plot the 'btcusd' rows, using larger ma2-ma4 windows than the
# function's defaults.
outlier_fixing(
    'btcusd',
    ma1=100,
    ma2=2500,
    ma3=5000,
    ma4=50000,
)