It is bad practice to remove outliers that genuinely belong to the data. However, you may find that your data set contains bad data (glitches or recording errors), and in that case you want to be able to find and remove it.
We're going to use a rolling standard deviation to find the bad data points.
def outlier_fixing(stock_name, ma1=100, ma2=250, ma3=500, ma4=5000,
                   std_window=25, std_threshold=17,
                   csv_path='X:/stocks_sentdex_dates_short.csv'):
    """Filter out glitchy rows for one instrument and plot the result.

    Reads the CSV at ``csv_path`` (indexed by its ``time`` column, parsed as
    dates), keeps the rows whose ``type`` equals ``stock_name`` lower-cased,
    computes a rolling standard deviation of ``close``, drops every row whose
    deviation is not below ``std_threshold``, and then shows three stacked
    subplots sharing one x axis: the ``close`` price, four rolling means of
    ``value``, and the rolling deviation.

    Args:
        stock_name: Instrument identifier; compared lower-cased against the
            ``type`` column.
        ma1, ma2, ma3, ma4: Window lengths (in rows) of the four rolling
            means computed over the ``value`` column.
        std_window: Window length (in rows) of the rolling standard
            deviation of ``close``.
        std_threshold: Rows are kept only when their rolling deviation is
            strictly below this value.
        csv_path: Location of the input CSV. It must provide the columns
            ``time``, ``type``, ``close`` and ``value``.

    Returns:
        None. The function prints the head of the raw frame and the rolling
        deviation series, then blocks in ``plt.show()``.
    """
    df = pd.read_csv(csv_path, index_col='time', parse_dates=True)
    print(df.head())

    # Take an explicit copy: a column is added below, and writing into a
    # boolean-filtered view triggers pandas' SettingWithCopyWarning.
    df = df[df['type'] == stock_name.lower()].copy()

    # Series.rolling replaces pd.rolling_std, which newer pandas no longer
    # provides. Compute once and reuse for both the printout and the column.
    rolling_std = df['close'].rolling(std_window, min_periods=1).std()
    print(rolling_std)
    df['std'] = rolling_std

    # Rows at or above the threshold are treated as glitches and dropped.
    # The original note eyeballed ~20 as the glitch level; the cutoff that
    # was actually applied (and is kept as the default) is 17.
    # Rows whose deviation is NaN also fail the comparison and are dropped
    # (e.g. the first row, where a single sample has no sample deviation).
    df = df[df['std'] < std_threshold]

    # Rolling means are taken after the filter so glitches do not skew them.
    moving_averages = [
        (window, df['value'].rolling(window).mean())
        for window in (ma1, ma2, ma3, ma4)
    ]

    # Each plot is bound to its axes explicitly and every axes gets its own
    # legend; a single trailing plt.legend() only labels the last subplot.
    ax1 = plt.subplot(3, 1, 1)
    df['close'].plot(ax=ax1, label='Price')
    ax1.legend()

    ax2 = plt.subplot(3, 1, 2, sharex=ax1)
    for window, series in moving_averages:
        series.plot(ax=ax2, label=str(window) + 'MA')
    ax2.legend()

    ax3 = plt.subplot(3, 1, 3, sharex=ax1)
    df['std'].plot(ax=ax3, label='Deviation')
    ax3.legend()

    plt.show()


outlier_fixing('btcusd', ma1=100, ma2=2500, ma3=5000, ma4=50000)