# =========================================================
# Regression
#
# From: Practical Machine Learning with Python
# 1. www.youtube.com/watch?v=jcI5Vnw0b2c
# (Regression Intro - #2)
# 2. www.youtube.com/watch?v=IN5jesocJjk
# (regression Features and labels - #3)
#
# =========================================================
# pip install scikit-learn
# pip install pandas
# pip install quandl
# =========================================================
import math
import pandas as pd
import quandl
# -- download the 'WIKI/GOOGL' dataset from Quandl as a data frame
df = quandl.get('WIKI/GOOGL')
##print(df.head())
# -- keep only the 'Adj.' columns; .copy() gives an independent frame so the
# -- column assignments below do not write into a slice of the downloaded
# -- frame (avoids pandas' SettingWithCopyWarning)
df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close',
         'Adj. Volume']].copy()
# -- HL_PCT: distance of the high above the close, as a percent of the close
# -- NOTE(review): the name suggests a High-Low spread, but the formula uses
# -- High vs Close -- confirm which one is intended
df['HL_PCT'] = (
    (df['Adj. High'] - df['Adj. Close']) / df['Adj. Close'] * 100.0
)
# -- PCT_change: open-to-close move, as a percent of the open
df['PCT_change'] = (
    (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
)
# -- narrow down to the columns used from here on; .copy() again so the
# -- in-place fillna and the 'label' assignment act on an independent frame
df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']].copy()
##print(df.head())
forcast_col = 'Adj. Close'
# -- replace missing values with a sentinel instead of dropping the rows
df.fillna(-99999, inplace=True)
# -- forecast horizon in rows: 1% of the data frame length, rounded up
forcast_out = int(math.ceil(0.01*len(df)))
# -- create a label
# -- "label" for each row is the "Adj. Close" value found forcast_out rows
# -- further down the frame (shift with a negative period moves values up)
df['label'] = df[forcast_col].shift(-forcast_out)
# -- the last forcast_out rows have no value to shift in, so their label is
# -- NaN; drop them so every remaining row pairs its values with a label
df.dropna(inplace=True)
print(df.head())