Tip: Jake Vanderplas has a great tutorial on using pandas!

#collapse-hide
# Standard data-analysis stack: pandas/NumPy for data, seaborn/matplotlib for plots.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import permutations
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# Apply seaborn's default plot styling globally.
sns.set()

Note: Generate random data to simulate stock prices.
np.random.seed(42)

def brownian_motion(mean, std, npts):
    """Simulate a 1-D random walk of ``npts`` steps offset by ``mean``.

    Draws ``npts`` i.i.d. Gaussian increments (zero mean, scale ``std``)
    from the global NumPy RNG and returns their running sum shifted so
    the path wanders around ``mean``.
    """
    increments = np.random.normal(scale=std, size=npts)
    return mean + increments.cumsum()
    
num_stocks = 10
num_timesteps = 1000

# Build random 3-letter ticker symbols, one per simulated stock.
letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ticker_chars = np.random.choice(list(letters), size=(num_stocks, 3))
tickers = [''.join(chars) for chars in ticker_chars]

# One calendar day per timestep.
dates = pd.date_range('2020-11-21', periods=num_timesteps, freq='D')

# Each stock gets a random starting level and per-step volatility,
# then a brownian-motion price path (stocks as columns, dates as rows).
start_levels = np.random.randint(50, 200, num_stocks)
volatilities = np.random.randint(2, 5, num_stocks)
walks = [brownian_motion(m, s, num_timesteps) for m, s in zip(start_levels, volatilities)]
data = np.vstack(walks).T

df = pd.DataFrame(data, columns=tickers, index=dates)
# Drop any column whose path ever dipped to zero or below (prices must stay positive).
df = df[df > 0].dropna(axis=1)
df.head()
GTO KHU GZS XUD HXC VUB XLF LZV
1969-04-20 141.116771 97.403393 110.300592 104.750134 159.614557 106.114842 112.311045 101.060632
1969-04-21 136.712380 101.730706 110.860277 100.405943 161.325646 109.976118 108.934610 103.011746
1969-04-22 141.233292 98.615540 107.905621 99.246578 159.395447 109.840294 108.188974 103.533443
1969-04-23 142.725768 100.724268 108.421143 101.966275 160.113603 106.707104 108.881800 102.359047
1969-04-24 141.179876 104.067308 111.383620 98.116840 164.030552 109.869213 109.098647 105.953067

Tip: Use this to load CSVs from your own Google Drive (Colab only)
# Colab-only: mount Google Drive so files under "My Drive" are readable as local paths.
from google.colab import drive 
drive.mount('/content/gdrive')
!ls "gdrive/My Drive" # this line will look in the folder

df = pd.read_csv('gdrive/My Drive/data.csv') # put the full path to the file in google drive here if you have one
# Plot every simulated price series on one wide set of axes.
price_fig, price_ax = plt.subplots(1, figsize=(20, 8))
df.plot(ax=price_ax)
plt.show()
# Correlate day-over-day differences (returns), not raw price levels —
# raw levels of random walks correlate spuriously.
df.diff().corr()
GTO KHU GZS XUD HXC VUB XLF LZV
GTO 1.000000 -0.034842 -0.008919 0.057757 -0.004012 -0.023837 -0.017131 -0.012034
KHU -0.034842 1.000000 -0.004939 0.026932 -0.013135 0.036544 0.027497 -0.025655
GZS -0.008919 -0.004939 1.000000 0.032693 -0.032603 0.063454 0.020113 0.010456
XUD 0.057757 0.026932 0.032693 1.000000 -0.010530 0.005832 -0.030495 0.034310
HXC -0.004012 -0.013135 -0.032603 -0.010530 1.000000 -0.021562 0.007714 -0.021141
VUB -0.023837 0.036544 0.063454 0.005832 -0.021562 1.000000 0.004669 0.017130
XLF -0.017131 0.027497 0.020113 -0.030495 0.007714 0.004669 1.000000 -0.012195
LZV -0.012034 -0.025655 0.010456 0.034310 -0.021141 0.017130 -0.012195 1.000000
# Eigendecompose the correlation matrix of daily differences.
# A correlation matrix is symmetric, so use eigh: it guarantees real
# eigenvalues/eigenvectors and is numerically stabler than eig.
e_val, e_vect = np.linalg.eigh(df.diff().corr())

# BUG FIX: eigenvectors come back as the COLUMNS of e_vect, so sorting
# by eigenvalue must reorder columns (e_vect[:, order]) — indexing with
# e_vect[order] reorders ROWS and scrambles the eigenvectors.
order = np.argsort(e_val)[::-1]          # descending eigenvalue order
e_val = e_val[order]
e_vect = e_vect[:, order]

# Rows are tickers (the components of each eigenvector); columns are the
# eigenvectors, largest eigenvalue first (labels kept from the original).
evect_df = pd.DataFrame(e_vect, columns=df.columns, index=df.columns)
evect_df
GTO KHU GZS XUD HXC VUB XLF LZV
GTO -0.462612 0.286135 -0.307235 -0.000519 -0.609454 0.470913 0.000300 0.128569
KHU -0.386584 0.551908 0.043692 0.485364 0.097528 -0.506980 -0.066881 -0.193522
GZS -0.146585 -0.366611 -0.130274 0.009367 -0.124167 -0.071223 -0.827676 -0.348587
XUD 0.260056 0.568043 -0.141505 -0.627165 0.046178 -0.006299 -0.109068 -0.426931
HXC 0.223860 0.091539 -0.431629 -0.137911 -0.149098 -0.488327 -0.224082 0.652133
VUB -0.298582 0.233458 0.288640 -0.178202 0.531979 0.319270 -0.394092 0.448368
XLF 0.518531 0.270936 0.541743 0.282665 -0.413420 0.133974 -0.289216 0.108735
LZV -0.373213 -0.141987 0.550999 -0.490227 -0.354058 -0.397183 0.097036 0.064043
# Heatmap of the eigenvector matrix (rows: tickers; columns: eigenvectors).
fig, ax = plt.subplots(1, figsize=(12, 10))
# BUG FIX: the plotted values are eigenVECTOR components, not eigenvalues —
# the original title mislabeled the chart.
ax.set_title('Eigenvectors of Correlation of Running Difference', fontsize=16)
sns.heatmap(evect_df, ax=ax, annot=True, fmt=".2f", linewidths=.5)
fig.savefig('../images/eigen_correlation_heatmap.png')  # assumes ../images exists — TODO confirm
plt.show()