Correlation Matrix
In [1]:
#!pip install pandas numpy matplotlib
In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Demo data: four independent columns of 100 uniform random samples each.
data = {
    'A': np.random.rand(100),
    'B': np.random.rand(100),
    'C': np.random.rand(100),
    'D': np.random.rand(100),
}
df = pd.DataFrame(data)

# Pairwise correlation matrix of the columns (4 x 4, ones on the diagonal).
corr = df.corr()
labels = corr.columns
positions = np.arange(len(labels))

fig, ax = plt.subplots(figsize=(8, 6))

# Pin the colour scale to the full correlation range [-1, 1] so that the
# neutral midpoint of the diverging 'coolwarm' map corresponds to r = 0.
# Without vmin/vmax the scale is fitted to the data and the midpoint drifts.
im = ax.matshow(corr, cmap='coolwarm', vmin=-1, vmax=1)
fig.colorbar(im)

# One tick per column/row, labelled with the column names.
ax.set_xticks(positions)
ax.set_yticks(positions)
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)
ax.tick_params(axis='x', labelrotation=45)

# matshow puts the x tick labels on top, so pad the title away from them.
ax.set_title('Correlation Matrix', pad=20)
plt.show()
In [3]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Demo data: four independent columns of 100 uniform random samples each.
data = {
    'A': np.random.rand(100),
    'B': np.random.rand(100),
    'C': np.random.rand(100),
    'D': np.random.rand(100),
}
df = pd.DataFrame(data)

# Pairwise correlation matrix of the columns (4 x 4, ones on the diagonal).
corr = df.corr()
labels = corr.columns
positions = np.arange(len(labels))

fig, ax = plt.subplots(figsize=(8, 6))

# Pin the colour scale to the full correlation range [-1, 1] so that the
# neutral midpoint of the diverging 'coolwarm' map corresponds to r = 0.
im = ax.matshow(corr, cmap='coolwarm', vmin=-1, vmax=1)
fig.colorbar(im)

# One tick per column/row, labelled with the column names.
ax.set_xticks(positions)
ax.set_yticks(positions)
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)
ax.tick_params(axis='x', labelrotation=45)

# matshow puts the x tick labels on top, so pad the title away from them.
ax.set_title('Correlation Matrix', pad=20)

# Annotate each cell with the correlation coefficient as a percentage.
# matshow draws row i at y = i and column j at x = j, hence text(j, i, ...).
for i, row in enumerate(corr.to_numpy()):
    for j, value in enumerate(row):
        ax.text(j, i, f'{value * 100:.1f}%',
                ha='center', va='center', color='black')

plt.show()
In [4]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Demo data: four independent columns of 100 uniform random samples each.
data = {
    'A': np.random.rand(100),
    'B': np.random.rand(100),
    'C': np.random.rand(100),
    'D': np.random.rand(100),
}
df = pd.DataFrame(data)

# Pairwise correlation matrix of the columns (4 x 4, ones on the diagonal).
corr = df.corr()
labels = corr.columns
positions = np.arange(len(labels))

# Boolean mask that is True on and above the main diagonal.
mask = np.triu(np.ones(corr.shape, dtype=bool))

# Keep only the upper triangle; the remaining cells become NaN, which the
# image renderer leaves blank. Previously the mask was computed but the full
# matrix was still drawn, contradicting the "Upper Triangle" title.
upper = corr.where(mask)

fig, ax = plt.subplots(figsize=(8, 6))

# Pin the colour scale to the full correlation range [-1, 1] so that the
# neutral midpoint of the diverging 'coolwarm' map corresponds to r = 0.
im = ax.matshow(upper, cmap='coolwarm', vmin=-1, vmax=1)
fig.colorbar(im)

# One tick per column/row, labelled with the column names.
ax.set_xticks(positions)
ax.set_yticks(positions)
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)
ax.tick_params(axis='x', labelrotation=45)

# matshow puts the x tick labels on top, so pad the title away from them.
ax.set_title('Correlation Matrix (Upper Triangle)', pad=20)

# Annotate only the cells that are actually shown. The lower triangle is left
# without text, so no separate pass over the masked-out cells is needed.
for i, row in enumerate(corr.to_numpy()):
    for j, value in enumerate(row):
        if mask[i, j]:
            ax.text(j, i, f'{value * 100:.1f}%',
                    ha='center', va='center', color='black')

plt.show()
