One-Hot Encoding
In [1]:
# One-hot encode a single categorical column with pd.get_dummies()
import pandas as pd

# Demo frame with one categorical feature; 'Red' is repeated on purpose
# so that two rows end up with the same indicator pattern.
df = pd.DataFrame(['Red', 'Green', 'Blue', 'Red'], columns=['Color'])

# Each distinct value of 'Color' becomes its own indicator column, named
# '<column>_<value>'; the original 'Color' column is dropped from the result.
df_encoded = pd.get_dummies(data=df, columns=['Color'])

# Show the frame before and after encoding, each under a label line.
print("df", df, "df_encoded", df_encoded, sep="\n")
df Color 0 Red 1 Green 2 Blue 3 Red df_encoded Color_Blue Color_Green Color_Red 0 False False True 1 False True False 2 True False False 3 False False True
In [2]:
# One-hot encode several categorical columns in a single call
import pandas as pd

# Two categorical features; the keyword order fixes the column order.
df = pd.DataFrame(dict(
    Color=['Red', 'Green', 'Blue', 'Red'],
    Size=['S', 'M', 'L', 'S'],
))

# Every column listed in `columns` is expanded into '<column>_<value>'
# indicator columns, in the order the columns are listed.
df_encoded = pd.get_dummies(data=df, columns=['Color', 'Size'])

print(df_encoded)
Color_Blue Color_Green Color_Red Size_L Size_M Size_S 0 False False True False False True 1 False True False False True False 2 True False False True False False 3 False False True False False True
In [3]:
# Using OneHotEncoder from sklearn
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

df = pd.DataFrame({
    'Color': ['Red', 'Green', 'Blue', 'Red']
})

# The encoder expects 2-D input, hence the double brackets around 'Color'.
# With default settings fit_transform returns a sparse matrix, so it is
# densified with .toarray() before being wrapped in a DataFrame.
encoder = OneHotEncoder()
encoded_data = encoder.fit_transform(df[['Color']]).toarray()

# Reuse df's index for the encoded frame: DataFrame.join aligns on index, so
# without this the join only lines up when df has the default 0..n-1 index
# (a filtered or re-indexed df would silently produce NaN rows).
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(['Color']),
    index=df.index,
)

# Keep the original 'Color' column next to its indicator columns.
df_encoded = df.join(encoded_df)
print(df_encoded)
Color Color_Blue Color_Green Color_Red 0 Red 0.0 0.0 1.0 1 Green 0.0 1.0 0.0 2 Blue 1.0 0.0 0.0 3 Red 0.0 0.0 1.0
