Info/Describe
In [1]:
# Loading the Data
import pandas as pd
# Sample DataFrame: a small in-memory table used by every cell below.
# Each keyword becomes a column name; the lists line up by position,
# so entry i of every list belongs to the same row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Edward'],
    Age=[24, 27, 22, 32, 29],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
    Salary=[70000, 80000, 65000, 90000, 85000],
)

# Materialise the mapping as a DataFrame and show the whole (tiny) table.
df = pd.DataFrame(data)
print(df)
      Name  Age         City  Salary
0    Alice   24     New York   70000
1      Bob   27  Los Angeles   80000
2  Charlie   22      Chicago   65000
3    David   32      Houston   90000
4   Edward   29      Phoenix   85000
In [2]:
# Getting Basic Information
# Quick structural overview of `df`: first rows, last rows, schema summary,
# dimensions, and column labels. Each section is preceded by a text marker
# so the pieces can be told apart in the combined output.
print("**HEAD")
print(df.head())
print("**TAIL")
print(df.tail())
print("**INFO")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it must not be wrapped in print() -- doing so emits a stray "None"
# line after the report.
df.info()
print("**SHAPE")
print(df.shape)  # (rows, columns)
print("*COLS")
print(df.columns)
**HEAD
Name Age City Salary
0 Alice 24 New York 70000
1 Bob 27 Los Angeles 80000
2 Charlie 22 Chicago 65000
3 David 32 Houston 90000
4 Edward 29 Phoenix 85000
**TAIL
Name Age City Salary
0 Alice 24 New York 70000
1 Bob 27 Los Angeles 80000
2 Charlie 22 Chicago 65000
3 David 32 Houston 90000
4 Edward 29 Phoenix 85000
**INFO
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 5 non-null object
1 Age 5 non-null int64
2 City 5 non-null object
3 Salary 5 non-null int64
dtypes: int64(2), object(2)
memory usage: 288.0+ bytes
None
**SHAPE
(5, 4)
*COLS
Index(['Name', 'Age', 'City', 'Salary'], dtype='object')
In [3]:
# Descriptive Statistics
# First the default summary, then the same call with include='all' so that
# every column of `df` appears in the report.
default_summary = df.describe()
print(default_summary)

full_summary = df.describe(include='all')
print(full_summary)
Age Salary
count 5.000000 5.000000
mean 26.800000 78000.000000
std 3.962323 10368.220677
min 22.000000 65000.000000
25% 24.000000 70000.000000
50% 27.000000 80000.000000
75% 29.000000 85000.000000
max 32.000000 90000.000000
Name Age City Salary
count 5 5.000000 5 5.000000
unique 5 NaN 5 NaN
top Alice NaN New York NaN
freq 1 NaN 1 NaN
mean NaN 26.800000 NaN 78000.000000
std NaN 3.962323 NaN 10368.220677
min NaN 22.000000 NaN 65000.000000
25% NaN 24.000000 NaN 70000.000000
50% NaN 27.000000 NaN 80000.000000
75% NaN 29.000000 NaN 85000.000000
max NaN 32.000000 NaN 90000.000000
In [4]:
# Checking for Missing Values
# Build a boolean mask of null cells, then total it per column.
null_mask = df.isnull()
missing_per_column = null_mask.sum()
print(missing_per_column)
Name      0
Age       0
City      0
Salary    0
dtype: int64
In [5]:
# Value Counts for Categorical Columns
# Tally how many rows fall under each distinct value of 'City'.
city_column = df['City']
city_counts = city_column.value_counts()
print(city_counts)
City
New York       1
Los Angeles    1
Chicago        1
Houston        1
Phoenix        1
Name: count, dtype: int64
In [6]:
# Unique Values in a Column
# Collect the distinct entries of the 'City' column and print them.
distinct_cities = df['City'].unique()
print(distinct_cities)
['New York' 'Los Angeles' 'Chicago' 'Houston' 'Phoenix']
