# Loading the Data
import pandas as pd

# Sample DataFrame
# The sample is written row by row (one tuple per person) so it reads like
# the table it produces, then transposed into the column -> values mapping
# that pd.DataFrame consumes.
column_names = ('Name', 'Age', 'City', 'Salary')
records = [
    ('Alice', 24, 'New York', 70000),
    ('Bob', 27, 'Los Angeles', 80000),
    ('Charlie', 22, 'Chicago', 65000),
    ('David', 32, 'Houston', 90000),
    ('Edward', 29, 'Phoenix', 85000),
]

# zip(*records) regroups the rows into per-column tuples; dict insertion
# order follows column_names, which fixes the DataFrame's column order.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*records))
}

df = pd.DataFrame(data)
print(df)

      Name  Age         City  Salary
0    Alice   24     New York   70000
1      Bob   27  Los Angeles   80000
2  Charlie   22      Chicago   65000
3    David   32      Houston   90000
4   Edward   29      Phoenix   85000

# Getting Basic Information
# Each section is a label line followed by the value on the next line.
# head() and tail() default to 5 rows, so on this 5-row frame both show
# the whole table.
print("**HEAD", df.head(), sep="\n")
print("**TAIL", df.tail(), sep="\n")

# df.info() writes its report to stdout itself and returns None. The label
# is therefore printed in its own call first (arguments are evaluated before
# print emits anything), and wrapping info() in print() is what produces the
# trailing "None" line after the report.
print("**INFO")
print(df.info())

print("**SHAPE", df.shape, sep="\n")
print("*COLS", df.columns, sep="\n")

**HEAD
      Name  Age         City  Salary
0    Alice   24     New York   70000
1      Bob   27  Los Angeles   80000
2  Charlie   22      Chicago   65000
3    David   32      Houston   90000
4   Edward   29      Phoenix   85000
**TAIL
      Name  Age         City  Salary
0    Alice   24     New York   70000
1      Bob   27  Los Angeles   80000
2  Charlie   22      Chicago   65000
3    David   32      Houston   90000
4   Edward   29      Phoenix   85000
**INFO
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    5 non-null      object
 1   Age     5 non-null      int64 
 2   City    5 non-null      object
 3   Salary  5 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 288.0+ bytes
None
**SHAPE
(5, 4)
*COLS
Index(['Name', 'Age', 'City', 'Salary'], dtype='object')

# Descriptive Statistics
# Default describe() summarises only the numeric columns (Age, Salary here).
numeric_summary = df.describe()
print(numeric_summary)

# include='all' adds the object columns as well; statistics that do not apply
# to a column's type are reported as NaN.
full_summary = df.describe(include='all')
print(full_summary)

             Age        Salary
count   5.000000      5.000000
mean   26.800000  78000.000000
std     3.962323  10368.220677
min    22.000000  65000.000000
25%    24.000000  70000.000000
50%    27.000000  80000.000000
75%    29.000000  85000.000000
max    32.000000  90000.000000
         Name        Age      City        Salary
count       5   5.000000         5      5.000000
unique      5        NaN         5           NaN
top     Alice        NaN  New York           NaN
freq        1        NaN         1           NaN
mean      NaN  26.800000       NaN  78000.000000
std       NaN   3.962323       NaN  10368.220677
min       NaN  22.000000       NaN  65000.000000
25%       NaN  24.000000       NaN  70000.000000
50%       NaN  27.000000       NaN  80000.000000
75%       NaN  29.000000       NaN  85000.000000
max       NaN  32.000000       NaN  90000.000000

# Checking for Missing Values
# isnull() yields a boolean frame; summing it column-wise counts the True
# (missing) cells in each column.
missing_mask = df.isnull()
missing_per_column = missing_mask.sum()
print(missing_per_column)

Name      0
Age       0
City      0
Salary    0
dtype: int64

# Value Counts for Categorical Columns
# Number of rows for each distinct value in the City column.
city_column = df['City']
city_counts = city_column.value_counts()
print(city_counts)

City
New York       1
Los Angeles    1
Chicago        1
Houston        1
Phoenix        1
Name: count, dtype: int64

# Unique Values in a Column
# Distinct City values, returned as an array.
distinct_cities = df['City'].unique()
print(distinct_cities)

['New York' 'Los Angeles' 'Chicago' 'Houston' 'Phoenix']

Info/describe

Info/Describe

www.rd112.com