AWS — reading CSV files from S3 into pandas DataFrames with boto3
In [ ]:
!pip install boto3
In [ ]:
import pandas as pd
import boto3

# --- Configuration ---------------------------------------------------------
# Replace with your actual credentials and bucket/file details.
# NOTE(review): prefer the default credential chain (environment variables,
# ~/.aws/credentials, or an IAM role) over hard-coding keys in source files.
aws_access_key_id = 'YOUR_ACCESS_KEY'
aws_secret_access_key = 'YOUR_SECRET_KEY'
bucket_name = 'your-bucket-name'
file_key = 'your-file.csv'

# Authenticate and fetch the file from S3.
s3 = boto3.client(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)
obj = s3.get_object(Bucket=bucket_name, Key=file_key)

# Create DataFrame. pd.read_csv accepts the S3 StreamingBody (a file-like
# object) directly, so the CSV is parsed as it streams rather than being
# buffered twice in memory (once as a decoded str, once inside a StringIO).
df = pd.read_csv(obj['Body'])
print(df.head())
In [ ]:
import boto3
import pandas as pd

# --- Configuration ---------------------------------------------------------
# Replace with your actual credentials and bucket details.
# NOTE(review): prefer the default credential chain (environment variables,
# ~/.aws/credentials, or an IAM role) over hard-coding keys in source files.
aws_access_key_id = 'YOUR_ACCESS_KEY'
aws_secret_access_key = 'YOUR_SECRET_KEY'
bucket_name = 'your-bucket-name'
folder_prefix = 'your-folder/'

s3 = boto3.client(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# List every CSV under the prefix. A paginator is required because a single
# list_objects_v2 call returns at most 1000 keys; without it, any files
# beyond the first page would be silently skipped.
paginator = s3.get_paginator('list_objects_v2')
files = [
    content['Key']
    for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_prefix)
    for content in page.get('Contents', [])
    if content['Key'].endswith('.csv')
]

# Read each CSV straight from its streaming body (pd.read_csv accepts the
# file-like StreamingBody, avoiding an intermediate decoded-str copy).
dfs = [
    pd.read_csv(s3.get_object(Bucket=bucket_name, Key=file_key)['Body'])
    for file_key in files
]

# Combine all DataFrames into one. Guard the empty case: pd.concat([]) raises
# ValueError, so fall back to an empty DataFrame when no CSVs match the prefix.
combined_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
