!pip install pandas pymupdf
!pip install reportlab

# Importing multiple PDF documents into a DataFrame
import os
import pandas as pd
import fitz  # PyMuPDF

def read_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A single string holding the text of all pages.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with fitz.open(file_path) as doc:
        # join() avoids the repeated string copies of `text += ...` in a loop.
        return "".join(page.get_text() for page in doc)

# Folder that holds the PDF files to load.
folder_path = 'path/to/your/folder'

# One record per PDF. Names are sorted because os.listdir() returns entries
# in arbitrary order, and the extension check is case-insensitive so files
# named e.g. "REPORT.PDF" are not silently skipped.
documents = []
for filename in sorted(os.listdir(folder_path)):
    if filename.lower().endswith('.pdf'):
        file_path = os.path.join(folder_path, filename)
        doc_text = read_pdf(file_path)
        documents.append({'filename': filename, 'text': doc_text})

# Explicit columns keep the frame's schema stable even when no PDF is found.
df = pd.DataFrame(documents, columns=['filename', 'text'])
print(df)

# Writing DataFrame to PDF Files
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def write_text_to_pdf(text, file_path):
    """Write *text* to a new letter-size PDF, one drawn line per text line.

    ``Canvas.drawString`` renders a single line and does not interpret line
    breaks, so the text is split into lines that are drawn top to bottom, and
    a new page is started whenever the bottom margin is reached. Lines wider
    than the page are not wrapped.

    Args:
        text: Text to render; may contain line breaks.
        file_path: Destination path of the PDF file.
    """
    margin = 72        # one inch, in points
    line_height = 15   # distance between consecutive baselines, in points

    c = canvas.Canvas(file_path, pagesize=letter)
    _, height = letter
    y = height - margin
    # `or [""]` still draws one (empty) string for empty text, as the
    # single drawString call did, because "".splitlines() returns [].
    for line in text.splitlines() or [""]:
        if y < margin:
            # Out of vertical space: continue on a fresh page.
            c.showPage()
            y = height - margin
        c.drawString(margin, y, line)
        y -= line_height
    c.save()

# Write one PDF per DataFrame row.
for index, row in df.iterrows():
    # Drop the source extension so it does not end up in the middle of the
    # output name (e.g. "output_report.pdf_page_1.pdf").
    stem = os.path.splitext(row['filename'])[0]
    # The frame built from the folder scan only has 'filename' and 'text'
    # columns, so indexing 'page_number' unconditionally raises KeyError.
    # Add the page suffix only when that column is actually present.
    if 'page_number' in row.index:
        output_file = f"output_{stem}_page_{row['page_number']}.pdf"
    else:
        output_file = f"output_{stem}.pdf"
    write_text_to_pdf(row['text'], output_file)

# Writing multiple pages to pdf document
import pandas as pd
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Sample DataFrame
# Two documents with two pages each, laid out document by document.
data = {
    'filename': [name for name in ('doc1', 'doc2') for _ in (1, 2)],
    'page_number': [1, 2] * 2,
    'text': [
        f'This is the text of page {page} of {name}.'
        for name in ('doc1', 'doc2')
        for page in (1, 2)
    ],
}
df = pd.DataFrame(data)
def write_dataframe_to_pdf(df, output_file):
    """Write every row of *df* into one PDF, starting a fresh page per row.

    Each row's page opens with a "Filename" and a "Page Number" header line,
    followed by the row's text drawn one line at a time. Text that does not
    fit on the page continues on additional pages instead of being drawn
    below the bottom edge.

    Args:
        df: DataFrame with 'filename', 'page_number' and 'text' columns.
        output_file: Destination path of the PDF file.
    """
    margin = 72        # one inch, in points
    line_height = 15   # distance between consecutive baselines, in points

    c = canvas.Canvas(output_file, pagesize=letter)
    _, height = letter
    for _, row in df.iterrows():
        # Header lines identifying the source document and page.
        c.drawString(margin, height - 72, f"Filename: {row['filename']}")
        c.drawString(margin, height - 90, f"Page Number: {row['page_number']}")

        y = height - 120
        for line in row['text'].split('\n'):
            if y < margin:
                # Out of vertical space: continue this row on a new page.
                c.showPage()
                y = height - margin
            c.drawString(margin, y, line)
            y -= line_height
        # Close the page so the next row starts on its own page.
        c.showPage()
    c.save()


write_dataframe_to_pdf(df, 'output.pdf')

!pip install pdfkit
!pip install reportlab

# Converting HTML to PDF
import pdfkit

# HTML content
# NOTE(review): the literal previously held only bare text lines with no
# tags, so it was not an HTML document and rendered as undifferentiated
# plain text. The markup below is reconstructed from the layout of those
# lines (title / heading / paragraph) -- confirm against the intended page.
html_content = """
<!DOCTYPE html>
<html>
<head>
    <title>Sample HTML</title>
</head>
<body>
    <h1>Hello, World!</h1>
    <p>This is a sample HTML to PDF conversion.</p>
</body>
</html>
"""

# Convert HTML string to PDF
pdfkit.from_string(html_content, 'output.pdf')

# Converting an HTML File to PDF
import pdfkit


# HTML document on disk that will be rendered
html_file = 'sample.html'

# Render that file into a PDF
pdfkit.from_file(input=html_file, output_path='output.pdf')

# Converting a URL to PDF
import pdfkit

# Address of the web page to capture
url = 'https://www.example.com'

# Fetch the page and render it into a PDF
pdfkit.from_url(url=url, output_path='output.pdf')

# More options: page size, orientation, and margins
import pdfkit

# NOTE(review): the literal previously held only bare text lines with no
# tags, so it was not an HTML document and rendered as undifferentiated
# plain text. The markup below is reconstructed from the layout of those
# lines (title / heading / paragraph) -- confirm against the intended page.
html_content = """
<!DOCTYPE html>
<html>
<head>
    <title>Sample HTML</title>
</head>
<body>
    <h1>Hello, World!</h1>
    <p>This is a sample HTML to PDF conversion.</p>
</body>
</html>
"""

# Page-layout settings passed to pdfkit through its `options` argument.
options = {
    'page-size': 'A4',
    'orientation': 'Portrait',
    'margin-top': '10mm',
    'margin-right': '10mm',
    'margin-bottom': '10mm',
    'margin-left': '10mm'
}
pdfkit.from_string(html_content, 'output.pdf', options=options)

# Adding Images to a PDF
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader

def create_pdf_with_images(output_file):
    """Create a one-page letter-size PDF with two text lines and two images.

    The two text lines are drawn first, then two images read from fixed
    placeholder paths are placed side by side, each drawn at 200 x 200 points.

    Args:
        output_file: Destination path of the PDF file.
    """
    pdf = canvas.Canvas(output_file, pagesize=letter)
    page_height = letter[1]

    # Text lines, as (distance below the top edge, text)
    captions = (
        (72, "Hello, World!"),
        (90, "This is a sample PDF with images."),
    )
    for drop, caption in captions:
        pdf.drawString(72, page_height - drop, caption)

    # Images, as (file path, x position); both share the same y position
    placements = (
        ('path/to/your/image.jpg', 72),
        ('path/to/another/image.png', 300),
    )
    for path, x in placements:
        pdf.drawImage(ImageReader(path), x, page_height - 300, width=200, height=200)

    pdf.save()


create_pdf_with_images('output_with_images.pdf')

# Adding Images to Each Page from a DataFrame
import pandas as pd
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader

# Three pages, each with a line of text and the path of the image to place.
data = {
    'text': [f'Page {n} text' for n in range(1, 4)],
    'image_path': [f'path/to/image{n}.jpg' for n in range(1, 4)],
}
df = pd.DataFrame(data)
def create_pdf_from_dataframe(df, output_file):
    """Render each row of *df* as its own page of a letter-size PDF.

    Every page carries the row's 'text' value as one drawn line and the image
    found at the row's 'image_path', drawn at 200 x 200 points.

    Args:
        df: DataFrame with 'text' and 'image_path' columns.
        output_file: Destination path of the PDF file.
    """
    pdf = canvas.Canvas(output_file, pagesize=letter)
    page_height = letter[1]
    text_y = page_height - 72
    image_y = page_height - 300

    for _, record in df.iterrows():
        # Text line first, then the picture below it
        pdf.drawString(72, text_y, record['text'])
        picture = ImageReader(record['image_path'])
        pdf.drawImage(picture, 72, image_y, width=200, height=200)
        # Finish this page; the next row starts on a new one
        pdf.showPage()

    pdf.save()


create_pdf_from_dataframe(df, 'output_from_dataframe.pdf')

# Converting HTML content with image links to PDF
import pdfkit

# NOTE(review): the literal previously held only bare text lines and three
# blank lines where the images were expected -- no tags at all -- so the
# resulting PDF contained no images. The markup below is reconstructed from
# the layout of those lines; the image URLs are placeholders to be replaced
# with real image links.
html_content = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Sample HTML with Images</title>
</head>
<body>
    <h1>Welcome to My PDF</h1>
    <p>This is a sample HTML file with images.</p>
    <img src="https://www.example.com/image1.jpg" alt="Image 1">
    <img src="https://www.example.com/image2.jpg" alt="Image 2">
    <img src="https://www.example.com/image3.jpg" alt="Image 3">
</body>
</html>
"""
# An explicit encoding keeps the written file independent of the platform's
# default locale and matches the charset declared in the document head.
with open('sample.html', 'w', encoding='utf-8') as file:
    file.write(html_content)
pdfkit.from_file('sample.html', 'output.pdf')

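# Leftover text below is not code; it appears to be page text extracted
# from the generated PDFs -- kept as comments so the file still parses.
#
# PDF
#
# Hello, World!
#
# Hello, World!
#
# Welcome to My PDF
#
# www.rd112.com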