import argparse
import io
from PyPDF2 import PdfReader, PdfWriter
from weasyprint import HTML
import json

def create_pdf_with_html(html_content):
    """Render an HTML string to PDF and hand it back as an in-memory stream.

    Args:
        html_content: HTML markup, passed to WeasyPrint as a string.

    Returns:
        An ``io.BytesIO`` wrapping the rendered PDF bytes, positioned at the
        start. Pagination is decided by WeasyPrint, so the PDF may contain
        more than one page.
    """
    document = HTML(string=html_content)
    rendered_bytes = document.write_pdf()
    return io.BytesIO(rendered_bytes)

def insert_html_into_pdf(input_pdf_path, html_content, output_pdf_path, insert_page):
    """Insert HTML-rendered page(s) into an existing PDF at a given position.

    The pages of the input PDF before ``insert_page`` are copied first, then
    every page produced by rendering ``html_content``, then the remaining
    input pages. The result is written to ``output_pdf_path``.

    Args:
        input_pdf_path: Path of the PDF to read.
        html_content: HTML markup to render; it may produce several pages.
        output_pdf_path: Path the combined PDF is written to.
        insert_page: 0-based index of the input page before which the
            rendered pages are placed. ``0`` prepends; the input's page
            count appends.

    Raises:
        IndexError: If ``insert_page`` is negative or greater than the number
            of pages in the input PDF.
    """
    reader = PdfReader(input_pdf_path)
    page_count = len(reader.pages)

    # Validate before doing any rendering work. Without this check a negative
    # value would make the trailing copy loop walk negative indices and
    # silently duplicate pages from the end of the document.
    if not 0 <= insert_page <= page_count:
        raise IndexError(
            f"insert_page must be between 0 and {page_count} (inclusive), "
            f"got {insert_page}"
        )

    html_reader = PdfReader(create_pdf_with_html(html_content))

    writer = PdfWriter()

    # Input pages that come before the insertion point.
    for page_num in range(insert_page):
        writer.add_page(reader.pages[page_num])

    # All pages rendered from the HTML.
    for page in html_reader.pages:
        writer.add_page(page)

    # Remaining input pages, starting at the insertion point.
    for page_num in range(insert_page, page_count):
        writer.add_page(reader.pages[page_num])

    with open(output_pdf_path, "wb") as output_pdf:
        writer.write(output_pdf)

if __name__ == "__main__":
    # Command-line entry point: parse the arguments, work out the insertion
    # position, and delegate to insert_html_into_pdf.
    cli = argparse.ArgumentParser(
        description="Insert an HTML-rendered page at a specified position in the PDF."
    )
    cli.add_argument("input_pdf", help="Path to the input PDF file.")
    cli.add_argument("output_pdf", help="Path to the output PDF file.")
    cli.add_argument("html_string", help="HTML content to be rendered into the PDF.")
    cli.add_argument(
        "--insert_page",
        type=int,
        default=None,
        help="Page number to start inserting the HTML content (0-indexed). Defaults to the end if not provided.",
    )

    args = cli.parse_args()

    # When no position was given, use the input's page count so the rendered
    # pages go after the last existing page.
    insert_page = args.insert_page
    if insert_page is None:
        insert_page = len(PdfReader(args.input_pdf).pages)

    insert_html_into_pdf(args.input_pdf, args.html_string, args.output_pdf, insert_page)
    print(f"HTML inserted into {args.output_pdf} starting from page {insert_page}.")
