Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions src/filler.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,32 @@ def fill_form(self, pdf_form: str, llm: LLM):
for annot in sorted_annots:
if annot.Subtype == "/Widget" and annot.T:
if i < len(answers_list):
annot.V = f"{answers_list[i]}"
annot.AP = None
answer = answers_list[i]

# Check if the field type is a Button (Checkbox/Radio)
field_type = annot.FT if annot.FT else (annot.Parent.FT if annot.Parent else None)
if str(field_type) == "/Btn":
is_truthy = str(answer).lower() in ["yes", "true", "1", "x", "on"]

# Find the 'ON' state from the appearance dictionary
on_state = "/Yes" # Default assumption
if annot.AP and annot.AP.N:
keys = [k for k in annot.AP.N.keys() if k != "/Off"]
if keys:
on_state = keys[0]

if is_truthy:
from pdfrw import PdfName
annot.V = PdfName(on_state.strip("/"))
annot.AS = PdfName(on_state.strip("/"))
else:
from pdfrw import PdfName
annot.V = PdfName("Off")
annot.AS = PdfName("Off")
else:
annot.V = f"{answer}"
annot.AP = None

i += 1
else:
# Stop if we run out of answers
Expand Down
47 changes: 46 additions & 1 deletion src/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Union

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`Union` is no longer needed in newer versions of Python (3.10+); use the `|` operator instead.

import os

os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Monkey patch rfdetr to force CPU usage on Mac Silicon / Docker
Expand All @@ -12,10 +14,53 @@ def patched_ensure(model_ctx):
except ImportError:
pass

from commonforms import prepare_form
from commonforms import prepare_form

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AI models tend to leave unnecessary trailing whitespace at the end of lines; please remove it.

from pypdf import PdfReader
from controller import Controller

def input_fields(num_fields: int) -> list:
    """Prompt on stdin for a description of each form field.

    Args:
        num_fields: How many fields to ask about. Zero or a negative
            count produces no prompts.

    Returns:
        The entered descriptions as a list of strings, in prompt order.
    """
    # Prompts are numbered from 1 so the user sees "field 1", not "field 0".
    return [
        input(f"Enter description for field {position}: ")
        for position in range(1, num_fields + 1)
    ]

def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Union[str, os.PathLike]):
    """Run the PDF filling logic for a request and return the output path.

    This function is called by the frontend server. It receives the raw
    data, delegates to ``Controller.fill_form``, and returns the path to
    the newly created file.

    Args:
        user_input: Raw text forwarded to ``Controller.fill_form``.
        definitions: Field definitions, forwarded as ``fields``.
        pdf_form_path: Location of the PDF template, as a string or any
            ``os.PathLike`` object.

    Returns:
        The value returned by ``Controller.fill_form`` (reported as the
        output location), or ``None`` when the template file does not exist.

    Raises:
        Exception: Any error raised while filling the form is logged and
            then re-raised unchanged so the caller can handle it.
    """
    print("[1] Received request from frontend.")
    print(f"[2] PDF template path: {pdf_form_path}")

    # Normalize Path/PathLike to a plain string for downstream code.
    pdf_form_path = os.fspath(pdf_form_path)

    if not os.path.exists(pdf_form_path):
        # A missing template is reported and signalled with None rather
        # than an exception; callers must check the return value.
        print(f"Error: PDF template not found at {pdf_form_path}")
        return None

    print("[3] Starting extraction and PDF filling process...")
    try:
        controller = Controller()
        output_name = controller.fill_form(
            user_input=user_input,
            fields=definitions,
            pdf_form_path=pdf_form_path,
        )

        print("\n----------------------------------")
        print("✅ Process Complete.")
        print(f"Output saved to: {output_name}")

        return output_name

    except Exception as e:
        print(f"An error occurred during PDF generation: {e}")
        # Bare raise re-raises the active exception with its original
        # traceback so the frontend can handle it.
        raise
if __name__ == "__main__":
file = "./src/inputs/file.pdf"
user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is <Mamañema>, and the date is 01/02/2005"
Expand Down