Coverage for tests/test_parser.py: 100%
42 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-20 21:23 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-20 21:23 +0000
1import sys
2import os
3import pytest
5# Add the project root to Python's module path
6sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
8# Getting the parse function from the py files
9from app.doc_parsers.parse_pdf import parse_pdf
10from app.doc_parsers.parse_txt import parse_txt
def test_parse_pdf():
    """Check that ``parse_pdf`` returns a non-empty list for the sample PDF.

    The fixture path is relative, so it is resolved against the current
    working directory of the test run.
    """
    pdf_path = "./tests/test_data/Dna.pdf"

    result = parse_pdf(pdf_path)

    # Verify the type first: calling len() on a non-sized result (e.g. None)
    # would raise TypeError and hide the intended failure message.
    assert isinstance(result, list), "Result is not a list."
    assert len(result) > 0, "No PDFs were parsed."
def test_parse_txt():
    """Check that ``parse_txt`` returns a non-empty list for the sample text file.

    The fixture path is relative, so it is resolved against the current
    working directory of the test run.
    """
    txt_path = "./tests/test_data/Dna.txt"

    result = parse_txt(txt_path)

    # Verify the type first: calling len() on a non-sized result (e.g. None)
    # would raise TypeError and hide the intended failure message.
    assert isinstance(result, list), "Result is not a list."
    assert len(result) > 0, "No txts were parsed."
51# Testing for Formatted Pdfloader
52import fitz # PyMuPDF
def test_formatted_pdf(file_path="./tests/test_data/formatted_test_pdf.pdf"):
    """Verify font weight and size of known text runs in the formatted PDF.

    Opens the PDF with PyMuPDF, collects every non-empty text span, and
    checks three landmarks: the title, the first bullet point and the
    ``Name`` table header.

    Args:
        file_path: Path of the PDF to inspect. A relative path is resolved
            against the current working directory.
    """
    # Span sizes are floats; compare with a small tolerance instead of
    # exact equality so representation noise cannot fail the test.
    size_tolerance = 1e-3

    # The context manager closes the document even if extraction raises.
    with fitz.open(file_path) as doc:
        spans = list(_iter_text_spans(doc))

    # --- Check Title ---
    title = next((s for s in spans if s["text"] == "Sample Formatted PDF"), None)
    assert title is not None, "Title text not found"
    assert title["bold"], "Title is not bold"
    assert title["size"] >= 16.0, "Title font size is too small"

    # --- Check Bullet Point ---
    bullet = next((s for s in spans if s["text"] == "- First bullet point"), None)
    assert bullet is not None, "Bullet point not found"
    assert not bullet["bold"], "Bullet point should not be bold"
    assert abs(bullet["size"] - 12.0) <= size_tolerance, "Bullet point font size incorrect"

    # --- Check Table Header ---
    header = next((s for s in spans if s["text"] == "Name"), None)
    assert header is not None, "Table header 'Name' not found"
    assert header["bold"], "'Name' should be bold"
    assert abs(header["size"] - 12.0) <= size_tolerance, "Header font size incorrect"

    print("All formatting checks passed!")


def _iter_text_spans(doc):
    """Yield one dict (text, font, size, bold) per non-empty span in *doc*."""
    for page in doc:
        for block in page.get_text("dict")["blocks"]:
            # Only blocks of type 0 are walked for lines; other block
            # types are skipped.
            if block["type"] != 0:
                continue
            for line in block["lines"]:
                for span in line["spans"]:
                    text = span["text"].strip()
                    if not text:
                        continue
                    yield {
                        "text": text,
                        "font": span["font"],
                        "size": span["size"],
                        # Weight is inferred from the font name.
                        "bold": "Bold" in span["font"],
                    }
# Example usage:
# test_formatted_pdf()