---
library_name: transformers
tags: []
---
|
|
|
## Prompt and Image Function
|
```python
|
# Extraction instructions for the model: the fields to return and a worked
# example of the expected JSON reply.
# The example is kept strictly valid JSON (no trailing commas). The prompt
# requires the format to be followed "exactly", so a syntax slip in the
# example would be reproduced in replies and break `json.loads`.
# NOTE(review): the sample revisions use differing date notations and one empty
# revision_id -- presumably to show that values are copied as printed and
# unknown values are left blank; confirm with the author before normalising.
prompt = """Extract the following metadata from this architectural drawing:
- drawing_title (datatype: string)
- drawing_number (datatype: string)
- revision_history (datatype: list of dictionaries)
  - revision_id (datatype: string)
  - revision_description (datatype: string)
  - revision_date (datatype: string)

Here's an example of the output format:
{
  "drawing_title": "Electrical Floor Plan",
  "drawing_number": "E101",
  "revision_history": [
    {
      "revision_id": "1",
      "revision_description": "Revised drawing",
      "revision_date": "2024-01-01"
    },
    {
      "revision_id": "",
      "revision_description": "Bid Set",
      "revision_date": "2024/09/21"
    },
    {
      "revision_id": "P3",
      "revision_description": "NOT FOR CONSTRUCTION",
      "revision_date": "10.20.24"
    }
  ]
}

The output must follow this format exactly. If you are unsure of a value, leave it blank.
"""
|
|
|
def pdf_to_images(pdf_path, page_number, zoom=4.0):
    """Render one page of a PDF to an RGB image.

    Args:
        pdf_path: Location of the PDF, passed straight to ``pymupdf.open``.
        page_number: Zero-based index of the page to render.
        zoom: Scale factor applied to both axes of the rendering matrix;
            larger values give a higher-resolution image.

    Returns:
        The image built by ``Image.frombytes`` (presumably Pillow's ``Image``
        module -- the import is outside this snippet), or ``None`` when
        ``page_number`` is at or beyond the document's page count, in which
        case a message is printed.
    """
    # The context manager closes the document on every exit path, including
    # the early return below and any exception raised while rendering.
    with pymupdf.open(pdf_path) as doc:
        if page_number >= doc.page_count:
            print(f"Page {page_number} not found in {pdf_path}")
            return None

        page = doc.load_page(page_number)
        scale = pymupdf.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=scale)

        # Build the image while the document is still open; frombytes copies
        # the pixel buffer, so the result stays valid after the document closes.
        return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
|
|
|
```