apjanco committed
Commit 81db712 · 1 Parent(s): aac5be6

not quite working

Files changed (1)
  1. app.py +8 -135
app.py CHANGED
@@ -59,7 +59,7 @@ assistant_prompt = '<|assistant|>\n'
 prompt_suffix = "<|end|>\n"
 
 @spaces.GPU
-def run_example(image:str, model_id:str = "nanonets/Nanonets-OCR-s", prompt: str = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Return the equations in LaTeX representation. If there is an image in the document and image caption is not present, add a small description of the image inside the <img></img> tag; otherwise, add the image caption inside <img></img>. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""):
+def run_example(image, model_id="nanonets/Nanonets-OCR-s", prompt="""Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Return the equations in LaTeX representation. If there is an image in the document and image caption is not present, add a small description of the image inside the <img></img> tag; otherwise, add the image caption inside <img></img>. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""):
 
     image_path = array_to_image_path(image)
 
@@ -107,71 +107,9 @@ def run_example(image:str, model_id:str = "nanonets/Nanonets-OCR-s", prompt: str
 
     return ocr_text, ocr_text  # Return twice: once for display, once for state
 
-css = """
-/* Overall app styling */
-.gradio-container {
-    max-width: 1200px !important;
-    margin: 0 auto;
-    padding: 20px;
-    background-color: #f8f9fa;
-}
-/* Tabs styling */
-.tabs {
-    border-radius: 8px;
-    background: white;
-    padding: 20px;
-    box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
-}
-/* Input/Output containers */
-.input-container, .output-container {
-    background: white;
-    border-radius: 8px;
-    padding: 15px;
-    margin: 10px 0;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
-}
-/* Button styling */
-.submit-btn {
-    background-color: #2d31fa !important;
-    border: none !important;
-    padding: 8px 20px !important;
-    border-radius: 6px !important;
-    color: white !important;
-    transition: all 0.3s ease !important;
-}
-
-.submit-btn:hover {
-    background-color: #1f24c7 !important;
-    transform: translateY(-1px);
-}
-/* Output text area */
-#output {
-    height: 500px;
-    overflow: auto;
-    border: 1px solid #e0e0e0;
-    border-radius: 6px;
-    padding: 15px;
-    background: #ffffff;
-    font-family: 'Arial', sans-serif;
-}
-/* Dropdown styling */
-.gr-dropdown {
-    border-radius: 6px !important;
-    border: 1px solid #e0e0e0 !important;
-}
-/* Image upload area */
-.gr-image-input {
-    border: 2px dashed #ccc;
-    border-radius: 8px;
-    padding: 20px;
-    transition: all 0.3s ease;
-}
-.gr-image-input:hover {
-    border-color: #2d31fa;
-}
-"""
 
-with gr.Blocks(css=css) as demo:
+
+with gr.Blocks() as demo:
     # Add state variables to store OCR results
    ocr_state = gr.State()
 
@@ -180,8 +118,8 @@ with gr.Blocks(css=css) as demo:
    with gr.Column(elem_classes="input-container"):
        input_img = gr.Image(label="Input Picture", elem_classes="gr-image-input")
        model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2.5-VL-7B-Instruct", elem_classes="gr-dropdown")
-
-
+        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...", elem_classes="gr-textbox")
+
        submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
    with gr.Column(elem_classes="output-container"):
        output_text = gr.HighlightedText(label="Output Text", elem_id="output")
@@ -191,75 +129,10 @@ with gr.Blocks(css=css) as demo:
    # Modify the submit button click handler to update state
    submit_btn.click(
        run_example,
-        inputs=[input_img, model_selector],
+        inputs=[input_img, model_selector, prompt],
        outputs=[output_text, ocr_state]  # Add ocr_state to outputs
    )
-    with gr.Row():
-        filename = gr.Textbox(label="Save filename (without extension)", placeholder="Enter filename to save")
-        download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
-        download_output = gr.File(label="Download")
-
-    # Modify create_zip to use the state data
-    def create_zip(image, fname, ocr_result):
-        # Validate inputs
-        if not fname or image is None:  # Changed the validation check
-            return None
-
-        try:
-            # Convert numpy array to PIL Image if needed
-            if isinstance(image, np.ndarray):
-                image = Image.fromarray(image)
-            elif not isinstance(image, Image.Image):
-                return None
-
-            with tempfile.TemporaryDirectory() as temp_dir:
-                # Save image
-                img_path = os.path.join(temp_dir, f"{fname}.png")
-                image.save(img_path)
-
-                # Use the OCR result from state
-                original_text = ocr_result.original_text if ocr_result else ""
-                entities = ocr_result.entities if ocr_result else []
-
-                # Save text
-                txt_path = os.path.join(temp_dir, f"{fname}.txt")
-                with open(txt_path, 'w', encoding='utf-8') as f:
-                    f.write(original_text)
-
-                # Create JSON with text and entities
-                json_data = {
-                    "text": original_text,
-                    "entities": entities,
-                    "image_file": f"{fname}.png"
-                }
-
-                # Save JSON
-                json_path = os.path.join(temp_dir, f"{fname}.json")
-                with open(json_path, 'w', encoding='utf-8') as f:
-                    json.dump(json_data, f, indent=2, ensure_ascii=False)
-
-                # Create zip file
-                output_dir = "downloads"
-                os.makedirs(output_dir, exist_ok=True)
-                zip_path = os.path.join(output_dir, f"{fname}.zip")
-
-                with zipfile.ZipFile(zip_path, 'w') as zipf:
-                    zipf.write(img_path, os.path.basename(img_path))
-                    zipf.write(txt_path, os.path.basename(txt_path))
-                    zipf.write(json_path, os.path.basename(json_path))
-
-                return zip_path
-
-        except Exception as e:
-            print(f"Error creating zip: {str(e)}")
-            return None
-
-    # Update the download button click handler to include state
-    download_btn.click(
-        create_zip,
-        inputs=[input_img, filename, ocr_state],
-        outputs=[download_output]
-    )
-
+
+
 demo.queue(api_open=False)
 demo.launch(debug=True)
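
For reference, a minimal, self-contained sketch of how the components kept by this commit could be wired end to end. This is not the app's actual code: `DEFAULT_PROMPT` is a hypothetical constant standing in for the long OCR prompt that `run_example` carries as a parameter default, and the OCR call itself is stubbed out. Pre-filling the new prompt textbox keeps an empty submission from overriding that default, and the display output is passed to `gr.HighlightedText` as `(text, label)` pairs while the raw string goes to `gr.State`.

```python
# Sketch only: assumes the component names used in the diff above.
import gradio as gr

# Hypothetical constant standing in for the long OCR prompt that
# run_example currently embeds as its parameter default.
DEFAULT_PROMPT = "Extract the text from the above document as if you were reading it naturally."

def run_example(image, model_id="nanonets/Nanonets-OCR-s", prompt=DEFAULT_PROMPT):
    # Fall back to the default when the textbox is submitted empty.
    prompt = prompt or DEFAULT_PROMPT
    ocr_text = f"[stub] would run {model_id} with prompt: {prompt[:40]}..."
    # HighlightedText takes (text, label) pairs; the bare string is kept in State.
    return [(ocr_text, None)], ocr_text

with gr.Blocks() as demo:
    ocr_state = gr.State()
    with gr.Column():
        input_img = gr.Image(label="Input Picture")
        model_selector = gr.Dropdown(choices=["nanonets/Nanonets-OCR-s"],
                                     value="nanonets/Nanonets-OCR-s", label="Model")
        # Pre-fill so clearing the box does not silently blank the prompt.
        prompt = gr.Textbox(label="Prompt", value=DEFAULT_PROMPT, lines=3)
        submit_btn = gr.Button("Submit")
    with gr.Column():
        output_text = gr.HighlightedText(label="Output Text")

    submit_btn.click(run_example,
                     inputs=[input_img, model_selector, prompt],
                     outputs=[output_text, ocr_state])

demo.queue(api_open=False)
demo.launch(debug=True)
```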