Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -19,24 +19,38 @@ if not HF_API_KEY:
|
|
19 |
HF_API_KEY = st.secrets.get("HF_API_KEY", "") # Try getting from Streamlit secrets
|
20 |
|
21 |
# Hugging Face API function
|
22 |
-
def process_image_with_hf(image_bytes):
|
23 |
-
|
24 |
-
API_URL = "https://api-inference.huggingface.co/models/llava-hf/llava-1.5-7b-hf"
|
25 |
headers = {"Authorization": f"Bearer {HF_API_KEY}"}
|
26 |
|
27 |
# Convert image to base64
|
28 |
image_b64 = base64.b64encode(image_bytes).decode('utf-8')
|
29 |
|
30 |
-
# Prepare payload
|
31 |
-
|
32 |
-
|
33 |
-
"
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
}
|
39 |
-
}
|
40 |
|
41 |
# Make API request
|
42 |
response = requests.post(API_URL, headers=headers, json=payload)
|
@@ -45,12 +59,17 @@ def process_image_with_hf(image_bytes):
|
|
45 |
raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
|
46 |
|
47 |
# Handle different response formats
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
# Title and description in main area
|
56 |
try:
|
@@ -78,15 +97,15 @@ with st.sidebar:
|
|
78 |
st.header("Settings")
|
79 |
model_option = st.selectbox(
|
80 |
"Select Vision Model",
|
81 |
-
["LLaVA
|
82 |
index=0
|
83 |
)
|
84 |
|
85 |
-
#
|
86 |
model_mapping = {
|
87 |
-
"LLaVA
|
88 |
-
"
|
89 |
-
"
|
90 |
}
|
91 |
|
92 |
selected_model = model_mapping[model_option]
|
@@ -106,14 +125,11 @@ with st.sidebar:
|
|
106 |
if st.button("Extract Text 🔍", type="primary"):
|
107 |
with st.spinner(f"Processing image with {model_option}..."):
|
108 |
try:
|
109 |
-
# Update the model URL
|
110 |
-
API_URL = f"https://api-inference.huggingface.co/models/{selected_model}"
|
111 |
-
|
112 |
# Get image bytes
|
113 |
img_bytes = uploaded_file.getvalue()
|
114 |
|
115 |
-
# Process with Hugging Face API
|
116 |
-
result = process_image_with_hf(img_bytes)
|
117 |
st.session_state['ocr_result'] = result
|
118 |
except Exception as e:
|
119 |
st.error(f"Error processing image: {str(e)}")
|
|
|
19 |
HF_API_KEY = st.secrets.get("HF_API_KEY", "") # Try getting from Streamlit secrets
|
20 |
|
21 |
# Hugging Face API function
|
22 |
+
def process_image_with_hf(image_bytes, model_id):
|
23 |
+
API_URL = f"https://api-inference.huggingface.co/models/{model_id}"
|
|
|
24 |
headers = {"Authorization": f"Bearer {HF_API_KEY}"}
|
25 |
|
26 |
# Convert image to base64
|
27 |
image_b64 = base64.b64encode(image_bytes).decode('utf-8')
|
28 |
|
29 |
+
# Prepare payload based on model type
|
30 |
+
if "llava" in model_id.lower():
|
31 |
+
payload = {
|
32 |
+
"inputs": {
|
33 |
+
"image": image_b64,
|
34 |
+
"prompt": """Analyze the text in the provided image. Extract all readable content
|
35 |
+
and present it in a structured Markdown format that is clear, concise,
|
36 |
+
and well-organized. Ensure proper formatting (e.g., headings, lists, or
|
37 |
+
code blocks) as necessary to represent the content effectively."""
|
38 |
+
},
|
39 |
+
"parameters": {
|
40 |
+
"max_new_tokens": 1024
|
41 |
+
}
|
42 |
+
}
|
43 |
+
else:
|
44 |
+
# Generic payload format for other models
|
45 |
+
payload = {
|
46 |
+
"inputs": {
|
47 |
+
"image": image_b64,
|
48 |
+
"text": """Analyze the text in the provided image. Extract all readable content
|
49 |
+
and present it in a structured Markdown format that is clear, concise,
|
50 |
+
and well-organized. Ensure proper formatting (e.g., headings, lists, or
|
51 |
+
code blocks) as necessary to represent the content effectively."""
|
52 |
+
}
|
53 |
}
|
|
|
54 |
|
55 |
# Make API request
|
56 |
response = requests.post(API_URL, headers=headers, json=payload)
|
|
|
59 |
raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
|
60 |
|
61 |
# Handle different response formats
|
62 |
+
response_json = response.json()
|
63 |
+
if isinstance(response_json, list):
|
64 |
+
return response_json[0]["generated_text"]
|
65 |
+
elif isinstance(response_json, dict):
|
66 |
+
if "generated_text" in response_json:
|
67 |
+
return response_json["generated_text"]
|
68 |
+
elif "text" in response_json:
|
69 |
+
return response_json["text"]
|
70 |
+
|
71 |
+
# Fallback
|
72 |
+
return str(response_json)
|
73 |
|
74 |
# Title and description in main area
|
75 |
try:
|
|
|
97 |
st.header("Settings")
|
98 |
model_option = st.selectbox(
|
99 |
"Select Vision Model",
|
100 |
+
["LLaVA-1.5-7B", "MiniGPT-4", "Idefics"],
|
101 |
index=0
|
102 |
)
|
103 |
|
104 |
+
# Updated model mapping with confirmed working models
|
105 |
model_mapping = {
|
106 |
+
"LLaVA-1.5-7B": "llava-hf/llava-1.5-7b-hf",
|
107 |
+
"MiniGPT-4": "Vision-CAIR/MiniGPT-4",
|
108 |
+
"Idefics": "HuggingFaceM4/idefics-9b-instruct"
|
109 |
}
|
110 |
|
111 |
selected_model = model_mapping[model_option]
|
|
|
125 |
if st.button("Extract Text 🔍", type="primary"):
|
126 |
with st.spinner(f"Processing image with {model_option}..."):
|
127 |
try:
|
|
|
|
|
|
|
128 |
# Get image bytes
|
129 |
img_bytes = uploaded_file.getvalue()
|
130 |
|
131 |
+
# Process with Hugging Face API using selected model
|
132 |
+
result = process_image_with_hf(img_bytes, selected_model)
|
133 |
st.session_state['ocr_result'] = result
|
134 |
except Exception as e:
|
135 |
st.error(f"Error processing image: {str(e)}")
|