# OCRTranslation / app.py
# sikeaditya's picture
# Update app.py
# 5276211 verified
import json
import os
import sys
import time
import traceback
import uuid

import google.generativeai as genai
from dotenv import load_dotenv
from flask import Flask, render_template, request, jsonify
from PIL import Image
# Load environment variables (including any defined in a local .env file).
load_dotenv()

# Read the Gemini API key from the environment only. A credential embedded in
# source code is exposed to everyone who can read the repository, so there is
# deliberately no hardcoded fallback value here.
api_key = os.getenv("GEMINI_API_KEY")
if api_key:
    # Configure the client at import time so the request handlers also work
    # when this module is imported by a server process and the __main__ block
    # (the only other place that triggers configuration) never runs.
    genai.configure(api_key=api_key)
else:
    print("WARNING: GEMINI_API_KEY is not set. Set the GEMINI_API_KEY environment variable; Gemini requests will fail without it.")
# Function to test API connectivity
def test_gemini_api():
    """Send a trivial prompt to Gemini and report whether a usable reply came back.

    The client is configured with the module-level ``api_key`` before the
    probe request is sent.

    Returns:
        True when the model answered with non-empty text. False when the
        reply was empty or when any exception was raised; in the latter case
        the error and its traceback are printed.
    """
    try:
        genai.configure(api_key=api_key)
        # Probe the API with a plain-text prompt.
        probe_model = genai.GenerativeModel('gemini-2.0-flash')
        reply = probe_model.generate_content("Hello, please respond with 'API is working'")
        has_text = bool(reply) and hasattr(reply, 'text') and bool(reply.text)
        if has_text:
            print(f"API Test Response: {reply.text.strip()}")
            return True
        print("WARNING: Received empty response during API test")
        return False
    except Exception as exc:
        print(f"ERROR: Failed to connect to Gemini API: {exc}")
        print(traceback.format_exc())
        return False
# Initialize Flask app
app = Flask(__name__)


# Configure error responses
@app.errorhandler(500)
def server_error(e):
    """Answer HTTP 500 errors with a JSON body instead of an HTML page.

    Args:
        e: The error object passed in by Flask's error handling.

    Returns:
        A ``(response, 500)`` tuple whose JSON payload has a single
        ``error`` key containing the stringified error.
    """
    message = f"Internal server error: {e}"
    return jsonify(error=message), 500
def extract_text_with_gemini(image_path):
    """Extract text from an image using the Gemini vision model.

    The image is opened with Pillow and, when either side exceeds 1024
    pixels, scaled down in memory (aspect ratio preserved) before being sent
    to the model. The file at ``image_path`` is only read, never rewritten.
    The whole operation is attempted up to three times with a two-second
    pause between attempts.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The extracted text with surrounding whitespace stripped. If every
        attempt fails, a message starting with
        ``"Could not extract text from the image"`` followed by the last
        error is returned instead of raising.
    """
    max_retries = 3
    retry_delay = 2
    max_dimension = 1024  # Longest side sent to the API (it may have size limits)
    model_options = ['gemini-2.0-flash']
    prompt = "Extract all the text from this image. Return only the extracted text, nothing else."

    last_error = None
    for attempt in range(max_retries):
        try:
            print(f"Attempt {attempt + 1} to extract text using Gemini...")

            # Use the first model that can be constructed; re-raise only if
            # the last candidate fails too.
            for model_name in model_options:
                try:
                    print(f"Trying model: {model_name}")
                    model = genai.GenerativeModel(model_name)
                    break
                except Exception as model_error:
                    print(f"Error with model {model_name}: {model_error}")
                    if model_name == model_options[-1]:
                        raise

            # Keep the file open until the request has been sent: an image
            # that was not resized is still backed by the open file.
            with Image.open(image_path) as source_img:
                print(f"Image loaded from {image_path} (Size: {source_img.size}, Format: {source_img.format})")
                img = source_img
                if img.width > max_dimension or img.height > max_dimension:
                    print(f"Resizing large image from {img.width}x{img.height}")
                    ratio = min(max_dimension / img.width, max_dimension / img.height)
                    # Clamp to 1 so extreme aspect ratios never truncate a
                    # side to 0, which resize() rejects.
                    new_width = max(1, int(img.width * ratio))
                    new_height = max(1, int(img.height * ratio))
                    # The resized copy is only needed in memory. Writing it
                    # back to image_path would mutate the caller's file and
                    # can fail when the image mode is not storable in the
                    # format implied by the path's extension.
                    img = img.resize((new_width, new_height))
                    print(f"Resized to {new_width}x{new_height}")

                print("Sending request to Gemini API for text extraction...")
                response = model.generate_content([prompt, img])

            # Treat a missing or empty text payload as a failed attempt.
            if not response or not hasattr(response, 'text') or not response.text:
                raise ValueError("Received empty response from Gemini API")

            extracted_text = response.text.strip()
            print(f"Successfully extracted text (length: {len(extracted_text)})")
            return extracted_text
        except Exception as e:
            last_error = e
            print(f"Attempt {attempt + 1} failed: {e}")
            print(traceback.format_exc())
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)

    # All attempts failed: report the problem as text. translate_text()
    # recognises this exact prefix and skips translation.
    return f"Could not extract text from the image: {last_error}"
def translate_text(text):
    """Translate English text to Hindi with Gemini.

    Args:
        text: The text to translate. May be empty or None.

    Returns:
        The translated text with surrounding whitespace stripped.
        ``"No text to translate."`` when ``text`` is empty or only
        whitespace. ``"Cannot translate due to OCR failure."`` when ``text``
        starts with the OCR failure prefix. ``"Translation failed: ..."``
        with the last error when all three attempts fail.
    """
    # Nothing to do for missing or blank input.
    if not text or not text.strip():
        return "No text to translate."

    # An OCR failure message is passed through as text; do not translate it.
    if text.startswith("Could not extract text from the image"):
        return "Cannot translate due to OCR failure."

    total_attempts = 3
    pause_seconds = 2
    candidates = ['gemini-2.0-flash']
    final_candidate = candidates[-1]

    for attempt_index in range(total_attempts):
        attempt_no = attempt_index + 1
        try:
            print(f"Attempt {attempt_no} to translate text using Gemini...")

            # Take the first candidate model that can be constructed.
            for candidate in candidates:
                try:
                    print(f"Trying model: {candidate}")
                    model = genai.GenerativeModel(candidate)
                except Exception as model_error:
                    print(f"Error with model {candidate}: {model_error}")
                    if candidate == final_candidate:
                        raise
                else:
                    break

            # Build the translation prompt.
            prompt = (
                "\n"
                "Translate the following English text to Hindi.\n"
                "Keep proper names, titles, and organization names unchanged.\n"
                f"Text to translate: {text}\n"
            )

            print("Sending request to Gemini API for translation...")
            response = model.generate_content(prompt)

            # A reply without usable text counts as a failed attempt.
            if not (response and hasattr(response, 'text') and response.text):
                raise ValueError("Received empty response from Gemini API")

            translated_text = response.text.strip()
            print(f"Successfully translated text (length: {len(translated_text)})")
            return translated_text
        except Exception as exc:
            print(f"Translation attempt {attempt_no} failed: {exc}")
            print(traceback.format_exc())
            if attempt_no == total_attempts:
                return f"Translation failed: {exc}"
            print(f"Retrying in {pause_seconds} seconds...")
            time.sleep(pause_seconds)
@app.route('/')
def home():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload: extract its text and translate it.

    Expects a multipart form field named ``file`` whose filename ends in one
    of the allowed image extensions. The upload is stored in a uniquely
    named temporary file under ``temp/``, processed, and removed again
    whether or not processing succeeds.

    Returns:
        On success, JSON with ``original_text`` and ``translated_text``.
        JSON with an ``error`` message and status 400 when the file is
        missing, unnamed, or has a disallowed extension, and status 500 when
        processing raises.
    """
    print("Received upload request")

    if 'file' not in request.files:
        print("No file part in the request")
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['file']
    if file.filename == '':
        print("No file selected")
        return jsonify({'error': 'No file selected'}), 400

    # Check file extension
    allowed_extensions = {'png', 'jpg', 'jpeg', 'gif', 'bmp'}
    if '.' not in file.filename or file.filename.rsplit('.', 1)[1].lower() not in allowed_extensions:
        print(f"Invalid file format: {file.filename}")
        return jsonify({'error': 'Invalid file format. Please upload an image (PNG, JPG, JPEG, GIF, BMP).'}), 400

    temp_path = None
    try:
        # Create temp directory if it doesn't exist
        temp_dir = "temp"
        os.makedirs(temp_dir, exist_ok=True)
        print(f"Ensuring temp directory exists: {temp_dir}")

        # Best effort: make sure the temp directory is writable.
        try:
            if not os.access(temp_dir, os.W_OK):
                os.chmod(temp_dir, 0o755)
                print(f"Updated permissions for temp directory: {temp_dir}")
        except Exception as perm_error:
            print(f"Warning: Could not update permissions: {str(perm_error)}")

        # Use a random name per request. A second-resolution timestamp is
        # shared by uploads arriving within the same second, which would let
        # concurrent requests overwrite or delete each other's file.
        temp_filename = f"temp_image_{uuid.uuid4().hex}.png"
        temp_path = os.path.join(temp_dir, temp_filename)
        print(f"Saving uploaded file to {temp_path}")
        file.save(temp_path)

        # Best effort: make the saved file readable.
        try:
            os.chmod(temp_path, 0o644)
            print(f"Updated permissions for file: {temp_path}")
        except Exception as file_perm_error:
            print(f"Warning: Could not update file permissions: {str(file_perm_error)}")

        # Extract text using Gemini
        print("Starting text extraction...")
        extracted_text = extract_text_with_gemini(temp_path)
        print(f"Text extraction result: {extracted_text[:100]}...")

        # Translate text
        print("Starting text translation...")
        translated_text = translate_text(extracted_text)
        print(f"Translation result: {translated_text[:100]}...")

        return jsonify({
            'original_text': extracted_text,
            'translated_text': translated_text
        })
    except Exception as e:
        error_msg = f"Error processing image: {str(e)}"
        print(error_msg)
        print(traceback.format_exc())
        return jsonify({
            'error': error_msg
        }), 500
    finally:
        # Clean up the temporary file; a cleanup failure must not affect the
        # response that is already being returned.
        try:
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)
                print(f"Removed temporary file: {temp_path}")
        except Exception as e:
            print(f"Failed to remove temporary file: {str(e)}")
if __name__ == '__main__':
    # Ensure the template folder exists
    if not os.path.exists('templates'):
        os.makedirs('templates')
        print("Created 'templates' directory. Please place your HTML files here.")

    # Test API connectivity at startup; a failure is reported but does not
    # prevent the server from starting.
    api_working = test_gemini_api()
    if api_working:
        print("✅ Gemini API connection successful!")
    else:
        print("❌ WARNING: Gemini API connection failed. The application may not work correctly!")

    # For Hugging Face Spaces, we need to listen on 0.0.0.0 and port 7860.
    # Resolve the port once so the logged value is the one actually used.
    port = int(os.environ.get('PORT', 7860))
    print(f"Starting Flask app on port {port}")
    app.run(host='0.0.0.0', port=port)