sikeaditya committed on
Commit
cb8b30e
·
verified ·
1 Parent(s): 043a5ff

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +11 -0
  2. app.py +154 -0
  3. requirements.txt +4 -0
  4. templates/index.html +213 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# Run as an unprivileged user. Hugging Face's Docker Spaces guidance uses a
# user with UID 1000; running as root is unnecessary for this app.
RUN useradd -m -u 1000 user

WORKDIR /app

# Install dependencies before copying the application code so this layer is
# reused from cache until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user . .

# app.py creates ./temp at request time, so the runtime user must be able to
# write inside /app (WORKDIR creates it owned by root).
RUN chown user:user /app

USER user

EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from flask import Flask, render_template, request, jsonify
3
+ import google.generativeai as genai
4
+ from PIL import Image
5
+ from dotenv import load_dotenv
6
+ import time
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
# Configure the Gemini API using the key from the GEMINI_API_KEY environment
# variable (optionally loaded from a .env file by load_dotenv above).
#
# SECURITY: no fallback key is embedded here. A key committed to source
# control must be treated as compromised and revoked; supply the key through
# the environment / Space secrets instead.
api_key = os.getenv("GEMINI_API_KEY")

if not api_key:
    print("WARNING: GEMINI_API_KEY is not set. Gemini requests will fail until it is configured.")
else:
    try:
        genai.configure(api_key=api_key)
    except Exception as e:
        # Keep the app importable so the UI can still be served; API calls
        # will report their own failures.
        print(f"Error configuring Gemini API: {str(e)}")
20
+
21
+ # Initialize Flask app
22
+ app = Flask(__name__)
23
+
24
+
25
def extract_text_with_gemini(image_path):
    """Extract text from an image file using the Gemini 'gemini-2.0-flash' model.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The extracted text with surrounding whitespace stripped. If the image
        cannot be opened, or every API attempt fails or returns no text, a
        user-facing failure message is returned instead of raising.
    """
    failure_message = "Could not extract text from the image. Please try with a clearer image."
    max_retries = 3
    retry_delay = 2  # seconds to wait between failed API attempts
    prompt = "Extract all the text from this image. Return only the extracted text, nothing else."

    # Opening the file is deterministic: a missing or unreadable image will
    # not start working on a later attempt, so fail fast instead of spending
    # the retry budget (and its sleeps) on it.
    try:
        image = Image.open(image_path)
    except Exception as e:
        print(f"Could not open image: {str(e)}")
        return failure_message

    with image as img:
        for attempt in range(max_retries):
            try:
                model = genai.GenerativeModel('gemini-2.0-flash')

                # Send the prompt together with the image
                response = model.generate_content([prompt, img])

                # Treat a missing or empty text payload as a failed attempt
                if not response or not hasattr(response, 'text') or not response.text:
                    raise ValueError("Received empty response from Gemini API")

                return response.text.strip()
            except Exception as e:
                print(f"Attempt {attempt + 1} failed: {str(e)}")
                # No point sleeping after the final attempt
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)

    return failure_message
54
+
55
+
56
def translate_text(text):
    """Translate English text to Hindi with the Gemini 'gemini-2.0-flash' model.

    Args:
        text: The English text to translate.

    Returns:
        The stripped translation; "No text to translate." when *text* is
        empty or whitespace-only; or a failure message once all three
        attempts have failed.
    """
    # Guard clause: nothing to send for missing or blank input.
    if not text or not text.strip():
        return "No text to translate."

    attempts_allowed = 3
    pause_seconds = 2

    for attempt_no in range(1, attempts_allowed + 1):
        try:
            model = genai.GenerativeModel('gemini-2.0-flash')

            # Instruction block followed by the text to translate
            prompt = (
                "\n"
                "Translate the following English text to Hindi.\n"
                "Keep proper names, titles, and organization names unchanged.\n"
                f"Text to translate: {text}\n"
            )

            reply = model.generate_content(prompt)

            # An absent or empty text payload counts as a failed attempt
            has_text = bool(reply) and hasattr(reply, 'text') and bool(reply.text)
            if not has_text:
                raise ValueError("Received empty response from Gemini API")

            return reply.text.strip()
        except Exception as exc:
            print(f"Translation attempt {attempt_no} failed: {str(exc)}")
            # Wait before the next try, but not after the last one
            if attempt_no < attempts_allowed:
                time.sleep(pause_seconds)

    return "Translation failed. Please try again later."
91
+
92
+
93
@app.route('/')
def home():
    """Serve the upload page rendered from the 'index.html' template."""
    landing_page = render_template('index.html')
    return landing_page
96
+
97
+
98
@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload: extract its text with Gemini and translate it.

    Expects a multipart form field named ``file``.

    Returns:
        A JSON body with ``original_text`` and ``translated_text`` on success;
        a JSON ``error`` with status 400 when the file is missing, unnamed or
        has an unsupported extension; a JSON ``error`` with status 500 when
        processing raises.
    """
    import tempfile  # local import: only this handler needs it

    if 'file' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['file']
    # filename may be None as well as empty when the client sends no name
    filename = file.filename or ''
    if filename == '':
        return jsonify({'error': 'No file selected'}), 400

    # Check file extension (text after the last dot, case-insensitive)
    allowed_extensions = {'png', 'jpg', 'jpeg', 'gif', 'bmp'}
    _, dot, extension = filename.rpartition('.')
    extension = extension.lower()
    if not dot or extension not in allowed_extensions:
        return jsonify({'error': 'Invalid file format. Please upload an image (PNG, JPG, JPEG, GIF, BMP).'}), 400

    temp_path = None
    try:
        # exist_ok avoids a race between concurrent requests creating the dir
        temp_dir = "temp"
        os.makedirs(temp_dir, exist_ok=True)

        # mkstemp reserves a unique file atomically. A timestamp-based name
        # collides for uploads arriving within the same second, letting one
        # request overwrite or delete another request's file.
        fd, temp_path = tempfile.mkstemp(
            prefix="temp_image_", suffix=f".{extension}", dir=temp_dir
        )
        os.close(fd)  # file.save() reopens the path itself
        file.save(temp_path)

        # Extract text using Gemini
        extracted_text = extract_text_with_gemini(temp_path)

        # Translate text
        translated_text = translate_text(extracted_text)

        return jsonify({
            'original_text': extracted_text,
            'translated_text': translated_text
        })
    except Exception as e:
        # Log the detail server-side; return a generic message to the client
        print(f"Error processing image: {str(e)}")
        return jsonify({
            'error': 'An error occurred while processing your image. Please try again.'
        }), 500
    finally:
        # Best-effort cleanup of the temporary file
        try:
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)
        except Exception as e:
            print(f"Failed to remove temporary file: {str(e)}")
145
+
146
+
147
if __name__ == '__main__':
    # Make sure a 'templates' folder is present next to the app before starting
    templates_dir = 'templates'
    if not os.path.exists(templates_dir):
        os.makedirs(templates_dir)
        print("Created 'templates' directory. Please place your HTML files here.")

    # Hugging Face Spaces: bind to all interfaces; port comes from PORT, else 7860
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Flask 2.0.x places no upper bound on Werkzeug and fails at import time with
# Werkzeug 3.x (werkzeug.urls.url_quote was removed); Flask 3 supports it.
flask>=3.0,<4
# app.py uses genai.GenerativeModel, which very old SDK releases do not provide.
google-generativeai>=0.3.0
Pillow>=9.0.0
python-dotenv>=0.19.0
templates/index.html ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>OCR Translation - English to Hindi</title>
7
+ <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
8
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&display=swap" rel="stylesheet">
9
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
10
+ <style>
11
+ body {
12
+ font-family: 'Poppins', sans-serif;
13
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
14
+ }
15
+ .loader {
16
+ border: 3px solid #f3f3f3;
17
+ border-radius: 50%;
18
+ border-top: 3px solid #3498db;
19
+ width: 40px;
20
+ height: 40px;
21
+ animation: spin 1s linear infinite;
22
+ display: none;
23
+ }
24
+ @keyframes spin {
25
+ 0% { transform: rotate(0deg); }
26
+ 100% { transform: rotate(360deg); }
27
+ }
28
+ .drop-zone {
29
+ border: 2px dashed #cbd5e0;
30
+ transition: all 0.3s ease;
31
+ }
32
+ .drop-zone:hover {
33
+ border-color: #3498db;
34
+ background-color: #f8fafc;
35
+ }
36
+ .result-box {
37
+ background: rgba(255, 255, 255, 0.9);
38
+ backdrop-filter: blur(10px);
39
+ transition: all 0.3s ease;
40
+ }
41
+ .result-box:hover {
42
+ transform: translateY(-2px);
43
+ box-shadow: 0 8px 20px rgba(0, 0, 0, 0.1);
44
+ }
45
+ .custom-file-input::-webkit-file-upload-button {
46
+ visibility: hidden;
47
+ width: 0;
48
+ }
49
+ .custom-file-input::before {
50
+ content: 'Choose File';
51
+ display: inline-block;
52
+ background: #3498db;
53
+ color: white;
54
+ padding: 8px 16px;
55
+ border-radius: 5px;
56
+ cursor: pointer;
57
+ }
58
+ .translate-btn {
59
+ background: linear-gradient(135deg, #3498db 0%, #2980b9 100%);
60
+ transition: all 0.3s ease;
61
+ }
62
+ .translate-btn:hover {
63
+ transform: translateY(-1px);
64
+ box-shadow: 0 4px 12px rgba(52, 152, 219, 0.3);
65
+ }
66
+ </style>
67
+ </head>
68
+ <body class="min-h-screen py-12 px-4">
69
+ <div class="container mx-auto max-w-4xl">
70
+ <!-- Header -->
71
+ <div class="text-center mb-12">
72
+ <h1 class="text-4xl font-bold text-gray-800 mb-3">
73
+ <i class="fas fa-language mr-2"></i>OCR Translation
74
+ </h1>
75
+ <p class="text-gray-600">English to Hindi Translation with Image Recognition</p>
76
+ </div>
77
+
78
+ <!-- Main Content -->
79
+ <div class="bg-white rounded-xl shadow-lg p-8 mb-8">
80
+ <!-- Upload Section -->
81
+ <div class="drop-zone rounded-lg p-8 text-center mb-6">
82
+ <div class="mb-4">
83
+ <i class="fas fa-cloud-upload-alt text-4xl text-gray-400 mb-3"></i>
84
+ <h3 class="text-lg font-semibold text-gray-700 mb-2">Upload Image</h3>
85
+ <p class="text-sm text-gray-500 mb-4">Support for PNG, JPG, JPEG, GIF, BMP</p>
86
+ </div>
87
+
88
+ <input type="file"
89
+ id="imageInput"
90
+ accept="image/*"
91
+ class="custom-file-input w-full mb-4 cursor-pointer">
92
+
93
+ <button onclick="processImage()"
94
+ class="translate-btn w-full py-3 px-6 text-white rounded-lg font-medium flex items-center justify-center">
95
+ <i class="fas fa-sync-alt mr-2"></i>
96
+ <span>Translate Now</span>
97
+ </button>
98
+
99
+ <div id="loader" class="loader mx-auto mt-4"></div>
100
+ </div>
101
+
102
+ <!-- Preview Section -->
103
+ <div id="previewSection" class="hidden mb-6">
104
+ <h3 class="text-lg font-semibold text-gray-700 mb-3">Image Preview</h3>
105
+ <img id="imagePreview" class="max-w-full h-auto rounded-lg shadow" src="" alt="Preview">
106
+ </div>
107
+
108
+ <!-- Results Section -->
109
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
110
+ <!-- Original Text -->
111
+ <div class="result-box rounded-xl p-6">
112
+ <div class="flex items-center justify-between mb-4">
113
+ <h3 class="text-lg font-semibold text-gray-700">
114
+ <i class="fas fa-file-alt mr-2"></i>Extracted Text
115
+ </h3>
116
+ <button onclick="copyText('extractedText')" class="text-blue-500 hover:text-blue-600">
117
+ <i class="far fa-copy"></i>
118
+ </button>
119
+ </div>
120
+ <div id="extractedText" class="p-4 bg-gray-50 rounded-lg min-h-[150px] text-gray-700">
121
+ <!-- Extracted text will appear here -->
122
+ </div>
123
+ </div>
124
+
125
+ <!-- Translated Text -->
126
+ <div class="result-box rounded-xl p-6">
127
+ <div class="flex items-center justify-between mb-4">
128
+ <h3 class="text-lg font-semibold text-gray-700">
129
+ <i class="fas fa-language mr-2"></i>Hindi Translation
130
+ </h3>
131
+ <button onclick="copyText('translatedText')" class="text-blue-500 hover:text-blue-600">
132
+ <i class="far fa-copy"></i>
133
+ </button>
134
+ </div>
135
+ <div id="translatedText" class="p-4 bg-gray-50 rounded-lg min-h-[150px] text-gray-700"
136
+ style="font-family: 'Noto Sans Devanagari', sans-serif;">
137
+ <!-- Translated text will appear here -->
138
+ </div>
139
+ </div>
140
+ </div>
141
+ </div>
142
+ </div>
143
+
144
+ <script>
145
// Show a preview of the chosen image as soon as the file input changes.
document.getElementById('imageInput').addEventListener('change', (changeEvent) => {
    const selectedFile = changeEvent.target.files[0];
    if (!selectedFile) {
        return;
    }

    const reader = new FileReader();
    // Once the file has been read as a data URL, display it and reveal the section
    reader.onload = (loadEvent) => {
        document.getElementById('imagePreview').src = loadEvent.target.result;
        document.getElementById('previewSection').classList.remove('hidden');
    };
    reader.readAsDataURL(selectedFile);
});
157
+
158
// Copy the text of the given result element to the clipboard and briefly
// flash the element's background as confirmation.
function copyText(elementId) {
    const element = document.getElementById(elementId);
    // Trim the whitespace that surrounds the text inside the result box
    const text = element.textContent.trim();

    // navigator.clipboard is unavailable outside secure contexts, so calling
    // it unguarded would throw instead of informing the user.
    if (!navigator.clipboard || !navigator.clipboard.writeText) {
        alert('Copying to the clipboard is not supported in this browser.');
        return;
    }

    navigator.clipboard.writeText(text)
        .then(() => {
            // Show a brief notification
            const originalBackground = element.style.backgroundColor;
            element.style.backgroundColor = '#e8f5e9';
            setTimeout(() => {
                element.style.backgroundColor = originalBackground;
            }, 500);
        })
        .catch((error) => {
            // e.g. permission denied; surface it rather than failing silently
            console.error('Copy failed:', error);
            alert('Could not copy the text to the clipboard.');
        });
}
171
+
172
// Upload the selected image to /upload and display the extracted and
// translated text, showing the loader while the request is in flight.
function processImage() {
    const fileInput = document.getElementById('imageInput');
    const loader = document.getElementById('loader');
    const extractedTextDiv = document.getElementById('extractedText');
    const translatedTextDiv = document.getElementById('translatedText');

    if (!fileInput.files[0]) {
        alert('Please select an image first');
        return;
    }

    const formData = new FormData();
    formData.append('file', fileInput.files[0]);

    // Show loader and clear previous results
    loader.style.display = 'block';
    extractedTextDiv.textContent = '';
    translatedTextDiv.textContent = '';

    fetch('/upload', {
        method: 'POST',
        body: formData
    })
        .then(response =>
            // A non-JSON body (for example an HTML error page) would otherwise
            // surface as an opaque JSON parse error; report the status instead.
            response.json().catch(() => {
                throw new Error(`Server returned an unexpected response (HTTP ${response.status})`);
            })
        )
        .then(data => {
            // The server reports failures as { error: "..." }
            if (data.error) {
                throw new Error(data.error);
            }
            extractedTextDiv.textContent = data.original_text;
            translatedTextDiv.textContent = data.translated_text;
        })
        .catch(error => {
            console.error('Error:', error);
            alert(error.message || 'An error occurred during processing');
        })
        .finally(() => {
            loader.style.display = 'none';
        });
}
211
+ </script>
212
+ </body>
213
+ </html>