phani50101 committed on
Commit
bbde278
·
1 Parent(s): 66b44b8
Files changed (2)
  1. app.py +638 -0
  2. requirements.txt +15 -0
app.py ADDED
@@ -0,0 +1,638 @@
+ from huggingface_hub import snapshot_download
+ import gradio as gr
+ import openvino_genai
+ import librosa
+ import numpy as np
+ from threading import Lock, Event
+ from scipy.ndimage import uniform_filter1d
+ from queue import Queue, Empty
+ from googleapiclient.discovery import build
+ from concurrent.futures import ThreadPoolExecutor
+ import time
+ import cpuinfo
+ import gc
+ import os
+ import requests
+ from PIL import Image
+ from io import BytesIO
+ import openvino as ov
+
+ # Set CPU affinity for optimization
+ os.environ["GOMP_CPU_AFFINITY"] = "0-7"  # Use first 8 CPU cores
+ os.environ["OMP_NUM_THREADS"] = "8"
+
+ # Configuration constants
+ # Search credentials are read from the environment rather than hardcoded in source
+ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
+ GOOGLE_CSE_ID = os.environ.get("GOOGLE_CSE_ID", "")
+ DEFAULT_MAX_TOKENS = 100
+ DEFAULT_NUM_IMAGES = 1
+ MAX_HISTORY_TURNS = 2
+ MAX_TOKENS_LIMIT = 1000
+
+ # Download models
+ start_time = time.time()
+ snapshot_download(repo_id="OpenVINO/mistral-7b-instruct-v0.1-int8-ov", local_dir="mistral-ov")
+ snapshot_download(repo_id="OpenVINO/whisper-tiny-fp16-ov", local_dir="whisper-ov-model")
+ snapshot_download(repo_id="OpenVINO/InternVL2-1B-int8-ov", local_dir="internvl-ov")  # Added for image analysis
+ print(f"Model download time: {time.time() - start_time:.2f} seconds")
+
+ # CPU-specific configuration
+ cpu_features = cpuinfo.get_cpu_info()['flags']
+ config_options = {}
+ # py-cpuinfo reports AVX-512 as individual flags (avx512f, avx512dq, ...), so match by prefix
+ if any(flag.startswith('avx512') for flag in cpu_features):
+     config_options["ENFORCE_BF16"] = "YES"
+     print("Using AVX512 optimizations")
+ elif 'avx2' in cpu_features:
+     config_options["INFERENCE_PRECISION_HINT"] = "f32"
+     print("Using AVX2 optimizations")
+
+ # Initialize models with performance flags
+ start_time = time.time()
+ mistral_pipe = openvino_genai.LLMPipeline(
+     "mistral-ov",
+     device="CPU",
+     config={
+         "PERFORMANCE_HINT": "THROUGHPUT",
+         **config_options
+     }
+ )
+
+ whisper_pipe = openvino_genai.WhisperPipeline(
+     "whisper-ov-model",
+     device="CPU"
+ )
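+ # The shared mistral_pipe is guarded by a lock so concurrent requests don't interleave generate() calls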
+ pipe_lock = Lock()
+ print(f"Model initialization time: {time.time() - start_time:.2f} seconds")
+
+ # Initialize InternVL pipeline for image analysis (lazy loading)
+ internvl_pipe = None
+ internvl_lock = Lock()
+
+ def get_internvl_pipeline():
+     global internvl_pipe
+     with internvl_lock:
+         if internvl_pipe is None:
+             print("Initializing InternVL pipeline...")
+             start_time = time.time()
+             internvl_pipe = openvino_genai.VLMPipeline("internvl-ov", device="CPU")
+             print(f"InternVL pipeline initialization time: {time.time() - start_time:.2f} seconds")
+     return internvl_pipe
+
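+ # A short dummy generation forces graph compilation up front, so the first real request avoids that latency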
+ # Warm up models
+ print("Warming up models...")
+ start_time = time.time()
+ with pipe_lock:
+     mistral_pipe.generate("Warmup", openvino_genai.GenerationConfig(max_new_tokens=10))
+ whisper_pipe.generate(np.zeros(16000, dtype=np.float32))
+ print(f"Model warmup time: {time.time() - start_time:.2f} seconds")
+
+ # Thread pools
+ generation_executor = ThreadPoolExecutor(max_workers=4)  # Increased workers
+ image_executor = ThreadPoolExecutor(max_workers=8)  # defined for parallel image work; unused in the current code path
+
+ def fetch_images(query: str, num: int = DEFAULT_NUM_IMAGES) -> list:
+     """Fetch unique images by requesting different result pages"""
+     start_time = time.time()
+
+     if num <= 0:
+         return []
+
+     try:
+         service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
+         image_links = []
+         seen_urls = set()  # To track unique URLs
+
+         # Start from different positions to get unique images
+         for start_index in range(1, num * 2, 2):  # Step by 2 to get different pages
+             if len(image_links) >= num:
+                 break
+
+             res = service.cse().list(
+                 q=query,
+                 cx=GOOGLE_CSE_ID,
+                 searchType="image",
+                 num=1,  # Get one result per request
+                 start=start_index  # Start at different positions
+             ).execute()
+
+             if "items" in res and res["items"]:
+                 item = res["items"][0]
+                 # Skip duplicates
+                 if item["link"] not in seen_urls:
+                     image_links.append(item["link"])
+                     seen_urls.add(item["link"])
+
+         print(f"Unique image fetch time: {time.time() - start_time:.2f} seconds")
+         return image_links[:num]  # Return only the requested number
+     except Exception as e:
+         print(f"Error in image fetching: {e}")
+         return []
+
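+ # process_audio is a simple energy-based voice activity detector: it keeps the span of frames
+ # whose smoothed RMS exceeds a fixed threshold, padded by 0.1 s of audio on each side.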
+ def process_audio(data, sr):
+     start_time = time.time()
+     data = librosa.to_mono(data.T) if data.ndim > 1 else data
+     data = data.astype(np.float32)
+     peak = np.max(np.abs(data))
+     if peak > 0:  # guard against all-zero (silent) input
+         data /= peak
+     rms = librosa.feature.rms(y=data, frame_length=2048, hop_length=512)[0]
+     smoothed_rms = uniform_filter1d(rms, size=5)
+     speech_frames = np.where(smoothed_rms > 0.025)[0]
+     if not speech_frames.size:
+         print(f"Audio processing time: {time.time() - start_time:.2f} seconds")
+         return None
+     start = max(0, int(speech_frames[0] * 512 - 0.1 * sr))
+     end = min(len(data), int((speech_frames[-1] + 1) * 512 + 0.1 * sr))
+     print(f"Audio processing time: {time.time() - start_time:.2f} seconds")
+     return data[start:end]
+
+ def transcribe(audio):
+     start_time = time.time()
+     if audio is None:
+         print(f"Transcription time: {time.time() - start_time:.2f} seconds")
+         return ""
+     sr, data = audio
+     processed = process_audio(data, sr)
+     if processed is None or len(processed) < 1600:
+         print(f"Transcription time: {time.time() - start_time:.2f} seconds")
+         return ""
+     if sr != 16000:
+         processed = librosa.resample(processed, orig_sr=sr, target_sr=16000)
+     result = whisper_pipe.generate(processed)
+     print(f"Transcription time: {time.time() - start_time:.2f} seconds")
+     return str(result)  # coerce the decoded-results object to plain text for the Textbox
+
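+ # Streaming pattern: openvino_genai invokes the streamer callback with each decoded chunk on the
+ # generation thread; chunks flow through a Queue to this generator, which re-yields the growing text.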
+ def stream_answer(message: str, max_tokens: int, include_images: bool):
+     start_time = time.time()
+     response_queue = Queue()
+     completion_event = Event()
+     error = [None]
+
+     optimized_config = openvino_genai.GenerationConfig(
+         max_new_tokens=max_tokens,
+         num_beams=1,
+         do_sample=False,  # greedy decoding; temperature/top_p/top_k only take effect when sampling is enabled
+         temperature=1.0,
+         top_p=0.9,
+         top_k=30
+     )
+
+     def callback(subword):  # streamer callback: receives each newly decoded text chunk
+         response_queue.put(subword)
+         return openvino_genai.StreamingStatus.RUNNING
+
+     def generate():
+         try:
+             with pipe_lock:
+                 # Passing the callback as the streamer is what enables streaming output
+                 mistral_pipe.generate(message, optimized_config, callback)
+         except Exception as e:
+             error[0] = str(e)
+         finally:
+             completion_event.set()
+
+     generation_executor.submit(generate)
+
+     accumulated = []
+     char_count = 0
+     last_gc = time.time()
+
+     while not completion_event.is_set() or not response_queue.empty():
+         if error[0]:
+             yield f"Error: {error[0]}"
+             print(f"Stream answer time: {time.time() - start_time:.2f} seconds")
+             return
+
+         try:
+             chunk = response_queue.get(timeout=0.05)  # short timeout avoids a hot busy-wait loop
+             accumulated.append(chunk)
+             char_count += len(chunk)
+
+             # Periodic garbage collection
+             if time.time() - last_gc > 2.0:  # Every 2 seconds
+                 gc.collect()
+                 last_gc = time.time()
+
+             yield "".join(accumulated)
+         except Empty:
+             continue
+
+     print(f"Generated {char_count} characters in {time.time() - start_time:.2f} seconds "
+           f"({char_count / (time.time() - start_time):.2f} chars/sec)")
+     yield "".join(accumulated)
+
+ def run_chat(message: str, history: list, include_images: bool, max_tokens: int, num_images: int):
+     start_time = time.time()
+     final_text = ""
+
+     # Create a placeholder for the streaming response
+     history.append((message, "", []))
+     rendered_history = render_history(history)
+     yield rendered_history, gr.update(value="", interactive=False)
+
+     # Stream tokens and update chatbot in real-time
+     for output in stream_answer(message, max_tokens, include_images):
+         final_text = output
+         # Update only the last response in history
+         updated_history = history[:-1] + [(message, final_text, [])]
+         rendered_history = render_history(updated_history)
+         yield rendered_history, gr.update(value="", interactive=False)
+
+     images = []
+     if include_images:
+         images = fetch_images(message, num_images)
+
+     # Update history with final response and images
+     history[-1] = (message, final_text, images)
+     if len(history) > MAX_HISTORY_TURNS:
+         # Trim in place so the change is visible through the shared gr.State reference
+         del history[:-MAX_HISTORY_TURNS]
+
+     rendered_history = render_history(history)
+     print(f"Total chat time: {time.time() - start_time:.2f} seconds")
+     yield rendered_history, gr.update(value="", interactive=True)
+
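+ # render_history flattens (user, bot, images) triples into the (user_msg, html) pairs gr.Chatbot expects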
+ def render_history(history):
+     rendered = []
+     for user_msg, bot_msg, image_links in history:
+         text = bot_msg
+         if image_links:
+             images_html = "".join(
+                 f"<img src='{url}' class='chat-image' onclick='showImage(\"{url}\")' />"
+                 for url in image_links
+             )
+             text += f"<br><br><b>📸 Related Visuals:</b><br><div style='display: flex; flex-wrap: wrap;'>{images_html}</div>"
+         rendered.append((user_msg, text))
+
+     return rendered
+
+ # ===== IMAGE ANALYSIS FUNCTIONS =====
+ def load_image(image_source):
+     """Load image from various sources: file path, URL, or PIL Image"""
+     if isinstance(image_source, str):
+         if image_source.startswith(("http://", "https://")):
+             # Load from URL
+             response = requests.get(image_source, timeout=15)
+             response.raise_for_status()
+             image = Image.open(BytesIO(response.content)).convert("RGB")
+         else:
+             # Load from file path
+             image = Image.open(image_source).convert("RGB")
+     elif isinstance(image_source, Image.Image):
+         # Already a PIL image; normalize to 3 channels so the reshape below holds
+         image = image_source.convert("RGB")
+     else:
+         raise ValueError("Unsupported image input type")
+
+     # Convert to an NHWC uint8 OpenVINO tensor (signed np.byte would overflow pixel values above 127)
+     image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
+     return ov.Tensor(image_data)
+
+ def analyze_image(image, url, prompt):
+     try:
+         # Determine image source (priority: uploaded image > URL)
+         image_source = image if image is not None else url
+
+         if not image_source:
+             return "⚠️ Please upload an image or enter an image URL"
+
+         # Convert to OpenVINO tensor
+         image_tensor = load_image(image_source)
+
+         # Get pipeline (lazy initialization)
+         pipe = get_internvl_pipeline()
+
+         # Generate response with thread safety
+         with internvl_lock:
+             pipe.start_chat()
+             output = pipe.generate(prompt, image=image_tensor, max_new_tokens=100)
+             pipe.finish_chat()
+
+         return output
+
+     except Exception as e:
+         return f"❌ Error: {str(e)}"
+
+ # ===== GRADIO INTERFACE =====
+ css = """
+ .processing {
+     animation: pulse 1.5s infinite;
+     color: #4a5568;
+     padding: 10px;
+     border-radius: 5px;
+     text-align: center;
+     margin: 10px 0;
+ }
+ @keyframes pulse {
+     0%, 100% { opacity: 1; }
+     50% { opacity: 0.5; }
+ }
+ .chat-image {
+     cursor: pointer;
+     transition: transform 0.2s;
+     max-height: 100px;
+     margin: 4px;
+     border-radius: 8px;
+     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+ }
+ .chat-image:hover {
+     transform: scale(1.05);
+     box-shadow: 0 4px 8px rgba(0,0,0,0.2);
+ }
+ .modal {
+     position: fixed;
+     top: 0;
+     left: 0;
+     width: 100%;
+     height: 100%;
+     background: rgba(0,0,0,0.8);
+     display: none;
+     z-index: 1000;
+     cursor: zoom-out;
+ }
+ .modal-content {
+     position: absolute;
+     top: 50%;
+     left: 50%;
+     transform: translate(-50%, -50%);
+     max-width: 90%;
+     max-height: 90%;
+     background: white;
+     padding: 10px;
+     border-radius: 12px;
+ }
+ .modal-img {
+     width: auto;
+     height: auto;
+     max-width: 100%;
+     max-height: 100%;
+     border-radius: 8px;
+ }
+ .chat-container {
+     border: 1px solid #e5e7eb;
+     border-radius: 12px;
+     padding: 20px;
+     margin-bottom: 20px;
+ }
+ .slider-container {
+     margin-top: 20px;
+     padding: 15px;
+     border-radius: 10px;
+     background-color: #f8f9fa;
+ }
+ .slider-label {
+     font-weight: bold;
+     margin-bottom: 5px;
+ }
+ .system-info {
+     background-color: #7B9BDB;
+     padding: 15px;
+     border-radius: 8px;
+     margin: 15px 0;
+     border-left: 4px solid #1890ff;
+ }
+ .typing-indicator {
+     display: inline-block;
+     position: relative;
+     width: 40px;
+     height: 20px;
+ }
+ .typing-dot {
+     display: inline-block;
+     width: 6px;
+     height: 6px;
+     border-radius: 50%;
+     background-color: #4a5568;
+     position: absolute;
+     animation: typing 1.4s infinite ease-in-out;
+ }
+ .typing-dot:nth-child(1) {
+     left: 0;
+     animation-delay: 0s;
+ }
+ .typing-dot:nth-child(2) {
+     left: 12px;
+     animation-delay: 0.2s;
+ }
+ .typing-dot:nth-child(3) {
+     left: 24px;
+     animation-delay: 0.4s;
+ }
+ @keyframes typing {
+     0%, 60%, 100% { transform: translateY(0); }
+     30% { transform: translateY(-5px); }
+ }
+ .tab-container {
+     border-radius: 12px;
+     padding: 20px;
+     background: #3fc9f8;
+     box-shadow: 0 4px 6px rgba(0,0,0,0.05);
+     margin-bottom: 20px;
+ }
+ .tab-header {
+     font-size: 24px;
+     margin-bottom: 20px;
+     padding-bottom: 10px;
+     border-bottom: 2px solid #e5e7eb;
+ }
+ """
+
+ with gr.Blocks(css=css, title="EDU Chat by Phanindra Reddy K") as demo:
+     gr.Markdown("# 🤖 EDU CHAT BY PHANINDRA REDDY K")
+
+     # System info banner
+     gr.HTML("""
+     <div class="system-info">
+         <strong>Multi-Modal AI Assistant</strong>
+         <ul>
+             <li>Text & Voice Chat with Mistral-7B</li>
+             <li>Image Understanding with InternVL</li>
+             <li>Optimized for High-RAM Systems</li>
+         </ul>
+     </div>
+     """)
+
+     modal_html = """
+     <div class="modal" id="imageModal" onclick="this.style.display='none'">
+         <div class="modal-content">
+             <img class="modal-img" id="expandedImg">
+         </div>
+     </div>
+     <script>
+         function showImage(url) {
+             document.getElementById('expandedImg').src = url;
+             document.getElementById('imageModal').style.display = 'block';
+         }
+     </script>
+     """
+     gr.HTML(modal_html)
+
+     # Create tabs for different functionalities
+     with gr.Tabs():
+         # ===== MAIN CHAT TAB =====
+         with gr.Tab("💬 Chat Assistant", id="chat_tab"):
+             state = gr.State([])
+
+             with gr.Column(scale=2, elem_classes="chat-container"):
+                 chatbot = gr.Chatbot(label="Conversation", height=500, bubble_full_width=False)
+
+             with gr.Column(scale=1):
+                 gr.Markdown("### 💬 Ask Your Question")
+
+                 with gr.Row():
+                     user_input = gr.Textbox(
+                         placeholder="Type your question here...",
+                         label="",
+                         container=False,
+                         elem_id="question-input"
+                     )
+                     include_images = gr.Checkbox(
+                         label="Include Visuals",
+                         value=True,
+                         container=False,
+                         elem_id="image-checkbox"
+                     )
+
+                 # Add the sliders container
+                 with gr.Column(elem_classes="slider-container"):
+                     gr.Markdown("### ⚙️ Generation Settings")
+
+                     with gr.Row():
+                         max_tokens = gr.Slider(
+                             minimum=10,
+                             maximum=MAX_TOKENS_LIMIT,  # Increased to 1000
+                             value=DEFAULT_MAX_TOKENS,
+                             step=10,
+                             label="Response Length (Tokens)",
+                             info=f"Max: {MAX_TOKENS_LIMIT} tokens (for detailed explanations)",
+                             elem_classes="slider-label"
+                         )
+
+                     # Conditionally visible image slider row
+                     with gr.Row(visible=True) as image_slider_row:
+                         num_images = gr.Slider(
+                             minimum=0,
+                             maximum=5,
+                             value=DEFAULT_NUM_IMAGES,
+                             step=1,
+                             label="Number of Images",
+                             info="Set to 0 to disable images",
+                             elem_classes="slider-label"
+                         )
+
+                 with gr.Row():
+                     submit_btn = gr.Button("Send Text", variant="primary")
+                     mic_btn = gr.Button("Transcribe Voice", variant="secondary")
+                     mic = gr.Audio(
+                         sources=["microphone"],
+                         type="numpy",
+                         label="Voice Input",
+                         show_label=False,
+                         elem_id="voice-input"
+                     )
+
+             processing = gr.HTML("""
+             <div id="processing" style="display: none;">
+                 <div class="processing">🔮 Processing your request...</div>
+             </div>
+             """)
+
+             # Toggle image slider visibility based on checkbox
+             def toggle_image_slider(include_visuals):
+                 return gr.update(visible=include_visuals)
+
+             include_images.change(
+                 fn=toggle_image_slider,
+                 inputs=include_images,
+                 outputs=image_slider_row
+             )
+
+             def toggle_processing():
+                 return gr.update(visible=True), gr.update(interactive=False)
+
+             def hide_processing():
+                 return gr.update(visible=False), gr.update(interactive=True)
+
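+             # The .then() chain runs sequentially: show the spinner, stream the response, then restore the UI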
+             # Update the submit_btn click handler to include streaming
+             submit_btn.click(
+                 fn=toggle_processing,
+                 outputs=[processing, submit_btn]
+             ).then(
+                 fn=run_chat,
+                 inputs=[user_input, state, include_images, max_tokens, num_images],
+                 outputs=[chatbot, user_input]
+             ).then(
+                 fn=hide_processing,
+                 outputs=[processing, submit_btn]
+             )
+
+             # Voice transcription
+             mic_btn.click(
+                 fn=toggle_processing,
+                 outputs=[processing, mic_btn]
+             ).then(
+                 fn=transcribe,
+                 inputs=mic,
+                 outputs=user_input
+             ).then(
+                 fn=hide_processing,
+                 outputs=[processing, mic_btn]
+             )
+
+         # ===== IMAGE ANALYSIS TAB =====
+         with gr.Tab("🖼️ Image Analysis", id="image_tab"):
+             with gr.Column(elem_classes="tab-container"):
+                 gr.Markdown("## 🖼️ Image Understanding with InternVL")
+                 gr.Markdown("Upload an image or enter a URL, then ask questions about it")
+
+                 with gr.Row():
+                     with gr.Column():
+                         # Image upload
+                         image_upload = gr.Image(type="pil", label="Upload Image")
+
+                         # URL input
+                         url_input = gr.Textbox(
+                             label="OR Enter Image URL",
+                             placeholder="https://example.com/image.jpg",
+                             info="Enter a direct image URL"
+                         )
+
+                         # Preview image
+                         preview = gr.Image(label="Preview", interactive=False)
+
+                         # Update preview when inputs change
+                         def update_preview(img, url):
+                             if img is not None:
+                                 return img
+                             elif url and url.startswith(("http://", "https://")):
+                                 return url
+                             return None
+
+                         image_upload.change(update_preview, [image_upload, url_input], preview)
+                         url_input.change(update_preview, [image_upload, url_input], preview)
+
+                     with gr.Column():
+                         # Question input
+                         prompt = gr.Textbox(
+                             label="Question",
+                             placeholder="What is unusual in this image?",
+                             info="Ask anything about the image"
+                         )
+
+                         # Submit button
+                         img_submit_btn = gr.Button("Ask Question", variant="primary")
+
+                         # Output
+                         img_output = gr.Textbox(label="Model Response", interactive=False)
+
+                         # Submit action
+                         img_submit_btn.click(
+                             fn=analyze_image,
+                             inputs=[image_upload, url_input, prompt],
+                             outputs=img_output
+                         )
+
+ if __name__ == "__main__":
+     demo.launch(share=True, debug=True)
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ gradio==4.26.0
+ openvino-genai>=1.0.0
+ librosa>=0.10.0
+ numpy>=1.24.0
+ scipy>=1.10.0
+ huggingface_hub>=0.21.4
+ google-api-python-client>=2.132.0
+ py-cpuinfo>=8.0.0
+ requests>=2.32.3
+ Pillow>=10.3.0
+ soundfile>=0.12.1
+ openvino>=2024.1.0
+ tqdm>=4.66.2
+ protobuf>=4.25.3
+ tokenizers>=0.19.1