Spaces:

Alimubariz124
/

chat-with-data

Sleeping

App Files Files Community

Alimubariz124 committed on Apr 24

Commit

c1f6043

verified ·

1 Parent(s): 3b5783e

Create app.py

Browse files

Files changed (1) hide show

app.py +57 -59

app.py CHANGED Viewed

@@ -1,73 +1,71 @@
 import gradio as gr
-from model_loader import load_embedding_model, load_llm
-from transcript_handler import chunk_text, embed_chunks, create_faiss_index
-from qa_engine import query_faiss, build_prompt
 # Load models
-embedder = load_embedding_model()
-llm = load_llm()
-# Main Gradio app
-with gr.Blocks() as demo:
-    gr.Markdown("# 📄 Chat with a Transcript (Open Source + Free!)")
-    # State variables for storing index and chunks per session
-    index_state = gr.State(None)
-    chunks_state = gr.State([])
-    transcript_input = gr.File(label="Upload Transcript (.txt)")
-    upload_button = gr.Button("Upload and Process")
-    query_input = gr.Textbox(label="Ask a question about the transcript")
-    answer_output = gr.Textbox(label="Answer")
-    def upload_transcript(file, chunks_state):
-        try:
-            # Read and decode the file
-            text = file.read().decode("utf-8")
-            if not text.strip():
-                return "Error: Uploaded file is empty.", None, []
-            # Preprocess the transcript
-            text = preprocess_transcript(text)
-            if not text.strip():
-                return "Error: Transcript is empty after preprocessing.", None, []
-            # Chunk the text
-            chunks = chunk_text(text)
-            if not chunks:
-                return "Error: No chunks generated from the transcript.", None, []
-            # Generate embeddings
-            embeddings, chunks = embed_chunks(chunks, embedder)
-            if embeddings.size == 0:
-                return "Error: Failed to generate embeddings.", None, []
-            # Create FAISS index
-            index = create_faiss_index(embeddings)
-            return "Transcript uploaded and indexed successfully!", index, chunks
-        except Exception as e:
-            return f"Error processing transcript: {str(e)}", None, []
-    def chat_with_transcript(query, index_state, chunks_state):
-        if index_state is None:
-            return "Please upload a transcript first."
-        context = query_faiss(query, index_state, embedder, chunks_state)
-        prompt = build_prompt(context, query)
-        response = llm(prompt)[0]['generated_text']
-        if "Answer:" not in response:
-            return "Error: Unable to parse the model's response."
-        return response.split("Answer:")[-1].strip()
-    upload_button.click(
-        upload_transcript,
-        inputs=[transcript_input, chunks_state],
-        outputs=[answer_output, index_state, chunks_state]
-    )
     query_input.submit(
         chat_with_transcript,
-        inputs=[query_input, index_state, chunks_state],
         outputs=[answer_output]
     )

 import gradio as gr
+from sentence_transformers import SentenceTransformer
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+import faiss
+import numpy as np
 # Load models
+# Sentence-embedding model; the same instance encodes the transcript chunks
+# and every incoming query.
+embedder = SentenceTransformer("all-MiniLM-L6-v2")
+# Seq2seq checkpoint used to generate answers.
+model_name = "google/flan-t5-base"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+# Text2text-generation pipeline; generation is capped at 200 new tokens.
+llm = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
+# Hardcoded sample transcript (three lines); it is the only text that gets
+# chunked and indexed below.
+transcript = """
+The meeting started at 10 AM. The team discussed the new project timeline.
+John mentioned that the deadline is tight but achievable. Sarah suggested adding more resources.
+The team agreed to meet again tomorrow to finalize the plan.
+"""
+# Preprocess and chunk the transcript
+def preprocess_transcript(text):
+    """Return text with each run of whitespace collapsed to one space and both ends trimmed."""
+    # str.split() with no arguments drops leading/trailing whitespace and
+    # treats any run of whitespace as a single separator.
+    tokens = text.split()
+    return ' '.join(tokens)
+def chunk_text(text, chunk_size=300, overlap=50):
+    """Split text into overlapping chunks of whitespace-delimited words.
+
+    Args:
+        text: Text to split; words are separated on any whitespace.
+        chunk_size: Maximum number of words per chunk. Must be positive.
+        overlap: Number of words shared by consecutive chunks. Must be
+            non-negative and smaller than chunk_size.
+
+    Returns:
+        A list of chunk strings, empty when text contains no words.
+
+    Raises:
+        ValueError: If chunk_size and overlap do not give a positive stride.
+    """
+    if chunk_size <= 0 or not 0 <= overlap < chunk_size:
+        raise ValueError("chunk_size must be positive and 0 <= overlap < chunk_size")
+    words = text.split()
+    step = chunk_size - overlap
+    chunks = []
+    for i in range(0, len(words), step):
+        chunks.append(' '.join(words[i:i + chunk_size]))
+        # Once a window reaches the last word, any later window would only
+        # repeat words this chunk already contains.
+        if i + chunk_size >= len(words):
+            break
+    return chunks
+# Chunk the normalized transcript once, at import time.
+chunks = chunk_text(preprocess_transcript(transcript))
+# Generate embeddings and create FAISS index
+embeddings = embedder.encode(chunks)
+# The index dimensionality is taken from the embedding matrix's second axis.
+index = faiss.IndexFlatL2(embeddings.shape[1])
+index.add(np.array(embeddings))
+# Query the FAISS index
+def query_faiss(query, index, embedder, chunks, top_k=2):
+    """Return the chunks closest to query, joined by blank lines.
+
+    Args:
+        query: Question text to embed and search with.
+        index: Search index exposing search(vectors, k) that returns
+            (distances, ids).
+        embedder: Model exposing encode(list_of_str).
+        chunks: Chunk strings, in the order they were added to index.
+        top_k: Maximum number of chunks to retrieve.
+
+    Returns:
+        The retrieved chunks separated by two newlines, or an empty string
+        when there is nothing to retrieve.
+    """
+    if not chunks or top_k <= 0:
+        return ""
+    query_vector = embedder.encode([query])
+    # Never request more neighbours than there are chunks: FAISS pads missing
+    # results with -1, which Python indexing would read as "the last chunk"
+    # and so duplicate context.
+    k = min(top_k, len(chunks))
+    _, neighbor_ids = index.search(np.array(query_vector), k)
+    retrieved_chunks = [chunks[i] for i in neighbor_ids[0] if 0 <= i < len(chunks)]
+    return "\n\n".join(retrieved_chunks)
+# Build prompt and generate answer
+def chat_with_transcript(query):
+    """Answer query from the indexed transcript.
+
+    Retrieves the closest transcript chunks, builds a prompt around them and
+    returns the text generated by the module-level llm pipeline.
+
+    Args:
+        query: The user's question.
+
+    Returns:
+        The generated answer, or an error/help message when no usable
+        question or answer is available.
+    """
+    if not query or not query.strip():
+        return "Please enter a question."
+    context = query_faiss(query, index, embedder, chunks)
+    prompt = f"""You are an AI assistant. Use the following context to answer the question.
+Context:
+{context}
+Question: {query}
+Answer:"""
+    response = llm(prompt)[0]['generated_text'].strip()
+    # A text2text-generation pipeline returns only the generated text, not the
+    # prompt, so "Answer:" is normally absent; requiring it made almost every
+    # query fail. Strip the marker only if the model happens to echo it.
+    if "Answer:" in response:
+        response = response.split("Answer:")[-1].strip()
+    return response or "Error: Unable to parse the model's response."
+# Gradio interface
+# UI: one question box and one answer box.
+with gr.Blocks() as demo:
+    gr.Markdown("# 📄 Chat with a Transcript")
+    query_input = gr.Textbox(label="Ask a question about the transcript")
+    answer_output = gr.Textbox(label="Answer")
+    # Submitting the question box calls chat_with_transcript with its text and
+    # writes the returned string to the answer box.
+    # NOTE(review): no demo.launch() call is visible in this hunk — confirm the
+    # app is launched elsewhere.
     query_input.submit(
         chat_with_transcript,
+        inputs=[query_input],
         outputs=[answer_output]
     )