import gradio as gr
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import faiss
import numpy as np

# Load models
embedder = SentenceTransformer("all-MiniLM-L6-v2")
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
llm = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
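
# Note: flan-t5-base is a small instruction-tuned seq2seq model, and
# max_new_tokens=200 caps the answer length. A larger checkpoint
# (e.g. "google/flan-t5-large") should drop in unchanged, at the cost of
# slower startup and inference on a CPU Space.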

# Hardcoded transcript (5-10 lines)
transcript = """
The meeting started at 10 AM. The team discussed the new project timeline.
John mentioned that the deadline is tight but achievable. Sarah suggested adding more resources.
The team agreed to meet again tomorrow to finalize the plan.
"""

# Preprocess and chunk the transcript
def preprocess_transcript(text):
    return ' '.join(text.split())  # Remove newlines and extra whitespace

def chunk_text(text, chunk_size=300, overlap=50):
    words = text.split()
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = ' '.join(words[i:i + chunk_size])
        chunks.append(chunk)
    return chunks
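
# Worked example: with chunk_size=300 and overlap=50 the window advances
# 300 - 50 = 250 words per step, so consecutive chunks share 50 words of
# context. The short transcript above fits in a single chunk.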
chunks = chunk_text(preprocess_transcript(transcript))

# Generate embeddings and create FAISS index
embeddings = embedder.encode(chunks)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings))
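
# IndexFlatL2 performs exact (brute-force) L2 search, and
# SentenceTransformer.encode returns float32 NumPy vectors, the dtype FAISS
# expects. For a handful of chunks this is fast; an approximate index only
# pays off at much larger scale.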

# Query the FAISS index
def query_faiss(query, index, embedder, chunks, top_k=2):
    query_vector = embedder.encode([query])
    D, I = index.search(np.array(query_vector), top_k)
    # FAISS pads results with -1 when top_k exceeds the number of indexed
    # vectors, which would otherwise wrap around to chunks[-1]; filter those out
    retrieved_chunks = [chunks[i] for i in I[0] if i != -1]
    return "\n\n".join(retrieved_chunks)

# Build prompt and generate answer
def chat_with_transcript(query):
    context = query_faiss(query, index, embedder, chunks)
    prompt = f"""You are an AI assistant. Use the following context to answer the question.

Context:
{context}

Question: {query}

Provide your answer below:
"""
    response = llm(prompt)[0]['generated_text']
    print("Raw model response:", response)  # Debug statement
    return response.strip()
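
# Caveat: flan-t5 checkpoints were trained on inputs of up to 512 tokens, so
# answer quality can degrade if the retrieved context grows much longer than
# that; with this short transcript the prompt stays well within budget.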

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Chat with a Transcript")
    query_input = gr.Textbox(label="Ask a question about the transcript")
    answer_output = gr.Textbox(label="Answer")
    query_input.submit(
        chat_with_transcript,
        inputs=[query_input],
        outputs=[answer_output]
    )

demo.launch()