Alimubariz124 committed on
Commit
564aac1
·
verified ·
1 Parent(s): e685ba4

Update transcript_handler.py

Browse files
Files changed (1) hide show
  1. transcript_handler.py +8 -2
transcript_handler.py CHANGED
@@ -2,18 +2,24 @@ import faiss
2
  import numpy as np
3
 
4
  def chunk_text(text, chunk_size=300, overlap=50):
 
 
 
5
  words = text.split()
6
  chunks = []
7
  for i in range(0, len(words), chunk_size - overlap):
8
- chunk = ' '.join(words[i:i+chunk_size])
9
  chunks.append(chunk)
10
  return chunks
11
 
12
  def embed_chunks(chunks, embedder):
 
13
  embeddings = embedder.encode(chunks)
14
  return np.array(embeddings), chunks
15
 
16
  def create_faiss_index(embeddings):
 
17
  index = faiss.IndexFlatL2(embeddings.shape[1])
18
  index.add(embeddings)
19
- return index
 
 
2
  import numpy as np
3
 
4
  def chunk_text(text, chunk_size=300, overlap=50):
5
+ if not text.strip():
6
+ raise ValueError("Transcript is empty.")
7
+
8
  words = text.split()
9
  chunks = []
10
  for i in range(0, len(words), chunk_size - overlap):
11
+ chunk = ' '.join(words[i:i + chunk_size])
12
  chunks.append(chunk)
13
  return chunks
14
 
15
  def embed_chunks(chunks, embedder):
16
+ print(f"Embedding {len(chunks)} chunks...")
17
  embeddings = embedder.encode(chunks)
18
  return np.array(embeddings), chunks
19
 
20
  def create_faiss_index(embeddings):
21
+ print(f"Creating FAISS index with {embeddings.shape[0]} embeddings...")
22
  index = faiss.IndexFlatL2(embeddings.shape[1])
23
  index.add(embeddings)
24
+ print("FAISS index created successfully.")
25
+ return index