Alimubariz124 committed on
Commit
61e33cd
·
verified ·
1 Parent(s): 37f30a1

Create transcript_handler.py

Browse files
Files changed (1) hide show
  1. transcript_handler.py +19 -0
transcript_handler.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import numpy as np
3
+
4
def chunk_text(text, chunk_size=300, overlap=50):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Args:
        text: The string to split. Words are delimited by any whitespace.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared between consecutive chunks. Must be
            smaller than ``chunk_size`` so the window always moves forward.

    Returns:
        A list of chunks, each a single-space-joined string of at most
        ``chunk_size`` words. Empty or whitespace-only text yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``overlap`` is
            not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if overlap >= chunk_size:
        # A zero step would make range() fail with an unrelated message and
        # a negative step would silently produce no chunks at all.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(' '.join(words[start:end]))
        # Once a window reaches the last word, any further window would only
        # repeat words already contained in this chunk, so stop here.
        if end >= len(words):
            break
    return chunks
11
+
12
def embed_chunks(chunks, embedder):
    """Encode *chunks* with *embedder* and return the vectors with the chunks.

    Args:
        chunks: The items to embed; passed unchanged to ``embedder.encode``.
        embedder: Any object exposing an ``encode(chunks)`` method.

    Returns:
        A ``(vectors, chunks)`` tuple, where ``vectors`` is the encoder
        output wrapped with ``np.array`` and ``chunks`` is the very same
        object that was passed in.
    """
    vectors = np.array(embedder.encode(chunks))
    return vectors, chunks
15
+
16
def create_faiss_index(embeddings):
    """Build a flat (exhaustive-search) L2 FAISS index holding *embeddings*.

    Args:
        embeddings: A 2-D array-like with one vector per row. It is
            converted to a C-contiguous float32 array before indexing.

    Returns:
        A ``faiss.IndexFlatL2`` whose dimension is the number of columns of
        ``embeddings`` and to which every row has been added.

    Raises:
        ValueError: If ``embeddings`` is not two-dimensional.
    """
    # FAISS works on C-contiguous float32 matrices; coerce up front so that
    # float64 arrays, array slices and nested lists are all accepted.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2:
        raise ValueError(
            f"embeddings must be a 2-D array of shape (n, dim), got shape {vectors.shape}"
        )

    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index