Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, File, UploadFile | |
| from pydantic import BaseModel | |
| from pathlib import Path | |
| from fastapi import Form | |
| from fastapi.responses import JSONResponse | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from PyPDF2 import PdfReader | |
| from fastapi import Depends | |
# Module-level FastAPI application object for this service.
app = FastAPI()
class FileToProcess(BaseModel):
    """Request model wrapping the single file sent with an upload request.

    ``upload_file`` receives an instance of this model through ``Depends()``
    and reads the upload from ``uploaded_file``.
    """

    # Required upload: ``File(...)`` declares the field with no default value.
    uploaded_file: UploadFile = File(...)
async def home():
    """Return the fixed status message reported by the API."""
    status_message = "API Working!"
    return status_message
def _is_plain_path_component(value):
    """Return True if *value* is one ordinary path component.

    Rejects empty values, the special names ``.`` and ``..``, and anything
    containing a path separator or a NUL byte.
    """
    if not value or value in (".", ".."):
        return False
    return not any(marker in value for marker in ("/", "\\", "\x00"))


async def upload_file(username: str, file_to_process: FileToProcess = Depends()):
    """Save an uploaded file under the user's folder and split its PDF text.

    The upload is written to ``~/<username>/saved_files/<filename>`` and then
    opened with ``PdfReader``. The text extracted from all pages is
    concatenated and passed to a ``RecursiveCharacterTextSplitter``
    (``chunk_size=500``, ``chunk_overlap=100``, lengths measured with ``len``).
    The resulting chunks are only printed; they are not returned or stored.

    NOTE(review): no route decorator is visible on this function in this view;
    confirm it is registered on ``app``.

    Args:
        username: Name of the per-user directory created under the home
            directory. Must be a single path component.
        file_to_process: Dependency-injected model holding the uploaded file.

    Returns:
        A dict with a single ``"INFO"`` message naming the saved file.

    Raises:
        HTTPException: With status 400 when ``username`` or the upload's file
            name is empty or is not a plain path component.
    """
    # Function-scope imports keep the file's top-level import block unchanged.
    import shutil

    from fastapi import HTTPException

    uploaded_file = file_to_process.uploaded_file

    # Both values are supplied by the client, so neither may steer the write
    # outside ~/<username>/saved_files (e.g. username="../..").
    if not _is_plain_path_component(username):
        raise HTTPException(status_code=400, detail="Invalid username.")

    # A client may send a full path as the file name; keep only its last part.
    filename = Path((uploaded_file.filename or "").replace("\\", "/")).name
    if not _is_plain_path_component(filename):
        raise HTTPException(status_code=400, detail="Invalid file name.")

    path_to_save_file = Path.home() / username / "saved_files"
    path_to_save_file.mkdir(parents=True, exist_ok=True)
    file_location = path_to_save_file / filename

    # Stream the upload to disk instead of reading it fully into memory.
    with open(file_location, "wb") as file_object:
        shutil.copyfileobj(uploaded_file.file, file_object)

    # Processing step: extract the PDF text and split it into chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,  # overlap setting between consecutive chunks
        length_function=len,
    )
    doc_reader = PdfReader(str(file_location))
    # Pages for which extract_text() returns nothing are skipped.
    page_texts = (page.extract_text() for page in doc_reader.pages)
    raw_text = "".join(text for text in page_texts if text)
    temp_texts = text_splitter.split_text(raw_text)
    print(temp_texts)

    return {"INFO": f"File '{filename}' saved to your profile."}