Nikolay Angelov committed · Commit 5ff4011 · Parent(s): a65ad52

change model
app.py CHANGED
@@ -1,4 +1,4 @@
-from langchain_huggingface import
+from langchain_huggingface import HuggingFaceEndpoint
 from langchain.agents import AgentExecutor, create_react_agent
 from langchain_core.prompts import PromptTemplate
 from langchain.memory import ConversationBufferMemory
@@ -19,7 +19,6 @@ from fastapi.middleware.cors import CORSMiddleware
 import gradio as gr
 
 from Gradio_UI import GradioUI
-from huggingface_hub import login
 
 # Initialize FastAPI app
 app = FastAPI(title="AI Assistant", description="AI Assistant with LangChain and Gradio")
@@ -35,37 +34,24 @@ app.add_middleware(
 # Token authentication for Hugging Face
 if not os.getenv('HUGGINGFACEHUB_API_TOKEN'):
     raise ValueError("Please set HUGGINGFACEHUB_API_TOKEN environment variable")
-login(token=os.getenv('HUGGINGFACEHUB_API_TOKEN'))
 
-
-
+# Initialize the HuggingFace pipeline
+llm = HuggingFaceEndpoint(
+    repo_id="meta-llama/Llama-3.3-70B-Instruct",
+    huggingfacehub_api_token=os.getenv('HUGGINGFACEHUB_API_TOKEN'),
+    provider="hf-inference",
     task="text-generation",
-
-
-
-
-
-
-
+    temperature=0.1,
+    max_new_tokens=1024,
+    top_p=0.95,
+    repetition_penalty=1.1,
+    do_sample=True,
+    return_full_text=False,
+    model_kwargs={
+        "stop": ["Human:", "Assistant:", "Observation:"]
     }
 )
 
-# Initialize the HuggingFace pipeline
-# llm = HuggingFaceEndpoint(
-#     endpoint_url="https://api-inference.huggingface.co/models/Qwen/Qwen1.5-7B-Chat",
-#     huggingfacehub_api_token=os.getenv('HUGGINGFACEHUB_API_TOKEN'),
-#     task="text-generation",
-#     temperature=0.1,
-#     max_new_tokens=1024,
-#     top_p=0.95,
-#     repetition_penalty=1.1,
-#     do_sample=True,
-#     return_full_text=False,
-#     model_kwargs={
-#         "stop": ["Human:", "Assistant:", "Observation:"]
-#     }
-# )
-
 # Load system prompt and template
 with open("prompts.yaml", 'r') as stream:
     prompt_templates = yaml.safe_load(stream)
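For context, here is a minimal sketch of how the reconfigured endpoint plugs into the ReAct stack that app.py's imports set up (create_react_agent, AgentExecutor, ConversationBufferMemory). The inline ReAct template and the empty tools list are illustrative assumptions: the Space's real template is loaded from prompts.yaml and its tools are defined elsewhere in app.py, neither of which is visible in this diff.

```python
import os

from langchain.agents import AgentExecutor, create_react_agent
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint

# Endpoint configured as in this commit: hosted inference against the
# Llama-3.3-70B-Instruct repo via the hf-inference provider.
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.3-70B-Instruct",
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    provider="hf-inference",
    task="text-generation",
    temperature=0.1,
    max_new_tokens=1024,
    top_p=0.95,
    repetition_penalty=1.1,
    do_sample=True,
    return_full_text=False,
    model_kwargs={"stop": ["Human:", "Assistant:", "Observation:"]},
)

# Illustrative ReAct template (an assumption; the Space's real template
# comes from prompts.yaml). create_react_agent requires the {tools},
# {tool_names} and {agent_scratchpad} variables to be present.
prompt = PromptTemplate.from_template(
    "You are a helpful assistant with access to these tools:\n{tools}\n\n"
    "Previous conversation:\n{chat_history}\n\n"
    "Use the format:\n"
    "Question: the input question\n"
    "Thought: reason about what to do next\n"
    "Action: one of [{tool_names}]\n"
    "Action Input: the input to the action\n"
    "Observation: the action's result\n"
    "... (repeat Thought/Action/Observation as needed)\n"
    "Final Answer: the answer to the original question\n\n"
    "Question: {input}\nThought:{agent_scratchpad}"
)

tools = []  # placeholder: the Space's actual tool list is not shown here
agent = create_react_agent(llm, tools, prompt)
executor = AgentExecutor(
    agent=agent,
    tools=tools,
    memory=ConversationBufferMemory(memory_key="chat_history"),
    handle_parsing_errors=True,
)

print(executor.invoke({"input": "Which model is serving this Space?"})["output"])
```

The "Observation:" stop sequence is what makes the ReAct loop work: generation must halt right after the model emits an Action so the executor can run the tool and append the real observation, rather than letting the model invent one. Note also that HuggingFaceEndpoint typically resolves the token from HUGGINGFACEHUB_API_TOKEN on its own, so the explicit huggingfacehub_api_token argument is belt-and-braces; access to the gated meta-llama repo still depends on what that token is permitted to read.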