Nikolay Angelov committed
Commit 5ff4011 · 1 Parent(s): a65ad52

change model

Files changed (1)
  1. app.py +14 -28
app.py CHANGED
@@ -1,4 +1,4 @@
-from langchain_huggingface import HuggingFacePipeline
+from langchain_huggingface import HuggingFaceEndpoint
 from langchain.agents import AgentExecutor, create_react_agent
 from langchain_core.prompts import PromptTemplate
 from langchain.memory import ConversationBufferMemory
@@ -19,7 +19,6 @@ from fastapi.middleware.cors import CORSMiddleware
 import gradio as gr
 
 from Gradio_UI import GradioUI
-from huggingface_hub import login
 
 # Initialize FastAPI app
 app = FastAPI(title="AI Assistant", description="AI Assistant with LangChain and Gradio")
@@ -35,37 +34,24 @@ app.add_middleware(
 # Token authentication for Hugging Face
 if not os.getenv('HUGGINGFACEHUB_API_TOKEN'):
     raise ValueError("Please set HUGGINGFACEHUB_API_TOKEN environment variable")
-login(token=os.getenv('HUGGINGFACEHUB_API_TOKEN'))
 
-llm = HuggingFacePipeline.from_model_id(
-    model_id="Qwen/Qwen1.5-7B-Chat",
+# Initialize the HuggingFace pipeline
+llm = HuggingFaceEndpoint(
+    repo_id="meta-llama/Llama-3.3-70B-Instruct",
+    huggingfacehub_api_token=os.getenv('HUGGINGFACEHUB_API_TOKEN'),
+    provider="hf-inference",
     task="text-generation",
-    pipeline_kwargs={
-        "temperature": 0.0,
-        "max_new_tokens": 1024,
-        "top_p": 0.95,
-        "repetition_penalty": 1.1,
-        "do_sample": True,
-        "return_full_text": False,
+    temperature=0.1,
+    max_new_tokens=1024,
+    top_p=0.95,
+    repetition_penalty=1.1,
+    do_sample=True,
+    return_full_text=False,
+    model_kwargs={
+        "stop": ["Human:", "Assistant:", "Observation:"]
     }
 )
 
-# Initialize the HuggingFace pipeline
-# llm = HuggingFaceEndpoint(
-#     endpoint_url="https://api-inference.huggingface.co/models/Qwen/Qwen1.5-7B-Chat",
-#     huggingfacehub_api_token=os.getenv('HUGGINGFACEHUB_API_TOKEN'),
-#     task="text-generation",
-#     temperature=0.1,
-#     max_new_tokens=1024,
-#     top_p=0.95,
-#     repetition_penalty=1.1,
-#     do_sample=True,
-#     return_full_text=False,
-#     model_kwargs={
-#         "stop": ["Human:", "Assistant:", "Observation:"]
-#     }
-# )
-
 # Load system prompt and template
 with open("prompts.yaml", 'r') as stream:
     prompt_templates = yaml.safe_load(stream)
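
For context, a minimal sketch of how the new endpoint-backed llm could plug into the ReAct agent that app.py imports; the diff itself does not show this wiring. The prompts.yaml key name, the empty tools list, and the {chat_history} placeholder are assumptions for illustration, not code from this commit.

# Sketch: wiring the HuggingFaceEndpoint LLM into the imported ReAct agent.
# Assumes prompts.yaml holds a ReAct-style template with the standard
# {tools}, {tool_names}, {input} and {agent_scratchpad} placeholders plus a
# {chat_history} slot for the memory; the key "system_prompt" is hypothetical.
import os
import yaml

from langchain_huggingface import HuggingFaceEndpoint
from langchain.agents import AgentExecutor, create_react_agent
from langchain_core.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.3-70B-Instruct",
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    provider="hf-inference",
    task="text-generation",
    temperature=0.1,
    max_new_tokens=1024,
)

with open("prompts.yaml", "r") as stream:
    prompt_templates = yaml.safe_load(stream)
prompt = PromptTemplate.from_template(prompt_templates["system_prompt"])  # key name assumed

tools = []  # the Space's real tools are outside this diff
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    memory=ConversationBufferMemory(memory_key="chat_history"),
    handle_parsing_errors=True,  # ReAct output from hosted models can be loosely formatted
)

# Example call: answer = agent_executor.invoke({"input": "Hello!"})["output"]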