xsa-dev committed
Commit 0d9e7b2 · 1 Parent(s): a4bb0fb

init llama 6

Files changed (3):
  1. README.md +6 -5
  2. app.py +58 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,14 @@
  ---
- title: Llama-2-7b-chat-ggmlv3-q6 K
- emoji: 😻
- colorFrom: green
- colorTo: yellow
+ title: Llama2
+ emoji: 👀
+ colorFrom: purple
+ colorTo: indigo
  sdk: gradio
  sdk_version: 3.40.1
  app_file: app.py
  pinned: false
- license: llama2
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ XSA.
app.py ADDED
@@ -0,0 +1,58 @@
+ import gradio as gr
+ import copy
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download  # fetch the model file from the Hugging Face Hub
+
+
+ CONST_REPO_ID = "TheBloke/Llama-2-7B-Chat-GGML"
+ CONST_FILENAME = "llama-2-7b-chat.ggmlv3.q6_K.bin"
+
+ N_CTX = 4096
+
+ # Download the quantized GGML weights (cached by hf_hub_download) and load them on the CPU.
+ llm = Llama(
+     model_path=hf_hub_download(repo_id=CONST_REPO_ID, filename=CONST_FILENAME),
+     n_ctx=N_CTX,
+ )
+
+ pre_prompt = \
+     " The user and the AI are having a conversation : <|endoftext|> \n"
+
+
+ def generate_text(input_text, history):
+     # On the first turn prepend the system prompt; afterwards continue
+     # from the prompt that produced the previous turn.
+     if history == []:
+         input_text_with_history = f"SYSTEM:{pre_prompt}" + \
+             "\n" + f"USER: {input_text} " + "\n" + " ASSISTANT:"
+     else:
+         input_text_with_history = f"{history[-1][1]}" + "\n"
+         input_text_with_history += f"USER: {input_text}" + "\n" + " ASSISTANT:"
+     output = llm(input_text_with_history, max_tokens=4096, stop=[
+         "<|prompter|>", "<|endoftext|>", "<|endoftext|> \n",
+         "ASSISTANT:", "USER:", "SYSTEM:"], stream=True
+     )
+     # Stream the growing completion back to the chat UI chunk by chunk.
+     temp = ""
+     for out in output:
+         stream = copy.deepcopy(out)
+         temp += stream["choices"][0]["text"]
+         yield temp
+
+
+ demo = gr.ChatInterface(generate_text,
+                         title=f"Llama2 on CPU: {CONST_FILENAME}",
+                         description=f"Running Llama2 with llama_cpp: \
+                         \r\n<i>{CONST_REPO_ID} {CONST_FILENAME}</i>",
+                         examples=["Hi!",
+                                   "Is it hard to be a machine?",
+                                   "When do I need a doctor?",
+                                   "Do you speak Russian? I am angry."
+                                   ],
+                         cache_examples=True,
+                         undo_btn="Undo",
+                         clear_btn="Clear")
+
+ # Allow up to 10 concurrent generations and queue at most 50 waiting requests.
+ demo.queue(concurrency_count=10, max_size=50)
+ demo.launch()
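For quick verification outside the Space, the same streaming call can be exercised without Gradio. A minimal sketch, assuming the same repo and filename as above (the prompt string is illustrative, and the first run downloads the multi-gigabyte model file):

# Minimal smoke test of the streaming completion used in app.py.
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

llm = Llama(
    model_path=hf_hub_download(
        repo_id="TheBloke/Llama-2-7B-Chat-GGML",
        filename="llama-2-7b-chat.ggmlv3.q6_K.bin",
    ),
    n_ctx=4096,
)
for chunk in llm("USER: Hi!\n ASSISTANT:", max_tokens=64, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)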
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ llama-cpp-python
+ huggingface_hub
+ gradio_client
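Note that gradio itself is not listed: on Spaces it is supplied by the runtime through the sdk / sdk_version fields in the README. Also, llama.cpp later replaced GGML with GGUF, and newer llama-cpp-python releases no longer load .ggmlv3 files, so pinning may be needed for reproducibility. A sketch, where the exact pin is an assumption rather than part of this commit:

llama-cpp-python==0.1.78  # assumed pin: a release line that still reads GGMLv3 files
huggingface_hub
gradio_client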