xsa-dev committed
Commit 0d9e7b2 · 1 Parent(s): a4bb0fb

init llama 6

Files changed (3):
  1. README.md +6 -5
  2. app.py +58 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,14 @@
  ---
- title: Llama-2-7b-chat-ggmlv3-q6 K
- emoji: 😻
- colorFrom: green
- colorTo: yellow
+ title: Llama2
+ emoji: 👀
+ colorFrom: purple
+ colorTo: indigo
  sdk: gradio
  sdk_version: 3.40.1
  app_file: app.py
  pinned: false
- license: llama2
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ XSA.
app.py ADDED
@@ -0,0 +1,58 @@
+ import gradio as gr
+ import copy
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download  # fetch the model file from the Hugging Face Hub
+
+
+ CONST_REPO_ID = "TheBloke/Llama-2-7B-Chat-GGML"
+ CONST_FILENAME = "llama-2-7b-chat.ggmlv3.q6_K.bin"
+
+ N_CTX = 4096
+
+ # Download the quantized GGML weights (cached by hf_hub_download) and load them on the CPU.
+ llm = Llama(
+     model_path=hf_hub_download(repo_id=CONST_REPO_ID, filename=CONST_FILENAME),
+     n_ctx=N_CTX,
+ )
+
+ pre_prompt = \
+     " The user and the AI are having a conversation : <|endoftext|> \n"
+
+
+ def generate_text(input_text, history):
+     # On the first turn prepend the system prompt; afterwards continue
+     # from the prompt that produced the previous turn.
+     if history == []:
+         input_text_with_history = f"SYSTEM:{pre_prompt}" + \
+             "\n" + f"USER: {input_text} " + "\n" + " ASSISTANT:"
+     else:
+         input_text_with_history = f"{history[-1][1]}" + "\n"
+         input_text_with_history += f"USER: {input_text}" + "\n" + " ASSISTANT:"
+     output = llm(input_text_with_history, max_tokens=4096, stop=[
+         "<|prompter|>", "<|endoftext|>", "<|endoftext|> \n",
+         "ASSISTANT:", "USER:", "SYSTEM:"], stream=True
+     )
+     # Stream the growing completion back to the chat UI chunk by chunk.
+     temp = ""
+     for out in output:
+         stream = copy.deepcopy(out)
+         temp += stream["choices"][0]["text"]
+         yield temp
+
+
+ demo = gr.ChatInterface(generate_text,
+                         title=f"Llama2 on CPU: {CONST_FILENAME}",
+                         description=f"Running Llama2 with llama_cpp: \
+                         \r\n<i>{CONST_REPO_ID} {CONST_FILENAME}</i>",
+                         examples=["Hi!",
+                                   "Is it hard to be a machine?",
+                                   "When do I need a doctor?",
+                                   "Do you speak Russian? I am angry."
+                                   ],
+                         cache_examples=True,
+                         undo_btn="Undo",
+                         clear_btn="Clear")
+
+ # Allow up to 10 concurrent generations and queue at most 50 waiting requests.
+ demo.queue(concurrency_count=10, max_size=50)
+ demo.launch()
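For quick verification outside the Space, the same streaming call can be exercised without Gradio. A minimal sketch, assuming the same repo and filename as above (the prompt string is illustrative, and the first run downloads the multi-gigabyte model file):

# Minimal smoke test of the streaming completion used in app.py.
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

llm = Llama(
    model_path=hf_hub_download(
        repo_id="TheBloke/Llama-2-7B-Chat-GGML",
        filename="llama-2-7b-chat.ggmlv3.q6_K.bin",
    ),
    n_ctx=4096,
)
for chunk in llm("USER: Hi!\n ASSISTANT:", max_tokens=64, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)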
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ llama-cpp-python
+ huggingface_hub
+ gradio_client
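Note that gradio itself is not listed: on Spaces it is supplied by the runtime through the sdk / sdk_version fields in the README. Also, llama.cpp later replaced GGML with GGUF, and newer llama-cpp-python releases no longer load .ggmlv3 files, so pinning may be needed for reproducibility. A sketch, where the exact pin is an assumption rather than part of this commit:

llama-cpp-python==0.1.78  # assumed pin: a release line that still reads GGMLv3 files
huggingface_hub
gradio_client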