LPX55 committed
Commit 86d0b72 · verified · 1 parent: 2e71e83

Update app.py

Files changed (1):
  app.py (+20, -5)
app.py CHANGED
@@ -1,10 +1,22 @@
+import torch
+import os
 import gradio as gr
-from huggingface_hub import InferenceClient
+from huggingface_hub import InferenceClient, client
+# Use a pipeline as a high-level helper
+from transformers import BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
+model_4bit = AutoModelForCausalLM.from_pretrained(
+    "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit",
+    quantization_config=quantization_config,
+    torch_dtype="auto"
+)
+# pipe = pipeline("image-text-to-text", model="")
+# pipe(messages)
+
+client = client(model_4bit)
 
 
 def respond(
@@ -15,6 +27,9 @@ def respond(
     temperature,
     top_p,
 ):
+    messages = [
+        {"role": "user", "content": "Who are you?"},
+    ]
     messages = [{"role": "system", "content": system_message}]
 
     for val in history:
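
Note that the added module-level code will not run as committed: `huggingface_hub` exports `InferenceClient` but no `client` symbol, so the new import line raises an ImportError, and `InferenceClient` expects a model id or endpoint URL rather than an in-process model object, so `client = client(model_4bit)` would fail even if the import succeeded. Below is a minimal sketch of what the commit appears to intend, assuming the goal is to drive the locally loaded 4-bit checkpoint through a `transformers` pipeline; the `"text-generation"` task and the `token=HF_TOKEN` arguments are assumptions, not part of the diff.

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

HF_TOKEN = os.environ.get("HF_TOKEN", None)
MODEL_ID = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"

# 4-bit load as in the diff: quantized weights with bfloat16 compute.
quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model_4bit = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quantization_config,
    torch_dtype="auto",
    token=HF_TOKEN,  # assumed; the diff reads HF_TOKEN but never passes it
)

# A local model is driven through a pipeline rather than InferenceClient,
# which only talks to remote endpoints. "text-generation" is an assumed task.
pipe = pipeline("text-generation", model=model_4bit, tokenizer=tokenizer)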
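
In the second hunk, the hard-coded `{"role": "user", "content": "Who are you?"}` message is dead code: the very next line rebinds `messages` to the system-message list, so the placeholder question never reaches the model. If a fixed smoke-test prompt is actually wanted (an assumption; it may simply be a leftover), it would have to be appended after the system message rather than assigned before it:

    messages = [{"role": "system", "content": system_message}]
    # Hypothetical smoke test; appended so it is not overwritten.
    messages.append({"role": "user", "content": "Who are you?"})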