philschmid (HF staff) committed
Commit 8c68191
1 Parent(s): 2dd9b98

Update app.py

Files changed (1): app.py +7 -3
app.py CHANGED
@@ -24,9 +24,13 @@ if HF_TOKEN:
 
 
 # Load peft config for pre-trained checkpoint etc.
+device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 model_id = "philschmid/instruct-igel-001"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")
+if device == "cpu":
+    model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True)
+else:
+    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 prompt_template = f"""### Anweisung:
@@ -65,7 +69,7 @@ def generate(instruction, temperature, max_new_tokens, top_p, length_penalty):
     streamer = IteratorStreamer(tokenizer)
     model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048)
     # move to gpu
-    model_inputs = {k: v.cuda() for k, v in model_inputs.items()}
+    model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
 
     generate_kwargs = dict(
         top_p=top_p,
@@ -186,4 +190,4 @@ with gr.Blocks(theme=theme) as demo:
     )
 
 demo.queue()
-demo.launch(share=True)
+demo.launch()
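
For context, below is a minimal, self-contained sketch of the device-aware loading pattern this commit introduces; it is not part of the commit. The CPU branch (low_cpu_mem_usage=True) and the .to(device) input handling mirror the diff, while the imports, the placeholder prompt, and the generate() call are illustrative. One caveat visible in the committed file: torch_dtype is still derived from torch.cuda.get_device_capability() before the CPU/GPU branch, which raises on machines without CUDA; the sketch evaluates it only on the GPU path.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "philschmid/instruct-igel-001"  # model used by this Space
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cpu":
    # CPU fallback as in the commit: default dtype, lower peak RAM while loading
    model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True)
else:
    # bf16 on compute-capability-8.x GPUs (Ampere and newer), fp16 otherwise
    torch_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Placeholder prompt (illustrative); the Space builds prompts from its own template.
prompt = "### Anweisung:\nWas ist IGEL?"
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
# Inputs follow the chosen device instead of the previous hard-coded .cuda()
inputs = {k: v.to(device) for k, v in inputs.items()}

output = model.generate(**inputs, max_new_tokens=64)  # illustrative generation call
print(tokenizer.decode(output[0], skip_special_tokens=True))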