import gradio as gr
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, GenerationConfig, GPTQConfig

# 4-bit GPTQ quantization; exllama kernels are disabled so the model can run on CPU.
gptq_config = GPTQConfig(bits=4, disable_exllama=True)

# Load the fine-tuned PEFT adapter together with its quantized base model.
model = AutoPeftModelForCausalLM.from_pretrained(
    "Aneeth/zephyr_10k",
    return_dict=True,
    torch_dtype=torch.float32,
    trust_remote_code=True,
    quantization_config=gptq_config,
)

tokenizer = AutoTokenizer.from_pretrained("Aneeth/zephyr_10k")

# Decoding settings reused for every request.
generation_config = GenerationConfig(
    do_sample=True,
    top_k=1,
    temperature=0.5,
    max_new_tokens=5000,
    pad_token_id=tokenizer.eos_token_id,
)


def process_data_sample(example):
    """Wrap the raw instruction in the prompt template used during fine-tuning."""
    processed_example = (
        "\n Generate an authentic job description using the given input.\n\n"
        + example["instruction"]
        + "\n\n"
    )
    return processed_example


def generate_text(prompt):
    """Build the prompt, run generation on CPU, and return the decoded text."""
    inp_str = process_data_sample({"instruction": prompt})
    inputs = tokenizer(inp_str, return_tensors="pt").to("cpu")
    outputs = model.generate(**inputs, generation_config=generation_config)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


# Simple Gradio text-to-text interface around the generation function.
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", live=True)
iface.launch()