import json

from transformers import AutoTokenizer, LlamaForCausalLM
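# Load the issues to triage, keyed by issue number (keys are strings).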
with open("issues_dict.json", "r") as f:
    issues = json.load(f)
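# Map each triage topic to the maintainer handle(s) responsible for it.
# Values are either a single "@handle" string or a list of handles.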
topic_maintainers_map = {
    "text models": ["@ArthurZucker", "@younesbelkada"],
    "vision models": "@amyeroberts",
    "speech models": "@sanchit-gandhi",
    "graph models": "@clefourrier",
    "flax": "@sanchit-gandhi",
    "generate": "@gante",
    "pipelines": "@Narsil",
    "tensorflow": ["@gante", "@Rocketknight1"],
    "tokenizers": "@ArthurZucker",
    "trainer": ["@muellerzr", "@pacman100"],
    "deepspeed": "@pacman100",
    "ray/raytune": ["@richardliaw", "@amogkam"],
    "Big Model Inference": "@SunMarc",
    "quantization (bitsandbytes, autogpt)": ["@SunMarc", "@younesbelkada"],
    "Documentation": ["@stevhliu", "@MKhalusova"],
    "accelerate": "different repo",
    "datasets": "different repo",
    "diffusers": "different repo",
    "rust tokenizers": "different repo",
    "Flax examples": "@sanchit-gandhi",
    "PyTorch vision examples": "@amyeroberts",
    "PyTorch text examples": "@ArthurZucker",
    "PyTorch speech examples": "@sanchit-gandhi",
    "PyTorch generate examples": "@gante",
    "TensorFlow": "@Rocketknight1",
    "Research projects and examples": "not maintained",
}
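# Pick a single issue to classify; lookups use the string form of the number.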
issue_no = 2781
issue = issues[str(issue_no)]
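# Load Llama-2-7B and its tokenizer (the meta-llama checkpoints are gated on the Hub).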
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
# prompt = f"Which of the following topics {list(topic_maintainers_map.keys())} is this issue about:\n{issue['body']}"
prompt = f"QUESTION: What is the provided issue about? Pick up to 3 topics from the following list: {list(topic_maintainers_map.keys())} \nISSUE START:\n{issue['body']} \n ISSUE END. \n ANSWER:"
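# Tokenize the prompt and record its token length so the decode step can drop it.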
inputs = tokenizer(prompt, return_tensors="pt")
prefix_len = inputs.input_ids.shape[1]
# Generate up to 30 new tokens.
generate_ids = model.generate(inputs.input_ids, max_new_tokens=30)
# Decode only the newly generated tokens; slicing the decoded *string* with
# prefix_len would mix up token and character offsets.
outputs = tokenizer.batch_decode(
    generate_ids[:, prefix_len:], skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(outputs)
print("TITLE", issue["number"] + " " + issue["title"])