import json

# Load the scraped GitHub issues, keyed by issue number (as a string).
with open("issues_dict.json", "r") as f:
    issues = json.load(f)
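# Assumed structure of issues_dict.json, inferred from the lookups below
# (the actual file is not shown here):
# {"2781": {"number": 2781, "title": "...", "body": "..."}, ...}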
# Map each issue topic to its maintainer(s). Values are a single handle, a
# list of handles, or a sentinel string for topics not triaged in this repo.
topic_maintainers_map = {
    "text models": ["@ArthurZucker", "@younesbelkada"],
    "vision models": "@amyeroberts",
    "speech models": "@sanchit-gandhi",
    "graph models": "@clefourrier",
    "flax": "@sanchit-gandhi",
    "generate": "@gante",
    "pipelines": "@Narsil",
    "tensorflow": ["@gante", "@Rocketknight1"],
    "tokenizers": "@ArthurZucker",
    "trainer": ["@muellerzr", "@pacman100"],
    "deepspeed": "@pacman100",
    "ray/raytune": ["@richardliaw", "@amogkam"],
    "Big Model Inference": "@SunMarc",
    "quantization (bitsandbytes, autogpt)": ["@SunMarc", "@younesbelkada"],
    "Documentation": ["@stevhliu", "@MKhalusova"],
    "accelerate": "different repo",
    "datasets": "different repo",
    "diffusers": "different repo",
    "rust tokenizers": "different repo",
    "Flax examples": "@sanchit-gandhi",
    "PyTorch vision examples": "@amyeroberts",
    "PyTorch text examples": "@ArthurZucker",
    "PyTorch speech examples": "@sanchit-gandhi",
    "PyTorch generate examples": "@gante",
    "TensorFlow": "@Rocketknight1",
    "Research projects and examples": "not maintained",
}
# Pick one issue to classify.
issue_no = 2781
issue = issues[str(issue_no)]
from transformers import AutoTokenizer, LlamaForCausalLM

# Note: the Llama-2 checkpoints are gated on the Hub; access must be granted first.
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
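# Optional (not in the original script): on memory-constrained machines, the 7B
# model is easier to fit in half precision with automatic device placement,
# which additionally requires the `accelerate` package. A sketch:
# import torch
# model = LlamaForCausalLM.from_pretrained(
#     "meta-llama/Llama-2-7b-hf", torch_dtype=torch.float16, device_map="auto"
# )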
# prompt = f"Which of the following topics {list(topic_maintainers_map.keys())} is this issue about:\n{issue['body']}"
prompt = f"QUESTION: What is the provided issue about? Pick up to 3 topics from the following list: {list(topic_maintainers_map.keys())}\nISSUE START:\n{issue['body']}\nISSUE END.\nANSWER:"
inputs = tokenizer(prompt, return_tensors="pt")
prefix_len = inputs.input_ids.shape[1]

# Generate up to 30 new tokens, then decode only the completion. Note that
# prefix_len counts tokens, so it must slice the token ids, not the decoded
# string (slicing the string by token count cuts at the wrong character).
generate_ids = model.generate(inputs.input_ids, max_new_tokens=30)
answer = tokenizer.batch_decode(
    generate_ids[:, prefix_len:], skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(answer)
print("TITLE", issue["number"] + " " + issue["title"]) | |