import json

# Load the scraped GitHub issues, keyed by issue number (as a string).
with open("issues_dict.json", "r") as f:
    issues = json.load(f)
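# Assumed structure of issues_dict.json, inferred from the lookups below
# (the actual file is not shown here):
# {"2781": {"number": 2781, "title": "...", "body": "..."}, ...}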
# Map each issue topic to its maintainer(s). Values are a single handle, a
# list of handles, or a sentinel string for topics not triaged in this repo.
topic_maintainers_map = {
    "text models": ["@ArthurZucker", "@younesbelkada"],
    "vision models": "@amyeroberts",
    "speech models": "@sanchit-gandhi",
    "graph models": "@clefourrier",
    "flax": "@sanchit-gandhi",
    "generate": "@gante",
    "pipelines": "@Narsil",
    "tensorflow": ["@gante", "@Rocketknight1"],
    "tokenizers": "@ArthurZucker",
    "trainer": ["@muellerzr", "@pacman100"],
    "deepspeed": "@pacman100",
    "ray/raytune": ["@richardliaw", "@amogkam"],
    "Big Model Inference": "@SunMarc",
    "quantization (bitsandbytes, autogpt)": ["@SunMarc", "@younesbelkada"],
    "Documentation": ["@stevhliu", "@MKhalusova"],
    "accelerate": "different repo",
    "datasets": "different repo",
    "diffusers": "different repo",
    "rust tokenizers": "different repo",
    "Flax examples": "@sanchit-gandhi",
    "PyTorch vision examples": "@amyeroberts",
    "PyTorch text examples": "@ArthurZucker",
    "PyTorch speech examples": "@sanchit-gandhi",
    "PyTorch generate examples": "@gante",
    "TensorFlow": "@Rocketknight1",
    "Research projects and examples": "not maintained",
}
# Pick one issue to classify.
issue_no = 2781
issue = issues[str(issue_no)]
from transformers import AutoTokenizer, LlamaForCausalLM

# Note: the Llama-2 checkpoints are gated on the Hub; access must be granted first.
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
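# Optional (not in the original script): on memory-constrained machines, the 7B
# model is easier to fit in half precision with automatic device placement,
# which additionally requires the `accelerate` package. A sketch:
# import torch
# model = LlamaForCausalLM.from_pretrained(
#     "meta-llama/Llama-2-7b-hf", torch_dtype=torch.float16, device_map="auto"
# )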
# prompt = f"Which of the following topics {list(topic_maintainers_map.keys())} is this issue about:\n{issue['body']}"
prompt = f"QUESTION: What is the provided issue about? Pick up to 3 topics from the following list: {list(topic_maintainers_map.keys())}\nISSUE START:\n{issue['body']}\nISSUE END.\nANSWER:"
inputs = tokenizer(prompt, return_tensors="pt")
prefix_len = inputs.input_ids.shape[1]

# Generate up to 30 new tokens, then decode only the completion. Note that
# prefix_len counts tokens, so it must slice the token ids, not the decoded
# string (slicing the string by token count cuts at the wrong character).
generate_ids = model.generate(inputs.input_ids, max_new_tokens=30)
answer = tokenizer.batch_decode(
    generate_ids[:, prefix_len:], skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(answer)
print("TITLE", issue["number"] + " " + issue["title"]) | |