""" Module which updates any of the issues to reflect changes in the issue state """
import argparse
import datetime
import json
import logging
import os

import numpy as np
import requests

from defaults import TOKEN, OWNER, REPO

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

today = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

# NOTE: the assignments below shadow the OWNER / REPO / TOKEN values imported
# from `defaults` above.
OWNER = "huggingface"
REPO = "transformers"
GITHUB_API_VERSION = "2022-11-28"
TOKEN = os.environ.get("GITHUB_TOKEN")
JSON_FILE = "issues.json"

# File holding a {key: issue} mapping, used to look up the most recent `updated_at`
ISSUES_DICT_FILE = "issues_dict.json"
# File the freshly fetched issues are dumped to, keyed by issue number
UPDATED_ISSUES_FILE = "updated_issues.json"
# Default lower bound passed as the `since` query parameter
DEFAULT_SINCE = "2024-02-01T11:33:35Z"
# Number of issues requested per page
PER_PAGE = 100


def _most_recent_update(issues_dict_filename=ISSUES_DICT_FILE):
    """Return the largest `updated_at` value stored in the issues dict file."""
    with open(issues_dict_filename, "r") as f:
        issues = json.load(f)
    timestamps = [issue["updated_at"] for issue in issues.values()]
    if not timestamps:
        raise ValueError(f"No issues found in {issues_dict_filename}")
    return max(timestamps)


def _build_headers(token, github_api_version):
    """Build the request headers, adding an Authorization header only when a token is set."""
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": f"{github_api_version}",
        "User-Agent": "amyeroberts",
    }
    if token:
        token = str(token).strip()
        # A value that already carries a scheme ("Bearer ...", "token ...") is sent
        # untouched; a bare token gets the "Bearer" scheme prepended.
        headers["Authorization"] = token if " " in token else f"Bearer {token}"
    return headers


def _fetch_updated_issues(url, headers, since, n_pages, per_page=PER_PAGE):
    """Page through the issues endpoint and return every issue returned for `since`."""
    page = 1
    query_params = {
        "state": "all",
        "since": since,
        "sort": "created",
        "direction": "asc",
        # Must be sent explicitly: the short-page check below compares against it
        "per_page": per_page,
        "page": page,
    }
    page_limit = (n_pages + page) if n_pages > 0 else np.inf

    fetched = []
    while page < page_limit:
        # Send the GET request
        response = requests.get(url, headers=headers, params=query_params)
        if response.status_code != 200:
            raise ValueError(
                f"Request failed with status code {response.status_code} and message {response.text}"
            )

        json_response = response.json()
        logger.info("Page: %s, number of issues: %s", page, len(json_response))
        fetched.extend(json_response)

        # An empty or short page means we've reached the end of the issues
        if len(json_response) < per_page:
            break

        page += 1
        query_params["page"] = page
    return fetched


def _merge_updated_issues(input_filename, output_filename, issue_lines_map):
    """Rewrite the JSON-lines file, swapping in the updated version of each known issue."""
    out_dir, out_name = os.path.split(output_filename)
    # Keep the temporary file next to the output so the final replace stays on one filesystem
    tmp_filename = os.path.join(out_dir, "tmp_" + out_name)

    # "w" rather than "a": a leftover temp file from an aborted run must not leak into the output
    with open(input_filename, "r") as src, open(tmp_filename, "w") as dst:
        for line in src:
            if not line.strip():
                # Pass blank lines through instead of failing to parse them
                dst.write(line)
                continue
            number = json.loads(line)["number"]
            if number in issue_lines_map:
                dst.write(json.dumps(issue_lines_map[number]))
                dst.write("\n")
            else:
                dst.write(line)

    # os.replace overwrites an existing destination on every platform
    os.replace(tmp_filename, output_filename)


def get_issues(
    input_filename=JSON_FILE,
    output_filename=JSON_FILE,
    github_api_version=GITHUB_API_VERSION,
    owner=OWNER,
    repo=REPO,
    token=TOKEN,
    n_pages=-1,
    since=DEFAULT_SINCE,
):
    """
    Fetch the issues updated since `since` and refresh their entries in a JSON-lines file.

    Every line of `input_filename` is expected to be one JSON-encoded issue with a
    `number` key. Lines whose number was returned by the API are replaced with the
    fetched version; all other lines are copied unchanged. Fetched issues which are
    not already present in the input file are not appended to the output. All
    fetched issues are additionally dumped to `updated_issues.json`.

    Args:
        input_filename: JSON-lines file to read the existing issues from.
        output_filename: JSON-lines file to write the refreshed issues to. It must
            already exist.
        github_api_version: Value sent in the `X-GitHub-Api-Version` header.
        owner: Owner of the repository.
        repo: Name of the repository.
        token: GitHub token. A bare token is sent as `Bearer <token>`; if unset, no
            Authorization header is sent.
        n_pages: Maximum number of pages to fetch. Values <= 0 mean no limit.
        since: Timestamp passed as the `since` query parameter. If `None` or empty,
            the most recent `updated_at` found in `issues_dict.json` is used.

    Returns:
        The `output_filename` that was written.

    Raises:
        ValueError: If `output_filename` does not exist, if `issues_dict.json` holds
            no issues when it is needed, or if a request does not return status 200.
    """
    # The output file is expected to be an existing file which gets updated in place
    if not os.path.exists(output_filename):
        raise ValueError(f"File {output_filename} does not exist")

    if not since:
        # Fall back to the most recent updated at information
        since = _most_recent_update()

    # Define the URL and headers
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = _build_headers(token, github_api_version)

    new_lines = _fetch_updated_issues(url, headers, since, n_pages)
    issue_lines_map = {issue["number"]: issue for issue in new_lines}

    _merge_updated_issues(input_filename, output_filename, issue_lines_map)

    with open(UPDATED_ISSUES_FILE, "w") as f:
        json.dump(issue_lines_map, f, indent=4, sort_keys=True)

    return output_filename


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_filename", type=str, default=JSON_FILE)
    parser.add_argument("--output_filename", type=str, default=JSON_FILE)
    parser.add_argument("--github_api_version", type=str, default=GITHUB_API_VERSION)
    parser.add_argument("--owner", type=str, default=OWNER)
    parser.add_argument("--repo", type=str, default=REPO)
    parser.add_argument("--token", type=str, default=TOKEN)
    parser.add_argument("--n_pages", type=int, default=-1)
    parser.add_argument(
        "--since",
        type=str,
        default=DEFAULT_SINCE,
        help='Only fetch issues updated at or after this timestamp; pass "" to use the most recent update in issues_dict.json',
    )
    args = parser.parse_args()
    get_issues(**vars(args))