Spaces:
Sleeping
Sleeping
File size: 4,806 Bytes
9b744c5 12ae336 9b744c5 b42fea9 9b744c5 b42fea9 9b744c5 7d5704e 9b744c5 b42fea9 9b744c5 b42fea9 9b744c5 18ec458 12ae336 9b744c5 b42fea9 9b744c5 c1fc690 9b744c5 c1fc690 9b744c5 c1fc690 9b744c5 c1fc690 9b744c5 c1fc690 9b744c5 18ec458 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
"""
Module which updates the stored issues to reflect changes in issue state, e.g. new comments.
The module can be run from the command line with the following arguments:
--input_filename: The name of the input file containing the issues
--output_filename: The name of the output file to save the updated issues
--github_api_version: The version of the GitHub API to use
--owner: The owner of the repo
--repo: The name of the repo
--token: The GitHub token to use
--n_pages: The maximum number of pages to fetch (the default, -1, fetches all pages). Useful for testing
"""
import argparse
import json
import logging
import os
import numpy as np
import requests
from .defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE
# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default file name for the command-line --input_filename / --output_filename options.
JSON_FILE = "issues.json"
def update_issues(
    input_filename=ISSUE_JSON_FILE,
    output_filename=ISSUE_JSON_FILE,
    github_api_version=GITHUB_API_VERSION,
    owner=OWNER,
    repo=REPO,
    token=TOKEN,
    n_pages=-1,
):
    """
    Fetch issues changed since the last recorded update and merge them into the issues file.

    The most recent ``updated_at`` value found in ``issues_dict.json`` (read from the
    current working directory) is used as the ``since`` filter for the GitHub
    "list repository issues" endpoint. Every line of ``input_filename`` (one JSON
    issue per line) whose issue number was returned by the API is replaced by the
    fresh version, issues not seen before are appended, and the result replaces
    ``output_filename``. The fetched issues are also dumped to ``updated_issues.json``,
    keyed by issue number.

    Args:
        input_filename: Path of the JSON-lines file holding the current issues.
        output_filename: Path of the file to overwrite with the merged issues. It must
            already exist.
        github_api_version: Value sent in the ``X-GitHub-Api-Version`` header.
        owner: Owner of the repository.
        repo: Name of the repository.
        token: Value sent verbatim as the ``Authorization`` header.
        n_pages: Maximum number of pages to fetch; any value <= 0 fetches all pages.

    Returns:
        ``output_filename``.

    Raises:
        ValueError: If ``output_filename`` does not exist, or if the API answers with a
            status code other than 200.
    """
    with open("issues_dict.json", "r") as f:
        issues = json.load(f)

    # Most recent update timestamp among the known issues. The values are compared as
    # strings -- assumes ISO 8601 UTC timestamps (as GitHub returns), TODO confirm.
    # With no known issues there is nothing to filter on, so `since` is omitted below.
    most_recent = max((issue["updated_at"] for issue in issues.values()), default=None)

    # This function only updates an existing file; refuse to create a new one.
    if not os.path.exists(output_filename):
        raise ValueError(f"File {output_filename} does not exist")

    # Define the URL and headers
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {
        "Accept": "application/vnd.github+json",
        # NOTE(review): GitHub expects a scheme prefix (e.g. "Bearer <token>");
        # confirm that `token` already includes it.
        "Authorization": f"{token}",
        "X-GitHub-Api-Version": f"{github_api_version}",
        "User-Agent": "amyeroberts",
    }
    per_page = 100
    page = 1
    query_params = {
        "state": "all",
        "sort": "created",
        "direction": "asc",
        # Must be sent explicitly: the API default page size is smaller than 100, which
        # would make the "short page" end-of-results test below fire on the first page.
        "per_page": per_page,
        "page": page,
    }
    if most_recent is not None:
        query_params["since"] = most_recent

    new_lines = []
    page_limit = (n_pages + page) if n_pages > 0 else np.inf
    while page < page_limit:
        # Send the GET request
        response = requests.get(url, headers=headers, params=query_params)
        if response.status_code != 200:
            raise ValueError(
                f"Request failed with status code {response.status_code} and message {response.text}"
            )
        json_response = response.json()
        logger.info(f"Page: {page}, number of issues: {len(json_response)}")
        new_lines.extend(json_response)
        # A page that is empty or shorter than per_page is the last one
        if len(json_response) < per_page:
            break
        page += 1
        query_params["page"] = page

    # Keyed by issue number; if the same issue shows up on several pages the last copy wins
    issue_lines_map = {issue["number"]: issue for issue in new_lines}
    updated_issues = set()

    # Build the temporary file next to the output file so the final replace stays on the
    # same filesystem and works when output_filename contains a directory component.
    tmp_filename = os.path.join(
        os.path.dirname(output_filename), "tmp_" + os.path.basename(output_filename)
    )

    # "w" (not "a") so leftovers from an earlier interrupted run are discarded
    with open(input_filename, "r") as f, open(tmp_filename, "w") as g:
        # Update any issues that already exist
        for line in f:
            if not line.strip():
                # Tolerate blank lines instead of failing in json.loads
                continue
            number = json.loads(line)["number"]
            if number in issue_lines_map:
                g.write(json.dumps(issue_lines_map[number]))
                g.write("\n")
                updated_issues.add(number)
            else:
                # Guarantee a line terminator so appended issues never share a line
                g.write(line if line.endswith("\n") else line + "\n")
        # Append any new issues
        for number, issue in issue_lines_map.items():
            if number not in updated_issues:
                g.write(json.dumps(issue))
                g.write("\n")

    # Overwrite the old file with the new file (os.replace also overwrites on Windows)
    os.replace(tmp_filename, output_filename)

    # Save a record of the updated issues for the embedding update
    with open("updated_issues.json", "w") as f:
        json.dump(issue_lines_map, f, indent=4, sort_keys=True)
    return output_filename
if __name__ == "__main__":
    # Command-line entry point: each flag maps onto the update_issues keyword
    # argument of the same name.
    cli = argparse.ArgumentParser()
    string_flags = (
        ("--input_filename", JSON_FILE),
        ("--output_filename", JSON_FILE),
        ("--github_api_version", GITHUB_API_VERSION),
        ("--owner", OWNER),
        ("--repo", REPO),
        ("--token", TOKEN),
    )
    for flag, fallback in string_flags:
        cli.add_argument(flag, type=str, default=fallback)
    cli.add_argument("--n_pages", type=int, default=-1)
    parsed = cli.parse_args()
    update_issues(**vars(parsed))
|