Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import requests
|
4 |
+
from datetime import datetime
|
5 |
+
import json
|
6 |
+
import os
|
7 |
+
|
8 |
+
# Hugging Face Inference API endpoint of the instruction-tuned model that is
# asked to extract the journal name from each citation.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-Small-24B-Instruct-2501"

# Sample upload shown in the UI.  Each citation is wrapped in double quotes
# because citations contain commas: left unquoted, a CSV parser splits every
# citation into several fields instead of one value in the 'Citation' column.
EXAMPLE_CSV = '''Citation
"Máñez, I., Lipnevich, A.A., Lopera-Oquendo, C. et al. Examining pre-service teachers' feedback on low- and high-quality written assignments. Educ Asse Eval Acc 36, 225–256 (2024). https://doi.org/10.1007/s11092-024-09432-x"
"Roda, A., & Menken, K. (2024). The Conflation of Dual Language Bilingual Education With Gifted Programs in New York City Schools. Educational Policy, 0(0). https://doi.org/10.1177/08959048241237724"'''
|
13 |
+
|
14 |
+
def get_journal_name(citation):
    """Ask the hosted language model for the journal named in ``citation``.

    Sends one text-generation request to ``API_URL`` and returns the model's
    answer with surrounding whitespace removed.

    Args:
        citation: The bibliographic citation text to analyse.

    Returns:
        The journal name reported by the model, or the sentinel string
        ``"Journal name not found"`` when the request fails, the body is not
        JSON, or the response holds no usable generation.
    """
    not_found = "Journal name not found"

    prompt = f"""<s>[INST]What is the journal name in this academic citation? Return only the journal name, nothing else. If the journal name is abbreviated, return the full journal name.

Citation: {citation}[/INST]</s>"""
    payload = {
        "inputs": prompt,
        # Text-generation endpoints echo the prompt in ``generated_text`` by
        # default; ask for the completion only so the result is just the name.
        "parameters": {"return_full_text": False},
    }
    # NOTE(review): the request carries no Authorization header - confirm the
    # endpoint accepts unauthenticated calls for this model.
    headers = {"Content-Type": "application/json"}

    try:
        # A timeout keeps one stalled request from hanging the whole report.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        print(f"Error retrieving journal name: {exc}")
        return not_found

    # A successful call yields a list of generations; anything else (for
    # example an error object) is treated as "not found".
    if not isinstance(result, list) or not result:
        return not_found

    first = result[0]
    generated = first.get("generated_text") if isinstance(first, dict) else None
    if not isinstance(generated, str):
        return not_found

    # Safety net in case the backend ignores ``return_full_text``.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]

    return generated.strip() or not_found
|
29 |
+
|
30 |
+
def get_oa_policies(journal):
    """Fetch a journal's open-access policies from the Sherpa Romeo v2 API.

    Args:
        journal: Value sent as the API's ``identifier`` parameter (callers in
            this file pass the journal name).

    Returns:
        A dict with two keys:

        * ``'journal'`` - the ``journal`` argument, unchanged.
        * ``'policies'`` - on success, a list with one entry per publisher
          policy, each entry being a list of dicts with the string-valued
          keys ``article_version``, ``oa_fee``, ``embargo``, ``locations``
          and ``conditions``.  On any failure, the string
          ``'No information found or error occurred.'``.
    """
    base_url = 'https://v2.sherpa.ac.uk/cgi/retrieve_by_id'
    failure = {'journal': journal, 'policies': 'No information found or error occurred.'}

    api_key = os.environ.get('SHERPA_ROMEO_API_KEY')
    if not api_key:
        # Without a key the request cannot succeed; skip the round trip.
        print(f"Error retrieving policies for {journal}: SHERPA_ROMEO_API_KEY is not set")
        return failure

    # Passing the query as ``params`` lets requests URL-encode the journal
    # name (spaces, '&', ...) instead of splicing it raw into the URL.
    query = {
        'item-type': 'publication',
        'api-key': api_key,
        'format': 'Json',
        'identifier': journal,
    }

    try:
        response = requests.get(base_url, params=query, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        compiled = []
        for policy in data['items'][0]['publisher_policy']:
            compiled.append([
                _compile_article_version_policy(entry)
                for entry in policy.get('permitted_oa', [])
            ])
    except (IndexError, KeyError, TypeError, AttributeError, ValueError,
            requests.exceptions.RequestException) as e:
        # HTTP errors embed the request URL, which contains the API key;
        # mask it before the message reaches the logs.
        message = str(e).replace(api_key, '***')
        print(f"Error retrieving policies for {journal}: {message}")
        return failure

    return {'journal': journal, 'policies': compiled}


def _join_values(value, default='none'):
    """Render a scalar or a list of scalars as one comma-separated string."""
    if value is None:
        return default
    if isinstance(value, (list, tuple)):
        return ', '.join(map(str, value)) if value else default
    return str(value)


def _compile_article_version_policy(entry):
    """Flatten one ``permitted_oa`` entry into the display strings of the report."""
    embargo = entry.get('embargo') or {}
    amount = embargo.get('amount')
    if amount is None:
        # No embargo recorded: report a single "no" rather than "no no".
        formatted_embargo = 'no'
    else:
        formatted_embargo = f"{amount} {embargo.get('units', '')}".strip()

    locations = entry.get('location') or {}

    return {
        # Several versions may share one policy; keep them separated.
        'article_version': _join_values(entry.get('article_version'), default='unknown'),
        'oa_fee': str(entry.get('additional_oa_fee', 'no')),
        'embargo': formatted_embargo,
        'locations': _join_values(locations.get('location')),
        'conditions': _join_values(entry.get('conditions')),
    }
|
79 |
+
|
80 |
+
# --- Main Processing Function ---
|
81 |
+
def process_citations(csv_file):
    """Build the plain-text open-access policy report for an uploaded CSV.

    For every non-empty value in the ``Citation`` column the journal name is
    obtained with ``get_journal_name`` and its policies with
    ``get_oa_policies``; the results are rendered as one text report.

    Args:
        csv_file: The upload handed over by the file component - either an
            object exposing the file path as ``.name`` or the path itself.

    Returns:
        The report text, or a message starting with ``"Error:"`` /
        ``"An error occurred:"`` when the input cannot be processed.
    """
    if csv_file is None:
        return "Error: Please upload a CSV file."

    try:
        csv_path = getattr(csv_file, 'name', csv_file)
        df = pd.read_csv(csv_path)
        if 'Citation' not in df.columns:
            return "Error: CSV file must have a 'Citation' column."

        # Blank cells are read as NaN; skip them instead of querying for "nan".
        citations = df['Citation'].dropna().astype(str)

        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        sections = [
            "\n"
            "Queens College Library Research Services\n"
            "= = = = = = = = = = = = = = = = = = = = =\n"
            f"({timestamp})\n"
            "\n"
        ]

        # Several citations often share a journal; look each one up once.
        policies_by_journal = {}

        for number, citation in enumerate(citations, start=1):
            journal = get_journal_name(citation)
            if journal not in policies_by_journal:
                policies_by_journal[journal] = get_oa_policies(journal)['policies']
            oa_policies = policies_by_journal[journal]

            if isinstance(oa_policies, str):
                # The lookup failed; the string is the message to display.
                paragraphs = [oa_policies]
            else:
                paragraphs = []
                for policy_list in oa_policies:
                    paragraphs.extend(_format_policy_paragraph(p) for p in policy_list)

            section = (
                "\n"
                f"+ + + + + + + + + + Citation {number} + + + + + + + + + +\n"
                "\n"
                f"{citation}\n"
                "\n"
                f"Journal: {journal}\n"
            )
            if paragraphs:
                section += '\n' + '\n'.join(paragraphs)
            else:
                section += '\nNo policies found.\n'
            sections.append(section)

        return ''.join(sections)

    except Exception as e:
        # UI boundary: surface the failure as text instead of a stack trace.
        return f"An error occurred: {e}"


def _format_policy_paragraph(policy):
    """Render one compiled article-version policy as a paragraph of the report."""
    # ``str()`` guards the ``.title()`` calls against non-string values.
    return (
        "\n"
        f"Version: {str(policy['article_version']).title()} Manuscript\n"
        f"OA Fee: {str(policy['oa_fee']).title()}\n"
        f"Embargo: {str(policy['embargo']).title()}\n"
        f"Locations: {policy['locations']}\n"
        f"Conditions: {policy['conditions']}\n"
    )
|
131 |
+
|
132 |
+
# --- Gradio Interface ---
|
133 |
+
# Page layout: title and instructions, a collapsible sample of the expected
# CSV, then the upload / report widgets and the button that runs the lookup.
with gr.Blocks() as interface:
    gr.Markdown("# Open Access Policy Checker")
    gr.Markdown("Upload a CSV file with a 'Citation' column to get Open Access policies.")

    with gr.Accordion("Example CSV Format", open=False):
        gr.Code(
            EXAMPLE_CSV,
            # gr.Code accepts only its predefined language names and "csv"
            # is not one of them; None renders the sample as plain text.
            language=None,
            label="Example CSV content",
        )

    with gr.Row():
        input_file = gr.File(label="Upload CSV")
        output_text = gr.Textbox(label="OA Policies Report", show_copy_button=True)
    process_button = gr.Button("Get OA Policies")
    # Clicking the button passes the uploaded file to process_citations and
    # writes the returned report into the textbox.
    process_button.click(process_citations, inputs=input_file, outputs=output_text)


# Launch the interface
interface.launch()
|