Zwounds committed on
Commit
de2cd51
·
verified ·
1 Parent(s): 8bd3d42

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -0
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import requests
4
+ from datetime import datetime
5
+ import json
6
+ import os
7
+
8
# Hugging Face Inference API endpoint that get_journal_name() posts its prompt to.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-Small-24B-Instruct-2501"

# Sample upload shown in the UI's "Example CSV Format" panel.
# Each citation is wrapped in double quotes because citations contain commas;
# left unquoted, a CSV parser splits one citation across several columns
# instead of keeping the whole text in the "Citation" column.
EXAMPLE_CSV = '''Citation
"Máñez, I., Lipnevich, A.A., Lopera-Oquendo, C. et al. Examining pre-service teachers' feedback on low- and high-quality written assignments. Educ Asse Eval Acc 36, 225–256 (2024). https://doi.org/10.1007/s11092-024-09432-x"
"Roda, A., & Menken, K. (2024). The Conflation of Dual Language Bilingual Education With Gifted Programs in New York City Schools. Educational Policy, 0(0). https://doi.org/10.1177/08959048241237724"'''
13
+
14
def get_journal_name(citation):
    """Ask the hosted model at ``API_URL`` which journal a citation refers to.

    Args:
        citation: One free-text academic citation.

    Returns:
        The stripped ``generated_text`` of the first generation in the
        response, or ``"Journal name not found"`` when the request fails or
        the response is not a non-empty list of generations.
    """
    not_found = "Journal name not found"
    headers = {"Content-Type": "application/json"}
    prompt = (
        "<s>[INST]What is the journal name in this academic citation? "
        "Return only the journal name, nothing else. "
        "If the journal name is abbreviated, return the full journal name.\n"
        "\n"
        f"Citation: {citation}[/INST]</s>"
    )
    payload = {
        "inputs": prompt,
        # By default the text-generation endpoint prepends the prompt to
        # generated_text; ask for the completion only so the result is just
        # the journal name.
        "parameters": {"return_full_text": False},
    }

    try:
        # A timeout keeps one stalled request from hanging the whole report.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Network failure or a non-JSON body: degrade to the same fallback
        # used for unexpected payloads instead of aborting every citation.
        print(f"Error retrieving journal name: {e}")
        return not_found

    # The model returns a list of responses, we take the first one.
    # Error payloads arrive as a dict (not a list) and fall through.
    if isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict) and "generated_text" in first:
            return str(first["generated_text"]).strip()
    return not_found
29
+
30
def get_oa_policies(journal):
    """Look up a journal's open-access policies via the Sherpa Romeo v2 API.

    The API key is read from the ``SHERPA_ROMEO_API_KEY`` environment
    variable; when it is unset the ``api-key`` parameter is omitted.

    Args:
        journal: Identifier sent as the ``identifier`` query parameter
            (callers in this file pass the journal name).

    Returns:
        A dict ``{'journal': journal, 'policies': ...}``. On success
        ``policies`` is a list with one entry per publisher policy, each
        entry being a list of dicts with the string fields
        ``article_version``, ``oa_fee``, ``embargo``, ``locations`` and
        ``conditions``. On any request or parsing failure ``policies`` is the
        string ``'No information found or error occurred.'``.
    """
    base_url = 'https://v2.sherpa.ac.uk/cgi/retrieve_by_id'
    # Passing the query as params lets requests URL-encode the journal name
    # (spaces, '&', '?', ...) instead of corrupting the query string.
    query = {
        'item-type': 'publication',
        'api-key': os.environ.get('SHERPA_ROMEO_API_KEY'),
        'format': 'Json',
        'identifier': journal,
    }
    try:
        response = requests.get(base_url, params=query, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        policies = data['items'][0]['publisher_policy']

        policies_dict = {
            'journal': journal,
            'policies': [],
        }
        for policy in policies:
            policies_dict['policies'].append(
                [_summarise_permitted_oa(entry) for entry in policy.get('permitted_oa', [])]
            )
        return policies_dict

    except (IndexError, KeyError, TypeError, ValueError,
            requests.exceptions.RequestException) as e:
        # KeyError/TypeError/ValueError cover responses whose JSON is missing
        # or shaped differently from what is indexed above; without them one
        # odd record would abort the caller's whole report.
        print(f"Error retrieving policies for {journal}: {e}")
        return {'journal': journal, 'policies': 'No information found or error occurred.'}


def _summarise_permitted_oa(permitted):
    """Flatten one ``permitted_oa`` entry into the display strings used in the report."""
    versions = permitted['article_version']
    # Several versions can share one pathway; separate them so they do not
    # run together into a single word.
    article_version = ', '.join(versions) if isinstance(versions, list) else str(versions)

    oa_fee = permitted.get('additional_oa_fee', 'no')

    embargo = permitted.get('embargo') or {}
    if 'amount' in embargo:
        formatted_embargo = f"{embargo['amount']} {embargo.get('units', '')}".strip()
    else:
        # No embargo recorded: report a single "no" rather than "no no".
        formatted_embargo = 'no'

    locations = permitted.get('location') or {}
    location_list = ', '.join(locations.get('location', ['none']))

    conditions = permitted.get('conditions', 'none')
    if isinstance(conditions, list):
        formatted_conditions = ', '.join(map(str, conditions))
    else:
        formatted_conditions = str(conditions)

    return {
        'article_version': article_version,
        'oa_fee': oa_fee,
        'embargo': formatted_embargo,
        'locations': location_list,
        'conditions': formatted_conditions,
    }
79
+
80
+ # --- Main Processing Function ---
81
def process_citations(csv_file):
    """Build a plain-text Open Access policy report for each citation in a CSV.

    For every non-empty value in the ``Citation`` column the journal name is
    obtained with ``get_journal_name`` and its policies with
    ``get_oa_policies``; the results are appended to a single report string.

    Args:
        csv_file: The uploaded CSV, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object whose ``name``
            attribute holds the path. ``None`` means nothing was uploaded.

    Returns:
        The report text, or a message beginning with ``"Error:"`` or
        ``"An error occurred:"`` when the input is unusable or processing
        fails.
    """
    if csv_file is None:
        return "Error: No CSV file was uploaded."

    try:
        # Accept both a bare path and a file-like wrapper exposing ``.name``.
        if isinstance(csv_file, (str, os.PathLike)):
            csv_path = csv_file
        else:
            csv_path = csv_file.name

        df = pd.read_csv(csv_path)
        if 'Citation' not in df.columns:
            return "Error: CSV file must have a 'Citation' column."

        sections = [
            "\n"
            "Queens College Library Research Services\n"
            "= = = = = = = = = = = = = = = = = = = = =\n"
            f"({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})\n"
            "\n"
        ]

        # dropna(): empty cells would otherwise be sent to the model as "nan".
        # enumerate(): numbering must not depend on the DataFrame index being
        # a 0-based integer range.
        for number, citation in enumerate(df['Citation'].dropna(), start=1):
            journal = get_journal_name(citation)
            oa_policies = get_oa_policies(journal)['policies']

            paragraphs = []
            if isinstance(oa_policies, str):
                # get_oa_policies reports failure as a plain message string.
                paragraphs.append(oa_policies)
            else:
                for policy_list in oa_policies:
                    for policy in policy_list:
                        # str(): fields come from external JSON and are not
                        # guaranteed to be strings before calling .title().
                        paragraphs.append(
                            "\n"
                            f"Version: {str(policy['article_version']).title()} Manuscript\n"
                            f"OA Fee: {str(policy['oa_fee']).title()}\n"
                            f"Embargo: {str(policy['embargo']).title()}\n"
                            f"Locations: {policy['locations']}\n"
                            f"Conditions: {policy['conditions']}\n"
                        )

            sections.append(
                "\n"
                f"+ + + + + + + + + + Citation {number} + + + + + + + + + +\n"
                "\n"
                f"{citation}\n"
                "\n"
                f"Journal: {journal}\n"
            )
            if paragraphs:
                sections.append('\n' + '\n'.join(paragraphs))
            else:
                sections.append('\nNo policies found.\n')

        return ''.join(sections)

    except Exception as e:
        # Top-level UI boundary: show the failure in the output box rather
        # than raising into the web framework.
        return f"An error occurred: {e}"
131
+
132
+ # --- Gradio Interface ---
133
# Single-page UI: a title, a collapsible sample CSV, an upload/result row and
# a button that runs process_citations on the uploaded file.
with gr.Blocks() as interface:
    gr.Markdown("# Open Access Policy Checker")
    gr.Markdown("Upload a CSV file with a 'Citation' column to get Open Access policies.")

    # Collapsed by default (open=False); shows the EXAMPLE_CSV text.
    with gr.Accordion("Example CSV Format", open=False):
        gr.Code(
            EXAMPLE_CSV,
            language="csv",
            label="Example CSV content"
        )

    # NOTE(review): indentation was lost in the copy this was recovered from;
    # the Row is assumed to hold only the upload and output widgets, with the
    # button below it. Confirm against the deployed layout.
    with gr.Row():
        input_file = gr.File(label="Upload CSV")
        output_text = gr.Textbox(label="OA Policies Report", show_copy_button=True)
    process_button = gr.Button("Get OA Policies")
    # Clicking passes the uploaded file to process_citations and writes the
    # returned text into the report textbox.
    process_button.click(process_citations, inputs=input_file, outputs=output_text)


# Launch the interface
interface.launch()