Spaces:
Running
Running
Create filter_corpus
Browse files- filter_corpus +86 -0
filter_corpus
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import requests
|
4 |
+
|
5 |
+
# Remote location of the tokenised corpus and the local cache file it is saved to.
url = "https://raw.githubusercontent.com/Bestroi150/NLP_LAT_COLL/main/corpus_sermo_vulgaris_token.csv"
local_filename = "corpus_sermo_vulgaris_token.csv"

# Download is best-effort: a failure is reported but does not abort here, so a
# copy of the file left by a previous run can still be loaded below.
# The timeout keeps start-up from hanging forever on an unresponsive server.
try:
    response = requests.get(url, timeout=30)
except requests.RequestException as exc:
    response = None
    print(f"Failed to download the file: {exc}")
else:
    if response.status_code == 200:
        # Write the raw bytes so the file is stored exactly as served.
        with open(local_filename, 'wb') as f:
            f.write(response.content)
        print(f"File '{local_filename}' has been downloaded and saved.")
    else:
        print(f"Failed to download the file. Status code: {response.status_code}")

# Load the file that was just saved (same path as the download target).
data = pd.read_csv(local_filename)

# Keep only the annotation columns the interface works with, in a fixed order.
df = pd.DataFrame(data, columns=["token", "pos", "lemma", "aspect", "tense", "verbForm", "voice", "mood", "number", "person", "case", "gender"])
|
27 |
+
|
28 |
+
# Define a filtering function
|
29 |
+
def filter_data(token, pos, lemma, aspect, tense, verbForm, voice, mood, number, person, case, gender):
    """Filter the module-level corpus ``df`` by the given annotation values.

    Every argument is optional: a falsy value (empty string, ``None``) means
    "do not filter on this column". ``token`` is compared case-insensitively;
    all other arguments must equal the column value exactly.

    Returns:
        A dict with the number of matching rows under ``" total_entries "``
        and the matching rows as a list of records under ``" filtered_data "``.
    """
    # Boolean indexing always builds a new frame, so ``df`` itself is never modified.
    filtered_df = df

    # Make the token filter case-insensitive
    if token:
        filtered_df = filtered_df[filtered_df['token'].str.lower() == token.lower()]

    # Exact-match filters, one per remaining column. All of them are applied,
    # so every control in the interface actually narrows the result.
    exact_filters = {
        'pos': pos,
        'lemma': lemma,
        'aspect': aspect,
        'tense': tense,
        'verbForm': verbForm,
        'voice': voice,
        'mood': mood,
        'number': number,
        'person': person,
        'case': case,
        'gender': gender,
    }
    for column, wanted in exact_filters.items():
        if wanted:
            filtered_df = filtered_df[filtered_df[column] == wanted]

    total_entries = len(filtered_df)  # Calculate the total number of entries
    return {" total_entries ": total_entries, " filtered_data ": filtered_df.to_dict(orient="records")}
|
46 |
+
|
47 |
+
# Define dropdown menu options: the distinct values of each annotation column,
# in order of first appearance.
# Missing values are dropped because a NaN choice can never match a row through
# an equality filter; ``tolist()`` yields plain Python values instead of numpy scalars.
pos_options = df['pos'].dropna().unique().tolist()
aspect_options = df['aspect'].dropna().unique().tolist()
tense_options = df['tense'].dropna().unique().tolist()
verbForm_options = df['verbForm'].dropna().unique().tolist()
voice_options = df['voice'].dropna().unique().tolist()
mood_options = df['mood'].dropna().unique().tolist()
number_options = df['number'].dropna().unique().tolist()
person_options = df['person'].dropna().unique().tolist()
case_options = df['case'].dropna().unique().tolist()
gender_options = df['gender'].dropna().unique().tolist()
|
58 |
+
|
59 |
+
# Create a Gradio interface.
# Inputs are listed in the same order as the parameters of filter_data.
# Components come from the top-level ``gr`` namespace throughout; the legacy
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated.
iface = gr.Interface(
    fn=filter_data,
    inputs=[
        gr.Textbox(label="Token (token)"),
        gr.Dropdown(choices=pos_options, label="Part of Speech (pos)"),
        gr.Textbox(label="Lemma (lemma)"),
        gr.Dropdown(choices=aspect_options, label="Aspect (aspect)"),
        gr.Dropdown(choices=tense_options, label="Tense (tense)"),
        gr.Dropdown(choices=verbForm_options, label="Verb Form (verbForm)"),
        gr.Dropdown(choices=voice_options, label="Voice (voice)"),
        gr.Dropdown(choices=mood_options, label="Mood (mood)"),
        gr.Dropdown(choices=number_options, label="Number (number)"),
        gr.Dropdown(choices=person_options, label="Person (person)"),
        gr.Dropdown(choices=case_options, label="Case (case)"),
        gr.Dropdown(choices=gender_options, label="Gender (gender)"),
    ],
    outputs=gr.JSON(),
    css="label[for=pos] { color: red; }",  # Highlight 'pos' label in red
    theme=gr.themes.Base(primary_hue="teal").set(
        button_primary_background_fill="*primary_400",
        button_primary_background_fill_hover="*primary_300",
    ),
)
|
82 |
+
|
83 |
+
|
84 |
+
# Start the web app; share=True asks Gradio for a public share link.
iface.launch(share=True)
|