Spaces:

bestroi
/

corpusSermoVulgaris

Running

App Files Files Community

bestroi committed on Oct 17, 2023

Commit

b20813c

1 Parent(s): fbee5ed

Create filter_corpus

Browse files

Files changed (1) hide show

filter_corpus +86 -0

filter_corpus ADDED Viewed

	@@ -0,0 +1,86 @@

+import gradio as gr
+import pandas as pd
+import requests
+# URL of the file you want to download
+url = "https://raw.githubusercontent.com/Bestroi150/NLP_LAT_COLL/main/corpus_sermo_vulgaris_token.csv"
+# Specify the local file name to save the downloaded content
+local_filename = "corpus_sermo_vulgaris_token.csv"
+# Send an HTTP GET request to the URL
+response = requests.get(url)
+# Check if the request was successful (status code 200)
+if response.status_code == 200:
+    # Open the local file for writing and save the content from the response
+    with open(local_filename, 'wb') as f:
+        f.write(response.content)
+    print(f"File '{local_filename}' has been downloaded and saved.")
+else:
+    print(f"Failed to download the file. Status code: {response.status_code}")
+data = pd.read_csv('corpus_sermo_vulgaris_token.csv')
+# Create a DataFrame from the data
+df = pd.DataFrame(data, columns=["token", "pos", "lemma", "aspect", "tense", "verbForm", "voice", "mood", "number", "person", "case", "gender"])
+# Define a filtering function
+def filter_data(token, pos, lemma, aspect, tense, verbForm, voice, mood, number, person, case, gender):
+    filtered_df = df.copy()  # Make a copy of the original DataFrame
+    # Make the token filter case-insensitive
+    if token:
+        filtered_df = filtered_df[filtered_df['token'].str.lower() == token.lower()]
+    if pos:
+        filtered_df = filtered_df[filtered_df['pos'] == pos]
+    if lemma:
+        filtered_df = filtered_df[filtered_df['lemma'] == lemma]
+    if aspect:
+        filtered_df = filtered_df[filtered_df['aspect'] == aspect]
+    # Add more filters for other columns in a similar way
+    total_entries = len(filtered_df)  # Calculate the total number of entries
+    return {" total_entries ": total_entries, " filtered_data ": filtered_df.to_dict(orient="records")}
+# Define dropdown menu options
+pos_options = list(df['pos'].unique())
+aspect_options = list(df['aspect'].unique())
+tense_options = list(df['tense'].unique())
+verbForm_options = list(df['verbForm'].unique())
+voice_options = list(df['voice'].unique())
+mood_options = list(df['mood'].unique())
+number_options = list(df['number'].unique())
+person_options = list(df['person'].unique())
+case_options = list(df['case'].unique())
+gender_options = list(df['gender'].unique())
+# Create a Gradio interface
+iface = gr.Interface(
+    fn=filter_data,
+    inputs=[
+        gr.Textbox(label="Token (token)"),
+        gr.inputs.Dropdown(choices=pos_options, label="Part of Speech (pos)"),
+        gr.inputs.Textbox(label="Lemma (lemma)"),
+        gr.inputs.Dropdown(choices=aspect_options, label="Aspect (aspect)"),
+        gr.inputs.Dropdown(choices=tense_options, label="Tense (tense)"),
+        gr.inputs.Dropdown(choices=verbForm_options, label="Verb Form (verbForm)"),
+        gr.inputs.Dropdown(choices=voice_options, label="Voice (voice)"),
+        gr.inputs.Dropdown(choices=mood_options, label="Mood (mood)"),
+        gr.inputs.Dropdown(choices=number_options, label="Number (number)"),
+        gr.inputs.Dropdown(choices=person_options, label="Person (person)"),
+        gr.inputs.Dropdown(choices=case_options, label="Case (case)"),
+        gr.inputs.Dropdown(choices=gender_options, label="Gender (gender)"),
+    ],
+    outputs=gr.outputs.JSON(),
+    css="label[for=pos] { color: red; }",  # Highlight 'pos' label in red
+    theme=gr.themes.Base(primary_hue="teal").set(
+    button_primary_background_fill="*primary_400",
+    button_primary_background_fill_hover="*primary_300",
+))
+iface.launch(
+    share=True
+)