bestroi committed on
Commit
b20813c
·
1 Parent(s): fbee5ed

Create filter_corpus

Browse files
Files changed (1) hide show
  1. filter_corpus +86 -0
filter_corpus ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import requests
4
+
5
# Fetch the corpus CSV from GitHub, cache it on disk, and load it into the
# module-level DataFrame ``df`` that the filtering function and UI read from.

# URL of the file you want to download
url = "https://raw.githubusercontent.com/Bestroi150/NLP_LAT_COLL/main/corpus_sermo_vulgaris_token.csv"

# Specify the local file name to save the downloaded content
local_filename = "corpus_sermo_vulgaris_token.csv"

# Send an HTTP GET request to the URL. A timeout is required: without one,
# requests may block forever on an unresponsive server.
try:
    response = requests.get(url, timeout=30)
except requests.RequestException as exc:
    # Network-level failure (DNS, connection refused, timeout, ...). Report it
    # and fall through so that a previously downloaded copy can still be used.
    print(f"Failed to download the file: {exc}")
else:
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Save the raw bytes exactly as served
        with open(local_filename, 'wb') as f:
            f.write(response.content)
        print(f"File '{local_filename}' has been downloaded and saved.")
    else:
        print(f"Failed to download the file. Status code: {response.status_code}")

# Read whatever copy is on disk (fresh download or an earlier one); this
# raises if no copy exists at all.
data = pd.read_csv(local_filename)

# Create a DataFrame restricted to (and ordered by) the columns the UI exposes
df = pd.DataFrame(data, columns=["token", "pos", "lemma", "aspect", "tense", "verbForm", "voice", "mood", "number", "person", "case", "gender"])
27
+
28
# Define a filtering function
def filter_data(token, pos, lemma, aspect, tense, verbForm, voice, mood, number, person, case, gender):
    """Filter the module-level corpus DataFrame ``df`` by the given criteria.

    Every argument is optional: a value of ``None`` or ``""`` means "do not
    filter on this column". All supplied criteria are combined with AND.

    Args:
        token: Surface form to match; compared case-insensitively.
        pos, lemma, aspect, tense, verbForm, voice, mood, number, person,
        case, gender: Values matched exactly against the column of the same
            name.

    Returns:
        A dict with two keys: ``" total_entries "`` (number of matching rows)
        and ``" filtered_data "`` (the matching rows as a list of records).
        Missing cells are reported as ``None`` so the result is valid JSON.
    """
    # Boolean indexing always produces a new frame, so ``df`` itself is never
    # modified and no defensive copy is needed.
    filtered_df = df

    # Make the token filter case-insensitive
    if token:
        filtered_df = filtered_df[filtered_df['token'].str.lower() == token.lower()]

    # Every remaining criterion is an exact match on its own column.
    exact_filters = {
        'pos': pos,
        'lemma': lemma,
        'aspect': aspect,
        'tense': tense,
        'verbForm': verbForm,
        'voice': voice,
        'mood': mood,
        'number': number,
        'person': person,
        'case': case,
        'gender': gender,
    }
    for column, wanted in exact_filters.items():
        # Compare explicitly against None/"" rather than relying on
        # truthiness, so a legitimate falsy value such as 0 still filters.
        if wanted is None or wanted == "":
            continue
        filtered_df = filtered_df[filtered_df[column] == wanted]

    # NaN is not representable in strict JSON; emit None (-> null) instead.
    records = [
        {key: (None if pd.isna(value) else value) for key, value in row.items()}
        for row in filtered_df.to_dict(orient="records")
    ]

    total_entries = len(filtered_df)  # Calculate the total number of entries
    return {" total_entries ": total_entries, " filtered_data ": records}
46
+
47
# Define dropdown menu options
def _dropdown_choices(column):
    """Return the distinct non-missing values of ``df[column]``.

    Missing values are dropped because a NaN choice can never match a row
    (NaN != NaN). ``tolist()`` converts NumPy scalars to plain Python objects.
    """
    return df[column].dropna().unique().tolist()


pos_options = _dropdown_choices('pos')
aspect_options = _dropdown_choices('aspect')
tense_options = _dropdown_choices('tense')
verbForm_options = _dropdown_choices('verbForm')
voice_options = _dropdown_choices('voice')
mood_options = _dropdown_choices('mood')
number_options = _dropdown_choices('number')
person_options = _dropdown_choices('person')
case_options = _dropdown_choices('case')
gender_options = _dropdown_choices('gender')

# Create a Gradio interface. Components are taken from the top-level ``gr``
# namespace throughout; the legacy ``gr.inputs`` / ``gr.outputs`` namespaces
# are deprecated and absent from newer Gradio releases. The input order must
# match the parameter order of ``filter_data``.
iface = gr.Interface(
    fn=filter_data,
    inputs=[
        gr.Textbox(label="Token (token)"),
        gr.Dropdown(choices=pos_options, label="Part of Speech (pos)"),
        gr.Textbox(label="Lemma (lemma)"),
        gr.Dropdown(choices=aspect_options, label="Aspect (aspect)"),
        gr.Dropdown(choices=tense_options, label="Tense (tense)"),
        gr.Dropdown(choices=verbForm_options, label="Verb Form (verbForm)"),
        gr.Dropdown(choices=voice_options, label="Voice (voice)"),
        gr.Dropdown(choices=mood_options, label="Mood (mood)"),
        gr.Dropdown(choices=number_options, label="Number (number)"),
        gr.Dropdown(choices=person_options, label="Person (person)"),
        gr.Dropdown(choices=case_options, label="Case (case)"),
        gr.Dropdown(choices=gender_options, label="Gender (gender)"),
    ],
    outputs=gr.JSON(),
    css="label[for=pos] { color: red; }",  # Intended to highlight the 'pos' label in red
    theme=gr.themes.Base(primary_hue="teal").set(
        button_primary_background_fill="*primary_400",
        button_primary_background_fill_hover="*primary_300",
    ),
)


# share=True asks Gradio to also expose the app through a public share link.
iface.launch(
    share=True
)