Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -19,25 +19,23 @@ def extract_number(entry):
|
|
19 |
break
|
20 |
return float(num_str) if num_str else 0.0
|
21 |
|
22 |
-
def visualize_data(csv_file, sort_entries=False):
|
23 |
-
# Load data from CSV file
|
24 |
data = pd.read_csv(csv_file)
|
25 |
-
|
26 |
-
# Sorting by the first number after "plin. nat." if specified
|
27 |
if sort_entries:
|
28 |
data['SortKey'] = data['Book/Chapter'].apply(extract_number)
|
29 |
data = data.sort_values(by='SortKey')
|
30 |
|
31 |
data['token_count'] = data['Context'].apply(count_tokens)
|
32 |
|
33 |
-
|
34 |
lemma_stats = data.groupby('Lemma').agg({'Context': 'count', 'token_count': 'mean'}).reset_index()
|
35 |
|
36 |
-
|
37 |
st.write("Basic Statistics:")
|
38 |
st.table(lemma_stats)
|
39 |
|
40 |
-
|
41 |
fig_bar = px.bar(
|
42 |
lemma_stats,
|
43 |
x='Lemma',
|
@@ -47,21 +45,16 @@ def visualize_data(csv_file, sort_entries=False):
|
|
47 |
title='Lemma Frequency in the Dataset'
|
48 |
)
|
49 |
|
50 |
-
|
51 |
-
st.plotly_chart(fig_bar)
|
52 |
-
|
53 |
-
# Additional Visualization
|
54 |
-
# Basic statistics for additional data
|
55 |
lemma_stats_additional = data['Lemma'].value_counts().reset_index()
|
56 |
lemma_stats_additional.columns = ['Lemma', 'Frequency']
|
57 |
|
58 |
-
|
59 |
most_common_lemma_additional = lemma_stats_additional.iloc[0]['Lemma']
|
60 |
|
61 |
-
|
62 |
-
chapter_stats_additional = data.groupby(['Lemma', 'Book/Chapter']).size().unstack(fill_value=0)
|
63 |
-
|
64 |
-
# Create a pie chart for lemma frequency using Plotly Express
|
65 |
fig_pie = px.pie(
|
66 |
lemma_stats_additional,
|
67 |
values='Frequency',
|
@@ -69,10 +62,7 @@ def visualize_data(csv_file, sort_entries=False):
|
|
69 |
title='Lemma Frequency Distribution'
|
70 |
)
|
71 |
|
72 |
-
|
73 |
-
st.plotly_chart(fig_pie)
|
74 |
-
|
75 |
-
# Create a subplot for chapter-wise lemma mentions
|
76 |
fig_additional = px.bar(
|
77 |
chapter_stats_additional,
|
78 |
barmode='stack',
|
@@ -80,15 +70,12 @@ def visualize_data(csv_file, sort_entries=False):
|
|
80 |
title='Chapter-wise Lemma Mentions'
|
81 |
)
|
82 |
|
83 |
-
|
84 |
-
st.plotly_chart(fig_additional)
|
85 |
-
|
86 |
-
# Display the most common lemma
|
87 |
st.write(f"Most Common Lemma: {most_common_lemma_additional}")
|
88 |
|
89 |
-
|
90 |
-
with st.expander("Click to view context"):
|
91 |
-
# Display context for each entry
|
92 |
for index, row in data.iterrows():
|
93 |
st.write(f"Lemma: {row['Lemma']}")
|
94 |
st.write(f"Book/Chapter: {row['Book/Chapter']}")
|
@@ -98,10 +85,8 @@ def visualize_data(csv_file, sort_entries=False):
|
|
98 |
def main():
|
99 |
st.title("Lemma Frequency Visualization")
|
100 |
|
101 |
-
# File selection
|
102 |
csv_file = st.sidebar.selectbox("Select CSV file:", ["allData.csv","places.csv","ethnonyms.csv","rivers.csv","mountains.csv","toponyms.csv"])
|
103 |
|
104 |
-
# Visualization based on selected file and option to sort
|
105 |
visualize_data(csv_file)
|
106 |
|
107 |
if __name__ == "__main__":
|
|
|
19 |
break
|
20 |
return float(num_str) if num_str else 0.0
|
21 |
|
22 |
+
def visualize_data(csv_file, sort_entries=False):
|
|
|
23 |
data = pd.read_csv(csv_file)
|
24 |
+
|
|
|
25 |
if sort_entries:
|
26 |
data['SortKey'] = data['Book/Chapter'].apply(extract_number)
|
27 |
data = data.sort_values(by='SortKey')
|
28 |
|
29 |
data['token_count'] = data['Context'].apply(count_tokens)
|
30 |
|
31 |
+
|
32 |
lemma_stats = data.groupby('Lemma').agg({'Context': 'count', 'token_count': 'mean'}).reset_index()
|
33 |
|
34 |
+
|
35 |
st.write("Basic Statistics:")
|
36 |
st.table(lemma_stats)
|
37 |
|
38 |
+
|
39 |
fig_bar = px.bar(
|
40 |
lemma_stats,
|
41 |
x='Lemma',
|
|
|
45 |
title='Lemma Frequency in the Dataset'
|
46 |
)
|
47 |
|
48 |
+
|
49 |
+
st.plotly_chart(fig_bar)
|
|
|
|
|
|
|
50 |
lemma_stats_additional = data['Lemma'].value_counts().reset_index()
|
51 |
lemma_stats_additional.columns = ['Lemma', 'Frequency']
|
52 |
|
53 |
+
|
54 |
most_common_lemma_additional = lemma_stats_additional.iloc[0]['Lemma']
|
55 |
|
56 |
+
|
57 |
+
chapter_stats_additional = data.groupby(['Lemma', 'Book/Chapter']).size().unstack(fill_value=0)
|
|
|
|
|
58 |
fig_pie = px.pie(
|
59 |
lemma_stats_additional,
|
60 |
values='Frequency',
|
|
|
62 |
title='Lemma Frequency Distribution'
|
63 |
)
|
64 |
|
65 |
+
st.plotly_chart(fig_pie)
|
|
|
|
|
|
|
66 |
fig_additional = px.bar(
|
67 |
chapter_stats_additional,
|
68 |
barmode='stack',
|
|
|
70 |
title='Chapter-wise Lemma Mentions'
|
71 |
)
|
72 |
|
73 |
+
|
74 |
+
st.plotly_chart(fig_additional)
|
|
|
|
|
75 |
st.write(f"Most Common Lemma: {most_common_lemma_additional}")
|
76 |
|
77 |
+
|
78 |
+
with st.expander("Click to view context"):
|
|
|
79 |
for index, row in data.iterrows():
|
80 |
st.write(f"Lemma: {row['Lemma']}")
|
81 |
st.write(f"Book/Chapter: {row['Book/Chapter']}")
|
|
|
85 |
def main():
|
86 |
st.title("Lemma Frequency Visualization")
|
87 |
|
|
|
88 |
csv_file = st.sidebar.selectbox("Select CSV file:", ["allData.csv","places.csv","ethnonyms.csv","rivers.csv","mountains.csv","toponyms.csv"])
|
89 |
|
|
|
90 |
visualize_data(csv_file)
|
91 |
|
92 |
if __name__ == "__main__":
|