bestroi committed on
Commit
4626319
·
verified ·
1 Parent(s): 5d6563b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -30
app.py CHANGED
@@ -19,25 +19,23 @@ def extract_number(entry):
19
  break
20
  return float(num_str) if num_str else 0.0
21
 
22
- def visualize_data(csv_file, sort_entries=False):
23
- # Load data from CSV file
24
  data = pd.read_csv(csv_file)
25
-
26
- # Sorting by the first number after "plin. nat." if specified
27
  if sort_entries:
28
  data['SortKey'] = data['Book/Chapter'].apply(extract_number)
29
  data = data.sort_values(by='SortKey')
30
 
31
  data['token_count'] = data['Context'].apply(count_tokens)
32
 
33
- # Basic statistics
34
  lemma_stats = data.groupby('Lemma').agg({'Context': 'count', 'token_count': 'mean'}).reset_index()
35
 
36
- # Display the basic statistics using st.table()
37
  st.write("Basic Statistics:")
38
  st.table(lemma_stats)
39
 
40
- # Bar chart for lemma frequency using Plotly Express
41
  fig_bar = px.bar(
42
  lemma_stats,
43
  x='Lemma',
@@ -47,21 +45,16 @@ def visualize_data(csv_file, sort_entries=False):
47
  title='Lemma Frequency in the Dataset'
48
  )
49
 
50
- # Display the bar chart using st.plotly_chart()
51
- st.plotly_chart(fig_bar)
52
-
53
- # Additional Visualization
54
- # Basic statistics for additional data
55
  lemma_stats_additional = data['Lemma'].value_counts().reset_index()
56
  lemma_stats_additional.columns = ['Lemma', 'Frequency']
57
 
58
- # Find the most common lemma
59
  most_common_lemma_additional = lemma_stats_additional.iloc[0]['Lemma']
60
 
61
- # Distribution across chapters
62
- chapter_stats_additional = data.groupby(['Lemma', 'Book/Chapter']).size().unstack(fill_value=0)
63
-
64
- # Create a pie chart for lemma frequency using Plotly Express
65
  fig_pie = px.pie(
66
  lemma_stats_additional,
67
  values='Frequency',
@@ -69,10 +62,7 @@ def visualize_data(csv_file, sort_entries=False):
69
  title='Lemma Frequency Distribution'
70
  )
71
 
72
- # Display the pie chart using st.plotly_chart()
73
- st.plotly_chart(fig_pie)
74
-
75
- # Create a subplot for chapter-wise lemma mentions
76
  fig_additional = px.bar(
77
  chapter_stats_additional,
78
  barmode='stack',
@@ -80,15 +70,12 @@ def visualize_data(csv_file, sort_entries=False):
80
  title='Chapter-wise Lemma Mentions'
81
  )
82
 
83
- # Display the subplot using st.plotly_chart()
84
- st.plotly_chart(fig_additional)
85
-
86
- # Display the most common lemma
87
  st.write(f"Most Common Lemma: {most_common_lemma_additional}")
88
 
89
- # Expandable section to display context
90
- with st.expander("Click to view context"):
91
- # Display context for each entry
92
  for index, row in data.iterrows():
93
  st.write(f"Lemma: {row['Lemma']}")
94
  st.write(f"Book/Chapter: {row['Book/Chapter']}")
@@ -98,10 +85,8 @@ def visualize_data(csv_file, sort_entries=False):
98
  def main():
99
  st.title("Lemma Frequency Visualization")
100
 
101
- # File selection
102
  csv_file = st.sidebar.selectbox("Select CSV file:", ["allData.csv","places.csv","ethnonyms.csv","rivers.csv","mountains.csv","toponyms.csv"])
103
 
104
- # Visualization based on selected file and option to sort
105
  visualize_data(csv_file)
106
 
107
  if __name__ == "__main__":
 
19
  break
20
  return float(num_str) if num_str else 0.0
21
 
22
+ def visualize_data(csv_file, sort_entries=False):
 
23
  data = pd.read_csv(csv_file)
24
+
 
25
  if sort_entries:
26
  data['SortKey'] = data['Book/Chapter'].apply(extract_number)
27
  data = data.sort_values(by='SortKey')
28
 
29
  data['token_count'] = data['Context'].apply(count_tokens)
30
 
31
+
32
  lemma_stats = data.groupby('Lemma').agg({'Context': 'count', 'token_count': 'mean'}).reset_index()
33
 
34
+
35
  st.write("Basic Statistics:")
36
  st.table(lemma_stats)
37
 
38
+
39
  fig_bar = px.bar(
40
  lemma_stats,
41
  x='Lemma',
 
45
  title='Lemma Frequency in the Dataset'
46
  )
47
 
48
+
49
+ st.plotly_chart(fig_bar)
 
 
 
50
  lemma_stats_additional = data['Lemma'].value_counts().reset_index()
51
  lemma_stats_additional.columns = ['Lemma', 'Frequency']
52
 
53
+
54
  most_common_lemma_additional = lemma_stats_additional.iloc[0]['Lemma']
55
 
56
+
57
+ chapter_stats_additional = data.groupby(['Lemma', 'Book/Chapter']).size().unstack(fill_value=0)
 
 
58
  fig_pie = px.pie(
59
  lemma_stats_additional,
60
  values='Frequency',
 
62
  title='Lemma Frequency Distribution'
63
  )
64
 
65
+ st.plotly_chart(fig_pie)
 
 
 
66
  fig_additional = px.bar(
67
  chapter_stats_additional,
68
  barmode='stack',
 
70
  title='Chapter-wise Lemma Mentions'
71
  )
72
 
73
+
74
+ st.plotly_chart(fig_additional)
 
 
75
  st.write(f"Most Common Lemma: {most_common_lemma_additional}")
76
 
77
+
78
+ with st.expander("Click to view context"):
 
79
  for index, row in data.iterrows():
80
  st.write(f"Lemma: {row['Lemma']}")
81
  st.write(f"Book/Chapter: {row['Book/Chapter']}")
 
85
  def main():
86
  st.title("Lemma Frequency Visualization")
87
 
 
88
  csv_file = st.sidebar.selectbox("Select CSV file:", ["allData.csv","places.csv","ethnonyms.csv","rivers.csv","mountains.csv","toponyms.csv"])
89
 
 
90
  visualize_data(csv_file)
91
 
92
  if __name__ == "__main__":