bestroi committed on
Commit
e2d28ce
·
1 Parent(s): de20108

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+
5
def count_tokens(text):
    """Return the number of whitespace-delimited tokens in *text*.

    Non-string values (for example the NaN that pandas produces for an
    empty CSV cell) count as zero tokens instead of raising.
    """
    if not isinstance(text, str):
        return 0
    return len(text.split())


# Function to read data and perform visualization
def visualize_data(csv_file):
    """Render lemma statistics and charts for *csv_file* in the Streamlit app.

    The CSV is read once and must provide the columns ``Context``,
    ``Lemma`` and ``Book/Chapter``. The function emits, in order: a bar
    chart of lemma frequency, a per-lemma statistics table, a figure
    combining a frequency pie chart with a stacked chapter-wise bar chart,
    and the name of the most common lemma.

    Args:
        csv_file: Path (or file-like object) accepted by ``pandas.read_csv``.
    """
    data = pd.read_csv(csv_file)
    data['Token_Count'] = data['Context'].apply(count_tokens)

    # Basic statistics: row count and mean token count per lemma
    lemma_stats = (
        data.groupby('Lemma')
        .agg({'Context': 'count', 'Token_Count': 'mean'})
        .reset_index()
    )

    # Without any lemma rows there is nothing to plot, and the
    # "most common lemma" lookup below would raise IndexError.
    if lemma_stats.empty:
        st.warning("The selected file contains no lemma data to visualize.")
        return

    # Bar chart for lemma frequency. An explicit figure is passed to
    # st.pyplot instead of relying on the global pyplot figure.
    bar_fig, bar_ax = plt.subplots(figsize=(10, 6))
    bar_ax.bar(lemma_stats['Lemma'], lemma_stats['Context'], color='skyblue')
    bar_ax.set_xlabel('Lemma')
    bar_ax.set_ylabel('Frequency')
    bar_ax.set_title('Lemma Frequency in the Dataset')
    bar_ax.tick_params(axis='x', labelrotation=45)
    st.pyplot(bar_fig)
    plt.close(bar_fig)  # release the figure; Streamlit reruns the script often

    # Display basic statistics
    st.write("Basic Statistics:")
    st.write(lemma_stats)

    # Additional visualization, reusing the already-loaded frame
    lemma_counts = data['Lemma'].value_counts().reset_index()
    lemma_counts.columns = ['Lemma', 'Frequency']

    # value_counts sorts by descending frequency, so row 0 is the most common
    most_common_lemma = lemma_counts.iloc[0]['Lemma']

    # Distribution across chapters: one row per lemma, one column per chapter
    chapter_stats = (
        data.groupby(['Lemma', 'Book/Chapter']).size().unstack(fill_value=0)
    )

    # Create a single row with two subplots
    fig, axs = plt.subplots(1, 2, figsize=(20, 10))

    # Pie chart for lemma frequency
    axs[0].pie(
        lemma_counts['Frequency'],
        labels=lemma_counts['Lemma'],
        autopct='%1.1f%%',
        startangle=90,
    )
    axs[0].set_title('Lemma Frequency Distribution')

    # Bar chart for chapter-wise lemma mentions
    chapter_stats.plot(kind='bar', stacked=True, ax=axs[1])
    axs[1].set_title('Chapter-wise Lemma Mentions')
    axs[1].set_xlabel('Book/Chapter')
    axs[1].set_ylabel('Mentions')
    axs[1].legend(title='Lemma', bbox_to_anchor=(1.05, 1), loc='upper left')

    st.pyplot(fig)
    plt.close(fig)

    # Display the most common lemma
    st.write(f"Most Common Lemma: {most_common_lemma}")
58
+
59
# Main Streamlit app
def main():
    """Build the page: title, sidebar file picker, then the visualizations."""
    st.title("Lemma Frequency Visualization")

    # File selection: the sidebar offers the CSV files the app knows about
    available_files = ["toponyms.csv", "ethonyms.csv"]
    chosen_file = st.sidebar.selectbox("Select CSV file:", available_files)

    # Visualization based on selected file
    visualize_data(chosen_file)


# Run the app only when executed as a script, not when imported
if __name__ == "__main__":
    main()