Zeel committed on
Commit
9579d2d
·
0 Parent(s):

Initial clean commit

Browse files
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ raw_data/main_data.csv filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: QandA For VayuBuddy
3
+ emoji: 🏢
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: streamlit
7
+ sdk_version: 1.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
add_questions.ipynb ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 6,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import json\n",
10
+ "import inspect\n",
11
+ "import subprocess\n",
12
+ "import numpy as np\n",
13
+ "import tempfile\n",
14
+ "from glob import glob"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "markdown",
19
+ "metadata": {},
20
+ "source": [
21
+ "# Don't change this part\n",
22
+ "\n",
23
+ "You are an air quality expert Python code generator. You need to act on `data`, a pandas DataFrame with air quality data from India to answer questions about air quality. Data frequency is daily. The data has the following columns and data types: {'Timestamp': dtype('<M8[ns]'), 'station': dtype('O'), 'PM2.5': dtype('float64'), 'PM10': dtype('float64'), 'address': dtype('O'), 'city': dtype('O'), 'latitude': dtype('float64'), 'longitude': dtype('float64'), 'state': dtype('O')}."
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "markdown",
28
+ "metadata": {},
29
+ "source": [
30
+ "# Work on this part"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 12,
36
+ "metadata": {},
37
+ "outputs": [
38
+ {
39
+ "name": "stdout",
40
+ "output_type": "stream",
41
+ "text": [
42
+ "0.02\n"
43
+ ]
44
+ }
45
+ ],
46
+ "source": [
47
+ "category = \"temporal_aggregation\"\n",
48
+ "question = \"What is the minimum PM2.5 value recorded ever?\"\n",
49
+ "\n",
50
+ "def true_code():\n",
51
+ " import pandas as pd\n",
52
+ " main_data = pd.read_csv(\"raw_data/main_data.csv\")\n",
53
+ " print(main_data[\"PM2.5\"].min())\n",
54
+ " \n",
55
+ "code = inspect.getsource(true_code) + \"\\ntrue_code()\"\n",
56
+ "\n",
57
+ "output = subprocess.check_output([\"python3\", \"-c\", code]).decode(\"utf-8\").strip()\n",
58
+ "print(output)"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "markdown",
63
+ "metadata": {},
64
+ "source": [
65
+ "# Don't change this part"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": 13,
71
+ "metadata": {},
72
+ "outputs": [
73
+ {
74
+ "name": "stdout",
75
+ "output_type": "stream",
76
+ "text": [
77
+ "Saving to data/temporal_aggregation/jio8bxfb.json and data/temporal_aggregation/jio8bxfb.py\n"
78
+ ]
79
+ }
80
+ ],
81
+ "source": [
"from pathlib import Path\n",
"\n",
"# Map every stored question to the first file that contains it.\n",
"existing_questions = {}\n",
"for path in glob(\"data/*/*.json\"):\n",
"    with open(path, \"r\", encoding=\"utf-8\") as f:\n",
"        existing_questions.setdefault(json.load(f)[\"question\"], path)\n",
"\n",
"if question in existing_questions:\n",
"    raise ValueError(f\"Question already exists in {existing_questions[question]}\")\n",
"\n",
"# The first question of a new category has no directory yet.\n",
"category_dir = Path(\"data\") / category\n",
"category_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"# tempfile.mktemp() is deprecated: it only returns a name, which may be taken\n",
"# before the file is opened. mkstemp() creates the file atomically instead.\n",
"fd, created = tempfile.mkstemp(suffix=\".json\", prefix=\"\", dir=str(category_dir))\n",
"# mkstemp() reports an absolute path; keep the repo-relative form as before.\n",
"save_name = category_dir / Path(created).name\n",
"save_code = save_name.with_suffix(\".py\")\n",
"print(f\"Saving to {save_name} and {save_code}\")\n",
"\n",
"# Reuse the descriptor mkstemp() opened so the reserved file is the one written.\n",
"with open(fd, \"w\", encoding=\"utf-8\") as f:\n",
"    json.dump({\"question\": question}, f)\n",
"\n",
"with open(save_code, \"w\", encoding=\"utf-8\") as f:\n",
"    f.write(code)"
104
+ ]
105
+ },
106
+ {
107
+ "cell_type": "code",
108
+ "execution_count": null,
109
+ "metadata": {},
110
+ "outputs": [],
111
+ "source": []
112
+ }
113
+ ],
114
+ "metadata": {
115
+ "kernelspec": {
116
+ "display_name": "zeel_py310",
117
+ "language": "python",
118
+ "name": "python3"
119
+ },
120
+ "language_info": {
121
+ "codemirror_mode": {
122
+ "name": "ipython",
123
+ "version": 3
124
+ },
125
+ "file_extension": ".py",
126
+ "mimetype": "text/x-python",
127
+ "name": "python",
128
+ "nbconvert_exporter": "python",
129
+ "pygments_lexer": "ipython3",
130
+ "version": "3.10.15"
131
+ }
132
+ },
133
+ "nbformat": 4,
134
+ "nbformat_minor": 2
135
+ }
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import subprocess
3
+ import pandas as pd
4
+ import streamlit as st
5
+ from glob import glob
6
+
7
# Sidebar switch between the overview list and the single-question inspector.
# Streamlit discourages empty widget labels (bad for accessibility, and may be
# rejected in future releases), so use a real label and collapse it visually.
mode = st.sidebar.radio(
    "Mode", ["All questions", "Inspect"], label_visibility="collapsed"
)
8
+
9
+
10
def read_question(file):
    """Return the question text stored in a question JSON file.

    Args:
        file: Path to a JSON file whose top-level object has a "question" key.

    Returns:
        The value stored under the "question" key.

    Raises:
        KeyError: If the JSON object has no "question" key.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(file, "r", encoding="utf-8") as f:
        return json.load(f)["question"]
14
+
15
+
16
# Index every question file: one row per data/<category>/<id>.json.
data = pd.DataFrame()
data["file"] = glob("data/*/*.json")
# The category is the directory name under data/, prettified for display
# ("spatial_aggregation" -> "Spatial Aggregation").
data["category"] = data["file"].apply(lambda x: " ".join(x.split("/")[1].split("_")).title())
data["question"] = data["file"].apply(read_question)

# With no question files the selectors below have nothing to offer and the
# row lookup would raise IndexError, so stop early with a message instead.
if data.empty:
    st.info("No question files found under data/*/*.json")
    st.stop()

if mode == "All questions":
    # One heading per category followed by its numbered questions.
    grouped_data = data.groupby("category").agg(list).reset_index()
    for _, row in grouped_data.iterrows():
        st.write(f"## {row['category']}")
        numbered = enumerate(zip(row["question"], row["file"]), start=1)
        for number, (question, file) in numbered:
            st.write(f"{number}. {question} ({file})")

elif mode == "Inspect":
    category = st.selectbox("Category", data["category"].unique())
    question = st.selectbox("Question", data[data["category"] == category]["question"])

    row = data[(data["category"] == category) & (data["question"] == question)].iloc[0]
    # Every question file has a sibling reference script with the same stem.
    code_file = row["file"].replace(".json", ".py")
    with open(code_file, "r", encoding="utf-8") as f:
        row_code = f.read()

    st.write(f"## Code\n```python\n{row_code}\n```\n")

    if st.button("Execute"):
        with st.status("", expanded=True):
            # Capture both streams so a failing script is reported on the page
            # instead of raising CalledProcessError and crashing the app.
            result = subprocess.run(["python3", code_file], capture_output=True)
            if result.returncode != 0:
                stderr_text = result.stderr.decode("utf-8", errors="replace").strip()
                st.error(
                    f"{code_file} exited with status {result.returncode}\n\n"
                    f"```\n{stderr_text}\n```"
                )
            else:
                st.write(f"{result.stdout.decode('utf-8').strip()}")
data/spatial_aggregation/se9pgd1q.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"question": "Which city has the highest average PM2.5 in December 2023?"}
data/spatial_aggregation/se9pgd1q.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def true_code():
    """Print the city whose mean PM2.5 over December 2023 is the highest.

    Reads raw_data/main_data.csv relative to the current working directory.
    """
    import pandas as pd

    readings = pd.read_csv("raw_data/main_data.csv")
    readings["Timestamp"] = pd.to_datetime(readings["Timestamp"])

    # Keep only the rows stamped in December 2023.
    stamps = readings["Timestamp"].dt
    in_december_2023 = (stamps.year == 2023) & (stamps.month == 12)

    city_means = readings[in_december_2023].groupby("city")["PM2.5"].mean()
    print(city_means.idxmax())
7
+
8
+ true_code()
data/temporal_aggregation/jio8bxfb.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"question": "What is the minimum PM2.5 value recorded ever?"}
data/temporal_aggregation/jio8bxfb.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def true_code():
    """Print the smallest PM2.5 value present in the dataset.

    Reads raw_data/main_data.csv relative to the current working directory.
    """
    import pandas as pd

    readings = pd.read_csv("raw_data/main_data.csv")
    lowest_pm25 = readings["PM2.5"].min()
    print(lowest_pm25)
5
+
6
+ true_code()
data/temporal_aggregation/tebhtf88.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"question": "What is the maximum PM2.5 value recorded ever?"}
data/temporal_aggregation/tebhtf88.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def true_code():
    """Print the largest PM2.5 value present in the dataset.

    Reads raw_data/main_data.csv relative to the current working directory.
    """
    import pandas as pd

    readings = pd.read_csv("raw_data/main_data.csv")
    highest_pm25 = readings["PM2.5"].max()
    print(highest_pm25)
5
+
6
+ true_code()
raw_data/main_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77ea5aff6c41f6e8e5562a75ec4ac97f498debd706d3a047e1b57a9d8bd42be1
3
+ size 266893056