Upload 2 files
- app.py +52 -0
- requirements.txt +56 -0
app.py
ADDED
@@ -0,0 +1,52 @@
+import streamlit as st
+import requests
+from bs4 import BeautifulSoup
+import urllib3
+
+def simple_web_scraper(url, scrape_option):
+    try:
+        # Create a PoolManager with urllib3 to handle SSL
+        http = urllib3.PoolManager()
+
+        # Send an HTTP request
+        response = http.request('GET', url)
+
+        # Check if the request was successful (status code 200)
+        if response.status == 200:
+            # Parse the HTML content of the page
+            soup = BeautifulSoup(response.data, 'html.parser')
+
+            # Extract information from the HTML based on user's choice
+            if scrape_option == 'data':
+                # Extract all text content from the page
+                all_text = soup.get_text()
+
+                # Prepare data for the table (split text by lines)
+                table_data = [{'Data': line.strip()} for line in all_text.split('\n') if line.strip()]
+
+                # Display the data in a table
+                st.table(table_data)
+            elif scrape_option == 'links':
+                # Example: Extract all the links on the page
+                links = soup.find_all('a')
+
+                # Prepare data for the table
+                table_data = [{'Links': link.get('href')} for link in links]
+
+                # Display the data in a table
+                st.table(table_data)
+            else:
+                st.write('Invalid scrape option. Please choose "data" or "links".')
+        else:
+            st.write(f'Error: {response.status}')
+
+    except Exception as e:
+        st.write(f'An error occurred: {e}')
+
+# Streamlit UI
+st.title("Web Scraping Tool")
+website_url = st.text_input("Enter the URL to scrape:")
+scrape_option = st.selectbox("Select what to scrape:", ['data', 'links'])
+
+if st.button("Scrape"):
+    simple_web_scraper(website_url, scrape_option)
requirements.txt
ADDED
@@ -0,0 +1,56 @@
+altair==5.2.0
+attrs==23.1.0
+beautifulsoup4==4.12.2
+blinker==1.7.0
+cachetools==5.3.2
+certifi==2023.11.17
+charset-normalizer==3.3.2
+click==8.1.7
+filelock==3.13.1
+fsspec==2023.12.2
+gitdb==4.0.11
+GitPython==3.1.40
+huggingface-hub==0.19.4
+idna==3.6
+importlib-metadata==6.11.0
+Jinja2==3.1.2
+jsonschema==4.20.0
+jsonschema-specifications==2023.11.2
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+mdurl==0.1.2
+numpy==1.26.2
+packaging==23.2
+pandas==2.1.4
+Pillow==10.1.0
+protobuf==4.25.1
+pyarrow==14.0.1
+pydeck==0.8.1b0
+Pygments==2.17.2
+python-dateutil==2.8.2
+pytz==2023.3.post1
+PyYAML==6.0.1
+referencing==0.32.0
+regex==2023.10.3
+requests==2.31.0
+rich==13.7.0
+rpds-py==0.13.2
+safetensors==0.4.1
+six==1.16.0
+smmap==5.0.1
+soupsieve==2.5
+streamlit==1.29.0
+tenacity==8.2.3
+tokenizers==0.15.0
+toml==0.10.2
+toolz==0.12.0
+tornado==6.4
+tqdm==4.66.1
+transformers==4.36.0
+typing_extensions==4.9.0
+tzdata==2023.3
+tzlocal==5.2
+urllib3==2.1.0
+validators==0.22.0
+watchdog==3.0.0
+zipp==3.17.0
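
Note: to try the committed app outside the Space, a minimal sketch (assuming the two files above are saved together in one local directory, which the commit itself does not state) is to install the pinned dependencies and launch the script with Streamlit:

    pip install -r requirements.txt
    streamlit run app.py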