Spaces:
Runtime error
Runtime error
Chintan Donda
Adding support to get the Feedback on Answer shared by KCC-FTAs in Custom Query widget
f0af1c3
| import os | |
| import re | |
| import pandas as pd | |
| from urllib.parse import urlparse | |
| import logging | |
# Configure the root logger when this module is imported. Each record is
# rendered as "<timestamp> <LEVEL> [<logger name>] <message>".
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
)

# Logger named after this module.
logger = logging.getLogger(__name__)
class UTILS:
    """Stateless helpers for text clean-up, DataFrame clean-up and URL checks."""

    def __init__(self):
        pass

    def split_text(
        self,
        text
    ):
        """Split comma-separated text into whitespace-stripped items.

        Empty items are kept, so ``"a,,b"`` yields ``["a", "", "b"]`` and an
        empty string yields ``[""]``.

        Args:
            text: String to split on ``,``.

        Returns:
            List of the stripped pieces, in their original order.
        """
        return [part.strip() for part in text.split(',')]

    def replace_newlines_and_spaces(
        self,
        text
    ):
        """Collapse every run of whitespace into a single space.

        Leading/trailing whitespace is reduced to one space, not removed.

        Args:
            text: String to normalise.

        Returns:
            The normalised string.
        """
        # r'\s+' already matches newlines, so no separate "\n" replacement
        # is needed.
        return re.sub(r'\s+', ' ', text)

    def clean_df(
        self,
        df,
        dropna=True,
        fillna=False
    ):
        """Return a cleaned copy of ``df`` without modifying the input frame.

        Steps are applied in this order: optional fill, optional NaN-row
        drop, duplicate-row removal, index reset. When ``fillna`` is true the
        later ``dropna`` step has nothing left to drop.

        Args:
            df: DataFrame to clean.
            dropna: When true, drop rows containing any missing value.
            fillna: When true, replace missing values with ``''`` first.

        Returns:
            A new DataFrame with duplicates removed and a fresh 0..n-1 index.
        """
        # Use the non-inplace variants so the caller's DataFrame is left
        # untouched; only the returned frame carries the clean-up.
        if fillna:
            df = df.fillna('')
        if dropna:
            df = df.dropna()
        return df.drop_duplicates().reset_index(drop=True)

    def validate_url_format(
        self,
        urls,
        url_type='urls'
    ):
        """Keep only the URLs that have both a scheme and a network location.

        Args:
            urls: Iterable of URL strings.
            url_type: When ``'online_pdf'``, a URL must additionally end with
                ``.pdf`` (compared case-insensitively) to be kept.

        Returns:
            List of the accepted URLs, in their original order.
        """
        must_be_pdf = url_type == 'online_pdf'

        valid_urls = []
        for url in urls:
            parsed = urlparse(url)
            # A usable URL needs both a scheme and a netloc.
            if not (parsed.scheme and parsed.netloc):
                continue
            # Online PDF urls should end with a .pdf extension (any case).
            if must_be_pdf and not url.lower().endswith('.pdf'):
                continue
            valid_urls.append(url)

        # Log through the module-named logger rather than the root logger so
        # the record carries this module's name.
        logging.getLogger(__name__).info('Valid URLs are: %s', valid_urls)
        return valid_urls