import requests from bs4 import BeautifulSoup import gradio as gr import re import warnings warnings.filterwarnings('ignore') def fetch_html_content(input_text): """ Fetch HTML content from a URL or process raw HTML input Returns the HTML content and source type for better user feedback """ input_text = input_text.strip() # Check if input looks like HTML content if input_text.startswith('<') and '>' in input_text: return input_text, "raw_html" # Check if input looks like a URL if input_text.startswith(('http://', 'https://', 'www.')): try: # Add protocol if missing if input_text.startswith('www.'): input_text = 'https://' + input_text # Fetch the webpage headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' } response = requests.get(input_text, headers=headers, timeout=10) if response.status_code == 200: return response.text, "url" else: return None, f"HTTP Error {response.status_code}" except requests.exceptions.RequestException as e: return None, f"Request failed: {str(e)}" return None, "Invalid input format" def check_alt_text(soup, base_url=""): """ Comprehensive check for image alt text accessibility Returns detailed issues and suggestions for improvement """ issues = [] suggestions = [] # Find all image elements images = soup.find_all('img') if not images: return [], ["Consider adding relevant images with proper alt text to enhance content."] for i, img in enumerate(images, 1): alt = img.get('alt') src = img.get('src', 'Unknown source') # Check for missing alt attribute if alt is None: issues.append({ 'type': 'Missing Alt Attribute', 'element': f'Image {i}', 'description': f'Image with src="{src}" has no alt attribute', 'severity': 'High', 'wcag': 'WCAG 2.1 Level A - 1.1.1' }) # Check for empty alt text (which is valid for decorative images) elif alt.strip() == '': issues.append({ 'type': 'Empty Alt Text', 'element': f'Image {i}', 'description': f'Image with src="{src}" has empty alt text (okay if decorative)', 'severity': 'Medium', 'wcag': 'WCAG 2.1 Level A - 1.1.1' }) # Check for poor alt text practices elif alt.strip().lower() in ['image', 'picture', 'photo', 'img']: issues.append({ 'type': 'Generic Alt Text', 'element': f'Image {i}', 'description': f'Alt text "{alt}" is too generic and not descriptive', 'severity': 'Medium', 'wcag': 'WCAG 2.1 Level A - 1.1.1' }) # Generate suggestions based on issues found if issues: suggestions.extend([ "Write descriptive alt text that conveys the meaning and context of images", "Use empty alt='' for purely decorative images", "Avoid generic terms like 'image', 'picture', or 'photo'", "Keep alt text concise but meaningful (aim for 125 characters or less)" ]) return issues, suggestions def check_font_sizes(soup): """ Analyze font sizes and readability issues in the HTML content """ issues = [] suggestions = [] # Check inline styles for font-size elements_with_styles = soup.find_all(style=True) small_fonts_found = False for i, element in enumerate(elements_with_styles, 1): style = element.get('style', '') # Look for font-size declarations font_size_match = re.search(r'font-size:\s*(\d+(?:\.\d+)?)(px|pt|em|rem|%)', style, re.IGNORECASE) if font_size_match: size_value = float(font_size_match.group(1)) unit = font_size_match.group(2).lower() # Convert to approximate pixel values for comparison if unit == 'px': pixel_size = size_value elif unit == 'pt': pixel_size = size_value * 1.33 # Rough conversion elif unit in ['em', 'rem']: pixel_size = size_value * 16 # Assuming 16px base elif unit == '%': pixel_size = (size_value / 100) * 16 # Assuming 16px base else: continue # Flag very small fonts (less than 12px equivalent) if pixel_size < 12: issues.append({ 'type': 'Small Font Size', 'element': f'Element {i} ({element.name})', 'description': f'Font size {size_value}{unit} (โ{pixel_size:.1f}px) may be too small for readability', 'severity': 'Medium', 'wcag': 'WCAG 2.1 Level AA - 1.4.4' }) small_fonts_found = True # Check for potential readability issues with text content text_elements = soup.find_all(['p', 'div', 'span', 'li', 'td', 'th']) long_paragraphs = 0 for element in text_elements: text = element.get_text(strip=True) if len(text) > 500: # Very long text blocks long_paragraphs += 1 if long_paragraphs > 0: issues.append({ 'type': 'Long Text Blocks', 'element': f'{long_paragraphs} elements', 'description': f'Found {long_paragraphs} very long text blocks that may hurt readability', 'severity': 'Low', 'wcag': 'WCAG 2.1 Level AAA - 2.4.8' }) # Generate suggestions if small_fonts_found: suggestions.extend([ "Use minimum 12px font size for body text (14px+ recommended)", "Ensure text can be zoomed to 200% without loss of functionality", "Test readability on different devices and screen sizes" ]) if long_paragraphs > 0: suggestions.append("Break up long text blocks with headings, bullet points, or shorter paragraphs") return issues, suggestions def check_color_contrast(soup): """ Analyze color contrast issues in the HTML content This is a simplified heuristic check - full WCAG compliance requires more complex calculations """ issues = [] suggestions = [] # Check elements with inline color styles elements_with_styles = soup.find_all(style=True) contrast_issues_found = False for i, element in enumerate(elements_with_styles, 1): style = element.get('style', '') # Extract color and background-color color_match = re.search(r'color:\s*([^;]+)', style, re.IGNORECASE) bg_match = re.search(r'background-color:\s*([^;]+)', style, re.IGNORECASE) if color_match and bg_match: text_color = color_match.group(1).strip() bg_color = bg_match.group(1).strip() # Simplified contrast check using color name heuristics light_colors = ['white', '#fff', '#ffffff', '#f0f0f0', '#e0e0e0', 'yellow', 'lightyellow', 'lightgray', 'lightgrey', 'beige'] dark_colors = ['black', '#000', '#000000', '#333', '#666', 'navy', 'darkblue', 'darkgreen', 'darkred', 'purple'] text_is_light = any(light in text_color.lower() for light in light_colors) text_is_dark = any(dark in text_color.lower() for dark in dark_colors) bg_is_light = any(light in bg_color.lower() for light in light_colors) bg_is_dark = any(dark in bg_color.lower() for dark in dark_colors) # Flag potential contrast issues if (text_is_light and bg_is_light) or (text_is_dark and bg_is_dark): issues.append({ 'type': 'Poor Color Contrast', 'element': f'Element {i} ({element.name})', 'description': f'Text color "{text_color}" on background "{bg_color}" may have insufficient contrast', 'severity': 'High', 'wcag': 'WCAG 2.1 Level AA - 1.4.3' }) contrast_issues_found = True # Check for color-only information conveyance elements_with_color = soup.find_all(lambda tag: tag.get('style') and 'color:' in tag.get('style', '')) if len(elements_with_color) > 5: # Arbitrary threshold issues.append({ 'type': 'Color Dependency', 'element': f'{len(elements_with_color)} elements', 'description': 'Heavy reliance on color - ensure information is also conveyed through other means', 'severity': 'Medium', 'wcag': 'WCAG 2.1 Level A - 1.4.1' }) # Generate suggestions if contrast_issues_found: suggestions.extend([ "Ensure text has at least 4.5:1 contrast ratio with background (7:1 for AAA compliance)", "Test colors using online contrast checkers", "Avoid using similar shades for text and background" ]) if len(elements_with_color) > 5: suggestions.append("Don't rely solely on color to convey information - use icons, text, or patterns too") return issues, suggestions def analyze_accessibility(input_text): """ Main function to analyze HTML content for accessibility issues """ if not input_text.strip(): return "Please provide a URL or HTML content to analyze.", "Ready to analyze...", 0 # Fetch HTML content html_content, source_type = fetch_html_content(input_text) if html_content is None: return f"Error: {source_type}", "Error occurred", 0 # Parse HTML try: soup = BeautifulSoup(html_content, 'html.parser') except Exception as e: return f"Error parsing HTML: {str(e)}", "Parsing error", 0 # Perform accessibility checks alt_issues, alt_suggestions = check_alt_text(soup) font_issues, font_suggestions = check_font_sizes(soup) color_issues, color_suggestions = check_color_contrast(soup) # Combine all issues all_issues = alt_issues + font_issues + color_issues all_suggestions = list(set(alt_suggestions + font_suggestions + color_suggestions)) # Remove duplicates # Generate report if not all_issues: report = "๐ **Great news!** No major accessibility issues detected.\n\n" report += "However, consider these general best practices:\n" for suggestion in all_suggestions[:3]: # Show top 3 general suggestions report += f"โข {suggestion}\n" return report, "โ Accessibility check completed successfully!", len(all_issues) # Create detailed report report = f"## ๐ Accessibility Analysis Report\n\n" report += f"**Issues Found:** {len(all_issues)}\n" report += f"**Source:** {source_type.replace('_', ' ').title()}\n\n" # Group issues by severity high_issues = [issue for issue in all_issues if issue['severity'] == 'High'] medium_issues = [issue for issue in all_issues if issue['severity'] == 'Medium'] low_issues = [issue for issue in all_issues if issue['severity'] == 'Low'] # Report high priority issues first if high_issues: report += "### ๐จ High Priority Issues\n" for issue in high_issues: report += f"**{issue['type']}** - {issue['element']}\n" report += f"โข {issue['description']}\n" report += f"โข Standard: {issue['wcag']}\n\n" if medium_issues: report += "### โ ๏ธ Medium Priority Issues\n" for issue in medium_issues: report += f"**{issue['type']}** - {issue['element']}\n" report += f"โข {issue['description']}\n" report += f"โข Standard: {issue['wcag']}\n\n" if low_issues: report += "### ๐ก Low Priority Issues\n" for issue in low_issues: report += f"**{issue['type']}** - {issue['element']}\n" report += f"โข {issue['description']}\n" report += f"โข Standard: {issue['wcag']}\n\n" # Add suggestions if all_suggestions: report += "## ๐ ๏ธ Recommended Actions\n\n" for i, suggestion in enumerate(all_suggestions, 1): report += f"{i}. {suggestion}\n" return report, f"Analysis completed! Found {len(all_issues)} accessibility issues.", len(all_issues) def create_accessibility_interface(): """ Create the Gradio interface for the accessibility checker """ # Custom CSS for better styling css = """ .gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; } .report-box { background-color: black; border-radius: 8px; padding: 15px; border: 1px solid #dee2e6; } """ with gr.Blocks(title="๐ Accessibility Checker", theme=gr.themes.Soft(), css=css) as demo: gr.Markdown(""" # ๐ Personalized Accessibility Checker **Make the web more inclusive!** This tool analyzes web pages and HTML content for accessibility issues, helping you create content that's usable by everyone, including people with disabilities. ### What We Check: - ๐ผ๏ธ **Alt text** for images (screen reader compatibility) - ๐ **Font sizes** and readability - ๐จ **Color contrast** between text and backgrounds - ๐ **WCAG compliance** with specific guidelines ### How to Use: 1. Enter a website URL (e.g., `https://example.com`) **OR** paste HTML content 2. Click "Analyze Accessibility" 3. Review the detailed report and suggestions """) with gr.Row(): with gr.Column(scale=2): input_box = gr.Textbox( label="๐ Website URL or HTML Content", placeholder="Enter URL (https://example.com) or paste HTML content here...", lines=3, max_lines=10 ) analyze_btn = gr.Button( "๐ Analyze Accessibility", variant="primary", size="lg" ) with gr.Column(scale=1): status_box = gr.Textbox( label="๐ Status", value="Ready to analyze...", interactive=False, lines=2 ) issue_count = gr.Number( label="Issues Found", value=0, interactive=False ) # Results area report_output = gr.Markdown( label="๐ Accessibility Report", value="Your detailed accessibility analysis will appear here...", elem_classes=["report-box"] ) # Example URLs section gr.Markdown("### ๐งช Try These Example URLs:") example_urls = [ "https://www.w3.org/WAI/", # Good accessibility "https://example.com", # Simple site ] with gr.Row(): for url in example_urls: gr.Button(f"Test: {url.split('//')[1]}", size="sm").click( lambda u=url: u, outputs=input_box ) # Example HTML section gr.Markdown("### ๐ Or Try This Example HTML:") example_html = '''
This text is too small