isana25's picture
Create app.py
7cf2554 verified
import requests
from bs4 import BeautifulSoup
import gradio as gr
import re
import warnings
warnings.filterwarnings('ignore')
def fetch_html_content(input_text):
"""
Fetch HTML content from a URL or process raw HTML input
Returns the HTML content and source type for better user feedback
"""
input_text = input_text.strip()
# Check if input looks like HTML content
if input_text.startswith('<') and '>' in input_text:
return input_text, "raw_html"
# Check if input looks like a URL
if input_text.startswith(('http://', 'https://', 'www.')):
try:
# Add protocol if missing
if input_text.startswith('www.'):
input_text = 'https://' + input_text
# Fetch the webpage
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}
response = requests.get(input_text, headers=headers, timeout=10)
if response.status_code == 200:
return response.text, "url"
else:
return None, f"HTTP Error {response.status_code}"
except requests.exceptions.RequestException as e:
return None, f"Request failed: {str(e)}"
return None, "Invalid input format"
def check_alt_text(soup, base_url=""):
"""
Comprehensive check for image alt text accessibility
Returns detailed issues and suggestions for improvement
"""
issues = []
suggestions = []
# Find all image elements
images = soup.find_all('img')
if not images:
return [], ["Consider adding relevant images with proper alt text to enhance content."]
for i, img in enumerate(images, 1):
alt = img.get('alt')
src = img.get('src', 'Unknown source')
# Check for missing alt attribute
if alt is None:
issues.append({
'type': 'Missing Alt Attribute',
'element': f'Image {i}',
'description': f'Image with src="{src}" has no alt attribute',
'severity': 'High',
'wcag': 'WCAG 2.1 Level A - 1.1.1'
})
# Check for empty alt text (which is valid for decorative images)
elif alt.strip() == '':
issues.append({
'type': 'Empty Alt Text',
'element': f'Image {i}',
'description': f'Image with src="{src}" has empty alt text (okay if decorative)',
'severity': 'Medium',
'wcag': 'WCAG 2.1 Level A - 1.1.1'
})
# Check for poor alt text practices
elif alt.strip().lower() in ['image', 'picture', 'photo', 'img']:
issues.append({
'type': 'Generic Alt Text',
'element': f'Image {i}',
'description': f'Alt text "{alt}" is too generic and not descriptive',
'severity': 'Medium',
'wcag': 'WCAG 2.1 Level A - 1.1.1'
})
# Generate suggestions based on issues found
if issues:
suggestions.extend([
"Write descriptive alt text that conveys the meaning and context of images",
"Use empty alt='' for purely decorative images",
"Avoid generic terms like 'image', 'picture', or 'photo'",
"Keep alt text concise but meaningful (aim for 125 characters or less)"
])
return issues, suggestions
def check_font_sizes(soup):
"""
Analyze font sizes and readability issues in the HTML content
"""
issues = []
suggestions = []
# Check inline styles for font-size
elements_with_styles = soup.find_all(style=True)
small_fonts_found = False
for i, element in enumerate(elements_with_styles, 1):
style = element.get('style', '')
# Look for font-size declarations
font_size_match = re.search(r'font-size:\s*(\d+(?:\.\d+)?)(px|pt|em|rem|%)', style, re.IGNORECASE)
if font_size_match:
size_value = float(font_size_match.group(1))
unit = font_size_match.group(2).lower()
# Convert to approximate pixel values for comparison
if unit == 'px':
pixel_size = size_value
elif unit == 'pt':
pixel_size = size_value * 1.33 # Rough conversion
elif unit in ['em', 'rem']:
pixel_size = size_value * 16 # Assuming 16px base
elif unit == '%':
pixel_size = (size_value / 100) * 16 # Assuming 16px base
else:
continue
# Flag very small fonts (less than 12px equivalent)
if pixel_size < 12:
issues.append({
'type': 'Small Font Size',
'element': f'Element {i} ({element.name})',
'description': f'Font size {size_value}{unit} (β‰ˆ{pixel_size:.1f}px) may be too small for readability',
'severity': 'Medium',
'wcag': 'WCAG 2.1 Level AA - 1.4.4'
})
small_fonts_found = True
# Check for potential readability issues with text content
text_elements = soup.find_all(['p', 'div', 'span', 'li', 'td', 'th'])
long_paragraphs = 0
for element in text_elements:
text = element.get_text(strip=True)
if len(text) > 500: # Very long text blocks
long_paragraphs += 1
if long_paragraphs > 0:
issues.append({
'type': 'Long Text Blocks',
'element': f'{long_paragraphs} elements',
'description': f'Found {long_paragraphs} very long text blocks that may hurt readability',
'severity': 'Low',
'wcag': 'WCAG 2.1 Level AAA - 2.4.8'
})
# Generate suggestions
if small_fonts_found:
suggestions.extend([
"Use minimum 12px font size for body text (14px+ recommended)",
"Ensure text can be zoomed to 200% without loss of functionality",
"Test readability on different devices and screen sizes"
])
if long_paragraphs > 0:
suggestions.append("Break up long text blocks with headings, bullet points, or shorter paragraphs")
return issues, suggestions
def check_color_contrast(soup):
"""
Analyze color contrast issues in the HTML content
This is a simplified heuristic check - full WCAG compliance requires more complex calculations
"""
issues = []
suggestions = []
# Check elements with inline color styles
elements_with_styles = soup.find_all(style=True)
contrast_issues_found = False
for i, element in enumerate(elements_with_styles, 1):
style = element.get('style', '')
# Extract color and background-color
color_match = re.search(r'color:\s*([^;]+)', style, re.IGNORECASE)
bg_match = re.search(r'background-color:\s*([^;]+)', style, re.IGNORECASE)
if color_match and bg_match:
text_color = color_match.group(1).strip()
bg_color = bg_match.group(1).strip()
# Simplified contrast check using color name heuristics
light_colors = ['white', '#fff', '#ffffff', '#f0f0f0', '#e0e0e0',
'yellow', 'lightyellow', 'lightgray', 'lightgrey', 'beige']
dark_colors = ['black', '#000', '#000000', '#333', '#666',
'navy', 'darkblue', 'darkgreen', 'darkred', 'purple']
text_is_light = any(light in text_color.lower() for light in light_colors)
text_is_dark = any(dark in text_color.lower() for dark in dark_colors)
bg_is_light = any(light in bg_color.lower() for light in light_colors)
bg_is_dark = any(dark in bg_color.lower() for dark in dark_colors)
# Flag potential contrast issues
if (text_is_light and bg_is_light) or (text_is_dark and bg_is_dark):
issues.append({
'type': 'Poor Color Contrast',
'element': f'Element {i} ({element.name})',
'description': f'Text color "{text_color}" on background "{bg_color}" may have insufficient contrast',
'severity': 'High',
'wcag': 'WCAG 2.1 Level AA - 1.4.3'
})
contrast_issues_found = True
# Check for color-only information conveyance
elements_with_color = soup.find_all(lambda tag: tag.get('style') and 'color:' in tag.get('style', ''))
if len(elements_with_color) > 5: # Arbitrary threshold
issues.append({
'type': 'Color Dependency',
'element': f'{len(elements_with_color)} elements',
'description': 'Heavy reliance on color - ensure information is also conveyed through other means',
'severity': 'Medium',
'wcag': 'WCAG 2.1 Level A - 1.4.1'
})
# Generate suggestions
if contrast_issues_found:
suggestions.extend([
"Ensure text has at least 4.5:1 contrast ratio with background (7:1 for AAA compliance)",
"Test colors using online contrast checkers",
"Avoid using similar shades for text and background"
])
if len(elements_with_color) > 5:
suggestions.append("Don't rely solely on color to convey information - use icons, text, or patterns too")
return issues, suggestions
def analyze_accessibility(input_text):
"""
Main function to analyze HTML content for accessibility issues
"""
if not input_text.strip():
return "Please provide a URL or HTML content to analyze.", "Ready to analyze...", 0
# Fetch HTML content
html_content, source_type = fetch_html_content(input_text)
if html_content is None:
return f"Error: {source_type}", "Error occurred", 0
# Parse HTML
try:
soup = BeautifulSoup(html_content, 'html.parser')
except Exception as e:
return f"Error parsing HTML: {str(e)}", "Parsing error", 0
# Perform accessibility checks
alt_issues, alt_suggestions = check_alt_text(soup)
font_issues, font_suggestions = check_font_sizes(soup)
color_issues, color_suggestions = check_color_contrast(soup)
# Combine all issues
all_issues = alt_issues + font_issues + color_issues
all_suggestions = list(set(alt_suggestions + font_suggestions + color_suggestions)) # Remove duplicates
# Generate report
if not all_issues:
report = "πŸŽ‰ **Great news!** No major accessibility issues detected.\n\n"
report += "However, consider these general best practices:\n"
for suggestion in all_suggestions[:3]: # Show top 3 general suggestions
report += f"β€’ {suggestion}\n"
return report, "βœ… Accessibility check completed successfully!", len(all_issues)
# Create detailed report
report = f"## πŸ” Accessibility Analysis Report\n\n"
report += f"**Issues Found:** {len(all_issues)}\n"
report += f"**Source:** {source_type.replace('_', ' ').title()}\n\n"
# Group issues by severity
high_issues = [issue for issue in all_issues if issue['severity'] == 'High']
medium_issues = [issue for issue in all_issues if issue['severity'] == 'Medium']
low_issues = [issue for issue in all_issues if issue['severity'] == 'Low']
# Report high priority issues first
if high_issues:
report += "### 🚨 High Priority Issues\n"
for issue in high_issues:
report += f"**{issue['type']}** - {issue['element']}\n"
report += f"β€’ {issue['description']}\n"
report += f"β€’ Standard: {issue['wcag']}\n\n"
if medium_issues:
report += "### ⚠️ Medium Priority Issues\n"
for issue in medium_issues:
report += f"**{issue['type']}** - {issue['element']}\n"
report += f"β€’ {issue['description']}\n"
report += f"β€’ Standard: {issue['wcag']}\n\n"
if low_issues:
report += "### πŸ’‘ Low Priority Issues\n"
for issue in low_issues:
report += f"**{issue['type']}** - {issue['element']}\n"
report += f"β€’ {issue['description']}\n"
report += f"β€’ Standard: {issue['wcag']}\n\n"
# Add suggestions
if all_suggestions:
report += "## πŸ› οΈ Recommended Actions\n\n"
for i, suggestion in enumerate(all_suggestions, 1):
report += f"{i}. {suggestion}\n"
return report, f"Analysis completed! Found {len(all_issues)} accessibility issues.", len(all_issues)
def create_accessibility_interface():
"""
Create the Gradio interface for the accessibility checker
"""
# Custom CSS for better styling
css = """
.gradio-container {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.report-box {
background-color: black;
border-radius: 8px;
padding: 15px;
border: 1px solid #dee2e6;
}
"""
with gr.Blocks(title="🌐 Accessibility Checker", theme=gr.themes.Soft(), css=css) as demo:
gr.Markdown("""
# 🌐 Personalized Accessibility Checker
**Make the web more inclusive!** This tool analyzes web pages and HTML content for accessibility issues,
helping you create content that's usable by everyone, including people with disabilities.
### What We Check:
- πŸ–ΌοΈ **Alt text** for images (screen reader compatibility)
- πŸ“ **Font sizes** and readability
- 🎨 **Color contrast** between text and backgrounds
- πŸ“‹ **WCAG compliance** with specific guidelines
### How to Use:
1. Enter a website URL (e.g., `https://example.com`) **OR** paste HTML content
2. Click "Analyze Accessibility"
3. Review the detailed report and suggestions
""")
with gr.Row():
with gr.Column(scale=2):
input_box = gr.Textbox(
label="🌐 Website URL or HTML Content",
placeholder="Enter URL (https://example.com) or paste HTML content here...",
lines=3,
max_lines=10
)
analyze_btn = gr.Button(
"πŸ” Analyze Accessibility",
variant="primary",
size="lg"
)
with gr.Column(scale=1):
status_box = gr.Textbox(
label="πŸ“Š Status",
value="Ready to analyze...",
interactive=False,
lines=2
)
issue_count = gr.Number(
label="Issues Found",
value=0,
interactive=False
)
# Results area
report_output = gr.Markdown(
label="πŸ“‹ Accessibility Report",
value="Your detailed accessibility analysis will appear here...",
elem_classes=["report-box"]
)
# Example URLs section
gr.Markdown("### πŸ§ͺ Try These Example URLs:")
example_urls = [
"https://www.w3.org/WAI/", # Good accessibility
"https://example.com", # Simple site
]
with gr.Row():
for url in example_urls:
gr.Button(f"Test: {url.split('//')[1]}", size="sm").click(
lambda u=url: u,
outputs=input_box
)
# Example HTML section
gr.Markdown("### πŸ“ Or Try This Example HTML:")
example_html = '''<html>
<body>
<h1 style="color: yellow; background-color: white;">Welcome</h1>
<img src="logo.jpg">
<p style="font-size: 8px;">This text is too small</p>
<div style="color: lightgray; background-color: white;">Poor contrast text</div>
</body>
</html>'''
gr.Button("πŸ“„ Load Example HTML", size="sm").click(
lambda: example_html,
outputs=input_box
)
# Set up event handlers
analyze_btn.click(
fn=analyze_accessibility,
inputs=[input_box],
outputs=[report_output, status_box, issue_count]
)
# Real-time analysis (optional)
input_box.change(
fn=lambda text: ("Ready to analyze..." if text.strip() else "Enter URL or HTML content"),
inputs=[input_box],
outputs=[status_box]
)
# Footer with resources
gr.Markdown("""
---
### πŸ“š Accessibility Resources:
- **WCAG Guidelines**: Web Content Accessibility Guidelines 2.1
- **Color Contrast**: Aim for 4.5:1 ratio (normal text) or 3:1 (large text)
- **Alt Text**: Describe the content and function of images
- **Font Size**: Minimum 12px, recommended 14px+ for body text
*This tool provides basic accessibility checking. For comprehensive testing, use specialized tools and manual testing.*
""")
return demo
# Create and launch the interface
if __name__ == "__main__":
demo = create_accessibility_interface()
demo.launch()