isana25 committed on
Commit
7cf2554
·
verified ·
1 Parent(s): 5c08438

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +458 -0
app.py ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import gradio as gr
4
+ import re
5
+ import warnings
6
+ warnings.filterwarnings('ignore')
7
+
8
def fetch_html_content(input_text):
    """
    Resolve user input into HTML text.

    The input is treated as raw HTML when it starts with '<' and contains
    '>', and as a URL when it starts with 'http://', 'https://' or 'www.'
    (compared case-insensitively). URLs are downloaded with a 10 second
    timeout.

    Args:
        input_text: URL or HTML markup supplied by the user.

    Returns:
        A ``(html, source)`` tuple. On success ``html`` is the markup and
        ``source`` is "raw_html" or "url". On failure ``html`` is None and
        ``source`` carries a human-readable error message instead.
    """
    # Non-string input (e.g. None from an unset UI field) is neither HTML
    # nor a URL; report it instead of failing on .strip().
    if not isinstance(input_text, str):
        return None, "Invalid input format"

    input_text = input_text.strip()

    # Check if input looks like HTML content
    if input_text.startswith('<') and '>' in input_text:
        return input_text, "raw_html"

    # URL schemes and host names are case-insensitive, so compare lowered.
    lowered = input_text.lower()
    if not lowered.startswith(('http://', 'https://', 'www.')):
        return None, "Invalid input format"

    # Add protocol if missing
    if lowered.startswith('www.'):
        input_text = 'https://' + input_text

    # NOTE(review): this fetches an arbitrary user-supplied URL from the
    # server; consider restricting internal/private addresses if deployed
    # somewhere that matters.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    # Only the network call itself can raise a RequestException.
    try:
        response = requests.get(input_text, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        return None, f"Request failed: {e}"

    if response.status_code == 200:
        return response.text, "url"
    return None, f"HTTP Error {response.status_code}"
41
+
42
# Alt texts that say nothing about what the image actually shows.
_GENERIC_ALT_TEXTS = frozenset({'image', 'picture', 'photo', 'img'})

# Longest image source echoed verbatim into a report line.
_MAX_SRC_DISPLAY_LENGTH = 80


def _shorten_src(src, limit=_MAX_SRC_DISPLAY_LENGTH):
    """Return ``src`` cut to ``limit`` characters (plus '...') for display."""
    src = str(src)
    if len(src) <= limit:
        return src
    return src[:limit] + '...'


def check_alt_text(soup, base_url=""):
    """
    Check every <img> element for missing, empty or generic alt text.

    Args:
        soup: Parsed document exposing ``find_all('img')``; each image must
            support ``.get(attribute, default)``.
        base_url: Accepted for interface compatibility; currently unused.

    Returns:
        A ``(issues, suggestions)`` tuple. ``issues`` is a list of dicts with
        the keys 'type', 'element', 'description', 'severity' and 'wcag';
        ``suggestions`` is a list of advice strings. When the document has no
        images, ``issues`` is empty and a single general suggestion is given.
    """
    issues = []
    suggestions = []

    # Find all image elements
    images = soup.find_all('img')

    if not images:
        return [], ["Consider adding relevant images with proper alt text to enhance content."]

    for i, img in enumerate(images, 1):
        alt = img.get('alt')
        # Inline data: URIs can be thousands of characters long; shorten them
        # so a single image cannot flood the report.
        src = _shorten_src(img.get('src', 'Unknown source'))

        # Check for missing alt attribute
        if alt is None:
            issues.append({
                'type': 'Missing Alt Attribute',
                'element': f'Image {i}',
                'description': f'Image with src="{src}" has no alt attribute',
                'severity': 'High',
                'wcag': 'WCAG 2.1 Level A - 1.1.1'
            })

        # Check for empty alt text (which is valid for decorative images)
        elif alt.strip() == '':
            issues.append({
                'type': 'Empty Alt Text',
                'element': f'Image {i}',
                'description': f'Image with src="{src}" has empty alt text (okay if decorative)',
                'severity': 'Medium',
                'wcag': 'WCAG 2.1 Level A - 1.1.1'
            })

        # Check for poor alt text practices
        elif alt.strip().lower() in _GENERIC_ALT_TEXTS:
            issues.append({
                'type': 'Generic Alt Text',
                'element': f'Image {i}',
                'description': f'Alt text "{alt}" is too generic and not descriptive',
                'severity': 'Medium',
                'wcag': 'WCAG 2.1 Level A - 1.1.1'
            })

    # Generate suggestions based on issues found
    if issues:
        suggestions.extend([
            "Write descriptive alt text that conveys the meaning and context of images",
            "Use empty alt='' for purely decorative images",
            "Avoid generic terms like 'image', 'picture', or 'photo'",
            "Keep alt text concise but meaningful (aim for 125 characters or less)"
        ])

    return issues, suggestions
100
+
101
# Matches an inline font-size declaration; accepts "12", "12.5" and ".5"
# style numbers and optional whitespace around the colon.
_FONT_SIZE_PATTERN = re.compile(
    r'font-size\s*:\s*(\d*\.?\d+)(px|pt|em|rem|%)', re.IGNORECASE
)

# Approximate conversions to CSS pixels; relative units assume a 16px base.
_TO_PIXELS = {
    'px': lambda value: value,
    'pt': lambda value: value * 1.33,  # Rough conversion
    'em': lambda value: value * 16,
    'rem': lambda value: value * 16,
    '%': lambda value: (value / 100) * 16,
}

# Tags whose text content is checked for overly long blocks.
_TEXT_BLOCK_TAGS = ['p', 'div', 'span', 'li', 'td', 'th']

_MIN_FONT_SIZE_PX = 12
_LONG_TEXT_THRESHOLD = 500


def check_font_sizes(soup):
    """
    Flag small inline font sizes and very long text blocks.

    Only inline ``style`` attributes are inspected; stylesheet rules are not
    seen. A long text block is reported once, for the innermost matching
    element, so wrapping containers do not inflate the count.

    Args:
        soup: Parsed document exposing ``find_all``. Elements must provide
            ``.get('style', '')``, ``.name``, ``.get_text(strip=True)`` and
            ``.find_all``.

    Returns:
        A ``(issues, suggestions)`` tuple of issue dicts (keys 'type',
        'element', 'description', 'severity', 'wcag') and advice strings.
    """
    issues = []
    suggestions = []

    # Check inline styles for font-size
    small_fonts_found = False
    for i, element in enumerate(soup.find_all(style=True), 1):
        match = _FONT_SIZE_PATTERN.search(element.get('style', ''))
        if not match:
            continue

        size_value = float(match.group(1))
        unit = match.group(2).lower()
        pixel_size = _TO_PIXELS[unit](size_value)

        # Flag very small fonts (less than 12px equivalent)
        if pixel_size < _MIN_FONT_SIZE_PX:
            issues.append({
                'type': 'Small Font Size',
                'element': f'Element {i} ({element.name})',
                'description': f'Font size {size_value}{unit} (≈{pixel_size:.1f}px) may be too small for readability',
                'severity': 'Medium',
                'wcag': 'WCAG 2.1 Level AA - 1.4.4'
            })
            small_fonts_found = True

    # Check for potential readability issues with text content
    long_elements = [
        element for element in soup.find_all(_TEXT_BLOCK_TAGS)
        if len(element.get_text(strip=True)) > _LONG_TEXT_THRESHOLD
    ]
    # An ancestor's text includes its descendants' text, so a long paragraph
    # would otherwise be counted again for every enclosing div. Count only
    # long elements that contain no other long element.
    long_ids = {id(element) for element in long_elements}
    long_paragraphs = sum(
        1 for element in long_elements
        if not any(id(child) in long_ids
                   for child in element.find_all(_TEXT_BLOCK_TAGS))
    )

    if long_paragraphs > 0:
        issues.append({
            'type': 'Long Text Blocks',
            'element': f'{long_paragraphs} elements',
            'description': f'Found {long_paragraphs} very long text blocks that may hurt readability',
            'severity': 'Low',
            # NOTE(review): WCAG 2.4.8 is "Location"; confirm which success
            # criterion was intended for long text blocks.
            'wcag': 'WCAG 2.1 Level AAA - 2.4.8'
        })

    # Generate suggestions
    if small_fonts_found:
        suggestions.extend([
            "Use minimum 12px font size for body text (14px+ recommended)",
            "Ensure text can be zoomed to 200% without loss of functionality",
            "Test readability on different devices and screen sizes"
        ])

    if long_paragraphs > 0:
        suggestions.append("Break up long text blocks with headings, bullet points, or shorter paragraphs")

    return issues, suggestions
175
+
176
# The lookbehind keeps "color:" from matching inside "background-color:",
# "border-color:" and similar hyphenated properties.
_TEXT_COLOR_PATTERN = re.compile(r'(?<![\w-])color\s*:\s*([^;]+)', re.IGNORECASE)
_BACKGROUND_COLOR_PATTERN = re.compile(r'background-color\s*:\s*([^;]+)', re.IGNORECASE)

# Simplified contrast check using color name heuristics
_LIGHT_COLORS = ('white', '#fff', '#ffffff', '#f0f0f0', '#e0e0e0',
                 'yellow', 'lightyellow', 'lightgray', 'lightgrey', 'beige')
_DARK_COLORS = ('black', '#000', '#000000', '#333', '#666',
                'navy', 'darkblue', 'darkgreen', 'darkred', 'purple')

# More styled elements than this triggers the color-dependency warning.
_COLOR_DEPENDENCY_THRESHOLD = 5  # Arbitrary threshold


def _mentions_any(value, names):
    """Return True when any of ``names`` occurs in ``value`` (case-insensitive)."""
    value = value.lower()
    return any(name in value for name in names)


def check_color_contrast(soup):
    """
    Heuristically flag inline styles whose text and background colors clash.

    This is a simplified check, not a WCAG contrast-ratio computation: a
    pairing is flagged when both colors look light or both look dark
    according to a short list of known color names and hex codes. Only
    inline ``color`` and ``background-color`` declarations are considered
    (the ``background`` shorthand is not).

    Args:
        soup: Parsed document exposing ``find_all(style=True)``; elements
            must provide ``.get('style', '')`` and ``.name``.

    Returns:
        A ``(issues, suggestions)`` tuple of issue dicts (keys 'type',
        'element', 'description', 'severity', 'wcag') and advice strings.
    """
    issues = []
    suggestions = []

    # Check elements with inline color styles
    elements_with_styles = soup.find_all(style=True)
    contrast_issues_found = False

    for i, element in enumerate(elements_with_styles, 1):
        style = element.get('style', '')

        # Extract color and background-color
        color_match = _TEXT_COLOR_PATTERN.search(style)
        bg_match = _BACKGROUND_COLOR_PATTERN.search(style)
        if not (color_match and bg_match):
            continue

        text_color = color_match.group(1).strip()
        bg_color = bg_match.group(1).strip()

        both_light = (_mentions_any(text_color, _LIGHT_COLORS)
                      and _mentions_any(bg_color, _LIGHT_COLORS))
        both_dark = (_mentions_any(text_color, _DARK_COLORS)
                     and _mentions_any(bg_color, _DARK_COLORS))

        # Flag potential contrast issues
        if both_light or both_dark:
            issues.append({
                'type': 'Poor Color Contrast',
                'element': f'Element {i} ({element.name})',
                'description': f'Text color "{text_color}" on background "{bg_color}" may have insufficient contrast',
                'severity': 'High',
                'wcag': 'WCAG 2.1 Level AA - 1.4.3'
            })
            contrast_issues_found = True

    # Check for color-only information conveyance. Reuses the styled elements
    # already collected instead of walking the tree a second time.
    elements_with_color = [
        element for element in elements_with_styles
        if 'color:' in element.get('style', '').lower()
    ]
    heavy_color_use = len(elements_with_color) > _COLOR_DEPENDENCY_THRESHOLD
    if heavy_color_use:
        issues.append({
            'type': 'Color Dependency',
            'element': f'{len(elements_with_color)} elements',
            'description': 'Heavy reliance on color - ensure information is also conveyed through other means',
            'severity': 'Medium',
            'wcag': 'WCAG 2.1 Level A - 1.4.1'
        })

    # Generate suggestions
    if contrast_issues_found:
        suggestions.extend([
            "Ensure text has at least 4.5:1 contrast ratio with background (7:1 for AAA compliance)",
            "Test colors using online contrast checkers",
            "Avoid using similar shades for text and background"
        ])

    if heavy_color_use:
        suggestions.append("Don't rely solely on color to convey information - use icons, text, or patterns too")

    return issues, suggestions
244
+
245
# Report sections in display order: (severity value, markdown heading).
_SEVERITY_SECTIONS = (
    ('High', "### 🚨 High Priority Issues\n"),
    ('Medium', "### ⚠️ Medium Priority Issues\n"),
    ('Low', "### 💡 Low Priority Issues\n"),
)


def _format_issue(issue):
    """Render one issue dict as a markdown entry for the report."""
    return (
        f"**{issue['type']}** - {issue['element']}\n"
        f"• {issue['description']}\n"
        f"• Standard: {issue['wcag']}\n\n"
    )


def analyze_accessibility(input_text):
    """
    Analyze a URL or HTML snippet and build a markdown accessibility report.

    Args:
        input_text: URL or raw HTML entered by the user.

    Returns:
        A ``(report, status, issue_count)`` tuple: the markdown report (or an
        error/prompt message), a short status line, and the number of issues
        found (0 on error or empty input).
    """
    # Treat missing input the same as blank input rather than failing.
    if not isinstance(input_text, str) or not input_text.strip():
        return "Please provide a URL or HTML content to analyze.", "Ready to analyze...", 0

    # Fetch HTML content
    html_content, source_type = fetch_html_content(input_text)

    if html_content is None:
        return f"Error: {source_type}", "Error occurred", 0

    # Parse HTML
    try:
        soup = BeautifulSoup(html_content, 'html.parser')
    except Exception as e:
        return f"Error parsing HTML: {str(e)}", "Parsing error", 0

    # Perform accessibility checks
    alt_issues, alt_suggestions = check_alt_text(soup)
    font_issues, font_suggestions = check_font_sizes(soup)
    color_issues, color_suggestions = check_color_contrast(soup)

    # Combine all issues
    all_issues = alt_issues + font_issues + color_issues
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # numbered recommendations are stable between runs (a set is not).
    all_suggestions = list(dict.fromkeys(alt_suggestions + font_suggestions + color_suggestions))

    # Generate report
    if not all_issues:
        parts = [
            "🎉 **Great news!** No major accessibility issues detected.\n\n",
            "However, consider these general best practices:\n",
        ]
        # Show top 3 general suggestions
        parts.extend(f"• {suggestion}\n" for suggestion in all_suggestions[:3])
        return "".join(parts), "✅ Accessibility check completed successfully!", 0

    # Create detailed report
    parts = [
        "## 🔍 Accessibility Analysis Report\n\n",
        f"**Issues Found:** {len(all_issues)}\n",
        f"**Source:** {source_type.replace('_', ' ').title()}\n\n",
    ]

    # Group issues by severity, reporting high priority issues first
    for severity, heading in _SEVERITY_SECTIONS:
        matching = [issue for issue in all_issues if issue['severity'] == severity]
        if matching:
            parts.append(heading)
            parts.extend(_format_issue(issue) for issue in matching)

    # Add suggestions
    if all_suggestions:
        parts.append("## 🛠️ Recommended Actions\n\n")
        parts.extend(
            f"{i}. {suggestion}\n"
            for i, suggestion in enumerate(all_suggestions, 1)
        )

    return (
        "".join(parts),
        f"Analysis completed! Found {len(all_issues)} accessibility issues.",
        len(all_issues),
    )
320
+
321
def create_accessibility_interface():
    """
    Create the Gradio interface for the accessibility checker.

    Builds the layout (input box, analyze button, status and issue-count
    fields, report area, example loaders and footer) and wires the analyze
    button to ``analyze_accessibility``.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """

    # Custom CSS for better styling
    # NOTE(review): .report-box uses a black background without setting a
    # text color - confirm the report stays readable in the light theme.
    css = """
    .gradio-container {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .report-box {
    background-color: black;
    border-radius: 8px;
    padding: 15px;
    border: 1px solid #dee2e6;
    }
    """

    # Markdown and HTML snippets are assembled line by line so their content
    # does not depend on how this function happens to be indented.
    intro_markdown = "\n".join([
        "# 🌐 Personalized Accessibility Checker",
        "",
        "**Make the web more inclusive!** This tool analyzes web pages and HTML content for accessibility issues,",
        "helping you create content that's usable by everyone, including people with disabilities.",
        "",
        "### What We Check:",
        "- 🖼️ **Alt text** for images (screen reader compatibility)",
        "- 📝 **Font sizes** and readability",
        "- 🎨 **Color contrast** between text and backgrounds",
        "- 📋 **WCAG compliance** with specific guidelines",
        "",
        "### How to Use:",
        "1. Enter a website URL (e.g., `https://example.com`) **OR** paste HTML content",
        "2. Click \"Analyze Accessibility\"",
        "3. Review the detailed report and suggestions",
    ])

    footer_markdown = "\n".join([
        "---",
        "### 📚 Accessibility Resources:",
        "- **WCAG Guidelines**: Web Content Accessibility Guidelines 2.1",
        "- **Color Contrast**: Aim for 4.5:1 ratio (normal text) or 3:1 (large text)",
        "- **Alt Text**: Describe the content and function of images",
        "- **Font Size**: Minimum 12px, recommended 14px+ for body text",
        "",
        "*This tool provides basic accessibility checking. For comprehensive testing, use specialized tools and manual testing.*",
    ])

    # Deliberately flawed sample that triggers each kind of check.
    example_html = "\n".join([
        '<html>',
        '<body>',
        '<h1 style="color: yellow; background-color: white;">Welcome</h1>',
        '<img src="logo.jpg">',
        '<p style="font-size: 8px;">This text is too small</p>',
        '<div style="color: lightgray; background-color: white;">Poor contrast text</div>',
        '</body>',
        '</html>',
    ])

    example_urls = [
        "https://www.w3.org/WAI/",  # Good accessibility
        "https://example.com",  # Simple site
    ]

    with gr.Blocks(title="🌐 Accessibility Checker", theme=gr.themes.Soft(), css=css) as demo:

        gr.Markdown(intro_markdown)

        with gr.Row():
            with gr.Column(scale=2):
                input_box = gr.Textbox(
                    label="🌐 Website URL or HTML Content",
                    placeholder="Enter URL (https://example.com) or paste HTML content here...",
                    lines=3,
                    max_lines=10
                )

                analyze_btn = gr.Button(
                    "🔍 Analyze Accessibility",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=1):
                status_box = gr.Textbox(
                    label="📊 Status",
                    value="Ready to analyze...",
                    interactive=False,
                    lines=2
                )

                issue_count = gr.Number(
                    label="Issues Found",
                    value=0,
                    interactive=False
                )

        # Results area
        report_output = gr.Markdown(
            label="📋 Accessibility Report",
            value="Your detailed accessibility analysis will appear here...",
            elem_classes=["report-box"]
        )

        # Example URLs section
        gr.Markdown("### 🧪 Try These Example URLs:")

        with gr.Row():
            for url in example_urls:
                # Bind the current url as a default argument; a plain closure
                # would make every button load the last URL.
                gr.Button(f"Test: {url.split('//')[1]}", size="sm").click(
                    lambda u=url: u,
                    outputs=input_box
                )

        # Example HTML section
        gr.Markdown("### 📝 Or Try This Example HTML:")

        gr.Button("📄 Load Example HTML", size="sm").click(
            lambda: example_html,
            outputs=input_box
        )

        # Set up event handlers
        analyze_btn.click(
            fn=analyze_accessibility,
            inputs=[input_box],
            outputs=[report_output, status_box, issue_count]
        )

        # Keep the status line in sync with whether there is anything to
        # analyze; `text or ""` tolerates a None value from the textbox.
        input_box.change(
            fn=lambda text: ("Ready to analyze..." if (text or "").strip() else "Enter URL or HTML content"),
            inputs=[input_box],
            outputs=[status_box]
        )

        # Footer with resources
        gr.Markdown(footer_markdown)

    return demo
454
+
455
# Script entry point: build the Gradio app and start serving it.
if __name__ == "__main__":
    # Kept as a module-level name so the running app object stays reachable.
    demo = create_accessibility_interface()
    demo.launch()