pravinai committed on
Commit 51cbd0f · verified · 1 Parent(s): 4e5af29

Add multilingual_sentiment.py

Files changed (1)
  1. multilingual_sentiment.py +590 -0
multilingual_sentiment.py ADDED
@@ -0,0 +1,590 @@
+ #!/usr/bin/env python3
+ """
+ SentilensAI - Multilingual Sentiment Analysis Module
+
+ Advanced multilingual sentiment analysis supporting:
+ - English (en)
+ - Spanish (es)
+ - Chinese (zh)
+ - Automatic language detection
+ - Language-specific sentiment models
+ - Cross-language sentiment comparison
+
+ Author: Pravin Selvamuthu
+ """
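+
+ # Quick usage sketch (illustrative only; assumes the optional langdetect and
+ # transformers/torch dependencies imported below are installed):
+ #
+ #     analyzer = MultilingualSentimentAnalyzer()
+ #     result = analyzer.analyze_sentiment_multilingual("¡Me encanta este producto!")
+ #     print(result.detected_language, result.sentiment, result.confidence)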
+
+ import logging
+ from typing import Dict, List, Optional, Any, Tuple
+ import re
+ import unicodedata
+ from collections import Counter
+ from dataclasses import dataclass
+
+ # Multilingual NLP libraries (optional; features degrade gracefully when missing)
+ try:
+     from langdetect import detect_langs
+     LANGDETECT_AVAILABLE = True
+ except ImportError:
+     LANGDETECT_AVAILABLE = False
+
+ try:
+     import spacy  # detected but not required by any current code path
+     SPACY_AVAILABLE = True
+ except ImportError:
+     SPACY_AVAILABLE = False
+
+ try:
+     from transformers import AutoTokenizer, AutoModelForSequenceClassification
+     import torch
+     TRANSFORMERS_AVAILABLE = True
+ except ImportError:
+     TRANSFORMERS_AVAILABLE = False
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ @dataclass
+ class MultilingualSentimentResult:
+     """Result of multilingual sentiment analysis"""
+     text: str
+     detected_language: str
+     language_confidence: float
+     sentiment: str
+     confidence: float
+     emotions: Dict[str, float]
+     methods_used: List[str]
+     language_specific_analysis: Dict[str, Any]
+     cross_language_consensus: Optional[Dict[str, Any]] = None
+
+ class MultilingualSentimentAnalyzer:
+     """Advanced multilingual sentiment analyzer for English, Spanish, and Chinese"""
+
+     def __init__(self, model_cache_dir: str = "./multilingual_models"):
+         self.model_cache_dir = model_cache_dir
+         self.supported_languages = ['en', 'es', 'zh']
+         self.language_names = {
+             'en': 'English',
+             'es': 'Spanish',
+             'zh': 'Chinese'
+         }
+
+         # Language detection patterns
+         self.language_patterns = {
+             'en': r'[a-zA-Z]',
+             'es': r'[ñáéíóúüÑÁÉÍÓÚÜ]',
+             'zh': r'[\u4e00-\u9fff]'
+         }
+
+         # Language-specific sentiment models
+         self.sentiment_models = {
+             'en': 'cardiffnlp/twitter-roberta-base-sentiment-latest',
+             'es': 'pysentimiento/robertuito-sentiment-analysis',
+             'zh': 'uer/roberta-base-finetuned-dianping-chinese'
+         }
+
+         # Initialize language-specific models
+         self.models = {}
+         self.tokenizers = {}
+         self._load_language_models()
+
+     def _load_language_models(self):
+         """Load language-specific models"""
+         if not TRANSFORMERS_AVAILABLE:
+             logger.warning("Transformers not available. Multilingual features limited.")
+             return
+
+         for lang_code in self.supported_languages:
+             try:
+                 model_name = self.sentiment_models[lang_code]
+                 logger.info(f"Loading {self.language_names[lang_code]} model: {model_name}")
+
+                 self.tokenizers[lang_code] = AutoTokenizer.from_pretrained(model_name)
+                 # Load each model with its own pretrained classification head.
+                 # Forcing num_labels=3 with ignore_mismatched_sizes=True would
+                 # randomly re-initialize the head for models that do not ship
+                 # exactly three labels (the Chinese model has two), so labels
+                 # are resolved from config.id2label at prediction time instead.
+                 self.models[lang_code] = AutoModelForSequenceClassification.from_pretrained(model_name)
+                 logger.info(f"✅ {self.language_names[lang_code]} model loaded successfully")
+
+             except Exception as e:
+                 logger.warning(f"Failed to load {self.language_names[lang_code]} model: {e}")
+
+     def detect_language(self, text: str) -> Tuple[str, float]:
+         """Detect the language of the input text"""
+
+         # Clean and preprocess text
+         cleaned_text = self._clean_text(text)
+
+         if not cleaned_text.strip():
+             return 'en', 0.0
+
+         # Method 1: Use langdetect if available
+         if LANGDETECT_AVAILABLE:
+             try:
+                 detected_langs = detect_langs(cleaned_text)
+                 if detected_langs:
+                     best_lang = detected_langs[0]
+                     if best_lang.lang in self.supported_languages:
+                         return best_lang.lang, best_lang.prob
+             except Exception as e:
+                 logger.warning(f"Language detection failed: {e}")
+
+         # Method 2: Pattern-based fallback. This is a rough heuristic: the
+         # English pattern matches any Latin letter, so unaccented Spanish
+         # text tends to score as English here.
+         pattern_scores = {}
+         for lang_code, pattern in self.language_patterns.items():
+             matches = len(re.findall(pattern, cleaned_text))
+             pattern_scores[lang_code] = matches / len(cleaned_text) if cleaned_text else 0
+
+         # Method 3: Character-based detection for Chinese
+         chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', cleaned_text))
+         if chinese_chars > 0:
+             pattern_scores['zh'] = chinese_chars / len(cleaned_text)
+
+         # Select the best-scoring language
+         if pattern_scores:
+             best_lang = max(pattern_scores.items(), key=lambda x: x[1])
+             confidence = min(best_lang[1] * 2, 1.0)  # Scale confidence
+             return best_lang[0], confidence
+
+         # Default to English
+         return 'en', 0.5
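+
+     # Illustrative expectations for detect_language (hypothetical values;
+     # langdetect probabilities vary by version and input length):
+     #     analyzer.detect_language("I love this!")  -> ('en', 0.99)
+     #     analyzer.detect_language("这个产品太棒了")   -> ('zh', 0.99)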
+
+     def _clean_text(self, text: str) -> str:
+         """Clean and normalize text for language detection"""
+         # Remove extra whitespace
+         text = re.sub(r'\s+', ' ', text.strip())
+
+         # Normalize unicode
+         text = unicodedata.normalize('NFKD', text)
+
+         # Remove URLs, mentions, hashtags
+         text = re.sub(r'http\S+|www\S+|@\w+|#\w+', '', text)
+
+         return text
+
+     def analyze_sentiment_multilingual(self, text: str,
+                                        target_language: Optional[str] = None,
+                                        enable_cross_language: bool = False) -> MultilingualSentimentResult:
+         """Analyze sentiment in multiple languages"""
+
+         # Detect language if not specified
+         if target_language is None:
+             detected_lang, lang_confidence = self.detect_language(text)
+         else:
+             detected_lang = target_language
+             lang_confidence = 1.0
+
+         # Ensure language is supported
+         if detected_lang not in self.supported_languages:
+             detected_lang = 'en'
+             lang_confidence = 0.5
+
+         # Analyze sentiment in detected language
+         sentiment_result = self._analyze_sentiment_language_specific(text, detected_lang)
+
+         # Cross-language analysis if enabled
+         cross_language_consensus = None
+         if enable_cross_language and len(self.supported_languages) > 1:
+             cross_language_consensus = self._analyze_cross_language_consensus(text)
+
+         return MultilingualSentimentResult(
+             text=text,
+             detected_language=detected_lang,
+             language_confidence=lang_confidence,
+             sentiment=sentiment_result['sentiment'],
+             confidence=sentiment_result['confidence'],
+             emotions=sentiment_result['emotions'],
+             methods_used=sentiment_result['methods_used'],
+             language_specific_analysis=sentiment_result['language_analysis'],
+             cross_language_consensus=cross_language_consensus
+         )
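+
+     # Example result shape (hypothetical values for illustration):
+     #     MultilingualSentimentResult(text='Great!', detected_language='en',
+     #         language_confidence=0.97, sentiment='positive', confidence=0.93,
+     #         emotions={'joy': 0.17, ...}, methods_used=['transformer_en', 'emotions_en'],
+     #         language_specific_analysis={...}, cross_language_consensus=None)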
+
+     def _analyze_sentiment_language_specific(self, text: str, language: str) -> Dict[str, Any]:
+         """Analyze sentiment using language-specific models"""
+
+         result = {
+             'sentiment': 'neutral',
+             'confidence': 0.5,
+             'emotions': {},
+             'methods_used': [],
+             'language_analysis': {}
+         }
+
+         # Method 1: Transformer model for the specific language
+         if language in self.models and self.models[language] is not None:
+             try:
+                 transformer_result = self._analyze_with_transformer(text, language)
+                 result['sentiment'] = transformer_result['sentiment']
+                 result['confidence'] = transformer_result['confidence']
+                 result['methods_used'].append(f'transformer_{language}')
+                 result['language_analysis']['transformer'] = transformer_result
+             except Exception as e:
+                 logger.warning(f"Transformer analysis failed for {language}: {e}")
+
+         # Method 2: Language-specific rules and patterns
+         rule_based_result = self._analyze_with_language_rules(text, language)
+         if rule_based_result['confidence'] > result['confidence']:
+             result['sentiment'] = rule_based_result['sentiment']
+             result['confidence'] = rule_based_result['confidence']
+             result['methods_used'].append(f'rules_{language}')
+
+         result['language_analysis']['rule_based'] = rule_based_result
+
+         # Method 3: Emotion analysis
+         emotions = self._analyze_emotions_language_specific(text, language)
+         result['emotions'] = emotions
+         result['methods_used'].append(f'emotions_{language}')
+
+         return result
+
+     def _analyze_with_transformer(self, text: str, language: str) -> Dict[str, Any]:
+         """Analyze sentiment using a transformer model"""
+
+         if language not in self.models or self.models[language] is None:
+             return {'sentiment': 'neutral', 'confidence': 0.5}
+
+         try:
+             tokenizer = self.tokenizers[language]
+             model = self.models[language]
+
+             # Tokenize input
+             inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
+
+             # Get predictions
+             with torch.no_grad():
+                 outputs = model(**inputs)
+                 probabilities = torch.softmax(outputs.logits, dim=-1)
+                 prediction = torch.argmax(probabilities, dim=-1).item()
+                 confidence = torch.max(probabilities).item()
+
+             # Map the predicted index to a sentiment label via the model's own
+             # config rather than a hardcoded {0: negative, 1: neutral,
+             # 2: positive} order, since label names and counts differ across
+             # the three models (e.g. 'NEG'/'NEU'/'POS' vs. 'negative'/'positive').
+             def normalize_label(label: str) -> str:
+                 label = label.lower()
+                 if 'pos' in label:
+                     return 'positive'
+                 if 'neg' in label:
+                     return 'negative'
+                 return 'neutral'
+
+             id2label = model.config.id2label
+             sentiment = normalize_label(id2label.get(prediction, 'neutral'))
+
+             return {
+                 'sentiment': sentiment,
+                 'confidence': float(confidence),
+                 'probabilities': {
+                     normalize_label(id2label[i]): float(probabilities[0][i])
+                     for i in range(probabilities.shape[-1])
+                 }
+             }
+
+         except Exception as e:
+             logger.warning(f"Transformer analysis failed: {e}")
+             return {'sentiment': 'neutral', 'confidence': 0.5}
+
+     def _analyze_with_language_rules(self, text: str, language: str) -> Dict[str, Any]:
+         """Analyze sentiment using language-specific rules"""
+
+         # Language-specific sentiment words
+         sentiment_words = {
+             'en': {
+                 'positive': ['good', 'great', 'excellent', 'amazing', 'wonderful', 'fantastic', 'love', 'like', 'happy', 'pleased'],
+                 'negative': ['bad', 'terrible', 'awful', 'horrible', 'hate', 'dislike', 'angry', 'frustrated', 'disappointed', 'sad']
+             },
+             'es': {
+                 'positive': ['bueno', 'excelente', 'maravilloso', 'fantástico', 'genial', 'amor', 'me gusta', 'feliz', 'contento', 'satisfecho'],
+                 'negative': ['malo', 'terrible', 'horrible', 'odio', 'no me gusta', 'enojado', 'frustrado', 'decepcionado', 'triste', 'molesto']
+             },
+             'zh': {
+                 'positive': ['好', '很好', '优秀', '棒', '喜欢', '爱', '高兴', '满意', '开心', '不错'],
+                 'negative': ['坏', '糟糕', '讨厌', '不喜欢', '生气', '失望', '难过', '愤怒', '烦恼', '不好']
+             }
+         }
+
+         if language not in sentiment_words:
+             return {'sentiment': 'neutral', 'confidence': 0.5}
+
+         # Note: matching is substring-based (required for unsegmented Chinese),
+         # so English 'like' also fires inside 'dislike'.
+         text_lower = text.lower()
+         positive_count = sum(1 for word in sentiment_words[language]['positive'] if word in text_lower)
+         negative_count = sum(1 for word in sentiment_words[language]['negative'] if word in text_lower)
+
+         total_sentiment_words = positive_count + negative_count
+
+         if total_sentiment_words == 0:
+             return {'sentiment': 'neutral', 'confidence': 0.5}
+
+         if positive_count > negative_count:
+             sentiment = 'positive'
+             confidence = positive_count / total_sentiment_words
+         elif negative_count > positive_count:
+             sentiment = 'negative'
+             confidence = negative_count / total_sentiment_words
+         else:
+             sentiment = 'neutral'
+             confidence = 0.5
+
+         return {
+             'sentiment': sentiment,
+             'confidence': min(confidence, 1.0),
+             'positive_words': positive_count,
+             'negative_words': negative_count
+         }
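+
+     # Worked example of the rule scoring (hypothetical input): for
+     # "good service but terrible wait", positive_count = 1 ('good') and
+     # negative_count = 1 ('terrible'), so the tie falls through to
+     # {'sentiment': 'neutral', 'confidence': 0.5}.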
+
+     def _analyze_emotions_language_specific(self, text: str, language: str) -> Dict[str, float]:
+         """Analyze emotions using language-specific patterns"""
+
+         # Language-specific emotion patterns
+         emotion_patterns = {
+             'en': {
+                 'joy': ['happy', 'joy', 'excited', 'delighted', 'cheerful', 'elated'],
+                 'anger': ['angry', 'mad', 'furious', 'irritated', 'annoyed', 'rage'],
+                 'sadness': ['sad', 'depressed', 'melancholy', 'gloomy', 'sorrowful'],
+                 'fear': ['afraid', 'scared', 'terrified', 'worried', 'anxious', 'nervous'],
+                 'surprise': ['surprised', 'shocked', 'amazed', 'astonished', 'stunned']
+             },
+             'es': {
+                 'joy': ['alegre', 'feliz', 'contento', 'emocionado', 'dichoso', 'gozoso'],
+                 'anger': ['enojado', 'furioso', 'irritado', 'molesto', 'rabioso', 'colérico'],
+                 'sadness': ['triste', 'deprimido', 'melancólico', 'afligido', 'apenado'],
+                 'fear': ['asustado', 'temeroso', 'preocupado', 'ansioso', 'nervioso'],
+                 'surprise': ['sorprendido', 'asombrado', 'atónito', 'desconcertado']
+             },
+             'zh': {
+                 'joy': ['高兴', '快乐', '开心', '兴奋', '愉快', '欣喜'],
+                 'anger': ['生气', '愤怒', '恼火', '气愤', '暴怒'],
+                 'sadness': ['悲伤', '难过', '沮丧', '忧郁', '哀伤', '痛苦'],
+                 'fear': ['害怕', '恐惧', '担心', '焦虑', '紧张', '不安'],
+                 'surprise': ['惊讶', '震惊', '吃惊', '意外', '诧异', '惊愕']
+             }
+         }
+
+         if language not in emotion_patterns:
+             return {}
+
+         text_lower = text.lower()
+         emotions = {}
+
+         for emotion, words in emotion_patterns[language].items():
+             count = sum(1 for word in words if word in text_lower)
+             emotions[emotion] = min(count / len(words), 1.0) if words else 0.0
+
+         return emotions
+
+     def _analyze_cross_language_consensus(self, text: str) -> Optional[Dict[str, Any]]:
+         """Analyze sentiment across multiple languages for consensus"""
+
+         consensus_results = {}
+
+         for language in self.supported_languages:
+             if language in self.models and self.models[language] is not None:
+                 try:
+                     result = self._analyze_sentiment_language_specific(text, language)
+                     consensus_results[language] = {
+                         'sentiment': result['sentiment'],
+                         'confidence': result['confidence'],
+                         'language': self.language_names[language]
+                     }
+                 except Exception as e:
+                     logger.warning(f"Cross-language analysis failed for {language}: {e}")
+
+         if not consensus_results:
+             return None
+
+         # Calculate consensus
+         sentiments = [result['sentiment'] for result in consensus_results.values()]
+         confidences = [result['confidence'] for result in consensus_results.values()]
+
+         # Most common sentiment
+         sentiment_counts = Counter(sentiments)
+         consensus_sentiment = sentiment_counts.most_common(1)[0][0]
+
+         # Average confidence
+         avg_confidence = sum(confidences) / len(confidences)
+
+         # Agreement rate
+         agreement_rate = sentiment_counts[consensus_sentiment] / len(sentiments)
+
+         return {
+             'consensus_sentiment': consensus_sentiment,
+             'average_confidence': avg_confidence,
+             'agreement_rate': agreement_rate,
+             'language_results': consensus_results,
+             'total_languages': len(consensus_results)
+         }
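+
+     # Example consensus payload (hypothetical values):
+     #     {'consensus_sentiment': 'positive', 'average_confidence': 0.88,
+     #      'agreement_rate': 1.0, 'language_results': {...}, 'total_languages': 3}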
+
+     def get_supported_languages(self) -> List[str]:
+         """Get list of supported languages"""
+         return self.supported_languages
+
+     def get_language_name(self, language_code: str) -> str:
+         """Get human-readable language name"""
+         return self.language_names.get(language_code, language_code)
+
+     def analyze_conversation_multilingual(self, conversation: Dict[str, Any]) -> Dict[str, Any]:
+         """Analyze a conversation with multilingual support"""
+
+         results = {
+             'conversation_id': conversation.get('conversation_id', 'unknown'),
+             'timestamp': conversation.get('timestamp'),
+             'language_analysis': {},
+             'sentiment_analysis': {},
+             'cross_language_insights': {},
+             'multilingual_metrics': {}
+         }
+
+         messages = conversation.get('messages', [])
+         language_distribution = {}
+         sentiment_by_language = {}
+
+         for i, message in enumerate(messages):
+             user_text = message.get('user', '')
+             bot_text = message.get('bot', '')
+
+             message_analysis = {
+                 'message_index': i + 1,
+                 'timestamp': message.get('timestamp'),
+                 'user_analysis': None,
+                 'bot_analysis': None
+             }
+
+             # Analyze user message
+             if user_text:
+                 user_result = self.analyze_sentiment_multilingual(user_text, enable_cross_language=True)
+                 message_analysis['user_analysis'] = user_result
+
+                 # Track language distribution
+                 lang = user_result.detected_language
+                 language_distribution[lang] = language_distribution.get(lang, 0) + 1
+
+                 # Track sentiment by language
+                 if lang not in sentiment_by_language:
+                     sentiment_by_language[lang] = []
+                 sentiment_by_language[lang].append(user_result.sentiment)
+
+             # Analyze bot message
+             if bot_text:
+                 bot_result = self.analyze_sentiment_multilingual(bot_text, enable_cross_language=True)
+                 message_analysis['bot_analysis'] = bot_result
+
+                 # Track language distribution
+                 lang = bot_result.detected_language
+                 language_distribution[lang] = language_distribution.get(lang, 0) + 1
+
+                 # Track sentiment by language
+                 if lang not in sentiment_by_language:
+                     sentiment_by_language[lang] = []
+                 sentiment_by_language[lang].append(bot_result.sentiment)
+
+             results['sentiment_analysis'][f'message_{i+1}'] = message_analysis
+
+         # Calculate multilingual metrics
+         results['multilingual_metrics'] = {
+             'language_distribution': language_distribution,
+             'sentiment_by_language': sentiment_by_language,
+             'total_languages_detected': len(language_distribution),
+             'primary_language': max(language_distribution.items(), key=lambda x: x[1])[0] if language_distribution else 'en',
+             'language_diversity': len(language_distribution) / len(self.supported_languages)
+         }
+
+         return results
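+
+     # Example 'multilingual_metrics' for a chat with four English and two
+     # Chinese messages (hypothetical values):
+     #     {'language_distribution': {'en': 4, 'zh': 2},
+     #      'total_languages_detected': 2, 'primary_language': 'en',
+     #      'language_diversity': 0.67, ...}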
+
+ def main():
+     """Demo function for multilingual sentiment analysis"""
+     print("🌍 SentilensAI - Multilingual Sentiment Analysis Demo")
+     print("=" * 60)
+
+     # Initialize multilingual analyzer
+     analyzer = MultilingualSentimentAnalyzer()
+
+     # Sample texts in different languages
+     sample_texts = [
+         {
+             'text': "I love this product! It's amazing and works perfectly.",
+             'expected_lang': 'en',
+             'description': 'English positive sentiment'
+         },
+         {
+             'text': "¡Me encanta este producto! Es increíble y funciona perfectamente.",
+             'expected_lang': 'es',
+             'description': 'Spanish positive sentiment'
+         },
+         {
+             'text': "这个产品太棒了!我非常喜欢它,效果很好。",
+             'expected_lang': 'zh',
+             'description': 'Chinese positive sentiment'
+         },
+         {
+             'text': "This is terrible. I hate it and want a refund immediately.",
+             'expected_lang': 'en',
+             'description': 'English negative sentiment'
+         },
+         {
+             'text': "Esto es terrible. Lo odio y quiero un reembolso inmediatamente.",
+             'expected_lang': 'es',
+             'description': 'Spanish negative sentiment'
+         },
+         {
+             'text': "这太糟糕了。我讨厌它,想要立即退款。",
+             'expected_lang': 'zh',
+             'description': 'Chinese negative sentiment'
+         }
+     ]
+
+     print(f"🔍 Analyzing {len(sample_texts)} texts in multiple languages...")
+     print(f"Supported languages: {', '.join([analyzer.get_language_name(lang) for lang in analyzer.get_supported_languages()])}")
+     print()
+
+     for i, sample in enumerate(sample_texts, 1):
+         print(f"📝 Sample {i}: {sample['description']}")
+         print(f"Text: {sample['text']}")
+
+         # Analyze with multilingual support
+         result = analyzer.analyze_sentiment_multilingual(
+             sample['text'],
+             enable_cross_language=True
+         )
+
+         print(f"Detected Language: {analyzer.get_language_name(result.detected_language)} (confidence: {result.language_confidence:.2f})")
+         print(f"Sentiment: {result.sentiment} (confidence: {result.confidence:.2f})")
+         print(f"Methods Used: {', '.join(result.methods_used)}")
+
+         if result.emotions:
+             print(f"Emotions: {', '.join([f'{k}: {v:.2f}' for k, v in result.emotions.items() if v > 0])}")
+
+         if result.cross_language_consensus:
+             consensus = result.cross_language_consensus
+             print(f"Cross-language Consensus: {consensus['consensus_sentiment']} (agreement: {consensus['agreement_rate']:.2f})")
+
+         print("-" * 50)
+
+     # Test conversation analysis
+     print("\n🗣️ Multilingual Conversation Analysis:")
+     print("=" * 40)
+
+     multilingual_conversation = {
+         'conversation_id': 'multilingual_demo_001',
+         'timestamp': '2024-01-15T10:30:00Z',
+         'messages': [
+             {
+                 'user': 'Hello, I need help with my account',
+                 'bot': 'Hola, puedo ayudarte con tu cuenta',
+                 'timestamp': '2024-01-15T10:30:15Z'
+             },
+             {
+                 'user': '谢谢你的帮助!',
+                 'bot': "You're welcome! I'm happy to help.",
+                 'timestamp': '2024-01-15T10:30:30Z'
+             }
+         ]
+     }
+
+     conversation_result = analyzer.analyze_conversation_multilingual(multilingual_conversation)
+
+     print(f"Conversation ID: {conversation_result['conversation_id']}")
+     print(f"Languages Detected: {conversation_result['multilingual_metrics']['total_languages_detected']}")
+     print(f"Primary Language: {analyzer.get_language_name(conversation_result['multilingual_metrics']['primary_language'])}")
+     print(f"Language Distribution: {conversation_result['multilingual_metrics']['language_distribution']}")
+     print(f"Language Diversity: {conversation_result['multilingual_metrics']['language_diversity']:.2f}")
+
+     print("\n✅ Multilingual sentiment analysis demo completed!")
+     print(f"🌍 SentilensAI now supports {len(analyzer.get_supported_languages())} languages!")
+     print("🚀 Ready for global AI chatbot conversations!")
+
+ if __name__ == "__main__":
+     main()
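+
+ # Suggested environment for running this demo (an assumption, not pinned by
+ # this commit; these are the standard PyPI names for the imports above):
+ #     pip install langdetect transformers torch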