File size: 2,563 Bytes
4e6064b
58ef472
 
 
4e6064b
58ef472
 
 
 
 
 
 
 
 
 
 
312e880
 
 
58ef472
312e880
58ef472
 
 
 
 
 
312e880
58ef472
 
9234c46
312e880
58ef472
9234c46
312e880
 
 
58ef472
312e880
58ef472
 
 
 
 
312e880
 
58ef472
 
 
 
 
 
9234c46
58ef472
 
 
 
 
 
 
 
 
 
 
 
 
 
312e880
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
from transformers import pipeline
import json
import re

# Load the phrase mapping from the JSON file (English to Swahili).
# Decode explicitly as UTF-8 so the result does not depend on the
# platform's default locale encoding.
with open('phrase_mappings.json', 'r', encoding='utf-8') as f:
    phrase_mapping = json.load(f)

# Define the UnifiedTranslator class for English-to-Swahili translation
class UnifiedTranslator:
    """Translate text using fixed phrase templates, with a model fallback.

    Each key of ``phrase_mapping`` is a template that may contain the
    placeholder ``{name}``; its value is the translation to return, which
    may contain the same placeholder. Input that matches no template is
    passed to the translation pipeline loaded from ``model_name``.
    """

    def __init__(self, model_name, phrase_mapping):
        """Load the translation pipeline and keep the phrase templates.

        Args:
            model_name: Model identifier passed to ``pipeline("translation", ...)``.
            phrase_mapping: Dict of source template -> translated template.
        """
        self.model = pipeline("translation", model=model_name)
        self.phrase_mapping = phrase_mapping

    @staticmethod
    def _build_regex(pattern):
        """Compile a phrase template into a case-insensitive regex.

        The template is stripped *before* escaping: ``re.escape`` escapes
        spaces, so stripping afterwards would leave a dangling backslash for
        templates with trailing whitespace. Every ``{name}`` placeholder
        becomes a capture group for one word (letters, digits, underscore,
        apostrophe, hyphen).

        Raises:
            re.error: If the resulting expression is not a valid regex.
        """
        escaped = re.escape(pattern.strip())
        return re.compile(
            escaped.replace(r"\{name\}", r"([\w'-]+)"),
            re.IGNORECASE,
        )

    def translate(self, text):
        """Return the translation of ``text``.

        Templates are tried in the mapping's iteration order and the first
        full match wins. Matching is case-insensitive but is done on the
        original (stripped) text, so a captured name keeps the
        capitalisation the user typed.

        Args:
            text: The input string to translate.

        Returns:
            The mapped translation, the model's ``translation_text``, an
            empty string for blank input that matches no template, or
            ``"Translation error occurred"`` if the model call fails.
        """
        cleaned = text.strip()

        # Debugging output
        print(f"Input text: {cleaned}")

        for pattern, translation in self.phrase_mapping.items():
            try:
                pattern_regex = self._build_regex(pattern)
            except re.error as e:
                # A malformed template must not block the remaining ones.
                print(f"Regex error with pattern {pattern}: {e}")
                continue

            print(f"Checking pattern: {pattern}")
            match = pattern_regex.fullmatch(cleaned)
            if not match:
                continue

            print(f"Match found: {match.group(0)}")
            # Substitute the captured word only when the template has a placeholder.
            if '{name}' in pattern:
                return translation.format(name=match.group(1))
            return translation

        # Nothing to translate: avoid sending blank input to the model.
        if not cleaned:
            return ""

        # Fallback to model translation if no pattern matches
        try:
            print(f"Fallback to model translation for text: {text}")
            translation = self.model(text)[0]
            return translation['translation_text']
        except Exception as e:
            # Boundary with the UI: report the failure as text instead of raising.
            print(f"Model translation error: {e}")
            return "Translation error occurred"

# Module-level translator shared by every request: the model to load plus
# the custom phrase templates read from the JSON file.
translator = UnifiedTranslator(
    model_name="Bildad/English-Swahili_Translation",
    phrase_mapping=phrase_mapping,
)

# Define the Gradio interface
def translate_text(text):
    """Callback for the Gradio interface: translate ``text`` via the shared translator."""
    translated = translator.translate(text)
    return translated

# Single text box in, single text box out, backed by translate_text.
iface = gr.Interface(
    translate_text,
    "text",
    "text",
    title="English to Swahili Translation",
    description="Translate English to Swahili with custom phrase mappings.",
)

# Start the web UI, requesting a public share link.
iface.launch(share=True)