Spaces:

akashraut
/

indic_bot

Sleeping

App Files Community

akashraut committed 5 days ago

Commit

afb7b3f

verified ·

1 Parent(s): 2223d6d

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +88 -40

streamlit_app.py CHANGED Viewed

@@ -63,23 +63,26 @@ class LangChainBot:
                     model=model_name,
                     device=device,
                     torch_dtype=torch_dtype,
-                    max_new_tokens=150,
-                    repetition_penalty=1.2,
-                    trust_remote_code=True  # Added this for compatibility
                 )
                 # Wrap in LangChain LLM
                 llm = HuggingFacePipeline(pipeline=generator_pipeline)
                 # Create prompt template
-                template = """
-                You are a helpful conversational AI. Respond to the user's message appropriately.
-                Previous conversation:
-                {history}
-                Human: {input}
-                Assistant:
-                """
                 prompt_template = PromptTemplate(
                     input_variables=["history", "input"],
                     template=template
@@ -109,8 +112,8 @@ class LangChainBot:
     def _load_translator(self, device):
         """Load the translator with fallback options."""
         translators_to_try = [
             "ai4bharat/indictrans2-indic-indic-1B",
-            "Helsinki-NLP/opus-mt-en-hi",  # Fallback for English-Hindi
         ]
         for translator_name in translators_to_try:
@@ -140,30 +143,41 @@ class LangChainBot:
             return text
         try:
-            # Define language codes
-            codes = {
                 'english': 'eng_Latn',
                 'hindi': 'hin_Deva',
                 'tamil': 'tam_Taml',
                 'telugu': 'tel_Telu'
             }
-            if source_lang in codes and target_lang in codes:
-                result = self.translator(
-                    text,
-                    src_lang=codes[source_lang],
-                    tgt_lang=codes[target_lang]
-                )
-                return result[0]['translation_text']
-            else:
-                # Fallback for simple English-Hindi translation
-                if source_lang == 'english' and target_lang == 'hindi':
-                    result = self.translator(text)
-                    return result[0]['translation_text'] if result else text
         except Exception as e:
             logger.warning(f"Translation failed: {e}")
-            st.warning(f"Translation failed, using original text. Error: {e}")
         return text
@@ -173,26 +187,53 @@ class LangChainBot:
             return "Error: The LangChain chain is not initialized. Please check the logs above."
         try:
-            # Translate input to a common language if needed
-            if input_lang != 'english':
-                processed_message = self._translate(user_message, input_lang, 'english')
-            else:
                 processed_message = user_message
             # Generate response
-            response = self.chain.run(processed_message)
-            # Translate output if needed
-            if output_lang != 'english':
                 final_response = self._translate(response, 'english', output_lang)
             else:
-                final_response = response
-            return final_response
         except Exception as e:
             logger.error(f"Error generating response: {e}")
-            return f"I apologize, but I encountered an error while processing your request: {str(e)}"
 # -----------------------------------------------------------------------------
 # STREAMLIT UI WITH BETTER ERROR HANDLING
@@ -222,7 +263,7 @@ if bot and bot.chain:
     st.markdown("---")
-    # Language selection
     language_options = ["english", "hindi", "tamil", "telugu"]
     col1, col2 = st.columns(2)
@@ -237,9 +278,16 @@ if bot and bot.chain:
         output_lang = st.selectbox(
             "🗣️ Output Language",
             options=language_options,
-            index=1,
             help="Select the language for the response"
         )
     # Chat interface
     st.markdown("### 💬 Chat Interface")

                     model=model_name,
                     device=device,
                     torch_dtype=torch_dtype,
+                    max_new_tokens=100,  # Reduced from 150
+                    repetition_penalty=1.5,  # Increased from 1.2
+                    do_sample=True,
+                    temperature=0.7,
+                    top_p=0.9,
+                    no_repeat_ngram_size=3,  # Prevent repetition
+                    trust_remote_code=True
                 )
                 # Wrap in LangChain LLM
                 llm = HuggingFacePipeline(pipeline=generator_pipeline)
                 # Create prompt template
+                template = """You are a helpful AI assistant. Please provide a clear and concise response to the user's question.
+Previous conversation:
+{history}
+User: {input}
+Assistant:"""
                 prompt_template = PromptTemplate(
                     input_variables=["history", "input"],
                     template=template
     def _load_translator(self, device):
         """Load the translator with fallback options."""
         translators_to_try = [
+            "Helsinki-NLP/opus-mt-en-hi",  # More reliable fallback for English-Hindi
             "ai4bharat/indictrans2-indic-indic-1B",
         ]
         for translator_name in translators_to_try:
             return text
         try:
+            # Define language codes for indictrans2
+            indictrans_codes = {
                 'english': 'eng_Latn',
                 'hindi': 'hin_Deva',
                 'tamil': 'tam_Taml',
                 'telugu': 'tel_Telu'
             }
+            # Try indictrans2 format first
+            if source_lang in indictrans_codes and target_lang in indictrans_codes:
+                try:
+                    result = self.translator(
+                        text,
+                        src_lang=indictrans_codes[source_lang],
+                        tgt_lang=indictrans_codes[target_lang]
+                    )
+                    if result and len(result) > 0 and 'translation_text' in result[0]:
+                        return result[0]['translation_text']
+                except Exception as e:
+                    logger.warning(f"Indictrans2 translation failed: {e}")
+            # Fallback: Try simple pipeline format
+            try:
+                result = self.translator(text)
+                if result and len(result) > 0:
+                    if 'translation_text' in result[0]:
+                        return result[0]['translation_text']
+                    elif 'generated_text' in result[0]:
+                        return result[0]['generated_text']
+            except Exception as e:
+                logger.warning(f"Simple translation failed: {e}")
         except Exception as e:
             logger.warning(f"Translation failed: {e}")
+            # Don't show warning to user for every translation failure
         return text
             return "Error: The LangChain chain is not initialized. Please check the logs above."
         try:
+            # Clean the input message
+            user_message = user_message.strip()
+            # For now, let's work primarily in English to avoid translation issues
+            # Only translate if specifically needed and working
+            if input_lang == 'english':
                 processed_message = user_message
+            else:
+                # Try translation, but fallback to original if it fails
+                translated = self._translate(user_message, input_lang, 'english')
+                processed_message = translated if translated != user_message else user_message
+            # Generate response with input validation
+            if len(processed_message.strip()) == 0:
+                return "I didn't receive a valid message. Please try again."
             # Generate response
+            response = self.chain.run(input=processed_message)
+            # Clean up the response
+            response = response.strip()
+            # Remove any repetitive patterns
+            words = response.split()
+            if len(words) > 10:
+                # Check for excessive repetition
+                word_counts = {}
+                for word in words:
+                    word_counts[word] = word_counts.get(word, 0) + 1
+                # If any word appears more than 5 times, it's likely repetitive
+                max_count = max(word_counts.values()) if word_counts else 0
+                if max_count > 5:
+                    # Generate a simple fallback response
+                    response = f"I understand you said '{processed_message[:50]}...' Let me provide a helpful response to that."
+            # Translate output if needed and different from English
+            if output_lang != 'english' and output_lang != input_lang:
                 final_response = self._translate(response, 'english', output_lang)
+                # If translation fails, return English response
+                return final_response if final_response != response else response
             else:
+                return response
         except Exception as e:
             logger.error(f"Error generating response: {e}")
+            return f"I apologize, but I encountered an error while processing your request. Please try rephrasing your message."
 # -----------------------------------------------------------------------------
 # STREAMLIT UI WITH BETTER ERROR HANDLING
     st.markdown("---")
+    # Language selection with helpful notes
     language_options = ["english", "hindi", "tamil", "telugu"]
     col1, col2 = st.columns(2)
         output_lang = st.selectbox(
             "🗣️ Output Language",
             options=language_options,
+            index=0,  # Default to English for now
             help="Select the language for the response"
         )
+    # Show translation status
+    if not bot.translator:
+        st.info("ℹ️ Translation is currently limited. For best results, use English input and output.")
+    elif input_lang != 'english' or output_lang != 'english':
+        st.warning("⚠️ Translation is experimental. If you encounter issues, try using English.")
     # Chat interface
     st.markdown("### 💬 Chat Interface")