try:
    import spaces
    print("'spaces' module imported successfully.")
except ImportError:
    print("Warning: 'spaces' module not found. Using dummy decorator for local execution.")

    # Define a dummy decorator that does nothing if 'spaces' isn't available
    class DummySpaces:
        def GPU(self, *args, **kwargs):
            # Support both bare @spaces.GPU and parameterized @spaces.GPU(...) usage
            if len(args) == 1 and callable(args[0]) and not kwargs:
                func = args[0]
                print(f"Note: Dummy @GPU decorator used for function '{func.__name__}'.")
                return func

            def decorator(func):
                # This dummy decorator just returns the original function
                print(f"Note: Dummy @GPU decorator used for function '{func.__name__}'.")
                return func
            return decorator

    spaces = DummySpaces()  # Create an instance of the dummy class

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM  # Or TFAutoModelForSeq2SeqLM
import torch  # Or import tensorflow as tf
import os
import math

# Requires a Gradio version supporting the spaces.GPU decorator if running on Spaces
# Might need: from gradio.external import spaces  <- if spaces not directly available
# import gradio.external as spaces  # Use this import path
from huggingface_hub import hf_hub_download

# --- Configuration ---
# IMPORTANT: REPLACE THIS with your model's Hugging Face Hub ID or local path
MODEL_PATH = "Gregniuki/pl-en-pl-v2"  # Use your actual model path
MAX_WORDS_PER_CHUNK = 40
BATCH_SIZE = 4  # Adjust based on GPU memory / desired throughput

# --- Device Setup (Zero GPU Support) ---
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU detected. Using CUDA.")
else:
    device = torch.device("cpu")
    print("No GPU detected. Using CPU.")

# --- Get Hugging Face Token from Secrets for Private Models ---
HF_AUTH_TOKEN = os.getenv("HF_TOKEN")
if MODEL_PATH and "/" in MODEL_PATH and not os.path.exists(MODEL_PATH):  # Rough check if it's likely a Hub ID
    if HF_AUTH_TOKEN is None:
        print(f"Warning: HF_TOKEN secret not found. Trying to load {MODEL_PATH} without authentication.")
    else:
        print("HF_TOKEN found. Using token for model loading.")
else:
    print(f"Loading model from local path: {MODEL_PATH}")
    HF_AUTH_TOKEN = None  # Don't use token for local paths

# --- Load Model and Tokenizer (once on startup) ---
print(f"Loading model and tokenizer from: {MODEL_PATH}")
try:
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_PATH,
        token=HF_AUTH_TOKEN,
        trust_remote_code=False
    )

    # --- Choose the correct model class ---
    # PyTorch (most common)
    model = AutoModelForSeq2SeqLM.from_pretrained(
        MODEL_PATH,
        token=HF_AUTH_TOKEN,
        trust_remote_code=False
    )
    model.to(device)  # Move model to the determined device
    model.eval()  # Set model to evaluation mode
    print(f"Using PyTorch model on device: {device}")

    # # TensorFlow (uncomment if your model is TF)
    # from transformers import TFAutoModelForSeq2SeqLM
    # import tensorflow as tf
    # model = TFAutoModelForSeq2SeqLM.from_pretrained(
    #     MODEL_PATH,
    #     token=HF_AUTH_TOKEN,
    #     trust_remote_code=False
    # )
    # # TF device placement is often automatic or managed via strategies
    # print("Using TensorFlow model.")

    print("Model and tokenizer loaded successfully.")
except Exception as e:
    print(f"FATAL Error loading model/tokenizer: {e}")
    if "401 Client Error" in str(e):
        error_message = f"Authentication failed. Ensure the HF_TOKEN secret has read access to {MODEL_PATH}."
    else:
        error_message = f"Failed to load model from {MODEL_PATH}.\nError: {e}"
Error: {e}" # Raise error to prevent app launch if model loading fails raise RuntimeError(error_message) # --- Helper Function for Chunking Sentences --- def chunk_sentence(sentence, max_words): """Splits a sentence into chunks of max_words.""" if not sentence or sentence.isspace(): return [] words = sentence.split() # Simple space splitting chunks = [] current_chunk = [] for word in words: current_chunk.append(word) if len(current_chunk) >= max_words: chunks.append(" ".join(current_chunk)) current_chunk = [] if current_chunk: # Add any remaining words chunks.append(" ".join(current_chunk)) return chunks # --- Define the BATCH translation function --- # Add GPU decorator for Spaces (adjust duration if needed) @spaces.GPU def translate_batch(text_input): """ Translates multi-line input text using batching and sentence chunking. Assumes auto-detection of language direction (no prefixes). """ if not text_input or text_input.strip() == "": return "[Error] Please enter some text to translate." print(f"Received input block for batch translation.") # 1. Split input into potential sentences (lines) and clean lines = [line.strip() for line in text_input.splitlines() if line.strip()] if not lines: return "[Info] No valid text lines found in input." # 2. Chunk long sentences all_chunks = [] for line in lines: sentence_chunks = chunk_sentence(line, MAX_WORDS_PER_CHUNK) all_chunks.extend(sentence_chunks) if not all_chunks: return "[Info] No text chunks generated after processing input." print(f"Processing {len(all_chunks)} chunks in batches...") # 3. Process chunks in batches all_translations = [] num_batches = math.ceil(len(all_chunks) / BATCH_SIZE) for i in range(num_batches): batch_start = i * BATCH_SIZE batch_end = batch_start + BATCH_SIZE batch_chunks = all_chunks[batch_start:batch_end] print(f" Processing batch {i+1}/{num_batches} ({len(batch_chunks)} chunks)") # Tokenize the batch try: # PyTorch inputs = tokenizer(batch_chunks, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device) # # TensorFlow # inputs = tokenizer(batch_chunks, return_tensors="tf", padding=True, truncation=True, max_length=512) except Exception as e: print(f"Error during batch tokenization: {e}") # Return partial results or a general error return "[Error] Tokenization failed for a batch." # Generate translations for the batch try: # PyTorch with torch.no_grad(): outputs = model.generate( **inputs, max_length=1024, num_beams=8, early_stopping=False ) # output_ids shape: [batch_size, sequence_length] # # TensorFlow # outputs = model.generate( # inputs['input_ids'], # attention_mask=inputs['attention_mask'], # max_length=512, # num_beams=4, # early_stopping=True # ) # outputs is typically a tensor of shape [batch_size, sequence_length] # Decode the batch results batch_translations = tokenizer.batch_decode(outputs, skip_special_tokens=True) all_translations.extend(batch_translations) except Exception as e: print(f"Error during batch generation/decoding: {e}") # Return partial results or a general error return "[Error] Translation generation failed for a batch." # 4. Join translated chunks back together # Simple join with newline, might not perfectly preserve original structure if chunking happened mid-sentence. 
final_output = "\n".join(all_translations) print("Batch translation finished.") return final_output # --- Create Gradio Interface for Batch Translation --- input_textbox = gr.Textbox( lines=10, # Allow more lines for batch input label="Input Text (Polish or English - One sentence per line recommended)", placeholder="Enter text here. Longer sentences will be split into chunks (max 20 words)." ) output_textbox = gr.Textbox(label="Translation Output", lines=10) # Interface definition interface = gr.Interface( fn=translate_batch, # Use the batch function inputs=input_textbox, outputs=output_textbox, title="🇵🇱 <-> 🇬🇧 Batch ByT5 Translator (Auto-Detect, Chunking)", description=f"Translate multiple lines of text between Polish and English.\nModel: {MODEL_PATH}\nLong sentences are automatically split into chunks of max {MAX_WORDS_PER_CHUNK} words.", article="Enter text (ideally one sentence per line). Click Submit to translate all lines.", allow_flagging="never" ) # --- Launch the App --- if __name__ == "__main__": # Set share=True for a public link if running locally, not needed on Spaces interface.launch()