import gradio as gr
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import sys

# Alias the tensorflow.keras modules so tokenizers pickled under a different Keras
# module layout can still be unpickled.
from tensorflow.keras import preprocessing
sys.modules['keras.src.preprocessing'] = preprocessing
from tensorflow import keras
sys.modules['keras'] = keras

from huggingface_hub import HfApi

# Set your Hugging Face API token in the settings of this Space as a secret variable.
# Authenticate using HfApi
# api = HfApi()
# api.login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
from huggingface_hub import login

login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))

# ---------------------------------------------------------------------------------------------------------------------------------------
# Loading the translation model and the English and French tokenizers
with open('english_tokenizer.pickle', 'rb') as handle:
    english_tokenizer = pickle.load(handle)

with open('french_tokenizer.pickle', 'rb') as handle:
    french_tokenizer = pickle.load(handle)

translation_model = tf.keras.models.load_model('model2_v2.h5')

# ---------------------------------------------------------------------------------------------------------------------------------------
# Translate sentence function
MAX_LEN_EN = 15
MAX_LEN_FR = 21
VOCAB_SIZE_EN = len(english_tokenizer.word_index)
VOCAB_SIZE_FR = len(french_tokenizer.word_index)

# print(f'MAX_LEN_EN: {MAX_LEN_EN}')
# print(f'MAX_LEN_FR: {MAX_LEN_FR}')
# print(f'VOCAB_SIZE_EN: {VOCAB_SIZE_EN}')
# print(f'VOCAB_SIZE_FR: {VOCAB_SIZE_FR}')


# Function implemented earlier, modified to be used with Gradio.
def translate_sentence(sentence, verbose=False):
    # Preprocess the input sentence
    sequence = english_tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN_EN, padding='post')

    # Initialize the target sequence with the start token
    start_token = VOCAB_SIZE_FR  # 344
    target_sequence = np.zeros((1, MAX_LEN_FR))
    target_sequence[0, 0] = start_token

    # Placeholder for the translation
    translation = ''

    # Step-by-step (greedy) decoding
    for i in range(1, MAX_LEN_FR):
        # Predict the next word
        output_tokens = translation_model.predict([padded_sequence, target_sequence], verbose=verbose)

        # Get the most likely next word
        sampled_token_index = np.argmax(output_tokens[0, i - 1, :])
        if verbose:
            print(f'sampled_token_index: {sampled_token_index}')

        if sampled_token_index == 0:  # End token
            break

        sampled_word = french_tokenizer.index_word[sampled_token_index]
        if verbose:
            print(f'sampled_word: {sampled_word}')

        # Append the word to the translation
        translation += ' ' + sampled_word

        # Update the target sequence
        target_sequence[0, i] = sampled_token_index

    return translation.strip()


# Example usage:
# english_sentence = "paris is relaxing during december but it is usually chilly in july"
# print(english_sentence)
# translated_sentence = translate_sentence(english_sentence)
# print(translated_sentence)

# ----------------------------------------------------------------------------------------------------------------------------------------
# Gradio app
from datasets import load_dataset, Dataset


# Function to load the dataset from Hugging Face
def load_hf_dataset():
    dataset = load_dataset("Zaherrr/translation_log")
    return dataset["train"]  # load_dataset returns a DatasetDict, so select the "train" split


def update_history_with_status(english, french, history, status):
    history.append((english, french, status))
    history_text = "\n".join([f"{inp} ----> {out} ({status})" for inp, out, status in history])
    return history_text, history
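

# ---------------------------------------------------------------------------------------------------------------------------------------
# Hypothetical helper (an assumption, not part of the original app and never called by it):
# a minimal sketch of how the "Zaherrr/translation_log" dataset could be created with the
# columns the callbacks below expect (english, french, corrected_french, status) if the
# Hub repo does not exist yet.
def init_translation_log():
    from datasets import Features, Value  # local import, only needed for this optional helper

    columns = ["english", "french", "corrected_french", "status"]
    features = Features({name: Value("string") for name in columns})
    # Build an empty dataset with an explicit string schema and push it to the Hub
    empty = Dataset.from_dict({name: [] for name in columns}, features=features)
    empty.push_to_hub("Zaherrr/translation_log")
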
({status})" for inp, out, status in history]) return history_text, history def revert_last_action(history): if history: # Revert history history.pop() # Update history block text history_text = "\n".join([f"{inp} ----> {out} ({status})" for inp, out, status in history]) # Revert last row in the dataset if row_indices: last_index = row_indices.pop() # Remove the last row from the dataset dataset = load_hf_dataset() df = dataset.to_pandas() df = df.drop(last_index).reset_index(drop=True) updated_dataset = Dataset.from_pandas(df) updated_dataset.push_to_hub("Zaherrr/translation_log") return history_text, history # Function to flag data def flag_action(english, french, corrected_french, flagged_successful, history): data = {"english": english, "french": french, "corrected_french": corrected_french, "status": flagged_successful} dataset = load_hf_dataset() df = dataset.to_pandas() new_df = pd.DataFrame([data]) df = pd.concat([df, new_df], ignore_index=True) updated_dataset = Dataset.from_pandas(df) updated_dataset.push_to_hub("Zaherrr/translation_log") index = len(df) - 1 row_indices.append(index) return update_history_with_status(english, french, history, "Flagged") # Function to accept data def accept_action(english, french, hidden_text, flagged_successful, history): data = {"english": english, "french": french, "corrected_french": hidden_text, "status": flagged_successful} dataset = load_hf_dataset() df = dataset.to_pandas() new_df = pd.DataFrame([data]) df = pd.concat([df, new_df], ignore_index=True) updated_dataset = Dataset.from_pandas(df) updated_dataset.push_to_hub("Zaherrr/translation_log") index = len(df) - 1 row_indices.append(index) return update_history_with_status(english, french, history, "Accepted") # Define the Gradio interface with gr.Blocks(theme='gstaff/sketch') as demo: gr.Markdown("

Translate English to French

") with gr.Row(): with gr.Column(): english = gr.Textbox(label="English", placeholder="Input English text here") Translate_button = gr.Button(value="Translate", variant="primary") hidden_text = gr.Textbox(label="Hidden Text", placeholder="Hidden Text", interactive=False, visible=False) flagged_successful = gr.Textbox(label="Acceptance Status", placeholder="Flagged Successful", interactive=False, visible=False) with gr.Column(): french = gr.Textbox(label="French", placeholder="Predicted French text here", interactive=False) corrected_french = gr.Textbox(label="Corrected French", placeholder="Corrected French translation here") with gr.Column(): with gr.Row(): accept_button = gr.Button(value="Accept", variant="primary") flag_button = gr.Button(value="Flag", variant="secondary") revert_button = gr.Button(value="Revert", variant="secondary") examples = gr.Examples(examples=[ "paris is relaxing during december but it is usually chilly in july", "She is driving the truck"], inputs=english) gr.Markdown("History:") history_block = gr.Textbox(label="History", placeholder="English - French Translation Pairs", interactive=False, lines=5, max_lines=50) history = gr.State([]) # Track the row indices in the CSVLogger row_indices = [] gr.on( triggers=[english.submit, Translate_button.click], fn=translate_sentence, inputs=english, outputs=[french], ).then( fn=lambda: gr.Textbox(visible=False), inputs=None, outputs=flagged_successful, ) gr.on( triggers=[flag_button.click], fn=lambda: gr.Textbox(value="Flagged", visible=True), outputs=flagged_successful, ).then( fn=flag_action, inputs=[english, french, corrected_french, flagged_successful, history], outputs=[history_block, history], ) gr.on( triggers=[accept_button.click], fn=lambda: gr.Textbox(value="Accepted", visible=True), outputs=flagged_successful, ).then( fn=accept_action, inputs=[english, french, hidden_text, flagged_successful, history], outputs=[history_block, history], ) gr.on( triggers=[revert_button.click], fn=revert_last_action, inputs=[history], outputs=[history_block, history], ).then( fn=lambda: gr.Textbox(placeholder="Reverted", visible=True), outputs=flagged_successful, ) demo.launch(share=True, auth=('username', 'password123'), auth_message="Check your Login details sent to your email")