from fine_tuning import fine_tune_model  # Project-local: performs the actual model fine-tuning.

# NOTE(review): `nlp` is referenced below but never defined in this file.
# The original presumably did `import spacy; nlp = spacy.load(...)` — confirm
# where `nlp` is meant to come from before running this script.


def clean_text_with_spacy(text):
    """Clean *text* with spaCy: lemmatize, lowercase, drop stop words/punctuation.

    Args:
        text: Raw input string to clean.

    Returns:
        The surviving token lemmas, lowercased and joined by single spaces.
    """
    doc = nlp(text)
    tokens = [
        token.lemma_.lower()
        for token in doc
        if not token.is_stop and not token.is_punct
    ]
    return " ".join(tokens)


def read_prompts(file_path):
    """Parse a prompts file into parallel input/target text lists.

    Lines beginning with ``input:`` or ``target:`` contribute (tag removed,
    whitespace stripped) to the corresponding list; other lines are ignored.

    Args:
        file_path: Path to the prompts file (UTF-8 text).

    Returns:
        Tuple ``(input_texts, target_texts)`` of two lists of strings.
    """
    input_texts = []
    target_texts = []
    with open(file_path, "r", encoding="utf-8") as file:
        # Iterate the file lazily instead of readlines() — same order,
        # no full-file materialization.
        for line in file:
            # Slice off only the leading tag. The original used
            # str.replace, which would also delete "input:"/"target:"
            # occurring later inside the line's text.
            if line.startswith("input:"):
                input_texts.append(line[len("input:"):].strip())
            elif line.startswith("target:"):
                target_texts.append(line[len("target:"):].strip())
    return input_texts, target_texts


def process_input_for_fine_tuning(input_texts, target_texts):
    """Clean both text lists with spaCy and hand them to the fine-tuner.

    Args:
        input_texts: Raw input prompt strings.
        target_texts: Raw target strings aligned with *input_texts*.
    """
    cleaned_input_texts = [clean_text_with_spacy(text) for text in input_texts]
    cleaned_target_texts = [clean_text_with_spacy(text) for text in target_texts]
    fine_tune_model(cleaned_input_texts, cleaned_target_texts)


if __name__ == "__main__":
    # Guarded entry point so merely importing this module does not read
    # prompts.txt or kick off a fine-tuning run (the original executed
    # these statements at module level).
    input_texts, target_texts = read_prompts("prompts.txt")
    process_input_for_fine_tuning(input_texts, target_texts)