GurgenGulay committed on
Commit
1deb054
·
verified ·
1 Parent(s): e328f2e

Update fine_tuning.py

Browse files
Files changed (1) hide show
  1. fine_tuning.py +40 -1
fine_tuning.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
2
  from datasets import Dataset
3
  from sklearn.model_selection import train_test_split
@@ -6,6 +7,10 @@ from nltk.corpus import stopwords
6
  from nltk.tokenize import word_tokenize
7
  from nltk.stem import PorterStemmer
8
 
 
 
 
 
9
 
10
  stop_words = set(stopwords.words('english'))
11
  ps = PorterStemmer()
@@ -86,4 +91,38 @@ trainer = Trainer(
86
  trainer.train()
87
 
88
  model.save_pretrained("./fine_tuned_model")
89
- tokenizer.save_pretrained("./fine_tuned_model")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
  from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
3
  from datasets import Dataset
4
  from sklearn.model_selection import train_test_split
 
7
  from nltk.tokenize import word_tokenize
8
  from nltk.stem import PorterStemmer
9
 
10
+ # Logging settings
11
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
12
+ logger = logging.getLogger(__name__)
13
+
14
 
15
  stop_words = set(stopwords.words('english'))
16
  ps = PorterStemmer()
 
91
  trainer.train()
92
 
93
  model.save_pretrained("./fine_tuned_model")
94
+ tokenizer.save_pretrained("./fine_tuned_model")
95
+
96
# Fine-tuning pipeline: load the "t5-base" tokenizer and model, read and clean
# the prompt pairs from prompts.txt, split them 90/10 into training and
# validation sets, train with Trainer, and save the model and tokenizer to
# ./fine_tuned_model.
# NOTE(review): the code directly above this block already calls
# trainer.train() and saves the model/tokenizer; if that earlier code is still
# present, the whole run happens twice -- confirm which copy is meant to stay.
try:
    logger.info("Loading tokenizer and model.")
    model_name = "t5-base"
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name)

    logger.info("Reading and cleaning prompts.")
    input_texts, target_texts = read_prompts("prompts.txt")
    input_texts_cleaned = [clean_text(text) for text in input_texts]
    target_texts_cleaned = [clean_text(text) for text in target_texts]

    logger.info("Splitting dataset into training and validation sets.")
    train_texts, val_texts, train_labels, val_labels = train_test_split(
        input_texts_cleaned,
        target_texts_cleaned,
        test_size=0.1,
    )

    logger.info("Preparing datasets for training.")
    train_dataset = Dataset.from_dict(prepare_data(train_texts, train_labels))
    val_dataset = Dataset.from_dict(prepare_data(val_texts, val_labels))

    logger.info("Starting model training.")
    # training_args is not defined in this block; presumably it is created
    # earlier in the file -- verify before running.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
    )
    trainer.train()

    logger.info("Saving fine-tuned model.")
    model.save_pretrained("./fine_tuned_model")
    tokenizer.save_pretrained("./fine_tuned_model")

except Exception as e:
    # logger.exception logs at ERROR level with the same message as before,
    # and additionally records the traceback so the failing step can be found.
    # The exception is still not re-raised, matching the original control flow.
    logger.exception("An error occurred during fine-tuning: %s", e)