{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "b12ae8a3-9e08-402c-894c-31697fad6c56", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "54d7e7ee895949c4a025acf2c9640f96", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='
\n", " \n", " \n", " [7345/9180 15:48 < 03:56, 7.75 it/s, Epoch 24/30]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Epoch | Training Loss | Validation Loss | Accuracy
1 | No log | 0.683657 | 0.740234
2 | 0.776300 | 0.610221 | 0.773438
3 | 0.776300 | 0.578231 | 0.783203
4 | 0.657000 | 0.569810 | 0.794922
5 | 0.626700 | 0.574307 | 0.779297
6 | 0.626700 | 0.546542 | 0.806641
7 | 0.608200 | 0.547422 | 0.806641
8 | 0.608200 | 0.548843 | 0.794922
9 | 0.597600 | 0.535904 | 0.812500
10 | 0.584500 | 0.523563 | 0.808594
11 | 0.584500 | 0.524036 | 0.802734
12 | 0.576900 | 0.512048 | 0.812500
13 | 0.576900 | 0.510532 | 0.812500
14 | 0.574200 | 0.528225 | 0.796875
15 | 0.563100 | 0.502592 | 0.808594
16 | 0.563100 | 0.511982 | 0.812500
17 | 0.552900 | 0.499629 | 0.814453
18 | 0.552500 | 0.492783 | 0.814453
19 | 0.552500 | 0.514317 | 0.802734
20 | 0.547100 | 0.485857 | 0.820312
21 | 0.547100 | 0.492333 | 0.814453
22 | 0.539700 | 0.487450 | 0.824219
23 | 0.540400 | 0.492641 | 0.818359
24 | 0.540400 | 0.491293 | 0.822266

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "model = ConsistentSentenceClassifier(\n", " freeze_bert=True)\n", "\n", "training_args = TrainingArguments(\n", " output_dir=\"../factual-consistency-classification-ja-avgpool\",\n", " learning_rate=1e-4,\n", " per_device_train_batch_size=64,\n", " per_device_eval_batch_size=8,\n", " num_train_epochs=30,\n", " weight_decay=0.02,\n", " evaluation_strategy=\"epoch\",\n", " eval_accumulation_steps=4,\n", " save_strategy=\"epoch\",\n", " load_best_model_at_end=True,\n", " save_total_limit=5,\n", " push_to_hub=True,\n", ")\n", "\n", "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n", "trainer = Trainer(\n", " model=model,\n", " args=training_args,\n", " train_dataset=tokenized_dataset[\"train\"],\n", " eval_dataset=tokenized_dataset[\"test\"],\n", " tokenizer=tokenizer,\n", " data_collator=data_collator,\n", " compute_metrics=get_metrics(),\n", ")\n", "\n", "trainer.train()\n", "trainer.push_to_hub('factual-consistency-classification-ja-avgpool')" ] }, { "cell_type": "code", "execution_count": null, "id": "a6eb93f7-5a38-49a2-be0d-e42267e23a0a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "environment": { "kernel": "python3", "name": "pytorch-gpu.2-0.m112", "type": "gcloud", "uri": "gcr.io/deeplearning-platform-release/pytorch-gpu.2-0:m112" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }