bhavinjawade committed on
Commit 9564ed2 · verified · 1 Parent(s): 906f62f

Model save

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +103 -0
  2. =0.41.0 +0 -0
  3. =0.6.0 +0 -0
  4. DSPy_Optimization.ipynb +415 -0
  5. InstructionFinetuning.ipynb +1277 -0
  6. README.md +58 -0
  7. SFT_Expert.py +229 -0
  8. TQ_template.py +37 -0
  9. TextGrad_Optimization.ipynb +544 -0
  10. adapter_config.json +45 -0
  11. adapter_model.safetensors +3 -0
  12. added_tokens.json +3 -0
  13. data_prep.py +380 -0
  14. gemma-12b-tq-model/README.md +58 -0
  15. gemma-12b-tq-model/adapter_config.json +45 -0
  16. gemma-12b-tq-model/adapter_model.safetensors +3 -0
  17. gemma-12b-tq-model/added_tokens.json +3 -0
  18. gemma-12b-tq-model/checkpoint-2/README.md +202 -0
  19. gemma-12b-tq-model/checkpoint-2/adapter_config.json +45 -0
  20. gemma-12b-tq-model/checkpoint-2/adapter_model.safetensors +3 -0
  21. gemma-12b-tq-model/checkpoint-2/added_tokens.json +3 -0
  22. gemma-12b-tq-model/checkpoint-2/optimizer.pt +3 -0
  23. gemma-12b-tq-model/checkpoint-2/rng_state.pth +3 -0
  24. gemma-12b-tq-model/checkpoint-2/scheduler.pt +3 -0
  25. gemma-12b-tq-model/checkpoint-2/special_tokens_map.json +33 -0
  26. gemma-12b-tq-model/checkpoint-2/tokenizer.json +3 -0
  27. gemma-12b-tq-model/checkpoint-2/tokenizer.model +3 -0
  28. gemma-12b-tq-model/checkpoint-2/tokenizer_config.json +0 -0
  29. gemma-12b-tq-model/checkpoint-2/trainer_state.json +51 -0
  30. gemma-12b-tq-model/checkpoint-2/training_args.bin +3 -0
  31. gemma-12b-tq-model/runs/Apr25_08-39-59_9945b53f-579e-4565-94fc-5fbe73c83cc2/events.out.tfevents.1745570448.9945b53f-579e-4565-94fc-5fbe73c83cc2 +3 -0
  32. gemma-12b-tq-model/runs/Apr25_08-42-29_9945b53f-579e-4565-94fc-5fbe73c83cc2/events.out.tfevents.1745570563.9945b53f-579e-4565-94fc-5fbe73c83cc2 +3 -0
  33. gemma-12b-tq-model/runs/Apr25_09-19-39_9945b53f-579e-4565-94fc-5fbe73c83cc2/events.out.tfevents.1745572788.9945b53f-579e-4565-94fc-5fbe73c83cc2 +3 -0
  34. gemma-12b-tq-model/special_tokens_map.json +33 -0
  35. gemma-12b-tq-model/tokenizer.json +3 -0
  36. gemma-12b-tq-model/tokenizer.model +3 -0
  37. gemma-12b-tq-model/tokenizer_config.json +0 -0
  38. gemma-12b-tq-model/training_args.bin +3 -0
  39. gemma-1b-tq-model/README.md +58 -0
  40. gemma-1b-tq-model/adapter_config.json +42 -0
  41. gemma-1b-tq-model/adapter_model.safetensors +3 -0
  42. gemma-1b-tq-model/added_tokens.json +3 -0
  43. gemma-1b-tq-model/checkpoint-10/README.md +202 -0
  44. gemma-1b-tq-model/checkpoint-10/adapter_config.json +42 -0
  45. gemma-1b-tq-model/checkpoint-10/adapter_model.safetensors +3 -0
  46. gemma-1b-tq-model/checkpoint-10/added_tokens.json +3 -0
  47. gemma-1b-tq-model/checkpoint-10/optimizer.pt +3 -0
  48. gemma-1b-tq-model/checkpoint-10/rng_state.pth +3 -0
  49. gemma-1b-tq-model/checkpoint-10/scheduler.pt +3 -0
  50. gemma-1b-tq-model/checkpoint-10/special_tokens_map.json +33 -0
.gitattributes CHANGED
@@ -33,3 +33,106 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ gemma-12b-tq-model/checkpoint-2/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-12b-tq-model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-10/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-11/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-12/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-14/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-16/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-18/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-2/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-20/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-22/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-24/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-26/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-28/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-30/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-32/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-33/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-34/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-36/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-38/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-4/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-40/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-42/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-44/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-46/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-48/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-52/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-54/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-56/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-58/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-6/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-60/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-62/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-64/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-66/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-68/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-70/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-72/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-74/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-76/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-78/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-8/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-82/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-84/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-86/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-88/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-90/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-92/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-94/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-96/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/checkpoint-98/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-1b-tq-model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-27b-tq_sft_finetuned-model/checkpoint-129/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-27b-tq_sft_finetuned-model/checkpoint-131/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-27b-tq_sft_finetuned-model/checkpoint-133/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-27b-tq_sft_finetuned-model/checkpoint-256/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-27b-tq_sft_finetuned-model/checkpoint-264/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-27b-tq_sft_finetuned-model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-27b-tq_sft_finetuned-model-full/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-10/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-1040/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-116/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-1170/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-124/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-125/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-130/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-1300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-140/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-186/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-248/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-260/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-29/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-3/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-310/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-372/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-375/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-390/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-434/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-496/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-520/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-558/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-58/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-6/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-61/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-610/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-62/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-625/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-650/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-780/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-87/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-9/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/checkpoint-910/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ gemma-4b-tq_sft_finetuned-model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ may13-gemma-27b-tq_sft_finetuned-model/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ may13-gemma-27b-tq_sft_finetuned-model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ merged_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
=0.41.0 ADDED
File without changes
=0.6.0 ADDED
File without changes
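The empty files named `=0.41.0` and `=0.6.0` are almost certainly shell artifacts: running a command like `pip install peft>=0.6.0` without quoting the requirement lets the shell interpret `>=0.6.0` as an output redirection, which creates an empty file of that name. They carry no content and could be removed in a follow-up commit.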
DSPy_Optimization.ipynb ADDED
@@ -0,0 +1,415 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "8b3ee6e2-ca9c-40fa-b4c6-a9596f075f79",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-04-22T23:03:20.101831Z",
+ "iopub.status.busy": "2025-04-22T23:03:20.101435Z",
+ "iopub.status.idle": "2025-04-22T23:03:20.105088Z",
+ "shell.execute_reply": "2025-04-22T23:03:20.104580Z",
+ "shell.execute_reply.started": "2025-04-22T23:03:20.101804Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import dspy\n",
+ "from dspy.teleprompt import MIPROv2\n",
+ "from typing import List, Dict\n",
+ "import json\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import random\n",
+ "from tqdm import tqdm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "4ec9a29b-9162-4fe3-b32d-4de4397c6483",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-04-22T23:00:21.439753Z",
+ "iopub.status.busy": "2025-04-22T23:00:21.439342Z",
+ "iopub.status.idle": "2025-04-22T23:00:21.526091Z",
+ "shell.execute_reply": "2025-04-22T23:00:21.525575Z",
+ "shell.execute_reply.started": "2025-04-22T23:00:21.439727Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 4/4 [00:00<00:00, 77.75it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'input': {'src_text': 'Ma io che ne so, comandà? Io stavo a casa di mia madre, lo sapete.\\n\\nLo so.',\n",
+ " 'tgt_text': \"What do I know, Commander? I was at my mom's house, you know it.\\n\\nI knows.\",\n",
+ " 'src_prev': \"Questa è una linea. Qua faccio quello che voglio, è terra mia, la legge è mia. Dall'altro lato c'è un mondo fatto di spazzatura. Questa linea non l'ho mai oltrepassata. Impara chi è tua madre una volta per tutte. Tieni, questo era per te. Mà… Mà! Secondo me non è stata lei. Come al solito ti sei fatto prendere per il culo. Comandà, credo che non è stata lei. Carmine, sei uno stronzo. Robè, portalo via. Andiamocene.\",\n",
+ " 'src_next': 'E allora che altro vi devo dire? Tu non devi dire niente. Devi tenere la bocca chiusa. E non dire a nessuno quello che ti ho detto. Ma a nessuno però. Ho capito. Però devi tenere le orecchie aperte e ascoltare tutto quello che si dice qua dentro. Perché prima o poi, chi fa queste cose parla. Si deve atteggiare, si deve fare grosso. Che si è divertito con la moglie del comandante. Secondo me vi sbagliate, comandà. Non può essere stato nessuno che sta qua dentro. Lo so.',\n",
+ " 'tgt_prev': \"This is a line. Here I do whatever I want, it's my territory, it's my law. On the other side there's a world of trash. I've never crossed that line. Learn who your mother is, once and for all. Here, this was for you. Ma… Ma! I don't think it was her. As usual you let her fuck you around. Commander, I think she didn't do it. Carmine, you're an asshole. Robè, take him away. Let's go.\",\n",
+ " 'tgt_next': \"So what else should I say? You don't have to say anything. You have to keep your mouth shut. And don't tell anybody what I told you. To nobody. Got it. But keep your ears open and listen to what they say in here. Because sooner or later, guys who do such things talk. They need to swagger, act like big guys. Bragging they had fun with the Commander's wife. I think you're wrong, Commander. It can't have been anyone who's in here. I know.\",\n",
+ " 'src_lang': 'it',\n",
+ " 'tgt_lang': 'en'},\n",
+ " 'evaluation': {'Accuracy Issues': [],\n",
+ " 'Readability Issues': [],\n",
+ " 'Accuracy Score': '4',\n",
+ " 'Readability Score': '4',\n",
+ " 'Confidence Level': 'the_translation_is_excellent_without_any_error_spans_and_no_creative_liberties_were_taken',\n",
+ " 'Main Vs Alternate': 'Alternate Translated Text has marginally better quality',\n",
+ " 'Score': 32}}"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_path = \"/root/notebooks/MT_TQ/TQ/DataPrep_Prompting_Experiments/labeled_data/parsed/\"\n",
+ "json_files = [os.path.join(root, file) for root, _, files in os.walk(data_path) for file in files if file.endswith('.json') and 'PLDL' in file]\n",
+ "\n",
+ "training_samples = []\n",
+ "for json_file in tqdm(json_files):\n",
+ " with open(json_file, 'r') as file:\n",
+ " data = json.load(file)\n",
+ " sampled_items = random.sample(data[\"data\"], 20)\n",
+ " training_samples.extend(sampled_items)\n",
+ "\n",
+ "datapoints = []\n",
+ "\n",
+ "for sample in training_samples:\n",
+ " datapoint = {\"input\":{}}\n",
+ " datapoint[\"input\"][\"src_text\"] = sample[\"main_src_text\"]\n",
+ " datapoint[\"input\"][\"tgt_text\"] = sample[\"tgt_text\"]\n",
+ " datapoint[\"input\"][\"src_prev\"] = sample[\"tt_src_prev\"]\n",
+ " datapoint[\"input\"][\"src_next\"] = sample[\"tt_src_next\"]\n",
+ " datapoint[\"input\"][\"tgt_prev\"] = sample[\"tt_tgt_prev\"]\n",
+ " datapoint[\"input\"][\"tgt_next\"] = sample[\"tt_tgt_next\"]\n",
+ " datapoint[\"input\"][\"src_lang\"] = sample[\"src_lang\"]\n",
+ " datapoint[\"input\"][\"tgt_lang\"] = sample[\"tgt_lang\"]\n",
+ " datapoint[\"evaluation\"] = sample[\"labelers\"][0][\"annotation\"]\n",
+ " datapoints.append(datapoint)\n",
+ "\n",
+ "datapoint"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "bde34303-2f52-415f-b117-264e266b84f0",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-04-22T23:04:16.302953Z",
+ "iopub.status.busy": "2025-04-22T23:04:16.302402Z",
+ "iopub.status.idle": "2025-04-22T23:04:16.334330Z",
+ "shell.execute_reply": "2025-04-22T23:04:16.333644Z",
+ "shell.execute_reply.started": "2025-04-22T23:04:16.302928Z"
+ }
+ },
+ "outputs": [
+ {
+ "ename": "AttributeError",
+ "evalue": "module 'dspy' has no attribute 'Predictor'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[35], line 28\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m prediction\u001b[38;5;241m.\u001b[39mevaluation\n\u001b[1;32m 27\u001b[0m \u001b[38;5;66;03m# Create a custom predictor using your Netflix model\u001b[39;00m\n\u001b[0;32m---> 28\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mNetflixPredictor\u001b[39;00m(\u001b[43mdspy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPredictor\u001b[49m):\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, model):\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m model\n",
+ "\u001b[0;31mAttributeError\u001b[0m: module 'dspy' has no attribute 'Predictor'"
+ ]
+ }
+ ],
+ "source": [
+ "class TranslationQualityChecker(dspy.Signature):\n",
+ " \"\"\"Evaluate the quality of translation.\"\"\"\n",
+ " \n",
+ " context = dspy.InputField(desc=\"Source and target text with context\")\n",
+ " evaluation = dspy.OutputField(desc=\"Detailed evaluation of the translation quality\")\n",
+ "\n",
+ "class TranslationQualityModule(dspy.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.checker = dspy.Predict(TranslationQualityChecker)\n",
+ " \n",
+ " def forward(self, src_text, tgt_text, src_prev, tgt_prev, src_next, tgt_next, src_lang, tgt_lang):\n",
+ " context = {\n",
+ " \"source_text\": src_text,\n",
+ " \"target_text\": tgt_text,\n",
+ " \"source_previous\": src_prev,\n",
+ " \"target_previous\": tgt_prev,\n",
+ " \"source_next\": src_next,\n",
+ " \"target_next\": tgt_next,\n",
+ " \"source_language\": src_lang,\n",
+ " \"target_language\": tgt_lang\n",
+ " }\n",
+ " \n",
+ " prediction = self.checker(context=context)\n",
+ " return prediction.evaluation\n",
+ "\n",
+ "# Create a custom backend using your Netflix model\n",
+ "class NetflixBackend(dspy.BackendBase):\n",
+ " def __init__(self, model):\n",
+ " super().__init__()\n",
+ " self.model = model\n",
+ " \n",
+ " def complete(self, prompt, **kwargs):\n",
+ " messages = [{\"role\": \"user\", \"content\": prompt}]\n",
+ " response = self.model.generate(messages)\n",
+ " return response\n",
+ "\n",
+ " def completions(self, prompts, **kwargs):\n",
+ " return [self.complete(prompt, **kwargs) for prompt in prompts]\n",
+ "\n",
+ "# Prepare training data\n",
+ "def prepare_training_data(data_points):\n",
+ " compiled_data = []\n",
+ " for dp in data_points:\n",
+ " input_data = dp['input']\n",
+ " train_example = dspy.Example(\n",
+ " context={\n",
+ " \"source_text\": input_data['src_text'],\n",
+ " \"target_text\": input_data['tgt_text'],\n",
+ " \"source_previous\": input_data['src_prev'],\n",
+ " \"target_previous\": input_data['tgt_prev'],\n",
+ " \"source_next\": input_data['src_next'],\n",
+ " \"target_next\": input_data['tgt_next'],\n",
+ " \"source_language\": input_data['src_lang'],\n",
+ " \"target_language\": input_data['tgt_lang']\n",
+ " },\n",
+ " evaluation=dp['evaluation']\n",
+ " )\n",
+ " compiled_data.append(train_example)\n",
+ " return compiled_data\n",
+ "\n",
+ "def optimize_prompt(model, training_data, validation_data):\n",
+ " # Initialize DSPy with your custom backend\n",
+ " backend = NetflixBackend(model)\n",
+ " dspy.settings.configure(lm=backend)\n",
+ " \n",
+ " # Create the optimizer\n",
+ " optimizer = MIPROv2(\n",
+ " metric=\"exact_match\", # or another appropriate metric\n",
+ " max_rounds=5,\n",
+ " max_prompts=3,\n",
+ " temp=0.7\n",
+ " )\n",
+ " \n",
+ " # Compile the module\n",
+ " translation_module = TranslationQualityModule()\n",
+ " \n",
+ " # Optimize the prompt\n",
+ " optimized_module = optimizer.optimize(\n",
+ " module=translation_module,\n",
+ " trainset=training_data,\n",
+ " valset=validation_data,\n",
+ " metric=dspy.evaluate.answer_exact_match\n",
+ " )\n",
+ " \n",
+ " return optimized_module"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67a4583f-162c-4e2d-b061-798f6c676a28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class TranslationQualityAssessor(dspy.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.assess = dspy.ChainOfThought(TranslationQualitySignature)\n",
+ "\n",
+ " def forward(self, src_lang, tgt_lang, src_text, translation, src_prev=\"\", tgt_prev=\"\", src_next=\"\", tgt_next=\"\"):\n",
+ " context = f\"\"\"Previous Context:\n",
+ " Source: {src_prev}\n",
+ " Translation: {tgt_prev}\n",
+ " \n",
+ " Next Context:\n",
+ " Source: {src_next}\n",
+ " Translation: {tgt_next}\"\"\"\n",
+ "\n",
+ " result = self.assess(\n",
+ " context=context,\n",
+ " source=f\"Source ({src_lang}): {src_text}\",\n",
+ " translation=f\"Translation ({tgt_lang}): {translation}\"\n",
+ " )\n",
+ " \n",
+ " return result.evaluation\n",
+ "\n",
+ "class TranslationMetrics:\n",
+ " @staticmethod\n",
+ " def exact_match_score(pred, gold):\n",
+ " try:\n",
+ " pred_json = json.loads(pred)\n",
+ " gold_json = gold\n",
+ " \n",
+ " accuracy_match = (str(pred_json.get('Accuracy Score')) == str(gold_json.get('Accuracy Score')))\n",
+ " readability_match = (str(pred_json.get('Readability Score')) == str(gold_json.get('Readability Score')))\n",
+ " \n",
+ " return (accuracy_match and readability_match)\n",
+ " except:\n",
+ " return False\n",
+ " \n",
+ " @staticmethod\n",
+ " def partial_match_score(pred, gold):\n",
+ " try:\n",
+ " pred_json = json.loads(pred)\n",
+ " gold_json = gold\n",
+ " \n",
+ " # Score comparison\n",
+ " accuracy_diff = abs(float(pred_json.get('Accuracy Score', 0)) - float(gold_json.get('Accuracy Score', 0)))\n",
+ " readability_diff = abs(float(pred_json.get('Readability Score', 0)) - float(gold_json.get('Readability Score', 0)))\n",
+ " \n",
+ " # Issues comparison\n",
+ " pred_accuracy_issues = set(str(issue) for issue in pred_json.get('Accuracy Issues', []))\n",
+ " gold_accuracy_issues = set(str(issue) for issue in gold_json.get('Accuracy Issues', []))\n",
+ " pred_readability_issues = set(str(issue) for issue in pred_json.get('Readability Issues', []))\n",
+ " gold_readability_issues = set(str(issue) for issue in gold_json.get('Readability Issues', []))\n",
+ " \n",
+ " # Calculate Jaccard similarity for issues\n",
+ " accuracy_issues_sim = len(pred_accuracy_issues & gold_accuracy_issues) / max(1, len(pred_accuracy_issues | gold_accuracy_issues))\n",
+ " readability_issues_sim = len(pred_readability_issues & gold_readability_issues) / max(1, len(pred_readability_issues | gold_readability_issues))\n",
+ " \n",
+ " # Combine scores (0.6 weight to scores, 0.4 to issues similarity)\n",
+ " score_component = 1 - ((accuracy_diff + readability_diff) / 8)\n",
+ " issues_component = (accuracy_issues_sim + readability_issues_sim) / 2\n",
+ " \n",
+ " final_score = 0.6 * score_component + 0.4 * issues_component\n",
+ " return max(0, final_score)\n",
+ " except:\n",
+ " return 0\n",
+ "\n",
+ "def prepare_dataset(file_path):\n",
+ " with open(file_path, 'r') as f:\n",
+ " data = json.load(f)\n",
+ " \n",
+ " prepared_data = []\n",
+ " \n",
+ " for item in data:\n",
+ " example = dspy.Example(\n",
+ " context=f\"\"\"Previous Context:\n",
+ " Source: {item['src_prev']}\n",
+ " Translation: {item['tgt_prev']}\n",
+ " \n",
+ " Next Context:\n",
+ " Source: {item['src_next']}\n",
+ " Translation: {item['tgt_next']}\"\"\",\n",
+ " source=f\"Source ({item['src_lang']}): {item['src_text']}\",\n",
+ " translation=f\"Translation ({item['tgt_lang']}): {item['main_text']}\",\n",
+ " evaluation=json.dumps(item['evaluation'], ensure_ascii=False)\n",
+ " ).with_inputs(\"context\", \"source\", \"translation\")\n",
+ " \n",
+ " prepared_data.append(example)\n",
+ " \n",
+ " # Split data: 70% train, 15% dev, 15% test\n",
+ " train_size = int(0.7 * len(prepared_data))\n",
+ " dev_size = int(0.15 * len(prepared_data))\n",
+ " \n",
+ " train_data = prepared_data[:train_size]\n",
+ " dev_data = prepared_data[train_size:train_size + dev_size]\n",
+ " test_data = prepared_data[train_size + dev_size:]\n",
+ " \n",
+ " return train_data, dev_data, test_data\n",
+ "\n",
+ "def optimize_translation_quality_assessment():\n",
+ " # Initialize DSPy\n",
+ " lm = TranslationQualityLM()\n",
+ " dspy.settings.configure(lm=lm)\n",
+ " \n",
+ " # Load and prepare dataset\n",
+ " train_data, dev_data, test_data = prepare_dataset('translation_quality_dataset.json')\n",
+ " \n",
+ " # Create evaluator\n",
+ " evaluator = Evaluate(\n",
+ " metrics={\n",
+ " 'exact_match': TranslationMetrics.exact_match_score,\n",
+ " 'partial_match': TranslationMetrics.partial_match_score\n",
+ " }\n",
+ " )\n",
+ " \n",
+ " # Initialize module\n",
+ " assessor = TranslationQualityAssessor()\n",
+ " \n",
+ " # Initialize MIPROv2 optimizer\n",
+ " optimizer = dspy.MIPROv2(\n",
+ " metric=lambda x: x['partial_match'],\n",
+ " max_rounds=5, # Number of optimization rounds\n",
+ " max_traces=10, # Number of traces per round\n",
+ " max_depth=3, # Maximum depth of reasoning chains\n",
+ " num_candidate_prompts=5, # Number of candidate prompts to generate\n",
+ " num_rounds_per_prompt=3, # Number of rounds per candidate prompt\n",
+ " temperature=0.7,\n",
+ " verbose=True\n",
+ " )\n",
+ " \n",
+ " # Compile the module with optimization\n",
+ " compiled_assessor = optimizer.compile(\n",
+ " assessor,\n",
+ " trainset=train_data,\n",
+ " devset=dev_data,\n",
+ " eval_kwargs={\n",
+ " 'metric': 'partial_match',\n",
+ " 'num_threads': 4,\n",
+ " 'batch_size': 8\n",
+ " }\n",
+ " )\n",
+ " \n",
+ " # Evaluate on test set\n",
+ " results = []\n",
+ " for example in test_data:\n",
+ " pred = compiled_assessor(\n",
+ " context=example.context,\n",
+ " source=example.source,\n",
+ " translation=example.translation\n",
+ " )\n",
+ " \n",
+ " result = evaluator.evaluate(\n",
+ " predictions=[pred],\n",
+ " ground_truth=[example.evaluation]\n",
+ " )\n",
+ " results.append(result)\n",
+ " \n",
+ " # Calculate and print final metrics\n",
+ " avg_exact_match = np.mean([r['exact_match'] for r in results])\n",
+ " avg_partial_match = np.mean([r['partial_match'] for r in results])\n",
+ " \n",
+ " print(f\"Average Exact Match Score: {avg_exact_match:.3f}\")\n",
+ " print(f\"Average Partial Match Score: {avg_partial_match:.3f}\")\n",
+ " \n",
+ " return compiled_assessor\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ " optimized_assessor = optimize_translation_quality_assessment()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "timedlibs",
+ "language": "python",
+ "name": "timedlibs"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
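A note on the AttributeError captured in this notebook: recent DSPy releases expose `dspy.Predict` and `dspy.Module` but no `dspy.Predictor`, and custom models are wired in through `dspy.LM` rather than by subclassing a backend base class (there is no `dspy.BackendBase`). A minimal sketch of how the quality-checker signature above could be driven with the current API, assuming dspy >= 2.5; the model name is a placeholder, not anything used in this repo:

```python
import dspy

# Minimal sketch, assuming dspy >= 2.5. "openai/gpt-4o-mini" is a
# placeholder for whatever endpoint actually backs these experiments,
# and this provider requires OPENAI_API_KEY in the environment.
lm = dspy.LM("openai/gpt-4o-mini", temperature=0.7)
dspy.configure(lm=lm)

class TranslationQualityChecker(dspy.Signature):
    """Evaluate the quality of a translation."""
    context = dspy.InputField(desc="Source and target text with context")
    evaluation = dspy.OutputField(desc="Detailed evaluation of the translation quality")

# dspy.Predict (not dspy.Predictor) builds the prompt from the signature.
checker = dspy.Predict(TranslationQualityChecker)
pred = checker(context="Source (it): Lo so. | Translation (en): I know.")
print(pred.evaluation)
```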
InstructionFinetuning.ipynb ADDED
@@ -0,0 +1,1277 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "e6d20008-a91c-4618-baa0-5991e031f1bd",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-05-13T21:48:57.985184Z",
+ "iopub.status.busy": "2025-05-13T21:48:57.984795Z",
+ "iopub.status.idle": "2025-05-13T21:51:48.369715Z",
+ "shell.execute_reply": "2025-05-13T21:51:48.368907Z",
+ "shell.execute_reply.started": "2025-05-13T21:48:57.985144Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/root/notebooks/MT_TQ/Libraries/timedlibs/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import AutoProcessor, Gemma3ForConditionalGeneration, Trainer, TrainingArguments, DataCollatorForSeq2Seq\n",
+ "import torch\n",
+ "from peft import LoraConfig, get_peft_model\n",
+ "\n",
+ "import os\n",
+ "from tqdm import tqdm\n",
+ "import json\n",
+ "\n",
+ "import random\n",
+ "from datasets import load_dataset\n",
+ "from datasets import Dataset, DatasetDict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "67f95fc8-a9d8-48cf-a551-7d30781cdb55",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-05-13T21:53:58.075473Z",
+ "iopub.status.busy": "2025-05-13T21:53:58.074767Z",
+ "iopub.status.idle": "2025-05-13T21:53:58.767860Z",
+ "shell.execute_reply": "2025-05-13T21:53:58.767319Z",
+ "shell.execute_reply.started": "2025-05-13T21:53:58.075446Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 8/8 [00:00<00:00, 22.76it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['messages'],\n",
+ " num_rows: 309\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['messages'],\n",
+ " num_rows: 343\n",
+ " })\n",
+ "})\n"
+ ]
+ }
+ ],
+ "source": [
+ "data_path = (\n",
+ " \"/root/notebooks/MT_TQ/Caches/May2025/tquality.annotated.data/parsed/pldl/\"\n",
+ ")\n",
+ "\n",
+ "json_files = [\n",
+ " os.path.join(root, file)\n",
+ " for root, _, files in os.walk(data_path)\n",
+ " for file in files\n",
+ " if file.endswith(\".json\")\n",
+ "]\n",
+ "\n",
+ "training_samples = []\n",
+ "testing_samples = []\n",
+ "\n",
+ "for json_file in tqdm(json_files):\n",
+ " with open(json_file, \"r\") as file:\n",
+ " data = json.load(file)\n",
+ " sampled_items = data[\"data\"]\n",
+ " if \"test\" in json_file:\n",
+ " testing_samples.extend(sampled_items)\n",
+ " if \"train\" in json_file:\n",
+ " training_samples.extend(sampled_items)\n",
+ "\n",
+ "training_datapoints = []\n",
+ "testing_datapoints = []\n",
+ "\n",
+ "for idx, sample in enumerate(training_samples):\n",
+ " datapoint = {\"input\": {}}\n",
+ " datapoint[\"input\"][\"src_text\"] = sample[\"src_text\"]\n",
+ " datapoint[\"input\"][\"tgt_text\"] = sample[\"main_tgt_text\"]\n",
+ " datapoint[\"input\"][\"src_prev\"] = sample[\"tt_src_prev\"]\n",
+ " datapoint[\"input\"][\"src_next\"] = sample[\"tt_src_next\"]\n",
+ " datapoint[\"input\"][\"tgt_prev\"] = sample[\"tt_tgt_prev\"]\n",
+ " datapoint[\"input\"][\"tgt_next\"] = sample[\"tt_tgt_next\"]\n",
+ " datapoint[\"input\"][\"src_lang\"] = sample[\"src_lang\"]\n",
+ " datapoint[\"input\"][\"tgt_lang\"] = sample[\"tgt_lang\"]\n",
+ " datapoint[\"input\"][\"start_frame\"] = sample[\"start_frame\"]\n",
+ " datapoint[\"input\"][\"end_frame\"] = sample[\"end_frame\"]\n",
+ " datapoint[\"input\"][\"title_id\"] = sample[\"title_id\"]\n",
+ " datapoint[\"input\"][\"alt_tgt_text\"]= sample[\"alt_tgt_text\"]\n",
+ " datapoint[\"input\"][\"id\"] = idx\n",
+ " datapoint[\"evaluation\"] = sample[\"labelers\"][0][\"annotation\"]\n",
+ " training_datapoints.append(datapoint)\n",
+ "\n",
+ "for idx, sample in enumerate(testing_samples):\n",
+ " datapoint = {\"input\": {}}\n",
+ " datapoint[\"input\"][\"src_text\"] = sample[\"src_text\"]\n",
+ " datapoint[\"input\"][\"tgt_text\"] = sample[\"main_tgt_text\"]\n",
+ " datapoint[\"input\"][\"src_prev\"] = sample[\"tt_src_prev\"]\n",
+ " datapoint[\"input\"][\"src_next\"] = sample[\"tt_src_next\"]\n",
+ " datapoint[\"input\"][\"tgt_prev\"] = sample[\"tt_tgt_prev\"]\n",
+ " datapoint[\"input\"][\"tgt_next\"] = sample[\"tt_tgt_next\"]\n",
+ " datapoint[\"input\"][\"src_lang\"] = sample[\"src_lang\"]\n",
+ " datapoint[\"input\"][\"tgt_lang\"] = sample[\"tgt_lang\"]\n",
+ " datapoint[\"input\"][\"start_frame\"] = sample[\"start_frame\"]\n",
+ " datapoint[\"input\"][\"end_frame\"] = sample[\"end_frame\"]\n",
+ " datapoint[\"input\"][\"title_id\"] = sample[\"title_id\"]\n",
+ " datapoint[\"input\"][\"alt_tgt_text\"]= sample[\"alt_tgt_text\"]\n",
+ " datapoint[\"input\"][\"id\"] = idx\n",
+ " datapoint[\"evaluation\"] = sample[\"labelers\"][0][\"annotation\"]\n",
+ " testing_datapoints.append(datapoint)\n",
+ "\n",
+ "system_message = \"You are a helpful assistant who is an expert in estimating quality of translations.\"\n",
+ "\n",
+ "output_template = '''\n",
+ "{\n",
+ " \"Accuracy Issues\": [\n",
+ " {\n",
+ " \"Error Span\": \"\",\n",
+ " \"Error Explanation\": \"\",\n",
+ " \"Error Quality Category\": \"\",\n",
+ " \"Error Quality Tags\": [],\n",
+ " \"Error Severity\": \"\"\n",
+ " }\n",
+ " ],\n",
+ " \"Accuracy Score\": \"\",\n",
+ " \"Readability Issues\": [\n",
+ " {\n",
+ " \"Error Span\": \"\",\n",
+ " \"Error Explanation\": \"\",\n",
+ " \"Error Quality Category\": \"\",\n",
+ " \"Error Quality Tags\": [],\n",
+ " \"Error Severity\": \"\"\n",
+ " }\n",
+ " ],\n",
+ " \"Readability Score\": \"\"\n",
+ "}'''\n",
+ "\n",
+ "def create_conversation(input_sample, output_sample):\n",
+ " return {\n",
+ " \"messages\": [\n",
+ " # {\"role\": \"system\", \"content\": system_message},\n",
+ " {\"role\": \"user\", \"content\": input_sample},\n",
+ " {\"role\": \"assistant\", \"content\": output_sample}\n",
+ " ]\n",
+ " }\n",
+ "\n",
+ "def create_dataset(datapoints, template_string):\n",
+ " dataset = []\n",
+ " meta = []\n",
+ " for datapoint in datapoints:\n",
+ " src_text = datapoint['input']['src_text']\n",
+ " tgt_text = datapoint['input']['tgt_text']\n",
+ " src_prev = datapoint['input']['src_prev']\n",
+ " src_next = datapoint['input']['src_next'] \n",
+ " tgt_prev = datapoint['input']['tgt_prev']\n",
+ " tgt_next = datapoint['input']['tgt_next']\n",
+ " src_lang = datapoint['input']['src_lang']\n",
+ " tgt_lang = datapoint['input']['tgt_lang']\n",
+ " \n",
+ " start_frame = datapoint['input']['start_frame']\n",
+ " end_frame = datapoint['input']['end_frame']\n",
+ " title_id = datapoint['input']['title_id']\n",
+ " output = datapoint['evaluation']\n",
+ " idx = datapoint['input']['id']\n",
+ " if len(output['Accuracy Issues']) != 0 or len(output['Readability Issues']) != 0:\n",
+ " item = template_string.format(src_text=src_text, tgt_text=tgt_text, \n",
+ " src_prev=src_prev, src_next=src_next, \n",
+ " tgt_prev=tgt_prev, tgt_next=tgt_next, \n",
+ " src_lang=src_lang, tgt_lang=tgt_lang,\n",
+ " template=output_template)\n",
+ " \n",
+ " dataset.append(create_conversation(item, json.dumps(output)))\n",
+ " meta.append({\"id\": idx, \"start_frame\": start_frame, \"end_frame\": end_frame, \"title_id\": title_id})\n",
+ " \n",
+ " return dataset, meta\n",
+ " \n",
+ "def dataset_prep(datapoints):\n",
+ " with open(\"prompts.txt\") as file:\n",
+ " template_string = file.read()\n",
+ " dataset, meta = create_dataset(datapoints, template_string)\n",
+ " return dataset, meta\n",
+ "\n",
+ "train_dataset, train_meta = dataset_prep(training_datapoints)\n",
+ "test_dataset, test_meta = dataset_prep(testing_datapoints)\n",
+ "\n",
+ "dataset = {\"train\": train_dataset, \"test\": test_dataset}\n",
+ "\n",
+ "def convert_to_hf_dataset(dataset):\n",
+ " train_dataset = Dataset.from_list(dataset['train'])\n",
+ " test_dataset = Dataset.from_list(dataset['test'])\n",
+ " \n",
+ " hf_dataset = DatasetDict({\n",
+ " 'train': train_dataset,\n",
+ " 'test': test_dataset\n",
+ " })\n",
+ " \n",
+ " return hf_dataset\n",
+ "\n",
+ "hf_dataset = convert_to_hf_dataset(dataset)\n",
+ "print(hf_dataset)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "8b52f143-1077-4da6-ac92-b1dce5cdc17c",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-05-13T21:54:12.568533Z",
+ "iopub.status.busy": "2025-05-13T21:54:12.568078Z",
+ "iopub.status.idle": "2025-05-13T21:54:49.724121Z",
+ "shell.execute_reply": "2025-05-13T21:54:49.723481Z",
+ "shell.execute_reply.started": "2025-05-13T21:54:12.568507Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading checkpoint shards: 100%|██████████| 12/12 [00:18<00:00, 1.58s/it]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import torch\n",
+ "from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForImageTextToText, BitsAndBytesConfig\n",
+ "from transformers import AutoProcessor, Gemma3ForConditionalGeneration\n",
+ "device = torch.device(\"cuda:0\")\n",
+ "\n",
+ "# Hugging Face model id\n",
+ "model_id = \"google/gemma-3-27b-it\" # or `google/gemma-3-4b-pt`, `google/gemma-3-12b-pt`, `google/gemma-3-27b-pt`\n",
+ "\n",
+ "# Select model class based on id\n",
+ "if model_id == \"google/gemma-3-27b-it\":\n",
+ " model_class = Gemma3ForConditionalGeneration\n",
+ "else:\n",
+ " model_class = AutoModelForImageTextToText\n",
+ "\n",
+ "torch_dtype = torch.bfloat16\n",
+ "\n",
+ "model_kwargs = dict(\n",
+ " attn_implementation=\"eager\",\n",
+ " torch_dtype=torch_dtype,\n",
+ " device_map=\"auto\",\n",
+ ")\n",
+ "\n",
+ "model = model_class.from_pretrained(model_id, **model_kwargs)\n",
+ "tokenizer = AutoTokenizer.from_pretrained(\"google/gemma-3-27b-it\") # Load the Instruction Tokenizer to use the official Gemma template"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "8443dfd8-6193-480c-9937-f6e0c43a9f56",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-05-13T21:55:12.713958Z",
+ "iopub.status.busy": "2025-05-13T21:55:12.713495Z",
+ "iopub.status.idle": "2025-05-13T21:55:12.717707Z",
+ "shell.execute_reply": "2025-05-13T21:55:12.717199Z",
+ "shell.execute_reply.started": "2025-05-13T21:55:12.713930Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from peft import LoraConfig\n",
+ "\n",
+ "peft_config = LoraConfig(\n",
+ " lora_alpha=128,\n",
+ " lora_dropout=0.05,\n",
+ " r=16,\n",
+ " bias=\"none\",\n",
+ " target_modules=\"all-linear\",\n",
+ " task_type=\"CAUSAL_LM\",\n",
+ " modules_to_save=[\"lm_head\", \"embed_tokens\"] # make sure to save the lm_head and embed_tokens as you train the special tokens\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "8f2b8371-ba1b-44ff-9462-d0c90335f82a",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-05-13T21:55:22.076515Z",
+ "iopub.status.busy": "2025-05-13T21:55:22.076029Z",
+ "iopub.status.idle": "2025-05-13T21:55:22.783524Z",
+ "shell.execute_reply": "2025-05-13T21:55:22.782937Z",
+ "shell.execute_reply.started": "2025-05-13T21:55:22.076489Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from trl import SFTConfig\n",
+ "\n",
+ "args = SFTConfig(\n",
+ " output_dir=\"may13-gemma-27b-tq_sft_finetuned-model\",\n",
+ " max_seq_length=2048,\n",
+ " packing=True,\n",
+ " num_train_epochs=1,\n",
+ " per_device_train_batch_size=1,\n",
+ " gradient_accumulation_steps=4,\n",
+ " gradient_checkpointing=True,\n",
+ " optim=\"adamw_torch_fused\",\n",
+ " logging_steps=1,\n",
+ " save_strategy=\"epoch\",\n",
+ " learning_rate=1e-4,\n",
+ " fp16=True if torch_dtype == torch.float16 else False,\n",
+ " bf16=True if torch_dtype == torch.bfloat16 else False,\n",
+ " max_grad_norm=0.3,\n",
+ " warmup_ratio=0.03,\n",
+ " lr_scheduler_type=\"constant\",\n",
+ " push_to_hub=True,\n",
+ " report_to=\"tensorboard\",\n",
+ " dataset_kwargs={\n",
+ " \"add_special_tokens\": False,\n",
+ " \"append_concat_token\": True,\n",
+ " },\n",
+ " no_cuda=False,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "2be55b87-70c9-4973-b0db-33154c272e47",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-05-13T21:55:25.765385Z",
+ "iopub.status.busy": "2025-05-13T21:55:25.764949Z",
+ "iopub.status.idle": "2025-05-13T21:55:36.592163Z",
+ "shell.execute_reply": "2025-05-13T21:55:36.591614Z",
+ "shell.execute_reply.started": "2025-05-13T21:55:25.765360Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Converting train dataset to ChatML: 100%|██████████| 309/309 [00:00<00:00, 9533.70 examples/s]\n",
+ "Applying chat template to train dataset: 100%|██████████| 309/309 [00:00<00:00, 4443.06 examples/s]\n",
+ "Tokenizing train dataset: 100%|██████████| 309/309 [00:01<00:00, 226.22 examples/s]\n",
+ "Packing train dataset: 100%|██████████| 309/309 [00:00<00:00, 102364.74 examples/s]\n",
+ "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from trl import SFTTrainer\n",
+ "\n",
+ "# Create Trainer object\n",
+ "trainer = SFTTrainer(\n",
+ " model=model,\n",
+ " args=args,\n",
+ " train_dataset=hf_dataset[\"train\"],\n",
+ " peft_config=peft_config,\n",
+ " processing_class=tokenizer\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "d8d82767-27ed-48ed-ad22-3f3cf2dff15e",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-05-13T22:00:25.107226Z",
+ "iopub.status.busy": "2025-05-13T22:00:25.106569Z",
+ "iopub.status.idle": "2025-05-13T22:27:35.945604Z",
+ "shell.execute_reply": "2025-05-13T22:27:35.944775Z",
+ "shell.execute_reply.started": "2025-05-13T22:00:25.107196Z"
+ },
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " <div>\n",
+ " \n",
+ " <progress value='80' max='80' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+ " [80/80 22:44, Epoch 0/1]\n",
+ " </div>\n",
+ " <table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: left;\">\n",
+ " <th>Step</th>\n",
+ " <th>Training Loss</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>10.801900</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>8.381400</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>3</td>\n",
+ " <td>6.970200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>4</td>\n",
+ " <td>5.784300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>5</td>\n",
+ " <td>4.970800</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>6</td>\n",
+ " <td>4.389700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>7</td>\n",
+ " <td>4.325000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>8</td>\n",
+ " <td>3.557000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>9</td>\n",
+ " <td>3.357700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>10</td>\n",
+ " <td>3.092500</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>11</td>\n",
+ " <td>3.170300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>12</td>\n",
+ " <td>2.648500</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>13</td>\n",
+ " <td>3.067800</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>14</td>\n",
+ " <td>2.377100</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>15</td>\n",
+ " <td>2.847700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>16</td>\n",
+ " <td>2.628800</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>17</td>\n",
+ " <td>2.630800</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>18</td>\n",
+ " <td>2.820900</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>19</td>\n",
+ " <td>2.596700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>20</td>\n",
+ " <td>2.675300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>21</td>\n",
+ " <td>2.846300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>22</td>\n",
+ " <td>2.706700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>23</td>\n",
+ " <td>2.645100</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>24</td>\n",
+ " <td>2.214600</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>25</td>\n",
+ " <td>2.790700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>26</td>\n",
+ " <td>2.640700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>27</td>\n",
+ " <td>2.908900</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>28</td>\n",
+ " <td>2.690400</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>29</td>\n",
+ " <td>2.807200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>30</td>\n",
+ " <td>2.713600</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>31</td>\n",
+ " <td>2.563200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>32</td>\n",
+ " <td>2.412700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>33</td>\n",
+ " <td>2.627700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>34</td>\n",
+ " <td>2.431800</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>35</td>\n",
+ " <td>2.240600</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>36</td>\n",
+ " <td>2.650300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>37</td>\n",
+ " <td>2.014900</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>38</td>\n",
+ " <td>2.463100</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>39</td>\n",
+ " <td>2.283300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>40</td>\n",
+ " <td>2.450500</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>41</td>\n",
+ " <td>2.570400</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>42</td>\n",
+ " <td>2.550500</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>43</td>\n",
+ " <td>2.530600</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>44</td>\n",
+ " <td>2.551400</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>45</td>\n",
+ " <td>2.383000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>46</td>\n",
+ " <td>2.550500</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>47</td>\n",
+ " <td>2.575900</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>48</td>\n",
+ " <td>2.494300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>49</td>\n",
+ " <td>2.387200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>50</td>\n",
+ " <td>2.318800</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>51</td>\n",
+ " <td>2.365200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>52</td>\n",
+ " <td>2.190100</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>53</td>\n",
+ " <td>2.419100</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>54</td>\n",
+ " <td>2.290900</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>55</td>\n",
+ " <td>2.152500</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>56</td>\n",
+ " <td>2.398700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>57</td>\n",
+ " <td>2.982500</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>58</td>\n",
+ " <td>2.380200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>59</td>\n",
+ " <td>2.357500</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>60</td>\n",
+ " <td>2.386300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>61</td>\n",
+ " <td>2.741300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>62</td>\n",
+ " <td>2.850300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>63</td>\n",
+ " <td>2.682100</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64</td>\n",
+ " <td>2.972100</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>65</td>\n",
+ " <td>2.237800</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>66</td>\n",
+ " <td>2.518300</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>67</td>\n",
+ " <td>2.520700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>68</td>\n",
+ " <td>2.122700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>69</td>\n",
+ " <td>2.210200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>70</td>\n",
+ " <td>2.414000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>71</td>\n",
+ " <td>2.348200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>72</td>\n",
+ " <td>2.470800</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>73</td>\n",
+ " <td>2.417400</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>74</td>\n",
+ " <td>2.562900</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>75</td>\n",
+ " <td>2.286800</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>76</td>\n",
+ " <td>2.671400</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>77</td>\n",
+ " <td>2.176200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>78</td>\n",
+ " <td>2.284200</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>79</td>\n",
+ " <td>2.354700</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>80</td>\n",
+ " <td>2.363400</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table><p>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "trainer.train()\n",
+ "trainer.save_model()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "2398696f-eeb8-45d1-8dee-ed88a7ac140b",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-05-13T22:34:47.172016Z",
+ "iopub.status.busy": "2025-05-13T22:34:47.171574Z",
+ "iopub.status.idle": "2025-05-13T22:39:06.055171Z",
+ "shell.execute_reply": "2025-05-13T22:39:06.054429Z",
+ "shell.execute_reply.started": "2025-05-13T22:34:47.171989Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "('/root/notebooks/MT_TQ/TQ/TQTune/gemma-27b-tq_sft_finetuned-model-full/tokenizer_config.json',\n",
+ " '/root/notebooks/MT_TQ/TQ/TQTune/gemma-27b-tq_sft_finetuned-model-full/special_tokens_map.json',\n",
+ " '/root/notebooks/MT_TQ/TQ/TQTune/gemma-27b-tq_sft_finetuned-model-full/tokenizer.model',\n",
+ " '/root/notebooks/MT_TQ/TQ/TQTune/gemma-27b-tq_sft_finetuned-model-full/added_tokens.json',\n",
+ " '/root/notebooks/MT_TQ/TQ/TQTune/gemma-27b-tq_sft_finetuned-model-full/tokenizer.json')"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "lora_model = trainer.model\n",
+ "merged_model = lora_model.merge_and_unload()\n",
+ "# Save the model with fused weights\n",
+ "merged_model.save_pretrained('/root/notebooks/MT_TQ/TQ/TQTune/gemma-27b-tq_sft_finetuned-model-full')\n",
808
+ "trainer.tokenizer.save_pretrained('/root/notebooks/MT_TQ/TQ/TQTune/gemma-27b-tq_sft_finetuned-model-full')"
809
+ ]
810
+ },
811
+ {
812
+ "cell_type": "code",
813
+ "execution_count": 1,
814
+ "id": "8b811a84-0cdb-4b40-bb96-d6e6f27d41d3",
815
+ "metadata": {
816
+ "execution": {
817
+ "iopub.execute_input": "2025-05-08T21:17:00.794785Z",
818
+ "iopub.status.busy": "2025-05-08T21:17:00.794339Z",
819
+ "iopub.status.idle": "2025-05-08T21:17:18.309148Z",
820
+ "shell.execute_reply": "2025-05-08T21:17:18.308319Z",
821
+ "shell.execute_reply.started": "2025-05-08T21:17:00.794761Z"
822
+ }
823
+ },
824
+ "outputs": [
825
+ {
826
+ "ename": "NameError",
827
+ "evalue": "name 'model' is not defined",
828
+ "output_type": "error",
829
+ "traceback": [
830
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
831
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
832
+ "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Merge LoRA weights into the base model\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name, param \u001b[38;5;129;01min\u001b[39;00m \u001b[43mmodel\u001b[49m\u001b[38;5;241m.\u001b[39mnamed_parameters():\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mpeft_model\u001b[38;5;241m.\u001b[39mlora_weights:\n\u001b[1;32m 4\u001b[0m param\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m trainer\u001b[38;5;241m.\u001b[39mpeft_model\u001b[38;5;241m.\u001b[39mlora_weights[name]\n",
833
+ "\u001b[0;31mNameError\u001b[0m: name 'model' is not defined"
834
+ ]
835
+ }
836
+ ],
837
+ "source": [
838
+ "# Merge LoRA weights into the base model\n",
839
+ "for name, param in model.named_parameters():\n",
840
+ " if name in trainer.peft_model.lora_weights:\n",
841
+ " param.data += trainer.peft_model.lora_weights[name]\n",
842
+ "\n",
843
+ "# Save the model with fused weights\n",
844
+ "model.save_pretrained('/root/notebooks/MT_TQ/TQ/TQTune/gemma-27b-tq_sft_finetuned-model-full')\n",
845
+ "tokenizer.save_pretrained('/root/notebooks/MT_TQ/TQ/TQTune/gemma-27b-tq_sft_finetuned-model-full')"
846
+ ]
847
+ },
848
+ {
849
+ "cell_type": "code",
850
+ "execution_count": 9,
851
+ "id": "e5b4930d-92c5-46e8-9163-6e7f722e0c99",
852
+ "metadata": {
853
+ "execution": {
854
+ "iopub.execute_input": "2025-05-08T19:13:24.762234Z",
855
+ "iopub.status.busy": "2025-05-08T19:13:24.761972Z",
856
+ "iopub.status.idle": "2025-05-08T19:13:50.993002Z",
857
+ "shell.execute_reply": "2025-05-08T19:13:50.992329Z",
858
+ "shell.execute_reply.started": "2025-05-08T19:13:24.762215Z"
859
+ }
860
+ },
861
+ "outputs": [
862
+ {
863
+ "name": "stderr",
864
+ "output_type": "stream",
865
+ "text": [
866
+ "Loading checkpoint shards: 100%|██████████| 12/12 [00:19<00:00, 1.60s/it]\n"
867
+ ]
868
+ }
869
+ ],
870
+ "source": [
871
+ "import torch\n",
872
+ "from transformers import pipeline\n",
873
+ "from random import randint\n",
874
+ "import re\n",
875
+ "\n",
876
+ "model_id = \"google/gemma-3-27b-it\"\n",
877
+ "model = model_class.from_pretrained(\n",
878
+ " model_id,\n",
879
+ " device_map=\"auto\",\n",
880
+ " torch_dtype=torch_dtype,\n",
881
+ " attn_implementation=\"eager\",\n",
882
+ ")\n",
883
+ "tokenizer = AutoTokenizer.from_pretrained(model_id)\n"
884
+ ]
885
+ },
886
+ {
887
+ "cell_type": "code",
888
+ "execution_count": 10,
889
+ "id": "5a428dea-261a-4c74-89a8-1b62d7ade5ab",
890
+ "metadata": {
891
+ "execution": {
892
+ "iopub.execute_input": "2025-05-08T19:13:50.999539Z",
893
+ "iopub.status.busy": "2025-05-08T19:13:50.999160Z",
894
+ "iopub.status.idle": "2025-05-08T19:15:04.024652Z",
895
+ "shell.execute_reply": "2025-05-08T19:15:04.022626Z",
896
+ "shell.execute_reply.started": "2025-05-08T19:13:50.999517Z"
897
+ }
898
+ },
899
+ "outputs": [
900
+ {
901
+ "ename": "NameError",
902
+ "evalue": "name 'trainer' is not defined",
903
+ "output_type": "error",
904
+ "traceback": [
905
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
906
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
907
+ "Cell \u001b[0;32mIn[10], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Merge LoRA weights into the base model\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name, param \u001b[38;5;129;01min\u001b[39;00m model\u001b[38;5;241m.\u001b[39mnamed_parameters():\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m \u001b[43mtrainer\u001b[49m\u001b[38;5;241m.\u001b[39mpeft_model\u001b[38;5;241m.\u001b[39mlora_weights:\n\u001b[1;32m 4\u001b[0m param\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m trainer\u001b[38;5;241m.\u001b[39mpeft_model\u001b[38;5;241m.\u001b[39mlora_weights[name]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Save the model with fused weights\u001b[39;00m\n",
908
+ "\u001b[0;31mNameError\u001b[0m: name 'trainer' is not defined"
909
+ ]
910
+ }
911
+ ],
912
+ "source": []
913
+ },
914
+ {
915
+ "cell_type": "code",
916
+ "execution_count": null,
917
+ "id": "c7e7172e-db49-40f3-a0d6-9a87a3b2cf80",
918
+ "metadata": {
919
+ "execution": {
920
+ "iopub.status.busy": "2025-05-08T19:15:04.026597Z",
921
+ "iopub.status.idle": "2025-05-08T19:15:04.026984Z",
922
+ "shell.execute_reply": "2025-05-08T19:15:04.026875Z",
923
+ "shell.execute_reply.started": "2025-05-08T19:15:04.026863Z"
924
+ },
925
+ "scrolled": true
926
+ },
927
+ "outputs": [],
928
+ "source": [
929
+ "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)\n",
930
+ "rand_idx = randint(0, len(dataset[\"test\"]))\n",
931
+ "test_sample = hf_dataset[\"test\"][rand_idx]\n",
932
+ "stop_token_ids = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids(\"<end_of_turn>\")]\n",
933
+ "prompt = pipe.tokenizer.apply_chat_template(test_sample[\"messages\"][:1], tokenize=False, add_generation_prompt=True)\n",
934
+ "\n",
935
+ "outputs = pipe(prompt, max_new_tokens=1024, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=stop_token_ids, disable_compile=True)"
936
+ ]
937
+ },
938
+ {
939
+ "cell_type": "code",
940
+ "execution_count": null,
941
+ "id": "de05b438-ca77-4b95-b2b1-32ea7ae033a5",
942
+ "metadata": {
943
+ "execution": {
944
+ "iopub.status.busy": "2025-05-08T19:15:04.028819Z",
945
+ "iopub.status.idle": "2025-05-08T19:15:04.029072Z",
946
+ "shell.execute_reply": "2025-05-08T19:15:04.028971Z",
947
+ "shell.execute_reply.started": "2025-05-08T19:15:04.028960Z"
948
+ }
949
+ },
950
+ "outputs": [],
951
+ "source": [
952
+ "start = outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip().find(\"{\")\n",
953
+ "end = outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip().rfind(\"}\")\n",
954
+ "print(start, end)\n",
955
+ "print(outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip()[start:end + 1])\n",
956
+ "json.loads(outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip()[start:end + 1])\n",
957
+ "rand_idx"
958
+ ]
959
+ },
960
+ {
961
+ "cell_type": "code",
962
+ "execution_count": null,
963
+ "id": "60b3da99-0edc-4ef6-b0e0-be7d046eaa02",
964
+ "metadata": {
965
+ "execution": {
966
+ "iopub.status.busy": "2025-05-08T19:15:04.030913Z",
967
+ "iopub.status.idle": "2025-05-08T19:15:04.031227Z",
968
+ "shell.execute_reply": "2025-05-08T19:15:04.031122Z",
969
+ "shell.execute_reply.started": "2025-05-08T19:15:04.031111Z"
970
+ }
971
+ },
972
+ "outputs": [],
973
+ "source": [
974
+ "json.loads(hf_dataset[\"test\"][81][\"messages\"][1]['content'])"
975
+ ]
976
+ },
977
+ {
978
+ "cell_type": "code",
979
+ "execution_count": null,
980
+ "id": "cdc44250-e3b9-4870-bce5-23f475023962",
981
+ "metadata": {
982
+ "execution": {
983
+ "iopub.status.busy": "2025-05-08T19:15:04.032999Z",
984
+ "iopub.status.idle": "2025-05-08T19:15:04.033327Z",
985
+ "shell.execute_reply": "2025-05-08T19:15:04.033207Z",
986
+ "shell.execute_reply.started": "2025-05-08T19:15:04.033196Z"
987
+ },
988
+ "scrolled": true
989
+ },
990
+ "outputs": [],
991
+ "source": [
992
+ "import torch\n",
993
+ "from transformers import pipeline\n",
994
+ "from random import randint\n",
995
+ "import re\n",
996
+ "from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForImageTextToText, BitsAndBytesConfig\n",
997
+ "from transformers import AutoProcessor, Gemma3ForConditionalGeneration\n",
998
+ "device = torch.device(\"cuda:0\")\n",
999
+ "\n",
1000
+ "model_class = Gemma3ForConditionalGeneration\n",
1001
+ "torch_dtype = torch.bfloat16\n",
1002
+ "\n",
1003
+ "model_id = \"gemma-27b-tq_sft_finetuned-model\"\n",
1004
+ "model = model_class.from_pretrained(\n",
1005
+ " model_id,\n",
1006
+ " device_map=\"auto\",\n",
1007
+ " torch_dtype=torch_dtype,\n",
1008
+ " attn_implementation=\"eager\",\n",
1009
+ ")\n",
1010
+ "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
1011
+ "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)"
1012
+ ]
1013
+ },
1014
+ {
1015
+ "cell_type": "code",
1016
+ "execution_count": null,
1017
+ "id": "ce4070a9-5291-477a-bb3f-867b7971e391",
1018
+ "metadata": {
1019
+ "execution": {
1020
+ "iopub.status.busy": "2025-05-08T19:15:04.035085Z",
1021
+ "iopub.status.idle": "2025-05-08T19:15:04.035416Z",
1022
+ "shell.execute_reply": "2025-05-08T19:15:04.035307Z",
1023
+ "shell.execute_reply.started": "2025-05-08T19:15:04.035295Z"
1024
+ }
1025
+ },
1026
+ "outputs": [],
1027
+ "source": [
1028
+ "def extract_json_data(json_string):\n",
1029
+ " key_pattern = r'\"(.*?)\"\\s*:\\s*'\n",
1030
+ " value_pattern = r'(?:\"(.*?)\"|(\\d+)|$$(.*?)$$|\\{(.*?)\\})'\n",
1031
+ " matches = re.finditer(key_pattern + value_pattern, json_string, re.DOTALL) \n",
1032
+ " data = {}\n",
1033
+ " for match in matches:\n",
1034
+ " key = match.group(1)\n",
1035
+ " value = match.group(2) or match.group(3) or match.group(4) or match.group(5) \n",
1036
+ " if value:\n",
1037
+ " try:\n",
1038
+ " value = json.loads(value)\n",
1039
+ " except (json.JSONDecodeError, TypeError):\n",
1040
+ " pass\n",
1041
+ " data[key] = value\n",
1042
+ " return data"
1043
+ ]
1044
+ },
1045
+ {
1046
+ "cell_type": "code",
1047
+ "execution_count": null,
1048
+ "id": "4940ab0c-ff5a-4c1e-a543-b0e8be91a4cb",
1049
+ "metadata": {
1050
+ "execution": {
1051
+ "iopub.status.busy": "2025-05-08T19:15:04.037234Z",
1052
+ "iopub.status.idle": "2025-05-08T19:15:04.037745Z",
1053
+ "shell.execute_reply": "2025-05-08T19:15:04.037637Z",
1054
+ "shell.execute_reply.started": "2025-05-08T19:15:04.037626Z"
1055
+ }
1056
+ },
1057
+ "outputs": [],
1058
+ "source": [
1059
+ "rand_idx = randint(0, len(dataset[\"test\"]))\n",
1060
+ "test_predictions = []\n",
1061
+ "\n",
1062
+ "index = 9\n",
1063
+ "\n",
1064
+ "meta_data = test_meta[index]\n",
1065
+ "stop_token_ids = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids(\"<end_of_turn>\")]\n",
1066
+ "prompt = pipe.tokenizer.apply_chat_template(hf_dataset[\"test\"][index][\"messages\"][:1], tokenize=False, add_generation_prompt=True)\n",
1067
+ "outputs = pipe(prompt, max_new_tokens=2048, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=stop_token_ids, disable_compile=True)\n",
1068
+ "start = outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip().find(\"{\")\n",
1069
+ "end = outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip().rfind(\"}\")\n",
1070
+ "try:\n",
1071
+ " pred_dict = json.loads(outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip()[start:end + 1])\n",
1072
+ "except:\n",
1073
+ " start = outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip().find(\"{\")\n",
1074
+ " end = outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip().rfind(\"}\")\n",
1075
+ " pred_dict = outputs[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip()[start:end + 1]"
1076
+ ]
1077
+ },
1078
+ {
1079
+ "cell_type": "code",
1080
+ "execution_count": null,
1081
+ "id": "fdf03584-7cd0-40cc-af95-87279a2dc05e",
1082
+ "metadata": {
1083
+ "execution": {
1084
+ "iopub.status.busy": "2025-05-08T19:15:04.039492Z",
1085
+ "iopub.status.idle": "2025-05-08T19:15:04.039810Z",
1086
+ "shell.execute_reply": "2025-05-08T19:15:04.039704Z",
1087
+ "shell.execute_reply.started": "2025-05-08T19:15:04.039693Z"
1088
+ }
1089
+ },
1090
+ "outputs": [],
1091
+ "source": [
1092
+ "pred_dict"
1093
+ ]
1094
+ },
1095
+ {
1096
+ "cell_type": "code",
1097
+ "execution_count": null,
1098
+ "id": "80603718-a168-4e4c-aa55-842dfb20f265",
1099
+ "metadata": {
1100
+ "execution": {
1101
+ "iopub.status.busy": "2025-05-08T19:15:04.041594Z",
1102
+ "iopub.status.idle": "2025-05-08T19:15:04.041970Z",
1103
+ "shell.execute_reply": "2025-05-08T19:15:04.041865Z",
1104
+ "shell.execute_reply.started": "2025-05-08T19:15:04.041854Z"
1105
+ }
1106
+ },
1107
+ "outputs": [],
1108
+ "source": [
1109
+ "hf_dataset[\"test\"][index][\"messages\"][1]"
1110
+ ]
1111
+ },
1112
+ {
1113
+ "cell_type": "code",
1114
+ "execution_count": null,
1115
+ "id": "6d3731c9-4686-453f-8c91-e9477fe5541c",
1116
+ "metadata": {
1117
+ "execution": {
1118
+ "iopub.status.busy": "2025-05-08T19:15:04.043675Z",
1119
+ "iopub.status.idle": "2025-05-08T19:15:04.043977Z",
1120
+ "shell.execute_reply": "2025-05-08T19:15:04.043872Z",
1121
+ "shell.execute_reply.started": "2025-05-08T19:15:04.043861Z"
1122
+ }
1123
+ },
1124
+ "outputs": [],
1125
+ "source": [
1126
+ "batch_size = 8\n",
1127
+ "test_predictions = []\n",
1128
+ "\n",
1129
+ "for i in tqdm(range(0, len(hf_dataset[\"test\"]), batch_size)):\n",
1130
+ " batch_samples = hf_dataset[\"test\"][i:i + batch_size][\"messages\"]\n",
1131
+ " batch_meta = test_meta[i:i + batch_size]\n",
1132
+ " prompts = [\n",
1133
+ " pipe.tokenizer.apply_chat_template(sample[:1], tokenize=False, add_generation_prompt=True)\n",
1134
+ " for sample in batch_samples\n",
1135
+ " ]\n",
1136
+ " outputs = pipe(prompts, max_new_tokens=2048, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids(\"<end_of_turn>\")], disable_compile=True)\n",
1137
+ "\n",
1138
+ " for index, output in tqdm(enumerate(tqdm(outputs))):\n",
1139
+ " output_dict = {}\n",
1140
+ " start = output[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip().find(\"{\")\n",
1141
+ " end = output[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip().rfind(\"}\")\n",
1142
+ " try:\n",
1143
+ " pred_dict = json.loads(output[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip()[start:end + 1])\n",
1144
+ " except:\n",
1145
+ " pred_dict = output[0]['generated_text'].split(r\"<start_of_turn>model\")[1].strip()[start:end + 1]\n",
1146
+ " \n",
1147
+ " output_dict.update(batch_meta[index])\n",
1148
+ " output_dict[\"predictions\"] = pred_dict\n",
1149
+ " output_dict[\"human-annotation\"] = batch_samples[index][1]['content']\n",
1150
+ " output_dict[\"prompt\"] = batch_samples[index][0]['content']\n",
1151
+ " test_predictions.append(output_dict)"
1152
+ ]
1153
+ },
1154
+ {
1155
+ "cell_type": "code",
1156
+ "execution_count": null,
1157
+ "id": "616eb30a-eac2-4229-b86c-24eca7534cc6",
1158
+ "metadata": {
1159
+ "execution": {
1160
+ "iopub.status.busy": "2025-05-08T19:15:04.045755Z",
1161
+ "iopub.status.idle": "2025-05-08T19:15:04.046057Z",
1162
+ "shell.execute_reply": "2025-05-08T19:15:04.045954Z",
1163
+ "shell.execute_reply.started": "2025-05-08T19:15:04.045943Z"
1164
+ }
1165
+ },
1166
+ "outputs": [],
1167
+ "source": [
1168
+ "with open(\"/root/notebooks/trashspace/gemma_finetuned_expertdata/test_pred.json\", 'w') as json_file:\n",
1169
+ " json.dump(test_predictions, json_file)"
1170
+ ]
1171
+ },
1172
+ {
1173
+ "cell_type": "code",
1174
+ "execution_count": null,
1175
+ "id": "71480057-d6b9-4499-a8d7-26bf3f3f9342",
1176
+ "metadata": {},
1177
+ "outputs": [],
1178
+ "source": []
1179
+ },
1180
+ {
1181
+ "cell_type": "code",
1182
+ "execution_count": null,
1183
+ "id": "ce73604c-4bbb-46c7-8433-d957b0e10405",
1184
+ "metadata": {},
1185
+ "outputs": [],
1186
+ "source": []
1187
+ },
1188
+ {
1189
+ "cell_type": "code",
1190
+ "execution_count": null,
1191
+ "id": "4741a722-a772-44bf-949e-e77671a4ef03",
1192
+ "metadata": {},
1193
+ "outputs": [],
1194
+ "source": []
1195
+ },
1196
+ {
1197
+ "cell_type": "code",
1198
+ "execution_count": null,
1199
+ "id": "07c2adef-c2f6-4ba9-98f7-277cce2701d0",
1200
+ "metadata": {},
1201
+ "outputs": [],
1202
+ "source": []
1203
+ },
1204
+ {
1205
+ "cell_type": "code",
1206
+ "execution_count": null,
1207
+ "id": "1adfa27b-9bfa-4479-be3f-5149a2237c1f",
1208
+ "metadata": {
1209
+ "execution": {
1210
+ "iopub.status.busy": "2025-05-08T19:15:04.047823Z",
1211
+ "iopub.status.idle": "2025-05-08T19:15:04.048130Z",
1212
+ "shell.execute_reply": "2025-05-08T19:15:04.048026Z",
1213
+ "shell.execute_reply.started": "2025-05-08T19:15:04.048015Z"
1214
+ }
1215
+ },
1216
+ "outputs": [],
1217
+ "source": [
1218
+ "data = json.loads(test_sample['messages'][1]['content'])\n",
1219
+ "data"
1220
+ ]
1221
+ },
1222
+ {
1223
+ "cell_type": "code",
1224
+ "execution_count": null,
1225
+ "id": "750d3454-6300-469b-bdc3-77cce45a00ce",
1226
+ "metadata": {
1227
+ "execution": {
1228
+ "iopub.status.busy": "2025-05-08T19:15:04.049897Z",
1229
+ "iopub.status.idle": "2025-05-08T19:15:04.050203Z",
1230
+ "shell.execute_reply": "2025-05-08T19:15:04.050099Z",
1231
+ "shell.execute_reply.started": "2025-05-08T19:15:04.050088Z"
1232
+ }
1233
+ },
1234
+ "outputs": [],
1235
+ "source": [
1236
+ "print(len(hf_dataset[\"test\"]))"
1237
+ ]
1238
+ },
1239
+ {
1240
+ "cell_type": "code",
1241
+ "execution_count": null,
1242
+ "id": "3be45a2d-336f-4899-a8e9-e000437fab8c",
1243
+ "metadata": {},
1244
+ "outputs": [],
1245
+ "source": []
1246
+ },
1247
+ {
1248
+ "cell_type": "code",
1249
+ "execution_count": null,
1250
+ "id": "248182ff-bec8-46ff-bc34-14b523d877bf",
1251
+ "metadata": {},
1252
+ "outputs": [],
1253
+ "source": []
1254
+ }
1255
+ ],
1256
+ "metadata": {
1257
+ "kernelspec": {
1258
+ "display_name": "timedlibs",
1259
+ "language": "python",
1260
+ "name": "timedlibs"
1261
+ },
1262
+ "language_info": {
1263
+ "codemirror_mode": {
1264
+ "name": "ipython",
1265
+ "version": 3
1266
+ },
1267
+ "file_extension": ".py",
1268
+ "mimetype": "text/x-python",
1269
+ "name": "python",
1270
+ "nbconvert_exporter": "python",
1271
+ "pygments_lexer": "ipython3",
1272
+ "version": "3.10.16"
1273
+ }
1274
+ },
1275
+ "nbformat": 4,
1276
+ "nbformat_minor": 5
1277
+ }
README.md ADDED
@@ -0,0 +1,58 @@
1
+ ---
2
+ base_model: google/gemma-3-4b-it
3
+ library_name: transformers
4
+ model_name: TQTune
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - sft
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for TQTune
13
+
14
+ This model is a fine-tuned version of [google/gemma-3-4b-it](https://huggingface.co/google/gemma-3-4b-it).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="bhavinjawade/TQTune", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+
31
+
32
+
33
+ This model was trained with SFT.
34
+
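+ A minimal sketch of the SFT setup with TRL's `SFTTrainer` (illustrative only: the dataset id and hyperparameters below are placeholders, not the actual training data):
+ 
+ ```python
+ from datasets import load_dataset
+ from trl import SFTConfig, SFTTrainer
+ 
+ # Hypothetical chat-formatted dataset with a "messages" column
+ train_dataset = load_dataset("trl-lib/Capybara", split="train")
+ 
+ args = SFTConfig(output_dir="TQTune", num_train_epochs=1)
+ trainer = SFTTrainer(model="google/gemma-3-4b-it", args=args, train_dataset=train_dataset)
+ trainer.train()
+ ```
+ 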
35
+ ### Framework versions
36
+
37
+ - TRL: 0.16.1
38
+ - Transformers: 4.50.0.dev0
39
+ - Pytorch: 2.6.0+cu124
40
+ - Datasets: 3.3.2
41
+ - Tokenizers: 0.21.0
42
+
43
+ ## Citations
44
+
45
+
46
+
47
+ Cite TRL as:
48
+
49
+ ```bibtex
50
+ @misc{vonwerra2022trl,
51
+ title = {{TRL: Transformer Reinforcement Learning}},
52
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
53
+ year = 2020,
54
+ journal = {GitHub repository},
55
+ publisher = {GitHub},
56
+ howpublished = {\url{https://github.com/huggingface/trl}}
57
+ }
58
+ ```
SFT_Expert.py ADDED
@@ -0,0 +1,229 @@
1
+ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, Trainer, TrainingArguments, DataCollatorForSeq2Seq
2
+ import torch
3
+ from peft import LoraConfig, get_peft_model
4
+ import os
5
+ from tqdm import tqdm
6
+ import json
7
+ import random
8
+ from datasets import load_dataset, Dataset, DatasetDict
10
+
11
+ system_message = "You are a helpful assistant who is an expert in estimating quality of translations."
12
+
13
+ output_template = '''
14
+ {
15
+ "Accuracy Issues": [
16
+ {
17
+ "Error Span": "",
18
+ "Error Explanation": "",
19
+ "Error Quality Category": "",
20
+ "Error Quality Tags": [],
21
+ "Error Severity": ""
22
+ }
23
+ ],
24
+ "Accuracy Score": "",
25
+ "Readability Issues": [
26
+ {
27
+ "Error Span": "",
28
+ "Error Explanation": "",
29
+ "Error Quality Category": "",
30
+ "Error Quality Tags": [],
31
+ "Error Severity": ""
32
+ }
33
+ ],
34
+ "Readability Score": ""
35
+ }'''
36
+
37
+ def create_conversation(input_sample, output_sample):
38
+ return {
39
+ "messages": [
40
+ # {"role": "system", "content": system_message},
41
+ {"role": "user", "content": input_sample},
42
+ {"role": "assistant", "content": output_sample}
43
+ ]
44
+ }
45
+
46
+ data_path = (
47
+ "/root/notebooks/MT_TQ/TQ/TQTune/labeled_data/parsed/"
48
+ )
49
+
50
+ json_files = [
51
+ os.path.join(root, file)
52
+ for root, _, files in os.walk(data_path)
53
+ for file in files
54
+ if file.endswith(".json") and "PLDL" in file
55
+ ]
56
+
57
+ training_samples = []
58
+ for json_file in tqdm(json_files):
59
+ with open(json_file, "r") as file:
60
+ data = json.load(file)
61
+ sampled_items = random.sample(data["data"], 20)
62
+ training_samples.extend(sampled_items)
63
+
64
+ datapoints = []
65
+
66
+ for sample in training_samples:
67
+ datapoint = {"input": {}}
68
+ datapoint["input"]["src_text"] = sample["main_src_text"]
69
+ datapoint["input"]["tgt_text"] = sample["tgt_text"]
70
+ datapoint["input"]["src_prev"] = sample["tt_src_prev"]
71
+ datapoint["input"]["src_next"] = sample["tt_src_next"]
72
+ datapoint["input"]["tgt_prev"] = sample["tt_tgt_prev"]
73
+ datapoint["input"]["tgt_next"] = sample["tt_tgt_next"]
74
+ datapoint["input"]["src_lang"] = sample["src_lang"]
75
+ datapoint["input"]["tgt_lang"] = sample["tgt_lang"]
76
+ datapoint["evaluation"] = sample["labelers"][0]["annotation"]
77
+ datapoints.append(datapoint)
78
+
79
+ def dataset_prep(datapoints, test_size=0.2):
80
+ with open("prompts.txt") as file:
81
+ template_string = file.read()
82
+
83
+ random.shuffle(datapoints)
84
+
85
+ split_index = int(len(datapoints) * (1 - test_size))
86
+ train_datapoints = datapoints[:split_index]
87
+ test_datapoints = datapoints[split_index:]
88
+
89
+ def create_dataset(datapoints):
90
+ dataset = []
91
+ for datapoint in datapoints:
92
+ src_text = datapoint['input']['src_text']
93
+ tgt_text = datapoint['input']['tgt_text']
94
+ src_prev = datapoint['input']['src_prev']
95
+ src_next = datapoint['input']['src_next']
96
+ tgt_prev = datapoint['input']['tgt_prev']
97
+ tgt_next = datapoint['input']['tgt_next']
98
+ src_lang = datapoint['input']['src_lang']
99
+ tgt_lang = datapoint['input']['tgt_lang']
100
+ output = datapoint['evaluation']
101
+ del output["Confidence Level"]
102
+ del output["Main Vs Alternate"]
103
+ del output["Score"]
104
+
105
+ if len(output['Accuracy Issues']) != 0 and len(output['Readability Issues']) != 0:
106
+ item = template_string.format(src_text=src_text, tgt_text=tgt_text,
107
+ src_prev=src_prev, src_next=src_next,
108
+ tgt_prev=tgt_prev, tgt_next=tgt_next,
109
+ src_lang=src_lang, tgt_lang=tgt_lang,
110
+ template=output_template)
111
+
112
+ dataset.append(create_conversation(item, json.dumps(output)))
113
+
114
+ return dataset
115
+
116
+ train_set = create_dataset(train_datapoints)
117
+ test_set = create_dataset(test_datapoints)
118
+
119
+ return train_set, test_set
120
+
121
+ train_dataset, test_dataset = dataset_prep(datapoints)
122
+ dataset = {"train": train_dataset, "test": test_dataset}
123
+
124
+ def convert_to_hf_dataset(dataset):
125
+ # Convert the train and test datasets into Hugging Face Dataset objects
126
+ train_dataset = Dataset.from_list(dataset['train'])
127
+ test_dataset = Dataset.from_list(dataset['test'])
128
+
129
+ # Combine them into a DatasetDict
130
+ hf_dataset = DatasetDict({
131
+ 'train': train_dataset,
132
+ 'test': test_dataset
133
+ })
134
+
135
+ return hf_dataset
136
+
137
+ # Convert your dataset into a Hugging Face Dataset object
138
+ hf_dataset = convert_to_hf_dataset(dataset)
139
+
140
+ # Now you can use hf_dataset for your machine learning tasks
141
+ print(hf_dataset)
142
+
143
+ import torch
144
+ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForImageTextToText, BitsAndBytesConfig
145
+ from transformers import AutoProcessor, Gemma3ForConditionalGeneration
146
+ device = torch.device("cuda:0")
147
+
148
+ # Hugging Face model id
149
+ model_id = "google/gemma-3-12b-it" # or `google/gemma-3-4b-pt`, `google/gemma-3-12b-pt`, `google/gemma-3-27b-pt`
150
+
151
+ # Select model class based on id
152
+ if model_id == "google/gemma-3-12b-it":
153
+ model_class = Gemma3ForConditionalGeneration
154
+ else:
155
+ model_class = AutoModelForImageTextToText
156
+
157
+ torch_dtype = torch.bfloat16
158
+
159
+ model_kwargs = dict(
160
+ attn_implementation="eager",
161
+ torch_dtype=torch_dtype,
162
+ device_map="auto", # Change from {'': 0} to "auto"
163
+ )
164
+
165
+ model_kwargs["quantization_config"] = BitsAndBytesConfig(
166
+ load_in_8bit=True,
167
+ bnb_8bit_use_double_quant=True,
168
+ bnb_8bit_quant_type='nf8',
169
+ bnb_8bit_compute_dtype=model_kwargs['torch_dtype'],
170
+ bnb_8bit_quant_storage=model_kwargs['torch_dtype'],
171
+ )
172
+
173
+ model = model_class.from_pretrained(model_id, **model_kwargs)
174
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-12b-it") # Load the Instruction Tokenizer to use the official Gemma template
175
+
176
+ from peft import LoraConfig
177
+
178
+ peft_config = LoraConfig(
179
+ lora_alpha=128,
180
+ lora_dropout=0.05,
181
+ r=16,
182
+ bias="none",
183
+ target_modules="all-linear",
184
+ task_type="CAUSAL_LM",
185
+ modules_to_save=["lm_head", "embed_tokens"] # make sure to save the lm_head and embed_tokens as you train the special tokens
186
+ )
187
+
188
+ from trl import SFTConfig
189
+
190
+ args = SFTConfig(
191
+ output_dir="gemma-12b-tq-model",
192
+ max_seq_length=512,
193
+ packing=True,
194
+ num_train_epochs=1,
195
+ per_device_train_batch_size=1,
196
+ gradient_accumulation_steps=4,
197
+ gradient_checkpointing=True,
198
+ optim="adamw_torch_fused",
199
+ logging_steps=1,
200
+ save_strategy="epoch",
201
+ learning_rate=2e-4,
202
+ fp16=True if torch_dtype == torch.float16 else False,
203
+ bf16=True if torch_dtype == torch.bfloat16 else False,
204
+ max_grad_norm=0.3,
205
+ warmup_ratio=0.03,
206
+ lr_scheduler_type="constant",
207
+ push_to_hub=True,
208
+ report_to="tensorboard",
209
+ dataset_kwargs={
210
+ "add_special_tokens": False,
211
+ "append_concat_token": True,
212
+ },
213
+ ddp_find_unused_parameters=False,
214
+ no_cuda=False,
215
+ )
216
+
217
+ from trl import SFTTrainer
218
+
219
+ # Create Trainer object
220
+ trainer = SFTTrainer(
221
+ model=model,
222
+ args=args,
223
+ train_dataset=hf_dataset["train"],
224
+ peft_config=peft_config,
225
+ processing_class=tokenizer
226
+ )
227
+
228
+ trainer.train()
229
+ trainer.save_model()
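+ 
+ # Optional follow-up (sketch only; mirrors the merge step used in the notebook,
+ # not something this script currently runs): fuse the LoRA adapters into the
+ # base weights so the result can be served without PEFT.
+ # merged_model = trainer.model.merge_and_unload()
+ # merged_model.save_pretrained("gemma-12b-tq-model-merged")  # hypothetical output dir
+ # tokenizer.save_pretrained("gemma-12b-tq-model-merged")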
TQ_template.py ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "src_lang": "en",
3
+ "tgt_lang": "de",
4
+ "src_prev": "Alice: Hi Bob, how are you?\nBob: I'm good, thanks!",
5
+ "tgt_prev": "Alice: Hallo Bob, wie geht's?\nBob: Mir geht's gut, danke!",
6
+ "src_next": "Alice: Want to grab coffee later?\nBob: Sure, sounds good.",
7
+ "tgt_next": "Alice: Möchtest du später einen Kaffee trinken?\nBob: Klar, klingt gut.",
8
+ "src_text": "Bob: I just got back from Paris.",
9
+ "main_text": "This is the main text",
10
+ "alternate_text": "This is the alternate text",
11
+ "evaluation": {
12
+ "Accuracy Issues": [
13
+ {
14
+ "Error Span": [5,8],
15
+ "Error Explanation": "Incorrect translation of 'just got back' as 'gerade aus' instead of 'gerade zurück'.",
16
+ "Error Quality Category": "Fidelity",
17
+ "Error Quality Tags": ["terminology", "accuracy"],
18
+ "Error Severity": "Major"
19
+ }
20
+ ],
21
+ "Accuracy Score": "4", # ["projects"]["What ever key is there"]["labels"][index here of the labels]["annotations"]["classifications"][1]["radio_answer"]["name"]
22
+ "Readability Issues": [
23
+ {
24
+ "Error Location": "Src", # ["projects"]["What ever key is there"]["labels"][index here of the labels]["annotations"]["objects"][index of it]["conversational_location"]["message_id"]
25
+ "Error Span": [0,2], # ["projects"]["What ever key is there"]["labels"][index here of the labels]["annotations"]["objects"][index of it]["conversational_location"]["location"]["start" and "end" use them to make this list (start, end)]
26
+ "Error Explanation": "Sentence structure is awkward in German translation.", # ["projects"]["What ever key is there"]["labels"][index here of the labels]["annotations"]["objects"][index of it]["classification"][2]["text_answer"]["content"]
27
+ "Error Quality Category": "Style", # ["projects"]["What ever key is there"]["labels"][index here of the labels]["annotations"]["objects"][index of it]["name"] - here if the name is "Style" - put it under Readability Issues else put it under Accuracy Issues.
28
+ "Error Quality Tags": ["awkward", "structure"], # ["projects"]["What ever key is there"]["labels"][index here of the labels]["annotations"]["objects"][index of it]["classification"][1]["checklist_answers"][list of dicts, take name keys for all and make list of it]
29
+ "Error Severity": "Minor" # ["projects"]["What ever key is there"]["labels"][index here of the labels]["annotations"]["objects"][index of it]["classification"][0]["radio_answer"]["name"]
30
+ }
31
+ ],
32
+ "Readability Score": "3", # ["projects"]["What ever key is there"]["labels"][index here of the labels]["classifications"][2]["radio_answer"]["name"]
33
+ "Confidence Level": "the_translation_is_excellent_without_any_error_spans_and_no_creative_liberties_were_taken", # # ["projects"]["What ever key is there"]["labels"][index here of the labels]["classifications"][3]["radio_answer"]["name"]
34
+ "Main Vs Alternate": "Both of them have roughly the same quality" # ["projects"]["What ever key is there"]["labels"][index here of the labels]["classifications"][0]["radio_answer"]["name"]
35
+ },
36
+ "Score": "26"
37
+ }
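+ 
+ # Hedged sketch (a hypothetical helper, not part of the template above): one way the
+ # annotation paths noted in the comments could be walked for a single label. The key
+ # names and classification indices are taken verbatim from those notes and may need
+ # adjusting against a real export.
+ def parse_label(export, project_key, label_idx):
+     label = export["projects"][project_key]["labels"][label_idx]
+     issues = []
+     for obj in label["annotations"]["objects"]:
+         loc = obj["conversational_location"]["location"]
+         issues.append({
+             "Error Span": [loc["start"], loc["end"]],
+             "Error Quality Category": obj["name"],  # "Style" -> Readability, else Accuracy
+             "Error Severity": obj["classification"][0]["radio_answer"]["name"],
+             "Error Quality Tags": [t["name"] for t in obj["classification"][1]["checklist_answers"]],
+             "Error Explanation": obj["classification"][2]["text_answer"]["content"],
+         })
+     return {
+         "Accuracy Score": label["annotations"]["classifications"][1]["radio_answer"]["name"],
+         "Readability Score": label["classifications"][2]["radio_answer"]["name"],
+         "Issues": issues,
+     }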
TextGrad_Optimization.ipynb ADDED
@@ -0,0 +1,544 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 10,
6
+ "id": "8b3ee6e2-ca9c-40fa-b4c6-a9596f075f79",
7
+ "metadata": {
8
+ "execution": {
9
+ "iopub.execute_input": "2025-05-09T17:36:47.763713Z",
10
+ "iopub.status.busy": "2025-05-09T17:36:47.763339Z",
11
+ "iopub.status.idle": "2025-05-09T17:36:47.768648Z",
12
+ "shell.execute_reply": "2025-05-09T17:36:47.768166Z",
13
+ "shell.execute_reply.started": "2025-05-09T17:36:47.763676Z"
14
+ }
15
+ },
16
+ "outputs": [
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "env: OPENAI_API_KEY=\"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\"\n"
22
+ ]
23
+ }
24
+ ],
25
+ "source": [
26
+ "import argparse\n",
27
+ "import concurrent\n",
28
+ "from dotenv import load_dotenv\n",
29
+ "from tqdm import tqdm\n",
30
+ "import textgrad as tg\n",
31
+ "from textgrad.tasks import load_task\n",
32
+ "import numpy as np\n",
33
+ "import random\n",
34
+ "load_dotenv(override=True)\n",
35
+ "import os\n",
36
+ "import json\n",
37
+ "\n",
38
+ "%env OPENAI_API_KEY=\"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\""
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 4,
44
+ "id": "4ec9a29b-9162-4fe3-b32d-4de4397c6483",
45
+ "metadata": {
46
+ "execution": {
47
+ "iopub.execute_input": "2025-05-09T17:33:04.417822Z",
48
+ "iopub.status.busy": "2025-05-09T17:33:04.417437Z",
49
+ "iopub.status.idle": "2025-05-09T17:33:04.429505Z",
50
+ "shell.execute_reply": "2025-05-09T17:33:04.429029Z",
51
+ "shell.execute_reply.started": "2025-05-09T17:33:04.417795Z"
52
+ }
53
+ },
54
+ "outputs": [
55
+ {
56
+ "name": "stderr",
57
+ "output_type": "stream",
58
+ "text": [
59
+ "0it [00:00, ?it/s]\n"
60
+ ]
61
+ }
62
+ ],
63
+ "source": [
64
+ "data_path = \"/root/notebooks/MT_TQ/TQ/DataPrep_Prompting_Experiments/labeled_data/parsed/\"\n",
65
+ "json_files = [os.path.join(root, file) for root, _, files in os.walk(data_path) for file in files if file.endswith('.json') and 'PLDL' in file]\n",
66
+ "\n",
67
+ "training_samples = []\n",
68
+ "for json_file in tqdm(json_files):\n",
69
+ " with open(json_file, 'r') as file:\n",
70
+ " data = json.load(file)\n",
71
+ " sampled_items = random.sample(data[\"data\"], 20)\n",
72
+ " training_samples.extend(sampled_items)\n",
73
+ "\n",
74
+ "datapoints = []\n",
75
+ "\n",
76
+ "for sample in training_samples:\n",
77
+ " datapoint = {\"input\":{}}\n",
78
+ " datapoint[\"input\"][\"src_text\"] = sample[\"main_src_text\"]\n",
79
+ " datapoint[\"input\"][\"tgt_text\"] = sample[\"tgt_text\"]\n",
80
+ " datapoint[\"input\"][\"src_prev\"] = sample[\"tt_src_prev\"]\n",
81
+ " datapoint[\"input\"][\"src_next\"] = sample[\"tt_src_next\"]\n",
82
+ " datapoint[\"input\"][\"tgt_prev\"] = sample[\"tt_tgt_prev\"]\n",
83
+ " datapoint[\"input\"][\"tgt_next\"] = sample[\"tt_tgt_next\"]\n",
84
+ " datapoint[\"input\"][\"src_lang\"] = sample[\"src_lang\"]\n",
85
+ " datapoint[\"input\"][\"tgt_lang\"] = sample[\"tgt_lang\"]\n",
86
+ " datapoint[\"evaluation\"] = sample[\"labelers\"][0][\"annotation\"]\n",
87
+ " datapoints.append(datapoint)"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": 5,
93
+ "id": "a894ce72-d451-44fa-aaa5-85bf8e6dc9da",
94
+ "metadata": {
95
+ "execution": {
96
+ "iopub.execute_input": "2025-05-09T17:33:40.240759Z",
97
+ "iopub.status.busy": "2025-05-09T17:33:40.240243Z",
98
+ "iopub.status.idle": "2025-05-09T17:33:40.244435Z",
99
+ "shell.execute_reply": "2025-05-09T17:33:40.243818Z",
100
+ "shell.execute_reply.started": "2025-05-09T17:33:40.240720Z"
101
+ }
102
+ },
103
+ "outputs": [],
104
+ "source": [
105
+ "def set_seed(seed):\n",
106
+ " np.random.seed(seed)\n",
107
+ " random.seed(seed)"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 6,
113
+ "id": "4eeaa266-3ca2-4360-b80b-b38aa3bbdb70",
114
+ "metadata": {
115
+ "execution": {
116
+ "iopub.execute_input": "2025-05-09T17:33:55.982807Z",
117
+ "iopub.status.busy": "2025-05-09T17:33:55.982080Z",
118
+ "iopub.status.idle": "2025-05-09T17:33:55.988522Z",
119
+ "shell.execute_reply": "2025-05-09T17:33:55.987924Z",
120
+ "shell.execute_reply.started": "2025-05-09T17:33:55.982770Z"
121
+ }
122
+ },
123
+ "outputs": [],
124
+ "source": [
125
+ "def eval_sample(item, eval_fn, model):\n",
126
+ " \"\"\"\n",
127
+ " This function allows us to evaluate if an answer to a question in the prompt is a good answer.\n",
128
+ "\n",
129
+ " \"\"\"\n",
130
+ " x, y = item\n",
131
+ " x = tg.Variable(x, requires_grad=False, role_description=\"query to the language model\")\n",
132
+ " y = tg.Variable(y, requires_grad=False, role_description=\"correct answer for the query\")\n",
133
+ " response = model(x)\n",
134
+ " try:\n",
135
+ " eval_output_variable = eval_fn(inputs=dict(prediction=response, ground_truth_answer=y))\n",
136
+ " return int(eval_output_variable.value)\n",
137
+ " except:\n",
138
+ " eval_output_variable = eval_fn([x, y, response])\n",
139
+ " eval_output_parsed = eval_fn.parse_output(eval_output_variable)\n",
140
+ " return int(eval_output_parsed)"
141
+ ]
142
+ },
143
+ {
144
+ "cell_type": "code",
145
+ "execution_count": 7,
146
+ "id": "c7e57f9d-c0ff-4139-9e61-b93510599353",
147
+ "metadata": {
148
+ "execution": {
149
+ "iopub.execute_input": "2025-05-09T17:34:08.606301Z",
150
+ "iopub.status.busy": "2025-05-09T17:34:08.605538Z",
151
+ "iopub.status.idle": "2025-05-09T17:34:08.612515Z",
152
+ "shell.execute_reply": "2025-05-09T17:34:08.611911Z",
153
+ "shell.execute_reply.started": "2025-05-09T17:34:08.606262Z"
154
+ }
155
+ },
156
+ "outputs": [],
157
+ "source": [
158
+ "def eval_dataset(test_set, eval_fn, model, max_samples: int=None):\n",
159
+ " if max_samples is None:\n",
160
+ " max_samples = len(test_set)\n",
161
+ " accuracy_list = []\n",
162
+ " with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:\n",
163
+ " futures = []\n",
164
+ " for _, sample in enumerate(test_set):\n",
165
+ " \n",
166
+ " future = executor.submit(eval_sample, sample, eval_fn, model)\n",
167
+ " futures.append(future)\n",
168
+ " if len(futures) >= max_samples:\n",
169
+ " break\n",
170
+ " tqdm_loader = tqdm(concurrent.futures.as_completed(futures), total=len(futures), position=0)\n",
171
+ " for future in tqdm_loader:\n",
172
+ " acc_item = future.result()\n",
173
+ " accuracy_list.append(acc_item)\n",
174
+ " tqdm_loader.set_description(f\"Accuracy: {np.mean(accuracy_list)}\")\n",
175
+ " return accuracy_list "
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": 8,
181
+ "id": "039af9f3-a124-4a50-98a7-e728a913c069",
182
+ "metadata": {
183
+ "execution": {
184
+ "iopub.execute_input": "2025-05-09T17:34:22.703336Z",
185
+ "iopub.status.busy": "2025-05-09T17:34:22.702980Z",
186
+ "iopub.status.idle": "2025-05-09T17:34:22.707253Z",
187
+ "shell.execute_reply": "2025-05-09T17:34:22.706781Z",
188
+ "shell.execute_reply.started": "2025-05-09T17:34:22.703313Z"
189
+ }
190
+ },
191
+ "outputs": [],
192
+ "source": [
193
+ "def run_validation_revert(system_prompt: tg.Variable, results, model, eval_fn, val_set):\n",
194
+ " val_performance = np.mean(eval_dataset(val_set, eval_fn, model))\n",
195
+ " previous_performance = np.mean(results[\"validation_acc\"][-1])\n",
196
+ " print(\"val_performance: \", val_performance)\n",
197
+ " print(\"previous_performance: \", previous_performance)\n",
198
+ " previous_prompt = results[\"prompt\"][-1]\n",
199
+ " \n",
200
+ " if val_performance < previous_performance:\n",
201
+ " print(f\"rejected prompt: {system_prompt.value}\")\n",
202
+ " system_prompt.set_value(previous_prompt)\n",
203
+ " val_performance = previous_performance\n",
204
+ "\n",
205
+ " results[\"validation_acc\"].append(val_performance)"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "code",
210
+ "execution_count": 14,
211
+ "id": "031ebb6e-f5ff-45b0-a810-d1bd81ef6d2a",
212
+ "metadata": {
213
+ "execution": {
214
+ "iopub.execute_input": "2025-05-09T17:40:38.476352Z",
215
+ "iopub.status.busy": "2025-05-09T17:40:38.475979Z",
216
+ "iopub.status.idle": "2025-05-09T17:40:38.701947Z",
217
+ "shell.execute_reply": "2025-05-09T17:40:38.701394Z",
218
+ "shell.execute_reply.started": "2025-05-09T17:40:38.476327Z"
219
+ }
220
+ },
221
+ "outputs": [
222
+ {
223
+ "name": "stdout",
224
+ "output_type": "stream",
225
+ "text": [
226
+ "Train/Val/Test Set Lengths: 50 100 100\n"
227
+ ]
228
+ }
229
+ ],
230
+ "source": [
231
+ "set_seed(12)\n",
232
+ "llm_api_eval = tg.get_engine(engine_name=\"gpt-4o\")\n",
233
+ "llm_api_test = tg.get_engine(engine_name=\"gpt-3.5-turbo-0125\")\n",
234
+ "tg.set_backward_engine(llm_api_eval, override=True)\n",
235
+ "\n",
236
+ "# Load the data and the evaluation function\n",
237
+ "train_set, val_set, test_set, eval_fn = load_task(\"BBH_object_counting\", evaluation_api=llm_api_eval)\n",
238
+ "print(\"Train/Val/Test Set Lengths: \", len(train_set), len(val_set), len(test_set))\n",
239
+ "STARTING_SYSTEM_PROMPT = train_set.get_task_description()"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 15,
245
+ "id": "bde34303-2f52-415f-b117-264e266b84f0",
246
+ "metadata": {
247
+ "execution": {
248
+ "iopub.execute_input": "2025-05-09T17:40:39.330651Z",
249
+ "iopub.status.busy": "2025-05-09T17:40:39.330285Z",
250
+ "iopub.status.idle": "2025-05-09T17:40:39.398820Z",
251
+ "shell.execute_reply": "2025-05-09T17:40:39.398116Z",
252
+ "shell.execute_reply.started": "2025-05-09T17:40:39.330626Z"
253
+ }
254
+ },
255
+ "outputs": [
256
+ {
257
+ "name": "stderr",
258
+ "output_type": "stream",
259
+ "text": [
260
+ " 0%| | 0/100 [00:00<?, ?it/s]\n"
261
+ ]
262
+ },
263
+ {
264
+ "ename": "AssertionError",
265
+ "evalue": "Value must be a string, int, or image (bytes). Got: <class 'numpy.int64'>",
266
+ "output_type": "error",
267
+ "traceback": [
268
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
269
+ "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
270
+ "Cell \u001b[0;32mIn[15], line 18\u001b[0m\n\u001b[1;32m 15\u001b[0m optimizer \u001b[38;5;241m=\u001b[39m tg\u001b[38;5;241m.\u001b[39mTextualGradientDescent(engine\u001b[38;5;241m=\u001b[39mllm_api_eval, parameters\u001b[38;5;241m=\u001b[39m[system_prompt])\n\u001b[1;32m 17\u001b[0m results \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_acc\u001b[39m\u001b[38;5;124m\"\u001b[39m: [], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprompt\u001b[39m\u001b[38;5;124m\"\u001b[39m: [], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalidation_acc\u001b[39m\u001b[38;5;124m\"\u001b[39m: []}\n\u001b[0;32m---> 18\u001b[0m results[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_acc\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mappend(\u001b[43meval_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_set\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43meval_fn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 19\u001b[0m results[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalidation_acc\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mappend(eval_dataset(val_set, eval_fn, model))\n\u001b[1;32m 20\u001b[0m results[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprompt\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mappend(system_prompt\u001b[38;5;241m.\u001b[39mget_value())\n",
271
+ "Cell \u001b[0;32mIn[7], line 15\u001b[0m, in \u001b[0;36meval_dataset\u001b[0;34m(test_set, eval_fn, model, max_samples)\u001b[0m\n\u001b[1;32m 13\u001b[0m tqdm_loader \u001b[38;5;241m=\u001b[39m tqdm(concurrent\u001b[38;5;241m.\u001b[39mfutures\u001b[38;5;241m.\u001b[39mas_completed(futures), total\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mlen\u001b[39m(futures), position\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m future \u001b[38;5;129;01min\u001b[39;00m tqdm_loader:\n\u001b[0;32m---> 15\u001b[0m acc_item \u001b[38;5;241m=\u001b[39m \u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m accuracy_list\u001b[38;5;241m.\u001b[39mappend(acc_item)\n\u001b[1;32m 17\u001b[0m tqdm_loader\u001b[38;5;241m.\u001b[39mset_description(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccuracy: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnp\u001b[38;5;241m.\u001b[39mmean(accuracy_list)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
272
+ "File \u001b[0;32m/apps/python3.10/lib/python3.10/concurrent/futures/_base.py:451\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[0;32m--> 451\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_condition\u001b[38;5;241m.\u001b[39mwait(timeout)\n\u001b[1;32m 455\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n",
273
+ "File \u001b[0;32m/apps/python3.10/lib/python3.10/concurrent/futures/_base.py:403\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 401\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception:\n\u001b[1;32m 402\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 403\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\n\u001b[1;32m 404\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 405\u001b[0m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[1;32m 406\u001b[0m \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
274
+ "File \u001b[0;32m/apps/python3.10/lib/python3.10/concurrent/futures/thread.py:58\u001b[0m, in \u001b[0;36m_WorkItem.run\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 58\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfuture\u001b[38;5;241m.\u001b[39mset_exception(exc)\n",
275
+ "Cell \u001b[0;32mIn[6], line 8\u001b[0m, in \u001b[0;36meval_sample\u001b[0;34m(item, eval_fn, model)\u001b[0m\n\u001b[1;32m 6\u001b[0m x, y \u001b[38;5;241m=\u001b[39m item\n\u001b[1;32m 7\u001b[0m x \u001b[38;5;241m=\u001b[39m tg\u001b[38;5;241m.\u001b[39mVariable(x, requires_grad\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, role_description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery to the language model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 8\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[43mtg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mVariable\u001b[49m\u001b[43m(\u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequires_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrole_description\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcorrect answer for the query\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m response \u001b[38;5;241m=\u001b[39m model(x)\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
276
+ "File \u001b[0;32m~/notebooks/MT_TQ/Libraries/timedlibs/lib/python3.10/site-packages/textgrad/variable.py:43\u001b[0m, in \u001b[0;36mVariable.__init__\u001b[0;34m(self, value, image_path, predecessors, requires_grad, role_description)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;129;01mnot\u001b[39;00m requires_grad) \u001b[38;5;129;01mand\u001b[39;00m (\u001b[38;5;28mlen\u001b[39m(_predecessor_requires_grad) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m):\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIf the variable does not require grad, none of its predecessors should require grad.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIn this case, following predecessors require grad: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_predecessor_requires_grad\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 43\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(value) \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mbytes\u001b[39m, \u001b[38;5;28mint\u001b[39m], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mValue must be a string, int, or image (bytes). Got: \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\u001b[38;5;28mtype\u001b[39m(value))\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, \u001b[38;5;28mint\u001b[39m):\n\u001b[1;32m 45\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(value)\n",
277
+ "\u001b[0;31mAssertionError\u001b[0m: Value must be a string, int, or image (bytes). Got: <class 'numpy.int64'>"
278
+ ]
279
+ }
280
+ ],
281
+ "source": [
282
+ "train_loader = tg.tasks.DataLoader(train_set, batch_size=3, shuffle=True)\n",
283
+ "\n",
284
+ "\n",
285
+ "# Testing the 0-shot performance of the evaluation engine\n",
286
+ "system_prompt = tg.Variable(STARTING_SYSTEM_PROMPT, \n",
287
+ " requires_grad=True, \n",
288
+ " role_description=\"system prompt to the language model\")\n",
289
+ "model_evaluation = tg.BlackboxLLM(llm_api_eval, system_prompt)\n",
290
+ "\n",
291
+ "system_prompt = tg.Variable(STARTING_SYSTEM_PROMPT, \n",
292
+ " requires_grad=True,\n",
293
+ " role_description=\"structured system prompt to a somewhat capable language model that specifies the behavior and strategies for the QA task\")\n",
294
+ "model = tg.BlackboxLLM(llm_api_test, system_prompt)\n",
295
+ "\n",
296
+ "optimizer = tg.TextualGradientDescent(engine=llm_api_eval, parameters=[system_prompt])\n",
297
+ "\n",
298
+ "results = {\"test_acc\": [], \"prompt\": [], \"validation_acc\": []}\n",
299
+ "results[\"test_acc\"].append(eval_dataset(test_set, eval_fn, model))\n",
300
+ "results[\"validation_acc\"].append(eval_dataset(val_set, eval_fn, model))\n",
301
+ "results[\"prompt\"].append(system_prompt.get_value())"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": null,
307
+ "id": "47c15231-22ff-459b-b5cc-ca32aaa62332",
308
+ "metadata": {},
309
+ "outputs": [],
310
+ "source": [
311
+ "for epoch in range(3):\n",
312
+ " for steps, (batch_x, batch_y) in enumerate((pbar := tqdm(train_loader, position=0))):\n",
313
+ " pbar.set_description(f\"Training step {steps}. Epoch {epoch}\")\n",
314
+ " optimizer.zero_grad()\n",
315
+ " losses = []\n",
316
+ " for (x, y) in zip(batch_x, batch_y):\n",
317
+ " x = tg.Variable(x, requires_grad=False, role_description=\"query to the language model\")\n",
318
+ " y = tg.Variable(y, requires_grad=False, role_description=\"correct answer for the query\")\n",
319
+ " response = model(x)\n",
320
+ " try:\n",
321
+ " eval_output_variable = eval_fn(inputs=dict(prediction=response, ground_truth_answer=y))\n",
322
+ " except:\n",
323
+ " eval_output_variable = eval_fn([x, y, response])\n",
324
+ " losses.append(eval_output_variable)\n",
325
+ " total_loss = tg.sum(losses)\n",
326
+ " total_loss.backward()\n",
327
+ " optimizer.step()\n",
328
+ " \n",
329
+ " run_validation_revert(system_prompt, results, model, eval_fn, val_set)\n",
330
+ " \n",
331
+ " print(\"sys prompt: \", system_prompt)\n",
332
+ " test_acc = eval_dataset(test_set, eval_fn, model)\n",
333
+ " results[\"test_acc\"].append(test_acc)\n",
334
+ " results[\"prompt\"].append(system_prompt.get_value())\n",
335
+ " if steps == 3:\n",
336
+ " break"
337
+ ]
338
+ },
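+ {
+ "cell_type": "markdown",
+ "id": "1e8a7b6c-persist-prompt-note",
+ "metadata": {},
+ "source": [
+ "After training, `results[\"prompt\"]` holds the system prompt recorded after each epoch, alongside the matching `results[\"validation_acc\"]` and `results[\"test_acc\"]` entries. A small follow-up sketch (the output filename is arbitrary) persists the run so the optimized prompt can be reused later:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1e8a7b6c-persist-prompt",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "\n",
+ "# run_validation_revert reverts system_prompt when validation accuracy drops,\n",
+ "# so the final entry in results[\"prompt\"] is the best validated prompt.\n",
+ "with open(\"textgrad_prompt_results.json\", \"w\") as f:\n",
+ "    json.dump({k: [str(v) for v in vs] for k, vs in results.items()}, f, indent=2)"
+ ]
+ },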
339
+ {
340
+ "cell_type": "code",
341
+ "execution_count": null,
342
+ "id": "3c5e93f5-8d1c-4b87-a6d1-811714982d47",
343
+ "metadata": {},
344
+ "outputs": [],
345
+ "source": []
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": null,
350
+ "id": "67a4583f-162c-4e2d-b061-798f6c676a28",
351
+ "metadata": {},
352
+ "outputs": [],
353
+ "source": [
354
+ "class TranslationQualityAssessor(dspy.Module):\n",
355
+ " def __init__(self):\n",
356
+ " super().__init__()\n",
357
+ " self.assess = dspy.ChainOfThought(TranslationQualitySignature)\n",
358
+ "\n",
359
+ " def forward(self, src_lang, tgt_lang, src_text, translation, src_prev=\"\", tgt_prev=\"\", src_next=\"\", tgt_next=\"\"):\n",
360
+ " context = f\"\"\"Previous Context:\n",
361
+ " Source: {src_prev}\n",
362
+ " Translation: {tgt_prev}\n",
363
+ " \n",
364
+ " Next Context:\n",
365
+ " Source: {src_next}\n",
366
+ " Translation: {tgt_next}\"\"\"\n",
367
+ "\n",
368
+ " result = self.assess(\n",
369
+ " context=context,\n",
370
+ " source=f\"Source ({src_lang}): {src_text}\",\n",
371
+ " translation=f\"Translation ({tgt_lang}): {translation}\"\n",
372
+ " )\n",
373
+ " \n",
374
+ " return result.evaluation\n",
375
+ "\n",
376
+ "class TranslationMetrics:\n",
377
+ " @staticmethod\n",
378
+ " def exact_match_score(pred, gold):\n",
379
+ " try:\n",
380
+ " pred_json = json.loads(pred)\n",
381
+ " gold_json = gold\n",
382
+ " \n",
383
+ " accuracy_match = (str(pred_json.get('Accuracy Score')) == str(gold_json.get('Accuracy Score')))\n",
384
+ " readability_match = (str(pred_json.get('Readability Score')) == str(gold_json.get('Readability Score')))\n",
385
+ " \n",
386
+ " return (accuracy_match and readability_match)\n",
387
+ " except:\n",
388
+ " return False\n",
389
+ " \n",
390
+ " @staticmethod\n",
391
+ " def partial_match_score(pred, gold):\n",
392
+ " try:\n",
393
+ " pred_json = json.loads(pred)\n",
394
+ " gold_json = gold\n",
395
+ " \n",
396
+ " # Score comparison\n",
397
+ " accuracy_diff = abs(float(pred_json.get('Accuracy Score', 0)) - float(gold_json.get('Accuracy Score', 0)))\n",
398
+ " readability_diff = abs(float(pred_json.get('Readability Score', 0)) - float(gold_json.get('Readability Score', 0)))\n",
399
+ " \n",
400
+ " # Issues comparison\n",
401
+ " pred_accuracy_issues = set(str(issue) for issue in pred_json.get('Accuracy Issues', []))\n",
402
+ " gold_accuracy_issues = set(str(issue) for issue in gold_json.get('Accuracy Issues', []))\n",
403
+ " pred_readability_issues = set(str(issue) for issue in pred_json.get('Readability Issues', []))\n",
404
+ " gold_readability_issues = set(str(issue) for issue in gold_json.get('Readability Issues', []))\n",
405
+ " \n",
406
+ " # Calculate Jaccard similarity for issues\n",
407
+ " accuracy_issues_sim = len(pred_accuracy_issues & gold_accuracy_issues) / max(1, len(pred_accuracy_issues | gold_accuracy_issues))\n",
408
+ " readability_issues_sim = len(pred_readability_issues & gold_readability_issues) / max(1, len(pred_readability_issues | gold_readability_issues))\n",
409
+ " \n",
410
+ " # Combine scores (0.6 weight to scores, 0.4 to issues similarity)\n",
411
+ " score_component = 1 - ((accuracy_diff + readability_diff) / 8)\n",
412
+ " issues_component = (accuracy_issues_sim + readability_issues_sim) / 2\n",
413
+ " \n",
414
+ " final_score = 0.6 * score_component + 0.4 * issues_component\n",
415
+ " return max(0, final_score)\n",
416
+ " except:\n",
417
+ " return 0\n",
418
+ "\n",
419
+ "def prepare_dataset(file_path):\n",
420
+ " with open(file_path, 'r') as f:\n",
421
+ " data = json.load(f)\n",
422
+ " \n",
423
+ " prepared_data = []\n",
424
+ " \n",
425
+ " for item in data:\n",
426
+ " example = dspy.Example(\n",
427
+ " context=f\"\"\"Previous Context:\n",
428
+ " Source: {item['src_prev']}\n",
429
+ " Translation: {item['tgt_prev']}\n",
430
+ " \n",
431
+ " Next Context:\n",
432
+ " Source: {item['src_next']}\n",
433
+ " Translation: {item['tgt_next']}\"\"\",\n",
434
+ " source=f\"Source ({item['src_lang']}): {item['src_text']}\",\n",
435
+ " translation=f\"Translation ({item['tgt_lang']}): {item['main_text']}\",\n",
436
+ " evaluation=json.dumps(item['evaluation'], ensure_ascii=False)\n",
437
+ " ).with_inputs(\"context\", \"source\", \"translation\")\n",
438
+ " \n",
439
+ " prepared_data.append(example)\n",
440
+ " \n",
441
+ " # Split data: 70% train, 15% dev, 15% test\n",
442
+ " train_size = int(0.7 * len(prepared_data))\n",
443
+ " dev_size = int(0.15 * len(prepared_data))\n",
444
+ " \n",
445
+ " train_data = prepared_data[:train_size]\n",
446
+ " dev_data = prepared_data[train_size:train_size + dev_size]\n",
447
+ " test_data = prepared_data[train_size + dev_size:]\n",
448
+ " \n",
449
+ " return train_data, dev_data, test_data\n",
450
+ "\n",
451
+ "def optimize_translation_quality_assessment():\n",
452
+ " # Initialize DSPy\n",
453
+ " lm = TranslationQualityLM()\n",
454
+ " dspy.settings.configure(lm=lm)\n",
455
+ " \n",
456
+ " # Load and prepare dataset\n",
457
+ " train_data, dev_data, test_data = prepare_dataset('translation_quality_dataset.json')\n",
458
+ " \n",
459
+ " # Create evaluator\n",
460
+ " evaluator = Evaluate(\n",
461
+ " metrics={\n",
462
+ " 'exact_match': TranslationMetrics.exact_match_score,\n",
463
+ " 'partial_match': TranslationMetrics.partial_match_score\n",
464
+ " }\n",
465
+ " )\n",
466
+ " \n",
467
+ " # Initialize module\n",
468
+ " assessor = TranslationQualityAssessor()\n",
469
+ " \n",
470
+ " # Initialize MIPROv2 optimizer\n",
471
+ " optimizer = dspy.MIPROv2(\n",
472
+ " metric=lambda x: x['partial_match'],\n",
473
+ " max_rounds=5, # Number of optimization rounds\n",
474
+ " max_traces=10, # Number of traces per round\n",
475
+ " max_depth=3, # Maximum depth of reasoning chains\n",
476
+ " num_candidate_prompts=5, # Number of candidate prompts to generate\n",
477
+ " num_rounds_per_prompt=3, # Number of rounds per candidate prompt\n",
478
+ " temperature=0.7,\n",
479
+ " verbose=True\n",
480
+ " )\n",
481
+ " \n",
482
+ " # Compile the module with optimization\n",
483
+ " compiled_assessor = optimizer.compile(\n",
484
+ " assessor,\n",
485
+ " trainset=train_data,\n",
486
+ " devset=dev_data,\n",
487
+ " eval_kwargs={\n",
488
+ " 'metric': 'partial_match',\n",
489
+ " 'num_threads': 4,\n",
490
+ " 'batch_size': 8\n",
491
+ " }\n",
492
+ " )\n",
493
+ " \n",
494
+ " # Evaluate on test set\n",
495
+ " results = []\n",
496
+ " for example in test_data:\n",
497
+ " pred = compiled_assessor(\n",
498
+ " context=example.context,\n",
499
+ " source=example.source,\n",
500
+ " translation=example.translation\n",
501
+ " )\n",
502
+ " \n",
503
+ " result = evaluator.evaluate(\n",
504
+ " predictions=[pred],\n",
505
+ " ground_truth=[example.evaluation]\n",
506
+ " )\n",
507
+ " results.append(result)\n",
508
+ " \n",
509
+ " # Calculate and print final metrics\n",
510
+ " avg_exact_match = np.mean([r['exact_match'] for r in results])\n",
511
+ " avg_partial_match = np.mean([r['partial_match'] for r in results])\n",
512
+ " \n",
513
+ " print(f\"Average Exact Match Score: {avg_exact_match:.3f}\")\n",
514
+ " print(f\"Average Partial Match Score: {avg_partial_match:.3f}\")\n",
515
+ " \n",
516
+ " return compiled_assessor\n",
517
+ "\n",
518
+ "if __name__ == \"__main__\":\n",
519
+ " optimized_assessor = optimize_translation_quality_assessment()"
520
+ ]
521
+ }
522
+ ],
523
+ "metadata": {
524
+ "kernelspec": {
525
+ "display_name": "timedlibs",
526
+ "language": "python",
527
+ "name": "timedlibs"
528
+ },
529
+ "language_info": {
530
+ "codemirror_mode": {
531
+ "name": "ipython",
532
+ "version": 3
533
+ },
534
+ "file_extension": ".py",
535
+ "mimetype": "text/x-python",
536
+ "name": "python",
537
+ "nbconvert_exporter": "python",
538
+ "pygments_lexer": "ipython3",
539
+ "version": "3.10.16"
540
+ }
541
+ },
542
+ "nbformat": 4,
543
+ "nbformat_minor": 5
544
+ }
adapter_config.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "google/gemma-3-4b-it",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 128,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "lm_head",
23
+ "embed_tokens"
24
+ ],
25
+ "peft_type": "LORA",
26
+ "r": 16,
27
+ "rank_pattern": {},
28
+ "revision": null,
29
+ "target_modules": [
30
+ "out_proj",
31
+ "v_proj",
32
+ "k_proj",
33
+ "fc1",
34
+ "down_proj",
35
+ "up_proj",
36
+ "fc2",
37
+ "o_proj",
38
+ "q_proj",
39
+ "gate_proj"
40
+ ],
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_rslora": false
45
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:913b7aae1196ba3282bd45e04a65bf705d67f933d758540a06902f63054ad6e7
3
+ size 2839124552
added_tokens.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
data_prep.py ADDED
@@ -0,0 +1,380 @@
1
+ import typing as T
2
+ import os
3
+ import sys
4
+ import argparse
5
+ import json
6
+ import nflx_copilot as ncp
7
+ import pandas as pd
8
+ import re
9
+
10
+ sys.path.append("/root/workspace")
11
+
12
+ from timedtext.adapters.translation.generation.pldl import TimedTextAdapter, ConverterDialogContext
13
+ from timedtext.manager import TimedTextManager
14
+ from timedtext.handlers import OriginalLanguagePivotLanguageHandler, EnglishTemplateSubtitleHandler
15
+ from timedprompts.evaluation.pldl_prompt_one.prompt import (
16
+ ReferenceFreeFeedbackTransform,
17
+ ContextFreeFeedbackTransform,
18
+ ReferenceFreeDirectTransform,
19
+ ReferenceBasedFeedbackTransform,
20
+ ReferenceFreeExampleTransform,
21
+ )
22
+ from tqdm import tqdm
23
+ from timedtune.convert.tq_for_pldl.pldl_train_one import PldlTrainOneReferenceFreeTransform
24
+ from timedtext.adapters.translation.evaluation import compute_score_delta
25
+
26
+ def compute_32_point_score(response, generation):
27
+ parsed, score = {}, -1
28
+ try:
29
+ score = (
30
+ int(response["Accuracy Score"])
31
+ + int(response["Readability Score"])
32
+ + compute_score_delta(response, "Accuracy Issues", generation)
33
+ + compute_score_delta(response, "Readability Issues", generation)
34
+ )
35
+ score = score * 4
36
+ except Exception:
37
+ score = -1
38
+ return parsed, score
39
+
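+ # Usage sketch (as called from main() below): pass one labeler's annotation dict
+ # and the generated target text; score is -1 whenever the response cannot be scored.
+ # _, score = compute_32_point_score(labeler["annotation"], annotation_result["main_tgt_text"])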
40
+ # Your existing TimedTextAdapter and helper classes
41
+ class TimedTextAdapterFromCache_PLDL(TimedTextAdapter):
42
+ def __init__(
43
+ self,
44
+ data_dir: str,
45
+ cache_size: int = 0,
46
+ ol_dialog_list_version: str = "",
47
+ pl_dialog_list_version: str = "",
48
+ ol_dialog_list_pl_dialog_list_version: str = "",
49
+ num_prev_events: int = 16,
50
+ num_next_events: int = 16,
51
+ ) -> None:
52
+ super().__init__(num_prev_events, num_next_events)
53
+ self.timed_text_manager = TimedTextManager(
54
+ data_dir,
55
+ cache_size=cache_size,
56
+ ol_dialog_list_version=ol_dialog_list_version,
57
+ pl_dialog_list_version=pl_dialog_list_version,
58
+ ol_dialog_list_pl_dialog_list_version=ol_dialog_list_pl_dialog_list_version,
59
+ )
60
+
61
+ def _get_timed_text(
62
+ self, movie_id: int, start_frame: int, end_frame: int, src_lang: str, tgt_lang: str
63
+ ) -> T.Dict[str, T.Union[T.Dict, T.List[T.Dict]]]:
64
+ results = self.timed_text_manager.match_and_get_timed_text(
65
+ handler_class=OriginalLanguagePivotLanguageHandler,
66
+ movie_id=movie_id,
67
+ start_frame=start_frame,
68
+ end_frame=end_frame,
69
+ src_lang=src_lang,
70
+ tgt_lang=tgt_lang,
71
+ mid_lang="",
72
+ **self.timed_text_kwargs,
73
+ )
74
+
75
+ curr_srcs = [result["curr"]["src"]["txt"] for result in results]
76
+ curr_tgts = [result["curr"]["tgt"]["txt"] for result in results]
77
+
78
+ return {
79
+ "curr": {"src": {"txt": "\n\n".join(curr_srcs)}, "tgt": {"txt": "\n\n".join(curr_tgts)}},
80
+ "prev": results[0]["prev"],
81
+ "next": results[-1]["next"],
82
+ }
83
+
84
+ class TimedTextAdapterFromCache_SUBS(TimedTextAdapter):
85
+ def __init__(
86
+ self,
87
+ data_dir: str,
88
+ cache_size: int = 0,
89
+ num_prev_events: int = 16,
90
+ num_next_events: int = 16,
91
+ ) -> None:
92
+ super().__init__(num_prev_events, num_next_events)
93
+ self.timed_text_manager = TimedTextManager(
94
+ data_dir,
95
+ cache_size=cache_size,
96
+ )
97
+
98
+ def _get_timed_text(
99
+ self, movie_id: int, start_frame: int, end_frame: int, src_lang: str, tgt_lang: str
100
+ ) -> T.Dict[str, T.Union[T.Dict, T.List[T.Dict]]]:
101
+ results = self.timed_text_manager.match_and_get_timed_text(
102
+ handler_class=EnglishTemplateSubtitleHandler,
103
+ movie_id=movie_id,
104
+ start_frame=start_frame,
105
+ end_frame=end_frame,
106
+ src_lang=src_lang,
107
+ tgt_lang=tgt_lang,
108
+ mid_lang="",
109
+ **self.timed_text_kwargs,
110
+ )
111
+
112
+ curr_srcs = [result["curr"]["src"]["txt"] for result in results]
113
+ curr_tgts = [result["curr"]["tgt"]["txt"] for result in results]
114
+
115
+ return {
116
+ "curr": {"src": {"txt": "\n\n".join(curr_srcs)}, "tgt": {"txt": "\n\n".join(curr_tgts)}},
117
+ "prev": results[0]["prev"],
118
+ "next": results[-1]["next"],
119
+ }
120
+
121
+
122
+ # Function to fetch contextual information using TimedTextAdapter
123
+ def fetch_contextual_information(timed_text_adapter, row):
124
+ """
125
+ Fetches the required context information for each sample using timed_text_adapter.
126
+
127
+ Args:
128
+ timed_text_adapter (TimedTextAdapterFromCache): Adapter to fetch data from.
129
+ row (dict): Row containing the necessary information to fetch the context.
130
+
131
+ Returns:
132
+ dict: Contextual information with keys tt_src_text, tt_tgt_text, tt_src_prev, tt_src_next, tt_tgt_prev, tt_tgt_next.
133
+ """
134
+ # Fetching the actual translation context
135
+ src_text, tgt_text, prev_context, next_context = timed_text_adapter.get_timed_text(
136
+ movie_id=row["movie_id"],
137
+ start_frame=row["start_frame"],
138
+ end_frame=row["end_frame"],
139
+ src_lang=row["src_lang"],
140
+ tgt_lang=row["tgt_lang"],
141
+ )
142
+
143
+ timed_text_converter = ConverterDialogContext(timed_text_adapter)
144
+
145
+ # Converting context to the format expected by the prompt
146
+ src_prev, src_next, tgt_prev, tgt_next, _ = timed_text_converter.__context__(
147
+ row["src_lang"], row["tgt_lang"], prev_context, next_context, None
148
+ )
149
+
150
+ return {
151
+ "tt_src_text": src_text,
152
+ "tt_tgt_text": tgt_text,
153
+ "tt_src_prev": src_prev,
154
+ "tt_src_next": src_next,
155
+ "tt_tgt_prev": tgt_prev,
156
+ "tt_tgt_next": tgt_next,
157
+ }
158
+
159
+ def transform_json(input_json):
160
+ # Get the first project key
161
+ project_key = list(input_json['projects'].keys())[0]
162
+ project = input_json['projects'][project_key]
163
+
164
+ final_output = {"labelers": []}
165
+ # Process each label
166
+ for index, label in enumerate(project['labels']):
167
+ # Initialize output structure
168
+ output = {
169
+ "annotation": {
170
+ "Accuracy Issues": [],
171
+ "Readability Issues": [],
172
+ "Accuracy Score": "",
173
+ "Readability Score": "",
174
+ "Confidence Level": "",
175
+ "Main Vs Alternate": "",
176
+ "Score": "-1" # initalized -1, will be updated in next steps
177
+ },
178
+ }
179
+ # Process annotations/objects (issues)
180
+ if 'objects' in label['annotations']:
181
+ for obj in label['annotations']['objects']:
182
+ issue = {
183
+ "Error Location": obj['conversational_location']['message_id'],
184
+ "Error Span": [
185
+ obj['conversational_location']['location']['start'],
186
+ obj['conversational_location']['location']['end']
187
+ ],
188
+ "Error Explanation": "",
189
+ "Error Quality Category": obj['name'],
190
+ "Error Quality Tags": [],
191
+ "Error Severity": ""
192
+ }
193
+
194
+ # Process classifications within object
195
+ for classification in obj['classifications']:
196
+ if classification['name'] == 'Explanation':
197
+ issue["Error Explanation"] = classification['text_answer']['content']
198
+ elif classification['name'] == 'Quality Tag':
199
+ issue["Error Quality Tags"] = [ans['name'].lower() for ans in classification['checklist_answers']]
200
+ elif classification['name'] == 'Quality SubCategory':
201
+ severity = classification['radio_answer']['name']
202
+ if 'Major' in severity:
203
+ issue["Error Severity"] = "Major"
204
+ else:
205
+ issue["Error Severity"] = "Minor"
206
+
207
+ # Add to appropriate issues list
208
+ if obj['name'] == 'Style':
209
+ output['annotation']['Readability Issues'].append(issue)
210
+ else:
211
+ output['annotation']['Accuracy Issues'].append(issue)
212
+
213
+ # Process classifications
214
+ for classification in label['annotations']['classifications']:
215
+ if classification['name'] == 'Accuracy Score':
216
+ output['annotation']['Accuracy Score'] = classification['radio_answer']['name'].split(' - ')[0]
217
+ elif classification['name'] == 'Readability Score':
218
+ output['annotation']['Readability Score'] = classification['radio_answer']['name'].split(' - ')[0]
219
+ elif classification['name'] == 'Confidence Level':
220
+ output['annotation']['Confidence Level'] = classification['radio_answer']['value']
221
+ elif classification['name'] == 'Main vs Alternate':
222
+ output['annotation']['Main Vs Alternate'] = classification['radio_answer']['name']
223
+ final_output["labelers"].append(output)
224
+ return final_output
225
+
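+ # Shape sketch of the value returned above (field values illustrative only):
+ # {"labelers": [{"annotation": {"Accuracy Issues": [...], "Readability Issues": [...],
+ #   "Accuracy Score": "4", "Readability Score": "3",
+ #   "Confidence Level": "high", "Main Vs Alternate": "Main", "Score": "-1"}}]}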
226
+ # Function to load the relevant meta json for a given key
227
+ def load_meta_json(priority_key, data_row_key, meta_path):
228
+ """
229
+ Loads and validates metadata json from the specified path based on the priority key and data row key.
230
+
231
+ Args:
232
+ priority_key (str): Priority key from the label metadata.
233
+ data_row_key (str): Data row key to find the relevant file.
234
+ meta_path (str): Path to the metadata folder.
235
+
236
+ Returns:
237
+ dict: Loaded metadata.
238
+ """
239
+ with open(os.path.join(meta_path, f'{priority_key}.json')) as fread:
240
+ meta_dict = json.load(fread)
241
+
242
+ _, movie_id, start_end_frame, _, _, _, _ = data_row_key.split('.')
243
+ start_frame, end_frame = start_end_frame.split('_')
244
+
245
+ if int(meta_dict['movie_id']) != int(movie_id):
246
+ print("Movie Ids didn't match:", int(meta_dict['movie_id']), int(movie_id), os.path.join(meta_path, f'{priority_key}.json'), data_row_key)
247
+ sys.exit(1)
248
+ assert int(meta_dict['start_frame']) == int(start_frame)
249
+ assert int(meta_dict['end_frame']) == int(end_frame)
250
+
251
+ return meta_dict
252
+
253
+ # Main function that processes the data
254
+ def process_json(timed_text_adapter, example_row, meta_path, conv_path):
255
+ """
256
+ Takes the full input json, converts it to the required format, and adds context using metadata.
257
+
258
+ Args:
259
+ timed_text_adapter (TimedTextAdapterFromCache): Adapter to fetch context.
260
+ example_row (dict): One full labeled record from the raw export JSON.
261
+ meta_path (str): Path to the metadata folder to fetch meta json.
+ conv_path (str): Path to the conversation folder holding the main/src/alt texts.
262
+
263
+ Returns:
264
+ dict: The enriched annotation format with context and annotation data.
265
+ """
266
+ # Step 1: Convert the full input JSON to the required annotation format
267
+ annotation_result = transform_json(example_row)
268
+
269
+ # Extracting the necessary data_row_key and priority_key
270
+ data_row_key = example_row['data_row']['global_key']
271
+ priority_key = example_row['projects'][list(example_row["projects"].keys())[0]]['project_details']['priority']
272
+
273
+ annotation_result["Data_Row_Key"] = data_row_key
274
+ key = ".".join(data_row_key.split(".")[:3])
275
+ with open(conv_path + "/" + key + ".json") as file:
276
+ data = json.load(file)
277
+ annotation_result["main_tgt_text"] = data["messages"][0]["content"]
278
+ annotation_result["src_text"] = data["messages"][1]["content"]
279
+ annotation_result["alt_tgt_text"] = data["messages"][2]["content"]
280
+
281
+ # Load the metadata using the keys from the json
282
+ meta_dict = load_meta_json(priority_key, data_row_key, meta_path)
283
+
284
+ # Step 2: Add the metadata fields (e.g., title_id, start_frame, end_frame, src_lang, tgt_lang)
285
+ annotation_result.update({
286
+ "title_id": meta_dict['movie_id'],
287
+ "start_frame": meta_dict['start_frame'],
288
+ "end_frame": meta_dict['end_frame'],
289
+ "src_lang": meta_dict['src_lang'],
290
+ "tgt_lang": meta_dict['tgt_lang'],
291
+ })
292
+
293
+ # Step 3: Fetch contextual information using the given timed_text_adapter
294
+ context_info = fetch_contextual_information(timed_text_adapter, meta_dict)
295
+
296
+ annotation_result.update(context_info)
297
+
298
+ # Update error spans with actual text for each labeler
299
+ for labeler in annotation_result["labelers"]:
300
+ # Process Accuracy Issues
301
+ for issue in labeler["annotation"]["Accuracy Issues"]:
302
+ error_location = issue["Error Location"]
303
+ start, end = issue["Error Span"][0], issue["Error Span"][1]
304
+
305
+ # Get the actual text based on error location
306
+ if error_location == "src":
307
+ actual_text = annotation_result["src_text"][start:end]
308
+ else: # tgt
309
+ actual_text = annotation_result["main_tgt_text"][start:end]
310
+
311
+ # Update the error span with actual text
312
+ issue["Error Span"] = actual_text
313
+
314
+ # Process Readability Issues
315
+ for issue in labeler["annotation"]["Readability Issues"]:
316
+ error_location = issue["Error Location"]
317
+ start, end = issue["Error Span"]
318
+
319
+ # Get the actual text based on error location
320
+ if error_location == "src":
321
+ actual_text = annotation_result["src_text"][start:end]
322
+ else: # tgt
323
+ actual_text = annotation_result["main_tgt_text"][start:end]
324
+
325
+ # Update the error span with actual text
326
+ issue["Error Span"] = actual_text
327
+
328
+ return annotation_result
329
+
330
+ # Example usage
331
+ def main():
332
+ base_path = "MT_TQ/Caches/May2025/tquality.annotated.data/"
333
+ json_files = [base_path + "raw/" + f for f in os.listdir(base_path + "raw/") if f.endswith('.json')]
334
+
335
+ for json_file in tqdm(json_files):
336
+ if "calibration" in json_file:
337
+ print("Warning: Skipping Calibration Data, Remove this if you want to use Calibration data")
338
+ continue
339
+
340
+ if "PLDL" in json_file:
341
+ folder = "pldl"
342
+ timed_text_adapter = TimedTextAdapterFromCache_PLDL(
343
+ data_dir="/fsx_l10n/l10n_dse_timedtext/cache", num_prev_events=32, num_next_events=32
344
+ )
345
+ elif "SUBS" in json_file:
346
+ folder = "subs"
347
+ timed_text_adapter = TimedTextAdapterFromCache_SUBS(
348
+ data_dir="/fsx_l10n/l10n_dse_timedtext/cache", num_prev_events=32, num_next_events=32
349
+ )
350
+ else:
351
+ folder = ""
352
+ assert "invalid json file"
353
+
354
+ langs_type = json_file.split("/")[-1].split("-")[1].replace("_",".")
355
+ phase = json_file.split("/")[-1].split("-")[3]
356
+ phase_number = int(''.join(re.findall(r'\d+', phase))) if re.findall(r'\d+', phase) else None
357
+ phase_date = json_file.split("/")[-1].split("-")[4].replace(".json", "")
358
+
359
+ zzmetapath = f"/root/notebooks/MT_TQ/Caches/labelspace/tquality.zzmeta.data/{folder}/{langs_type}/phase {phase_number} - {phase_date}"
360
+
361
+ meta_path = zzmetapath + "/meta"
362
+ conv_path = zzmetapath + "/conv"
363
+
364
+ with open(json_file) as file:
365
+ data = json.load(file)
366
+
367
+ output_data = []
368
+ for data_point in tqdm(data):
369
+ annotation_result = process_json(timed_text_adapter, data_point, meta_path, conv_path)
370
+ for labeler in annotation_result["labelers"]:
371
+ _, score = compute_32_point_score(labeler["annotation"], annotation_result["main_tgt_text"])
372
+ labeler["annotation"]["Score"] = score
373
+
374
+ output_data.append(annotation_result)
375
+
376
+ with open(base_path + "parsed/" + json_file.split("/")[-1], 'w') as fout:
377
+ json.dump({"data": output_data}, fout, indent=4)
378
+
379
+ if __name__ == "__main__":
380
+ main()
gemma-12b-tq-model/README.md ADDED
@@ -0,0 +1,58 @@
1
+ ---
2
+ base_model: google/gemma-3-4b-it
3
+ library_name: transformers
4
+ model_name: gemma-12b-tq-model
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - sft
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for gemma-12b-tq-model
13
+
14
+ This model is a fine-tuned version of [google/gemma-3-4b-it](https://huggingface.co/google/gemma-3-4b-it).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="bhavinjawade/gemma-12b-tq-model", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+
31
+
32
+
33
+ This model was trained with SFT.
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.16.1
38
+ - Transformers: 4.50.0.dev0
39
+ - Pytorch: 2.7.0
40
+ - Datasets: 3.3.2
41
+ - Tokenizers: 0.21.0
42
+
43
+ ## Citations
44
+
45
+
46
+
47
+ Cite TRL as:
48
+
49
+ ```bibtex
50
+ @misc{vonwerra2022trl,
51
+ title = {{TRL: Transformer Reinforcement Learning}},
52
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
53
+ year = 2020,
54
+ journal = {GitHub repository},
55
+ publisher = {GitHub},
56
+ howpublished = {\url{https://github.com/huggingface/trl}}
57
+ }
58
+ ```
gemma-12b-tq-model/adapter_config.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "google/gemma-3-4b-it",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 128,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "lm_head",
23
+ "embed_tokens"
24
+ ],
25
+ "peft_type": "LORA",
26
+ "r": 16,
27
+ "rank_pattern": {},
28
+ "revision": null,
29
+ "target_modules": [
30
+ "up_proj",
31
+ "gate_proj",
32
+ "fc2",
33
+ "out_proj",
34
+ "fc1",
35
+ "down_proj",
36
+ "o_proj",
37
+ "k_proj",
38
+ "q_proj",
39
+ "v_proj"
40
+ ],
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_rslora": false
45
+ }
gemma-12b-tq-model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44481365420dd4297edab0bc7d76dc43d4e6d7f38e393cce87c2fabdbea96661
3
+ size 2839124552
gemma-12b-tq-model/added_tokens.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
gemma-12b-tq-model/checkpoint-2/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: google/gemma-3-4b-it
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.15.2
gemma-12b-tq-model/checkpoint-2/adapter_config.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "google/gemma-3-4b-it",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 128,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "lm_head",
23
+ "embed_tokens"
24
+ ],
25
+ "peft_type": "LORA",
26
+ "r": 16,
27
+ "rank_pattern": {},
28
+ "revision": null,
29
+ "target_modules": [
30
+ "up_proj",
31
+ "gate_proj",
32
+ "fc2",
33
+ "out_proj",
34
+ "fc1",
35
+ "down_proj",
36
+ "o_proj",
37
+ "k_proj",
38
+ "q_proj",
39
+ "v_proj"
40
+ ],
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_rslora": false
45
+ }
gemma-12b-tq-model/checkpoint-2/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44481365420dd4297edab0bc7d76dc43d4e6d7f38e393cce87c2fabdbea96661
3
+ size 2839124552
gemma-12b-tq-model/checkpoint-2/added_tokens.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
gemma-12b-tq-model/checkpoint-2/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:898abdc006b804547b999529ece7d0ca106ab09c0a2352337e1809b5041573ee
3
+ size 5608850589
gemma-12b-tq-model/checkpoint-2/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:250560ab3d528161ab3659b120def6e4a9ab4b457e3399603bbcfa40db3efc90
3
+ size 14645
gemma-12b-tq-model/checkpoint-2/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29847f084360f67920e16e2780978c5b4908b1a69433f50a755d4db1e0c11563
3
+ size 1401
gemma-12b-tq-model/checkpoint-2/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
gemma-12b-tq-model/checkpoint-2/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
gemma-12b-tq-model/checkpoint-2/tokenizer.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
gemma-12b-tq-model/checkpoint-2/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
gemma-12b-tq-model/checkpoint-2/trainer_state.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.7272727272727273,
5
+ "eval_steps": 500,
6
+ "global_step": 2,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.36363636363636365,
13
+ "grad_norm": 269.2862243652344,
14
+ "learning_rate": 0.0002,
15
+ "loss": 12.5019,
16
+ "mean_token_accuracy": 0.4838709682226181,
17
+ "num_tokens": 4096.0,
18
+ "step": 1
19
+ },
20
+ {
21
+ "epoch": 0.7272727272727273,
22
+ "grad_norm": 232.57679748535156,
23
+ "learning_rate": 0.0002,
24
+ "loss": 9.4112,
25
+ "mean_token_accuracy": 0.5447482168674469,
26
+ "num_tokens": 7561.0,
27
+ "step": 2
28
+ }
29
+ ],
30
+ "logging_steps": 1,
31
+ "max_steps": 2,
32
+ "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 1,
34
+ "save_steps": 500,
35
+ "stateful_callbacks": {
36
+ "TrainerControl": {
37
+ "args": {
38
+ "should_epoch_stop": false,
39
+ "should_evaluate": false,
40
+ "should_log": false,
41
+ "should_save": true,
42
+ "should_training_stop": true
43
+ },
44
+ "attributes": {}
45
+ }
46
+ },
47
+ "total_flos": 196609832513952.0,
48
+ "train_batch_size": 1,
49
+ "trial_name": null,
50
+ "trial_params": null
51
+ }
gemma-12b-tq-model/checkpoint-2/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a6ae5bb0171c4fcfd72d4d46496b122da48e4ebf65ff31d5812d7d8dba26a8e
3
+ size 6161
gemma-12b-tq-model/runs/Apr25_08-39-59_9945b53f-579e-4565-94fc-5fbe73c83cc2/events.out.tfevents.1745570448.9945b53f-579e-4565-94fc-5fbe73c83cc2 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1316da37ce84d8a16f9542d45cf4ab5617be2069029895d1e700955c546f6c26
3
+ size 6460
gemma-12b-tq-model/runs/Apr25_08-42-29_9945b53f-579e-4565-94fc-5fbe73c83cc2/events.out.tfevents.1745570563.9945b53f-579e-4565-94fc-5fbe73c83cc2 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a216b92a09ed293f852daa5eac2575a729347e101cd1e58fb41bce94b4a0c3d
3
+ size 6460
gemma-12b-tq-model/runs/Apr25_09-19-39_9945b53f-579e-4565-94fc-5fbe73c83cc2/events.out.tfevents.1745572788.9945b53f-579e-4565-94fc-5fbe73c83cc2 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac3b5c4eb7654e90a80565f05650922f5b8fe9c0c0e8f02134b18287d5ef32db
3
+ size 7455
gemma-12b-tq-model/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
gemma-12b-tq-model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
gemma-12b-tq-model/tokenizer.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
gemma-12b-tq-model/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
gemma-12b-tq-model/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a6ae5bb0171c4fcfd72d4d46496b122da48e4ebf65ff31d5812d7d8dba26a8e
3
+ size 6161
gemma-1b-tq-model/README.md ADDED
@@ -0,0 +1,58 @@
1
+ ---
2
+ base_model: google/gemma-3-1b-pt
3
+ library_name: transformers
4
+ model_name: gemma-1b-tq-model
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - sft
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for gemma-1b-tq-model
13
+
14
+ This model is a fine-tuned version of [google/gemma-3-1b-pt](https://huggingface.co/google/gemma-3-1b-pt).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="bhavinjawade/gemma-1b-tq-model", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+
31
+
32
+
33
+ This model was trained with SFT.
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.16.1
38
+ - Transformers: 4.50.0.dev0
39
+ - Pytorch: 2.7.0
40
+ - Datasets: 3.3.2
41
+ - Tokenizers: 0.21.0
42
+
43
+ ## Citations
44
+
45
+
46
+
47
+ Cite TRL as:
48
+
49
+ ```bibtex
50
+ @misc{vonwerra2022trl,
51
+ title = {{TRL: Transformer Reinforcement Learning}},
52
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
53
+ year = 2020,
54
+ journal = {GitHub repository},
55
+ publisher = {GitHub},
56
+ howpublished = {\url{https://github.com/huggingface/trl}}
57
+ }
58
+ ```
gemma-1b-tq-model/adapter_config.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "google/gemma-3-1b-pt",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "lm_head",
23
+ "embed_tokens"
24
+ ],
25
+ "peft_type": "LORA",
26
+ "r": 16,
27
+ "rank_pattern": {},
28
+ "revision": null,
29
+ "target_modules": [
30
+ "o_proj",
31
+ "k_proj",
32
+ "v_proj",
33
+ "q_proj",
34
+ "up_proj",
35
+ "down_proj",
36
+ "gate_proj"
37
+ ],
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_rslora": false
42
+ }
gemma-1b-tq-model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ccfa8bc91f4f5e80a03ca2a73036523a47df7550df49b4cd8296c486ed37de
3
+ size 1260191096
gemma-1b-tq-model/added_tokens.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
gemma-1b-tq-model/checkpoint-10/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: google/gemma-3-1b-pt
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.15.2
gemma-1b-tq-model/checkpoint-10/adapter_config.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "google/gemma-3-1b-pt",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "lm_head",
23
+ "embed_tokens"
24
+ ],
25
+ "peft_type": "LORA",
26
+ "r": 16,
27
+ "rank_pattern": {},
28
+ "revision": null,
29
+ "target_modules": [
30
+ "o_proj",
31
+ "k_proj",
32
+ "v_proj",
33
+ "q_proj",
34
+ "up_proj",
35
+ "down_proj",
36
+ "gate_proj"
37
+ ],
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_rslora": false
42
+ }
gemma-1b-tq-model/checkpoint-10/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b5ac34a75c3a7a06c3c9fda32b95d9870267691dacb15af9c0c2c08dc4e7934
3
+ size 1260191096
gemma-1b-tq-model/checkpoint-10/added_tokens.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
gemma-1b-tq-model/checkpoint-10/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f990dd49302e243f6510cfc5976f8dc28049c45c9af22f8424f89bc8b3d89b2
3
+ size 2520598381
gemma-1b-tq-model/checkpoint-10/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67a697233a108d806598e97819e22cb699651bb7e046c04cc47db386d7540306
3
+ size 14645
gemma-1b-tq-model/checkpoint-10/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:591c3072a024fe1a8043b72e8e5366699aec4a9d0c3da5bde546eb445034a199
3
+ size 1401
gemma-1b-tq-model/checkpoint-10/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }