\"I Am Curious: Yellow\" is a risible and preten...
\n",
"
0
\n",
"
\n",
"
\n",
"
2
\n",
"
If only to avoid making this type of film in t...
\n",
"
0
\n",
"
\n",
"
\n",
"
3
\n",
"
This film was probably inspired by Godard's Ma...
\n",
"
0
\n",
"
\n",
"
\n",
"
4
\n",
"
Oh, brother...after hearing about this ridicul...
\n",
"
0
\n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" text label\n",
"0 I rented I AM CURIOUS-YELLOW from my video sto... 0\n",
"1 \"I Am Curious: Yellow\" is a risible and preten... 0\n",
"2 If only to avoid making this type of film in t... 0\n",
"3 This film was probably inspired by Godard's Ma... 0\n",
"4 Oh, brother...after hearing about this ridicul... 0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "",
"text/plain": [
"
\n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"✔ Evaluación final en test:\n",
" eval_loss: 0.5139\n",
" eval_accuracy: 0.8884\n",
" eval_f1: 0.8883\n",
" eval_runtime: 7263.0685\n",
" eval_samples_per_second: 3.4420\n",
" eval_steps_per_second: 0.2150\n",
" epoch: 3.0000\n",
"\n",
"✔ Modelo y tokenizer guardados en 'sentiment-bert-model/'\n"
]
}
],
"source": [
"# -------------------------------------------------------------\n",
"# CELL 3: MODEL DEFINITION, TRAINING, EVALUATION AND SAVING\n",
"# -------------------------------------------------------------\n",
"# Relies on names created by earlier cells: train_dataset and\n",
"# test_dataset (cell 2, per the original author's note) and tokenizer\n",
"# (not defined in this cell; assumed to come from an earlier cell).\n",
"\n",
"# 1) Imports used by the training pipeline\n",
"import numpy as np\n",
"from sklearn.metrics import accuracy_score, f1_score\n",
"from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments\n",
"\n",
"# 2) Load BERT with a sequence-classification head.\n",
"#    num_labels=2 because there are two classes: positive and negative.\n",
"model = AutoModelForSequenceClassification.from_pretrained(\"bert-base-uncased\", num_labels=2)\n",
"\n",
"# 3) Metric callback handed to the Trainer\n",
"def compute_metrics(eval_pred):\n",
"    \"\"\"Return accuracy and F1 for one evaluation pass.\n",
"\n",
"    eval_pred unpacks into (logits, labels). The predicted class is the\n",
"    argmax of the logits along axis 1; f1_score is called with its defaults.\n",
"    \"\"\"\n",
"    scores, gold = eval_pred\n",
"    predicted = np.argmax(scores, axis=1)\n",
"    accuracy = accuracy_score(gold, predicted)\n",
"    f1 = f1_score(gold, predicted)\n",
"    return {\"accuracy\": accuracy, \"f1\": f1}\n",
"\n",
"# 4) Training configuration.\n",
"#    Written against transformers 4.51.3 (original author's note), using\n",
"#    the do_train / do_eval flags.\n",
"training_args = TrainingArguments(\n",
"    # Where things are written\n",
"    output_dir=\"./results\",  # checkpoint folder\n",
"    logging_dir=\"./logs\",  # log folder (TensorBoard)\n",
"    logging_steps=100,  # log metrics every 100 steps\n",
"    # How much work is done\n",
"    num_train_epochs=3,  # passes over the training set\n",
"    per_device_train_batch_size=16,  # batch size while training\n",
"    per_device_eval_batch_size=16,  # batch size while evaluating\n",
"    # Which phases are enabled\n",
"    do_train=True,\n",
"    do_eval=True,\n",
")\n",
"print(\"✔ TrainingArguments configurados\")\n",
"\n",
"# 5) Build the Trainer; the test split doubles as the evaluation set.\n",
"trainer = Trainer(\n",
"    model=model,\n",
"    args=training_args,\n",
"    compute_metrics=compute_metrics,\n",
"    train_dataset=train_dataset,\n",
"    eval_dataset=test_dataset,\n",
")\n",
"print(\"✔ Trainer instanciado\")\n",
"\n",
"# 6) Run training\n",
"print(\"\\n→ Entrenamiento en curso… puede tardar varios minutos:\")\n",
"trainer.train()\n",
"\n",
"# 7) Final evaluation on the Trainer's eval_dataset, one metric per line\n",
"metrics = trainer.evaluate()\n",
"print(\"\\n✔ Evaluación final en test:\")\n",
"for name, value in metrics.items():\n",
"    print(f\" {name}: {value:.4f}\")\n",
"\n",
"# 8) Persist the model and the tokenizer side by side for deployment\n",
"trainer.save_model(\"sentiment-bert-model\")\n",
"tokenizer.save_pretrained(\"sentiment-bert-model\")\n",
"print(\"\\n✔ Modelo y tokenizer guardados en 'sentiment-bert-model/'\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "0db4d022-aa06-40e2-b4b8-ab6b1e226a16",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: huggingface_hub in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (0.31.1)\n",
"Collecting huggingface_hub\n",
" Using cached huggingface_hub-0.32.0-py3-none-any.whl.metadata (14 kB)\n",
"Requirement already satisfied: filelock in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from huggingface_hub) (3.18.0)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from huggingface_hub) (2025.3.0)\n",
"Requirement already satisfied: packaging>=20.9 in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from huggingface_hub) (25.0)\n",
"Requirement already satisfied: pyyaml>=5.1 in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from huggingface_hub) (6.0.2)\n",
"Requirement already satisfied: requests in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from huggingface_hub) (2.32.3)\n",
"Requirement already satisfied: tqdm>=4.42.1 in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from huggingface_hub) (4.67.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from huggingface_hub) (4.13.2)\n",
"Requirement already satisfied: colorama in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from tqdm>=4.42.1->huggingface_hub) (0.4.6)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from requests->huggingface_hub) (3.4.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from requests->huggingface_hub) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from requests->huggingface_hub) (2.4.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\light\\documents\\sentiment-analysis-bert\\venv\\lib\\site-packages (from requests->huggingface_hub) (2025.4.26)\n",
"Using cached huggingface_hub-0.32.0-py3-none-any.whl (509 kB)\n",
"Installing collected packages: huggingface_hub\n",
" Attempting uninstall: huggingface_hub\n",
" Found existing installation: huggingface-hub 0.31.1\n",
" Uninstalling huggingface-hub-0.31.1:\n",
" Successfully uninstalled huggingface-hub-0.31.1\n",
"Successfully installed huggingface_hub-0.32.0\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Pinned to the release this cell's recorded output shows being installed,\n",
"# so that re-running the notebook later does not pull a different version.\n",
"# Restart the kernel after this cell so the installed version is the one imported.\n",
"%pip install --upgrade huggingface_hub==0.32.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "031f4c70-5a75-4d89-9905-bbf8016b7b68",
"metadata": {},
"outputs": [],
"source": [
"# Upload the whole project folder to a Hugging Face Hub repository.\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"from huggingface_hub import HfApi, login, create_repo\n",
"\n",
"# --- CONFIGURATION ---\n",
"HF_USERNAME = \"Light-Dav\"  # Hub account that owns the target repository\n",
"\n",
"# The access token must never be written into the notebook: this cell uploads\n",
"# the project folder to a public repository. Read it from the HF_TOKEN\n",
"# environment variable, or prompt for it (input is not echoed).\n",
"# NOTE(review): an earlier revision of this cell contained a literal token.\n",
"# Deleting it from the file does not invalidate it; revoke it in the\n",
"# Hugging Face account settings and create a new one.\n",
"HF_TOKEN = os.environ.get(\"HF_TOKEN\") or getpass(\"Hugging Face token (write access): \")\n",
"\n",
"REPO_NAME = \"sentiment-analysis-full-project\"  # must be the exact repo name on the Hub\n",
"REPO_ID = f\"{HF_USERNAME}/{REPO_NAME}\"\n",
"\n",
"# --- PROJECT PATH ---\n",
"# Folder whose content is uploaded. The default is the original author's\n",
"# location; set the PROJECT_ROOT_DIR environment variable to use another one.\n",
"PROJECT_ROOT_DIR = os.environ.get(\n",
"    \"PROJECT_ROOT_DIR\",\n",
"    \"C:\\\\Users\\\\Light\\\\Documents\\\\sentiment-analysis-bert\",\n",
")\n",
"\n",
"# Paths, relative to PROJECT_ROOT_DIR, that are left out of the upload.\n",
"# Patterns are matched against the relative file paths; both the root-level\n",
"# and the nested form of each folder are listed explicitly.\n",
"# More can be added, e.g. \"sentiment-bert-model/*\" if the model lives in another repo.\n",
"IGNORE_PATTERNS = [\n",
"    \"venv/*\",  # virtual environment\n",
"    \"__pycache__/*\",  # Python bytecode cache (root)\n",
"    \"*/__pycache__/*\",  # Python bytecode cache (nested)\n",
"    \".ipynb_checkpoints/*\",  # Jupyter checkpoints (root)\n",
"    \"*/.ipynb_checkpoints/*\",  # Jupyter checkpoints (nested, e.g. notebooks/)\n",
"]\n",
"\n",
"print(f\"La ruta del proyecto a subir es: {PROJECT_ROOT_DIR}\")\n",
"print(f\"El repositorio de destino será: {REPO_ID}\")\n",
"\n",
"# --- EXECUTION ---\n",
"try:\n",
"    print(\"\\n--- Paso 1: Iniciando sesión en Hugging Face Hub ---\")\n",
"    login(token=HF_TOKEN)\n",
"    print(\"Inicio de sesión exitoso en Hugging Face Hub.\")\n",
"\n",
"    api = HfApi()\n",
"\n",
"    print(f\"\\n--- Paso 2: Creando/Verificando el repositorio '{REPO_ID}' en Hugging Face Hub ---\")\n",
"    # exist_ok=True makes this step safe to re-run against an existing repo.\n",
"    create_repo(repo_id=REPO_ID, private=False, exist_ok=True, token=HF_TOKEN)\n",
"    print(f\"Repositorio '{REPO_ID}' creado o ya existe en Hugging Face Hub.\")\n",
"\n",
"    print(f\"\\n--- Paso 3: Subiendo el contenido de '{PROJECT_ROOT_DIR}' a '{REPO_ID}' ---\")\n",
"    # upload_folder filters files with ignore_patterns. The previous call passed\n",
"    # git_exclude=..., which is not a parameter of upload_folder and made the\n",
"    # call fail with a TypeError before anything was uploaded.\n",
"    # multi_commits was dropped as well: NOTE(review) it appears to be no longer\n",
"    # accepted by recent huggingface_hub releases; for very large folders see\n",
"    # HfApi.upload_large_folder - confirm against the installed version.\n",
"    api.upload_folder(\n",
"        folder_path=PROJECT_ROOT_DIR,\n",
"        repo_id=REPO_ID,\n",
"        repo_type=\"model\",\n",
"        commit_message=\"Initial upload of full sentiment analysis project (code, notebooks, data)\",\n",
"        ignore_patterns=IGNORE_PATTERNS,\n",
"    )\n",
"\n",
"    print(\"\\n¡Carga de todo el proyecto completada con éxito!\")\n",
"    print(f\"Tu proyecto está ahora disponible en: https://huggingface.co/{REPO_ID}\")\n",
"\n",
"except Exception as e:\n",
"    # Best-effort cell: report the failure with a troubleshooting checklist\n",
"    # instead of a raw traceback.\n",
"    print(f\"\\nERROR al subir el proyecto a Hugging Face Hub: {e}\")\n",
"    print(\"Verifica los siguientes puntos:\")\n",
"    print(f\"- Tu nombre de usuario: '{HF_USERNAME}' y tu token son correctos y tienen permisos de escritura.\")\n",
"    print(f\"- El nombre del repositorio: '{REPO_NAME}' es el que deseas.\")\n",
"    print(f\"- La ruta local del proyecto: '{PROJECT_ROOT_DIR}' es la correcta y contiene todos tus archivos.\")\n",
"    print(\"- Tienes conexión a internet.\")\n",
"    print(\"- ¡MUY IMPORTANTE: Asegúrate de haber reiniciado el kernel de Jupyter después de actualizar la librería!\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}