Spaces:

Saim-11
/

Youtube-Videos-Sentiment

Sleeping

App Files Files Community

Saim-11 committed on Jul 29, 2024

Commit

ddf3530

verified ·

1 Parent(s): 66b80ca

Upload 4 files

Browse files

Files changed (4) hide show

app.py +65 -0
config.json +27 -0
fetching_data.ipynb +681 -0
youtube_video_sentiment_fine_tuning.ipynb +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import functools
+import tempfile
+
+import gradio as gr
+import requests
+import torch
+import whisper
+from bs4 import BeautifulSoup
+from pytubefix import YouTube
+from pytubefix.cli import on_progress
+from transformers import pipeline
+from transformers import BertTokenizer, BertForSequenceClassification, AutoConfig
+def get_page_title(url):
+    try:
+        response = requests.get(url)
+        soup = BeautifulSoup(response.content, 'html.parser')
+        return soup.title.string
+    except Exception as e:
+        return "Error fetching title"
+def analyze_sentiment(url):
+    yt = YouTube(url, on_progress_callback = on_progress)
+    print(yt.title)
+    ys = yt.streams.get_audio_only()
+    audio = ys.download(mp3=True)
+    model = whisper.load_model("tiny")
+    data = model.transcribe(audio,fp16=False)
+    text = data["text"]
+    print(text)
+    model_dir = '.'
+    config = AutoConfig.from_pretrained(model_dir)
+    model = BertForSequenceClassification.from_pretrained(model_dir, config=config)
+    tokenizer = BertTokenizer.from_pretrained(model_dir)
+    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
+    # Set the model to evaluation mode
+    model.eval()
+    # Disable gradient calculation for faster inference
+    with torch.no_grad():
+        # Make the prediction
+        outputs = model(**inputs)
+        logits = outputs.logits
+        predicted_class = torch.argmax(logits, dim=1).item()
+    # Convert predicted class to label (0 or 1)
+    label = 'negative' if predicted_class == 0 else 'positive'
+    # Debug: Print the logits to understand the output
+    print(f"Logits: {logits}")
+    print(f"Predicted class: {predicted_class}")
+    result = label
+    return result
+# Define the Gradio interface
+iface = gr.Interface(
+    fn=analyze_sentiment,
+    inputs=gr.Textbox(label="Enter YouTube URL"),
+    outputs=gr.Textbox(label="Sentiment Analysis Result"),
+    title="YouTube Video Sentiment",
+    description="Enter a YouTube video URL to analyze the sentiment of its title.",
+)
+# Launch the interface
+iface.launch()

config.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.42.4",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

fetching_data.ipynb ADDED Viewed

	@@ -0,0 +1,681 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "4c664706",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting pytubefixNote: you may need to restart the kernel to use updated packages.\n",
+      "\n",
+      "  Downloading pytubefix-6.6.3-py3-none-any.whl.metadata (4.3 kB)\n",
+      "Downloading pytubefix-6.6.3-py3-none-any.whl (73 kB)\n",
+      "   ---------------------------------------- 0.0/73.4 kB ? eta -:--:--\n",
+      "   ----- ---------------------------------- 10.2/73.4 kB ? eta -:--:--\n",
+      "   ----- ---------------------------------- 10.2/73.4 kB ? eta -:--:--\n",
+      "   ----- ---------------------------------- 10.2/73.4 kB ? eta -:--:--\n",
+      "   ----- ---------------------------------- 10.2/73.4 kB ? eta -:--:--\n",
+      "   ----- ---------------------------------- 10.2/73.4 kB ? eta -:--:--\n",
+      "   ----- ---------------------------------- 10.2/73.4 kB ? eta -:--:--\n",
+      "   ----- ---------------------------------- 10.2/73.4 kB ? eta -:--:--\n",
+      "   ---------------------- ----------------- 41.0/73.4 kB 89.3 kB/s eta 0:00:01\n",
+      "   ---------------------------------------- 73.4/73.4 kB 149.7 kB/s eta 0:00:00\n",
+      "Installing collected packages: pytubefix\n",
+      "Successfully installed pytubefix-6.6.3\n"
+     ]
+    }
+   ],
+   "source": [
+    "pip install pytubefix "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "d3d52132",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Transcribe Video to Text with Python and Watson in 15 Minutes\n",
+      " ↳ |██████████████████████████████████████████████████████████████████| 100.0%\r"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'C:\\\\Users\\\\Crown Tech\\\\NLP\\\\project\\\\Transcribe Video to Text with Python and Watson in 15 Minutes.mp3'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pytubefix import YouTube\n",
+    "from pytubefix.cli import on_progress\n",
+    " \n",
+    "url = \"https://www.youtube.com/watch?v=FM6kHcXpw98\"\n",
+    "yt = YouTube(url, on_progress_callback = on_progress)\n",
+    "print(yt.title)\n",
+    " \n",
+    "ys = yt.streams.get_audio_only()\n",
+    "ys.download(mp3=True) # pass the parameter mp3=True to save in .mp3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "fd427224",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "9fb78e6a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "possitive_links = []\n",
+    "with open(r\"C:\\Users\\Crown Tech\\NLP\\links\\possitive_videos\\possitive_videos_links.txt\",mode='r') as file:\n",
+    "    csvfile = csv.reader(file)\n",
+    "    for lines in csvfile:\n",
+    "        possitive_links.append(lines)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "04e31830",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['https://www.youtube.com/watch?v=HwLK9dBQn0g&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=Fo6oU4DfdH0&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=eBSeCp__xhI&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=Tuw8hxrFBH8&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=tbnzAVRZ9Xc&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=u8OySa4uZmU&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=u8OySa4uZmU&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=6n__uFXz_vo&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=IcaJ88kziRA&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=mGD3kO-Rs3Y&pp=ygUQcG9zc2l0aXZlIHNwZWVjaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=_rl6AyOWgKA&pp=ygUNaGFwcHkgbW9tZW50cw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=8BrCACPmdo4&pp=ygUNaGFwcHkgbW9tZW50cw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=78nsxRxbf4w&pp=ygUYaGFwcHkgbW9tZW50cyBpbiBlbmdsaXNo',\n",
+       " 'https://www.youtube.com/watch?v=_4__pKDIg2s&pp=ygUYaGFwcHkgbW9tZW50cyBpbiBlbmdsaXNo',\n",
+       " 'https://www.youtube.com/watch?v=JJ9GD0SiwEc&pp=ygUYaGFwcHkgbW9tZW50cyBpbiBlbmdsaXNo',\n",
+       " 'https://www.youtube.com/watch?v=_u2qggffbYM&pp=ygUTaGVhcnR3YXJtaW5nIHZpZGVvcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=MCX4YqcW7kU&pp=ygURZmVlbCBnb29kIHN0b3JpZXM%3D',\n",
+       " 'https://www.youtube.com/watch?v=sLRj4UlNTdg&pp=ygURZmVlbCBnb29kIHN0b3JpZXM%3D',\n",
+       " 'https://www.youtube.com/watch?v=apbSsILLh28&pp=ygURZmVlbCBnb29kIHN0b3JpZXM%3D',\n",
+       " 'https://www.youtube.com/watch?v=HgiiY9TLtX8&pp=ygUQdXBsaWZ0aW5nIHZpZGVvcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=Tuw8hxrFBH8&pp=ygUQdXBsaWZ0aW5nIHZpZGVvcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=tbnzAVRZ9Xc&pp=ygUWaW5zcGlyYXRpb25hbCBzcGVlY2hlcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=eAdcSoAMA-s&pp=ygUWaW5zcGlyYXRpb25hbCBzcGVlY2hlcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=QbL0X3B4mjg&pp=ygUWaW5zcGlyYXRpb25hbCBzcGVlY2hlcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=PGUdWfB8nLg&pp=ygUWaW5zcGlyYXRpb25hbCBzcGVlY2hlcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=TBuIGBCF9jc&pp=ygUWaW5zcGlyYXRpb25hbCBzcGVlY2hlcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=i9UYbJ2xMTI&pp=ygUNc3VjY2VzcyBzdG9yeQ%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=L2rR5RuJEPc&pp=ygUNc3VjY2VzcyBzdG9yeQ%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=JiwZQNYlGQI&pp=ygUNc3VjY2VzcyBzdG9yeQ%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=Dbb4htY9ldw&pp=ygUTYWNoaWV2ZW1lbnQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=ieFWfWtKmTc&pp=ygUTYWNoaWV2ZW1lbnQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=5g1LtbCtVhs&pp=ygUTYWNoaWV2ZW1lbnQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=2MGMvEnoD6U&pp=ygUUb3ZlcmNvbWluZyBvYnN0YWNsZXM%3D',\n",
+       " 'https://www.youtube.com/watch?v=Bd9H2INRxBY&pp=ygUUb3ZlcmNvbWluZyBvYnN0YWNsZXM%3D',\n",
+       " 'https://www.youtube.com/watch?v=-1v-4JJRLZs&pp=ygUUb3ZlcmNvbWluZyBvYnN0YWNsZXM%3D',\n",
+       " 'https://www.youtube.com/watch?v=61bMGNL6MrM&pp=ygUUb3ZlcmNvbWluZyBvYnN0YWNsZXM%3D',\n",
+       " 'https://www.youtube.com/watch?v=IOrmS8vJDQw&pp=ygUUb3ZlcmNvbWluZyBvYnN0YWNsZXM%3D',\n",
+       " 'https://www.youtube.com/watch?v=QMnEP2DYfmI&pp=ygUKZ29vZCBkZWVkcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=DowJfUmlzeI&pp=ygUKZ29vZCBkZWVkcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=DNIjX4BUWDw&t=47s&pp=ygUKZ29vZCBkZWVkcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=PsQ5rY1-M3U&pp=ygUKZ29vZCBkZWVkcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=lUKhMUZnLuw&pp=ygUOaGVscGluZyBvdGhlcnM%3D',\n",
+       " 'https://www.youtube.com/watch?v=9MQXqpQtzaE&pp=ygUOaGVscGluZyBvdGhlcnM%3D',\n",
+       " 'https://www.youtube.com/watch?v=UZC1D9ooSUg&pp=ygUOaGVscGluZyBvdGhlcnM%3D',\n",
+       " 'https://www.youtube.com/watch?v=Mh5xCpDs9rc&pp=ygUOaGVscGluZyBvdGhlcnM%3D']"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "updated_possitive_links = [item[0].strip() for item in possitive_links if item[0]]\n",
+    "updated_possitive_links"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "83e1f185",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "THE POWER OF POSITIVITY - Best Motivational Video For Positive Thinking\n",
+      "10 Minutes to Start Your Day Right! - Motivational Speech By Oprah Winfrey [YOU NEED TO WATCH THIS]\n",
+      "FOCUS ON YOURSELF NOT OTHERS (motivational video)█████████████████████| 100.0%\n",
+      "One of the Greatest Speeches Ever  Steve Jobs█████████████████████████| 100.0%\n",
+      "WATCH THIS EVERYDAY AND CHANGE YOUR LIFE - Denzel Washington Motivational Speech 2023\n",
+      "10 Minutes for the next 10 Years - Matthew McConaughey Motivational Speech0.0%\n",
+      "10 Minutes for the next 10 Years - Matthew McConaughey Motivational Speech0.0%\n",
+      "MOVE IN SILENCE, SHOCK THEM WITH YOUR SUCCESS - Motivational Speech (Marcus Elevation Taylor)\n",
+      "IT’S SUPPOSED TO BE HARD - Powerful Motivational Speech███████████████| 100.0%\n",
+      "DON'T COMPLAIN JUST ENJOY YOUR PAIN...REBUILD YOURSELF - Best Motivational Speeches\n",
+      "Happy Moments in Life█████████████████████████████████████████████████| 100.0%\n",
+      "Happy Moments Video Compilation 2016██████████████████████████████████| 100.0%\n",
+      "How to Be Happy Every Day It Will Change the World  Jacqueline Way  TEDxStanleyPark\n",
+      "HAPPY MOMENTS QUOTES That Will Inspire You  Cherish your Happy Moments| 100.0%\n",
+      "How To Be Happy  Buddhism In English██████████████████████████████████| 100.0%\n",
+      "The Speech That Brought This Entire School To Tears (The Most Inspiring Motivational Video of 2017)\n",
+      "Three Laughing Monks Story - zen motivation███████████████████████████| 100.0%\n",
+      "3 Stories That WILL MAKE YOU FEEL GOOD!███████████████████████████████| 100.0%\n",
+      "What really matters at the end of life  BJ Miller  TED████████████████| 100.0%\n",
+      "4 Minutes To Start Your Day Right! MORNING MOTIVATION and Positivity!█| 100.0%\n",
+      "One of the Greatest Speeches Ever  Steve Jobs█████████████████████████| 100.0%\n",
+      "WATCH THIS EVERYDAY AND CHANGE YOUR LIFE - Denzel Washington Motivational Speech 2023\n",
+      "GOD'S PLAN FOR YOU! Best Motivational Speech inspired by Denzel Washington, Inspirational Video\n",
+      "5 Minutes for the Next 50 Years - Mathhew McConaughey Motivational Speech00.0%\n",
+      "Barack Obama's Inspirational Speech with Subtitles  One of the best English speeches ever 2023\n",
+      "Admiral McRaven Leaves the Audience SPEECHLESS  One of the Best Motivational Speeches\n",
+      "How Starbucks Became a $100B Success Story  Howard Schultz  From Poor Boy To Billionaire\n",
+      "The Motivational Success Story Of J.K Rowling - From Deep Depression To World's RICHEST AUTHOR\n",
+      "Failing at Normal An ADHD Success Story  Jessica McCabe  TEDxBratislava 100.0%\n",
+      "He Failed 1000 Times (Real Life Story)████████████████████████████████| 100.0%\n",
+      "PROBLEMS IN LIFE  A Life Lesson Story On Growth And Success ██████████| 100.0%\n",
+      "Time Story.A Motivational Story.██████████████████████████████████████| 100.0%\n",
+      "Overcoming obstacles - Steven Claunch█████████████████████████████████| 100.0%\n",
+      "Don’t Avoid Obstacles. Overcome Them.   Jessie Adams  TEDxDavenport███| 100.0%\n",
+      "Overcoming Obstacles and Reaching Self-Fulfillment   Bryan Humphrey  TEDxSouthwesternAU\n",
+      "How To Overcome Adversity█████████████████████████████████████████████| 100.0%\n",
+      "To overcome challenges, stop comparing yourself to others  Dean Furness 100.0%\n",
+      "Ripple (Award Winning)- Kindness and good deeds will come back to you█| 100.0%\n",
+      "Friends - A Selfless Good Deed████████████████████████████████████████| 100.0%\n",
+      "13 DEEDS ALLAH ABSOLUTELY LOVES███████████████████████████████████████| 100.0%\n",
+      "Don't blindly rush into 'good' deeds  Acharya Prashant (2019)█████████| 100.0%\n",
+      "Helping others makes us happier -- but it matters how we do it  Elizabeth Dunn\n",
+      "Help Yourself by Helping Others  One of The Most Inspirational Speech Ever (Simon Sinek Motivation)\n",
+      "10 lines on Helping others l Essay on helping others l████████████████| 100.0%\n",
+      "Helping Others  Angus Hall  TEDxYouth@TCS█████████████████████████████| 100.0%\n",
+      " ↳ |██████████████████████████████████████████████████████████████████| 100.0%\r"
+     ]
+    }
+   ],
+   "source": [
+    "output_path = r\"C:\\Users\\Crown Tech\\NLP\\project\\audio\\possitve\"\n",
+    "for url in updated_possitive_links:\n",
+    "    yt = YouTube(url, on_progress_callback = on_progress)\n",
+    "    print(yt.title)\n",
+    "    ys = yt.streams.get_audio_only()\n",
+    "    out_file = ys.download(output_path=output_path)\n",
+    "    ys.download(mp3=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "64a47759",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pytubefix import YouTube\n",
+    "from pytubefix.cli import on_progress\n",
+    "import csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "86a2f228",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "negative_links = []\n",
+    "with open(r\"C:\\Users\\Crown Tech\\NLP\\project\\links\\negative_videos\\negative_sentiment_links.txt\",mode='r') as file:\n",
+    "    csvfile = csv.reader(file)\n",
+    "    for lines in csvfile:\n",
+    "        negative_links.append(lines)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "4085c681",
+   "metadata": {
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['https://www.youtube.com/watch?v=-FBq5lE1Kz0&pp=ygUQbmF0dXJhbCBkaXNhc3Rlcg%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=q8eIwmSJP0o&pp=ygUQbmF0dXJhbCBkaXNhc3Rlcg%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=DBSDxlwCnGk&pp=ygUXbmF0dXJhbCBkaXNhc3RlciBjb3JvbmE%3D',\n",
+       " 'https://www.youtube.com/watch?v=5gphZGE5tDc&pp=ygUXbmF0dXJhbCBkaXNhc3RlciBjb3JvbmE%3D',\n",
+       " 'https://www.youtube.com/watch?v=oqYnwXKxOu8&pp=ygUXbmF0dXJhbCBkaXNhc3RlciBjb3JvbmE%3D',\n",
+       " 'https://www.youtube.com/watch?v=GcmYbHkdTqk&pp=ygUXbmF0dXJhbCBkaXNhc3RlciBjb3JvbmE%3D',\n",
+       " 'https://www.youtube.com/watch?v=dJpIU1rSOFY&pp=ygUbbmF0dXJhbCBkaXNhc3RlciBlYXJ0aHF1YWtl',\n",
+       " 'https://www.youtube.com/watch?v=193PoeTUIvo&pp=ygUbbmF0dXJhbCBkaXNhc3RlciBlYXJ0aHF1YWtl',\n",
+       " 'https://www.youtube.com/watch?v=ydogesjgmzU&pp=ygUXdHJhZ2ljIGFjY2lkZW50IHJlcG9ydHM%3D',\n",
+       " 'https://www.youtube.com/watch?v=I7cHiYlwFSw&pp=ygUXdHJhZ2ljIGFjY2lkZW50IHJlcG9ydHM%3D',\n",
+       " 'https://www.youtube.com/watch?v=F0DUGH38ip0&pp=ygUXdHJhZ2ljIGFjY2lkZW50IHJlcG9ydHM%3D',\n",
+       " 'https://www.youtube.com/watch?v=FFpo-Z3g8nQ&pp=ygUXdHJhZ2ljIGFjY2lkZW50IHJlcG9ydHM%3D',\n",
+       " 'https://www.youtube.com/watch?v=5ZEPMDJXsXk&pp=ygUXdHJhZ2ljIGFjY2lkZW50IHJlcG9ydHM%3D',\n",
+       " 'https://www.youtube.com/watch?v=Kfdj3AfDD74&pp=ygUNZnJhdWQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=ISIjlRExdpE&pp=ygUNZnJhdWQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=FmGNya0j8d4&pp=ygUNZnJhdWQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=u_rfIboPyYs&t=388s&pp=ygUNZnJhdWQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=GHKyDYtKGEg&pp=ygUNZnJhdWQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=3MgCUdAUhc8&pp=ygUNZnJhdWQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=sDQuo_qIhjE&pp=ygUNZnJhdWQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=esRmijZ8Haw&pp=ygUNZnJhdWQgc3Rvcmllcw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=CvYs1UbBfpw&pp=ygUQZGV2YXN0YXRpbmcgbmV3cw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=fEQHB2HI50g&pp=ygUQZGV2YXN0YXRpbmcgbmV3cw%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=flQmzxGUfsM&pp=ygUUZGlzYXN0ZXIgZG9jdW1lbnRhcnk%3D',\n",
+       " 'https://www.youtube.com/watch?v=06moeVolGmI&pp=ygUUZGlzYXN0ZXIgZG9jdW1lbnRhcnk%3D',\n",
+       " 'https://www.youtube.com/watch?v=gn235-rPK4k&pp=ygUUZGlzYXN0ZXIgZG9jdW1lbnRhcnk%3D',\n",
+       " 'https://www.youtube.com/watch?v=MTGUwBsAfbQ&pp=ygUUZGlzYXN0ZXIgZG9jdW1lbnRhcnk%3D',\n",
+       " 'https://www.youtube.com/watch?v=qRKScRgsUaE&pp=ygUUZGlzYXN0ZXIgZG9jdW1lbnRhcnk%3D',\n",
+       " 'https://www.youtube.com/watch?v=1N4iG9b45mk&pp=ygUUZGlzYXN0ZXIgZG9jdW1lbnRhcnk%3D',\n",
+       " 'https://www.youtube.com/watch?v=Qd0U-72CEzE&pp=ygUUZGlzYXN0ZXIgZG9jdW1lbnRhcnk%3D',\n",
+       " 'https://www.youtube.com/watch?v=_cRoaDu3YOE&pp=ygUUZGlzYXN0ZXIgZG9jdW1lbnRhcnk%3D',\n",
+       " 'https://www.youtube.com/watch?v=4ErlhM1NaZA&pp=ygUSdW5mb3J0dW5hdGUgZXZlbnRz',\n",
+       " 'https://www.youtube.com/watch?v=5tdtwv7Xjsk&pp=ygUSdW5mb3J0dW5hdGUgZXZlbnRz',\n",
+       " 'https://www.youtube.com/watch?v=QrQSbaIMTaE&pp=ygUcY3JpbWUgZG9jdW1lbnRhcnkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=Kj2ghENIjvo&pp=ygUcY3JpbWUgZG9jdW1lbnRhcnkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=U_rC7P_hoL8&pp=ygUcY3JpbWUgZG9jdW1lbnRhcnkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=f6STd3pfz3U&pp=ygUcY3JpbWUgZG9jdW1lbnRhcnkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=ZRejAWOxAgE&pp=ygUcY3JpbWUgZG9jdW1lbnRhcnkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=91YY3dQKmAw&pp=ygUcY3JpbWUgZG9jdW1lbnRhcnkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=K5KuowO0ucE&pp=ygUcY3JpbWUgZG9jdW1lbnRhcnkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=bJrkwS_MpRY&pp=ygUcdmlvbGVudCBpbmNpZGVudHMgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=isOs6AUw_xM&pp=ygUcdmlvbGVudCBpbmNpZGVudHMgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=boNl0Wp6I4U&pp=ygUcdmlvbGVudCBpbmNpZGVudHMgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=h8wPcHtTXt0&pp=ygUaaG9ycmlmaWMgZXZlbnRzIGluIGVuZ2xpc2g%3D',\n",
+       " 'https://www.youtube.com/watch?v=9tVkr0RSY0Y&pp=ygUaaG9ycmlmaWMgZXZlbnRzIGluIGVuZ2xpc2g%3D',\n",
+       " 'https://www.youtube.com/watch?v=UNi0dvB-4L0&pp=ygUddHJ1ZSBjcmltZSBzdG9yaWVzIGluIGVuZ2xpc2g%3D',\n",
+       " 'https://www.youtube.com/watch?v=cVknriTulrk&pp=ygUddHJ1ZSBjcmltZSBzdG9yaWVzIGluIGVuZ2xpc2g%3D',\n",
+       " 'https://www.youtube.com/watch?v=PefdO2m7AjE&pp=ygUddHJ1ZSBjcmltZSBzdG9yaWVzIGluIGVuZ2xpc2g%3D',\n",
+       " 'https://www.youtube.com/watch?v=6kA0RjrE2ug&pp=ygUcY3JpbWluYWwgYWN0aXZpdHkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=nuMIRI8Ypi4&pp=ygUcY3JpbWluYWwgYWN0aXZpdHkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=qL6E-C7ilYg&pp=ygUcY3JpbWluYWwgYWN0aXZpdHkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=Un43WUz2afI&pp=ygUcY3JpbWluYWwgYWN0aXZpdHkgaW4gZW5nbGlzaA%3D%3D',\n",
+       " 'https://www.youtube.com/watch?v=qOaUUHlcX1k&pp=ygUad2FyIGRvY3VtZW50YXJ5IGluIGVuZ2xpc2g%3D',\n",
+       " 'https://www.youtube.com/watch?v=Mwy_vBs3j5A&pp=ygUad2FyIGRvY3VtZW50YXJ5IGluIGVuZ2xpc2g%3D',\n",
+       " 'https://www.youtube.com/watch?v=yGEE3NM0E3c&pp=ygUYY29uZmxpY3Qgem9uZSBpbiBlbmdsaXNo',\n",
+       " 'https://www.youtube.com/watch?v=yQc_0RWU3a4&pp=ygUfZWNvbm9taWMgY3Jpc2lzIHpvbmUgaW4gZW5nbGlzaA%3D%3D']"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "updated_negative_links = [item[0].strip() for item in negative_links if item[0]]\n",
+    "updated_negative_links"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "ebd47355",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Natural Hazards Crash Course Geography #27\n",
+      "Natural disasters████████████████████████████████████████████████████████████| 100.0%\n",
+      "The COVID-19 crisis is like a natural disaster███████████████████████████████| 100.0%\n",
+      "Disaster prevention and disaster control measures in natural disasters during COVID-19 pandemic\n",
+      "Restoring power after a natural disaster could look different in a COVID-19 world0.0%\n",
+      "Trump Coronavirus Incompetence 'Like Its Own Natural Disaster' Warren  Rachel Maddow  MSNBC\n",
+      "What Is An Earthquake?  The Dr. Binocs Show  Educational Videos For Kids█████| 100.0%\n",
+      "NATURAL DISASTERS  THE EARTHQUAKE  Stories For Kids In English  TIA & TOFU Lessons For Kids\n",
+      "Accident Case Study Faulty Assumptions███████████████████████████████████████| 100.0%\n",
+      "Accident report sheds light on Orlando theme park tragedy████████████████████| 100.0%\n",
+      "Accident reports█████████████████████████████████████████████████████████████| 100.0%\n",
+      "Air Disasters Death and Denial 🛬 Full Episode███████████████████████████████| 100.0%\n",
+      "How To Report Accidents & Incidents at Work  How To Report Accidents at Work  HSE STUDY GUIDE\n",
+      "How This 31 Year Old Woman Scammed JP Morgan█████████████████████████████████| 100.0%\n",
+      "The Uber Story Fraud, Betrayal, Death & Cars█████████████████████████████████| 100.0%\n",
+      "The Man Behind the World's Biggest Financial Fraud  Investigators████████████| 100.0%\n",
+      "When Greed Goes Too Far - The Worldcom Fraud█████████████████████████████████| 100.0%\n",
+      "Forensic accountant explains why fraud thrives on Wall Street████████████████| 100.0%\n",
+      "The World's Most Complex Catfishing Scam  Investigators██████████████████████| 100.0%\n",
+      "Exposing Iman Gadzhi and His Alleged Scam████████████████████████████████████| 100.0%\n",
+      "The Big Lottery Scam  Scammed  Real Crime████████████████████████████████████| 100.0%\n",
+      "Devastating News Divorce Court - William vs. Ashley██████████████████████████| 100.0%\n",
+      "Devastating News - Farmhouse Planning Permission Denied By Burnley Council!  UK Restoration Services\n",
+      "The Exploding Town Disaster██████████████████████████████████████████████████| 100.0%\n",
+      "The Willow Island Disaster  A Short Documentary  Fascinating Horror██████████| 100.0%\n",
+      "Real life tornado hunter reviews accuracy of Twisters movie██████████████████| 100.0%\n",
+      "The Glass Tower Disaster  A Short Documentary  Fascinating Horror████████████| 100.0%\n",
+      "Inside Japan's Nuclear Meltdown (full documentary)  FRONTLINE████████████████| 100.0%\n",
+      "A Single Misheard Word The Shiloh Baptist Church Disaster  A Short Documentary Fascinating Horror\n",
+      "Disaster Or Best Show Ever? ‘Notorious’ Ozark Music Festival Jolted Missouri 50 Years Ago\n",
+      "The Tay Bridge Disaster  A Short Documentary  Fascinating Horror█████████████| 100.0%\n",
+      "Lemony Snicket's A Series of Unfortunate Events (2004) Trailer #1  Movieclips Classic Trailers\n",
+      "The Tragic Life of Ranvir Shorey  Big Boss Reality███████████████████████████| 100.0%\n",
+      "Catching a Serial Killer Inside the Investigation  Real Stories True Crime Documentary\n",
+      "Britain's Unsolved Crimewave  Dispatches  Channel 4 Documentaries████████████| 100.0%\n",
+      "Groomed, and then Murdered by a Millionaire  Nadine Aburas  Click For Murder█| 100.0%\n",
+      "The secret relationship that ended in a Christmas murder - Murder Documentary UK00.0%\n",
+      "The Disturbing Case of Vanessa Marcotte  True Crime Documentary██████████████| 100.0%\n",
+      "The Heartbreaking Case of Ekaterina Baumann  True Crime Documentary██████████| 100.0%\n",
+      "The Most TWISTED Case You've Ever Heard  Documentary█████████████████████████| 100.0%\n",
+      "Violent protest grips Bangladesh over civil service hiring rules  Latest English News  WION\n",
+      "Mafia Boss & London Gangster Reveal Their Most Violent Crimes  Crime Stories█| 100.0%\n",
+      "Bangladesh Quota Protests At Least 5 Killed & 400 Injured  Vantage with Palki Sharma%\n",
+      "Top 10 World Events That Left Us Speechless██████████████████████████████████| 100.0%\n",
+      "Top 10 TRUE Historical Events NOT For The Faint-Hearted██████████████████████| 100.0%\n",
+      "Cases With The Most INSANE Twists You've Ever Heard██████████████████████████| 100.0%\n",
+      "The Chilling Case of Kira Steger  True Crime Documentary█████████████████████| 100.0%\n",
+      "The True Crime Story Behind “When a Stranger Calls” #Crimetober██████████████| 100.0%\n",
+      "Types Of Crime Part I████████████████████████████████████████████████████████| 100.0%\n",
+      "English Vocabulary Crime & Criminals█████████████████████████████████████████| 100.0%\n",
+      "Ways to talk about crime in English - Advanced English Lesson████████████████| 100.0%\n",
+      "Lesson 46 Types of Crime Vocabulary; Kidnapping, Arson, Human trafficking, Hijacking #learnenglish\n",
+      "Sun Tzu - The Art of War Documentary█████████████████████████████████████████| 100.0%\n",
+      "The Indo-Pakistani War 1965  Animated History████████████████████████████████| 100.0%\n",
+      "Conflict Zone Is Libya a failed state?  DW English███████████████████████████| 100.0%\n",
+      "Pakistan's Endless Economic Crisis███████████████████████████████████████████| 100.0%\n",
+      " ↳ |█████████████████████████████████████████████████████████████████████████| 100.0%\r"
+     ]
+    }
+   ],
+   "source": [
+    "output_path = r\"C:\\Users\\Crown Tech\\NLP\\project\\audio\\negative\"\n",
+    "for url in updated_negative_links:\n",
+    "    yt = YouTube(url, on_progress_callback = on_progress)\n",
+    "    print(yt.title)\n",
+    "    ys = yt.streams.get_audio_only()\n",
+    "    out_file = ys.download(output_path=output_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "cdb6e356",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " ever wish you could automatically generate lecture notes from your lecture videos? Or maybe you're tired of writing meeting minutes. I know I am. Well, in this video, we're gonna solve that completely. What's happening guys, my name's Nicholas, or not in this video, we're gonna be taking a look at video to text. Interested? Let's take a look in greater detail as to what we're going to be going through. So in this video, we're going to be converting video to text. So we're first out going to start out by using the YouTube DL library that download virtually any YouTube video. We're then going to pre-process that video to audio using the FFM page library. So this is really easy and it'll allow you to rip out the audio out of any video. We're then going to convert that video to text using the Watson speech to text service for free. And then we're going to be outputting those results from that conversion into a text file. So you'll then have a full blown transcript that you can then go away and use without having to worry about it. So in terms of how we're going to be doing this, we're going to be mainly working inside of a Jupyter Notebook. So we'll first start out by extracting our audio from our video that we've already downloaded using the YouTube DL functionality. We'll then convert it using the Watson speech to text service. And last but not least, we're going to push that out using the native Python functionality to create a text file. Ready to do it? Let's get to it. So in this video, we're going to be focused on converting a video all the way through to text. Now we're mainly going to be working in Python. So I've already got a Jupyter Notebook up here. Now our core steps that we're going to be going through installing and importing dependencies, extracting our audio using FFMPEG, then creating our speech to text service, converting it to text, and outputting it to a TXT file. 
Now in this case, we're going to add in one additional step which I haven't included here, and that's grabbing a video. So say, for example, we had a YouTube video that we wanted to grab. Well, we can actually grab that using the YouTube DL library. This is probably one of my favorite libraries. It allows you to grab YouTube videos and convert your own videos or convert other videos if you want to grab some notes. So in order to install YouTube DL, we just need to open up a new terminal and then type in pip install YouTube DL. So this is going to go through install everything that you need. You obviously need Python in order to use pip install command, but that should install it for you. Then all you need to do is type in YouTube DL and then grab a link of the video that you want. So in this case, I've got my AI versus machine learning versus data science video, and we can grab that link, paste it after the YouTube DL command and hit enter. This is going to go through and start downloading our video. Now, it's going to download it in your home directory, but then we can grab it and put it inside of the same directory that we've got our Jupyter Notebook. So let's let that download and then we can get started. Okay, so our video is now downloaded. We can hit open, and this is going to open it up in the same directory. So you can see here, we've got AI versus machine learning versus deep learning dot make V. So we can copy that over into our video to text folder. Now, I'm just going to rename it to make our lives a little bit easier when we actually start working with this file. So we'll just call it a IML dot make V. All right, cool. So that is our video file done. And now we can get into the good bit. So the first thing that we're going to do is first up install dependencies. Now we've got two key dependencies here. We need to have the IBM Watson services and we also need to install FFM peg. 
So FFM peg is basically a library that helps you work with a whole bunch of video files and audio files. And we're going to use it to extract the audio from our video in order to send it to the Watson speech to text service. So we can install those dependencies within our Jupyter Notebook. So let's go ahead and do that. Okay, so I've gone ahead and installed those dependencies. Now in order to do that, we've used the pip install IBM Watson command and I've commented it out. But if you wanted to install FFM peg, you just need to type in brew install FFM peg. If you're on a Mac, if you're on a Windows machine, there's some additional steps on this link. But again, I'll include the link to this in order to install it as well as a full GitHub repo for this tutorial in the description below. So now that that's installed, what we can then go and do is import our dependencies. So in this case, we're going to be using the IBM Watson dependencies plus we're going to be using subprocess to actually perform our audio extraction. So let's bring in our dependencies first up. So those are our dependencies imported. So we've gone ahead and imported a couple of things there. So first up, we imported subprocess and this is going to allow us to make a subprocess call using our regular terminal. Then we've imported the speech to text class from IBM Watson and this is going to be used to actually connect to our speech to text service. We've also imported a couple of helpers here. So recognize callback as well as audio source again from IBM Watson. And last but not least, we've imported IAM authenticated. So this is going to allow us to authenticate against our speech to text service once we set that up. Now the next thing that we're going to do is actually extract our audio. So remember when we extracted our video, we had this AI ML video here. Now what we want to do first is take that video and extract the audio from it so we can then convert speech to text. 
So let's go ahead and do that. And in order to do that, we're going to be using FFNPEG. So we've now gone and extracted our audio. Now in terms of what we've actually done here, we've passed through a command and that command is basically calling our FFNPEG library. We're passing through the name of the file that we want to extract our audio from, the bit rate as well as the frequency and then last but not least, we've specified what we want the file name to be. So if you haven't downloaded a YouTube video or you want to use your own video, all you need to do is specify a different file name here. And this just needs to be the file that you're trying to convert. Then what we've done is we've gone and called that command using our subprocess library and we've used our shell. So now if we take a look within our folder, we've got a file called audio.wav. So this is our audio file. So if we actually play that, ever wondered about the differences between AI? So you can see that we've now extracted the audio from our video. Now the next step is to start setting up our Watson Speech Detect service. So in this case, we're going to be using a free Watson service so you'll be able to convert up to, I think it's 500 minutes of free speech detects per month. So let's go ahead and start setting that up. Now in this case, the first thing that we need to do is grab two variables. So we're going to need an API key as well as a URL. Now in order to get an API key and a URL, you just need to go to cloud.ibm.com, forward slash catalog, then hit services. So over here and then scroll on down to AI and machine learning. And this is where you'll see all the AI and machine learning services. And then from there, you'll be able to see speech to text. So if you select speech to text, you can see that there'll be a whole heap of pricing plans that show up, but we just need this light plan here. And as I said, you get 500 free minutes per month to convert. 
So that's more than enough if you're just getting started. So choose that plan and then go in ahead and hit create. And as soon as this service is created, you'll be able to extract your API key as well as your URL and plug it back into your notebook. So now that that's created, we can just go to manage over here. So you'll start out on getting started, you just need to hit manage and then grab your API key down there. So we can just hit these copy buttons down here, copy, copy, copy, and we'll grab our API key first up and then we'll store that in our variable here and then likewise we'll go and grab our URL. Awesome, now that's done. Now what we need to do is actually start setting up our speech to text service. So let's go ahead and do that. So that is our service set up. So in those three lines, what we first did is we created a new IAM Authenticator and to that we passed our API key, which is this variable up here and that's the same one that we got from our IBM Cloud service. We then went and created a new speech to text service and passed through our Authenticator, which is this variable here. And then we also set our service URL. So this is basically telling our speech to text service or our speech to text variable inside of our Jupyter notebook where is our service inside of the worldwide web. So that's basically all set up. Now now what we can actually go and do is start converting our audio. So let's delete that cell there and create a couple of new cells below that. And then what we can do is create our conversion. So in order to do this, we're going to be using the speech to text.recognize method and we're basically going to be passing through our audio.wav file and converting that to text. We'll then be able to pre-process the results and output it to a TXT file. So let's go on ahead and start performing that conversion. So that's basically our conversion code done. 
Now before we actually go and run that, let's take a quick look at what we've actually written. So first up what we're doing is we're opening up our audio.wav file and then we're using the speech to text service that we set up here. So our speech to text variables coming from there. And we're using the recognize method to go and convert that speech to text. So we're passing through a bunch of commands. So in this case, we're setting our audio to a file that we just specified up here. We're also specifying the content type. So because our file has a.wav extension, we're just saying that we're going to be sending a wav extension, then we're also passing through the model. So this is the language model that we want to use. And in this case, there's a whole bunch that are available through the Watson speech to text service. But again, I'll include a link to this in the description so you can grab this and take a look at it. Then last but not least, we're using we're passing through the continuous flag and we're getting our result. So ideally, once we run this cell, it's going to go and process our audio and bring us back some text. So let's go on ahead and run that. Now, it might take a couple of minutes to run. It'll vary depending on how long your video is. Five minutes later. Alrighty, and that's our conversion done. Now, we haven't actually output anything to the screen, but it's all stored within our res variable here. So if we actually type in res and open that up, you can see we've got our converted audio. That is brilliant. Now, it did take a couple of minutes process, but keep in mind that our video was around about eight minutes long. So it wasn't too bad. Now, if we actually wanted to go and pre-process this and output it to a text file, we can actually loop through each one of these transcriptions and now output it. So if we want, we can create a couple of extra cells. And then if we actually take a look at our result keys, we can take a look at that. 
You can see that we've got two keys. So we've got result index and we've got results. Now, if we actually open up our results section, you can see that all of our results are actually stored in there. But there's actually a couple of different results. And that's because what the speech or text service does is it breaks it out into paragraphs. Now, we can actually loop through each one of these and extract them and then push them out to a text file. So let's go ahead and do that. So what we're basically going to be doing is looping through our res results array. And we're going to be pre-processing each one of those results and then storing it inside of another array. And then we'll do a little bit of pre-processing, push it out to our text file. And there we go. So what we've now got is all of our transcriptions inside of a big text array. So if we take a look, we should have eight paragraphs. And you can see we've got eight paragraphs. Now, what we can do is do a little bit of cleaning up. So what we'll do is we'll strip out any white space and we'll concatenate them all together and push it out to a text file. And there you go. So now if we take a look, you can see that we've got our output.text file here. And all of our video has now been transcoded. So we started out with our YouTube video that we downloaded. We converted it to make V. We then extracted our audio and we've gone all the way and generated our transcription. Now, in terms of those last couple of lines of code that we wrote, what I was basically doing is we take our first letter within each sentence and I was basically adding a title case so that basically makes it a little bit cleaner. And then what we're doing is we're joining each one of those together to get one big text block. So if we take a look at our transcript, you can see that it's everything together rather than having multiple paragraphs. And then last but not least, we're writing it out to a text file here. 
And that about wraps up this video. So just to quickly recap, we went and downloaded a video from YouTube using the YouTube DL library. We then went and imported all of our dependencies, extracted our audio using FFMPEG. We set up a speech to text service and converted our video to speech. And then we output our speech to a text file. So you've now effectively got a full blown video transcript that you can take away and use. Thanks so much for tuning in guys. Hopefully you found this video useful. If you did, be sure to give it a thumbs up, hit subscribe and tick that bell so you get notified of when I release future videos. And all the assets included within the video are going to be linked in the description below as well as a GitHub repository with all the code you need to get. Kickstart it on your way to performing video to text. And let me know in the comments below what you're using video to text for. Thanks again for tuning in, peace.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import whisper\n",
+    "\n",
+    "model = whisper.load_model(\"base\")\n",
+    "data = model.transcribe(\"audio.mp3\",fp16=False)\n",
+    "print(data[\"text\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4bd46f0d-ac8e-4bfc-8c0e-2f4ef97c8d08",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import whisper\n",
+    "\n",
+    "# Load the base checkpoint, transcribe the sample clip, and show the recognised text\n",
+    "model = whisper.load_model(\"base\")\n",
+    "data = model.transcribe(\"audio.mp3\", fp16=False)\n",
+    "transcript = data[\"text\"]\n",
+    "print(transcript)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "38e1f62c-8f19-4d67-8c04-c475ecd12c68",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import whisper\n",
+    "\n",
+    "# Load the Whisper model once; it is reused for every file below\n",
+    "model = whisper.load_model(\"base\")\n",
+    "\n",
+    "# Audio clips are read from audio_folder; one transcript per clip is written to text_folder\n",
+    "audio_folder = \"audio/negative\"\n",
+    "text_folder = \"text/negative\"\n",
+    "\n",
+    "# exist_ok makes this a no-op when the folder is already there (no check-then-create race)\n",
+    "os.makedirs(text_folder, exist_ok=True)\n",
+    "\n",
+    "# Sorted so that reruns process the clips in the same order\n",
+    "for filename in sorted(os.listdir(audio_folder)):\n",
+    "    # Compare the extension case-insensitively so files such as CLIP.MP3 are not silently skipped\n",
+    "    if not filename.lower().endswith((\".mp3\", \".mp4\")):\n",
+    "        continue\n",
+    "\n",
+    "    # Transcribe the audio file and keep only the recognised text\n",
+    "    audio_path = os.path.join(audio_folder, filename)\n",
+    "    data = model.transcribe(audio_path, fp16=False)\n",
+    "    text = data[\"text\"]\n",
+    "\n",
+    "    # The transcript keeps the clip's base name and gets a .txt extension\n",
+    "    text_filename = os.path.splitext(filename)[0] + \".txt\"\n",
+    "    text_path = os.path.join(text_folder, text_filename)\n",
+    "\n",
+    "    # Write the text to the file\n",
+    "    with open(text_path, \"w\", encoding=\"utf-8\") as file:\n",
+    "        file.write(text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "8a1f19d2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Transcripts are read from the same relative \"text\" folder that the transcription\n",
+    "# cell writes to, so the notebook is not tied to one machine's absolute path\n",
+    "data_path = \"text\"\n",
+    "positive_path = os.path.join(data_path, \"positive\")\n",
+    "negative_path = os.path.join(data_path, \"negative\")\n",
+    "\n",
+    "\n",
+    "def load_data_from_folder(folder_path, label):\n",
+    "    \"\"\"Return a list of (text, label) tuples, one per regular file in folder_path.\n",
+    "\n",
+    "    Each file is read as UTF-8 and stripped of surrounding whitespace;\n",
+    "    sub-directories are ignored. Files are visited in sorted name order so\n",
+    "    the returned list is the same on every run.\n",
+    "    \"\"\"\n",
+    "    samples = []\n",
+    "    for filename in sorted(os.listdir(folder_path)):\n",
+    "        file_path = os.path.join(folder_path, filename)\n",
+    "        if not os.path.isfile(file_path):\n",
+    "            continue\n",
+    "        with open(file_path, 'r', encoding='utf-8') as file:\n",
+    "            samples.append((file.read().strip(), label))\n",
+    "    return samples\n",
+    "\n",
+    "\n",
+    "# Load positive and negative samples (label 1 = positive, 0 = negative)\n",
+    "positive_samples = load_data_from_folder(positive_path, 1)\n",
+    "negative_samples = load_data_from_folder(negative_path, 0)\n",
+    "\n",
+    "# Combine and shuffle; the fixed seed makes the row order repeatable across runs\n",
+    "all_samples = positive_samples + negative_samples\n",
+    "df = pd.DataFrame(all_samples, columns=[\"text\", \"label\"])\n",
+    "df = df.sample(frac=1, random_state=42).reset_index(drop=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "5e1cb131",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>text</th>\n",
+       "      <th>label</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Namah wuddhaaya. Welcome and welcome back to a...</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>As you can see, I was born without fingers on ...</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>So I have what some consider to be one of the ...</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Tonight we're learning more about the tragic a...</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Hi, welcome to another episode of Cold Fusion....</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>86</th>\n",
+       "      <td>The Internet has changed the way we live. It h...</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>87</th>\n",
+       "      <td>Mammoth you</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>88</th>\n",
+       "      <td>Instead of jumping out of bed and rushing righ...</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>89</th>\n",
+       "      <td>Hey guys, doing the day. You all right? It's a...</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>90</th>\n",
+       "      <td>I would rather tell the world what I've done t...</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>91 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                 text  label\n",
+       "0   Namah wuddhaaya. Welcome and welcome back to a...      1\n",
+       "1   As you can see, I was born without fingers on ...      1\n",
+       "2   So I have what some consider to be one of the ...      1\n",
+       "3   Tonight we're learning more about the tragic a...      0\n",
+       "4   Hi, welcome to another episode of Cold Fusion....      0\n",
+       "..                                                ...    ...\n",
+       "86  The Internet has changed the way we live. It h...      0\n",
+       "87                                        Mammoth you      1\n",
+       "88  Instead of jumping out of bed and rushing righ...      1\n",
+       "89  Hey guys, doing the day. You all right? It's a...      1\n",
+       "90  I would rather tell the world what I've done t...      1\n",
+       "\n",
+       "[91 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "video_analysis_env",
+   "language": "python",
+   "name": "video_analysis_env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

youtube_video_sentiment_fine_tuning.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff