{ "cells": [ { "cell_type": "markdown", "id": "aa5c1491", "metadata": { "papermill": { "duration": 0.012997, "end_time": "2024-06-08T05:23:03.349402", "exception": false, "start_time": "2024-06-08T05:23:03.336405", "status": "completed" }, "tags": [] }, "source": [ "## 导入库" ] }, { "cell_type": "code", "execution_count": 1, "id": "2ff051d2", "metadata": { "execution": { "iopub.execute_input": "2024-06-08T05:23:03.375582Z", "iopub.status.busy": "2024-06-08T05:23:03.375090Z", "iopub.status.idle": "2024-06-08T05:23:06.135626Z", "shell.execute_reply": "2024-06-08T05:23:06.134675Z" }, "papermill": { "duration": 2.776387, "end_time": "2024-06-08T05:23:06.138287", "exception": false, "start_time": "2024-06-08T05:23:03.361900", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import re\n", "import nltk\n", "from nltk.corpus.reader.tagged import ToktokTokenizer\n", "from bs4 import BeautifulSoup\n", "import pandas as pd\n", "import numpy as np\n", "\n", "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n", "from sklearn.preprocessing import LabelEncoder\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, classification_report\n", "from sklearn.linear_model import LogisticRegression\n", "\n", "from scipy.stats import chi2_contingency\n", "\n", "import warnings\n", "from sklearn.exceptions import UndefinedMetricWarning\n", "warnings.filterwarnings(\"ignore\", category=UndefinedMetricWarning)" ] }, { "cell_type": "markdown", "id": "afa5b530", "metadata": { "papermill": { "duration": 0.012953, "end_time": "2024-06-08T05:23:06.163346", "exception": false, "start_time": "2024-06-08T05:23:06.150393", "status": "completed" }, "tags": [] }, "source": [ "## 读取数据" ] }, { "cell_type": "code", "execution_count": 2, "id": "fa696287", "metadata": { "execution": { "iopub.execute_input": "2024-06-08T05:23:06.188980Z", "iopub.status.busy": "2024-06-08T05:23:06.187975Z", "iopub.status.idle": "2024-06-08T05:23:08.649105Z", "shell.execute_reply": "2024-06-08T05:23:08.647767Z" }, "papermill": { "duration": 2.476289, "end_time": "2024-06-08T05:23:08.651497", "exception": false, "start_time": "2024-06-08T05:23:06.175208", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | Unnamed: 0 | \n", "text | \n", "Emotion | \n", "
---|---|---|---|
0 | \n", "0 | \n", "i seriously hate one subject to death but now ... | \n", "hate | \n", "
1 | \n", "1 | \n", "im so full of life i feel appalled | \n", "neutral | \n", "
2 | \n", "2 | \n", "i sit here to write i start to dig out my feel... | \n", "neutral | \n", "
3 | \n", "3 | \n", "ive been really angry with r and i feel like a... | \n", "anger | \n", "
4 | \n", "4 | \n", "i feel suspicious if there is no one outside l... | \n", "neutral | \n", "
LogisticRegression(max_iter=1000)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(max_iter=1000)