{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-overview",
   "metadata": {},
   "source": [
    "# Dataset exploration: `instances_attributes_val2020.json`\n",
    "\n",
    "Loads the annotation file into pandas tables (images, categories, attributes, annotations), counts how often the neck/collar attributes are annotated, and builds one flat table with an image path per annotation.\n",
    "\n",
    "Cells are meant to be run top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "755f1267a6176c24",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-paths",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Paths a reader may need to adapt.\n",
    "DATASET_PATH = \"./dataset/instances_attributes_val2020.json\"\n",
    "# TODO: 'coucou/' looks like a placeholder prefix -- point it at the real image directory.\n",
    "IMAGE_DIR = 'coucou/'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-load",
   "metadata": {},
   "source": [
    "## Load the annotation file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "initial_id",
   "metadata": {},
   "outputs": [],
   "source": [
    "# JSON is read as UTF-8 explicitly so the result does not depend on the platform locale.\n",
    "with open(DATASET_PATH, \"r\", encoding=\"utf-8\") as f:\n",
    "    dataset = json.load(f)\n",
    "\n",
    "print(dataset.keys())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-tables",
   "metadata": {},
   "source": [
    "## One table per top-level section\n",
    "\n",
    "Each section of the file used below (`images`, `categories`, `attributes`, `annotations`) is turned into its own DataFrame exactly once; later cells derive new frames from these instead of rebuilding or overwriting them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24c0ff8dda428023",
   "metadata": {},
   "outputs": [],
   "source": [
    "images = dataset['images']\n",
    "print(len(images))\n",
    "\n",
    "# Show a preview only; printing the whole collection floods the output.\n",
    "df_images = pd.DataFrame(images)\n",
    "df_images.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e45dd93ed3d6d11",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_categories = pd.DataFrame(dataset['categories'])\n",
    "df_categories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8c1f1649b4890b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_attributes = pd.DataFrame(dataset['attributes'])\n",
    "df_attributes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-attribute-usage",
   "metadata": {},
   "source": [
    "## Attribute usage\n",
    "\n",
    "Annotations are exploded on `attribute_ids` and joined onto the attribute table, so that annotated attributes can be counted by name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c53c3115c3dbef8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 'segmentation' is not used anywhere in this notebook, so it is dropped up front.\n",
    "df_annotation = pd.DataFrame(dataset['annotations']).drop(columns=['segmentation'])\n",
    "\n",
    "# explode() gives every element of a list-like 'attribute_ids' value its own row.\n",
    "df_annotation_exploded = df_annotation.explode('attribute_ids')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "618f069b857225fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_annotation_exploded.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a85bbf68e23920b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The left join keeps every attribute: an attribute that matches no annotation\n",
    "# still produces one row, with NaN in the columns coming from the annotations.\n",
    "df_joined = df_attributes.merge(\n",
    "    df_annotation_exploded, how='left', left_on='id', right_on='attribute_ids'\n",
    ")\n",
    "df_joined.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2888345540b560d5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attributes whose name mentions 'neck' or 'collar'; na=False keeps the mask strictly boolean.\n",
    "is_neck_or_collar = df_joined['name'].str.contains('neck|collar', na=False)\n",
    "df_neck_collar = df_joined[is_neck_or_collar]\n",
    "\n",
    "# count() ignores NaN, so an attribute without any matching annotation reports 0.\n",
    "# value_counts() on 'name' would report 1 for it, because the left join above\n",
    "# always emits one row per attribute.\n",
    "df_neck_collar.groupby('name')['attribute_ids'].count().sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-flat-table",
   "metadata": {},
   "source": [
    "## Flat table: image + annotation + category\n",
    "\n",
    "Inner joins keep only annotations that have both a matching image and a matching category."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ebb14b7642a1459d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attribute ids are not needed here. A new frame is derived instead of rebinding\n",
    "# df_annotation, so the earlier cells stay valid when re-run.\n",
    "df_annotation_no_attrs = df_annotation.drop(columns=['attribute_ids'])\n",
    "\n",
    "# NOTE: overlapping column names (e.g. 'id') get pandas' default '_x' / '_y' suffixes;\n",
    "# '_x' is the left frame of a merge and '_y' the right one.\n",
    "df_dataset = (\n",
    "    df_images\n",
    "    .merge(df_annotation_no_attrs, how='inner', left_on='id', right_on='image_id')\n",
    "    .merge(df_categories, how='inner', left_on='category_id', right_on='id')\n",
    "    .assign(path_to_image=lambda frame: IMAGE_DIR + frame['file_name'])\n",
    ")\n",
    "df_dataset.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}