{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "4936c0a76984492f88275a1500cf394f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_4f189acddd754998acd4b4f9777469c4", "IPY_MODEL_ba8cec14e32648f4847b2194618d0757", "IPY_MODEL_2fa1d931574e4b118e524d3c252cc6e8" ], "layout": "IPY_MODEL_ae617cff68f24e09a3c6b0ad96f219dc" } }, "4f189acddd754998acd4b4f9777469c4": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f84ff4aa43fa4d778611f988215adc43", "placeholder": "​", "style": "IPY_MODEL_26ea2db896ef456b92e80f196172efce", "value": "README.md: 100%" } }, "ba8cec14e32648f4847b2194618d0757": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", 
"description": "", "description_tooltip": null, "layout": "IPY_MODEL_5c93f4c064a143b0b954a76194701100", "max": 12098, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_84229337aea946b1bfb9f4c394276791", "value": 12098 } }, "2fa1d931574e4b118e524d3c252cc6e8": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2954dab4bb914153acf9eb5527c8ef58", "placeholder": "​", "style": "IPY_MODEL_25dd04b3a0674b539ac56147477f5a64", "value": " 12.1k/12.1k [00:00<00:00, 412kB/s]" } }, "ae617cff68f24e09a3c6b0ad96f219dc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": 
null } }, "f84ff4aa43fa4d778611f988215adc43": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "26ea2db896ef456b92e80f196172efce": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5c93f4c064a143b0b954a76194701100": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, 
"align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "84229337aea946b1bfb9f4c394276791": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "2954dab4bb914153acf9eb5527c8ef58": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "25dd04b3a0674b539ac56147477f5a64": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "76944c7c2ec147b29705cf13841891c7": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_b9ed8a9466894003bf1bf35e6923ace2", "IPY_MODEL_866ce20493ad47048c3cb87aad0e5909", "IPY_MODEL_a8254a2af6a4431b81352e1eff75fcef" ], "layout": "IPY_MODEL_5009ae410a2c4d3b91999ab0e35c6c86" } }, "b9ed8a9466894003bf1bf35e6923ace2": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9db7fd2603c945d797239e2cba155e9b", "placeholder": "​", "style": "IPY_MODEL_cf2612b047574ecc9d209c2689795c6d", 
"value": "tatoeba_mt.py: 100%" } }, "866ce20493ad47048c3cb87aad0e5909": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3120cfab5cd44f08af6944d9cd3d7659", "max": 15499, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_fd0e167445dd44cc83007e5aeefb6dc8", "value": 15499 } }, "a8254a2af6a4431b81352e1eff75fcef": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_34332f0ec0054de082528326af5bc4b2", "placeholder": "​", "style": "IPY_MODEL_01826f384beb4abf84382add81b931b0", "value": " 15.5k/15.5k [00:00<00:00, 1.10MB/s]" } }, "5009ae410a2c4d3b91999ab0e35c6c86": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": 
null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9db7fd2603c945d797239e2cba155e9b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cf2612b047574ecc9d209c2689795c6d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3120cfab5cd44f08af6944d9cd3d7659": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fd0e167445dd44cc83007e5aeefb6dc8": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "34332f0ec0054de082528326af5bc4b2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "01826f384beb4abf84382add81b931b0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "eaef0d163ed14989b44a1d3f21d59ce3": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_3ffe76ba7da14e98aca4dfe47b3076c6", "IPY_MODEL_88168392fcb34d40a7c99f402e730393", "IPY_MODEL_5ee168f8431249d7900001c84ecf20b7" ], "layout": "IPY_MODEL_59b62288d28048ff9c691f79f4ad11b0" } }, "3ffe76ba7da14e98aca4dfe47b3076c6": { "model_module": "@jupyter-widgets/controls", "model_name": 
"HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_fe6523eb73194f15a2428b542ae868e4", "placeholder": "​", "style": "IPY_MODEL_4ccccd3029124a72be368bd724f9072c", "value": "dataset_infos.json: 100%" } }, "88168392fcb34d40a7c99f402e730393": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_749c1b04cdb14c87b5ab95c86dcdd5f6", "max": 1958806, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_09cd81148c4845958c087ee8c7b5451c", "value": 1958806 } }, "5ee168f8431249d7900001c84ecf20b7": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9847b47b4310469ca2523199b3257e3a", "placeholder": "​", "style": "IPY_MODEL_ddc0dcd753834e02b6d4057155dcb0a1", "value": " 1.96M/1.96M [00:00<00:00, 6.76MB/s]" } }, "59b62288d28048ff9c691f79f4ad11b0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", 
"state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fe6523eb73194f15a2428b542ae868e4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, 
"overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4ccccd3029124a72be368bd724f9072c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "749c1b04cdb14c87b5ab95c86dcdd5f6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "09cd81148c4845958c087ee8c7b5451c": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "9847b47b4310469ca2523199b3257e3a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ddc0dcd753834e02b6d4057155dcb0a1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "bd58c69cf5ad494eaaf12bbf10f66661": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c948d400029a4ee6b7ac40faebfe17e7", "IPY_MODEL_64f42dabd3eb48069167da0251f2d8fb", "IPY_MODEL_504de4c8b10041b3863a9cbe040d5cf6" ], "layout": "IPY_MODEL_6568660add1448ac81df0c6a67c47d66" } }, "c948d400029a4ee6b7ac40faebfe17e7": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5ec8f06d517c4f7bb3ab639f91b792f3", "placeholder": "​", "style": "IPY_MODEL_b7efe138f73a4811a14fd91d5f18766c", "value": "tatoeba-test.ara-eng.tsv: 100%" } }, "64f42dabd3eb48069167da0251f2d8fb": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_99757ac60afb4aea8d77e49e199a74b0", "max": 938171, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_84da3eb589514c8a8c7fdce342ad6ac8", "value": 938171 } }, "504de4c8b10041b3863a9cbe040d5cf6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8451c69261104dc4bc06924aa6b2b28a", "placeholder": "​", "style": "IPY_MODEL_555ff104df5a460f9f459ec5fcffd8be", "value": " 938k/938k [00:00<00:00, 12.9MB/s]" } }, "6568660add1448ac81df0c6a67c47d66": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5ec8f06d517c4f7bb3ab639f91b792f3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": 
null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b7efe138f73a4811a14fd91d5f18766c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "99757ac60afb4aea8d77e49e199a74b0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, 
"min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "84da3eb589514c8a8c7fdce342ad6ac8": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "8451c69261104dc4bc06924aa6b2b28a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "555ff104df5a460f9f459ec5fcffd8be": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1466ecf8d0ad429a97ee8126c03dd78b": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c836fef6c5f64691988b7294d32ae0ae", "IPY_MODEL_b7303fdb822b4f559c136fd62781411e", "IPY_MODEL_c9b3815919124920b9f5cd593f75c4b5" ], "layout": "IPY_MODEL_4f462747f6524ab296f43975840b3468" } }, "c836fef6c5f64691988b7294d32ae0ae": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_94ee64b3583d4826abef918380b4fc5a", "placeholder": "​", "style": "IPY_MODEL_dd24590438d04d8c94493f81794d73ea", "value": "tatoeba-dev.ara-eng.tsv: 100%" } }, "b7303fdb822b4f559c136fd62781411e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_3c04db996c374b5fb58541a10fc20d25", "max": 1778245, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b8fecaed88f341ada9c54993fb06eb67", "value": 1778245 } }, "c9b3815919124920b9f5cd593f75c4b5": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_643834dda8f149559b1a610b610ca37b", "placeholder": "​", "style": "IPY_MODEL_09b958f9ce894514970adc140ed97ff4", "value": " 1.78M/1.78M [00:00<00:00, 27.7MB/s]" } }, "4f462747f6524ab296f43975840b3468": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, 
"94ee64b3583d4826abef918380b4fc5a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dd24590438d04d8c94493f81794d73ea": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3c04db996c374b5fb58541a10fc20d25": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": 
null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b8fecaed88f341ada9c54993fb06eb67": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "643834dda8f149559b1a610b610ca37b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": 
null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "09b958f9ce894514970adc140ed97ff4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "49bc9b93252c459b905ddc7a03dfed71": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_145e2e17263f4d798185b561941594e6", "IPY_MODEL_870fafbef85f4009b86a868b253f5691", "IPY_MODEL_ac290a5547fc41a28ee6669a6f79e259" ], "layout": "IPY_MODEL_71d4d789d5f546d0b2a877cf17c588ec" } }, "145e2e17263f4d798185b561941594e6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_74186ec4da634d229d9a68af07dd48e0", "placeholder": "​", "style": "IPY_MODEL_0eee54a4e2104331a32be3fcd5b1300e", "value": "Generating 
test split: 100%" } }, "870fafbef85f4009b86a868b253f5691": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_fdb0faa095b34c59a0875b1d1262dc24", "max": 10304, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d97b2fa0c0b341af9d62cfe8e39694c9", "value": 10304 } }, "ac290a5547fc41a28ee6669a6f79e259": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d1a219a28ac641819659e51e6dc6f467", "placeholder": "​", "style": "IPY_MODEL_47e2aa076a7540deb8e85d2596b9c2cc", "value": " 10304/10304 [00:00<00:00, 38777.70 examples/s]" } }, "71d4d789d5f546d0b2a877cf17c588ec": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, 
"grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "74186ec4da634d229d9a68af07dd48e0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0eee54a4e2104331a32be3fcd5b1300e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "fdb0faa095b34c59a0875b1d1262dc24": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d97b2fa0c0b341af9d62cfe8e39694c9": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d1a219a28ac641819659e51e6dc6f467": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "47e2aa076a7540deb8e85d2596b9c2cc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f105c630bd4548ee953bfcb321194118": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_d41c6bb5990f4dd690d44f226d05fd74", "IPY_MODEL_7660d0e619e14f5b91fc04dee179bb4c", "IPY_MODEL_f12ba11fb38e4df98b96847d28e0c456" ], "layout": "IPY_MODEL_b858da19009a4bb795ccf8e5edd321c5" } }, "d41c6bb5990f4dd690d44f226d05fd74": { "model_module": "@jupyter-widgets/controls", "model_name": 
"HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e07084d61862429989a8c1d75b0204c3", "placeholder": "​", "style": "IPY_MODEL_4e19113ec8824d7dbbd9ba93dd92cbb6", "value": "Generating validation split: 100%" } }, "7660d0e619e14f5b91fc04dee179bb4c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4f0c1475c59e4880b06023f0c919c320", "max": 19528, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_703e40ca5e1b4077a9d21a3927ab075b", "value": 19528 } }, "f12ba11fb38e4df98b96847d28e0c456": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5f64b257808d462bbabfb53c29bfba4d", "placeholder": "​", "style": "IPY_MODEL_ae32cbf6e2544ab4bc61dfc54ba937c4", "value": " 19528/19528 [00:00<00:00, 46798.95 examples/s]" } }, "b858da19009a4bb795ccf8e5edd321c5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", 
"model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e07084d61862429989a8c1d75b0204c3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, 
"object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4e19113ec8824d7dbbd9ba93dd92cbb6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "4f0c1475c59e4880b06023f0c919c320": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "703e40ca5e1b4077a9d21a3927ab075b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "5f64b257808d462bbabfb53c29bfba4d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ae32cbf6e2544ab4bc61dfc54ba937c4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3aoxODC3qIl7", "outputId": "77715d63-626b-45ae-f18e-fe13ae97e474" }, 
"outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting gradio\n", " Downloading gradio-5.20.1-py3-none-any.whl.metadata (16 kB)\n", "Collecting datasets\n", " Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)\n", "Collecting aiofiles<24.0,>=22.0 (from gradio)\n", " Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n", "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.7.1)\n", "Collecting fastapi<1.0,>=0.115.2 (from gradio)\n", " Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)\n", "Collecting ffmpy (from gradio)\n", " Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)\n", "Collecting gradio-client==1.7.2 (from gradio)\n", " Downloading gradio_client-1.7.2-py3-none-any.whl.metadata (7.1 kB)\n", "Collecting groovy~=0.1 (from gradio)\n", " Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)\n", "Requirement already satisfied: httpx>=0.24.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.28.1)\n", "Requirement already satisfied: huggingface-hub>=0.28.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.28.1)\n", "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.1.6)\n", "Collecting markupsafe~=2.0 (from gradio)\n", " Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)\n", "Requirement already satisfied: numpy<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (1.26.4)\n", "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.10.15)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from gradio) (24.2)\n", "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.2.2)\n", "Requirement already satisfied: pillow<12.0,>=8.0 in 
/usr/local/lib/python3.11/dist-packages (from gradio) (11.1.0)\n", "Requirement already satisfied: pydantic>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.10.6)\n", "Collecting pydub (from gradio)\n", " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", "Collecting python-multipart>=0.0.18 (from gradio)\n", " Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)\n", "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (6.0.2)\n", "Collecting ruff>=0.9.3 (from gradio)\n", " Downloading ruff-0.10.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", "Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)\n", " Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)\n", "Collecting semantic-version~=2.0 (from gradio)\n", " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n", "Collecting starlette<1.0,>=0.40.0 (from gradio)\n", " Downloading starlette-0.46.1-py3-none-any.whl.metadata (6.2 kB)\n", "Collecting tomlkit<0.14.0,>=0.12.0 (from gradio)\n", " Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)\n", "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.15.2)\n", "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.12.2)\n", "Collecting uvicorn>=0.14.0 (from gradio)\n", " Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.7.2->gradio) (2024.10.0)\n", "Requirement already satisfied: websockets<16.0,>=10.0 in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.7.2->gradio) (14.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets) (3.17.0)\n", "Requirement already satisfied: pyarrow>=15.0.0 in 
/usr/local/lib/python3.11/dist-packages (from datasets) (18.1.0)\n", "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets) (2.32.3)\n", "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.11/dist-packages (from datasets) (4.67.1)\n", "Collecting xxhash (from datasets)\n", " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", "Collecting multiprocess<0.70.17 (from datasets)\n", " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets) (3.11.13)\n", "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (3.10)\n", "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)\n", "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (2.5.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.3.2)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (25.1.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.5.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (6.1.0)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (0.3.0)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.18.3)\n", "Requirement 
already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (2025.1.31)\n", "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (1.0.7)\n", "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.14.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.1)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.1)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.0->gradio) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.0->gradio) (2.27.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.4.1)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2.3.0)\n", "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (8.1.8)\n", "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n", "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (13.9.4)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.17.0)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in 
/usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.18.0)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", "Downloading gradio-5.20.1-py3-none-any.whl (62.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.3/62.3 MB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading gradio_client-1.7.2-py3-none-any.whl (322 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.1/322.1 kB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading datasets-3.3.2-py3-none-any.whl (485 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m485.4/485.4 kB\u001b[0m \u001b[31m37.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n", "Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading fastapi-0.115.11-py3-none-any.whl (94 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.9/94.9 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading groovy-0.1.2-py3-none-any.whl (14 kB)\n", "Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (28 kB)\n", "Downloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m 
\u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading python_multipart-0.0.20-py3-none-any.whl (24 kB)\n", "Downloading ruff-0.10.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.3/11.3 MB\u001b[0m \u001b[31m94.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading safehttpx-0.1.6-py3-none-any.whl (8.7 kB)\n", "Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", "Downloading starlette-0.46.1-py3-none-any.whl (71 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.0/72.0 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading tomlkit-0.13.2-py3-none-any.whl (37 kB)\n", "Downloading uvicorn-0.34.0-py3-none-any.whl (62 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.3/62.3 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading ffmpy-0.5.0-py3-none-any.whl (6.0 kB)\n", "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: pydub, xxhash, uvicorn, tomlkit, semantic-version, ruff, python-multipart, markupsafe, groovy, ffmpy, dill, aiofiles, starlette, multiprocess, safehttpx, gradio-client, fastapi, gradio, datasets\n", " Attempting uninstall: markupsafe\n", " Found existing installation: MarkupSafe 3.0.2\n", " Uninstalling MarkupSafe-3.0.2:\n", " Successfully uninstalled MarkupSafe-3.0.2\n", "Successfully installed aiofiles-23.2.1 datasets-3.3.2 dill-0.3.8 fastapi-0.115.11 ffmpy-0.5.0 gradio-5.20.1 gradio-client-1.7.2 
groovy-0.1.2 markupsafe-2.1.5 multiprocess-0.70.16 pydub-0.25.1 python-multipart-0.0.20 ruff-0.10.0 safehttpx-0.1.6 semantic-version-2.10.0 starlette-0.46.1 tomlkit-0.13.2 uvicorn-0.34.0 xxhash-3.5.0\n" ] } ], "source": [ "!pip install gradio datasets" ] }, { "cell_type": "code", "source": [ "!pip install torch==2.0.1 torchtext==0.15.2" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rBDxfZ_NqS27", "outputId": "9890ee2f-16dd-4383-c55d-efbf8b9c208a" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting torch==2.0.1\n", " Downloading torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl.metadata (24 kB)\n", "Collecting torchtext==0.15.2\n", " Downloading torchtext-0.15.2-cp311-cp311-manylinux1_x86_64.whl.metadata (7.4 kB)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (3.17.0)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (4.12.2)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (1.13.1)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (3.4.2)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (3.1.6)\n", "Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.1)\n", " Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.1)\n", " Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cuda-cupti-cu11==11.7.101 (from torch==2.0.1)\n", " Downloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cudnn-cu11==8.5.0.96 (from torch==2.0.1)\n", " Downloading 
nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cublas-cu11==11.10.3.66 (from torch==2.0.1)\n", " Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cufft-cu11==10.9.0.58 (from torch==2.0.1)\n", " Downloading nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-curand-cu11==10.2.10.91 (from torch==2.0.1)\n", " Downloading nvidia_curand_cu11-10.2.10.91-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cusolver-cu11==11.4.0.1 (from torch==2.0.1)\n", " Downloading nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cusparse-cu11==11.7.4.91 (from torch==2.0.1)\n", " Downloading nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-nccl-cu11==2.14.3 (from torch==2.0.1)\n", " Downloading nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl.metadata (1.8 kB)\n", "Collecting nvidia-nvtx-cu11==11.7.91 (from torch==2.0.1)\n", " Downloading nvidia_nvtx_cu11-11.7.91-py3-none-manylinux1_x86_64.whl.metadata (1.7 kB)\n", "Collecting triton==2.0.0 (from torch==2.0.1)\n", " Downloading triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.0 kB)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from torchtext==0.15.2) (4.67.1)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from torchtext==0.15.2) (2.32.3)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from torchtext==0.15.2) (1.26.4)\n", "Collecting torchdata==0.6.1 (from torchtext==0.15.2)\n", " Downloading torchdata-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from 
nvidia-cublas-cu11==11.10.3.66->torch==2.0.1) (75.1.0)\n", "Requirement already satisfied: wheel in /usr/local/lib/python3.11/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch==2.0.1) (0.45.1)\n", "Requirement already satisfied: urllib3>=1.25 in /usr/local/lib/python3.11/dist-packages (from torchdata==0.6.1->torchtext==0.15.2) (2.3.0)\n", "Requirement already satisfied: cmake in /usr/local/lib/python3.11/dist-packages (from triton==2.0.0->torch==2.0.1) (3.31.6)\n", "Collecting lit (from triton==2.0.0->torch==2.0.1)\n", " Downloading lit-18.1.8-py3-none-any.whl.metadata (2.5 kB)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch==2.0.1) (2.1.5)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->torchtext==0.15.2) (3.4.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->torchtext==0.15.2) (3.10)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->torchtext==0.15.2) (2025.1.31)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy->torch==2.0.1) (1.3.0)\n", "Downloading torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl (619.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m619.9/619.9 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading torchtext-0.15.2-cp311-cp311-manylinux1_x86_64.whl (2.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m47.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m317.1/317.1 MB\u001b[0m 
\u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.8/11.8 MB\u001b[0m \u001b[31m80.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.0/21.0 MB\u001b[0m \u001b[31m85.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m849.3/849.3 kB\u001b[0m \u001b[31m55.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m557.1/557.1 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux2014_x86_64.whl (168.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.4/168.4 MB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_curand_cu11-10.2.10.91-py3-none-manylinux1_x86_64.whl (54.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 MB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl (102.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.6/102.6 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading 
nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl (173.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m173.2/173.2 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl (177.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.1/177.1 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_nvtx_cu11-11.7.91-py3-none-manylinux1_x86_64.whl (98 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.6/98.6 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading torchdata-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m100.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.3/63.3 MB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading lit-18.1.8-py3-none-any.whl (96 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m96.4/96.4 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: lit, nvidia-nvtx-cu11, nvidia-nccl-cu11, nvidia-cusparse-cu11, nvidia-curand-cu11, nvidia-cufft-cu11, nvidia-cuda-runtime-cu11, nvidia-cuda-nvrtc-cu11, nvidia-cuda-cupti-cu11, nvidia-cublas-cu11, nvidia-cusolver-cu11, nvidia-cudnn-cu11, triton, torch, torchdata, torchtext\n", " Attempting uninstall: triton\n", " Found existing installation: triton 3.1.0\n", " Uninstalling triton-3.1.0:\n", " 
Successfully uninstalled triton-3.1.0\n", " Attempting uninstall: torch\n", " Found existing installation: torch 2.5.1+cu124\n", " Uninstalling torch-2.5.1+cu124:\n", " Successfully uninstalled torch-2.5.1+cu124\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "torchvision 0.20.1+cu124 requires torch==2.5.1, but you have torch 2.0.1 which is incompatible.\n", "torchaudio 2.5.1+cu124 requires torch==2.5.1, but you have torch 2.0.1 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed lit-18.1.8 nvidia-cublas-cu11-11.10.3.66 nvidia-cuda-cupti-cu11-11.7.101 nvidia-cuda-nvrtc-cu11-11.7.99 nvidia-cuda-runtime-cu11-11.7.99 nvidia-cudnn-cu11-8.5.0.96 nvidia-cufft-cu11-10.9.0.58 nvidia-curand-cu11-10.2.10.91 nvidia-cusolver-cu11-11.4.0.1 nvidia-cusparse-cu11-11.7.4.91 nvidia-nccl-cu11-2.14.3 nvidia-nvtx-cu11-11.7.91 torch-2.0.1 torchdata-0.6.1 torchtext-0.15.2 triton-2.0.0\n" ] } ] }, { "cell_type": "code", "source": [ "import torch\n", "torch.cuda.is_available()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vmpc30L4qmAx", "outputId": "cad2e4fc-b87c-4aa6-d9ad-8bdfeb55cf92" }, "execution_count": 3, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "True" ] }, "metadata": {}, "execution_count": 3 } ] }, { "cell_type": "code", "source": [ "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "import math\n", "from datasets import load_dataset\n", "import numpy as np\n", "from collections import Counter\n", "import gradio as gr\n", "\n", "# Setting random seed for reproducibility\n", "torch.manual_seed(42)\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "dataset = load_dataset('Helsinki-NLP/tatoeba_mt', 'ara-eng')\n", "\n", "# tokenization (word-level)\n", "def tokenize(text):\n", " return 
text.split()\n", "\n", "# Building vocabulary from dataset\n", "def build_vocab(data, tokenizer, min_freq=2):\n", " counter = Counter()\n", " for example in data:\n", " counter.update(tokenizer(example['sourceString']))\n", " counter.update(tokenizer(example['targetString']))\n", " # Adding special tokens\n", " specials = ['', '', '', '']\n", " vocab = specials + [word for word, freq in counter.items() if freq >= min_freq]\n", " word2idx = {word: idx for idx, word in enumerate(vocab)}\n", " idx2word = {idx: word for word, idx in word2idx.items()}\n", " return word2idx, idx2word\n", "\n", "# Converting text to tensor (adjusted to fit special tokens within max_len)\n", "def text_to_tensor(text, vocab, tokenizer, max_len=52):\n", " tokens = tokenizer(text)[:max_len - 2] # Reserving space for the start and end tokens\n", " tokens = [''] + tokens + ['']\n", " tensor = [vocab.get(token, vocab['']) for token in tokens]\n", " return torch.tensor(tensor, dtype=torch.long)\n", "\n", "train_data = dataset['validation'] # Using validation as training data for demo\n", "test_data = dataset['test']\n", "\n", "# Building shared vocabulary (for simplicity, using both languages in one vocab)\n", "word2idx, idx2word = build_vocab(train_data, tokenize)\n", "\n", "# Hyperparameters for data\n", "max_len = 52 # Increased to account for the start and end tokens\n", "batch_size = 32\n", "\n", "train_data_list = list(train_data) # Convert Dataset to list once\n", "print(f\"Length of train_data_list: {len(train_data_list)}\")\n", "\n", "def get_batches(data_list, batch_size, max_len=52):\n", " total_batches = len(data_list) // batch_size + (1 if len(data_list) % batch_size else 0)\n", " print(f\"Total batches to process: {total_batches}\")\n", " for i in range(0, len(data_list), batch_size):\n", " batch = data_list[i:i + batch_size]\n", " src_batch = [text_to_tensor(example['sourceString'], word2idx, tokenize, max_len) for example in batch]\n", " tgt_batch = [text_to_tensor(example['targetString'], word2idx, tokenize, max_len) 
for example in batch]\n", " src_batch = nn.utils.rnn.pad_sequence(src_batch, padding_value=word2idx[''], batch_first=False).to(device)\n", " tgt_batch = nn.utils.rnn.pad_sequence(tgt_batch, padding_value=word2idx[''], batch_first=False).to(device)\n", " if src_batch.size(0) > max_len:\n", " src_batch = src_batch[:max_len, :]\n", " elif src_batch.size(0) < max_len:\n", " padding = torch.full((max_len - src_batch.size(0), src_batch.size(1)), word2idx[''], dtype=torch.long).to(device)\n", " src_batch = torch.cat([src_batch, padding], dim=0)\n", " if tgt_batch.size(0) > max_len:\n", " tgt_batch = tgt_batch[:max_len, :]\n", " elif tgt_batch.size(0) < max_len:\n", " padding = torch.full((max_len - tgt_batch.size(0), tgt_batch.size(1)), word2idx[''], dtype=torch.long).to(device)\n", " tgt_batch = torch.cat([tgt_batch, padding], dim=0)\n", " src_batch = src_batch.transpose(0, 1) # [batch_size, seq_len]\n", " tgt_batch = tgt_batch.transpose(0, 1) # [batch_size, seq_len]\n", " yield src_batch, tgt_batch\n", "\n", "\n", "print(\"Revised Chunk 1 (Seventh Iteration) completed: Dataset loaded and preprocessing debugged.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 469, "referenced_widgets": [ "4936c0a76984492f88275a1500cf394f", "4f189acddd754998acd4b4f9777469c4", "ba8cec14e32648f4847b2194618d0757", "2fa1d931574e4b118e524d3c252cc6e8", "ae617cff68f24e09a3c6b0ad96f219dc", "f84ff4aa43fa4d778611f988215adc43", "26ea2db896ef456b92e80f196172efce", "5c93f4c064a143b0b954a76194701100", "84229337aea946b1bfb9f4c394276791", "2954dab4bb914153acf9eb5527c8ef58", "25dd04b3a0674b539ac56147477f5a64", "76944c7c2ec147b29705cf13841891c7", "b9ed8a9466894003bf1bf35e6923ace2", "866ce20493ad47048c3cb87aad0e5909", "a8254a2af6a4431b81352e1eff75fcef", "5009ae410a2c4d3b91999ab0e35c6c86", "9db7fd2603c945d797239e2cba155e9b", "cf2612b047574ecc9d209c2689795c6d", "3120cfab5cd44f08af6944d9cd3d7659", "fd0e167445dd44cc83007e5aeefb6dc8", "34332f0ec0054de082528326af5bc4b2", 
"01826f384beb4abf84382add81b931b0", "eaef0d163ed14989b44a1d3f21d59ce3", "3ffe76ba7da14e98aca4dfe47b3076c6", "88168392fcb34d40a7c99f402e730393", "5ee168f8431249d7900001c84ecf20b7", "59b62288d28048ff9c691f79f4ad11b0", "fe6523eb73194f15a2428b542ae868e4", "4ccccd3029124a72be368bd724f9072c", "749c1b04cdb14c87b5ab95c86dcdd5f6", "09cd81148c4845958c087ee8c7b5451c", "9847b47b4310469ca2523199b3257e3a", "ddc0dcd753834e02b6d4057155dcb0a1", "bd58c69cf5ad494eaaf12bbf10f66661", "c948d400029a4ee6b7ac40faebfe17e7", "64f42dabd3eb48069167da0251f2d8fb", "504de4c8b10041b3863a9cbe040d5cf6", "6568660add1448ac81df0c6a67c47d66", "5ec8f06d517c4f7bb3ab639f91b792f3", "b7efe138f73a4811a14fd91d5f18766c", "99757ac60afb4aea8d77e49e199a74b0", "84da3eb589514c8a8c7fdce342ad6ac8", "8451c69261104dc4bc06924aa6b2b28a", "555ff104df5a460f9f459ec5fcffd8be", "1466ecf8d0ad429a97ee8126c03dd78b", "c836fef6c5f64691988b7294d32ae0ae", "b7303fdb822b4f559c136fd62781411e", "c9b3815919124920b9f5cd593f75c4b5", "4f462747f6524ab296f43975840b3468", "94ee64b3583d4826abef918380b4fc5a", "dd24590438d04d8c94493f81794d73ea", "3c04db996c374b5fb58541a10fc20d25", "b8fecaed88f341ada9c54993fb06eb67", "643834dda8f149559b1a610b610ca37b", "09b958f9ce894514970adc140ed97ff4", "49bc9b93252c459b905ddc7a03dfed71", "145e2e17263f4d798185b561941594e6", "870fafbef85f4009b86a868b253f5691", "ac290a5547fc41a28ee6669a6f79e259", "71d4d789d5f546d0b2a877cf17c588ec", "74186ec4da634d229d9a68af07dd48e0", "0eee54a4e2104331a32be3fcd5b1300e", "fdb0faa095b34c59a0875b1d1262dc24", "d97b2fa0c0b341af9d62cfe8e39694c9", "d1a219a28ac641819659e51e6dc6f467", "47e2aa076a7540deb8e85d2596b9c2cc", "f105c630bd4548ee953bfcb321194118", "d41c6bb5990f4dd690d44f226d05fd74", "7660d0e619e14f5b91fc04dee179bb4c", "f12ba11fb38e4df98b96847d28e0c456", "b858da19009a4bb795ccf8e5edd321c5", "e07084d61862429989a8c1d75b0204c3", "4e19113ec8824d7dbbd9ba93dd92cbb6", "4f0c1475c59e4880b06023f0c919c320", "703e40ca5e1b4077a9d21a3927ab075b", "5f64b257808d462bbabfb53c29bfba4d", 
"ae32cbf6e2544ab4bc61dfc54ba937c4" ] }, "id": "pT320YaDqXpA", "outputId": "b8330b80-0072-40f8-a008-50537c372594" }, "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "README.md: 0%| | 0.00/12.1k [00:00']).unsqueeze(1).unsqueeze(2)\n", " tgt_mask = (tgt != word2idx['']).unsqueeze(1).unsqueeze(3)\n", " seq_len = tgt.size(1)\n", " nopeak_mask = (1 - torch.triu(torch.ones(1, seq_len, seq_len), diagonal=1)).bool().to(device)\n", " tgt_mask = tgt_mask & nopeak_mask\n", " return src_mask, tgt_mask\n", "\n", " def forward(self, src, tgt):\n", " src_mask, tgt_mask = self.generate_mask(src, tgt)\n", " src_embedded = self.dropout(self.pos_encoding(self.src_embedding(src) * math.sqrt(self.d_model)))\n", " tgt_embedded = self.dropout(self.pos_encoding(self.tgt_embedding(tgt) * math.sqrt(self.d_model)))\n", "\n", " enc_output = src_embedded\n", " for enc_layer in self.encoder_layers:\n", " enc_output = enc_layer(enc_output, src_mask)\n", "\n", " dec_output = tgt_embedded\n", " for dec_layer in self.decoder_layers:\n", " dec_output = dec_layer(dec_output, enc_output, src_mask, tgt_mask)\n", "\n", " return self.fc_out(dec_output)\n", "\n", "print(\"Revised Chunk 2 (Fourth Iteration) completed: Transformer model fixed with max_len=52.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "gyPdrhT2qYwc", "outputId": 
"92531bf9-1692-4df5-9f13-2bd7568a08c8" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Revised Chunk 2 (Fourth Iteration) completed: Transformer model fixed with max_len=52.\n" ] } ] }, { "cell_type": "code", "source": [ "vocab_size = len(word2idx)\n", "model = Transformer(\n", " src_vocab_size=vocab_size,\n", " tgt_vocab_size=vocab_size,\n", " d_model=256,\n", " num_heads=8,\n", " num_layers=3,\n", " d_ff=1024,\n", " max_len=52,\n", " dropout=0.1\n", ").to(device)\n", "\n", "# Loss and optimizer\n", "criterion = nn.CrossEntropyLoss(ignore_index=word2idx[''])\n", "optimizer = optim.Adam(model.parameters(), lr=0.0001)\n", "\n", "# Training loop with progress feedback\n", "def train(model, data, epochs=20):\n", " model.train()\n", " total_batches = len(data) // batch_size + (1 if len(data) % batch_size else 0)\n", " print(f\"Total batches per epoch: {total_batches}\")\n", " for epoch in range(epochs):\n", " total_loss = 0\n", " for batch_idx, (src_batch, tgt_batch) in enumerate(get_batches(data, batch_size, max_len=52), 1):\n", " if batch_idx % 100 == 0: # Printing every 100 batches for feedback\n", " print(f\"Epoch {epoch + 1}, Batch {batch_idx}/{total_batches} \")\n", " optimizer.zero_grad()\n", " output = model(src_batch, tgt_batch[:, :-1])\n", " loss = criterion(output.view(-1, vocab_size), tgt_batch[:, 1:].reshape(-1))\n", " loss.backward()\n", " optimizer.step()\n", " total_loss += loss.item()\n", " avg_loss = total_loss / total_batches\n", " print(f\"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}\")\n", "\n", "# Main function\n", "def translate(model, sentence, max_len=52):\n", " model.eval()\n", " with torch.no_grad():\n", " src = text_to_tensor(sentence, word2idx, tokenize, max_len).unsqueeze(0).to(device)\n", " tgt = torch.tensor([word2idx['']], dtype=torch.long).unsqueeze(0).to(device)\n", " for _ in range(max_len):\n", " output = model(src, tgt)\n", " next_token = output[:, -1, :].argmax(dim=-1).item()\n", " 
if next_token == word2idx['']:\n", " break\n", " tgt = torch.cat([tgt, torch.tensor([[next_token]], dtype=torch.long).to(device)], dim=1)\n", " translated = [idx2word[idx.item()] for idx in tgt[0] if idx.item() in idx2word]\n", " return ' '.join(translated[1:])\n", "\n", "print(\"Starting training...\")\n", "train(model, train_data_list)\n", "print(\"Training completed.\")\n", "\n", "# Testing\n", "test_sentence = \"عمرك رايح المكسيك؟\"\n", "translated = translate(model, test_sentence)\n", "print(f\"Input: {test_sentence}\")\n", "print(f\"Translated: {translated}\")\n", "\n", "print(\"Chunk 3 completed: Training and inference implemented.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "collapsed": true, "id": "xCeVlZJHqcf2", "outputId": "7affbe88-8c41-44bc-ef2f-ffd09f38f4eb" }, "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Starting training...\n", "Total batches per epoch: 611\n", "Total batches to process: 611\n", "Epoch 1, Batch 100/611 \n", "Epoch 1, Batch 200/611 \n", "Epoch 1, Batch 300/611 \n", "Epoch 1, Batch 400/611 \n", "Epoch 1, Batch 500/611 \n", "Epoch 1, Batch 600/611 \n", "Epoch 1/20, Loss: 6.1513\n", "Total batches to process: 611\n", "Epoch 2, Batch 100/611 \n", "Epoch 2, Batch 200/611 \n", "Epoch 2, Batch 300/611 \n", "Epoch 2, Batch 400/611 \n", "Epoch 2, Batch 500/611 \n", "Epoch 2, Batch 600/611 \n", "Epoch 2/20, Loss: 5.1121\n", "Total batches to process: 611\n", "Epoch 3, Batch 100/611 \n", "Epoch 3, Batch 200/611 \n", "Epoch 3, Batch 300/611 \n", "Epoch 3, Batch 400/611 \n", "Epoch 3, Batch 500/611 \n", "Epoch 3, Batch 600/611 \n", "Epoch 3/20, Loss: 4.6931\n", "Total batches to process: 611\n", "Epoch 4, Batch 100/611 \n", "Epoch 4, Batch 200/611 \n", "Epoch 4, Batch 300/611 \n", "Epoch 4, Batch 400/611 \n", "Epoch 4, Batch 500/611 \n", "Epoch 4, Batch 600/611 \n", "Epoch 4/20, Loss: 4.3833\n", "Total batches to process: 611\n", "Epoch 5, Batch 100/611 \n", "Epoch 5, 
Batch 200/611 \n", "Epoch 5, Batch 300/611 \n", "Epoch 5, Batch 400/611 \n", "Epoch 5, Batch 500/611 \n", "Epoch 5, Batch 600/611 \n", "Epoch 5/20, Loss: 4.1299\n", "Total batches to process: 611\n", "Epoch 6, Batch 100/611 \n", "Epoch 6, Batch 200/611 \n", "Epoch 6, Batch 300/611 \n", "Epoch 6, Batch 400/611 \n", "Epoch 6, Batch 500/611 \n", "Epoch 6, Batch 600/611 \n", "Epoch 6/20, Loss: 3.9104\n", "Total batches to process: 611\n", "Epoch 7, Batch 100/611 \n", "Epoch 7, Batch 200/611 \n", "Epoch 7, Batch 300/611 \n", "Epoch 7, Batch 400/611 \n", "Epoch 7, Batch 500/611 \n", "Epoch 7, Batch 600/611 \n", "Epoch 7/20, Loss: 3.7017\n", "Total batches to process: 611\n", "Epoch 8, Batch 100/611 \n", "Epoch 8, Batch 200/611 \n", "Epoch 8, Batch 300/611 \n", "Epoch 8, Batch 400/611 \n", "Epoch 8, Batch 500/611 \n", "Epoch 8, Batch 600/611 \n", "Epoch 8/20, Loss: 3.5075\n", "Total batches to process: 611\n", "Epoch 9, Batch 100/611 \n", "Epoch 9, Batch 200/611 \n", "Epoch 9, Batch 300/611 \n", "Epoch 9, Batch 400/611 \n", "Epoch 9, Batch 500/611 \n", "Epoch 9, Batch 600/611 \n", "Epoch 9/20, Loss: 3.3238\n", "Total batches to process: 611\n", "Epoch 10, Batch 100/611 \n", "Epoch 10, Batch 200/611 \n", "Epoch 10, Batch 300/611 \n", "Epoch 10, Batch 400/611 \n", "Epoch 10, Batch 500/611 \n", "Epoch 10, Batch 600/611 \n", "Epoch 10/20, Loss: 3.1490\n", "Total batches to process: 611\n", "Epoch 11, Batch 100/611 \n", "Epoch 11, Batch 200/611 \n", "Epoch 11, Batch 300/611 \n", "Epoch 11, Batch 400/611 \n", "Epoch 11, Batch 500/611 \n", "Epoch 11, Batch 600/611 \n", "Epoch 11/20, Loss: 2.9762\n", "Total batches to process: 611\n", "Epoch 12, Batch 100/611 \n", "Epoch 12, Batch 200/611 \n", "Epoch 12, Batch 300/611 \n", "Epoch 12, Batch 400/611 \n", "Epoch 12, Batch 500/611 \n", "Epoch 12, Batch 600/611 \n", "Epoch 12/20, Loss: 2.8136\n", "Total batches to process: 611\n", "Epoch 13, Batch 100/611 \n", "Epoch 13, Batch 200/611 \n", "Epoch 13, Batch 300/611 \n", "Epoch 13, 
Batch 400/611 \n", "Epoch 13, Batch 500/611 \n", "Epoch 13, Batch 600/611 \n", "Epoch 13/20, Loss: 2.6486\n", "Total batches to process: 611\n", "Epoch 14, Batch 100/611 \n", "Epoch 14, Batch 200/611 \n", "Epoch 14, Batch 300/611 \n", "Epoch 14, Batch 400/611 \n", "Epoch 14, Batch 500/611 \n", "Epoch 14, Batch 600/611 \n", "Epoch 14/20, Loss: 2.4945\n", "Total batches to process: 611\n", "Epoch 15, Batch 100/611 \n", "Epoch 15, Batch 200/611 \n", "Epoch 15, Batch 300/611 \n", "Epoch 15, Batch 400/611 \n", "Epoch 15, Batch 500/611 \n", "Epoch 15, Batch 600/611 \n", "Epoch 15/20, Loss: 2.3394\n", "Total batches to process: 611\n", "Epoch 16, Batch 100/611 \n", "Epoch 16, Batch 200/611 \n", "Epoch 16, Batch 300/611 \n", "Epoch 16, Batch 400/611 \n", "Epoch 16, Batch 500/611 \n", "Epoch 16, Batch 600/611 \n", "Epoch 16/20, Loss: 2.1968\n", "Total batches to process: 611\n", "Epoch 17, Batch 100/611 \n", "Epoch 17, Batch 200/611 \n", "Epoch 17, Batch 300/611 \n", "Epoch 17, Batch 400/611 \n", "Epoch 17, Batch 500/611 \n", "Epoch 17, Batch 600/611 \n", "Epoch 17/20, Loss: 2.0583\n", "Total batches to process: 611\n", "Epoch 18, Batch 100/611 \n", "Epoch 18, Batch 200/611 \n", "Epoch 18, Batch 300/611 \n", "Epoch 18, Batch 400/611 \n", "Epoch 18, Batch 500/611 \n", "Epoch 18, Batch 600/611 \n", "Epoch 18/20, Loss: 1.9222\n", "Total batches to process: 611\n", "Epoch 19, Batch 100/611 \n", "Epoch 19, Batch 200/611 \n", "Epoch 19, Batch 300/611 \n", "Epoch 19, Batch 400/611 \n", "Epoch 19, Batch 500/611 \n", "Epoch 19, Batch 600/611 \n", "Epoch 19/20, Loss: 1.7939\n", "Total batches to process: 611\n", "Epoch 20, Batch 100/611 \n", "Epoch 20, Batch 200/611 \n", "Epoch 20, Batch 300/611 \n", "Epoch 20, Batch 400/611 \n", "Epoch 20, Batch 500/611 \n", "Epoch 20, Batch 600/611 \n", "Epoch 20/20, Loss: 1.6632\n", "Training completed.\n", "Input: عمرك رايح المكسيك؟\n", "Translated: I'm going to take a \n", "Chunk 3 completed: Training and inference implemented.\n" ] } ] }, { 
"cell_type": "code", "source": [ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "# Instantiate the model (assuming train_dataset is already defined)\n", "model = Transformer(\n", " src_vocab_size=vocab_size,\n", " tgt_vocab_size=vocab_size\n", ").to(device)\n", "\n", "# Load model checkpoint and set to evaluation mode\n", "model.load_state_dict(torch.load(\"habibi.pth\", map_location=device))\n", "model.eval()\n", "\n", "def gradio_translate(text):\n", " return translate(model, text)\n", "\n", "interface = gr.Interface(\n", " fn=gradio_translate,\n", " inputs=gr.Textbox(lines=2, placeholder=\"Enter Arabic sentence here...\"),\n", " outputs=\"text\",\n", " title=\"Arabic to English Translator\",\n", " description=\"Translate Arabic sentences to English using a Transformer model.\"\n", ")\n", "\n", "interface.launch()\n", "\n", "print(\"Chunk 4 completed: Gradio interface deployed.\")" ], "metadata": { "id": "9Nr_qAIFyGMP", "colab": { "base_uri": "https://localhost:8080/", "height": 663 }, "outputId": "ac1a4b5b-81d1-42f9-c1c9-4278ffe14285" }, "execution_count": 10, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n", "\n", "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n", "* Running on public URL: https://b4e3cb0ecd69a69020.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "
" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Chunk 4 completed: Gradio interface deployed.\n" ] } ] }, { "cell_type": "code", "source": [ "torch.save(model.state_dict(), \"habibi.pth\")\n", "print(\"Model checkpoint saved as transformer_code.pth\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7aRMjC_eb9Cd", "outputId": "f1b9217f-ca2b-4e88-f375-29aaa2ccf17c" }, "execution_count": 9, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Model checkpoint saved as transformer_code.pth\n" ] } ] } ] }