diff --git a/data/2024-09-21-14-44-50.m4a b/data/2024-09-21-14-44-50.m4a new file mode 100644 index 0000000000000000000000000000000000000000..099b8669d3f8acb4bd4192d06bab7d090d14c5b9 Binary files /dev/null and b/data/2024-09-21-14-44-50.m4a differ diff --git a/data/2024-10-12-17-17-28.m4a b/data/2024-10-12-17-17-28.m4a new file mode 100644 index 0000000000000000000000000000000000000000..ac7dcf023a97b9aaf185693bb6f25f9b1b31e936 Binary files /dev/null and b/data/2024-10-12-17-17-28.m4a differ diff --git a/data/2024-10-12-17-18-41.m4a b/data/2024-10-12-17-18-41.m4a new file mode 100644 index 0000000000000000000000000000000000000000..da501075da9d932d8951f2fa32a3ff7ff84014a8 Binary files /dev/null and b/data/2024-10-12-17-18-41.m4a differ diff --git a/data/2024-10-12-17-21-09.m4a b/data/2024-10-12-17-21-09.m4a new file mode 100644 index 0000000000000000000000000000000000000000..4585aac64e27ac35263570a3c17634fdf5895d6b Binary files /dev/null and b/data/2024-10-12-17-21-09.m4a differ diff --git a/data/2024-10-12-17-21-31.m4a b/data/2024-10-12-17-21-31.m4a new file mode 100644 index 0000000000000000000000000000000000000000..83a4002d871bee376f5a9545c144593e329fa367 Binary files /dev/null and b/data/2024-10-12-17-21-31.m4a differ diff --git a/data/2024-10-12-17-21-51.m4a b/data/2024-10-12-17-21-51.m4a new file mode 100644 index 0000000000000000000000000000000000000000..99e924d084ec041ca1adfcaae2abea01ddb0471f Binary files /dev/null and b/data/2024-10-12-17-21-51.m4a differ diff --git a/data/2024-10-12-17-22-08.m4a b/data/2024-10-12-17-22-08.m4a new file mode 100644 index 0000000000000000000000000000000000000000..02a8560ba41c19a2290bb836d490d3632d5e71aa Binary files /dev/null and b/data/2024-10-12-17-22-08.m4a differ diff --git a/data/2024-10-12-17-22-20.m4a b/data/2024-10-12-17-22-20.m4a new file mode 100644 index 0000000000000000000000000000000000000000..351233678ff038aad0b71496bc34378fd649d8f9 Binary files /dev/null and b/data/2024-10-12-17-22-20.m4a differ diff --git 
a/data/2024-10-12-17-22-31.m4a b/data/2024-10-12-17-22-31.m4a new file mode 100644 index 0000000000000000000000000000000000000000..e200112bf3d71d4e6fd330a0eb8357d3163046db Binary files /dev/null and b/data/2024-10-12-17-22-31.m4a differ diff --git a/data/2024-10-12-17-26-23.m4a b/data/2024-10-12-17-26-23.m4a new file mode 100644 index 0000000000000000000000000000000000000000..05c7425d63c41e0f867777c1ba030793bb241587 Binary files /dev/null and b/data/2024-10-12-17-26-23.m4a differ diff --git a/data/2024-10-12-17-27-28.m4a b/data/2024-10-12-17-27-28.m4a new file mode 100644 index 0000000000000000000000000000000000000000..97a7eda66c1b2441b074ec5c0d90fb1972d713b8 Binary files /dev/null and b/data/2024-10-12-17-27-28.m4a differ diff --git a/data/2024-10-12-17-27-52.m4a b/data/2024-10-12-17-27-52.m4a new file mode 100644 index 0000000000000000000000000000000000000000..16f1694964c5e37cad623e050ee7c92ce6251986 Binary files /dev/null and b/data/2024-10-12-17-27-52.m4a differ diff --git a/data/2024-10-12-17-28-24.m4a b/data/2024-10-12-17-28-24.m4a new file mode 100644 index 0000000000000000000000000000000000000000..f716c1923dbe6cee18030519e4e99783b1651a1d Binary files /dev/null and b/data/2024-10-12-17-28-24.m4a differ diff --git a/data/2024-10-12-17-28-56.m4a b/data/2024-10-12-17-28-56.m4a new file mode 100644 index 0000000000000000000000000000000000000000..93cd8866a32a22b8122c5e99523bb0151e93f883 Binary files /dev/null and b/data/2024-10-12-17-28-56.m4a differ diff --git a/data/2024-10-12-17-29-33.m4a b/data/2024-10-12-17-29-33.m4a new file mode 100644 index 0000000000000000000000000000000000000000..fcbb9b4d24126b689030e67f45ed1923d8b601ba Binary files /dev/null and b/data/2024-10-12-17-29-33.m4a differ diff --git a/data/2024-10-12-17-30-50.m4a b/data/2024-10-12-17-30-50.m4a new file mode 100644 index 0000000000000000000000000000000000000000..df2aa404029551c03d354337bf815d0298b1bed8 Binary files /dev/null and b/data/2024-10-12-17-30-50.m4a differ diff --git 
a/data/2024-10-12-17-30-57.m4a b/data/2024-10-12-17-30-57.m4a new file mode 100644 index 0000000000000000000000000000000000000000..8eff7917d0499c4428d6a4bcdb96079218b9a8d1 Binary files /dev/null and b/data/2024-10-12-17-30-57.m4a differ diff --git a/data/2024-10-12-17-32-03.m4a b/data/2024-10-12-17-32-03.m4a new file mode 100644 index 0000000000000000000000000000000000000000..238bcad83ed7e1fcd20f2846194878a9f98ece8a Binary files /dev/null and b/data/2024-10-12-17-32-03.m4a differ diff --git a/data/2024-10-12-17-34-06.m4a b/data/2024-10-12-17-34-06.m4a new file mode 100644 index 0000000000000000000000000000000000000000..e2703869b8abc89d724c535af326085cc4bc3bca Binary files /dev/null and b/data/2024-10-12-17-34-06.m4a differ diff --git a/data/2024-10-12-17-35-14.m4a b/data/2024-10-12-17-35-14.m4a new file mode 100644 index 0000000000000000000000000000000000000000..d456cab719c158edd216c255bfb6f9668daee75b Binary files /dev/null and b/data/2024-10-12-17-35-14.m4a differ diff --git a/data/2024-10-12-17-36-20.m4a b/data/2024-10-12-17-36-20.m4a new file mode 100644 index 0000000000000000000000000000000000000000..ff7db36af05f8e4567d348b27b855aae0d773ae5 Binary files /dev/null and b/data/2024-10-12-17-36-20.m4a differ diff --git a/data/2024-10-12-17-37-30.m4a b/data/2024-10-12-17-37-30.m4a new file mode 100644 index 0000000000000000000000000000000000000000..27d547ddb60038d13a2bca61b3d1741a04d3b240 Binary files /dev/null and b/data/2024-10-12-17-37-30.m4a differ diff --git a/data/2024-10-12-17-38-17.m4a b/data/2024-10-12-17-38-17.m4a new file mode 100644 index 0000000000000000000000000000000000000000..0b9970cdfab83fc5db889e38e85291f506932474 Binary files /dev/null and b/data/2024-10-12-17-38-17.m4a differ diff --git a/data/2024-10-12-17-38-36.m4a b/data/2024-10-12-17-38-36.m4a new file mode 100644 index 0000000000000000000000000000000000000000..57df1dfeed88f5f81e876ce6b477bc6164a5b9f3 Binary files /dev/null and b/data/2024-10-12-17-38-36.m4a differ diff --git 
a/data/2024-10-12-17-39-00.m4a b/data/2024-10-12-17-39-00.m4a new file mode 100644 index 0000000000000000000000000000000000000000..207023a5c9e1f9b1b376b94c20a37d42d10df131 Binary files /dev/null and b/data/2024-10-12-17-39-00.m4a differ diff --git a/data/2024-10-12-17-41-38.m4a b/data/2024-10-12-17-41-38.m4a new file mode 100644 index 0000000000000000000000000000000000000000..eec263d87240b8ce21f9b69c6ad50867c0a3ed23 Binary files /dev/null and b/data/2024-10-12-17-41-38.m4a differ diff --git a/data/2024-10-12-17-42-24.m4a b/data/2024-10-12-17-42-24.m4a new file mode 100644 index 0000000000000000000000000000000000000000..6131cc93ef8ea96cc128d86d5311350f27cde594 Binary files /dev/null and b/data/2024-10-12-17-42-24.m4a differ diff --git a/data/2024-10-12-17-43-22.m4a b/data/2024-10-12-17-43-22.m4a new file mode 100644 index 0000000000000000000000000000000000000000..0e91f709b3b7618f12e59091c2a4ab42fa4faee3 Binary files /dev/null and b/data/2024-10-12-17-43-22.m4a differ diff --git a/data/2024-10-12-17-44-39.m4a b/data/2024-10-12-17-44-39.m4a new file mode 100644 index 0000000000000000000000000000000000000000..79890c38a38441ef4cf3fb23cab9c0282769e481 Binary files /dev/null and b/data/2024-10-12-17-44-39.m4a differ diff --git a/data/2024-10-12-17-45-00.m4a b/data/2024-10-12-17-45-00.m4a new file mode 100644 index 0000000000000000000000000000000000000000..2eb2b37084316e4afa0a6ced0dc07f059eab6a70 Binary files /dev/null and b/data/2024-10-12-17-45-00.m4a differ diff --git a/data/2024-10-12-17-46-43.m4a b/data/2024-10-12-17-46-43.m4a new file mode 100644 index 0000000000000000000000000000000000000000..35efc0a15a464de755e2f72c261ce0f65f18f3e9 Binary files /dev/null and b/data/2024-10-12-17-46-43.m4a differ diff --git a/data/2024-10-12-17-47-04.m4a b/data/2024-10-12-17-47-04.m4a new file mode 100644 index 0000000000000000000000000000000000000000..be01b42842b02eca76ba6ef483e5cefe5f294be4 Binary files /dev/null and b/data/2024-10-12-17-47-04.m4a differ diff --git 
a/data/2024-10-12-17-47-17.m4a b/data/2024-10-12-17-47-17.m4a new file mode 100644 index 0000000000000000000000000000000000000000..f08af9e1daed17510c5cd7f6caaacf8160266e6c Binary files /dev/null and b/data/2024-10-12-17-47-17.m4a differ diff --git a/data/processed_audio__audio1_audio1_0.wav b/data/processed_audio__audio1_audio1_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..73607e0ff02beb33897e13408e12debeda2952aa Binary files /dev/null and b/data/processed_audio__audio1_audio1_0.wav differ diff --git a/data/processed_audio__audio1_audio1_1.wav b/data/processed_audio__audio1_audio1_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..b0e2d0feb02074a95df8fdca688c169da7f6745d Binary files /dev/null and b/data/processed_audio__audio1_audio1_1.wav differ diff --git a/data/processed_audio__audio1_audio1_2.wav b/data/processed_audio__audio1_audio1_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..eb316ac2cd436812c5d0fac6d9ce01ebd3ce305f Binary files /dev/null and b/data/processed_audio__audio1_audio1_2.wav differ diff --git a/data/processed_audio__audio1_audio1_3.wav b/data/processed_audio__audio1_audio1_3.wav new file mode 100644 index 0000000000000000000000000000000000000000..7e733f4a6c983c2951ba650f3ec7283570418bde Binary files /dev/null and b/data/processed_audio__audio1_audio1_3.wav differ diff --git a/data/processed_audio__audio1_audio1_4.wav b/data/processed_audio__audio1_audio1_4.wav new file mode 100644 index 0000000000000000000000000000000000000000..6364216ef3d16e836c729ce967b87ba0390575f1 Binary files /dev/null and b/data/processed_audio__audio1_audio1_4.wav differ diff --git a/data/processed_audio__audio1_audio1_5.wav b/data/processed_audio__audio1_audio1_5.wav new file mode 100644 index 0000000000000000000000000000000000000000..773b1b629632c7a3a8dea5e929ec50be033db5b9 Binary files /dev/null and b/data/processed_audio__audio1_audio1_5.wav differ diff --git 
a/data/processed_audio__audio1_audio1_6.wav b/data/processed_audio__audio1_audio1_6.wav new file mode 100644 index 0000000000000000000000000000000000000000..5db0d3125d486200d46d3aef48738fd5b23bc7f3 Binary files /dev/null and b/data/processed_audio__audio1_audio1_6.wav differ diff --git a/data/processed_audio__audio1_audio1_7.wav b/data/processed_audio__audio1_audio1_7.wav new file mode 100644 index 0000000000000000000000000000000000000000..324627467f36af2921db18226893e3c30c762739 Binary files /dev/null and b/data/processed_audio__audio1_audio1_7.wav differ diff --git a/data/processed_audio__audio1_audio1_8.wav b/data/processed_audio__audio1_audio1_8.wav new file mode 100644 index 0000000000000000000000000000000000000000..6fee57b6c9360b0d356a87ee4f6f4e0262c9f7b3 Binary files /dev/null and b/data/processed_audio__audio1_audio1_8.wav differ diff --git a/data/processed_audio__audio1_audio1_9.wav b/data/processed_audio__audio1_audio1_9.wav new file mode 100644 index 0000000000000000000000000000000000000000..1159bb6a3c20ae45ce2b1a8af539a1865117c0d4 Binary files /dev/null and b/data/processed_audio__audio1_audio1_9.wav differ diff --git a/data/processed_audio__audio2_audio2_0.wav b/data/processed_audio__audio2_audio2_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..e6473194c8caec4a1c0065757a6245ee8ece3a81 Binary files /dev/null and b/data/processed_audio__audio2_audio2_0.wav differ diff --git a/data/processed_audio__audio2_audio2_1.wav b/data/processed_audio__audio2_audio2_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..4d93481393f8ef45d5b0f70b10748e442a3b98b0 Binary files /dev/null and b/data/processed_audio__audio2_audio2_1.wav differ diff --git a/data/processed_audio__audio2_audio2_2.wav b/data/processed_audio__audio2_audio2_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..24434e58d4d61c6f6c699c4f8e3084cc8364e339 Binary files /dev/null and b/data/processed_audio__audio2_audio2_2.wav differ 
diff --git a/data/processed_audio__audio2_audio2_3.wav b/data/processed_audio__audio2_audio2_3.wav new file mode 100644 index 0000000000000000000000000000000000000000..a9347c04ddbb8b8c2bf44090569865939a25269d Binary files /dev/null and b/data/processed_audio__audio2_audio2_3.wav differ diff --git a/data/processed_audio__audio2_audio2_4.wav b/data/processed_audio__audio2_audio2_4.wav new file mode 100644 index 0000000000000000000000000000000000000000..4a52fc89a344a806a4aa41cdba0355a1b440e5bb Binary files /dev/null and b/data/processed_audio__audio2_audio2_4.wav differ diff --git a/data/processed_audio__audio2_audio2_5.wav b/data/processed_audio__audio2_audio2_5.wav new file mode 100644 index 0000000000000000000000000000000000000000..60d775ce2ccf0d39ceab727b1eeaf2ae9fea48a5 Binary files /dev/null and b/data/processed_audio__audio2_audio2_5.wav differ diff --git a/data/processed_audio__audio2_audio2_6.wav b/data/processed_audio__audio2_audio2_6.wav new file mode 100644 index 0000000000000000000000000000000000000000..800a430a8d18db5af247aaff0b33c424f682cd72 Binary files /dev/null and b/data/processed_audio__audio2_audio2_6.wav differ diff --git a/data/processed_audio__audio2_audio2_7.wav b/data/processed_audio__audio2_audio2_7.wav new file mode 100644 index 0000000000000000000000000000000000000000..00e71417c1e9c3e1db6481bbd861ad90ce758bca Binary files /dev/null and b/data/processed_audio__audio2_audio2_7.wav differ diff --git a/data/processed_audio__audio2_audio2_8.wav b/data/processed_audio__audio2_audio2_8.wav new file mode 100644 index 0000000000000000000000000000000000000000..8477124d1fcb68977e54843809ff42688510ac78 Binary files /dev/null and b/data/processed_audio__audio2_audio2_8.wav differ diff --git a/data/processed_audio__audio2_audio2_9.wav b/data/processed_audio__audio2_audio2_9.wav new file mode 100644 index 0000000000000000000000000000000000000000..de1e5d186fabe7b56d7b2b807e928706c63f7739 Binary files /dev/null and 
b/data/processed_audio__audio2_audio2_9.wav differ diff --git a/data/processed_audio__audio3_audio3_0.wav b/data/processed_audio__audio3_audio3_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..7e04fe6063bb5786ac0caf4b681aa037d8dbf7e0 Binary files /dev/null and b/data/processed_audio__audio3_audio3_0.wav differ diff --git a/data/processed_audio__audio3_audio3_1.wav b/data/processed_audio__audio3_audio3_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..017b51e1a38e67cdbbf02492c631550b6b0fa628 Binary files /dev/null and b/data/processed_audio__audio3_audio3_1.wav differ diff --git a/data/processed_audio__audio3_audio3_2.wav b/data/processed_audio__audio3_audio3_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..e8b186185f2710f77ffd1461d0c4ebaf44a6609c Binary files /dev/null and b/data/processed_audio__audio3_audio3_2.wav differ diff --git a/data/processed_audio__audio3_audio3_3.wav b/data/processed_audio__audio3_audio3_3.wav new file mode 100644 index 0000000000000000000000000000000000000000..b3ccfaf70036354dbd0f2779f8599c0849e87ced Binary files /dev/null and b/data/processed_audio__audio3_audio3_3.wav differ diff --git a/data/processed_audio__audio3_audio3_4.wav b/data/processed_audio__audio3_audio3_4.wav new file mode 100644 index 0000000000000000000000000000000000000000..e16d0d84d7bd8e951d7021534798c31ccf9aaf6c Binary files /dev/null and b/data/processed_audio__audio3_audio3_4.wav differ diff --git a/data/processed_audio__audio3_audio3_5.wav b/data/processed_audio__audio3_audio3_5.wav new file mode 100644 index 0000000000000000000000000000000000000000..945f666e4b29826dd77604f681ff3a6fb61ba1b7 Binary files /dev/null and b/data/processed_audio__audio3_audio3_5.wav differ diff --git a/data/processed_audio__audio3_audio3_6.wav b/data/processed_audio__audio3_audio3_6.wav new file mode 100644 index 0000000000000000000000000000000000000000..6b9f07e8dd2058199338f1c22a36196315c3acfe Binary files 
/dev/null and b/data/processed_audio__audio3_audio3_6.wav differ diff --git a/data/processed_audio__audio3_audio3_7.wav b/data/processed_audio__audio3_audio3_7.wav new file mode 100644 index 0000000000000000000000000000000000000000..19eb2a4e542c7a01f2eb31602911599b69792747 Binary files /dev/null and b/data/processed_audio__audio3_audio3_7.wav differ diff --git a/data/processed_audio__audio3_audio3_8.wav b/data/processed_audio__audio3_audio3_8.wav new file mode 100644 index 0000000000000000000000000000000000000000..32f8d3410a3f85887bfa294b7fcba027b0bfd204 Binary files /dev/null and b/data/processed_audio__audio3_audio3_8.wav differ diff --git a/data/processed_audio__audio3_audio3_9.wav b/data/processed_audio__audio3_audio3_9.wav new file mode 100644 index 0000000000000000000000000000000000000000..f3098e5179123312562c41f0971fe835836a188d Binary files /dev/null and b/data/processed_audio__audio3_audio3_9.wav differ diff --git a/data/processed_audio__audio3_audio4_0.wav b/data/processed_audio__audio3_audio4_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..bb0bdf062ffac239121413d53dcc53e8a8c4b537 Binary files /dev/null and b/data/processed_audio__audio3_audio4_0.wav differ diff --git a/data/processed_audio__audio3_audio4_1.wav b/data/processed_audio__audio3_audio4_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..90f48e741b5ff6d7798120a2406f0ce5b09d01a8 Binary files /dev/null and b/data/processed_audio__audio3_audio4_1.wav differ diff --git a/data/processed_audio__audio3_audio4_2.wav b/data/processed_audio__audio3_audio4_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..2d631dde87b46f82ee9ba2b4e5437006b700c954 Binary files /dev/null and b/data/processed_audio__audio3_audio4_2.wav differ diff --git a/data/processed_audio__audio3_audio4_3.wav b/data/processed_audio__audio3_audio4_3.wav new file mode 100644 index 0000000000000000000000000000000000000000..f897e8364ea0f90bc775835f79998d8099ed8e94 
Binary files /dev/null and b/data/processed_audio__audio3_audio4_3.wav differ diff --git a/data/processed_audio__audio3_audio4_4.wav b/data/processed_audio__audio3_audio4_4.wav new file mode 100644 index 0000000000000000000000000000000000000000..94fec7541564b9c46217196691b343c8aff28ee2 Binary files /dev/null and b/data/processed_audio__audio3_audio4_4.wav differ diff --git a/data/processed_audio__audio3_audio4_5.wav b/data/processed_audio__audio3_audio4_5.wav new file mode 100644 index 0000000000000000000000000000000000000000..7319ccc83d40993a7ccfe9320dfd09d59d132192 Binary files /dev/null and b/data/processed_audio__audio3_audio4_5.wav differ diff --git a/data/processed_audio__audio3_audio4_6.wav b/data/processed_audio__audio3_audio4_6.wav new file mode 100644 index 0000000000000000000000000000000000000000..c0cd55143723284076a1b5ca9ae3e01f3f074b99 Binary files /dev/null and b/data/processed_audio__audio3_audio4_6.wav differ diff --git a/data/processed_audio__audio3_audio4_7.wav b/data/processed_audio__audio3_audio4_7.wav new file mode 100644 index 0000000000000000000000000000000000000000..c4413ef5cdf833be2ded3231c64eaba1907658fd Binary files /dev/null and b/data/processed_audio__audio3_audio4_7.wav differ diff --git a/data/processed_audio__audio3_audio4_8.wav b/data/processed_audio__audio3_audio4_8.wav new file mode 100644 index 0000000000000000000000000000000000000000..e44d4bf43381273520b664829a59557fdad207a7 Binary files /dev/null and b/data/processed_audio__audio3_audio4_8.wav differ diff --git a/data/processed_audio__audio3_audio4_9.wav b/data/processed_audio__audio3_audio4_9.wav new file mode 100644 index 0000000000000000000000000000000000000000..cad17af0853cb7e13df56519d85d5f1ac7d18fff Binary files /dev/null and b/data/processed_audio__audio3_audio4_9.wav differ diff --git a/model_v1.pth b/model_v1.pth new file mode 100644 index 0000000000000000000000000000000000000000..faa7bd93e54c8efdea2f1da453d2d283202fcb00 --- /dev/null +++ b/model_v1.pth @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:5f7a58c5149d6e0ffce92cd08d6dad74f9e6f2c667571eed883c2a92e86a4b9b +size 134923 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..408f3b140a2ef9e1e895036dace37639fbcea426 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,51 @@ +appnope==0.1.4 +asttokens==2.4.1 +comm==0.2.2 +contourpy==1.3.0 +cycler==0.12.1 +debugpy==1.8.5 +decorator==5.1.1 +executing==2.1.0 +filelock==3.16.0 +fonttools==4.53.1 +fsspec==2024.9.0 +ipykernel==6.29.5 +ipython==8.27.0 +jedi==0.19.1 +Jinja2==3.1.4 +jupyter_client==8.6.2 +jupyter_core==5.7.2 +kiwisolver==1.4.7 +MarkupSafe==2.1.5 +matplotlib==3.9.2 +matplotlib-inline==0.1.7 +mpmath==1.3.0 +nest-asyncio==1.6.0 +networkx==3.3 +numpy==2.1.1 +packaging==24.1 +pandas==2.2.2 +parso==0.8.4 +pexpect==4.9.0 +pillow==10.4.0 +platformdirs==4.3.3 +prompt_toolkit==3.0.47 +psutil==6.0.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +pydub==0.25.1 +Pygments==2.18.0 +pyparsing==3.1.4 +python-dateutil==2.9.0.post0 +pytz==2024.2 +pyzmq==26.2.0 +six==1.16.0 +stack-data==0.6.3 +sympy==1.13.2 +torch==2.2.2 +torchaudio==2.2.2 +tornado==6.4.1 +traitlets==5.14.3 +typing_extensions==4.12.2 +tzdata==2024.1 +wcwidth==0.2.13 diff --git a/rnn_torchview b/rnn_torchview new file mode 100644 index 0000000000000000000000000000000000000000..455950d428e37ae78b94d119bb5a2b20c707151b --- /dev/null +++ b/rnn_torchview @@ -0,0 +1,72 @@ +strict digraph model { + graph [ordering=in rankdir=TB size="12.0,12.0"] + node [align=left fontname="Linux libertine" fontsize=10 height=0.2 margin=0 ranksep=0.1 shape=plaintext style=filled] + edge [fontsize=10] + 0 [label=< + + +
input-tensor
depth:0
(1, 10, 128)
> fillcolor=lightyellow] + 1 [label=< + + + + + + + + + + +
RNN
depth:1
input:(1, 10, 128)
output: (1, 10, 128), (1, 1, 128)
> fillcolor=darkseagreen1] + 2 [label=< + + + + + + + + + + +
Dropout
depth:1
input:(1, 10, 128)
output: (1, 10, 128)
> fillcolor=darkseagreen1] + 3 [label=< + + + + + + + + + + +
__getitem__
depth:1
input:(1, 10, 128)
output: (1, 128)
> fillcolor=aliceblue] + 4 [label=< + + + + + + + + + + +
Linear
depth:1
input:(1, 128)
output: (1, 1)
> fillcolor=darkseagreen1] + 5 [label=< + + +
output-tensor
depth:0
(1, 1)
# Build the dataset index: one row per audio clip found under `audio_dirs`,
# then split it into train and test frames.

from sklearn.model_selection import train_test_split

# Directories scanned (non-recursively) for audio clips.
audio_dirs = ["./data"]

# File-name suffixes accepted as audio clips (case-sensitive match).
audio_extensions = (".wav", ".m4a")

file_paths = []
time_points = []
sample_rates = []

for audio_dir in audio_dirs:
    # os.listdir returns entries in arbitrary order. Sorting fixes the row
    # order, so the seeded split below selects the same files on every machine.
    for file in sorted(os.listdir(audio_dir)):
        if not file.endswith(audio_extensions):
            continue

        file_path = f"{audio_dir}/{file}"

        # Decode each clip once and read both properties from the same object.
        audio = AudioSegment.from_file(file_path)

        file_paths.append(file_path)
        # The time point is the clip length in seconds minus one second.
        time_points.append(audio.duration_seconds - 1)
        sample_rates.append(audio.frame_rate)

# Columns: file_path (str), time_point (seconds), sample_rate (Hz as reported by pydub).
df = pd.DataFrame({"file_path": file_paths, "time_point": time_points, "sample_rate": sample_rates})

# Hold out 10% of the clips; random_state keeps the split repeatable.
train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
file_pathtime_pointsample_rate
0./data/2024-10-12-17-22-31.m4a6.5528000
1./data/2024-10-12-17-47-04.m4a6.5528000
2./data/processed_audio__audio1_audio1_2.wav5.00048000
3./data/2024-10-12-17-29-33.m4a16.9208000
4./data/processed_audio__audio1_audio1_1.wav7.00048000
............
68./data/2024-10-12-17-46-43.m4a8.3448000
69./data/2024-10-12-17-28-24.m4a8.4728000
70./data/processed_audio__audio3_audio4_8.wav6.00048000
71./data/processed_audio__audio2_audio2_5.wav8.00048000
72./data/processed_audio__audio2_audio2_3.wav8.00048000
\n", + "

73 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " file_path time_point sample_rate\n", + "0 ./data/2024-10-12-17-22-31.m4a 6.552 8000\n", + "1 ./data/2024-10-12-17-47-04.m4a 6.552 8000\n", + "2 ./data/processed_audio__audio1_audio1_2.wav 5.000 48000\n", + "3 ./data/2024-10-12-17-29-33.m4a 16.920 8000\n", + "4 ./data/processed_audio__audio1_audio1_1.wav 7.000 48000\n", + ".. ... ... ...\n", + "68 ./data/2024-10-12-17-46-43.m4a 8.344 8000\n", + "69 ./data/2024-10-12-17-28-24.m4a 8.472 8000\n", + "70 ./data/processed_audio__audio3_audio4_8.wav 6.000 48000\n", + "71 ./data/processed_audio__audio2_audio2_5.wav 8.000 48000\n", + "72 ./data/processed_audio__audio2_audio2_3.wav 8.000 48000\n", + "\n", + "[73 rows x 3 columns]" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "class SlidingWindowAudioDataset(Dataset):\n", + " def __init__(self, df, sample_rate=16000, window_size=1, hop_length=0.1, n_fft=2048, n_mels=128):\n", + " self.df = df\n", + " self.sample_rate = sample_rate\n", + " self.window_size = window_size\n", + " self.hop_length = int(hop_length * sample_rate)\n", + " self.n_fft = n_fft\n", + " self.n_mels = n_mels\n", + " \n", + " def __len__(self):\n", + " return len(self.df)\n", + "\n", + " def __getitem__(self, idx):\n", + " file_path = self.df.iloc[idx]['file_path']\n", + " label_point_seconds = self.df.iloc[idx]['time_point']\n", + "\n", + " # Load audio file\n", + " waveform, sr = torchaudio.load(file_path)\n", + " \n", + " # Convert to mono if stereo\n", + " if waveform.shape[0] > 1:\n", + " waveform = waveform.mean(dim=0, keepdim=True)\n", + "\n", + " if sr != self.sample_rate:\n", + " resampler = torchaudio.transforms.Resample(sr, self.sample_rate)\n", + " waveform = resampler(waveform)\n", + "\n", + " \n", + " # Calculate mel spectrogram\n", + " mel_spectrogram = 
torchaudio.transforms.MelSpectrogram(\n", + " sample_rate=self.sample_rate,\n", + " n_fft=self.n_fft,\n", + " hop_length=self.hop_length,\n", + " n_mels=self.n_mels\n", + " )(waveform)\n", + " \n", + " # Apply sliding window\n", + " # Debugging information\n", + " # print(f\"File: {file_path}\")\n", + " # print(f\"Mel Spectrogram shape: {mel_spectrogram.shape}\")\n", + " # print(f\"Sample Rate: {self.sample_rate}\")\n", + " # print(f\"Hop Length: {self.hop_length}\")\n", + " # print(f\"Window Size: {self.window_size}\")\n", + " # print(f\"Waveform shape: {len(waveform[0])}\")\n", + " \n", + " # Calculate number of windows\n", + " total_duration = len(waveform[0]) / self.sample_rate\n", + " window_duration = self.window_size\n", + " hop_duration = self.hop_length / self.sample_rate\n", + " \n", + " num_windows = max(0, int((total_duration - window_duration) // hop_duration + 1))\n", + " \n", + " # print(f\"Total Duration: {total_duration:.2f} seconds\")\n", + " # print(f\"Window Duration: {window_duration} seconds\")\n", + " # print(f\"Hop Duration: {hop_duration:.2f} seconds\")\n", + " # print(f\"Calculated Num Windows: {num_windows}\")\n", + " \n", + " windows = []\n", + " labels = []\n", + " for i in range(num_windows):\n", + " start_time = i * self.hop_length / self.sample_rate\n", + " end_time = start_time + self.window_size\n", + " \n", + " window = mel_spectrogram[:, :, i:i+int(self.window_size * self.sample_rate / self.hop_length)]\n", + " # print(window.shape)\n", + "\n", + " # label point is 85%\n", + " # find middle of window and find the percentage based on the label point\n", + " middle = (start_time + end_time) / 2\n", + " # full time based on 85% mark\n", + " full_time = total_duration / (0.85 * 100) * 100\n", + " # label point\n", + " label = middle / full_time\n", + "\n", + " windows.append(window)\n", + " labels.append(label)\n", + "\n", + " # print(f\"Actual Num Windows: {len(windows)}\")\n", + " # print(\"---\")\n", + "\n", + " # print shape\n", 
+ " # print(torch.stack(windows).shape)\n", + " # print(torch.tensor(labels).shape)\n", + " return torch.stack(windows), torch.tensor(labels)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "class AudioRNN(nn.Module):\n", + " def __init__(self, input_size, hidden_size, output_size, dropout, num_layers=1):\n", + " super(AudioRNN, self).__init__()\n", + " self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)\n", + " self.fc = nn.Linear(hidden_size, output_size)\n", + " self.hidden_size = hidden_size\n", + " self.hidden_state = None\n", + " self.hidden_size = hidden_size\n", + " self.num_layers = num_layers\n", + " self.dropout = nn.Dropout(dropout)\n", + "\n", + " def reset_hidden_state(self):\n", + " self.hidden_state = torch.zeros(self.num_layers, 1, self.hidden_size).to(device)\n", + "\n", + " def forward(self, x):\n", + " out, _ = self.rnn(x, self.hidden_state)\n", + " out = self.dropout(out)\n", + " out = self.fc(out[:, -1, :])\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "def train(model, device, loader, test_loader, criterion, optimizer, epochs, patience=3):\n", + " best_acc = 0\n", + " patience_counter = 0\n", + " epoch_losses = []\n", + " for epoch in range(epochs):\n", + " model.train()\n", + " running_loss = 0.0\n", + " for batch_idx, (data, target) in enumerate(loader):\n", + " for i in range(data.size(1)):\n", + " # the label we want is only the index i of the target\n", + " target_batch = target[:, i].to(device).float()\n", + " # get the data for the index i\n", + " data_batch = data[:, i, :, :].to(device)\n", + "\n", + " model.reset_hidden_state()\n", + "\n", + " data_batch = data_batch.squeeze(1)\n", + " data_batch = data_batch.permute(0, 2, 1)\n", + "\n", + " optimizer.zero_grad()\n", + " output = model(data_batch)\n", + " output = output.squeeze(1)\n", + " \n", + " loss = 
criterion(output, target_batch)\n", + " loss.backward()\n", + " optimizer.step()\n", + " running_loss += loss.item()\n", + " \n", + " epoch_losses.append(running_loss/(batch_idx+1))\n", + " print(f'Epoch {epoch+1}/{epochs}, Loss: {running_loss/(batch_idx+1)}')\n", + " # Plotting the training loss\n", + " IPython.display.clear_output(wait=True)\n", + " plt.plot(epoch_losses, marker='o', linestyle='-', color='b')\n", + " plt.xlabel('Steps')\n", + " plt.ylabel('Loss')\n", + " plt.grid(True)\n", + " plt.show()\n", + "\n", + " print(f\"Epoch: {epoch}\")\n", + " print(\"test loss\")\n", + " test_acc = test(model, device, test_loader)\n", + "\n", + " if test_acc > best_acc:\n", + " best_acc = test_acc\n", + " patience_counter = 0\n", + " else:\n", + " patience_counter += 1\n", + "\n", + " if patience_counter > patience:\n", + " print(\"Early Stopping\")\n", + " break\n", + "\n", + " print(\"train loss\")\n", + " train_acc = test(model, device, loader)\n", + "\n", + "\n", + "def test(model, device, loader):\n", + " total_loss = 0\n", + " total_correct = 0\n", + " total_no = 0\n", + " leeway = 0.1\n", + " crit = torch.nn.MSELoss()\n", + " model.eval()\n", + " with torch.no_grad():\n", + " for data, target in loader:\n", + " for i in range(data.size(1)):\n", + " data_batch = data[:, i, :, :].to(device)\n", + " target_batch = target[:, i].to(device)\n", + "\n", + " # reset hidden state\n", + " model.reset_hidden_state()\n", + "\n", + " # drop x y dimension\n", + " data_batch = data_batch.squeeze(1)\n", + " # wrong dimension order swap y z\n", + " data_batch = data_batch.permute(0, 2, 1)\n", + " # feed to model\n", + " output = model(data_batch)\n", + " output = output.squeeze(1)\n", + "\n", + " if abs(output.item() - target_batch.item()) < leeway:\n", + " total_correct += 1\n", + " total_no += 1\n", + "\n", + " # get accuracy with the output and target float\n", + "\n", + " loss = crit(output, target_batch)\n", + " total_loss += loss.item()\n", + " \n", + " avg_loss = 
# %% Datasets, loaders, model, loss and optimizer
train_dataset = SlidingWindowAudioDataset(train_df, sample_rate=16000)
test_dataset = SlidingWindowAudioDataset(test_df, sample_rate=16000)

batch_size = 1
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# The evaluation loader has to wrap test_dataset: train() uses it for early
# stopping and the test() calls below report their metrics on it, so it must
# not contain the samples the model is fitted on.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model configuration
# NOTE(review): input_dim has to equal the dataset's n_mels - confirm.
input_dim = 128  # Number of mel frequency bins
hidden_dim = 128
output_dim = 1
num_layers = 1
dropout = 0.3
learning_rate = 0.00003

# AudioRNN(input_size, hidden_size, output_size, dropout, num_layers=1)
model = AudioRNN(input_dim, hidden_dim, output_dim, dropout, num_layers).to(device)

criterion = nn.MSELoss()

optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0003)

# %% Metrics of the untrained model
test(model, device, test_loader)

# %% Train with early stopping
train(model, device, train_loader, test_loader, criterion, optimizer, epochs=100)

# %% Metrics after training
test(model, device, test_loader)

# %% Render the model graph
# from torchviz import make_dot
from torchview import draw_graph
import matplotlib.pyplot as plt
# Visualize the model (torchviz variant, kept for reference)
# x = torch.randn(1, 10, input_size)
# y = model(x)
# make_dot(y, params=dict(list(model.named_parameters()) + [('x', x)])).render("rnn_torchviz", format="png")

# Display the generated graph
# img = plt.imread("rnn_torchviz.png")
# plt.imshow(img)
# plt.axis('off')

# Visualize the model
x = torch.randn(1, 10, input_dim).to(device)
y = model(x)
graph = draw_graph(model, input_size=(1, 10, input_dim), device=device)
# Writes the rendered graph under the name "rnn_torchview" in PNG format.
graph.visual_graph.render("rnn_torchview", format="png")
# Last expression of the cell: shown inline by the notebook.
graph.visual_graph

# Display the generated graph
# img = plt.imread("rnn_torchview.png")
# plt.imshow(img)
# plt.axis('off')
# plt.show()
# %% Layer-by-layer summary for a (1, 10, input_dim) example input
from torchinfo import summary

example_input_size = (1, 10, input_dim)
summary(model, input_size=example_input_size, device=device)

# %% Persist the trained weights
# The file name embeds the current time.time() value, so every run writes a
# new checkpoint.
import time

checkpoint_path = f"./model_{time.time()}.pth"
torch.save(model.state_dict(), checkpoint_path)