Technologic101 committed on
Commit
8c0e97b
·
1 Parent(s): 1b5ee9b

task: creates knowledge graph and synthetic queries

Browse files
Files changed (3) hide show
  1. poetry.lock +162 -1
  2. pyproject.toml +2 -0
  3. src/sdg.ipynb +672 -0
poetry.lock CHANGED
@@ -184,6 +184,19 @@ doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)",
184
  test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"]
185
  trio = ["trio (>=0.26.1)"]
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  [[package]]
188
  name = "appnope"
189
  version = "0.1.4"
@@ -987,6 +1000,19 @@ files = [
987
  graph = ["objgraph (>=1.7.2)"]
988
  profile = ["gprof2dot (>=2022.7.29)"]
989
 
 
 
 
 
 
 
 
 
 
 
 
 
 
990
  [[package]]
991
  name = "distro"
992
  version = "1.9.0"
@@ -5144,6 +5170,141 @@ files = [
5144
  {file = "queuelib-1.7.0.tar.gz", hash = "sha256:2855162096cf0230510890b354379ea1c0ff19d105d3147d349d2433bb222b08"},
5145
  ]
5146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5147
  [[package]]
5148
  name = "referencing"
5149
  version = "0.36.2"
@@ -7216,4 +7377,4 @@ cffi = ["cffi (>=1.11)"]
7216
  [metadata]
7217
  lock-version = "2.1"
7218
  python-versions = ">=3.9,<4.0"
7219
- content-hash = "910b49bc325e5394e4f5274f0cda941ef765b204de8ad7c3f5958c19ba54af33"
 
184
  test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"]
185
  trio = ["trio (>=0.26.1)"]
186
 
187
+ [[package]]
188
+ name = "appdirs"
189
+ version = "1.4.4"
190
+ description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
191
+ optional = false
192
+ python-versions = "*"
193
+ groups = ["main"]
194
+ markers = "python_version <= \"3.11\" or python_version >= \"3.12\""
195
+ files = [
196
+ {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
197
+ {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
198
+ ]
199
+
200
  [[package]]
201
  name = "appnope"
202
  version = "0.1.4"
 
1000
  graph = ["objgraph (>=1.7.2)"]
1001
  profile = ["gprof2dot (>=2022.7.29)"]
1002
 
1003
+ [[package]]
1004
+ name = "diskcache"
1005
+ version = "5.6.3"
1006
+ description = "Disk Cache -- Disk and file backed persistent cache."
1007
+ optional = false
1008
+ python-versions = ">=3"
1009
+ groups = ["main"]
1010
+ markers = "python_version <= \"3.11\" or python_version >= \"3.12\""
1011
+ files = [
1012
+ {file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"},
1013
+ {file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"},
1014
+ ]
1015
+
1016
  [[package]]
1017
  name = "distro"
1018
  version = "1.9.0"
 
5170
  {file = "queuelib-1.7.0.tar.gz", hash = "sha256:2855162096cf0230510890b354379ea1c0ff19d105d3147d349d2433bb222b08"},
5171
  ]
5172
 
5173
+ [[package]]
5174
+ name = "ragas"
5175
+ version = "0.2.13"
5176
+ description = ""
5177
+ optional = false
5178
+ python-versions = ">=3.9"
5179
+ groups = ["main"]
5180
+ markers = "python_version <= \"3.11\" or python_version >= \"3.12\""
5181
+ files = [
5182
+ {file = "ragas-0.2.13-py3-none-any.whl", hash = "sha256:0a9c4014768cb6a1d962f9348ee2ea36732a1edafdf18d884ab020f4fe2d4acc"},
5183
+ {file = "ragas-0.2.13.tar.gz", hash = "sha256:33ebfd8c88465c7c86e639049138e38d3d3117d03eb68c0b2c98065c4608feb5"},
5184
+ ]
5185
+
5186
+ [package.dependencies]
5187
+ appdirs = "*"
5188
+ datasets = "*"
5189
+ diskcache = ">=5.6.3"
5190
+ langchain = "*"
5191
+ langchain-community = "*"
5192
+ langchain-core = "*"
5193
+ langchain_openai = "*"
5194
+ nest-asyncio = "*"
5195
+ numpy = "*"
5196
+ openai = ">1"
5197
+ pydantic = ">=2"
5198
+ tiktoken = "*"
5199
+
5200
+ [package.extras]
5201
+ all = ["datacompy", "llama_index", "nltk", "pandas", "rapidfuzz", "rouge_score", "sentence-transformers", "transformers"]
5202
+ dev = ["black[jupyter]", "datacompy", "fastembed", "graphene", "isort", "llama_index", "nltk", "notebook", "pandas", "pyright", "rapidfuzz", "rich", "rouge_score", "ruff", "sentence-transformers", "sphinx-autobuild", "transformers"]
5203
+ docs = ["mkdocs (>=1.6.1)", "mkdocs-autorefs", "mkdocs-gen-files", "mkdocs-git-committers-plugin-2", "mkdocs-git-revision-date-localized-plugin", "mkdocs-glightbox", "mkdocs-literate-nav", "mkdocs-material", "mkdocs-material[imaging]", "mkdocs-section-index", "mkdocstrings[python]"]
5204
+ test = ["llama_index", "nbmake", "pytest", "pytest-asyncio", "pytest-xdist[psutil]"]
5205
+
5206
+ [[package]]
5207
+ name = "rapidfuzz"
5208
+ version = "3.12.1"
5209
+ description = "rapid fuzzy string matching"
5210
+ optional = false
5211
+ python-versions = ">=3.9"
5212
+ groups = ["main"]
5213
+ markers = "python_version <= \"3.11\" or python_version >= \"3.12\""
5214
+ files = [
5215
+ {file = "rapidfuzz-3.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dbb7ea2fd786e6d66f225ef6eef1728832314f47e82fee877cb2a793ebda9579"},
5216
+ {file = "rapidfuzz-3.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ae41361de05762c1eaa3955e5355de7c4c6f30d1ef1ea23d29bf738a35809ab"},
5217
+ {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc3c39e0317e7f68ba01bac056e210dd13c7a0abf823e7b6a5fe7e451ddfc496"},
5218
+ {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:69f2520296f1ae1165b724a3aad28c56fd0ac7dd2e4cff101a5d986e840f02d4"},
5219
+ {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34dcbf5a7daecebc242f72e2500665f0bde9dd11b779246c6d64d106a7d57c99"},
5220
+ {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:773ab37fccf6e0513891f8eb4393961ddd1053c6eb7e62eaa876e94668fc6d31"},
5221
+ {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ecf0e6de84c0bc2c0f48bc03ba23cef2c5f1245db7b26bc860c11c6fd7a097c"},
5222
+ {file = "rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4dc2ebad4adb29d84a661f6a42494df48ad2b72993ff43fad2b9794804f91e45"},
5223
+ {file = "rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8389d98b9f54cb4f8a95f1fa34bf0ceee639e919807bb931ca479c7a5f2930bf"},
5224
+ {file = "rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:165bcdecbfed9978962da1d3ec9c191b2ff9f1ccc2668fbaf0613a975b9aa326"},
5225
+ {file = "rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:129d536740ab0048c1a06ccff73c683f282a2347c68069affae8dbc423a37c50"},
5226
+ {file = "rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1b67e390261ffe98ec86c771b89425a78b60ccb610c3b5874660216fcdbded4b"},
5227
+ {file = "rapidfuzz-3.12.1-cp310-cp310-win32.whl", hash = "sha256:a66520180d3426b9dc2f8d312f38e19bc1fc5601f374bae5c916f53fa3534a7d"},
5228
+ {file = "rapidfuzz-3.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:82260b20bc7a76556cecb0c063c87dad19246a570425d38f8107b8404ca3ac97"},
5229
+ {file = "rapidfuzz-3.12.1-cp310-cp310-win_arm64.whl", hash = "sha256:3a860d103bbb25c69c2e995fdf4fac8cb9f77fb69ec0a00469d7fd87ff148f46"},
5230
+ {file = "rapidfuzz-3.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6d9afad7b16d01c9e8929b6a205a18163c7e61b6cd9bcf9c81be77d5afc1067a"},
5231
+ {file = "rapidfuzz-3.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb424ae7240f2d2f7d8dda66a61ebf603f74d92f109452c63b0dbf400204a437"},
5232
+ {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42149e6d13bd6d06437d2a954dae2184dadbbdec0fdb82dafe92860d99f80519"},
5233
+ {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:760ac95d788f2964b73da01e0bdffbe1bf2ad8273d0437565ce9092ae6ad1fbc"},
5234
+ {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2cf27e8e4bf7bf9d92ef04f3d2b769e91c3f30ba99208c29f5b41e77271a2614"},
5235
+ {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:00ceb8ff3c44ab0d6014106c71709c85dee9feedd6890eff77c814aa3798952b"},
5236
+ {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8b61c558574fbc093d85940c3264c08c2b857b8916f8e8f222e7b86b0bb7d12"},
5237
+ {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:346a2d8f17224e99f9ef988606c83d809d5917d17ad00207237e0965e54f9730"},
5238
+ {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d60d1db1b7e470e71ae096b6456e20ec56b52bde6198e2dbbc5e6769fa6797dc"},
5239
+ {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2477da227e266f9c712f11393182c69a99d3c8007ea27f68c5afc3faf401cc43"},
5240
+ {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:8499c7d963ddea8adb6cffac2861ee39a1053e22ca8a5ee9de1197f8dc0275a5"},
5241
+ {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:12802e5c4d8ae104fb6efeeb436098325ce0dca33b461c46e8df015c84fbef26"},
5242
+ {file = "rapidfuzz-3.12.1-cp311-cp311-win32.whl", hash = "sha256:e1061311d07e7cdcffa92c9b50c2ab4192907e70ca01b2e8e1c0b6b4495faa37"},
5243
+ {file = "rapidfuzz-3.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:c6e4ed63e204daa863a802eec09feea5448617981ba5d150f843ad8e3ae071a4"},
5244
+ {file = "rapidfuzz-3.12.1-cp311-cp311-win_arm64.whl", hash = "sha256:920733a28c3af47870835d59ca9879579f66238f10de91d2b4b3f809d1ebfc5b"},
5245
+ {file = "rapidfuzz-3.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f6235b57ae3faa3f85cb3f90c9fee49b21bd671b76e90fc99e8ca2bdf0b5e4a3"},
5246
+ {file = "rapidfuzz-3.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af4585e5812632c357fee5ab781c29f00cd06bea58f8882ff244cc4906ba6c9e"},
5247
+ {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5942dc4460e5030c5f9e1d4c9383de2f3564a2503fe25e13e89021bcbfea2f44"},
5248
+ {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b31ab59e1a0df5afc21f3109b6cfd77b34040dbf54f1bad3989f885cfae1e60"},
5249
+ {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97c885a7a480b21164f57a706418c9bbc9a496ec6da087e554424358cadde445"},
5250
+ {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d844c0587d969ce36fbf4b7cbf0860380ffeafc9ac5e17a7cbe8abf528d07bb"},
5251
+ {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93c95dce8917bf428064c64024de43ffd34ec5949dd4425780c72bd41f9d969"},
5252
+ {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:834f6113d538af358f39296604a1953e55f8eeffc20cb4caf82250edbb8bf679"},
5253
+ {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a940aa71a7f37d7f0daac186066bf6668d4d3b7e7ef464cb50bc7ba89eae1f51"},
5254
+ {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ec9eaf73501c9a7de2c6938cb3050392e2ee0c5ca3921482acf01476b85a7226"},
5255
+ {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c5ec360694ac14bfaeb6aea95737cf1a6cf805b5fe8ea7fd28814706c7fa838"},
5256
+ {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6b5e176524653ac46f1802bdd273a4b44a5f8d0054ed5013a8e8a4b72f254599"},
5257
+ {file = "rapidfuzz-3.12.1-cp312-cp312-win32.whl", hash = "sha256:6f463c6f1c42ec90e45d12a6379e18eddd5cdf74138804d8215619b6f4d31cea"},
5258
+ {file = "rapidfuzz-3.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:b894fa2b30cd6498a29e5c470cb01c6ea898540b7e048a0342775a5000531334"},
5259
+ {file = "rapidfuzz-3.12.1-cp312-cp312-win_arm64.whl", hash = "sha256:43bb17056c5d1332f517b888c4e57846c4b5f936ed304917eeb5c9ac85d940d4"},
5260
+ {file = "rapidfuzz-3.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:97f824c15bc6933a31d6e3cbfa90188ba0e5043cf2b6dd342c2b90ee8b3fd47c"},
5261
+ {file = "rapidfuzz-3.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a973b3f5cabf931029a3ae4a0f72e3222e53d412ea85fc37ddc49e1774f00fbf"},
5262
+ {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df7880e012228722dec1be02b9ef3898ed023388b8a24d6fa8213d7581932510"},
5263
+ {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c78582f50e75e6c2bc38c791ed291cb89cf26a3148c47860c1a04d6e5379c8e"},
5264
+ {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d7d9e6a04d8344b0198c96394c28874086888d0a2b2f605f30d1b27b9377b7d"},
5265
+ {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5620001fd4d6644a2f56880388179cc8f3767670f0670160fcb97c3b46c828af"},
5266
+ {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0666ab4c52e500af7ba5cc17389f5d15c0cdad06412c80312088519fdc25686d"},
5267
+ {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:27b4d440fa50b50c515a91a01ee17e8ede719dca06eef4c0cccf1a111a4cfad3"},
5268
+ {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:83dccfd5a754f2a0e8555b23dde31f0f7920601bfa807aa76829391ea81e7c67"},
5269
+ {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b572b634740e047c53743ed27a1bb3b4f93cf4abbac258cd7af377b2c4a9ba5b"},
5270
+ {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7fa7b81fb52902d5f78dac42b3d6c835a6633b01ddf9b202a3ca8443be4b2d6a"},
5271
+ {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b1d4fbff980cb6baef4ee675963c081f7b5d6580a105d6a4962b20f1f880e1fb"},
5272
+ {file = "rapidfuzz-3.12.1-cp313-cp313-win32.whl", hash = "sha256:3fe8da12ea77271097b303fa7624cfaf5afd90261002314e3b0047d36f4afd8d"},
5273
+ {file = "rapidfuzz-3.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:6f7e92fc7d2a7f02e1e01fe4f539324dfab80f27cb70a30dd63a95445566946b"},
5274
+ {file = "rapidfuzz-3.12.1-cp313-cp313-win_arm64.whl", hash = "sha256:e31be53d7f4905a6a038296d8b773a79da9ee9f0cd19af9490c5c5a22e37d2e5"},
5275
+ {file = "rapidfuzz-3.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bef5c91d5db776523530073cda5b2a276283258d2f86764be4a008c83caf7acd"},
5276
+ {file = "rapidfuzz-3.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:841e0c2a5fbe8fc8b9b1a56e924c871899932c0ece7fbd970aa1c32bfd12d4bf"},
5277
+ {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:046fc67f3885d94693a2151dd913aaf08b10931639cbb953dfeef3151cb1027c"},
5278
+ {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4d2d39b2e76c17f92edd6d384dc21fa020871c73251cdfa017149358937a41d"},
5279
+ {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5857dda85165b986c26a474b22907db6b93932c99397c818bcdec96340a76d5"},
5280
+ {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c26cd1b9969ea70dbf0dbda3d2b54ab4b2e683d0fd0f17282169a19563efeb1"},
5281
+ {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf56ea4edd69005786e6c80a9049d95003aeb5798803e7a2906194e7a3cb6472"},
5282
+ {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fbe7580b5fb2db8ebd53819171ff671124237a55ada3f64d20fc9a149d133960"},
5283
+ {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:018506a53c3b20dcbda8c93d4484b9eb1764c93d5ea16be103cf6b0d8b11d860"},
5284
+ {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:325c9c71b737fcd32e2a4e634c430c07dd3d374cfe134eded3fe46e4c6f9bf5d"},
5285
+ {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:930756639643e3aa02d3136b6fec74e5b9370a24f8796e1065cd8a857a6a6c50"},
5286
+ {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0acbd27543b158cb915fde03877383816a9e83257832818f1e803bac9b394900"},
5287
+ {file = "rapidfuzz-3.12.1-cp39-cp39-win32.whl", hash = "sha256:80ff9283c54d7d29b2d954181e137deee89bec62f4a54675d8b6dbb6b15d3e03"},
5288
+ {file = "rapidfuzz-3.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:fd37e53f0ed239d0cec27b250cec958982a8ba252ce64aa5e6052de3a82fa8db"},
5289
+ {file = "rapidfuzz-3.12.1-cp39-cp39-win_arm64.whl", hash = "sha256:4a4422e4f73a579755ab60abccb3ff148b5c224b3c7454a13ca217dfbad54da6"},
5290
+ {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b7cba636c32a6fc3a402d1cb2c70c6c9f8e6319380aaf15559db09d868a23e56"},
5291
+ {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b79286738a43e8df8420c4b30a92712dec6247430b130f8e015c3a78b6d61ac2"},
5292
+ {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dc1937198e7ff67e217e60bfa339f05da268d91bb15fec710452d11fe2fdf60"},
5293
+ {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b85817a57cf8db32dd5d2d66ccfba656d299b09eaf86234295f89f91be1a0db2"},
5294
+ {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04283c6f3e79f13a784f844cd5b1df4f518ad0f70c789aea733d106c26e1b4fb"},
5295
+ {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a718f740553aad5f4daef790191511da9c6eae893ee1fc2677627e4b624ae2db"},
5296
+ {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cbdf145c7e4ebf2e81c794ed7a582c4acad19e886d5ad6676086369bd6760753"},
5297
+ {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:0d03ad14a26a477be221fddc002954ae68a9e2402b9d85433f2d0a6af01aa2bb"},
5298
+ {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1187aeae9c89e838d2a0a2b954b4052e4897e5f62e5794ef42527bf039d469e"},
5299
+ {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd47dfb1bca9673a48b923b3d988b7668ee8efd0562027f58b0f2b7abf27144c"},
5300
+ {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187cdb402e223264eebed2fe671e367e636a499a7a9c82090b8d4b75aa416c2a"},
5301
+ {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d6899b41bf6c30282179f77096c1939f1454836440a8ab05b48ebf7026a3b590"},
5302
+ {file = "rapidfuzz-3.12.1.tar.gz", hash = "sha256:6a98bbca18b4a37adddf2d8201856441c26e9c981d8895491b5bc857b5f780eb"},
5303
+ ]
5304
+
5305
+ [package.extras]
5306
+ all = ["numpy"]
5307
+
5308
  [[package]]
5309
  name = "referencing"
5310
  version = "0.36.2"
 
7377
  [metadata]
7378
  lock-version = "2.1"
7379
  python-versions = ">=3.9,<4.0"
7380
+ content-hash = "1181d4bb4660438dd8901ea7e51a16185a13528ce1562948f12b76c237e25d6b"
pyproject.toml CHANGED
@@ -33,6 +33,8 @@ langchain-openai = ">=0.3.6,<0.4.0"
33
  langchain-community = ">=0.3.18,<0.4.0"
34
  faiss-cpu = "^1.7.4"
35
  jq = "^1.8.0"
 
 
36
 
37
  [tool.black]
38
  line-length = 88
 
33
  langchain-community = ">=0.3.18,<0.4.0"
34
  faiss-cpu = "^1.7.4"
35
  jq = "^1.8.0"
36
+ ragas = "^0.2.13"
37
+ rapidfuzz = "^3.12.1"
38
 
39
  [tool.black]
40
  line-length = 88
src/sdg.ipynb CHANGED
@@ -54,6 +54,678 @@
54
  " print(f\"Content: {documents[0].page_content}\")\n",
55
  " print(f\"Metadata: {documents[0].metadata}\")\n"
56
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  }
58
  ],
59
  "metadata": {
 
54
  " print(f\"Content: {documents[0].page_content}\")\n",
55
  " print(f\"Metadata: {documents[0].metadata}\")\n"
56
  ]
57
+ },
58
+ {
59
+ "cell_type": "markdown",
60
+ "metadata": {},
61
+ "source": [
62
+ "Now let's create a knowledge graph based on our design metadata."
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": 13,
68
+ "metadata": {},
69
+ "outputs": [],
70
+ "source": [
71
+ "from ragas.llms import LangchainLLMWrapper\n",
72
+ "from ragas.embeddings import LangchainEmbeddingsWrapper\n",
73
+ "from langchain_openai import ChatOpenAI\n",
74
+ "from langchain_openai import OpenAIEmbeddings\n",
75
+ "\n",
76
+ "# using 4o-mini due to rate limits\n",
77
+ "generator_llm = LangchainLLMWrapper(ChatOpenAI(model=\"gpt-4o-mini\"))\n",
78
+ "generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 14,
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "data": {
88
+ "text/plain": [
89
+ "KnowledgeGraph(nodes: 141, relationships: 0)"
90
+ ]
91
+ },
92
+ "execution_count": 14,
93
+ "metadata": {},
94
+ "output_type": "execute_result"
95
+ }
96
+ ],
97
+ "source": [
98
+ "from ragas.testset.graph import KnowledgeGraph, Node, NodeType\n",
99
+ "\n",
100
+ "kg = KnowledgeGraph()\n",
101
+ "\n",
102
+ "for doc in documents:\n",
103
+ " kg.nodes.append(\n",
104
+ " Node(\n",
105
+ " type=NodeType.DOCUMENT,\n",
106
+ " properties={\"page_content\": doc.page_content, \"document_metadata\": doc.metadata}\n",
107
+ " )\n",
108
+ " )\n",
109
+ "\n",
110
+ "kg"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": 15,
116
+ "metadata": {},
117
+ "outputs": [
118
+ {
119
+ "data": {
120
+ "application/vnd.jupyter.widget-view+json": {
121
+ "model_id": "27552c622ca64d2abe4190ce6fb12d1d",
122
+ "version_major": 2,
123
+ "version_minor": 0
124
+ },
125
+ "text/plain": [
126
+ "Applying SummaryExtractor: 0%| | 0/141 [00:00<?, ?it/s]"
127
+ ]
128
+ },
129
+ "metadata": {},
130
+ "output_type": "display_data"
131
+ },
132
+ {
133
+ "data": {
134
+ "application/vnd.jupyter.widget-view+json": {
135
+ "model_id": "7e73db5f494c4db581b14baa2c23d12a",
136
+ "version_major": 2,
137
+ "version_minor": 0
138
+ },
139
+ "text/plain": [
140
+ "Applying CustomNodeFilter: 0%| | 0/141 [00:00<?, ?it/s]"
141
+ ]
142
+ },
143
+ "metadata": {},
144
+ "output_type": "display_data"
145
+ },
146
+ {
147
+ "data": {
148
+ "application/vnd.jupyter.widget-view+json": {
149
+ "model_id": "5e525eaa7bda4c52a876ae87835606a9",
150
+ "version_major": 2,
151
+ "version_minor": 0
152
+ },
153
+ "text/plain": [
154
+ "Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]: 0%| | 0/423 [00:00<?, ?it/s]"
155
+ ]
156
+ },
157
+ "metadata": {},
158
+ "output_type": "display_data"
159
+ },
160
+ {
161
+ "data": {
162
+ "application/vnd.jupyter.widget-view+json": {
163
+ "model_id": "74e2eae6725142e8922955012318f4d9",
164
+ "version_major": 2,
165
+ "version_minor": 0
166
+ },
167
+ "text/plain": [
168
+ "Applying OverlapScoreBuilder: 0%| | 0/1 [00:00<?, ?it/s]"
169
+ ]
170
+ },
171
+ "metadata": {},
172
+ "output_type": "display_data"
173
+ },
174
+ {
175
+ "data": {
176
+ "text/plain": [
177
+ "KnowledgeGraph(nodes: 141, relationships: 795)"
178
+ ]
179
+ },
180
+ "execution_count": 15,
181
+ "metadata": {},
182
+ "output_type": "execute_result"
183
+ }
184
+ ],
185
+ "source": [
186
+ "from ragas.testset.transforms import default_transforms, apply_transforms\n",
187
+ "\n",
188
+ "# why use these new variables?\n",
189
+ "transformer_llm = generator_llm\n",
190
+ "embedding_model = generator_embeddings\n",
191
+ "\n",
192
+ "default_transforms = default_transforms(documents=documents, llm=transformer_llm, embedding_model=embedding_model)\n",
193
+ "apply_transforms(kg, default_transforms)\n",
194
+ "kg"
195
+ ]
196
+ },
197
+ {
198
+ "cell_type": "code",
199
+ "execution_count": 17,
200
+ "metadata": {},
201
+ "outputs": [],
202
+ "source": [
203
+ "kg.save(\"css_zen_garden_design_data\")\n",
204
+ "design_kg = KnowledgeGraph.load(\"css_zen_garden_design_data\")"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "markdown",
209
+ "metadata": {},
210
+ "source": [
211
+ "Okay! With our knowledge graph of relationships, we can now generate a test set."
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": 18,
217
+ "metadata": {},
218
+ "outputs": [],
219
+ "source": [
220
+ "from ragas.testset import TestsetGenerator\n",
221
+ "\n",
222
+ "testset_generator = TestsetGenerator(\n",
223
+ " knowledge_graph=design_kg,\n",
224
+ " llm=generator_llm,\n",
225
+ " embedding_model=generator_embeddings\n",
226
+ ")\n"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": null,
232
+ "metadata": {},
233
+ "outputs": [
234
+ {
235
+ "data": {
236
+ "application/vnd.jupyter.widget-view+json": {
237
+ "model_id": "82e3c2beea444035a84257bf881f44bd",
238
+ "version_major": 2,
239
+ "version_minor": 0
240
+ },
241
+ "text/plain": [
242
+ "Generating personas: 0%| | 0/3 [00:00<?, ?it/s]"
243
+ ]
244
+ },
245
+ "metadata": {},
246
+ "output_type": "display_data"
247
+ },
248
+ {
249
+ "data": {
250
+ "application/vnd.jupyter.widget-view+json": {
251
+ "model_id": "f2c3a53c3df34620a4b17baec2c74c5a",
252
+ "version_major": 2,
253
+ "version_minor": 0
254
+ },
255
+ "text/plain": [
256
+ "Generating Scenarios: 0%| | 0/2 [00:00<?, ?it/s]"
257
+ ]
258
+ },
259
+ "metadata": {},
260
+ "output_type": "display_data"
261
+ },
262
+ {
263
+ "data": {
264
+ "application/vnd.jupyter.widget-view+json": {
265
+ "model_id": "18bd0fdb023045f78c6251dec02e9a54",
266
+ "version_major": 2,
267
+ "version_minor": 0
268
+ },
269
+ "text/plain": [
270
+ "Generating Samples: 0%| | 0/10 [00:00<?, ?it/s]"
271
+ ]
272
+ },
273
+ "metadata": {},
274
+ "output_type": "display_data"
275
+ },
276
+ {
277
+ "data": {
278
+ "text/html": [
279
+ "<div>\n",
280
+ "<style scoped>\n",
281
+ " .dataframe tbody tr th:only-of-type {\n",
282
+ " vertical-align: middle;\n",
283
+ " }\n",
284
+ "\n",
285
+ " .dataframe tbody tr th {\n",
286
+ " vertical-align: top;\n",
287
+ " }\n",
288
+ "\n",
289
+ " .dataframe thead th {\n",
290
+ " text-align: right;\n",
291
+ " }\n",
292
+ "</style>\n",
293
+ "<table border=\"1\" class=\"dataframe\">\n",
294
+ " <thead>\n",
295
+ " <tr style=\"text-align: right;\">\n",
296
+ " <th></th>\n",
297
+ " <th>user_input</th>\n",
298
+ " <th>reference_contexts</th>\n",
299
+ " <th>reference</th>\n",
300
+ " <th>synthesizer_name</th>\n",
301
+ " </tr>\n",
302
+ " </thead>\n",
303
+ " <tbody>\n",
304
+ " <tr>\n",
305
+ " <th>0</th>\n",
306
+ " <td>How do ornate flourishes contribute to the ove...</td>\n",
307
+ " <td>[{\"id\": \"135\", \"url\": \"https://www.csszengarde...</td>\n",
308
+ " <td>Ornate flourishes in web design add a touch of...</td>\n",
309
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
310
+ " </tr>\n",
311
+ " <tr>\n",
312
+ " <th>1</th>\n",
313
+ " <td>How does the nature-inspired design in the pro...</td>\n",
314
+ " <td>[{\"id\": \"132\", \"url\": \"https://www.csszengarde...</td>\n",
315
+ " <td>The nature-inspired design features a minimali...</td>\n",
316
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
317
+ " </tr>\n",
318
+ " <tr>\n",
319
+ " <th>2</th>\n",
320
+ " <td>Can you describe the design principles and vis...</td>\n",
321
+ " <td>[{\"id\": \"104\", \"url\": \"https://www.csszengarde...</td>\n",
322
+ " <td>The CSS Zen Garden design emphasizes a harmoni...</td>\n",
323
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
324
+ " </tr>\n",
325
+ " <tr>\n",
326
+ " <th>3</th>\n",
327
+ " <td>How does the use of burgundy in web design con...</td>\n",
328
+ " <td>[{\"id\": \"103\", \"url\": \"https://www.csszengarde...</td>\n",
329
+ " <td>The design features a dark burgundy background...</td>\n",
330
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
331
+ " </tr>\n",
332
+ " <tr>\n",
333
+ " <th>4</th>\n",
334
+ " <td>How vintage aesthetic show in this design?</td>\n",
335
+ " <td>[{\"id\": \"168\", \"url\": \"https://www.csszengarde...</td>\n",
336
+ " <td>The design cleverly combines a vintage aesthet...</td>\n",
337
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
338
+ " </tr>\n",
339
+ " <tr>\n",
340
+ " <th>5</th>\n",
341
+ " <td>How do the designs at CSS Zen Garden utilize a...</td>\n",
342
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"151\", \"url\": \"https://www....</td>\n",
343
+ " <td>The designs at CSS Zen Garden utilize a light ...</td>\n",
344
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
345
+ " </tr>\n",
346
+ " <tr>\n",
347
+ " <th>6</th>\n",
348
+ " <td>How do the typography-focused designs in the d...</td>\n",
349
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"155\", \"url\": \"https://www....</td>\n",
350
+ " <td>The typography-focused design in the dark them...</td>\n",
351
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
352
+ " </tr>\n",
353
+ " <tr>\n",
354
+ " <th>7</th>\n",
355
+ " <td>What are the illustrative elements used in the...</td>\n",
356
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"218\", \"url\": \"https://www....</td>\n",
357
+ " <td>The illustrative elements used in the designs ...</td>\n",
358
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
359
+ " </tr>\n",
360
+ " <tr>\n",
361
+ " <th>8</th>\n",
362
+ " <td>What are the key visual characteristics of the...</td>\n",
363
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"208\", \"url\": \"https://www....</td>\n",
364
+ " <td>The design at https://www.csszengarden.com/208...</td>\n",
365
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
366
+ " </tr>\n",
367
+ " <tr>\n",
368
+ " <th>9</th>\n",
369
+ " <td>Wht are the key visual characteristics of the ...</td>\n",
370
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"001\", \"url\": \"https://www....</td>\n",
371
+ " <td>The key visual characteristics of the Zen Gard...</td>\n",
372
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
373
+ " </tr>\n",
374
+ " </tbody>\n",
375
+ "</table>\n",
376
+ "</div>"
377
+ ],
378
+ "text/plain": [
379
+ " user_input \\\n",
380
+ "0 How do ornate flourishes contribute to the ove... \n",
381
+ "1 How does the nature-inspired design in the pro... \n",
382
+ "2 Can you describe the design principles and vis... \n",
383
+ "3 How does the use of burgundy in web design con... \n",
384
+ "4 How vintage aesthetic show in this design? \n",
385
+ "5 How do the designs at CSS Zen Garden utilize a... \n",
386
+ "6 How do the typography-focused designs in the d... \n",
387
+ "7 What are the illustrative elements used in the... \n",
388
+ "8 What are the key visual characteristics of the... \n",
389
+ "9 Wht are the key visual characteristics of the ... \n",
390
+ "\n",
391
+ " reference_contexts \\\n",
392
+ "0 [{\"id\": \"135\", \"url\": \"https://www.csszengarde... \n",
393
+ "1 [{\"id\": \"132\", \"url\": \"https://www.csszengarde... \n",
394
+ "2 [{\"id\": \"104\", \"url\": \"https://www.csszengarde... \n",
395
+ "3 [{\"id\": \"103\", \"url\": \"https://www.csszengarde... \n",
396
+ "4 [{\"id\": \"168\", \"url\": \"https://www.csszengarde... \n",
397
+ "5 [<1-hop>\\n\\n{\"id\": \"151\", \"url\": \"https://www.... \n",
398
+ "6 [<1-hop>\\n\\n{\"id\": \"155\", \"url\": \"https://www.... \n",
399
+ "7 [<1-hop>\\n\\n{\"id\": \"218\", \"url\": \"https://www.... \n",
400
+ "8 [<1-hop>\\n\\n{\"id\": \"208\", \"url\": \"https://www.... \n",
401
+ "9 [<1-hop>\\n\\n{\"id\": \"001\", \"url\": \"https://www.... \n",
402
+ "\n",
403
+ " reference \\\n",
404
+ "0 Ornate flourishes in web design add a touch of... \n",
405
+ "1 The nature-inspired design features a minimali... \n",
406
+ "2 The CSS Zen Garden design emphasizes a harmoni... \n",
407
+ "3 The design features a dark burgundy background... \n",
408
+ "4 The design cleverly combines a vintage aesthet... \n",
409
+ "5 The designs at CSS Zen Garden utilize a light ... \n",
410
+ "6 The typography-focused design in the dark them... \n",
411
+ "7 The illustrative elements used in the designs ... \n",
412
+ "8 The design at https://www.csszengarden.com/208... \n",
413
+ "9 The key visual characteristics of the Zen Gard... \n",
414
+ "\n",
415
+ " synthesizer_name \n",
416
+ "0 single_hop_specifc_query_synthesizer \n",
417
+ "1 single_hop_specifc_query_synthesizer \n",
418
+ "2 single_hop_specifc_query_synthesizer \n",
419
+ "3 single_hop_specifc_query_synthesizer \n",
420
+ "4 single_hop_specifc_query_synthesizer \n",
421
+ "5 multi_hop_specific_query_synthesizer \n",
422
+ "6 multi_hop_specific_query_synthesizer \n",
423
+ "7 multi_hop_specific_query_synthesizer \n",
424
+ "8 multi_hop_specific_query_synthesizer \n",
425
+ "9 multi_hop_specific_query_synthesizer "
426
+ ]
427
+ },
428
+ "execution_count": 21,
429
+ "metadata": {},
430
+ "output_type": "execute_result"
431
+ }
432
+ ],
433
+ "source": [
434
+ "from ragas.testset.synthesizers import default_query_distribution\n",
435
+ "\n",
436
+ "query_distribution = default_query_distribution(\n",
437
+ " kg=design_kg,\n",
438
+ " llm=generator_llm\n",
439
+ ")\n",
440
+ "\n",
441
+ "testset = testset_generator.generate(testset_size=10, query_distribution=query_distribution)\n",
442
+ "testset.to_pandas()"
443
+ ]
444
+ },
445
+ {
446
+ "cell_type": "markdown",
447
+ "metadata": {},
448
+ "source": [
449
+ "Just for comparison, let's see what the simplified version with langchain docs looks like.\n",
450
+ "\n"
451
+ ]
452
+ },
453
+ {
454
+ "cell_type": "code",
455
+ "execution_count": 22,
456
+ "metadata": {},
457
+ "outputs": [
458
+ {
459
+ "data": {
460
+ "application/vnd.jupyter.widget-view+json": {
461
+ "model_id": "70a315ea06e3444ca84fbac63e51b678",
462
+ "version_major": 2,
463
+ "version_minor": 0
464
+ },
465
+ "text/plain": [
466
+ "Applying SummaryExtractor: 0%| | 0/141 [00:00<?, ?it/s]"
467
+ ]
468
+ },
469
+ "metadata": {},
470
+ "output_type": "display_data"
471
+ },
472
+ {
473
+ "data": {
474
+ "application/vnd.jupyter.widget-view+json": {
475
+ "model_id": "f1446a35386844e1b414feda2399b2df",
476
+ "version_major": 2,
477
+ "version_minor": 0
478
+ },
479
+ "text/plain": [
480
+ "Applying CustomNodeFilter: 0%| | 0/141 [00:00<?, ?it/s]"
481
+ ]
482
+ },
483
+ "metadata": {},
484
+ "output_type": "display_data"
485
+ },
486
+ {
487
+ "data": {
488
+ "application/vnd.jupyter.widget-view+json": {
489
+ "model_id": "6247060a50a7484ba9895280782e2235",
490
+ "version_major": 2,
491
+ "version_minor": 0
492
+ },
493
+ "text/plain": [
494
+ "Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]: 0%| | 0/423 [00:00<?, ?it/s]"
495
+ ]
496
+ },
497
+ "metadata": {},
498
+ "output_type": "display_data"
499
+ },
500
+ {
501
+ "data": {
502
+ "application/vnd.jupyter.widget-view+json": {
503
+ "model_id": "32df2719024e4fa395957525ab57abaa",
504
+ "version_major": 2,
505
+ "version_minor": 0
506
+ },
507
+ "text/plain": [
508
+ "Applying OverlapScoreBuilder: 0%| | 0/1 [00:00<?, ?it/s]"
509
+ ]
510
+ },
511
+ "metadata": {},
512
+ "output_type": "display_data"
513
+ },
514
+ {
515
+ "data": {
516
+ "application/vnd.jupyter.widget-view+json": {
517
+ "model_id": "6522481d960e4129aa6f0e0b0b5b2187",
518
+ "version_major": 2,
519
+ "version_minor": 0
520
+ },
521
+ "text/plain": [
522
+ "Generating personas: 0%| | 0/3 [00:00<?, ?it/s]"
523
+ ]
524
+ },
525
+ "metadata": {},
526
+ "output_type": "display_data"
527
+ },
528
+ {
529
+ "data": {
530
+ "application/vnd.jupyter.widget-view+json": {
531
+ "model_id": "4cc4b04da15d44ef9c76cc7afceb9a00",
532
+ "version_major": 2,
533
+ "version_minor": 0
534
+ },
535
+ "text/plain": [
536
+ "Generating Scenarios: 0%| | 0/2 [00:00<?, ?it/s]"
537
+ ]
538
+ },
539
+ "metadata": {},
540
+ "output_type": "display_data"
541
+ },
542
+ {
543
+ "data": {
544
+ "application/vnd.jupyter.widget-view+json": {
545
+ "model_id": "25c28f8088004e398f56cf22495f6714",
546
+ "version_major": 2,
547
+ "version_minor": 0
548
+ },
549
+ "text/plain": [
550
+ "Generating Samples: 0%| | 0/10 [00:00<?, ?it/s]"
551
+ ]
552
+ },
553
+ "metadata": {},
554
+ "output_type": "display_data"
555
+ },
556
+ {
557
+ "data": {
558
+ "text/html": [
559
+ "<div>\n",
560
+ "<style scoped>\n",
561
+ " .dataframe tbody tr th:only-of-type {\n",
562
+ " vertical-align: middle;\n",
563
+ " }\n",
564
+ "\n",
565
+ " .dataframe tbody tr th {\n",
566
+ " vertical-align: top;\n",
567
+ " }\n",
568
+ "\n",
569
+ " .dataframe thead th {\n",
570
+ " text-align: right;\n",
571
+ " }\n",
572
+ "</style>\n",
573
+ "<table border=\"1\" class=\"dataframe\">\n",
574
+ " <thead>\n",
575
+ " <tr style=\"text-align: right;\">\n",
576
+ " <th></th>\n",
577
+ " <th>user_input</th>\n",
578
+ " <th>reference_contexts</th>\n",
579
+ " <th>reference</th>\n",
580
+ " <th>synthesizer_name</th>\n",
581
+ " </tr>\n",
582
+ " </thead>\n",
583
+ " <tbody>\n",
584
+ " <tr>\n",
585
+ " <th>0</th>\n",
586
+ " <td>Can you tell me more about what makes this des...</td>\n",
587
+ " <td>[{\"id\": \"135\", \"url\": \"https://www.csszengarde...</td>\n",
588
+ " <td>This design is classic because it employs an e...</td>\n",
589
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
590
+ " </tr>\n",
591
+ " <tr>\n",
592
+ " <th>1</th>\n",
593
+ " <td>What makes this design typography-focused?</td>\n",
594
+ " <td>[{\"id\": \"132\", \"url\": \"https://www.csszengarde...</td>\n",
595
+ " <td>This design is typography-focused because it u...</td>\n",
596
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
597
+ " </tr>\n",
598
+ " <tr>\n",
599
+ " <th>2</th>\n",
600
+ " <td>How does the web design concept illustrated in...</td>\n",
601
+ " <td>[{\"id\": \"104\", \"url\": \"https://www.csszengarde...</td>\n",
602
+ " <td>The web design concept illustrated in the CSS ...</td>\n",
603
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
604
+ " </tr>\n",
605
+ " <tr>\n",
606
+ " <th>3</th>\n",
607
+ " <td>What make this design vintage?</td>\n",
608
+ " <td>[{\"id\": \"103\", \"url\": \"https://www.csszengarde...</td>\n",
609
+ " <td>This design features a vintage theme through i...</td>\n",
610
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
611
+ " </tr>\n",
612
+ " <tr>\n",
613
+ " <th>4</th>\n",
614
+ " <td>How does the vintage aesthetic influence the d...</td>\n",
615
+ " <td>[{\"id\": \"168\", \"url\": \"https://www.csszengarde...</td>\n",
616
+ " <td>The vintage aesthetic in the design is cleverl...</td>\n",
617
+ " <td>single_hop_specifc_query_synthesizer</td>\n",
618
+ " </tr>\n",
619
+ " <tr>\n",
620
+ " <th>5</th>\n",
621
+ " <td>What are the key visual characteristics that m...</td>\n",
622
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"201\", \"url\": \"https://www....</td>\n",
623
+ " <td>The design at CSS Zen Garden employs a strong ...</td>\n",
624
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
625
+ " </tr>\n",
626
+ " <tr>\n",
627
+ " <th>6</th>\n",
628
+ " <td>What are the visual characteristics of designs...</td>\n",
629
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"008\", \"url\": \"https://www....</td>\n",
630
+ " <td>The designs that effectively utilize bold typo...</td>\n",
631
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
632
+ " </tr>\n",
633
+ " <tr>\n",
634
+ " <th>7</th>\n",
635
+ " <td>What are the key visual characteristics of the...</td>\n",
636
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"151\", \"url\": \"https://www....</td>\n",
637
+ " <td>The key visual characteristics of the minimali...</td>\n",
638
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
639
+ " </tr>\n",
640
+ " <tr>\n",
641
+ " <th>8</th>\n",
642
+ " <td>How do the subtle background colors and subtle...</td>\n",
643
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"164\", \"url\": \"https://www....</td>\n",
644
+ " <td>The design featuring a minimalist layout with ...</td>\n",
645
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
646
+ " </tr>\n",
647
+ " <tr>\n",
648
+ " <th>9</th>\n",
649
+ " <td>In what ways do the designs from CSS Zen Garde...</td>\n",
650
+ " <td>[&lt;1-hop&gt;\\n\\n{\"id\": \"109\", \"url\": \"https://www....</td>\n",
651
+ " <td>The designs from CSS Zen Garden exemplify info...</td>\n",
652
+ " <td>multi_hop_specific_query_synthesizer</td>\n",
653
+ " </tr>\n",
654
+ " </tbody>\n",
655
+ "</table>\n",
656
+ "</div>"
657
+ ],
658
+ "text/plain": [
659
+ " user_input \\\n",
660
+ "0 Can you tell me more about what makes this des... \n",
661
+ "1 What makes this design typography-focused? \n",
662
+ "2 How does the web design concept illustrated in... \n",
663
+ "3 What make this design vintage? \n",
664
+ "4 How does the vintage aesthetic influence the d... \n",
665
+ "5 What are the key visual characteristics that m... \n",
666
+ "6 What are the visual characteristics of designs... \n",
667
+ "7 What are the key visual characteristics of the... \n",
668
+ "8 How do the subtle background colors and subtle... \n",
669
+ "9 In what ways do the designs from CSS Zen Garde... \n",
670
+ "\n",
671
+ " reference_contexts \\\n",
672
+ "0 [{\"id\": \"135\", \"url\": \"https://www.csszengarde... \n",
673
+ "1 [{\"id\": \"132\", \"url\": \"https://www.csszengarde... \n",
674
+ "2 [{\"id\": \"104\", \"url\": \"https://www.csszengarde... \n",
675
+ "3 [{\"id\": \"103\", \"url\": \"https://www.csszengarde... \n",
676
+ "4 [{\"id\": \"168\", \"url\": \"https://www.csszengarde... \n",
677
+ "5 [<1-hop>\\n\\n{\"id\": \"201\", \"url\": \"https://www.... \n",
678
+ "6 [<1-hop>\\n\\n{\"id\": \"008\", \"url\": \"https://www.... \n",
679
+ "7 [<1-hop>\\n\\n{\"id\": \"151\", \"url\": \"https://www.... \n",
680
+ "8 [<1-hop>\\n\\n{\"id\": \"164\", \"url\": \"https://www.... \n",
681
+ "9 [<1-hop>\\n\\n{\"id\": \"109\", \"url\": \"https://www.... \n",
682
+ "\n",
683
+ " reference \\\n",
684
+ "0 This design is classic because it employs an e... \n",
685
+ "1 This design is typography-focused because it u... \n",
686
+ "2 The web design concept illustrated in the CSS ... \n",
687
+ "3 This design features a vintage theme through i... \n",
688
+ "4 The vintage aesthetic in the design is cleverl... \n",
689
+ "5 The design at CSS Zen Garden employs a strong ... \n",
690
+ "6 The designs that effectively utilize bold typo... \n",
691
+ "7 The key visual characteristics of the minimali... \n",
692
+ "8 The design featuring a minimalist layout with ... \n",
693
+ "9 The designs from CSS Zen Garden exemplify info... \n",
694
+ "\n",
695
+ " synthesizer_name \n",
696
+ "0 single_hop_specifc_query_synthesizer \n",
697
+ "1 single_hop_specifc_query_synthesizer \n",
698
+ "2 single_hop_specifc_query_synthesizer \n",
699
+ "3 single_hop_specifc_query_synthesizer \n",
700
+ "4 single_hop_specifc_query_synthesizer \n",
701
+ "5 multi_hop_specific_query_synthesizer \n",
702
+ "6 multi_hop_specific_query_synthesizer \n",
703
+ "7 multi_hop_specific_query_synthesizer \n",
704
+ "8 multi_hop_specific_query_synthesizer \n",
705
+ "9 multi_hop_specific_query_synthesizer "
706
+ ]
707
+ },
708
+ "execution_count": 22,
709
+ "metadata": {},
710
+ "output_type": "execute_result"
711
+ }
712
+ ],
713
+ "source": [
714
+ "generator = TestsetGenerator(\n",
715
+ " llm=generator_llm,\n",
716
+ " embedding_model=generator_embeddings\n",
717
+ ")\n",
718
+ "\n",
719
+ "testset = generator.generate_with_langchain_docs(\n",
720
+ " documents=documents,\n",
721
+ " testset_size=10,\n",
722
+ " query_distribution=query_distribution\n",
723
+ ")\n",
724
+ "\n",
725
+ "testset.to_pandas()\n",
726
+ "\n",
727
+ "\n"
728
+ ]
729
  }
730
  ],
731
  "metadata": {