geekyrakshit
commited on
add: BM25s index
Browse files- corpus.jsonl +0 -0
- corpus.mmindex.json +1 -0
- data.csc.index.npy +3 -0
- indices.csc.index.npy +3 -0
- indptr.csc.index.npy +3 -0
- params.index.json +12 -0
- vocab.index.json +1 -0
corpus.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
corpus.mmindex.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[0,1233,2176,3549,3883,4785,5355,6519,7808,9133,10021,11038,11310,12543,13486,14859,15193,16095,16665,17829,19118,20443,21331,22348,22620,23853,24796,26169,26503,27405,27975,29139,30428,31753,32641,33658,33930,35163,36106,37479,37813,38715,39285,40449,41738,43063,43951,44968,45240,46473,47416,48789,49123,50025,50595,51759,53048,54373,55261,56278,56550,57783,58726,60099,60433,61335,61905,63069,64358,65683,66571,67588,67860,69093,70036,71409,71743,72645,73215,74379,75668,76993,77881,78898,79170,80403,81346,82719,83053,83955,84525,85689,86978,88303,89191,90208,90480,91713,92656,94029,94363,95265,95835,96999,98288,99613,100501,101518,101790,103023,103966,105339,105673,106575,107145,108309,109598,110923,111811,112828]
|
data.csc.index.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27b28af1e68c3aa13e2de9d43be1149295084d6a2ee70c05166e1991a1146d18
|
3 |
+
size 22688
|
indices.csc.index.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45b10bb24284766880fcfd676924089937168138730da3be63dda0a9e82b7e68
|
3 |
+
size 22688
|
indptr.csc.index.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c42433a1b039bc6b017e1a97d3fe4bb0a454b1d09f183fc5b50db4402127f36f
|
3 |
+
size 1452
|
params.index.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"k1": 1.5,
|
3 |
+
"b": 0.75,
|
4 |
+
"delta": 0.5,
|
5 |
+
"method": "lucene",
|
6 |
+
"idf_method": "lucene",
|
7 |
+
"dtype": "float32",
|
8 |
+
"int_dtype": "int32",
|
9 |
+
"num_docs": 120,
|
10 |
+
"version": "0.2.3",
|
11 |
+
"backend": "numpy"
|
12 |
+
}
|
vocab.index.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"larger":0,"million":1,"µm":2,"thus":3,"mani":4,"coeffici":5,"use":6,"lar":7,"coat":8,"sediment":9,"differ":10,"occupi":11,"20":12,"form":13,"recognit":14,"abund":15,"import":16,"from":17,"histocompat":18,"compon":19,"cytoskeleton":20,"glycosyl":21,"nm":22,"larg":23,"high":24,"materi":25,"indic":26,"close":27,"glycogen":28,"phosphatas":29,"cisterna":30,"intern":31,"cholesterol":32,"which":33,"membran":34,"antigen":35,"stud":36,"up":37,"two":38,"number":39,"inclus":40,"machin":41,"separ":42,"newli":43,"enzym":44,"lipid":45,"exterior":46,"perinuclear":47,"conjunct":48,"messeng":49,"case":50,"can":51,"distinct":52,"where":53,"2013":54,"point":55,"cooper":56,"glycoprotein":57,"glycolipid":58,"produc":59,"rna":60,"intestin":61,"cytoplasm":62,"all":63,"blood":64,"role":65,"apparatus":66,"bud":67,"40s":68,"cytomembran":69,"some":70,"more":71,"cylindr":72,"assembl":73,"granu":74,"glycocalyx":75,"vesicl":76,"develop":77,"signific":78,"8s":79,"tubul":80,"accord":81,"catalys":82,"direct":83,"secret":84,"unequ":85,"lysosom":86,"strand":87,"pigment":88,"subunit":89,"40":90,"take":91,"therefor":92,"receptor":93,"hormon":94,"protein":95,"particular":96,"concern":97,"diffus":98,"interconnect":99,"found":100,"begin":101,"structur":102,"free":103,"cytochrom":104,"granul":105,"smaller":106,"near":107,"polysom":108,"golgi":109,"brush":110,"step":111,"made":112,"pass":113,"6phosphat":114,"25":115,"size":116,"sarcoplasm":117,"locat":118,"specif":119,"one":120,"product":121,"each":122,"have":123,"passag":124,"function":125,"fuzzi":126,"activ":127,"via":128,"carbohydraterich":129,"epithelium":130,"channel":131,"after":132,"surfac":133,"system":134,"outsid":135,"al":136,"line":137,"insert":138,"intramembran":139,"storag":140,"5s":141,"type":142,"outer":143,"diamet":144,"store":145,"onli":146,"muscl":147,"filament":148,"ribosom":149,"mrna":150,"most":151,"reticulum":152,"fig":153,"small":154,"transloc":155,"73":156,"border":157,"calcium":158,"see":159,"macromolecular":160,"consequ":161,"be":162,"gluconeogenesi":163,"intermedi":164,"site":165,"when":166,"group":167,"tissu":168,"portion":169,"select":170,"compos":171,"consist":172,"process":173,"make":174,"ribonucl":175,"composit":176,"possess":177,"sequenc":178,"signal":179,"translat":180,"substrat":181,"also":182,"steroid":183,"smooth":184,"togeth":185,"fold":186,"flatten":187,"transplant":188,"place":189,"microvilli":190,"cytoskelet":191,"ser":192,"28s":193,"like":194,"convolut":195,"part":196,"33":197,"read":198,"organ":199,"nuclear":200,"phospholipid":201,"polyribosom":202,"associ":203,"content":204,"peroxisom":205,"incorpor":206,"involv":207,"synthesi":208,"becom":209,"off":210,"those":211,"sac":212,"carbohydr":213,"precis":214,"new":215,"matur":216,"major":217,"endoplasm":218,"bound":219,"mitochondria":220,"cytosol":221,"play":222,"apic":223,"droplet":224,"glucos":225,"lack":226,"length":227,"enterocyt":228,"nucleoplasm":229,"organell":230,"serv":231,"motil":232,"within":233,"agranular":234,"mechan":235,"ad":236,"bravo":237,"transport":238,"great":239,"divid":240,"bind":241,"contract":242,"18s":243,"typic":244,"envelop":245,"gel":246,"caviti":247,"rough":248,"variant":249,"digest":250,"through":251,"enzymat":252,"contain":253,"includ":254,"appropri":255,"act":256,"extern":257,"molecul":258,"nucleotid":259,"well":260,"pack":261,"it":262,"has":263,"compart":264,"membranebound":265,"5e":266,"lipoprotein":267,"synthes":268,"general":269,"60s":270,"henc":271,"absorpt":272,"erythrocyt":273,"metabol":274,"rer":275,"other":276,"respons":277,"increas":278,"access":279,"although":280,"compat":281,"hepatocyt":282,"cell":283,"about":284,"convert":285,"various":286,"integr":287,"detoxif":288,"complex":289,"dock":290,"call":291,"colloid":292,"individu":293,"further":294,"along":295,"enclos":296,"long":297,"may":298,"target":299,"regul":300,"nucleolus":301,"lipophil":302,"ligand":303,"continu":304,"dure":305,"move":306,"releas":307,"amino":308,"accumul":309,"remain":310,"membranelin":311,"immun":312,"attach":313,"layer":314,"rrna":315,"three":316,"embed":317,"project":318,"intracellular":319,"approxim":320,"cistern":321,"secretori":322,"plasma":323,"p450":324,"et":325,"space":326,"ultracentrifug":327,"acid":328,"vari":329}
|