Spaces: Sleeping
| *.7z filter=lfs diff=lfs merge=lfs -text | |
| *.arrow filter=lfs diff=lfs merge=lfs -text | |
| *.bin filter=lfs diff=lfs merge=lfs -text | |
| *.bz2 filter=lfs diff=lfs merge=lfs -text | |
| *.ckpt filter=lfs diff=lfs merge=lfs -text | |
| *.ftz filter=lfs diff=lfs merge=lfs -text | |
| *.gz filter=lfs diff=lfs merge=lfs -text | |
| *.h5 filter=lfs diff=lfs merge=lfs -text | |
| *.joblib filter=lfs diff=lfs merge=lfs -text | |
| *.lfs.* filter=lfs diff=lfs merge=lfs -text | |
| *.mlmodel filter=lfs diff=lfs merge=lfs -text | |
| *.model filter=lfs diff=lfs merge=lfs -text | |
| *.msgpack filter=lfs diff=lfs merge=lfs -text | |
| *.npy filter=lfs diff=lfs merge=lfs -text | |
| *.npz filter=lfs diff=lfs merge=lfs -text | |
| *.onnx filter=lfs diff=lfs merge=lfs -text | |
| *.ot filter=lfs diff=lfs merge=lfs -text | |
| *.parquet filter=lfs diff=lfs merge=lfs -text | |
| *.pb filter=lfs diff=lfs merge=lfs -text | |
| *.pickle filter=lfs diff=lfs merge=lfs -text | |
| *.pkl filter=lfs diff=lfs merge=lfs -text | |
| *.pt filter=lfs diff=lfs merge=lfs -text | |
| *.pth filter=lfs diff=lfs merge=lfs -text | |
| *.rar filter=lfs diff=lfs merge=lfs -text | |
| *.safetensors filter=lfs diff=lfs merge=lfs -text | |
| saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| *.tar.* filter=lfs diff=lfs merge=lfs -text | |
| *.tar filter=lfs diff=lfs merge=lfs -text | |
| *.tflite filter=lfs diff=lfs merge=lfs -text | |
| *.tgz filter=lfs diff=lfs merge=lfs -text | |
| *.wasm filter=lfs diff=lfs merge=lfs -text | |
| *.xz filter=lfs diff=lfs merge=lfs -text | |
| *.zip filter=lfs diff=lfs merge=lfs -text | |
| *.zst filter=lfs diff=lfs merge=lfs -text | |
| *tfevents* filter=lfs diff=lfs merge=lfs -text | |
| images/llm360_logo.png filter=lfs diff=lfs merge=lfs -text | |
| data/dataset_inclusion.csv filter=lfs diff=lfs merge=lfs -text | |
| data/cluster_dist.json filter=lfs diff=lfs merge=lfs -text | |
| data/lorem_ipsum.json filter=lfs diff=lfs merge=lfs -text | |
| data/sample_non_en.json filter=lfs diff=lfs merge=lfs -text | |
| data/sample_url_exclusion.json filter=lfs diff=lfs merge=lfs -text | |
| data/web_filter_pipeline.json filter=lfs diff=lfs merge=lfs -text | |
| data/repeat_line_frac.jsonl filter=lfs diff=lfs merge=lfs -text | |
| data/sample_doc_stat.json filter=lfs diff=lfs merge=lfs -text | |
| data/sample_dup_ngram.json filter=lfs diff=lfs merge=lfs -text | |
| data/sample_terminal_punc.json filter=lfs diff=lfs merge=lfs -text | |
| data/sample_warc.json filter=lfs diff=lfs merge=lfs -text | |
| data/all_signals.json filter=lfs diff=lfs merge=lfs -text | |
| data/sample_top_ngram.json filter=lfs diff=lfs merge=lfs -text | |
| data/curated_samples filter=lfs diff=lfs merge=lfs -text | |
| data/dataset_details.csv filter=lfs diff=lfs merge=lfs -text | |
| data/meta_non_web.py filter=lfs diff=lfs merge=lfs -text | |
| data/mbzuai-llm-us-east-1[[:space:]]-[[:space:]]S3[[:space:]]bucket[[:space:]]_[[:space:]]S3[[:space:]]_[[:space:]]us-east-1.mhtml filter=lfs diff=lfs merge=lfs -text | |
| data/non_web_urls.py filter=lfs diff=lfs merge=lfs -text | |
| data/sample_wet.json filter=lfs diff=lfs merge=lfs -text | |
| data/url_blocklist.py filter=lfs diff=lfs merge=lfs -text | |
| data/web_pipeline_comparison.csv filter=lfs diff=lfs merge=lfs -text | |
| data/bad_url_doc.jsonl filter=lfs diff=lfs merge=lfs -text | |
| data/sample.py filter=lfs diff=lfs merge=lfs -text | |
| data/sample_bad_urls.py filter=lfs diff=lfs merge=lfs -text | |
| data/sample_en_low.json filter=lfs diff=lfs merge=lfs -text | |
| data/sample_java.jsonl filter=lfs diff=lfs merge=lfs -text | |
| data/toxic_lines.json filter=lfs diff=lfs merge=lfs -text | |
| data/dataset_inclusion_size.csv filter=lfs diff=lfs merge=lfs -text | |
| data/line_info.json filter=lfs diff=lfs merge=lfs -text | |
| data/sample_refinedweb_line.json filter=lfs diff=lfs merge=lfs -text | |