creator
#1
by
SsyzeChen
- opened
- .gitattributes +1 -0
- HLLM_Creator/amazon_books_test/_SUCCESS +0 -0
- HLLM_Creator/amazon_books_test/part-00000-ac90a994-1f17-4795-9846-aed3a3eafef3-c000.snappy.parquet +3 -0
- HLLM_Creator/amazon_books_train/_SUCCESS +0 -0
- HLLM_Creator/amazon_books_train/part-00000-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
- HLLM_Creator/amazon_books_train/part-00001-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
- HLLM_Creator/amazon_books_train/part-00002-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
- HLLM_Creator/amazon_books_train/part-00003-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
- HLLM_Creator/amazon_books_train/part-00004-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
- HLLM_Creator/amazon_books_train/part-00005-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
- HLLM_Creator/amazon_books_train/part-00006-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
- HLLM_Creator/amazon_books_train/part-00007-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
- HLLM_Creator/fake_train_data/train.parquet +3 -0
- HLLM_Creator/pretrained_model/cluster_256.pt +3 -0
- HLLM_Creator/pretrained_model/cluster_64.pt +3 -0
- HLLM_Creator/pretrained_model/cluster_8.pt +3 -0
- HLLM_Creator/pretrained_model/config.json +3 -0
- HLLM_Creator/pretrained_model/generation_config.json +3 -0
- HLLM_Creator/pretrained_model/merges.txt +0 -0
- HLLM_Creator/pretrained_model/pytorch_model.bin +3 -0
- HLLM_Creator/pretrained_model/rank0_user_emb.pt +3 -0
- HLLM_Creator/pretrained_model/rank1_user_emb.pt +3 -0
- HLLM_Creator/pretrained_model/rank2_user_emb.pt +3 -0
- HLLM_Creator/pretrained_model/rank3_user_emb.pt +3 -0
- HLLM_Creator/pretrained_model/rank4_user_emb.pt +3 -0
- HLLM_Creator/pretrained_model/rank5_user_emb.pt +3 -0
- HLLM_Creator/pretrained_model/rank6_user_emb.pt +3 -0
- HLLM_Creator/pretrained_model/rank7_user_emb.pt +3 -0
- HLLM_Creator/pretrained_model/tokenizer.json +3 -0
- HLLM_Creator/pretrained_model/tokenizer_config.json +3 -0
- HLLM_Creator/pretrained_model/vocab.json +3 -0
- HLLM_Creator/pretrained_model/zero3_merge_states.pt +3 -0
- README.md +17 -3
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.csv filter=lfs diff=lfs merge=lfs -text
|
35 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
36 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.csv filter=lfs diff=lfs merge=lfs -text
|
35 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
36 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
37 |
+
HLLM_Creator/pretrained_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
HLLM_Creator/amazon_books_test/_SUCCESS
ADDED
File without changes
|
HLLM_Creator/amazon_books_test/part-00000-ac90a994-1f17-4795-9846-aed3a3eafef3-c000.snappy.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6da2a1db6dd73acffc7a4ea6684081aada888dbe4e0608d59990ca275dd937fd
|
3 |
+
size 3545446
|
HLLM_Creator/amazon_books_train/_SUCCESS
ADDED
File without changes
|
HLLM_Creator/amazon_books_train/part-00000-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbe1b7c8fda773b69b767b3596d0330096153ae16b3c439909af6620f13031bb
|
3 |
+
size 405487164
|
HLLM_Creator/amazon_books_train/part-00001-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b5f1f4d5cc3a2287e8e8b9566a437cd25b90c032b73f4ffa1b846da9cc18802
|
3 |
+
size 413227305
|
HLLM_Creator/amazon_books_train/part-00002-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fa4b9ede35b456922191645688750dbfd8b76ec7deb08388d95d37956b95de1
|
3 |
+
size 417266292
|
HLLM_Creator/amazon_books_train/part-00003-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13f30cb7c37bcf7cc355c5527578cca9b68e733bd623ab8e178e7a668c4ff537
|
3 |
+
size 410501828
|
HLLM_Creator/amazon_books_train/part-00004-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:184973c942622fd96d08b75dd7b1d8c156a7341e1570c3cd35efce68e9cf929f
|
3 |
+
size 415967964
|
HLLM_Creator/amazon_books_train/part-00005-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6b621d097e223d9c07915fc3ed38f67281ae36b50c893fa868ef5facb891b33
|
3 |
+
size 410535658
|
HLLM_Creator/amazon_books_train/part-00006-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64a4f5b208f44d5341a6d21a6e6b39cc44b7f6ba642c3fc280a13c3afd83c5da
|
3 |
+
size 409545410
|
HLLM_Creator/amazon_books_train/part-00007-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff1629e1a57e2afdc46790bfe4652c3d0d1eb437c8e110d9f0081282700998c4
|
3 |
+
size 412271816
|
HLLM_Creator/fake_train_data/train.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5acbc8fe4402aa636262b2977715074d268e3802e09438b0a93ccc12abc2dc65
|
3 |
+
size 18082
|
HLLM_Creator/pretrained_model/cluster_256.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0102f103326cf92cfbfd7388c648c53e2cfc5c6b211b41632da9524cb627d1f5
|
3 |
+
size 4195504
|
HLLM_Creator/pretrained_model/cluster_64.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34134d74428f1773911c5d2d7568298ee6ecc44495a0b251c31144bb0d2532c6
|
3 |
+
size 1049771
|
HLLM_Creator/pretrained_model/cluster_8.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7419073ae31eb8a30fc5df3a0bde882307c87e62a134f41ec308f5a173072102
|
3 |
+
size 132262
|
HLLM_Creator/pretrained_model/config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7c4eadfbbf522470667b797a3c89be2524832d2d599797248dc304fff447c30
|
3 |
+
size 728
|
HLLM_Creator/pretrained_model/generation_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2325da0f15bb848e018c5ae071b7943332e9f871d6b60e2ed22ca97d4cb993d2
|
3 |
+
size 239
|
HLLM_Creator/pretrained_model/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
HLLM_Creator/pretrained_model/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:148a5c032b90e71b80ebcecf9828c0bd8294c9318dc46b2c2ae024197b1f8747
|
3 |
+
size 32763074726
|
HLLM_Creator/pretrained_model/rank0_user_emb.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e143d13b923018cc5b4ad18a5edfda7c159d1b5da1bd67824951be79a4b9ad16
|
3 |
+
size 134218943
|
HLLM_Creator/pretrained_model/rank1_user_emb.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d94f5b46004427f4f09fe62310afc84da74ad6e225f8d29382e8327775dcfe9
|
3 |
+
size 134218943
|
HLLM_Creator/pretrained_model/rank2_user_emb.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b236a7fd7e8ca8dafa95a69be64c415a162a87117b916989562410f8fa5b44f
|
3 |
+
size 134218943
|
HLLM_Creator/pretrained_model/rank3_user_emb.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc2c4370f30ac08995f219c3c513635cef93b9972d7505143b4d878f7fdd95cb
|
3 |
+
size 134218943
|
HLLM_Creator/pretrained_model/rank4_user_emb.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0b9d438fb9ed8883e466adf82e28ba86e8976c9b972996b27e556b1e0906b0f
|
3 |
+
size 134218943
|
HLLM_Creator/pretrained_model/rank5_user_emb.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef6cca73a641068cec0f351027f2d3f6553cc7621e5c2f4fc6f3981835a3e19e
|
3 |
+
size 134218943
|
HLLM_Creator/pretrained_model/rank6_user_emb.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0669a784b5829b8051d85d2bd8ec59f13fd741e298e49caedaa72166cdddc0a2
|
3 |
+
size 134218943
|
HLLM_Creator/pretrained_model/rank7_user_emb.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc1d48a2bce97be3184d0cf1133f6c31ff79c3ad36432cd7b7311b348f45fd55
|
3 |
+
size 134218943
|
HLLM_Creator/pretrained_model/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
3 |
+
size 11422654
|
HLLM_Creator/pretrained_model/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a7303fcb1a27ede63134a2cbd61d5282c247ca6d769ce4746d4ffa124aedd63
|
3 |
+
size 9675
|
HLLM_Creator/pretrained_model/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
|
3 |
+
size 2776833
|
HLLM_Creator/pretrained_model/zero3_merge_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2729c4faf9aa15dffd20539efb63f5da532888a1f4bf2b72045a72aa41b92fb
|
3 |
+
size 41916132736
|
README.md
CHANGED
@@ -3,14 +3,16 @@ license: apache-2.0
|
|
3 |
base_model:
|
4 |
- TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
5 |
- baichuan-inc/Baichuan2-7B-Base
|
|
|
6 |
---
|
7 |
|
8 |
# Model Card for HLLM
|
9 |
|
10 |
-
[GitHub](https://github.com/bytedance/HLLM)
|
12 |
|
13 |
-
This repo is used for hosting HLLM
|
14 |
|
15 |
For more details or tutorials see https://github.com/bytedance/HLLM.
|
16 |
|
@@ -19,7 +21,12 @@ For more details or tutorials see https://github.com/bytedance/HLLM.
|
|
19 |
- HLLM effectively transfers the world knowledge encoded during the LLM pre-training stage into the recommendation model, encompassing both item feature extraction and user interest modeling. Nevertheless, task-specific fine-tuning with recommendation objectives is essential.
|
20 |
- HLLM exhibits excellent scalability, with performance continuously improving as the data volume and model parameters increase. This scalability highlights the potential of the proposed approach when applied to even larger datasets and model sizes.
|
21 |
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
| Method | Dataset | Negatives | R@10 | R@50 | R@200 | N@10 | N@50 | N@200 |
|
25 |
| ------------- | ------- |---------- | ---------- | --------- |---------- | --------- | --------- | --------- |
|
@@ -42,4 +49,11 @@ author={Junyi Chen and Lu Chi and Bingyue Peng and Zehuan Yuan},
|
|
42 |
journal={arXiv preprint arXiv:2409.12740},
|
43 |
year={2024}
|
44 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
```
|
|
|
3 |
base_model:
|
4 |
- TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
5 |
- baichuan-inc/Baichuan2-7B-Base
|
6 |
+
- Qwen/Qwen3-8B
|
7 |
---
|
8 |
|
9 |
# Model Card for HLLM
|
10 |
|
11 |
+
[arXiv:2409.12740](https://arxiv.org/abs/2409.12740)
|
12 |
+
[arXiv:2508.18118](https://arxiv.org/abs/2508.18118)
|
13 |
[GitHub](https://github.com/bytedance/HLLM)
|
14 |
|
15 |
+
This repo is used for hosting HLLM and HLLM-Creator checkpoints.
|
16 |
|
17 |
For more details or tutorials see https://github.com/bytedance/HLLM.
|
18 |
|
|
|
21 |
- HLLM effectively transfers the world knowledge encoded during the LLM pre-training stage into the recommendation model, encompassing both item feature extraction and user interest modeling. Nevertheless, task-specific fine-tuning with recommendation objectives is essential.
|
22 |
- HLLM exhibits excellent scalability, with performance continuously improving as the data volume and model parameters increase. This scalability highlights the potential of the proposed approach when applied to even larger datasets and model sizes.
|
23 |
|
24 |
+
HLLM-Creator is designed for personalized creative generation:
|
25 |
+
- HLLM-Creator enables precise user interest modeling and fine-grained content personalization.
|
26 |
+
- A Chain-of-Thought-based data construction pipeline is developed to expand the personalization space and ensure factual consistency, effectively reducing hallucinations in generated titles.
|
27 |
+
- A flexible and efficient inference scheme is developed for large-scale industrial deployment, with significant positive results in Douyin search advertising demonstrating its real-world impact.
|
28 |
+
|
29 |
+
## Comparison with state-of-the-art methods (HLLM)
|
30 |
|
31 |
| Method | Dataset | Negatives | R@10 | R@50 | R@200 | N@10 | N@50 | N@200 |
|
32 |
| ------------- | ------- |---------- | ---------- | --------- |---------- | --------- | --------- | --------- |
|
|
|
49 |
journal={arXiv preprint arXiv:2409.12740},
|
50 |
year={2024}
|
51 |
}
|
52 |
+
|
53 |
+
@article{HLLM-Creator,
|
54 |
+
title={HLLM-Creator: Hierarchical LLM-based Personalized Creative Generation},
|
55 |
+
author={Junyi Chen and Lu Chi and Siliang Xu and Shiwei Ran and Bingyue Peng and Zehuan Yuan},
|
56 |
+
journal={arXiv preprint arXiv:2508.18118},
|
57 |
+
year={2025}
|
58 |
+
}
|
59 |
```
|