creator

by SsyzeChen - opened 21 days ago

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+151490

-3

Files changed (33) hide show

.gitattributes +1 -0
HLLM_Creator/amazon_books_test/_SUCCESS +0 -0
HLLM_Creator/amazon_books_test/part-00000-ac90a994-1f17-4795-9846-aed3a3eafef3-c000.snappy.parquet +3 -0
HLLM_Creator/amazon_books_train/_SUCCESS +0 -0
HLLM_Creator/amazon_books_train/part-00000-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
HLLM_Creator/amazon_books_train/part-00001-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
HLLM_Creator/amazon_books_train/part-00002-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
HLLM_Creator/amazon_books_train/part-00003-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
HLLM_Creator/amazon_books_train/part-00004-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
HLLM_Creator/amazon_books_train/part-00005-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
HLLM_Creator/amazon_books_train/part-00006-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
HLLM_Creator/amazon_books_train/part-00007-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet +3 -0
HLLM_Creator/fake_train_data/train.parquet +3 -0
HLLM_Creator/pretrained_model/cluster_256.pt +3 -0
HLLM_Creator/pretrained_model/cluster_64.pt +3 -0
HLLM_Creator/pretrained_model/cluster_8.pt +3 -0
HLLM_Creator/pretrained_model/config.json +3 -0
HLLM_Creator/pretrained_model/generation_config.json +3 -0
HLLM_Creator/pretrained_model/merges.txt +0 -0
HLLM_Creator/pretrained_model/pytorch_model.bin +3 -0
HLLM_Creator/pretrained_model/rank0_user_emb.pt +3 -0
HLLM_Creator/pretrained_model/rank1_user_emb.pt +3 -0
HLLM_Creator/pretrained_model/rank2_user_emb.pt +3 -0
HLLM_Creator/pretrained_model/rank3_user_emb.pt +3 -0
HLLM_Creator/pretrained_model/rank4_user_emb.pt +3 -0
HLLM_Creator/pretrained_model/rank5_user_emb.pt +3 -0
HLLM_Creator/pretrained_model/rank6_user_emb.pt +3 -0
HLLM_Creator/pretrained_model/rank7_user_emb.pt +3 -0
HLLM_Creator/pretrained_model/tokenizer.json +3 -0
HLLM_Creator/pretrained_model/tokenizer_config.json +3 -0
HLLM_Creator/pretrained_model/vocab.json +3 -0
HLLM_Creator/pretrained_model/zero3_merge_states.pt +3 -0
README.md +17 -3

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.csv filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.csv filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+HLLM_Creator/pretrained_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text

HLLM_Creator/amazon_books_test/_SUCCESS ADDED Viewed

File without changes

HLLM_Creator/amazon_books_test/part-00000-ac90a994-1f17-4795-9846-aed3a3eafef3-c000.snappy.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6da2a1db6dd73acffc7a4ea6684081aada888dbe4e0608d59990ca275dd937fd
+size 3545446

HLLM_Creator/amazon_books_train/_SUCCESS ADDED Viewed

File without changes

HLLM_Creator/amazon_books_train/part-00000-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bbe1b7c8fda773b69b767b3596d0330096153ae16b3c439909af6620f13031bb
+size 405487164

HLLM_Creator/amazon_books_train/part-00001-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b5f1f4d5cc3a2287e8e8b9566a437cd25b90c032b73f4ffa1b846da9cc18802
+size 413227305

HLLM_Creator/amazon_books_train/part-00002-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5fa4b9ede35b456922191645688750dbfd8b76ec7deb08388d95d37956b95de1
+size 417266292

HLLM_Creator/amazon_books_train/part-00003-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13f30cb7c37bcf7cc355c5527578cca9b68e733bd623ab8e178e7a668c4ff537
+size 410501828

HLLM_Creator/amazon_books_train/part-00004-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:184973c942622fd96d08b75dd7b1d8c156a7341e1570c3cd35efce68e9cf929f
+size 415967964

HLLM_Creator/amazon_books_train/part-00005-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6b621d097e223d9c07915fc3ed38f67281ae36b50c893fa868ef5facb891b33
+size 410535658

HLLM_Creator/amazon_books_train/part-00006-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64a4f5b208f44d5341a6d21a6e6b39cc44b7f6ba642c3fc280a13c3afd83c5da
+size 409545410

HLLM_Creator/amazon_books_train/part-00007-c5aab1bd-7021-4244-8127-d140986254d0-c000.snappy.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff1629e1a57e2afdc46790bfe4652c3d0d1eb437c8e110d9f0081282700998c4
+size 412271816

HLLM_Creator/fake_train_data/train.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5acbc8fe4402aa636262b2977715074d268e3802e09438b0a93ccc12abc2dc65
+size 18082

HLLM_Creator/pretrained_model/cluster_256.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0102f103326cf92cfbfd7388c648c53e2cfc5c6b211b41632da9524cb627d1f5
+size 4195504

HLLM_Creator/pretrained_model/cluster_64.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34134d74428f1773911c5d2d7568298ee6ecc44495a0b251c31144bb0d2532c6
+size 1049771

HLLM_Creator/pretrained_model/cluster_8.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7419073ae31eb8a30fc5df3a0bde882307c87e62a134f41ec308f5a173072102
+size 132262

HLLM_Creator/pretrained_model/config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7c4eadfbbf522470667b797a3c89be2524832d2d599797248dc304fff447c30
+size 728

HLLM_Creator/pretrained_model/generation_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2325da0f15bb848e018c5ae071b7943332e9f871d6b60e2ed22ca97d4cb993d2
+size 239

HLLM_Creator/pretrained_model/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

HLLM_Creator/pretrained_model/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:148a5c032b90e71b80ebcecf9828c0bd8294c9318dc46b2c2ae024197b1f8747
+size 32763074726

HLLM_Creator/pretrained_model/rank0_user_emb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e143d13b923018cc5b4ad18a5edfda7c159d1b5da1bd67824951be79a4b9ad16
+size 134218943

HLLM_Creator/pretrained_model/rank1_user_emb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d94f5b46004427f4f09fe62310afc84da74ad6e225f8d29382e8327775dcfe9
+size 134218943

HLLM_Creator/pretrained_model/rank2_user_emb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b236a7fd7e8ca8dafa95a69be64c415a162a87117b916989562410f8fa5b44f
+size 134218943

HLLM_Creator/pretrained_model/rank3_user_emb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc2c4370f30ac08995f219c3c513635cef93b9972d7505143b4d878f7fdd95cb
+size 134218943

HLLM_Creator/pretrained_model/rank4_user_emb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0b9d438fb9ed8883e466adf82e28ba86e8976c9b972996b27e556b1e0906b0f
+size 134218943

HLLM_Creator/pretrained_model/rank5_user_emb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef6cca73a641068cec0f351027f2d3f6553cc7621e5c2f4fc6f3981835a3e19e
+size 134218943

HLLM_Creator/pretrained_model/rank6_user_emb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0669a784b5829b8051d85d2bd8ec59f13fd741e298e49caedaa72166cdddc0a2
+size 134218943

HLLM_Creator/pretrained_model/rank7_user_emb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc1d48a2bce97be3184d0cf1133f6c31ff79c3ad36432cd7b7311b348f45fd55
+size 134218943

HLLM_Creator/pretrained_model/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654

HLLM_Creator/pretrained_model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a7303fcb1a27ede63134a2cbd61d5282c247ca6d769ce4746d4ffa124aedd63
+size 9675

HLLM_Creator/pretrained_model/vocab.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
+size 2776833

HLLM_Creator/pretrained_model/zero3_merge_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2729c4faf9aa15dffd20539efb63f5da532888a1f4bf2b72045a72aa41b92fb
+size 41916132736

README.md CHANGED Viewed

@@ -3,14 +3,16 @@ license: apache-2.0
 base_model:
 - TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
 - baichuan-inc/Baichuan2-7B-Base
 ---
 # Model Card for HLLM
-[![arXiv](https://img.shields.io/badge/arXiv%20paper-2409.12740-da282a.svg)](https://arxiv.org/abs/2409.12740)
 [![GitHub](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)](https://github.com/bytedance/HLLM)
-This repo is used for hosting HLLM's checkpoints.
 For more details or tutorials see https://github.com/bytedance/HLLM.
@@ -19,7 +21,12 @@ For more details or tutorials see https://github.com/bytedance/HLLM.
 - HLLM effectively transfers the world knowledge encoded during the LLM pre-training stage into the recommendation model, encompassing both item feature extraction and user interest modeling. Nevertheless, task-specific fine-tuning with recommendation objectives is essential.
 - HLLM exhibits excellent scalability, with performance continuously improving as the data volume and model parameters increase. This scalability highlights the potential of the proposed approach when applied to even larger datasets and model sizes.
-## Comparison with state-of-the-art methods
 | Method        | Dataset | Negatives | R@10       | R@50      | R@200     | N@10      | N@50      | N@200     |
 | ------------- | ------- |---------- | ---------- | --------- |---------- | --------- | --------- | --------- |
@@ -42,4 +49,11 @@ author={Junyi Chen and Lu Chi and Bingyue Peng and Zehuan Yuan},
 journal={arXiv preprint arXiv:2409.12740},
 year={2024}
 }
 ```

 base_model:
 - TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
 - baichuan-inc/Baichuan2-7B-Base
+- Qwen/Qwen3-8B
 ---
 # Model Card for HLLM
+[![HLLM](https://img.shields.io/badge/HLLM%20paper-2409.12740-da282a.svg)](https://arxiv.org/abs/2409.12740)
+[![HLLM-Creator](https://img.shields.io/badge/HLLM--Creator%20paper-2508.18118-da282a.svg)](https://arxiv.org/abs/2508.18118)
 [![GitHub](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)](https://github.com/bytedance/HLLM)
+This repo is used for hosting HLLM and HLLM-Creator checkpoints.
 For more details or tutorials see https://github.com/bytedance/HLLM.
 - HLLM effectively transfers the world knowledge encoded during the LLM pre-training stage into the recommendation model, encompassing both item feature extraction and user interest modeling. Nevertheless, task-specific fine-tuning with recommendation objectives is essential.
 - HLLM exhibits excellent scalability, with performance continuously improving as the data volume and model parameters increase. This scalability highlights the potential of the proposed approach when applied to even larger datasets and model sizes.
+HLLM-Creator is designed for personalized creative generation:
+- HLLM-Creator enables precise user interest modeling and fine-grained content personalization.
+- A Chain-of-Thought-based data construction pipeline is developed to expand the personalization space and ensure factual consistency, effectively reducing hallucinations in generated titles.
+- A flexible and efficient inference scheme is developed for large-scale industrial deployment, with significant positive results in Douyin search advertising demonstrating its real-world impact.
+## Comparison with state-of-the-art methods (HLLM)
 | Method        | Dataset | Negatives | R@10       | R@50      | R@200     | N@10      | N@50      | N@200     |
 | ------------- | ------- |---------- | ---------- | --------- |---------- | --------- | --------- | --------- |
 journal={arXiv preprint arXiv:2409.12740},
 year={2024}
 }
+@article{HLLM-Creator,
+title={HLLM-Creator: Hierarchical LLM-based Personalized Creative Generation},
+author={Junyi Chen and Lu Chi and Siliang Xu and Shiwei Ran and Bingyue Peng and Zehuan Yuan},
+journal={arXiv preprint arXiv:2508.18118},
+year={2025}
+}
 ```