Model Card for Search-TTA-Sound
Fine-tuned on laion/clap-htsat-fused
.
Citation
@misc{tan2025searchtta,
  author        = {Tan, Derek Ming Siang and Shailesh and Liu, Boyang and Raj, Alok and Ang, Qi Xuan and Dai, Weiheng and Duhan, Tanishq and Chiun, Jimmy and Cao, Yuhong and Shkurti, Florian and Sartoretti, Guillaume},
  title         = {{Search-TTA}: A Multimodal Test-Time Adaptation Framework for Visual Search in the Wild},
  year          = {2025},
  eprint        = {2505.11350},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2505.11350},
  note          = {Under review},
}
@misc{wu2024largescalecontrastivelanguageaudiopretraining,
  author        = {Yusong Wu and Ke Chen and Tianyu Zhang and Yuchen Hui and Marianna Nezhurina and Taylor Berg-Kirkpatrick and Shlomo Dubnov},
  title         = {Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation},
  year          = {2024},
  eprint        = {2211.06687},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SD},
  url           = {https://arxiv.org/abs/2211.06687},
}
- Downloads last month
- 164
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support