Model Card for Search-TTA-Sound
Fine-tuned on laion/clap-htsat-fused.
Citation
@inproceedings{tan2025searchtta,
  author    = {Tan, Derek Ming Siang and Shailesh and Liu, Boyang and Raj, Alok and Ang, Qi Xuan and Dai, Weiheng and Duhan, Tanishq and Chiun, Jimmy and Cao, Yuhong and Shkurti, Florian and Sartoretti, Guillaume},
  title     = {{Search-TTA}: A Multimodal Test-Time Adaptation Framework for Visual Search in the Wild},
  booktitle = {Conference on Robot Learning},
  year      = {2025},
  url       = {https://arxiv.org/abs/2505.11350},
}
@misc{wu2024largescalecontrastivelanguageaudiopretraining,
  author        = {Wu, Yusong and Chen, Ke and Zhang, Tianyu and Hui, Yuchen and Nezhurina, Marianna and Berg-Kirkpatrick, Taylor and Dubnov, Shlomo},
  title         = {Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation},
  year          = {2024},
  eprint        = {2211.06687},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SD},
  url           = {https://arxiv.org/abs/2211.06687},
}
- Downloads last month
- 428
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support