TOSSS: A CVE-based Software Security Benchmark for Large Language Models
Status: under submission, 2026
Submitted to a conference. TOSSS is a CVE-based software security benchmark designed to evaluate the capabilities of large language models in identifying and reasoning about software vulnerabilities.
@article{damie2026tosss,
  author     = {Damie, Marc and Ertan, Murat Bilgehan and Essoussi, Domenico and Makhanu, Angela and Peter, Ga{\"e}tan and Wensveen, Roos},
  title      = {{TOSSS}: A {CVE}-based Software Security Benchmark for Large Language Models},
  journal    = {CoRR},
  volume     = {abs/2603.10969},
  year       = {2026},
  url        = {https://doi.org/10.48550/arXiv.2603.10969},
  doi        = {10.48550/arXiv.2603.10969},
  eprinttype = {arXiv},
  eprint     = {2603.10969},
}
