% Conference paper in the ICCAS 2023 proceedings (pp. 238--240).
% The acronym in the title is brace-protected so sentence-casing styles keep it
% upper-case; author names use the unambiguous "Last, First" form.
@inproceedings{af386c17386547a39a8eb0edaf98446f,
  author    = {Wang, Yao and Ha, Jong Eun},
  title     = {{DETR} with Additional Object Instance-Specific Features for Encoder},
  booktitle = {23rd International Conference on Control, Automation and Systems, ICCAS 2023},
  series    = {International Conference on Control, Automation and Systems},
  pages     = {238--240},
  publisher = {IEEE Computer Society},
  year      = {2023},
  doi       = {10.23919/ICCAS59377.2023.10316851},
  language  = {English},
  keywords  = {Deep Learning, DETR, Object Detection, Transformer},
  abstract  = {This paper focuses on the process of developing from a convolutional neural network (CNN)-based target detection method to a transformer-based DETR paradigm-based target detection method. DETR adopts a Transformer-based end-to-end detection method, and it does not use the traditional anchor box and non-maximum suppression by transforming target detection into a set prediction problem. DETR has shown competitive results on public datasets and brought new ideas and methods to the field of object detection. We observed that DETR and DETE-like models include backbone and encoder that have same effect on the image, that is, they both did the same feature extraction function. We propose to add additional embedding module, which represents the full class information, and establishes global attention between feature tokens to provide prior knowledge for the extractor.},
  note      = {Publisher Copyright: {\textcopyright} 2023 ICROS.; 23rd International Conference on Control, Automation and Systems, ICCAS 2023 ; Conference date: 17-10-2023 Through 20-10-2023},
}