@inproceedings{a804e34fc4414c388c250ff4c8ec595d,
  title     = {Improving {Vision Transformer} with Multi-Task Training},
  abstract  = {Self-supervised learning methods have shown excellent performance in improving the performance of existing networks by learning visual representations from large amounts of unlabeled data. In this paper, we propose a end-to-end multi-task self-supervision method for vision transformer. The network is given two task: inpainting, position prediction. Given a masked image, the network predicts the missing pixel information and also predicts the position of the given puzzle patches. Through classification experiment, we demonstrate that the proposed method improves performance of the network compared to the direct supervised learning method.},
  keywords  = {Deep Learning, Self-Supervision, Vision Transformer},
  author    = {Ahn, Woo Jin and Yang, Geun Yeong and Choi, Hyun Duck and Lim, Myo Taeg and Kang, Tae Koo},
  note      = {Publisher Copyright: {\textcopyright} 2022 ICROS.; 22nd International Conference on Control, Automation and Systems, ICCAS 2022 ; Conference date: 27-11-2022 Through 01-12-2022},
  year      = {2022},
  doi       = {10.23919/ICCAS55662.2022.10003833},
  language  = {English},
  series    = {International Conference on Control, Automation and Systems},
  publisher = {IEEE Computer Society},
  pages     = {1963--1965},
  booktitle = {2022 22nd International Conference on Control, Automation and Systems, ICCAS 2022},
}