@inproceedings{0bcb0482671a432da81437c2b07e28c0,
  title     = {{ActionNet-VE} Dataset: A Dataset for Describing Visual Events by Extending {VIRAT Ground 2.0}},
  abstract  = {This paper introduces a dataset for recognizing and describing interactive events between objects of interest including persons, cars, bikes, and carried objects. Although there have been many video datasets for human activity recognition, most of them focus on persons and their actions and sometimes ignore the specific information on related objects, such as their object type and minimum bounding boxes, in annotations. ActionNet-VE dataset was designed to include full annotations on all objects and events of interest occurred in a video clip for describing the semantics of the event. The dataset adopt 75 video clips from VIRAT Ground 2.0, and extend annotations on the events and their related objects. In addition, the dataset describes semantics of each events by using elements of sentences, such as verb, subject, and objects.},
  keywords  = {VIRAT, interactive events, video dataset, video interpretation, visual events},
  author    = {Moon, Jinyoung and Kwon, Yongjin and Kang, Kyuchang and Park, Jongyoul},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; 8th International Conference on Signal Processing, Image Processing and Pattern Recognition, SIP 2015; Conference date: 25-11-2015 Through 28-11-2015},
  year      = {2016},
  month     = mar,
  day       = {11},
  doi       = {10.1109/SIP.2015.9},
  language  = {English},
  series    = {Proceedings - 8th International Conference on Signal Processing, Image Processing and Pattern Recognition, SIP 2015},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {1--4},
  editor    = {Kang, Byeong-Ho and Ramos, Carlos},
  booktitle = {Proceedings - 8th International Conference on Signal Processing, Image Processing and Pattern Recognition, SIP 2015},
}