% Conference paper on zoom-in analysis of I/O logs (CCGrid 2019 proceedings).
% The citation key is left as-is so that existing citations keep resolving.
% Author names use the "Last, First" form with plain given names, so no
% literal brace characters appear in the rendered bibliography.
% The acronym I/O is brace-protected in the title against style recasing.
% No series field is given: the only candidate value was identical to booktitle.
@inproceedings{4734b12ea37e4413aba8b7f43cce891a,
  author    = {Wang, Teng and Byna, Suren and Lockwood, Glenn K. and Snyder, Shane and Carns, Philip and Kim, Sunggon and Wright, Nicholas J.},
  title     = {A zoom-in analysis of {I/O} logs to detect root causes of {I/O} performance bottlenecks},
  booktitle = {Proceedings - 19th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing, CCGrid 2019},
  pages     = {102--111},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2019},
  month     = may,
  doi       = {10.1109/CCGRID.2019.00021},
  language  = {English},
  keywords  = {Darshan, IO Analysis, IO Trace, Lustre Monitoring Tools, Slurm},
  abstract  = {Scientific applications frequently spend a large fraction of their execution time in reading and writing data on parallel file systems. Identifying these I/O performance bottlenecks and attributing root causes are critical steps toward devising optimization strategies. Several existing studies analyze I/O logs of a set of benchmarks or applications that were run with controlled behaviors. However, there is still a lack of general approach that systematically identifies I/O performance bottlenecks for applications running 'in the wild' on production systems. In this study, we have developed an analysis approach of 'zooming in' from platform-wide to application-wide to job-level I/O logs for identifying I/O bottlenecks in arbitrary scientific applications. We analyze the logs collected on a Cray XC40 system in production over a two-month period. This study results in several insights for application developers to use in optimizing I/O behavior.},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 19th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing, CCGrid 2019 ; Conference date: 14-05-2019 Through 17-05-2019},
}