cfaed Publications
OCC: An Automated End-to-End Machine Learning Optimizing Compiler for Computing-In-Memory
Reference
Adam Siemieniuk, Lorenzo Chelini, Asif Ali Khan, Jeronimo Castrillon, Andi Drebes, Henk Corporaal, Tobias Grosser, Martin Kong, "OCC: An Automated End-to-End Machine Learning Optimizing Compiler for Computing-In-Memory", In IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD), IEEE Press, vol. 41, no. 6, pp. 1674-1686, Aug 2021. [doi]
Bibtex
@Article{khan_tcad21,
author = {Adam Siemieniuk and Lorenzo Chelini and Asif Ali Khan and Jeronimo Castrillon and Andi Drebes and Henk Corporaal and Tobias Grosser and Martin Kong},
journal = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD)},
title = {{OCC}: An Automated End-to-End Machine Learning Optimizing Compiler for Computing-In-Memory},
month = aug,
volume = {41},
number = {6},
pages = {1674--1686},
numpages = {12},
doi = {10.1109/TCAD.2021.3101464},
issn = {1937-4151},
url = {https://ieeexplore.ieee.org/document/9502921},
publisher = {IEEE Press},
year = {2021},
abstract = {Memristive devices promise an alternative approach toward non-von Neumann architectures, in which specific computational tasks are performed within the memory devices. In the machine learning (ML) domain, crossbar arrays of resistive devices have shown great promise for ML inference, as they allow for hardware acceleration of matrix multiplications. However, to enable widespread adoption of these novel architectures, it is critical to have an automatic compilation flow rather than relying on a manual mapping of specific kernels onto the crossbar arrays. We demonstrate the programmability of memristor-based accelerators using the new compiler design principle of multilevel rewriting, where a hierarchy of abstractions lowers programs level by level and performs code transformations at the most suitable abstraction. In particular, we develop a prototype compiler that progressively lowers a mathematical notation for tensor operations arising in ML workloads to fixed-function memristor-based hardware blocks.},
}
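The abstract describes mapping ML matrix multiplications onto fixed-size memristive crossbar arrays through automatic, progressive lowering rather than manual kernel mapping. The following minimal NumPy sketch is not part of the publication and does not reproduce the OCC compiler flow; the crossbar dimensions (128x128) and the names crossbar_mvm and tiled_matmul are illustrative assumptions. It only shows the basic tiling step such a lowering performs: a large matrix-vector product is split into crossbar-sized blocks whose partial results are accumulated.

import numpy as np

# Hypothetical crossbar dimensions; on real hardware these are fixed by the device.
XBAR_ROWS, XBAR_COLS = 128, 128

def crossbar_mvm(weight_tile, input_tile):
    # Stand-in for one analog matrix-vector multiply on a single crossbar,
    # emulated here with NumPy instead of a fixed-function hardware block.
    return weight_tile @ input_tile

def tiled_matmul(W, x):
    # Lower a full matrix-vector product onto crossbar-sized tiles and
    # accumulate the partial results of each tile.
    m, k = W.shape
    y = np.zeros(m)
    for i in range(0, m, XBAR_ROWS):
        for j in range(0, k, XBAR_COLS):
            w_tile = W[i:i + XBAR_ROWS, j:j + XBAR_COLS]
            x_tile = x[j:j + XBAR_COLS]
            y[i:i + XBAR_ROWS] += crossbar_mvm(w_tile, x_tile)
    return y

# Example: a 512x512 weight matrix mapped onto 128x128 crossbar tiles.
W = np.random.rand(512, 512)
x = np.random.rand(512)
assert np.allclose(tiled_matmul(W, x), W @ x)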
Downloads
2107_Khan_TCAD [PDF]
Permalink
https://cfaed.tu-dresden.de/publications?pubId=3157