Betancourt, F., Wong, K., Asemota, E., Marshall, Q., Nichols, D., Tomov, S. "openDIEL: A Parallel Workflow Engine and Data Analytics Framework," In Practice and Experience in Advanced Research Computing (PEARC ’19), ACM, Chicago, IL, July 28-August 1, 2019 [pdf] [bibtex]
% Conference paper (PEARC '19). Typed as inproceedings so booktitle is honored;
% institution is kept only as an affiliation annotation.
@inproceedings{icl:970,
  author      = {Betancourt, F. and Wong, K. and Asemota, E. and Marshall, Q. and Nichols, D. and Tomov, S.},
  title       = {{openDIEL}: A Parallel Workflow Engine and Data Analytics Framework},
  booktitle   = {Practice and Experience in Advanced Research Computing ({PEARC} '19)},
  publisher   = {ACM},
  address     = {Chicago, IL},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jul,
  year        = {2019},
}
Nichols, D., Wong, K., Tomov, S., Ng, L., Chen, S., Gessinger, A. "MagmaDNN: Accelerated Deep Learning Using MAGMA,"In Practice and Experience in Advanced Research Computing (PEARC ’19), ACM, Chicago, IL, July 28-August 1, 2019 [pdf] [bibtex]
% Conference paper (PEARC '19). Typed as inproceedings so booktitle is honored;
% institution is kept only as an affiliation annotation.
@inproceedings{icl:971,
  author      = {Nichols, D. and Wong, K. and Tomov, S. and Ng, L. and Chen, S. and Gessinger, A.},
  title       = {{MagmaDNN}: Accelerated Deep Learning Using {MAGMA}},
  booktitle   = {Practice and Experience in Advanced Research Computing ({PEARC} '19)},
  publisher   = {ACM},
  address     = {Chicago, IL},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jul,
  year        = {2019},
}
Wong, K., Tomov, S., Dongarra, J. "Hands-on Research and Training in High-Performance Data Sciences, Data Analytics, and Machine Learning for Emerging Environments," ISC High Performance 2019, “HPC Education and Training for Emerging Technologies” workshop, Springer International Publishing, Frankfurt, Germany, June 20, 2019 [pdf] [bibtex]
% ISC 2019 workshop paper. The workshop name uses TeX quotes instead of the
% mismatched straight/curly pair.
@inproceedings{icl:967,
  author      = {Wong, K. and Tomov, S. and Dongarra, J.},
  title       = {Hands-on Research and Training in High-Performance Data Sciences, Data Analytics, and Machine Learning for Emerging Environments},
  booktitle   = {{ISC} High Performance 2019, ``{HPC} Education and Training for Emerging Technologies'' workshop},
  publisher   = {Springer International Publishing},
  address     = {Frankfurt, Germany},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2019},
}
Nichols, D., Tomov, N.-S., Betancourt, F., Tomov, S., Wong, K., Dongarra, J. "MagmaDNN: Towards High-Performance Data Analytics and Machine Learning for Data-Driven Scientific Computing," ISC High Performance 2019, “Scalable Data Analytics in Scientific Computing” workshop, Springer International Publishing, Frankfurt, Germany, June 20, 2019 [pdf] [bibtex]
% ISC 2019 workshop paper. The workshop name uses TeX quotes instead of the
% mismatched straight/curly pair.
@inproceedings{icl:968,
  author      = {Nichols, D. and Tomov, N.-S. and Betancourt, F. and Tomov, S. and Wong, K. and Dongarra, J.},
  title       = {{MagmaDNN}: Towards High-Performance Data Analytics and Machine Learning for Data-Driven Scientific Computing},
  booktitle   = {{ISC} High Performance 2019, ``Scalable Data Analytics in Scientific Computing'' workshop},
  publisher   = {Springer International Publishing},
  address     = {Frankfurt, Germany},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2019},
}
Abdelfattah, A., Tomov, S., Dongarra, J. "Fast Batched Matrix Multiplication for Small Sizes using Half Precision Arithmetic on GPUs,"33rd IEEE International Parallel and Distributed Processing Symposium (IPDPS), IEEE, Rio de Janeiro, Brazil, May 20-24, 2019 [bibtex]
% IPDPS 2019 conference paper.
@inproceedings{icl:969,
  author      = {Abdelfattah, A. and Tomov, S. and Dongarra, J.},
  title       = {Fast Batched Matrix Multiplication for Small Sizes using Half Precision Arithmetic on {GPUs}},
  booktitle   = {33rd {IEEE} International Parallel and Distributed Processing Symposium ({IPDPS})},
  publisher   = {IEEE},
  address     = {Rio de Janeiro, Brazil},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = may,
  year        = {2019},
}
Tomov, S., Haidar, A., Ayala, A., Schultz, D., Dongarra, J. "Design and Implementation for FFT-ECP on Distributed Accelerated Systems,"ECP WBS 2.3.3.09 Milestone Report, Innovative Computing Laboratory, University of Tennessee, FFT-ECP ST-MS-10-1410, April 4, 2019 [pdf] [bibtex]
% ECP milestone report: a report, not a journal article. The report series
% goes in type and the report identifier in number (it was stored in volume).
@techreport{icl:966,
  author      = {Tomov, S. and Haidar, A. and Ayala, A. and Schultz, D. and Dongarra, J.},
  title       = {Design and Implementation for {FFT-ECP} on Distributed Accelerated Systems},
  type        = {ECP WBS 2.3.3.09 Milestone Report},
  number      = {FFT-ECP ST-MS-10-1410},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = apr,
  year        = {2019},
}
Tomov, S., Haidar, A., Schultz, D., Dongarra, J. "Evaluation and Design of FFT for Distributed Accelerated Systems,"ECP WBS 2.3.3.09 Milestone Report, Innovative Computing Laboratory, University of Tennessee, FFT-ECP ST-MS-10-1216, October 1, 2018 [pdf] [bibtex]
% ECP milestone report: a report, not a journal article. The report series
% goes in type and the report identifier in number (it was stored in volume).
@techreport{icl:965,
  author      = {Tomov, S. and Haidar, A. and Schultz, D. and Dongarra, J.},
  title       = {Evaluation and Design of {FFT} for Distributed Accelerated Systems},
  type        = {ECP WBS 2.3.3.09 Milestone Report},
  number      = {FFT-ECP ST-MS-10-1216},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = oct,
  year        = {2018},
}
Yamazaki, I., Tomov, S., Dongarra, J. "Sampling Algorithms to Update Truncated SVD,"IEEE International Conference on Big Data, Boston, MA, December 11-14, 2017 [pdf] [bibtex]
% IEEE Big Data 2017 conference paper.
@inproceedings{icl:963,
  author      = {Yamazaki, I. and Tomov, S. and Dongarra, J.},
  title       = {Sampling Algorithms to Update Truncated {SVD}},
  booktitle   = {{IEEE} International Conference on Big Data},
  address     = {Boston, MA},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = dec,
  year        = {2017},
}
Dongarra, J., Haidar, A., Hernandez, O., Tomov, S., Gorentla Venkata, M. "POMPEI: Programming with OpenMP4 for Exascale Investigations,"University of Tennessee Computer Science Technical Report, UT-EECS-17-754, December 7, 2017 [pdf] [bibtex]
% UT technical report. booktitle is not a techreport field, so the series name
% moves to type; the report number comes from the citation line.
@techreport{icl:961,
  author      = {Dongarra, J. and Haidar, A. and Hernandez, O. and Tomov, S. and Gorentla Venkata, M.},
  title       = {{POMPEI}: Programming with {OpenMP4} for Exascale Investigations},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-EECS-17-754},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = dec,
  year        = {2017},
}
Haidar, A., Abdelfattah, A., Zounon, M., Tomov, S., Dongarra, J. "A Guide For Achieving High Performance With Very Small Matrices on GPU: A Case Study of Batched LU and Cholesky Factorizations," IEEE Transactions on Parallel and Distributed Systems, DOI: 10.1109/TPDS.2017.2783929, December, 2017 [bibtex]
% Journal article. The journal name and DOI were jammed into booktitle; they
% are split into journal and doi. Author spelling matches the other entries
% for A. Abdelfattah.
@article{icl:962,
  author      = {Haidar, A. and Abdelfattah, A. and Zounon, M. and Tomov, S. and Dongarra, J.},
  title       = {A Guide For Achieving High Performance With Very Small Matrices on {GPU}: A Case Study of Batched {LU} and {Cholesky} Factorizations},
  journal     = {IEEE Transactions on Parallel and Distributed Systems},
  doi         = {10.1109/TPDS.2017.2783929},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = dec,
  year        = {2017},
}
Haidar, A., Wu, P., Tomov, S., Dongarra, J. "Investigating Half Precision Arithmetic to Accelerate Dense Linear System Solvers,"ScalA17: 8th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems, ACM, Denver, Colorado, November 12-17, 2017 [pdf] [bibtex]
% ScalA17 workshop paper.
@inproceedings{icl:959,
  author      = {Haidar, A. and Wu, P. and Tomov, S. and Dongarra, J.},
  title       = {Investigating Half Precision Arithmetic to Accelerate Dense Linear System Solvers},
  booktitle   = {{ScalA17}: 8th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems},
  publisher   = {ACM},
  address     = {Denver, Colorado},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = nov,
  year        = {2017},
}
Gates, M., Tomov, S., Dongarra, J. "Accelerating the SVD Two Stage Bidiagonal Reduction and Divide and Conquer Using GPUs,"Parallel Computing, 71, November, 2017 [bibtex]
% Journal article. The article type needs journal, which was stored as booktitle.
@article{icl:960,
  author      = {Gates, M. and Tomov, S. and Dongarra, J.},
  title       = {Accelerating the {SVD} Two Stage Bidiagonal Reduction and Divide and Conquer Using {GPUs}},
  journal     = {Parallel Computing},
  volume      = {71},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = nov,
  year        = {2017},
}
Haidar, A., Jagode, H., YarKhan, A., Vaccaro, P., Tomov, S., Dongarra, J. "Power-aware Computing: Measurement, Control, and Performance Analysis for Intel Xeon Phi," 2017 IEEE High Performance Extreme Computing Conference (HPEC'17), Best Paper Finalist, IEEE, Waltham, MA, September 12-14, 2017 [pdf] [bibtex]
% HPEC'17 conference paper. The last two authors were separated by a comma,
% which BibTeX cannot parse as two names; they are now joined by "and".
% The award remark moves out of booktitle into note.
@inproceedings{icl:955,
  author      = {Haidar, A. and Jagode, H. and YarKhan, A. and Vaccaro, P. and Tomov, S. and Dongarra, J.},
  title       = {Power-aware Computing: Measurement, Control, and Performance Analysis for {Intel} {Xeon} {Phi}},
  booktitle   = {2017 {IEEE} High Performance Extreme Computing Conference ({HPEC}'17)},
  publisher   = {IEEE},
  address     = {Waltham, MA},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = sep,
  year        = {2017},
  note        = {Best Paper Finalist},
}
Haidar, A., Kabir, K., Fayad, D., Tomov, S., Dongarra, J. "Out Of Memory SVD Solver for Big Data,"2017 IEEE High Performance Extreme Computing Conference (HPEC'17), IEEE, Waltham, MA, September 12-14, 2017 [pdf] [bibtex]
% HPEC'17 conference paper.
@inproceedings{icl:956,
  author      = {Haidar, A. and Kabir, K. and Fayad, D. and Tomov, S. and Dongarra, J.},
  title       = {Out Of Memory {SVD} Solver for Big Data},
  booktitle   = {2017 {IEEE} High Performance Extreme Computing Conference ({HPEC}'17)},
  publisher   = {IEEE},
  address     = {Waltham, MA},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = sep,
  year        = {2017},
}
Kabir, K., Haidar, A., Tomov, S., Bouteiller, A., Dongarra, J. "A Framework for Out of Memory SVD Algorithms,"ISC High Performance 2017, Springer International Publishing, Frankfurt, Germany, pp. 158-178, June 19-21, 2017 [pdf] [bibtex]
% ISC 2017 conference paper (was typed as article with a booktitle).
% NOTE(review): same work as icl:927; both keys are kept so existing citations resolve.
@inproceedings{icl:926,
  author      = {Kabir, K. and Haidar, A. and Tomov, S. and Bouteiller, A. and Dongarra, J.},
  title       = {A Framework for Out of Memory {SVD} Algorithms},
  booktitle   = {{ISC} High Performance 2017},
  publisher   = {Springer International Publishing},
  address     = {Frankfurt, Germany},
  pages       = {158--178},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2017},
}
Kabir, K., Haidar, A., Tomov, S., Bouteiller, A., Dongarra, J. "A Framework for Out of Memory SVD Algorithms,"ISC High Performance 2017, Springer International Publishing, Frankfurt, Germany, pp. 158-178, June 19-21, 2017 [pdf] [bibtex]
% ISC 2017 conference paper (was typed as article with a booktitle).
% NOTE(review): same work as icl:926; both keys are kept so existing citations resolve.
@inproceedings{icl:927,
  author      = {Kabir, K. and Haidar, A. and Tomov, S. and Bouteiller, A. and Dongarra, J.},
  title       = {A Framework for Out of Memory {SVD} Algorithms},
  booktitle   = {{ISC} High Performance 2017},
  publisher   = {Springer International Publishing},
  address     = {Frankfurt, Germany},
  pages       = {158--178},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2017},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Novel HPC Techniques to Batch Execution of Many Variable Size BLAS Computations on GPUs,"International Conference on Supercomputing (ICS'17), ACM, Chicago, Illinois, pp. 1-10, June 14-16, 2017 [bibtex]
% ICS'17 conference paper. pages holds the bare range; the style adds "pp.".
% NOTE(review): same work as icl:929; both keys are kept so existing citations resolve.
@inproceedings{icl:928,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Novel {HPC} Techniques to Batch Execution of Many Variable Size {BLAS} Computations on {GPUs}},
  booktitle   = {International Conference on Supercomputing ({ICS}'17)},
  publisher   = {ACM},
  address     = {Chicago, Illinois},
  pages       = {1--10},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2017},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Novel HPC Techniques to Batch Execution of Many Variable Size BLAS Computations on GPUs,"International Conference on Supercomputing (ICS'17), ACM, Chicago, Illinois, pp. 1-10, June 14-16, 2017 [pdf] [bibtex]
% ICS'17 conference paper. pages holds the bare range; the style adds "pp.".
% NOTE(review): same work as icl:928; both keys are kept so existing citations resolve.
@inproceedings{icl:929,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Novel {HPC} Techniques to Batch Execution of Many Variable Size {BLAS} Computations on {GPUs}},
  booktitle   = {International Conference on Supercomputing ({ICS}'17)},
  publisher   = {ACM},
  address     = {Chicago, Illinois},
  pages       = {1--10},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2017},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Factorization and Inversion of a Million Matrices using GPUs: Challenges and Countermeasures,"International Conference on Computational Science (ICCS'17), Zurich, Switzerland, pp. 606-615, June 12-14, 2017 [pdf] [bibtex]
% ICCS'17 conference paper. pages holds the bare range; the style adds "pp.".
% NOTE(review): same work as icl:931; both keys are kept so existing citations resolve.
@inproceedings{icl:930,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Factorization and Inversion of a Million Matrices using {GPUs}: Challenges and Countermeasures},
  booktitle   = {International Conference on Computational Science ({ICCS}'17)},
  address     = {Zurich, Switzerland},
  pages       = {606--615},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2017},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Factorization and Inversion of a Million Matrices using GPUs: Challenges and Countermeasures,"International Conference on Computational Science (ICCS'17), Zurich, Switzerland, pp. 606-615, June 12-14, 2017 [pdf] [bibtex]
% ICCS'17 conference paper. pages holds the bare range; the style adds "pp.".
% NOTE(review): same work as icl:930; both keys are kept so existing citations resolve.
@inproceedings{icl:931,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Factorization and Inversion of a Million Matrices using {GPUs}: Challenges and Countermeasures},
  booktitle   = {International Conference on Computational Science ({ICCS}'17)},
  address     = {Zurich, Switzerland},
  pages       = {606--615},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2017},
}
Dong, T., Haidar, A., Tomov, S., Dongarra, J. "Optimizing the SVD Bidiagonalization Process for a Batch of Small Matrices,"International Conference on Computational Science (ICCS'17), Zurich, Switzerland, pp. 1008-1018, June 12-14, 2017 [pdf] [bibtex]
% ICCS'17 conference paper. pages holds the bare range; the style adds "pp.".
% NOTE(review): same work as icl:933; both keys are kept so existing citations resolve.
@inproceedings{icl:932,
  author      = {Dong, T. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Optimizing the {SVD} Bidiagonalization Process for a Batch of Small Matrices},
  booktitle   = {International Conference on Computational Science ({ICCS}'17)},
  address     = {Zurich, Switzerland},
  pages       = {1008--1018},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2017},
}
Dong, T., Haidar, A., Tomov, S., Dongarra, J. "Optimizing the SVD Bidiagonalization Process for a Batch of Small Matrices,"International Conference on Computational Science (ICCS'17), Zurich, Switzerland, pp. 1008-1018, June 12-14, 2017 [pdf] [bibtex]
% ICCS'17 conference paper. pages holds the bare range; the style adds "pp.".
% NOTE(review): same work as icl:932; both keys are kept so existing citations resolve.
@inproceedings{icl:933,
  author      = {Dong, T. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Optimizing the {SVD} Bidiagonalization Process for a Batch of Small Matrices},
  booktitle   = {International Conference on Computational Science ({ICCS}'17)},
  address     = {Zurich, Switzerland},
  pages       = {1008--1018},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2017},
}
Yamazaki, I., Nooshabadi, S., Tomov, S., Dongarra, J. "Structure-aware Linear Solver for Realtime Convex Optimization for Embedded Systems,"IEEE Embedded Systems Letters, IEEE, Vol. PP, No. 99, May 2, 2017 [pdf] [bibtex]
% Journal article. journal replaces booktitle; "Vol. PP, No. 99" is split into
% volume and number without the hard-coded prefixes.
% NOTE(review): same work as icl:937; both keys are kept so existing citations resolve.
@article{icl:936,
  author      = {Yamazaki, I. and Nooshabadi, S. and Tomov, S. and Dongarra, J.},
  title       = {Structure-aware Linear Solver for Realtime Convex Optimization for Embedded Systems},
  journal     = {IEEE Embedded Systems Letters},
  volume      = {PP},
  number      = {99},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = may,
  year        = {2017},
}
Yamazaki, I., Nooshabadi, S., Tomov, S., Dongarra, J. "Structure-aware Linear Solver for Realtime Convex Optimization for Embedded Systems,"IEEE Embedded Systems Letters, IEEE, Vol. PP, No. 99, May 2, 2017 [pdf] [bibtex]
% Journal article. journal replaces booktitle; "Vol. PP, No. 99" is split into
% volume and number without the hard-coded prefixes.
% NOTE(review): same work as icl:936; both keys are kept so existing citations resolve.
@article{icl:937,
  author      = {Yamazaki, I. and Nooshabadi, S. and Tomov, S. and Dongarra, J.},
  title       = {Structure-aware Linear Solver for Realtime Convex Optimization for Embedded Systems},
  journal     = {IEEE Embedded Systems Letters},
  volume      = {PP},
  number      = {99},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = may,
  year        = {2017},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Fast Cholesky Factorization on GPUs for Batch and Native Modes in MAGMA,"Journal of Computational Science, Elsevier, Vol. 20, 85-93, May, 2017 [bibtex]
% Journal article. journal replaces booktitle; volume is a bare number and
% pages uses an en-dash range.
% NOTE(review): same work as icl:952; both keys are kept so existing citations resolve.
@article{icl:951,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Fast {Cholesky} Factorization on {GPUs} for Batch and Native Modes in {MAGMA}},
  journal     = {Journal of Computational Science},
  volume      = {20},
  pages       = {85--93},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = may,
  year        = {2017},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Fast Cholesky Factorization on GPUs for Batch and Native Modes in MAGMA,"Journal of Computational Science, Elsevier, Vol. 20, 85-93, May, 2017 [bibtex]
% Journal article. journal replaces booktitle; volume is a bare number and
% pages uses an en-dash range.
% NOTE(review): same work as icl:951; both keys are kept so existing citations resolve.
@article{icl:952,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Fast {Cholesky} Factorization on {GPUs} for Batch and Native Modes in {MAGMA}},
  journal     = {Journal of Computational Science},
  volume      = {20},
  pages       = {85--93},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = may,
  year        = {2017},
}
Abdelfattah, A., Baboulin, M., Dobrev, V., Dongarra, J., Haidar, A., Karlin, I., Kolev, Tz., Masliah, I., Tomov, S. "Small Tensor Operations on Advanced Architectures for High-order Applications,"University of Tennessee Computer Science Technical Report, UT-EECS-17-749, April 18, 2017 [pdf] [bibtex]
% UT technical report. The series name moves from booktitle to type; the
% report number comes from the citation line.
% NOTE(review): same work as icl:925; both keys are kept so existing citations resolve.
@techreport{icl:924,
  author      = {Abdelfattah, A. and Baboulin, M. and Dobrev, V. and Dongarra, J. and Haidar, A. and Karlin, I. and Kolev, Tz. and Masliah, I. and Tomov, S.},
  title       = {Small Tensor Operations on Advanced Architectures for High-order Applications},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-EECS-17-749},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = apr,
  year        = {2017},
}
Abdelfattah, A., Baboulin, M., Dobrev, V., Dongarra, J., Haidar, A., Karlin, I., Kolev, Tz., Masliah, I., Tomov, S. "Small Tensor Operations on Advanced Architectures for High-order Applications,"University of Tennessee Computer Science Technical Report, UT-EECS-17-749, April 18, 2017 [pdf] [bibtex]
% UT technical report. The series name moves from booktitle to type; the
% report number comes from the citation line.
% NOTE(review): same work as icl:924; both keys are kept so existing citations resolve.
@techreport{icl:925,
  author      = {Abdelfattah, A. and Baboulin, M. and Dobrev, V. and Dongarra, J. and Haidar, A. and Karlin, I. and Kolev, Tz. and Masliah, I. and Tomov, S.},
  title       = {Small Tensor Operations on Advanced Architectures for High-order Applications},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-EECS-17-749},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = apr,
  year        = {2017},
}
Haidar, A., Abdelfattah, A., Tomov, S., Dongarra, J. "High-performance Cholesky Factorization for GPU-only Execution," Proceedings of the General Purpose GPUs (GPGPU-10), ACM, Austin, TX, pp. 42-52, February 5, 2017 [pdf] [bibtex]
% GPGPU-10 workshop paper. Author spelling matches the other entries for
% A. Abdelfattah; pages holds the bare range.
% NOTE(review): same work as icl:935; both keys are kept so existing citations resolve.
@inproceedings{icl:934,
  author      = {Haidar, A. and Abdelfattah, A. and Tomov, S. and Dongarra, J.},
  title       = {High-performance {Cholesky} Factorization for {GPU}-only Execution},
  booktitle   = {Proceedings of the General Purpose {GPUs} ({GPGPU-10})},
  publisher   = {ACM},
  address     = {Austin, TX},
  pages       = {42--52},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = feb,
  year        = {2017},
}
Haidar, A., Abdelfattah, A., Tomov, S., Dongarra, J. "High-performance Cholesky Factorization for GPU-only Execution," Proceedings of the General Purpose GPUs (GPGPU-10), ACM, Austin, TX, pp. 42-52, February 5, 2017 [pdf] [bibtex]
% GPGPU-10 workshop paper. Author spelling matches the other entries for
% A. Abdelfattah; pages holds the bare range.
% NOTE(review): same work as icl:934; both keys are kept so existing citations resolve.
@inproceedings{icl:935,
  author      = {Haidar, A. and Abdelfattah, A. and Tomov, S. and Dongarra, J.},
  title       = {High-performance {Cholesky} Factorization for {GPU}-only Execution},
  booktitle   = {Proceedings of the General Purpose {GPUs} ({GPGPU-10})},
  publisher   = {ACM},
  address     = {Austin, TX},
  pages       = {42--52},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = feb,
  year        = {2017},
}
Baboulin, M., Dongarra, J., Remy, A., Tomov, S., Yamazaki, I. "Solving dense symmetric indefinite systems using GPUs,"Concurrency and Computation: Practice and Experience, Special Issues on Parallel Processing and Applied Mathematics (PPAM'15) eds. Vol. 29, Issue 9, 2017 [bibtex]
% Journal article. journal replaces booktitle; "Vol. 29, Issue 9" is split into
% volume and number. The special-issue remark comes from the citation line.
@article{icl:923,
  author      = {Baboulin, M. and Dongarra, J. and Remy, A. and Tomov, S. and Yamazaki, I.},
  title       = {Solving dense symmetric indefinite systems using {GPUs}},
  journal     = {Concurrency and Computation: Practice and Experience},
  volume      = {29},
  number      = {9},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2017},
  note        = {Special Issue on Parallel Processing and Applied Mathematics ({PPAM}'15)},
}
Lopez, M., Larrea, V., Joubert, W., Hernandez, O., Haidar, A., Tomov, S., Dongarra, J. "Evaluation of Directive-based Performance Portable Programming Models,"International Journal of High Performance Computing and Networking (IJHPCN), (In Press), 2017 [bibtex]
% Journal article. journal replaces booktitle; the publication state belongs
% in note, not volume.
% NOTE(review): same work as icl:939; both keys are kept so existing citations resolve.
@article{icl:938,
  author      = {Lopez, M. and Larrea, V. and Joubert, W. and Hernandez, O. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Evaluation of Directive-based Performance Portable Programming Models},
  journal     = {International Journal of High Performance Computing and Networking (IJHPCN)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2017},
  note        = {In press},
}
Lopez, M., Larrea, V., Joubert, W., Hernandez, O., Haidar, A., Tomov, S., Dongarra, J. "Evaluation of Directive-based Performance Portable Programming Models,"International Journal of High Performance Computing and Networking (IJHPCN), (In Press), 2017 [bibtex]
% Journal article. journal replaces booktitle; the publication state belongs
% in note, not volume.
% NOTE(review): same work as icl:938; both keys are kept so existing citations resolve.
@article{icl:939,
  author      = {Lopez, M. and Larrea, V. and Joubert, W. and Hernandez, O. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Evaluation of Directive-based Performance Portable Programming Models},
  journal     = {International Journal of High Performance Computing and Networking (IJHPCN)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2017},
  note        = {In press},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Fast Cholesky Factorization on GPUs for Batch and Native Modes in MAGMA," University of Tennessee Computer Science Technical Report, UT-EECS-16-748, December 28, 2016 [pdf] [bibtex]
% UT technical report. The series name moves from booktitle to type; the
% report number comes from the citation line. Author spelling matches the
% other entries for A. Abdelfattah.
@techreport{icl:918,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Fast {Cholesky} Factorization on {GPUs} for Batch and Native Modes in {MAGMA}},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-EECS-16-748},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = dec,
  year        = {2016},
}
Haidar, A., Abdelfattah, A., Tomov, S., Dongarra, J. "High-performance Cholesky factorization for GPU-only execution," University of Tennessee Computer Science Technical Report, UT-EECS-16-747, December 26, 2016 [pdf] [bibtex]
% UT technical report. The series name moves from booktitle to type; the
% report number comes from the citation line. Author spelling matches the
% other entries for A. Abdelfattah.
@techreport{icl:917,
  author      = {Haidar, A. and Abdelfattah, A. and Tomov, S. and Dongarra, J.},
  title       = {High-performance {Cholesky} factorization for {GPU}-only execution},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-EECS-16-747},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = dec,
  year        = {2016},
}
Lopez, M., Larrea, V., Joubert, W., Hernandez, O., Haidar, A., Tomov, S., Dongarra, J. "Towards Achieving Performance Portability Using Directives for Accelerators,"The International Conference for High Performance Computing, Networking, Storage and Analysis (SC'16), Third Workshop on Accelerator Programming Using Directives (WACCPD), Salt Lake City, Utah, November 13-18, 2016 [pdf] [bibtex]
% SC'16 WACCPD workshop paper.
@inproceedings{icl:915,
  author      = {Lopez, M. and Larrea, V. and Joubert, W. and Hernandez, O. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Towards Achieving Performance Portability Using Directives for Accelerators},
  booktitle   = {The International Conference for High Performance Computing, Networking, Storage and Analysis ({SC}'16), Third Workshop on Accelerator Programming Using Directives ({WACCPD})},
  address     = {Salt Lake City, Utah},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = nov,
  year        = {2016},
}
Haidar, A., Tomov, S., Arturov, K., Guney, M., Story, S., Dongarra, J. "LU, QR, and Cholesky Factorizations: Programming Model, Performance Analysis and Optimization Techniques for the Intel Knights Landing Xeon Phi,"IEEE High Performance Extreme Computing Conference (HPEC'16), Waltham, MA, September 13-15, 2016 [bibtex]
% HPEC'16 conference paper. Algorithm and product names are brace-protected so
% sentence-casing styles do not lowercase them.
@inproceedings{icl:911,
  author      = {Haidar, A. and Tomov, S. and Arturov, K. and Guney, M. and Story, S. and Dongarra, J.},
  title       = {{LU}, {QR}, and {Cholesky} Factorizations: Programming Model, Performance Analysis and Optimization Techniques for the {Intel} {Knights} {Landing} {Xeon} {Phi}},
  booktitle   = {{IEEE} High Performance Extreme Computing Conference ({HPEC}'16)},
  address     = {Waltham, MA},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = sep,
  year        = {2016},
}
Haidar, A., Brock, B., Tomov, S., Guidry, M., Billings, J., Shyles, D., Dongarra, J. "Performance Analysis and Acceleration of Explicit Integration for Large Kinetic Networks using Batched GPU Computations," 2016 IEEE High Performance Extreme Computing Conference (HPEC ’16), September 13-15, 2016 [pdf] [bibtex]
% HPEC'16 conference paper. The opening curly quote before 16 is replaced by
% an ASCII apostrophe.
@inproceedings{icl:912,
  author      = {Haidar, A. and Brock, B. and Tomov, S. and Guidry, M. and Billings, J. and Shyles, D. and Dongarra, J.},
  title       = {Performance Analysis and Acceleration of Explicit Integration for Large Kinetic Networks using Batched {GPU} Computations},
  booktitle   = {2016 {IEEE} High Performance Extreme Computing Conference ({HPEC}'16)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = sep,
  year        = {2016},
}
Masliah, I., Abdelfattah, A., Haidar, A., Tomov, S., Baboulin, M., Falcou, J., Dongarra, J. "High-performance matrix-matrix multiplications of very small matrices,"22nd International European Conference on Parallel and Distributed Computing (Euro-Par'16), Grenoble, France, August 22-26, 2016 [pdf] [bibtex]
% Euro-Par'16 conference paper.
@inproceedings{icl:910,
  author      = {Masliah, I. and Abdelfattah, A. and Haidar, A. and Tomov, S. and Baboulin, M. and Falcou, J. and Dongarra, J.},
  title       = {High-performance matrix-matrix multiplications of very small matrices},
  booktitle   = {22nd International European Conference on Parallel and Distributed Computing ({Euro-Par}'16)},
  address     = {Grenoble, France},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = aug,
  year        = {2016},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Performance, Design, and Autotuning of Batched GEMM for GPUs,"The International Supercomputing Conference (ISC High Performance 2016), Frankfurt, Germany, June 19-23, 2016 [pdf] [bibtex]
% ISC High Performance 2016 conference paper.
@inproceedings{icl:906,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Performance, Design, and Autotuning of Batched {GEMM} for {GPUs}},
  booktitle   = {The International Supercomputing Conference ({ISC} High Performance 2016)},
  address     = {Frankfurt, Germany},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2016},
}
Abdelfattah, A., Baboulin, M., Dobrev, V., Dongarra, J., Earl, C., Falcou, J., Haidar, A., Karlin, I., Kolev, Tz., Masliah, I., Tomov, S. "High-Performance Tensor Contractions for GPUs,"International Conference on Computational Science (ICCS'16), San Diego, California, U.S.A., June 6-8, 2016 [pdf] [bibtex]
% ICCS'16 conference paper.
@inproceedings{icl:904,
  author      = {Abdelfattah, A. and Baboulin, M. and Dobrev, V. and Dongarra, J. and Earl, C. and Falcou, J. and Haidar, A. and Karlin, I. and Kolev, Tz. and Masliah, I. and Tomov, S.},
  title       = {High-Performance Tensor Contractions for {GPUs}},
  booktitle   = {International Conference on Computational Science ({ICCS}'16)},
  address     = {San Diego, California, U.S.A.},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2016},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Performance Tuning and Optimization Techniques of Fixed and Variable Size Batched Cholesky Factorization on GPUs,"International Conference on Computational Science (ICCS'16), San Diego, California, U.S.A., June 6-8, 2016 [pdf] [bibtex]
% ICCS'16 conference paper.
@inproceedings{icl:905,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Performance Tuning and Optimization Techniques of Fixed and Variable Size Batched {Cholesky} Factorization on {GPUs}},
  booktitle   = {International Conference on Computational Science ({ICCS}'16)},
  address     = {San Diego, California, U.S.A.},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2016},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "On the Development of Variable Size Batched Computation for Heterogeneous Parallel Architectures,"The 17th IEEE International Workshop on Parallel and Distributed Scientific and Engineering Computing (PDSEC 2016), IPDPS 2016, IEEE, Chicago, IL, USA, May 27, 2016 [pdf] [bibtex]
% PDSEC 2016 workshop paper (held with IPDPS 2016). The trailing "IEEE" was
% the publisher appended to booktitle; it now has its own field.
@inproceedings{icl:903,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {On the Development of Variable Size Batched Computation for Heterogeneous Parallel Architectures},
  booktitle   = {The 17th {IEEE} International Workshop on Parallel and Distributed Scientific and Engineering Computing ({PDSEC} 2016), {IPDPS} 2016},
  publisher   = {IEEE},
  address     = {Chicago, IL, USA},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = may,
  year        = {2016},
}
Newburn, CJ., Bansal, G., Wood, M., Crivelli, L., Planas, J., Duran, A., Souza, P., Borges, L., Luszczek, P., Tomov, S., Dongarra, J., Anzt, H., Gates, M., Haidar, A., Jia, Y., Kabir, K., Yamazaki, I., Labarta, J. "Heterogeneous Streaming,"The Sixth International Workshop on Accelerators and Hybrid Exascale Systems (AsHES), IPDPS 2016, IEEE, Chicago, IL, USA, May 23, 2016 [pdf] [bibtex]
% AsHES 2016 workshop paper (held with IPDPS 2016). The trailing "IEEE" was
% the publisher appended to booktitle; it now has its own field.
@inproceedings{icl:902,
  author      = {Newburn, CJ. and Bansal, G. and Wood, M. and Crivelli, L. and Planas, J. and Duran, A. and Souza, P. and Borges, L. and Luszczek, P. and Tomov, S. and Dongarra, J. and Anzt, H. and Gates, M. and Haidar, A. and Jia, Y. and Kabir, K. and Yamazaki, I. and Labarta, J.},
  title       = {Heterogeneous Streaming},
  booktitle   = {The Sixth International Workshop on Accelerators and Hybrid Exascale Systems ({AsHES}), {IPDPS} 2016},
  publisher   = {IEEE},
  address     = {Chicago, IL, USA},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = may,
  year        = {2016},
}
Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J. "Performance, Design, and Autotuning of Batched GEMM for GPUs,"University of Tennessee Computer Science Technical Report, UT-EECS-16-739, February 1, 2016 [pdf] [bibtex]
% UT technical report. The series name moves from booktitle to type; the
% report number comes from the citation line.
@techreport{icl:895,
  author      = {Abdelfattah, A. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Performance, Design, and Autotuning of Batched {GEMM} for {GPUs}},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-EECS-16-739},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = feb,
  year        = {2016},
}
Abdelfattah, A., Baboulin, M., Dobrev, V., Dongarra, J., Earl, C., Falcou, J., Haidar, A., Karlin, I., Kolev, Tz., Masliah, I., Tomov, S. "High-Performance Tensor Contractions for GPUs,"University of Tennessee Computer Science Technical Report, UT-EECS-16-738, January 21, 2016 [pdf] [bibtex]
% UT technical report. The series name moves from booktitle to type; the
% report number comes from the citation line.
@techreport{icl:894,
  author      = {Abdelfattah, A. and Baboulin, M. and Dobrev, V. and Dongarra, J. and Earl, C. and Falcou, J. and Haidar, A. and Karlin, I. and Kolev, Tz. and Masliah, I. and Tomov, S.},
  title       = {High-Performance Tensor Contractions for {GPUs}},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-EECS-16-738},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jan,
  year        = {2016},
}
Yamazaki, I., Tomov, S., Dongarra, J. "Non-GPU-resident Dense Symmetric Indefinite Factorization," Concurrency and Computation: Practice and Experience, 2016 [bibtex]
% Journal article. The author list had a doubled "and and", which BibTeX reads
% as an empty name; journal replaces booktitle.
@article{icl:913,
  author      = {Yamazaki, I. and Tomov, S. and Dongarra, J.},
  title       = {Non-{GPU}-resident Dense Symmetric Indefinite Factorization},
  journal     = {Concurrency and Computation: Practice and Experience},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2016},
}
Haidar, A., Jia, Y., Luszczek, P., Tomov, S., YarKhan, A., Dongarra, J. "Weighted Dynamic Scheduling with Many Parallelism Grains for Offloading of Numerical Workloads to Multiple Varied Accelerators,"Proceedings of the 6th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA'15), ACM, New York, NY, USA, No. 5, November 16, 2015 [pdf] [bibtex]
@inproceedings{icl:909,
  author      = {Haidar, A. and Jia, Y. and Luszczek, P. and Tomov, S. and YarKhan, A. and Dongarra, J.},
  title       = {Weighted Dynamic Scheduling with Many Parallelism Grains for Offloading of Numerical Workloads to Multiple Varied Accelerators},
  booktitle   = {Proceedings of the 6th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA'15)},
  publisher   = {ACM},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  number      = {5},
  address     = {New York, NY, USA},
  month       = nov,
  year        = {2015}
}
Mary, T., Yamazaki, I., Kurzak, J., Luszczek, P., Tomov, S., Dongarra, J. "Performance of Random Sampling for Computing Low-rank Approximations of a Dense Matrix on GPUs,"The International Conference for High Performance Computing, Networking, Storage and Analysis (SC 15), Austin, TX, Nov. 15, 2015 [bibtex]
@inproceedings{icl:881,
  author      = {Mary, T. and Yamazaki, I. and Kurzak, J. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Performance of Random Sampling for Computing Low-rank Approximations of a Dense Matrix on {GPUs}},
  booktitle   = {The International Conference for High Performance Computing, Networking, Storage and Analysis (SC 15)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Austin, TX},
  month       = nov,
  year        = {2015}
}
Yamazaki, I., Tomov, S., Kurzak, J., Dongarra, J., Barlow, J. "Mixed-precision Block Gram Schmidt Orthogonalization,"6th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems, Austin, TX, November, 2015 [bibtex]
@inproceedings{icl:889,
  author      = {Yamazaki, I. and Tomov, S. and Kurzak, J. and Dongarra, J. and Barlow, J.},
  title       = {Mixed-precision Block {Gram} {Schmidt} Orthogonalization},
  booktitle   = {6th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Austin, TX},
  month       = nov,
  year        = {2015}
}
Baboulin, M., Dongarra, J., Remy, A., Tomov, S., Yamazaki, I. "Dense Symmetric Indefinite Factorization on GPU accelerated architectures,"International Conference on Parallel Processing and Applied Mathematics (PPAM), Krakow, Poland, Sep. 6-9, 2015 [bibtex]
@inproceedings{icl:877,
  author      = {Baboulin, M. and Dongarra, J. and Remy, A. and Tomov, S. and Yamazaki, I.},
  title       = {Dense Symmetric Indefinite Factorization on {GPU} accelerated architectures},
  booktitle   = {International Conference on Parallel Processing and Applied Mathematics (PPAM)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Krakow, Poland},
  month       = sep,
  year        = {2015}
}
Haidar, A., Luszczek, P., Tomov, S., Dongarra, J. "Batched Matrix Computations on Hardware Accelerators,"EuroMPI/Asia 2015 Workshop, Bordeaux, France, September, 2015 [bibtex]
@inproceedings{icl:839,
  author      = {Haidar, A. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Batched Matrix Computations on Hardware Accelerators},
  booktitle   = {EuroMPI/Asia 2015 Workshop},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Bordeaux, France},
  month       = sep,
  year        = {2015}
}
Haidar, A., Tomov, S., Luszczek, P., Dongarra, J. "MAGMA Embedded: Towards a Dense Linear Algebra Library for Energy Efficient Extreme Computing,"19th IEEE High Performance Extreme Computing Conference (HPEC 2015), Best Paper Award, IEEE, Waltham, MA, September, 2015 [pdf] [bibtex]
@inproceedings{icl:871,
  author      = {Haidar, A. and Tomov, S. and Luszczek, P. and Dongarra, J.},
  title       = {{MAGMA Embedded}: Towards a Dense Linear Algebra Library for Energy Efficient Extreme Computing},
  booktitle   = {19th IEEE High Performance Extreme Computing Conference (HPEC 2015)},
  publisher   = {IEEE},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Waltham, MA},
  month       = sep,
  year        = {2015},
  note        = {Best Paper Award}
}
YarKhan, A., Haidar, A., Cao, C., Luszczek, P., Tomov, S., Dongarra, J. "Cholesky Across Accelerators,"17th IEEE International Conference on High Performance Computing and Communications (HPCC 2015), IEEE, Elizabeth, NJ, August, 2015 [bibtex]
@inproceedings{icl:870,
  author      = {YarKhan, A. and Haidar, A. and Cao, C. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {{Cholesky} Across Accelerators},
  booktitle   = {17th IEEE International Conference on High Performance Computing and Communications (HPCC 2015)},
  publisher   = {IEEE},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Elizabeth, NJ},
  month       = aug,
  year        = {2015}
}
Kabir, K., Haidar, A., Tomov, S., and Dongarra, J. "On the Design, Development, and Analysis of Optimized Matrix-Vector Multiplication Routines for Coprocessors,"ISC High Performance 2015, Frankfurt, Germany, July 12-16, 2015 [pdf] [bibtex]
@inproceedings{icl:854,
  author      = {Kabir, K. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {On the Design, Development, and Analysis of Optimized Matrix-Vector Multiplication Routines for Coprocessors},
  booktitle   = {ISC High Performance 2015},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Frankfurt, Germany},
  month       = jul,
  year        = {2015}
}
Haidar, A., Dong, T., Tomov, S., Luszczek, P., Dongarra, J. "Framework for Batched and GPU-resident Factorization Algorithms Applied to Block Householder Transformations,"ISC HPC, Springer LNCS, Frankfurt, Germany, July 12-16, 2015 [pdf] [bibtex]
@inproceedings{icl:858,
  author      = {Haidar, A. and Dong, T. and Tomov, S. and Luszczek, P. and Dongarra, J.},
  title       = {Framework for Batched and {GPU-resident} Factorization Algorithms Applied to Block {Householder} Transformations},
  booktitle   = {ISC HPC},
  series      = {Lecture Notes in Computer Science},
  publisher   = {Springer},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Frankfurt, Germany},
  month       = jul,
  year        = {2015}
}
Kabir, K., Haidar, A., Tomov, S., and Dongarra, J. "Performance Analysis and Optimisation of Two-Sided Factorization Algorithms for Heterogeneous Platform,"The International Conference on Computational Science (ICCS 2015), Reykjavík, Iceland, June 1-3, 2015 [pdf] [bibtex]
@inproceedings{icl:853,
  author      = {Kabir, K. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Performance Analysis and Optimisation of Two-Sided Factorization Algorithms for Heterogeneous Platform},
  booktitle   = {The International Conference on Computational Science (ICCS 2015)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Reykjav{\'\i}k, Iceland},
  month       = jun,
  year        = {2015}
}
Kabir, K., Haidar, A., Tomov, S., and Dongarra, J. "Performance Analysis and Design of a Hessenberg Reduction using Stabilized Blocked Elementary Transformations for New Architectures,"The Spring Simulation Multi-Conference 2015 (SpringSim'15), Alexandria, VA, April 12-15, 2015 [pdf] [bibtex]
@inproceedings{icl:852,
  author      = {Kabir, K. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Performance Analysis and Design of a {Hessenberg} Reduction using Stabilized Blocked Elementary Transformations for New Architectures},
  booktitle   = {The Spring Simulation Multi-Conference 2015 (SpringSim'15)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Alexandria, VA},
  month       = apr,
  year        = {2015}
}
Haidar, A., Dong, T., Luszczek, P., Tomov, S., and Dongarra, J. "Batched matrix computations on hardware accelerators based on GPUs,"International Journal of High Performance Computing Applications, Sage Publications, Inc., February 9, 2015 [bibtex]
@article{icl:850,
  author      = {Haidar, A. and Dong, T. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Batched matrix computations on hardware accelerators based on {GPUs}},
  journal     = {International Journal of High Performance Computing Applications},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = feb,
  year        = {2015}
}
Haidar, A., Dong, T., Luszczek, P., Tomov, S., and Dongarra, J. "Optimization for performance and energy for batched matrix computations on GPUs,"GPGPU 2015 Proceedings of the 8th Workshop on General Purpose Processing using GPUs, ACM, San Francisco, CA, pp. 59-69, February 7, 2015 [bibtex]
@inproceedings{icl:849,
  author      = {Haidar, A. and Dong, T. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Optimization for performance and energy for batched matrix computations on {GPUs}},
  booktitle   = {GPGPU 2015 Proceedings of the 8th Workshop on General Purpose Processing using GPUs},
  publisher   = {ACM},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  pages       = {59--69},
  address     = {San Francisco, CA},
  month       = feb,
  year        = {2015}
}
Anzt, H., Tomov, S., Dongarra, J. "Energy efficiency and performance frontiers for sparse computations on GPU supercomputers,"Proceedings of the Sixth International Workshop on Programming Models and Applications for Multicores and Manycores (PMAM '15), ACM, San Francisco, CA, February, 2015 [pdf] [bibtex]
@inproceedings{icl:848,
  author      = {Anzt, H. and Tomov, S. and Dongarra, J.},
  title       = {Energy efficiency and performance frontiers for sparse computations on {GPU} supercomputers},
  booktitle   = {Proceedings of the Sixth International Workshop on Programming Models and Applications for Multicores and Manycores (PMAM '15)},
  publisher   = {ACM},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {San Francisco, CA},
  month       = feb,
  year        = {2015}
}
Haidar, A., Dongarra, J., Kabir, K., Gates, M., Luszczek, P., Tomov, S., Jia, Y. "HPC Programming on Intel Many-Integrated-Core Hardware with MAGMA Port to Xeon Phi,"Scientific Computing, IO Press, Vol. 23, No. 1, January, 2015 [pdf] [bibtex]
@article{icl:826,
  author      = {Haidar, A. and Dongarra, J. and Kabir, K. and Gates, M. and Luszczek, P. and Tomov, S. and Jia, Y.},
  title       = {{HPC} Programming on {Intel} {Many-Integrated-Core} Hardware with {MAGMA} Port to {Xeon} {Phi}},
  journal     = {Scientific Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {23},
  number      = {1},
  month       = jan,
  year        = {2015}
}
Yamazaki, I., Tomov, S., Dongarra, J. "Computing Low-rank Approximation of a Dense Matrix on Multicore CPUs with a GPU and its Application to Solving a Hierarchically Semiseparable Linear System of Equations,"Scientific Programming, Vol. 2015, 2015 [bibtex]
@article{icl:842,
  author      = {Yamazaki, I. and Tomov, S. and Dongarra, J.},
  title       = {Computing Low-rank Approximation of a Dense Matrix on Multicore {CPUs} with a {GPU} and its Application to Solving a Hierarchically Semiseparable Linear System of Equations},
  journal     = {Scientific Programming},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {2015},
  year        = {2015}
}
Yamazaki, I., Tomov, S., and Dongarra, J. "Mixed-Precision Cholesky QR Factorization and its Case Studies on Multicore CPU with Multiple GPUs,"SIAM Journal on Scientific Computing, Vol. 37, No. 3, C307-C330, 2015 [bibtex]
@article{icl:847,
  author      = {Yamazaki, I. and Tomov, S. and Dongarra, J.},
  title       = {Mixed-Precision {Cholesky} {QR} Factorization and its Case Studies on Multicore {CPU} with Multiple {GPUs}},
  journal     = {SIAM Journal on Scientific Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {37},
  number      = {3},
  pages       = {C307--C330},
  year        = {2015}
}
Anzt, H., Sawyer, W., Tomov, S., Luszczek, P., Dongarra, J. "Acceleration of GPU-based Krylov solvers via Data Transfer Reduction,"IJHPCA special issue for ASHES workshop, 2015 [bibtex]
@inproceedings{icl:857,
  author      = {Anzt, H. and Sawyer, W. and Tomov, S. and Luszczek, P. and Dongarra, J.},
  title       = {Acceleration of {GPU-based} {Krylov} solvers via Data Transfer Reduction},
  booktitle   = {IJHPCA special issue for ASHES workshop},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2015}
}
Abalenkovs, M., Abdelfattah, A., Dongarra, J., Gates, M., Haidar, A., Kurzak, J., Luszczek, P., Tomov, S., Yamazaki, I., YarKhan, A. "Parallel Programming Models for Dense Linear Algebra on Heterogeneous Systems,"Supercomputing frontiers and innovations, Vol. 2, No. 4, pp. 67-86, 2015 [pdf] [bibtex]
@article{icl:901,
  author      = {Abalenkovs, M. and Abdelfattah, A. and Dongarra, J. and Gates, M. and Haidar, A. and Kurzak, J. and Luszczek, P. and Tomov, S. and Yamazaki, I. and YarKhan, A.},
  title       = {Parallel Programming Models for Dense Linear Algebra on Heterogeneous Systems},
  journal     = {Supercomputing frontiers and innovations},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {2},
  number      = {4},
  pages       = {67--86},
  year        = {2015}
}
Yamazaki, I., Tomov, S., Dongarra, J. "Deflation Strategies to Improve the Convergence of Communication-Avoiding GMRES,"5th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems, New Orleans, LA, Nov. 17, 2014 [pdf] [bibtex]
@inproceedings{icl:822,
  author      = {Yamazaki, I. and Tomov, S. and Dongarra, J.},
  title       = {Deflation Strategies to Improve the Convergence of Communication-Avoiding {GMRES}},
  booktitle   = {5th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {New Orleans, LA},
  month       = nov,
  year        = {2014}
}
Haidar, A., Cao, C., Yamazaki, I., Dongarra, J., Gates, M., Luszczek, P., Tomov, S. "Performance and Portability with OpenCL for Throughput-Oriented HPC Workloads Across Accelerators, Coprocessors, and Multicore Processors,"Scala 2014, ACM, New Orleans, LA, November 17, 2014 [pdf] [bibtex]
@inproceedings{icl:825,
  author      = {Haidar, A. and Cao, C. and Yamazaki, I. and Dongarra, J. and Gates, M. and Luszczek, P. and Tomov, S.},
  title       = {Performance and Portability with {OpenCL} for Throughput-Oriented {HPC} Workloads Across Accelerators, Coprocessors, and Multicore Processors},
  booktitle   = {Scala 2014},
  publisher   = {ACM},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {New Orleans, LA},
  month       = nov,
  year        = {2014}
}
Yamazaki, I., Rajamanickam, S., Boman, E., Hoemmen, M., Heroux, M., Tomov, S. "Domain Decomposition Preconditioners for Communication-Avoiding Krylov Methods on a Hybrid CPU/GPU Cluster,"The International Conference for High Performance Computing, Networking, Storage and Analysis (SC), New Orleans, LA, November, 2014 [bibtex]
@inproceedings{icl:819,
  author      = {Yamazaki, I. and Rajamanickam, S. and Boman, E. and Hoemmen, M. and Heroux, M. and Tomov, S.},
  title       = {Domain Decomposition Preconditioners for Communication-Avoiding {Krylov} Methods on a Hybrid {CPU/GPU} Cluster},
  booktitle   = {The International Conference for High Performance Computing, Networking, Storage and Analysis (SC)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {New Orleans, LA},
  month       = nov,
  year        = {2014}
}
Anzt, H., Tomov, S., Dongarra, J. "Accelerating the LOBPCG method on GPUs using a blocked Sparse Matrix Vector Product,"University of Tennessee Computer Science Technical Report, University of Tennessee, Knoxville, TN, UT-EECS-14-731, October 17, 2014 [pdf] [bibtex]
@techreport{icl:834,
  author      = {Anzt, H. and Tomov, S. and Dongarra, J.},
  title       = {Accelerating the {LOBPCG} method on {GPUs} using a blocked Sparse Matrix Vector Product},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-EECS-14-731},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Knoxville, TN},
  month       = oct,
  year        = {2014}
}
Dong, T., Haidar, A., Tomov, S., Dongarra, J. "A Fast Batched Cholesky Factorization on a GPU,"2014 International Conference on Parallel Processing (ICPP-2014), Minneapolis, MN, September, 2014 [pdf] [bibtex]
@inproceedings{icl:779,
  author      = {Dong, T. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {A Fast Batched {Cholesky} Factorization on a {GPU}},
  booktitle   = {2014 International Conference on Parallel Processing (ICPP-2014)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Minneapolis, MN},
  month       = sep,
  year        = {2014}
}
Dong, T., Haidar, A., Luszczek, P., Harris, J., Tomov, S., and Dongarra, J. "LU Factorization of Small Matrices: Accelerating Batched DGETRF on the GPU,"16th IEEE International Conference on High Performance Computing and Communications (HPCC), Paris, France, pp. 157-161, August 20-22, 2014 [pdf] [bibtex]
@inproceedings{icl:851,
  author      = {Dong, T. and Haidar, A. and Luszczek, P. and Harris, J. and Tomov, S. and Dongarra, J.},
  title       = {{LU} Factorization of Small Matrices: Accelerating Batched {DGETRF} on the {GPU}},
  booktitle   = {16th IEEE International Conference on High Performance Computing and Communications (HPCC)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  pages       = {157--161},
  address     = {Paris, France},
  month       = aug,
  year        = {2014}
}
Dongarra, J., Gates, M., Haidar, A., Kurzak, J., Luszczek, P., Tomov, S., Yamazaki, I. "Accelerating Numerical Dense Linear Algebra Calculations with GPUs,"Numerical Calculations with GPUs, Volodymyr Kindratenko, ed., Springer International Publishing, pp. 3-28, July, 2014 [pdf] [bibtex]
@incollection{icl:817,
  author      = {Dongarra, J. and Gates, M. and Haidar, A. and Kurzak, J. and Luszczek, P. and Tomov, S. and Yamazaki, I.},
  title       = {Accelerating Numerical Dense Linear Algebra Calculations with {GPUs}},
  booktitle   = {Numerical Calculations with GPUs},
  editor      = {Kindratenko, Volodymyr},
  publisher   = {Springer International Publishing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  pages       = {3--28},
  month       = jul,
  year        = {2014}
}
Anzt, H., Lukarski, D., Tomov, S., Dongarra, J. "Self-Adaptive Multiprecision Preconditioners on Multicore and Manycore Architectures,"VECPAR 2014, Eugene, OR, June, 2014 [pdf] [bibtex]
@inproceedings{icl:778,
  author      = {Anzt, H. and Lukarski, D. and Tomov, S. and Dongarra, J.},
  title       = {Self-Adaptive Multiprecision Preconditioners on Multicore and Manycore Architectures},
  booktitle   = {VECPAR 2014},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Eugene, OR},
  month       = jun,
  year        = {2014}
}
Haidar, A., Luszczek, P., Tomov, S., Dongarra, J. "Heterogeneous Acceleration for Linear Algebra in Multi-Coprocessor Environments,"VECPAR 2014, Eugene, OR, June, 2014 [pdf] [bibtex]
@inproceedings{icl:799,
  author      = {Haidar, A. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Heterogeneous Acceleration for Linear Algebra in Multi-Coprocessor Environments},
  booktitle   = {VECPAR 2014},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Eugene, OR},
  month       = jun,
  year        = {2014}
}
Dongarra, J., Haidar, A., Kurzak, J., Luszczek, P., Tomov, S., YarKhan, A. "Model-Driven One-Sided Factorizations on Multicore Accelerated Systems,"International Journal on Supercomputing Frontiers and Innovations, Vol. 1, No. 1, June, 2014 [pdf] [bibtex]
@article{icl:816,
  author      = {Dongarra, J. and Haidar, A. and Kurzak, J. and Luszczek, P. and Tomov, S. and YarKhan, A.},
  title       = {Model-Driven One-Sided Factorizations on Multicore Accelerated Systems},
  journal     = {International Journal on Supercomputing Frontiers and Innovations},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {1},
  number      = {1},
  month       = jun,
  year        = {2014}
}
Cao, C., Dongarra, J., Du, P., Gates, M., Luszczek, P., Tomov, S. "clMAGMA: High Performance Dense Linear Algebra with OpenCL,"International Workshop on OpenCL, Bristol University, England, May 12-13, 2014 [pdf] [bibtex]
@inproceedings{icl:833,
  author      = {Cao, C. and Dongarra, J. and Du, P. and Gates, M. and Luszczek, P. and Tomov, S.},
  title       = {{clMAGMA}: High Performance Dense Linear Algebra with {OpenCL}},
  booktitle   = {International Workshop on OpenCL},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Bristol University, England},
  month       = may,
  year        = {2014}
}
Anzt, H., Tomov, S., Luszczek, P., Yamazaki, I., Dongarra, J., Sawyer, W. "Optimizing Krylov Subspace Solvers on Graphics Processing Units,"Third International Workshop on Accelerators and Hybrid Exascale Systems (AsHES), IPDPS 2014, IEEE, Phoenix, AZ, May, 2014 [pdf] [bibtex]
@inproceedings{icl:830,
  author      = {Anzt, H. and Tomov, S. and Luszczek, P. and Yamazaki, I. and Dongarra, J. and Sawyer, W.},
  title       = {Optimizing {Krylov} Subspace Solvers on Graphics Processing Units},
  booktitle   = {Third International Workshop on Accelerators and Hybrid Exascale Systems (AsHES), IPDPS 2014},
  publisher   = {IEEE},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Phoenix, AZ},
  month       = may,
  year        = {2014}
}
Donfack, S., Tomov, S., Dongarra, J. "Dynamically balanced synchronization-avoiding LU factorization with multicore and GPUs,"Fourth International Workshop on Accelerators and Hybrid Exascale Systems (AsHES), IPDPS 2014, IEEE, Phoenix, AZ, May, 2014 [pdf] [bibtex]
@inproceedings{icl:831,
  author      = {Donfack, S. and Tomov, S. and Dongarra, J.},
  title       = {Dynamically balanced synchronization-avoiding {LU} factorization with multicore and {GPUs}},
  booktitle   = {Fourth International Workshop on Accelerators and Hybrid Exascale Systems (AsHES), IPDPS 2014},
  publisher   = {IEEE},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Phoenix, AZ},
  month       = may,
  year        = {2014}
}
Dong, T., Haidar, A., Tomov, S., Dongarra, J. "Batched Cholesky Factorization on a GPU,"VECPAR 2014 (Submitted), Eugene, OR, January, 2014 [bibtex]
@inproceedings{icl:775,
  author      = {Dong, T. and Haidar, A. and Tomov, S. and Dongarra, J.},
  title       = {Batched {Cholesky} Factorization on a {GPU}},
  booktitle   = {VECPAR 2014},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Eugene, OR},
  month       = jan,
  year        = {2014},
  note        = {Submitted}
}
Du, P., Luszczek, P., Tomov, S., Dongarra, J. "Soft Error Resilient QR Factorization for Hybrid System with GPGPU,"Journal of Computational Science, Vassil Alexandrov, ed., Elsevier B.V., Vol. 4, No. 6, pp. 457-464, November, 2013 [pdf] [bibtex]
@article{icl:796,
  author      = {Du, P. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Soft Error Resilient {QR} Factorization for Hybrid System with {GPGPU}},
  journal     = {Journal of Computational Science},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {4},
  number      = {6},
  pages       = {457--464},
  month       = nov,
  year        = {2013}
}
Dongarra, J., Gates, M., Haidar, A., Jia, Y., Kabir, K., Luszczek, P., Tomov, S. "Portable HPC Programming on Intel Many-Integrated-Core Hardware with MAGMA Port to Xeon Phi,"PPAM 2013, Warsaw, Poland, September, 2013 [pdf] [bibtex]
@inproceedings{icl:794,
  author      = {Dongarra, J. and Gates, M. and Haidar, A. and Jia, Y. and Kabir, K. and Luszczek, P. and Tomov, S.},
  title       = {Portable {HPC} Programming on {Intel} {Many-Integrated-Core} Hardware with {MAGMA} Port to {Xeon} {Phi}},
  booktitle   = {PPAM 2013},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Warsaw, Poland},
  month       = sep,
  year        = {2013}
}
Haidar, A., Tomov, S., Dongarra, J., Solca, R., Schulthess, T. "A Novel Hybrid CPU-GPU Generalized Eigensolver for Electronic Structure Calculations Based on Fine Grained Memory Aware Tasks,"International Journal of High Performance Computing Applications, August, 2013 [pdf] [bibtex]
@article{icl:801,
  author      = {Haidar, A. and Tomov, S. and Dongarra, J. and Solca, R. and Schulthess, T.},
  title       = {A Novel Hybrid {CPU-GPU} Generalized Eigensolver for Electronic Structure Calculations Based on Fine Grained Memory Aware Tasks},
  journal     = {International Journal of High Performance Computing Applications},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = aug,
  year        = {2013}
}
Anzt, H., Tomov, S., Dongarra, J., Heuveline, V. "A Block-Asynchronous Relaxation Method for Graphics Processing Units,"Journal of Parallel and Distributed Computing, June, 2013 [pdf] [bibtex]
@article{icl:783,
  author      = {Anzt, H. and Tomov, S. and Dongarra, J. and Heuveline, V.},
  title       = {A Block-Asynchronous Relaxation Method for Graphics Processing Units},
  journal     = {Journal of Parallel and Distributed Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2013}
}
Haidar, A., Solca, R., Gates, M., Tomov, S., Schulthess, T., Dongarra, J. "Leading Edge Hybrid Multi-GPU Algorithms for Generalized Eigenproblems in Electronic Structure Calculations,"International Supercomputing Conference ISC, Lecture Notes in Computer Science, Leipzig, Germany, Vol. 7905, pp. 67-80, June, 2013 [pdf] [bibtex]
@inproceedings{icl:798,
  author      = {Haidar, A. and Solca, R. and Gates, M. and Tomov, S. and Schulthess, T. and Dongarra, J.},
  title       = {Leading Edge Hybrid {Multi-GPU} Algorithms for Generalized Eigenproblems in Electronic Structure Calculations},
  booktitle   = {International Supercomputing Conference ISC},
  series      = {Lecture Notes in Computer Science},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {7905},
  pages       = {67--80},
  address     = {Leipzig, Germany},
  month       = jun,
  year        = {2013}
}
Cao, C., Dongarra, J., Du, P., Gates, M., Luszczek, P., Tomov, S. "clMAGMA: High Performance Dense Linear Algebra with OpenCL,"University of Tennessee Computer Science Technical Report (Lawn 275), UT-CS-13-706, March, 2013 [pdf] [bibtex]
@techreport{icl:737,
  author      = {Cao, C. and Dongarra, J. and Du, P. and Gates, M. and Luszczek, P. and Tomov, S.},
  title       = {{clMAGMA}: High Performance Dense Linear Algebra with {OpenCL}},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-13-706},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = mar,
  year        = {2013},
  note        = {Also LAWN 275}
}
Baboulin, M., Dongarra, J., Herrmann, J., Tomov, S. "Accelerating linear system solutions using randomization techniques,"ACM Transactions on Mathematical Software (TOMS), Vol. 39, No. 2, February, 2013 [bibtex]
@article{icl:741,
  author      = {Baboulin, M. and Dongarra, J. and Herrmann, J. and Tomov, S.},
  title       = {Accelerating linear system solutions using randomization techniques},
  journal     = {ACM Transactions on Mathematical Software (TOMS)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {39},
  number      = {2},
  month       = feb,
  year        = {2013}
}
Bosilca, G., Bouteiller, A., Danalis, A., Herault, T., Kurzak, J., Luszczek, P., Tomov, S., Dongarra, J. "Scalable Dense Linear Algebra on Heterogeneous Hardware,"HPC: Transition Towards Exascale Processing, in the series Advances in Parallel Computing, IOS Press, 2013 [pdf] [bibtex]
@incollection{icl:758,
  author      = {Bosilca, G. and Bouteiller, A. and Danalis, A. and Herault, T. and Kurzak, J. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Scalable Dense Linear Algebra on Heterogeneous Hardware},
  booktitle   = {HPC: Transition Towards Exascale Processing},
  series      = {Advances in Parallel Computing},
  publisher   = {IOS Press},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2013}
}
Vetter, J., Glassbrook, R., Schwan, K., Yalamanchili, S., Horton, M., Gavrilovska, A., Slawinska, M., Meredith, J., Roth, P., Spafford, K., Tomov, S., Wynkoop, J. "Keeneland: Computational Science using Heterogeneous GPU Computing,"Contemporary High Performance Computing: From Petascale Toward Exascale, Jeffrey Vetter, ed., Taylor and Francis, CRC Computational Science Series, Boca Raton, FL, Chapter 7, 2013 [pdf] [bibtex]
@incollection{icl:808,
  author      = {Vetter, J. and Glassbrook, R. and Schwan, K. and Yalamanchili, S. and Horton, M. and Gavrilovska, A. and Slawinska, M. and Meredith, J. and Roth, P. and Spafford, K. and Tomov, S. and Wynkoop, J.},
  title       = {{Keeneland}: Computational Science using Heterogeneous {GPU} Computing},
  booktitle   = {Contemporary High Performance Computing: From Petascale Toward Exascale},
  editor      = {Vetter, Jeffrey},
  series      = {CRC Computational Science Series},
  publisher   = {Taylor and Francis},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  chapter     = {7},
  address     = {Boca Raton, FL},
  year        = {2013}
}
Solcà, R., Haidar, A., Tomov, S., Dongarra, J., Schulthess, T. "A Novel Hybrid CPU-GPU Generalized Eigensolver for Electronic Structure Calculations Based on Fine Grained Memory Aware Tasks,"Supercomputing '12 (poster), Salt Lake City, Utah, November, 2012 [bibtex]
@inproceedings{icl:729,
  author      = {Solc{\`a}, R. and Haidar, A. and Tomov, S. and Dongarra, J. and Schulthess, T.},
  title       = {A Novel Hybrid {CPU-GPU} Generalized Eigensolver for Electronic Structure Calculations Based on Fine Grained Memory Aware Tasks},
  booktitle   = {Supercomputing '12},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Salt Lake City, Utah},
  month       = nov,
  year        = {2012},
  note        = {Poster}
}
Agullo, E., Bosilca, G., Castagnède, C., Dongarra, J., Ltaief, H., Tomov, S. "Matrices Over Runtime Systems at Exascale,"Supercomputing '12 (poster), Salt Lake City, Utah, November, 2012 [bibtex]
@inproceedings{icl:730,
  author      = {Agullo, E. and Bosilca, G. and Castagn{\`e}de, C. and Dongarra, J. and Ltaief, H. and Tomov, S.},
  title       = {Matrices Over Runtime Systems at Exascale},
  booktitle   = {Supercomputing '12},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Salt Lake City, Utah},
  month       = nov,
  year        = {2012},
  note        = {Poster}
}
Dong, T., Kolev, T., Rieben, R., Dobrev, V., Tomov, S., Dongarra, J. "Acceleration of the BLAST Hydro Code on GPU,"Supercomputing '12 (poster), Salt Lake City, Utah, November, 2012 [bibtex]
@inproceedings{icl:731,
  author      = {Dong, T. and Kolev, T. and Rieben, R. and Dobrev, V. and Tomov, S. and Dongarra, J.},
  title       = {Acceleration of the {BLAST} Hydro Code on {GPU}},
  booktitle   = {Supercomputing '12},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Salt Lake City, Utah},
  month       = nov,
  year        = {2012},
  note        = {Poster}
}
Donfack, S., Tomov, S., Dongarra, J. "Performance evaluation of LU factorization through hardware counter measurements,"University of Tennessee Computer Science Technical Report, ut-cs-12-700, October, 2012 [pdf] [bibtex]
@techreport{icl:714,
  author      = {Donfack, S. and Tomov, S. and Dongarra, J.},
  title       = {Performance evaluation of {LU} factorization through hardware counter measurements},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-12-700},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = oct,
  year        = {2012}
}
Anzt, H., Tomov, S., Dongarra, J., Heuveline, V. "A Block-Asynchronous Relaxation Method for Graphics Processing Units,"Journal of Parallel and Distributed Computing (submitted), October, 2012 [pdf] [bibtex]
@article{icl:719,
  author      = {Anzt, H. and Tomov, S. and Dongarra, J. and Heuveline, V.},
  title       = {A Block-Asynchronous Relaxation Method for Graphics Processing Units},
  journal     = {Journal of Parallel and Distributed Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = oct,
  year        = {2012},
  note        = {Submitted}
}
Du, P., Tomov, S., and Dongarra, J. "Providing GPU Capability to LU and QR within the ScaLAPACK Framework,"University of Tennessee Computer Science Technical Report, UT-CS-12-699 (LAWN 272), September 12, 2012 [pdf] [bibtex]
@techreport{icl:879,
  author      = {Du, P. and Tomov, S. and Dongarra, J.},
  title       = {Providing {GPU} Capability to {LU} and {QR} within the {ScaLAPACK} Framework},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-12-699},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = sep,
  year        = {2012},
  note        = {Also LAWN 272}
}
Du, P., Tomov, S., Dongarra, J. "Providing GPU Capability to LU and QR within the ScaLAPACK Framework,"University of Tennessee Computer Science Technical Report (also LAWN 272), UT-CS-12-699, September, 2012 [pdf] [bibtex]
@techreport{icl:715,
  author      = {Du, P. and Tomov, S. and Dongarra, J.},
  title       = {Providing {GPU} Capability to {LU} and {QR} within the {ScaLAPACK} Framework},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-12-699},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = sep,
  year        = {2012},
  note        = {Also LAWN 272}
}
Anzt, H., Tomov, S., Dongarra, J., Heuveline, V. "Weighted Block-Asynchronous Iteration on GPU-Accelerated Systems,"Tenth International Workshop on Algorithms, Models and Tools for Parallel Computing on Heterogeneous Platforms (Best Paper), Rhodes Island, Greece, August, 2012 [pdf] [bibtex]
@inproceedings{icl:713,
  author      = {Anzt, H. and Tomov, S. and Dongarra, J. and Heuveline, V.},
  title       = {Weighted Block-Asynchronous Iteration on {GPU}-Accelerated Systems},
  booktitle   = {Tenth International Workshop on Algorithms, Models and Tools for Parallel Computing on Heterogeneous Platforms},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Rhodes Island, Greece},
  month       = aug,
  year        = {2012},
  note        = {Best Paper}
}
Du, P., Weber, R., Luszczek, P., Tomov, S., Peterson, G., Dongarra, J. "From CUDA to OpenCL: Towards a Performance-portable Solution for Multi-platform GPU Programming,"Parallel Computing, Vol. 38, No. 8, pp. 391-407, August, 2012 [bibtex]
@article{icl:725,
  author      = {Du, P. and Weber, R. and Luszczek, P. and Tomov, S. and Peterson, G. and Dongarra, J.},
  title       = {From {CUDA} to {OpenCL}: Towards a Performance-portable Solution for Multi-platform {GPU} Programming},
  journal     = {Parallel Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {38},
  number      = {8},
  pages       = {391--407},
  month       = aug,
  year        = {2012}
}
Kasichayanula, K., Terpstra, D., Luszczek, P., Tomov, S., Moore, S., Peterson, G. "Power Aware Computing on GPUs,"SAAHPC '12 (Best Paper Award), Argonne, IL, July 10-11, 2012 [pdf] [bibtex]
@inproceedings{icl:686,
  author      = {Kasichayanula, K. and Terpstra, D. and Luszczek, P. and Tomov, S. and Moore, S. and Peterson, G.},
  title       = {Power Aware Computing on {GPUs}},
  booktitle   = {SAAHPC '12},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Argonne, IL},
  month       = jul,
  year        = {2012},
  note        = {Best Paper Award}
}
Yamazaki, I., Tomov, S., Dongarra, J. "One-sided dense matrix factorizations on a multicore with multiple GPU accelerators,"The International Conference on Computational Science (ICCS), June 4, 2012 [bibtex]
@inproceedings{icl:678,
  author      = {Yamazaki, I. and Tomov, S. and Dongarra, J.},
  title       = {One-sided dense matrix factorizations on a multicore with multiple {GPU} accelerators},
  booktitle   = {The International Conference on Computational Science (ICCS)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2012}
}
Song, F., Tomov, S., Dongarra, J. "Enabling and Scaling Matrix Computations on Heterogeneous Multi-Core and Multi-GPU Systems,"26th ACM International Conference on Supercomputing (ICS 2012), ACM, San Servolo Island, Venice, Italy, June, 2012 [pdf] [bibtex]
@inproceedings{icl:669,
  author      = {Song, F. and Tomov, S. and Dongarra, J.},
  title       = {Enabling and Scaling Matrix Computations on Heterogeneous Multi-Core and Multi-{GPU} Systems},
  booktitle   = {26th ACM International Conference on Supercomputing (ICS 2012)},
  publisher   = {ACM},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {San Servolo Island, Venice, Italy},
  month       = jun,
  year        = {2012}
}
Baboulin, M., Donfack, S., Dongarra, J., Grigori, L., Remi, A., Tomov, S. "A class of communication-avoiding algorithms for solving general dense linear systems on CPU/GPU parallel machines,"Proc. of the International Conference on Computational Science (ICCS), 9, 17-26, June, 2012 [bibtex]
@inproceedings{icl:685,
  author      = {Baboulin, M. and Donfack, S. and Dongarra, J. and Grigori, L. and Remi, A. and Tomov, S.},
  title       = {A class of communication-avoiding algorithms for solving general dense linear systems on {CPU}/{GPU} parallel machines},
  booktitle   = {Proc. of the International Conference on Computational Science (ICCS)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {9},
  pages       = {17--26},
  month       = jun,
  year        = {2012}
}
Anzt, H., Tomov, S., Gates, M., Dongarra, J., Heuveline, V. "Block-asynchronous Multigrid Smoothers for GPU-accelerated Systems,"ICCS 2012, Omaha, NE, June, 2012 [pdf] [bibtex]
@inproceedings{icl:697,
  author      = {Anzt, H. and Tomov, S. and Gates, M. and Dongarra, J. and Heuveline, V.},
  title       = {Block-asynchronous Multigrid Smoothers for {GPU}-accelerated Systems},
  booktitle   = {ICCS 2012},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Omaha, NE},
  month       = jun,
  year        = {2012}
}
Vomel, C., Tomov, S., Dongarra, J. "Divide and Conquer on Hybrid GPU-Accelerated Multicore Systems,"SIAM Journal on Scientific Computing, 34 (2), C70-C82, April 12, 2012 [bibtex]
@article{icl:684,
  author      = {Vomel, C. and Tomov, S. and Dongarra, J.},
  title       = {Divide and Conquer on Hybrid {GPU}-Accelerated Multicore Systems},
  journal     = {SIAM Journal on Scientific Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {34},
  number      = {2},
  pages       = {C70--C82},
  month       = apr,
  year        = {2012}
}
Baboulin, M., Dongarra, J., Herrmann, J., Tomov, S. "Accelerating Linear System Solutions Using Randomization Techniques,"ACM Transactions on Mathematical Software (accepted) (also LAWN 246), March, 2012 [pdf] [bibtex]
@article{icl:721,
  author      = {Baboulin, M. and Dongarra, J. and Herrmann, J. and Tomov, S.},
  title       = {Accelerating Linear System Solutions Using Randomization Techniques},
  journal     = {ACM Transactions on Mathematical Software},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = mar,
  year        = {2012},
  note        = {Accepted; also LAPACK Working Note 246}
}
Dongarra, J., Kurzak, J., Luszczek, P., Tomov, S. "Dense Linear Algebra on Accelerated Multicore Hardware,"High Performance Scientific Computing: Algorithms and Applications, Berry, M., et al. eds. Springer-Verlag, London, UK, 2012 [bibtex]
@incollection{icl:703,
  author      = {Dongarra, J. and Kurzak, J. and Luszczek, P. and Tomov, S.},
  title       = {Dense Linear Algebra on Accelerated Multicore Hardware},
  booktitle   = {High Performance Scientific Computing: Algorithms and Applications},
  editor      = {Berry, M. and others},
  publisher   = {Springer-Verlag},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {London, UK},
  year        = {2012}
}
Kurzak, J., Luszczek, P., Tomov, S., Dongarra, J. "Preliminary Results of Autotuning GEMM Kernels for the NVIDIA Kepler Architecture,"LAWN 267, 2012 [pdf] [bibtex]
@techreport{icl:718,
  author      = {Kurzak, J. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Preliminary Results of Autotuning {GEMM} Kernels for the {NVIDIA} {Kepler} Architecture},
  type        = {LAPACK Working Note},
  number      = {267},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2012}
}
Anzt, H., Tomov, S., Gates, M., Dongarra, J., Heuveline, V. "Block-asynchronous Multigrid Smoothers for GPU-accelerated Systems," UT-CS-11-689, December 6, 2011 [pdf] [bibtex]
@techreport{icl:661,
  author      = {Anzt, H. and Tomov, S. and Gates, M. and Dongarra, J. and Heuveline, V.},
  title       = {Block-asynchronous Multigrid Smoothers for {GPU}-accelerated Systems},
  number      = {UT-CS-11-689},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = dec,
  year        = {2011}
}
Agullo, E., Augonnet, C., Dongarra, J., Faverge, M., Langou, J., Ltaief, H., Tomov, S. "LU Factorization for Accelerator-based Systems,"IEEE/ACS AICCSA 2011, Sharm-El-Sheikh, Egypt, December, 2011 [pdf] [bibtex]
@inproceedings{icl:599,
  author      = {Agullo, E. and Augonnet, C. and Dongarra, J. and Faverge, M. and Langou, J. and Ltaief, H. and Tomov, S.},
  title       = {{LU} Factorization for Accelerator-based Systems},
  booktitle   = {IEEE/ACS AICCSA 2011},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Sharm-El-Sheikh, Egypt},
  month       = dec,
  year        = {2011}
}
Anzt, H., Tomov, S., Dongarra, J., Heuveline, V. "A Block-Asynchronous Relaxation Method for Graphics Processing Units,"University of Tennessee Computer Science Technical Report, UT-CS-11-687 / LAWN 258, November 30, 2011 [pdf] [bibtex]
@techreport{icl:656,
  author      = {Anzt, H. and Tomov, S. and Dongarra, J. and Heuveline, V.},
  title       = {A Block-Asynchronous Relaxation Method for Graphics Processing Units},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-11-687},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = nov,
  year        = {2011},
  note        = {Also LAPACK Working Note 258}
}
Nath, R., Tomov, S., Dong, T., Dongarra, J. "Optimizing Symmetric Dense Matrix-Vector Multiplication on GPUs,"ACM/IEEE Conference on Supercomputing (SC’11), Seattle, WA, November 12-18, 2011 [pdf] [bibtex]
@inproceedings{icl:632,
  author      = {Nath, R. and Tomov, S. and Dong, T. and Dongarra, J.},
  title       = {Optimizing Symmetric Dense Matrix-Vector Multiplication on {GPUs}},
  booktitle   = {ACM/IEEE Conference on Supercomputing (SC'11)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Seattle, WA},
  month       = nov,
  year        = {2011}
}
Malony, A., Biersdorff, S., Shende, S., Jagode, H., Tomov, S., Juckeland, G., Dietrich, R., Poole, D., Lamb, C. "Parallel Performance Measurement of Heterogeneous Parallel Systems with GPUs,"International Conference on Parallel Processing (ICPP'11), Taipei, Taiwan, September 13-16, 2011 [bibtex]
@inproceedings{icl:633,
  author      = {Malony, A. and Biersdorff, S. and Shende, S. and Jagode, H. and Tomov, S. and Juckeland, G. and Dietrich, R. and Poole, D. and Lamb, C.},
  title       = {Parallel Performance Measurement of Heterogeneous Parallel Systems with {GPUs}},
  booktitle   = {International Conference on Parallel Processing (ICPP'11)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Taipei, Taiwan},
  month       = sep,
  year        = {2011}
}
Baboulin, M., Dongarra, J., Herrmann, J., Tomov, S. "Accelerating Linear System Solutions Using Randomization Techniques,"INRIA RR-7616 / LAWN #246 (presented at International AMMCS’11), Waterloo, Ontario, Canada, July 25-29, 2011 [bibtex]
@techreport{icl:637,
  author      = {Baboulin, M. and Dongarra, J. and Herrmann, J. and Tomov, S.},
  title       = {Accelerating Linear System Solutions Using Randomization Techniques},
  type        = {INRIA Research Report},
  number      = {RR-7616},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jul,
  year        = {2011},
  note        = {Also LAPACK Working Note 246. Presented at International AMMCS'11, Waterloo, Ontario, Canada}
}
Horton, M., Tomov, S., Dongarra, J. "A Class of Hybrid LAPACK Algorithms for Multicore and GPU Architectures,"Symposium for Application Accelerators in High Performance Computing (SAAHPC'11), Knoxville, TN, July 19-20, 2011 [pdf] [bibtex]
@inproceedings{icl:640,
  author      = {Horton, M. and Tomov, S. and Dongarra, J.},
  title       = {A Class of Hybrid {LAPACK} Algorithms for Multicore and {GPU} Architectures},
  booktitle   = {Symposium for Application Accelerators in High Performance Computing (SAAHPC'11)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Knoxville, TN},
  month       = jul,
  year        = {2011}
}
Du, P., Luszczek, P., Tomov, S., Dongarra, J. "Soft Error Resilient QR Factorization for Hybrid System,"UT-CS-11-675 (also LAPACK Working Note #252), ICL-CS-11-675, July 1, 2011 [pdf] [bibtex]
@techreport{icl:635,
  author      = {Du, P. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Soft Error Resilient {QR} Factorization for Hybrid System},
  number      = {UT-CS-11-675},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jul,
  year        = {2011},
  note        = {Also LAPACK Working Note 252; ICL-CS-11-675}
}
Bosilca, G., Bouteiller, A., Herault, T., Lemarinier, P., Saengpatsa, N., Tomov, S., Dongarra, J. "Performance Portability of a GPU Enabled Factorization with the DAGuE Framework,"IEEE Cluster: workshop on Parallel Programming on Accelerator Clusters (PPAC), June 24, 2011 [pdf] [bibtex]
@inproceedings{icl:636,
  author      = {Bosilca, G. and Bouteiller, A. and Herault, T. and Lemarinier, P. and Saengpatsa, N. and Tomov, S. and Dongarra, J.},
  title       = {Performance Portability of a {GPU} Enabled Factorization with the {DAGuE} Framework},
  booktitle   = {IEEE Cluster: workshop on Parallel Programming on Accelerator Clusters (PPAC)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2011}
}
Song, F., Tomov, S., Dongarra, J. "Efficient Support for Matrix Computations on Heterogeneous Multi-core and Multi-GPU Architectures,"University of Tennessee Computer Science Technical Report, UT-CS-11-668, (also Lawn 250), June 16, 2011 [pdf] [bibtex]
@techreport{icl:628,
  author      = {Song, F. and Tomov, S. and Dongarra, J.},
  title       = {Efficient Support for Matrix Computations on Heterogeneous Multi-core and Multi-{GPU} Architectures},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-11-668},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2011},
  note        = {Also LAPACK Working Note 250}
}
Bosilca, G., Bouteiller, A., Herault, T., Lemarinier, P., Saengpatsa, N., Tomov, S., Dongarra, J. "A Unified HPC Environment for Hybrid Manycore/GPU Distributed Systems,"IEEE International Parallel and Distributed Processing Symposium (submitted), Anchorage, AK, May 16-20, 2011 [bibtex]
@inproceedings{icl:593,
  author      = {Bosilca, G. and Bouteiller, A. and Herault, T. and Lemarinier, P. and Saengpatsa, N. and Tomov, S. and Dongarra, J.},
  title       = {A Unified {HPC} Environment for Hybrid Manycore/{GPU} Distributed Systems},
  booktitle   = {IEEE International Parallel and Distributed Processing Symposium},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Anchorage, AK},
  month       = may,
  year        = {2011},
  note        = {Submitted}
}
Kurzak, J., Tomov, S., Dongarra, J. "Autotuning GEMMs for Fermi,"University of Tennessee Computer Science Technical Report, UT-CS-11-671, (also Lawn 245), April 18, 2011 [pdf] [bibtex]
@techreport{icl:630,
  author      = {Kurzak, J. and Tomov, S. and Dongarra, J.},
  title       = {Autotuning {GEMMs} for {Fermi}},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-11-671},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = apr,
  year        = {2011},
  note        = {Also LAPACK Working Note 245}
}
Malony, A., Biersdorff, S., Shende, S., Jagode, H., Tomov, S., Juckeland, G., Dietrich, R., Poole, D., Lamb, C. "Parallel Performance Measurement of Heterogeneous Parallel Systems with GPUs,"ICPP 2011 (submitted), Taipei, Taiwan, 2011 [pdf] [bibtex]
@inproceedings{icl:608,
  author      = {Malony, A. and Biersdorff, S. and Shende, S. and Jagode, H. and Tomov, S. and Juckeland, G. and Dietrich, R. and Poole, D. and Lamb, C.},
  title       = {Parallel Performance Measurement of Heterogeneous Parallel Systems with {GPUs}},
  booktitle   = {ICPP 2011},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Taipei, Taiwan},
  year        = {2011},
  note        = {Submitted}
}
Agullo, E., Augonnet, C., Dongarra, J., Ltaief, H., Namyst, R., Thibault, S., Tomov, S. "A Hybridization Methodology for High-Performance Linear Algebra Software for GPUs,"in GPU Computing Gems, Jade Edition, Hwu, W. eds. Elsevier, 2, 473-484, 2011 [bibtex]
@incollection{icl:653,
  author      = {Agullo, E. and Augonnet, C. and Dongarra, J. and Ltaief, H. and Namyst, R. and Thibault, S. and Tomov, S.},
  title       = {A Hybridization Methodology for High-Performance Linear Algebra Software for {GPUs}},
  booktitle   = {{GPU} Computing Gems, Jade Edition},
  editor      = {Hwu, W.},
  publisher   = {Elsevier},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {2},
  pages       = {473--484},
  year        = {2011}
}
Agullo, E., Augonnet, C., Dongarra, J., Ltaief, H., Namyst, R., Thibault, S., Tomov, S. "GPU Computing Gems, Jade Edition,"ISBN: 9780123859631, Wen-mei W. Hwu eds. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 473-484 (Chapter 34), 2011 [bibtex]
@inbook{icl:920,
  author      = {Agullo, E. and Augonnet, C. and Dongarra, J. and Ltaief, H. and Namyst, R. and Thibault, S. and Tomov, S.},
  title       = {{GPU} Computing Gems, Jade Edition},
  chapter     = {34},
  pages       = {473--484},
  publisher   = {Morgan Kaufmann Publishers Inc.},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {San Francisco, CA, USA},
  isbn        = {9780123859631},
  year        = {2011},
  note        = {Edited by Wen-mei W. Hwu}
}
Nath, R., Tomov, S., Dongarra, J. "Blas for GPUs, Scientific Computing with Multicore and Accelerators,"Chapman & Hall/CRC Computational Science, Kurzak, J., Bader, D., Dongarra, J. eds. Chapman & Hall/CRC Computational Science, December 7, 2010 [pdf] [bibtex]
@incollection{icl:845,
  author      = {Nath, R. and Tomov, S. and Dongarra, J.},
  title       = {{BLAS} for {GPUs}},
  booktitle   = {Scientific Computing with Multicore and Accelerators},
  editor      = {Kurzak, J. and Bader, D. and Dongarra, J.},
  series      = {Chapman \& Hall/CRC Computational Science},
  publisher   = {Chapman \& Hall/CRC},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = dec,
  year        = {2010}
}
Tomov, S., Dongarra, J. "Dense Linear Algebra for Hybrid GPU-based Systems, Scientific Computing with Multicore and Accelerators,"Chapman & Hall/CRC Computational Science, Kurzak, J., Bader, D., Dongarra, J. eds. Chapman & Hall/CRC Computational Science, December 7, 2010 [bibtex]
@incollection{icl:846,
  author      = {Tomov, S. and Dongarra, J.},
  title       = {Dense Linear Algebra for Hybrid {GPU}-based Systems},
  booktitle   = {Scientific Computing with Multicore and Accelerators},
  editor      = {Kurzak, J. and Bader, D. and Dongarra, J.},
  series      = {Chapman \& Hall/CRC Computational Science},
  publisher   = {Chapman \& Hall/CRC},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = dec,
  year        = {2010}
}
Tomov, S., Faverge, M., Luszczek, P., Dongarra, J. "Using MAGMA with PGI Fortran,"PGI Insider, November 15, 2010 [htm] [bibtex]
@article{icl:620,
  author      = {Tomov, S. and Faverge, M. and Luszczek, P. and Dongarra, J.},
  title       = {Using {MAGMA} with {PGI} {Fortran}},
  journal     = {PGI Insider},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = nov,
  year        = {2010}
}
Agullo, E., Augonnet, C., Dongarra, J., Faverge, M., Ltaief, H., Thibault, S., Tomov, S. "QR Factorization on a Multicore Node Enhanced with Multiple GPU Accelerators,"Proceedings of IPDPS 2011, Anchorage, AK, ICL-UT-10-04, October 1, 2010 [pdf] [bibtex]
@inproceedings{icl:577,
  author      = {Agullo, E. and Augonnet, C. and Dongarra, J. and Faverge, M. and Ltaief, H. and Thibault, S. and Tomov, S.},
  title       = {{QR} Factorization on a Multicore Node Enhanced with Multiple {GPU} Accelerators},
  booktitle   = {Proceedings of IPDPS 2011},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Anchorage, AK},
  month       = oct,
  year        = {2010},
  note        = {ICL-UT-10-04}
}
Du, P., Luszczek, P., Tomov, S., Dongarra, J. "Mixed-Tool Performance Analysis on Hybrid Multicore Architectures,"First International Workshop on Parallel Software Tools and Tool Infrastructures (PSTI 2010), San Diego, CA, Sept. 13-16, 2010 [pdf] [bibtex]
@inproceedings{icl:562,
  author      = {Du, P. and Luszczek, P. and Tomov, S. and Dongarra, J.},
  title       = {Mixed-Tool Performance Analysis on Hybrid Multicore Architectures},
  booktitle   = {First International Workshop on Parallel Software Tools and Tool Infrastructures (PSTI 2010)},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {San Diego, CA},
  month       = sep,
  year        = {2010}
}
Du, P., Weber, R., Luszczek, P., Tomov, S., Peterson, G., Dongarra, J. "From CUDA to OpenCL: Towards a Performance-portable Solution for Multiplatform GPU Programming,"Parallel Computing (submitted), August, 2010 [bibtex]
@article{icl:583,
  author      = {Du, P. and Weber, R. and Luszczek, P. and Tomov, S. and Peterson, G. and Dongarra, J.},
  title       = {From {CUDA} to {OpenCL}: Towards a Performance-portable Solution for Multiplatform {GPU} Programming},
  journal     = {Parallel Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = aug,
  year        = {2010},
  note        = {Submitted}
}
Vomel, C., Tomov, S., Dongarra, J. "Divide & Conquer on Hybrid GPU-Accelerated Multicore Systems,"SIAM Journal on Scientific Computing (submitted), August, 2010 [bibtex]
@article{icl:639,
  author      = {Vomel, C. and Tomov, S. and Dongarra, J.},
  title       = {Divide \& Conquer on Hybrid {GPU}-Accelerated Multicore Systems},
  journal     = {SIAM Journal on Scientific Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = aug,
  year        = {2010},
  note        = {Submitted}
}
Nath, R., Tomov, S., Dongarra, J. "An Improved MAGMA GEMM for Fermi GPUs,"University of Tennessee Computer Science Technical Report, UT-CS-10-655 (also LAPACK working note 227), July 29, 2010 [pdf] [bibtex]
@techreport{icl:548,
  author      = {Nath, R. and Tomov, S. and Dongarra, J.},
  title       = {An Improved {MAGMA} {GEMM} for {Fermi} {GPUs}},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-10-655},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jul,
  year        = {2010},
  note        = {Also LAPACK Working Note 227}
}
Ltaief, H., Tomov, S., Nath, R., Du, P., Dongarra, J. "A Scalable High Performant Cholesky Factorization for Multicore with GPU Accelerators,"Proc. of VECPAR'10 (to appear), Berkeley, CA, June 22-25, 2010 [pdf] [bibtex]
@inproceedings{icl:521,
  author      = {Ltaief, H. and Tomov, S. and Nath, R. and Du, P. and Dongarra, J.},
  title       = {A Scalable High Performant {Cholesky} Factorization for Multicore with {GPU} Accelerators},
  booktitle   = {Proc. of VECPAR'10},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Berkeley, CA},
  month       = jun,
  year        = {2010},
  note        = {To appear}
}
Nath, R., Tomov, S., Dongarra, J. "Accelerating GPU Kernels for Dense Linear Algebra,"Proc. of VECPAR'10, Berkeley, CA, June 22-25, 2010 [pdf] [bibtex]
@inproceedings{icl:546,
  author      = {Nath, R. and Tomov, S. and Dongarra, J.},
  title       = {Accelerating {GPU} Kernels for Dense Linear Algebra},
  booktitle   = {Proc. of VECPAR'10},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Berkeley, CA},
  month       = jun,
  year        = {2010}
}
Bernholc, J., Hodak, M., Lu, W., Moore, S., Tomov, S. "Scalability Study of a Quantum Simulation Code,"PARA 2010, Reykjavik, Iceland, June 6-9, 2010 [bibtex]
@inproceedings{icl:554,
  author      = {Bernholc, J. and Hodak, M. and Lu, W. and Moore, S. and Tomov, S.},
  title       = {Scalability Study of a Quantum Simulation Code},
  booktitle   = {PARA 2010},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Reykjavik, Iceland},
  month       = jun,
  year        = {2010}
}
Tomov, S., Lu, W., Bernholc, J., Moore, S., Dongarra, J. "Performance Evaluation for Petascale Quantum Simulation Tools,"Proceedings of the Cray Users' Group Meeting, Atlanta, GA, May 4, 2010 [bibtex]
@inproceedings{icl:584,
  author      = {Tomov, S. and Lu, W. and Bernholc, J. and Moore, S. and Dongarra, J.},
  title       = {Performance Evaluation for Petascale Quantum Simulation Tools},
  booktitle   = {Proceedings of the Cray Users' Group Meeting},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Atlanta, GA},
  month       = may,
  year        = {2010}
}
Ltaief, H., Tomov, S., Nath, R., Dongarra, J. "Hybrid Multicore Cholesky Factorization with Multiple GPU Accelerators,"IEEE Transactions on Parallel and Distributed Systems (submitted), March 26, 2010 [pdf] [bibtex]
@article{icl:526,
  author      = {Ltaief, H. and Tomov, S. and Nath, R. and Dongarra, J.},
  title       = {Hybrid Multicore {Cholesky} Factorization with Multiple {GPU} Accelerators},
  journal     = {IEEE Transactions on Parallel and Distributed Systems},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = mar,
  year        = {2010},
  note        = {Submitted}
}
Tomov, S., Nath, R., Ltaief, H., Dongarra, J. "Dense Linear Algebra Solvers for Multicore with GPU Accelerators,"Proc. of IPDPS'10, Atlanta, GA, January 15, 2010 [pdf] [bibtex]
@inproceedings{icl:523,
  author      = {Tomov, S. and Nath, R. and Ltaief, H. and Dongarra, J.},
  title       = {Dense Linear Algebra Solvers for Multicore with {GPU} Accelerators},
  booktitle   = {Proc. of IPDPS'10},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Atlanta, GA},
  month       = jan,
  year        = {2010}
}
Tomov, S., Nath, R., Dongarra, J. "Accelerating the reduction to upper Hessenberg, tridiagonal, and bidiagonal forms through hybrid GPU-based computing,"Parallel Computing, vol. 36, number 12, pp. 645-654, June 19, 2010 [pdf] [bibtex]
@article{icl:547,
  author      = {Tomov, S. and Nath, R. and Dongarra, J.},
  title       = {Accelerating the reduction to upper {Hessenberg}, tridiagonal, and bidiagonal forms through hybrid {GPU}-based computing},
  journal     = {Parallel Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {36},
  number      = {12},
  pages       = {645--654},
  month       = jun,
  year        = {2010}
}
Nath, R., Tomov, S., Dongarra, J. "An Improved MAGMA GEMM for Fermi GPUs,"International Journal of High Performance Computing, vol. 24, no. 4, 511-515, November 18, 2010 [bibtex]
@article{icl:582,
  author      = {Nath, R. and Tomov, S. and Dongarra, J.},
  title       = {An Improved {MAGMA} {GEMM} for {Fermi} {GPUs}},
  journal     = {International Journal of High Performance Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {24},
  number      = {4},
  pages       = {511--515},
  month       = nov,
  year        = {2010}
}
Tomov, S., Dongarra, J., Baboulin, M. "Towards Dense Linear Algebra for Hybrid GPU Accelerated Manycore Systems,"Parallel Computing, Vol. 36, Number 5-6, pp. 232-240, 2010 [pdf] [bibtex]
@article{icl:564,
  author      = {Tomov, S. and Dongarra, J. and Baboulin, M.},
  title       = {Towards Dense Linear Algebra for Hybrid {GPU} Accelerated Manycore Systems},
  journal     = {Parallel Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {36},
  number      = {5--6},
  pages       = {232--240},
  year        = {2010}
}
Agullo, E., Augonnet, C., Dongarra, J., Ltaief, H., Namyst, R., Thibault, S., and Tomov, S. "Faster, Cheaper, Better - a Hybridization Methodology to Develop Linear Algebra Software for GPUs,"LAPACK Working Note 230, 2010 [pdf] [bibtex]
@techreport{icl:585,
  author      = {Agullo, E. and Augonnet, C. and Dongarra, J. and Ltaief, H. and Namyst, R. and Thibault, S. and Tomov, S.},
  title       = {Faster, Cheaper, Better - a Hybridization Methodology to Develop Linear Algebra Software for {GPUs}},
  type        = {LAPACK Working Note},
  number      = {230},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2010}
}
Du, P., Weber, R., Luszczek, P., Tomov, S., Peterson, G., Dongarra, J. "From CUDA to OpenCL: Towards a Performance-portable Solution for Multi-platform GPU Programming,"Parallel Computing (submitted), 2010 [bibtex]
@article{icl:638,
  author      = {Du, P. and Weber, R. and Luszczek, P. and Tomov, S. and Peterson, G. and Dongarra, J.},
  title       = {From {CUDA} to {OpenCL}: Towards a Performance-portable Solution for Multi-platform {GPU} Programming},
  journal     = {Parallel Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2010},
  note        = {Submitted}
}
Li, Y., Dongarra, J., Tomov, S. "A note on auto-tuning GEMM for GPUs,"Proc. of ICCS'09, Baton Rouge, LA, UT-CS-09-635, May 25-27, 2009 [pdf] [bibtex]
@inproceedings{icl:471,
  author      = {Li, Y. and Dongarra, J. and Tomov, S.},
  title       = {A note on auto-tuning {GEMM} for {GPUs}},
  booktitle   = {Proc. of ICCS'09},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Baton Rouge, LA},
  month       = may,
  year        = {2009},
  note        = {UT-CS-09-635}
}
Li, Y., Dongarra, J., Tomov, S. "A Note on Auto-tuning GEMM for GPUs,"Computational Science – ICCS 2009, Proceedings of the 9th International Conference, Lecture Notes in Computer Science: Theoretical Computer Science and General Issues, Allen, G., Nabrzyski, J., Seidel, E., van Albada, G.D., Dongarra, J., Sloot, P.M.A. eds. Baton Rouge, LA, Parts I-II, Vols. 5544-5545, pp. 884-892, May 25-27, 2009 [bibtex]
@inproceedings{icl:512,
  author      = {Li, Y. and Dongarra, J. and Tomov, S.},
  title       = {A Note on Auto-tuning {GEMM} for {GPUs}},
  booktitle   = {Computational Science -- ICCS 2009, Proceedings of the 9th International Conference},
  editor      = {Allen, G. and Nabrzyski, J. and Seidel, E. and van Albada, G. D. and Dongarra, J. and Sloot, P. M. A.},
  series      = {Lecture Notes in Computer Science: Theoretical Computer Science and General Issues},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {5544--5545},
  pages       = {884--892},
  address     = {Baton Rouge, LA},
  month       = may,
  year        = {2009},
  note        = {Parts I--II}
}
Tomov, S., Dongarra, J. "Accelerating the Reduction to Upper Hessenberg Form Through Hybrid GPU-based Computing,"University of Tennessee Computer Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219), May 24, 2009 [pdf] [bibtex]
@techreport{icl:485,
  author      = {Tomov, S. and Dongarra, J.},
  title       = {Accelerating the Reduction to Upper {Hessenberg} Form Through Hybrid {GPU}-based Computing},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-09-642},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = may,
  year        = {2009},
  note        = {Also LAPACK Working Note 219}
}
Tomov, S., Lu, W., Bernholc, J., Moore, S., Dongarra, J. "Performance evaluation for petascale quantum simulation tools,"Proceedings of CUG09, Atlanta, GA, May 4-7, 2009 [pdf] [bibtex]
@inproceedings{icl:478,
  author      = {Tomov, S. and Lu, W. and Bernholc, J. and Moore, S. and Dongarra, J.},
  title       = {Performance evaluation for petascale quantum simulation tools},
  booktitle   = {Proceedings of CUG09},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Atlanta, GA},
  month       = may,
  year        = {2009}
}
Agullo, E., Demmel, J., Dongarra, J., Hadri, B., Kurzak, J., Langou, J., Ltaief, H., Luszczek, P., Tomov, S. "Numerical linear algebra on emerging architectures: The PLASMA and MAGMA projects,"Journal of Physics: Conference Series, Vol. 180, 2009 [pdf] [bibtex]
@article{icl:486,
  author      = {Agullo, E. and Demmel, J. and Dongarra, J. and Hadri, B. and Kurzak, J. and Langou, J. and Ltaief, H. and Luszczek, P. and Tomov, S.},
  title       = {Numerical linear algebra on emerging architectures: The {PLASMA} and {MAGMA} projects},
  journal     = {Journal of Physics: Conference Series},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {180},
  year        = {2009}
}
Canning, A., Dongarra, J., Langou, J., Marques, O., Tomov, S., Voemel, C., Wang, L.-W. "Interior State Computation of Nano Structures,"PARA 2008, 9th International Workshop on State-of-the-Art in Scientific and Parallel Computing, Trondheim, Norway, May 13-16, 2008 [pdf] [bibtex]
@inproceedings{icl:416,
  author      = {Canning, A. and Dongarra, J. and Langou, J. and Marques, O. and Tomov, S. and Voemel, C. and Wang, L.-W.},
  title       = {Interior State Computation of Nano Structures},
  booktitle   = {PARA 2008, 9th International Workshop on State-of-the-Art in Scientific and Parallel Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Trondheim, Norway},
  month       = may,
  year        = {2008}
}
Baboulin, M., Tomov, S., Dongarra, J. "Some Issues in Dense Linear Algebra for Multicore and Special Purpose Architectures,"PARA 2008, 9th International Workshop on State-of-the-Art in Scientific and Parallel Computing, Trondheim, Norway, May 13-16, 2008 [bibtex]
@inproceedings{icl:516,
  author      = {Baboulin, M. and Tomov, S. and Dongarra, J.},
  title       = {Some Issues in Dense Linear Algebra for Multicore and Special Purpose Architectures},
  booktitle   = {PARA 2008, 9th International Workshop on State-of-the-Art in Scientific and Parallel Computing},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Trondheim, Norway},
  month       = may,
  year        = {2008}
}
Baboulin, M., Dongarra, J., Tomov, S. "Some Issues in Dense Linear Algebra for Multicore and Special Purpose Architectures,"University of Tennessee Computer Science Technical Report, UT-CS-08-615 (also LAPACK Working Note 200), May 6, 2008 [pdf] [bibtex]
@techreport{icl:415,
  author      = {Baboulin, M. and Dongarra, J. and Tomov, S.},
  title       = {Some Issues in Dense Linear Algebra for Multicore and Special Purpose Architectures},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-08-615},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = may,
  year        = {2008},
  note        = {Also LAPACK Working Note 200}
}
Dongarra, J., Moore, S., Peterson, G., Tomov, S., Allred, J., Natoli, V., Richie, D. "Exploring New Architectures in Accelerating CFD for Air Force Applications,"Proceedings of the DoD HPCMP User Group Conference, Seattle, Washington, July 14-17, 2008 [pdf] [bibtex]
@inproceedings{icl:440,
  author      = {Dongarra, J. and Moore, S. and Peterson, G. and Tomov, S. and Allred, J. and Natoli, V. and Richie, D.},
  title       = {Exploring New Architectures in Accelerating {CFD} for {Air Force} Applications},
  booktitle   = {Proceedings of the DoD HPCMP User Group Conference},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Seattle, Washington},
  month       = jul,
  year        = {2008}
}
Tomov, S., Dongarra, J., Baboulin, M. "Towards Dense Linear Algebra for Hybrid GPU Accelerated Manycore Systems,"University of Tennessee Computer Science Technical Report, UT-CS-08-632 (also LAPACK Working Note 210), October 17, 2008 [pdf] [bibtex]
% Report series goes in `type`, the report id in `number`; @techreport ignores `booktitle`.
@techreport{icl:443,
  author      = {Tomov, S. and Dongarra, J. and Baboulin, M.},
  title       = {Towards Dense Linear Algebra for Hybrid {GPU} Accelerated Manycore Systems},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-CS-08-632},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = oct,
  year        = {2008},
  note        = {Also {LAPACK} Working Note 210}
}
Vomel, C., Tomov, S., Marques, O., Canning, A., Wang, L.-W., Dongarra, J. "State-of-the-Art Eigensolvers for Electronic Structure Calculations of Large Scale Nano-Systems,"Journal of Computational Physics, Vol. 227, Issue 15, pp. 7113-7124, July, 2008 [bibtex]
@article{icl:447,
  author      = {Vomel, C. and Tomov, S. and Marques, O. and Canning, A. and Wang, L.-W. and Dongarra, J.},
  title       = {State-of-the-Art Eigensolvers for Electronic Structure Calculations of Large Scale Nano-Systems},
  journal     = {Journal of Computational Physics},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {227},
  number      = {15},
  pages       = {7113--7124},
  month       = jul,
  year        = {2008}
}
Buttari, A., Dongarra, J., Kurzak, J., Langou, J., Langou, J., Luszczek, P., Tomov, S. "Exploiting Mixed Precision Floating Point Hardware in Scientific Computations,"in High Performance Computing and Grids in Action, Grandinetti, L. eds. IOS Press, Amsterdam, 2008 [pdf] [bibtex]
% Chapter in an edited book, so typed @incollection so that booktitle/editor/publisher are used.
@incollection{icl:449,
  author      = {Buttari, A. and Dongarra, J. and Kurzak, J. and Langou, J. and Langou, J. and Luszczek, P. and Tomov, S.},
  title       = {Exploiting Mixed Precision Floating Point Hardware in Scientific Computations},
  booktitle   = {High Performance Computing and Grids in Action},
  editor      = {Grandinetti, L.},
  publisher   = {IOS Press},
  address     = {Amsterdam},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2008}
}
Buttari, A., Dongarra, J., Kurzak, J., Luszczek, P., Tomov, S. "Using Mixed Precision for Sparse Matrix Computations to Enhance the Performance while Achieving 64-bit Accuracy,"ACM Transactions on Mathematical Software, Vol 34, Number 4, pp. 17-22, 2008 [pdf] [bibtex]
@article{icl:424,
  author      = {Buttari, A. and Dongarra, J. and Kurzak, J. and Luszczek, P. and Tomov, S.},
  title       = {Using Mixed Precision for Sparse Matrix Computations to Enhance the Performance while Achieving 64-bit Accuracy},
  journal     = {ACM Transactions on Mathematical Software},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {34},
  number      = {4},
  pages       = {17--22},
  year        = {2008}
}
Buttari, A., Dongarra, J., Kurzak, J., Langou, J., Langou, Jn., Luszczek, P., Tomov, S. "Exploiting Mixed Precision Floating Point Hardware in Scientific Computations,"In High Performance Computing and Grids in Action (to appear), Lucio Grandinetti eds. IOS Press, Amsterdam, 2007 [pdf] [bibtex]
% Pre-publication ("to appear") listing of the chapter; publication status lives in `note`.
@incollection{icl:392,
  author      = {Buttari, A. and Dongarra, J. and Kurzak, J. and Langou, J. and Langou, Jn. and Luszczek, P. and Tomov, S.},
  title       = {Exploiting Mixed Precision Floating Point Hardware in Scientific Computations},
  booktitle   = {High Performance Computing and Grids in Action},
  editor      = {Grandinetti, Lucio},
  publisher   = {IOS Press},
  address     = {Amsterdam},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2007},
  note        = {To appear}
}
Vömel, C., Tomov, S., Wang, L-W., Marques, O., Dongarra, J. "The Use of Bulk States to Accelerate the Band Edge State Calculation of a Semiconductor Quantum Dot,"Journal of Computational Physics, Volume 223, pp. 774-782, 2007 [pdf] [bibtex]
% The umlaut is written as a BibTeX special character so sorting and labels treat it as one letter.
@article{icl:401,
  author      = {V{\"o}mel, C. and Tomov, S. and Wang, L-W. and Marques, O. and Dongarra, J.},
  title       = {The Use of Bulk States to Accelerate the Band Edge State Calculation of a Semiconductor Quantum Dot},
  journal     = {Journal of Computational Physics},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {223},
  pages       = {774--782},
  year        = {2007}
}
Demmel, J., Dongarra, J., Parlett, B., Kahan, W., Gu, M., Bindel, D., Hida, Y., Li, X., Marques, O., Riedy, E. J., Voemel, C., Langou, J., Luszczek, P., Kurzak, J., Buttari, A., Langou, J., Tomov, S. "Prospectus for the Next LAPACK and ScaLAPACK Libraries,"PARA 2006, Umea, Sweden, June, 2006 [pdf] [bibtex]
% Conference contribution (PARA 2006), so typed @inproceedings so that `booktitle` is used.
@inproceedings{icl:370,
  author      = {Demmel, J. and Dongarra, J. and Parlett, B. and Kahan, W. and Gu, M. and Bindel, D. and Hida, Y. and Li, X. and Marques, O. and Riedy, E. J. and Voemel, C. and Langou, J. and Luszczek, P. and Kurzak, J. and Buttari, A. and Langou, J. and Tomov, S.},
  title       = {Prospectus for the Next {LAPACK} and {ScaLAPACK} Libraries},
  booktitle   = {PARA 2006},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  address     = {Umea, Sweden},
  month       = jun,
  year        = {2006}
}
Canning, A., Dongarra, J., Langou, J., Marques, O., Tomov, S., Voemel, C., Wang, L-W. "Towards bulk based preconditioning for quantum dot computations,"IEEE/ACM Proceedings of HPCNano SC06 (to appear), 2006 [pdf] [bibtex]
@inproceedings{icl:324,
  author      = {Canning, A. and Dongarra, J. and Langou, J. and Marques, O. and Tomov, S. and Voemel, C. and Wang, L-W.},
  title       = {Towards bulk based preconditioning for quantum dot computations},
  booktitle   = {IEEE/ACM Proceedings of HPCNano SC06},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2006},
  note        = {To appear}
}
Canning, A., Dongarra, J., Langou, J., Marques, O., Tomov, S., Voemel, C., Wang, L-W. "Performance evaluation of eigensolvers in nano-structure computations,"IEEE/ACM Proceedings of HPCNano SC06 (to appear), 2006 [pdf] [bibtex]
@inproceedings{icl:325,
  author      = {Canning, A. and Dongarra, J. and Langou, J. and Marques, O. and Tomov, S. and Voemel, C. and Wang, L-W.},
  title       = {Performance evaluation of eigensolvers in nano-structure computations},
  booktitle   = {IEEE/ACM Proceedings of HPCNano SC06},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2006},
  note        = {To appear}
}
Voemel, C., Tomov, S., Wang, L-W., Marques, O., Dongarra, J. "The use of bulk states to accelerate the band edge state calculation of a semiconductor quantum dot,"Journal of Computational Physics (submitted), 2006 [pdf] [bibtex]
% Submitted-manuscript listing; the submission status lives in `note`, not in the journal name.
@article{icl:326,
  author      = {Voemel, C. and Tomov, S. and Wang, L-W. and Marques, O. and Dongarra, J.},
  title       = {The use of bulk states to accelerate the band edge state calculation of a semiconductor quantum dot},
  journal     = {Journal of Computational Physics},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2006},
  note        = {Submitted}
}
Zunger, A., Franceschetti, A., Bester, G., Jones, W. B., Kim, K., Graf, P. A., Wang, L-W., Canning, A., Marques, O., Voemel, C., Dongarra, J., Langou, J., Tomov, S. "Predicting the electronic properties of 3D, million-atom semiconductor nanostructure architectures,"J. Phys.: Conf. Ser. 46, doi:10.1088/1742-6596/46/1/040, 292-298, 2006 [pdf] [bibtex]
% The DOI has its own field (bare, no "doi:" prefix); `volume` carries only the volume number.
@article{icl:327,
  author      = {Zunger, A. and Franceschetti, A. and Bester, G. and Jones, W. B. and Kim, K. and Graf, P. A. and Wang, L-W. and Canning, A. and Marques, O. and Voemel, C. and Dongarra, J. and Langou, J. and Tomov, S.},
  title       = {Predicting the electronic properties of {3D}, million-atom semiconductor nanostructure architectures},
  journal     = {J. Phys.: Conf. Ser.},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {46},
  pages       = {292--298},
  year        = {2006},
  doi         = {10.1088/1742-6596/46/1/040}
}
Tomov, S., Langou, J., Dongarra, J., Canning, A., Wang, L-W. "Conjugate-Gradient Eigenvalue Solvers in Computing Electronic Properties of Nanostructure Architectures,"International Journal of Computational Science and Engineering, Volume 2, Number 3/4, pp. 205-212, 2006 [pdf] [bibtex]
@article{icl:402,
  author      = {Tomov, S. and Langou, J. and Dongarra, J. and Canning, A. and Wang, L-W.},
  title       = {Conjugate-Gradient Eigenvalue Solvers in Computing Electronic Properties of Nanostructure Architectures},
  journal     = {International Journal of Computational Science and Engineering},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  volume      = {2},
  number      = {3/4},
  pages       = {205--212},
  year        = {2006}
}
Tomov, S., Langou, J., Canning, A., Wang, L.-W., Dongarra, J. "Comparison of Nonlinear Conjugate-Gradient methods for computing the Electronic Properties of Nanostructure Architectures,"Proceedings of 5th International Conference on Computational Science (ICCS), Sunderman, V.S., van Albada, G.D., Sloot, P.M.A., Dongarra, J. eds. Springer's Lecture Notes in Computer Science, Atlanta, GA, USA, Part III, pp. 317-325, May 22-25, 2005 [pdf] [bibtex]
% "Part III" is the proceedings part, not a series volume number, so it is kept in `note`.
@inproceedings{icl:284,
  author      = {Tomov, S. and Langou, J. and Canning, A. and Wang, L.-W. and Dongarra, J.},
  title       = {Comparison of Nonlinear Conjugate-Gradient methods for computing the Electronic Properties of Nanostructure Architectures},
  booktitle   = {Proceedings of 5th International Conference on Computational Science (ICCS)},
  series      = {Lecture Notes in Computer Science},
  publisher   = {Springer},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  pages       = {317--325},
  address     = {Atlanta, GA, USA},
  month       = may,
  year        = {2005},
  note        = {Part III}
}
Tomov, S., Langou, J., Canning, A., Wang, L-W., Dongarra, J. "Conjugate-Gradient Eigenvalue Solvers in Computing Electronic Properties of Nanostructure Architectures,"International Journal of Computational Science and Engineering (to appear), June, 2005 [pdf] [bibtex]
% Pre-publication ("to appear") listing; the publication status lives in `note`.
@article{icl:292,
  author      = {Tomov, S. and Langou, J. and Canning, A. and Wang, L-W. and Dongarra, J.},
  title       = {Conjugate-Gradient Eigenvalue Solvers in Computing Electronic Properties of Nanostructure Architectures},
  journal     = {International Journal of Computational Science and Engineering},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  month       = jun,
  year        = {2005},
  note        = {To appear}
}