Senior Lecturer
Email: dario@unizar.es
Address: Campus Río Ebro, University of Zaragoza
C/María de Luna 1, Ada Byron Building,
50018, Zaragoza, Spain
ABOUT ME
—
PUBLICATIONS
2022
Proceedings Articles
Suárez Gracia, Darío; Valero, Alejandro; Gran Tejero, Rubén; Villarroya-Gaudó, María; Viñals, Víctor
peRISCVcope: A Tiny Teaching-Oriented RISC-V Interpreter Proceedings Article
In: Proceedings of the 37th Conference on Design of Circuits and Integrated Circuits (DCIS 2022), pp. 1–6, 2022, ISBN: 978-1-6654-5950-1.
@inproceedings{Gracia2022,
  title = {{peRISCVcope}: A Tiny Teaching-Oriented {RISC-V} Interpreter},
  author = {Suárez Gracia, Darío and Valero, Alejandro and Gran Tejero, Rubén and Villarroya-Gaudó, María and Viñals, Víctor},
  url = {https://ieeexplore.ieee.org/document/9970050},
  doi = {10.1109/DCIS55711.2022.9970050},
  isbn = {978-1-6654-5950-1},
  year = {2022},
  date = {2022-11-16},
  urldate = {2022-11-16},
  booktitle = {Proceedings of the 37th Conference on Design of Circuits and Integrated Circuits (DCIS 2022)},
  pages = {1--6},
  abstract = {The fast advances of computer systems translate into a growing demand of methodologies and tools to introduce those novelties into classes. Among the plethora of those advances, virtualization has become an essential technology in almost every relevant system stack, from connected cars to hyperscaled cloud servers. However, introducing those technologies into the classroom remains a challenging task because of the huge complexity of their software components that may hinder the learning process of students. peRISCVcope aims to help in this area by proposing a tiny yet powerful interpreter to dig into virtualization technologies, such as the implementation of trap\&emulate hypervisors. With less than 2,000 lines of code, and thanks to the conciseness of the RV32I base instruction set of RISC-V, peRISCVcope enables students to make virtualization knowledge their own. This paper presents our experiences developing and testing a virtualization laboratory where students implement parts of an interpreter. After the practical experience, peRISCVcope has been proved as a useful pedagogical tool, and, most importantly, students have positively rated the experience.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2021
Journal Articles
Soria-Pardos, Víctor; Armejach, Adrià; Suárez, Darío; Moretó, Miquel
On the use of many-core Marvell ThunderX2 processor for HPC workloads Journal Article
In: The Journal of Supercomputing, vol. 77, no. 4, pp. 3315–3338, 2021.
@article{soria2021use,
  title = {On the use of many-core {Marvell} {ThunderX2} processor for {HPC} workloads},
  author = {Soria-Pardos, Víctor and Armejach, Adrià and Suárez, Darío and Moretó, Miquel},
  url = {https://zaguan.unizar.es/record/112382/files/texto_completo.pdf},
  doi = {10.1007/s11227-020-03397-6},
  year = {2021},
  date = {2021-01-01},
  urldate = {2021-01-01},
  journal = {The Journal of Supercomputing},
  volume = {77},
  number = {4},
  pages = {3315--3338},
  publisher = {Springer US},
  address = {New York},
  abstract = {Marvell’s ThunderX2 has been the first Arm-based processor with deployments in large-scale HPC production systems, challenging the dominance that x86 processors had in the last decades. While x86 processors and its software stack have been characterized in detail, the behavior of Arm counterparts is not well known, limiting its adoption. This work methodically characterizes performance and power efficiency of the ThunderX2 running different HPC workloads compiled with two state-of-the-art compilers, GCC and Arm HPC Compiler. We study the maturity of available compilers and find that the Arm HPC Compiler is able to apply additional optimizations, resulting in better performance than GCC. In addition, we also compare both performance and power with respect to an Intel Skylake processor. Despite the faster single thread performance of Skylake, ThunderX2 is able to match performance on multi-threaded workloads due to its superior memory bandwidth. However, power efficiency of ThunderX2 is far from matching Skylake-based processors when AVX512 extensions are used.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Valero, Alejandro; Gran Tejero, Rubén; Suárez Gracia, Darío; Georgescu, Emanuel A.; Ezpeleta, Joaquín; Álvarez, Pedro; Muñoz, Adolfo; Ramos, Luis M.; Ibáñez, Pablo
A learning experience toward the understanding of abstraction-level interactions in parallel applications Journal Article
In: J. Parallel Distributed Comput., vol. 156, pp. 38–52, 2021.
@article{DBLP:journals/jpdc/ValeroTGGEAMRI21,
  title = {A learning experience toward the understanding of abstraction-level interactions in parallel applications},
  author = {Valero, Alejandro and Gran Tejero, Rubén and Suárez Gracia, Darío and Georgescu, Emanuel A. and Ezpeleta, Joaquín and Álvarez, Pedro and Muñoz, Adolfo and Ramos, Luis M. and Ibáñez, Pablo},
  url = {https://doi.org/10.1016/j.jpdc.2021.05.008},
  doi = {10.1016/j.jpdc.2021.05.008},
  year = {2021},
  date = {2021-01-01},
  journal = {Journal of Parallel and Distributed Computing},
  volume = {156},
  pages = {38--52},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
2020
Journal Articles
Valero, Alejandro; Suárez Gracia, Darío; Gran Tejero, Rubén
DC-Patch: A Microarchitectural Fault Patching Technique for GPU Register Files Journal Article
In: IEEE Access, vol. 8, pp. 173276–173288, 2020, ISSN: 2169-3536.
@article{Valero2020,
  title = {{DC-Patch}: A Microarchitectural Fault Patching Technique for {GPU} Register Files},
  author = {Valero, Alejandro and Suárez Gracia, Darío and Gran Tejero, Rubén},
  url = {https://ieeexplore.ieee.org/document/9203907},
  doi = {10.1109/ACCESS.2020.3025899},
  issn = {2169-3536},
  year = {2020},
  date = {2020-09-22},
  urldate = {2020-09-22},
  journal = {IEEE Access},
  volume = {8},
  pages = {173276--173288},
  abstract = {The ever-increasing parallelism demand of General-Purpose Graphics Processing Unit (GPGPU) applications pushes toward larger and more energy-hungry register files in successive GPU generations. Reducing the supply voltage beyond its safe limit is an effective way to improve the energy efficiency of register files. However, at these operating voltages, the reliability of the circuit is compromised. This work aims to tolerate permanent faults from process variations in large GPU register files operating below the safe supply voltage limit. To do so, this paper proposes a microarchitectural patching technique, DC-Patch, exploiting the inherent data redundancy of applications to compress registers at run-time with neither compiler assistance nor instruction set modifications. Instead of disabling an entire faulty register file entry, DC-Patch leverages the reliable cells within a faulty entry to store compressed register values. Experimental results show that, with more than a third of faulty register entries, DC-Patch ensures a reliable operation of the register file and reduces the energy consumption by 47\% with respect to a conventional register file working at nominal supply voltage. The energy savings are 21\% compared to a voltage noise smoothing scheme operating at the safe supply voltage limit. These benefits are obtained with less than 2 and 6\% impact on the system performance and area, respectively.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
2019
Journal Articles
Nunez-Yanez, Jose; Amiri, Sam; Hosseinabady, Mohammad; Rodríguez, Andrés; Asenjo, Rafael; Navarro, Angeles; Suarez, Dario; Gran, Ruben
Simultaneous multiprocessing in a software-defined heterogeneous FPGA Journal Article
In: The Journal of Supercomputing, vol. 75, no. 8, pp. 4078–4095, 2019.
@article{nunez2019simultaneous,
  title = {Simultaneous multiprocessing in a software-defined heterogeneous {FPGA}},
  author = {Nunez-Yanez, Jose and Amiri, Sam and Hosseinabady, Mohammad and Rodríguez, Andrés and Asenjo, Rafael and Navarro, Angeles and Suarez, Dario and Gran, Ruben},
  year = {2019},
  date = {2019-01-01},
  journal = {The Journal of Supercomputing},
  volume = {75},
  number = {8},
  pages = {4078--4095},
  publisher = {Springer US},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}