Senior Lecturer
Email: dario@unizar.es
Address: Campus Río Ebro, University of Zaragoza
C/María de Luna 1, Ada Byron Building,
50018, Zaragoza, Spain
ABOUT ME
—
PUBLICATIONS
2025
Journal Articles
Pedrajas, Samuel Pérez; Resano, Javier; Gracia, Darío Suárez
BnnRV: Hardware and Software Optimizations for Weight Sampling in Bayesian Neural Networks on Edge RISC-V Cores Journal Article
In: IEEE Transactions on Circuits and Systems for Artificial Intelligence, pp. 1–12, 2025, ISSN: 2996-6647.
@comment{Journal article: hardware and software optimizations for weight sampling in Bayesian neural networks on edge RISC-V cores (IEEE Transactions on Circuits and Systems for Artificial Intelligence, 2025).
NOTE(review): author names are in "First Last" form, so BibTeX takes only the final word as the surname (e.g. "Pedrajas", "Gracia"); if these are compound surnames, switch to "Last, First" form -- confirm with the authors.
NOTE(review): date is January 1st, which looks like a placeholder -- confirm the actual publication date.}
@article{11216142,
  author    = {Samuel Pérez Pedrajas and Javier Resano and Darío Suárez Gracia},
  title     = {{BnnRV}: Hardware and Software Optimizations for Weight Sampling in {Bayesian} Neural Networks on Edge {RISC-V} Cores},
  journal   = {IEEE Transactions on Circuits and Systems for Artificial Intelligence},
  pages     = {1--12},
  year      = {2025},
  date      = {2025-01-01},
  issn      = {2996-6647},
  doi       = {10.1109/TCASAI.2025.3625517},
  abstract  = {Bayesian Neural Networks (BNN) allow prediction uncertainty estimation, making them a more suitable option for safety-critical applications. However, in BNNs, the forward-pass computational cost is significantly higher than in traditional neural networks (NN), due to the overhead generated by weight sampling. This limits their deployment in edge systems. This paper presents an optimization that allows using lower-cost Uniform distribution sampling instead of Gaussian sampling during BNN inference. Building upon this optimization, this paper proposes a lightweight RISC-V instruction set architecture extension that accelerates BNN inference by introducing fixed point arithmetic operations and an efficient Uniform random number generator. The flexibility of RISC-V enables such domain-specific acceleration, narrowing the performance gap between NNs and BNNs for edge machine learning workloads. The proposed software and hardware optimizations achieve an average speedup of 8.93× while reducing energy consumption per forward pass by 87.12\%, increasing image/J efficiency by 8.19×. They have been designed to maintain accuracy, calibration, and uncertainty quality, while optimizing execution efficiency. This has been verified with an extensive validation process that considers relevant model architectures. Additionally, our results highlight that weight sampling is no longer the BNN inference performance bottleneck, shifting the primary limiting factor to control overhead.},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Proceedings Articles
Pérez, Samuel; Resano, Javier; Gracia, Darío Suárez
Accelerating Bayesian Neural Networks on Low-Power Edge RISC-V Processors Proceedings Article
In: 2024 IEEE 24th International Conference on Nanotechnology (NANO), pp. 507–512, 2024, ISSN: 1944-9380.
@comment{Conference paper: accelerating Bayesian neural network inference on low-power edge RISC-V processors (IEEE NANO 2024).
NOTE(review): author names are in "First Last" form, so BibTeX takes only the final word as the surname (e.g. "Gracia"); if these are compound surnames, switch to "Last, First" form -- confirm with the authors.}
@inproceedings{10628877,
  author    = {Samuel Pérez and Javier Resano and Darío Suárez Gracia},
  title     = {Accelerating {Bayesian} Neural Networks on Low-Power Edge {RISC-V} Processors},
  booktitle = {2024 IEEE 24th International Conference on Nanotechnology (NANO)},
  pages     = {507--512},
  year      = {2024},
  date      = {2024-07-01},
  issn      = {1944-9380},
  doi       = {10.1109/NANO61778.2024.10628877},
  abstract  = {Neural Networks (NNs) are a very popular solution for classification tasks. As the combination of Internet of Things (IoT) with Machine Learning (ML), also known as TinyML, grows in popularity, more NN are being executed on low-end edge systems. The reliability of the predictions is crucial for safety-critical applications. Bayesian Neural Networks (BNNs) address this issue by calculating uncertainty metrics with their predictions at the cost of increasing computing requirements. This work addresses the challenges of executing BNNs inference on low-end systems. BNNs require multiple forward passes in which the weights are sampled from distributions. This sampling process can take up to 85,13\% of execution time. This work optimizes the weight sampling and integrates it within a low cost custom extension for a RISC-V CPU, improving speedup up to x 8,10 and similar energy savings.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2022
Proceedings Articles
Gracia, Darío Suárez; Valero, Alejandro; Tejero, Rubén Gran; Villarroya-Gaudó, María; Viñals, Víctor
peRISCVcope: A Tiny Teaching-Oriented RISC-V Interpreter Proceedings Article
In: Proceedings of the 37th Conference on Design of Circuits and Integrated Circuits (DCIS 2022), pp. 1–6, 2022, ISBN: 978-1-6654-5950-1.
@comment{Conference paper: peRISCVcope, a small teaching-oriented RISC-V interpreter used in a virtualization laboratory (DCIS 2022).
NOTE(review): author names are in "First Last" form, so BibTeX takes only the final word as the surname (e.g. "Gracia", "Tejero"); if these are compound surnames, switch to "Last, First" form -- confirm with the authors.}
@inproceedings{Gracia2022,
  author    = {Darío Suárez Gracia and Alejandro Valero and Rubén Gran Tejero and María Villarroya-Gaudó and Víctor Viñals},
  title     = {{peRISCVcope}: A Tiny Teaching-Oriented {RISC-V} Interpreter},
  booktitle = {Proceedings of the 37th Conference on Design of Circuits and Integrated Circuits (DCIS 2022)},
  pages     = {1--6},
  year      = {2022},
  date      = {2022-11-16},
  isbn      = {978-1-6654-5950-1},
  doi       = {10.1109/DCIS55711.2022.9970050},
  url       = {https://ieeexplore.ieee.org/document/9970050},
  urldate   = {2022-11-16},
  abstract  = {The fast advances of computer systems translate into a growing demand of methodologies and tools to introduce those novelties into classes. Among the plethora of those advances, virtualization has become an essential technology in almost every relevant system stack, from connected cars to hyperscaled cloud servers. However, introducing those technologies into the classroom remains a challenging task because of the huge complexity of their software components that may hinder the learning process of students. peRISCVcope aims to help in this area by proposing a tiny yet powerful interpreter to dig into virtualization technologies, such as the implementation of trap\&emulate hypervisors. With less than 2,000 lines of code, and thanks to the conciseness of the RV32I base instruction set of RISC-V, peRISCVcope enables students to make virtualization knowledge their own. This paper presents our experiences developing and testing a virtualization laboratory where students implement parts of an interpreter. After the practical experience, peRISCVcope has been proved as a useful pedagogical tool, and, most importantly, students have positively rated the experience.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2021
Journal Articles
Valero, Alejandro; Tejero, Rubén Gran; Gracia, Darío Suárez; Georgescu, Emanuel A.; Ezpeleta, Joaquín; Álvarez, Pedro; Muñoz, Adolfo; Ramos, Luis M.; Ibáñez, Pablo
A learning experience toward the understanding of abstraction-level interactions in parallel applications Journal Article
In: J. Parallel Distributed Comput., vol. 156, pp. 38–52, 2021.
@comment{Journal article: a learning experience on abstraction-level interactions in parallel applications (Journal of Parallel and Distributed Computing, vol. 156, 2021).
NOTE(review): author names are in "First Last" form, so BibTeX takes only the final word as the surname (e.g. "Tejero", "Gracia"); if these are compound surnames, switch to "Last, First" form -- confirm with the authors.
NOTE(review): date is January 1st, which looks like a placeholder -- confirm the actual publication date.}
@article{DBLP:journals/jpdc/ValeroTGGEAMRI21,
  author    = {Alejandro Valero and Rubén Gran Tejero and Darío Suárez Gracia and Emanuel A. Georgescu and Joaquín Ezpeleta and Pedro Álvarez and Adolfo Muñoz and Luis M. Ramos and Pablo Ibáñez},
  title     = {A learning experience toward the understanding of abstraction-level interactions in parallel applications},
  journal   = {Journal of Parallel and Distributed Computing},
  volume    = {156},
  pages     = {38--52},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1016/j.jpdc.2021.05.008},
  url       = {https://doi.org/10.1016/j.jpdc.2021.05.008},
  pubstate  = {published},
  tppubtype = {article}
}
2020
Journal Articles
Valero, Alejandro; Gracia, Darío Suárez; Tejero, Rubén Gran
DC-Patch: A Microarchitectural Fault Patching Technique for GPU Register Files Journal Article
In: IEEE Access, vol. 8, pp. 173276–173288, 2020, ISSN: 2169-3536.
@comment{Journal article: DC-Patch, a microarchitectural fault patching technique for GPU register files operating below the safe supply voltage (IEEE Access, vol. 8, 2020).
NOTE(review): author names are in "First Last" form, so BibTeX takes only the final word as the surname (e.g. "Gracia", "Tejero"); if these are compound surnames, switch to "Last, First" form -- confirm with the authors.}
@article{Valero2020,
  author    = {Alejandro Valero and Darío Suárez Gracia and Rubén Gran Tejero},
  title     = {{DC-Patch}: A Microarchitectural Fault Patching Technique for {GPU} Register Files},
  journal   = {IEEE Access},
  volume    = {8},
  pages     = {173276--173288},
  year      = {2020},
  date      = {2020-09-22},
  issn      = {2169-3536},
  doi       = {10.1109/ACCESS.2020.3025899},
  url       = {https://ieeexplore.ieee.org/document/9203907},
  urldate   = {2020-09-22},
  abstract  = {The ever-increasing parallelism demand of General-Purpose Graphics Processing Unit (GPGPU) applications pushes toward larger and more energy-hungry register files in successive GPU generations. Reducing the supply voltage beyond its safe limit is an effective way to improve the energy efficiency of register files. However, at these operating voltages, the reliability of the circuit is compromised. This work aims to tolerate permanent faults from process variations in large GPU register files operating below the safe supply voltage limit. To do so, this paper proposes a microarchitectural patching technique, DC-Patch, exploiting the inherent data redundancy of applications to compress registers at run-time with neither compiler assistance nor instruction set modifications. Instead of disabling an entire faulty register file entry, DC-Patch leverages the reliable cells within a faulty entry to store compressed register values. Experimental results show that, with more than a third of faulty register entries, DC-Patch ensures a reliable operation of the register file and reduces the energy consumption by 47\% with respect to a conventional register file working at nominal supply voltage. The energy savings are 21\% compared to a voltage noise smoothing scheme operating at the safe supply voltage limit. These benefits are obtained with less than 2 and 6\% impact on the system performance and area, respectively.},
  pubstate  = {published},
  tppubtype = {article}
}