2023
Artículos de revista
Mikkelsen, Carl Christian Kjelgaard; López‐Villellas, Lorién; García‐Risueño, Pablo
Newton’s method revisited: How accurate do we have to be? Artículo de revista
En: Concurrency and Computation: Practice and Experience, vol. 36, no 10, 2023, ISSN: 1532-0634.
@article{KjelgaardMikkelsen2023,
title = {Newton's method revisited: How accurate do we have to be?},
author = {Carl Christian Kjelgaard Mikkelsen and Lorién López-Villellas and Pablo García-Risueño},
url = {http://dx.doi.org/10.1002/cpe.7853},
doi = {10.1002/cpe.7853},
issn = {1532-0634},
year = {2023},
date = {2023-07-01},
journal = {Concurrency and Computation: Practice and Experience},
volume = {36},
number = {10},
publisher = {Wiley},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Navarro-Torres, Agustín; Alastruey-Benedé, Jesús; Ibáñez, Pablo; Viñals-Yúfera, Víctor
BALANCER: bandwidth allocation and cache partitioning for multicore processors Artículo de revista
En: The Journal of Supercomputing, pp. 1–25, 2023.
@article{navarro2023balancer,
  author    = {Agustín Navarro-Torres and Jesús Alastruey-Benedé and Pablo Ibáñez and Víctor Viñals-Yúfera},
  title     = {BALANCER: bandwidth allocation and cache partitioning for multicore processors},
  journal   = {The Journal of Supercomputing},
  pages     = {1--25},
  publisher = {Springer},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.1007/s11227-023-05070-0},
  url       = {https://doi.org/10.1007/s11227-023-05070-0},
  urldate   = {2023-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
López-Villellas, Lorién; Mikkelsen, Carl Christian Kjelgaard; Galano-Frutos, Juan José; Marco-Sola, Santiago; Alastruey-Benedé, Jesús; Ibáñez, Pablo; Moretó, Miquel; Sancho, Javier; García-Risueño, Pablo
Accurate and efficient constrained molecular dynamics of polymers using Newton’s method and special purpose code Artículo de revista
En: Computer Physics Communications, vol. 288, pp. 108742, 2023, ISSN: 0010-4655.
@article{LOPEZVILLELLAS2023108742,
title = {Accurate and efficient constrained molecular dynamics of polymers using Newton's method and special purpose code},
author = {Lorién López-Villellas and Carl Christian Kjelgaard Mikkelsen and Juan José Galano-Frutos and Santiago Marco-Sola and Jesús Alastruey-Benedé and Pablo Ibáñez and Miquel Moretó and Javier Sancho and Pablo García-Risueño},
url = {https://www.sciencedirect.com/science/article/pii/S0010465523000875},
doi = {10.1016/j.cpc.2023.108742},
issn = {0010-4655},
year = {2023},
date = {2023-01-01},
journal = {Computer Physics Communications},
volume = {288},
pages = {108742},
abstract = {In molecular dynamics simulations we can often increase the time step by imposing constraints on bond lengths and bond angles. This allows us to extend the length of the time interval and therefore the range of physical phenomena that we can afford to simulate. We examine the existing algorithms and software for solving nonlinear constraint equations in parallel and we explain why it is necessary to advance the state-of-the-art. We present ILVES-PC, a new algorithm for imposing bond constraints on proteins accurately and efficiently. It solves the same system of differential algebraic equations as the celebrated SHAKE algorithm, but ILVES-PC solves the nonlinear constraint equations using Newton's method rather than the nonlinear Gauss-Seidel method. Moreover, ILVES-PC solves the necessary linear systems using a specialized linear solver that exploits the structure of the protein. ILVES-PC can rapidly solve constraint equations as accurately as the hardware will allow. The run-time of ILVES-PC is proportional to the number of constraints. We have integrated ILVES-PC into GROMACS and simulated proteins of different sizes. Compared with SHAKE, we have achieved speedups of up to 4.9× in single-threaded executions and up to 76× in shared-memory multi-threaded executions. Moreover, ILVES-PC is more accurate than P-LINCS algorithm. Our work is a proof-of-concept of the utility of software designed specifically for the simulation of polymers.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Langarita, Rubén; Armejach, Adrià; Ibáñez, Pablo; Alastruey-Benedé, Jesús; Moretó, Miquel
Porting and Optimizing BWA-MEM2 Using the Fujitsu A64FX Processor Artículo de revista
En: IEEE/ACM Transactions on Computational Biology and Bioinformatics, vol. 20, no 5, pp. 3139-3153, 2023.
@article{10093071,
title = {Porting and Optimizing BWA-MEM2 Using the Fujitsu A64FX Processor},
author = {Rubén Langarita and Adrià Armejach and Pablo Ibáñez and Jesús Alastruey-Benedé and Miquel Moretó},
doi = {10.1109/TCBB.2023.3264514},
year = {2023},
date = {2023-01-01},
journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
volume = {20},
number = {5},
pages = {3139--3153},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Escuin, Carlos; Ibáñez, Pablo; Navarro, Denis; Monreal, Teresa; Llabería, José M; Viñals, Víctor
L2C2: Last-level compressed-contents non-volatile cache and a procedure to forecast performance and lifetime Artículo de revista
En: Plos one, vol. 18, no 2, pp. e0278346, 2023.
@article{escuin2023l2c2,
title = {L2C2: Last-level compressed-contents non-volatile cache and a procedure to forecast performance and lifetime},
author = {Carlos Escuin and Pablo Ibáñez and Denis Navarro and Teresa Monreal and José M Llabería and Víctor Viñals},
year = {2023},
date = {2023-01-01},
journal = {PLOS ONE},
volume = {18},
number = {2},
pages = {e0278346},
publisher = {Public Library of Science},
address = {San Francisco, CA, USA},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Aramendía, Maite; Leite, Diego; Resano, Javier; Resano, Martín; Billimoria, Kharmen; Goenaga-Infante, Heidi
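Isotope Dilution Analysis for Particle Mass Determination Using Single-Particle Inductively Coupled Plasma Time-of-Flight Mass Spectrometry: Application to Size Determination of Silver Nanoparticles Artículo de revista
En: Nanomaterials, vol. 13, no 17, 2023, ISSN: 2079-4991.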
@article{nano13172392,
  author    = {Maite Aramendía and Diego Leite and Javier Resano and Martín Resano and Kharmen Billimoria and Heidi Goenaga-Infante},
  title     = {Isotope Dilution Analysis for Particle Mass Determination Using Single-Particle Inductively Coupled Plasma Time-of-Flight Mass Spectrometry: Application to Size Determination of Silver Nanoparticles},
  journal   = {Nanomaterials},
  volume    = {13},
  number    = {17},
  year      = {2023},
  date      = {2023-01-01},
  issn      = {2079-4991},
  doi       = {10.3390/nano13172392},
  url       = {https://www.mdpi.com/2079-4991/13/17/2392},
  abstract  = {This paper describes methodology based on the application of isotope dilution (ID) in single-particle inductively coupled plasma time-of-flight mass spectrometry (spICP-ToFMS) mode for the mass determination (and sizing) of silver nanoparticles (AgNPs). For this purpose, and considering that the analytical signal in spICP-MS shows a transient nature, an isotope dilution equation used for online work was adapted and used for the mass determination of individual NPs. The method proposed measures NP isotope ratios in a particle-to-particle approach, which allows for the characterization of NP mass (and size) distributions and not only the mean size of the distribution. For the best results to be obtained, our method development (undertaken through the analysis of the reference material NIST RM 8017) included the optimization of the working conditions for the best precision and accuracy in isotope ratios of individual NPs, which had been only reported to date with multicollector instruments. It is shown that the precision of the measurement of these ratios is limited by the magnitude of the signals obtained for each NP in the mass analyzer (counting statistics). However, the uncertainty obtained for the sizing of NPs in this approach can be improved by careful method optimization, where the most important parameters are shown to be the selection of the spike isotopic composition and concentration. Although only AgNPs were targeted in this study, the method presented, with the corresponding adaptations, could be applied to NPs of any other composition that include an element with different naturally available isotopes.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
García-Poyo, M. Carmen; Bérail, Sylvain; Ronzani, Anne Laure; Rello, Luis; García-González, Elena; Nakadi, Flávio V.; Aramendía, Maite; Resano, Javier; Resano, Martín; Pécheyran, Christophe
Cu fractionation, isotopic analysis, and data processing via machine learning: new approaches for the diagnosis and follow up of Wilson’s disease via ICP-MS Artículo de revista
En: J. Anal. At. Spectrom., vol. 38, iss. 1, pp. 229-242, 2023.
@article{D2JA00267A,
title = {Cu fractionation, isotopic analysis, and data processing via machine learning: new approaches for the diagnosis and follow up of Wilson's disease via ICP-MS},
author = {M. Carmen García-Poyo and Sylvain Bérail and Anne Laure Ronzani and Luis Rello and Elena García-González and Flávio V. Nakadi and Maite Aramendía and Javier Resano and Martín Resano and Christophe Pécheyran},
url = {http://dx.doi.org/10.1039/D2JA00267A},
doi = {10.1039/D2JA00267A},
year = {2023},
date = {2023-01-01},
journal = {J. Anal. At. Spectrom.},
volume = {38},
number = {1},
pages = {229--242},
publisher = {The Royal Society of Chemistry},
abstract = {Information about Cu fractionation and Cu isotopic composition can be paramount when investigating Wilson's disease (WD). This information can provide a better understanding of the metabolism of Cu. Most importantly, it may provide an easy way to diagnose and to follow the evolution of WD patients. For such purposes, protocols for Cu determination and Cu isotopic analysis via inductively coupled plasma mass spectrometry were investigated in this work, both in bulk serum and in the exchangeable copper (CuEXC) fractions. The CuEXC protocol provided satisfactory recovery values. Also, no significant mass fractionation during the whole analytical procedure (CuEXC production and/or Cu isolation) was detected. Analyses were carried out in controls (healthy persons), newborns, patients with hepatic disorders, and WD patients. While the results for Cu isotopic analysis are relevant (e.g., δ65Cu values were lower for both WD patients under chelating treatment and patients with hepatic problems in comparison with those values obtained for WD patients under Zn treatments, controls, and newborns) to comprehend Cu metabolism and to follow up the disease, the parameter that can help to better discern between WD patients and the rest of the patients tested (non-WD) was found to be the REC (relative exchangeable Cu). In this study, all the WD patients showed a REC higher than 17%, while the rest showed lower values. However, since establishing a universal threshold is complicated, machine learning was investigated to produce a model that can differentiate between WD and non-WD samples with excellent results (100% accuracy, albeit for a limited sample set). Most importantly, unlike other ML approaches, our model can also provide an uncertainty metric to indicate the reliability of the prediction, overall opening new ways to diagnose WD.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
López-Villellas, Lorién; Mikkelsen, Carl Christian Kjelgaard; Galano-Frutos, Juan José; Marco-Sola, Santiago; Alastruey-Benedé, Jesús; Ibáñez, Pablo; Moretó, Miquel; Sancho, Javier; García-Risueño, Pablo
Accurate and efficient constrained molecular dynamics of polymers using Newton’s method and special purpose code Artículo de revista
En: Computer Physics Communications, vol. 288, pp. 108742, 2023.
@article{lopez2023accurate,
title = {Accurate and efficient constrained molecular dynamics of polymers using Newton's method and special purpose code},
author = {Lorién López-Villellas and Carl Christian Kjelgaard Mikkelsen and Juan José Galano-Frutos and Santiago Marco-Sola and Jesús Alastruey-Benedé and Pablo Ibáñez and Miquel Moretó and Javier Sancho and Pablo García-Risueño},
doi = {10.1016/j.cpc.2023.108742},
issn = {0010-4655},
year = {2023},
date = {2023-01-01},
journal = {Computer Physics Communications},
volume = {288},
pages = {108742},
publisher = {North-Holland},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Proceedings Articles
Toca-Díaz, Yamilka; Muñoz, Nicolás Landeros; Tejero, Ruben Gran; Valero, Alejandro
On Fault-Tolerant Microarchitectural Techniques for Voltage Underscaling in On-Chip Memories of CNN Accelerators Proceedings Article
En: Proceedings of the 26th Euromicro Conference on Digital System Design (DSD 2023), pp. 138-145, 2023, ISBN: 979-8-3503-4419-6.
@inproceedings{Toca-Díaz2023,
title = {On Fault-Tolerant Microarchitectural Techniques for Voltage Underscaling in On-Chip Memories of CNN Accelerators},
author = {Yamilka Toca-Díaz and Nicolás Landeros Muñoz and Ruben Gran Tejero and Alejandro Valero},
url = {https://ieeexplore.ieee.org/document/10456839},
doi = {10.1109/DSD60849.2023.00029},
isbn = {979-8-3503-4419-6},
year = {2023},
date = {2023-09-06},
urldate = {2023-09-06},
booktitle = {Proceedings of the 26th Euromicro Conference on Digital System Design (DSD 2023)},
pages = {138--145},
abstract = {Aggressively underscaling the supply voltage (Vdd) below the safe voltage (Vmin) margin is an effective solution to attain substantial energy savings. Unfortunately, operating at such low voltages is challenging due to the high number of permanent faults as a result of variations in the manufacturing process of current technology nodes. This work characterizes the impact of permanent faults on the accuracy of a Convolutional Neural Network (CNN) inference accelerator with on-chip activation memories supplied at low Vdd below Vmin. Based on these observations, this paper proposes a couple of low-cost microarchitectural techniques, referred to as flipping and patching, that ensure the accuracy of CNN applications despite the presence of permanent faults. Contrary to prior work, the proposed techniques are transparent to the programmer and do not depend on application characteristics. Experimental results show that the proposed techniques maintain the original CNN accuracy with a minimal impact on system performance (less than 0.05%), while reducing the energy consumption of activation memories by 11.2% and 46.7% compared to those of a conventional accelerator operating at safe and nominal supply voltages, respectively.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
López-Villellas, Lorién; Pineda-Sánchez, Esteve; Badouh, Asaf; Marco-Sola, Santiago; Ibáñez, Pablo; Alastruey-Benedé, Jesús; Moretó, Miquel
RISC-V for Genome Data Analysis: Opportunities and Challenges Proceedings Article
En: 2023 38th Conference on Design of Circuits and Integrated Systems (DCIS), pp. 1-6, 2023.
@inproceedings{10335997,
title = {RISC-V for Genome Data Analysis: Opportunities and Challenges},
author = {Lorién López-Villellas and Esteve Pineda-Sánchez and Asaf Badouh and Santiago Marco-Sola and Pablo Ibáñez and Jesús Alastruey-Benedé and Miquel Moretó},
doi = {10.1109/DCIS58620.2023.10335997},
year = {2023},
date = {2023-01-01},
booktitle = {2023 38th Conference on Design of Circuits and Integrated Systems (DCIS)},
pages = {1--6},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Mikkelsen, Carl Christian Kjelgaard; López-Villellas, Lorién; García-Risueño, Pablo
How Accurate Does Newton Have to Be? Proceedings Article
En: Wyrzykowski, Roman; Dongarra, Jack; Deelman, Ewa; Karczewski, Konrad (Ed.): Parallel Processing and Applied Mathematics, pp. 3–15, Springer International Publishing, Cham, 2023, ISBN: 978-3-031-30442-2.
@inproceedings{10.1007/978-3-031-30442-2_1,
title = {How Accurate Does Newton Have to Be?},
author = {Carl Christian Kjelgaard Mikkelsen and Lorién López-Villellas and Pablo García-Risueño},
editor = {Roman Wyrzykowski and Jack Dongarra and Ewa Deelman and Konrad Karczewski},
isbn = {978-3-031-30442-2},
year = {2023},
date = {2023-01-01},
booktitle = {Parallel Processing and Applied Mathematics},
pages = {3--15},
publisher = {Springer International Publishing},
address = {Cham},
abstract = {We analyze the convergence of quasi-Newton methods in exact and finite precision arithmetic. In particular, we derive an upper bound for the stagnation level and we show that any sufficiently exact quasi-Newton method will converge quadratically until stagnation. In the absence of sufficient accuracy, we are likely to retain rapid linear convergence. We confirm our analysis by computing square roots and solving bond constraint equations in the context of molecular dynamics. We briefly discuss implications for parallel solvers.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Escuin, Carlos; García-Redondo, Fernando; Zahedi, Mahdi; Ibáñez, Pablo; Monreal, Teresa; Viñals, Víctor; Llabería, José María; Myers, James; Ryckaert, Julien; Biswas, Dwaipayan; Catthoor, Francky
MNEMOSENE++: Scalable Multi-Tile Design with Enhanced Buffering and VGSOT-MRAM based Compute-in-Memory Crossbar Array Proceedings Article
En: 2023 30th IEEE International Conference on Electronics, Circuits and Systems (ICECS), pp. 1-5, 2023.
@inproceedings{10382874,
title = {MNEMOSENE++: Scalable Multi-Tile Design with Enhanced Buffering and VGSOT-MRAM based Compute-in-Memory Crossbar Array},
author = {Carlos Escuin and Fernando García-Redondo and Mahdi Zahedi and Pablo Ibáñez and Teresa Monreal and Víctor Viñals and José María Llabería and James Myers and Julien Ryckaert and Dwaipayan Biswas and Francky Catthoor},
doi = {10.1109/ICECS58634.2023.10382874},
year = {2023},
date = {2023-01-01},
booktitle = {2023 30th IEEE International Conference on Electronics, Circuits and Systems (ICECS)},
pages = {1--5},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Escuin, Carlos; Khan, Asif Ali; Ibáñez, Pablo; Monreal, Teresa; Castrillon, Jeronimo; Viñals, Víctor
Compression-Aware and Performance-Efficient Insertion Policies for Long-Lasting Hybrid LLCs Proceedings Article
En: 2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp. 179-192, 2023.
@inproceedings{10070968,
title = {Compression-Aware and Performance-Efficient Insertion Policies for Long-Lasting Hybrid LLCs},
author = {Carlos Escuin and Asif Ali Khan and Pablo Ibáñez and Teresa Monreal and Jeronimo Castrillon and Víctor Viñals},
doi = {10.1109/HPCA56546.2023.10070968},
year = {2023},
date = {2023-01-01},
booktitle = {2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)},
pages = {179--192},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Escuin, Carlos; Khan, Asif Ali; Ibáñez, Pablo; Monreal, Teresa; Navarro, Denis; Llabería, José M; Castrillon, Jeronimo; Viñals, Víctor
Leveraging data compression for performance-efficient and long-lasting nvm-based last-level caches Proceedings Article
En: 14th Annual Non-Volatile Memory Workshop. University of California San Diego, 2023.
@inproceedings{escuin2023leveraging,
title = {Leveraging data compression for performance-efficient and long-lasting nvm-based last-level caches},
author = {Carlos Escuin and Asif Ali Khan and Pablo Ibáñez and Teresa Monreal and Denis Navarro and José M Llabería and Jeronimo Castrillon and Víctor Viñals},
year = {2023},
date = {2023-01-01},
booktitle = {14th Annual Non-Volatile Memory Workshop. University of California San Diego},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
López-Villellas, Lorién; Pineda-Sánchez, Esteve; Badouh, Asaf; Marco-Sola, Santiago; Ibáñez, Pablo; Alastruey-Benedé, Jesús; Moretó, Miquel
RISC-V for Genome Data Analysis: Opportunities and Challenges Proceedings Article
En: 2023 38th Conference on Design of Circuits and Integrated Systems (DCIS), pp. 1–6, 2023.
@inproceedings{lopez2023risc,
title = {RISC-V for Genome Data Analysis: Opportunities and Challenges},
author = {Lorién López-Villellas and Esteve Pineda-Sánchez and Asaf Badouh and Santiago Marco-Sola and Pablo Ibáñez and Jesús Alastruey-Benedé and Miquel Moretó},
doi = {10.1109/DCIS58620.2023.10335997},
year = {2023},
date = {2023-01-01},
booktitle = {2023 38th Conference on Design of Circuits and Integrated Systems (DCIS)},
pages = {1--6},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Soria-Pardos, Víctor; Armejach, Adria; Mück, Tiago; Suárez-Gracia, Dario; Joao, José; Rico, Alejandro; Moretó, Miquel
DynAMO: Improving Parallelism Through Dynamic Placement of Atomic Memory Operations Proceedings Article
En: Proceedings of the 50th Annual International Symposium on Computer Architecture, pp. 1–13, ACM, 2023.
@inproceedings{soria2023dynamo,
title = {DynAMO: Improving Parallelism Through Dynamic Placement of Atomic Memory Operations},
author = {Víctor Soria-Pardos and Adria Armejach and Tiago Mück and Dario Suárez-Gracia and José Joao and Alejandro Rico and Miquel Moretó},
url = {https://dl.acm.org/doi/abs/10.1145/3579371.3589065},
doi = {10.1145/3579371.3589065},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture},
pages = {1--13},
publisher = {ACM},
abstract = {With increasing core counts in modern multi-core designs, the overhead of synchronization jeopardizes the scalability and efficiency of parallel applications. To mitigate these overheads, modern cache-coherent protocols offer support for Atomic Memory Operations (AMOs) that can be executed near-core (near) or remotely in the on-chip memory hierarchy (far).
This paper evaluates current available static AMO execution policies implemented in multi-core Systems-on-Chip (SoC) designs, which select AMOs' execution placement (near or far) based on the cache block coherence state. We propose three static policies and show that the performance of static policies is application dependent. Moreover, we show that one of our proposed static policies outperforms currently available implementations.
Furthermore, we propose DynAMO, a predictor that selects the best location to execute the AMOs. DynAMO identifies the different locality patterns to make informed decisions, improving AMO latency and increasing overall throughput. DynAMO outperforms the best-performing static policy and provides geometric mean speed-ups of 1.09× across all workloads and 1.31× on AMO-intensive applications with respect to executing all AMOs near.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
This paper evaluates current available static AMO execution policies implemented in multi-core Systems-on-Chip (SoC) designs, which select AMOs' execution placement (near or far) based on the cache block coherence state. We propose three static policies and show that the performance of static policies is application dependent. Moreover, we show that one of our proposed static policies outperforms currently available implementations.
Furthermore, we propose DynAMO, a predictor that selects the best location to execute the AMOs. DynAMO identifies the different locality patterns to make informed decisions, improving AMO latency and increasing overall throughput. DynAMO outperforms the best-performing static policy and provides geometric mean speed-ups of 1.09× across all workloads and 1.31× on AMO-intensive applications with respect to executing all AMOs near.
Siracusa, M.; Soria-Pardos, V.; Sgherzi, F.; Randall, J.; Joseph, D. J.; Planas, M. Moretó; Armejach, A.
A Tensor Marshaling Unit for Sparse Tensor Algebra on General-Purpose Processors Proceedings Article
En: Proceedings of the 56th Annual IEEE/ACM International Symposium on Microarchitecture, ACM, 2023.
@inproceedings{siracusa2023tensor,
title = {A Tensor Marshaling Unit for Sparse Tensor Algebra on General-Purpose Processors},
author = {M. Siracusa and V. Soria-Pardos and F. Sgherzi and J. Randall and D. J. Joseph and M. Moretó Planas and A. Armejach},
url = {https://dl.acm.org/doi/abs/10.1145/3613424.3614284},
doi = {10.1145/3613424.3614284},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
booktitle = {Proceedings of the 56th Annual IEEE/ACM International Symposium on Microarchitecture},
publisher = {ACM},
abstract = {This paper proposes the Tensor Marshaling Unit (TMU), a near-core programmable dataflow engine for multicore architectures that accelerates tensor traversals and merging, the most critical operations of sparse tensor workloads running on today’s computing infrastructures. The TMU leverages a novel multi-lane design that enables parallel tensor loading and merging, which naturally produces vector operands that are marshaled into the core for efficient SIMD computation. The TMU supports all the necessary primitives to be tensor-format and tensor-algebra complete. We evaluate the TMU on a simulated multicore system using a broad set of tensor algebra workloads, achieving 3.6 ×, 2.8 ×, and 4.9 × speedups over memory-intensive, compute-intensive, and merge-intensive vectorized software implementations, respectively.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Doblas, Max; Candón, Gerard; Carril, Xavier; Domínguez, Marc; Erra, Enric; González, Alberto; Hernández, César; Jiménez, Víctor; Kostalampros, Vatistas; Langarita, Rubén; Leyva, Neiel; López-Paradís, Guillem; Mendoza, Jonnatan; Oltra, Josep; Pavón, Julián; Ramírez, Cristóbal; Rodas, Narcís; Reggiani, Enrico; Rodríguez, Mario; Rojas, Carlos; Ruiz, Abraham; Safadi, Hugo; Soria, Víctor; Suanes, Alejandro; Vargas, Iván; Arreza, Fernando; Figueras, Roger; Fontova-Musté, Pau; Marimon, Joan; Martínez, Ricardo; Moreno, Sergio; Sacristán, Jordi; Alonso, Oscar; Aragonés, Xavier; Cristal, Adrián; Diéguez, Ángel; López, Manuel; Mateo, Diego; Moll, Francesc; Moretó, Miquel; Palomar, Oscar; Ramírez, Marco A; Serra-Graells, Francesc; Sonmez, Nehir; Terés, Lluís; Unsal, Osman; Valero, Mateo; Villa, Luis
Sargantana: An Academic SoC RISC-V Processor in 22nm FDSOI Technology Proceedings Article
En: 2023 38th Conference on Design of Circuits and Integrated Systems (DCIS), pp. 1–6, IEEE, 2023.
@inproceedings{doblas2023sargantana,
title = {Sargantana: An Academic SoC RISC-V Processor in 22nm FDSOI Technology},
author = {Max Doblas and Gerard Candón and Xavier Carril and Marc Domínguez and Enric Erra and Alberto González and César Hernández and Víctor Jiménez and Vatistas Kostalampros and Rubén Langarita and Neiel Leyva and Guillem López-Paradís and Jonnatan Mendoza and Josep Oltra and Julián Pavón and Cristóbal Ramírez and Narcís Rodas and Enrico Reggiani and Mario Rodríguez and Carlos Rojas and Abraham Ruiz and Hugo Safadi and Víctor Soria and Alejandro Suanes and Iván Vargas and Fernando Arreza and Roger Figueras and Pau Fontova-Musté and Joan Marimon and Ricardo Martínez and Sergio Moreno and Jordi Sacristán and Oscar Alonso and Xavier Aragonés and Adrián Cristal and Ángel Diéguez and Manuel López and Diego Mateo and Francesc Moll and Miquel Moretó and Oscar Palomar and Marco A Ramírez and Francesc Serra-Graells and Nehir Sonmez and Lluís Terés and Osman Unsal and Mateo Valero and Luis Villa},
doi = {10.1109/DCIS58620.2023.10335976},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
booktitle = {2023 38th Conference on Design of Circuits and Integrated Systems (DCIS)},
pages = {1--6},
organization = {IEEE},
abstract = {This paper describes the Sargantana System on chip (SoC), a 64-bit RISC-V single core processor designed by a number of academic institutions and manufactured in 22 nm FDSOI technology: BSC, UPC, UB, UAB, CIC-IPN and IMB-CNM (CSIC). The SoC includes the processor as well as, among other components, a Phase Locked Loop (PLL) operating up to 2 GHz, interfaces to HyperRAM and a Serdes up to 8 Gbps. The processor has demonstrated experimental correct operation at 800 MHz.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2022
Artículos de revista
Muñoz, Nicolás Landeros; Valero, Alejandro; Tejero, Rubén Gran; Zoni, Davide
Gated-CNN: Combating NBTI and HCI aging effects in on-chip activation memories of Convolutional Neural Network accelerators Artículo de revista
En: Journal of Systems Architecture, vol. 128, pp. 1-13, 2022, ISSN: 1383-7621.
@article{Muñoz2022,
title = {Gated-CNN: Combating NBTI and HCI aging effects in on-chip activation memories of Convolutional Neural Network accelerators},
author = {Nicolás Landeros Muñoz and Alejandro Valero and Rubén Gran Tejero and Davide Zoni},
url = {https://www.sciencedirect.com/science/article/pii/S1383762122001072},
doi = {10.1016/j.sysarc.2022.102553},
issn = {1383-7621},
year = {2022},
date = {2022-07-01},
urldate = {2022-07-01},
journal = {Journal of Systems Architecture},
volume = {128},
pages = {1--13},
abstract = {Negative Bias Temperature Instability (NBTI) and Hot Carrier Injection (HCI) are two of the main reliability threats in current technology nodes. These aging phenomena degrade the transistor’s threshold voltage (Vth) over the lifetime of a digital circuit, resulting in slower transistors that eventually lead to a faulty operation when the critical paths become longer than the processor cycle time. Among all the transistors on a chip, the most vulnerable transistors to such wearout effects are those used to implement SRAM storage, since memory cells are continuously degrading. In particular, NBTI ages PMOS cell transistors when a given logic value is stored for a long period (i.e., a long duty cycle), whereas HCI ages NMOS cell transistors not only when the stored value flips but also when it is accessed. This work focuses on mitigating aging in the on-chip SRAM memories of Convolutional Neural Network (CNN) accelerators storing activations. This paper makes two main contributions. At the software level, we quantify the aging induced by current CNN benchmarks with a characterization study of duty cycle, flip, and access patterns in every activation memory cell. Based on the insights from this study, this work proposes a novel microarchitectural technique, Gated-CNN, that ensures a uniform aging degradation of every memory cell. To do so, Gated-CNN exploits power-gating and address rotation techniques tailored to the memory demands and temporal/spatial localities exhibited by CNN applications, as well as the memory organization and management of CNN accelerators. Experimental results show that, compared to a conventional design, the average Vth degradation savings are at least as much as 49% depending on the type of transistor.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Langarita, Rubén; Armejach, Adrià; Setoain, Javier; Ibáñez-Marín, Pablo; Alastruey-Benedé, Jesús; Moretó, Miquel
Compressed Sparse FM-Index: Fast Sequence Alignment Using Large K-Steps Artículo de revista
En: IEEE ACM Trans. Comput. Biol. Bioinform., vol. 19, no 1, pp. 355–368, 2022.
@article{DBLP:journals/tcbb/LangaritaASIAM22,
title = {Compressed Sparse FM-Index: Fast Sequence Alignment Using Large K-Steps},
author = {Rubén Langarita and Adrià Armejach and Javier Setoain and Pablo Ibáñez-Marín and Jesús Alastruey-Benedé and Miquel Moretó},
url = {https://doi.org/10.1109/TCBB.2020.3000253},
doi = {10.1109/TCBB.2020.3000253},
year = {2022},
date = {2022-01-01},
journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
volume = {19},
number = {1},
pages = {355--368},
keywords = {},
pubstate = {published},
tppubtype = {article}
}