From 6023896b0a4d8bf101b210b155721d74a2a06b0c Mon Sep 17 00:00:00 2001 From: Parshintsev Anatoly Date: Mon, 24 Apr 2023 21:36:36 +0300 Subject: [PATCH 001/127] fixup sb_write/sb_read to handle exceptions properly system bus read/write operations could lead to a variety of memory-related exceptions. Before this patch not every memory exception was handled. This could lead to simulator crashes: an example is when debugger (like OpenOCD) issues non-aligned memory read. Signed-off-by: Parshintsev Anatoly --- riscv/debug_module.cc | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/riscv/debug_module.cc b/riscv/debug_module.cc index 27dbe66ecb..9018ccf544 100644 --- a/riscv/debug_module.cc +++ b/riscv/debug_module.cc @@ -314,7 +314,7 @@ void debug_module_t::sb_read() } else { sbcs.error = 3; } - } catch (trap_load_access_fault& t) { + } catch (const mem_trap_t& ) { sbcs.error = 2; } } @@ -323,17 +323,21 @@ void debug_module_t::sb_write() { reg_t address = ((uint64_t) sbaddress[1] << 32) | sbaddress[0]; D(fprintf(stderr, "sb_write() 0x%x @ 0x%lx\n", sbdata[0], address)); - if (sbcs.sbaccess == 0 && config.max_sba_data_width >= 8) { - sim->debug_mmu->store(address, sbdata[0]); - } else if (sbcs.sbaccess == 1 && config.max_sba_data_width >= 16) { - sim->debug_mmu->store(address, sbdata[0]); - } else if (sbcs.sbaccess == 2 && config.max_sba_data_width >= 32) { - sim->debug_mmu->store(address, sbdata[0]); - } else if (sbcs.sbaccess == 3 && config.max_sba_data_width >= 64) { - sim->debug_mmu->store(address, - (((uint64_t) sbdata[1]) << 32) | sbdata[0]); - } else { - sbcs.error = 3; + try { + if (sbcs.sbaccess == 0 && config.max_sba_data_width >= 8) { + sim->debug_mmu->store(address, sbdata[0]); + } else if (sbcs.sbaccess == 1 && config.max_sba_data_width >= 16) { + sim->debug_mmu->store(address, sbdata[0]); + } else if (sbcs.sbaccess == 2 && config.max_sba_data_width >= 32) { + sim->debug_mmu->store(address, sbdata[0]); + } else if 
(sbcs.sbaccess == 3 && config.max_sba_data_width >= 64) { + sim->debug_mmu->store(address, + (((uint64_t) sbdata[1]) << 32) | sbdata[0]); + } else { + sbcs.error = 3; + } + } catch (const mem_trap_t& ) { + sbcs.error = 2; } } From afe3987685f11058b28988ac9d7e484368246937 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 14 Apr 2023 22:35:12 +0800 Subject: [PATCH 002/127] Add conversion functions between bfloat16 and float32 in softfloat --- softfloat/bf16_to_f32.c | 80 ++++++++++++++++++++++++ softfloat/f32_to_bf16.c | 92 +++++++++++++++++++++++++++ softfloat/internals.h | 6 ++ softfloat/s_roundPackToBF16.c | 113 ++++++++++++++++++++++++++++++++++ softfloat/softfloat.h | 2 + softfloat/softfloat.mk.in | 3 + softfloat/softfloat_types.h | 1 + softfloat/specialize.h | 19 ++++++ 8 files changed, 316 insertions(+) create mode 100644 softfloat/bf16_to_f32.c create mode 100644 softfloat/f32_to_bf16.c create mode 100644 softfloat/s_roundPackToBF16.c diff --git a/softfloat/bf16_to_f32.c b/softfloat/bf16_to_f32.c new file mode 100644 index 0000000000..7e49002915 --- /dev/null +++ b/softfloat/bf16_to_f32.c @@ -0,0 +1,80 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +float32_t bf16_to_f32( bfloat16_t a ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + bool sign; + int_fast16_t exp; + uint_fast16_t frac; + struct commonNaN commonNaN; + uint_fast32_t uiZ; + union ui32_f32 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signBF16UI( uiA ); + exp = expBF16UI( uiA ); + frac = fracBF16UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0xFF ) { + if ( frac ) { + softfloat_bf16UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToF32UI( &commonNaN ); + } else { + uiZ = packToF32UI( sign, 0xFF, 0 ); + } + goto uiZ; + } + 
/*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uiZ = packToF32UI( sign, exp, (uint_fast32_t) frac<<16 ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} diff --git a/softfloat/f32_to_bf16.c b/softfloat/f32_to_bf16.c new file mode 100644 index 0000000000..92a2e6d850 --- /dev/null +++ b/softfloat/f32_to_bf16.c @@ -0,0 +1,92 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +bfloat16_t f32_to_bf16( float32_t a ) +{ + union ui32_f32 uA; + uint_fast32_t uiA; + bool sign; + int_fast16_t exp; + uint_fast32_t frac; + struct commonNaN commonNaN; + struct exp16_sig32 normExpSig; + uint_fast16_t uiZ, frac16; + union ui16_f16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signF32UI( uiA ); + exp = expF32UI( uiA ); + frac = fracF32UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0xFF ) { + if ( frac ) { + softfloat_f32UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToBF16UI( &commonNaN ); + } else { + uiZ = packToBF16UI( sign, 0xFF, 0 ); + } + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( ! 
(exp | frac) ) { + uiZ = packToBF16UI( sign, 0, 0 ); + goto uiZ; + } else if ( !exp ) { + normExpSig = softfloat_normSubnormalF32Sig( frac ); + exp = normExpSig.exp; + frac = normExpSig.sig; + } + frac16 = frac>>9 | ((frac & 0x1FF) != 0); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + return softfloat_roundPackToBF16( sign, exp - 1, frac16 | 0x4000 ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} diff --git a/softfloat/internals.h b/softfloat/internals.h index 55585e967e..ae94427f07 100644 --- a/softfloat/internals.h +++ b/softfloat/internals.h @@ -89,6 +89,11 @@ int_fast64_t softfloat_roundMToI64( bool, uint32_t *, uint_fast8_t, bool ); #define fracF16UI( a ) ((a) & 0x03FF) #define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig)) +#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15)) +#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF) +#define fracBF16UI( a ) ((a) & 0x07F) +#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig)) + #define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF)) struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; }; @@ -103,6 +108,7 @@ float16_t softfloat_mulAddF16( uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t ); +bfloat16_t softfloat_roundPackToBF16( bool, int_fast16_t, uint_fast16_t ); /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ #define signF32UI( a ) ((bool) ((uint32_t) (a)>>31)) diff --git a/softfloat/s_roundPackToBF16.c b/softfloat/s_roundPackToBF16.c new file mode 100644 index 0000000000..f3d0b75f43 --- /dev/null +++ b/softfloat/s_roundPackToBF16.c @@ -0,0 +1,113 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE 
Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include +#include "platform.h" +#include "internals.h" +#include "softfloat.h" + +bfloat16_t + softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig ) +{ + uint_fast8_t roundingMode; + bool roundNearEven; + uint_fast8_t roundIncrement, roundBits; + bool isTiny; + uint_fast16_t uiZ; + union ui16_f16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + roundingMode = softfloat_roundingMode; + roundNearEven = (roundingMode == softfloat_round_near_even); + roundIncrement = 0x40; + if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) { + roundIncrement = + (roundingMode + == (sign ? softfloat_round_min : softfloat_round_max)) + ? 0x7F + : 0; + } + roundBits = sig & 0x7F; + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( 0xFD <= (unsigned int) exp ) { + if ( exp < 0 ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + isTiny = + (softfloat_detectTininess == softfloat_tininess_beforeRounding) + || (exp < -1) || (sig + roundIncrement < 0x8000); + sig = softfloat_shiftRightJam32( sig, -exp ); + exp = 0; + roundBits = sig & 0x7F; + if ( isTiny && roundBits ) { + softfloat_raiseFlags( softfloat_flag_underflow ); + } + } else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + softfloat_raiseFlags( + softfloat_flag_overflow | softfloat_flag_inexact ); + uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! 
roundIncrement; + goto uiZ; + } + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + sig = (sig + roundIncrement)>>7; + if ( roundBits ) { + softfloat_exceptionFlags |= softfloat_flag_inexact; +#ifdef SOFTFLOAT_ROUND_ODD + if ( roundingMode == softfloat_round_odd ) { + sig |= 1; + goto packReturn; + } +#endif + } + sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven); + if ( ! sig ) exp = 0; + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + packReturn: + uiZ = packToBF16UI( sign, exp, sig ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index bdac1be263..eb78d74de7 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -154,6 +154,7 @@ uint_fast64_t f16_to_ui64_r_minMag( float16_t, bool ); int_fast32_t f16_to_i32_r_minMag( float16_t, bool ); int_fast64_t f16_to_i64_r_minMag( float16_t, bool ); float32_t f16_to_f32( float16_t ); +float32_t bf16_to_f32( bfloat16_t ); float64_t f16_to_f64( float16_t ); #ifdef SOFTFLOAT_FAST_INT64 extFloat80_t f16_to_extF80( float16_t ); @@ -196,6 +197,7 @@ uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool ); int_fast32_t f32_to_i32_r_minMag( float32_t, bool ); int_fast64_t f32_to_i64_r_minMag( float32_t, bool ); float16_t f32_to_f16( float32_t ); +bfloat16_t f32_to_bf16( float32_t ); float64_t f32_to_f64( float32_t ); #ifdef SOFTFLOAT_FAST_INT64 extFloat80_t f32_to_extF80( float32_t ); diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in index e7f4a3e415..9c780ac751 100644 --- a/softfloat/softfloat.mk.in +++ b/softfloat/softfloat.mk.in @@ -45,6 +45,7 @@ softfloat_c_srcs = \ f16_sqrt.c \ f16_sub.c \ f16_to_f128.c \ + bf16_to_f32.c \ f16_to_f32.c \ f16_to_f64.c \ f16_to_i8.c \ @@ -76,6 +77,7 @@ softfloat_c_srcs = \ f32_sqrt.c \ 
f32_sub.c \ f32_to_f128.c \ + f32_to_bf16.c \ f32_to_f16.c \ f32_to_f64.c \ f32_to_i16.c \ @@ -181,6 +183,7 @@ softfloat_c_srcs = \ s_roundMToUI64.c \ s_roundPackMToI64.c \ s_roundPackMToUI64.c \ + s_roundPackToBF16.c \ s_roundPackToF128.c \ s_roundPackToF16.c \ s_roundPackToF32.c \ diff --git a/softfloat/softfloat_types.h b/softfloat/softfloat_types.h index af1888f9b9..34c518f438 100644 --- a/softfloat/softfloat_types.h +++ b/softfloat/softfloat_types.h @@ -48,6 +48,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | (typically 'float' and 'double', and possibly 'long double'). *----------------------------------------------------------------------------*/ typedef struct { uint16_t v; } float16_t; +typedef float16_t bfloat16_t; typedef struct { uint32_t v; } float32_t; typedef struct { uint64_t v; } float64_t; typedef struct { uint64_t v[2]; } float128_t; diff --git a/softfloat/specialize.h b/softfloat/specialize.h index 19504b6b62..fb3761d7c7 100644 --- a/softfloat/specialize.h +++ b/softfloat/specialize.h @@ -98,6 +98,11 @@ struct commonNaN { char _unused; }; *----------------------------------------------------------------------------*/ #define defaultNaNF16UI 0x7E00 +/*---------------------------------------------------------------------------- +| The bit pattern for a default generated bfloat16 (BF16) NaN. +*----------------------------------------------------------------------------*/ +#define defaultNaNBF16UI 0x7FC0 + /*---------------------------------------------------------------------------- | Returns true when 16-bit unsigned integer `uiA' has the bit pattern of a | 16-bit floating-point signaling NaN. @@ -113,6 +118,20 @@ struct commonNaN { char _unused; }; *----------------------------------------------------------------------------*/ #define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! 
((uiA) & 0x0200) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) +/*---------------------------------------------------------------------------- +| Assuming `uiA' has the bit pattern of a bfloat16 (BF16) NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ +#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x040) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by `aPtr' into a bfloat16 (BF16) +| NaN, and returns the bit pattern of this value as an unsigned integer. +*----------------------------------------------------------------------------*/ +#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI) + /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by `aPtr' into a 16-bit floating-point | NaN, and returns the bit pattern of this value as an unsigned integer. 
From fecdad2b62cbc4a6baa51d301f0a49d89b81d675 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 14 Apr 2023 22:36:39 +0800 Subject: [PATCH 003/127] Add isa string support for Zfbfmin/Zvfbfmin/Zvfbfwma --- riscv/isa_parser.cc | 14 ++++++++++++++ riscv/isa_parser.h | 3 +++ 2 files changed, 17 insertions(+) diff --git a/riscv/isa_parser.cc b/riscv/isa_parser.cc index 7335a147e4..be5e51b7be 100644 --- a/riscv/isa_parser.cc +++ b/riscv/isa_parser.cc @@ -139,6 +139,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) } else if (ext_str == "zdinx") { extension_table[EXT_ZFINX] = true; extension_table[EXT_ZDINX] = true; + } else if (ext_str == "zfbfmin") { + extension_table[EXT_ZFBFMIN] = true; } else if (ext_str == "zfinx") { extension_table[EXT_ZFINX] = true; } else if (ext_str == "zhinx") { @@ -232,6 +234,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZICOND] = true; } else if (ext_str == "zihpm") { extension_table[EXT_ZIHPM] = true; + } else if (ext_str == "zvfbfmin") { + extension_table[EXT_ZVFBFMIN] = true; + } else if (ext_str == "zvfbfwma") { + extension_table[EXT_ZVFBFWMA] = true; } else if (ext_str == "sstc") { extension_table[EXT_SSTC] = true; } else if (ext_str[0] == 'x') { @@ -279,6 +285,14 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) bad_isa_string(str, ("can't parse: " + std::string(p)).c_str()); } + if (extension_table[EXT_ZFBFMIN] && !extension_table['F']) { + bad_isa_string(str, "'Zfbfmin' extension requires 'F' extension"); + } + + if ((extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZVFBFWMA]) && !extension_table['V']) { + bad_isa_string(str, "'Zvfbfmin/Zvfbfwma' extension requires 'V' extension"); + } + if (extension_table['C']) { extension_table[EXT_ZCA] = true; if (extension_table['F'] && max_xlen == 32) diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 9effd164d8..04859b6ec4 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -49,6 +49,7 @@ typedef 
enum { EXT_SVINVAL, EXT_ZDINX, EXT_ZFA, + EXT_ZFBFMIN, EXT_ZFINX, EXT_ZHINX, EXT_ZHINXMIN, @@ -57,6 +58,8 @@ typedef enum { EXT_ZICNTR, EXT_ZICOND, EXT_ZIHPM, + EXT_ZVFBFMIN, + EXT_ZVFBFWMA, EXT_XZBP, EXT_XZBS, EXT_XZBE, From c12d0782173ba00531bd48f653238d81cb9c3484 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 14 Apr 2023 22:38:49 +0800 Subject: [PATCH 004/127] Update encoding.h to add instructions for BF16 extensions --- riscv/encoding.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/riscv/encoding.h b/riscv/encoding.h index 48cb5c0058..e39f535ceb 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (5adef50) + * https://github.com/riscv/riscv-opcodes (8d70e77) */ #ifndef RISCV_CSR_ENCODING_H @@ -751,6 +751,8 @@ #define MASK_FCLASS_Q 0xfff0707f #define MATCH_FCLASS_S 0xe0001053 #define MASK_FCLASS_S 0xfff0707f +#define MATCH_FCVT_BF16_S 0x44800053 +#define MASK_FCVT_BF16_S 0xfff0007f #define MATCH_FCVT_D_H 0x42200053 #define MASK_FCVT_D_H 0xfff0007f #define MATCH_FCVT_D_L 0xd2200053 @@ -809,6 +811,8 @@ #define MASK_FCVT_Q_W 0xfff0007f #define MATCH_FCVT_Q_WU 0xd6100053 #define MASK_FCVT_Q_WU 0xfff0007f +#define MATCH_FCVT_S_BF16 0x40600053 +#define MASK_FCVT_S_BF16 0xfff0007f #define MATCH_FCVT_S_D 0x40100053 #define MASK_FCVT_S_D 0xfff0007f #define MATCH_FCVT_S_H 0x40200053 @@ -2165,6 +2169,8 @@ #define MASK_VFNCVT_X_F_W 0xfc0ff07f #define MATCH_VFNCVT_XU_F_W 0x48081057 #define MASK_VFNCVT_XU_F_W 0xfc0ff07f +#define MATCH_VFNCVTBF16_F_F_W 0x480e9057 +#define MASK_VFNCVTBF16_F_F_W 0xfc0ff07f #define MATCH_VFNMACC_VF 0xb4005057 #define MASK_VFNMACC_VF 0xfc00707f #define MATCH_VFNMACC_VV 0xb4001057 @@ -2241,10 +2247,16 @@ #define MASK_VFWCVT_X_F_V 0xfc0ff07f #define MATCH_VFWCVT_XU_F_V 0x48041057 #define MASK_VFWCVT_XU_F_V 0xfc0ff07f +#define MATCH_VFWCVTBF16_F_F_V 0x48069057 +#define MASK_VFWCVTBF16_F_F_V 
0xfc0ff07f #define MATCH_VFWMACC_VF 0xf0005057 #define MASK_VFWMACC_VF 0xfc00707f #define MATCH_VFWMACC_VV 0xf0001057 #define MASK_VFWMACC_VV 0xfc00707f +#define MATCH_VFWMACCBF16_VF 0xec005057 +#define MASK_VFWMACCBF16_VF 0xfc00707f +#define MATCH_VFWMACCBF16_VV 0xec001057 +#define MASK_VFWMACCBF16_VV 0xfc00707f #define MATCH_VFWMSAC_VF 0xf8005057 #define MASK_VFWMSAC_VF 0xfc00707f #define MATCH_VFWMSAC_VV 0xf8001057 @@ -3392,8 +3404,11 @@ #define INSN_FIELD_AMOOP 0xf8000000 #define INSN_FIELD_NF 0xe0000000 #define INSN_FIELD_SIMM5 0xf8000 +#define INSN_FIELD_ZIMM5 0xf8000 #define INSN_FIELD_ZIMM10 0x3ff00000 #define INSN_FIELD_ZIMM11 0x7ff00000 +#define INSN_FIELD_ZIMM6HI 0x4000000 +#define INSN_FIELD_ZIMM6LO 0xf8000 #define INSN_FIELD_C_NZUIMM10 0x1fe0 #define INSN_FIELD_C_UIMM7LO 0x60 #define INSN_FIELD_C_UIMM7HI 0x1c00 @@ -3636,6 +3651,7 @@ DECLARE_INSN(fclass_d, MATCH_FCLASS_D, MASK_FCLASS_D) DECLARE_INSN(fclass_h, MATCH_FCLASS_H, MASK_FCLASS_H) DECLARE_INSN(fclass_q, MATCH_FCLASS_Q, MASK_FCLASS_Q) DECLARE_INSN(fclass_s, MATCH_FCLASS_S, MASK_FCLASS_S) +DECLARE_INSN(fcvt_bf16_s, MATCH_FCVT_BF16_S, MASK_FCVT_BF16_S) DECLARE_INSN(fcvt_d_h, MATCH_FCVT_D_H, MASK_FCVT_D_H) DECLARE_INSN(fcvt_d_l, MATCH_FCVT_D_L, MASK_FCVT_D_L) DECLARE_INSN(fcvt_d_lu, MATCH_FCVT_D_LU, MASK_FCVT_D_LU) @@ -3665,6 +3681,7 @@ DECLARE_INSN(fcvt_q_lu, MATCH_FCVT_Q_LU, MASK_FCVT_Q_LU) DECLARE_INSN(fcvt_q_s, MATCH_FCVT_Q_S, MASK_FCVT_Q_S) DECLARE_INSN(fcvt_q_w, MATCH_FCVT_Q_W, MASK_FCVT_Q_W) DECLARE_INSN(fcvt_q_wu, MATCH_FCVT_Q_WU, MASK_FCVT_Q_WU) +DECLARE_INSN(fcvt_s_bf16, MATCH_FCVT_S_BF16, MASK_FCVT_S_BF16) DECLARE_INSN(fcvt_s_d, MATCH_FCVT_S_D, MASK_FCVT_S_D) DECLARE_INSN(fcvt_s_h, MATCH_FCVT_S_H, MASK_FCVT_S_H) DECLARE_INSN(fcvt_s_l, MATCH_FCVT_S_L, MASK_FCVT_S_L) @@ -4343,6 +4360,7 @@ DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W) DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W) DECLARE_INSN(vfncvt_x_f_w, 
MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W) DECLARE_INSN(vfncvt_xu_f_w, MATCH_VFNCVT_XU_F_W, MASK_VFNCVT_XU_F_W) +DECLARE_INSN(vfncvtbf16_f_f_w, MATCH_VFNCVTBF16_F_F_W, MASK_VFNCVTBF16_F_F_W) DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF) DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF) @@ -4381,8 +4399,11 @@ DECLARE_INSN(vfwcvt_rtz_x_f_v, MATCH_VFWCVT_RTZ_X_F_V, MASK_VFWCVT_RTZ_X_F_V) DECLARE_INSN(vfwcvt_rtz_xu_f_v, MATCH_VFWCVT_RTZ_XU_F_V, MASK_VFWCVT_RTZ_XU_F_V) DECLARE_INSN(vfwcvt_x_f_v, MATCH_VFWCVT_X_F_V, MASK_VFWCVT_X_F_V) DECLARE_INSN(vfwcvt_xu_f_v, MATCH_VFWCVT_XU_F_V, MASK_VFWCVT_XU_F_V) +DECLARE_INSN(vfwcvtbf16_f_f_v, MATCH_VFWCVTBF16_F_F_V, MASK_VFWCVTBF16_F_F_V) DECLARE_INSN(vfwmacc_vf, MATCH_VFWMACC_VF, MASK_VFWMACC_VF) DECLARE_INSN(vfwmacc_vv, MATCH_VFWMACC_VV, MASK_VFWMACC_VV) +DECLARE_INSN(vfwmaccbf16_vf, MATCH_VFWMACCBF16_VF, MASK_VFWMACCBF16_VF) +DECLARE_INSN(vfwmaccbf16_vv, MATCH_VFWMACCBF16_VV, MASK_VFWMACCBF16_VV) DECLARE_INSN(vfwmsac_vf, MATCH_VFWMSAC_VF, MASK_VFWMSAC_VF) DECLARE_INSN(vfwmsac_vv, MATCH_VFWMSAC_VV, MASK_VFWMSAC_VV) DECLARE_INSN(vfwmul_vf, MATCH_VFWMUL_VF, MASK_VFWMUL_VF) From 40dce7899b7a42d06413071c542606d4c0249174 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 14 Apr 2023 22:44:31 +0800 Subject: [PATCH 005/127] Add support for new instructions of Zfbfmin extension --- riscv/decode_macros.h | 2 ++ riscv/insns/fcvt_bf16_s.h | 5 +++++ riscv/insns/fcvt_s_bf16.h | 5 +++++ riscv/riscv.mk.in | 12 ++++++++++-- 4 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 riscv/insns/fcvt_bf16_s.h create mode 100644 riscv/insns/fcvt_s_bf16.h diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h index 6bdd574989..7ba132c196 100644 --- a/riscv/decode_macros.h +++ b/riscv/decode_macros.h @@ -74,6 +74,7 @@ typedef unsigned __int128 uint128_t; #define FRS2 READ_FREG(insn.rs2()) #define FRS3 READ_FREG(insn.rs3()) #define FRS1_H 
READ_FREG_H(insn.rs1()) +#define FRS1_BF FRS1_H #define FRS1_F READ_FREG_F(insn.rs1()) #define FRS1_D READ_FREG_D(insn.rs1()) #define FRS2_H READ_FREG_H(insn.rs2()) @@ -95,6 +96,7 @@ do { \ WRITE_FRD(value); \ } \ } while (0) +#define WRITE_FRD_BF WRITE_FRD_H #define WRITE_FRD_F(value) \ do { \ if (p->extension_enabled(EXT_ZFINX)) \ diff --git a/riscv/insns/fcvt_bf16_s.h b/riscv/insns/fcvt_bf16_s.h new file mode 100644 index 0000000000..d625df893d --- /dev/null +++ b/riscv/insns/fcvt_bf16_s.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFBFMIN); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD_BF(f32_to_bf16(FRS1_F)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_s_bf16.h b/riscv/insns/fcvt_s_bf16.h new file mode 100644 index 0000000000..59a55cb191 --- /dev/null +++ b/riscv/insns/fcvt_s_bf16.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFBFMIN); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD_F(bf16_to_f32(FRS1_BF)); +set_fp_exceptions; diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 55fadc0258..9e49c89d86 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1356,8 +1356,15 @@ riscv_insn_ext_cmo = \ cbo_zero \ riscv_insn_ext_zicond = \ - czero_eqz \ - czero_nez \ + czero_eqz \ + czero_nez \ + +riscv_insn_ext_zfbfmin = \ + fcvt_bf16_s \ + fcvt_s_bf16 \ + +riscv_insn_ext_bf16 = \ + $(riscv_insn_ext_zfbfmin) \ riscv_insn_list = \ $(riscv_insn_ext_a) \ @@ -1383,6 +1390,7 @@ riscv_insn_list = \ $(riscv_insn_smrnmi) \ $(riscv_insn_ext_cmo) \ $(riscv_insn_ext_zicond) \ + $(riscv_insn_ext_bf16) \ riscv_gen_srcs = $(addsuffix .cc,$(riscv_insn_list)) From 8aacc4effde92122a25beadac594162187767d7e Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 14 Apr 2023 22:47:51 +0800 Subject: [PATCH 006/127] Add support for new instructions of Zvfbfmin extension --- riscv/insns/vfncvtbf16_f_f_w.h | 5 +++++ riscv/insns/vfwcvtbf16_f_f_v.h | 5 +++++ riscv/riscv.mk.in | 5 +++++ riscv/v_ext_macros.h | 22 ++++++++++++++++++++++ 4 files changed, 37 insertions(+) create 
mode 100644 riscv/insns/vfncvtbf16_f_f_w.h create mode 100644 riscv/insns/vfwcvtbf16_f_f_v.h diff --git a/riscv/insns/vfncvtbf16_f_f_w.h b/riscv/insns/vfncvtbf16_f_f_w.h new file mode 100644 index 0000000000..4708802518 --- /dev/null +++ b/riscv/insns/vfncvtbf16_f_f_w.h @@ -0,0 +1,5 @@ +// vfncvtbf16.f.f.w vd, vs2, vm +VI_VFP_NCVT_BF16_TO_FP( + { vd = f32_to_bf16(vs2); }, // BODY16 + { require_extension(EXT_ZVFBFMIN); } // CHECK16 +) diff --git a/riscv/insns/vfwcvtbf16_f_f_v.h b/riscv/insns/vfwcvtbf16_f_f_v.h new file mode 100644 index 0000000000..ee9a59ca92 --- /dev/null +++ b/riscv/insns/vfwcvtbf16_f_f_v.h @@ -0,0 +1,5 @@ +// vfwcvtbf16.f.f.v vd, vs2, vm +VI_VFP_WCVT_FP_TO_BF16( + { vd = bf16_to_f32(vs2); }, // BODY16 + { require_extension(EXT_ZVFBFMIN); } // CHECK16 +) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 9e49c89d86..a83bec280c 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1363,8 +1363,13 @@ riscv_insn_ext_zfbfmin = \ fcvt_bf16_s \ fcvt_s_bf16 \ +riscv_insn_ext_zvfbfmin = \ + vfncvtbf16_f_f_w \ + vfwcvtbf16_f_f_v \ + riscv_insn_ext_bf16 = \ $(riscv_insn_ext_zfbfmin) \ + $(riscv_insn_ext_zvfbfmin) \ riscv_insn_list = \ $(riscv_insn_ext_a) \ diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index 8b0d0fde11..376c3307b9 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -1980,6 +1980,17 @@ reg_t index[P.VU.vlmax]; \ break; \ } +#define VI_VFP_WCVT_FP_TO_BF16(BODY, CHECK) \ + VI_CHECK_DSS(false); \ + switch (P.VU.vsew) { \ + case e16: \ + { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 32), CHECK, BODY); } \ + break; \ + default: \ + require(0); \ + break; \ + } + #define VI_VFP_WCVT_INT_TO_FP(BODY8, BODY16, BODY32, \ CHECK8, CHECK16, CHECK32, \ sign) \ @@ -2030,6 +2041,17 @@ reg_t index[P.VU.vlmax]; \ break; \ } +#define VI_VFP_NCVT_BF16_TO_FP(BODY, CHECK) \ + VI_CHECK_SDS(false); \ + switch (P.VU.vsew) { \ + case e16: \ + { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(32, 16), CHECK, BODY); } \ + break; \ + default: \ + 
require(0); \ + break; \ + } + #define VI_VFP_NCVT_INT_TO_FP(BODY32, BODY64, \ CHECK32, CHECK64, \ sign) \ From 48f66191758f3bca04e6d7e85348f266df148c14 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 14 Apr 2023 22:49:24 +0800 Subject: [PATCH 007/127] Add support for new instructions of Zvfbfwma extension --- riscv/insns/vfwmaccbf16_vf.h | 5 ++++ riscv/insns/vfwmaccbf16_vv.h | 5 ++++ riscv/riscv.mk.in | 5 ++++ riscv/v_ext_macros.h | 54 ++++++++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+) create mode 100644 riscv/insns/vfwmaccbf16_vf.h create mode 100644 riscv/insns/vfwmaccbf16_vv.h diff --git a/riscv/insns/vfwmaccbf16_vf.h b/riscv/insns/vfwmaccbf16_vf.h new file mode 100644 index 0000000000..2c77b3be18 --- /dev/null +++ b/riscv/insns/vfwmaccbf16_vf.h @@ -0,0 +1,5 @@ +// vfwmaccbf16.vf vd, vs2, rs1 +VI_VFP_BF16_VF_LOOP_WIDE +({ + vd = f32_mulAdd(rs1, vs2, vd); +}) diff --git a/riscv/insns/vfwmaccbf16_vv.h b/riscv/insns/vfwmaccbf16_vv.h new file mode 100644 index 0000000000..bd8f30505d --- /dev/null +++ b/riscv/insns/vfwmaccbf16_vv.h @@ -0,0 +1,5 @@ +// vfwmaccbf16.vv vd, vs2, vs1 +VI_VFP_BF16_VV_LOOP_WIDE +({ + vd = f32_mulAdd(vs1, vs2, vd); +}) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index a83bec280c..1cfe6275f0 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1367,9 +1367,14 @@ riscv_insn_ext_zvfbfmin = \ vfncvtbf16_f_f_w \ vfwcvtbf16_f_f_v \ +riscv_insn_ext_zvfbfwma = \ + vfwmaccbf16_vv \ + vfwmaccbf16_vf \ + riscv_insn_ext_bf16 = \ $(riscv_insn_ext_zfbfmin) \ $(riscv_insn_ext_zvfbfmin) \ + $(riscv_insn_ext_zvfbfwma) \ riscv_insn_list = \ $(riscv_insn_ext_a) \ diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index 376c3307b9..41256c7a59 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -1488,11 +1488,27 @@ reg_t index[P.VU.vlmax]; \ reg_t UNUSED rs2_num = insn.rs2(); \ softfloat_roundingMode = STATE.frm->read(); +#define VI_VFP_BF16_COMMON \ + require_fp; \ + require((P.VU.vsew == e16 && 
p->extension_enabled(EXT_ZVFBFWMA))); \ + require_vector(true); \ + require(STATE.frm->read() < 0x5); \ + reg_t UNUSED vl = P.VU.vl->read(); \ + reg_t UNUSED rd_num = insn.rd(); \ + reg_t UNUSED rs1_num = insn.rs1(); \ + reg_t UNUSED rs2_num = insn.rs2(); \ + softfloat_roundingMode = STATE.frm->read(); + #define VI_VFP_LOOP_BASE \ VI_VFP_COMMON \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); +#define VI_VFP_BF16_LOOP_BASE \ + VI_VFP_BF16_COMMON \ + for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ + VI_LOOP_ELEMENT_SKIP(); + #define VI_VFP_LOOP_CMP_BASE \ VI_VFP_COMMON \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ @@ -1818,6 +1834,25 @@ reg_t index[P.VU.vlmax]; \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_END +#define VI_VFP_BF16_VF_LOOP_WIDE(BODY) \ + VI_CHECK_DSS(false); \ + VI_VFP_BF16_LOOP_BASE \ + switch (P.VU.vsew) { \ + case e16: { \ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = bf16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t rs1 = bf16_to_f32(FRS1_BF); \ + BODY; \ + set_fp_exceptions; \ + break; \ + } \ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + #define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DSS(true); \ VI_VFP_LOOP_BASE \ @@ -1845,6 +1880,25 @@ reg_t index[P.VU.vlmax]; \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_END +#define VI_VFP_BF16_VV_LOOP_WIDE(BODY) \ + VI_CHECK_DSS(true); \ + VI_VFP_BF16_LOOP_BASE \ + switch (P.VU.vsew) { \ + case e16: { \ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = bf16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t vs1 = bf16_to_f32(P.VU.elt(rs1_num, i)); \ + BODY; \ + set_fp_exceptions; \ + break; \ + } \ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + #define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DDS(false); \ VI_VFP_LOOP_BASE \ From 62478900e5f7fd872a2fea5c7b4098a570776e18 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 14 Apr 2023 22:50:43 +0800 Subject: [PATCH 
008/127] Add flh/fsh/fmv_h_x/fmv_x_h instructions to Zvfbfmin/Zvfbfwma extensions --- riscv/insns/flh.h | 2 +- riscv/insns/fmv_h_x.h | 2 +- riscv/insns/fmv_x_h.h | 2 +- riscv/insns/fsh.h | 2 +- riscv/isa_parser.cc | 4 ++++ riscv/isa_parser.h | 1 + 6 files changed, 9 insertions(+), 4 deletions(-) diff --git a/riscv/insns/flh.h b/riscv/insns/flh.h index befff2cd3a..67b538a797 100644 --- a/riscv/insns/flh.h +++ b/riscv/insns/flh.h @@ -1,3 +1,3 @@ -require_extension(EXT_ZFHMIN); +require_extension(EXT_INTERNAL_ZFH_MOVE); require_fp; WRITE_FRD(f16(MMU.load(RS1 + insn.i_imm()))); diff --git a/riscv/insns/fmv_h_x.h b/riscv/insns/fmv_h_x.h index e55d607b17..bc2155cd21 100644 --- a/riscv/insns/fmv_h_x.h +++ b/riscv/insns/fmv_h_x.h @@ -1,3 +1,3 @@ -require_extension(EXT_ZFHMIN); +require_extension(EXT_INTERNAL_ZFH_MOVE); require_fp; WRITE_FRD(f16(RS1)); diff --git a/riscv/insns/fmv_x_h.h b/riscv/insns/fmv_x_h.h index 7a2e5ff6bf..ca823c17e5 100644 --- a/riscv/insns/fmv_x_h.h +++ b/riscv/insns/fmv_x_h.h @@ -1,3 +1,3 @@ -require_extension(EXT_ZFHMIN); +require_extension(EXT_INTERNAL_ZFH_MOVE); require_fp; WRITE_RD(sext32((int16_t)(FRS1.v[0]))); diff --git a/riscv/insns/fsh.h b/riscv/insns/fsh.h index dfd6bc5ca9..142d4d41de 100644 --- a/riscv/insns/fsh.h +++ b/riscv/insns/fsh.h @@ -1,3 +1,3 @@ -require_extension(EXT_ZFHMIN); +require_extension(EXT_INTERNAL_ZFH_MOVE); require_fp; MMU.store(RS1 + insn.s_imm(), FRS2.v[0]); diff --git a/riscv/isa_parser.cc b/riscv/isa_parser.cc index be5e51b7be..bd73b0c39f 100644 --- a/riscv/isa_parser.cc +++ b/riscv/isa_parser.cc @@ -293,6 +293,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) bad_isa_string(str, "'Zvfbfmin/Zvfbfwma' extension requires 'V' extension"); } + if (extension_table[EXT_ZFBFMIN] || extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZFHMIN]) { + extension_table[EXT_INTERNAL_ZFH_MOVE] = true; + } + if (extension_table['C']) { extension_table[EXT_ZCA] = true; if (extension_table['F'] && max_xlen == 32) 
diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 04859b6ec4..7558116869 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -69,6 +69,7 @@ typedef enum { EXT_XZBR, EXT_XZBT, EXT_SSTC, + EXT_INTERNAL_ZFH_MOVE, NUM_ISA_EXTENSIONS } isa_extension_t; From a2e8ad3d5f0bc8856da947df7c216d114179dc34 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 14 Apr 2023 22:51:19 +0800 Subject: [PATCH 009/127] Add disasm support for BF16 extensions --- disasm/disasm.cc | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index fef9facab4..d797a91dc1 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -1184,14 +1184,17 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) } if (isa->extension_enabled(EXT_ZFHMIN)) { - DEFINE_FLOAD(flh) - DEFINE_FSTORE(fsh) DEFINE_FR1TYPE(fcvt_h_s); DEFINE_FR1TYPE(fcvt_h_d); DEFINE_FR1TYPE(fcvt_h_q); DEFINE_FR1TYPE(fcvt_s_h); DEFINE_FR1TYPE(fcvt_d_h); DEFINE_FR1TYPE(fcvt_q_h); + } + + if (isa->extension_enabled(EXT_INTERNAL_ZFH_MOVE)) { + DEFINE_FLOAD(flh) + DEFINE_FSTORE(fsh) DEFINE_XFTYPE(fmv_h_x); DEFINE_FXTYPE(fmv_x_h); } @@ -1239,6 +1242,11 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DEFINE_FX2TYPE(fle_q); } + if (isa->extension_enabled(EXT_ZFBFMIN)) { + DEFINE_FR1TYPE(fcvt_bf16_s); + DEFINE_FR1TYPE(fcvt_s_bf16); + } + // ext-h if (isa->extension_enabled('H')) { DEFINE_XLOAD_BASE(hlv_b) @@ -1787,6 +1795,16 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) } } + if (isa->extension_enabled(EXT_ZVFBFMIN)) { + DEFINE_VECTOR_V(vfncvtbf16_f_f_w); + DEFINE_VECTOR_V(vfwcvtbf16_f_f_v); + } + + if (isa->extension_enabled(EXT_ZVFBFWMA)) { + DEFINE_VECTOR_VV(vfwmaccbf16_vv); + DEFINE_VECTOR_VF(vfwmaccbf16_vf); + } + #define DEFINE_PI3TYPE(code) add_pitype3_insn(this, #code, match_##code, mask_##code); #define DEFINE_PI4TYPE(code) add_pitype4_insn(this, #code, match_##code, mask_##code); #define DEFINE_PI5TYPE(code) 
add_pitype5_insn(this, #code, match_##code, mask_##code); From 8e800d05a4e4322048c942a9925684d6ce4f9de4 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Mon, 29 May 2023 09:04:10 +0800 Subject: [PATCH 010/127] Add BF16 extensions to README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 9455bfae6e..8d5dc45b94 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,9 @@ Spike supports the following RISC-V ISA features: - Zcd extension, v1.0 - Zcmp extension, v1.0 - Zcmt extension, v1.0 + - Zfbfmin extension, v0.6 + - Zvfbfmin extension, v0.6 + - Zvfbfwma extension, v0.6 As a Spike extension, the remainder of the proposed [Bit-Manipulation Extensions](https://github.com/riscv/riscv-bitmanip) From bb101c7a2c1bd751a34d65ea441faede408fa3d7 Mon Sep 17 00:00:00 2001 From: Tim Newsome Date: Thu, 1 Jun 2023 13:19:37 -0700 Subject: [PATCH 011/127] dcsr.ebreakh is now dcsr.ebreakv[su] This change was made ages ago in the spec. I did not actually test that the new privilege checks in ebreak and c.ebreak are correct, but all the existing debug tests still pass. 
--- riscv/csrs.cc | 9 ++++++--- riscv/csrs.h | 3 ++- riscv/insns/c_ebreak.h | 10 ++++++---- riscv/insns/ebreak.h | 10 ++++++---- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/riscv/csrs.cc b/riscv/csrs.cc index 95b5e22348..7ea07d104c 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -1232,9 +1232,10 @@ dcsr_csr_t::dcsr_csr_t(processor_t* const proc, const reg_t addr): prv(0), step(false), ebreakm(false), - ebreakh(false), ebreaks(false), ebreaku(false), + ebreakvs(false), + ebreakvu(false), halt(false), v(false), cause(0) { @@ -1250,9 +1251,10 @@ reg_t dcsr_csr_t::read() const noexcept { reg_t result = 0; result = set_field(result, DCSR_XDEBUGVER, 1); result = set_field(result, DCSR_EBREAKM, ebreakm); - result = set_field(result, DCSR_EBREAKH, ebreakh); result = set_field(result, DCSR_EBREAKS, ebreaks); result = set_field(result, DCSR_EBREAKU, ebreaku); + result = set_field(result, CSR_DCSR_EBREAKVS, ebreakvs); + result = set_field(result, CSR_DCSR_EBREAKVU, ebreakvu); result = set_field(result, DCSR_STOPCYCLE, 0); result = set_field(result, DCSR_STOPTIME, 0); result = set_field(result, DCSR_CAUSE, cause); @@ -1267,9 +1269,10 @@ bool dcsr_csr_t::unlogged_write(const reg_t val) noexcept { step = get_field(val, DCSR_STEP); // TODO: ndreset and fullreset ebreakm = get_field(val, DCSR_EBREAKM); - ebreakh = get_field(val, DCSR_EBREAKH); ebreaks = get_field(val, DCSR_EBREAKS); ebreaku = get_field(val, DCSR_EBREAKU); + ebreakvs = get_field(val, CSR_DCSR_EBREAKVS); + ebreakvu = get_field(val, CSR_DCSR_EBREAKVU); halt = get_field(val, DCSR_HALT); v = proc->extension_enabled('H') ? 
get_field(val, CSR_DCSR_V) : false; return true; diff --git a/riscv/csrs.h b/riscv/csrs.h index 19aefca139..07d6d82ac5 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -663,9 +663,10 @@ class dcsr_csr_t: public csr_t { uint8_t prv; bool step; bool ebreakm; - bool ebreakh; bool ebreaks; bool ebreaku; + bool ebreakvs; + bool ebreakvu; bool halt; bool v; uint8_t cause; diff --git a/riscv/insns/c_ebreak.h b/riscv/insns/c_ebreak.h index 14b5136310..4ea27a751e 100644 --- a/riscv/insns/c_ebreak.h +++ b/riscv/insns/c_ebreak.h @@ -1,8 +1,10 @@ require_extension(EXT_ZCA); -if (!STATE.debug_mode && - ((STATE.prv == PRV_M && STATE.dcsr->ebreakm) || - (STATE.prv == PRV_S && STATE.dcsr->ebreaks) || - (STATE.prv == PRV_U && STATE.dcsr->ebreaku))) { +if (!STATE.debug_mode && ( + (!STATE.v && STATE.prv == PRV_M && STATE.dcsr->ebreakm) || + (!STATE.v && STATE.prv == PRV_S && STATE.dcsr->ebreaks) || + (!STATE.v && STATE.prv == PRV_U && STATE.dcsr->ebreaku) || + (STATE.v && STATE.prv == PRV_S && STATE.dcsr->ebreakvs) || + (STATE.v && STATE.prv == PRV_U && STATE.dcsr->ebreakvu))) { throw trap_debug_mode(); } else { throw trap_breakpoint(STATE.v, pc); diff --git a/riscv/insns/ebreak.h b/riscv/insns/ebreak.h index 227ab93527..0cd2f190fe 100644 --- a/riscv/insns/ebreak.h +++ b/riscv/insns/ebreak.h @@ -1,7 +1,9 @@ -if (!STATE.debug_mode && - ((STATE.prv == PRV_M && STATE.dcsr->ebreakm) || - (STATE.prv == PRV_S && STATE.dcsr->ebreaks) || - (STATE.prv == PRV_U && STATE.dcsr->ebreaku))) { +if (!STATE.debug_mode && ( + (!STATE.v && STATE.prv == PRV_M && STATE.dcsr->ebreakm) || + (!STATE.v && STATE.prv == PRV_S && STATE.dcsr->ebreaks) || + (!STATE.v && STATE.prv == PRV_U && STATE.dcsr->ebreaku) || + (STATE.v && STATE.prv == PRV_S && STATE.dcsr->ebreakvs) || + (STATE.v && STATE.prv == PRV_U && STATE.dcsr->ebreakvu))) { throw trap_debug_mode(); } else { throw trap_breakpoint(STATE.v, pc); From 047491581cd6437620a19b51594a35c158a53466 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 2 Jun 2023 
23:41:49 +0800 Subject: [PATCH 012/127] Fix bugs in disassembling code for cm.mva01s/mvsa01 instructions. (Resolved issue #1370) --- disasm/disasm.cc | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index fef9facab4..2fce1a1d10 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -236,6 +236,18 @@ struct : public arg_t { } } rvc_rs2s; +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[RVC_R1S]; + } +} rvc_r1s; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[RVC_R2S]; + } +} rvc_r2s; + struct : public arg_t { std::string to_string(insn_t insn) const { return fpr_name[insn.rvc_rs2s()]; @@ -1347,8 +1359,8 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DISASM_INSN("cm.popretz", cm_popretz, 0, {&rvcm_pushpop_rlist, &rvcm_pop_stack_adj_64}); } - DISASM_INSN("cm.mva01s", cm_mva01s, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("cm.mvsa01", cm_mvsa01, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("cm.mva01s", cm_mva01s, 0, {&rvc_r1s, &rvc_r2s}); + DISASM_INSN("cm.mvsa01", cm_mvsa01, 0, {&rvc_r1s, &rvc_r2s}); } if (isa->extension_enabled(EXT_ZCMT)) { From cf7e434c8005fc79f563be98542aa1d42a85f869 Mon Sep 17 00:00:00 2001 From: "demin.han" Date: Thu, 8 Jun 2023 10:51:31 +0800 Subject: [PATCH 013/127] Replace ternary operator with std::min --- riscv/vector_unit.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/riscv/vector_unit.cc b/riscv/vector_unit.cc index ff3dd82fb9..9128df63ee 100644 --- a/riscv/vector_unit.cc +++ b/riscv/vector_unit.cc @@ -54,11 +54,11 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new if (vlmax == 0) { vl->write_raw(0); } else if (rd == 0 && rs1 == 0) { - vl->write_raw(vl->read() > vlmax ? 
vlmax : vl->read()); + vl->write_raw(std::min(vl->read(), vlmax)); } else if (rd != 0 && rs1 == 0) { vl->write_raw(vlmax); } else if (rs1 != 0) { - vl->write_raw(reqVL > vlmax ? vlmax : reqVL); + vl->write_raw(std::min(reqVL, vlmax)); } vstart->write_raw(0); From 03b47351e69ae954e5b078e18cc10bf21df4712c Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Fri, 9 Jun 2023 09:28:15 +0800 Subject: [PATCH 014/127] Fix PMP checking region of cache-block management instructions The spec says "The PMP access control bits shall be the same for all physical addresses in the cache block [... else] the behavior of a CBO instruction is UNSPECIFIED." Thus, we only need to check the byte rs1 points to (instead of the entire cache block). --- riscv/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/mmu.h b/riscv/mmu.h index 5a4835c3ce..efc6e9de14 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -214,7 +214,7 @@ class mmu_t void clean_inval(reg_t addr, bool clean, bool inval) { convert_load_traps_to_store_traps({ - const reg_t paddr = translate(generate_access_info(addr, LOAD, {false, false, false}), blocksz) & ~(blocksz - 1); + const reg_t paddr = translate(generate_access_info(addr, LOAD, {false, false, false}), 1); if (sim->reservable(paddr)) { if (tracer.interested_in_range(paddr, paddr + PGSIZE, LOAD)) tracer.clean_invalidate(paddr, blocksz, clean, inval); From cfe79e06fbfbe2d598693e7aa035a1f6e823d71c Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 12 Jun 2023 15:52:45 -0700 Subject: [PATCH 015/127] ci: CI should check each commit in a PR test --- .github/workflows/continuous-integration.yml | 22 ++++++++++++++++---- ci-tests/build-spike | 2 ++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 51b65a1e05..d8c9a0255d 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -21,23 +21,37 @@ 
jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 + with: + # checkout full tree + fetch-depth: 0 - name: Install Dependencies run: sudo xargs apt-get install -y < .github/workflows/apt-packages.txt - run: | - ci-tests/build-spike - ci-tests/test-spike + for commit in $(git rev-list origin/master..HEAD); do + git checkout $commit + echo "Checking commit $commit" + ci-tests/build-spike + ci-tests/test-spike + done test-macos: name: Test Spike build (MacOS) runs-on: macos-12 steps: - uses: actions/checkout@v2 + with: + # checkout full tree + fetch-depth: 0 - name: Install Dependencies run: xargs brew install < .github/workflows/brew-packages.txt - run: | - ci-tests/build-spike - ci-tests/test-spike + for commit in $(git rev-list origin/master..HEAD); do + git checkout $commit + echo "Checking commit $commit" + ci-tests/build-spike + ci-tests/test-spike + done diff --git a/ci-tests/build-spike b/ci-tests/build-spike index 5eb7b58030..9c3fb373a3 100755 --- a/ci-tests/build-spike +++ b/ci-tests/build-spike @@ -3,6 +3,8 @@ set -e DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +rm -rf build + mkdir build cd build mkdir install From 903ec29f902da41537411e210e7b6002eed7fb7e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 16:49:27 -0700 Subject: [PATCH 016/127] Remove legacy debug test These are now tested in CI using the riscv-tests repository. 
--- tests/ebreak.py | 26 ----------- tests/ebreak.s | 5 -- tests/testlib.py | 116 ----------------------------------------------- 3 files changed, 147 deletions(-) delete mode 100755 tests/ebreak.py delete mode 100644 tests/ebreak.s delete mode 100644 tests/testlib.py diff --git a/tests/ebreak.py b/tests/ebreak.py deleted file mode 100755 index dd7e65878b..0000000000 --- a/tests/ebreak.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/python - -import os -import testlib -import unittest -import tempfile -import time - -class EbreakTest(unittest.TestCase): - def setUp(self): - self.binary = testlib.compile("ebreak.s") - - def test_noport(self): - """Make sure that we can run past ebreak when --gdb-port isn't used.""" - spike = testlib.Spike(self.binary, with_gdb=False, timeout=10) - result = spike.wait() - self.assertEqual(result, 0) - - def test_nogdb(self): - """Make sure that we can run past ebreak when gdb isn't attached.""" - spike = testlib.Spike(self.binary, timeout=10) - result = spike.wait() - self.assertEqual(result, 0) - -if __name__ == '__main__': - unittest.main() diff --git a/tests/ebreak.s b/tests/ebreak.s deleted file mode 100644 index 99f3e07ccd..0000000000 --- a/tests/ebreak.s +++ /dev/null @@ -1,5 +0,0 @@ - .global main -main: - li a0, 0 - ebreak - ret diff --git a/tests/testlib.py b/tests/testlib.py deleted file mode 100644 index d5e8d795c9..0000000000 --- a/tests/testlib.py +++ /dev/null @@ -1,116 +0,0 @@ -import os.path -import pexpect -import subprocess -import tempfile -import testlib -import unittest - -# Note that gdb comes with its own testsuite. I was unable to figure out how to -# run that testsuite against the spike simulator. 
- -def find_file(path): - for directory in (os.getcwd(), os.path.dirname(testlib.__file__)): - fullpath = os.path.join(directory, path) - if os.path.exists(fullpath): - return fullpath - return None - -def compile(*args): - """Compile a single .c file into a binary.""" - dst = os.path.splitext(args[0])[0] - cc = os.path.expandvars("$RISCV/bin/riscv64-unknown-elf-gcc") - cmd = [cc, "-g", "-O", "-o", dst] - for arg in args: - found = find_file(arg) - if found: - cmd.append(found) - else: - cmd.append(arg) - cmd = " ".join(cmd) - result = os.system(cmd) - assert result == 0, "%r failed" % cmd - return dst - -def unused_port(): - # http://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python/2838309#2838309 - import socket - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.bind(("",0)) - port = s.getsockname()[1] - s.close() - return port - -class Spike(object): - def __init__(self, binary, halted=False, with_gdb=True, timeout=None): - """Launch spike. Return tuple of its process and the port it's running on.""" - cmd = [] - if timeout: - cmd += ["timeout", str(timeout)] - - cmd += [find_file("spike")] - if halted: - cmd.append('-H') - if with_gdb: - self.port = unused_port() - cmd += ['--gdb-port', str(self.port)] - cmd.append('pk') - if binary: - cmd.append(binary) - logfile = open("spike.log", "w") - self.process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=logfile, - stderr=logfile) - - def __del__(self): - try: - self.process.kill() - self.process.wait() - except OSError: - pass - - def wait(self, *args, **kwargs): - return self.process.wait(*args, **kwargs) - -class Gdb(object): - def __init__(self): - path = os.path.expandvars("$RISCV/bin/riscv64-unknown-elf-gdb") - self.child = pexpect.spawn(path) - self.child.logfile = file("gdb.log", "w") - self.wait() - self.command("set width 0") - self.command("set height 0") - # Force consistency. 
- self.command("set print entry-values no") - - def wait(self): - """Wait for prompt.""" - self.child.expect("\(gdb\)") - - def command(self, command, timeout=-1): - self.child.sendline(command) - self.child.expect("\n", timeout=timeout) - self.child.expect("\(gdb\)", timeout=timeout) - return self.child.before.strip() - - def c(self, wait=True): - if wait: - return self.command("c") - else: - self.child.sendline("c") - self.child.expect("Continuing") - - def interrupt(self): - self.child.send("\003"); - self.child.expect("\(gdb\)") - - def x(self, address, size='w'): - output = self.command("x/%s %s" % (size, address)) - value = int(output.split(':')[1].strip(), 0) - return value - - def p(self, obj): - output = self.command("p %s" % obj) - value = int(output.split('=')[-1].strip()) - return value - - def stepi(self): - return self.command("stepi") From 396c61f54e54a2be846c98a0a3489c107cbd8bbb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 16:49:57 -0700 Subject: [PATCH 017/127] Restore MCPPBS unit-testing flow --- Makefile.in | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/Makefile.in b/Makefile.in index c922e849bd..69f0405d7b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -256,7 +256,7 @@ $(2)_test_objs := $$(patsubst %.cc, %.o, $$($(2)_test_srcs)) $(2)_test_deps := $$(patsubst %.o, %.d, $$($(2)_test_objs)) $(2)_test_exes := $$(patsubst %.t.cc, %-utst, $$($(2)_test_srcs)) $(2)_test_outs := $$(patsubst %, %.out, $$($(2)_test_exes)) -$(2)_test_libs := $(1) $$($(2)_reverse_deps) utst +$(2)_test_libs := $(1) $$($(2)_reverse_deps) $(2)_test_libnames := $$(patsubst %, lib%.a, $$($(2)_test_libs)) $(2)_test_libarg := $$(patsubst %, -l%, $$($(2)_test_libs)) @@ -274,7 +274,8 @@ $(2)_junk += \ # Run unit tests $$($(2)_test_outs) : %.out : % - $(RUN) $(RUNFLAGS) ./$$< default | tee $$@ + ./$$< default + touch $$@ $(2)_junk += $$($(2)_test_outs) @@ -359,20 +360,8 @@ deps : $(deps) # Check 
#------------------------------------------------------------------------- -bintest_outs = $(bintests:=.out) -junk += $(bintest_outs) -%.out: % all - ./$* < /dev/null 2>&1 | tee $@ - -check-cpp : $(test_outs) - @echo - ! grep -h -e'Unit Tests' -e'FAILED' -e'Segmentation' $^ < /dev/null - @echo - -check-bin : $(bintest_outs) - ! tail -n 1 $^ < /dev/null 2>&1 | grep FAILED - -check : check-cpp check-bin +check : $(test_outs) + echo; grep -h -e'Unit Tests' -e'FAILED' -e'Segmentation' $^ < /dev/null; echo .PHONY : check From e58d89aa2c38ca40e68ad4e010c91239c4794e00 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 16:52:43 -0700 Subject: [PATCH 018/127] Add C.EBREAK, C.JALR, and C.JR to overlap list This isn't a functional change; we just failed to notate that C.EBREAK and C.JALR overlap C.ADD, and C.JR overlaps C.MV. --- riscv/overlap_list.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/riscv/overlap_list.h b/riscv/overlap_list.h index fc3b307743..a30c770e60 100644 --- a/riscv/overlap_list.h +++ b/riscv/overlap_list.h @@ -9,3 +9,6 @@ DECLARE_OVERLAP_INSN(cm_mva01s, EXT_ZCMP) DECLARE_OVERLAP_INSN(cm_mvsa01, EXT_ZCMP) DECLARE_OVERLAP_INSN(cm_jalt, EXT_ZCMT) DECLARE_OVERLAP_INSN(c_fsd, EXT_ZCD) +DECLARE_OVERLAP_INSN(c_ebreak, EXT_ZCA) +DECLARE_OVERLAP_INSN(c_jalr, EXT_ZCA) +DECLARE_OVERLAP_INSN(c_jr, EXT_ZCA) From 057cfbcca6dc6c65f1fd69b754e499ccabebe273 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 16:53:34 -0700 Subject: [PATCH 019/127] Add test that ensures opcodes don't overlap unless explicitly specified --- riscv/check-opcode-overlap.t.cc | 57 +++++++++++++++++++++++++++++++++ riscv/riscv.mk.in | 3 +- 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 riscv/check-opcode-overlap.t.cc diff --git a/riscv/check-opcode-overlap.t.cc b/riscv/check-opcode-overlap.t.cc new file mode 100644 index 0000000000..2922001278 --- /dev/null +++ b/riscv/check-opcode-overlap.t.cc @@ -0,0 +1,57 @@ +#include "decode.h" 
+#include "common.h" +#include <unordered_set> +#include <vector> +#include <string> +#include <cstdio> + +struct opcode { + insn_bits_t match; + insn_bits_t mask; + std::string name; +}; + +static void check_overlap(const opcode& a, const opcode& b) +{ + if ((a.match & b.mask) == b.match) { + fprintf(stderr, "Instruction %s (%" PRIx64 ") overlaps instruction %s (%" PRIx64 ", mask %" PRIx64 ")\n", + a.name.c_str(), a.match, b.name.c_str(), b.match, b.mask); + exit(-1); + } +} + +int main() +{ + #define DECLARE_INSN(name, match, mask) \ + const insn_bits_t UNUSED name##_match = (match), name##_mask = (mask); + #include "encoding.h" + #undef DECLARE_INSN + + static const opcode static_list[] = { + #define DEFINE_INSN(name) \ + {name##_match, name##_mask, #name}, + #include "insn_list.h" + #undef DEFINE_INSN + }; + + std::unordered_set<std::string> overlap_list; + #define DECLARE_OVERLAP_INSN(name, ext) \ + overlap_list.insert(std::string(#name)); + #include "overlap_list.h" + #undef DECLARE_OVERLAP_INSN + + std::vector<const opcode*> list; + for (size_t i = 0; i < sizeof(static_list) / sizeof(static_list[0]); i++) { + if (!overlap_list.count(static_list[i].name)) + list.push_back(&static_list[i]); + } + + for (size_t i = 1; i < list.size(); i++) { + for (size_t j = 0; j < i; j++) { + check_overlap(*list[i], *list[j]); + check_overlap(*list[j], *list[i]); + } + } + + return 0; +} diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 1cfe6275f0..ac45b2896a 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -75,7 +75,8 @@ riscv_srcs = \ cfg.cc \ $(riscv_gen_srcs) \ -riscv_test_srcs = +riscv_test_srcs = \ + check-opcode-overlap.t.cc \ riscv_gen_hdrs = \ insn_list.h \ From 86b3e7851f6b0dacb17f61cd8ee77f8fc1a103b6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 16:54:31 -0700 Subject: [PATCH 020/127] Run 'make check' in CI --- ci-tests/build-spike | 1 + 1 file changed, 1 insertion(+) diff --git a/ci-tests/build-spike b/ci-tests/build-spike index 9c3fb373a3..058defdfc6 100755 --- a/ci-tests/build-spike +++ 
b/ci-tests/build-spike @@ -10,6 +10,7 @@ cd build mkdir install CXXFLAGS="-Wnon-virtual-dtor" CFLAGS="-Werror -Wignored-qualifiers -Wunused-function -Wunused-parameter -Wunused-variable" $DIR/../configure --prefix=`pwd`/install make -j"$(nproc 2> /dev/null || sysctl -n hw.ncpu)" +make check make install # check that help message prints without error From 3b6732458f5b6b47e87caad8cf9b03be13692f4f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 18 Jun 2023 01:20:44 -0700 Subject: [PATCH 021/127] Add CMOV to overlap list, as it overlaps CZERO.EQZ --- riscv/overlap_list.h | 1 + 1 file changed, 1 insertion(+) diff --git a/riscv/overlap_list.h b/riscv/overlap_list.h index a30c770e60..d8b1225866 100644 --- a/riscv/overlap_list.h +++ b/riscv/overlap_list.h @@ -12,3 +12,4 @@ DECLARE_OVERLAP_INSN(c_fsd, EXT_ZCD) DECLARE_OVERLAP_INSN(c_ebreak, EXT_ZCA) DECLARE_OVERLAP_INSN(c_jalr, EXT_ZCA) DECLARE_OVERLAP_INSN(c_jr, EXT_ZCA) +DECLARE_OVERLAP_INSN(cmov, EXT_XZBT) From fff2699cfdcf9a2fe7e004ca92c299a2c11c37a1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 14:25:27 -0700 Subject: [PATCH 022/127] Remove instructions that belong only to Xbitmanip --- riscv/insns/bcompress.h | 9 -------- riscv/insns/bcompressw.h | 10 --------- riscv/insns/bdecompress.h | 9 -------- riscv/insns/bdecompressw.h | 10 --------- riscv/insns/bfp.h | 10 --------- riscv/insns/bfpw.h | 9 -------- riscv/insns/bmatflip.h | 11 ---------- riscv/insns/bmator.h | 29 -------------------------- riscv/insns/bmatxor.h | 29 -------------------------- riscv/insns/clmulhw.h | 6 ------ riscv/insns/clmulrw.h | 6 ------ riscv/insns/clmulw.h | 6 ------ riscv/insns/cmov.h | 2 -- riscv/insns/crc32_b.h | 5 ----- riscv/insns/crc32_d.h | 6 ------ riscv/insns/crc32_h.h | 5 ----- riscv/insns/crc32_w.h | 5 ----- riscv/insns/crc32c_b.h | 5 ----- riscv/insns/crc32c_d.h | 6 ------ riscv/insns/crc32c_h.h | 5 ----- riscv/insns/crc32c_w.h | 5 ----- riscv/insns/fsl.h | 9 -------- riscv/insns/fslw.h | 10 
--------- riscv/insns/fsriw.h | 10 --------- riscv/insns/gorc.h | 10 --------- riscv/insns/gorciw.h | 11 ---------- riscv/insns/gorcw.h | 10 --------- riscv/insns/grev.h | 10 --------- riscv/insns/greviw.h | 11 ---------- riscv/insns/grevw.h | 10 --------- riscv/insns/packuw.h | 5 ----- riscv/insns/shfl.h | 9 -------- riscv/insns/shflw.h | 9 -------- riscv/insns/slo.h | 2 -- riscv/insns/sloi.h | 3 --- riscv/insns/sloiw.h | 3 --- riscv/insns/slow.h | 3 --- riscv/insns/sro.h | 2 -- riscv/insns/sroi.h | 3 --- riscv/insns/sroiw.h | 3 --- riscv/insns/srow.h | 3 --- riscv/insns/unshfl.h | 9 -------- riscv/insns/unshflw.h | 9 -------- riscv/insns/xperm16.h | 2 -- riscv/insns/xperm32.h | 3 --- riscv/overlap_list.h | 1 - riscv/riscv.mk.in | 42 -------------------------------------- 47 files changed, 390 deletions(-) delete mode 100644 riscv/insns/bcompress.h delete mode 100644 riscv/insns/bcompressw.h delete mode 100644 riscv/insns/bdecompress.h delete mode 100644 riscv/insns/bdecompressw.h delete mode 100644 riscv/insns/bfp.h delete mode 100644 riscv/insns/bfpw.h delete mode 100644 riscv/insns/bmatflip.h delete mode 100644 riscv/insns/bmator.h delete mode 100644 riscv/insns/bmatxor.h delete mode 100644 riscv/insns/clmulhw.h delete mode 100644 riscv/insns/clmulrw.h delete mode 100644 riscv/insns/clmulw.h delete mode 100644 riscv/insns/cmov.h delete mode 100644 riscv/insns/crc32_b.h delete mode 100644 riscv/insns/crc32_d.h delete mode 100644 riscv/insns/crc32_h.h delete mode 100644 riscv/insns/crc32_w.h delete mode 100644 riscv/insns/crc32c_b.h delete mode 100644 riscv/insns/crc32c_d.h delete mode 100644 riscv/insns/crc32c_h.h delete mode 100644 riscv/insns/crc32c_w.h delete mode 100644 riscv/insns/fsl.h delete mode 100644 riscv/insns/fslw.h delete mode 100644 riscv/insns/fsriw.h delete mode 100644 riscv/insns/gorc.h delete mode 100644 riscv/insns/gorciw.h delete mode 100644 riscv/insns/gorcw.h delete mode 100644 riscv/insns/grev.h delete mode 100644 riscv/insns/greviw.h 
delete mode 100644 riscv/insns/grevw.h delete mode 100644 riscv/insns/packuw.h delete mode 100644 riscv/insns/shfl.h delete mode 100644 riscv/insns/shflw.h delete mode 100644 riscv/insns/slo.h delete mode 100644 riscv/insns/sloi.h delete mode 100644 riscv/insns/sloiw.h delete mode 100644 riscv/insns/slow.h delete mode 100644 riscv/insns/sro.h delete mode 100644 riscv/insns/sroi.h delete mode 100644 riscv/insns/sroiw.h delete mode 100644 riscv/insns/srow.h delete mode 100644 riscv/insns/unshfl.h delete mode 100644 riscv/insns/unshflw.h delete mode 100644 riscv/insns/xperm16.h delete mode 100644 riscv/insns/xperm32.h diff --git a/riscv/insns/bcompress.h b/riscv/insns/bcompress.h deleted file mode 100644 index 579346f463..0000000000 --- a/riscv/insns/bcompress.h +++ /dev/null @@ -1,9 +0,0 @@ -require_extension(EXT_XZBE); -uint64_t c = 0, i = 0, data = zext_xlen(RS1), mask = zext_xlen(RS2); -while (mask) { - uint64_t b = mask & ~((mask | (mask-1)) + 1); - c |= (data & b) >> (ctz(b) - i); - i += popcount(b); - mask -= b; -} -WRITE_RD(sext_xlen(c)); diff --git a/riscv/insns/bcompressw.h b/riscv/insns/bcompressw.h deleted file mode 100644 index 2c1017cd17..0000000000 --- a/riscv/insns/bcompressw.h +++ /dev/null @@ -1,10 +0,0 @@ -require_rv64; -require_extension(EXT_XZBE); -uint64_t c = 0, i = 0, data = zext32(RS1), mask = zext32(RS2); -while (mask) { - uint64_t b = mask & ~((mask | (mask-1)) + 1); - c |= (data & b) >> (ctz(b) - i); - i += popcount(b); - mask -= b; -} -WRITE_RD(sext32(c)); diff --git a/riscv/insns/bdecompress.h b/riscv/insns/bdecompress.h deleted file mode 100644 index 2894be0143..0000000000 --- a/riscv/insns/bdecompress.h +++ /dev/null @@ -1,9 +0,0 @@ -require_extension(EXT_XZBE); -uint64_t c = 0, i = 0, data = zext_xlen(RS1), mask = zext_xlen(RS2); -while (mask) { - uint64_t b = mask & ~((mask | (mask-1)) + 1); - c |= (data << (ctz(b) - i)) & b; - i += popcount(b); - mask -= b; -} -WRITE_RD(sext_xlen(c)); diff --git a/riscv/insns/bdecompressw.h 
b/riscv/insns/bdecompressw.h deleted file mode 100644 index 468a7260ae..0000000000 --- a/riscv/insns/bdecompressw.h +++ /dev/null @@ -1,10 +0,0 @@ -require_rv64; -require_extension(EXT_XZBE); -uint64_t c = 0, i = 0, data = zext32(RS1), mask = zext32(RS2); -while (mask) { - uint64_t b = mask & ~((mask | (mask-1)) + 1); - c |= (data << (ctz(b) - i)) & b; - i += popcount(b); - mask -= b; -} -WRITE_RD(sext32(c)); diff --git a/riscv/insns/bfp.h b/riscv/insns/bfp.h deleted file mode 100644 index 886d840531..0000000000 --- a/riscv/insns/bfp.h +++ /dev/null @@ -1,10 +0,0 @@ -require_extension(EXT_XZBF); -reg_t cfg = RS2 >> (xlen/2); -if ((cfg >> 30) == 2) - cfg = cfg >> 16; -int len = (cfg >> 8) & (xlen/2-1); -int off = cfg & (xlen-1); -len = len ? len : xlen/2; -reg_t mask = ~(~reg_t(0) << len) << off; -reg_t data = RS2 << off; -WRITE_RD(sext_xlen((data & mask) | (RS1 & ~mask))); diff --git a/riscv/insns/bfpw.h b/riscv/insns/bfpw.h deleted file mode 100644 index 42479e72f4..0000000000 --- a/riscv/insns/bfpw.h +++ /dev/null @@ -1,9 +0,0 @@ -require_rv64; -require_extension(EXT_XZBF); -reg_t cfg = RS2 >> 16; -int len = (cfg >> 8) & 15; -int off = cfg & 31; -len = len ? 
len : 16; -reg_t mask = ~(~reg_t(0) << len) << off; -reg_t data = RS2 << off; -WRITE_RD(sext32((data & mask) | (RS1 & ~mask))); diff --git a/riscv/insns/bmatflip.h b/riscv/insns/bmatflip.h deleted file mode 100644 index c10df8f9a1..0000000000 --- a/riscv/insns/bmatflip.h +++ /dev/null @@ -1,11 +0,0 @@ -require_rv64; -require_extension(EXT_XZBM); -reg_t x = RS1; -for (int i = 0; i < 3; i++) { - x = (x & 0xFFFF00000000FFFFLL) | ((x & 0x0000FFFF00000000LL) >> 16) | ((x & 0x00000000FFFF0000LL) << 16); - x = (x & 0xFF0000FFFF0000FFLL) | ((x & 0x00FF000000FF0000LL) >> 8) | ((x & 0x0000FF000000FF00LL) << 8); - x = (x & 0xF00FF00FF00FF00FLL) | ((x & 0x0F000F000F000F00LL) >> 4) | ((x & 0x00F000F000F000F0LL) << 4); - x = (x & 0xC3C3C3C3C3C3C3C3LL) | ((x & 0x3030303030303030LL) >> 2) | ((x & 0x0C0C0C0C0C0C0C0CLL) << 2); - x = (x & 0x9999999999999999LL) | ((x & 0x4444444444444444LL) >> 1) | ((x & 0x2222222222222222LL) << 1); -} -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/bmator.h b/riscv/insns/bmator.h deleted file mode 100644 index 33057ca04e..0000000000 --- a/riscv/insns/bmator.h +++ /dev/null @@ -1,29 +0,0 @@ -require_rv64; -require_extension(EXT_XZBM); - -// transpose of rs2 -int64_t rs2t = RS2; -for (int i = 0; i < 3; i++) { - rs2t = (rs2t & 0xFFFF00000000FFFFLL) | ((rs2t & 0x0000FFFF00000000LL) >> 16) | ((rs2t & 0x00000000FFFF0000LL) << 16); - rs2t = (rs2t & 0xFF0000FFFF0000FFLL) | ((rs2t & 0x00FF000000FF0000LL) >> 8) | ((rs2t & 0x0000FF000000FF00LL) << 8); - rs2t = (rs2t & 0xF00FF00FF00FF00FLL) | ((rs2t & 0x0F000F000F000F00LL) >> 4) | ((rs2t & 0x00F000F000F000F0LL) << 4); - rs2t = (rs2t & 0xC3C3C3C3C3C3C3C3LL) | ((rs2t & 0x3030303030303030LL) >> 2) | ((rs2t & 0x0C0C0C0C0C0C0C0CLL) << 2); - rs2t = (rs2t & 0x9999999999999999LL) | ((rs2t & 0x4444444444444444LL) >> 1) | ((rs2t & 0x2222222222222222LL) << 1); -} - -int64_t rs1 = RS1; -uint8_t u[8]; // rows of rs1 -uint8_t v[8]; // cols of rs2 - -for (int i = 0; i < 8; i++) { - u[i] = rs1 >> (i*8); - v[i] = rs2t >> 
(i*8); -} - -uint64_t x = 0; -for (int i = 0; i < 64; i++) { - if ((u[i / 8] & v[i % 8]) != 0) - x |= 1LL << i; -} - -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/bmatxor.h b/riscv/insns/bmatxor.h deleted file mode 100644 index ca2d096715..0000000000 --- a/riscv/insns/bmatxor.h +++ /dev/null @@ -1,29 +0,0 @@ -require_rv64; -require_extension(EXT_XZBM); - -// transpose of rs2 -int64_t rs2t = RS2; -for (int i = 0; i < 3; i++) { - rs2t = (rs2t & 0xFFFF00000000FFFFLL) | ((rs2t & 0x0000FFFF00000000LL) >> 16) | ((rs2t & 0x00000000FFFF0000LL) << 16); - rs2t = (rs2t & 0xFF0000FFFF0000FFLL) | ((rs2t & 0x00FF000000FF0000LL) >> 8) | ((rs2t & 0x0000FF000000FF00LL) << 8); - rs2t = (rs2t & 0xF00FF00FF00FF00FLL) | ((rs2t & 0x0F000F000F000F00LL) >> 4) | ((rs2t & 0x00F000F000F000F0LL) << 4); - rs2t = (rs2t & 0xC3C3C3C3C3C3C3C3LL) | ((rs2t & 0x3030303030303030LL) >> 2) | ((rs2t & 0x0C0C0C0C0C0C0C0CLL) << 2); - rs2t = (rs2t & 0x9999999999999999LL) | ((rs2t & 0x4444444444444444LL) >> 1) | ((rs2t & 0x2222222222222222LL) << 1); -} - -int64_t rs1 = RS1; -uint8_t u[8]; // rows of rs1 -uint8_t v[8]; // cols of rs2 - -for (int i = 0; i < 8; i++) { - u[i] = rs1 >> (i*8); - v[i] = rs2t >> (i*8); -} - -uint64_t x = 0; -for (int i = 0; i < 64; i++) { - if (popcount(u[i / 8] & v[i % 8]) & 1) - x |= 1LL << i; -} - -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/clmulhw.h b/riscv/insns/clmulhw.h deleted file mode 100644 index f41acb0e83..0000000000 --- a/riscv/insns/clmulhw.h +++ /dev/null @@ -1,6 +0,0 @@ -require_extension(EXT_XZBC); -reg_t a = zext32(RS1), b = zext32(RS2), x = 0; -for (int i = 1; i < 32; i++) - if ((b >> i) & 1) - x ^= a >> (32-i); -WRITE_RD(sext32(x)); diff --git a/riscv/insns/clmulrw.h b/riscv/insns/clmulrw.h deleted file mode 100644 index 784859ae74..0000000000 --- a/riscv/insns/clmulrw.h +++ /dev/null @@ -1,6 +0,0 @@ -require_extension(EXT_XZBC); -reg_t a = zext32(RS1), b = zext32(RS2), x = 0; -for (int i = 0; i < 32; i++) - if ((b >> i) & 1) - x ^= a >> (31-i); 
-WRITE_RD(sext32(x)); diff --git a/riscv/insns/clmulw.h b/riscv/insns/clmulw.h deleted file mode 100644 index 5bb753fe3d..0000000000 --- a/riscv/insns/clmulw.h +++ /dev/null @@ -1,6 +0,0 @@ -require_extension(EXT_XZBC); -reg_t a = zext32(RS1), b = zext32(RS2), x = 0; -for (int i = 0; i < 32; i++) - if ((b >> i) & 1) - x ^= a << i; -WRITE_RD(sext32(x)); diff --git a/riscv/insns/cmov.h b/riscv/insns/cmov.h deleted file mode 100644 index c7551bc645..0000000000 --- a/riscv/insns/cmov.h +++ /dev/null @@ -1,2 +0,0 @@ -require_extension(EXT_XZBT); -WRITE_RD(RS2 ? RS1 : RS3); diff --git a/riscv/insns/crc32_b.h b/riscv/insns/crc32_b.h deleted file mode 100644 index 3111fe5728..0000000000 --- a/riscv/insns/crc32_b.h +++ /dev/null @@ -1,5 +0,0 @@ -require_extension(EXT_XZBR); -reg_t x = zext_xlen(RS1); -for (int i = 0; i < 8; i++) - x = (x >> 1) ^ (0xEDB88320 & ~((x&1)-1)); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/crc32_d.h b/riscv/insns/crc32_d.h deleted file mode 100644 index 7fd7a38f2b..0000000000 --- a/riscv/insns/crc32_d.h +++ /dev/null @@ -1,6 +0,0 @@ -require_rv64; -require_extension(EXT_XZBR); -reg_t x = zext_xlen(RS1); -for (int i = 0; i < 64; i++) - x = (x >> 1) ^ (0xEDB88320 & ~((x&1)-1)); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/crc32_h.h b/riscv/insns/crc32_h.h deleted file mode 100644 index 5063fefd6d..0000000000 --- a/riscv/insns/crc32_h.h +++ /dev/null @@ -1,5 +0,0 @@ -require_extension(EXT_XZBR); -reg_t x = zext_xlen(RS1); -for (int i = 0; i < 16; i++) - x = (x >> 1) ^ (0xEDB88320 & ~((x&1)-1)); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/crc32_w.h b/riscv/insns/crc32_w.h deleted file mode 100644 index 6e425ab8d9..0000000000 --- a/riscv/insns/crc32_w.h +++ /dev/null @@ -1,5 +0,0 @@ -require_extension(EXT_XZBR); -reg_t x = zext_xlen(RS1); -for (int i = 0; i < 32; i++) - x = (x >> 1) ^ (0xEDB88320 & ~((x&1)-1)); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/crc32c_b.h b/riscv/insns/crc32c_b.h deleted file mode 100644 index 
d11b0dda87..0000000000 --- a/riscv/insns/crc32c_b.h +++ /dev/null @@ -1,5 +0,0 @@ -require_extension(EXT_XZBR); -reg_t x = zext_xlen(RS1); -for (int i = 0; i < 8; i++) - x = (x >> 1) ^ (0x82F63B78 & ~((x&1)-1)); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/crc32c_d.h b/riscv/insns/crc32c_d.h deleted file mode 100644 index 81175fd9c1..0000000000 --- a/riscv/insns/crc32c_d.h +++ /dev/null @@ -1,6 +0,0 @@ -require_rv64; -require_extension(EXT_XZBR); -reg_t x = zext_xlen(RS1); -for (int i = 0; i < 64; i++) - x = (x >> 1) ^ (0x82F63B78 & ~((x&1)-1)); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/crc32c_h.h b/riscv/insns/crc32c_h.h deleted file mode 100644 index ef5817d99e..0000000000 --- a/riscv/insns/crc32c_h.h +++ /dev/null @@ -1,5 +0,0 @@ -require_extension(EXT_XZBR); -reg_t x = zext_xlen(RS1); -for (int i = 0; i < 16; i++) - x = (x >> 1) ^ (0x82F63B78 & ~((x&1)-1)); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/crc32c_w.h b/riscv/insns/crc32c_w.h deleted file mode 100644 index 8793540297..0000000000 --- a/riscv/insns/crc32c_w.h +++ /dev/null @@ -1,5 +0,0 @@ -require_extension(EXT_XZBR); -reg_t x = zext_xlen(RS1); -for (int i = 0; i < 32; i++) - x = (x >> 1) ^ (0x82F63B78 & ~((x&1)-1)); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/fsl.h b/riscv/insns/fsl.h deleted file mode 100644 index 53a21608d9..0000000000 --- a/riscv/insns/fsl.h +++ /dev/null @@ -1,9 +0,0 @@ -require_extension(EXT_XZBT); -int shamt = RS2 & (2*xlen-1); -reg_t a = RS1, b = RS3; -if (shamt >= xlen) { - a = RS3, b = RS1; - shamt -= xlen; -} -int rshamt = -shamt & (xlen-1); -WRITE_RD(sext_xlen(shamt ? 
(a << shamt) | (zext_xlen(b) >> rshamt) : a)); diff --git a/riscv/insns/fslw.h b/riscv/insns/fslw.h deleted file mode 100644 index 83940105b8..0000000000 --- a/riscv/insns/fslw.h +++ /dev/null @@ -1,10 +0,0 @@ -require_rv64; -require_extension(EXT_XZBT); -int shamt = RS2 & 63; -reg_t a = RS1, b = RS3; -if (shamt >= 32) { - a = RS3, b = RS1; - shamt -= 32; -} -int rshamt = -shamt & 31; -WRITE_RD(sext32(shamt ? (a << shamt) | (zext32(b) >> rshamt) : a)); diff --git a/riscv/insns/fsriw.h b/riscv/insns/fsriw.h deleted file mode 100644 index 7956de7ce7..0000000000 --- a/riscv/insns/fsriw.h +++ /dev/null @@ -1,10 +0,0 @@ -require_rv64; -require_extension(EXT_XZBT); -int shamt = SHAMT & 63; -reg_t a = RS1, b = RS3; -if (shamt >= 32) { - a = RS3, b = RS1; - shamt -= 32; -} -int rshamt = -shamt & 31; -WRITE_RD(sext32(shamt ? (b << rshamt) | (zext32(a) >> shamt) : a)); diff --git a/riscv/insns/gorc.h b/riscv/insns/gorc.h deleted file mode 100644 index ffe441347d..0000000000 --- a/riscv/insns/gorc.h +++ /dev/null @@ -1,10 +0,0 @@ -require_extension(EXT_XZBP); -reg_t x = RS1; -int shamt = RS2 & (xlen-1); -if (shamt & 1) x |= ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); -if (shamt & 2) x |= ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); -if (shamt & 4) x |= ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); -if (shamt & 8) x |= ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); -if (shamt & 16) x |= ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); -if (shamt & 32) x |= ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/gorciw.h b/riscv/insns/gorciw.h deleted file mode 100644 index 44ade807ea..0000000000 --- a/riscv/insns/gorciw.h +++ /dev/null @@ -1,11 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -require(SHAMT < 32); -reg_t x = RS1; -int shamt = SHAMT; -if (shamt & 1) x |= ((x & 
0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); -if (shamt & 2) x |= ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); -if (shamt & 4) x |= ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); -if (shamt & 8) x |= ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); -if (shamt & 16) x |= ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); -WRITE_RD(sext32(x)); diff --git a/riscv/insns/gorcw.h b/riscv/insns/gorcw.h deleted file mode 100644 index 611b3caa43..0000000000 --- a/riscv/insns/gorcw.h +++ /dev/null @@ -1,10 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -reg_t x = RS1; -int shamt = RS2 & 31; -if (shamt & 1) x |= ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); -if (shamt & 2) x |= ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); -if (shamt & 4) x |= ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); -if (shamt & 8) x |= ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); -if (shamt & 16) x |= ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); -WRITE_RD(sext32(x)); diff --git a/riscv/insns/grev.h b/riscv/insns/grev.h deleted file mode 100644 index 7181b3cda8..0000000000 --- a/riscv/insns/grev.h +++ /dev/null @@ -1,10 +0,0 @@ -require_extension(EXT_XZBP); -reg_t x = RS1; -int shamt = RS2 & (xlen-1); -if (shamt & 1) x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); -if (shamt & 2) x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); -if (shamt & 4) x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); -if (shamt & 8) x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); -if (shamt & 16) x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); -if (shamt & 32) x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32); 
-WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/greviw.h b/riscv/insns/greviw.h deleted file mode 100644 index 004ecf347c..0000000000 --- a/riscv/insns/greviw.h +++ /dev/null @@ -1,11 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -require(SHAMT < 32); -reg_t x = RS1; -int shamt = SHAMT; -if (shamt & 1) x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); -if (shamt & 2) x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); -if (shamt & 4) x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); -if (shamt & 8) x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); -if (shamt & 16) x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); -WRITE_RD(sext32(x)); diff --git a/riscv/insns/grevw.h b/riscv/insns/grevw.h deleted file mode 100644 index 3fbcf228d2..0000000000 --- a/riscv/insns/grevw.h +++ /dev/null @@ -1,10 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -reg_t x = RS1; -int shamt = RS2 & 31; -if (shamt & 1) x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); -if (shamt & 2) x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); -if (shamt & 4) x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); -if (shamt & 8) x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); -if (shamt & 16) x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); -WRITE_RD(sext32(x)); diff --git a/riscv/insns/packuw.h b/riscv/insns/packuw.h deleted file mode 100644 index 1b3f7d5f54..0000000000 --- a/riscv/insns/packuw.h +++ /dev/null @@ -1,5 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -reg_t lo = zext32(RS1) >> 16; -reg_t hi = zext32(RS2) >> 16 << 16; -WRITE_RD(sext32(lo | hi)); diff --git a/riscv/insns/shfl.h b/riscv/insns/shfl.h deleted file mode 100644 index 3004871e2c..0000000000 --- a/riscv/insns/shfl.h +++ /dev/null @@ -1,9 +0,0 @@ 
-require_extension(EXT_XZBP); -reg_t x = RS1; -int shamt = RS2 & ((xlen-1) >> 1); -if (shamt & 16) x = (x & 0xFFFF00000000FFFFLL) | ((x & 0x0000FFFF00000000LL) >> 16) | ((x & 0x00000000FFFF0000LL) << 16); -if (shamt & 8) x = (x & 0xFF0000FFFF0000FFLL) | ((x & 0x00FF000000FF0000LL) >> 8) | ((x & 0x0000FF000000FF00LL) << 8); -if (shamt & 4) x = (x & 0xF00FF00FF00FF00FLL) | ((x & 0x0F000F000F000F00LL) >> 4) | ((x & 0x00F000F000F000F0LL) << 4); -if (shamt & 2) x = (x & 0xC3C3C3C3C3C3C3C3LL) | ((x & 0x3030303030303030LL) >> 2) | ((x & 0x0C0C0C0C0C0C0C0CLL) << 2); -if (shamt & 1) x = (x & 0x9999999999999999LL) | ((x & 0x4444444444444444LL) >> 1) | ((x & 0x2222222222222222LL) << 1); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/shflw.h b/riscv/insns/shflw.h deleted file mode 100644 index 06ee36045e..0000000000 --- a/riscv/insns/shflw.h +++ /dev/null @@ -1,9 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -reg_t x = RS1; -int shamt = RS2 & 15; -if (shamt & 8) x = (x & 0xFF0000FFFF0000FFLL) | ((x & 0x00FF000000FF0000LL) >> 8) | ((x & 0x0000FF000000FF00LL) << 8); -if (shamt & 4) x = (x & 0xF00FF00FF00FF00FLL) | ((x & 0x0F000F000F000F00LL) >> 4) | ((x & 0x00F000F000F000F0LL) << 4); -if (shamt & 2) x = (x & 0xC3C3C3C3C3C3C3C3LL) | ((x & 0x3030303030303030LL) >> 2) | ((x & 0x0C0C0C0C0C0C0C0CLL) << 2); -if (shamt & 1) x = (x & 0x9999999999999999LL) | ((x & 0x4444444444444444LL) >> 1) | ((x & 0x2222222222222222LL) << 1); -WRITE_RD(sext32(x)); diff --git a/riscv/insns/slo.h b/riscv/insns/slo.h deleted file mode 100644 index a27ec37e2e..0000000000 --- a/riscv/insns/slo.h +++ /dev/null @@ -1,2 +0,0 @@ -require_extension(EXT_XZBP); -WRITE_RD(sext_xlen(~((~RS1) << (RS2 & (xlen-1))))); diff --git a/riscv/insns/sloi.h b/riscv/insns/sloi.h deleted file mode 100644 index 62278b030b..0000000000 --- a/riscv/insns/sloi.h +++ /dev/null @@ -1,3 +0,0 @@ -require(SHAMT < xlen); -require_extension(EXT_XZBP); -WRITE_RD(sext_xlen(~((~RS1) << SHAMT))); diff --git a/riscv/insns/sloiw.h 
b/riscv/insns/sloiw.h deleted file mode 100644 index 492c94a112..0000000000 --- a/riscv/insns/sloiw.h +++ /dev/null @@ -1,3 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -WRITE_RD(sext32(~((~RS1) << SHAMT))); diff --git a/riscv/insns/slow.h b/riscv/insns/slow.h deleted file mode 100644 index 04c90a45d4..0000000000 --- a/riscv/insns/slow.h +++ /dev/null @@ -1,3 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -WRITE_RD(sext32(~((~RS1) << (RS2 & 0x1F)))); diff --git a/riscv/insns/sro.h b/riscv/insns/sro.h deleted file mode 100644 index 3ac050daff..0000000000 --- a/riscv/insns/sro.h +++ /dev/null @@ -1,2 +0,0 @@ -require_extension(EXT_XZBP); -WRITE_RD(sext_xlen(~((zext_xlen(~RS1)) >> (RS2 & (xlen-1))))); diff --git a/riscv/insns/sroi.h b/riscv/insns/sroi.h deleted file mode 100644 index e878892800..0000000000 --- a/riscv/insns/sroi.h +++ /dev/null @@ -1,3 +0,0 @@ -require(SHAMT < xlen); -require_extension(EXT_XZBP); -WRITE_RD(sext_xlen(~((zext_xlen(~RS1)) >> SHAMT))); diff --git a/riscv/insns/sroiw.h b/riscv/insns/sroiw.h deleted file mode 100644 index 83480705fa..0000000000 --- a/riscv/insns/sroiw.h +++ /dev/null @@ -1,3 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -WRITE_RD(sext32(~((~(uint32_t)RS1) >> SHAMT))); diff --git a/riscv/insns/srow.h b/riscv/insns/srow.h deleted file mode 100644 index 808af8dbbe..0000000000 --- a/riscv/insns/srow.h +++ /dev/null @@ -1,3 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -WRITE_RD(sext32(~((~(uint32_t)RS1) >> (RS2 & 0x1F)))); diff --git a/riscv/insns/unshfl.h b/riscv/insns/unshfl.h deleted file mode 100644 index 78990b876f..0000000000 --- a/riscv/insns/unshfl.h +++ /dev/null @@ -1,9 +0,0 @@ -require_extension(EXT_XZBP); -reg_t x = RS1; -int shamt = RS2 & ((xlen-1) >> 1); -if (shamt & 1) x = (x & 0x9999999999999999LL) | ((x & 0x4444444444444444LL) >> 1) | ((x & 0x2222222222222222LL) << 1); -if (shamt & 2) x = (x & 0xC3C3C3C3C3C3C3C3LL) | ((x & 0x3030303030303030LL) >> 2) | ((x & 0x0C0C0C0C0C0C0C0CLL) << 
2); -if (shamt & 4) x = (x & 0xF00FF00FF00FF00FLL) | ((x & 0x0F000F000F000F00LL) >> 4) | ((x & 0x00F000F000F000F0LL) << 4); -if (shamt & 8) x = (x & 0xFF0000FFFF0000FFLL) | ((x & 0x00FF000000FF0000LL) >> 8) | ((x & 0x0000FF000000FF00LL) << 8); -if (shamt & 16) x = (x & 0xFFFF00000000FFFFLL) | ((x & 0x0000FFFF00000000LL) >> 16) | ((x & 0x00000000FFFF0000LL) << 16); -WRITE_RD(sext_xlen(x)); diff --git a/riscv/insns/unshflw.h b/riscv/insns/unshflw.h deleted file mode 100644 index 776534e742..0000000000 --- a/riscv/insns/unshflw.h +++ /dev/null @@ -1,9 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -reg_t x = RS1; -int shamt = RS2 & 15; -if (shamt & 1) x = (x & 0x9999999999999999LL) | ((x & 0x4444444444444444LL) >> 1) | ((x & 0x2222222222222222LL) << 1); -if (shamt & 2) x = (x & 0xC3C3C3C3C3C3C3C3LL) | ((x & 0x3030303030303030LL) >> 2) | ((x & 0x0C0C0C0C0C0C0C0CLL) << 2); -if (shamt & 4) x = (x & 0xF00FF00FF00FF00FLL) | ((x & 0x0F000F000F000F00LL) >> 4) | ((x & 0x00F000F000F000F0LL) << 4); -if (shamt & 8) x = (x & 0xFF0000FFFF0000FFLL) | ((x & 0x00FF000000FF0000LL) >> 8) | ((x & 0x0000FF000000FF00LL) << 8); -WRITE_RD(sext32(x)); diff --git a/riscv/insns/xperm16.h b/riscv/insns/xperm16.h deleted file mode 100644 index 6b0ad51f0e..0000000000 --- a/riscv/insns/xperm16.h +++ /dev/null @@ -1,2 +0,0 @@ -require_extension(EXT_XZBP); -WRITE_RD(sext_xlen(xperm(RS1, RS2, 4, xlen))); diff --git a/riscv/insns/xperm32.h b/riscv/insns/xperm32.h deleted file mode 100644 index 64d90a406d..0000000000 --- a/riscv/insns/xperm32.h +++ /dev/null @@ -1,3 +0,0 @@ -require_rv64; -require_extension(EXT_XZBP); -WRITE_RD(xperm(RS1, RS2, 5, xlen)); diff --git a/riscv/overlap_list.h b/riscv/overlap_list.h index d8b1225866..a30c770e60 100644 --- a/riscv/overlap_list.h +++ b/riscv/overlap_list.h @@ -12,4 +12,3 @@ DECLARE_OVERLAP_INSN(c_fsd, EXT_ZCD) DECLARE_OVERLAP_INSN(c_ebreak, EXT_ZCA) DECLARE_OVERLAP_INSN(c_jalr, EXT_ZCA) DECLARE_OVERLAP_INSN(c_jr, EXT_ZCA) -DECLARE_OVERLAP_INSN(cmov, 
EXT_XZBT) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index ac45b2896a..db63290205 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -393,15 +393,6 @@ riscv_insn_ext_q_zfa = \ riscv_insn_ext_b = \ add_uw \ andn \ - bdecompress \ - bdecompressw \ - bcompress \ - bcompressw \ - bfp \ - bfpw \ - bmatflip \ - bmator \ - bmatxor \ sh1add \ sh1add_uw \ sh2add \ @@ -414,31 +405,13 @@ riscv_insn_ext_b = \ clz \ clzw \ cmix \ - cmov \ - crc32_b \ - crc32c_b \ - crc32c_d \ - crc32c_h \ - crc32c_w \ - crc32_d \ - crc32_h \ - crc32_w \ ctz \ ctzw \ - fsl \ - fslw \ fsr \ fsri \ - fsriw \ fsrw \ - gorc \ gorci \ - gorciw \ - gorcw \ - grev \ grevi \ - greviw \ - grevw \ max \ maxu \ min \ @@ -447,7 +420,6 @@ riscv_insn_ext_b = \ pack \ packh \ packu \ - packuw \ packw \ cpop \ cpopw \ @@ -467,26 +439,12 @@ riscv_insn_ext_b = \ bseti \ sext_b \ sext_h \ - shfl \ shfli \ - shflw \ slli_uw \ - slo \ - sloi \ - sloiw \ - slow \ - sro \ - sroi \ - sroiw \ - srow \ - unshfl \ unshfli \ - unshflw \ xnor \ xperm4 \ xperm8 \ - xperm16 \ - xperm32 \ # Scalar Crypto ISE riscv_insn_ext_k = \ From b043cc1d7430d6a4d982aa4d2b07b44dd4b2366c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 14:33:42 -0700 Subject: [PATCH 023/127] Remove Xbitmanip from instructions that belong to multiple extensions --- riscv/insns/cmix.h | 2 +- riscv/insns/fsr.h | 3 ++- riscv/insns/fsri.h | 3 ++- riscv/insns/fsrw.h | 2 +- riscv/insns/gorci.h | 3 +-- riscv/insns/grevi.h | 3 +-- riscv/insns/pack.h | 6 +----- riscv/insns/packh.h | 5 +---- riscv/insns/packu.h | 4 +--- riscv/insns/packw.h | 5 +---- riscv/insns/shfli.h | 3 +-- riscv/insns/unshfli.h | 3 +-- riscv/insns/xperm4.h | 2 +- riscv/insns/xperm8.h | 2 +- 14 files changed, 16 insertions(+), 30 deletions(-) diff --git a/riscv/insns/cmix.h b/riscv/insns/cmix.h index 98eb0bca21..f3b79773a9 100644 --- a/riscv/insns/cmix.h +++ b/riscv/insns/cmix.h @@ -1,2 +1,2 @@ -require_either_extension(EXT_ZBPBO, EXT_XZBT); 
+require_extension(EXT_ZBPBO); WRITE_RD((RS1 & RS2) | (RS3 & ~RS2)); diff --git a/riscv/insns/fsr.h b/riscv/insns/fsr.h index dfb26f11e1..d94f922e96 100644 --- a/riscv/insns/fsr.h +++ b/riscv/insns/fsr.h @@ -1,4 +1,5 @@ -require_either_extension(xlen == 32 ? EXT_ZBPBO : EXT_XZBT, EXT_XZBT); +require_rv32; +require_extension(EXT_ZBPBO); int shamt = RS2 & (2*xlen-1); reg_t a = RS1, b = RS3; if (shamt >= xlen) { diff --git a/riscv/insns/fsri.h b/riscv/insns/fsri.h index f7186f1b6a..ced23642a9 100644 --- a/riscv/insns/fsri.h +++ b/riscv/insns/fsri.h @@ -1,4 +1,5 @@ -require_either_extension(xlen == 32 ? EXT_ZBPBO : EXT_XZBT, EXT_XZBT); +require_rv32; +require_extension(EXT_ZBPBO); int shamt = SHAMT & (2*xlen-1); reg_t a = RS1, b = RS3; if (shamt >= xlen) { diff --git a/riscv/insns/fsrw.h b/riscv/insns/fsrw.h index 494fe260cc..9471e36dcc 100644 --- a/riscv/insns/fsrw.h +++ b/riscv/insns/fsrw.h @@ -1,5 +1,5 @@ require_rv64; -require_either_extension(EXT_ZBPBO, EXT_XZBT); +require_extension(EXT_ZBPBO); int shamt = RS2 & 63; reg_t a = RS1, b = RS3; if (shamt >= 32) { diff --git a/riscv/insns/gorci.h b/riscv/insns/gorci.h index d3017f499e..a4656faa03 100644 --- a/riscv/insns/gorci.h +++ b/riscv/insns/gorci.h @@ -1,6 +1,5 @@ // Zbb contains orc.b but not general gorci -require(((SHAMT == 7) && p->extension_enabled(EXT_ZBB)) - || p->extension_enabled(EXT_XZBP)); +require(((SHAMT == 7) && p->extension_enabled(EXT_ZBB))); require(SHAMT < xlen); reg_t x = RS1; int shamt = SHAMT; diff --git a/riscv/insns/grevi.h b/riscv/insns/grevi.h index d4718145b4..c37f59b467 100644 --- a/riscv/insns/grevi.h +++ b/riscv/insns/grevi.h @@ -4,8 +4,7 @@ int shamt = SHAMT; require(((shamt == xlen - 8) && (p->extension_enabled(EXT_ZBB) || p->extension_enabled(EXT_ZBKB))) //rev8 || ((shamt == 7) && p->extension_enabled(EXT_ZBKB)) // rev8.b || ((shamt == 8) && p->extension_enabled(EXT_ZPN)) // rev8.h - || ((shamt == xlen - 1) && p->extension_enabled(EXT_ZPN)) // rev - || 
p->extension_enabled(EXT_XZBP)); + || ((shamt == xlen - 1) && p->extension_enabled(EXT_ZPN))); require(shamt < xlen); reg_t x = RS1; if (shamt & 1) x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); diff --git a/riscv/insns/pack.h b/riscv/insns/pack.h index 2140f918d0..0622b92291 100644 --- a/riscv/insns/pack.h +++ b/riscv/insns/pack.h @@ -1,11 +1,7 @@ // RV32Zbb contains zext.h but not general pack require(((xlen == 32) && (insn.rs2() == 0) && p->extension_enabled(EXT_ZBB)) || p->extension_enabled(EXT_ZPN) - || p->extension_enabled(EXT_ZBKB) - || p->extension_enabled(EXT_XZBP) - || p->extension_enabled(EXT_XZBE) - || p->extension_enabled(EXT_XZBF) - || ((xlen == 64) && p->extension_enabled(EXT_XZBM))); + || p->extension_enabled(EXT_ZBKB)); reg_t lo = zext_xlen(RS1 << (xlen/2)) >> (xlen/2); reg_t hi = zext_xlen(RS2 << (xlen/2)); WRITE_RD(sext_xlen(lo | hi)); diff --git a/riscv/insns/packh.h b/riscv/insns/packh.h index 82886e3293..0f3de5b974 100644 --- a/riscv/insns/packh.h +++ b/riscv/insns/packh.h @@ -1,7 +1,4 @@ -require(p->extension_enabled(EXT_ZBKB) || - p->extension_enabled(EXT_XZBP) || - p->extension_enabled(EXT_XZBE) || - p->extension_enabled(EXT_XZBF)); +require_extension(EXT_ZBKB); reg_t lo = zext_xlen(RS1 << (xlen-8)) >> (xlen-8); reg_t hi = zext_xlen(RS2 << (xlen-8)) >> (xlen-16); WRITE_RD(sext_xlen(lo | hi)); diff --git a/riscv/insns/packu.h b/riscv/insns/packu.h index 441207c32e..0676429f80 100644 --- a/riscv/insns/packu.h +++ b/riscv/insns/packu.h @@ -1,6 +1,4 @@ -require(p->extension_enabled(EXT_ZPN) || - p->extension_enabled(EXT_XZBP) || - ((xlen == 64) && p->extension_enabled(EXT_XZBM))); +require_extension(EXT_ZPN); reg_t lo = zext_xlen(RS1) >> (xlen/2); reg_t hi = zext_xlen(RS2) >> (xlen/2) << (xlen/2); WRITE_RD(sext_xlen(lo | hi)); diff --git a/riscv/insns/packw.h b/riscv/insns/packw.h index 084c190d0d..dd78717c05 100644 --- a/riscv/insns/packw.h +++ b/riscv/insns/packw.h @@ -1,9 +1,6 @@ // RV64Zbb contains zext.h but not 
general packw require(((insn.rs2() == 0) && p->extension_enabled(EXT_ZBB)) - || p->extension_enabled(EXT_ZBKB) - || p->extension_enabled(EXT_XZBP) - || p->extension_enabled(EXT_XZBE) - || p->extension_enabled(EXT_XZBF)); + || p->extension_enabled(EXT_ZBKB)); require_rv64; reg_t lo = zext32(RS1 << 16) >> 16; reg_t hi = zext32(RS2 << 16); diff --git a/riscv/insns/shfli.h b/riscv/insns/shfli.h index f8636190f0..bb21d2c9d0 100644 --- a/riscv/insns/shfli.h +++ b/riscv/insns/shfli.h @@ -1,6 +1,5 @@ // Zbkb contains zip but not general shfli -require(((insn.rs2() == (xlen / 2 - 1)) && p->extension_enabled(EXT_ZBKB)) - || p->extension_enabled(EXT_XZBP)); +require(((insn.rs2() == (xlen / 2 - 1)) && p->extension_enabled(EXT_ZBKB))); require(SHAMT < (xlen/2)); reg_t x = RS1; int shamt = SHAMT & ((xlen-1) >> 1); diff --git a/riscv/insns/unshfli.h b/riscv/insns/unshfli.h index 26920f1403..5a9cff1c45 100644 --- a/riscv/insns/unshfli.h +++ b/riscv/insns/unshfli.h @@ -1,6 +1,5 @@ // Zbkb contains unzip but not general unshfli -require(((insn.rs2() == (xlen / 2 - 1)) && p->extension_enabled(EXT_ZBKB)) - || p->extension_enabled(EXT_XZBP)); +require(((insn.rs2() == (xlen / 2 - 1)) && p->extension_enabled(EXT_ZBKB))); require(SHAMT < (xlen/2)); reg_t x = RS1; int shamt = SHAMT & ((xlen-1) >> 1); diff --git a/riscv/insns/xperm4.h b/riscv/insns/xperm4.h index 38800f3bfb..a9d685f3f6 100644 --- a/riscv/insns/xperm4.h +++ b/riscv/insns/xperm4.h @@ -1,2 +1,2 @@ -require_either_extension(EXT_ZBKX, EXT_XZBP); +require_extension(EXT_ZBKX); WRITE_RD(sext_xlen(xperm(RS1, RS2, 2, xlen))); diff --git a/riscv/insns/xperm8.h b/riscv/insns/xperm8.h index c272d66949..1ba48efc6c 100644 --- a/riscv/insns/xperm8.h +++ b/riscv/insns/xperm8.h @@ -1,2 +1,2 @@ -require_either_extension(EXT_ZBKX, EXT_XZBP); +require_extension(EXT_ZBKX); WRITE_RD(sext_xlen(xperm(RS1, RS2, 3, xlen))); From 961d6def2131fa0a831083684fc77b83592a2175 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 14:34:22 
-0700 Subject: [PATCH 024/127] Remove Xbitmanip from disassembler --- disasm/disasm.cc | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 25de783ae3..8722cdb977 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2144,38 +2144,6 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) } } - if (isa->extension_enabled(EXT_XZBP)) { - DEFINE_ITYPE_SHIFT(grevi); - DEFINE_ITYPE_SHIFT(gorci); - DEFINE_RTYPE(pack); - DEFINE_RTYPE(packh); - DEFINE_RTYPE(packu); - DEFINE_RTYPE(grev); - DEFINE_RTYPE(gorc); - DEFINE_RTYPE(xperm4); - DEFINE_RTYPE(xperm8); - DEFINE_RTYPE(xperm16); - DEFINE_RTYPE(xperm32); - } - - if (isa->extension_enabled(EXT_XZBP) || - isa->extension_enabled(EXT_XZBE) || - isa->extension_enabled(EXT_XZBF)) { - if(isa->get_max_xlen() == 64) { - DEFINE_RTYPE(packw); - } - } - - if (isa->extension_enabled(EXT_XZBT)) { - DEFINE_R3TYPE(cmix); - DEFINE_R3TYPE(fsr); - DEFINE_R3TYPE(fsri); - if(isa->get_max_xlen() == 64) { - DEFINE_R3TYPE(fsriw); - DEFINE_R3TYPE(fsrw); - } - } - if (isa->extension_enabled(EXT_ZICBOM)) { DISASM_INSN("cbo.clean", cbo_clean, 0, {&base_only_address}); DISASM_INSN("cbo.flush", cbo_flush, 0, {&base_only_address}); @@ -2252,7 +2220,7 @@ disassembler_t::disassembler_t(const isa_parser_t *isa) // next-highest priority: other instructions in same base ISA std::string fallback_isa_string = std::string("rv") + std::to_string(isa->get_max_xlen()) + - "gqchv_zfh_zba_zbb_zbc_zbs_zcb_zicbom_zicboz_zkn_zkr_zks_svinval_xbitmanip"; + "gqchv_zfh_zba_zbb_zbc_zbs_zcb_zicbom_zicboz_zkn_zkr_zks_svinval"; isa_parser_t fallback_isa(fallback_isa_string.c_str(), DEFAULT_PRIV); add_instructions(&fallback_isa); From 69389df41cccc2853709e5a18f7c87693f4b0c3d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 14:24:37 -0700 Subject: [PATCH 025/127] isa parser: reject Xbitmanip extensions --- riscv/isa_parser.cc | 27 +-------------------------- 
riscv/isa_parser.h | 8 -------- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/riscv/isa_parser.cc b/riscv/isa_parser.cc index bd73b0c39f..8bb8c495b3 100644 --- a/riscv/isa_parser.cc +++ b/riscv/isa_parser.cc @@ -242,32 +242,7 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_SSTC] = true; } else if (ext_str[0] == 'x') { extension_table['X'] = true; - if (ext_str == "xbitmanip") { - extension_table[EXT_XZBP] = true; - extension_table[EXT_XZBS] = true; - extension_table[EXT_XZBE] = true; - extension_table[EXT_XZBF] = true; - extension_table[EXT_XZBC] = true; - extension_table[EXT_XZBM] = true; - extension_table[EXT_XZBR] = true; - extension_table[EXT_XZBT] = true; - } else if (ext_str == "xzbp") { - extension_table[EXT_XZBP] = true; - } else if (ext_str == "xzbs") { - extension_table[EXT_XZBS] = true; - } else if (ext_str == "xzbe") { - extension_table[EXT_XZBE] = true; - } else if (ext_str == "xzbf") { - extension_table[EXT_XZBF] = true; - } else if (ext_str == "xzbc") { - extension_table[EXT_XZBC] = true; - } else if (ext_str == "xzbm") { - extension_table[EXT_XZBM] = true; - } else if (ext_str == "xzbr") { - extension_table[EXT_XZBR] = true; - } else if (ext_str == "xzbt") { - extension_table[EXT_XZBT] = true; - } else if (ext_str.size() == 1) { + if (ext_str.size() == 1) { bad_isa_string(str, "single 'X' is not a proper name"); } else if (ext_str != "xdummy") { extension_t* x = find_extension(ext_str.substr(1).c_str())(); diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 7558116869..4e6856195c 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -60,14 +60,6 @@ typedef enum { EXT_ZIHPM, EXT_ZVFBFMIN, EXT_ZVFBFWMA, - EXT_XZBP, - EXT_XZBS, - EXT_XZBE, - EXT_XZBF, - EXT_XZBC, - EXT_XZBM, - EXT_XZBR, - EXT_XZBT, EXT_SSTC, EXT_INTERNAL_ZFH_MOVE, NUM_ISA_EXTENSIONS From 58f9ba084c8943de29caa4503f734a6f752b1068 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Jun 2023 14:34:32 -0700 Subject: 
[PATCH 026/127] Remove Xbitmanip from README --- README.md | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/README.md b/README.md index 8d5dc45b94..e850b6e459 100644 --- a/README.md +++ b/README.md @@ -53,20 +53,6 @@ Spike supports the following RISC-V ISA features: - Zvfbfmin extension, v0.6 - Zvfbfwma extension, v0.6 -As a Spike extension, the remainder of the proposed -[Bit-Manipulation Extensions](https://github.com/riscv/riscv-bitmanip) -is provided under the Spike-custom extension name _Xbitmanip_. -These instructions (and, of course, the extension name) are not RISC-V -standards. - -These proposed bit-manipulation extensions can be split into further -groups: Zbp, Zbs, Zbe, Zbf, Zbc, Zbm, Zbr, Zbt. Note that Zbc is -ratified, but the original proposal contained some extra instructions -(64-bit carryless multiplies) which are captured here. - -To enable these extensions individually, use the Spike-custom -extension names _XZbp_, _XZbs_, _XZbc_, and so on. - Versioning and APIs ------------------- From 270f408a7be7f574048e0431f172e13140d88045 Mon Sep 17 00:00:00 2001 From: Philipp Tomsich Date: Sun, 18 Jun 2023 22:42:21 +0200 Subject: [PATCH 027/127] Makefile: fix typo in check target The check target processes the output using grep; however, one of the patterns misspelled 'Segmentation' as 'Segementation'. Fixing the typo. 
--- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 69f0405d7b..01d7baca41 100644 --- a/Makefile.in +++ b/Makefile.in @@ -319,7 +319,7 @@ $(2)_junk += \ all-$(1) : lib$(1).a $$($(2)_install_prog_exes) check-$(1) : $$($(2)_test_outs) - echo; grep -h -e'Unit Tests' -e'FAILED' -e'Segementation' $$^; echo + echo; grep -h -e'Unit Tests' -e'FAILED' -e'Segmentation' $$^; echo clean-$(1) : rm -rf $$($(2)_junk) From 07e7626e5692ae6bb5773ddb5493ba838debca86 Mon Sep 17 00:00:00 2001 From: Gianluca Guida Date: Thu, 25 May 2023 13:19:47 +0100 Subject: [PATCH 028/127] fesvr: support int128_t/uint128_t Also remove now duplicate definition for types. --- fesvr/byteorder.h | 7 +++++++ riscv/decode_macros.h | 5 ----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fesvr/byteorder.h b/fesvr/byteorder.h index 2b1dbf981c..d9e503a271 100644 --- a/fesvr/byteorder.h +++ b/fesvr/byteorder.h @@ -15,6 +15,13 @@ static inline int16_t swap(int16_t n) { return int16_t(swap(uint16_t(n))); } static inline int32_t swap(int32_t n) { return int32_t(swap(uint32_t(n))); } static inline int64_t swap(int64_t n) { return int64_t(swap(uint64_t(n))); } +#ifdef HAVE_INT128 +typedef __int128 int128_t; +typedef unsigned __int128 uint128_t; +static inline uint128_t swap(uint128_t n) { return (uint128_t(swap(uint64_t(n))) << 64) | swap(uint64_t(n >> 64)); } +static inline int128_t swap(int128_t n) { return int128_t(swap(uint128_t(n))); } +#endif + #ifdef WORDS_BIGENDIAN template static inline T from_be(T n) { return n; } template static inline T to_be(T n) { return n; } diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h index 7ba132c196..f39149b172 100644 --- a/riscv/decode_macros.h +++ b/riscv/decode_macros.h @@ -10,11 +10,6 @@ #include "softfloat_types.h" #include "specialize.h" -#ifdef HAVE_INT128 -typedef __int128 int128_t; -typedef unsigned __int128 uint128_t; -#endif - // helpful macros, etc #define MMU (*p->get_mmu()) 
#define STATE (*p->get_state()) From 4ac7e03dfb7a45d2732fadfe29e1af30ef25bcac Mon Sep 17 00:00:00 2001 From: Gianluca Guida Date: Thu, 25 May 2023 13:27:45 +0100 Subject: [PATCH 029/127] mmu: support load/store longer than 64-bits. --- riscv/mmu.cc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/riscv/mmu.cc b/riscv/mmu.cc index 358ccd3e42..3f90060e82 100644 --- a/riscv/mmu.cc +++ b/riscv/mmu.cc @@ -242,6 +242,11 @@ void mmu_t::load_slow_path(reg_t addr, reg_t len, uint8_t* bytes, xlate_flags_t load_slow_path_intrapage(len - len_page0, bytes + len_page0, access_info.split_misaligned_access(len_page0)); } + while (len > sizeof(reg_t)) { + check_triggers(triggers::OPERATION_LOAD, addr, access_info.effective_virt, reg_from_bytes(sizeof(reg_t), bytes)); + len -= sizeof(reg_t); + bytes += sizeof(reg_t); + } check_triggers(triggers::OPERATION_LOAD, addr, access_info.effective_virt, reg_from_bytes(len, bytes)); } @@ -275,8 +280,16 @@ void mmu_t::store_slow_path_intrapage(reg_t len, const uint8_t* bytes, mem_acces void mmu_t::store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes, xlate_flags_t xlate_flags, bool actually_store, bool UNUSED require_alignment) { auto access_info = generate_access_info(addr, STORE, xlate_flags); - if (actually_store) - check_triggers(triggers::OPERATION_STORE, addr, access_info.effective_virt, reg_from_bytes(len, bytes)); + if (actually_store) { + reg_t trig_len = len; + const uint8_t* trig_bytes = bytes; + while (trig_len > sizeof(reg_t)) { + check_triggers(triggers::OPERATION_STORE, addr, access_info.effective_virt, reg_from_bytes(sizeof(reg_t), trig_bytes)); + trig_len -= sizeof(reg_t); + trig_bytes += sizeof(reg_t); + } + check_triggers(triggers::OPERATION_STORE, addr, access_info.effective_virt, reg_from_bytes(trig_len, trig_bytes)); + } if (addr & (len - 1)) { bool gva = access_info.effective_virt; From bfdc0f8ef7598532c096b2293535fff70218f6a5 Mon Sep 17 00:00:00 2001 From: Gianluca Guida Date: 
Mon, 19 Jun 2023 17:58:06 +0100 Subject: [PATCH 030/127] regenerate enconding.h --- riscv/encoding.h | 134 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/riscv/encoding.h b/riscv/encoding.h index e39f535ceb..db7b0215b6 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (8d70e77) + * https://github.com/riscv/riscv-opcodes (3ca60c5) */ #ifndef RISCV_CSR_ENCODING_H @@ -421,6 +421,12 @@ #define MASK_AMOAND_D 0xf800707f #define MATCH_AMOAND_W 0x6000202f #define MASK_AMOAND_W 0xf800707f +#define MATCH_AMOCAS_D 0x2800302f +#define MASK_AMOCAS_D 0xf800707f +#define MATCH_AMOCAS_Q 0x2800402f +#define MASK_AMOCAS_Q 0xf800707f +#define MATCH_AMOCAS_W 0x2800202f +#define MASK_AMOCAS_W 0xf800707f #define MATCH_AMOMAX_D 0xa000302f #define MASK_AMOMAX_D 0xf800707f #define MATCH_AMOMAX_W 0xa000202f @@ -1995,6 +2001,28 @@ #define MASK_VADD_VV 0xfc00707f #define MATCH_VADD_VX 0x4057 #define MASK_VADD_VX 0xfc00707f +#define MATCH_VAESDF_VS 0xa600a077 +#define MASK_VAESDF_VS 0xfe0ff07f +#define MATCH_VAESDF_VV 0xa200a077 +#define MASK_VAESDF_VV 0xfe0ff07f +#define MATCH_VAESDM_VS 0xa6002077 +#define MASK_VAESDM_VS 0xfe0ff07f +#define MATCH_VAESDM_VV 0xa2002077 +#define MASK_VAESDM_VV 0xfe0ff07f +#define MATCH_VAESEF_VS 0xa601a077 +#define MASK_VAESEF_VS 0xfe0ff07f +#define MATCH_VAESEF_VV 0xa201a077 +#define MASK_VAESEF_VV 0xfe0ff07f +#define MATCH_VAESEM_VS 0xa6012077 +#define MASK_VAESEM_VS 0xfe0ff07f +#define MATCH_VAESEM_VV 0xa2012077 +#define MASK_VAESEM_VV 0xfe0ff07f +#define MATCH_VAESKF1_VI 0x8a002077 +#define MASK_VAESKF1_VI 0xfe00707f +#define MATCH_VAESKF2_VI 0xaa002077 +#define MASK_VAESKF2_VI 0xfe00707f +#define MATCH_VAESZ_VS 0xa603a077 +#define MASK_VAESZ_VS 0xfe0ff07f #define MATCH_VAMOADDEI16_V 0x502f #define MASK_VAMOADDEI16_V 0xf800707f #define MATCH_VAMOADDEI32_V 0x602f @@ -2073,6 +2101,10 
@@ #define MASK_VAND_VV 0xfc00707f #define MATCH_VAND_VX 0x24004057 #define MASK_VAND_VX 0xfc00707f +#define MATCH_VANDN_VV 0x4000057 +#define MASK_VANDN_VV 0xfc00707f +#define MATCH_VANDN_VX 0x4004057 +#define MASK_VANDN_VX 0xfc00707f #define MATCH_VASUB_VV 0x2c002057 #define MASK_VASUB_VV 0xfc00707f #define MATCH_VASUB_VX 0x2c006057 @@ -2081,10 +2113,28 @@ #define MASK_VASUBU_VV 0xfc00707f #define MATCH_VASUBU_VX 0x28006057 #define MASK_VASUBU_VX 0xfc00707f +#define MATCH_VBREV8_V 0x48042057 +#define MASK_VBREV8_V 0xfc0ff07f +#define MATCH_VBREV_V 0x48052057 +#define MASK_VBREV_V 0xfc0ff07f +#define MATCH_VCLMUL_VV 0x30002057 +#define MASK_VCLMUL_VV 0xfc00707f +#define MATCH_VCLMUL_VX 0x30006057 +#define MASK_VCLMUL_VX 0xfc00707f +#define MATCH_VCLMULH_VV 0x34002057 +#define MASK_VCLMULH_VV 0xfc00707f +#define MATCH_VCLMULH_VX 0x34006057 +#define MASK_VCLMULH_VX 0xfc00707f +#define MATCH_VCLZ_V 0x48062057 +#define MASK_VCLZ_V 0xfc0ff07f #define MATCH_VCOMPRESS_VM 0x5e002057 #define MASK_VCOMPRESS_VM 0xfe00707f #define MATCH_VCPOP_M 0x40082057 #define MASK_VCPOP_M 0xfc0ff07f +#define MATCH_VCPOP_V 0x48072057 +#define MASK_VCPOP_V 0xfc0ff07f +#define MATCH_VCTZ_V 0x4806a057 +#define MASK_VCTZ_V 0xfc0ff07f #define MATCH_VDIV_VV 0x84002057 #define MASK_VDIV_VV 0xfc00707f #define MATCH_VDIV_VX 0x84006057 @@ -2285,6 +2335,10 @@ #define MASK_VFWSUB_WF 0xfc00707f #define MATCH_VFWSUB_WV 0xd8001057 #define MASK_VFWSUB_WV 0xfc00707f +#define MATCH_VGHSH_VV 0xb2002077 +#define MASK_VGHSH_VV 0xfe00707f +#define MATCH_VGMUL_VV 0xa208a077 +#define MASK_VGMUL_VV 0xfe0ff07f #define MATCH_VID_V 0x5008a057 #define MASK_VID_V 0xfdfff07f #define MATCH_VIOTA_M 0x50082057 @@ -2631,6 +2685,8 @@ #define MASK_VREMU_VV 0xfc00707f #define MATCH_VREMU_VX 0x88006057 #define MASK_VREMU_VX 0xfc00707f +#define MATCH_VREV8_V 0x4804a057 +#define MASK_VREV8_V 0xfc0ff07f #define MATCH_VRGATHER_VI 0x30003057 #define MASK_VRGATHER_VI 0xfc00707f #define MATCH_VRGATHER_VV 0x30000057 @@ -2639,6 +2695,16 
@@ #define MASK_VRGATHER_VX 0xfc00707f #define MATCH_VRGATHEREI16_VV 0x38000057 #define MASK_VRGATHEREI16_VV 0xfc00707f +#define MATCH_VROL_VV 0x54000057 +#define MASK_VROL_VV 0xfc00707f +#define MATCH_VROL_VX 0x54004057 +#define MASK_VROL_VX 0xfc00707f +#define MATCH_VROR_VI 0x50003057 +#define MASK_VROR_VI 0xf800707f +#define MATCH_VROR_VV 0x50000057 +#define MASK_VROR_VV 0xfc00707f +#define MATCH_VROR_VX 0x50004057 +#define MASK_VROR_VX 0xfc00707f #define MATCH_VRSUB_VI 0xc003057 #define MASK_VRSUB_VI 0xfc00707f #define MATCH_VRSUB_VX 0xc004057 @@ -2695,6 +2761,12 @@ #define MASK_VSEXT_VF4 0xfc0ff07f #define MATCH_VSEXT_VF8 0x4801a057 #define MASK_VSEXT_VF8 0xfc0ff07f +#define MATCH_VSHA2CH_VV 0xba002077 +#define MASK_VSHA2CH_VV 0xfe00707f +#define MATCH_VSHA2CL_VV 0xbe002077 +#define MASK_VSHA2CL_VV 0xfe00707f +#define MATCH_VSHA2MS_VV 0xb6002077 +#define MASK_VSHA2MS_VV 0xfe00707f #define MATCH_VSLIDE1DOWN_VX 0x3c006057 #define MASK_VSLIDE1DOWN_VX 0xfc00707f #define MATCH_VSLIDE1UP_VX 0x38006057 @@ -2713,6 +2785,16 @@ #define MASK_VSLL_VV 0xfc00707f #define MATCH_VSLL_VX 0x94004057 #define MASK_VSLL_VX 0xfc00707f +#define MATCH_VSM3C_VI 0xae002077 +#define MASK_VSM3C_VI 0xfe00707f +#define MATCH_VSM3ME_VV 0x82002077 +#define MASK_VSM3ME_VV 0xfe00707f +#define MATCH_VSM4K_VI 0x86002077 +#define MASK_VSM4K_VI 0xfe00707f +#define MATCH_VSM4R_VS 0xa6082077 +#define MASK_VSM4R_VS 0xfe0ff07f +#define MATCH_VSM4R_VV 0xa2082077 +#define MASK_VSM4R_VV 0xfe0ff07f #define MATCH_VSM_V 0x2b00027 #define MASK_VSM_V 0xfff0707f #define MATCH_VSMUL_VV 0x9c000057 @@ -2849,6 +2931,12 @@ #define MASK_VWREDSUM_VS 0xfc00707f #define MATCH_VWREDSUMU_VS 0xc0000057 #define MASK_VWREDSUMU_VS 0xfc00707f +#define MATCH_VWSLL_VI 0xd4003057 +#define MASK_VWSLL_VI 0xfc00707f +#define MATCH_VWSLL_VV 0xd4000057 +#define MASK_VWSLL_VV 0xfc00707f +#define MATCH_VWSLL_VX 0xd4004057 +#define MASK_VWSLL_VX 0xfc00707f #define MATCH_VWSUB_VV 0xcc002057 #define MASK_VWSUB_VV 0xfc00707f #define 
MATCH_VWSUB_VX 0xcc006057 @@ -3486,6 +3574,9 @@ DECLARE_INSN(amoadd_d, MATCH_AMOADD_D, MASK_AMOADD_D) DECLARE_INSN(amoadd_w, MATCH_AMOADD_W, MASK_AMOADD_W) DECLARE_INSN(amoand_d, MATCH_AMOAND_D, MASK_AMOAND_D) DECLARE_INSN(amoand_w, MATCH_AMOAND_W, MASK_AMOAND_W) +DECLARE_INSN(amocas_d, MATCH_AMOCAS_D, MASK_AMOCAS_D) +DECLARE_INSN(amocas_q, MATCH_AMOCAS_Q, MASK_AMOCAS_Q) +DECLARE_INSN(amocas_w, MATCH_AMOCAS_W, MASK_AMOCAS_W) DECLARE_INSN(amomax_d, MATCH_AMOMAX_D, MASK_AMOMAX_D) DECLARE_INSN(amomax_w, MATCH_AMOMAX_W, MASK_AMOMAX_W) DECLARE_INSN(amomaxu_d, MATCH_AMOMAXU_D, MASK_AMOMAXU_D) @@ -4273,6 +4364,17 @@ DECLARE_INSN(vadc_vxm, MATCH_VADC_VXM, MASK_VADC_VXM) DECLARE_INSN(vadd_vi, MATCH_VADD_VI, MASK_VADD_VI) DECLARE_INSN(vadd_vv, MATCH_VADD_VV, MASK_VADD_VV) DECLARE_INSN(vadd_vx, MATCH_VADD_VX, MASK_VADD_VX) +DECLARE_INSN(vaesdf_vs, MATCH_VAESDF_VS, MASK_VAESDF_VS) +DECLARE_INSN(vaesdf_vv, MATCH_VAESDF_VV, MASK_VAESDF_VV) +DECLARE_INSN(vaesdm_vs, MATCH_VAESDM_VS, MASK_VAESDM_VS) +DECLARE_INSN(vaesdm_vv, MATCH_VAESDM_VV, MASK_VAESDM_VV) +DECLARE_INSN(vaesef_vs, MATCH_VAESEF_VS, MASK_VAESEF_VS) +DECLARE_INSN(vaesef_vv, MATCH_VAESEF_VV, MASK_VAESEF_VV) +DECLARE_INSN(vaesem_vs, MATCH_VAESEM_VS, MASK_VAESEM_VS) +DECLARE_INSN(vaesem_vv, MATCH_VAESEM_VV, MASK_VAESEM_VV) +DECLARE_INSN(vaeskf1_vi, MATCH_VAESKF1_VI, MASK_VAESKF1_VI) +DECLARE_INSN(vaeskf2_vi, MATCH_VAESKF2_VI, MASK_VAESKF2_VI) +DECLARE_INSN(vaesz_vs, MATCH_VAESZ_VS, MASK_VAESZ_VS) DECLARE_INSN(vamoaddei16_v, MATCH_VAMOADDEI16_V, MASK_VAMOADDEI16_V) DECLARE_INSN(vamoaddei32_v, MATCH_VAMOADDEI32_V, MASK_VAMOADDEI32_V) DECLARE_INSN(vamoaddei64_v, MATCH_VAMOADDEI64_V, MASK_VAMOADDEI64_V) @@ -4312,12 +4414,23 @@ DECLARE_INSN(vamoxorei8_v, MATCH_VAMOXOREI8_V, MASK_VAMOXOREI8_V) DECLARE_INSN(vand_vi, MATCH_VAND_VI, MASK_VAND_VI) DECLARE_INSN(vand_vv, MATCH_VAND_VV, MASK_VAND_VV) DECLARE_INSN(vand_vx, MATCH_VAND_VX, MASK_VAND_VX) +DECLARE_INSN(vandn_vv, MATCH_VANDN_VV, MASK_VANDN_VV) +DECLARE_INSN(vandn_vx, 
MATCH_VANDN_VX, MASK_VANDN_VX) DECLARE_INSN(vasub_vv, MATCH_VASUB_VV, MASK_VASUB_VV) DECLARE_INSN(vasub_vx, MATCH_VASUB_VX, MASK_VASUB_VX) DECLARE_INSN(vasubu_vv, MATCH_VASUBU_VV, MASK_VASUBU_VV) DECLARE_INSN(vasubu_vx, MATCH_VASUBU_VX, MASK_VASUBU_VX) +DECLARE_INSN(vbrev8_v, MATCH_VBREV8_V, MASK_VBREV8_V) +DECLARE_INSN(vbrev_v, MATCH_VBREV_V, MASK_VBREV_V) +DECLARE_INSN(vclmul_vv, MATCH_VCLMUL_VV, MASK_VCLMUL_VV) +DECLARE_INSN(vclmul_vx, MATCH_VCLMUL_VX, MASK_VCLMUL_VX) +DECLARE_INSN(vclmulh_vv, MATCH_VCLMULH_VV, MASK_VCLMULH_VV) +DECLARE_INSN(vclmulh_vx, MATCH_VCLMULH_VX, MASK_VCLMULH_VX) +DECLARE_INSN(vclz_v, MATCH_VCLZ_V, MASK_VCLZ_V) DECLARE_INSN(vcompress_vm, MATCH_VCOMPRESS_VM, MASK_VCOMPRESS_VM) DECLARE_INSN(vcpop_m, MATCH_VCPOP_M, MASK_VCPOP_M) +DECLARE_INSN(vcpop_v, MATCH_VCPOP_V, MASK_VCPOP_V) +DECLARE_INSN(vctz_v, MATCH_VCTZ_V, MASK_VCTZ_V) DECLARE_INSN(vdiv_vv, MATCH_VDIV_VV, MASK_VDIV_VV) DECLARE_INSN(vdiv_vx, MATCH_VDIV_VX, MASK_VDIV_VX) DECLARE_INSN(vdivu_vv, MATCH_VDIVU_VV, MASK_VDIVU_VV) @@ -4418,6 +4531,8 @@ DECLARE_INSN(vfwsub_vf, MATCH_VFWSUB_VF, MASK_VFWSUB_VF) DECLARE_INSN(vfwsub_vv, MATCH_VFWSUB_VV, MASK_VFWSUB_VV) DECLARE_INSN(vfwsub_wf, MATCH_VFWSUB_WF, MASK_VFWSUB_WF) DECLARE_INSN(vfwsub_wv, MATCH_VFWSUB_WV, MASK_VFWSUB_WV) +DECLARE_INSN(vghsh_vv, MATCH_VGHSH_VV, MASK_VGHSH_VV) +DECLARE_INSN(vgmul_vv, MATCH_VGMUL_VV, MASK_VGMUL_VV) DECLARE_INSN(vid_v, MATCH_VID_V, MASK_VID_V) DECLARE_INSN(viota_m, MATCH_VIOTA_M, MASK_VIOTA_M) DECLARE_INSN(vl1re16_v, MATCH_VL1RE16_V, MASK_VL1RE16_V) @@ -4591,10 +4706,16 @@ DECLARE_INSN(vrem_vv, MATCH_VREM_VV, MASK_VREM_VV) DECLARE_INSN(vrem_vx, MATCH_VREM_VX, MASK_VREM_VX) DECLARE_INSN(vremu_vv, MATCH_VREMU_VV, MASK_VREMU_VV) DECLARE_INSN(vremu_vx, MATCH_VREMU_VX, MASK_VREMU_VX) +DECLARE_INSN(vrev8_v, MATCH_VREV8_V, MASK_VREV8_V) DECLARE_INSN(vrgather_vi, MATCH_VRGATHER_VI, MASK_VRGATHER_VI) DECLARE_INSN(vrgather_vv, MATCH_VRGATHER_VV, MASK_VRGATHER_VV) DECLARE_INSN(vrgather_vx, MATCH_VRGATHER_VX, 
MASK_VRGATHER_VX) DECLARE_INSN(vrgatherei16_vv, MATCH_VRGATHEREI16_VV, MASK_VRGATHEREI16_VV) +DECLARE_INSN(vrol_vv, MATCH_VROL_VV, MASK_VROL_VV) +DECLARE_INSN(vrol_vx, MATCH_VROL_VX, MASK_VROL_VX) +DECLARE_INSN(vror_vi, MATCH_VROR_VI, MASK_VROR_VI) +DECLARE_INSN(vror_vv, MATCH_VROR_VV, MASK_VROR_VV) +DECLARE_INSN(vror_vx, MATCH_VROR_VX, MASK_VROR_VX) DECLARE_INSN(vrsub_vi, MATCH_VRSUB_VI, MASK_VRSUB_VI) DECLARE_INSN(vrsub_vx, MATCH_VRSUB_VX, MASK_VRSUB_VX) DECLARE_INSN(vs1r_v, MATCH_VS1R_V, MASK_VS1R_V) @@ -4623,6 +4744,9 @@ DECLARE_INSN(vsetvli, MATCH_VSETVLI, MASK_VSETVLI) DECLARE_INSN(vsext_vf2, MATCH_VSEXT_VF2, MASK_VSEXT_VF2) DECLARE_INSN(vsext_vf4, MATCH_VSEXT_VF4, MASK_VSEXT_VF4) DECLARE_INSN(vsext_vf8, MATCH_VSEXT_VF8, MASK_VSEXT_VF8) +DECLARE_INSN(vsha2ch_vv, MATCH_VSHA2CH_VV, MASK_VSHA2CH_VV) +DECLARE_INSN(vsha2cl_vv, MATCH_VSHA2CL_VV, MASK_VSHA2CL_VV) +DECLARE_INSN(vsha2ms_vv, MATCH_VSHA2MS_VV, MASK_VSHA2MS_VV) DECLARE_INSN(vslide1down_vx, MATCH_VSLIDE1DOWN_VX, MASK_VSLIDE1DOWN_VX) DECLARE_INSN(vslide1up_vx, MATCH_VSLIDE1UP_VX, MASK_VSLIDE1UP_VX) DECLARE_INSN(vslidedown_vi, MATCH_VSLIDEDOWN_VI, MASK_VSLIDEDOWN_VI) @@ -4632,6 +4756,11 @@ DECLARE_INSN(vslideup_vx, MATCH_VSLIDEUP_VX, MASK_VSLIDEUP_VX) DECLARE_INSN(vsll_vi, MATCH_VSLL_VI, MASK_VSLL_VI) DECLARE_INSN(vsll_vv, MATCH_VSLL_VV, MASK_VSLL_VV) DECLARE_INSN(vsll_vx, MATCH_VSLL_VX, MASK_VSLL_VX) +DECLARE_INSN(vsm3c_vi, MATCH_VSM3C_VI, MASK_VSM3C_VI) +DECLARE_INSN(vsm3me_vv, MATCH_VSM3ME_VV, MASK_VSM3ME_VV) +DECLARE_INSN(vsm4k_vi, MATCH_VSM4K_VI, MASK_VSM4K_VI) +DECLARE_INSN(vsm4r_vs, MATCH_VSM4R_VS, MASK_VSM4R_VS) +DECLARE_INSN(vsm4r_vv, MATCH_VSM4R_VV, MASK_VSM4R_VV) DECLARE_INSN(vsm_v, MATCH_VSM_V, MASK_VSM_V) DECLARE_INSN(vsmul_vv, MATCH_VSMUL_VV, MASK_VSMUL_VV) DECLARE_INSN(vsmul_vx, MATCH_VSMUL_VX, MASK_VSMUL_VX) @@ -4700,6 +4829,9 @@ DECLARE_INSN(vwmulu_vv, MATCH_VWMULU_VV, MASK_VWMULU_VV) DECLARE_INSN(vwmulu_vx, MATCH_VWMULU_VX, MASK_VWMULU_VX) DECLARE_INSN(vwredsum_vs, MATCH_VWREDSUM_VS, 
MASK_VWREDSUM_VS) DECLARE_INSN(vwredsumu_vs, MATCH_VWREDSUMU_VS, MASK_VWREDSUMU_VS) +DECLARE_INSN(vwsll_vi, MATCH_VWSLL_VI, MASK_VWSLL_VI) +DECLARE_INSN(vwsll_vv, MATCH_VWSLL_VV, MASK_VWSLL_VV) +DECLARE_INSN(vwsll_vx, MATCH_VWSLL_VX, MASK_VWSLL_VX) DECLARE_INSN(vwsub_vv, MATCH_VWSUB_VV, MASK_VWSUB_VV) DECLARE_INSN(vwsub_vx, MATCH_VWSUB_VX, MASK_VWSUB_VX) DECLARE_INSN(vwsub_wv, MATCH_VWSUB_WV, MASK_VWSUB_WV) From f71bda9637366c2bb06612cf03ed126c628df678 Mon Sep 17 00:00:00 2001 From: Gianluca Guida Date: Tue, 23 May 2023 13:47:07 +0100 Subject: [PATCH 031/127] Implement Zacas extension. --- disasm/disasm.cc | 6 ++++++ riscv/insns/amocas_d.h | 37 +++++++++++++++++++++++++++++++++++++ riscv/insns/amocas_q.h | 34 ++++++++++++++++++++++++++++++++++ riscv/insns/amocas_w.h | 2 ++ riscv/isa_parser.cc | 6 ++++++ riscv/isa_parser.h | 1 + riscv/mmu.h | 11 +++++++++++ riscv/processor.cc | 5 +++++ riscv/riscv.mk.in | 6 ++++++ 9 files changed, 108 insertions(+) create mode 100644 riscv/insns/amocas_d.h create mode 100644 riscv/insns/amocas_q.h create mode 100644 riscv/insns/amocas_w.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 8722cdb977..940fa66c16 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -815,6 +815,12 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DEFINE_XAMO(sc_d) } + if (isa->extension_enabled(EXT_ZACAS)) { + DEFINE_XAMO(amocas_w) + DEFINE_XAMO(amocas_d) + DEFINE_XAMO(amocas_q) + } + add_insn(new disasm_insn_t("j", match_jal, mask_jal | mask_rd, {&jump_target})); add_insn(new disasm_insn_t("jal", match_jal | match_rd_ra, mask_jal | mask_rd, {&jump_target})); add_insn(new disasm_insn_t("jal", match_jal, mask_jal, {&xrd, &jump_target})); diff --git a/riscv/insns/amocas_d.h b/riscv/insns/amocas_d.h new file mode 100644 index 0000000000..e002e6ab75 --- /dev/null +++ b/riscv/insns/amocas_d.h @@ -0,0 +1,37 @@ +require_extension(EXT_ZACAS); + +if (xlen == 32) { + // RV32: the spec defines two 32-bit comparisons. 
Since we're + // loading 64-bit from memory we have to adjust for endianness. + uint64_t comp, swap, res; + + require_align(insn.rd(), 2); + require_align(insn.rs2(), 2); + if (insn.rd() == 0) { + comp = 0; + } else if (MMU.is_target_big_endian()) { + comp = (uint32_t)READ_REG(insn.rd() + 1) | (RD << 32); + } else { + comp = (uint32_t)RD | (READ_REG(insn.rd() + 1) << 32); + } + if (insn.rs2() == 0) { + swap = 0; + } else if (MMU.is_target_big_endian()) { + swap = (uint32_t)READ_REG(insn.rs2() + 1) | (RS2 << 32); + } else { + swap = (uint32_t)RS2 | (READ_REG(insn.rs2() + 1) << 32); + } + res = MMU.amo_compare_and_swap(RS1, comp, swap); + if (insn.rd() != 0) { + if (MMU.is_target_big_endian()) { + WRITE_REG(insn.rd() + 1, sext32((uint32_t)res)); + WRITE_REG(insn.rd(), sext32(res >> 32)); + } else { + WRITE_REG(insn.rd(), sext32((uint32_t)res)); + WRITE_REG(insn.rd() + 1, sext32(res >> 32)); + } + } + } else { + // RV64 + WRITE_RD(MMU.amo_compare_and_swap(RS1, RD, RS2)); +} diff --git a/riscv/insns/amocas_q.h b/riscv/insns/amocas_q.h new file mode 100644 index 0000000000..0b7593b3dc --- /dev/null +++ b/riscv/insns/amocas_q.h @@ -0,0 +1,34 @@ +require_extension(EXT_ZACAS); +require_rv64; +require_align(insn.rd(), 2); +require_align(insn.rs2(), 2); + +// The spec defines two 64-bit comparisons. Since we're loading +// 128-bit from memory we have to adjust for endianness. 
+ +uint128_t comp, swap, res; + +if (insn.rd() == 0) { + comp = 0; +} else if (MMU.is_target_big_endian()) { + comp = READ_REG(insn.rd() + 1) | ((uint128_t)RD << 64); +} else { + comp = RD | ((uint128_t)READ_REG(insn.rd() + 1) << 64); +} +if (insn.rs2() == 0) { + swap = 0; +} else if (MMU.is_target_big_endian()) { + swap = READ_REG(insn.rs2() + 1) | ((uint128_t)RS2 << 64); +} else { + swap = RS2 | ((uint128_t)READ_REG(insn.rs2() + 1) << 64); +} +res = MMU.amo_compare_and_swap(RS1, comp, swap); +if (insn.rd() != 0) { + if (MMU.is_target_big_endian()) { + WRITE_REG(insn.rd(), res >> 64); + WRITE_REG(insn.rd() + 1, res); + } else { + WRITE_REG(insn.rd(), res); + WRITE_REG(insn.rd() + 1, res >> 64); + } +} diff --git a/riscv/insns/amocas_w.h b/riscv/insns/amocas_w.h new file mode 100644 index 0000000000..a78c21cb73 --- /dev/null +++ b/riscv/insns/amocas_w.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZACAS); +WRITE_RD(sext32(MMU.amo_compare_and_swap(RS1, RD, RS2))); diff --git a/riscv/isa_parser.cc b/riscv/isa_parser.cc index 8bb8c495b3..1c4300c958 100644 --- a/riscv/isa_parser.cc +++ b/riscv/isa_parser.cc @@ -120,6 +120,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) // HINTs encoded in base-ISA instructions are always present. } else if (ext_str == "zihintntl") { // HINTs encoded in base-ISA instructions are always present. 
+ } else if (ext_str == "zacas") { + extension_table[EXT_ZACAS] = true; } else if (ext_str == "zmmul") { extension_table[EXT_ZMMUL] = true; } else if (ext_str == "zba") { @@ -301,6 +303,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) bad_isa_string(str, "'Zcf/Zcd/Zcb/Zcmp/Zcmt' extensions require 'Zca' extension"); } + if (extension_table[EXT_ZACAS] && !extension_table['A']) { + bad_isa_string(str, "'Zacas' extension requires 'A' extension"); + } + std::string lowercase = strtolower(priv); bool user = false, supervisor = false; diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 4e6856195c..3cbee7dea0 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -61,6 +61,7 @@ typedef enum { EXT_ZVFBFMIN, EXT_ZVFBFWMA, EXT_SSTC, + EXT_ZACAS, EXT_INTERNAL_ZFH_MOVE, NUM_ISA_EXTENSIONS } isa_extension_t; diff --git a/riscv/mmu.h b/riscv/mmu.h index efc6e9de14..46c54ce88a 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -187,6 +187,17 @@ class mmu_t }) } + template + T amo_compare_and_swap(reg_t addr, T comp, T swap) { + convert_load_traps_to_store_traps({ + store_slow_path(addr, sizeof(T), nullptr, {false, false, false}, false, true); + auto lhs = load(addr); + if (lhs == comp) + store(addr, swap); + return lhs; + }) + } + void store_float128(reg_t addr, float128_t val) { if (unlikely(addr & (sizeof(float128_t)-1)) && !is_misaligned_enabled()) { diff --git a/riscv/processor.cc b/riscv/processor.cc index a75b0ff6f1..1d5675a51a 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -47,6 +47,11 @@ processor_t::processor_t(const isa_parser_t *isa, const cfg_t *cfg, fprintf(stderr, "V extension is not supported on platforms without __int128 type\n"); abort(); } + + if (isa->extension_enabled(EXT_ZACAS) && isa->get_max_xlen() == 64) { + fprintf(stderr, "Zacas extension is not supported on 64-bit platforms without __int128 type\n"); + abort(); + } #endif parse_varch_string(cfg->varch()); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 
db63290205..6472982ed5 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1335,6 +1335,11 @@ riscv_insn_ext_bf16 = \ $(riscv_insn_ext_zvfbfmin) \ $(riscv_insn_ext_zvfbfwma) \ +riscv_insn_ext_zacas = \ + amocas_w \ + amocas_d \ + $(if $(HAVE_INT128),amocas_q) + riscv_insn_list = \ $(riscv_insn_ext_a) \ $(riscv_insn_ext_c) \ @@ -1360,6 +1365,7 @@ riscv_insn_list = \ $(riscv_insn_ext_cmo) \ $(riscv_insn_ext_zicond) \ $(riscv_insn_ext_bf16) \ + $(riscv_insn_ext_zacas) \ riscv_gen_srcs = $(addsuffix .cc,$(riscv_insn_list)) From d6d919ee5b061add58b159b6551235241744c91f Mon Sep 17 00:00:00 2001 From: Philipp Tomsich Date: Sun, 18 Jun 2023 22:50:46 +0200 Subject: [PATCH 032/127] Add Zicond to disassembler --- disasm/disasm.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 8722cdb977..783479981f 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2154,6 +2154,11 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DISASM_INSN("cbo.zero", cbo_zero, 0, {&base_only_address}); } + if (isa->extension_enabled(EXT_ZICOND)) { + DEFINE_RTYPE(czero_eqz); + DEFINE_RTYPE(czero_nez); + } + if (isa->extension_enabled(EXT_ZKND) || isa->extension_enabled(EXT_ZKNE)) { DISASM_INSN("aes64ks1i", aes64ks1i, 0, {&xrd, &xrs1, &rcon}); From 377fb0a11b8ccc28f7d1687523b7d79403e26453 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Wed, 31 May 2023 13:57:31 -0700 Subject: [PATCH 033/127] List extensions alphabetically in riscv_insn_list The previous order lacks any obvious logic. Alphabetical order, while making it difficult to create interesting groupings, makes it easy to find which extensions are compiled in. 
Signed-off-by: Eric Gouriou --- riscv/riscv.mk.in | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 6472982ed5..3b493a06c8 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1341,31 +1341,31 @@ riscv_insn_ext_zacas = \ $(if $(HAVE_INT128),amocas_q) riscv_insn_list = \ + $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ $(riscv_insn_ext_a) \ + $(riscv_insn_ext_b) \ + $(riscv_insn_ext_bf16) \ $(riscv_insn_ext_c) \ - $(riscv_insn_ext_i) \ - $(riscv_insn_ext_m) \ - $(riscv_insn_ext_f) \ - $(riscv_insn_ext_f_zfa) \ + $(riscv_insn_ext_cmo) \ $(riscv_insn_ext_d) \ $(riscv_insn_ext_d_zfa) \ - $(riscv_insn_ext_zfh) \ - $(riscv_insn_ext_zfh_zfa) \ + $(riscv_insn_ext_f) \ + $(riscv_insn_ext_f_zfa) \ + $(riscv_insn_ext_h) \ + $(riscv_insn_ext_i) \ + $(riscv_insn_ext_k) \ + $(riscv_insn_ext_m) \ + $(riscv_insn_ext_p) \ $(riscv_insn_ext_q) \ $(riscv_insn_ext_q_zfa) \ - $(riscv_insn_ext_b) \ - $(riscv_insn_ext_k) \ - $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ + $(riscv_insn_ext_zacas) \ $(riscv_insn_ext_zce) \ - $(riscv_insn_ext_h) \ - $(riscv_insn_ext_p) \ + $(riscv_insn_ext_zfh) \ + $(riscv_insn_ext_zfh_zfa) \ + $(riscv_insn_ext_zicond) \ $(riscv_insn_priv) \ - $(riscv_insn_svinval) \ $(riscv_insn_smrnmi) \ - $(riscv_insn_ext_cmo) \ - $(riscv_insn_ext_zicond) \ - $(riscv_insn_ext_bf16) \ - $(riscv_insn_ext_zacas) \ + $(riscv_insn_svinval) \ riscv_gen_srcs = $(addsuffix .cc,$(riscv_insn_list)) From 1e5a71f99b3432ba9fb543995a466c2d96e96cec Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Thu, 1 Jun 2023 18:04:31 -0700 Subject: [PATCH 034/127] Zvk: extensions parsing Zvk is the short name for the Vector Cryptography Instruction Set Extension Specification being defined at . This commit adds support for parsing/enabling the Zvk extensions (Zvbb, Zvbc, Zvkg, Zvkned, Zvknha, Zvknhb, Zvksed, Zvksh, Zvkt) and the combo extensions (Zvkn, Zvknc, Zvkng, Zvks, Zvksc, Zvksg). 
This is an early commit in a series implementing Zvk. No instructions are actually defined here, only infrastructure that will support the coming extensions. The encodings for Zvk instructions have some conflicts with Zpn encodings. This commit marks those Zpn instructions as overlapping, and adds checks to error out if conflicting extensions are enabled. Signed-off-by: Eric Gouriou --- riscv/isa_parser.cc | 57 +++++++++++++++++++++++++++++++++++++++++++- riscv/isa_parser.h | 16 +++++++++++++ riscv/overlap_list.h | 9 +++++++ 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/riscv/isa_parser.cc b/riscv/isa_parser.cc index 1c4300c958..6fb29aeb5a 100644 --- a/riscv/isa_parser.cc +++ b/riscv/isa_parser.cc @@ -236,10 +236,55 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZICOND] = true; } else if (ext_str == "zihpm") { extension_table[EXT_ZIHPM] = true; + } else if (ext_str == "zvbb") { + extension_table[EXT_ZVBB] = true; + } else if (ext_str == "zvbc") { + extension_table[EXT_ZVBC] = true; } else if (ext_str == "zvfbfmin") { extension_table[EXT_ZVFBFMIN] = true; } else if (ext_str == "zvfbfwma") { extension_table[EXT_ZVFBFWMA] = true; + } else if (ext_str == "zvkg") { + extension_table[EXT_ZVKG] = true; + } else if (ext_str == "zvkn") { + extension_table[EXT_ZVBB] = true; + extension_table[EXT_ZVKNED] = true; + extension_table[EXT_ZVKNHB] = true; + } else if (ext_str == "zvknc") { + extension_table[EXT_ZVBB] = true; + extension_table[EXT_ZVBC] = true; + extension_table[EXT_ZVKNED] = true; + extension_table[EXT_ZVKNHB] = true; + } else if (ext_str == "zvkng") { + extension_table[EXT_ZVBB] = true; + extension_table[EXT_ZVKG] = true; + extension_table[EXT_ZVKNED] = true; + extension_table[EXT_ZVKNHB] = true; + } else if (ext_str == "zvkned") { + extension_table[EXT_ZVKNED] = true; + } else if (ext_str == "zvknha") { + extension_table[EXT_ZVKNHA] = true; + } else if (ext_str == "zvknhb") { + extension_table[EXT_ZVKNHB] = 
true; + } else if (ext_str == "zvks") { + extension_table[EXT_ZVBB] = true; + extension_table[EXT_ZVKSED] = true; + extension_table[EXT_ZVKSH] = true; + } else if (ext_str == "zvksc") { + extension_table[EXT_ZVBB] = true; + extension_table[EXT_ZVBC] = true; + extension_table[EXT_ZVKSED] = true; + extension_table[EXT_ZVKSH] = true; + } else if (ext_str == "zvksg") { + extension_table[EXT_ZVBB] = true; + extension_table[EXT_ZVKG] = true; + extension_table[EXT_ZVKSED] = true; + extension_table[EXT_ZVKSH] = true; + } else if (ext_str == "zvksed") { + extension_table[EXT_ZVKSED] = true; + } else if (ext_str == "zvksh") { + extension_table[EXT_ZVKSH] = true; + } else if (ext_str == "zvkt") { } else if (ext_str == "sstc") { extension_table[EXT_SSTC] = true; } else if (ext_str[0] == 'x') { @@ -295,7 +340,7 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) } if ((extension_table[EXT_ZCMP] || extension_table[EXT_ZCMT]) && extension_table[EXT_ZCD]) { - bad_isa_string(str, "Zcmp' and 'Zcmt' exensions are incompatible with 'Zcd' extension"); + bad_isa_string(str, "Zcmp' and 'Zcmt' extensions are incompatible with 'Zcd' extension"); } if ((extension_table[EXT_ZCF] || extension_table[EXT_ZCD] || extension_table[EXT_ZCB] || @@ -307,6 +352,16 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) bad_isa_string(str, "'Zacas' extension requires 'A' extension"); } + // Zpn conflicts with Zvknha/Zvknhb in both rv32 and rv64 + if (extension_table[EXT_ZPN] && (extension_table[EXT_ZVKNHA] || extension_table[EXT_ZVKNHB])) { + bad_isa_string(str, "'Zvkna' and 'Zvknhb' extensions are incompatible with 'Zpn' extension"); + } + // In rv64 only, Zpn (rv64_zpn) conflicts with Zvkg/Zvkned/Zvksh + if (max_xlen == 64 && extension_table[EXT_ZPN] && + (extension_table[EXT_ZVKG] || extension_table[EXT_ZVKNED] || extension_table[EXT_ZVKSH])) { + bad_isa_string(str, "'Zvkg', 'Zvkned', and 'Zvksh' extensions are incompatible with 'Zpn' extension in rv64"); + } + std::string 
lowercase = strtolower(priv); bool user = false, supervisor = false; diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 3cbee7dea0..5b04347520 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -58,8 +58,24 @@ typedef enum { EXT_ZICNTR, EXT_ZICOND, EXT_ZIHPM, + EXT_ZVBB, + EXT_ZVBC, EXT_ZVFBFMIN, EXT_ZVFBFWMA, + EXT_ZVKG, + EXT_ZVKNED, + EXT_ZVKNHA, + EXT_ZVKNHB, + EXT_ZVKSED, + EXT_ZVKSH, + EXT_XZBP, + EXT_XZBS, + EXT_XZBE, + EXT_XZBF, + EXT_XZBC, + EXT_XZBM, + EXT_XZBR, + EXT_XZBT, EXT_SSTC, EXT_ZACAS, EXT_INTERNAL_ZFH_MOVE, diff --git a/riscv/overlap_list.h b/riscv/overlap_list.h index a30c770e60..2214be4a58 100644 --- a/riscv/overlap_list.h +++ b/riscv/overlap_list.h @@ -12,3 +12,12 @@ DECLARE_OVERLAP_INSN(c_fsd, EXT_ZCD) DECLARE_OVERLAP_INSN(c_ebreak, EXT_ZCA) DECLARE_OVERLAP_INSN(c_jalr, EXT_ZCA) DECLARE_OVERLAP_INSN(c_jr, EXT_ZCA) +DECLARE_OVERLAP_INSN(vaesdf_vv, EXT_ZVKNED) +DECLARE_OVERLAP_INSN(vghsh_vv, EXT_ZVKG) +DECLARE_OVERLAP_INSN(vsha2ms_vv, EXT_ZVKNHA) +DECLARE_OVERLAP_INSN(vsha2ms_vv, EXT_ZVKNHB) +DECLARE_OVERLAP_INSN(vsm3me_vv, EXT_ZVKSH) +DECLARE_OVERLAP_INSN(rstsa16, EXT_ZPN) +DECLARE_OVERLAP_INSN(rstsa32, EXT_ZPN) +DECLARE_OVERLAP_INSN(srli32_u, EXT_ZPN) +DECLARE_OVERLAP_INSN(umax32, EXT_ZPN) From d5c0339484323b5a9498576d70ec90eab2e13438 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Sun, 18 Jun 2023 17:10:53 -0700 Subject: [PATCH 035/127] Zvk: Infrastructure for Zvk extensions, element group handling Introduce types and macros useful across multiple Zvk sub-extensions, including Zvbb and Zvbc. Those will be used by upcoming per-sub-extension commits. In particular we introduce "Element Group" types and loop macros handling those element groups. The concept of element group is described in . Note that the element group access method is not implemented for WORDS_BIGENDIAN setup. As such, isa_parser.cc is modified to emit an error when WORDS_BIGENDIAN is defined and extensions using element groups are enabled. 
Signed-off-by: Eric Gouriou --- riscv/arith.h | 21 + riscv/isa_parser.cc | 10 +- riscv/v_ext_macros.h | 22 + riscv/vector_unit.cc | 55 +++ riscv/vector_unit.h | 19 +- riscv/zvk_ext_macros.h | 1023 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1148 insertions(+), 2 deletions(-) create mode 100644 riscv/zvk_ext_macros.h diff --git a/riscv/arith.h b/riscv/arith.h index 3b807e9698..20b15047f7 100644 --- a/riscv/arith.h +++ b/riscv/arith.h @@ -7,6 +7,7 @@ #include #include #include +#include inline uint64_t mulhu(uint64_t a, uint64_t b) { @@ -221,4 +222,24 @@ static inline uint64_t xperm(uint64_t rs1, uint64_t rs2, size_t sz_log2, size_t return r; } +// Rotates right an unsigned integer by the given number of bits. +template +static inline T rotate_right(T x, std::size_t shiftamt) { + static_assert(std::is_unsigned::value); + static constexpr T mask = (8 * sizeof(T)) - 1; + const std::size_t rshift = shiftamt & mask; + const std::size_t lshift = (-rshift) & mask; + return (x << lshift) | (x >> rshift); +} + +// Rotates left an unsigned integer by the given number of bits. +template +static inline T rotate_left(T x, std::size_t shiftamt) { + static_assert(std::is_unsigned::value); + static constexpr T mask = (8 * sizeof(T)) - 1; + const std::size_t lshift = shiftamt & mask; + const std::size_t rshift = (-lshift) & mask; + return (x << lshift) | (x >> rshift); +} + #endif diff --git a/riscv/isa_parser.cc b/riscv/isa_parser.cc index 6fb29aeb5a..59472a43f0 100644 --- a/riscv/isa_parser.cc +++ b/riscv/isa_parser.cc @@ -361,7 +361,15 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) (extension_table[EXT_ZVKG] || extension_table[EXT_ZVKNED] || extension_table[EXT_ZVKSH])) { bad_isa_string(str, "'Zvkg', 'Zvkned', and 'Zvksh' extensions are incompatible with 'Zpn' extension in rv64"); } - +#ifdef WORDS_BIGENDIAN + // Access to the vector registers as element groups is unimplemented on big-endian setups. 
+ if (extension_table[EXT_ZVKG] || extension_table[EXT_ZVKNHA] || extension_table[EXT_ZVKNHB] || + extension_table[EXT_ZVKSED] || extension_table[EXT_ZVKSH]) { + bad_isa_string(str, + "'Zvkg', 'Zvkned', 'Zvknha', 'Zvknhb', 'Zvksed', and 'Zvksh' " + "extensions are incompatible with WORDS_BIGENDIAN setups."); + } +#endif std::string lowercase = strtolower(priv); bool user = false, supervisor = false; diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index 41256c7a59..908ff16c28 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -325,6 +325,10 @@ static inline bool is_overlapped_widen(const int astart, int asize, type_usew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); +#define V_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + #define VX_U_PARAMS(x) \ type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ type_usew_t::type rs1 = (type_usew_t::type)RS1; \ @@ -693,6 +697,24 @@ static inline bool is_overlapped_widen(const int astart, int asize, } \ VI_LOOP_END +#define VI_V_ULOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8) { \ + V_U_PARAMS(e8); \ + BODY; \ + } else if (sew == e16) { \ + V_U_PARAMS(e16); \ + BODY; \ + } else if (sew == e32) { \ + V_U_PARAMS(e32); \ + BODY; \ + } else if (sew == e64) { \ + V_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + #define VI_VX_ULOOP(BODY) \ VI_CHECK_SSS(false) \ VI_LOOP_BASE \ diff --git a/riscv/vector_unit.cc b/riscv/vector_unit.cc index 9128df63ee..08adc6166d 100644 --- a/riscv/vector_unit.cc +++ b/riscv/vector_unit.cc @@ -86,6 +86,56 @@ template T& vectorUnit_t::elt(reg_t vReg, reg_t n, bool UNUSED is_write return regStart[n]; } +// The logic differences between 'elt()' and 'elt_group()' come from +// the fact that, while 'elt()' requires that the element is fully +// contained in a single vector register, the element group may span +// multiple 
registers in a single register group (LMUL>1). +// +// Notes: +// - We do NOT check that a single element - i.e., the T in the element +// group type std::array - fits within a single register, or that +// T is smaller or equal to VSEW. Implementations of the instructions +// sometimes use a different T than what the specification suggests. +// Instruction implementations should 'require()' what the specification +// dictates. +// - We do NOT check that 'vReg' is a valid register group, or that +// 'n+1' element groups fit in the register group 'vReg'. It is +// the responsibility of the caller to validate those preconditions. +template EG& +vectorUnit_t::elt_group(reg_t vReg, reg_t n, bool UNUSED is_write) { +#ifdef WORDS_BIGENDIAN + fputs("vectorUnit_t::elt_group is not compatible with WORDS_BIGENDIAN setup.\n", + stderr); + abort(); +#endif + using T = typename EG::value_type; + constexpr std::size_t N = std::tuple_size::value; + assert(N > 0); + + assert(vsew != 0); + constexpr reg_t elt_group_size = N * sizeof(T); + const reg_t reg_group_size = (VLEN >> 3) * vflmul; + assert(((n + 1) * elt_group_size) <= reg_group_size); + + const reg_t start_byte = n * elt_group_size; + const reg_t bytes_per_reg = VLEN >> 3; + + // Inclusive first/last register indices.
+ const reg_t reg_first = vReg + start_byte / bytes_per_reg; + const reg_t reg_last = vReg + (start_byte + elt_group_size - 1) / bytes_per_reg; + + // Element groups per register groups + for (reg_t vidx = reg_first; vidx <= reg_last; ++vidx) { + reg_referenced[vidx] = 1; + + if (unlikely(p->get_log_commits_enabled() && is_write)) { + p->get_state()->log_reg_write[(vidx << 4) | 2] = {0, 0}; + } + } + + return *(EG*)((char*)reg_file + vReg * (VLEN >> 3) + start_byte); +} + template signed char& vectorUnit_t::elt(reg_t, reg_t, bool); template short& vectorUnit_t::elt(reg_t, reg_t, bool); template int& vectorUnit_t::elt(reg_t, reg_t, bool); @@ -98,3 +148,8 @@ template uint64_t& vectorUnit_t::elt(reg_t, reg_t, bool); template float16_t& vectorUnit_t::elt(reg_t, reg_t, bool); template float32_t& vectorUnit_t::elt(reg_t, reg_t, bool); template float64_t& vectorUnit_t::elt(reg_t, reg_t, bool); + +template EGU32x4_t& vectorUnit_t::elt_group(reg_t, reg_t, bool); +template EGU32x8_t& vectorUnit_t::elt_group(reg_t, reg_t, bool); +template EGU64x4_t& vectorUnit_t::elt_group(reg_t, reg_t, bool); +template EGU8x16_t& vectorUnit_t::elt_group(reg_t, reg_t, bool); diff --git a/riscv/vector_unit.h b/riscv/vector_unit.h index b9f706c53a..a057c62fbe 100644 --- a/riscv/vector_unit.h +++ b/riscv/vector_unit.h @@ -2,6 +2,9 @@ #ifndef _RISCV_VECTOR_UNIT_H #define _RISCV_VECTOR_UNIT_H +#include +#include + #include "decode.h" #include "csrs.h" @@ -69,6 +72,17 @@ struct type_sew_t<64> using type=int64_t; }; +// Element Group of 4 32 bits elements (128b total). +using EGU32x4_t = std::array; + +// Element Group of 8 32 bits elements (256b total). +using EGU32x8_t = std::array; + +// Element Group of 4 64 bits elements (256b total). +using EGU64x4_t = std::array; + +// Element Group of 16 8 bits elements (128b total). 
+using EGU8x16_t = std::array; class vectorUnit_t { @@ -88,8 +102,11 @@ class vectorUnit_t bool vill; bool vstart_alu; - // vector element for varies SEW + // vector element for various SEW template T& elt(reg_t vReg, reg_t n, bool is_write = false); + // vector element group access, where EG is a std::array. + template EG& + elt_group(reg_t vReg, reg_t n, bool is_write = false); public: diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h new file mode 100644 index 0000000000..7efbac806f --- /dev/null +++ b/riscv/zvk_ext_macros.h @@ -0,0 +1,1023 @@ +// Helper macros to help implement instructions defined as part of +// the RISC-V Zvk extension (vector cryptography). + +// Note that a good deal of code here would be cleaner/simpler +// if exposed as C++ functions (including templated ones), however +// this is not possible in the contexts where those headers are +// included. + +#ifndef RISCV_ZVK_EXT_MACROS_H_ +#define RISCV_ZVK_EXT_MACROS_H_ + +// +// Predicate Macros +// + +// Ensures that the ZVBB extension (vector crypto bitmanip) is present, +// and the vector unit is enabled and in a valid state. +#define require_zvbb \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVBB); \ + } while (0) + +// Ensures that the ZVBC extension (vector carryless multiplication) +// is present, and the vector unit is enabled and in a valid state. +#define require_zvbc \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVBC); \ + } while (0) + +// Ensures that the ZVKG extension (vector Galois Field Multiplication) +// is present, and the vector unit is enabled and in a valid state. +#define require_zvkg \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVKG); \ + } while (0) + +// Ensures that a ZVK extension supporting SHA-256 is present. +// For SHA-256, this support is present in either Zvknha or Zvknhb. +// Also ensures that the vector unit is enabled and in a valid state.
+#define require_zvknh_256 \ + do { \ + require_vector(true); \ + require_either_extension(EXT_ZVKNHA, EXT_ZVKNHB); \ + } while (0) + +// Ensures that the ZVKNED extension (vector AES single round) is present, +// and the vector unit is enabled and in a valid state. +#define require_zvkned \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVKNED); \ + } while (0) + +// Ensures that a ZVK extension supporting SHA-512 is present. +// For SHA-512, this support is only present in Zvknhb. +// Also ensures that the vector unit is enabled and in a valid state. +#define require_zvknh_512 \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVKNHB); \ + } while (0) + +// Ensures that the ZVKSED extension (vector SM4 block cipher) +// is present, and the vector unit is enabled and in a valid state. +#define require_zvksed \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVKSED); \ + } while (0) + +// Ensures that the ZVKSH extension (vector SM3 hash) is present, +// and the vector unit is enabled and in a valid state. +#define require_zvksh \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVKSH); \ + } while (0) + +// Ensures that the vector instruction is not using a mask. +#define require_no_vmask require(insn.v_vm() == 1) + +// Ensures that an element group can fit in a register group. That is, +// EGW <= (LMUL * VLEN) +#define require_egw_fits(EGW) require((EGW) <= (P.VU.VLEN * P.VU.vflmul)) + +// Checks that the vector unit state (vtype and vl) can be interpreted +// as element groups with EEW=32, EGS=4 (four 32-bits elements per group), +// for an effective element group width of EGW=128 bits. +// +// Per the vector crypto specification, SEW is ignored. 'vl' and 'vstart' +// are interpreted as a number of EEW-wide elements. They must both +// be multiples of EGS (potentially 0).
+#define require_element_groups_32x4 \ + do { \ + /* 'vstart' must be a multiple of EGS */ \ + const reg_t vstart = P.VU.vstart->read(); \ + require(vstart % 4 == 0); \ + /* 'vl' must be a multiple of EGS */ \ + const reg_t vl = P.VU.vl->read(); \ + require(vl % 4 == 0); \ + } while (0) + +// Checks that the vector unit state (vtype and vl) can be interpreted +// as element groups with EEW=32, EGS=8 (eight 32-bits elements per group), +// for an effective element group width of EGW=256 bits. +// +// Per the vector crypto specification, SEW is ignored. 'vl' and 'vstart' +// are interpreted as a number of EEW-wide elements. They must both +// be multiples of EGS (potentially 0). +#define require_element_groups_32x8 \ + do { \ + /* 'vstart' must be a multiple of EGS */ \ + const reg_t vstart = P.VU.vstart->read(); \ + require(vstart % 8 == 0); \ + /* 'vl' must be a multiple of EGS */ \ + const reg_t vl = P.VU.vl->read(); \ + require(vl % 8 == 0); \ + } while (0) + +// Checks that the vector unit state (vtype and vl) can be interpreted +// as element groups with EEW=64, EGS=4 (four 64-bits elements per group), +// for an effective element group width of EGW=256 bits. +// +// Per the vector crypto specification, SEW is ignored. 'vl' and 'vstart' +// are interpreted as a number of EEW-wide elements. They must both +// be multiples of EGS (potentially 0). +#define require_element_groups_64x4 \ + do { \ + /* 'vstart' must be a multiple of EGS */ \ + const reg_t vstart = P.VU.vstart->read(); \ + require(vstart % 4 == 0); \ + /* 'vl' must be a multiple of EGS */ \ + const reg_t vl = P.VU.vl->read(); \ + require(vl % 4 == 0); \ + } while (0) + +// +// Loop Parameters Macros +// + +// Extracts a 32b*4 element group as a EGU32x4_t variables at the given +// element group index, from register arguments 'vd' (by reference, mutable), +// 'vs1' and 'vs2' (constant, by value).
+#define VV_VD_VS1_VS2_EGU32x4_PARAMS(VD_NUM, VS1_NUM, VS2_NUM, EG_IDX) \ + EGU32x4_t &vd = P.VU.elt_group((VD_NUM), (EG_IDX), true); \ + const EGU32x4_t vs1 = P.VU.elt_group((VS1_NUM), (EG_IDX)); \ + const EGU32x4_t vs2 = P.VU.elt_group((VS2_NUM), (EG_IDX)) + +// Extracts a 32b*8 element group as a EGU32x8_t variables at the given +// element group index, from register arguments 'vd' (by reference, mutable), +// 'vs1' and 'vs2' (constant, by value). +#define VV_VD_VS1_VS2_EGU32x8_PARAMS(VD_NUM, VS1_NUM, VS2_NUM, EG_IDX) \ + EGU32x8_t &vd = P.VU.elt_group((VD_NUM), (EG_IDX), true); \ + const EGU32x8_t vs1 = P.VU.elt_group((VS1_NUM), (EG_IDX)); \ + const EGU32x8_t vs2 = P.VU.elt_group((VS2_NUM), (EG_IDX)) + +// Extracts a 32b*4 element group as a EGU32x4_t variables at the given +// element group index, from register arguments 'vd' (by reference, mutable), +// and 'vs2' (constant, by value). +#define VV_VD_VS2_EGU32x4_PARAMS(VD_NUM, VS2_NUM, EG_IDX) \ + EGU32x4_t &vd = P.VU.elt_group((VD_NUM), (EG_IDX), true); \ + const EGU32x4_t vs2 = P.VU.elt_group((VS2_NUM), (EG_IDX)) + +// Extracts a 32b*8 element group as a EGU32x8_t variables at the given +// element group index, from register arguments 'vd' (by reference, mutable), +// and 'vs2' (constant, by value). +#define VV_VD_VS2_EGU32x8_PARAMS(VD_NUM, VS2_NUM, EG_IDX) \ + EGU32x8_t &vd = P.VU.elt_group((VD_NUM), (EG_IDX), true); \ + const EGU32x8_t vs2 = P.VU.elt_group((VS2_NUM), (EG_IDX)) + +// Extracts a 64b*4 element group as a EGU64x4_t variables at the given +// element group index, from register arguments 'vd' (by reference, mutable), +// 'vs1' and 'vs2' (constant, by value). 
+#define VV_VD_VS1_VS2_EGU64x4_PARAMS(VD_NUM, VS1_NUM, VS2_NUM, EG_IDX) \ + EGU64x4_t &vd = P.VU.elt_group((VD_NUM), (EG_IDX), true); \ + const EGU64x4_t vs1 = P.VU.elt_group((VS1_NUM), (EG_IDX)); \ + const EGU64x4_t vs2 = P.VU.elt_group((VS2_NUM), (EG_IDX)) + +// Extracts elements from the vector register groups 'vd', 'vs2', and 'vs1', +// as part of a widening operation where 'vd' has EEW = 2 * SEW. +// Defines +// - 'vd_w', unsigned, 2 * SEW width, by reference, mutable. +// - 'vs2', unsigned, SEW width, by value, constant. +// - 'vs2_w', unsigned, 2 * SEW width, by value, constant, +// a widened copy of 'vs2'. +// - 'vs1', unsigned, SEW width, by value, constant. +#define VI_ZVK_VV_WIDENING_U_PARAMS(SEW) \ + auto &vd_w = P.VU.elt::type>(rd_num, i, true); \ + const auto vs2 = P.VU.elt::type>(rs2_num, i); \ + const type_usew_t<2 * SEW>::type vs2_w = vs2; \ + const auto vs1 = P.VU.elt::type>(rs1_num, i); \ + +// Extracts elements from the vector register groups 'vd', 'vs2', +// and the scalar register 'rs1', as part of a widening operation where +// 'vd' has EEW = 2 * SEW. +// Defines +// - 'vd_w', unsigned, 2 * SEW width, by reference, mutable. +// - 'vs2', unsigned, SEW width, by value, constant. +// - 'vs2_w', unsigned, 2 * SEW width, by value, constant, +// a widened copy of 'vs2'. +// - 'rs1', unsigned, SEW width, by value, constant. +#define VI_ZVK_VX_WIDENING_U_PARAMS(SEW) \ + auto &vd_w = P.VU.elt::type>(rd_num, i, true); \ + const auto vs2 = P.VU.elt::type>(rs2_num, i); \ + const type_usew_t<2 * SEW>::type vs2_w = vs2; \ + const auto rs1 = (type_usew_t::type)RS1; \ + +// Extracts elements from the vector register groups 'vd', 'vs2', +// and the 5-bit immediate field 'zimm5', as part of a widening operation +// where 'vd' has EEW = 2 * SEW. +// Defines +// - 'vd_w', unsigned, 2 * SEW width, by reference, mutable. +// - 'vs2', unsigned, SEW width, by value, constant. +// - 'vs2_w', unsigned, 2 * SEW width, by value, constant, +// a widened copy of 'vs2'. 
+// - 'zimm5', unsigned, SEW width, by value, constant. +#define VI_ZVK_VI_WIDENING_U_PARAMS(SEW) \ + auto &vd_w = P.VU.elt::type>(rd_num, i, true); \ + const auto vs2 = P.VU.elt::type>(rs2_num, i); \ + const type_usew_t<2 * SEW>::type vs2_w = vs2; \ + const auto zimm5 = (type_usew_t::type)insn.v_zimm5(); \ + +// +// Loop Macros +// + +// NOTES: +// - Each of the element-group loop macros DO contain an invocation +// of the corresponding 'require_element_groups_<bits>x<#elements>;', +// because the macro correctness requires proper VL/VSTART values. +// - Each of the loop macros named "_NOVM_" DO contain an invocation +// of the 'require_no_vmask;' macro. Those macros (all of them +// at this time) do not support masking (i.e., no skipping +// of elements/element groups is performed). + +// Processes all 32b*4 element groups available in the vector register +// operands vd, vs1, and vs2. This interprets the vectors as containing +// element groups of 4 uint32_t values (EGW=128, EEW=32, EGS=4), while +// *ignoring* the current SEW setting of the vector unit. +// +// IMPORTANT +// - This macro contains an invocation of 'require_element_groups_32x4;', +// since the "loop" macro correctness depends on invariants that +// are checked by the "require" macro. +// - This macro does not support masking, and contains an invocation +// of 'require_no_vmask;'. +// - While the name states "VD_VS1_VS2", many vector instructions +// are specified as "op vd, vs2, vs1". This macro does not imply +// a specific operand order and can be used with both "op vd, vs2, vs1" +// and "op vd, vs1, vs2" instructions. +// +// Invokes two statement blocks: +// - PRELUDE, invoked once, before any element group. It is executed even +// if the vector is empty. It is placed in a "do { } while (0);", hence +// any variable declared there is not visible outside. +// - EG_BODY, once per element group.
+// +// Declares the following variables available for use in both statement blocks: +// 'vd_num': register index of vd +// 'vs1_num': register index of vs1 +// 'vs2_num': register index of vs2 +// 'vstart_eg': index of the first element group, *in EG units* +// 'vl_eg': length of the vector, *in EG units* +// +// The following variables are available in the EG_BODY block: +// 'idx_eg': index of the current element group. +// 'vd': EGU32x4_t reference, mutable,, content of the current +// element group in the 'vd' vector register / register group. +// 'vs1': EGU32x4_t, content of the current element group +// in the 'vs1' vector register / register group. +// 'vs2': EGU32x4_t, content of the current element group +// in the 'vs2' vector register / register group. +// +#define VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(PRELUDE, EG_BODY) \ + do { \ + require_element_groups_32x4; \ + require_no_vmask; \ + const reg_t vd_num = insn.rd(); \ + const reg_t vs1_num = insn.rs1(); \ + const reg_t vs2_num = insn.rs2(); \ + const reg_t vstart_eg = P.VU.vstart->read() / 4; \ + const reg_t vl_eg = P.VU.vl->read() / 4; \ + do { PRELUDE } while (0); \ + for (reg_t idx_eg = vstart_eg; idx_eg < vl_eg; ++idx_eg) { \ + VV_VD_VS1_VS2_EGU32x4_PARAMS(vd_num, vs1_num, vs2_num, idx_eg); \ + EG_BODY \ + } \ + P.VU.vstart->write(0); \ + } while (0) + +// Processes all 32b*8 element groups available in the vector register +// operands vd, vs1, and vs2. This interprets the vectors as containing +// element groups of 8 uint32_t values (EGW=256, EEW=32, EGS=8), while +// *ignoring* the current SEW setting of the vector unit. +// +// IMPORTANT +// - This macro contains an invocation of the macro 'require_element_groups_32x8;', +// since the "loop" macro correctness depends on invariants that +// are checked by the "require" macro. +// - This macro does not support masking, and contains an invocation +// of 'require_no_vmask;'. 
+// - While the name states "VD_VS1_VS2", many vector instructions +// are specified as "op vd, vs2, vs1". This macro does not imply +// a specific operand order and can be used with both "op vd, vs2, vs1" +// and "op vd, vs1, vs2" instructions. +// +// Invokes two statement blocks: +// - PRELUDE, invoked once, before any element group. It is executed even +// if the vector is empty. It is placed in a "do { } while (0);", hence +// any variable declared there is not visible outside. +// - EG_BODY, once per element group. +// +// Declares the following variables available for use in both statement blocks: +// 'vd_num': register index of vd +// 'vs1_num': register index of vs1 +// 'vs2_num': register index of vs2 +// 'vstart_eg': index of the first element group, *in EG units* +// 'vl_eg': length of the vector, *in EG units* +// +// The following variables are available in the EG_BODY block: +// 'idx_eg': index of the current element group. +// 'vd': EGU32x8_t reference, mutable,, content of the current +// element group in the 'vd' vector register / register group. +// 'vs1': EGU32x8_t, content of the current element group +// in the 'vs1' vector register / register group. +// 'vs2': EGU32x8_t, content of the current element group +// in the 'vs2' vector register / register group. +// +#define VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP(PRELUDE, EG_BODY) \ + do { \ + require_element_groups_32x8;; \ + require_no_vmask; \ + const reg_t vd_num = insn.rd(); \ + const reg_t vs1_num = insn.rs1(); \ + const reg_t vs2_num = insn.rs2(); \ + const reg_t vstart_eg = P.VU.vstart->read() / 8; \ + const reg_t vl_eg = P.VU.vl->read() / 8; \ + do { PRELUDE } while (0); \ + for (reg_t idx_eg = vstart_eg; idx_eg < vl_eg; ++idx_eg) { \ + VV_VD_VS1_VS2_EGU32x8_PARAMS(vd_num, vs1_num, vs2_num, idx_eg); \ + EG_BODY \ + } \ + P.VU.vstart->write(0); \ + } while (0) + +// Processes all 32b*4 element groups available in the vector register +// operands vd, vs1, and vs2. 
This interprets the vectors as containing +// element groups of 4 uint32_t values (EGW=128, EEW=32, EGS=4), while +// *ignoring* the current SEW setting of the vector unit. +// +// Compared to VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP: +// - this macro does NOT extract the element groups into EGU32x4_t +// variables. It is intended for uses where there is a more natural +// type to use (e.g., EGU8x16_t). The type should still be a 128 bits +// wide type if extracted via 'P.VU.elt_group(...)'. +// - this macro offers the additional PRELOOP code block argument, +// that is executed once if the loop is going to be entered. +// This is intended for use with "vector scalar" instructions where +// we extract the first element group from one of the operands and +// use it for all loop iterations. +// +// IMPORTANT +// - This macro contains an invocation of 'require_element_groups_32x4;', +// since the "loop" macro correctness depends on invariants that +// are checked by the "require" macro. +// - This macro does not support masking, and contains an invocation +// of 'require_no_vmask;'. +// - While the name states "VD_VS1_VS2", many vector instructions +// are specified as "op vd, vs2, vs1". This macro does not imply +// a specific operand order and can be used with both "op vd, vs2, vs1" +// and "op vd, vs1, vs2" instructions. +// +// Invokes three statement blocks: +// - PRELUDE, invoked once, before any element group. It is executed even +// if the vector is empty. It is placed in a "do { } while (0);", hence +// any variable declared there is not visible outside. +// - PRELOOP, invoked once IF there is at least one element group to process. +// It is NOT placed in its own scope, variables declared in PRELOOP are +// visible when EG_BODY executes. +// Pass {} when there is no need for such a pre-loop block. +// - EG_BODY, once per element group.
+// +// Declares the following variables available for use in both statement blocks: +// 'vd_num': register index of vd +// 'vs1_num': register index of vs1 +// 'vs2_num': register index of vs2 +// 'vstart_eg': index of the first element group, *in EG units* +// 'vl_eg': length of the vector, *in EG units* +// +// The following variables are available in the EG_BODY block: +// 'idx_eg': index of the current element group. +// +#define VI_ZVK_VD_VS1_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(PRELUDE, \ + PRELOOP, \ + EG_BODY) \ + do { \ + require_element_groups_32x4; \ + require_no_vmask; \ + const reg_t vd_num = insn.rd(); \ + const reg_t vs1_num = insn.rs1(); \ + const reg_t vs2_num = insn.rs2(); \ + const reg_t vstart_eg = P.VU.vstart->read() / 4; \ + const reg_t vl_eg = P.VU.vl->read() / 4; \ + do { PRELUDE } while (0); \ + if (vstart_eg < vl_eg) { \ + PRELOOP \ + for (reg_t idx_eg = vstart_eg; idx_eg < vl_eg; ++idx_eg) { \ + EG_BODY \ + } \ + } \ + P.VU.vstart->write(0); \ + } while (0) + +// Processes all 32b*4 element groups available in the vector register +// operands vd and vs2. This interprets the vectors as containing +// element groups of 4 uint32_t values (EGW=128, EEW=32, EGS=4), while +// *ignoring* the current SEW setting of the vector unit. +// +// Compared to VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP: +// - this macro is meant to be used for "op vd, vs2" instructions, +// whether vd is output only, or input and output. +// - this macro does NOT extract the element groups into EGU32x4_t +// variables. It is intended for uses where there is a more natural +// type to use (e.g., EGU8x16_t). The type should still be a 128 bits +// wide type if extracted via 'P.VU.elt_group(...)'. +// - this macro offers the additional PRELOOP code block argument, +// that is executed once if the loop is going to be entered. 
+// This is intended for use with "vector scalar" instructions where +// we extract the first element group from one of the operands and +// use it for all loop iterations. +// +// IMPORTANT +// - This macro contains an invocation of 'require_element_groups_32x4;', +// since the "loop" macro correctness depends on invariants that +// are checked by the "require" macro. +// - This macro does not support masking, and contains an invocation +// of 'require_no_vmask;'. +// - While the name states "VD_VS1_VS2", many vector instructions +// are specified as "op vd, vs2, vs1". This macro does not imply +// a specific operand order and can be used with both "op vd, vs2, vs1" +// and "op vd, vs1, vs2" instructions. +// +// Invokes three statement blocks: +// - PRELUDE, invoked once, before any element group. It is executed even +// if the vector is empty. It is placed in a "do { } while (0);", hence +// any variable declared there is not visible outside. +// - PRELOOP, invoked once IF there is at least one element group to process. +// It is NOT placed in its own scope, variables declared in PRELOOP are +// visible when EG_BODY executes. +// Pass {} when there is no need for such a pre-loop block. +// - EG_BODY, once per element group. +// +// Declares the following variables available for use in both statement blocks: +// 'vd_num': register index of vd +// 'vs2_num': register index of vs2 +// 'vstart_eg': index of the first element group, *in EG units* +// 'vl_eg': length of the vector, *in EG units* +// +// The following variables are available in the EG_BODY block: +// 'idx_eg': index of the current element group. 
+// +#define VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(PRELUDE, \ + PRELOOP, \ + EG_BODY) \ + do { \ + require_element_groups_32x4; \ + require_no_vmask; \ + const reg_t vd_num = insn.rd(); \ + const reg_t vs2_num = insn.rs2(); \ + const reg_t vstart_eg = P.VU.vstart->read() / 4; \ + const reg_t vl_eg = P.VU.vl->read() / 4; \ + do { PRELUDE } while (0); \ + if (vstart_eg < vl_eg) { \ + PRELOOP \ + for (reg_t idx_eg = vstart_eg; idx_eg < vl_eg; ++idx_eg) { \ + EG_BODY \ + } \ + } \ + P.VU.vstart->write(0); \ + } while (0) + +// Processes all 32b*4 element groups available in the vector registers +// vd, vs2. This interprets the vectors as containing element groups +// of 4 uint32_t values (EGW=128, EEW=32, EGS=4), +// *ignoring* the current SEW that applies to the vectors. +// +// IMPORTANT +// - This macro contains an invocation of 'require_element_groups_32x4;', +// since the "loop" macro correctness depends on invariants that +// are checked by the "require" macro. +// - This macro does not support masking, and contains an invocation +// of 'require_no_vmask;'. +// +// Invokes two statement blocks: +// - PRELUDE, invoked once, before any element group. It is executed even +// if the vector is empty. It is placed in a "do { } while (0);", hence +// any variable declared there is not visible outside. +// - EG_BODY, once per element group. +// +// Declares the following variables available for use in both statement blocks: +// 'vd_num': register index of vd +// 'vs2_num': register index of vs2 +// 'vstart_eg': index of the first element group, *in EG units* +// 'vl_eg': length of the vector, *in EG units* +// +// The following variables are available in the EG_BODY block: +// 'idx_eg': index of the current element group. +// 'vd': EGU32x4_t reference, mutable,, content of the current +// element group in the 'vd' vector register / register group. +// 'vs2': EGU32x4_t, content of the current element group +// in the 'vs2' vector register / register group. 
+// +#define VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(PRELUDE, EG_BODY) \ + do { \ + require_element_groups_32x4; \ + require_no_vmask; \ + const reg_t vd_num = insn.rd(); \ + const reg_t vs2_num = insn.rs2(); \ + const reg_t vstart_eg = P.VU.vstart->read() / 4; \ + const reg_t vl_eg = P.VU.vl->read() / 4; \ + do { PRELUDE } while (0); \ + for (reg_t idx_eg = vstart_eg; idx_eg < vl_eg; ++idx_eg) { \ + VV_VD_VS2_EGU32x4_PARAMS(vd_num, vs2_num, idx_eg); \ + EG_BODY \ + } \ + P.VU.vstart->write(0); \ + } while (0) + +// Processes all 32b*4 element groups available in the vector registers +// vd, vs2, given the 'zimm5' immediate. This interprets the vectors as +// containing element groups of 4 uint32_t values (EGW=128, EEW=32, EGS=4), +// *ignoring* the current SEW that applies to the vectors. +// +// IMPORTANT +// - This macro contains an invocation of 'require_element_groups_32x4;', +// since the "loop" macro correctness depends on invariants that +// are checked by the "require" macro. +// - This macro does not support masking, and contains an invocation +// of 'require_no_vmask;'. +// +// Invokes three statement blocks: +// - PRELUDE, invoked once, before any element group. It is executed even +// if the vector is empty. It is placed in a "do { } while (0);", hence +// any variable declared there is not visible outside. +// - PRELOOP, invoked once IF there is at least one element group to process. +// It is NOT placed in its own scope, variables declared in PRELOOP are +// visible when EG_BODY executes. +// Pass {} when there is no need for such a pre-loop block. +// - EG_BODY, once per element group. 
+// +// Declares the following variables available for use in both statement blocks: +// 'vd_num': register index of vd +// 'vs2_num': register index of vs2 +// 'zimm5': 5 bits unsigned immediate +// 'vstart_eg': index of the first element group, *in EG units* +// 'vl_eg': length of the vector, *in EG units* +// +// The following variables are available in the EG_BODY block: +// 'idx_eg': index of the current element group. +// 'vd': EGU32x4_t reference, mutable,, content of the current +// element group in the 'vd' vector register / register group. +// 'vs2': EGU32x4_t, content of the current element group +// in the 'vs2' vector register / register group. +// +#define VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP(PRELUDE, PRELOOP, EG_BODY) \ + do { \ + require_element_groups_32x4; \ + require_no_vmask; \ + const reg_t vd_num = insn.rd(); \ + const reg_t vs2_num = insn.rs2(); \ + const reg_t zimm5 = insn.v_zimm5(); \ + const reg_t vstart_eg = P.VU.vstart->read() / 4; \ + const reg_t vl_eg = P.VU.vl->read() / 4; \ + do { PRELUDE } while (0); \ + if (vstart_eg < vl_eg) { \ + PRELOOP \ + for (reg_t idx_eg = vstart_eg; idx_eg < vl_eg; ++idx_eg) { \ + VV_VD_VS2_EGU32x4_PARAMS(vd_num, vs2_num, idx_eg); \ + EG_BODY \ + } \ + } \ + P.VU.vstart->write(0); \ + } while (0) + +// Processes all 32b*8 element groups available in the vector registers +// vd, vs2, given the 'zimm5' immediate. This interprets the vectors as +// containing element groups of 8 uint32_t values (EGW=256, EEW=32, EGS=8), +// *ignoring* the current SEW that applies to the vectors. +// +// IMPORTANT +// - This macro contains an invocation of 'require_element_groups_32x8;', +// since the "loop" macro correctness depends on invariants that +// are checked by the "require" macro. +// - This macro does not support masking, and contains an invocation +// of 'require_no_vmask;'. +// +// Invokes three statement blocks: +// - PRELUDE, invoked once, before any element group. 
It is executed even +// if the vector is empty. It is placed in a "do { } while (0);", hence +// any variable declared there is not visible outside. +// - PRELOOP, invoked once IF there is at least one element group to process. +// It is NOT placed in its own scope, variables declared in PRELOOP are +// visible when EG_BODY executes. +// Pass {} when there is no need for such a pre-loop block. +// - EG_BODY, once per element group. +// +// Declares the following variables available for use in both statement blocks: +// 'vd_num': register index of vd +// 'vs2_num': register index of vs2 +// 'zimm5': unsigned 5 bits immediate +// 'vstart_eg': index of the first element group, *in EG units* +// 'vl_eg': length of the vector, *in EG units* +// +// The following variables are available in the EG_BODY block: +// 'idx_eg': index of the current element group. +// 'vd': EGU32x8_t reference, mutable, content of the current +// element group in the 'vd' vector register / register group. +// 'vs2': EGU32x8_t, content of the current element group +// in the 'vs2' vector register / register group. +// +#define VI_ZVK_VD_VS2_ZIMM5_EGU32x8_NOVM_LOOP(PRELUDE, PRELOOP, EG_BODY) \ + do { \ + require_element_groups_32x8; \ + require_no_vmask; \ + const reg_t vd_num = insn.rd(); \ + const reg_t vs2_num = insn.rs2(); \ + const reg_t zimm5 = insn.v_zimm5(); \ + const reg_t vstart_eg = P.VU.vstart->read() / 8; \ + const reg_t vl_eg = P.VU.vl->read() / 8; \ + do { PRELUDE } while (0); \ + if (vstart_eg < vl_eg) { \ + PRELOOP \ + for (reg_t idx_eg = vstart_eg; idx_eg < vl_eg; ++idx_eg) { \ + VV_VD_VS2_EGU32x8_PARAMS(vd_num, vs2_num, idx_eg); \ + EG_BODY \ + } \ + } \ + P.VU.vstart->write(0); \ + } while (0) + +// Processes all 64b*4 element groups available in the vector registers +// vd, vs1, and vs2. This interprets the vectors as containing element groups +// of 4 uint64_t values (EGW=256, EEW=64, EGS=4), *ignoring* the current +// SEW that applies to the vectors.
+// +// IMPORTANT +// - This macro contains an invocation of 'require_element_groups_64x4;', +// since the "loop" macro correctness depends on invariants that +// are checked by the "require" macro. +// - This macro does not support masking, and contains an invocation +// of 'require_no_vmask;'. +// - While the name states "VD_VS1_VS2", many vector instructions +// are specified as "op vd, vs2, vs1". This macro does not imply +// a specific operand order and can be used with both "op vd, vs2, vs1" +// and "op vd, vs1, vs2" instructions. +// +// Invokes two statement blocks: +// - PRELUDE, invoked once, before any element group. It is executed even +// if the vector is empty. It is placed in a "do { } while (0);", hence +// any variable declared there is not visible outside. +// - EG_BODY, once per element group. +// +// Declares the following variables available for use in both statement blocks: +// 'vd_num': register index of vd +// 'vs1_num': register index of vs1 +// 'vs2_num': register index of vs2 +// 'vstart_eg': index of the first element group, *in EG units* +// 'vl_eg': length of the vector, *in EG units* +// +// The following variables are available in the EG_BODY block: +// 'idx_eg': index of the current element group. +// 'vd': EGU64x4_t reference, content of the current element group +// in the 'vd' vector register / vector register group. +// 'vs1': EGU64x4_t, content of the current element group +// in the 'vs1' vector register / vector register group. +// 'vs2': EGU64x4_t, content of the current element group +// in the 'vs2' vector register / vector register group. 
+#define VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP(PRELUDE, EG_BODY) \ + do { \ + require_element_groups_64x4; \ + require_no_vmask; \ + const reg_t vd_num = insn.rd(); \ + const reg_t vs1_num = insn.rs1(); \ + const reg_t vs2_num = insn.rs2(); \ + const reg_t vstart_eg = P.VU.vstart->read() / 4; \ + const reg_t vl_eg = P.VU.vl->read() / 4; \ + do { PRELUDE } while (0); \ + for (reg_t idx_eg = vstart_eg; idx_eg < vl_eg; ++idx_eg) { \ + VV_VD_VS1_VS2_EGU64x4_PARAMS(vd_num, vs1_num, vs2_num, idx_eg); \ + EG_BODY \ + } \ + P.VU.vstart->write(0); \ + } while (0) + + +// Loop macro for widening instructions taking parameters 'vd, vs2, v1', +// with logic processing elements one-at-a-time in those register groups +// and treating the elements as unsigned integers. +// +// Invokes the BODY statement block once per element. +// As a widening instruction, it is defined for SEW in {8, 16, 32}. +// A separate copy of BODY is instantiated for each SEW value. +// +// Declares the following variables available for use in BODY: +// - 'vd_w', unsigned, 2 * SEW width, by reference, mutable. +// - 'vs2', unsigned, SEW width, by value, constant. +// - 'vs2_w', unsigned, 2 * SEW width, by value, constant, +// a widened copy of 'vs2'. +// - 'vs1', unsigned, SEW width, by value, constant. +#define VI_ZVK_VV_WIDENING_ULOOP(BODY) \ + do { \ + VI_CHECK_DSS(true); \ + VI_LOOP_BASE \ + switch (sew) { \ + case e8: { \ + VI_ZVK_VV_WIDENING_U_PARAMS(e8); \ + BODY \ + break; \ + } \ + case e16: { \ + VI_ZVK_VV_WIDENING_U_PARAMS(e16); \ + BODY \ + break; \ + } \ + case e32: { \ + VI_ZVK_VV_WIDENING_U_PARAMS(e32); \ + BODY \ + break; \ + } \ + } \ + VI_LOOP_END \ + } while (0) + +// Loop macro for widening instructions taking parameters 'vd, vs2, rs1', +// with logic processing elements one-at-a-time in those register groups +// and treating the elements as unsigned integers. +// +// Invokes the BODY statement block once per element. +// As a widening instruction, it is defined for SEW in {8, 16, 32}. 
+// A separate copy of BODY is instantiated for each SEW value. +// +// Declares the following variables available for use in BODY: +// - 'vd_w', unsigned, 2 * SEW width, by reference, mutable. +// - 'vs2', unsigned, SEW width, by value, constant. +// - 'vs2_w', unsigned, 2 * SEW width, by value, constant, +// a widened copy of 'vs2'. +// - 'rs1', unsigned, SEW width, by value, constant. +#define VI_ZVK_VX_WIDENING_ULOOP(BODY) \ + do { \ + VI_CHECK_DSS(true); \ + VI_LOOP_BASE \ + switch (sew) { \ + case e8: { \ + VI_ZVK_VX_WIDENING_U_PARAMS(e8); \ + BODY \ + break; \ + } \ + case e16: { \ + VI_ZVK_VX_WIDENING_U_PARAMS(e16); \ + BODY \ + break; \ + } \ + case e32: { \ + VI_ZVK_VX_WIDENING_U_PARAMS(e32); \ + BODY \ + break; \ + } \ + } \ + VI_LOOP_END \ + } while (0) + +// Loop macro for widening instructions taking parameters 'vd, vs2, zimm5', +// with logic processing elements one-at-a-time in those register groups +// and treating the elements as unsigned integers. +// +// Invokes the BODY statement block once per element. +// As a widening instruction, it is defined for SEW in {8, 16, 32}. +// A separate copy of BODY is instantiated for each SEW value. +// +// Declares the following variables available for use in BODY: +// - 'vd_w', unsigned, 2 * SEW width, by reference, mutable. +// - 'vs2', unsigned, SEW width, by value, constant. +// - 'vs2_w', unsigned, 2 * SEW width, by value, constant, +// a widened copy of 'vs2'. +// - 'zimm5', unsigned, SEW width, by value, constant. 
+#define VI_ZVK_VI_WIDENING_ULOOP(BODY) \ + do { \ + VI_CHECK_DSS(true); \ + VI_LOOP_BASE \ + switch (sew) { \ + case e8: { \ + VI_ZVK_VI_WIDENING_U_PARAMS(e8); \ + BODY \ + break; \ + } \ + case e16: { \ + VI_ZVK_VI_WIDENING_U_PARAMS(e16); \ + BODY \ + break; \ + } \ + case e32: { \ + VI_ZVK_VI_WIDENING_U_PARAMS(e32); \ + BODY \ + break; \ + } \ + } \ + VI_LOOP_END \ + } while (0) + +// +// Element Group Manipulation Macros +// + +// Extracts 4 uint32_t words from the input EGU32x4_t value +// into the (mutable) variables named by the W arguments, provided in +// "Little Endian" (LE) order, i.e., from the least significant (W0) +// to the most significant (W3). +#define EXTRACT_EGU32x4_WORDS_LE(X, W0, W1, W2, W3) \ + uint32_t W0 = (X)[0]; \ + uint32_t W1 = (X)[1]; \ + uint32_t W2 = (X)[2]; \ + uint32_t W3 = (X)[3]; \ + (void)(0) + +// Sets the elements words of given EGU32x4_t variable 'X' to +// the given 4 uint32_t values provided in "Little Endian" (LE) +// order, i.e., from the least significant (W0) to the most +// significant (W3). +#define SET_EGU32x4_LE(X, W0, W1, W2, W3) \ + do { \ + (X)[0] = (W0); \ + (X)[1] = (W1); \ + (X)[2] = (W2); \ + (X)[3] = (W3); \ + } while (0) + +// Extracts 4 uint32_t words from the input EGU32x4_t value +// into the (mutable) variables named by the W arguments, provided in +// "Big Endian" (BE) order, i.e., from the most significant (W3) +// to the least significant (W0). +#define EXTRACT_EGU32x4_WORDS_BE(X, W3, W2, W1, W0) \ + uint32_t W0 = (X)[0]; \ + uint32_t W1 = (X)[1]; \ + uint32_t W2 = (X)[2]; \ + uint32_t W3 = (X)[3]; \ + (void)(0) + +// Sets the elements words of given EGU32x4_t variable 'X' to +// the given 4 uint32_t values provided in "Big Endian" (BE) +// order, i.e., from the most significant (W3) to the least +// significant (W0).
+#define SET_EGU32x4_BE(X, W3, W2, W1, W0) \ + do { \ + (X)[0] = (W0); \ + (X)[1] = (W1); \ + (X)[2] = (W2); \ + (X)[3] = (W3); \ + } while (0) + +// Byte-swap the bytes of a uint32_t such that the order of bytes +// is reversed. +#define ZVK_BSWAP32(x) \ + ((((uint32_t)((x) >> 24)) & 0xFF) << 0 | \ + (((uint32_t)((x) >> 16)) & 0xFF) << 8 | \ + (((uint32_t)((x) >> 8)) & 0xFF) << 16 | \ + (((uint32_t)((x) >> 0)) & 0xFF) << 24) + +// Extracts 8 uint32_t words from the input EGU32x8_t value +// into the (mutable) variables named by the W arguments, provided in +// "Big Endian" (BE) order, i.e., from the most significant (W7) +// to the least significant (W0). Each of the words is byte-swapped, +// from a big-endian representation in the EGU32x8_t to a native/little-endian +// ordering in the variables. +#define EXTRACT_EGU32x8_WORDS_BE_BSWAP(X, W7, W6, W5, W4, W3, W2, W1, W0) \ + uint32_t W0 = ZVK_BSWAP32((X)[0]); \ + uint32_t W1 = ZVK_BSWAP32((X)[1]); \ + uint32_t W2 = ZVK_BSWAP32((X)[2]); \ + uint32_t W3 = ZVK_BSWAP32((X)[3]); \ + uint32_t W4 = ZVK_BSWAP32((X)[4]); \ + uint32_t W5 = ZVK_BSWAP32((X)[5]); \ + uint32_t W6 = ZVK_BSWAP32((X)[6]); \ + uint32_t W7 = ZVK_BSWAP32((X)[7]); \ + (void)(0) + +// Sets the elements words of given EGU32x8_t variable 'X' to +// the given 8 uint32_t values provided in "Big Endian" (BE) +// order, i.e., from the most significant (W7) to the least +// significant (W0). Each of the words is byte-swapped, +// from a native/little-endian ordering in the variables to +// a big-endian representation in the EGU32x8_t.
+#define SET_EGU32x8_WORDS_BE_BSWAP(X, W7, W6, W5, W4, W3, W2, W1, W0) \ + do { \ + (X)[0] = ZVK_BSWAP32(W0); \ + (X)[1] = ZVK_BSWAP32(W1); \ + (X)[2] = ZVK_BSWAP32(W2); \ + (X)[3] = ZVK_BSWAP32(W3); \ + (X)[4] = ZVK_BSWAP32(W4); \ + (X)[5] = ZVK_BSWAP32(W5); \ + (X)[6] = ZVK_BSWAP32(W6); \ + (X)[7] = ZVK_BSWAP32(W7); \ + } while (0) + +// Extracts 4 uint64_t words from the input EGU64x4_t value +// into the (mutable) variables named by the W arguments, provided in +// "Big Endian" (BE) order, i.e., from the most significant (W3) +// to the least significant (W0). +#define EXTRACT_EGU64x4_WORDS_BE(X, W3, W2, W1, W0) \ + uint64_t W0 = (X)[0]; \ + uint64_t W1 = (X)[1]; \ + uint64_t W2 = (X)[2]; \ + uint64_t W3 = (X)[3]; \ + (void)(0) + +// Sets the elements words of given EGU64x4_t variable 'X' to +// the given 4 uint64_t values provided in "Big Endian" (BE) +// order, i.e., from the most significant (W3) to the least +// significant (W0). +#define SET_EGU64x4_BE(X, W3, W2, W1, W0) \ + do { \ + (X)[0] = (W0); \ + (X)[1] = (W1); \ + (X)[2] = (W2); \ + (X)[3] = (W3); \ + } while (0) + +// Copies a EGU8x16_t value from 'SRC' into 'DST'. +#define EGU8x16_COPY(DST, SRC) \ + for (std::size_t bidx = 0; bidx < 16; ++bidx) { \ + (DST)[bidx] = (SRC)[bidx]; \ + } + +// Performs "MUT_A ^= CONST_B;", i.e., xor of the bytes +// in A (mutated) with the bytes in B (unchanged). +#define EGU8x16_XOREQ(MUT_A, CONST_B) \ + for (std::size_t bidx = 0; bidx < 16; ++bidx) { \ + (MUT_A)[bidx] ^= (CONST_B)[bidx]; \ + } + +// Performs "MUT_A ^= CONST_B;", i.e., xor of the bytes +// in A (mutated) with the bytes in B (unchanged). +#define EGU32x4_XOREQ(MUT_A, CONST_B) \ + for (std::size_t bidx = 0; bidx < 4; ++bidx) { \ + (MUT_A)[bidx] ^= (CONST_B)[bidx]; \ + } + +// Performs "DST = A ^ B;", i.e., DST (overwritten) receives +// the xor of the bytes in A and B (both unchanged).
+#define EGU8x16_XOR(DST, A, B) \ + for (std::size_t bidx = 0; bidx < 16; ++bidx) { \ + (DST)[bidx] = (A)[bidx] ^ (B)[bidx]; \ + } + +// +// Common bit manipulations logic. +// + +// Form a 64 bit integer with bit X set +#define ZVK_BIT(X) (1ULL << (X)) + +// Reverse the order of bits within bytes of a word. +// This is used to match the data interpretation in NIST SP 800-38D +// a.k.a the GCM specification. +#define ZVK_BREV8_32(X) \ + do { \ + (X) = (((X) & 0x55555555) << 1) | (((X) & 0xaaaaaaaa) >> 1); \ + (X) = (((X) & 0x33333333) << 2) | (((X) & 0xcccccccc) >> 2); \ + (X) = (((X) & 0x0f0f0f0f) << 4) | (((X) & 0xf0f0f0f0) >> 4); \ + } while (0) + +// Rotates right a uint32_t value by N bits. +// uint32_t ROR32(uint32_t X, std::size_t N); +#define ZVK_ROR32(X, N) rotate_right((X), (N)) + +// Rotates right a uint64_t value by N bits. +// uint64_t ROR64(uint64_t X, std::size_t N); +#define ZVK_ROR64(X, N) rotate_right((X), (N)) + +// Rotates left a uint32_t value by N bits. +// uint32_t ROL32(uint32_t X, std::size_t N); +#define ZVK_ROL32(X, N) rotate_left((X), (N)) + +// +// Element Group Bit Manipulation Macros +// + +// Performs bit reversal in a EGU32x4_t group. +#define EGU32x4_BREV8(X) \ + for (std::size_t bidx = 0; bidx < 4; ++bidx) { \ + ZVK_BREV8_32((X)[bidx]); \ + } + +// Checks if a given bit is set within a EGU32x4_t group. +// Assumes LE ordering. +#define EGU32x4_ISSET(X, BIDX) \ + (((X)[(BIDX) / 32] & ZVK_BIT((BIDX) % 32)) != 0) + +// Shifts a EGU32x4_t group left by one bit. +// +// Since the entire 128 bit value is shifted we need to handle carry bits. +// In order to limit the amount of carry check logic the elements are copied to +// a 64 bit temporary variable.
+#define EGU32x4_LSHIFT(X) \ + do { \ + uint64_t dword; \ + dword = ((uint64_t)(X)[3]) << 32; \ + dword |= X[2]; \ + dword <<= 1; \ + if (X[1] & ZVK_BIT(31)) { \ + dword |= ZVK_BIT(0); \ + } \ + X[2] = dword & UINT32_MAX; \ + X[3] = dword >> 32; \ + dword = ((uint64_t)(X)[1]) << 32; \ + dword |= X[0]; \ + dword <<= 1; \ + X[0] = dword & UINT32_MAX; \ + X[1] = dword >> 32; \ + } while (0) + +#endif // RISCV_ZVK_EXT_MACROS_H_ From e87038ee5e6545a5149cdf4334d220f951534f30 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Thu, 1 Jun 2023 18:06:55 -0700 Subject: [PATCH 036/127] Zvk: Implement Zvbb, Vector Bit-manipulation for Cryptography Implement the proposed instructions in Zvbb: - vandn.{vv,vx}, vector bitwise and-not - vbrev.v, vector bit reverse in element - vbrev8.v, vector bit reverse in bytes - vrev8.v, vector byte reverse - vctz.v, vector count trailing zeros - vclz.v, vector count leading zeros - vcpop.v, vector population count - vrol.{vv,vx}, vector rotate left - vror.{vi,vv,vx}, vector rotate right - vwsll.{vi,vv,vx} vector widening shift left logical A new instruction field, 'zimm6', is introduced, encoded in bits [15, 19] and [26].. It is used by "vror.vi" to encode a shift immediate in [0, 63]. 
Co-authored-by: Raghav Gupta Co-authored-by: Stanislaw Kardach Signed-off-by: Eric Gouriou --- riscv/decode.h | 1 + riscv/insns/vandn_vv.h | 10 ++++++++++ riscv/insns/vandn_vx.h | 10 ++++++++++ riscv/insns/vbrev8_v.h | 13 +++++++++++++ riscv/insns/vbrev_v.h | 24 ++++++++++++++++++++++++ riscv/insns/vclz_v.h | 16 ++++++++++++++++ riscv/insns/vcpop_v.h | 16 ++++++++++++++++ riscv/insns/vctz_v.h | 16 ++++++++++++++++ riscv/insns/vrev8_v.h | 16 ++++++++++++++++ riscv/insns/vrol_vv.h | 17 +++++++++++++++++ riscv/insns/vrol_vx.h | 18 ++++++++++++++++++ riscv/insns/vror_vi.h | 18 ++++++++++++++++++ riscv/insns/vror_vv.h | 17 +++++++++++++++++ riscv/insns/vror_vx.h | 18 ++++++++++++++++++ riscv/insns/vwsll_vi.h | 10 ++++++++++ riscv/insns/vwsll_vv.h | 10 ++++++++++ riscv/insns/vwsll_vx.h | 10 ++++++++++ riscv/riscv.mk.in | 22 ++++++++++++++++++++++ 18 files changed, 262 insertions(+) create mode 100644 riscv/insns/vandn_vv.h create mode 100644 riscv/insns/vandn_vx.h create mode 100644 riscv/insns/vbrev8_v.h create mode 100644 riscv/insns/vbrev_v.h create mode 100644 riscv/insns/vclz_v.h create mode 100644 riscv/insns/vcpop_v.h create mode 100644 riscv/insns/vctz_v.h create mode 100644 riscv/insns/vrev8_v.h create mode 100644 riscv/insns/vrol_vv.h create mode 100644 riscv/insns/vrol_vx.h create mode 100644 riscv/insns/vror_vi.h create mode 100644 riscv/insns/vror_vv.h create mode 100644 riscv/insns/vror_vx.h create mode 100644 riscv/insns/vwsll_vi.h create mode 100644 riscv/insns/vwsll_vv.h create mode 100644 riscv/insns/vwsll_vx.h diff --git a/riscv/decode.h b/riscv/decode.h index dad32a1e31..cd1c0a1222 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -140,6 +140,7 @@ class insn_t uint64_t v_vta() { return x(26, 1); } uint64_t v_vma() { return x(27, 1); } uint64_t v_mew() { return x(28, 1); } + uint64_t v_zimm6() { return x(15, 5) + (x(26, 1) << 5); } uint64_t p_imm2() { return x(20, 2); } uint64_t p_imm3() { return x(20, 3); } diff --git a/riscv/insns/vandn_vv.h 
b/riscv/insns/vandn_vv.h new file mode 100644 index 0000000000..d85e47d7fe --- /dev/null +++ b/riscv/insns/vandn_vv.h @@ -0,0 +1,10 @@ +// vandn.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_VV_LOOP +({ + vd = vs2 & (~vs1); +}) diff --git a/riscv/insns/vandn_vx.h b/riscv/insns/vandn_vx.h new file mode 100644 index 0000000000..1c66a40970 --- /dev/null +++ b/riscv/insns/vandn_vx.h @@ -0,0 +1,10 @@ +// vandn.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_VX_LOOP +({ + vd = vs2 & (~rs1); +}) diff --git a/riscv/insns/vbrev8_v.h b/riscv/insns/vbrev8_v.h new file mode 100644 index 0000000000..a6d3cda744 --- /dev/null +++ b/riscv/insns/vbrev8_v.h @@ -0,0 +1,13 @@ +// vbrev8.v vd, vs2, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + vd = vs2; + vd = ((vd & 0x5555555555555555llu) << 1) | ((vd & 0xAAAAAAAAAAAAAAAAllu) >> 1); + vd = ((vd & 0x3333333333333333llu) << 2) | ((vd & 0xCCCCCCCCCCCCCCCCllu) >> 2); + vd = ((vd & 0x0F0F0F0F0F0F0F0Fllu) << 4) | ((vd & 0xF0F0F0F0F0F0F0F0llu) >> 4); +}) diff --git a/riscv/insns/vbrev_v.h b/riscv/insns/vbrev_v.h new file mode 100644 index 0000000000..7f784c2231 --- /dev/null +++ b/riscv/insns/vbrev_v.h @@ -0,0 +1,24 @@ +// vbrev.v vd, vs2 + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + reg_t x = vs2; + + // Reverse bits in bytes (vbrev8) + x = ((x & 0x5555555555555555llu) << 1) | ((x & 0xAAAAAAAAAAAAAAAAllu) >> 1); + x = ((x & 0x3333333333333333llu) << 2) | ((x & 0xCCCCCCCCCCCCCCCCllu) >> 2); + x = ((x & 0x0F0F0F0F0F0F0F0Fllu) << 4) | ((x & 0xF0F0F0F0F0F0F0F0llu) >> 4); + // Re-order bytes (vrev8) + if (P.VU.vsew > 8) + x = ((x & 0x00FF00FF00FF00FFllu) << 8) | ((x & 0xFF00FF00FF00FF00llu) >> 8); + if (P.VU.vsew > 16) + x = ((x & 0x0000FFFF0000FFFFllu) << 16) | ((x & 0xFFFF0000FFFF0000llu) >> 16); + if (P.VU.vsew > 32) + x = ((x & 0x00000000FFFFFFFFllu) << 32) | ((x & 0xFFFFFFFF00000000llu) >> 32); + + vd = x; +}) diff --git 
a/riscv/insns/vclz_v.h b/riscv/insns/vclz_v.h new file mode 100644 index 0000000000..5f7f03c86c --- /dev/null +++ b/riscv/insns/vclz_v.h @@ -0,0 +1,16 @@ +// vclz.v vd, vs2 + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + unsigned int i = 0; + for (; i < P.VU.vsew; ++i) { + if (1 & (vs2 >> (P.VU.vsew - 1 - i))) { + break; + } + } + vd = i; +}) diff --git a/riscv/insns/vcpop_v.h b/riscv/insns/vcpop_v.h new file mode 100644 index 0000000000..52b29c695c --- /dev/null +++ b/riscv/insns/vcpop_v.h @@ -0,0 +1,16 @@ +// vcpop.v vd, vs2 + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + reg_t count = 0; + for (std::size_t i = 0; i < P.VU.vsew; ++i) { + if (1 & (vs2 >> i)) { + count++; + } + } + vd = count; +}) diff --git a/riscv/insns/vctz_v.h b/riscv/insns/vctz_v.h new file mode 100644 index 0000000000..b63dd019f7 --- /dev/null +++ b/riscv/insns/vctz_v.h @@ -0,0 +1,16 @@ +// vctz.v vd, vs2 + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + unsigned int i = 0; + for (; i < P.VU.vsew; ++i) { + if (1 & (vs2 >> i)) { + break; + } + } + vd = i; +}) diff --git a/riscv/insns/vrev8_v.h b/riscv/insns/vrev8_v.h new file mode 100644 index 0000000000..f26c5a0502 --- /dev/null +++ b/riscv/insns/vrev8_v.h @@ -0,0 +1,16 @@ +// vrev8.v vd, vs2, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_V_ULOOP +({ + vd = vs2; + if (P.VU.vsew > 8) + vd = ((vd & 0x00FF00FF00FF00FFllu) << 8) | ((vd & 0xFF00FF00FF00FF00llu) >> 8); + if (P.VU.vsew > 16) + vd = ((vd & 0x0000FFFF0000FFFFllu) << 16) | ((vd & 0xFFFF0000FFFF0000llu) >> 16); + if (P.VU.vsew > 32) + vd = ((vd & 0x00000000FFFFFFFFllu) << 32) | ((vd & 0xFFFFFFFF00000000llu) >> 32); +}) diff --git a/riscv/insns/vrol_vv.h b/riscv/insns/vrol_vv.h new file mode 100644 index 0000000000..fb2e483320 --- /dev/null +++ b/riscv/insns/vrol_vv.h @@ -0,0 +1,17 @@ +// vrol.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift
amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +VI_VV_ULOOP +({ + // For .vv, the shift amount comes from the vs1 element. + const reg_t lshift = vs1 & mask; + const reg_t rshift = (-lshift) & mask; + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vrol_vx.h b/riscv/insns/vrol_vx.h new file mode 100644 index 0000000000..b0c89a27b7 --- /dev/null +++ b/riscv/insns/vrol_vx.h @@ -0,0 +1,18 @@ +// vrol.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +// For .vx, the shift amount comes from rs1. +const reg_t lshift = ((reg_t)RS1) & mask; +const reg_t rshift = (-lshift) & mask; + +VI_V_ULOOP +({ + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vror_vi.h b/riscv/insns/vror_vi.h new file mode 100644 index 0000000000..1269c3d477 --- /dev/null +++ b/riscv/insns/vror_vi.h @@ -0,0 +1,18 @@ +// vror.vi vd, vs2, zimm6, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +// For .vi, the shift amount comes from bits [26,19-15]. +const reg_t rshift = insn.v_zimm6() & mask; +const reg_t lshift = (-rshift) & mask; + +VI_V_ULOOP +({ + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vror_vv.h b/riscv/insns/vror_vv.h new file mode 100644 index 0000000000..c649c6d97f --- /dev/null +++ b/riscv/insns/vror_vv.h @@ -0,0 +1,17 @@ +// vror.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +VI_VV_ULOOP +({ + // For .vv, the shift amount comes from the vs1 element. 
+ const reg_t rshift = vs1 & mask; + const reg_t lshift = (-rshift) & mask; + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vror_vx.h b/riscv/insns/vror_vx.h new file mode 100644 index 0000000000..50c8e5c94a --- /dev/null +++ b/riscv/insns/vror_vx.h @@ -0,0 +1,18 @@ +// vror.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +// 'mask' selects the low log2(vsew) bits of the shift amount, +// to limit the maximum shift to "vsew - 1" bits. +const reg_t mask = P.VU.vsew - 1; + +// For .vx, the shift amount comes from rs1. +const reg_t rshift = ((reg_t)RS1) & mask; +const reg_t lshift = (-rshift) & mask; + +VI_V_ULOOP +({ + vd = (vs2 << lshift) | (vs2 >> rshift); +}) diff --git a/riscv/insns/vwsll_vi.h b/riscv/insns/vwsll_vi.h new file mode 100644 index 0000000000..13b5eb4a5b --- /dev/null +++ b/riscv/insns/vwsll_vi.h @@ -0,0 +1,10 @@ +// vwsll.vi vd, vs2, zimm5, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_ZVK_VI_WIDENING_ULOOP({ + const reg_t shift = zimm5 & ((2 * sew) - 1); + vd_w = vs2_w << shift; +}); diff --git a/riscv/insns/vwsll_vv.h b/riscv/insns/vwsll_vv.h new file mode 100644 index 0000000000..5a64c6c06a --- /dev/null +++ b/riscv/insns/vwsll_vv.h @@ -0,0 +1,10 @@ +// vwsll.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_ZVK_VV_WIDENING_ULOOP({ + const reg_t shift = (vs1 & ((2 * sew) - 1)); + vd_w = vs2_w << shift; +}); diff --git a/riscv/insns/vwsll_vx.h b/riscv/insns/vwsll_vx.h new file mode 100644 index 0000000000..5264e80eac --- /dev/null +++ b/riscv/insns/vwsll_vx.h @@ -0,0 +1,10 @@ +// vwsll.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbb; + +VI_ZVK_VX_WIDENING_ULOOP({ + const reg_t shift = (rs1 & ((2 * sew) - 1)); + vd_w = vs2_w << shift; +}); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 3b493a06c8..4aa23e3c1c 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1340,6 +1340,27 @@ riscv_insn_ext_zacas = \ amocas_d \ $(if
$(HAVE_INT128),amocas_q) +riscv_insn_ext_zvbb = \ + vandn_vv \ + vandn_vx \ + vbrev8_v \ + vbrev_v \ + vclz_v \ + vcpop_v \ + vctz_v \ + vrev8_v \ + vrol_vv \ + vrol_vx \ + vror_vi \ + vror_vv \ + vror_vx \ + vwsll_vi \ + vwsll_vv \ + vwsll_vx \ + +riscv_insn_ext_zvk = \ + $(riscv_insn_ext_zvbb) \ + riscv_insn_list = \ $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ $(riscv_insn_ext_a) \ @@ -1363,6 +1384,7 @@ riscv_insn_list = \ $(riscv_insn_ext_zfh) \ $(riscv_insn_ext_zfh_zfa) \ $(riscv_insn_ext_zicond) \ + $(riscv_insn_ext_zvk) \ $(riscv_insn_priv) \ $(riscv_insn_smrnmi) \ $(riscv_insn_svinval) \ From d633af2b180391b6f73f84f56d8b305a3af7c152 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Thu, 1 Jun 2023 18:07:04 -0700 Subject: [PATCH 037/127] Zvk: Implement Zvbc extension, vector carryless multiplication Implement the Zvbc instructions - vclmul.{vv,vx}, vector carryless multiply low - vclmulh.{vv,vx}, vector carryless multiply high Signed-off-by: Eric Gouriou --- riscv/insns/vclmul_vv.h | 20 ++++++++++++++++++++ riscv/insns/vclmul_vx.h | 20 ++++++++++++++++++++ riscv/insns/vclmulh_vv.h | 20 ++++++++++++++++++++ riscv/insns/vclmulh_vx.h | 20 ++++++++++++++++++++ riscv/riscv.mk.in | 7 +++++++ 5 files changed, 87 insertions(+) create mode 100644 riscv/insns/vclmul_vv.h create mode 100644 riscv/insns/vclmul_vx.h create mode 100644 riscv/insns/vclmulh_vv.h create mode 100644 riscv/insns/vclmulh_vx.h diff --git a/riscv/insns/vclmul_vv.h b/riscv/insns/vclmul_vv.h new file mode 100644 index 0000000000..8957738adc --- /dev/null +++ b/riscv/insns/vclmul_vv.h @@ -0,0 +1,20 @@ +// vclmul.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbc; +require(P.VU.vsew == 64); + +VI_VV_ULOOP +({ + // Perform a carryless multiplication 64bx64b on each 64b element, + // return the low 64b of the 128b product.
+ // + vd = 0; + for (std::size_t bit_idx = 0; bit_idx < sew; ++bit_idx) { + const reg_t mask = ((reg_t) 1) << bit_idx; + if ((vs1 & mask) != 0) { + vd ^= vs2 << bit_idx; + } + } +}) diff --git a/riscv/insns/vclmul_vx.h b/riscv/insns/vclmul_vx.h new file mode 100644 index 0000000000..1df7a3a2a4 --- /dev/null +++ b/riscv/insns/vclmul_vx.h @@ -0,0 +1,20 @@ +// vclmul.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbc; +require(P.VU.vsew == 64); + +VI_VX_ULOOP +({ + // Perform a carryless multiplication 64bx64b on each 64b element, + // return the low 64b of the 128b product. + // + vd = 0; + for (std::size_t bit_idx = 0; bit_idx < sew; ++bit_idx) { + const reg_t mask = ((reg_t) 1) << bit_idx; + if ((rs1 & mask) != 0) { + vd ^= vs2 << bit_idx; + } + } +}) diff --git a/riscv/insns/vclmulh_vv.h b/riscv/insns/vclmulh_vv.h new file mode 100644 index 0000000000..6a54bcfaa6 --- /dev/null +++ b/riscv/insns/vclmulh_vv.h @@ -0,0 +1,20 @@ +// vclmulh.vv vd, vs2, vs1, vm + +#include "zvk_ext_macros.h" + +require_zvbc; +require(P.VU.vsew == 64); + +VI_VV_ULOOP +({ + // Perform a carryless multiplication 64bx64b on each 64b element, + // return the high 64b of the 128b product. + // + vd = 0; + for (std::size_t bit_idx = 1; bit_idx < sew; ++bit_idx) { + const reg_t mask = ((reg_t) 1) << bit_idx; + if ((vs1 & mask) != 0) { + vd ^= ((reg_t)vs2) >> (sew - bit_idx); + } + } +}) diff --git a/riscv/insns/vclmulh_vx.h b/riscv/insns/vclmulh_vx.h new file mode 100644 index 0000000000..e874d1df68 --- /dev/null +++ b/riscv/insns/vclmulh_vx.h @@ -0,0 +1,20 @@ +// vclmulh.vx vd, vs2, rs1, vm + +#include "zvk_ext_macros.h" + +require_zvbc; +require(P.VU.vsew == 64); + +VI_VX_ULOOP +({ + // Perform a carryless multiplication 64bx64b on each 64b element, + // return the high 64b of the 128b product. 
+ // + vd = 0; + for (std::size_t bit_idx = 1; bit_idx < sew; ++bit_idx) { + const reg_t mask = ((reg_t) 1) << bit_idx; + if ((rs1 & mask) != 0) { + vd ^= ((reg_t)vs2) >> (sew - bit_idx); + } + } +}) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 4aa23e3c1c..dcf2640600 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1358,8 +1358,15 @@ riscv_insn_ext_zvbb = \ vwsll_vv \ vwsll_vx \ +riscv_insn_ext_zvbc = \ + vclmul_vv \ + vclmul_vx \ + vclmulh_vv \ + vclmulh_vx \ + riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvbb) \ + $(riscv_insn_ext_zvbc) \ riscv_insn_list = \ $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ From fbd4ca2eef884b6835e848d761b3e375a66fc47a Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Thu, 1 Jun 2023 18:07:22 -0700 Subject: [PATCH 038/127] Zvk: Implement Zvkg, Vector GCM/GMAC instruction Implement the proposed instruction in Zvkg, vghmac.vv, Vector Carryless Multiply Accumulate over GHASH Galois-Field. The instruction performs one step of GHASH routine as described in "NIST Special Publication 800-38D" a.k.a the AES-GCM specification. The logic was written to closely track the pseudo-code in the Zvk specification. 
Signed-off-by: Eric Gouriou Co-authored-by: Kornel Duleba Signed-off-by: Eric Gouriou --- riscv/insns/vghsh_vv.h | 38 ++++++++++++++++++++++++++++++++++++++ riscv/insns/vgmul_vv.h | 32 ++++++++++++++++++++++++++++++++ riscv/riscv.mk.in | 5 +++++ riscv/zvk_ext_macros.h | 16 ++++++++++++++-- 4 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 riscv/insns/vghsh_vv.h create mode 100644 riscv/insns/vgmul_vv.h diff --git a/riscv/insns/vghsh_vv.h b/riscv/insns/vghsh_vv.h new file mode 100644 index 0000000000..bcbfe74f33 --- /dev/null +++ b/riscv/insns/vghsh_vv.h @@ -0,0 +1,38 @@ +// vghsh.vv vd, vs2, vs1 + +#include "zvk_ext_macros.h" + +require_zvkg; +require(P.VU.vsew == 32); +require_egw_fits(128); + +VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( + {}, + { + EGU32x4_t Y = vd; // Current partial hash + EGU32x4_t X = vs1; // Block cipher output + EGU32x4_t H = vs2; // Hash subkey + + EGU32x4_BREV8(H); + EGU32x4_t Z = {}; + + // S = brev8(Y ^ X) + EGU32x4_t S; + EGU32x4_XOR(S, Y, X); + EGU32x4_BREV8(S); + + for (int bit = 0; bit < 128; bit++) { + if (EGU32x4_ISSET(S, bit)) { + EGU32x4_XOREQ(Z, H); + } + + const bool reduce = EGU32x4_ISSET(H, 127); + EGU32x4_LSHIFT(H); // Left shift by 1. 
+ if (reduce) { + H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + } + EGU32x4_BREV8(Z); + vd = Z; + } +); diff --git a/riscv/insns/vgmul_vv.h b/riscv/insns/vgmul_vv.h new file mode 100644 index 0000000000..820b396e04 --- /dev/null +++ b/riscv/insns/vgmul_vv.h @@ -0,0 +1,32 @@ +// vgmul.vv vd, vs2 + +#include "zvk_ext_macros.h" + +require_zvkg; +require(P.VU.vsew == 32); +require_egw_fits(128); + +VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP( + {}, + { + EGU32x4_t Y = vd; // Multiplier + EGU32x4_BREV8(Y); + EGU32x4_t H = vs2; // Multiplicand + EGU32x4_BREV8(H); + EGU32x4_t Z = {}; + + for (int bit = 0; bit < 128; bit++) { + if (EGU32x4_ISSET(Y, bit)) { + EGU32x4_XOREQ(Z, H); + } + + bool reduce = EGU32x4_ISSET(H, 127); + EGU32x4_LSHIFT(H); // Left shift by 1 + if (reduce) { + H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + } + EGU32x4_BREV8(Z); + vd = Z; + } +); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index dcf2640600..5562c0956d 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1364,9 +1364,14 @@ riscv_insn_ext_zvbc = \ vclmulh_vv \ vclmulh_vx \ +riscv_insn_ext_zvkg= \ + vghsh_vv \ + vgmul_vv \ + riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvbb) \ $(riscv_insn_ext_zvbc) \ + $(riscv_insn_ext_zvkg) \ riscv_insn_list = \ $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index 7efbac806f..bf893f9f12 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -942,8 +942,8 @@ // Performs "MUT_A ^= CONST_B;", i.e., xor of the bytes // in A (mutated) with the bytes in B (unchanged).
#define EGU32x4_XOREQ(MUT_A, CONST_B) \ - for (std::size_t bidx = 0; bidx < 4; ++bidx) { \ - (MUT_A)[bidx] ^= (CONST_B)[bidx]; \ + for (std::size_t idx = 0; idx < 4; ++idx) { \ + (MUT_A)[idx] ^= (CONST_B)[idx]; \ } // Performs "DST = A ^ B;", i.e., DST (overwritten) receives @@ -953,6 +953,18 @@ (DST)[bidx] = (A)[bidx] ^ (B)[bidx]; \ } +// Performs "DST = A ^ B;", i.e., DST (overwritten) receives +// the xor of the bytes in A and B (both unchanged). +#define EGU32x4_XOR(DST, A, B) \ + do { \ + static_assert(std::is_same::value); \ + static_assert(std::is_same::value); \ + static_assert(std::is_same::value); \ + for (std::size_t idx = 0; idx < 4; ++idx) { \ + (DST)[idx] = (A)[idx] ^ (B)[idx]; \ + } \ + } while (0) + // // Common bit manipulations logic. // From 00873aa61acae4a17c1d269cddf1885e83b50102 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Thu, 1 Jun 2023 18:07:32 -0700 Subject: [PATCH 039/127] Zvk: Implement Zvknh[ab], NIST Suite: Vector SHA-2 Implement the instructions part of the Zvknha and Zvknhb sub-extensions: - vsha2ms.vv, message schedule - vsha2ch.vv / vsha2cl.vv, compression rounds A header files for common macros is added. Signed-off-by: Eric Gouriou --- riscv/insns/vsha2ch_vv.h | 61 +++++++++++++++ riscv/insns/vsha2cl_vv.h | 62 ++++++++++++++++ riscv/insns/vsha2ms_vv.h | 63 ++++++++++++++++ riscv/riscv.mk.in | 7 ++ riscv/zvknh_ext_macros.h | 155 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 348 insertions(+) create mode 100644 riscv/insns/vsha2ch_vv.h create mode 100644 riscv/insns/vsha2cl_vv.h create mode 100644 riscv/insns/vsha2ms_vv.h create mode 100644 riscv/zvknh_ext_macros.h diff --git a/riscv/insns/vsha2ch_vv.h b/riscv/insns/vsha2ch_vv.h new file mode 100644 index 0000000000..34c6e05fbc --- /dev/null +++ b/riscv/insns/vsha2ch_vv.h @@ -0,0 +1,61 @@ +// vsha2ch.vv vd, vs2, vs1 + +#include "zvknh_ext_macros.h" + +// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2. 
+require_vsha2_common_constraints; + +switch (P.VU.vsew) { + case e32: { + require_vsha2_vsew32_constraints; + + VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( + {}, + { + // {c, d, g, h} <- vd + EXTRACT_EGU32x4_WORDS_BE(vd, c, d, g, h); + // {a, b, e, f} <- vs2 + EXTRACT_EGU32x4_WORDS_BE(vs2, a, b, e, f); + // {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W + EXTRACT_EGU32x4_WORDS_BE(vs1, kw3, kw2, + UNUSED _unused_kw1, UNUSED _unused_kw0); + + ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw2); + ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw3); + + // Update the destination register, vd <- {a, b, e, f}. + SET_EGU32x4_BE(vd, a, b, e, f); + } + ); + break; + } + + case e64: { + require_vsha2_vsew64_constraints; + + VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP( + {}, + { + // {c, d, g, h} <- vd + EXTRACT_EGU64x4_WORDS_BE(vd, c, d, g, h); + // {a, b, e, f} <- vs2 + EXTRACT_EGU64x4_WORDS_BE(vs2, a, b, e, f); + // {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W + EXTRACT_EGU64x4_WORDS_BE(vs1, kw3, kw2, + UNUSED _unused_kw1, UNUSED _unused_kw0); + + ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw2); + ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw3); + + // Update the destination register, vd <- {a, b, e, f}. + SET_EGU64x4_BE(vd, a, b, e, f); + } + ); + break; + } + + // 'require_vsha2_common_constraints' ensures that + // VSEW is either 32 or 64. + default: + require(false); +} diff --git a/riscv/insns/vsha2cl_vv.h b/riscv/insns/vsha2cl_vv.h new file mode 100644 index 0000000000..4a1df0904b --- /dev/null +++ b/riscv/insns/vsha2cl_vv.h @@ -0,0 +1,62 @@ +// vsha2cl.vv vd, vs2, vs1 + +#include "zvknh_ext_macros.h" + +// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2. 
+require_vsha2_common_constraints; + +switch (P.VU.vsew) { + case e32: { + require_vsha2_vsew32_constraints; + + VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( + {}, + { + // {c, d, g, h} <- vd + EXTRACT_EGU32x4_WORDS_BE(vd, c, d, g, h); + // {a, b, e, f} <- vs2 + EXTRACT_EGU32x4_WORDS_BE(vs2, a, b, e, f); + // {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W + EXTRACT_EGU32x4_WORDS_BE(vs1, UNUSED _unused_kw3, UNUSED _unused_kw2, + kw1, kw0); + + ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw0); + ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw1); + + // Update the destination register, vd <- {a, b, e, f}. + SET_EGU32x4_BE(vd, a, b, e, f); + } + ); + break; + } + + case e64: { + require_vsha2_vsew64_constraints; + + VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP( + {}, + { + // {c, d, g, h} <- vd + EXTRACT_EGU64x4_WORDS_BE(vd, c, d, g, h); + // {a, b, e, f} <- vs2 + EXTRACT_EGU64x4_WORDS_BE(vs2, a, b, e, f); + // {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W + EXTRACT_EGU64x4_WORDS_BE(vs1, UNUSED _unused_kw3, UNUSED _unused_kw2, + kw1, kw0); + + ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw0); + ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw1); + + // Update the destination register, vd <- {a, b, e, f}. + SET_EGU64x4_BE(vd, a, b, e, f); + } + ); + break; + } + + // 'require_vsha2_common_constraints' ensures that + // VSEW is either 32 or 64. + default: + require(false); +} + diff --git a/riscv/insns/vsha2ms_vv.h b/riscv/insns/vsha2ms_vv.h new file mode 100644 index 0000000000..8f1ca085ae --- /dev/null +++ b/riscv/insns/vsha2ms_vv.h @@ -0,0 +1,63 @@ +// vsha2ms.vv vd, vs2, vs1 + +#include "zvknh_ext_macros.h" + +// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
+require_vsha2_common_constraints; + +switch (P.VU.vsew) { + case e32: { + require_vsha2_vsew32_constraints; + + VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( + {}, + { + // {w3, w2, w1, w0} <- vd + EXTRACT_EGU32x4_WORDS_BE(vd, w3, w2, w1, w0); + // {w11, w10, w9, w4} <- vs2 + EXTRACT_EGU32x4_WORDS_BE(vs2, w11, w10, w9, w4); + // {w15, w14, w13, w12} <- vs1 + EXTRACT_EGU32x4_WORDS_BE(vs1, w15, w14, UNUSED _unused_w13, w12); + + const uint32_t w16 = ZVK_SHA256_SCHEDULE(w14, w9, w1, w0); + const uint32_t w17 = ZVK_SHA256_SCHEDULE(w15, w10, w2, w1); + const uint32_t w18 = ZVK_SHA256_SCHEDULE(w16, w11, w3, w2); + const uint32_t w19 = ZVK_SHA256_SCHEDULE(w17, w12, w4, w3); + + // Update the destination register. + SET_EGU32x4_BE(vd, w19, w18, w17, w16);; + } + ); + break; + } + + case e64: { + require_vsha2_vsew64_constraints; + + VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP( + {}, + { + // {w3, w2, w1, w0} <- vd + EXTRACT_EGU64x4_WORDS_BE(vd, w3, w2, w1, w0); + // {w11, w10, w9, w4} <- vs2 + EXTRACT_EGU64x4_WORDS_BE(vs2, w11, w10, w9, w4); + // {w15, w14, w13, w12} <- vs1 + EXTRACT_EGU64x4_WORDS_BE(vs1, w15, w14, UNUSED _unused_w13, w12); + + const uint64_t w16 = ZVK_SHA512_SCHEDULE(w14, w9, w1, w0); + const uint64_t w17 = ZVK_SHA512_SCHEDULE(w15, w10, w2, w1); + const uint64_t w18 = ZVK_SHA512_SCHEDULE(w16, w11, w3, w2); + const uint64_t w19 = ZVK_SHA512_SCHEDULE(w17, w12, w4, w3); + + // Update the destination register. + SET_EGU64x4_BE(vd, w19, w18, w17, w16);; + } + ); + break; + } + + // 'require_vsha2_common_constraints' ensures that + // VSEW is either 32 or 64. + default: + require(false); +} diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 5562c0956d..4ce088f35f 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1368,10 +1368,17 @@ riscv_insn_ext_zvkg= \ vghsh_vv \ vgmul_vv \ +# Covers both Zvknha and Zvkhnb. 
+riscv_insn_ext_zvknh = \ + vsha2cl_vv \ + vsha2ch_vv \ + vsha2ms_vv \ + riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvbb) \ $(riscv_insn_ext_zvbc) \ $(riscv_insn_ext_zvkg) \ + $(riscv_insn_ext_zvknh) \ riscv_insn_list = \ $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ diff --git a/riscv/zvknh_ext_macros.h b/riscv/zvknh_ext_macros.h new file mode 100644 index 0000000000..b50818bdae --- /dev/null +++ b/riscv/zvknh_ext_macros.h @@ -0,0 +1,155 @@ +// Helper macros to help implement instructions defined as part of +// the RISC-V Zvknh[ab] extensions (vector SHA-256/SHA-512 cryptography). + +#include "zvk_ext_macros.h" + +#ifndef RISCV_ZVKNH_EXT_MACROS_H_ +#define RISCV_ZVKNH_EXT_MACROS_H_ + +// Constraints common to all vsha* instructions, across all VSEW: +// - VSEW is 32 (SHA-256) or 64 (SHA-512) +// - No overlap of vd with vs1 or vs2. +// +// The constraint that vstart and vl are both EGS (4) aligned +// is checked in the VI_..._EGU32x4_..._LOOP and VI_..._EGU64x4_..._LOOP +// macros. +#define require_vsha2_common_constraints \ + do { \ + require(P.VU.vsew == 32 || P.VU.vsew == 64); \ + require(insn.rd() != insn.rs1()); \ + require(insn.rd() != insn.rs2()); \ + } while (false) + +// Constraints on vsha2 instructions that must be verified when VSEW==32. +// Those are *IN ADDITION* to the constraints checked by +// 'require_vsha2_common_constraints', which is meant to be run earlier. +// +// The constraint that vstart and vl are both EGS (4) aligned +// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. +#define require_vsha2_vsew32_constraints \ + do { \ + require_zvknh_256; \ + require_egw_fits(128); \ + } while (false) + +// Constraints on vsha2 instructions that must be verified when VSEW==64. +// Those are *IN ADDITION* to the constraints checked by +// 'require_vsha2_common_constraints', which is meant to be run earlier. +// +// The constraint that vstart and vl are both EGS (4) aligned +// is checked in the VI_ZVK_..._EGU64x4_..._LOOP macros.
+#define require_vsha2_vsew64_constraints \ + do { \ + require_zvknh_512; \ + require_egw_fits(256); \ + } while (false) + +// +// SHA-256 and SHA-512 common logic +// + +// Ch(x, y, z) = (xy) ⊕ (~xz) = xy | ~xz +#define ZVK_SHA_CH(X, Y, Z) (((X) & (Y)) ^ ((~(X)) & (Z))) + +// Maj(x,y,z) = (xy) ⊕ (xz) ⊕ (yz) = xy | xz | yz +#define ZVK_SHA_MAJ(X, Y, Z) (((X) & (Y)) ^ ((X) & (Z)) ^ ((Y) & (Z))) + +// +// SHA-256 +// + +// sum0(x) = ROTR2(x) ⊕ ROTR13(x) ⊕ ROTR22(x) +#define ZVK_SHA256_SUM0(X) \ + (ZVK_ROR32(X, 2) ^ ZVK_ROR32(X, 13) ^ ZVK_ROR32(X, 22)) + +// sum1(x) = ROTR6(x) ⊕ ROTR11(x) ⊕ ROTR25(x) +#define ZVK_SHA256_SUM1(X) \ + (ZVK_ROR32(X, 6) ^ ZVK_ROR32(X, 11) ^ ZVK_ROR32(X, 25)) + +// sig0(x) = ROTR7(x) ⊕ ROTR18(x) ⊕ SHR3(x) +#define ZVK_SHA256_SIG0(X) \ + (ZVK_ROR32(X, 7) ^ ZVK_ROR32(X, 18) ^ ((X) >> 3)) + +// sig1(x) = ROTR17(x) ⊕ ROTR19(x) ⊕ SHR10(x) +#define ZVK_SHA256_SIG1(X) \ + (ZVK_ROR32(X, 17) ^ ZVK_ROR32(X, 19) ^ ((X) >> 10)) + +// Given the schedule words W[t+0], W[t+1], W[t+9], W[t+14], computes +// W[t+16]. +#define ZVK_SHA256_SCHEDULE(W14, W9, W1, W0) \ + (ZVK_SHA256_SIG1(W14) + (W9) + ZVK_SHA256_SIG0(W1) + (W0)) + +// Performs one round of compression (out of the 64 rounds), given the state +// temporaries A,B,C,...,H, and KW, the sum Kt+Wt. +// Updates A,B,C,...,H to their new values. KW is not modified. +// +// Note that some of the logic could be omitted in vsha2c[hl] since +// some of the variables are dropped in each of those. However removing +// those unnecessary updates reduces the opportunities to share this single +// per-round logic and forces us to move further away from how the logic +// is expressed in FIPS PUB 180-4.
+#define ZVK_SHA256_COMPRESS(A, B, C, D, E, F, G, H, KW) \ + { \ + const uint32_t t1 = (H) + ZVK_SHA256_SUM1(E) + \ + ZVK_SHA_CH((E), (F), (G)) + (KW); \ + const uint32_t t2 = ZVK_SHA256_SUM0(A) + ZVK_SHA_MAJ((A), (B), (C)); \ + (H) = (G); \ + (G) = (F); \ + (F) = (E); \ + (E) = (D) + t1; \ + (D) = (C); \ + (C) = (B); \ + (B) = (A); \ + (A) = t1 + t2; \ + } + +// +// SHA-512 +// + +// sum0(x) = ROTR28(x) ⊕ ROTR34(x) ⊕ ROTR39(x) +#define ZVK_SHA512_SUM0(X) \ + (ZVK_ROR64(X, 28) ^ ZVK_ROR64(X, 34) ^ ZVK_ROR64(X, 39)) + +// sum1(x) = ROTR14(x) ⊕ ROTR18(x) ⊕ ROTR41(x) +#define ZVK_SHA512_SUM1(X) \ + (ZVK_ROR64(X, 14) ^ ZVK_ROR64(X, 18) ^ ZVK_ROR64(X, 41)) + +// sig0(x) = ROTR1(x) ⊕ ROTR8(x) ⊕ SHR7(x) +#define ZVK_SHA512_SIG0(X) \ + (ZVK_ROR64(X, 1) ^ ZVK_ROR64(X, 8) ^ ((X) >> 7)) + +// sig1(x) = ROTR19(x) ⊕ ROTR61(x) ⊕ SHR6(x) +#define ZVK_SHA512_SIG1(X) \ + (ZVK_ROR64(X, 19) ^ ZVK_ROR64(X, 61) ^ ((X) >> 6)) + +// Given the schedule words W[t+0], W[t+1], W[t+9], W[t+14], computes +// W[t+16]. +#define ZVK_SHA512_SCHEDULE(W14, W9, W1, W0) \ + (ZVK_SHA512_SIG1(W14) + (W9) + ZVK_SHA512_SIG0(W1) + (W0)) + +// Performs one round of compression (out of the 80 rounds), given the state +// temporaries A,B,C,...,H, and KW, the sum Kt+Wt. +// Updates A,B,C,...,H to their new values. KW is not modified. +// +// Note that some of the logic could be omitted in vsha2c[hl] since +// some of the variables are dropped in each of those. However removing +// those unnecessary updates reduces the opportunities to share this single +// per-round logic and forces us to move further away from how the logic +// is expressed in FIPS PUB 180-4.
+#define ZVK_SHA512_COMPRESS(A, B, C, D, E, F, G, H, KW) \ + { \ + const uint64_t t1 = (H) + ZVK_SHA512_SUM1(E) + \ + ZVK_SHA_CH((E), (F), (G)) + (KW); \ + const uint64_t t2 = ZVK_SHA512_SUM0(A) + ZVK_SHA_MAJ((A), (B), (C)); \ + (H) = (G); \ + (G) = (F); \ + (F) = (E); \ + (E) = (D) + t1; \ + (D) = (C); \ + (C) = (B); \ + (B) = (A); \ + (A) = t1 + t2; \ + } + +#endif // RISCV_ZVKNH_EXT_MACROS_H_ From eadb0e1129c23e709b0565740f0fc1a3359de7b7 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Thu, 1 Jun 2023 18:07:38 -0700 Subject: [PATCH 040/127] Zvk: Implement Zvkned, vector AES single round Implement the Zvkned extension, "NIST Suite: Vector AES Encryption & Decryption (Single Round)". - vaeskf1.vi: AES forward key scheduling, AES-128. - vaeskf2.vi: AES forward key scheduling, AES-256. - vaesz.vs: AES encryption/decryption, 0-th round. - vaesdm.{vs,vv}: AES decryption, middle rounds. - vaesdf.{vs,vv}: AES decryption, final round. - vaesem.{vs,vv}: AES encryption, middle rounds. - vaesef.{vs,vv}: AES encryption, final round. An extension specific header containing common logic is added. 
Co-authored-by: Stanislaw Kardach Signed-off-by: Eric Gouriou --- riscv/insns/vaesdf_vs.h | 43 ++++++ riscv/insns/vaesdf_vv.h | 37 ++++++ riscv/insns/vaesdm_vs.h | 44 +++++++ riscv/insns/vaesdm_vv.h | 38 ++++++ riscv/insns/vaesef_vs.h | 43 ++++++ riscv/insns/vaesef_vv.h | 37 ++++++ riscv/insns/vaesem_vs.h | 44 +++++++ riscv/insns/vaesem_vv.h | 38 ++++++ riscv/insns/vaeskf1_vi.h | 65 +++++++++ riscv/insns/vaeskf2_vi.h | 89 +++++++++++++ riscv/insns/vaesz_vs.h | 24 ++++ riscv/riscv.mk.in | 14 ++ riscv/zvkned_ext_macros.h | 270 ++++++++++++++++++++++++++++++++++++++ 13 files changed, 786 insertions(+) create mode 100644 riscv/insns/vaesdf_vs.h create mode 100644 riscv/insns/vaesdf_vv.h create mode 100644 riscv/insns/vaesdm_vs.h create mode 100644 riscv/insns/vaesdm_vv.h create mode 100644 riscv/insns/vaesef_vs.h create mode 100644 riscv/insns/vaesef_vv.h create mode 100644 riscv/insns/vaesem_vs.h create mode 100644 riscv/insns/vaesem_vv.h create mode 100644 riscv/insns/vaeskf1_vi.h create mode 100644 riscv/insns/vaeskf2_vi.h create mode 100644 riscv/insns/vaesz_vs.h create mode 100644 riscv/zvkned_ext_macros.h diff --git a/riscv/insns/vaesdf_vs.h b/riscv/insns/vaesdf_vs.h new file mode 100644 index 0000000000..a124278477 --- /dev/null +++ b/riscv/insns/vaesdf_vs.h @@ -0,0 +1,43 @@ +// vaesdf.vs vd, vs2 + +#include "zvkned_ext_macros.h" +#include "zvk_ext_macros.h" + +require_vaes_vs_constraints; + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. 
+ const EGU8x16_t scalar_key = P.VU.elt_group(vs2_num, 0);, + { + // For AES128, AES192, or AES256, state and key are 128b/16B values: + // - vd contains the input state, + // - vs2 contains the round key, + // - vd does receive the output state. + // + // While the spec calls for handling the vector as made of EGU32x4 + // element groups (i.e., 4 uint32_t), it is convenient to treat + // AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why + // we extract the operands here instead of using the existing LOOP + // macro that defines/extracts the operand variables as EGU32x4. + EGU8x16_t aes_state = P.VU.elt_group(vd_num, idx_eg); + + // InvShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions. + VAES_INV_SHIFT_ROWS(aes_state); + // InvSubBytes - Apply S-box to every byte in the state + VAES_INV_SUB_BYTES(aes_state); + // AddRoundKey (which is also InvAddRoundKey as it's xor) + EGU8x16_XOREQ(aes_state, scalar_key); + // InvMixColumns is not performed in the final round. + + // Update the destination register. + EGU8x16_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU8x16_COPY(vd, aes_state); + } +); diff --git a/riscv/insns/vaesdf_vv.h b/riscv/insns/vaesdf_vv.h new file mode 100644 index 0000000000..9fca5722fb --- /dev/null +++ b/riscv/insns/vaesdf_vv.h @@ -0,0 +1,37 @@ +// vaesdf.vv vd, vs2 + +#include "zvkned_ext_macros.h" +#include "zvk_ext_macros.h" + +require_vaes_vv_constraints; + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + {}, // No PRELOOP. + { + // For AES128, AES192, or AES256, state and key are 128b/16B values: + // - vd in contains the input state, + // - vs2 contains the input round key, + // - vd out receives the output state. + // + // While the spec calls for handling the vector as made of EGU32x4 + // element groups (i.e., 4 uint32_t), it is convenient to treat + // AES state and key as EGU8x16 (i.e., 16 uint8_t). 
This is why + // we extract the operands here instead of using the existing LOOP + // macro that defines/extracts the operand variables as EGU32x4. + EGU8x16_t aes_state = P.VU.elt_group(vd_num, idx_eg); + const EGU8x16_t round_key = P.VU.elt_group(vs2_num, idx_eg); + + // InvShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions. + VAES_INV_SHIFT_ROWS(aes_state); + // InvSubBytes - Apply S-box to every byte in the state + VAES_INV_SUB_BYTES(aes_state); + // AddRoundKey (which is also InvAddRoundKey as it's xor) + EGU8x16_XOREQ(aes_state, round_key); + // InvMixColumns is not performed in the final round. + + // Update the destination register. + EGU8x16_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU8x16_COPY(vd, aes_state); + } +); diff --git a/riscv/insns/vaesdm_vs.h b/riscv/insns/vaesdm_vs.h new file mode 100644 index 0000000000..3c23e69e93 --- /dev/null +++ b/riscv/insns/vaesdm_vs.h @@ -0,0 +1,44 @@ +// vaesdm.vs vd, vs2 + +#include "zvkned_ext_macros.h" +#include "zvk_ext_macros.h" + +require_vaes_vs_constraints; + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. + const EGU8x16_t scalar_key = P.VU.elt_group(vs2_num, 0);, + { + // For AES128, AES192, or AES256, state and key are 128b/16B values: + // - vd in contains the input state, + // - vs2 contains the input round key, + // - vd out receives the output state. + // + // While the spec calls for handling the vector as made of EGU32x4 + // element groups (i.e., 4 uint32_t), it is convenient to treat + // AES state and key as EGU8x16 (i.e., 16 uint8_t). 
This is why + // we extract the operands here instead of using the existing LOOP + // macro that defines/extracts the operand variables as EGU32x4. + EGU8x16_t aes_state = P.VU.elt_group(vd_num, idx_eg); + + // InvShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions. + VAES_INV_SHIFT_ROWS(aes_state); + // InvSubBytes - Apply S-box to every byte in the state + VAES_INV_SUB_BYTES(aes_state); + // AddRoundKey (which is also InvAddRoundKey as it's xor) + EGU8x16_XOREQ(aes_state, scalar_key); + // InvMixColumns + VAES_INV_MIX_COLUMNS(aes_state); + + // Update the destination register. + EGU8x16_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU8x16_COPY(vd, aes_state); + } +); diff --git a/riscv/insns/vaesdm_vv.h b/riscv/insns/vaesdm_vv.h new file mode 100644 index 0000000000..9c29cd965e --- /dev/null +++ b/riscv/insns/vaesdm_vv.h @@ -0,0 +1,38 @@ +// vaesdm.vv vd, vs2 + +#include "zvkned_ext_macros.h" +#include "zvk_ext_macros.h" + +require_vaes_vv_constraints; + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + {}, // No PRELOOP. + { + // For AES128, AES192, or AES256, state and key are 128b/16B values: + // - vd contains the input state, + // - vs2 contains the round key, + // - vd does receive the output state. + // + // While the spec calls for handling the vector as made of EGU32x4 + // element groups (i.e., 4 uint32_t), it is convenient to treat + // AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why + // we extract the operands here instead of using the existing LOOP + // macro that defines/extracts the operand variables as EGU32x4. + EGU8x16_t aes_state = P.VU.elt_group(vd_num, idx_eg); + const EGU8x16_t round_key = P.VU.elt_group(vs2_num, idx_eg); + + // InvShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions. 
+ VAES_INV_SHIFT_ROWS(aes_state); + // InvSubBytes - Apply S-box to every byte in the state + VAES_INV_SUB_BYTES(aes_state); + // AddRoundKey (which is also InvAddRoundKey as it's xor) + EGU8x16_XOREQ(aes_state, round_key); + // InvMixColumns + VAES_INV_MIX_COLUMNS(aes_state); + + // Update the destination register. + EGU8x16_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU8x16_COPY(vd, aes_state); + } +); diff --git a/riscv/insns/vaesef_vs.h b/riscv/insns/vaesef_vs.h new file mode 100644 index 0000000000..2d32653345 --- /dev/null +++ b/riscv/insns/vaesef_vs.h @@ -0,0 +1,43 @@ +// vaesef.vs vd, vs2 + +#include "zvkned_ext_macros.h" +#include "zvk_ext_macros.h" + +require_vaes_vs_constraints; + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. + const EGU8x16_t scalar_key = P.VU.elt_group(vs2_num, 0);, + { + // For AES128, AES192, or AES256, state and key are 128b/16B values: + // - vd contains the input state, + // - vs2 contains the round key, + // - vd receives the output state. + // + // While the spec calls for handling the vector as made of EGU32x4 + // element groups (i.e., 4 uint32_t), it is convenient to treat + // AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why + // we extract the operands here instead of using the existing LOOP + // macro that defines/extracts the operand variables as EGU32x4. + EGU8x16_t aes_state = P.VU.elt_group(vd_num, idx_eg); + + // SubBytes - Apply S-box to every byte in the state + VAES_SUB_BYTES(aes_state); + // ShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions. 
+ VAES_SHIFT_ROWS(aes_state); + // MixColumns is not performed for the final round. + // AddRoundKey + EGU8x16_XOREQ(aes_state, scalar_key); + + // Update the destination register. + EGU8x16_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU8x16_COPY(vd, aes_state); + } +); diff --git a/riscv/insns/vaesef_vv.h b/riscv/insns/vaesef_vv.h new file mode 100644 index 0000000000..9b43a6d213 --- /dev/null +++ b/riscv/insns/vaesef_vv.h @@ -0,0 +1,37 @@ +// vaesef.vv vd, vs2 + +#include "zvkned_ext_macros.h" +#include "zvk_ext_macros.h" + +require_vaes_vv_constraints; + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + {}, // No PRELOOP. + { + // For AES128, AES192, or AES256, state and key are 128b/16B values: + // - vd contains the input state, + // - vs2 contains the round key, + // - vd receives the output state. + // + // While the spec calls for handling the vector as made of EGU32x4 + // element groups (i.e., 4 uint32_t), it is convenient to treat + // AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why + // we extract the operands here instead of using the existing LOOP + // macro that defines/extracts the operand variables as EGU32x4. + EGU8x16_t aes_state = P.VU.elt_group(vd_num, idx_eg); + const EGU8x16_t round_key = P.VU.elt_group(vs2_num, idx_eg); + + // SubBytes - Apply S-box to every byte in the state + VAES_SUB_BYTES(aes_state); + // ShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions. + VAES_SHIFT_ROWS(aes_state); + // MixColumns is not performed for the final round. + // AddRoundKey + EGU8x16_XOREQ(aes_state, round_key); + + // Update the destination register. 
+ EGU8x16_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU8x16_COPY(vd, aes_state); + } +); diff --git a/riscv/insns/vaesem_vs.h b/riscv/insns/vaesem_vs.h new file mode 100644 index 0000000000..348cd9f83f --- /dev/null +++ b/riscv/insns/vaesem_vs.h @@ -0,0 +1,44 @@ +// vaesem.vs vd, vs2 + +#include "zvkned_ext_macros.h" +#include "zvk_ext_macros.h" + +require_vaes_vs_constraints; + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. + const EGU8x16_t scalar_key = P.VU.elt_group(vs2_num, 0);, + { + // For AES128, AES192, or AES256, state and key are 128b/16B values: + // - vd contains the input state, + // - vs2 contains the round key, + // - vd receives the output state. + // + // While the spec calls for handling the vector as made of EGU32x4 + // element groups (i.e., 4 uint32_t), it is convenient to treat + // AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why + // we extract the operands here instead of using the existing LOOP + // macro that defines/extracts the operand variables as EGU32x4. + EGU8x16_t aes_state = P.VU.elt_group(vd_num, idx_eg); + + // SubBytes - Apply S-box to every byte in the state + VAES_SUB_BYTES(aes_state); + // ShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions. + VAES_SHIFT_ROWS(aes_state); + // MixColumns + VAES_MIX_COLUMNS(aes_state); + // AddRoundKey + EGU8x16_XOREQ(aes_state, scalar_key); + + // Update the destination register. 
+ EGU8x16_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU8x16_COPY(vd, aes_state); + } +); diff --git a/riscv/insns/vaesem_vv.h b/riscv/insns/vaesem_vv.h new file mode 100644 index 0000000000..34f0056590 --- /dev/null +++ b/riscv/insns/vaesem_vv.h @@ -0,0 +1,38 @@ +// vaesem.vv vd, vs2 + +#include "zvkned_ext_macros.h" +#include "zvk_ext_macros.h" + +require_vaes_vv_constraints; + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + {}, // No PRELOOP. + { + // For AES128, AES192, or AES256, state and key are 128b/16B values: + // - vd contains the input state, + // - vs2 contains the round key, + // - vd receives the output state. + // + // While the spec calls for handling the vector as made of EGU32x4 + // element groups (i.e., 4 uint32_t), it is convenient to treat + // AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why + // we extract the operands here instead of using the existing LOOP + // macro that defines/extracts the operand variables as EGU32x4. + EGU8x16_t aes_state = P.VU.elt_group(vd_num, idx_eg); + const EGU8x16_t round_key = P.VU.elt_group(vs2_num, idx_eg); + + // SubBytes - Apply S-box to every byte in the state + VAES_SUB_BYTES(aes_state); + // ShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions. + VAES_SHIFT_ROWS(aes_state); + // MixColumns + VAES_MIX_COLUMNS(aes_state); + // AddRoundKey + EGU8x16_XOREQ(aes_state, round_key); + + // Update the destination register. + EGU8x16_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU8x16_COPY(vd, aes_state); + } +); diff --git a/riscv/insns/vaeskf1_vi.h b/riscv/insns/vaeskf1_vi.h new file mode 100644 index 0000000000..28d03d03b1 --- /dev/null +++ b/riscv/insns/vaeskf1_vi.h @@ -0,0 +1,65 @@ +// vaeskf1.vi vd, vs2, rnd + +#include "zvk_ext_macros.h" +#include "zvkned_ext_macros.h" + +require_vaeskf_vi_constraints; + +// There is one round constant for each round number +// between 1 and 10. We index using 'round# -1'. 
+static constexpr uint8_t kRoundConstants[10] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 +}; + +// For AES128, AES192, or AES256, keys (and state) are handled as +// 128b/16B values. +// +// The Zvkned spec calls for handling the vector as made of EGU32x4 +// element groups (i.e., 4 uint32_t), and FIPS-197 AES specification +// describes the key expansion in terms of manipulations of 32 bit +// words, so using the EGU32x4 is natural. +// +VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP( + {}, + // The following statements will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the 'round' variable + // declared and defined here to be visible in the loop block. + // Only consider the bottom 4 bits of the immediate. + const reg_t zimm4 = zimm5 & 0xF; + // Normalize the round value to be in [1, 10] by toggling bit 3 + // if outside the range (i.e., +8 or -8). + const reg_t round = ((1 <= zimm4) && (zimm4 <= 10)) ? zimm4 : (zimm4 ^ 0x8); + const uint32_t rcon = kRoundConstants[round - 1];, + // Per Element Group body. + { + // vaeskf1_vi produces key[i+1] in vd, it receives key[i] in vs2, + // i.e., 4x32b values (4 words). + // + // The logic is fairly similar between vaeskf1/vaeskf2, with the following + // differences: + // - in AES-128 (vaeskf1), we get both the 'temp' word and + // the "previous words" w0..w3 from key[i]/vs2. + // - in AES-256 (vaeskf2), we get 'temp' from key[i]/vs2, and + // the "previous words" w0..w3 from key[i-1]/vd. + + // 'temp' is extracted from the last (most significant) word of key[i].
+ uint32_t temp = vs2[3]; + temp = (temp >> 8) | (temp << 24); // Rotate right by 8 + temp = (((uint32_t)AES_ENC_SBOX[(temp >> 24) & 0xFF] << 24) | + ((uint32_t)AES_ENC_SBOX[(temp >> 16) & 0xFF] << 16) | + ((uint32_t)AES_ENC_SBOX[(temp >> 8) & 0xFF] << 8) | + ((uint32_t)AES_ENC_SBOX[(temp >> 0) & 0xFF] << 0)); + temp = temp ^ rcon; + + // "old" words are the w[i-Nk] of FIPS-197. They are extracted + // from vs2, which contains key[i] in AES-128 where Nk=4. + const uint32_t w0 = vs2[0] ^ temp; + const uint32_t w1 = vs2[1] ^ w0; + const uint32_t w2 = vs2[2] ^ w1; + const uint32_t w3 = vs2[3] ^ w2; + + // Overwrite vd with k[i+1] from the new words. + SET_EGU32x4_LE(vd, w0, w1, w2, w3); + } +); diff --git a/riscv/insns/vaeskf2_vi.h b/riscv/insns/vaeskf2_vi.h new file mode 100644 index 0000000000..49c2a2db02 --- /dev/null +++ b/riscv/insns/vaeskf2_vi.h @@ -0,0 +1,89 @@ +// vaeskf2.vi vd, vs2, rnd + +#include "zvk_ext_macros.h" +#include "zvkned_ext_macros.h" + +require_vaeskf_vi_constraints; + +// Round Constants +// +// Only the even rounds need to be encoded, the odd ones can use 0 +// or skip the rcon handling. We can use '(round# / 2) - 1' +// (or "(round# >> 1) - 1") to index into the array. +// +// Round# Constant +// [ 2] -> kRoundConstants[0] +// [ 3] -> 0 / Nothing +// [ 4] -> kRoundConstants[1] +// [ 5] -> 0 / Nothing +// [ 6] -> kRoundConstants[2] +// [ 7] -> 0 / Nothing +// ... +// [13] -> 0 / Nothing +// [14] -> kRoundConstants[6] +static constexpr uint8_t kRoundConstants[7] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, +}; + +// For AES128, AES192, or AES256, keys (and state) are handled as +// 128b/16B values. +// +// The Zvkned spec calls for handling the vector as made of EGU32x4 +// element groups (i.e., 4 uint32_t), and FIPS-197 AES specification +// describes the key expansion in terms of manipulations of 32 bit +// words, so using the EGU32x4 is natural.
+// +VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP( + {}, + // The following statements will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the 'round' variable + // declared and defined here to be visible in the loop block. + // Only consider the bottom 4 bits of the immediate. + const reg_t zimm4 = zimm5 & 0xF; + // Normalize the round value to be in [2, 14] by toggling bit 3 + // if outside the range (i.e., +8 or -8). + const reg_t round = ((2 <= zimm4) && (zimm4 <= 14)) ? zimm4 : (zimm4 ^ 0x8);, + // Per Element Group body. + { + // vaeskf2_vi produces key[i+1] in vd, it receives key[i] in vs2, + // i.e., 4x32b values (4 words). + // + // The logic is fairly similar between vaeskf1/vaeskf2, with the following + // differences: + // - in AES-128 (vaeskf1), we get both the 'temp' word and + // the "previous words" w0..w3 from key[i]/vs2. + // - in AES-256 (vaeskf2), we get 'temp' from key[i]/vs2, and + // the "previous words" w0..w3 from key[i-1]/vd. + + // 'temp' is extracted from the last (most significant) word of key[i]. + uint32_t temp = vs2[3]; + // With AES-256, when we have an odd round number, we hit the + // Nk > 6 and i mod Nk = 4 + // condition in the FIPS-197 key expansion pseudo-code (Figure 11). + // In those cases we skip RotWord and the round constant is 0. + const bool is_even_round = (round & 0x1) == 0; + if (is_even_round) { + temp = (temp >> 8) | (temp << 24); // Rotate right by 8 + } + temp = (((uint32_t)AES_ENC_SBOX[(temp >> 24) & 0xFF] << 24) | + ((uint32_t)AES_ENC_SBOX[(temp >> 16) & 0xFF] << 16) | + ((uint32_t)AES_ENC_SBOX[(temp >> 8) & 0xFF] << 8) | + ((uint32_t)AES_ENC_SBOX[(temp >> 0) & 0xFF] << 0)); + + if (is_even_round) { + const uint32_t rcon = kRoundConstants[(round >> 1) - 1]; + temp = temp ^ rcon; + } + + // "old" words are the w[i-Nk] of FIPS-197.
For AES-256, where Nk=8, + // they are extracted from vd which contains key[i-1]. + const uint32_t w0 = vd[0] ^ temp; + const uint32_t w1 = vd[1] ^ w0; + const uint32_t w2 = vd[2] ^ w1; + const uint32_t w3 = vd[3] ^ w2; + + // Overwrite vd with k[i+1] from the new words. + SET_EGU32x4_LE(vd, w0, w1, w2, w3); + } +); diff --git a/riscv/insns/vaesz_vs.h b/riscv/insns/vaesz_vs.h new file mode 100644 index 0000000000..c3dc931c93 --- /dev/null +++ b/riscv/insns/vaesz_vs.h @@ -0,0 +1,24 @@ +// vaesz.vs vd, vs2 + +#include "zvk_ext_macros.h" +#include "zvkned_ext_macros.h" + +require_vaes_vs_constraints; + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. + const EGU8x16_t scalar_key = P.VU.elt_group(vs2_num, 0);, + // Per Element Group body. + { + EGU8x16_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + // Produce vd = vd ^ "common key from vs2". + EGU8x16_XOR(vd, vd, scalar_key); + } +); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 4ce088f35f..2d75662101 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1368,6 +1368,19 @@ riscv_insn_ext_zvkg= \ vghsh_vv \ vgmul_vv \ +riscv_insn_ext_zvkned = \ + vaesdf_vs \ + vaesdf_vv \ + vaesdm_vs \ + vaesdm_vv \ + vaesef_vs \ + vaesef_vv \ + vaesem_vs \ + vaesem_vv \ + vaeskf1_vi \ + vaeskf2_vi \ + vaesz_vs \ + # Covers both Zvknha and Zvkhnb. 
riscv_insn_ext_zvknh = \ vsha2cl_vv \ @@ -1378,6 +1391,7 @@ riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvbb) \ $(riscv_insn_ext_zvbc) \ $(riscv_insn_ext_zvkg) \ + $(riscv_insn_ext_zvkned) \ $(riscv_insn_ext_zvknh) \ riscv_insn_list = \ diff --git a/riscv/zvkned_ext_macros.h b/riscv/zvkned_ext_macros.h new file mode 100644 index 0000000000..db705c71e5 --- /dev/null +++ b/riscv/zvkned_ext_macros.h @@ -0,0 +1,270 @@ +// Helper macros to help implement instructions defined as part of +// the RISC-V Zvkned extension (vector AES single round). + +#include "insns/aes_common.h" + +#ifndef RISCV_ZVKNED_EXT_MACROS_H_ +#define RISCV_ZVKNED_EXT_MACROS_H_ + +// vaes*.vs instruction constraints: +// - Zvkned is enabled +// - EGW (128) <= LMUL * VLEN +// - vd and vs2 cannot overlap +// +// The constraint that vstart and vl are both EGS (4) aligned +// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. +#define require_vaes_vs_constraints \ + do { \ + require_zvkned; \ + require(P.VU.vsew == 32); \ + require_egw_fits(128); \ + require(insn.rd() != insn.rs2()); \ + } while (false) + +// vaes*.vv instruction constraints. Those are the same as the .vs ones, +// except for the overlap constraint that is not present for .vv variants. +// - Zvkned is enabled +// - EGW (128) <= LMUL * VLEN +// +// The constraint that vstart and vl are both EGS (4) aligned +// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. +#define require_vaes_vv_constraints \ + do { \ + require_zvkned; \ + require(P.VU.vsew == 32); \ + require_egw_fits(128); \ + } while (false) + +// vaeskf*.vi instruction constraints. Those are the same as the .vv ones. +#define require_vaeskf_vi_constraints \ + do { \ + require_zvkned; \ + require(P.VU.vsew == 32); \ + require_egw_fits(128); \ + } while (false) + +#define VAES_XTIME(A) (((A) << 1) ^ (((A) & 0x80) ? 0x1b : 0)) + +#define VAES_GFMUL(A, B) \ + ((((B) & 0x1) ? (A) : 0) ^ \ + (((B) & 0x2) ? VAES_XTIME(A) : 0) ^ \ + (((B) & 0x4) ? 
VAES_XTIME(VAES_XTIME(A)) : 0) ^ \ + (((B) & 0x8) ? VAES_XTIME(VAES_XTIME(VAES_XTIME(A))) : 0)) + +// Apply the S-box transform to every byte in the VAESState 'state' +#define VAES_SUB_BYTES(STATE) \ + do { \ + static constexpr uint8_t kVAESXEncSBox[256]= { \ + 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, \ + 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, \ + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, \ + 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, \ + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, \ + 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, \ + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, \ + 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, \ + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, \ + 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, \ + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, \ + 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, \ + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, \ + 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, \ + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, \ + 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, \ + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, \ + 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, \ + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, \ + 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, \ + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, \ + 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, \ + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, \ + 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, \ + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, \ + 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, \ + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, \ + 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, \ + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, \ + 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, \ + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, \ + 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16, \ + }; \ + for (uint8_t& byte : (STATE)) { \ + byte = kVAESXEncSBox[byte]; \ 
+ } \ + } while (0) + +// Applies the S-box inverse (decode) transform to every byte +// in the VAESState 'state'. +#define VAES_INV_SUB_BYTES(STATE) \ + do { \ + static constexpr uint8_t kVAESXDecSBox[256] = { \ + 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, \ + 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, \ + 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, \ + 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, \ + 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, \ + 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, \ + 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, \ + 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, \ + 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, \ + 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, \ + 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, \ + 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, \ + 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, \ + 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, \ + 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, \ + 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, \ + 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, \ + 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, \ + 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, \ + 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E, \ + 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, \ + 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, \ + 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, \ + 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, \ + 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, \ + 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, \ + 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, \ + 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, \ + 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, \ + 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, \ + 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, \ + 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D, \ + }; \ + for (uint8_t &byte : (STATE)) { \ + byte = kVAESXDecSBox[byte]; \ + } \ + } while (0) + +// Shift the state rows, 
as specified in ShiftRows. +// 'STATE' is a VAESState value. +#define VAES_SHIFT_ROWS(STATE) \ + do { \ + uint8_t temp; \ + /* Row 0 (byte indices 0, 4, 8, 12) does not rotate. */ \ + /* Row 1 (byte indices 1, 5, 9, 13) rotates left by 1 position. */ \ + temp = (STATE)[1]; \ + (STATE)[ 1] = (STATE)[ 5]; \ + (STATE)[ 5] = (STATE)[ 9]; \ + (STATE)[ 9] = (STATE)[13]; \ + (STATE)[13] = temp; \ + /* Row 2 (byte indices 2, 6, 10, 14) rotates by 2 positions. */ \ + temp = (STATE)[2]; \ + (STATE)[ 2] = (STATE)[10]; \ + (STATE)[10] = temp; \ + temp = (STATE)[6]; \ + (STATE)[ 6] = (STATE)[14]; \ + (STATE)[14] = temp; \ + /* Row 3 (byte indices 3, 7, 11, 15) rotates left by 3 positions (or right by 1). */ \ + temp = (STATE)[3]; \ + (STATE)[ 3] = (STATE)[15]; \ + (STATE)[15] = (STATE)[11]; \ + (STATE)[11] = (STATE)[ 7]; \ + (STATE)[ 7] = temp; \ + } while (0) + +// Shifts the state rows, as specified in InvShiftRows. +// 'STATE' is a VAESState value. +#define VAES_INV_SHIFT_ROWS(STATE) \ + do { \ + uint8_t temp; \ + /* Row 0 (byte indices 0, 4, 8, 12) does not rotate. */ \ + /* Row 1 (byte indices 1, 5, 9, 13) rotates right by 1 position. */ \ + temp = (STATE)[1]; \ + (STATE)[ 1] = (STATE)[13]; \ + (STATE)[13] = (STATE)[ 9]; \ + (STATE)[ 9] = (STATE)[ 5]; \ + (STATE)[ 5] = temp; \ + /* Row 2 (byte indices 2, 6, 10, 14) rotates by 2 positions. */ \ + temp = (STATE)[2]; \ + (STATE)[ 2] = (STATE)[10]; \ + (STATE)[10] = temp; \ + temp = (STATE)[6]; \ + (STATE)[ 6] = (STATE)[14]; \ + (STATE)[14] = temp; \ + /* Row 3 (byte indices 3, 7, 11, 15) rotates right by 3 positions (or left by 1). */ \ + temp = (STATE)[3]; \ + (STATE)[ 3] = (STATE)[ 7]; \ + (STATE)[ 7] = (STATE)[11]; \ + (STATE)[11] = (STATE)[15]; \ + (STATE)[15] = temp; \ + } while (0) + +// Implements the function producing one byte, one-fourth of the column +// transformation MixColumns() specified in FIPS-197 5.1.3 . +// +// The arguments are all bytes (i.e., uint8_t). The function implemented +// is +// F(A, B, C, D) = (2 . A) xor (3 .
B) xor C xor D +// where '.' denotes the Galois Field multiplication over 2**8. +// +#define VAES_MIX_COLUMN_BYTE(A, B, C, D) \ + (VAES_GFMUL((A), 0x2) ^ VAES_GFMUL((B), 0x3) ^ (C) ^ (D)) + +// Implements the function producing one byte, one-fourth of the column +// transformation InvMixColumns() specified in FIPS-197 5.3.3 . +// +// The arguments are all bytes (i.e., uint8_t). The function implemented +// is +// F(A, B, C, D) = (0xE . A) xor (0xB . B) xor (0xD . C) xor (0x9 . D) +// where '.' denotes the Galois Field multiplication over 2**8. +// +#define VAES_INV_MIX_COLUMN_BYTE(A, B, C, D) \ + (VAES_GFMUL((A), 0xE) ^ \ + VAES_GFMUL((B), 0xB) ^ \ + VAES_GFMUL((C), 0xD) ^ \ + VAES_GFMUL((D), 0x9)) + +// Mixes, in place, the 4-byte column at index COL_IDX of 'STATE' +// (bytes [4 * COL_IDX, 4 * COL_IDX + 3]). +#define VAES_MIX_COLUMN(STATE, COL_IDX) \ + do { \ + uint8_t *column = &(STATE)[(COL_IDX) * 4]; \ + /* Extract the bytes, before we start overwriting them */ \ + const uint8_t b0 = column[0]; \ + const uint8_t b1 = column[1]; \ + const uint8_t b2 = column[2]; \ + const uint8_t b3 = column[3]; \ + /* Every iteration rotates the byte indices by 1 */ \ + column[0] = VAES_MIX_COLUMN_BYTE(b0, b1, b2, b3); \ + column[1] = VAES_MIX_COLUMN_BYTE(b1, b2, b3, b0); \ + column[2] = VAES_MIX_COLUMN_BYTE(b2, b3, b0, b1); \ + column[3] = VAES_MIX_COLUMN_BYTE(b3, b0, b1, b2); \ + } while (0) + +// Inverse-mixes, in place, the 4-byte column at index COL_IDX of 'STATE' +// (bytes [4 * COL_IDX, 4 * COL_IDX + 3]).
+#define VAES_INV_MIX_COLUMN(STATE, COL_IDX) \ + do { \ + uint8_t *column = &(STATE)[(COL_IDX) * 4]; \ + /* Extract the bytes, before we start overwriting them */ \ + const uint8_t b0 = column[0]; \ + const uint8_t b1 = column[1]; \ + const uint8_t b2 = column[2]; \ + const uint8_t b3 = column[3]; \ + /* Every iteration rotates the byte indices by 1 */ \ + column[0] = VAES_INV_MIX_COLUMN_BYTE(b0, b1, b2, b3); \ + column[1] = VAES_INV_MIX_COLUMN_BYTE(b1, b2, b3, b0); \ + column[2] = VAES_INV_MIX_COLUMN_BYTE(b2, b3, b0, b1); \ + column[3] = VAES_INV_MIX_COLUMN_BYTE(b3, b0, b1, b2); \ + } while (0) + +// Implements MixColumns as defined in FIPS-197 5.1.3. +#define VAES_MIX_COLUMNS(STATE) \ + do { \ + VAES_MIX_COLUMN((STATE), 0); \ + VAES_MIX_COLUMN((STATE), 1); \ + VAES_MIX_COLUMN((STATE), 2); \ + VAES_MIX_COLUMN((STATE), 3); \ + } while (0) + +// Implements InvMixColumns as defined in FIPS-197 5.3.3. +#define VAES_INV_MIX_COLUMNS(STATE) \ + do { \ + VAES_INV_MIX_COLUMN((STATE), 0); \ + VAES_INV_MIX_COLUMN((STATE), 1); \ + VAES_INV_MIX_COLUMN((STATE), 2); \ + VAES_INV_MIX_COLUMN((STATE), 3); \ + } while (0) + +#endif // RISCV_ZVKNED_EXT_MACROS_H_ From cbb2b1a224d8922c6d3146da56f5087a3858ced5 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Thu, 1 Jun 2023 18:07:53 -0700 Subject: [PATCH 041/127] Zvk: Implement Zvksed, vector SM4 Block Cipher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the Zvksed sub-extension, "ShangMi Suite: SM4 Block Cipher": - vsm4k.vi, vector SM4 key expansion, - vsm4r.{vs,vv}, vector SM4 rounds. This also introduces a header for common vector SM4 logic. 
Co-authored-by: Raghav Gupta Co-authored-by: Albert Jakieła Signed-off-by: Eric Gouriou --- riscv/insns/sm4_common.h | 1 - riscv/insns/vsm4k_vi.h | 52 +++++++++++++++++++++++++++++++++ riscv/insns/vsm4r_vs.h | 51 +++++++++++++++++++++++++++++++++ riscv/insns/vsm4r_vv.h | 37 ++++++++++++++++++++++++ riscv/riscv.mk.in | 6 ++++ riscv/zvksed_ext_macros.h | 60 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 riscv/insns/vsm4k_vi.h create mode 100644 riscv/insns/vsm4r_vs.h create mode 100644 riscv/insns/vsm4r_vv.h create mode 100644 riscv/zvksed_ext_macros.h diff --git a/riscv/insns/sm4_common.h b/riscv/insns/sm4_common.h index 17f129f0ad..24d6ce1d05 100644 --- a/riscv/insns/sm4_common.h +++ b/riscv/insns/sm4_common.h @@ -24,4 +24,3 @@ static const uint8_t sm4_sbox[256] = { 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 }; - diff --git a/riscv/insns/vsm4k_vi.h b/riscv/insns/vsm4k_vi.h new file mode 100644 index 0000000000..8f52e68199 --- /dev/null +++ b/riscv/insns/vsm4k_vi.h @@ -0,0 +1,52 @@ +// vsm4k.vi vd, vs2, round# + +#include "zvksed_ext_macros.h" + +// SM4 Constant Key (CK) - section 7.3.2. of the IETF draft. +static constexpr uint32_t zvksed_ck[32] = { + 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269, + 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9, + 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249, + 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9, + 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229, + 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299, + 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209, + 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 +}; + +require_vsm4_constraints; + +VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP( + {}, + // The following statements will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... 
} ) since we want the 'round' variable + // declared and defined here to be visible in the loop block. + // Only consider the bottom 3 bits of the immediate, ensuring that + // 'round' is in the valid range [0, 7]. + const reg_t round = zimm5 & 0x7;, + // Per Element Group body. + { + // {rk0, rk1, rk2, rk3} <- vs2 + EXTRACT_EGU32x4_WORDS_LE(vs2, rk0, rk1, rk2, rk3); + + uint32_t B = rk1 ^ rk2 ^ rk3 ^ zvksed_ck[4 * round]; + uint32_t S = ZVKSED_SUB_BYTES(B); + uint32_t rk4 = ZVKSED_ROUND_KEY(rk0, S); + + B = rk2 ^ rk3 ^ rk4 ^ zvksed_ck[4 * round + 1]; + S = ZVKSED_SUB_BYTES(B); + uint32_t rk5 = ZVKSED_ROUND_KEY(rk1, S); + + B = rk3 ^ rk4 ^ rk5 ^ zvksed_ck[4 * round + 2]; + S = ZVKSED_SUB_BYTES(B); + uint32_t rk6 = ZVKSED_ROUND_KEY(rk2, S); + + B = rk4 ^ rk5 ^ rk6 ^ zvksed_ck[4 * round + 3]; + S = ZVKSED_SUB_BYTES(B); + uint32_t rk7 = ZVKSED_ROUND_KEY(rk3, S); + + // Update the destination register. + SET_EGU32x4_LE(vd, rk4, rk5, rk6, rk7); + } +); diff --git a/riscv/insns/vsm4r_vs.h b/riscv/insns/vsm4r_vs.h new file mode 100644 index 0000000000..44011eb544 --- /dev/null +++ b/riscv/insns/vsm4r_vs.h @@ -0,0 +1,51 @@ +// vsm4r.vs vd, vs2 + +#include "zvksed_ext_macros.h" + +require_vsm4_constraints; +// No overlap of vd and vs2. +require(insn.rd() != insn.rs2()); + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop.
+ const EGU32x4_t scalar_key = P.VU.elt_group(vs2_num, 0); + const uint32_t rk0 = scalar_key[0]; + const uint32_t rk1 = scalar_key[1]; + const uint32_t rk2 = scalar_key[2]; + const uint32_t rk3 = scalar_key[3];, + { + EGU32x4_t &state = P.VU.elt_group(vd_num, idx_eg, true); + + // {x0, x1,x2, x3} <- vd + EXTRACT_EGU32x4_WORDS_LE(state, x0, x1, x2, x3); + + uint32_t B; + uint32_t S; + + B = x1 ^ x2 ^ x3 ^ rk0; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x4 = ZVKSED_ROUND(x0, S); + + B = x2 ^ x3 ^ x4 ^ rk1; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x5 = ZVKSED_ROUND(x1, S); + + B = x3 ^ x4 ^ x5 ^ rk2; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x6 = ZVKSED_ROUND(x2, S); + + B = x4 ^ x5 ^ x6 ^ rk3; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x7 = ZVKSED_ROUND(x3, S); + + // Update the destination register. + SET_EGU32x4_LE(state, x4, x5, x6, x7); + } +); diff --git a/riscv/insns/vsm4r_vv.h b/riscv/insns/vsm4r_vv.h new file mode 100644 index 0000000000..9a18cecee0 --- /dev/null +++ b/riscv/insns/vsm4r_vv.h @@ -0,0 +1,37 @@ +// vsm4r.vv vd, vs2 + +#include "zvksed_ext_macros.h" + +require_vsm4_constraints; + +VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP( + {}, + { + // vd = {x0, x1,x2, x3} <- vd + EXTRACT_EGU32x4_WORDS_LE(vd, x0, x1, x2, x3); + // {rk0, rk1, rk2, rk3} <- vs2 + EXTRACT_EGU32x4_WORDS_LE(vs2, rk0, rk1, rk2, rk3); + + uint32_t B; + uint32_t S; + + B = x1 ^ x2 ^ x3 ^ rk0; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x4 = ZVKSED_ROUND(x0, S); + + B = x2 ^ x3 ^ x4 ^ rk1; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x5 = ZVKSED_ROUND(x1, S); + + B = x3 ^ x4 ^ x5 ^ rk2; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x6 = ZVKSED_ROUND(x2, S); + + B = x4 ^ x5 ^ x6 ^ rk3; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x7 = ZVKSED_ROUND(x3, S); + + // Update the destination register. 
+ SET_EGU32x4_LE(vd, x4, x5, x6, x7); + } +); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 2d75662101..c774e1bf28 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1387,12 +1387,18 @@ riscv_insn_ext_zvknh = \ vsha2ch_vv \ vsha2ms_vv \ +riscv_insn_ext_zvksed = \ + vsm4k_vi \ + vsm4r_vs \ + vsm4r_vv \ + riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvbb) \ $(riscv_insn_ext_zvbc) \ $(riscv_insn_ext_zvkg) \ $(riscv_insn_ext_zvkned) \ $(riscv_insn_ext_zvknh) \ + $(riscv_insn_ext_zvksed) \ riscv_insn_list = \ $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ diff --git a/riscv/zvksed_ext_macros.h b/riscv/zvksed_ext_macros.h new file mode 100644 index 0000000000..46e399b904 --- /dev/null +++ b/riscv/zvksed_ext_macros.h @@ -0,0 +1,60 @@ +// Helper macros and functions to help implement instructions defined as part of +// the RISC-V Zvksed extension (vectorized SM4). + +#include "insns/sm4_common.h" +#include "zvk_ext_macros.h" + +#ifndef RISCV_ZVKSED_MACROS_H_ +#define RISCV_ZVKSED_MACROS_H_ + +// Constraints common to all vsm4* instructions: +// - Zvksed is enabled +// - VSEW == 32 +// - EGW (128) <= LMUL * VLEN +// +// The constraint that vstart and vl are both EGS (4) aligned +// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. +#define require_vsm4_constraints \ + do { \ + require_zvksed; \ + require(P.VU.vsew == 32); \ + require_egw_fits(128); \ + } while (false) + +// Returns a uint32_t value constructed from the 4 bytes (uint8_t) +// provided in "Little Endian" (LE) order, i.e., from least significant (B0) +// to most significant (B3). +#define ZVKSED_U32_FROM_U8_LE(B0, B1, B2, B3) \ + (((uint32_t)(B0)) << 0 | \ + ((uint32_t)(B1)) << 8 | \ + ((uint32_t)(B2)) << 16 | \ + ((uint32_t)(B3)) << 24) + +// Get byte BYTE of the SBox. +#define ZVKSED_SBOX(BYTE) (sm4_sbox[(BYTE)]) + +// Given an unsigned integer value 'X' and a byte index, +// returns a uint8_t value for the byte at the given index. 
+#define ZVKSED_EXTRACT_U8(X, BYTE_IDX) ((uint8_t)((X) >> ((BYTE_IDX) * 8))) + +// Apply the nonlinear transformation tau to a 32 bit word B - section 6.2.1. +// of the IETF draft. +#define ZVKSED_SUB_BYTES(B) \ + ZVKSED_U32_FROM_U8_LE(ZVKSED_SBOX(ZVKSED_EXTRACT_U8((B), 0)), \ + ZVKSED_SBOX(ZVKSED_EXTRACT_U8((B), 1)), \ + ZVKSED_SBOX(ZVKSED_EXTRACT_U8((B), 2)), \ + ZVKSED_SBOX(ZVKSED_EXTRACT_U8((B), 3))) + +// Perform the linear transformation L to a 32 bit word S and xor it with a 32 +// bit word X - section 6.2.2. of the IETF draft. +#define ZVKSED_ROUND(X, S) \ + ((X) ^ \ + ((S) ^ ZVK_ROL32((S), 2) ^ ZVK_ROL32((S), 10) ^ \ + ZVK_ROL32((S), 18) ^ ZVK_ROL32((S), 24))) + +// Perform the linear transformation L' to a 32 bit word S and xor it with a 32 +// bit word X - section 6.2.2. of the IETF draft. +#define ZVKSED_ROUND_KEY(X, S) \ + ((X) ^ ((S) ^ ZVK_ROL32((S), 13) ^ ZVK_ROL32((S), 23))) + +#endif // RISCV_ZVKSED_MACROS_H_ From a55f96ae9380d5cc9bef05e8b9e82e54d5d6ec35 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Thu, 1 Jun 2023 18:09:07 -0700 Subject: [PATCH 042/127] Zvk: Implement Zvksh, vector SM3 Hash Function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the Zvksh sub-extension, "ShangMi Suite: SM3 Hash Function Instructions": - vsm3me.vv, message expansion, - vsm3c.vi, compression rounds. This also introduces a SM3 specific header for common logic.
Co-authored-by: Raghav Gupta Co-authored-by: Albert Jakieła Co-authored-by: Kornel Dulęba Signed-off-by: Eric Gouriou --- riscv/insns/vsm3c_vi.h | 60 ++++++++++++++++++++++++++++++++++++++++ riscv/insns/vsm3me_vv.h | 39 ++++++++++++++++++++++++++ riscv/riscv.mk.in | 5 ++++ riscv/zvksh_ext_macros.h | 47 +++++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+) create mode 100644 riscv/insns/vsm3c_vi.h create mode 100644 riscv/insns/vsm3me_vv.h create mode 100644 riscv/zvksh_ext_macros.h diff --git a/riscv/insns/vsm3c_vi.h b/riscv/insns/vsm3c_vi.h new file mode 100644 index 0000000000..b3e81216f8 --- /dev/null +++ b/riscv/insns/vsm3c_vi.h @@ -0,0 +1,60 @@ +// vsm3c.vi vd, vs2, rnd + +#include "zvksh_ext_macros.h" + +require_vsm3_constraints; + +VI_ZVK_VD_VS2_ZIMM5_EGU32x8_NOVM_LOOP( + {}, + // No need to validate or normalize 'zimm5' here as this is a 5 bits value + // and all values in 0-31 are valid. + const reg_t round = zimm5;, + { + // {H, G, F, E, D, C, B, A} <- vd + EXTRACT_EGU32x8_WORDS_BE_BSWAP(vd, H, G, F, E, D, C, B, A); + // {_, _, w5, w4, _, _, w1, w0} <- vs2 + EXTRACT_EGU32x8_WORDS_BE_BSWAP(vs2, + UNUSED _unused_w7, UNUSED _unused_w6, w5, w4, + UNUSED _unused_w3, UNUSED _unused_w2, w1, w0); + const uint32_t x0 = w0 ^ w4; // W'[0] in spec documentation. + const uint32_t x1 = w1 ^ w5; // W'[1] + + // Two rounds of compression. 
+ uint32_t ss1; + uint32_t ss2; + uint32_t tt1; + uint32_t tt2; + uint32_t j; + + j = 2 * round; + ss1 = ZVK_ROL32(ZVK_ROL32(A, 12) + E + ZVK_ROL32(ZVKSH_T(j), j % 32), 7); + ss2 = ss1 ^ ZVK_ROL32(A, 12); + tt1 = ZVKSH_FF(A, B, C, j) + D + ss2 + x0; + tt2 = ZVKSH_GG(E, F, G, j) + H + ss1 + w0; + D = C; + const uint32_t C1 = ZVK_ROL32(B, 9); + B = A; + const uint32_t A1 = tt1; + H = G; + const uint32_t G1 = ZVK_ROL32(F, 19); + F = E; + const uint32_t E1 = ZVKSH_P0(tt2); + + j = 2 * round + 1; + ss1 = ZVK_ROL32(ZVK_ROL32(A1, 12) + E1 + ZVK_ROL32(ZVKSH_T(j), j % 32), 7); + ss2 = ss1 ^ ZVK_ROL32(A1, 12); + tt1 = ZVKSH_FF(A1, B, C1, j) + D + ss2 + x1; + tt2 = ZVKSH_GG(E1, F, G1, j) + H + ss1 + w1; + D = C1; + const uint32_t C2 = ZVK_ROL32(B, 9); + B = A1; + const uint32_t A2 = tt1; + H = G1; + const uint32_t G2 = ZVK_ROL32(F, 19); + F = E1; + const uint32_t E2 = ZVKSH_P0(tt2); + + // Update the destination register. + SET_EGU32x8_WORDS_BE_BSWAP(vd, G1, G2, E1, E2, C1, C2, A1, A2); + } +); diff --git a/riscv/insns/vsm3me_vv.h b/riscv/insns/vsm3me_vv.h new file mode 100644 index 0000000000..dd6cb523f2 --- /dev/null +++ b/riscv/insns/vsm3me_vv.h @@ -0,0 +1,39 @@ +// vsm3me.vv vd, vs2, vs1 + +#include "zvk_ext_macros.h" +#include "zvksh_ext_macros.h" + +// Per the SM3 spec, the message expansion computes new words Wi as: +// W[i] = ( P_1( W[i-16] xor W[i-9] xor ( W[i-3] <<< 15 ) ) +// xor ( W[i-13] <<< 7 ) +// xor W[i-6])) +// Using arguments M16 = W[i-16], M9 = W[i-9], etc., +// where Mk stands for "W[i Minus k]", we define the "W function": +#define ZVKSH_W(M16, M9, M3, M13, M6) \ + (ZVKSH_P1((M16) ^ (M9) ^ ZVK_ROL32((M3), 15)) ^ ZVK_ROL32((M13), 7) ^ (M6)) + +require_vsm3_constraints; + +VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP( + {}, + { + // {w7, w6, w5, w4, w3, w2, w1, w0} <- vs1 + EXTRACT_EGU32x8_WORDS_BE_BSWAP(vs1, w7, w6, w5, w4, w3, w2, w1, w0); + // {w15, w14, w13, w12, w11, w10, w9, w8} <- vs2 + EXTRACT_EGU32x8_WORDS_BE_BSWAP(vs2, w15, w14, w13, w12, w11, w10, w9, 
w8); + + // Arguments are W[i-16], W[i-9], W[i-3], W[i-13], W[i-6]. + // Note that some of the newly computed words are used in later invocations. + const uint32_t w16 = ZVKSH_W(w0, w7, w13, w3, w10); + const uint32_t w17 = ZVKSH_W(w1, w8, w14, w4, w11); + const uint32_t w18 = ZVKSH_W(w2, w9, w15, w5, w12); + const uint32_t w19 = ZVKSH_W(w3, w10, w16, w6, w13); + const uint32_t w20 = ZVKSH_W(w4, w11, w17, w7, w14); + const uint32_t w21 = ZVKSH_W(w5, w12, w18, w8, w15); + const uint32_t w22 = ZVKSH_W(w6, w13, w19, w9, w16); + const uint32_t w23 = ZVKSH_W(w7, w14, w20, w10, w17); + + // Update the destination register. + SET_EGU32x8_WORDS_BE_BSWAP(vd, w23, w22, w21, w20, w19, w18, w17, w16); + } +); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index c774e1bf28..a3e125f5d8 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1392,6 +1392,10 @@ riscv_insn_ext_zvksed = \ vsm4r_vs \ vsm4r_vv \ +riscv_insn_ext_zvksh = \ + vsm3c_vi \ + vsm3me_vv \ + riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvbb) \ $(riscv_insn_ext_zvbc) \ @@ -1399,6 +1403,7 @@ riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvkned) \ $(riscv_insn_ext_zvknh) \ $(riscv_insn_ext_zvksed) \ + $(riscv_insn_ext_zvksh) \ riscv_insn_list = \ $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ diff --git a/riscv/zvksh_ext_macros.h b/riscv/zvksh_ext_macros.h new file mode 100644 index 0000000000..71c5a09149 --- /dev/null +++ b/riscv/zvksh_ext_macros.h @@ -0,0 +1,47 @@ +// Helper macros and functions to help implement instructions defined as part of +// the RISC-V Zvksh extension (vectorized SM3). + +#include "zvk_ext_macros.h" + +#ifndef RISCV_INSNS_ZVKSH_COMMON_H_ +#define RISCV_INSNS_ZVKSH_COMMON_H_ + +// Constraints common to all vsm3* instructions: +// - Zvksh is enabled +// - VSEW == 32 +// - EGW (256) <= LMUL * VLEN +// - No overlap of vd and vs2. +// +// The constraint that vstart and vl are both EGS (8) aligned +// is checked in the VI_ZVK_..._EGU32x8_..._LOOP macros.
+#define require_vsm3_constraints \ + do { \ + require_zvksh; \ + require(P.VU.vsew == 32); \ + require_egw_fits(256); \ + require(insn.rd() != insn.rs2()); \ + } while (false) + +#define FF1(X, Y, Z) ((X) ^ (Y) ^ (Z)) +#define FF2(X, Y, Z) (((X) & (Y)) | ((X) & (Z)) | ((Y) & (Z))) + +// Boolean function FF_j - section 4.3. of the IETF draft. +#define ZVKSH_FF(X, Y, Z, J) (((J) <= 15) ? FF1(X, Y, Z) : FF2(X, Y, Z)) + +#define GG1(X, Y, Z) ((X) ^ (Y) ^ (Z)) +#define GG2(X, Y, Z) (((X) & (Y)) | ((~(X)) & (Z))) + +// Boolean function GG_j - section 4.3. of the IETF draft. +#define ZVKSH_GG(X, Y, Z, J) (((J) <= 15) ? GG1(X, Y, Z) : GG2(X, Y, Z)) + +#define T1 0x79CC4519 +#define T2 0x7A879D8A + +// T_j constant - section 4.2. of the IETF draft. +#define ZVKSH_T(J) (((J) <= 15) ? (T1) : (T2)) + +// Permutation functions P_0 and P_1 - section 4.4 of the IETF draft. +#define ZVKSH_P0(X) ((X) ^ ZVK_ROL32((X), 9) ^ ZVK_ROL32((X), 17)) +#define ZVKSH_P1(X) ((X) ^ ZVK_ROL32((X), 15) ^ ZVK_ROL32((X), 23)) + +#endif // RISCV_INSNS_ZVKSH_COMMON_H_ From e1101a13ae8ba0effea1f4647da52cb1c273a105 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Tue, 20 Jun 2023 00:21:42 -0700 Subject: [PATCH 043/127] Zvk: disassembler support Add disassembler support for all instructions in Zvk extensions: - Zvbb (bitmanip) - Zvbc (carryless multiplication) - Zvkg (GMAC) - Zvkned (AES) - Zvknha / Zvknhb (SHA-256, SHA-512) - Zvksed (SM4) - Zvksh (SM3) Macros are used to limit code duplication, following the example of the base V extension. Because the V extension undefines some of its macros after their use, the Zvk support does define some similar macros.
Co-authored-by: Gianluca Guida Signed-off-by: Eric Gouriou --- disasm/disasm.cc | 98 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 096c38f0f3..6f93d241ec 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -187,6 +187,12 @@ struct : public arg_t { } } zimm5; +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string(insn.v_zimm6()); + } +} v_zimm6; + struct : public arg_t { std::string to_string(insn_t insn) const { int32_t target = insn.sb_imm(); @@ -678,6 +684,11 @@ static void NOINLINE add_vector_viu_insn(disassembler_t* d, const char* name, ui d->add_insn(new disasm_insn_t(name, match, mask, {&vd, &vs2, &zimm5, opt, &vm})); } +static void NOINLINE add_vector_viu_z6_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask) +{ + d->add_insn(new disasm_insn_t(name, match, mask, {&vd, &vs2, &v_zimm6, opt, &vm})); +} + static void NOINLINE add_vector_vvm_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask) { d->add_insn(new disasm_insn_t(name, match, mask, {&vd, &vs2, &vs1, &v0})); @@ -2222,6 +2233,93 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DEFINE_R1TYPE(sm3p1); } + if (isa->extension_enabled(EXT_ZVBB)) { +#define DEFINE_VECTOR_VIU_ZIMM6(code) \ + add_vector_viu_z6_insn(this, #code, match_##code, mask_##code) +#define DISASM_VECTOR_VV_VX(name) \ + DEFINE_VECTOR_VV(name##_vv); \ + DEFINE_VECTOR_VX(name##_vx) +#define DISASM_VECTOR_VV_VX_VIU(name) \ + DEFINE_VECTOR_VV(name##_vv); \ + DEFINE_VECTOR_VX(name##_vx); \ + DEFINE_VECTOR_VIU(name##_vi) +#define DISASM_VECTOR_VV_VX_VIU_ZIMM6(name) \ + DEFINE_VECTOR_VV(name##_vv); \ + DEFINE_VECTOR_VX(name##_vx); \ + DEFINE_VECTOR_VIU_ZIMM6(name##_vi) + + DISASM_VECTOR_VV_VX(vandn); + DEFINE_VECTOR_V(vbrev_v); + DEFINE_VECTOR_V(vbrev8_v); + DEFINE_VECTOR_V(vrev8_v); + DEFINE_VECTOR_V(vclz_v); + DEFINE_VECTOR_V(vctz_v); +
DEFINE_VECTOR_V(vcpop_v); + DISASM_VECTOR_VV_VX(vrol); + DISASM_VECTOR_VV_VX_VIU_ZIMM6(vror); + DISASM_VECTOR_VV_VX_VIU(vwsll); + +#undef DEFINE_VECTOR_VIU_ZIMM6 +#undef DISASM_VECTOR_VV_VX +#undef DISASM_VECTOR_VV_VX_VIU +#undef DISASM_VECTOR_VV_VX_VIU_ZIMM6 + } + + if (isa->extension_enabled(EXT_ZVBC)) { +#define DISASM_VECTOR_VV_VX(name) \ + DEFINE_VECTOR_VV(name##_vv); \ + DEFINE_VECTOR_VX(name##_vx) + + DISASM_VECTOR_VV_VX(vclmul); + DISASM_VECTOR_VV_VX(vclmulh); + +#undef DISASM_VECTOR_VV_VX + } + + if (isa->extension_enabled(EXT_ZVKG)) { + // Despite its suffix, the vgmul.vv instruction + // is really ".v", with the form "vgmul.vv vd, vs2". + DEFINE_VECTOR_V(vgmul_vv); + DEFINE_VECTOR_VV(vghsh_vv); + } + + if (isa->extension_enabled(EXT_ZVKNED)) { + // Despite their suffixes, the vaes*.{vv,vs} instructions + // are really ".v", with the form "vaes*.{vv,vs} vd, vs2". +#define DISASM_VECTOR_VV_VS(name) \ + DEFINE_VECTOR_V(name##_vv); \ + DEFINE_VECTOR_V(name##_vs) + + DISASM_VECTOR_VV_VS(vaesdm); + DISASM_VECTOR_VV_VS(vaesdf); + DISASM_VECTOR_VV_VS(vaesem); + DISASM_VECTOR_VV_VS(vaesef); + + DEFINE_VECTOR_V(vaesz_vs); + DEFINE_VECTOR_VIU(vaeskf1_vi); + DEFINE_VECTOR_VIU(vaeskf2_vi); +#undef DISASM_VECTOR_VV_VS + } + + if (isa->extension_enabled(EXT_ZVKNHA) || + isa->extension_enabled(EXT_ZVKNHB)) { + DEFINE_VECTOR_VV(vsha2ms_vv); + DEFINE_VECTOR_VV(vsha2ch_vv); + DEFINE_VECTOR_VV(vsha2cl_vv); + } + + if (isa->extension_enabled(EXT_ZVKSED)) { + DEFINE_VECTOR_VIU(vsm4k_vi); + // Despite their suffixes, the vsm4r.{vv,vs} instructions + // are really ".v", with the form "vsm4r.{vv,vs} vd, vs2".
+ DEFINE_VECTOR_V(vsm4r_vv); + DEFINE_VECTOR_V(vsm4r_vs); + } + + if (isa->extension_enabled(EXT_ZVKSH)) { + DEFINE_VECTOR_VIU(vsm3c_vi); + DEFINE_VECTOR_VV(vsm3me_vv); + } } disassembler_t::disassembler_t(const isa_parser_t *isa) From 59e8b9fab6d96acf74f78f6a7db8cc2005d4fa70 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 6 Jun 2023 13:03:22 -0700 Subject: [PATCH 044/127] device_t: Add missing overrides to derived abstract_device_t classes --- riscv/devices.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/riscv/devices.h b/riscv/devices.h index 02d9e98068..b9f639d45b 100644 --- a/riscv/devices.h +++ b/riscv/devices.h @@ -17,8 +17,8 @@ class simif_t; class bus_t : public abstract_device_t { public: - bool load(reg_t addr, size_t len, uint8_t* bytes); - bool store(reg_t addr, size_t len, const uint8_t* bytes); + bool load(reg_t addr, size_t len, uint8_t* bytes) override; + bool store(reg_t addr, size_t len, const uint8_t* bytes) override; void add_device(reg_t addr, abstract_device_t* dev); std::pair find_device(reg_t addr); @@ -30,8 +30,8 @@ class bus_t : public abstract_device_t { class rom_device_t : public abstract_device_t { public: rom_device_t(std::vector data); - bool load(reg_t addr, size_t len, uint8_t* bytes); - bool store(reg_t addr, size_t len, const uint8_t* bytes); + bool load(reg_t addr, size_t len, uint8_t* bytes) override; + bool store(reg_t addr, size_t len, const uint8_t* bytes) override; const std::vector& contents() { return data; } private: std::vector data; @@ -43,8 +43,8 @@ class mem_t : public abstract_device_t { mem_t(const mem_t& that) = delete; ~mem_t(); - bool load(reg_t addr, size_t len, uint8_t* bytes) { return load_store(addr, len, bytes, false); } - bool store(reg_t addr, size_t len, const uint8_t* bytes) { return load_store(addr, len, const_cast(bytes), true); } + bool load(reg_t addr, size_t len, uint8_t* bytes) override { return load_store(addr, len, bytes, false); } + bool 
store(reg_t addr, size_t len, const uint8_t* bytes) override { return load_store(addr, len, const_cast(bytes), true); } char* contents(reg_t addr); reg_t size() { return sz; } void dump(std::ostream& o); @@ -59,8 +59,8 @@ class mem_t : public abstract_device_t { class clint_t : public abstract_device_t { public: clint_t(simif_t*, uint64_t freq_hz, bool real_time); - bool load(reg_t addr, size_t len, uint8_t* bytes); - bool store(reg_t addr, size_t len, const uint8_t* bytes); + bool load(reg_t addr, size_t len, uint8_t* bytes) override; + bool store(reg_t addr, size_t len, const uint8_t* bytes) override; size_t size() { return CLINT_SIZE; } void increment(reg_t inc); uint64_t get_mtimecmp(reg_t hartid) { return mtimecmp[hartid]; } @@ -98,9 +98,9 @@ struct plic_context_t { class plic_t : public abstract_device_t, public abstract_interrupt_controller_t { public: plic_t(simif_t*, uint32_t ndev); - bool load(reg_t addr, size_t len, uint8_t* bytes); - bool store(reg_t addr, size_t len, const uint8_t* bytes); - void set_interrupt_level(uint32_t id, int lvl); + bool load(reg_t addr, size_t len, uint8_t* bytes) override; + bool store(reg_t addr, size_t len, const uint8_t* bytes) override; + void set_interrupt_level(uint32_t id, int lvl) override; size_t size() { return PLIC_SIZE; } private: std::vector contexts; @@ -129,8 +129,8 @@ class ns16550_t : public abstract_device_t { public: ns16550_t(class bus_t *bus, abstract_interrupt_controller_t *intctrl, uint32_t interrupt_id, uint32_t reg_shift, uint32_t reg_io_width); - bool load(reg_t addr, size_t len, uint8_t* bytes); - bool store(reg_t addr, size_t len, const uint8_t* bytes); + bool load(reg_t addr, size_t len, uint8_t* bytes) override; + bool store(reg_t addr, size_t len, const uint8_t* bytes) override; void tick(void); size_t size() { return NS16550_SIZE; } private: From 803d85bac7b1dc5e491b735080cbcd7af7eac03e Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Thu, 1 Jun 2023 19:29:57 -0700 Subject: [PATCH 045/127] 
sim_t: change devices to shared_ptrs --- riscv/sim.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/riscv/sim.h b/riscv/sim.h index 3109173f19..e5a73030be 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -70,10 +70,10 @@ class sim_t : public htif_t, public simif_t std::string dts; std::string dtb; bool dtb_enabled; - std::unique_ptr boot_rom; - std::unique_ptr clint; - std::unique_ptr plic; - std::unique_ptr ns16550; + std::shared_ptr boot_rom; + std::shared_ptr clint; + std::shared_ptr plic; + std::shared_ptr ns16550; bus_t bus; log_file_t log_file; From 20793b36b79db337187964da3df397c1da576c23 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Thu, 1 Jun 2023 19:35:55 -0700 Subject: [PATCH 046/127] sim_t: Add list of ptrs to devices to sim_t --- riscv/sim.cc | 4 ++++ riscv/sim.h | 1 + 2 files changed, 5 insertions(+) diff --git a/riscv/sim.cc b/riscv/sim.cc index dcbd469d32..82619e7b93 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -123,6 +123,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, if (fdt_parse_clint(fdt, &clint_base, "riscv,clint0") == 0) { clint.reset(new clint_t(this, CPU_HZ / INSNS_PER_RTC_TICK, cfg->real_time_clint())); bus.add_device(clint_base, clint.get()); + devices.push_back(clint); } // pointer to wired interrupt controller @@ -134,6 +135,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, if (fdt_parse_plic(fdt, &plic_base, &plic_ndev, "riscv,plic0") == 0) { plic.reset(new plic_t(this, plic_ndev)); bus.add_device(plic_base, plic.get()); + devices.push_back(plic); intctrl = plic.get(); } @@ -146,6 +148,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, ns16550.reset(new ns16550_t(&bus, intctrl, NS16550_INTERRUPT_ID, ns16550_shift, ns16550_io_width)); bus.add_device(ns16550_base, ns16550.get()); + devices.push_back(ns16550); } //per core attribute @@ -376,6 +379,7 @@ void sim_t::set_rom() boot_rom.reset(new rom_device_t(rom)); bus.add_device(DEFAULT_RSTVEC, boot_rom.get()); + devices.push_back(boot_rom); } char* 
sim_t::addr_to_mem(reg_t paddr) { diff --git a/riscv/sim.h b/riscv/sim.h index e5a73030be..3afeeddc21 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -70,6 +70,7 @@ class sim_t : public htif_t, public simif_t std::string dts; std::string dtb; bool dtb_enabled; + std::vector> devices; std::shared_ptr boot_rom; std::shared_ptr clint; std::shared_ptr plic; From e47fc7075110fd97ee3fc96a4e67acb4a3b9c5fa Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Fri, 2 Jun 2023 09:15:03 -0700 Subject: [PATCH 047/127] clint: Change clint_t::increment to override abstract_device_t::tick(rtc_ticks) --- riscv/abstract_device.h | 2 ++ riscv/clint.cc | 10 +++++----- riscv/devices.h | 4 ++-- riscv/ns16550.cc | 2 +- riscv/sim.cc | 5 +++-- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/riscv/abstract_device.h b/riscv/abstract_device.h index 559c64f6d6..f4ccebe230 100644 --- a/riscv/abstract_device.h +++ b/riscv/abstract_device.h @@ -2,6 +2,7 @@ #define _RISCV_ABSTRACT_DEVICE_H #include "decode.h" +#include "common.h" #include #include @@ -10,6 +11,7 @@ class abstract_device_t { virtual bool load(reg_t addr, size_t len, uint8_t* bytes) = 0; virtual bool store(reg_t addr, size_t len, const uint8_t* bytes) = 0; virtual ~abstract_device_t() {} + virtual void tick(reg_t UNUSED rtc_ticks) {} }; #endif diff --git a/riscv/clint.cc b/riscv/clint.cc index f27f02c342..485c997843 100644 --- a/riscv/clint.cc +++ b/riscv/clint.cc @@ -12,7 +12,7 @@ clint_t::clint_t(simif_t* sim, uint64_t freq_hz, bool real_time) real_time_ref_secs = base.tv_sec; real_time_ref_usecs = base.tv_usec; - increment(0); + tick(0); } /* 0000 msip hart 0 @@ -34,7 +34,7 @@ bool clint_t::load(reg_t addr, size_t len, uint8_t* bytes) if (len > 8) return false; - increment(0); + tick(0); if (addr >= MSIP_BASE && addr < MTIMECMP_BASE) { if (len == 8) { @@ -90,11 +90,11 @@ bool clint_t::store(reg_t addr, size_t len, const uint8_t* bytes) } else { return false; } - increment(0); + tick(0); return true; } -void 
clint_t::increment(reg_t inc) +void clint_t::tick(reg_t rtc_ticks) { if (real_time) { struct timeval now; @@ -104,7 +104,7 @@ void clint_t::increment(reg_t inc) diff_usecs = ((now.tv_sec - real_time_ref_secs) * 1000000) + (now.tv_usec - real_time_ref_usecs); mtime = diff_usecs * freq_hz / 1000000; } else { - mtime += inc; + mtime += rtc_ticks; } for (const auto& [hart_id, hart] : sim->get_harts()) { diff --git a/riscv/devices.h b/riscv/devices.h index b9f639d45b..a62509ae5b 100644 --- a/riscv/devices.h +++ b/riscv/devices.h @@ -62,7 +62,7 @@ class clint_t : public abstract_device_t { bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; size_t size() { return CLINT_SIZE; } - void increment(reg_t inc); + void tick(reg_t rtc_ticks) override; uint64_t get_mtimecmp(reg_t hartid) { return mtimecmp[hartid]; } uint64_t get_mtime() { return mtime; } private: @@ -131,7 +131,7 @@ class ns16550_t : public abstract_device_t { uint32_t interrupt_id, uint32_t reg_shift, uint32_t reg_io_width); bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; - void tick(void); + void tick(reg_t rtc_ticks) override; size_t size() { return NS16550_SIZE; } private: class bus_t *bus; diff --git a/riscv/ns16550.cc b/riscv/ns16550.cc index 8d7e4de2d7..d21983be70 100644 --- a/riscv/ns16550.cc +++ b/riscv/ns16550.cc @@ -292,7 +292,7 @@ bool ns16550_t::store(reg_t addr, size_t len, const uint8_t* bytes) return ret; } -void ns16550_t::tick(void) +void ns16550_t::tick(reg_t UNUSED rtc_ticks) { if (!(fcr & UART_FCR_ENABLE_FIFO) || (mcr & UART_MCR_LOOP) || diff --git a/riscv/sim.cc b/riscv/sim.cc index 82619e7b93..77ed4c7a09 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -245,8 +245,9 @@ void sim_t::step(size_t n) procs[current_proc]->get_mmu()->yield_load_reservation(); if (++current_proc == procs.size()) { current_proc = 0; - if (clint) 
clint->increment(INTERLEAVE / INSNS_PER_RTC_TICK); - if (ns16550) ns16550->tick(); + reg_t rtc_ticks = INTERLEAVE / INSNS_PER_RTC_TICK; + if (clint) clint->tick(rtc_ticks); + if (ns16550) ns16550->tick(rtc_ticks); } } } From e733a70d0565bcee9aeba27b654df1a52dff08fe Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Fri, 2 Jun 2023 09:18:11 -0700 Subject: [PATCH 048/127] sim_t: Tick all devices, not just clint and ns16550 --- riscv/sim.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/riscv/sim.cc b/riscv/sim.cc index 77ed4c7a09..9d0bfb82e9 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -246,8 +246,7 @@ void sim_t::step(size_t n) if (++current_proc == procs.size()) { current_proc = 0; reg_t rtc_ticks = INTERLEAVE / INSNS_PER_RTC_TICK; - if (clint) clint->tick(rtc_ticks); - if (ns16550) ns16550->tick(rtc_ticks); + for (auto &dev : devices) dev->tick(rtc_ticks); } } } From 6456b5ad25a2b7efb6c4f9ccd28e00a5408eb743 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Fri, 2 Jun 2023 09:27:09 -0700 Subject: [PATCH 049/127] sim_t: Remove boot_rom/ns16550 members of sim_t These are redundant with sim_t::devices --- riscv/sim.cc | 6 +++--- riscv/sim.h | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/riscv/sim.cc b/riscv/sim.cc index 9d0bfb82e9..4fa49b2cf5 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -145,8 +145,8 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, if (fdt_parse_ns16550(fdt, &ns16550_base, &ns16550_shift, &ns16550_io_width, "ns16550a") == 0) { assert(intctrl); - ns16550.reset(new ns16550_t(&bus, intctrl, NS16550_INTERRUPT_ID, - ns16550_shift, ns16550_io_width)); + std::shared_ptr ns16550(new ns16550_t(&bus, intctrl, NS16550_INTERRUPT_ID, + ns16550_shift, ns16550_io_width)); bus.add_device(ns16550_base, ns16550.get()); devices.push_back(ns16550); } @@ -377,7 +377,7 @@ void sim_t::set_rom() const int align = 0x1000; rom.resize((rom.size() + align - 1) / align * align); - boot_rom.reset(new rom_device_t(rom)); + std::shared_ptr 
boot_rom(new rom_device_t(rom)); bus.add_device(DEFAULT_RSTVEC, boot_rom.get()); devices.push_back(boot_rom); } diff --git a/riscv/sim.h b/riscv/sim.h index 3afeeddc21..a851643125 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -71,10 +71,8 @@ class sim_t : public htif_t, public simif_t std::string dtb; bool dtb_enabled; std::vector> devices; - std::shared_ptr boot_rom; std::shared_ptr clint; std::shared_ptr plic; - std::shared_ptr ns16550; bus_t bus; log_file_t log_file; From 426a33e77438f956d0890391af7bb7ed9b7a20fc Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Fri, 2 Jun 2023 12:07:22 -0700 Subject: [PATCH 050/127] sim_t: change plugin_devices to a vec of shared_ptrs --- ci-tests/testlib.c | 2 +- riscv/sim.cc | 4 ++-- riscv/sim.h | 4 ++-- spike_main/spike.cc | 7 ++----- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ci-tests/testlib.c b/ci-tests/testlib.c index 3d5438b586..33eaede2e5 100644 --- a/ci-tests/testlib.c +++ b/ci-tests/testlib.c @@ -28,7 +28,7 @@ int main() hartids, false, 4); - std::vector> plugin_devices; + std::vector>> plugin_devices; std::vector htif_args {"pk", "hello"}; debug_module_config_t dm_config = { .progbufsize = 2, diff --git a/riscv/sim.cc b/riscv/sim.cc index 4fa49b2cf5..43c91f6106 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -34,7 +34,7 @@ const size_t sim_t::INTERLEAVE; sim_t::sim_t(const cfg_t *cfg, bool halted, std::vector> mems, - std::vector> plugin_devices, + std::vector>> plugin_devices, const std::vector& args, const debug_module_config_t &dm_config, const char *log_path, @@ -67,7 +67,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, bus.add_device(x.first, x.second); for (auto& x : plugin_devices) - bus.add_device(x.first, x.second); + bus.add_device(x.first, x.second.get()); debug_module.add_device(&bus); diff --git a/riscv/sim.h b/riscv/sim.h index a851643125..ba956661ec 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -27,7 +27,7 @@ class sim_t : public htif_t, public simif_t public: sim_t(const cfg_t *cfg, 
bool halted, std::vector> mems, - std::vector> plugin_devices, + std::vector>> plugin_devices, const std::vector& args, const debug_module_config_t &dm_config, const char *log_path, bool dtb_enabled, const char *dtb_file, @@ -63,7 +63,7 @@ class sim_t : public htif_t, public simif_t isa_parser_t isa; const cfg_t * const cfg; std::vector> mems; - std::vector> plugin_devices; + std::vector>> plugin_devices; std::vector procs; std::map harts; std::pair initrd_range; diff --git a/spike_main/spike.cc b/spike_main/spike.cc index 7290f38bbd..f257582ffb 100644 --- a/spike_main/spike.cc +++ b/spike_main/spike.cc @@ -336,7 +336,7 @@ int main(int argc, char** argv) bool dtb_enabled = true; const char* kernel = NULL; reg_t kernel_offset, kernel_size; - std::vector> plugin_devices; + std::vector>> plugin_devices; std::unique_ptr ic; std::unique_ptr dc; std::unique_ptr l2; @@ -416,7 +416,7 @@ int main(int argc, char** argv) std::string args(avail, '\0'); stream.readsome(&args[0], avail); - plugin_devices.emplace_back(base, new mmio_plugin_device_t(name, args)); + plugin_devices.emplace_back(base, std::make_shared(name, args)); }; option_parser_t parser; @@ -602,8 +602,5 @@ int main(int argc, char** argv) for (auto& mem : mems) delete mem.second; - for (auto& plugin_device : plugin_devices) - delete plugin_device.second; - return return_code; } From 1bd44c71a13f3d8d25de112fb5346589c03e332d Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Fri, 2 Jun 2023 12:10:38 -0700 Subject: [PATCH 051/127] sim_t: Merge sim_t::plugin_devices with sim_t::devices --- riscv/sim.cc | 5 +++-- riscv/sim.h | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/riscv/sim.cc b/riscv/sim.cc index 43c91f6106..877d5c2586 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -45,7 +45,6 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, isa(cfg->isa(), cfg->priv()), cfg(cfg), mems(mems), - plugin_devices(plugin_devices), procs(std::max(cfg->nprocs(), size_t(1))), dtb_enabled(dtb_enabled), 
log_file(log_path), @@ -66,8 +65,10 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, for (auto& x : mems) bus.add_device(x.first, x.second); - for (auto& x : plugin_devices) + for (auto& x : plugin_devices) { bus.add_device(x.first, x.second.get()); + devices.push_back(x.second); + } debug_module.add_device(&bus); diff --git a/riscv/sim.h b/riscv/sim.h index ba956661ec..6e6907891d 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -63,7 +63,6 @@ class sim_t : public htif_t, public simif_t isa_parser_t isa; const cfg_t * const cfg; std::vector> mems; - std::vector>> plugin_devices; std::vector procs; std::map harts; std::pair initrd_range; From cd0bd1bda701f5004f9667d0c87a8b65f54d30e3 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Fri, 2 Jun 2023 16:39:45 -0700 Subject: [PATCH 052/127] sim_t: Make static consts public members --- riscv/sim.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/riscv/sim.h b/riscv/sim.h index 6e6907891d..7689d54e74 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -59,6 +59,10 @@ class sim_t : public htif_t, public simif_t // Callback for processors to let the simulation know they were reset. 
virtual void proc_reset(unsigned id) override; + static const size_t INTERLEAVE = 5000; + static const size_t INSNS_PER_RTC_TICK = 100; // 10 MHz clock for 1 BIPS core + static const size_t CPU_HZ = 1000000000; // 1GHz CPU + private: isa_parser_t isa; const cfg_t * const cfg; @@ -82,9 +86,6 @@ class sim_t : public htif_t, public simif_t processor_t* get_core(const std::string& i); void step(size_t n); // step through simulation - static const size_t INTERLEAVE = 5000; - static const size_t INSNS_PER_RTC_TICK = 100; // 10 MHz clock for 1 BIPS core - static const size_t CPU_HZ = 1000000000; // 1GHz CPU size_t current_step; size_t current_proc; bool debug; From 81218a2e0201e3ec3d7520fbe2f3466609bd613f Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Fri, 2 Jun 2023 20:56:00 -0700 Subject: [PATCH 053/127] ns16550_t: remove unused bus_t member --- riscv/devices.h | 3 +-- riscv/ns16550.cc | 4 ++-- riscv/sim.cc | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/riscv/devices.h b/riscv/devices.h index a62509ae5b..fe19f74a73 100644 --- a/riscv/devices.h +++ b/riscv/devices.h @@ -127,14 +127,13 @@ class plic_t : public abstract_device_t, public abstract_interrupt_controller_t class ns16550_t : public abstract_device_t { public: - ns16550_t(class bus_t *bus, abstract_interrupt_controller_t *intctrl, + ns16550_t(abstract_interrupt_controller_t *intctrl, uint32_t interrupt_id, uint32_t reg_shift, uint32_t reg_io_width); bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; void tick(reg_t rtc_ticks) override; size_t size() { return NS16550_SIZE; } private: - class bus_t *bus; abstract_interrupt_controller_t *intctrl; uint32_t interrupt_id; uint32_t reg_shift; diff --git a/riscv/ns16550.cc b/riscv/ns16550.cc index d21983be70..475d5ec134 100644 --- a/riscv/ns16550.cc +++ b/riscv/ns16550.cc @@ -69,9 +69,9 @@ #define UART_SCR 7 /* I/O: Scratch Register */ -ns16550_t::ns16550_t(class bus_t 
*bus, abstract_interrupt_controller_t *intctrl, +ns16550_t::ns16550_t(abstract_interrupt_controller_t *intctrl, uint32_t interrupt_id, uint32_t reg_shift, uint32_t reg_io_width) - : bus(bus), intctrl(intctrl), interrupt_id(interrupt_id), reg_shift(reg_shift), reg_io_width(reg_io_width), backoff_counter(0) + : intctrl(intctrl), interrupt_id(interrupt_id), reg_shift(reg_shift), reg_io_width(reg_io_width), backoff_counter(0) { ier = 0; iir = UART_IIR_NO_INT; diff --git a/riscv/sim.cc b/riscv/sim.cc index 877d5c2586..858ace3f96 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -146,7 +146,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, if (fdt_parse_ns16550(fdt, &ns16550_base, &ns16550_shift, &ns16550_io_width, "ns16550a") == 0) { assert(intctrl); - std::shared_ptr ns16550(new ns16550_t(&bus, intctrl, NS16550_INTERRUPT_ID, + std::shared_ptr ns16550(new ns16550_t(intctrl, NS16550_INTERRUPT_ID, ns16550_shift, ns16550_io_width)); bus.add_device(ns16550_base, ns16550.get()); devices.push_back(ns16550); From 5b39c69bfadd507f22e794baae3f4ef342303462 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Fri, 2 Jun 2023 21:26:52 -0700 Subject: [PATCH 054/127] devices: Pass const pointers to sim_t to clint/plic --- riscv/clint.cc | 2 +- riscv/devices.h | 6 +++--- riscv/plic.cc | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/riscv/clint.cc b/riscv/clint.cc index 485c997843..25e45fbb79 100644 --- a/riscv/clint.cc +++ b/riscv/clint.cc @@ -3,7 +3,7 @@ #include "processor.h" #include "simif.h" -clint_t::clint_t(simif_t* sim, uint64_t freq_hz, bool real_time) +clint_t::clint_t(const simif_t* sim, uint64_t freq_hz, bool real_time) : sim(sim), freq_hz(freq_hz), real_time(real_time), mtime(0) { struct timeval base; diff --git a/riscv/devices.h b/riscv/devices.h index fe19f74a73..11cc3479cb 100644 --- a/riscv/devices.h +++ b/riscv/devices.h @@ -58,7 +58,7 @@ class mem_t : public abstract_device_t { class clint_t : public abstract_device_t { public: - clint_t(simif_t*, 
uint64_t freq_hz, bool real_time); + clint_t(const simif_t*, uint64_t freq_hz, bool real_time); bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; size_t size() { return CLINT_SIZE; } @@ -69,7 +69,7 @@ class clint_t : public abstract_device_t { typedef uint64_t mtime_t; typedef uint64_t mtimecmp_t; typedef uint32_t msip_t; - simif_t* sim; + const simif_t* sim; uint64_t freq_hz; bool real_time; uint64_t real_time_ref_secs; @@ -97,7 +97,7 @@ struct plic_context_t { class plic_t : public abstract_device_t, public abstract_interrupt_controller_t { public: - plic_t(simif_t*, uint32_t ndev); + plic_t(const simif_t*, uint32_t ndev); bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; void set_interrupt_level(uint32_t id, int lvl) override; diff --git a/riscv/plic.cc b/riscv/plic.cc index 37a5f53ba0..e2685a9c28 100644 --- a/riscv/plic.cc +++ b/riscv/plic.cc @@ -70,7 +70,7 @@ #define REG_SIZE 0x1000000 -plic_t::plic_t(simif_t* sim, uint32_t ndev) +plic_t::plic_t(const simif_t* sim, uint32_t ndev) : num_ids(ndev + 1), num_ids_word(((ndev + 1) + (32 - 1)) / 32), max_prio((1UL << PLIC_PRIO_BITS) - 1), priority{}, level{} { From fa27eeb3b77cdeeefabdbf225cd17edad21a5e97 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Fri, 2 Jun 2023 21:30:15 -0700 Subject: [PATCH 055/127] dts: void* fdt arg to parse_fdt should be const --- riscv/dts.cc | 22 +++++++++++----------- riscv/dts.h | 18 +++++++++--------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/riscv/dts.cc b/riscv/dts.cc index 200288ef2c..bd1f4fe1f2 100644 --- a/riscv/dts.cc +++ b/riscv/dts.cc @@ -215,7 +215,7 @@ std::string dts_compile(const std::string& dts) return dtb.str(); } -static int fdt_get_node_addr_size(void *fdt, int node, reg_t *addr, +static int fdt_get_node_addr_size(const void *fdt, int node, reg_t *addr, unsigned long *size, const char *field) { 
int parent, len, i; @@ -259,7 +259,7 @@ static int fdt_get_node_addr_size(void *fdt, int node, reg_t *addr, return 0; } -static int check_cpu_node(void *fdt, int cpu_offset) +static int check_cpu_node(const void *fdt, int cpu_offset) { int len; const void *prop; @@ -276,22 +276,22 @@ static int check_cpu_node(void *fdt, int cpu_offset) return 0; } -int fdt_get_offset(void *fdt, const char *field) +int fdt_get_offset(const void *fdt, const char *field) { return fdt_path_offset(fdt, field); } -int fdt_get_first_subnode(void *fdt, int node) +int fdt_get_first_subnode(const void *fdt, int node) { return fdt_first_subnode(fdt, node); } -int fdt_get_next_subnode(void *fdt, int node) +int fdt_get_next_subnode(const void *fdt, int node) { return fdt_next_subnode(fdt, node); } -int fdt_parse_clint(void *fdt, reg_t *clint_addr, +int fdt_parse_clint(const void *fdt, reg_t *clint_addr, const char *compatible) { int nodeoffset, rc; @@ -307,7 +307,7 @@ int fdt_parse_clint(void *fdt, reg_t *clint_addr, return 0; } -int fdt_parse_plic(void *fdt, reg_t *plic_addr, uint32_t *ndev, +int fdt_parse_plic(const void *fdt, reg_t *plic_addr, uint32_t *ndev, const char *compatible) { int nodeoffset, len, rc; @@ -329,7 +329,7 @@ int fdt_parse_plic(void *fdt, reg_t *plic_addr, uint32_t *ndev, return 0; } -int fdt_parse_ns16550(void *fdt, reg_t *ns16550_addr, +int fdt_parse_ns16550(const void *fdt, reg_t *ns16550_addr, uint32_t *reg_shift, uint32_t *reg_io_width, const char *compatible) { @@ -365,7 +365,7 @@ int fdt_parse_ns16550(void *fdt, reg_t *ns16550_addr, return 0; } -int fdt_parse_pmp_num(void *fdt, int cpu_offset, reg_t *pmp_num) +int fdt_parse_pmp_num(const void *fdt, int cpu_offset, reg_t *pmp_num) { int rc; @@ -380,7 +380,7 @@ int fdt_parse_pmp_num(void *fdt, int cpu_offset, reg_t *pmp_num) return 0; } -int fdt_parse_pmp_alignment(void *fdt, int cpu_offset, reg_t *pmp_align) +int fdt_parse_pmp_alignment(const void *fdt, int cpu_offset, reg_t *pmp_align) { int rc; @@ -395,7 +395,7 @@ 
int fdt_parse_pmp_alignment(void *fdt, int cpu_offset, reg_t *pmp_align) return 0; } -int fdt_parse_mmu_type(void *fdt, int cpu_offset, const char **mmu_type) +int fdt_parse_mmu_type(const void *fdt, int cpu_offset, const char **mmu_type) { assert(mmu_type); diff --git a/riscv/dts.h b/riscv/dts.h index 7a64d7bc63..2b7404e457 100644 --- a/riscv/dts.h +++ b/riscv/dts.h @@ -16,18 +16,18 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, std::string dts_compile(const std::string& dts); -int fdt_get_offset(void *fdt, const char *field); -int fdt_get_first_subnode(void *fdt, int node); -int fdt_get_next_subnode(void *fdt, int node); +int fdt_get_offset(const void *fdt, const char *field); +int fdt_get_first_subnode(const void *fdt, int node); +int fdt_get_next_subnode(const void *fdt, int node); -int fdt_parse_clint(void *fdt, reg_t *clint_addr, +int fdt_parse_clint(const void *fdt, reg_t *clint_addr, const char *compatible); -int fdt_parse_plic(void *fdt, reg_t *plic_addr, uint32_t *ndev, +int fdt_parse_plic(const void *fdt, reg_t *plic_addr, uint32_t *ndev, const char *compatible); -int fdt_parse_ns16550(void *fdt, reg_t *ns16550_addr, +int fdt_parse_ns16550(const void *fdt, reg_t *ns16550_addr, uint32_t *reg_shift, uint32_t *reg_io_width, const char *compatible); -int fdt_parse_pmp_num(void *fdt, int cpu_offset, reg_t *pmp_num); -int fdt_parse_pmp_alignment(void *fdt, int cpu_offset, reg_t *pmp_align); -int fdt_parse_mmu_type(void *fdt, int cpu_offset, const char **mmu_type); +int fdt_parse_pmp_num(const void *fdt, int cpu_offset, reg_t *pmp_num); +int fdt_parse_pmp_alignment(const void *fdt, int cpu_offset, reg_t *pmp_align); +int fdt_parse_mmu_type(const void *fdt, int cpu_offset, const char **mmu_type); #endif From b9275b7ce23937b6ba0345eb30ebb9c82dbd3c3f Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Sun, 4 Jun 2023 21:37:04 -0700 Subject: [PATCH 056/127] Inline make_dtb into sim_t constructor make_dtb is only called here, this simplifies later 
work towards refactoring device DTS node generation --- riscv/sim.cc | 64 ++++++++++++++++++++++++---------------------------- riscv/sim.h | 1 - 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/riscv/sim.cc b/riscv/sim.cc index 858ace3f96..10e86be2c7 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -109,7 +109,35 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, if (!dtb_enabled) return; // Load dtb_file if provided, otherwise self-generate a dts/dtb - make_dtb(dtb_file); + if (dtb_file) { + std::ifstream fin(dtb_file, std::ios::binary); + if (!fin.good()) { + std::cerr << "can't find dtb file: " << dtb_file << std::endl; + exit(-1); + } + std::stringstream strstream; + strstream << fin.rdbuf(); + + dtb = strstream.str(); + } else { + std::pair initrd_bounds = cfg->initrd_bounds(); + dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, + initrd_bounds.first, initrd_bounds.second, + cfg->bootargs(), cfg->pmpregions, procs, mems); + dtb = dts_compile(dts); + } + + int fdt_code = fdt_check_header(dtb.c_str()); + if (fdt_code) { + std::cerr << "Failed to read DTB from "; + if (!dtb_file) { + std::cerr << "auto-generated DTS string"; + } else { + std::cerr << "`" << dtb_file << "'"; + } + std::cerr << ": " << fdt_strerror(fdt_code) << ".\n"; + exit(-1); + } void *fdt = (void *)dtb.c_str(); @@ -303,40 +331,6 @@ bool sim_t::mmio_store(reg_t paddr, size_t len, const uint8_t* bytes) return bus.store(paddr, len, bytes); } -void sim_t::make_dtb(const char* dtb_file) -{ - if (dtb_file) { - std::ifstream fin(dtb_file, std::ios::binary); - if (!fin.good()) { - std::cerr << "can't find dtb file: " << dtb_file << std::endl; - exit(-1); - } - - std::stringstream strstream; - strstream << fin.rdbuf(); - - dtb = strstream.str(); - } else { - std::pair initrd_bounds = cfg->initrd_bounds(); - dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, - initrd_bounds.first, initrd_bounds.second, - cfg->bootargs(), cfg->pmpregions, procs, mems); - dtb = dts_compile(dts); - } - - int fdt_code = 
fdt_check_header(dtb.c_str()); - if (fdt_code) { - std::cerr << "Failed to read DTB from "; - if (!dtb_file) { - std::cerr << "auto-generated DTS string"; - } else { - std::cerr << "`" << dtb_file << "'"; - } - std::cerr << ": " << fdt_strerror(fdt_code) << ".\n"; - exit(-1); - } -} - void sim_t::set_rom() { const int reset_vec_size = 8; diff --git a/riscv/sim.h b/riscv/sim.h index 7689d54e74..7f08ec191b 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -98,7 +98,6 @@ class sim_t : public htif_t, public simif_t virtual char* addr_to_mem(reg_t paddr) override; virtual bool mmio_load(reg_t paddr, size_t len, uint8_t* bytes) override; virtual bool mmio_store(reg_t paddr, size_t len, const uint8_t* bytes) override; - void make_dtb(const char* dtb_file); void set_rom(); virtual const char* get_symbol(uint64_t paddr) override; From b2ab751ce317929a179fdad06ad12bdcc2be42a7 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 5 Jun 2023 10:25:57 -0700 Subject: [PATCH 057/127] sim_t: Add sim_t::get_intctrl --- riscv/sim.h | 1 + 1 file changed, 1 insertion(+) diff --git a/riscv/sim.h b/riscv/sim.h index 7f08ec191b..1cb0658088 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -52,6 +52,7 @@ class sim_t : public htif_t, public simif_t } const char* get_dts() { return dts.c_str(); } processor_t* get_core(size_t i) { return procs.at(i); } + abstract_interrupt_controller_t* get_intctrl() const { assert(plic.get()); return plic.get(); } virtual const cfg_t &get_cfg() const override { return *cfg; } virtual const std::map& get_harts() const override { return harts; } From 0beed2cc22b20093b5ee0fdab343c525503b8a16 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 5 Jun 2023 10:28:20 -0700 Subject: [PATCH 058/127] device_t: Add device_factory_t This class should implement conditional fdt-based device instantiation, as well as adding device nodes to the dts --- riscv/abstract_device.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/riscv/abstract_device.h 
b/riscv/abstract_device.h index f4ccebe230..7239b667af 100644 --- a/riscv/abstract_device.h +++ b/riscv/abstract_device.h @@ -5,6 +5,9 @@ #include "common.h" #include #include +#include + +class sim_t; class abstract_device_t { public: @@ -14,4 +17,20 @@ class abstract_device_t { virtual void tick(reg_t UNUSED rtc_ticks) {} }; +// factory for devices which should show up in the DTS, and can be +// parameterized by parsing the DTS +class device_factory_t { +public: + virtual abstract_device_t* parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) = 0; + virtual std::string generate_dts(const sim_t* sim) = 0; + virtual ~device_factory_t() {} +}; + +#define REGISTER_DEVICE(name, parse, generate) \ + class name##_factory_t : public device_factory_t { \ + public: \ + name##_t* parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) override { return parse(fdt, sim, base); } \ + std::string generate_dts(const sim_t* sim) override { return generate(sim); } \ + }; device_factory_t *name##_factory = new name##_factory_t(); + #endif From e5a61098733b34f8906e26827f94afdc9df277df Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 5 Jun 2023 10:31:44 -0700 Subject: [PATCH 059/127] device_t: Add device_factory_t's for ns16550/clint/plic --- riscv/clint.cc | 31 +++++++++++++++++++++++++++++++ riscv/ns16550.cc | 37 +++++++++++++++++++++++++++++++++++++ riscv/plic.cc | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/riscv/clint.cc b/riscv/clint.cc index 25e45fbb79..908ccb606a 100644 --- a/riscv/clint.cc +++ b/riscv/clint.cc @@ -1,7 +1,10 @@ #include +#include #include "devices.h" #include "processor.h" #include "simif.h" +#include "sim.h" +#include "dts.h" clint_t::clint_t(const simif_t* sim, uint64_t freq_hz, bool real_time) : sim(sim), freq_hz(freq_hz), real_time(real_time), mtime(0) @@ -112,3 +115,31 @@ void clint_t::tick(reg_t rtc_ticks) hart->state.mip->backdoor_write_with_mask(MIP_MTIP, mtime >= mtimecmp[hart_id] ? 
MIP_MTIP : 0); } } + +clint_t* clint_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) { + if (fdt_parse_clint(fdt, base, "riscv,clint0") == 0) + return new clint_t(sim, + sim->CPU_HZ / sim->INSNS_PER_RTC_TICK, + sim->get_cfg().real_time_clint()); + else + return nullptr; +} + +std::string clint_generate_dts(const sim_t* sim) { + std::stringstream s; + s << std::hex + << " clint@" << CLINT_BASE << " {\n" + " compatible = \"riscv,clint0\";\n" + " interrupts-extended = <" << std::dec; + for (size_t i = 0; i < sim->get_cfg().nprocs(); i++) + s << "&CPU" << i << "_intc 3 &CPU" << i << "_intc 7 "; + reg_t clintbs = CLINT_BASE; + reg_t clintsz = CLINT_SIZE; + s << std::hex << ">;\n" + " reg = <0x" << (clintbs >> 32) << " 0x" << (clintbs & (uint32_t)-1) << + " 0x" << (clintsz >> 32) << " 0x" << (clintsz & (uint32_t)-1) << ">;\n" + " };\n"; + return s.str(); +} + +REGISTER_DEVICE(clint, clint_parse_from_fdt, clint_generate_dts) diff --git a/riscv/ns16550.cc b/riscv/ns16550.cc index 475d5ec134..a4bd204fde 100644 --- a/riscv/ns16550.cc +++ b/riscv/ns16550.cc @@ -1,7 +1,10 @@ #include +#include #include "devices.h" #include "processor.h" #include "term.h" +#include "sim.h" +#include "dts.h" #define UART_QUEUE_SIZE 64 @@ -317,3 +320,37 @@ void ns16550_t::tick(reg_t UNUSED rtc_ticks) lsr |= UART_LSR_DR; update_interrupt(); } + +std::string ns16550_generate_dts(const sim_t* sim) +{ + std::stringstream s; + s << std::hex + << " SERIAL0: ns16550@" << NS16550_BASE << " {\n" + " compatible = \"ns16550a\";\n" + " clock-frequency = <" << std::dec << (sim->CPU_HZ/sim->INSNS_PER_RTC_TICK) << ">;\n" + " interrupt-parent = <&PLIC>;\n" + " interrupts = <" << std::dec << NS16550_INTERRUPT_ID; + reg_t ns16550bs = NS16550_BASE; + reg_t ns16550sz = NS16550_SIZE; + s << std::hex << ">;\n" + " reg = <0x" << (ns16550bs >> 32) << " 0x" << (ns16550bs & (uint32_t)-1) << + " 0x" << (ns16550sz >> 32) << " 0x" << (ns16550sz & (uint32_t)-1) << ">;\n" + " reg-shift = <0x" << 
NS16550_REG_SHIFT << ">;\n" + " reg-io-width = <0x" << NS16550_REG_IO_WIDTH << ">;\n" + " };\n"; + return s.str(); +} + +ns16550_t* ns16550_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) +{ + uint32_t ns16550_shift, ns16550_io_width; + if (fdt_parse_ns16550(fdt, base, + &ns16550_shift, &ns16550_io_width, "ns16550a") == 0) { + abstract_interrupt_controller_t* intctrl = sim->get_intctrl(); + return new ns16550_t(intctrl, NS16550_INTERRUPT_ID, ns16550_shift, ns16550_io_width); + } else { + return nullptr; + } +} + +REGISTER_DEVICE(ns16550, ns16550_parse_from_fdt, ns16550_generate_dts) diff --git a/riscv/plic.cc b/riscv/plic.cc index e2685a9c28..1aa5852cb1 100644 --- a/riscv/plic.cc +++ b/riscv/plic.cc @@ -1,7 +1,10 @@ #include +#include #include "devices.h" #include "processor.h" #include "simif.h" +#include "sim.h" +#include "dts.h" #define PLIC_MAX_CONTEXTS 15872 @@ -388,3 +391,37 @@ bool plic_t::store(reg_t addr, size_t len, const uint8_t* bytes) return ret; } + +std::string plic_generate_dts(const sim_t* sim) +{ + std::stringstream s; + s << std::hex + << " PLIC: plic@" << PLIC_BASE << " {\n" + " compatible = \"riscv,plic0\";\n" + " #address-cells = <2>;\n" + " interrupts-extended = <" << std::dec; + for (size_t i = 0; i < sim->get_cfg().nprocs(); i++) + s << "&CPU" << i << "_intc 11 &CPU" << i << "_intc 9 "; + reg_t plicbs = PLIC_BASE; + reg_t plicsz = PLIC_SIZE; + s << std::hex << ">;\n" + " reg = <0x" << (plicbs >> 32) << " 0x" << (plicbs & (uint32_t)-1) << + " 0x" << (plicsz >> 32) << " 0x" << (plicsz & (uint32_t)-1) << ">;\n" + " riscv,ndev = <0x" << PLIC_NDEV << ">;\n" + " riscv,max-priority = <0x" << ((1U << PLIC_PRIO_BITS) - 1) << ">;\n" + " #interrupt-cells = <1>;\n" + " interrupt-controller;\n" + " };\n"; + return s.str(); +} + +plic_t* plic_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) +{ + uint32_t plic_ndev; + if (fdt_parse_plic(fdt, base, &plic_ndev, "riscv,plic0") == 0) + return new plic_t(sim, plic_ndev); + else + 
return nullptr; +} + +REGISTER_DEVICE(plic, plic_parse_from_fdt, plic_generate_dts) From 6ae3783b08b86303836babcc7a8b473cf37b7c64 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 5 Jun 2023 10:36:54 -0700 Subject: [PATCH 060/127] sim_t: Move dts device node construction/parsing to device_factories --- riscv/dts.cc | 46 +++-------------------------- riscv/dts.h | 3 +- riscv/sim.cc | 82 +++++++++++++++++++++++++--------------------------- 3 files changed, 46 insertions(+), 85 deletions(-) diff --git a/riscv/dts.cc b/riscv/dts.cc index bd1f4fe1f2..4b8de9ff92 100644 --- a/riscv/dts.cc +++ b/riscv/dts.cc @@ -17,7 +17,8 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, const char* bootargs, size_t pmpregions, std::vector procs, - std::vector> mems) + std::vector> mems, + std::string device_nodes) { std::stringstream s; s << std::dec << @@ -85,47 +86,8 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, " #size-cells = <2>;\n" " compatible = \"ucbbar,spike-bare-soc\", \"simple-bus\";\n" " ranges;\n" - " clint@" << CLINT_BASE << " {\n" - " compatible = \"riscv,clint0\";\n" - " interrupts-extended = <" << std::dec; - for (size_t i = 0; i < procs.size(); i++) - s << "&CPU" << i << "_intc 3 &CPU" << i << "_intc 7 "; - reg_t clintbs = CLINT_BASE; - reg_t clintsz = CLINT_SIZE; - s << std::hex << ">;\n" - " reg = <0x" << (clintbs >> 32) << " 0x" << (clintbs & (uint32_t)-1) << - " 0x" << (clintsz >> 32) << " 0x" << (clintsz & (uint32_t)-1) << ">;\n" - " };\n" - " PLIC: plic@" << PLIC_BASE << " {\n" - " compatible = \"riscv,plic0\";\n" - " #address-cells = <2>;\n" - " interrupts-extended = <" << std::dec; - for (size_t i = 0; i < procs.size(); i++) - s << "&CPU" << i << "_intc 11 &CPU" << i << "_intc 9 "; - reg_t plicbs = PLIC_BASE; - reg_t plicsz = PLIC_SIZE; - s << std::hex << ">;\n" - " reg = <0x" << (plicbs >> 32) << " 0x" << (plicbs & (uint32_t)-1) << - " 0x" << (plicsz >> 32) << " 0x" << (plicsz & (uint32_t)-1) << ">;\n" - " riscv,ndev = 
<0x" << PLIC_NDEV << ">;\n" - " riscv,max-priority = <0x" << ((1U << PLIC_PRIO_BITS) - 1) << ">;\n" - " #interrupt-cells = <1>;\n" - " interrupt-controller;\n" - " };\n" - " SERIAL0: ns16550@" << NS16550_BASE << " {\n" - " compatible = \"ns16550a\";\n" - " clock-frequency = <" << std::dec << (cpu_hz/insns_per_rtc_tick) << ">;\n" - " interrupt-parent = <&PLIC>;\n" - " interrupts = <" << std::dec << NS16550_INTERRUPT_ID; - reg_t ns16550bs = NS16550_BASE; - reg_t ns16550sz = NS16550_SIZE; - s << std::hex << ">;\n" - " reg = <0x" << (ns16550bs >> 32) << " 0x" << (ns16550bs & (uint32_t)-1) << - " 0x" << (ns16550sz >> 32) << " 0x" << (ns16550sz & (uint32_t)-1) << ">;\n" - " reg-shift = <0x" << NS16550_REG_SHIFT << ">;\n" - " reg-io-width = <0x" << NS16550_REG_IO_WIDTH << ">;\n" - " };\n" - " };\n" + << device_nodes + << " };\n" " htif {\n" " compatible = \"ucb,htif0\";\n" " };\n" diff --git a/riscv/dts.h b/riscv/dts.h index 2b7404e457..d3655025f4 100644 --- a/riscv/dts.h +++ b/riscv/dts.h @@ -12,7 +12,8 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, const char* bootargs, size_t pmpregions, std::vector procs, - std::vector> mems); + std::vector> mems, + std::string device_nodes); std::string dts_compile(const std::string& dts); diff --git a/riscv/sim.cc b/riscv/sim.cc index 10e86be2c7..3283e5c2f9 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -32,6 +32,10 @@ static void handle_signal(int sig) const size_t sim_t::INTERLEAVE; +extern device_factory_t* clint_factory; +extern device_factory_t* plic_factory; +extern device_factory_t* ns16550_factory; + sim_t::sim_t(const cfg_t *cfg, bool halted, std::vector> mems, std::vector>> plugin_devices, @@ -90,9 +94,9 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, #ifndef RISCV_ENABLE_DUAL_ENDIAN if (cfg->endianness != endianness_little) { fputs("Big-endian support has not been prroperly enabled; " - "please rebuild the riscv-isa-sim project using " - "\"configure --enable-dual-endian\".\n", - stderr); + "please 
rebuild the riscv-isa-sim project using " + "\"configure --enable-dual-endian\".\n", + stderr); abort(); } #endif @@ -108,6 +112,19 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, // When running without using a dtb, skip the fdt-based configuration steps if (!dtb_enabled) return; + // Only make a CLINT (Core-Local INTerrupt controller) and PLIC (Platform- + // Level-Interrupt-Controller) if they are specified in the device tree + // configuration. + // + // This isn't *quite* as general as we could get (because you might have one + // that's not bus-accessible), but it should handle the normal use cases. In + // particular, the default device tree configuration that you get without + // setting the dtb_file argument has one. + std::vector device_factories = { + clint_factory, // clint must be element 0 + plic_factory, // plic must be element 1 + ns16550_factory}; + // Load dtb_file if provided, otherwise self-generate a dts/dtb if (dtb_file) { std::ifstream fin(dtb_file, std::ios::binary); @@ -117,13 +134,16 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, } std::stringstream strstream; strstream << fin.rdbuf(); - dtb = strstream.str(); } else { std::pair initrd_bounds = cfg->initrd_bounds(); + std::string device_nodes; + for (device_factory_t *factory : device_factories) + device_nodes.append(factory->generate_dts(this)); dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, initrd_bounds.first, initrd_bounds.second, - cfg->bootargs(), cfg->pmpregions, procs, mems); + cfg->bootargs(), cfg->pmpregions, procs, mems, + device_nodes); dtb = dts_compile(dts); } @@ -141,43 +161,21 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, void *fdt = (void *)dtb.c_str(); - // Only make a CLINT (Core-Local INTerrupt controller) if one is specified in - // the device tree configuration. - // - // This isn't *quite* as general as we could get (because you might have one - // that's not bus-accessible), but it should handle the normal use cases. 
In - // particular, the default device tree configuration that you get without - // setting the dtb_file argument has one. - reg_t clint_base; - if (fdt_parse_clint(fdt, &clint_base, "riscv,clint0") == 0) { - clint.reset(new clint_t(this, CPU_HZ / INSNS_PER_RTC_TICK, cfg->real_time_clint())); - bus.add_device(clint_base, clint.get()); - devices.push_back(clint); - } - - // pointer to wired interrupt controller - abstract_interrupt_controller_t *intctrl = NULL; - - // create plic - reg_t plic_base; - uint32_t plic_ndev; - if (fdt_parse_plic(fdt, &plic_base, &plic_ndev, "riscv,plic0") == 0) { - plic.reset(new plic_t(this, plic_ndev)); - bus.add_device(plic_base, plic.get()); - devices.push_back(plic); - intctrl = plic.get(); - } - - // create ns16550 - reg_t ns16550_base; - uint32_t ns16550_shift, ns16550_io_width; - if (fdt_parse_ns16550(fdt, &ns16550_base, - &ns16550_shift, &ns16550_io_width, "ns16550a") == 0) { - assert(intctrl); - std::shared_ptr ns16550(new ns16550_t(intctrl, NS16550_INTERRUPT_ID, - ns16550_shift, ns16550_io_width)); - bus.add_device(ns16550_base, ns16550.get()); - devices.push_back(ns16550); + for (size_t i = 0; i < device_factories.size(); i++) { + device_factory_t *factory = device_factories[i]; + reg_t device_base = 0; + abstract_device_t* device = factory->parse_from_fdt(fdt, this, &device_base); + if (device) { + assert(device_base); + bus.add_device(device_base, device); + std::shared_ptr dev_ptr(device); + devices.push_back(dev_ptr); + + if (i == 0) // clint_factory + clint = std::static_pointer_cast(dev_ptr); + else if (i == 1) // plic_factory + plic = std::static_pointer_cast(dev_ptr); + } } //per core attribute From 3ab4107b81e7b6c42bf60010d1fa598e9058a7c9 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 5 Jun 2023 16:32:01 -0700 Subject: [PATCH 061/127] device_t: device_factories should be const --- riscv/abstract_device.h | 10 +++++----- riscv/sim.cc | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git 
a/riscv/abstract_device.h b/riscv/abstract_device.h index 7239b667af..90e2b24cbc 100644 --- a/riscv/abstract_device.h +++ b/riscv/abstract_device.h @@ -21,16 +21,16 @@ class abstract_device_t { // parameterized by parsing the DTS class device_factory_t { public: - virtual abstract_device_t* parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) = 0; - virtual std::string generate_dts(const sim_t* sim) = 0; + virtual abstract_device_t* parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) const = 0; + virtual std::string generate_dts(const sim_t* sim) const = 0; virtual ~device_factory_t() {} }; #define REGISTER_DEVICE(name, parse, generate) \ class name##_factory_t : public device_factory_t { \ public: \ - name##_t* parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) override { return parse(fdt, sim, base); } \ - std::string generate_dts(const sim_t* sim) override { return generate(sim); } \ - }; device_factory_t *name##_factory = new name##_factory_t(); + name##_t* parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) const override { return parse(fdt, sim, base); } \ + std::string generate_dts(const sim_t* sim) const override { return generate(sim); } \ + }; const device_factory_t *name##_factory = new name##_factory_t(); #endif diff --git a/riscv/sim.cc b/riscv/sim.cc index 3283e5c2f9..50dc4f68bd 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -120,7 +120,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, // that's not bus-accessible), but it should handle the normal use cases. In // particular, the default device tree configuration that you get without // setting the dtb_file argument has one. 
- std::vector device_factories = { + std::vector device_factories = { clint_factory, // clint must be element 0 plic_factory, // plic must be element 1 ns16550_factory}; @@ -138,7 +138,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, } else { std::pair initrd_bounds = cfg->initrd_bounds(); std::string device_nodes; - for (device_factory_t *factory : device_factories) + for (const device_factory_t *factory : device_factories) device_nodes.append(factory->generate_dts(this)); dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, initrd_bounds.first, initrd_bounds.second, @@ -162,7 +162,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, void *fdt = (void *)dtb.c_str(); for (size_t i = 0; i < device_factories.size(); i++) { - device_factory_t *factory = device_factories[i]; + const device_factory_t *factory = device_factories[i]; reg_t device_base = 0; abstract_device_t* device = factory->parse_from_fdt(fdt, this, &device_base); if (device) { From 16be75973aae3331bf97ba9452797e72909312c1 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 6 Jun 2023 10:04:50 -0700 Subject: [PATCH 062/127] libfdt: Install libfdt and libfdt.h --- fdt/fdt.mk.in | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fdt/fdt.mk.in b/fdt/fdt.mk.in index 8c8dbe53c7..32c6d49295 100644 --- a/fdt/fdt.mk.in +++ b/fdt/fdt.mk.in @@ -1,5 +1,10 @@ fdt_subproject_deps = \ +fdt_install_shared_lib = yes + +fdt_install_hdrs = \ + libfdt.h \ + fdt_c_srcs = \ fdt.c \ fdt_ro.c \ From bb2754c2017f1062071ab820af36d3852cb9859d Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 6 Jun 2023 11:33:36 -0700 Subject: [PATCH 063/127] dts_t: Add dts.h to list of installed headers --- riscv/dts.h | 1 - riscv/riscv.mk.in | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/dts.h b/riscv/dts.h index d3655025f4..b6bb5b2792 100644 --- a/riscv/dts.h +++ b/riscv/dts.h @@ -4,7 +4,6 @@ #include "devices.h" #include "processor.h" -#include "mmu.h" #include std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, 
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index a3e125f5d8..d82df45e12 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -28,6 +28,7 @@ riscv_install_hdrs = \ decode.h \ devices.h \ disasm.h \ + dts.h \ encoding.h \ entropy_source.h \ extension.h \ From 37e50ad49914725f6ba0364255e2ae809c60f0f2 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 6 Jun 2023 11:42:13 -0700 Subject: [PATCH 064/127] dts: Expose fdt_get_node_addr_size function in header --- riscv/dts.cc | 4 ++-- riscv/dts.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/riscv/dts.cc b/riscv/dts.cc index 4b8de9ff92..8c1ceb46a3 100644 --- a/riscv/dts.cc +++ b/riscv/dts.cc @@ -177,8 +177,8 @@ std::string dts_compile(const std::string& dts) return dtb.str(); } -static int fdt_get_node_addr_size(const void *fdt, int node, reg_t *addr, - unsigned long *size, const char *field) +int fdt_get_node_addr_size(const void *fdt, int node, reg_t *addr, + unsigned long *size, const char *field) { int parent, len, i; int cell_addr, cell_size; diff --git a/riscv/dts.h b/riscv/dts.h index b6bb5b2792..10d3cc10bf 100644 --- a/riscv/dts.h +++ b/riscv/dts.h @@ -16,6 +16,8 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, std::string dts_compile(const std::string& dts); +int fdt_get_node_addr_size(const void *fdt, int node, reg_t *addr, + unsigned long *size, const char *field); int fdt_get_offset(const void *fdt, const char *field); int fdt_get_first_subnode(const void *fdt, int node); int fdt_get_next_subnode(const void *fdt, int node); From 701029d28b0e73f98a36869ef9317c49f0dc2949 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 6 Jun 2023 11:21:21 -0700 Subject: [PATCH 065/127] ns16550_t: ns16550 should parse interrupt id from the fdt --- riscv/dts.cc | 10 ++++++++++ riscv/dts.h | 2 +- riscv/ns16550.cc | 7 ++++--- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/riscv/dts.cc b/riscv/dts.cc index 8c1ceb46a3..cc65e3ce5e 100644 --- a/riscv/dts.cc +++ 
b/riscv/dts.cc @@ -293,6 +293,7 @@ int fdt_parse_plic(const void *fdt, reg_t *plic_addr, uint32_t *ndev, int fdt_parse_ns16550(const void *fdt, reg_t *ns16550_addr, uint32_t *reg_shift, uint32_t *reg_io_width, + uint32_t* reg_int_id, const char *compatible) { int nodeoffset, len, rc; @@ -324,6 +325,15 @@ int fdt_parse_ns16550(const void *fdt, reg_t *ns16550_addr, } } + reg_p = (fdt32_t *)fdt_getprop(fdt, nodeoffset, "interrupts", &len); + if (reg_int_id) { + if (reg_p) { + *reg_int_id = fdt32_to_cpu(*reg_p); + } else { + *reg_int_id = NS16550_INTERRUPT_ID; + } + } + return 0; } diff --git a/riscv/dts.h b/riscv/dts.h index 10d3cc10bf..7ec1ceb692 100644 --- a/riscv/dts.h +++ b/riscv/dts.h @@ -27,7 +27,7 @@ int fdt_parse_clint(const void *fdt, reg_t *clint_addr, int fdt_parse_plic(const void *fdt, reg_t *plic_addr, uint32_t *ndev, const char *compatible); int fdt_parse_ns16550(const void *fdt, reg_t *ns16550_addr, - uint32_t *reg_shift, uint32_t *reg_io_width, + uint32_t *reg_shift, uint32_t *reg_io_width, uint32_t* reg_int_id, const char *compatible); int fdt_parse_pmp_num(const void *fdt, int cpu_offset, reg_t *pmp_num); int fdt_parse_pmp_alignment(const void *fdt, int cpu_offset, reg_t *pmp_align); diff --git a/riscv/ns16550.cc b/riscv/ns16550.cc index a4bd204fde..dabe3a9b09 100644 --- a/riscv/ns16550.cc +++ b/riscv/ns16550.cc @@ -343,11 +343,12 @@ std::string ns16550_generate_dts(const sim_t* sim) ns16550_t* ns16550_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) { - uint32_t ns16550_shift, ns16550_io_width; + uint32_t ns16550_shift, ns16550_io_width, ns16550_int_id; if (fdt_parse_ns16550(fdt, base, - &ns16550_shift, &ns16550_io_width, "ns16550a") == 0) { + &ns16550_shift, &ns16550_io_width, &ns16550_int_id, + "ns16550a") == 0) { abstract_interrupt_controller_t* intctrl = sim->get_intctrl(); - return new ns16550_t(intctrl, NS16550_INTERRUPT_ID, ns16550_shift, ns16550_io_width); + return new ns16550_t(intctrl, ns16550_int_id, ns16550_shift, 
ns16550_io_width); } else { return nullptr; } From 186c619fb38f02d0b18514a2f8399cd8248e1dcc Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 6 Jun 2023 11:53:02 -0700 Subject: [PATCH 066/127] devices: Switch plugin device interface to use device_factory_t Plugins should now implement and register a device_factory_t to configure how that device should be parsed from a FDT, and an optional default DTS string. This drops support for command-line flag-based device configuration --- ci-tests/testlib.c | 2 +- riscv/abstract_device.h | 11 +++++ riscv/devices.cc | 46 +++------------------ riscv/devices.h | 14 ------- riscv/mmio_plugin.h | 91 ----------------------------------------- riscv/riscv.mk.in | 1 - riscv/sim.cc | 10 ++--- riscv/sim.h | 2 +- spike_main/spike.cc | 58 ++++---------------------- 9 files changed, 32 insertions(+), 203 deletions(-) delete mode 100644 riscv/mmio_plugin.h diff --git a/ci-tests/testlib.c b/ci-tests/testlib.c index 33eaede2e5..6342f9d06d 100644 --- a/ci-tests/testlib.c +++ b/ci-tests/testlib.c @@ -28,7 +28,7 @@ int main() hartids, false, 4); - std::vector>> plugin_devices; + std::vector plugin_devices; std::vector htif_args {"pk", "hello"}; debug_module_config_t dm_config = { .progbufsize = 2, diff --git a/riscv/abstract_device.h b/riscv/abstract_device.h index 90e2b24cbc..c5c64157ea 100644 --- a/riscv/abstract_device.h +++ b/riscv/abstract_device.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include class sim_t; @@ -26,9 +28,18 @@ class device_factory_t { virtual ~device_factory_t() {} }; +// Type for holding all registered MMIO plugins by name. 
+using mmio_device_map_t = std::map; + +mmio_device_map_t& mmio_device_map(); + #define REGISTER_DEVICE(name, parse, generate) \ class name##_factory_t : public device_factory_t { \ public: \ + name##_factory_t() { \ + std::string str(#name); \ + if (!mmio_device_map().emplace(str, this).second) throw std::runtime_error("Plugin \"" + str + "\" already registered"); \ + }; \ name##_t* parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base) const override { return parse(fdt, sim, base); } \ std::string generate_dts(const sim_t* sim) const override { return generate(sim); } \ }; const device_factory_t *name##_factory = new name##_factory_t(); diff --git a/riscv/devices.cc b/riscv/devices.cc index 81b232d120..2c06f78feb 100644 --- a/riscv/devices.cc +++ b/riscv/devices.cc @@ -2,6 +2,12 @@ #include "mmu.h" #include +mmio_device_map_t& mmio_device_map() +{ + static mmio_device_map_t device_map; + return device_map; +} + void bus_t::add_device(reg_t addr, abstract_device_t* dev) { // Searching devices via lower_bound/upper_bound @@ -51,46 +57,6 @@ std::pair bus_t::find_device(reg_t addr) return std::make_pair(it->first, it->second); } -// Type for holding all registered MMIO plugins by name. -using mmio_plugin_map_t = std::map; - -// Simple singleton instance of an mmio_plugin_map_t. 
-static mmio_plugin_map_t& mmio_plugin_map() -{ - static mmio_plugin_map_t instance; - return instance; -} - -void register_mmio_plugin(const char* name_cstr, - const mmio_plugin_t* mmio_plugin) -{ - std::string name(name_cstr); - if (!mmio_plugin_map().emplace(name, *mmio_plugin).second) { - throw std::runtime_error("Plugin \"" + name + "\" already registered!"); - } -} - -mmio_plugin_device_t::mmio_plugin_device_t(const std::string& name, - const std::string& args) - : plugin(mmio_plugin_map().at(name)), user_data((*plugin.alloc)(args.c_str())) -{ -} - -mmio_plugin_device_t::~mmio_plugin_device_t() -{ - (*plugin.dealloc)(user_data); -} - -bool mmio_plugin_device_t::load(reg_t addr, size_t len, uint8_t* bytes) -{ - return (*plugin.load)(user_data, addr, len, bytes); -} - -bool mmio_plugin_device_t::store(reg_t addr, size_t len, const uint8_t* bytes) -{ - return (*plugin.store)(user_data, addr, len, bytes); -} - mem_t::mem_t(reg_t size) : sz(size) { diff --git a/riscv/devices.h b/riscv/devices.h index 11cc3479cb..b752a21ecb 100644 --- a/riscv/devices.h +++ b/riscv/devices.h @@ -2,7 +2,6 @@ #define _RISCV_DEVICES_H #include "decode.h" -#include "mmio_plugin.h" #include "abstract_device.h" #include "abstract_interrupt_controller.h" #include "platform.h" @@ -157,19 +156,6 @@ class ns16550_t : public abstract_device_t { static const int MAX_BACKOFF = 16; }; -class mmio_plugin_device_t : public abstract_device_t { - public: - mmio_plugin_device_t(const std::string& name, const std::string& args); - virtual ~mmio_plugin_device_t() override; - - virtual bool load(reg_t addr, size_t len, uint8_t* bytes) override; - virtual bool store(reg_t addr, size_t len, const uint8_t* bytes) override; - - private: - mmio_plugin_t plugin; - void* user_data; -}; - template void write_little_endian_reg(T* word, reg_t addr, size_t len, const uint8_t* bytes) { diff --git a/riscv/mmio_plugin.h b/riscv/mmio_plugin.h deleted file mode 100644 index f14470bf38..0000000000 --- 
a/riscv/mmio_plugin.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef _RISCV_MMIO_PLUGIN_H -#define _RISCV_MMIO_PLUGIN_H - -#include -#include -#include - -#ifdef __cplusplus -extern "C" -{ -#endif - -typedef uint64_t reg_t; - -typedef struct { - // Allocate user data for an instance of the plugin. The parameter is a simple - // c-string containing arguments used to construct the plugin. It returns a - // void* to the allocated data. - void* (*alloc)(const char*); - - // Load a memory address of the MMIO plugin. The parameters are the user_data - // (void*), memory offset (reg_t), number of bytes to load (size_t), and the - // buffer into which the loaded data should be written (uint8_t*). Return true - // if the load is successful and false otherwise. - bool (*load)(void*, reg_t, size_t, uint8_t*); - - // Store some bytes to a memory address of the MMIO plugin. The parameters are - // the user_data (void*), memory offset (reg_t), number of bytes to store - // (size_t), and the buffer containing the data to be stored (const uint8_t*). - // Return true if the store is successful and false otherwise. - bool (*store)(void*, reg_t, size_t, const uint8_t*); - - // Deallocate the data allocated during the call to alloc. The parameter is a - // pointer to the user data allocated during the call to alloc. - void (*dealloc)(void*); -} mmio_plugin_t; - -// Register an mmio plugin with the application. This should be called by -// plugins as part of their loading process. -extern void register_mmio_plugin(const char* name_cstr, - const mmio_plugin_t* mmio_plugin); - -#ifdef __cplusplus -} - -#include - -// Wrapper around the C plugin API that makes registering a C++ class with -// correctly formed constructor, load, and store functions easier. The template -// type should be the type that implements the MMIO plugin interface. 
Simply -// make a global mmio_plugin_registration_t and your plugin should register -// itself with the application when it is loaded because the -// mmio_plugin_registration_t constructor will be called. -template -struct mmio_plugin_registration_t -{ - static void* alloc(const char* args) - { - return reinterpret_cast(new T(std::string(args))); - } - - static bool load(void* self, reg_t addr, size_t len, uint8_t* bytes) - { - return reinterpret_cast(self)->load(addr, len, bytes); - } - - static bool store(void* self, reg_t addr, size_t len, const uint8_t* bytes) - { - return reinterpret_cast(self)->store(addr, len, bytes); - } - - static void dealloc(void* self) - { - delete reinterpret_cast(self); - } - - mmio_plugin_registration_t(const std::string& name) - { - mmio_plugin_t plugin = { - mmio_plugin_registration_t::alloc, - mmio_plugin_registration_t::load, - mmio_plugin_registration_t::store, - mmio_plugin_registration_t::dealloc, - }; - - register_mmio_plugin(name.c_str(), &plugin); - } -}; -#endif // __cplusplus - -#endif diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index d82df45e12..1ad8b23b2e 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -35,7 +35,6 @@ riscv_install_hdrs = \ isa_parser.h \ log_file.h \ memtracer.h \ - mmio_plugin.h \ mmu.h \ platform.h \ processor.h \ diff --git a/riscv/sim.cc b/riscv/sim.cc index 50dc4f68bd..0c5a7fb299 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -38,7 +38,7 @@ extern device_factory_t* ns16550_factory; sim_t::sim_t(const cfg_t *cfg, bool halted, std::vector> mems, - std::vector>> plugin_devices, + std::vector plugin_device_factories, const std::vector& args, const debug_module_config_t &dm_config, const char *log_path, @@ -69,11 +69,6 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, for (auto& x : mems) bus.add_device(x.first, x.second); - for (auto& x : plugin_devices) { - bus.add_device(x.first, x.second.get()); - devices.push_back(x.second); - } - debug_module.add_device(&bus); socketif = NULL; @@ 
-124,6 +119,9 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, clint_factory, // clint must be element 0 plic_factory, // plic must be element 1 ns16550_factory}; + device_factories.insert(device_factories.end(), + plugin_device_factories.begin(), + plugin_device_factories.end()); // Load dtb_file if provided, otherwise self-generate a dts/dtb if (dtb_file) { diff --git a/riscv/sim.h b/riscv/sim.h index 1cb0658088..a3445db257 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -27,7 +27,7 @@ class sim_t : public htif_t, public simif_t public: sim_t(const cfg_t *cfg, bool halted, std::vector> mems, - std::vector>> plugin_devices, + std::vector plugin_device_factories, const std::vector& args, const debug_module_config_t &dm_config, const char *log_path, bool dtb_enabled, const char *dtb_file, diff --git a/spike_main/spike.cc b/spike_main/spike.cc index f257582ffb..4766f6dad0 100644 --- a/spike_main/spike.cc +++ b/spike_main/spike.cc @@ -50,12 +50,7 @@ static void help(int exit_code = 1) fprintf(stderr, " --l2=:: B both powers of 2).\n"); fprintf(stderr, " --big-endian Use a big-endian memory system.\n"); fprintf(stderr, " --misaligned Support misaligned memory accesses\n"); - fprintf(stderr, " --device= Attach MMIO plugin device from an --extlib library\n"); - fprintf(stderr, " P -- Name of the MMIO plugin\n"); - fprintf(stderr, " B -- Base memory address of the device\n"); - fprintf(stderr, " A -- String arguments to pass to the plugin\n"); - fprintf(stderr, " This flag can be used multiple times.\n"); - fprintf(stderr, " The extlib flag for the library must come first.\n"); + fprintf(stderr, " --device= Attach MMIO plugin device from an --extlib library\n"); fprintf(stderr, " --log-cache-miss Generate a log of cache miss\n"); fprintf(stderr, " --log-commits Generate a log of commits info\n"); fprintf(stderr, " --extension= Specify RoCC Extension\n"); @@ -336,7 +331,7 @@ int main(int argc, char** argv) bool dtb_enabled = true; const char* kernel = NULL; reg_t 
kernel_offset, kernel_size; - std::vector>> plugin_devices; + std::vector plugin_device_factories; std::unique_ptr ic; std::unique_ptr dc; std::unique_ptr l2; @@ -376,47 +371,12 @@ int main(int argc, char** argv) /*default_real_time_clint=*/false, /*default_trigger_count=*/4); - auto const device_parser = [&plugin_devices](const char *s) { - const std::string str(s); - std::istringstream stream(str); - - // We are parsing a string like name,base,args. - - // Parse the name, which is simply all of the characters leading up to the - // first comma. The validity of the plugin name will be checked later. - std::string name; - std::getline(stream, name, ','); - if (name.empty()) { - throw std::runtime_error("Plugin name is empty."); - } - - // Parse the base address. First, get all of the characters up to the next - // comma (or up to the end of the string if there is no comma). Then try to - // parse that string as an integer according to the rules of strtoull. It - // could be in decimal, hex, or octal. Fail if we were able to parse a - // number but there were garbage characters after the valid number. We must - // consume the entire string between the commas. - std::string base_str; - std::getline(stream, base_str, ','); - if (base_str.empty()) { - throw std::runtime_error("Device base address is empty."); - } - char* end; - reg_t base = static_cast(strtoull(base_str.c_str(), &end, 0)); - if (end != &*base_str.cend()) { - throw std::runtime_error("Error parsing device base address."); - } - - // The remainder of the string is the arguments. We could use getline, but - // that could ignore newline characters in the arguments. That should be - // rare and discouraged, but handle it here anyway with this weird in_avail - // technique. The arguments are optional, so if there were no arguments - // specified we could end up with an empty string here. That's okay. 
- auto avail = stream.rdbuf()->in_avail(); - std::string args(avail, '\0'); - stream.readsome(&args[0], avail); - - plugin_devices.emplace_back(base, std::make_shared(name, args)); + auto const device_parser = [&plugin_device_factories](const char *s) { + const std::string name(s); + if (name.empty()) throw std::runtime_error("Plugin name is empty."); + auto it = mmio_device_map().find(name); + if (it == mmio_device_map().end()) throw std::runtime_error("Plugin \"" + name + "\" not found in loaded extlibs."); + plugin_device_factories.push_back(it->second); }; option_parser_t parser; @@ -564,7 +524,7 @@ int main(int argc, char** argv) } sim_t s(&cfg, halted, - mems, plugin_devices, htif_args, dm_config, log_path, dtb_enabled, dtb_file, + mems, plugin_device_factories, htif_args, dm_config, log_path, dtb_enabled, dtb_file, socket, cmd_file); std::unique_ptr remote_bitbang((remote_bitbang_t *) NULL); From b87c6e64d0dba9edf7dc070f9e4a29016641c3a1 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Wed, 7 Jun 2023 10:33:38 -0700 Subject: [PATCH 067/127] debug: Remove debug_module_t::add_device, its redundant --- riscv/debug_module.cc | 4 ---- riscv/debug_module.h | 2 -- riscv/sim.cc | 2 +- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/riscv/debug_module.cc b/riscv/debug_module.cc index 27dbe66ecb..0f75c5e3df 100644 --- a/riscv/debug_module.cc +++ b/riscv/debug_module.cc @@ -118,10 +118,6 @@ void debug_module_t::reset() challenge = random(); } -void debug_module_t::add_device(bus_t *bus) { - bus->add_device(DEBUG_START, this); -} - bool debug_module_t::load(reg_t addr, size_t len, uint8_t* bytes) { addr = DEBUG_START + addr; diff --git a/riscv/debug_module.h b/riscv/debug_module.h index 0a62d77585..518f119df6 100644 --- a/riscv/debug_module.h +++ b/riscv/debug_module.h @@ -113,8 +113,6 @@ class debug_module_t : public abstract_device_t debug_module_t(simif_t *sim, const debug_module_config_t &config); ~debug_module_t(); - void add_device(bus_t *bus); - bool 
load(reg_t addr, size_t len, uint8_t* bytes); bool store(reg_t addr, size_t len, const uint8_t* bytes); diff --git a/riscv/sim.cc b/riscv/sim.cc index 0c5a7fb299..0779b954e4 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -69,7 +69,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, for (auto& x : mems) bus.add_device(x.first, x.second); - debug_module.add_device(&bus); + bus.add_device(DEBUG_START, &debug_module); socketif = NULL; #ifdef HAVE_BOOST_ASIO From 7ac808ee1b9c04c73899de89f550744d5963e18b Mon Sep 17 00:00:00 2001 From: "demin.han" Date: Wed, 21 Jun 2023 11:05:41 +0800 Subject: [PATCH 068/127] Remove duplicate compile options --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 01d7baca41..b5013bde79 100644 --- a/Makefile.in +++ b/Makefile.in @@ -98,7 +98,7 @@ default-CXXFLAGS := $(default-CFLAGS) -std=c++17 mcppbs-CPPFLAGS := @CPPFLAGS@ mcppbs-CFLAGS := $(default-CFLAGS) @CFLAGS@ -mcppbs-CXXFLAGS := $(mcppbs-CFLAGS) $(default-CXXFLAGS) @CXXFLAGS@ +mcppbs-CXXFLAGS := $(default-CXXFLAGS) @CXXFLAGS@ CC := @CC@ CXX := @CXX@ From fb18fe2d93988e3b9d469f6681c48cdb6c6a7835 Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Thu, 4 May 2023 08:11:32 +0800 Subject: [PATCH 069/127] triggers: icount: not to decrease on firing icount trigger with Debug Mode action The icount decreases on firing breakpoint action but not on entering Debug Mode action. 
Reference: https://github.com/riscv/riscv-debug-spec/issues/842 --- riscv/triggers.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/triggers.cc b/riscv/triggers.cc index 65ba4c9b0b..39a7330f70 100644 --- a/riscv/triggers.cc +++ b/riscv/triggers.cc @@ -317,7 +317,7 @@ std::optional icount_t::detect_icount_match(processor_t * const ret = match_result_t(TIMING_BEFORE, action); } - if (count >= 1) { + if (count >= 1 && (ret == std::nullopt || action != MCONTROL_ACTION_DEBUG_MODE)) { if (count == 1) pending = 1; count = count - 1; From 2d61da362201cd73a89d22b9cb71c6590f20a3ce Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 20 Jun 2023 14:16:42 -0700 Subject: [PATCH 070/127] sim_t: Add sim_t::add_device() API This is public so libspike users can precisely configure the device bus without going through the DTS interface --- riscv/sim.cc | 11 +++++++---- riscv/sim.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/riscv/sim.cc b/riscv/sim.cc index 0779b954e4..c60d30bab2 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -165,9 +165,8 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, abstract_device_t* device = factory->parse_from_fdt(fdt, this, &device_base); if (device) { assert(device_base); - bus.add_device(device_base, device); std::shared_ptr dev_ptr(device); - devices.push_back(dev_ptr); + add_device(device_base, dev_ptr); if (i == 0) // clint_factory clint = std::static_pointer_cast(dev_ptr); @@ -277,6 +276,11 @@ void sim_t::step(size_t n) } } +void sim_t::add_device(reg_t addr, std::shared_ptr dev) { + bus.add_device(addr, dev.get()); + devices.push_back(dev); +} + void sim_t::set_debug(bool value) { debug = value; @@ -369,8 +373,7 @@ void sim_t::set_rom() rom.resize((rom.size() + align - 1) / align * align); std::shared_ptr boot_rom(new rom_device_t(rom)); - bus.add_device(DEFAULT_RSTVEC, boot_rom.get()); - devices.push_back(boot_rom); + add_device(DEFAULT_RSTVEC, boot_rom); } char* sim_t::addr_to_mem(reg_t paddr) { diff 
--git a/riscv/sim.h b/riscv/sim.h index a3445db257..2b58975515 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -39,6 +39,7 @@ class sim_t : public htif_t, public simif_t int run(); void set_debug(bool value); void set_histogram(bool value); + void add_device(reg_t addr, std::shared_ptr dev); // Configure logging // From ea6740350fcdd5fe63f299d931b6d2b9ff7797ef Mon Sep 17 00:00:00 2001 From: Tim Newsome Date: Mon, 26 Jun 2023 17:33:18 -0700 Subject: [PATCH 071/127] Let debugger control hart availability This change lets me test OpenOCD's behavior when harts become available. It only affects how things look to the debugger. Harts that are "unavailable" still execute code as usual. Control is implemented through the 2 LSBs of the DMCUSTOM register in the Debug Module. --- riscv/debug_module.cc | 45 +++++++++++++++++++++++++++++++++++-------- riscv/debug_module.h | 6 ++++++ 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/riscv/debug_module.cc b/riscv/debug_module.cc index f7163ef41b..4a7a802ef5 100644 --- a/riscv/debug_module.cc +++ b/riscv/debug_module.cc @@ -73,6 +73,9 @@ debug_module_t::debug_module_t(simif_t *sim, const debug_module_config_t &config jal(ZERO, debug_abstract_start - DEBUG_ROM_WHERETO)); memset(debug_abstract, 0, sizeof(debug_abstract)); + for (unsigned i = 0; i < sizeof(hart_available_state) / sizeof(*hart_available_state); i++) { + hart_available_state[i] = true; + } reset(); } @@ -202,7 +205,8 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) if (hart_state[id].haltgroup) { for (const auto& [hart_id, hart] : sim->get_harts()) { if (!hart_state[hart_id].halted && - hart_state[hart_id].haltgroup == hart_state[id].haltgroup) { + hart_state[hart_id].haltgroup == hart_state[id].haltgroup && + hart_available(hart_id)) { hart->halt_request = hart->HR_GROUP; // TODO: What if the debugger comes and writes dmcontrol before the // halt occurs? 
@@ -337,6 +341,13 @@ void debug_module_t::sb_write() } } +bool debug_module_t::hart_available(unsigned hart_id) const +{ + if (hart_id < sizeof(hart_available_state) / sizeof(*hart_available_state)) + return hart_available_state[hart_id]; + return true; +} + bool debug_module_t::dmi_read(unsigned address, uint32_t *value) { uint32_t result = 0; @@ -391,6 +402,8 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) dmstatus.allnonexistant = true; dmstatus.allresumeack = true; dmstatus.anyresumeack = false; + dmstatus.allunavail = true; + dmstatus.anyunavail = false; for (const auto& [hart_id, hart] : sim->get_harts()) { if (hart_selected(hart_id)) { dmstatus.allnonexistant = false; @@ -399,12 +412,19 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) } else { dmstatus.allresumeack = false; } + auto hart = sim->get_harts().at(hart_id); if (hart_state[hart_id].halted) { dmstatus.allrunning = false; dmstatus.anyhalted = true; + dmstatus.allunavail = false; + } else if (!hart_available(hart_id)) { + dmstatus.allrunning = false; + dmstatus.allhalted = false; + dmstatus.anyunavail = true; } else { dmstatus.allhalted = false; dmstatus.anyrunning = true; + dmstatus.allunavail = false; } } } @@ -414,9 +434,6 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) // non-existant hartsel. 
dmstatus.anynonexistant = dmcontrol.hartsel >= sim->get_cfg().nprocs(); - dmstatus.allunavail = false; - dmstatus.anyunavail = false; - result = set_field(result, DM_DMSTATUS_IMPEBREAK, dmstatus.impebreak); result = set_field(result, DM_DMSTATUS_ALLHAVERESET, selected_hart_state().havereset); @@ -522,6 +539,11 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) case DM_DMCS2: result = set_field(result, DM_DMCS2_GROUP, selected_hart_state().haltgroup); break; + case DM_CUSTOM: + for (unsigned i = 0; i < sizeof(hart_available_state) / sizeof(*hart_available_state); i++) { + result |= hart_available_state[i] << i; + } + break; default: result = 0; D(fprintf(stderr, "Unexpected. Returning Error.")); @@ -790,16 +812,18 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) if (get_field(value, DM_DMCONTROL_ACKHAVERESET)) { hart_state[hart_id].havereset = false; } - hart->halt_request = dmcontrol.haltreq ? hart->HR_REGULAR : hart->HR_NONE; - if (dmcontrol.haltreq) { + if (dmcontrol.haltreq && hart_available(hart_id)) { + hart->halt_request = hart->HR_REGULAR; D(fprintf(stderr, "halt hart %d\n", hart_id)); + } else { + hart->halt_request = hart->HR_NONE; } - if (dmcontrol.resumereq) { + if (dmcontrol.resumereq && hart_available(hart_id)) { D(fprintf(stderr, "resume hart %d\n", hart_id)); debug_rom_flags[hart_id] |= (1 << DEBUG_ROM_FLAG_RESUME); hart_state[hart_id].resumeack = false; } - if (dmcontrol.hartreset) { + if (dmcontrol.hartreset && hart_available(hart_id)) { hart->reset(); } } @@ -903,6 +927,11 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) selected_hart_state().haltgroup = get_field(value, DM_DMCS2_GROUP); } return true; + case DM_CUSTOM: + for (unsigned i = 0; i < sizeof(hart_available_state) / sizeof(*hart_available_state); i++) { + hart_available_state[i] = get_field(value, 1< Date: Thu, 6 Jul 2023 16:27:46 -0700 Subject: [PATCH 072/127] Disassemble Zicond by default In general, the strategy has been that the 
disassembler enables a maximal set of non-conflicting extensions, thereby doing the right thing for the largest number of users. --- disasm/disasm.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 6f93d241ec..b2bed47adb 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2329,7 +2329,7 @@ disassembler_t::disassembler_t(const isa_parser_t *isa) // next-highest priority: other instructions in same base ISA std::string fallback_isa_string = std::string("rv") + std::to_string(isa->get_max_xlen()) + - "gqchv_zfh_zba_zbb_zbc_zbs_zcb_zicbom_zicboz_zkn_zkr_zks_svinval"; + "gqchv_zfh_zba_zbb_zbc_zbs_zcb_zicbom_zicboz_zicond_zkn_zkr_zks_svinval"; isa_parser_t fallback_isa(fallback_isa_string.c_str(), DEFAULT_PRIV); add_instructions(&fallback_isa); From 47ab8926b04a592128b924da0bdbd622213568f8 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 11 Jul 2023 17:48:04 -0700 Subject: [PATCH 073/127] Move isa_parser_t to libdisasm out of libriscv --- disasm/disasm.mk.in | 1 + {riscv => disasm}/isa_parser.cc | 0 riscv/riscv.mk.in | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename {riscv => disasm}/isa_parser.cc (100%) diff --git a/disasm/disasm.mk.in b/disasm/disasm.mk.in index 445c430f55..090911bd20 100644 --- a/disasm/disasm.mk.in +++ b/disasm/disasm.mk.in @@ -1,5 +1,6 @@ disasm_srcs = \ disasm.cc \ + isa_parser.cc \ regnames.cc \ disasm_install_lib = yes diff --git a/riscv/isa_parser.cc b/disasm/isa_parser.cc similarity index 100% rename from riscv/isa_parser.cc rename to disasm/isa_parser.cc diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 1ad8b23b2e..3a0333c544 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -49,7 +49,6 @@ riscv_precompiled_hdrs = \ insn_template.h \ riscv_srcs = \ - isa_parser.cc \ processor.cc \ execute.cc \ dts.cc \ From 2eb2b40102ba0302e1b28959072d9ccb91930a8f Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Tue, 11 Jul 2023 17:53:23 -0700 Subject: [PATCH 074/127] 
Remove dependency of isa_parser_t on extension_t --- disasm/isa_parser.cc | 7 +------ riscv/isa_parser.h | 9 +++------ riscv/processor.cc | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index 59472a43f0..c18a72d8b6 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -1,5 +1,4 @@ #include "isa_parser.h" -#include "extension.h" static std::string strtolower(const char* str) { @@ -292,11 +291,7 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) if (ext_str.size() == 1) { bad_isa_string(str, "single 'X' is not a proper name"); } else if (ext_str != "xdummy") { - extension_t* x = find_extension(ext_str.substr(1).c_str())(); - if (!extensions.insert(std::make_pair(x->name(), x)).second) { - fprintf(stderr, "extensions must have unique names (got two named \"%s\"!)\n", x->name()); - abort(); - } + extensions.insert(ext_str.substr(1)); } } else { bad_isa_string(str, ("unsupported extension: " + ext_str).c_str()); diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 5b04347520..335a42bfe4 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -6,9 +6,7 @@ #include #include -#include - -class extension_t; +#include typedef enum { // 65('A') ~ 90('Z') is reserved for standard isa in misa @@ -109,15 +107,14 @@ class isa_parser_t { std::bitset get_extension_table() const { return extension_table; } - const std::unordered_map & - get_extensions() const { return extensions; } + const std::set &get_extensions() const { return extensions; } protected: unsigned max_xlen; reg_t max_isa; std::bitset extension_table; std::string isa_string; - std::unordered_map extensions; + std::set extensions; }; #endif diff --git a/riscv/processor.cc b/riscv/processor.cc index 1d5675a51a..e81375a2ac 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -61,7 +61,7 @@ processor_t::processor_t(const isa_parser_t *isa, const cfg_t *cfg, disassembler = new disassembler_t(isa); for (auto e : 
isa->get_extensions()) - register_extension(e.second); + register_extension(find_extension(e.c_str())()); set_pmp_granularity(1 << PMP_SHIFT); set_pmp_num(cfg->pmpregions); From fe0576044590751e0a10a22ec28077f72d8c5414 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Sun, 9 Jul 2023 22:21:54 -0700 Subject: [PATCH 075/127] build: Support project-defined LDFLAGS --- Makefile.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index b5013bde79..f236576b99 100644 --- a/Makefile.in +++ b/Makefile.in @@ -264,7 +264,7 @@ $$($(2)_test_objs) : %.o : %.cc $(COMPILE) -c $$< $$($(2)_test_exes) : %-utst : %.t.o $$($(2)_test_libnames) - $(LINK) -o $$@ $$< $$($(2)_test_libnames) $(LIBS) + $(LINK) $$($(2)_LDFLAGS) -o $$@ $$< $$($(2)_test_libnames) $(LIBS) $(2)_deps += $$($(2)_test_deps) $(2)_junk += \ @@ -292,7 +292,7 @@ $$($(2)_prog_objs) : %.o : %.cc $(COMPILE) -c $$< $$($(2)_prog_exes) : % : %.o $$($(2)_prog_libnames) - $(LINK) -o $$@ $$< $$($(2)_prog_libnames) $(LIBS) + $(LINK) $$($(2)_LDFLAGS) -o $$@ $$< $$($(2)_prog_libnames) $(LIBS) $(2)_deps += $$($(2)_prog_deps) $(2)_junk += $$($(2)_prog_objs) $$($(2)_prog_deps) $$($(2)_prog_exes) @@ -307,7 +307,7 @@ $$($(2)_install_prog_objs) : %.o : %.cc $$($(2)_gen_hdrs) $(COMPILE) -c $$< $$($(2)_install_prog_exes) : % : %.o $$($(2)_prog_libnames) - $(LINK) -o $$@ $$< $$($(2)_prog_libnames) $(LIBS) + $(LINK) $$($(2)_LDFLAGS) -o $$@ $$< $$($(2)_prog_libnames) $(LIBS) $(2)_deps += $$($(2)_install_prog_deps) $(2)_junk += \ From 3b0d3c2004e97bbbcc1ae17a4053f1913b01991f Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Sun, 9 Jul 2023 22:22:11 -0700 Subject: [PATCH 076/127] build: Force inclusion of all symbols from extension.o in spike-main The --extension feature requires that all symbols in extension.o be available when the libraries are dynamically loaded by dlopen. Prepending extension.o to the linker command adds the otherwise omitted symbols to spike's dynamic symbol table. 
--- spike_main/spike_main.mk.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spike_main/spike_main.mk.in b/spike_main/spike_main.mk.in index 25a7a6c61c..c842c4ed90 100644 --- a/spike_main/spike_main.mk.in +++ b/spike_main/spike_main.mk.in @@ -14,3 +14,7 @@ spike_main_install_prog_srcs = \ spike_main_srcs = \ spike_main_CFLAGS = -fPIC + +# This hack adds all symbols from extension.o to spike's dynamic symbol +# table, which is required for dynamically loaded --extension libraries +spike_main_LDFLAGS = extension.o From faceda27e60c0721b3233658198bcf50d09b02cc Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Thu, 13 Jul 2023 09:27:46 +0800 Subject: [PATCH 077/127] refactor: mcontrol/mcontrol6: extend check_triggers() with tval parameter --- riscv/mmu.cc | 6 +++--- riscv/mmu.h | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/riscv/mmu.cc b/riscv/mmu.cc index 3f90060e82..f6d23a3bf6 100644 --- a/riscv/mmu.cc +++ b/riscv/mmu.cc @@ -169,7 +169,7 @@ bool mmu_t::mmio(reg_t paddr, size_t len, uint8_t* bytes, access_type type) return true; } -void mmu_t::check_triggers(triggers::operation_t operation, reg_t address, bool virt, std::optional data) +void mmu_t::check_triggers(triggers::operation_t operation, reg_t address, bool virt, reg_t tval, std::optional data) { if (matched_trigger || !proc) return; @@ -179,13 +179,13 @@ void mmu_t::check_triggers(triggers::operation_t operation, reg_t address, bool if (match.has_value()) switch (match->timing) { case triggers::TIMING_BEFORE: - throw triggers::matched_t(operation, address, match->action, virt); + throw triggers::matched_t(operation, tval, match->action, virt); case triggers::TIMING_AFTER: // We want to take this exception on the next instruction. We check // whether to do so in the I$ refill path, so flush the I$. 
flush_icache(); - matched_trigger = new triggers::matched_t(operation, address, match->action, virt); + matched_trigger = new triggers::matched_t(operation, tval, match->action, virt); } } diff --git a/riscv/mmu.h b/riscv/mmu.h index 46c54ce88a..62e9a73b79 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -402,7 +402,10 @@ class mmu_t bool mmio_store(reg_t paddr, size_t len, const uint8_t* bytes); bool mmio(reg_t paddr, size_t len, uint8_t* bytes, access_type type); bool mmio_ok(reg_t paddr, access_type type); - void check_triggers(triggers::operation_t operation, reg_t address, bool virt, std::optional data = std::nullopt); + void check_triggers(triggers::operation_t operation, reg_t address, bool virt, std::optional data = std::nullopt) { + check_triggers(operation, address, virt, address, data); + } + void check_triggers(triggers::operation_t operation, reg_t address, bool virt, reg_t tval, std::optional data); reg_t translate(mem_access_info_t access_info, reg_t len); reg_t pte_load(reg_t pte_paddr, reg_t addr, bool virt, access_type trap_type, size_t ptesize) { From 4aea5a05ad7bf1b48b2d87e000e6575f68747033 Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Thu, 13 Jul 2023 10:57:32 +0800 Subject: [PATCH 078/127] fix mcontrol's tval on cbo_zero The tval should capture the effective address on an (trigger) exception. 
Reference: https://github.com/riscv/riscv-CMOs/issues/55 --- riscv/mmu.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/riscv/mmu.h b/riscv/mmu.h index 62e9a73b79..cfbe57cc6a 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -219,8 +219,10 @@ class mmu_t void cbo_zero(reg_t addr) { auto base = addr & ~(blocksz - 1); - for (size_t offset = 0; offset < blocksz; offset += 1) + for (size_t offset = 0; offset < blocksz; offset += 1) { + check_triggers(triggers::OPERATION_STORE, base + offset, false, addr, std::nullopt); store(base + offset, 0); + } } void clean_inval(reg_t addr, bool clean, bool inval) { From 93aad1d355d309ab785e61f66183d96b76dfccc1 Mon Sep 17 00:00:00 2001 From: Yinan Xu Date: Tue, 18 Jul 2023 10:57:29 +0800 Subject: [PATCH 079/127] mmu: fetch instruction bytes in ascending order Fetching instruction bytes in descending order would result in wrong xtval update values. --- riscv/mmu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/riscv/mmu.h b/riscv/mmu.h index 46c54ce88a..8c2bdbea6f 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -294,13 +294,13 @@ class mmu_t } else if (length == 2) { // entire instruction already fetched } else if (length == 6) { - insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32; insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16; + insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32; } else { static_assert(sizeof(insn_bits_t) == 8, "insn_bits_t must be uint64_t"); - insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 6)) << 48; - insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32; insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16; + insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32; + insn |= 
(insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 6)) << 48; } insn_fetch_t fetch = {proc->decode_insn(insn), insn}; From 8658429647eb6952707e2bf2a3bb4eca75a8e379 Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Wed, 19 Jul 2023 14:15:57 +0800 Subject: [PATCH 080/127] mcontrol/mcontrol6 triggers on cbo.flush/clean The mcontrol/mcontrol6 store address before has a higher priority over page faults and access faults. Thus, trigger checking should happen before the translate(). This commit checks all addresses of the cache block. Reference: Debug spec 1.0, 5.5.3 Cache Operations Reference: CMO spec 1.0.1, 2.5.4 Breakpoint Exceptions and Debug Mode Entry --- riscv/mmu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/riscv/mmu.h b/riscv/mmu.h index cfbe57cc6a..a54a4838d0 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -226,6 +226,9 @@ class mmu_t } void clean_inval(reg_t addr, bool clean, bool inval) { + auto base = addr & ~(blocksz - 1); + for (size_t offset = 0; offset < blocksz; offset += 1) + check_triggers(triggers::OPERATION_STORE, base + offset, false, addr, std::nullopt); convert_load_traps_to_store_traps({ const reg_t paddr = translate(generate_access_info(addr, LOAD, {false, false, false}), 1); if (sim->reservable(paddr)) { From 2d802093471e1c0515e3f58a41da4451afa5103e Mon Sep 17 00:00:00 2001 From: Atul Khare Date: Mon, 10 Jul 2023 15:58:44 -0700 Subject: [PATCH 081/127] Add Smcsrind/Sscsrind extensions --- disasm/isa_parser.cc | 4 ++++ riscv/isa_parser.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index c18a72d8b6..f4d9da44eb 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -286,6 +286,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) } else if (ext_str == "zvkt") { } else if (ext_str == "sstc") { extension_table[EXT_SSTC] = true; + } else if (ext_str == "smcsrind") { + extension_table[EXT_SMCSRIND] = true; + } else if (ext_str == "sscsrind") { + 
extension_table[EXT_SSCSRIND] = true; } else if (ext_str[0] == 'x') { extension_table['X'] = true; if (ext_str.size() == 1) { diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 335a42bfe4..bba5a91c56 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -77,6 +77,8 @@ typedef enum { EXT_SSTC, EXT_ZACAS, EXT_INTERNAL_ZFH_MOVE, + EXT_SMCSRIND, + EXT_SSCSRIND, NUM_ISA_EXTENSIONS } isa_extension_t; From a6bc48b95e4c7cbf4e6f70315d884b1fe0a06d7f Mon Sep 17 00:00:00 2001 From: Atul Khare Date: Mon, 10 Jul 2023 14:47:54 -0700 Subject: [PATCH 082/127] Regenerate encoding.h --- riscv/encoding.h | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/riscv/encoding.h b/riscv/encoding.h index db7b0215b6..9666774076 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (3ca60c5) + * https://github.com/riscv/riscv-opcodes (be53d24) */ #ifndef RISCV_CSR_ENCODING_H @@ -3059,6 +3059,11 @@ #define CSR_STIMECMP 0x14d #define CSR_SISELECT 0x150 #define CSR_SIREG 0x151 +#define CSR_SIREG2 0x152 +#define CSR_SIREG3 0x153 +#define CSR_SIREG4 0x155 +#define CSR_SIREG5 0x156 +#define CSR_SIREG6 0x157 #define CSR_STOPEI 0x15c #define CSR_SATP 0x180 #define CSR_SCONTEXT 0x5a8 @@ -3073,6 +3078,11 @@ #define CSR_VSTIMECMP 0x24d #define CSR_VSISELECT 0x250 #define CSR_VSIREG 0x251 +#define CSR_VSIREG2 0x252 +#define CSR_VSIREG3 0x253 +#define CSR_VSIREG4 0x255 +#define CSR_VSIREG5 0x256 +#define CSR_VSIREG6 0x257 #define CSR_VSTOPEI 0x25c #define CSR_VSATP 0x280 #define CSR_HSTATUS 0x600 @@ -3140,6 +3150,11 @@ #define CSR_MTVAL2 0x34b #define CSR_MISELECT 0x350 #define CSR_MIREG 0x351 +#define CSR_MIREG2 0x352 +#define CSR_MIREG3 0x353 +#define CSR_MIREG4 0x355 +#define CSR_MIREG5 0x356 +#define CSR_MIREG6 0x357 #define CSR_MTOPEI 0x35c #define CSR_PMPCFG0 0x3a0 #define CSR_PMPCFG1 0x3a1 @@ -4926,6 +4941,11 @@ DECLARE_CSR(sip, 
CSR_SIP) DECLARE_CSR(stimecmp, CSR_STIMECMP) DECLARE_CSR(siselect, CSR_SISELECT) DECLARE_CSR(sireg, CSR_SIREG) +DECLARE_CSR(sireg2, CSR_SIREG2) +DECLARE_CSR(sireg3, CSR_SIREG3) +DECLARE_CSR(sireg4, CSR_SIREG4) +DECLARE_CSR(sireg5, CSR_SIREG5) +DECLARE_CSR(sireg6, CSR_SIREG6) DECLARE_CSR(stopei, CSR_STOPEI) DECLARE_CSR(satp, CSR_SATP) DECLARE_CSR(scontext, CSR_SCONTEXT) @@ -4940,6 +4960,11 @@ DECLARE_CSR(vsip, CSR_VSIP) DECLARE_CSR(vstimecmp, CSR_VSTIMECMP) DECLARE_CSR(vsiselect, CSR_VSISELECT) DECLARE_CSR(vsireg, CSR_VSIREG) +DECLARE_CSR(vsireg2, CSR_VSIREG2) +DECLARE_CSR(vsireg3, CSR_VSIREG3) +DECLARE_CSR(vsireg4, CSR_VSIREG4) +DECLARE_CSR(vsireg5, CSR_VSIREG5) +DECLARE_CSR(vsireg6, CSR_VSIREG6) DECLARE_CSR(vstopei, CSR_VSTOPEI) DECLARE_CSR(vsatp, CSR_VSATP) DECLARE_CSR(hstatus, CSR_HSTATUS) @@ -5007,6 +5032,11 @@ DECLARE_CSR(mtinst, CSR_MTINST) DECLARE_CSR(mtval2, CSR_MTVAL2) DECLARE_CSR(miselect, CSR_MISELECT) DECLARE_CSR(mireg, CSR_MIREG) +DECLARE_CSR(mireg2, CSR_MIREG2) +DECLARE_CSR(mireg3, CSR_MIREG3) +DECLARE_CSR(mireg4, CSR_MIREG4) +DECLARE_CSR(mireg5, CSR_MIREG5) +DECLARE_CSR(mireg6, CSR_MIREG6) DECLARE_CSR(mtopei, CSR_MTOPEI) DECLARE_CSR(pmpcfg0, CSR_PMPCFG0) DECLARE_CSR(pmpcfg1, CSR_PMPCFG1) From bc5842f94517c5fa760409cf8c956bbc7c37ead5 Mon Sep 17 00:00:00 2001 From: Atul Khare Date: Tue, 11 Jul 2023 13:31:12 -0700 Subject: [PATCH 083/127] Add Smcsrind/Sscsrind support This adds the following CSRs: miselect (0x350), mireg (0x351), mireg2/3 (0x352, 0x353), mireg4-6 (0x355 - 0x357), siselect (0x150), sireg (0x151), sireg2/3 (0x152, 0x153), sireg4-6 (0x155 - 0x157), vsiselect (0x250), vsireg (0x251), vsireg2/3 (0x252, 0x253), vsireg4-6 (0x255 - 0x257). Presently, attempts to read / write from ireg? registers will fail, and future extensions will provide proxy CSR mappings for the respective ?ireg CSRs. 
--- riscv/csrs.cc | 62 ++++++++++++++++++++++++++++++++++++++++++++++ riscv/csrs.h | 28 +++++++++++++++++++++ riscv/processor.cc | 40 ++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+) diff --git a/riscv/csrs.cc b/riscv/csrs.cc index 7ea07d104c..4d1e546775 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -1574,3 +1574,65 @@ void jvt_csr_t::verify_permissions(insn_t insn, bool write) const { } } } + +virtualized_indirect_csr_t::virtualized_indirect_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt): + virtualized_csr_t(proc, orig, virt) { +} + +void virtualized_indirect_csr_t::verify_permissions(insn_t insn, bool write) const { + virtualized_csr_t::verify_permissions(insn, write); + if (state->v) + virt_csr->verify_permissions(insn, write); + else + orig_csr->verify_permissions(insn, write); +} + +sscsrind_reg_csr_t::sscsrind_reg_csr_t(processor_t* const proc, const reg_t addr, csr_t_p iselect) : + csr_t(proc, addr), + iselect(iselect) { +} + +void sscsrind_reg_csr_t::verify_permissions(insn_t insn, bool write) const { + // Don't call base verify_permission for VS registers remapped to S-mode + if (insn.csr() == address) + csr_t::verify_permissions(insn, write); + + csr_t_p proxy_csr = get_reg(); + if (proxy_csr == nullptr) { + if (!state->v) { + throw trap_illegal_instruction(insn.bits()); + } else { + throw trap_virtual_instruction(insn.bits()); + } + } + proxy_csr->verify_permissions(insn, write); +} + + +reg_t sscsrind_reg_csr_t::read() const noexcept { + csr_t_p target_csr = get_reg(); + if (target_csr != nullptr) { + return target_csr->read(); + } + return 0; +} + +bool sscsrind_reg_csr_t::unlogged_write(const reg_t val) noexcept { + csr_t_p proxy_csr = get_reg(); + if (proxy_csr != nullptr) { + proxy_csr->write(val); + } + return false; +} + +// Returns the actual CSR that maps to value in *siselect or nullptr if no mapping exists +csr_t_p sscsrind_reg_csr_t::get_reg() const noexcept { + auto proxy = ireg_proxy; + auto isel = 
iselect->read(); + auto it = proxy.find(isel); + return it != proxy.end() ? it->second : nullptr; +} + +void sscsrind_reg_csr_t::add_ireg_proxy(const reg_t iselect_value, csr_t_p csr) { + ireg_proxy[iselect_value] = csr; +} diff --git a/riscv/csrs.h b/riscv/csrs.h index 07d6d82ac5..d28d12446a 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -6,8 +6,12 @@ #include "encoding.h" // For reg_t: #include "decode.h" +// For std::unordered_map +#include // For std::shared_ptr #include +// For std::optional +#include // For access_type: #include "memtracer.h" #include @@ -787,4 +791,28 @@ class jvt_csr_t: public basic_csr_t { jvt_csr_t(processor_t* const proc, const reg_t addr, const reg_t init); virtual void verify_permissions(insn_t insn, bool write) const override; }; + +// Sscsrind registers needs permissions checked +// (the original virtualized_csr_t does not call verify_permission of the underlying CSRs) +class virtualized_indirect_csr_t: public virtualized_csr_t { + public: + virtualized_indirect_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt); + virtual void verify_permissions(insn_t insn, bool write) const override; +}; + +class sscsrind_reg_csr_t : public csr_t { + public: + typedef std::shared_ptr sscsrind_reg_csr_t_p; + sscsrind_reg_csr_t(processor_t* const proc, const reg_t addr, csr_t_p iselect); + reg_t read() const noexcept override; + virtual void verify_permissions(insn_t insn, bool write) const override; + void add_ireg_proxy(const reg_t iselect_val, csr_t_p proxy_csr); + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; + private: + csr_t_p iselect; + std::unordered_map ireg_proxy; + csr_t_p get_reg() const noexcept; +}; + #endif diff --git a/riscv/processor.cc b/riscv/processor.cc index e81375a2ac..c41b3cf366 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -524,6 +524,46 @@ void state_t::reset(processor_t* const proc, reg_t max_isa) if (proc->extension_enabled(EXT_ZCMT)) csrmap[CSR_JVT] = jvt = 
std::make_shared(proc, CSR_JVT, 0); + + // Smcsrind / Sscsrind + csr_t_p miselect; + csr_t_p siselect; + csr_t_p vsiselect; + sscsrind_reg_csr_t::sscsrind_reg_csr_t_p mireg[6]; + sscsrind_reg_csr_t::sscsrind_reg_csr_t_p sireg[6]; + sscsrind_reg_csr_t::sscsrind_reg_csr_t_p vsireg[6]; + + if (proc->extension_enabled_const(EXT_SMCSRIND)) { + const reg_t mireg_csrs[] = { CSR_MIREG, CSR_MIREG2, CSR_MIREG3, CSR_MIREG4, CSR_MIREG5, CSR_MIREG6 }; + auto i = 0; + for (auto csr : mireg_csrs) { + csrmap[csr] = mireg[i] = std::make_shared(proc, csr, miselect); + i++; + } + } + + if (proc->extension_enabled_const(EXT_SSCSRIND)) { + vsiselect = std::make_shared(proc, CSR_VSISELECT, 0); + csrmap[CSR_VSISELECT] = vsiselect; + siselect = std::make_shared(proc, CSR_SISELECT, 0); + csrmap[CSR_SISELECT] = std::make_shared(proc, siselect, vsiselect); + + const reg_t vsireg_csrs[] = { CSR_VSIREG, CSR_VSIREG2, CSR_VSIREG3, CSR_VSIREG4, CSR_VSIREG5, CSR_VSIREG6 }; + auto i = 0; + for (auto csr : vsireg_csrs) { + csrmap[csr] = vsireg[i] = std::make_shared(proc, csr, vsiselect); + i++; + } + + const reg_t sireg_csrs[] = { CSR_SIREG, CSR_SIREG2, CSR_SIREG3, CSR_SIREG4, CSR_SIREG5, CSR_SIREG6 }; + i = 0; + for (auto csr : sireg_csrs) { + sireg[i] = std::make_shared(proc, csr, siselect); + csrmap[csr] = std::make_shared(proc, sireg[i], vsireg[i]); + i++; + } + } + serialized = false; log_reg_write.clear(); From cb013516665addb45dadd197028762fd3757bdb0 Mon Sep 17 00:00:00 2001 From: Michal Terepeta Date: Mon, 17 Jul 2023 06:06:31 +0000 Subject: [PATCH 084/127] Introduce `abstract_mem_t` to allow custom implementations This change allows to create custom implementations of `abstract_mem_t` and inject them when constructing `sim_t`. The current `mem_t` implementation remains unchanged. Fixes #1408. 
--- ci-tests/testlib.c | 7 ++++--- riscv/devices.h | 19 ++++++++++++++----- riscv/dts.cc | 2 +- riscv/dts.h | 2 +- riscv/sim.cc | 4 ++-- riscv/sim.h | 4 ++-- spike_main/spike.cc | 9 +++++---- 7 files changed, 29 insertions(+), 18 deletions(-) diff --git a/ci-tests/testlib.c b/ci-tests/testlib.c index 6342f9d06d..2525b18bc2 100644 --- a/ci-tests/testlib.c +++ b/ci-tests/testlib.c @@ -2,9 +2,9 @@ // Copied from spike main. // TODO: This should really be provided in libriscv -static std::vector> make_mems(const std::vector &layout) +static std::vector> make_mems(const std::vector &layout) { - std::vector> mems; + std::vector> mems; mems.reserve(layout.size()); for (const auto &cfg : layout) { mems.push_back(std::make_pair(cfg.get_base(), new mem_t(cfg.get_size()))); @@ -41,7 +41,8 @@ int main() .support_haltgroups = true, .support_impebreak = true }; - std::vector> mems = make_mems(cfg.mem_layout()); + std::vector> mems = + make_mems(cfg.mem_layout()); sim_t sim(&cfg, false, mems, plugin_devices, diff --git a/riscv/devices.h b/riscv/devices.h index b752a21ecb..6ef32e9f4f 100644 --- a/riscv/devices.h +++ b/riscv/devices.h @@ -36,17 +36,26 @@ class rom_device_t : public abstract_device_t { std::vector data; }; -class mem_t : public abstract_device_t { +class abstract_mem_t : public abstract_device_t { + public: + virtual ~abstract_mem_t() = default; + + virtual char* contents(reg_t addr) = 0; + virtual reg_t size() = 0; + virtual void dump(std::ostream& o) = 0; +}; + +class mem_t : public abstract_mem_t { public: mem_t(reg_t size); mem_t(const mem_t& that) = delete; - ~mem_t(); + ~mem_t() override; bool load(reg_t addr, size_t len, uint8_t* bytes) override { return load_store(addr, len, bytes, false); } bool store(reg_t addr, size_t len, const uint8_t* bytes) override { return load_store(addr, len, const_cast(bytes), true); } - char* contents(reg_t addr); - reg_t size() { return sz; } - void dump(std::ostream& o); + char* contents(reg_t addr) override; + reg_t size() 
override { return sz; } + void dump(std::ostream& o) override; private: bool load_store(reg_t addr, size_t len, uint8_t* bytes, bool store); diff --git a/riscv/dts.cc b/riscv/dts.cc index cc65e3ce5e..9f73bac73a 100644 --- a/riscv/dts.cc +++ b/riscv/dts.cc @@ -17,7 +17,7 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, const char* bootargs, size_t pmpregions, std::vector procs, - std::vector> mems, + std::vector> mems, std::string device_nodes) { std::stringstream s; diff --git a/riscv/dts.h b/riscv/dts.h index 7ec1ceb692..9240124e84 100644 --- a/riscv/dts.h +++ b/riscv/dts.h @@ -11,7 +11,7 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, const char* bootargs, size_t pmpregions, std::vector procs, - std::vector> mems, + std::vector> mems, std::string device_nodes); std::string dts_compile(const std::string& dts); diff --git a/riscv/sim.cc b/riscv/sim.cc index c60d30bab2..fc75a37cf2 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -37,7 +37,7 @@ extern device_factory_t* plic_factory; extern device_factory_t* ns16550_factory; sim_t::sim_t(const cfg_t *cfg, bool halted, - std::vector> mems, + std::vector> mems, std::vector plugin_device_factories, const std::vector& args, const debug_module_config_t &dm_config, @@ -380,7 +380,7 @@ char* sim_t::addr_to_mem(reg_t paddr) { if (!paddr_ok(paddr)) return NULL; auto desc = bus.find_device(paddr); - if (auto mem = dynamic_cast(desc.second)) + if (auto mem = dynamic_cast(desc.second)) if (paddr - desc.first < mem->size()) return mem->contents(paddr - desc.first); return NULL; diff --git a/riscv/sim.h b/riscv/sim.h index 2b58975515..2455263842 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -26,7 +26,7 @@ class sim_t : public htif_t, public simif_t { public: sim_t(const cfg_t *cfg, bool halted, - std::vector> mems, + std::vector> mems, std::vector plugin_device_factories, const std::vector& args, const debug_module_config_t &dm_config, const char *log_path, @@ -68,7 +68,7 @@ class sim_t : public 
htif_t, public simif_t private: isa_parser_t isa; const cfg_t * const cfg; - std::vector> mems; + std::vector> mems; std::vector procs; std::map harts; std::pair initrd_range; diff --git a/spike_main/spike.cc b/spike_main/spike.cc index 4766f6dad0..7b28e25205 100644 --- a/spike_main/spike.cc +++ b/spike_main/spike.cc @@ -104,7 +104,7 @@ static std::ifstream::pos_type get_file_size(const char *filename) } static void read_file_bytes(const char *filename,size_t fileoff, - mem_t* mem, size_t memoff, size_t read_sz) + abstract_mem_t* mem, size_t memoff, size_t read_sz) { std::ifstream in(filename, std::ios::in | std::ios::binary); in.seekg(fileoff, std::ios::beg); @@ -260,9 +260,9 @@ static std::vector parse_mem_layout(const char* arg) return merged_mem; } -static std::vector> make_mems(const std::vector &layout) +static std::vector> make_mems(const std::vector &layout) { - std::vector> mems; + std::vector> mems; mems.reserve(layout.size()); for (const auto &cfg : layout) { mems.push_back(std::make_pair(cfg.get_base(), new mem_t(cfg.get_size()))); @@ -473,7 +473,8 @@ int main(int argc, char** argv) if (!*argv1) help(); - std::vector> mems = make_mems(cfg.mem_layout()); + std::vector> mems = + make_mems(cfg.mem_layout()); if (kernel && check_file_exists(kernel)) { const char *isa = cfg.isa(); From 6bfab0e212b381953a5c8b6cc6e8bd1dbed56ec8 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Thu, 20 Jul 2023 18:27:53 +0200 Subject: [PATCH 085/127] Fix compilation warning in riscv/execute.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ../riscv/execute.cc: In function ‘void commit_log_print_insn(processor_t*, reg_t, insn_t)’: ../riscv/execute.cc:132:16: warning: ‘prefix’ may be used uninitialized [-Wmaybe-uninitialized] 132 | fprintf(log_file, " %c%-2d ", prefix, rd); | ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../riscv/execute.cc:88:10: note: ‘prefix’ was declared here 88 | char prefix; | ^~~~~~ --- riscv/execute.cc | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/execute.cc b/riscv/execute.cc index 295879d4db..591090bd96 100644 --- a/riscv/execute.cc +++ b/riscv/execute.cc @@ -85,7 +85,7 @@ static void commit_log_print_insn(processor_t *p, reg_t pc, insn_t insn) if (item.first == 0) continue; - char prefix; + char prefix = ' '; int size; int rd = item.first >> 4; bool is_vec = false; From f6e7338b26f1508bdcc823ff1920427bf72e95ae Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Mon, 17 Jul 2023 12:04:56 +0800 Subject: [PATCH 086/127] legalize menvcfg.CBIE The value 2 of menvcfg.CBIE is reserved. This commit legalizes it to 0 by adding a specialized class envcfg_csr_t. Reference: https://github.com/riscv/riscv-CMOs/issues/65 --- riscv/csrs.cc | 13 +++++++++++++ riscv/csrs.h | 7 +++++++ riscv/processor.cc | 2 +- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/riscv/csrs.cc b/riscv/csrs.cc index 7ea07d104c..a895b6c362 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -901,6 +901,19 @@ bool masked_csr_t::unlogged_write(const reg_t val) noexcept { return basic_csr_t::unlogged_write((read() & ~mask) | (val & mask)); } +envcfg_csr_t::envcfg_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, + const reg_t init): + masked_csr_t(proc, addr, mask, init) { + // In unlogged_write() we WARLize this field for all three of [msh]envcfg + assert(MENVCFG_CBIE == SENVCFG_CBIE && MENVCFG_CBIE == HENVCFG_CBIE); +} + +bool envcfg_csr_t::unlogged_write(const reg_t val) noexcept { + const reg_t cbie_reserved = 2; // Reserved value of xenvcfg.CBIE + const reg_t adjusted_val = get_field(val, MENVCFG_CBIE) != cbie_reserved ? 
val : set_field(val, MENVCFG_CBIE, 0); + return masked_csr_t::unlogged_write(adjusted_val); +} + // implement class henvcfg_csr_t henvcfg_csr_t::henvcfg_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init, csr_t_p menvcfg): masked_csr_t(proc, addr, mask, init), diff --git a/riscv/csrs.h b/riscv/csrs.h index 07d6d82ac5..f152802902 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -458,6 +458,13 @@ class masked_csr_t: public basic_csr_t { const reg_t mask; }; +class envcfg_csr_t: public masked_csr_t { + public: + envcfg_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init); + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; +}; + // henvcfg.pbmte is read_only 0 when menvcfg.pbmte = 0 // henvcfg.stce is read_only 0 when menvcfg.stce = 0 // henvcfg.hade is read_only 0 when menvcfg.hade = 0 diff --git a/riscv/processor.cc b/riscv/processor.cc index e81375a2ac..2f458bf1b7 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -445,7 +445,7 @@ void state_t::reset(processor_t* const proc, reg_t max_isa) (proc->extension_enabled(EXT_SVPBMT) ? MENVCFG_PBMTE : 0) | (proc->extension_enabled(EXT_SSTC) ? MENVCFG_STCE : 0); const reg_t menvcfg_init = (proc->extension_enabled(EXT_SVPBMT) ? MENVCFG_PBMTE : 0); - menvcfg = std::make_shared(proc, CSR_MENVCFG, menvcfg_mask, menvcfg_init); + menvcfg = std::make_shared(proc, CSR_MENVCFG, menvcfg_mask, menvcfg_init); if (xlen == 32) { csrmap[CSR_MENVCFG] = std::make_shared(proc, CSR_MENVCFG, menvcfg); csrmap[CSR_MENVCFGH] = std::make_shared(proc, CSR_MENVCFGH, menvcfg); From 7f22022e1ad4019afb18e48ceb76ec9e6f483b50 Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Mon, 17 Jul 2023 12:14:14 +0800 Subject: [PATCH 087/127] legalize senvcfg.CBIE The value 2 of senvcfg.CBIE is reserved. This commit legalizes it to 0. 
Reference: https://github.com/riscv/riscv-CMOs/issues/65 --- riscv/csrs.cc | 2 +- riscv/csrs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/riscv/csrs.cc b/riscv/csrs.cc index a895b6c362..65f5594abd 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -1476,7 +1476,7 @@ void sstateen_csr_t::verify_permissions(insn_t insn, bool write) const { // implement class senvcfg_csr_t senvcfg_csr_t::senvcfg_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init): - masked_csr_t(proc, addr, mask, init) { + envcfg_csr_t(proc, addr, mask, init) { } void senvcfg_csr_t::verify_permissions(insn_t insn, bool write) const { diff --git a/riscv/csrs.h b/riscv/csrs.h index f152802902..f08262b638 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -759,7 +759,7 @@ class sstateen_csr_t: public hstateen_csr_t { virtual bool unlogged_write(const reg_t val) noexcept override; }; -class senvcfg_csr_t final: public masked_csr_t { +class senvcfg_csr_t final: public envcfg_csr_t { public: senvcfg_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init); virtual void verify_permissions(insn_t insn, bool write) const override; From e7e188011182a60c94bf2b35c94f02795d906da4 Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Mon, 17 Jul 2023 12:27:59 +0800 Subject: [PATCH 088/127] legalize henvcfg.CBIE The value 2 of henvcfg.CBIE is reserved. This commit legalizes it to 0. 
Reference: https://github.com/riscv/riscv-CMOs/issues/65 --- riscv/csrs.cc | 2 +- riscv/csrs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/riscv/csrs.cc b/riscv/csrs.cc index 65f5594abd..34c03a5f56 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -916,7 +916,7 @@ bool envcfg_csr_t::unlogged_write(const reg_t val) noexcept { // implement class henvcfg_csr_t henvcfg_csr_t::henvcfg_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init, csr_t_p menvcfg): - masked_csr_t(proc, addr, mask, init), + envcfg_csr_t(proc, addr, mask, init), menvcfg(menvcfg) { } diff --git a/riscv/csrs.h b/riscv/csrs.h index f08262b638..e483ea3d2c 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -468,7 +468,7 @@ class envcfg_csr_t: public masked_csr_t { // henvcfg.pbmte is read_only 0 when menvcfg.pbmte = 0 // henvcfg.stce is read_only 0 when menvcfg.stce = 0 // henvcfg.hade is read_only 0 when menvcfg.hade = 0 -class henvcfg_csr_t final: public masked_csr_t { +class henvcfg_csr_t final: public envcfg_csr_t { public: henvcfg_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init, csr_t_p menvcfg); From 63379810b4d5c469de3ba1a9aeb90a8387df8543 Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Wed, 26 Jul 2023 08:35:37 +0800 Subject: [PATCH 089/127] triggers: fix textra.sbytemask Ignore corresponding bytes to the scontext and textra.svalue. Cast 0xff to reg_t for the 34-bit textra64.svalue. 
--- riscv/triggers.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/triggers.cc b/riscv/triggers.cc index 39a7330f70..b2b815dad7 100644 --- a/riscv/triggers.cc +++ b/riscv/triggers.cc @@ -84,7 +84,7 @@ bool trigger_t::textra_match(processor_t * const proc) const noexcept assert(CSR_TEXTRA32_SBYTEMASK_LENGTH < CSR_TEXTRA64_SBYTEMASK_LENGTH); for (int i = 0; i < CSR_TEXTRA64_SBYTEMASK_LENGTH; i++) if (sbytemask & (1 << i)) - mask &= 0xff << (i * 8); + mask &= ~(reg_t(0xff) << (i * 8)); if ((state->scontext->read() & mask) != (svalue & mask)) return false; } else if (sselect == SSELECT_ASID) { From 1c91fd56ba54bf62c67fea94b80726008895a57a Mon Sep 17 00:00:00 2001 From: Atul Khare Date: Mon, 24 Jul 2023 17:56:38 -0700 Subject: [PATCH 090/127] Regenerate encoding.h --- riscv/encoding.h | 57 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/riscv/encoding.h b/riscv/encoding.h index 9666774076..f1defd41b2 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (be53d24) + * https://github.com/riscv/riscv-opcodes (6790b30) */ #ifndef RISCV_CSR_ENCODING_H @@ -172,6 +172,7 @@ #define MSTATEEN0_FCSR 0x00000002 #define MSTATEEN0_JVT 0x00000004 #define MSTATEEN0_HCONTEXT 0x0200000000000000 +#define MSTATEEN0_CD 0x0100000000000000 #define MSTATEEN0_HENVCFG 0x4000000000000000 #define MSTATEEN_HSTATEEN 0x8000000000000000 @@ -205,6 +206,18 @@ #define HENVCFGH_PBMTE 0x40000000 #define HENVCFGH_STCE 0x80000000 +#define SISELECT_SMCDELEG_START 0x40 +#define SISELECT_SMCDELEG_UNUSED 0x41 +#define SISELECT_SMCDELEG_INSTRET 0x42 +#define SISELECT_SMCDELEG_INSTRETCFG 0x42 +/* + * ?iselect values for hpmcounters4..31 and hpmevent4..31 + * can easily computed, and were elided for brevity. 
+ */ +#define SISELECT_SMCDELEG_HPMCOUNTER_3 0x43 +#define SISELECT_SMCDELEG_HPMEVENT_3 0x43 +#define SISELECT_SMCDELEG_END 0x5f + #define HSTATEEN0_CS 0x00000001 #define HSTATEEN0_FCSR 0x00000002 #define HSTATEEN0_JVT 0x00000004 @@ -2477,10 +2490,10 @@ #define MASK_VMADD_VV 0xfc00707f #define MATCH_VMADD_VX 0xa4006057 #define MASK_VMADD_VX 0xfc00707f -#define MATCH_VMAND_MM 0x64002057 -#define MASK_VMAND_MM 0xfc00707f -#define MATCH_VMANDN_MM 0x60002057 -#define MASK_VMANDN_MM 0xfc00707f +#define MATCH_VMAND_MM 0x66002057 +#define MASK_VMAND_MM 0xfe00707f +#define MATCH_VMANDN_MM 0x62002057 +#define MASK_VMANDN_MM 0xfe00707f #define MATCH_VMAX_VV 0x1c000057 #define MASK_VMAX_VV 0xfc00707f #define MATCH_VMAX_VX 0x1c004057 @@ -2523,14 +2536,14 @@ #define MASK_VMINU_VV 0xfc00707f #define MATCH_VMINU_VX 0x10004057 #define MASK_VMINU_VX 0xfc00707f -#define MATCH_VMNAND_MM 0x74002057 -#define MASK_VMNAND_MM 0xfc00707f -#define MATCH_VMNOR_MM 0x78002057 -#define MASK_VMNOR_MM 0xfc00707f -#define MATCH_VMOR_MM 0x68002057 -#define MASK_VMOR_MM 0xfc00707f -#define MATCH_VMORN_MM 0x70002057 -#define MASK_VMORN_MM 0xfc00707f +#define MATCH_VMNAND_MM 0x76002057 +#define MASK_VMNAND_MM 0xfe00707f +#define MATCH_VMNOR_MM 0x7a002057 +#define MASK_VMNOR_MM 0xfe00707f +#define MATCH_VMOR_MM 0x6a002057 +#define MASK_VMOR_MM 0xfe00707f +#define MATCH_VMORN_MM 0x72002057 +#define MASK_VMORN_MM 0xfe00707f #define MATCH_VMSBC_VV 0x4e000057 #define MASK_VMSBC_VV 0xfe00707f #define MATCH_VMSBC_VVM 0x4c000057 @@ -2619,10 +2632,10 @@ #define MASK_VMV_V_X 0xfff0707f #define MATCH_VMV_X_S 0x42002057 #define MASK_VMV_X_S 0xfe0ff07f -#define MATCH_VMXNOR_MM 0x7c002057 -#define MASK_VMXNOR_MM 0xfc00707f -#define MATCH_VMXOR_MM 0x6c002057 -#define MASK_VMXOR_MM 0xfc00707f +#define MATCH_VMXNOR_MM 0x7e002057 +#define MASK_VMXNOR_MM 0xfe00707f +#define MATCH_VMXOR_MM 0x6e002057 +#define MASK_VMXOR_MM 0xfe00707f #define MATCH_VNCLIP_WI 0xbc003057 #define MASK_VNCLIP_WI 0xfc00707f #define 
MATCH_VNCLIP_WV 0xbc000057 @@ -3051,6 +3064,7 @@ #define CSR_SSTATEEN1 0x10d #define CSR_SSTATEEN2 0x10e #define CSR_SSTATEEN3 0x10f +#define CSR_SCOUNTINHIBIT 0x120 #define CSR_SSCRATCH 0x140 #define CSR_SEPC 0x141 #define CSR_SCAUSE 0x142 @@ -3280,6 +3294,8 @@ #define CSR_MHPMCOUNTER29 0xb1d #define CSR_MHPMCOUNTER30 0xb1e #define CSR_MHPMCOUNTER31 0xb1f +#define CSR_MCYCLECFG 0x321 +#define CSR_MINSTRETCFG 0x322 #define CSR_MHPMEVENT3 0x323 #define CSR_MHPMEVENT4 0x324 #define CSR_MHPMEVENT5 0x325 @@ -3375,6 +3391,8 @@ #define CSR_MSTATEEN2H 0x31e #define CSR_MSTATEEN3H 0x31f #define CSR_MIPH 0x354 +#define CSR_MCYCLECFGH 0x721 +#define CSR_MINSTRETCFGH 0x722 #define CSR_MHPMEVENT3H 0x723 #define CSR_MHPMEVENT4H 0x724 #define CSR_MHPMEVENT5H 0x725 @@ -4933,6 +4951,7 @@ DECLARE_CSR(sstateen0, CSR_SSTATEEN0) DECLARE_CSR(sstateen1, CSR_SSTATEEN1) DECLARE_CSR(sstateen2, CSR_SSTATEEN2) DECLARE_CSR(sstateen3, CSR_SSTATEEN3) +DECLARE_CSR(scountinhibit, CSR_SCOUNTINHIBIT) DECLARE_CSR(sscratch, CSR_SSCRATCH) DECLARE_CSR(sepc, CSR_SEPC) DECLARE_CSR(scause, CSR_SCAUSE) @@ -5162,6 +5181,8 @@ DECLARE_CSR(mhpmcounter28, CSR_MHPMCOUNTER28) DECLARE_CSR(mhpmcounter29, CSR_MHPMCOUNTER29) DECLARE_CSR(mhpmcounter30, CSR_MHPMCOUNTER30) DECLARE_CSR(mhpmcounter31, CSR_MHPMCOUNTER31) +DECLARE_CSR(mcyclecfg, CSR_MCYCLECFG) +DECLARE_CSR(minstretcfg, CSR_MINSTRETCFG) DECLARE_CSR(mhpmevent3, CSR_MHPMEVENT3) DECLARE_CSR(mhpmevent4, CSR_MHPMEVENT4) DECLARE_CSR(mhpmevent5, CSR_MHPMEVENT5) @@ -5257,6 +5278,8 @@ DECLARE_CSR(mstateen1h, CSR_MSTATEEN1H) DECLARE_CSR(mstateen2h, CSR_MSTATEEN2H) DECLARE_CSR(mstateen3h, CSR_MSTATEEN3H) DECLARE_CSR(miph, CSR_MIPH) +DECLARE_CSR(mcyclecfgh, CSR_MCYCLECFGH) +DECLARE_CSR(minstretcfgh, CSR_MINSTRETCFGH) DECLARE_CSR(mhpmevent3h, CSR_MHPMEVENT3H) DECLARE_CSR(mhpmevent4h, CSR_MHPMEVENT4H) DECLARE_CSR(mhpmevent5h, CSR_MHPMEVENT5H) From 62178539f8377805705fd6d857338c04b52ef60f Mon Sep 17 00:00:00 2001 From: Atul Khare Date: Wed, 14 Jun 2023 16:12:56 -0700 
Subject: [PATCH 091/127] Add prv_changed / v_changed fields to state This tracks whether the privilege / virtual mode was changed by the execution of the current instruction. --- riscv/execute.cc | 2 ++ riscv/processor.cc | 4 ++++ riscv/processor.h | 2 ++ 3 files changed, 8 insertions(+) diff --git a/riscv/execute.cc b/riscv/execute.cc index 591090bd96..4f5860ba19 100644 --- a/riscv/execute.cc +++ b/riscv/execute.cc @@ -226,6 +226,8 @@ void processor_t::step(size_t n) size_t instret = 0; reg_t pc = state.pc; mmu_t* _mmu = mmu; + state.prv_changed = false; + state.v_changed = false; #define advance_pc() \ if (unlikely(invalid_pc(pc))) { \ diff --git a/riscv/processor.cc b/riscv/processor.cc index ff64f5a24a..3126433a34 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -203,6 +203,8 @@ void state_t::reset(processor_t* const proc, reg_t max_isa) prv = prev_prv = PRV_M; v = prev_v = false; + prv_changed = false; + v_changed = false; csrmap[CSR_MISA] = misa = std::make_shared(proc, CSR_MISA, max_isa); mstatus = std::make_shared(proc, CSR_MSTATUS); @@ -766,6 +768,8 @@ void processor_t::set_privilege(reg_t prv, bool virt) state.prev_v = state.v; state.prv = legalize_privilege(prv); state.v = virt && state.prv != PRV_M; + state.prv_changed = state.prv != state.prev_prv; + state.v_changed = state.v != state.prev_v; } const char* processor_t::get_privilege_string() diff --git a/riscv/processor.h b/riscv/processor.h index 1b00808977..a2e428651e 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -84,6 +84,8 @@ struct state_t std::unordered_map csrmap; reg_t prv; // TODO: Can this be an enum instead? 
reg_t prev_prv; + bool prv_changed; + bool v_changed; bool v; bool prev_v; misa_csr_t_p misa; From c927773dd1584d870dd60a1cf86c0a8f0d138dd4 Mon Sep 17 00:00:00 2001 From: Atul Khare Date: Wed, 14 Jun 2023 16:49:40 -0700 Subject: [PATCH 092/127] Add Smcntrpmf functionality If Smcntrpmf is enabled, mcycle / minstret increment only if counting for the privilege level isn't inhibited in mcyclecfg / minstretcfg. --- disasm/isa_parser.cc | 2 ++ riscv/csrs.cc | 47 ++++++++++++++++++++++++++++++++++++++++---- riscv/csrs.h | 22 ++++++++++++++++++++- riscv/isa_parser.h | 1 + riscv/processor.cc | 23 ++++++++++++++++++++-- 5 files changed, 88 insertions(+), 7 deletions(-) diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index f4d9da44eb..d5dc439a77 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -290,6 +290,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_SMCSRIND] = true; } else if (ext_str == "sscsrind") { extension_table[EXT_SSCSRIND] = true; + } else if (ext_str == "smcntrpmf") { + extension_table[EXT_SMCNTRPMF] = true; } else if (ext_str[0] == 'x') { extension_table['X'] = true; if (ext_str.size() == 1) { diff --git a/riscv/csrs.cc b/riscv/csrs.cc index 9849ea2fbc..e3b5ad4902 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -999,9 +999,10 @@ bool virtualized_satp_csr_t::unlogged_write(const reg_t val) noexcept { } // implement class wide_counter_csr_t -wide_counter_csr_t::wide_counter_csr_t(processor_t* const proc, const reg_t addr): +wide_counter_csr_t::wide_counter_csr_t(processor_t* const proc, const reg_t addr, smcntrpmf_csr_t_p config_csr): csr_t(proc, addr), - val(0) { + val(0), + config_csr(config_csr) { } reg_t wide_counter_csr_t::read() const noexcept { @@ -1009,7 +1010,11 @@ reg_t wide_counter_csr_t::read() const noexcept { } void wide_counter_csr_t::bump(const reg_t howmuch) noexcept { - val += howmuch; // to keep log reasonable size, don't log every bump + if (is_counting_enabled()) { + val += 
howmuch; // to keep log reasonable size, don't log every bump + } + // Clear cached value + config_csr->reset_prev(); } bool wide_counter_csr_t::unlogged_write(const reg_t val) noexcept { @@ -1018,7 +1023,10 @@ bool wide_counter_csr_t::unlogged_write(const reg_t val) noexcept { // takes precedence over the increment to instret. However, Spike // unconditionally increments instret after executing an instruction. // Correct for this artifact by decrementing instret here. - this->val--; + // Ensure that Smctrpmf hasn't disabled counting. + if (is_counting_enabled()) { + this->val--; + } return true; } @@ -1027,6 +1035,20 @@ reg_t wide_counter_csr_t::written_value() const noexcept { return this->val + 1; } +// Returns true if counting is not inhibited by Smcntrpmf. +// Note that minstretcfg / mcyclecfg / mhpmevent* share the same inhibit bits. +bool wide_counter_csr_t::is_counting_enabled() const noexcept { + auto prv = state->prv_changed ? state->prev_prv : state->prv; + auto v = state->v_changed ? state->v : state->prev_v; + auto mask = MHPMEVENT_MINH; + if (prv == PRV_S) { + mask = v ? MHPMEVENT_VSINH : MHPMEVENT_SINH; + } else if (prv == PRV_U) { + mask = v ? 
MHPMEVENT_VUINH : MHPMEVENT_UINH; + } + return (config_csr->read_prev() & mask) == 0; +} + // implement class time_counter_csr_t time_counter_csr_t::time_counter_csr_t(processor_t* const proc, const reg_t addr): csr_t(proc, addr), @@ -1649,3 +1671,20 @@ csr_t_p sscsrind_reg_csr_t::get_reg() const noexcept { void sscsrind_reg_csr_t::add_ireg_proxy(const reg_t iselect_value, csr_t_p csr) { ireg_proxy[iselect_value] = csr; } + +smcntrpmf_csr_t::smcntrpmf_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init) : masked_csr_t(proc, addr, mask, init) { +} + +reg_t smcntrpmf_csr_t::read_prev() const noexcept { + reg_t val = prev_val.value_or(read()); + return val; +} + +void smcntrpmf_csr_t::reset_prev() noexcept { + prev_val.reset(); +} + +bool smcntrpmf_csr_t::unlogged_write(const reg_t val) noexcept { + prev_val = read(); + return masked_csr_t::unlogged_write(val); +} diff --git a/riscv/csrs.h b/riscv/csrs.h index cf48673ced..5ca7e15738 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -15,6 +15,8 @@ // For access_type: #include "memtracer.h" #include +// For std::optional +#include class processor_t; struct state_t; @@ -516,12 +518,16 @@ class virtualized_satp_csr_t: public virtualized_csr_t { satp_csr_t_p orig_satp; }; +// Forward declaration +class smcntrpmf_csr_t; +typedef std::shared_ptr smcntrpmf_csr_t_p; + // For minstret and mcycle, which are always 64 bits, but in RV32 are // split into high and low halves. The first class always holds the // full 64-bit value. 
class wide_counter_csr_t: public csr_t { public: - wide_counter_csr_t(processor_t* const proc, const reg_t addr); + wide_counter_csr_t(processor_t* const proc, const reg_t addr, smcntrpmf_csr_t_p config_csr); // Always returns full 64-bit value virtual reg_t read() const noexcept override; void bump(const reg_t howmuch) noexcept; @@ -529,7 +535,9 @@ class wide_counter_csr_t: public csr_t { virtual bool unlogged_write(const reg_t val) noexcept override; virtual reg_t written_value() const noexcept override; private: + bool is_counting_enabled() const noexcept; reg_t val; + smcntrpmf_csr_t_p config_csr; }; typedef std::shared_ptr wide_counter_csr_t_p; @@ -822,4 +830,16 @@ class sscsrind_reg_csr_t : public csr_t { csr_t_p get_reg() const noexcept; }; +// smcntrpmf_csr_t caches the previous state of the CSR in case a CSRW instruction +// modifies the state that should not be immediately visible to bump() +class smcntrpmf_csr_t : public masked_csr_t { + public: + smcntrpmf_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init); + reg_t read_prev() const noexcept; + void reset_prev() noexcept; + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; + private: + std::optional prev_val; +}; #endif diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index bba5a91c56..f955e1613c 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -79,6 +79,7 @@ typedef enum { EXT_INTERNAL_ZFH_MOVE, EXT_SMCSRIND, EXT_SSCSRIND, + EXT_SMCNTRPMF, NUM_ISA_EXTENSIONS } isa_extension_t; diff --git a/riscv/processor.cc b/riscv/processor.cc index 3126433a34..0704d8ccf8 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -219,8 +219,15 @@ void state_t::reset(processor_t* const proc, reg_t max_isa) csrmap[CSR_MSCRATCH] = std::make_shared(proc, CSR_MSCRATCH, 0); csrmap[CSR_MTVEC] = mtvec = std::make_shared(proc, CSR_MTVEC); csrmap[CSR_MCAUSE] = mcause = std::make_shared(proc, CSR_MCAUSE); - minstret = std::make_shared(proc, 
CSR_MINSTRET); - mcycle = std::make_shared(proc, CSR_MCYCLE); + + auto smcntrpmf_enabled = proc->extension_enabled_const(EXT_SMCNTRPMF); + const reg_t mask = smcntrpmf_enabled ? MHPMEVENT_MINH | MHPMEVENT_SINH | + MHPMEVENT_UINH | MHPMEVENT_VSINH | MHPMEVENT_VUINH : 0; + auto minstretcfg = std::make_shared(proc, CSR_MINSTRETCFG, mask, 0); + auto mcyclecfg = std::make_shared(proc, CSR_MCYCLECFG, mask, 0); + + minstret = std::make_shared(proc, CSR_MINSTRET, minstretcfg); + mcycle = std::make_shared(proc, CSR_MCYCLE, mcyclecfg); time = std::make_shared(proc, CSR_TIME); if (proc->extension_enabled_const(EXT_ZICNTR)) { csrmap[CSR_INSTRET] = std::make_shared(proc, CSR_INSTRET, minstret); @@ -566,6 +573,18 @@ void state_t::reset(processor_t* const proc, reg_t max_isa) } } + if (smcntrpmf_enabled) { + if (xlen == 32) { + csrmap[CSR_MCYCLECFG] = std::make_shared(proc, CSR_MCYCLECFG, mcyclecfg); + csrmap[CSR_MCYCLECFGH] = std::make_shared(proc, CSR_MCYCLECFGH, mcyclecfg); + csrmap[CSR_MINSTRETCFG] = std::make_shared(proc, CSR_MINSTRETCFG, minstretcfg); + csrmap[CSR_MINSTRETCFGH] = std::make_shared(proc, CSR_MINSTRETCFGH, minstretcfg); + } else { + csrmap[CSR_MCYCLECFG] = mcyclecfg; + csrmap[CSR_MINSTRETCFG] = minstretcfg; + } + } + serialized = false; log_reg_write.clear(); From de8e0588acd6201fc79f2513664935443666281b Mon Sep 17 00:00:00 2001 From: Dan Smathers Date: Thu, 3 Aug 2023 15:43:50 -0600 Subject: [PATCH 093/127] update set_msw/clear_msw/set_mtimer/clear_mtimer Added ifndef to clint addresses instead of hard-coding Added clear_msw and clear mtimer Tested against Sail/isa-sim with new proposed Smclint/Ssclint arch-tests https://github.com/riscv-non-isa/riscv-arch-test/pull/372 Building a baseline of interrupt tests that changes to SAIL/isa-sim can be tested against when other interrupt extensions are added. 
Signed-off-by: Dan Smathers --- arch_test_target/spike/model_test.h | 50 ++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/arch_test_target/spike/model_test.h b/arch_test_target/spike/model_test.h index e968e43aa8..bfdb4422dd 100644 --- a/arch_test_target/spike/model_test.h +++ b/arch_test_target/spike/model_test.h @@ -49,16 +49,50 @@ //RVTEST_IO_ASSERT_DFPR_EQ #define RVMODEL_IO_ASSERT_DFPR_EQ(_D, _R, _I) -#define RVMODEL_SET_MSW_INT \ - li t1, 1; \ - li t2, 0x2000000; \ - sw t1, 0(t2); +#ifndef RVMODEL_MCLINTBASE + #define RVMODEL_MCLINTBASE 0x02000000 +#endif + +#ifndef RVMODEL_MSIP_OFFSET + #define RVMODEL_MSIP_OFFSET 0x0 +#endif + +#ifndef RVMODEL_MTIMECMP_OFFSET + #define RVMODEL_MTIMECMP_OFFSET 0x4000 +#endif + +#ifndef RVMODEL_MTIMECMPH_OFFSET + #define RVMODEL_MTIMECMPH_OFFSET 0x4004 +#endif + +#define RVMODEL_SET_MSW_INT \ + lui t0, ((RVMODEL_MCLINTBASE + RVMODEL_MSIP_OFFSET)>> 12); \ + addi t0, t0, ((RVMODEL_MCLINTBASE + RVMODEL_MSIP_OFFSET) & 0xFFF); \ + li t1, 1; \ + sw t1, (t0); \ + +#define RVMODEL_CLEAR_MSW_INT \ + lui t0, ((RVMODEL_MCLINTBASE + RVMODEL_MSIP_OFFSET)>> 12); \ + addi t0, t0, ((RVMODEL_MCLINTBASE + RVMODEL_MSIP_OFFSET) & 0xFFF); \ + sw x0, (t0); \ -#define RVMODEL_CLEAR_MSW_INT \ - li t2, 0x2000000; \ - sw x0, 0(t2); +#define RVMODEL_SET_MTIMER_INT \ + lui t0, ((RVMODEL_MCLINTBASE + RVMODEL_MTIMECMP_OFFSET)>> 12); \ + addi t0, t0, ((RVMODEL_MCLINTBASE + RVMODEL_MTIMECMP_OFFSET) & 0xFFF); \ + sw x0, (t0); \ + lui t0, ((RVMODEL_MCLINTBASE + RVMODEL_MTIMECMPH_OFFSET)>> 12); \ + addi t0, t0, ((RVMODEL_MCLINTBASE + RVMODEL_MTIMECMPH_OFFSET) & 0xFFF); \ + sw x0, (t0); \ -#define RVMODEL_CLEAR_MTIMER_INT +#define RVMODEL_CLEAR_MTIMER_INT \ + addi t1,x0,1; \ + neg t1,t1; \ + lui t0, ((RVMODEL_MCLINTBASE + RVMODEL_MTIMECMPH_OFFSET)>> 12); \ + addi t0, t0, ((RVMODEL_MCLINTBASE + RVMODEL_MTIMECMPH_OFFSET) & 0xFFF); \ + sw t1, (t0); \ + lui t0, ((RVMODEL_MCLINTBASE + RVMODEL_MTIMECMP_OFFSET)>> 12); \ + 
addi t0, t0, ((RVMODEL_MCLINTBASE + RVMODEL_MTIMECMP_OFFSET) & 0xFFF); \ + sw t1, (t0); \ #define RVMODEL_CLEAR_MEXT_INT From 07c2e2bfcbeae79f5bda72146d8d5652a40f8861 Mon Sep 17 00:00:00 2001 From: Ved Shanbhogue Date: Mon, 14 Aug 2023 12:44:57 -0500 Subject: [PATCH 094/127] rename *envcfg.HADE to *envcfg.ADUE --- riscv/csrs.h | 2 +- riscv/encoding.h | 18 +++++++++--------- riscv/mmu.cc | 4 ++-- riscv/processor.cc | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/riscv/csrs.h b/riscv/csrs.h index 5ca7e15738..efa7f10a63 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -479,7 +479,7 @@ class henvcfg_csr_t final: public envcfg_csr_t { henvcfg_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init, csr_t_p menvcfg); reg_t read() const noexcept override { - return (menvcfg->read() | ~(MENVCFG_PBMTE | MENVCFG_STCE | MENVCFG_HADE)) & masked_csr_t::read(); + return (menvcfg->read() | ~(MENVCFG_PBMTE | MENVCFG_STCE | MENVCFG_ADUE)) & masked_csr_t::read(); } virtual void verify_permissions(insn_t insn, bool write) const override; diff --git a/riscv/encoding.h b/riscv/encoding.h index f1defd41b2..a7e2d94512 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (6790b30) + * https://github.com/riscv/riscv-opcodes (d752f19) */ #ifndef RISCV_CSR_ENCODING_H @@ -160,11 +160,11 @@ #define MENVCFG_CBIE 0x00000030 #define MENVCFG_CBCFE 0x00000040 #define MENVCFG_CBZE 0x00000080 -#define MENVCFG_HADE 0x2000000000000000 +#define MENVCFG_ADUE 0x2000000000000000 #define MENVCFG_PBMTE 0x4000000000000000 #define MENVCFG_STCE 0x8000000000000000 -#define MENVCFGH_HADE 0x20000000 +#define MENVCFGH_ADUE 0x20000000 #define MENVCFGH_PBMTE 0x40000000 #define MENVCFGH_STCE 0x80000000 @@ -198,11 +198,11 @@ #define HENVCFG_CBIE 0x00000030 #define HENVCFG_CBCFE 0x00000040 #define HENVCFG_CBZE 0x00000080 -#define HENVCFG_HADE 
0x2000000000000000 +#define HENVCFG_ADUE 0x2000000000000000 #define HENVCFG_PBMTE 0x4000000000000000 #define HENVCFG_STCE 0x8000000000000000 -#define HENVCFGH_HADE 0x20000000 +#define HENVCFGH_ADUE 0x20000000 #define HENVCFGH_PBMTE 0x40000000 #define HENVCFGH_STCE 0x80000000 @@ -1004,10 +1004,10 @@ #define MASK_FMVH_X_D 0xfff0707f #define MATCH_FMVH_X_Q 0xe6100053 #define MASK_FMVH_X_Q 0xfff0707f -#define MATCH_FMVP_D_X 0xb2100053 -#define MASK_FMVP_D_X 0xfff0707f -#define MATCH_FMVP_Q_X 0xb6100053 -#define MASK_FMVP_Q_X 0xfff0707f +#define MATCH_FMVP_D_X 0xb2000053 +#define MASK_FMVP_D_X 0xfe00707f +#define MATCH_FMVP_Q_X 0xb6000053 +#define MASK_FMVP_Q_X 0xfe00707f #define MATCH_FNMADD_D 0x200004f #define MASK_FNMADD_D 0x600007f #define MATCH_FNMADD_H 0x400004f diff --git a/riscv/mmu.cc b/riscv/mmu.cc index f6d23a3bf6..285ef6d364 100644 --- a/riscv/mmu.cc +++ b/riscv/mmu.cc @@ -413,7 +413,7 @@ reg_t mmu_t::s2xlate(reg_t gva, reg_t gpa, access_type type, access_type trap_ty reg_t pte = pte_load(pte_paddr, gva, virt, trap_type, vm.ptesize); reg_t ppn = (pte & ~reg_t(PTE_ATTR)) >> PTE_PPN_SHIFT; bool pbmte = proc->get_state()->menvcfg->read() & MENVCFG_PBMTE; - bool hade = proc->get_state()->menvcfg->read() & MENVCFG_HADE; + bool hade = proc->get_state()->menvcfg->read() & MENVCFG_ADUE; if (pte & PTE_RSVD) { break; @@ -507,7 +507,7 @@ reg_t mmu_t::walk(mem_access_info_t access_info) reg_t pte = pte_load(pte_paddr, addr, virt, type, vm.ptesize); reg_t ppn = (pte & ~reg_t(PTE_ATTR)) >> PTE_PPN_SHIFT; bool pbmte = virt ? (proc->get_state()->henvcfg->read() & HENVCFG_PBMTE) : (proc->get_state()->menvcfg->read() & MENVCFG_PBMTE); - bool hade = virt ? (proc->get_state()->henvcfg->read() & HENVCFG_HADE) : (proc->get_state()->menvcfg->read() & MENVCFG_HADE); + bool hade = virt ? 
(proc->get_state()->henvcfg->read() & HENVCFG_ADUE) : (proc->get_state()->menvcfg->read() & MENVCFG_ADUE); if (pte & PTE_RSVD) { break; diff --git a/riscv/processor.cc b/riscv/processor.cc index 0704d8ccf8..22e654250d 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -450,7 +450,7 @@ void state_t::reset(processor_t* const proc, reg_t max_isa) if (proc->extension_enabled_const('U')) { const reg_t menvcfg_mask = (proc->extension_enabled(EXT_ZICBOM) ? MENVCFG_CBCFE | MENVCFG_CBIE : 0) | (proc->extension_enabled(EXT_ZICBOZ) ? MENVCFG_CBZE : 0) | - (proc->extension_enabled(EXT_SVADU) ? MENVCFG_HADE: 0) | + (proc->extension_enabled(EXT_SVADU) ? MENVCFG_ADUE: 0) | (proc->extension_enabled(EXT_SVPBMT) ? MENVCFG_PBMTE : 0) | (proc->extension_enabled(EXT_SSTC) ? MENVCFG_STCE : 0); const reg_t menvcfg_init = (proc->extension_enabled(EXT_SVPBMT) ? MENVCFG_PBMTE : 0); @@ -466,7 +466,7 @@ void state_t::reset(processor_t* const proc, reg_t max_isa) csrmap[CSR_SENVCFG] = senvcfg = std::make_shared(proc, CSR_SENVCFG, senvcfg_mask, 0); const reg_t henvcfg_mask = (proc->extension_enabled(EXT_ZICBOM) ? HENVCFG_CBCFE | HENVCFG_CBIE : 0) | (proc->extension_enabled(EXT_ZICBOZ) ? HENVCFG_CBZE : 0) | - (proc->extension_enabled(EXT_SVADU) ? HENVCFG_HADE: 0) | + (proc->extension_enabled(EXT_SVADU) ? HENVCFG_ADUE: 0) | (proc->extension_enabled(EXT_SVPBMT) ? HENVCFG_PBMTE : 0) | (proc->extension_enabled(EXT_SSTC) ? HENVCFG_STCE : 0); const reg_t henvcfg_init = (proc->extension_enabled(EXT_SVPBMT) ? 
HENVCFG_PBMTE : 0); From 05c10a06a335b795738f11f98230d1fefec50d21 Mon Sep 17 00:00:00 2001 From: LIU Yu Date: Wed, 16 Aug 2023 16:50:48 +0800 Subject: [PATCH 095/127] Install header files fdt.h and libfdt_env.h as needed by libfdt.h --- fdt/fdt.mk.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fdt/fdt.mk.in b/fdt/fdt.mk.in index 32c6d49295..51e7ae4ec8 100644 --- a/fdt/fdt.mk.in +++ b/fdt/fdt.mk.in @@ -4,6 +4,8 @@ fdt_install_shared_lib = yes fdt_install_hdrs = \ libfdt.h \ + fdt.h \ + libfdt_env.h fdt_c_srcs = \ fdt.c \ From c6e2b703c560878bdb49377064d255b5a5fb50af Mon Sep 17 00:00:00 2001 From: Mark Lai Date: Wed, 16 Aug 2023 17:50:21 +0800 Subject: [PATCH 096/127] Include cerrno in fesvr/elfloader.cc It caused compile error "use of undeclared identifier 'errno'" at line 26 and 33. I Add #include in fesvr/elfloader.cc to fix error and compile successfully. --- fesvr/elfloader.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/fesvr/elfloader.cc b/fesvr/elfloader.cc index c70de12dbb..391afa04bb 100644 --- a/fesvr/elfloader.cc +++ b/fesvr/elfloader.cc @@ -16,6 +16,7 @@ #include #include #include +#include std::map load_elf(const char* fn, memif_t* memif, reg_t* entry, unsigned required_xlen = 0) { From eff6c60498c2fc3e628151e6c0e1d667280a2cfd Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Thu, 24 Aug 2023 21:33:27 -0700 Subject: [PATCH 097/127] bf16: handle invalid Nan-boxed accessing assume 0x0000_0000_0000_7d2d at 0x8000_0000 a0 = 0x8000_0000 fld ft0, 0(a0) <- load 0x0000_0000_0000_7d2d to ft0, it is invalid Nanboxed fcvt.s.bf16 ft1, ft0 <- read bf16 from ft0. 
it should be 0x7fc0 (bf16 QNaN) but not 0x7e00 (f16 QNaN) Signed-off-by: Chih-Min Chao --- riscv/decode_macros.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h index f39149b172..e31da5c109 100644 --- a/riscv/decode_macros.h +++ b/riscv/decode_macros.h @@ -63,13 +63,14 @@ // FPU macros #define READ_ZDINX_REG(reg) (xlen == 32 ? f64(READ_REG_PAIR(reg)) : f64(STATE.XPR[reg] & (uint64_t)-1)) #define READ_FREG_H(reg) (p->extension_enabled(EXT_ZFINX) ? f16(STATE.XPR[reg] & (uint16_t)-1) : f16(READ_FREG(reg))) +#define READ_FREG_BF(reg) (p->extension_enabled(EXT_ZFINX) ? bf16(STATE.XPR[reg] & (uint16_t)-1) : bf16(READ_FREG(reg))) #define READ_FREG_F(reg) (p->extension_enabled(EXT_ZFINX) ? f32(STATE.XPR[reg] & (uint32_t)-1) : f32(READ_FREG(reg))) #define READ_FREG_D(reg) (p->extension_enabled(EXT_ZFINX) ? READ_ZDINX_REG(reg) : f64(READ_FREG(reg))) #define FRS1 READ_FREG(insn.rs1()) #define FRS2 READ_FREG(insn.rs2()) #define FRS3 READ_FREG(insn.rs3()) #define FRS1_H READ_FREG_H(insn.rs1()) -#define FRS1_BF FRS1_H +#define FRS1_BF READ_FREG_BF(insn.rs1()) #define FRS1_F READ_FREG_F(insn.rs1()) #define FRS1_D READ_FREG_D(insn.rs1()) #define FRS2_H READ_FREG_H(insn.rs2()) @@ -224,14 +225,18 @@ class wait_for_interrupt_t {}; /* Convenience wrappers to simplify softfloat code sequences */ #define isBoxedF16(r) (isBoxedF32(r) && ((uint64_t)((r.v[0] >> 16) + 1) == ((uint64_t)1 << 48))) #define unboxF16(r) (isBoxedF16(r) ? (uint16_t)r.v[0] : defaultNaNF16UI) +#define isBoxedBF16(r) isBoxedF16(r) +#define unboxBF16(r) (isBoxedBF16(r) ? (uint16_t)r.v[0] : defaultNaNBF16UI) #define isBoxedF32(r) (isBoxedF64(r) && ((uint32_t)((r.v[0] >> 32) + 1) == 0)) #define unboxF32(r) (isBoxedF32(r) ? (uint32_t)r.v[0] : defaultNaNF32UI) #define isBoxedF64(r) ((r.v[1] + 1) == 0) #define unboxF64(r) (isBoxedF64(r) ? 
r.v[0] : defaultNaNF64UI) inline float16_t f16(uint16_t v) { return { v }; } +inline bfloat16_t bf16(uint16_t v) { return { v }; } inline float32_t f32(uint32_t v) { return { v }; } inline float64_t f64(uint64_t v) { return { v }; } inline float16_t f16(freg_t r) { return f16(unboxF16(r)); } +inline bfloat16_t bf16(freg_t r) { return bf16(unboxBF16(r)); } inline float32_t f32(freg_t r) { return f32(unboxF32(r)); } inline float64_t f64(freg_t r) { return f64(unboxF64(r)); } inline float128_t f128(freg_t r) { return r; } From 3c8320ecd76b1d98b5d0955dd521bf5ee1616136 Mon Sep 17 00:00:00 2001 From: viktoryou <143797577+viktoryou@users.noreply.github.com> Date: Fri, 1 Sep 2023 19:36:35 +0800 Subject: [PATCH 098/127] fix condition of executing cbo.inval as a flush operation Signed-off-by: viktoryou <143797577+viktoryou@users.noreply.github.com> --- riscv/insns/cbo_inval.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/riscv/insns/cbo_inval.h b/riscv/insns/cbo_inval.h index bd80a6fd2b..db13da9ce1 100644 --- a/riscv/insns/cbo_inval.h +++ b/riscv/insns/cbo_inval.h @@ -1,9 +1,9 @@ require_extension(EXT_ZICBOM); DECLARE_XENVCFG_VARS(CBIE); require_envcfg(CBIE); -if (((STATE.prv != PRV_M) && (mCBIE == 1)) || - ((!STATE.v && (STATE.prv == PRV_U)) && (sCBIE = 1)) || - (STATE.v && ((hCBIE == 1) || ((STATE.prv == PRV_U) && (sCBIE== 0))))) +if ((STATE.prv != PRV_M && mCBIE) || + (!STATE.v && STATE.prv == PRV_U && sCBIE) || + (STATE.v && (hCBIE || (STATE.prv == PRV_U && sCBIE)))) MMU.clean_inval(RS1, true, true); else MMU.clean_inval(RS1, false, true); From e3e610050d8844c22ef177804acd151a4340ee69 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 1 Sep 2023 13:46:35 -0700 Subject: [PATCH 099/127] Attempt to fix Mac OS CI --- .github/workflows/continuous-integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 
d8c9a0255d..48ef2c116a 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -46,7 +46,7 @@ jobs: fetch-depth: 0 - name: Install Dependencies - run: xargs brew install < .github/workflows/brew-packages.txt + run: xargs brew install --overwrite < .github/workflows/brew-packages.txt - run: | for commit in $(git rev-list origin/master..HEAD); do From 7b3b2e94adefa63a31c73267af21819ad833aba0 Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Tue, 1 Aug 2023 14:44:07 +0800 Subject: [PATCH 100/127] triggers: refactor: icount: breakdown detect_icount_match() into detect_icount_fire() and detect_icount_decrement() --- riscv/triggers.cc | 18 +++++++++++++----- riscv/triggers.h | 6 ++++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/riscv/triggers.cc b/riscv/triggers.cc index b2b815dad7..24a0f4e496 100644 --- a/riscv/triggers.cc +++ b/riscv/triggers.cc @@ -305,7 +305,7 @@ void mcontrol6_t::tdata1_write(processor_t * const proc, const reg_t val, const load = get_field(val, CSR_MCONTROL6_LOAD); } -std::optional icount_t::detect_icount_match(processor_t * const proc) noexcept +std::optional icount_t::detect_icount_fire(processor_t * const proc) noexcept { if (!common_match(proc) || !allow_action(proc->get_state())) return std::nullopt; @@ -317,13 +317,19 @@ std::optional icount_t::detect_icount_match(processor_t * const ret = match_result_t(TIMING_BEFORE, action); } - if (count >= 1 && (ret == std::nullopt || action != MCONTROL_ACTION_DEBUG_MODE)) { + return ret; +} + +void icount_t::detect_icount_decrement(processor_t * const proc) noexcept +{ + if (!common_match(proc) || !allow_action(proc->get_state())) + return; + + if (count >= 1) { if (count == 1) pending = 1; count = count - 1; } - - return ret; } reg_t icount_t::tdata1_read(const processor_t * const proc) const noexcept @@ -588,7 +594,9 @@ std::optional module_t::detect_icount_match() noexcept std::optional ret = std::nullopt; for (auto trigger: 
triggers) { - auto result = trigger->detect_icount_match(proc); + auto result = trigger->detect_icount_fire(proc); + if (result == std::nullopt || result->action != MCONTROL_ACTION_DEBUG_MODE) + trigger->detect_icount_decrement(proc); if (result.has_value() && (!ret.has_value() || ret->action < result->action)) ret = result; } diff --git a/riscv/triggers.h b/riscv/triggers.h index 0bf6097a99..6f00122c6a 100644 --- a/riscv/triggers.h +++ b/riscv/triggers.h @@ -85,7 +85,8 @@ class trigger_t { virtual std::optional detect_memory_access_match(processor_t UNUSED * const proc, operation_t UNUSED operation, reg_t UNUSED address, std::optional UNUSED data) noexcept { return std::nullopt; } - virtual std::optional detect_icount_match(processor_t UNUSED * const proc) { return std::nullopt; } + virtual std::optional detect_icount_fire(processor_t UNUSED * const proc) { return std::nullopt; } + virtual void detect_icount_decrement(processor_t UNUSED * const proc) {} virtual std::optional detect_trap_match(processor_t UNUSED * const proc, const trap_t UNUSED & t) noexcept { return std::nullopt; } protected: @@ -248,7 +249,8 @@ class icount_t : public trigger_t { virtual bool icount_check_needed() const override { return count > 0 || pending; } virtual void stash_read_values() override; - virtual std::optional detect_icount_match(processor_t * const proc) noexcept override; + virtual std::optional detect_icount_fire(processor_t * const proc) noexcept override; + virtual void detect_icount_decrement(processor_t * const proc) noexcept override; private: bool dmode = false; From 9bc80f3d095c8220200dec74b0c03f5d806976af Mon Sep 17 00:00:00 2001 From: YenHaoChen Date: Tue, 1 Aug 2023 16:54:05 +0800 Subject: [PATCH 101/127] triggers: fix: not decrease icount.count on firing other icount with action=debug --- riscv/triggers.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/riscv/triggers.cc b/riscv/triggers.cc index 24a0f4e496..5a2d18b20c 100644 --- 
a/riscv/triggers.cc +++ b/riscv/triggers.cc @@ -595,11 +595,12 @@ std::optional module_t::detect_icount_match() noexcept std::optional ret = std::nullopt; for (auto trigger: triggers) { auto result = trigger->detect_icount_fire(proc); - if (result == std::nullopt || result->action != MCONTROL_ACTION_DEBUG_MODE) - trigger->detect_icount_decrement(proc); if (result.has_value() && (!ret.has_value() || ret->action < result->action)) ret = result; } + if (ret == std::nullopt || ret->action != MCONTROL_ACTION_DEBUG_MODE) + for (auto trigger: triggers) + trigger->detect_icount_decrement(proc); return ret; } From cf3f787474a590845300f0ac80e6feaf3f58dbdc Mon Sep 17 00:00:00 2001 From: Brendan Sweeney Date: Tue, 26 Sep 2023 13:35:42 -0500 Subject: [PATCH 102/127] Change disasm for vset{i}vli with reserved vtypes to display the reserved bits Currently there is a bug with the disassembly when vsetivli/vsetvli have invalid vtypes (with reserved bits set). Spike correctly detects this and sets vill, but the disassembler integrated into spike ignores those bits being set and prints the instruction as if they weren't. This makes debugging harder, it looks like an otherwise valid vtype was being rejected and can lead down debugging paths like thinking the vector unit is configured incorrectly. This commit changes the behaviour so that if these reserved bits are set, it prints out the hex value of the vtype. This is understood by the assembler. GCC disassembler prints out the decimal value of the vtype in this case, I think that hex value is clearer but I can change it if desired. Signed-off-by: Brendan Sweeney --- disasm/disasm.cc | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index b2bed47adb..15e767ee64 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -413,27 +413,35 @@ struct : public arg_t { int lmul = insn.v_lmul(); auto vta = insn.v_vta() == 1 ? 
"ta" : "tu"; auto vma = insn.v_vma() == 1 ? "ma" : "mu"; - s << "e" << sew; - if(insn.v_frac_lmul()) { - std::string lmul_str = ""; - switch(lmul){ - case 3: - lmul_str = "f2"; - break; - case 2: - lmul_str = "f4"; - break; - case 1: - lmul_str = "f8"; - break; - default: - assert(true && "unsupport fractional LMUL"); - } - s << ", m" << lmul_str; + int newType = (insn.bits() & 0x80000000) ? insn.v_zimm10() : insn.v_zimm11(); + // if bit 31 is set, this is vsetivli and there is a 10-bit vtype, else this is vsetvli and there is an 11-bit vtype + // If the provided vtype has reserved bits, display the hex version of the vtype instead + if ((newType >> 8) != 0) { + s << "0x" << std::hex << newType; } else { - s << ", m" << (1 << lmul); + s << "e" << sew; + if(insn.v_frac_lmul()) { + std::string lmul_str = ""; + switch(lmul){ + case 3: + lmul_str = "f2"; + break; + case 2: + lmul_str = "f4"; + break; + case 1: + lmul_str = "f8"; + break; + default: + assert(true && "unsupport fractional LMUL"); + } + s << ", m" << lmul_str; + } else { + s << ", m" << (1 << lmul); + } + s << ", " << vta << ", " << vma; } - s << ", " << vta << ", " << vma; + return s.str(); } } v_vtype; From 7613da4d26dbaaf9063540187a4a880cb3c0b3e9 Mon Sep 17 00:00:00 2001 From: Tim Newsome Date: Thu, 28 Sep 2023 09:07:56 -0700 Subject: [PATCH 103/127] debug: Halted harts can also be unavailable. 
--- riscv/debug_module.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/riscv/debug_module.cc b/riscv/debug_module.cc index 4a7a802ef5..026f4b3d9b 100644 --- a/riscv/debug_module.cc +++ b/riscv/debug_module.cc @@ -413,14 +413,14 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) dmstatus.allresumeack = false; } auto hart = sim->get_harts().at(hart_id); - if (hart_state[hart_id].halted) { - dmstatus.allrunning = false; - dmstatus.anyhalted = true; - dmstatus.allunavail = false; - } else if (!hart_available(hart_id)) { + if (!hart_available(hart_id)) { dmstatus.allrunning = false; dmstatus.allhalted = false; dmstatus.anyunavail = true; + } else if (hart_state[hart_id].halted) { + dmstatus.allrunning = false; + dmstatus.anyhalted = true; + dmstatus.allunavail = false; } else { dmstatus.allhalted = false; dmstatus.anyrunning = true; From 77e9aaef19f528d8ced2301fe39eb941a9fdc3e2 Mon Sep 17 00:00:00 2001 From: Tim Newsome Date: Thu, 28 Sep 2023 09:08:25 -0700 Subject: [PATCH 104/127] debug: Abstract commands fail on unavailable harts. 
--- riscv/debug_module.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/riscv/debug_module.cc b/riscv/debug_module.cc index 026f4b3d9b..e9aef1a822 100644 --- a/riscv/debug_module.cc +++ b/riscv/debug_module.cc @@ -579,6 +579,10 @@ bool debug_module_t::perform_abstract_command() abstractcs.cmderr = CMDERR_BUSY; return true; } + if (!hart_available(dmcontrol.hartsel)) { + abstractcs.cmderr = CMDERR_HALTRESUME; + return true; + } if ((command >> 24) == 0) { // register access From c5eee7426d7c1eb987544ad03671173d228ca69a Mon Sep 17 00:00:00 2001 From: Abraham Gonzalez Date: Mon, 9 Oct 2023 21:42:11 -0700 Subject: [PATCH 105/127] Update dtm.h with switch_to_* functions Signed-off-by: Abraham Gonzalez --- fesvr/dtm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fesvr/dtm.h b/fesvr/dtm.h index 1f5ee3e890..f47b648c9a 100644 --- a/fesvr/dtm.h +++ b/fesvr/dtm.h @@ -55,6 +55,12 @@ class dtm_t : public htif_t void producer_thread(); + void switch_to_host() { + // update the target with the current context + target = context_t::current(); + host.switch_to(); + } + protected: virtual void read_chunk(addr_t taddr, size_t len, void* dst) override; virtual void write_chunk(addr_t taddr, size_t len, const void* src) override; @@ -74,6 +80,10 @@ class dtm_t : public htif_t void resume(int); uint32_t get_data_base() { return data_base; }; + void switch_to_target() { + target->switch_to(); + } + private: context_t host; context_t* target; From 2e2476304516945da407727d56944672d2b56698 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Mon, 16 Oct 2023 20:59:40 -0700 Subject: [PATCH 106/127] vamo: remove from building list Signed-off-by: Chih-Min Chao --- riscv/riscv.mk.in | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 3a0333c544..ecf4fa0166 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -795,44 +795,6 @@ riscv_insn_ext_v_alu_fp = \ vmfne_vf \ vmfne_vv \ 
-riscv_insn_ext_v_amo = \ - vamoswapei8_v \ - vamoaddei8_v \ - vamoandei8_v \ - vamomaxei8_v \ - vamomaxuei8_v \ - vamominei8_v \ - vamominuei8_v \ - vamoorei8_v \ - vamoxorei8_v \ - vamoswapei16_v \ - vamoaddei16_v \ - vamoandei16_v \ - vamomaxei16_v \ - vamomaxuei16_v \ - vamominei16_v \ - vamominuei16_v \ - vamoorei16_v \ - vamoxorei16_v \ - vamoswapei32_v \ - vamoaddei32_v \ - vamoandei32_v \ - vamomaxei32_v \ - vamomaxuei32_v \ - vamominei32_v \ - vamominuei32_v \ - vamoorei32_v \ - vamoxorei32_v \ - vamoswapei64_v \ - vamoaddei64_v \ - vamoandei64_v \ - vamomaxei64_v \ - vamomaxuei64_v \ - vamominei64_v \ - vamominuei64_v \ - vamoorei64_v \ - vamoxorei64_v \ - riscv_insn_ext_v_ldst = \ vlm_v \ vle8_v \ @@ -901,7 +863,6 @@ riscv_insn_ext_v_ctrl = \ riscv_insn_ext_v = \ $(riscv_insn_ext_v_alu_fp) \ $(riscv_insn_ext_v_alu_int) \ - $(riscv_insn_ext_v_amo) \ $(riscv_insn_ext_v_ctrl) \ $(riscv_insn_ext_v_ldst) \ From 9705dc3ba82c283f4c6de78126ac0e36c1c3eb61 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Mon, 16 Oct 2023 20:59:21 -0700 Subject: [PATCH 107/127] vamo: remove instruction implementation Signed-off-by: Chih-Min Chao --- riscv/insns/vamoaddei16_v.h | 2 -- riscv/insns/vamoaddei32_v.h | 2 -- riscv/insns/vamoaddei64_v.h | 2 -- riscv/insns/vamoaddei8_v.h | 2 -- riscv/insns/vamoandei16_v.h | 2 -- riscv/insns/vamoandei32_v.h | 2 -- riscv/insns/vamoandei64_v.h | 2 -- riscv/insns/vamoandei8_v.h | 2 -- riscv/insns/vamomaxei16_v.h | 2 -- riscv/insns/vamomaxei32_v.h | 2 -- riscv/insns/vamomaxei64_v.h | 2 -- riscv/insns/vamomaxei8_v.h | 2 -- riscv/insns/vamomaxuei16_v.h | 2 -- riscv/insns/vamomaxuei32_v.h | 2 -- riscv/insns/vamomaxuei64_v.h | 2 -- riscv/insns/vamomaxuei8_v.h | 2 -- riscv/insns/vamominei16_v.h | 2 -- riscv/insns/vamominei32_v.h | 2 -- riscv/insns/vamominei64_v.h | 2 -- riscv/insns/vamominei8_v.h | 2 -- riscv/insns/vamominuei16_v.h | 2 -- riscv/insns/vamominuei32_v.h | 2 -- riscv/insns/vamominuei64_v.h | 2 -- riscv/insns/vamominuei8_v.h | 2 -- 
riscv/insns/vamoorei16_v.h | 2 -- riscv/insns/vamoorei32_v.h | 2 -- riscv/insns/vamoorei64_v.h | 2 -- riscv/insns/vamoorei8_v.h | 2 -- riscv/insns/vamoswapei16_v.h | 2 -- riscv/insns/vamoswapei32_v.h | 2 -- riscv/insns/vamoswapei64_v.h | 2 -- riscv/insns/vamoswapei8_v.h | 2 -- riscv/insns/vamoxorei16_v.h | 2 -- riscv/insns/vamoxorei32_v.h | 2 -- riscv/insns/vamoxorei64_v.h | 2 -- riscv/insns/vamoxorei8_v.h | 2 -- 36 files changed, 72 deletions(-) delete mode 100644 riscv/insns/vamoaddei16_v.h delete mode 100644 riscv/insns/vamoaddei32_v.h delete mode 100644 riscv/insns/vamoaddei64_v.h delete mode 100644 riscv/insns/vamoaddei8_v.h delete mode 100644 riscv/insns/vamoandei16_v.h delete mode 100644 riscv/insns/vamoandei32_v.h delete mode 100644 riscv/insns/vamoandei64_v.h delete mode 100644 riscv/insns/vamoandei8_v.h delete mode 100644 riscv/insns/vamomaxei16_v.h delete mode 100644 riscv/insns/vamomaxei32_v.h delete mode 100644 riscv/insns/vamomaxei64_v.h delete mode 100644 riscv/insns/vamomaxei8_v.h delete mode 100644 riscv/insns/vamomaxuei16_v.h delete mode 100644 riscv/insns/vamomaxuei32_v.h delete mode 100644 riscv/insns/vamomaxuei64_v.h delete mode 100644 riscv/insns/vamomaxuei8_v.h delete mode 100644 riscv/insns/vamominei16_v.h delete mode 100644 riscv/insns/vamominei32_v.h delete mode 100644 riscv/insns/vamominei64_v.h delete mode 100644 riscv/insns/vamominei8_v.h delete mode 100644 riscv/insns/vamominuei16_v.h delete mode 100644 riscv/insns/vamominuei32_v.h delete mode 100644 riscv/insns/vamominuei64_v.h delete mode 100644 riscv/insns/vamominuei8_v.h delete mode 100644 riscv/insns/vamoorei16_v.h delete mode 100644 riscv/insns/vamoorei32_v.h delete mode 100644 riscv/insns/vamoorei64_v.h delete mode 100644 riscv/insns/vamoorei8_v.h delete mode 100644 riscv/insns/vamoswapei16_v.h delete mode 100644 riscv/insns/vamoswapei32_v.h delete mode 100644 riscv/insns/vamoswapei64_v.h delete mode 100644 riscv/insns/vamoswapei8_v.h delete mode 100644 
riscv/insns/vamoxorei16_v.h delete mode 100644 riscv/insns/vamoxorei32_v.h delete mode 100644 riscv/insns/vamoxorei64_v.h delete mode 100644 riscv/insns/vamoxorei8_v.h diff --git a/riscv/insns/vamoaddei16_v.h b/riscv/insns/vamoaddei16_v.h deleted file mode 100644 index 3cb3db709d..0000000000 --- a/riscv/insns/vamoaddei16_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoadde.v vd, (rs1), vs2, vd -VI_AMO({ return lhs + vs3; }, uint, e16); diff --git a/riscv/insns/vamoaddei32_v.h b/riscv/insns/vamoaddei32_v.h deleted file mode 100644 index 2bd77fcbd2..0000000000 --- a/riscv/insns/vamoaddei32_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoadde.v vd, (rs1), vs2, vd -VI_AMO({ return lhs + vs3; }, uint, e32); diff --git a/riscv/insns/vamoaddei64_v.h b/riscv/insns/vamoaddei64_v.h deleted file mode 100644 index 79ca748205..0000000000 --- a/riscv/insns/vamoaddei64_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoadde.v vd, (rs1), vs2, vd -VI_AMO({ return lhs + vs3; }, uint, e64); diff --git a/riscv/insns/vamoaddei8_v.h b/riscv/insns/vamoaddei8_v.h deleted file mode 100644 index 06b8c79302..0000000000 --- a/riscv/insns/vamoaddei8_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoadde.v vd, (rs1), vs2, vd -VI_AMO({ return lhs + vs3; }, uint, e8); diff --git a/riscv/insns/vamoandei16_v.h b/riscv/insns/vamoandei16_v.h deleted file mode 100644 index be119497f3..0000000000 --- a/riscv/insns/vamoandei16_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoande.v vd, (rs1), vs2, vd -VI_AMO({ return lhs & vs3; }, uint, e16); diff --git a/riscv/insns/vamoandei32_v.h b/riscv/insns/vamoandei32_v.h deleted file mode 100644 index 71506704ff..0000000000 --- a/riscv/insns/vamoandei32_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoande.v vd, (rs1), vs2, vd -VI_AMO({ return lhs & vs3; }, uint, e32); diff --git a/riscv/insns/vamoandei64_v.h b/riscv/insns/vamoandei64_v.h deleted file mode 100644 index 3efae3b59f..0000000000 --- a/riscv/insns/vamoandei64_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoande.v vd, (rs1), vs2, vd -VI_AMO({ return lhs & 
vs3; }, uint, e64); diff --git a/riscv/insns/vamoandei8_v.h b/riscv/insns/vamoandei8_v.h deleted file mode 100644 index c47645d3e0..0000000000 --- a/riscv/insns/vamoandei8_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoande.v vd, (rs1), vs2, vd -VI_AMO({ return lhs & vs3; }, uint, e8); diff --git a/riscv/insns/vamomaxei16_v.h b/riscv/insns/vamomaxei16_v.h deleted file mode 100644 index ca67893e99..0000000000 --- a/riscv/insns/vamomaxei16_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomaxe.v vd, (rs1), vs2, vd -VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e16); diff --git a/riscv/insns/vamomaxei32_v.h b/riscv/insns/vamomaxei32_v.h deleted file mode 100644 index b6823cd042..0000000000 --- a/riscv/insns/vamomaxei32_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomaxe.v vd, (rs1), vs2, vd -VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e32); diff --git a/riscv/insns/vamomaxei64_v.h b/riscv/insns/vamomaxei64_v.h deleted file mode 100644 index 46e8a3bbd1..0000000000 --- a/riscv/insns/vamomaxei64_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomaxe.v vd, (rs1), vs2, vd -VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e64); diff --git a/riscv/insns/vamomaxei8_v.h b/riscv/insns/vamomaxei8_v.h deleted file mode 100644 index 9697b3a4cb..0000000000 --- a/riscv/insns/vamomaxei8_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomaxe.v vd, (rs1), vs2, vd -VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e8); diff --git a/riscv/insns/vamomaxuei16_v.h b/riscv/insns/vamomaxuei16_v.h deleted file mode 100644 index e05971dfcf..0000000000 --- a/riscv/insns/vamomaxuei16_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomaxue.v vd, (rs1), vs2, vd -VI_AMO({ return lhs >= vs3 ? lhs : vs3;; }, uint, e16); diff --git a/riscv/insns/vamomaxuei32_v.h b/riscv/insns/vamomaxuei32_v.h deleted file mode 100644 index 9b873543b9..0000000000 --- a/riscv/insns/vamomaxuei32_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomaxue.v vd, (rs1), vs2, vd -VI_AMO({ return lhs >= vs3 ? 
lhs : vs3;; }, uint, e32); diff --git a/riscv/insns/vamomaxuei64_v.h b/riscv/insns/vamomaxuei64_v.h deleted file mode 100644 index bbfbc9f2a3..0000000000 --- a/riscv/insns/vamomaxuei64_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomaxue.v vd, (rs1), vs2, vd -VI_AMO({ return lhs >= vs3 ? lhs : vs3;; }, uint, e64); diff --git a/riscv/insns/vamomaxuei8_v.h b/riscv/insns/vamomaxuei8_v.h deleted file mode 100644 index 357ba2454a..0000000000 --- a/riscv/insns/vamomaxuei8_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomaxue.v vd, (rs1), vs2, vd -VI_AMO({ return lhs >= vs3 ? lhs : vs3;; }, uint, e8); diff --git a/riscv/insns/vamominei16_v.h b/riscv/insns/vamominei16_v.h deleted file mode 100644 index 9d1ecac643..0000000000 --- a/riscv/insns/vamominei16_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomine.v vd, (rs1), vs2, vd -VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e16); diff --git a/riscv/insns/vamominei32_v.h b/riscv/insns/vamominei32_v.h deleted file mode 100644 index 6cb8475e39..0000000000 --- a/riscv/insns/vamominei32_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomine.v vd, (rs1), vs2, vd -VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e32); diff --git a/riscv/insns/vamominei64_v.h b/riscv/insns/vamominei64_v.h deleted file mode 100644 index 9ef3d4ee3b..0000000000 --- a/riscv/insns/vamominei64_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomine.v vd, (rs1), vs2, vd -VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e64); diff --git a/riscv/insns/vamominei8_v.h b/riscv/insns/vamominei8_v.h deleted file mode 100644 index 5c035ea47b..0000000000 --- a/riscv/insns/vamominei8_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamomine.v vd, (rs1), vs2, vd -VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e8); diff --git a/riscv/insns/vamominuei16_v.h b/riscv/insns/vamominuei16_v.h deleted file mode 100644 index d4a8f89292..0000000000 --- a/riscv/insns/vamominuei16_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamominue.v vd, (rs1), vs2, vd -VI_AMO({ return lhs < vs3 ? 
lhs : vs3;; }, uint, e16); diff --git a/riscv/insns/vamominuei32_v.h b/riscv/insns/vamominuei32_v.h deleted file mode 100644 index 16296c5beb..0000000000 --- a/riscv/insns/vamominuei32_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamominue.v vd, (rs1), vs2, vd -VI_AMO({ return lhs < vs3 ? lhs : vs3;; }, uint, e32); diff --git a/riscv/insns/vamominuei64_v.h b/riscv/insns/vamominuei64_v.h deleted file mode 100644 index fd850fd063..0000000000 --- a/riscv/insns/vamominuei64_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamominue.v vd, (rs1), vs2, vd -VI_AMO({ return lhs < vs3 ? lhs : vs3;; }, uint, e64); diff --git a/riscv/insns/vamominuei8_v.h b/riscv/insns/vamominuei8_v.h deleted file mode 100644 index 3749d0525d..0000000000 --- a/riscv/insns/vamominuei8_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamominue.v vd, (rs1), vs2, vd -VI_AMO({ return lhs < vs3 ? lhs : vs3;; }, uint, e8); diff --git a/riscv/insns/vamoorei16_v.h b/riscv/insns/vamoorei16_v.h deleted file mode 100644 index a5ba1caa74..0000000000 --- a/riscv/insns/vamoorei16_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoore.v vd, (rs1), vs2, vd -VI_AMO({ return lhs | vs3; }, uint, e16); diff --git a/riscv/insns/vamoorei32_v.h b/riscv/insns/vamoorei32_v.h deleted file mode 100644 index 94e4458e49..0000000000 --- a/riscv/insns/vamoorei32_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoore.v vd, (rs1), vs2, vd -VI_AMO({ return lhs | vs3; }, uint, e32); diff --git a/riscv/insns/vamoorei64_v.h b/riscv/insns/vamoorei64_v.h deleted file mode 100644 index 84e03944e5..0000000000 --- a/riscv/insns/vamoorei64_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoore.v vd, (rs1), vs2, vd -VI_AMO({ return lhs | vs3; }, uint, e64); diff --git a/riscv/insns/vamoorei8_v.h b/riscv/insns/vamoorei8_v.h deleted file mode 100644 index 364035dbb2..0000000000 --- a/riscv/insns/vamoorei8_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoore.v vd, (rs1), vs2, vd -VI_AMO({ return lhs | vs3; }, uint, e8); diff --git a/riscv/insns/vamoswapei16_v.h b/riscv/insns/vamoswapei16_v.h deleted file mode 
100644 index 31ff021030..0000000000 --- a/riscv/insns/vamoswapei16_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoswape.v vd, (rs1), vs2, vd -VI_AMO({ return vs3; }, uint, e16); diff --git a/riscv/insns/vamoswapei32_v.h b/riscv/insns/vamoswapei32_v.h deleted file mode 100644 index a5741929ab..0000000000 --- a/riscv/insns/vamoswapei32_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoswape.v vd, (rs1), vs2, vd -VI_AMO({ return vs3; }, uint, e32); diff --git a/riscv/insns/vamoswapei64_v.h b/riscv/insns/vamoswapei64_v.h deleted file mode 100644 index 58bd035217..0000000000 --- a/riscv/insns/vamoswapei64_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoswape.v vd, (rs1), vs2, vd -VI_AMO({ return vs3; }, uint, e64); diff --git a/riscv/insns/vamoswapei8_v.h b/riscv/insns/vamoswapei8_v.h deleted file mode 100644 index af37c8c3f8..0000000000 --- a/riscv/insns/vamoswapei8_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoswape.v vd, (rs1), vs2, vd -VI_AMO({ return vs3; }, uint, e8); diff --git a/riscv/insns/vamoxorei16_v.h b/riscv/insns/vamoxorei16_v.h deleted file mode 100644 index 61e8c3272c..0000000000 --- a/riscv/insns/vamoxorei16_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoore.v vd, (rs1), vs2, vd -VI_AMO({ return lhs ^ vs3; }, uint, e16); diff --git a/riscv/insns/vamoxorei32_v.h b/riscv/insns/vamoxorei32_v.h deleted file mode 100644 index d48d951504..0000000000 --- a/riscv/insns/vamoxorei32_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoore.v vd, (rs1), vs2, vd -VI_AMO({ return lhs ^ vs3; }, uint, e32); diff --git a/riscv/insns/vamoxorei64_v.h b/riscv/insns/vamoxorei64_v.h deleted file mode 100644 index f7a3ca42e1..0000000000 --- a/riscv/insns/vamoxorei64_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoore.v vd, (rs1), vs2, vd -VI_AMO({ return lhs ^ vs3; }, uint, e64); diff --git a/riscv/insns/vamoxorei8_v.h b/riscv/insns/vamoxorei8_v.h deleted file mode 100644 index 4b6c79824c..0000000000 --- a/riscv/insns/vamoxorei8_v.h +++ /dev/null @@ -1,2 +0,0 @@ -//vamoore.v vd, (rs1), vs2, vd -VI_AMO({ return lhs ^ vs3; }, 
uint, e8); From 9ffcbc3556a00a1ac228b96c09051f9bbbe90e1c Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Mon, 16 Oct 2023 21:00:27 -0700 Subject: [PATCH 108/127] vamo: remove related loop macro Signed-off-by: Chih-Min Chao --- riscv/v_ext_macros.h | 54 -------------------------------------------- 1 file changed, 54 deletions(-) diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index 908ff16c28..b198d54b62 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -1399,60 +1399,6 @@ reg_t index[P.VU.vlmax]; \ } \ P.VU.vstart->write(0); -// -// vector: amo -// -#define VI_AMO(op, type, idx_type) \ - require_vector(false); \ - require_align(insn.rd(), P.VU.vflmul); \ - require(P.VU.vsew <= P.get_xlen() && P.VU.vsew >= 32); \ - require_align(insn.rd(), P.VU.vflmul); \ - float vemul = ((float)idx_type / P.VU.vsew * P.VU.vflmul); \ - require(vemul >= 0.125 && vemul <= 8); \ - require_align(insn.rs2(), vemul); \ - if (insn.v_wd()) { \ - require_vm; \ - if (idx_type > P.VU.vsew) { \ - if (insn.rd() != insn.rs2()) \ - require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ - } else if (idx_type < P.VU.vsew) { \ - if (vemul < 1) { \ - require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ - } else { \ - require_noover_widen(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ - } \ - } \ - } \ - VI_DUPLICATE_VREG(insn.rs2(), idx_type); \ - const reg_t vl = P.VU.vl->read(); \ - const reg_t baseAddr = RS1; \ - const reg_t vd = insn.rd(); \ - for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ - VI_ELEMENT_SKIP; \ - VI_STRIP(i); \ - P.VU.vstart->write(i); \ - switch (P.VU.vsew) { \ - case e32: { \ - auto vs3 = P.VU.elt< type ## 32_t>(vd, vreg_inx); \ - auto val = MMU.amo(baseAddr + index[i], [&](type ## 32_t UNUSED lhs) { op }); \ - if (insn.v_wd()) \ - P.VU.elt< type ## 32_t>(vd, vreg_inx, true) = val; \ - } \ - break; \ - case e64: { \ - auto vs3 = P.VU.elt< type ## 64_t>(vd, vreg_inx); \ - auto val = MMU.amo(baseAddr + index[i], [&](type ## 64_t UNUSED 
lhs) { op }); \ - if (insn.v_wd()) \ - P.VU.elt< type ## 64_t>(vd, vreg_inx, true) = val; \ - } \ - break; \ - default: \ - require(0); \ - break; \ - } \ - } \ - P.VU.vstart->write(0); - // vector: sign/unsiged extension #define VI_VV_EXT(div, type) \ require(insn.rd() != insn.rs2()); \ From e84ef9385ceae7d30b4dd196931a8fa23919d1a2 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Mon, 16 Oct 2023 23:09:06 -0700 Subject: [PATCH 109/127] vamo: remove disassembler Signed-off-by: Chih-Min Chao --- disasm/disasm.cc | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 15e767ee64..8188b08aa3 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -1795,41 +1795,6 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) #undef DISASM_OPIV_S__INSN #undef DISASM_OPIV_W__INSN #undef DISASM_VFUNARY0_INSN - - // vector amo - std::vector v_fmt_amo_wd = {&vd, &v_address, &vs2, &vd, opt, &vm}; - std::vector v_fmt_amo = {&x0, &v_address, &vs2, &vd, opt, &vm}; - for (size_t elt = 0; elt <= 3; ++elt) { - const custom_fmt_t template_insn[] = { - {match_vamoaddei8_v | mask_wd, mask_vamoaddei8_v | mask_wd, - "%sei%d.v", v_fmt_amo_wd}, - {match_vamoaddei8_v, mask_vamoaddei8_v | mask_wd, - "%sei%d.v", v_fmt_amo}, - }; - std::pair amo_map[] = { - {"vamoswap", 0x01ul << 27}, - {"vamoadd", 0x00ul << 27}, - {"vamoxor", 0x04ul << 27}, - {"vamoand", 0x0cul << 27}, - {"vamoor", 0x08ul << 27}, - {"vamomin", 0x10ul << 27}, - {"vamomax", 0x14ul << 27}, - {"vamominu", 0x18ul << 27}, - {"vamomaxu", 0x1cul << 27}}; - const reg_t elt_map[] = {0x0ul << 12, 0x5ul << 12, - 0x6ul <<12, 0x7ul << 12}; - - for (size_t idx = 0; idx < sizeof(amo_map) / sizeof(amo_map[0]); ++idx) { - for (auto item : template_insn) { - char buf[128]; - snprintf(buf, sizeof(buf), item.fmt, amo_map[idx].first, 8 << elt); - add_insn(new disasm_insn_t(buf, - item.match | amo_map[idx].second | elt_map[elt], - item.mask, - item.arg)); - } - } - } } 
if (isa->extension_enabled(EXT_ZVFBFMIN)) { From 68d4f6faff508f7f7975a6221133f9b41681332e Mon Sep 17 00:00:00 2001 From: Tim Newsome Date: Tue, 17 Oct 2023 13:16:20 -0700 Subject: [PATCH 110/127] workflow: Update riscv-openocd and riscv-tests versions Make sure we test with recent OpenOCD and tests. --- .github/workflows/debug-smoke.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/debug-smoke.yml b/.github/workflows/debug-smoke.yml index d320b1959b..a1d9f4fdf7 100644 --- a/.github/workflows/debug-smoke.yml +++ b/.github/workflows/debug-smoke.yml @@ -25,7 +25,7 @@ jobs: run: | git clone --recurse-submodules https://github.com/riscv/riscv-openocd.git cd riscv-openocd - git checkout 43ea20dfbb6c815004a51106a3b2009d7f6c4940 + git checkout a495dd854ce2e857a583125a31527a47320ec6b9 - name: Build OpenOCD run: | @@ -47,7 +47,7 @@ jobs: run: | git clone --recurse-submodules https://github.com/riscv-software-src/riscv-tests.git cd riscv-tests - git checkout c84daca8824635b7d896003c78f9c6245997cf7a + git checkout d020e2069a9f6a9c0e875f23f0f4aababea9fbf0 - name: Run Tests run: | From 1de1e81952e387ef6b282dc46f0fdf9ae4f74df5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 18 Oct 2023 01:15:18 -0700 Subject: [PATCH 111/127] tmp --- riscv/decode.h | 1 + 1 file changed, 1 insertion(+) diff --git a/riscv/decode.h b/riscv/decode.h index cd1c0a1222..6e2bdec038 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -1,5 +1,6 @@ // See LICENSE for license details. + #ifndef _RISCV_DECODE_H #define _RISCV_DECODE_H From 4eb1cceb3d7ddb73a695fd58ee7a819641de2522 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 18 Oct 2023 08:50:57 -0700 Subject: [PATCH 112/127] Revert "tmp" This reverts commit 1de1e81952e387ef6b282dc46f0fdf9ae4f74df5. 
--- riscv/decode.h | 1 - 1 file changed, 1 deletion(-) diff --git a/riscv/decode.h b/riscv/decode.h index 6e2bdec038..cd1c0a1222 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -1,6 +1,5 @@ // See LICENSE for license details. - #ifndef _RISCV_DECODE_H #define _RISCV_DECODE_H From 762ed3f5c849d1c7f7e9fff6da3712531663d7a8 Mon Sep 17 00:00:00 2001 From: Ved Shanbhogue Date: Sat, 23 Sep 2023 11:17:48 -0500 Subject: [PATCH 113/127] add zcmop extension instructions --- disasm/disasm.cc | 13 +++- disasm/isa_parser.cc | 2 + riscv/encoding.h | 161 +++++++++++++++++++++++++++++++++++++++++- riscv/insns/c_lui.h | 7 +- riscv/insns/c_mop_N.h | 2 + riscv/isa_parser.h | 1 + 6 files changed, 182 insertions(+), 4 deletions(-) create mode 100644 riscv/insns/c_mop_N.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 8188b08aa3..4f9937fc7c 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2149,6 +2149,17 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DEFINE_RTYPE(czero_nez); } + if (isa->extension_enabled(EXT_ZCMOP)) { + DISASM_INSN("c.mop.1", c_mop_1, 0, {}); + DISASM_INSN("c.mop.3", c_mop_3, 0, {}); + DISASM_INSN("c.mop.5", c_mop_5, 0, {}); + DISASM_INSN("c.mop.7", c_mop_7, 0, {}); + DISASM_INSN("c.mop.9", c_mop_9, 0, {}); + DISASM_INSN("c.mop.11", c_mop_11, 0, {}); + DISASM_INSN("c.mop.13", c_mop_13, 0, {}); + DISASM_INSN("c.mop.15", c_mop_15, 0, {}); + } + if (isa->extension_enabled(EXT_ZKND) || isa->extension_enabled(EXT_ZKNE)) { DISASM_INSN("aes64ks1i", aes64ks1i, 0, {&xrd, &xrs1, &rcon}); @@ -2302,7 +2313,7 @@ disassembler_t::disassembler_t(const isa_parser_t *isa) // next-highest priority: other instructions in same base ISA std::string fallback_isa_string = std::string("rv") + std::to_string(isa->get_max_xlen()) + - "gqchv_zfh_zba_zbb_zbc_zbs_zcb_zicbom_zicboz_zicond_zkn_zkr_zks_svinval"; + "gqchv_zfh_zba_zbb_zbc_zbs_zcb_zicbom_zicboz_zicond_zkn_zkr_zks_svinval_zcmop"; isa_parser_t fallback_isa(fallback_isa_string.c_str(), DEFAULT_PRIV); 
add_instructions(&fallback_isa); diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index d5dc439a77..3291f7d194 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -292,6 +292,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_SSCSRIND] = true; } else if (ext_str == "smcntrpmf") { extension_table[EXT_SMCNTRPMF] = true; + } else if (ext_str == "zcmop") { + extension_table[EXT_ZCMOP] = true; } else if (ext_str[0] == 'x') { extension_table['X'] = true; if (ext_str.size() == 1) { diff --git a/riscv/encoding.h b/riscv/encoding.h index a7e2d94512..6ad1fcc6c1 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (d752f19) + * https://github.com/riscv/riscv-opcodes (b9d63ad) */ #ifndef RISCV_CSR_ENCODING_H @@ -592,6 +592,24 @@ #define MASK_C_LW 0xe003 #define MATCH_C_LWSP 0x4002 #define MASK_C_LWSP 0xe003 +#define MATCH_C_MOP_1 0x6081 +#define MASK_C_MOP_1 0xffff +#define MATCH_C_MOP_11 0x6581 +#define MASK_C_MOP_11 0xffff +#define MATCH_C_MOP_13 0x6681 +#define MASK_C_MOP_13 0xffff +#define MATCH_C_MOP_15 0x6781 +#define MASK_C_MOP_15 0xffff +#define MATCH_C_MOP_3 0x6181 +#define MASK_C_MOP_3 0xffff +#define MATCH_C_MOP_5 0x6281 +#define MASK_C_MOP_5 0xffff +#define MATCH_C_MOP_7 0x6381 +#define MASK_C_MOP_7 0xffff +#define MATCH_C_MOP_9 0x6481 +#define MASK_C_MOP_9 0xffff +#define MATCH_C_MOP_N 0x6081 +#define MASK_C_MOP_N 0xf8ff #define MATCH_C_MUL 0x9c41 #define MASK_C_MUL 0xfc63 #define MATCH_C_MV 0x8002 @@ -1402,6 +1420,90 @@ #define MASK_MINU 0xfe00707f #define MATCH_MNRET 0x70200073 #define MASK_MNRET 0xffffffff +#define MATCH_MOP_R_0 0x81c04073 +#define MASK_MOP_R_0 0xfff0707f +#define MATCH_MOP_R_1 0x81d04073 +#define MASK_MOP_R_1 0xfff0707f +#define MATCH_MOP_R_10 0x89e04073 +#define MASK_MOP_R_10 0xfff0707f +#define MATCH_MOP_R_11 0x89f04073 +#define MASK_MOP_R_11 0xfff0707f +#define 
MATCH_MOP_R_12 0x8dc04073 +#define MASK_MOP_R_12 0xfff0707f +#define MATCH_MOP_R_13 0x8dd04073 +#define MASK_MOP_R_13 0xfff0707f +#define MATCH_MOP_R_14 0x8de04073 +#define MASK_MOP_R_14 0xfff0707f +#define MATCH_MOP_R_15 0x8df04073 +#define MASK_MOP_R_15 0xfff0707f +#define MATCH_MOP_R_16 0xc1c04073 +#define MASK_MOP_R_16 0xfff0707f +#define MATCH_MOP_R_17 0xc1d04073 +#define MASK_MOP_R_17 0xfff0707f +#define MATCH_MOP_R_18 0xc1e04073 +#define MASK_MOP_R_18 0xfff0707f +#define MATCH_MOP_R_19 0xc1f04073 +#define MASK_MOP_R_19 0xfff0707f +#define MATCH_MOP_R_2 0x81e04073 +#define MASK_MOP_R_2 0xfff0707f +#define MATCH_MOP_R_20 0xc5c04073 +#define MASK_MOP_R_20 0xfff0707f +#define MATCH_MOP_R_21 0xc5d04073 +#define MASK_MOP_R_21 0xfff0707f +#define MATCH_MOP_R_22 0xc5e04073 +#define MASK_MOP_R_22 0xfff0707f +#define MATCH_MOP_R_23 0xc5f04073 +#define MASK_MOP_R_23 0xfff0707f +#define MATCH_MOP_R_24 0xc9c04073 +#define MASK_MOP_R_24 0xfff0707f +#define MATCH_MOP_R_25 0xc9d04073 +#define MASK_MOP_R_25 0xfff0707f +#define MATCH_MOP_R_26 0xc9e04073 +#define MASK_MOP_R_26 0xfff0707f +#define MATCH_MOP_R_27 0xc9f04073 +#define MASK_MOP_R_27 0xfff0707f +#define MATCH_MOP_R_28 0xcdc04073 +#define MASK_MOP_R_28 0xfff0707f +#define MATCH_MOP_R_29 0xcdd04073 +#define MASK_MOP_R_29 0xfff0707f +#define MATCH_MOP_R_3 0x81f04073 +#define MASK_MOP_R_3 0xfff0707f +#define MATCH_MOP_R_30 0xcde04073 +#define MASK_MOP_R_30 0xfff0707f +#define MATCH_MOP_R_31 0xcdf04073 +#define MASK_MOP_R_31 0xfff0707f +#define MATCH_MOP_R_4 0x85c04073 +#define MASK_MOP_R_4 0xfff0707f +#define MATCH_MOP_R_5 0x85d04073 +#define MASK_MOP_R_5 0xfff0707f +#define MATCH_MOP_R_6 0x85e04073 +#define MASK_MOP_R_6 0xfff0707f +#define MATCH_MOP_R_7 0x85f04073 +#define MASK_MOP_R_7 0xfff0707f +#define MATCH_MOP_R_8 0x89c04073 +#define MASK_MOP_R_8 0xfff0707f +#define MATCH_MOP_R_9 0x89d04073 +#define MASK_MOP_R_9 0xfff0707f +#define MATCH_MOP_R_N 0x81c04073 +#define MASK_MOP_R_N 0xb3c0707f +#define MATCH_MOP_RR_0 
0x82004073 +#define MASK_MOP_RR_0 0xfe00707f +#define MATCH_MOP_RR_1 0x86004073 +#define MASK_MOP_RR_1 0xfe00707f +#define MATCH_MOP_RR_2 0x8a004073 +#define MASK_MOP_RR_2 0xfe00707f +#define MATCH_MOP_RR_3 0x8e004073 +#define MASK_MOP_RR_3 0xfe00707f +#define MATCH_MOP_RR_4 0xc2004073 +#define MASK_MOP_RR_4 0xfe00707f +#define MATCH_MOP_RR_5 0xc6004073 +#define MASK_MOP_RR_5 0xfe00707f +#define MATCH_MOP_RR_6 0xca004073 +#define MASK_MOP_RR_6 0xfe00707f +#define MATCH_MOP_RR_7 0xce004073 +#define MASK_MOP_RR_7 0xfe00707f +#define MATCH_MOP_RR_N 0x82004073 +#define MASK_MOP_RR_N 0xb200707f #define MATCH_MRET 0x30200073 #define MASK_MRET 0xffffffff #define MATCH_MSUBR32 0xc6001077 @@ -3579,6 +3681,12 @@ #define INSN_FIELD_C_RS2 0x7c #define INSN_FIELD_C_SREG1 0x380 #define INSN_FIELD_C_SREG2 0x1c +#define INSN_FIELD_MOP_R_T_30 0x40000000 +#define INSN_FIELD_MOP_R_T_27_26 0xc000000 +#define INSN_FIELD_MOP_R_T_21_20 0x300000 +#define INSN_FIELD_MOP_RR_T_30 0x40000000 +#define INSN_FIELD_MOP_RR_T_27_26 0xc000000 +#define INSN_FIELD_C_MOP_T 0x700 #endif #ifdef DECLARE_INSN DECLARE_INSN(add, MATCH_ADD, MASK_ADD) @@ -3686,6 +3794,15 @@ DECLARE_INSN(c_lqsp, MATCH_C_LQSP, MASK_C_LQSP) DECLARE_INSN(c_lui, MATCH_C_LUI, MASK_C_LUI) DECLARE_INSN(c_lw, MATCH_C_LW, MASK_C_LW) DECLARE_INSN(c_lwsp, MATCH_C_LWSP, MASK_C_LWSP) +DECLARE_INSN(c_mop_1, MATCH_C_MOP_1, MASK_C_MOP_1) +DECLARE_INSN(c_mop_11, MATCH_C_MOP_11, MASK_C_MOP_11) +DECLARE_INSN(c_mop_13, MATCH_C_MOP_13, MASK_C_MOP_13) +DECLARE_INSN(c_mop_15, MATCH_C_MOP_15, MASK_C_MOP_15) +DECLARE_INSN(c_mop_3, MATCH_C_MOP_3, MASK_C_MOP_3) +DECLARE_INSN(c_mop_5, MATCH_C_MOP_5, MASK_C_MOP_5) +DECLARE_INSN(c_mop_7, MATCH_C_MOP_7, MASK_C_MOP_7) +DECLARE_INSN(c_mop_9, MATCH_C_MOP_9, MASK_C_MOP_9) +DECLARE_INSN(c_mop_N, MATCH_C_MOP_N, MASK_C_MOP_N) DECLARE_INSN(c_mul, MATCH_C_MUL, MASK_C_MUL) DECLARE_INSN(c_mv, MATCH_C_MV, MASK_C_MV) DECLARE_INSN(c_nop, MATCH_C_NOP, MASK_C_NOP) @@ -4091,6 +4208,48 @@ DECLARE_INSN(maxu, MATCH_MAXU, 
MASK_MAXU) DECLARE_INSN(min, MATCH_MIN, MASK_MIN) DECLARE_INSN(minu, MATCH_MINU, MASK_MINU) DECLARE_INSN(mnret, MATCH_MNRET, MASK_MNRET) +DECLARE_INSN(mop_r_0, MATCH_MOP_R_0, MASK_MOP_R_0) +DECLARE_INSN(mop_r_1, MATCH_MOP_R_1, MASK_MOP_R_1) +DECLARE_INSN(mop_r_10, MATCH_MOP_R_10, MASK_MOP_R_10) +DECLARE_INSN(mop_r_11, MATCH_MOP_R_11, MASK_MOP_R_11) +DECLARE_INSN(mop_r_12, MATCH_MOP_R_12, MASK_MOP_R_12) +DECLARE_INSN(mop_r_13, MATCH_MOP_R_13, MASK_MOP_R_13) +DECLARE_INSN(mop_r_14, MATCH_MOP_R_14, MASK_MOP_R_14) +DECLARE_INSN(mop_r_15, MATCH_MOP_R_15, MASK_MOP_R_15) +DECLARE_INSN(mop_r_16, MATCH_MOP_R_16, MASK_MOP_R_16) +DECLARE_INSN(mop_r_17, MATCH_MOP_R_17, MASK_MOP_R_17) +DECLARE_INSN(mop_r_18, MATCH_MOP_R_18, MASK_MOP_R_18) +DECLARE_INSN(mop_r_19, MATCH_MOP_R_19, MASK_MOP_R_19) +DECLARE_INSN(mop_r_2, MATCH_MOP_R_2, MASK_MOP_R_2) +DECLARE_INSN(mop_r_20, MATCH_MOP_R_20, MASK_MOP_R_20) +DECLARE_INSN(mop_r_21, MATCH_MOP_R_21, MASK_MOP_R_21) +DECLARE_INSN(mop_r_22, MATCH_MOP_R_22, MASK_MOP_R_22) +DECLARE_INSN(mop_r_23, MATCH_MOP_R_23, MASK_MOP_R_23) +DECLARE_INSN(mop_r_24, MATCH_MOP_R_24, MASK_MOP_R_24) +DECLARE_INSN(mop_r_25, MATCH_MOP_R_25, MASK_MOP_R_25) +DECLARE_INSN(mop_r_26, MATCH_MOP_R_26, MASK_MOP_R_26) +DECLARE_INSN(mop_r_27, MATCH_MOP_R_27, MASK_MOP_R_27) +DECLARE_INSN(mop_r_28, MATCH_MOP_R_28, MASK_MOP_R_28) +DECLARE_INSN(mop_r_29, MATCH_MOP_R_29, MASK_MOP_R_29) +DECLARE_INSN(mop_r_3, MATCH_MOP_R_3, MASK_MOP_R_3) +DECLARE_INSN(mop_r_30, MATCH_MOP_R_30, MASK_MOP_R_30) +DECLARE_INSN(mop_r_31, MATCH_MOP_R_31, MASK_MOP_R_31) +DECLARE_INSN(mop_r_4, MATCH_MOP_R_4, MASK_MOP_R_4) +DECLARE_INSN(mop_r_5, MATCH_MOP_R_5, MASK_MOP_R_5) +DECLARE_INSN(mop_r_6, MATCH_MOP_R_6, MASK_MOP_R_6) +DECLARE_INSN(mop_r_7, MATCH_MOP_R_7, MASK_MOP_R_7) +DECLARE_INSN(mop_r_8, MATCH_MOP_R_8, MASK_MOP_R_8) +DECLARE_INSN(mop_r_9, MATCH_MOP_R_9, MASK_MOP_R_9) +DECLARE_INSN(mop_r_N, MATCH_MOP_R_N, MASK_MOP_R_N) +DECLARE_INSN(mop_rr_0, MATCH_MOP_RR_0, MASK_MOP_RR_0) +DECLARE_INSN(mop_rr_1, 
MATCH_MOP_RR_1, MASK_MOP_RR_1) +DECLARE_INSN(mop_rr_2, MATCH_MOP_RR_2, MASK_MOP_RR_2) +DECLARE_INSN(mop_rr_3, MATCH_MOP_RR_3, MASK_MOP_RR_3) +DECLARE_INSN(mop_rr_4, MATCH_MOP_RR_4, MASK_MOP_RR_4) +DECLARE_INSN(mop_rr_5, MATCH_MOP_RR_5, MASK_MOP_RR_5) +DECLARE_INSN(mop_rr_6, MATCH_MOP_RR_6, MASK_MOP_RR_6) +DECLARE_INSN(mop_rr_7, MATCH_MOP_RR_7, MASK_MOP_RR_7) +DECLARE_INSN(mop_rr_N, MATCH_MOP_RR_N, MASK_MOP_RR_N) DECLARE_INSN(mret, MATCH_MRET, MASK_MRET) DECLARE_INSN(msubr32, MATCH_MSUBR32, MASK_MSUBR32) DECLARE_INSN(mul, MATCH_MUL, MASK_MUL) diff --git a/riscv/insns/c_lui.h b/riscv/insns/c_lui.h index 956fa448bc..3e0e02f4e5 100644 --- a/riscv/insns/c_lui.h +++ b/riscv/insns/c_lui.h @@ -2,7 +2,10 @@ require_extension(EXT_ZCA); if (insn.rvc_rd() == 2) { // c.addi16sp require(insn.rvc_addi16sp_imm() != 0); WRITE_REG(X_SP, sext_xlen(RVC_SP + insn.rvc_addi16sp_imm())); -} else { - require(insn.rvc_imm() != 0); +} else if (insn.rvc_imm() != 0) { // c.lui WRITE_RD(insn.rvc_imm() << 12); +} else if ((insn.rvc_rd() & 1) != 0) { // c.mop.N + #include "c_mop_N.h" +} else { + require(false); } diff --git a/riscv/insns/c_mop_N.h b/riscv/insns/c_mop_N.h new file mode 100644 index 0000000000..78ec382293 --- /dev/null +++ b/riscv/insns/c_mop_N.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZCA); +require_extension(EXT_ZCMOP); diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index f955e1613c..af4f9254fb 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -80,6 +80,7 @@ typedef enum { EXT_SMCSRIND, EXT_SSCSRIND, EXT_SMCNTRPMF, + EXT_ZCMOP, NUM_ISA_EXTENSIONS } isa_extension_t; From 9258b59e67f92332fe057e4856e3e2f07312bfe5 Mon Sep 17 00:00:00 2001 From: brs Date: Wed, 18 Oct 2023 19:07:45 -0500 Subject: [PATCH 114/127] Spike support for the Zalasr extension --- disasm/disasm.cc | 11 ++++ disasm/isa_parser.cc | 2 + riscv/encoding.h | 136 +++++++++---------------------------------- riscv/insns/lb_aq.h | 2 + riscv/insns/ld_aq.h | 3 + riscv/insns/lh_aq.h | 2 + riscv/insns/lw_aq.h | 
2 + riscv/insns/sb_rl.h | 2 + riscv/insns/sd_rl.h | 3 + riscv/insns/sh_rl.h | 2 + riscv/insns/sw_rl.h | 2 + riscv/isa_parser.h | 1 + riscv/riscv.mk.in | 11 ++++ 13 files changed, 69 insertions(+), 110 deletions(-) create mode 100644 riscv/insns/lb_aq.h create mode 100644 riscv/insns/ld_aq.h create mode 100644 riscv/insns/lh_aq.h create mode 100644 riscv/insns/lw_aq.h create mode 100644 riscv/insns/sb_rl.h create mode 100644 riscv/insns/sd_rl.h create mode 100644 riscv/insns/sh_rl.h create mode 100644 riscv/insns/sw_rl.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 4f9937fc7c..df02e03242 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2304,6 +2304,17 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DEFINE_VECTOR_VIU(vsm3c_vi); DEFINE_VECTOR_VV(vsm3me_vv); } + + if (isa->extension_enabled(EXT_ZALASR)) { + DEFINE_XLOAD_BASE(lb_aq); + DEFINE_XLOAD_BASE(lh_aq); + DEFINE_XLOAD_BASE(lw_aq); + DEFINE_XLOAD_BASE(ld_aq); + DEFINE_XSTORE_BASE(sb_rl); + DEFINE_XSTORE_BASE(sh_rl); + DEFINE_XSTORE_BASE(sw_rl); + DEFINE_XSTORE_BASE(sd_rl); + } } disassembler_t::disassembler_t(const isa_parser_t *isa) diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index 3291f7d194..563f687416 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -294,6 +294,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_SMCNTRPMF] = true; } else if (ext_str == "zcmop") { extension_table[EXT_ZCMOP] = true; + } else if (ext_str == "zalasr") { + extension_table[EXT_ZALASR] = true; } else if (ext_str[0] == 'x') { extension_table['X'] = true; if (ext_str.size() == 1) { diff --git a/riscv/encoding.h b/riscv/encoding.h index 6ad1fcc6c1..1a0f5549b3 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (b9d63ad) + * https://github.com/riscv/riscv-opcodes (4f28129) */ #ifndef RISCV_CSR_ENCODING_H @@ -1386,14 +1386,20 @@ 
#define MASK_KWMMUL_U 0xfe00707f #define MATCH_LB 0x3 #define MASK_LB 0x707f +#define MATCH_LB_AQ 0x3000002f +#define MASK_LB_AQ 0xf9f0707f #define MATCH_LBU 0x4003 #define MASK_LBU 0x707f #define MATCH_LD 0x3003 #define MASK_LD 0x707f +#define MATCH_LD_AQ 0x3000302f +#define MASK_LD_AQ 0xf9f0707f #define MATCH_LDU 0x7003 #define MASK_LDU 0x707f #define MATCH_LH 0x1003 #define MASK_LH 0x707f +#define MATCH_LH_AQ 0x3000102f +#define MASK_LH_AQ 0xf9f0707f #define MATCH_LHU 0x5003 #define MASK_LHU 0x707f #define MATCH_LQ 0x300f @@ -1406,6 +1412,8 @@ #define MASK_LUI 0x7f #define MATCH_LW 0x2003 #define MASK_LW 0x707f +#define MATCH_LW_AQ 0x3000202f +#define MASK_LW_AQ 0xf9f0707f #define MATCH_LWU 0x6003 #define MASK_LWU 0x707f #define MATCH_MADDR32 0xc4001077 @@ -1620,6 +1628,8 @@ #define MASK_RSUBW 0xfe00707f #define MATCH_SB 0x23 #define MASK_SB 0x707f +#define MATCH_SB_RL 0x3800002f +#define MASK_SB_RL 0xf8007fff #define MATCH_SC_D 0x1800302f #define MASK_SC_D 0xf800707f #define MATCH_SC_W 0x1800202f @@ -1640,6 +1650,8 @@ #define MASK_SCMPLT8 0xfe00707f #define MATCH_SD 0x3023 #define MASK_SD 0x707f +#define MATCH_SD_RL 0x3800302f +#define MASK_SD_RL 0xf8007fff #define MATCH_SEXT_B 0x60401013 #define MASK_SEXT_B 0xfff0707f #define MATCH_SEXT_H 0x60501013 @@ -1664,6 +1676,8 @@ #define MASK_SH3ADD 0xfe00707f #define MATCH_SH3ADD_UW 0x2000603b #define MASK_SH3ADD_UW 0xfe00707f +#define MATCH_SH_RL 0x3800102f +#define MASK_SH_RL 0xf8007fff #define MATCH_SHA256SIG0 0x10201013 #define MASK_SHA256SIG0 0xfff0707f #define MATCH_SHA256SIG1 0x10301013 @@ -1970,6 +1984,8 @@ #define MASK_SUNPKD832 0xfff0707f #define MATCH_SW 0x2023 #define MASK_SW 0x707f +#define MATCH_SW_RL 0x3800202f +#define MASK_SW_RL 0xf8007fff #define MATCH_UCLIP16 0x85000077 #define MASK_UCLIP16 0xff00707f #define MATCH_UCLIP32 0xf4000077 @@ -2138,78 +2154,6 @@ #define MASK_VAESKF2_VI 0xfe00707f #define MATCH_VAESZ_VS 0xa603a077 #define MASK_VAESZ_VS 0xfe0ff07f -#define MATCH_VAMOADDEI16_V 0x502f 
-#define MASK_VAMOADDEI16_V 0xf800707f -#define MATCH_VAMOADDEI32_V 0x602f -#define MASK_VAMOADDEI32_V 0xf800707f -#define MATCH_VAMOADDEI64_V 0x702f -#define MASK_VAMOADDEI64_V 0xf800707f -#define MATCH_VAMOADDEI8_V 0x2f -#define MASK_VAMOADDEI8_V 0xf800707f -#define MATCH_VAMOANDEI16_V 0x6000502f -#define MASK_VAMOANDEI16_V 0xf800707f -#define MATCH_VAMOANDEI32_V 0x6000602f -#define MASK_VAMOANDEI32_V 0xf800707f -#define MATCH_VAMOANDEI64_V 0x6000702f -#define MASK_VAMOANDEI64_V 0xf800707f -#define MATCH_VAMOANDEI8_V 0x6000002f -#define MASK_VAMOANDEI8_V 0xf800707f -#define MATCH_VAMOMAXEI16_V 0xa000502f -#define MASK_VAMOMAXEI16_V 0xf800707f -#define MATCH_VAMOMAXEI32_V 0xa000602f -#define MASK_VAMOMAXEI32_V 0xf800707f -#define MATCH_VAMOMAXEI64_V 0xa000702f -#define MASK_VAMOMAXEI64_V 0xf800707f -#define MATCH_VAMOMAXEI8_V 0xa000002f -#define MASK_VAMOMAXEI8_V 0xf800707f -#define MATCH_VAMOMAXUEI16_V 0xe000502f -#define MASK_VAMOMAXUEI16_V 0xf800707f -#define MATCH_VAMOMAXUEI32_V 0xe000602f -#define MASK_VAMOMAXUEI32_V 0xf800707f -#define MATCH_VAMOMAXUEI64_V 0xe000702f -#define MASK_VAMOMAXUEI64_V 0xf800707f -#define MATCH_VAMOMAXUEI8_V 0xe000002f -#define MASK_VAMOMAXUEI8_V 0xf800707f -#define MATCH_VAMOMINEI16_V 0x8000502f -#define MASK_VAMOMINEI16_V 0xf800707f -#define MATCH_VAMOMINEI32_V 0x8000602f -#define MASK_VAMOMINEI32_V 0xf800707f -#define MATCH_VAMOMINEI64_V 0x8000702f -#define MASK_VAMOMINEI64_V 0xf800707f -#define MATCH_VAMOMINEI8_V 0x8000002f -#define MASK_VAMOMINEI8_V 0xf800707f -#define MATCH_VAMOMINUEI16_V 0xc000502f -#define MASK_VAMOMINUEI16_V 0xf800707f -#define MATCH_VAMOMINUEI32_V 0xc000602f -#define MASK_VAMOMINUEI32_V 0xf800707f -#define MATCH_VAMOMINUEI64_V 0xc000702f -#define MASK_VAMOMINUEI64_V 0xf800707f -#define MATCH_VAMOMINUEI8_V 0xc000002f -#define MASK_VAMOMINUEI8_V 0xf800707f -#define MATCH_VAMOOREI16_V 0x4000502f -#define MASK_VAMOOREI16_V 0xf800707f -#define MATCH_VAMOOREI32_V 0x4000602f -#define MASK_VAMOOREI32_V 0xf800707f 
-#define MATCH_VAMOOREI64_V 0x4000702f -#define MASK_VAMOOREI64_V 0xf800707f -#define MATCH_VAMOOREI8_V 0x4000002f -#define MASK_VAMOOREI8_V 0xf800707f -#define MATCH_VAMOSWAPEI16_V 0x800502f -#define MASK_VAMOSWAPEI16_V 0xf800707f -#define MATCH_VAMOSWAPEI32_V 0x800602f -#define MASK_VAMOSWAPEI32_V 0xf800707f -#define MATCH_VAMOSWAPEI64_V 0x800702f -#define MASK_VAMOSWAPEI64_V 0xf800707f -#define MATCH_VAMOSWAPEI8_V 0x800002f -#define MASK_VAMOSWAPEI8_V 0xf800707f -#define MATCH_VAMOXOREI16_V 0x2000502f -#define MASK_VAMOXOREI16_V 0xf800707f -#define MATCH_VAMOXOREI32_V 0x2000602f -#define MASK_VAMOXOREI32_V 0xf800707f -#define MATCH_VAMOXOREI64_V 0x2000702f -#define MASK_VAMOXOREI64_V 0xf800707f -#define MATCH_VAMOXOREI8_V 0x2000002f -#define MASK_VAMOXOREI8_V 0xf800707f #define MATCH_VAND_VI 0x24003057 #define MASK_VAND_VI 0xfc00707f #define MATCH_VAND_VV 0x24000057 @@ -3603,7 +3547,6 @@ #define INSN_FIELD_BIMM12HI 0xfe000000 #define INSN_FIELD_IMM12LO 0xf80 #define INSN_FIELD_BIMM12LO 0xf80 -#define INSN_FIELD_ZIMM 0xf8000 #define INSN_FIELD_SHAMTQ 0x7f00000 #define INSN_FIELD_SHAMTW 0x1f00000 #define INSN_FIELD_SHAMTW4 0xf00000 @@ -3616,6 +3559,7 @@ #define INSN_FIELD_IMM4 0xf00000 #define INSN_FIELD_IMM5 0x1f00000 #define INSN_FIELD_IMM6 0x3f00000 +#define INSN_FIELD_ZIMM 0xf8000 #define INSN_FIELD_OPCODE 0x7f #define INSN_FIELD_FUNCT7 0xfe000000 #define INSN_FIELD_VD 0xf80 @@ -4191,16 +4135,20 @@ DECLARE_INSN(ksubw, MATCH_KSUBW, MASK_KSUBW) DECLARE_INSN(kwmmul, MATCH_KWMMUL, MASK_KWMMUL) DECLARE_INSN(kwmmul_u, MATCH_KWMMUL_U, MASK_KWMMUL_U) DECLARE_INSN(lb, MATCH_LB, MASK_LB) +DECLARE_INSN(lb_aq, MATCH_LB_AQ, MASK_LB_AQ) DECLARE_INSN(lbu, MATCH_LBU, MASK_LBU) DECLARE_INSN(ld, MATCH_LD, MASK_LD) +DECLARE_INSN(ld_aq, MATCH_LD_AQ, MASK_LD_AQ) DECLARE_INSN(ldu, MATCH_LDU, MASK_LDU) DECLARE_INSN(lh, MATCH_LH, MASK_LH) +DECLARE_INSN(lh_aq, MATCH_LH_AQ, MASK_LH_AQ) DECLARE_INSN(lhu, MATCH_LHU, MASK_LHU) DECLARE_INSN(lq, MATCH_LQ, MASK_LQ) DECLARE_INSN(lr_d, 
MATCH_LR_D, MASK_LR_D) DECLARE_INSN(lr_w, MATCH_LR_W, MASK_LR_W) DECLARE_INSN(lui, MATCH_LUI, MASK_LUI) DECLARE_INSN(lw, MATCH_LW, MASK_LW) +DECLARE_INSN(lw_aq, MATCH_LW_AQ, MASK_LW_AQ) DECLARE_INSN(lwu, MATCH_LWU, MASK_LWU) DECLARE_INSN(maddr32, MATCH_MADDR32, MASK_MADDR32) DECLARE_INSN(max, MATCH_MAX, MASK_MAX) @@ -4308,6 +4256,7 @@ DECLARE_INSN(rsub64, MATCH_RSUB64, MASK_RSUB64) DECLARE_INSN(rsub8, MATCH_RSUB8, MASK_RSUB8) DECLARE_INSN(rsubw, MATCH_RSUBW, MASK_RSUBW) DECLARE_INSN(sb, MATCH_SB, MASK_SB) +DECLARE_INSN(sb_rl, MATCH_SB_RL, MASK_SB_RL) DECLARE_INSN(sc_d, MATCH_SC_D, MASK_SC_D) DECLARE_INSN(sc_w, MATCH_SC_W, MASK_SC_W) DECLARE_INSN(sclip16, MATCH_SCLIP16, MASK_SCLIP16) @@ -4318,6 +4267,7 @@ DECLARE_INSN(scmple8, MATCH_SCMPLE8, MASK_SCMPLE8) DECLARE_INSN(scmplt16, MATCH_SCMPLT16, MASK_SCMPLT16) DECLARE_INSN(scmplt8, MATCH_SCMPLT8, MASK_SCMPLT8) DECLARE_INSN(sd, MATCH_SD, MASK_SD) +DECLARE_INSN(sd_rl, MATCH_SD_RL, MASK_SD_RL) DECLARE_INSN(sext_b, MATCH_SEXT_B, MASK_SEXT_B) DECLARE_INSN(sext_h, MATCH_SEXT_H, MASK_SEXT_H) DECLARE_INSN(sfence_inval_ir, MATCH_SFENCE_INVAL_IR, MASK_SFENCE_INVAL_IR) @@ -4330,6 +4280,7 @@ DECLARE_INSN(sh2add, MATCH_SH2ADD, MASK_SH2ADD) DECLARE_INSN(sh2add_uw, MATCH_SH2ADD_UW, MASK_SH2ADD_UW) DECLARE_INSN(sh3add, MATCH_SH3ADD, MASK_SH3ADD) DECLARE_INSN(sh3add_uw, MATCH_SH3ADD_UW, MASK_SH3ADD_UW) +DECLARE_INSN(sh_rl, MATCH_SH_RL, MASK_SH_RL) DECLARE_INSN(sha256sig0, MATCH_SHA256SIG0, MASK_SHA256SIG0) DECLARE_INSN(sha256sig1, MATCH_SHA256SIG1, MASK_SHA256SIG1) DECLARE_INSN(sha256sum0, MATCH_SHA256SUM0, MASK_SHA256SUM0) @@ -4483,6 +4434,7 @@ DECLARE_INSN(sunpkd830, MATCH_SUNPKD830, MASK_SUNPKD830) DECLARE_INSN(sunpkd831, MATCH_SUNPKD831, MASK_SUNPKD831) DECLARE_INSN(sunpkd832, MATCH_SUNPKD832, MASK_SUNPKD832) DECLARE_INSN(sw, MATCH_SW, MASK_SW) +DECLARE_INSN(sw_rl, MATCH_SW_RL, MASK_SW_RL) DECLARE_INSN(uclip16, MATCH_UCLIP16, MASK_UCLIP16) DECLARE_INSN(uclip32, MATCH_UCLIP32, MASK_UCLIP32) DECLARE_INSN(uclip8, MATCH_UCLIP8, 
MASK_UCLIP8) @@ -4567,42 +4519,6 @@ DECLARE_INSN(vaesem_vv, MATCH_VAESEM_VV, MASK_VAESEM_VV) DECLARE_INSN(vaeskf1_vi, MATCH_VAESKF1_VI, MASK_VAESKF1_VI) DECLARE_INSN(vaeskf2_vi, MATCH_VAESKF2_VI, MASK_VAESKF2_VI) DECLARE_INSN(vaesz_vs, MATCH_VAESZ_VS, MASK_VAESZ_VS) -DECLARE_INSN(vamoaddei16_v, MATCH_VAMOADDEI16_V, MASK_VAMOADDEI16_V) -DECLARE_INSN(vamoaddei32_v, MATCH_VAMOADDEI32_V, MASK_VAMOADDEI32_V) -DECLARE_INSN(vamoaddei64_v, MATCH_VAMOADDEI64_V, MASK_VAMOADDEI64_V) -DECLARE_INSN(vamoaddei8_v, MATCH_VAMOADDEI8_V, MASK_VAMOADDEI8_V) -DECLARE_INSN(vamoandei16_v, MATCH_VAMOANDEI16_V, MASK_VAMOANDEI16_V) -DECLARE_INSN(vamoandei32_v, MATCH_VAMOANDEI32_V, MASK_VAMOANDEI32_V) -DECLARE_INSN(vamoandei64_v, MATCH_VAMOANDEI64_V, MASK_VAMOANDEI64_V) -DECLARE_INSN(vamoandei8_v, MATCH_VAMOANDEI8_V, MASK_VAMOANDEI8_V) -DECLARE_INSN(vamomaxei16_v, MATCH_VAMOMAXEI16_V, MASK_VAMOMAXEI16_V) -DECLARE_INSN(vamomaxei32_v, MATCH_VAMOMAXEI32_V, MASK_VAMOMAXEI32_V) -DECLARE_INSN(vamomaxei64_v, MATCH_VAMOMAXEI64_V, MASK_VAMOMAXEI64_V) -DECLARE_INSN(vamomaxei8_v, MATCH_VAMOMAXEI8_V, MASK_VAMOMAXEI8_V) -DECLARE_INSN(vamomaxuei16_v, MATCH_VAMOMAXUEI16_V, MASK_VAMOMAXUEI16_V) -DECLARE_INSN(vamomaxuei32_v, MATCH_VAMOMAXUEI32_V, MASK_VAMOMAXUEI32_V) -DECLARE_INSN(vamomaxuei64_v, MATCH_VAMOMAXUEI64_V, MASK_VAMOMAXUEI64_V) -DECLARE_INSN(vamomaxuei8_v, MATCH_VAMOMAXUEI8_V, MASK_VAMOMAXUEI8_V) -DECLARE_INSN(vamominei16_v, MATCH_VAMOMINEI16_V, MASK_VAMOMINEI16_V) -DECLARE_INSN(vamominei32_v, MATCH_VAMOMINEI32_V, MASK_VAMOMINEI32_V) -DECLARE_INSN(vamominei64_v, MATCH_VAMOMINEI64_V, MASK_VAMOMINEI64_V) -DECLARE_INSN(vamominei8_v, MATCH_VAMOMINEI8_V, MASK_VAMOMINEI8_V) -DECLARE_INSN(vamominuei16_v, MATCH_VAMOMINUEI16_V, MASK_VAMOMINUEI16_V) -DECLARE_INSN(vamominuei32_v, MATCH_VAMOMINUEI32_V, MASK_VAMOMINUEI32_V) -DECLARE_INSN(vamominuei64_v, MATCH_VAMOMINUEI64_V, MASK_VAMOMINUEI64_V) -DECLARE_INSN(vamominuei8_v, MATCH_VAMOMINUEI8_V, MASK_VAMOMINUEI8_V) -DECLARE_INSN(vamoorei16_v, 
MATCH_VAMOOREI16_V, MASK_VAMOOREI16_V) -DECLARE_INSN(vamoorei32_v, MATCH_VAMOOREI32_V, MASK_VAMOOREI32_V) -DECLARE_INSN(vamoorei64_v, MATCH_VAMOOREI64_V, MASK_VAMOOREI64_V) -DECLARE_INSN(vamoorei8_v, MATCH_VAMOOREI8_V, MASK_VAMOOREI8_V) -DECLARE_INSN(vamoswapei16_v, MATCH_VAMOSWAPEI16_V, MASK_VAMOSWAPEI16_V) -DECLARE_INSN(vamoswapei32_v, MATCH_VAMOSWAPEI32_V, MASK_VAMOSWAPEI32_V) -DECLARE_INSN(vamoswapei64_v, MATCH_VAMOSWAPEI64_V, MASK_VAMOSWAPEI64_V) -DECLARE_INSN(vamoswapei8_v, MATCH_VAMOSWAPEI8_V, MASK_VAMOSWAPEI8_V) -DECLARE_INSN(vamoxorei16_v, MATCH_VAMOXOREI16_V, MASK_VAMOXOREI16_V) -DECLARE_INSN(vamoxorei32_v, MATCH_VAMOXOREI32_V, MASK_VAMOXOREI32_V) -DECLARE_INSN(vamoxorei64_v, MATCH_VAMOXOREI64_V, MASK_VAMOXOREI64_V) -DECLARE_INSN(vamoxorei8_v, MATCH_VAMOXOREI8_V, MASK_VAMOXOREI8_V) DECLARE_INSN(vand_vi, MATCH_VAND_VI, MASK_VAND_VI) DECLARE_INSN(vand_vv, MATCH_VAND_VV, MASK_VAND_VV) DECLARE_INSN(vand_vx, MATCH_VAND_VX, MASK_VAND_VX) diff --git a/riscv/insns/lb_aq.h b/riscv/insns/lb_aq.h new file mode 100644 index 0000000000..84423de430 --- /dev/null +++ b/riscv/insns/lb_aq.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZALASR); +WRITE_RD(MMU.load<int8_t>(RS1)); diff --git a/riscv/insns/ld_aq.h b/riscv/insns/ld_aq.h new file mode 100644 index 0000000000..3c5bae6a1d --- /dev/null +++ b/riscv/insns/ld_aq.h @@ -0,0 +1,3 @@ +require_rv64; +require_extension(EXT_ZALASR); +WRITE_RD(MMU.load<int64_t>(RS1)); diff --git a/riscv/insns/lh_aq.h b/riscv/insns/lh_aq.h new file mode 100644 index 0000000000..e018503315 --- /dev/null +++ b/riscv/insns/lh_aq.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZALASR); +WRITE_RD(MMU.load<int16_t>(RS1)); diff --git a/riscv/insns/lw_aq.h b/riscv/insns/lw_aq.h new file mode 100644 index 0000000000..88917b5183 --- /dev/null +++ b/riscv/insns/lw_aq.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZALASR); +WRITE_RD(MMU.load<int32_t>(RS1)); diff --git a/riscv/insns/sb_rl.h b/riscv/insns/sb_rl.h new file mode 100644 index 0000000000..91d4d25e91 --- /dev/null +++ b/riscv/insns/sb_rl.h @@ -0,0 
+1,2 @@ +require_extension(EXT_ZALASR); +MMU.store<uint8_t>(RS1, RS2); diff --git a/riscv/insns/sd_rl.h b/riscv/insns/sd_rl.h new file mode 100644 index 0000000000..980844cba0 --- /dev/null +++ b/riscv/insns/sd_rl.h @@ -0,0 +1,3 @@ +require_rv64; +require_extension(EXT_ZALASR); +MMU.store<uint64_t>(RS1, RS2); diff --git a/riscv/insns/sh_rl.h b/riscv/insns/sh_rl.h new file mode 100644 index 0000000000..bd81cf1556 --- /dev/null +++ b/riscv/insns/sh_rl.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZALASR); +MMU.store<uint16_t>(RS1, RS2); diff --git a/riscv/insns/sw_rl.h b/riscv/insns/sw_rl.h new file mode 100644 index 0000000000..e97f6269a1 --- /dev/null +++ b/riscv/insns/sw_rl.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZALASR); +MMU.store<uint32_t>(RS1, RS2); diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index af4f9254fb..fac418642a 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -81,6 +81,7 @@ typedef enum { EXT_SSCSRIND, EXT_SMCNTRPMF, EXT_ZCMOP, + EXT_ZALASR, NUM_ISA_EXTENSIONS } isa_extension_t; diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index ecf4fa0166..f0c55aa716 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1300,6 +1300,16 @@ riscv_insn_ext_zacas = \ amocas_d \ $(if $(HAVE_INT128),amocas_q) +riscv_insn_ext_zalasr = \ + lb_aq \ + lh_aq \ + lw_aq \ + ld_aq \ + sb_rl \ + sh_rl \ + sw_rl \ + sd_rl \ + riscv_insn_ext_zvbb = \ vandn_vv \ vandn_vx \ @@ -1384,6 +1394,7 @@ riscv_insn_list = \ $(riscv_insn_ext_q) \ $(riscv_insn_ext_q_zfa) \ $(riscv_insn_ext_zacas) \ + $(riscv_insn_ext_zalasr) \ $(riscv_insn_ext_zce) \ $(riscv_insn_ext_zfh) \ $(riscv_insn_ext_zfh_zfa) \ From 177ecae3656acc8d66d0c50687cd92fad169e8cd Mon Sep 17 00:00:00 2001 From: brs Date: Thu, 19 Oct 2023 10:55:40 -0500 Subject: [PATCH 115/127] Update to Zalasr encodings to require the aq/rl bits be set rather than assuming they are --- riscv/encoding.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/riscv/encoding.h b/riscv/encoding.h index 
1a0f5549b3..7ee3e3d09e 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (4f28129) + * https://github.com/riscv/riscv-opcodes (65c40e9) */ #ifndef RISCV_CSR_ENCODING_H @@ -1386,20 +1386,20 @@ #define MASK_KWMMUL_U 0xfe00707f #define MATCH_LB 0x3 #define MASK_LB 0x707f -#define MATCH_LB_AQ 0x3000002f -#define MASK_LB_AQ 0xf9f0707f +#define MATCH_LB_AQ 0x3400002f +#define MASK_LB_AQ 0xfdf0707f #define MATCH_LBU 0x4003 #define MASK_LBU 0x707f #define MATCH_LD 0x3003 #define MASK_LD 0x707f -#define MATCH_LD_AQ 0x3000302f -#define MASK_LD_AQ 0xf9f0707f +#define MATCH_LD_AQ 0x3400302f +#define MASK_LD_AQ 0xfdf0707f #define MATCH_LDU 0x7003 #define MASK_LDU 0x707f #define MATCH_LH 0x1003 #define MASK_LH 0x707f -#define MATCH_LH_AQ 0x3000102f -#define MASK_LH_AQ 0xf9f0707f +#define MATCH_LH_AQ 0x3400102f +#define MASK_LH_AQ 0xfdf0707f #define MATCH_LHU 0x5003 #define MASK_LHU 0x707f #define MATCH_LQ 0x300f @@ -1412,8 +1412,8 @@ #define MASK_LUI 0x7f #define MATCH_LW 0x2003 #define MASK_LW 0x707f -#define MATCH_LW_AQ 0x3000202f -#define MASK_LW_AQ 0xf9f0707f +#define MATCH_LW_AQ 0x3400202f +#define MASK_LW_AQ 0xfdf0707f #define MATCH_LWU 0x6003 #define MASK_LWU 0x707f #define MATCH_MADDR32 0xc4001077 @@ -1628,8 +1628,8 @@ #define MASK_RSUBW 0xfe00707f #define MATCH_SB 0x23 #define MASK_SB 0x707f -#define MATCH_SB_RL 0x3800002f -#define MASK_SB_RL 0xf8007fff +#define MATCH_SB_RL 0x3a00002f +#define MASK_SB_RL 0xfa007fff #define MATCH_SC_D 0x1800302f #define MASK_SC_D 0xf800707f #define MATCH_SC_W 0x1800202f @@ -1650,8 +1650,8 @@ #define MASK_SCMPLT8 0xfe00707f #define MATCH_SD 0x3023 #define MASK_SD 0x707f -#define MATCH_SD_RL 0x3800302f -#define MASK_SD_RL 0xf8007fff +#define MATCH_SD_RL 0x3a00302f +#define MASK_SD_RL 0xfa007fff #define MATCH_SEXT_B 0x60401013 #define MASK_SEXT_B 0xfff0707f #define MATCH_SEXT_H 0x60501013 @@ -1676,8 +1676,8 @@ #define 
MASK_SH3ADD 0xfe00707f #define MATCH_SH3ADD_UW 0x2000603b #define MASK_SH3ADD_UW 0xfe00707f -#define MATCH_SH_RL 0x3800102f -#define MASK_SH_RL 0xf8007fff +#define MATCH_SH_RL 0x3a00102f +#define MASK_SH_RL 0xfa007fff #define MATCH_SHA256SIG0 0x10201013 #define MASK_SHA256SIG0 0xfff0707f #define MATCH_SHA256SIG1 0x10301013 @@ -1984,8 +1984,8 @@ #define MASK_SUNPKD832 0xfff0707f #define MATCH_SW 0x2023 #define MASK_SW 0x707f -#define MATCH_SW_RL 0x3800202f -#define MASK_SW_RL 0xf8007fff +#define MATCH_SW_RL 0x3a00202f +#define MASK_SW_RL 0xfa007fff #define MATCH_UCLIP16 0x85000077 #define MASK_UCLIP16 0xff00707f #define MATCH_UCLIP32 0xf4000077 From 37e00ffb6e9544bda894a16456955a9ef88c87fd Mon Sep 17 00:00:00 2001 From: brs Date: Thu, 19 Oct 2023 11:12:02 -0500 Subject: [PATCH 116/127] Update README on adding new instructions to point out that they need to be added to riscv.mk.in --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e850b6e459..2c5bba9145 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,10 @@ Adding an instruction to the simulator requires two steps: $ make install ``` - 3. Rebuild the simulator. + 3. Add the instruction to riscv/riscv.mk.in. Otherwise, the instruction + will not be included in the build and will be treated as an illegal instruction. + + 4. Rebuild the simulator. 
Interactive Debug Mode --------------------------- From 434138e109107d8eb3144a409ac7cb29c2f7048a Mon Sep 17 00:00:00 2001 From: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:16:34 -0700 Subject: [PATCH 117/127] Fixing minor typo Gallois -> Galois Signed-off-by: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com> --- riscv/zvk_ext_macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index bf893f9f12..75aa56aed7 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -29,7 +29,7 @@ require_extension(EXT_ZVBC); \ } while (0) -// Ensures that the ZVKG extension (vector Gallois Field Multiplication) +// Ensures that the ZVKG extension (vector Galois Field Multiplication) // is present, and the vector unit is enabled and in a valid state. #define require_zvkg \ do { \ From 08bad17b04320631f2e60af8ea07e04e2af613ac Mon Sep 17 00:00:00 2001 From: Ved Shanbhogue Date: Mon, 30 Oct 2023 17:21:40 -0500 Subject: [PATCH 118/127] add byte width amo instructions --- riscv/insns/amoadd_b.h | 3 +++ riscv/insns/amoand_b.h | 3 +++ riscv/insns/amocas_b.h | 4 ++++ riscv/insns/amomax_b.h | 3 +++ riscv/insns/amomaxu_b.h | 3 +++ riscv/insns/amomin_b.h | 3 +++ riscv/insns/amominu_b.h | 3 +++ riscv/insns/amoor_b.h | 3 +++ riscv/insns/amoswap_b.h | 3 +++ riscv/insns/amoxor_b.h | 3 +++ 10 files changed, 31 insertions(+) create mode 100644 riscv/insns/amoadd_b.h create mode 100644 riscv/insns/amoand_b.h create mode 100644 riscv/insns/amocas_b.h create mode 100644 riscv/insns/amomax_b.h create mode 100644 riscv/insns/amomaxu_b.h create mode 100644 riscv/insns/amomin_b.h create mode 100644 riscv/insns/amominu_b.h create mode 100644 riscv/insns/amoor_b.h create mode 100644 riscv/insns/amoswap_b.h create mode 100644 riscv/insns/amoxor_b.h diff --git a/riscv/insns/amoadd_b.h b/riscv/insns/amoadd_b.h new file mode 100644 index 0000000000..2138104999 --- /dev/null 
+++ b/riscv/insns/amoadd_b.h @@ -0,0 +1,3 @@ +require_extension('A'); +require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return lhs + RS2; }))); diff --git a/riscv/insns/amoand_b.h b/riscv/insns/amoand_b.h new file mode 100644 index 0000000000..f461c292b9 --- /dev/null +++ b/riscv/insns/amoand_b.h @@ -0,0 +1,3 @@ +require_extension('A'); +require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return lhs & RS2; }))); diff --git a/riscv/insns/amocas_b.h b/riscv/insns/amocas_b.h new file mode 100644 index 0000000000..ca609c7241 --- /dev/null +++ b/riscv/insns/amocas_b.h @@ -0,0 +1,4 @@ +require_extension('A'); +require_extension(EXT_ZACAS); +require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo_compare_and_swap(RS1, RD, RS2))); diff --git a/riscv/insns/amomax_b.h b/riscv/insns/amomax_b.h new file mode 100644 index 0000000000..8187a3cca9 --- /dev/null +++ b/riscv/insns/amomax_b.h @@ -0,0 +1,3 @@ +require_extension('A'); +require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](int8_t lhs) { return std::max(lhs, int8_t(RS2)); }))); diff --git a/riscv/insns/amomaxu_b.h b/riscv/insns/amomaxu_b.h new file mode 100644 index 0000000000..534b3ca9ef --- /dev/null +++ b/riscv/insns/amomaxu_b.h @@ -0,0 +1,3 @@ +require_extension('A'); +require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return std::max(lhs, uint8_t(RS2)); }))); diff --git a/riscv/insns/amomin_b.h b/riscv/insns/amomin_b.h new file mode 100644 index 0000000000..c5e8cf971e --- /dev/null +++ b/riscv/insns/amomin_b.h @@ -0,0 +1,3 @@ +require_extension('A'); +require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](int8_t lhs) { return std::min(lhs, int8_t(RS2)); }))); diff --git a/riscv/insns/amominu_b.h b/riscv/insns/amominu_b.h new file mode 100644 index 0000000000..9bce0e7573 --- /dev/null +++ b/riscv/insns/amominu_b.h @@ -0,0 +1,3 @@ +require_extension('A'); 
+require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return std::min(lhs, uint8_t(RS2)); }))); diff --git a/riscv/insns/amoor_b.h b/riscv/insns/amoor_b.h new file mode 100644 index 0000000000..f96ff540de --- /dev/null +++ b/riscv/insns/amoor_b.h @@ -0,0 +1,3 @@ +require_extension('A'); +require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return lhs | RS2; }))); diff --git a/riscv/insns/amoswap_b.h b/riscv/insns/amoswap_b.h new file mode 100644 index 0000000000..5ecbd26e2b --- /dev/null +++ b/riscv/insns/amoswap_b.h @@ -0,0 +1,3 @@ +require_extension('A'); +require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t UNUSED lhs) { return RS2; }))); diff --git a/riscv/insns/amoxor_b.h b/riscv/insns/amoxor_b.h new file mode 100644 index 0000000000..1966bd4a53 --- /dev/null +++ b/riscv/insns/amoxor_b.h @@ -0,0 +1,3 @@ +require_extension('A'); +require_extension(EXT_ZABHA); +WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return lhs ^ RS2; }))); From 8dda7a208d290fd8e035aabd5ae6d42ff0e39a8f Mon Sep 17 00:00:00 2001 From: Ved Shanbhogue Date: Mon, 30 Oct 2023 17:21:54 -0500 Subject: [PATCH 119/127] add halfword width amo instructions --- riscv/insns/amoadd_b.h | 3 +-- riscv/insns/amoadd_h.h | 2 ++ riscv/insns/amoand_b.h | 3 +-- riscv/insns/amoand_h.h | 2 ++ riscv/insns/amocas_b.h | 3 +-- riscv/insns/amocas_h.h | 3 +++ riscv/insns/amomax_b.h | 3 +-- riscv/insns/amomax_h.h | 2 ++ riscv/insns/amomaxu_b.h | 3 +-- riscv/insns/amomaxu_h.h | 2 ++ riscv/insns/amomin_b.h | 3 +-- riscv/insns/amomin_h.h | 2 ++ riscv/insns/amominu_b.h | 3 +-- riscv/insns/amominu_h.h | 2 ++ riscv/insns/amoor_b.h | 3 +-- riscv/insns/amoor_h.h | 2 ++ riscv/insns/amoswap_b.h | 3 +-- riscv/insns/amoswap_h.h | 2 ++ riscv/insns/amoxor_b.h | 3 +-- riscv/insns/amoxor_h.h | 2 ++ 20 files changed, 31 insertions(+), 20 deletions(-) create mode 100644 riscv/insns/amoadd_h.h create mode 100644 
riscv/insns/amoand_h.h create mode 100644 riscv/insns/amocas_h.h create mode 100644 riscv/insns/amomax_h.h create mode 100644 riscv/insns/amomaxu_h.h create mode 100644 riscv/insns/amomin_h.h create mode 100644 riscv/insns/amominu_h.h create mode 100644 riscv/insns/amoor_h.h create mode 100644 riscv/insns/amoswap_h.h create mode 100644 riscv/insns/amoxor_h.h diff --git a/riscv/insns/amoadd_b.h b/riscv/insns/amoadd_b.h index 2138104999..ce6748863d 100644 --- a/riscv/insns/amoadd_b.h +++ b/riscv/insns/amoadd_b.h @@ -1,3 +1,2 @@ -require_extension('A'); require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return lhs + RS2; }))); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int8_t lhs) { return lhs + RS2; }))); diff --git a/riscv/insns/amoadd_h.h b/riscv/insns/amoadd_h.h new file mode 100644 index 0000000000..93d2209fb6 --- /dev/null +++ b/riscv/insns/amoadd_h.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int16_t lhs) { return lhs + RS2; }))); diff --git a/riscv/insns/amoand_b.h b/riscv/insns/amoand_b.h index f461c292b9..f103888695 100644 --- a/riscv/insns/amoand_b.h +++ b/riscv/insns/amoand_b.h @@ -1,3 +1,2 @@ -require_extension('A'); require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return lhs & RS2; }))); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int8_t lhs) { return lhs & RS2; }))); diff --git a/riscv/insns/amoand_h.h b/riscv/insns/amoand_h.h new file mode 100644 index 0000000000..7034118e60 --- /dev/null +++ b/riscv/insns/amoand_h.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int16_t lhs) { return lhs & RS2; }))); diff --git a/riscv/insns/amocas_b.h b/riscv/insns/amocas_b.h index ca609c7241..54ba824359 100644 --- a/riscv/insns/amocas_b.h +++ b/riscv/insns/amocas_b.h @@ -1,4 +1,3 @@ -require_extension('A'); require_extension(EXT_ZACAS); require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo_compare_and_swap(RS1, RD, RS2))); 
+WRITE_RD(sreg_t(MMU.amo_compare_and_swap(RS1, RD, RS2))); diff --git a/riscv/insns/amocas_h.h b/riscv/insns/amocas_h.h new file mode 100644 index 0000000000..064d041960 --- /dev/null +++ b/riscv/insns/amocas_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZACAS); +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo_compare_and_swap(RS1, RD, RS2))); diff --git a/riscv/insns/amomax_b.h b/riscv/insns/amomax_b.h index 8187a3cca9..84df51af25 100644 --- a/riscv/insns/amomax_b.h +++ b/riscv/insns/amomax_b.h @@ -1,3 +1,2 @@ -require_extension('A'); require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](int8_t lhs) { return std::max(lhs, int8_t(RS2)); }))); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int8_t lhs) { return std::max(lhs, int8_t(RS2)); }))); diff --git a/riscv/insns/amomax_h.h b/riscv/insns/amomax_h.h new file mode 100644 index 0000000000..d91fe1986f --- /dev/null +++ b/riscv/insns/amomax_h.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int16_t lhs) { return std::max(lhs, int16_t(RS2)); }))); diff --git a/riscv/insns/amomaxu_b.h b/riscv/insns/amomaxu_b.h index 534b3ca9ef..d56b98ea90 100644 --- a/riscv/insns/amomaxu_b.h +++ b/riscv/insns/amomaxu_b.h @@ -1,3 +1,2 @@ -require_extension('A'); require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return std::max(lhs, uint8_t(RS2)); }))); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](uint8_t lhs) { return std::max(lhs, uint8_t(RS2)); }))); diff --git a/riscv/insns/amomaxu_h.h b/riscv/insns/amomaxu_h.h new file mode 100644 index 0000000000..76cfa779fd --- /dev/null +++ b/riscv/insns/amomaxu_h.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](uint16_t lhs) { return std::max(lhs, uint16_t(RS2)); }))); diff --git a/riscv/insns/amomin_b.h b/riscv/insns/amomin_b.h index c5e8cf971e..9b781e49f1 100644 --- a/riscv/insns/amomin_b.h +++ b/riscv/insns/amomin_b.h @@ -1,3 +1,2 @@ -require_extension('A'); 
require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](int8_t lhs) { return std::min(lhs, int8_t(RS2)); }))); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int8_t lhs) { return std::min(lhs, int8_t(RS2)); }))); diff --git a/riscv/insns/amomin_h.h b/riscv/insns/amomin_h.h new file mode 100644 index 0000000000..4405ac3e8c --- /dev/null +++ b/riscv/insns/amomin_h.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int16_t lhs) { return std::min(lhs, int16_t(RS2)); }))); diff --git a/riscv/insns/amominu_b.h b/riscv/insns/amominu_b.h index 9bce0e7573..7e12bf3c58 100644 --- a/riscv/insns/amominu_b.h +++ b/riscv/insns/amominu_b.h @@ -1,3 +1,2 @@ -require_extension('A'); require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return std::min(lhs, uint8_t(RS2)); }))); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](uint8_t lhs) { return std::min(lhs, uint8_t(RS2)); }))); diff --git a/riscv/insns/amominu_h.h b/riscv/insns/amominu_h.h new file mode 100644 index 0000000000..60226fb4e8 --- /dev/null +++ b/riscv/insns/amominu_h.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](uint16_t lhs) { return std::min(lhs, uint16_t(RS2)); }))); diff --git a/riscv/insns/amoor_b.h b/riscv/insns/amoor_b.h index f96ff540de..3048ee9023 100644 --- a/riscv/insns/amoor_b.h +++ b/riscv/insns/amoor_b.h @@ -1,3 +1,2 @@ -require_extension('A'); require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return lhs | RS2; }))); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int8_t lhs) { return lhs | RS2; }))); diff --git a/riscv/insns/amoor_h.h b/riscv/insns/amoor_h.h new file mode 100644 index 0000000000..1e71a51091 --- /dev/null +++ b/riscv/insns/amoor_h.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int16_t lhs) { return lhs | RS2; }))); diff --git a/riscv/insns/amoswap_b.h b/riscv/insns/amoswap_b.h index 5ecbd26e2b..54c9e6ebf2 100644 --- 
a/riscv/insns/amoswap_b.h +++ b/riscv/insns/amoswap_b.h @@ -1,3 +1,2 @@ -require_extension('A'); require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t UNUSED lhs) { return RS2; }))); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int8_t UNUSED lhs) { return RS2; }))); diff --git a/riscv/insns/amoswap_h.h b/riscv/insns/amoswap_h.h new file mode 100644 index 0000000000..0c1a8ff20c --- /dev/null +++ b/riscv/insns/amoswap_h.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int16_t UNUSED lhs) { return RS2; }))); diff --git a/riscv/insns/amoxor_b.h b/riscv/insns/amoxor_b.h index 1966bd4a53..dbaf591771 100644 --- a/riscv/insns/amoxor_b.h +++ b/riscv/insns/amoxor_b.h @@ -1,3 +1,2 @@ -require_extension('A'); require_extension(EXT_ZABHA); -WRITE_RD((sreg_t)(int8_t)(MMU.amo(RS1, [&](uint8_t lhs) { return lhs ^ RS2; }))); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int8_t lhs) { return lhs ^ RS2; }))); diff --git a/riscv/insns/amoxor_h.h b/riscv/insns/amoxor_h.h new file mode 100644 index 0000000000..110acf7e73 --- /dev/null +++ b/riscv/insns/amoxor_h.h @@ -0,0 +1,2 @@ +require_extension(EXT_ZABHA); +WRITE_RD(sreg_t(MMU.amo(RS1, [&](int16_t lhs) { return lhs ^ RS2; }))); From b96bf5565470a84b55976d50f07c5a42df633896 Mon Sep 17 00:00:00 2001 From: Ved Shanbhogue Date: Mon, 30 Oct 2023 17:22:32 -0500 Subject: [PATCH 120/127] Add encodings for Zabha instructions --- riscv/encoding.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/riscv/encoding.h b/riscv/encoding.h index 7ee3e3d09e..81d829cf15 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (65c40e9) + * https://github.com/riscv/riscv-opcodes (37413c8) */ #ifndef RISCV_CSR_ENCODING_H @@ -426,46 +426,86 @@ #define MASK_AES64KS1I 0xff00707f #define MATCH_AES64KS2 0x7e000033 #define MASK_AES64KS2 0xfe00707f 
+#define MATCH_AMOADD_B 0x2f +#define MASK_AMOADD_B 0xf800707f #define MATCH_AMOADD_D 0x302f #define MASK_AMOADD_D 0xf800707f +#define MATCH_AMOADD_H 0x102f +#define MASK_AMOADD_H 0xf800707f #define MATCH_AMOADD_W 0x202f #define MASK_AMOADD_W 0xf800707f +#define MATCH_AMOAND_B 0x6000002f +#define MASK_AMOAND_B 0xf800707f #define MATCH_AMOAND_D 0x6000302f #define MASK_AMOAND_D 0xf800707f +#define MATCH_AMOAND_H 0x6000102f +#define MASK_AMOAND_H 0xf800707f #define MATCH_AMOAND_W 0x6000202f #define MASK_AMOAND_W 0xf800707f +#define MATCH_AMOCAS_B 0x2800002f +#define MASK_AMOCAS_B 0xf800707f #define MATCH_AMOCAS_D 0x2800302f #define MASK_AMOCAS_D 0xf800707f +#define MATCH_AMOCAS_H 0x2800102f +#define MASK_AMOCAS_H 0xf800707f #define MATCH_AMOCAS_Q 0x2800402f #define MASK_AMOCAS_Q 0xf800707f #define MATCH_AMOCAS_W 0x2800202f #define MASK_AMOCAS_W 0xf800707f +#define MATCH_AMOMAX_B 0xa000002f +#define MASK_AMOMAX_B 0xf800707f #define MATCH_AMOMAX_D 0xa000302f #define MASK_AMOMAX_D 0xf800707f +#define MATCH_AMOMAX_H 0xa000102f +#define MASK_AMOMAX_H 0xf800707f #define MATCH_AMOMAX_W 0xa000202f #define MASK_AMOMAX_W 0xf800707f +#define MATCH_AMOMAXU_B 0xe000002f +#define MASK_AMOMAXU_B 0xf800707f #define MATCH_AMOMAXU_D 0xe000302f #define MASK_AMOMAXU_D 0xf800707f +#define MATCH_AMOMAXU_H 0xe000102f +#define MASK_AMOMAXU_H 0xf800707f #define MATCH_AMOMAXU_W 0xe000202f #define MASK_AMOMAXU_W 0xf800707f +#define MATCH_AMOMIN_B 0x8000002f +#define MASK_AMOMIN_B 0xf800707f #define MATCH_AMOMIN_D 0x8000302f #define MASK_AMOMIN_D 0xf800707f +#define MATCH_AMOMIN_H 0x8000102f +#define MASK_AMOMIN_H 0xf800707f #define MATCH_AMOMIN_W 0x8000202f #define MASK_AMOMIN_W 0xf800707f +#define MATCH_AMOMINU_B 0xc000002f +#define MASK_AMOMINU_B 0xf800707f #define MATCH_AMOMINU_D 0xc000302f #define MASK_AMOMINU_D 0xf800707f +#define MATCH_AMOMINU_H 0xc000102f +#define MASK_AMOMINU_H 0xf800707f #define MATCH_AMOMINU_W 0xc000202f #define MASK_AMOMINU_W 0xf800707f +#define MATCH_AMOOR_B 
0x4000002f +#define MASK_AMOOR_B 0xf800707f #define MATCH_AMOOR_D 0x4000302f #define MASK_AMOOR_D 0xf800707f +#define MATCH_AMOOR_H 0x4000102f +#define MASK_AMOOR_H 0xf800707f #define MATCH_AMOOR_W 0x4000202f #define MASK_AMOOR_W 0xf800707f +#define MATCH_AMOSWAP_B 0x800002f +#define MASK_AMOSWAP_B 0xf800707f #define MATCH_AMOSWAP_D 0x800302f #define MASK_AMOSWAP_D 0xf800707f +#define MATCH_AMOSWAP_H 0x800102f +#define MASK_AMOSWAP_H 0xf800707f #define MATCH_AMOSWAP_W 0x800202f #define MASK_AMOSWAP_W 0xf800707f +#define MATCH_AMOXOR_B 0x2000002f +#define MASK_AMOXOR_B 0xf800707f #define MATCH_AMOXOR_D 0x2000302f #define MASK_AMOXOR_D 0xf800707f +#define MATCH_AMOXOR_H 0x2000102f +#define MASK_AMOXOR_H 0xf800707f #define MATCH_AMOXOR_W 0x2000202f #define MASK_AMOXOR_W 0xf800707f #define MATCH_AND 0x7033 @@ -3655,26 +3695,46 @@ DECLARE_INSN(aes64esm, MATCH_AES64ESM, MASK_AES64ESM) DECLARE_INSN(aes64im, MATCH_AES64IM, MASK_AES64IM) DECLARE_INSN(aes64ks1i, MATCH_AES64KS1I, MASK_AES64KS1I) DECLARE_INSN(aes64ks2, MATCH_AES64KS2, MASK_AES64KS2) +DECLARE_INSN(amoadd_b, MATCH_AMOADD_B, MASK_AMOADD_B) DECLARE_INSN(amoadd_d, MATCH_AMOADD_D, MASK_AMOADD_D) +DECLARE_INSN(amoadd_h, MATCH_AMOADD_H, MASK_AMOADD_H) DECLARE_INSN(amoadd_w, MATCH_AMOADD_W, MASK_AMOADD_W) +DECLARE_INSN(amoand_b, MATCH_AMOAND_B, MASK_AMOAND_B) DECLARE_INSN(amoand_d, MATCH_AMOAND_D, MASK_AMOAND_D) +DECLARE_INSN(amoand_h, MATCH_AMOAND_H, MASK_AMOAND_H) DECLARE_INSN(amoand_w, MATCH_AMOAND_W, MASK_AMOAND_W) +DECLARE_INSN(amocas_b, MATCH_AMOCAS_B, MASK_AMOCAS_B) DECLARE_INSN(amocas_d, MATCH_AMOCAS_D, MASK_AMOCAS_D) +DECLARE_INSN(amocas_h, MATCH_AMOCAS_H, MASK_AMOCAS_H) DECLARE_INSN(amocas_q, MATCH_AMOCAS_Q, MASK_AMOCAS_Q) DECLARE_INSN(amocas_w, MATCH_AMOCAS_W, MASK_AMOCAS_W) +DECLARE_INSN(amomax_b, MATCH_AMOMAX_B, MASK_AMOMAX_B) DECLARE_INSN(amomax_d, MATCH_AMOMAX_D, MASK_AMOMAX_D) +DECLARE_INSN(amomax_h, MATCH_AMOMAX_H, MASK_AMOMAX_H) DECLARE_INSN(amomax_w, MATCH_AMOMAX_W, MASK_AMOMAX_W) 
+DECLARE_INSN(amomaxu_b, MATCH_AMOMAXU_B, MASK_AMOMAXU_B) DECLARE_INSN(amomaxu_d, MATCH_AMOMAXU_D, MASK_AMOMAXU_D) +DECLARE_INSN(amomaxu_h, MATCH_AMOMAXU_H, MASK_AMOMAXU_H) DECLARE_INSN(amomaxu_w, MATCH_AMOMAXU_W, MASK_AMOMAXU_W) +DECLARE_INSN(amomin_b, MATCH_AMOMIN_B, MASK_AMOMIN_B) DECLARE_INSN(amomin_d, MATCH_AMOMIN_D, MASK_AMOMIN_D) +DECLARE_INSN(amomin_h, MATCH_AMOMIN_H, MASK_AMOMIN_H) DECLARE_INSN(amomin_w, MATCH_AMOMIN_W, MASK_AMOMIN_W) +DECLARE_INSN(amominu_b, MATCH_AMOMINU_B, MASK_AMOMINU_B) DECLARE_INSN(amominu_d, MATCH_AMOMINU_D, MASK_AMOMINU_D) +DECLARE_INSN(amominu_h, MATCH_AMOMINU_H, MASK_AMOMINU_H) DECLARE_INSN(amominu_w, MATCH_AMOMINU_W, MASK_AMOMINU_W) +DECLARE_INSN(amoor_b, MATCH_AMOOR_B, MASK_AMOOR_B) DECLARE_INSN(amoor_d, MATCH_AMOOR_D, MASK_AMOOR_D) +DECLARE_INSN(amoor_h, MATCH_AMOOR_H, MASK_AMOOR_H) DECLARE_INSN(amoor_w, MATCH_AMOOR_W, MASK_AMOOR_W) +DECLARE_INSN(amoswap_b, MATCH_AMOSWAP_B, MASK_AMOSWAP_B) DECLARE_INSN(amoswap_d, MATCH_AMOSWAP_D, MASK_AMOSWAP_D) +DECLARE_INSN(amoswap_h, MATCH_AMOSWAP_H, MASK_AMOSWAP_H) DECLARE_INSN(amoswap_w, MATCH_AMOSWAP_W, MASK_AMOSWAP_W) +DECLARE_INSN(amoxor_b, MATCH_AMOXOR_B, MASK_AMOXOR_B) DECLARE_INSN(amoxor_d, MATCH_AMOXOR_D, MASK_AMOXOR_D) +DECLARE_INSN(amoxor_h, MATCH_AMOXOR_H, MASK_AMOXOR_H) DECLARE_INSN(amoxor_w, MATCH_AMOXOR_W, MASK_AMOXOR_W) DECLARE_INSN(and, MATCH_AND, MASK_AND) DECLARE_INSN(andi, MATCH_ANDI, MASK_ANDI) From 2adfab25a5c206afdd7c06855cbe2e27536d633e Mon Sep 17 00:00:00 2001 From: Ved Shanbhogue Date: Mon, 30 Oct 2023 17:23:22 -0500 Subject: [PATCH 121/127] Add enum for Zabha extension --- riscv/isa_parser.h | 1 + 1 file changed, 1 insertion(+) diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index fac418642a..a84b6fe8d7 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -76,6 +76,7 @@ typedef enum { EXT_XZBT, EXT_SSTC, EXT_ZACAS, + EXT_ZABHA, EXT_INTERNAL_ZFH_MOVE, EXT_SMCSRIND, EXT_SSCSRIND, From 04cbaf0c1c609506b8fc74e756305e0d792fa784 Mon Sep 17 00:00:00 2001 
From: Ved Shanbhogue Date: Mon, 30 Oct 2023 17:24:43 -0500 Subject: [PATCH 122/127] Add Zabha instructions to disasm --- disasm/disasm.cc | 23 +++++++++++++++++++++++ disasm/isa_parser.cc | 6 ++++++ 2 files changed, 29 insertions(+) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index df02e03242..6917fa555b 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -840,6 +840,29 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DEFINE_XAMO(amocas_q) } + if (isa->extension_enabled(EXT_ZABHA)) { + DEFINE_XAMO(amoadd_b) + DEFINE_XAMO(amoswap_b) + DEFINE_XAMO(amoand_b) + DEFINE_XAMO(amoor_b) + DEFINE_XAMO(amoxor_b) + DEFINE_XAMO(amomin_b) + DEFINE_XAMO(amomax_b) + DEFINE_XAMO(amominu_b) + DEFINE_XAMO(amomaxu_b) + DEFINE_XAMO(amocas_b) + DEFINE_XAMO(amoadd_h) + DEFINE_XAMO(amoswap_h) + DEFINE_XAMO(amoand_h) + DEFINE_XAMO(amoor_h) + DEFINE_XAMO(amoxor_h) + DEFINE_XAMO(amomin_h) + DEFINE_XAMO(amomax_h) + DEFINE_XAMO(amominu_h) + DEFINE_XAMO(amomaxu_h) + DEFINE_XAMO(amocas_h) + } + add_insn(new disasm_insn_t("j", match_jal, mask_jal | mask_rd, {&jump_target})); add_insn(new disasm_insn_t("jal", match_jal | match_rd_ra, mask_jal | mask_rd, {&jump_target})); add_insn(new disasm_insn_t("jal", match_jal, mask_jal, {&xrd, &jump_target})); diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index 563f687416..ef513108d1 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -121,6 +121,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) // HINTs encoded in base-ISA instructions are always present. 
} else if (ext_str == "zacas") { extension_table[EXT_ZACAS] = true; + } else if (ext_str == "zabha") { + extension_table[EXT_ZABHA] = true; } else if (ext_str == "zmmul") { extension_table[EXT_ZMMUL] = true; } else if (ext_str == "zba") { @@ -357,6 +359,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) bad_isa_string(str, "'Zacas' extension requires 'A' extension"); } + if (extension_table[EXT_ZABHA] && !extension_table['A']) { + bad_isa_string(str, "'Zabha' extension requires 'A' extension"); + } + // Zpn conflicts with Zvknha/Zvknhb in both rv32 and rv64 if (extension_table[EXT_ZPN] && (extension_table[EXT_ZVKNHA] || extension_table[EXT_ZVKNHB])) { bad_isa_string(str, "'Zvkna' and 'Zvknhb' extensions are incompatible with 'Zpn' extension"); From 5a984591563bb3bbb54ccf0e82a2b732b650da75 Mon Sep 17 00:00:00 2001 From: Ved Shanbhogue Date: Mon, 30 Oct 2023 17:25:06 -0500 Subject: [PATCH 123/127] Add Zabha instructions to make --- riscv/riscv.mk.in | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index f0c55aa716..76c2ed7c05 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1300,6 +1300,28 @@ riscv_insn_ext_zacas = \ amocas_d \ $(if $(HAVE_INT128),amocas_q) +riscv_insn_ext_zabha = \ + amoadd_b \ + amoand_b \ + amomax_b \ + amomaxu_b \ + amomin_b \ + amominu_b \ + amoor_b \ + amoswap_b \ + amoxor_b \ + amocas_b \ + amoadd_h \ + amoand_h \ + amomax_h \ + amomaxu_h \ + amomin_h \ + amominu_h \ + amoor_h \ + amoswap_h \ + amoxor_h \ + amocas_h \ + riscv_insn_ext_zalasr = \ lb_aq \ lh_aq \ @@ -1394,6 +1416,7 @@ riscv_insn_list = \ $(riscv_insn_ext_q) \ $(riscv_insn_ext_q_zfa) \ $(riscv_insn_ext_zacas) \ + $(riscv_insn_ext_zabha) \ $(riscv_insn_ext_zalasr) \ $(riscv_insn_ext_zce) \ $(riscv_insn_ext_zfh) \ From 5bdb9d4d7e8b8e113606446fe64ab7af9c17850a Mon Sep 17 00:00:00 2001 From: Karthik B K Date: Sat, 4 Nov 2023 15:55:21 +0530 Subject: [PATCH 124/127] expose pmp granularity as a 
cli option. PMP Granularity is made available as a command line option. The default value is 4 Bytes. The value can be changed by passing the option --pmpgranularity=<n> to spike. Signed-off-by: Karthik B K --- ci-tests/testlib.c | 1 + riscv/cfg.h | 3 +++ riscv/dts.cc | 3 ++- riscv/dts.h | 1 + riscv/processor.cc | 2 +- riscv/sim.cc | 4 ++-- spike_main/spike-log-parser.cc | 1 + spike_main/spike.cc | 3 +++ 8 files changed, 14 insertions(+), 4 deletions(-) diff --git a/ci-tests/testlib.c b/ci-tests/testlib.c index 2525b18bc2..1fca4fadb0 100644 --- a/ci-tests/testlib.c +++ b/ci-tests/testlib.c @@ -24,6 +24,7 @@ int main() false, endianness_little, 16, + (1 << PMP_SHIFT), mem_cfg, hartids, false, diff --git a/riscv/cfg.h b/riscv/cfg.h index 8ead618b7a..422c1aebe1 100644 --- a/riscv/cfg.h +++ b/riscv/cfg.h @@ -68,6 +68,7 @@ class cfg_t const bool default_misaligned, const endianness_t default_endianness, const reg_t default_pmpregions, + const reg_t default_pmpgranularity, const std::vector &default_mem_layout, const std::vector default_hartids, bool default_real_time_clint, @@ -80,6 +81,7 @@ class cfg_t misaligned(default_misaligned), endianness(default_endianness), pmpregions(default_pmpregions), + pmpgranularity(default_pmpgranularity), mem_layout(default_mem_layout), hartids(default_hartids), explicit_hartids(false), @@ -95,6 +97,7 @@ class cfg_t bool misaligned; endianness_t endianness; reg_t pmpregions; + reg_t pmpgranularity; cfg_arg_t> mem_layout; std::optional start_pc; cfg_arg_t> hartids; diff --git a/riscv/dts.cc b/riscv/dts.cc index 9f73bac73a..8304171b56 100644 --- a/riscv/dts.cc +++ b/riscv/dts.cc @@ -16,6 +16,7 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, reg_t initrd_start, reg_t initrd_end, const char* bootargs, size_t pmpregions, + size_t pmpgranularity, std::vector procs, std::vector> mems, std::string device_nodes) @@ -62,7 +63,7 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, " riscv,isa = \"" << 
procs[i]->get_isa().get_isa_string() << "\";\n" " mmu-type = \"riscv," << (procs[i]->get_isa().get_max_xlen() <= 32 ? "sv32" : "sv57") << "\";\n" " riscv,pmpregions = <" << pmpregions << ">;\n" - " riscv,pmpgranularity = <" << (1 << PMP_SHIFT) << ">;\n" + " riscv,pmpgranularity = <" << pmpgranularity << ">;\n" " clock-frequency = <" << cpu_hz << ">;\n" " CPU" << i << "_intc: interrupt-controller {\n" " #address-cells = <2>;\n" diff --git a/riscv/dts.h b/riscv/dts.h index 9240124e84..7afe376cbc 100644 --- a/riscv/dts.h +++ b/riscv/dts.h @@ -10,6 +10,7 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, reg_t initrd_start, reg_t initrd_end, const char* bootargs, size_t pmpregions, + size_t pmpgranularity, std::vector procs, std::vector> mems, std::string device_nodes); diff --git a/riscv/processor.cc b/riscv/processor.cc index 22e654250d..0ac6e67c94 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -63,7 +63,7 @@ processor_t::processor_t(const isa_parser_t *isa, const cfg_t *cfg, for (auto e : isa->get_extensions()) register_extension(find_extension(e.c_str())()); - set_pmp_granularity(1 << PMP_SHIFT); + set_pmp_granularity(cfg->pmpgranularity); set_pmp_num(cfg->pmpregions); if (isa->get_max_xlen() == 32) diff --git a/riscv/sim.cc b/riscv/sim.cc index fc75a37cf2..d75de46188 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -140,8 +140,8 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, device_nodes.append(factory->generate_dts(this)); dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, initrd_bounds.first, initrd_bounds.second, - cfg->bootargs(), cfg->pmpregions, procs, mems, - device_nodes); + cfg->bootargs(), cfg->pmpregions, cfg->pmpgranularity, + procs, mems, device_nodes); dtb = dts_compile(dts); } diff --git a/spike_main/spike-log-parser.cc b/spike_main/spike-log-parser.cc index 9bea5c51c3..6ac4ab0ce4 100644 --- a/spike_main/spike-log-parser.cc +++ b/spike_main/spike-log-parser.cc @@ -36,6 +36,7 @@ int main(int UNUSED argc, char** argv) 
/*default_misaligned=*/false, /*default_endianness*/endianness_little, /*default_pmpregions=*/16, + /*default_pmpgranularity=*/(1 << PMP_SHIFT), /*default_mem_layout=*/std::vector(), /*default_hartids=*/std::vector(), /*default_real_time_clint=*/false, diff --git a/spike_main/spike.cc b/spike_main/spike.cc index 7b28e25205..8fd9104fcc 100644 --- a/spike_main/spike.cc +++ b/spike_main/spike.cc @@ -41,6 +41,7 @@ static void help(int exit_code = 1) fprintf(stderr, " --debug-cmd= Read commands from file (use with -d)\n"); fprintf(stderr, " --isa= RISC-V ISA string [default %s]\n", DEFAULT_ISA); fprintf(stderr, " --pmpregions= Number of PMP regions [default 16]\n"); + fprintf(stderr, " --pmpgranularity= PMP Granularity in bytes [default 4]\n"); fprintf(stderr, " --priv= RISC-V privilege modes supported [default %s]\n", DEFAULT_PRIV); fprintf(stderr, " --varch= RISC-V Vector uArch string [default %s]\n", DEFAULT_VARCH); fprintf(stderr, " --pc=
Override ELF entry point\n"); @@ -366,6 +367,7 @@ int main(int argc, char** argv) /*default_misaligned=*/false, /*default_endianness*/endianness_little, /*default_pmpregions=*/16, + /*default_pmpgranularity=*/(1 << PMP_SHIFT), /*default_mem_layout=*/parse_mem_layout("2048"), /*default_hartids=*/std::vector(), /*default_real_time_clint=*/false, @@ -406,6 +408,7 @@ int main(int argc, char** argv) parser.option(0, "log-cache-miss", 0, [&](const char UNUSED *s){log_cache = true;}); parser.option(0, "isa", 1, [&](const char* s){cfg.isa = s;}); parser.option(0, "pmpregions", 1, [&](const char* s){cfg.pmpregions = atoul_safe(s);}); + parser.option(0, "pmpgranularity", 1, [&](const char* s){cfg.pmpgranularity = atoul_safe(s);}); parser.option(0, "priv", 1, [&](const char* s){cfg.priv = s;}); parser.option(0, "varch", 1, [&](const char* s){cfg.varch = s;}); parser.option(0, "device", 1, device_parser); From f04be9849356cf013f4071429f6fd152a8e5e518 Mon Sep 17 00:00:00 2001 From: Volodymyr Fialko Date: Tue, 26 Sep 2023 14:05:08 +0200 Subject: [PATCH 125/127] fesvr/htif: allow exit on SIGINT. Currently the signal handler calls exit() only on the second received signal, which prevents proper program cleanup. Instead, use the signal flag to exit the loop. Signed-off-by: Volodymyr Fialko --- fesvr/htif.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fesvr/htif.cc b/fesvr/htif.cc index 3f93f7b507..8b5eb8dc84 100644 --- a/fesvr/htif.cc +++ b/fesvr/htif.cc @@ -253,7 +253,7 @@ int htif_t::run() std::bind(enq_func, &fromhost_queue, std::placeholders::_1); if (tohost_addr == 0) { - while (true) + while (!signal_exit) idle(); } From ca84e5325e60fd5bcb8c27bea3d62c3ef564fa16 Mon Sep 17 00:00:00 2001 From: Tim Newsome Date: Fri, 10 Nov 2023 09:27:29 -0800 Subject: [PATCH 126/127] Update debug smoketest action. To get https://github.com/riscv-software-src/riscv-tests/pull/522, which fixes an intermittent failure. 
--- .github/workflows/debug-smoke.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/debug-smoke.yml b/.github/workflows/debug-smoke.yml index a1d9f4fdf7..7c24d55fef 100644 --- a/.github/workflows/debug-smoke.yml +++ b/.github/workflows/debug-smoke.yml @@ -47,7 +47,7 @@ jobs: run: | git clone --recurse-submodules https://github.com/riscv-software-src/riscv-tests.git cd riscv-tests - git checkout d020e2069a9f6a9c0e875f23f0f4aababea9fbf0 + git checkout bd0a19c136927eaa3b7296a591a896c141affb6b - name: Run Tests run: | From 4841ad0238f0b71ca86fb28974765495cc0c34a9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 16 Nov 2023 18:41:15 -0800 Subject: [PATCH 127/127] Fix FMVP.D.X implementation Resolves #1507 --- riscv/insns/fmvp_d_x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/insns/fmvp_d_x.h b/riscv/insns/fmvp_d_x.h index f95cfe9684..95d786a3bd 100644 --- a/riscv/insns/fmvp_d_x.h +++ b/riscv/insns/fmvp_d_x.h @@ -4,5 +4,5 @@ require_extension(EXT_ZFA); require_fp; ui64_f64 ui; ui.ui = ((uint64_t)RS2) << 32; -ui.ui |= RS1; +ui.ui |= zext32(RS1); WRITE_FRD_D(f64(ui.ui));