From 73b1ba585114b4175d4d65aa6c07396103ebb11f Mon Sep 17 00:00:00 2001
From: Vacantron Chen
Date: Mon, 16 Dec 2024 14:31:08 +0800
Subject: [PATCH] Support just-in-time (JIT) compilation with system simulation

This commit introduces a "satp" field in the block structure in JIT mode
to ensure that blocks in the block cache are replaced correctly. MOP
fusion and T2C are temporarily disabled.

Use the following commands to boot the Linux kernel:
$ make ENABLE_SYSTEM=1 ENABLE_MOP_FUSION=0 ENABLE_JIT=1 ENABLE_T2C=0
$ ./build/rv32emu -k <kernel-image> -i <rootfs-image> -b <dtb-file>
---
 Makefile            |   2 +-
 src/decode.h        |   3 +
 src/emulate.c       |  71 ++++++--
 src/jit.c           | 335 ++++++++++++++++++++++++++++++++----
 src/jit.h           |   6 +
 src/riscv.c         |   6 +-
 src/riscv_private.h |  15 ++
 src/rv32_jit.c      | 410 ++++++++++++++++++++++++++++++++++++++------
 src/rv32_template.c | 192 +++++++++++++--------
 src/utils.c         |  28 ++-
 src/utils.h         |  31 ++++
 11 files changed, 918 insertions(+), 181 deletions(-)

diff --git a/Makefile b/Makefile
index f73075ff..0798d9f6 100644
--- a/Makefile
+++ b/Makefile
@@ -188,7 +188,7 @@ ifeq ($(call has, JIT), 1)
 ifeq ("$(CHECK_LLVM_LIBS)", "0")
     OBJS_EXT += t2c.o
     CFLAGS += -g $(shell $(LLVM_CONFIG) --cflags)
-    LDFLAGS += $(shell $(LLVM_CONFIG) --libs)
+    LDFLAGS += $(shell $(LLVM_CONFIG) --libfiles)
 else
 $(error No llvm-config-18 installed. Check llvm-config-18 installation in advance, or use "ENABLE_T2C=0" to disable tier-2 LLVM compiler)
 endif
diff --git a/src/decode.h b/src/decode.h
index e2b2984c..edb88f42 100644
--- a/src/decode.h
+++ b/src/decode.h
@@ -288,6 +288,9 @@ typedef struct {
     struct rv_insn *target[HISTORY_SIZE];
 #else
     uint32_t times[HISTORY_SIZE];
+#if RV32_HAS(SYSTEM)
+    uint32_t satp[HISTORY_SIZE];
+#endif
 #endif
 } branch_history_table_t;
diff --git a/src/emulate.c b/src/emulate.c
index 293ce031..e706925c 100644
--- a/src/emulate.c
+++ b/src/emulate.c
@@ -42,7 +42,9 @@ extern struct target_ops gdbstub_ops;
 #define IF_imm(i, v) (i->imm == v)

 #if RV32_HAS(SYSTEM)
+#if !RV32_HAS(JIT)
 static bool need_clear_block_map = false;
+#endif
 static uint32_t reloc_enable_mmu_jalr_addr;
 static bool reloc_enable_mmu = false;
 bool need_retranslate = false;
@@ -704,6 +706,7 @@ static inline void remove_next_nth_ir(const riscv_t *rv,
  * Strategies are being devised to increase the number of instructions that
  * match the pattern, including possible instruction reordering.
  */
+#if RV32_HAS(MOP_FUSION)
 static void match_pattern(riscv_t *rv, block_t *block)
 {
     uint32_t i;
@@ -795,7 +798,7 @@ static void match_pattern(riscv_t *rv, block_t *block)
         }
     }
 }
-
+#endif
 typedef struct {
     bool is_constant[N_RV_REGS];
     uint32_t const_val[N_RV_REGS];
@@ -838,12 +841,11 @@ static block_t *block_find_or_translate(riscv_t *rv)
     block_t *next_blk = block_find(map, rv->PC);
 #else
     /* lookup the next block in the block cache */
-    /*
-     * The function "cache_get()" gets the cached block by the given "key (PC)".
-     * In system simulation, the returned block might be dropped because it is
-     * not the one from the current process (by checking SATP CSR register).
-     */
     block_t *next_blk = (block_t *) cache_get(rv->block_cache, rv->PC, true);
+#if RV32_HAS(SYSTEM)
+    if (next_blk && next_blk->satp != rv->csr_satp)
+        next_blk = NULL;
+#endif
 #endif

     if (next_blk)
@@ -861,12 +863,20 @@
     block_translate(rv, next_blk);

+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /*
+     * An instruction fetch fault may have occurred and changed satp, so
+     * assign this field here rather than in block_alloc().
+     */
+    next_blk->satp = rv->csr_satp;
+#endif
+
     optimize_constant(rv, next_blk);
+
 #if RV32_HAS(GDBSTUB)
     if (likely(!rv->debug_mode))
 #endif
-#if RV32_HAS(MOP_FUSION)
-        /* macro operation fusion */
+#if RV32_HAS(GDBSTUB) || RV32_HAS(MOP_FUSION)
         match_pattern(rv, next_blk);
 #endif
@@ -890,8 +900,6 @@
         return next_blk;
     }

-    list_del_init(&replaced_blk->list);
-
     if (prev == replaced_blk)
         prev = NULL;
@@ -910,6 +918,32 @@
         if (untaken == replaced_blk_entry) {
             entry->ir_tail->branch_untaken = NULL;
         }
+
+        /* update JALR LUT */
+        if (!entry->ir_tail->branch_table)
+            continue;
+
+#if 0
+        /*
+         * This branch-table update is unused because we only take the PC from
+         * the table and use cache_get() to perform the T1C branch prediction.
+         * However, if branch_history_table_t is ever changed to reference the
+         * block directly, this update becomes necessary to avoid using freed
+         * blocks.
+         */
+        for (int i = 0; i < HISTORY_SIZE; i++) {
+            if (entry->ir_tail->branch_table->PC[i] == replaced_blk->pc_start) {
+                IIF(RV32_HAS(SYSTEM))
+                (if (entry->ir_tail->branch_table->satp[i] ==
+                     replaced_blk->satp), )
+                {
+                    entry->ir_tail->branch_table->PC[i] =
+                        entry->ir_tail->branch_table->satp[i] =
+                            entry->ir_tail->branch_table->times[i] = 0;
+                }
+            }
+        }
+#endif
     }

     /* free IRs in replaced block */
         mpool_free(rv->block_ir_mp, ir);
     }

+    list_del_init(&replaced_blk->list);
     mpool_free(rv->block_mp, replaced_blk);
 #if RV32_HAS(T2C)
     pthread_mutex_unlock(&rv->cache_lock);
@@ -941,6 +976,10 @@ static bool runtime_profiler(riscv_t *rv, block_t *block)
      * we posit that our profiler could effectively identify hotspots using
      * three key indicators.
      */
+#if RV32_HAS(SYSTEM)
+    if (block->satp != rv->csr_satp)
+        return false;
+#endif
     uint32_t freq = cache_freq(rv->block_cache, block->pc_start);
     /* To profile a block after chaining, it must first be executed. */
     if (unlikely(freq >= 2 && block->has_loops))
@@ -1022,15 +1061,21 @@
         block_t *block = block_find_or_translate(rv);
         /* by now, a block should be available */
         assert(block);
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+        assert(block->satp == rv->csr_satp);
+#endif

         /* After emulating the previous block, it is determined whether the
          * branch is taken or not. The IR array of the current block is then
          * assigned to either the branch_taken or branch_untaken pointer of
          * the previous block.
          */
-
 #if RV32_HAS(BLOCK_CHAINING)
-        if (prev) {
+        if (prev
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+            && prev->satp == rv->csr_satp
+#endif
+        ) {
             rv_insn_t *last_ir = prev->ir_tail;
             /* chain block */
             if (!insn_is_unconditional_branch(last_ir->opcode)) {
@@ -1048,7 +1093,7 @@ void rv_step(void *arg)
 #endif
         last_pc = rv->PC;
 #if RV32_HAS(JIT)
-#if RV32_HAS(T2C)
+#if RV32_HAS(T2C) && !RV32_HAS(SYSTEM)
         /* executed through the tier-2 JIT compiler */
         if (block->hot2) {
             ((exec_t2c_func_t) block->func)(rv);
diff --git a/src/jit.c b/src/jit.c
index da5eb079..fa23b5e2 100644
--- a/src/jit.c
+++ b/src/jit.c
@@ -45,6 +45,9 @@
 #include "riscv.h"
 #include "riscv_private.h"
 #include "utils.h"
+#if RV32_HAS(SYSTEM)
+#include "system.h"
+#endif

 #define JIT_CLS_MASK 0x07
 #define JIT_ALU_OP_MASK 0xf0
@@ -267,12 +270,15 @@ static inline void set_dirty(int reg_idx, bool is_dirty)
     }
 }

-static inline void offset_map_insert(struct jit_state *state, int32_t target_pc)
+static inline void offset_map_insert(struct jit_state *state, block_t *block)
 {
     struct offset_map *map_entry = &state->offset_map[state->n_blocks++];
-    assert(state->n_blocks < MAX_BLOCKS);
-    map_entry->pc = target_pc;
+    assert(state->n_blocks <= MAX_BLOCKS);
+    map_entry->pc = block->pc_start;
     map_entry->offset = state->offset;
+#if RV32_HAS(SYSTEM)
+    map_entry->satp = block->satp;
+#endif
 }

 #if !defined(__APPLE__)
@@ -287,6 +293,10 @@ static void emit_bytes(struct jit_state *state, void *data, uint32_t len)
         should_flush = true;
         return;
     }
+    if (unlikely(state->n_blocks == MAX_BLOCKS)) {
+        should_flush = true;
+        return;
+    }
 #if defined(__APPLE__) && defined(__aarch64__)
     pthread_jit_write_protect_np(false);
 #endif
@@ -330,11 +340,12 @@ static inline void emit_modrm_and_displacement(struct jit_state *state,
                                                int m,
                                                int32_t d)
 {
+    /*
+     * Do not use the short displacement encoding even when the offset fits
+     * in one byte, since the length of the emitted operation would then
+     * become variable and break the pre-calculated jump offsets.
+ */ if (d == 0 && (m & 7) != RBP) { emit_modrm(state, 0x00, r, m); - } else if ((int8_t) d == d) { - emit_modrm(state, 0x40, r, m); - emit1(state, d); } else { emit_modrm(state, 0x80, r, m); emit4(state, d); @@ -377,12 +388,16 @@ static inline void emit_pop(struct jit_state *state, int r) } static inline void emit_jump_target_address(struct jit_state *state, - int32_t target_pc) + int32_t target_pc, + uint32_t target_satp UNUSED) { struct jump *jump = &state->jumps[state->n_jumps++]; - assert(state->n_jumps < MAX_JUMPS); + assert(state->n_jumps <= MAX_JUMPS); jump->offset_loc = state->offset; jump->target_pc = target_pc; +#if RV32_HAS(SYSTEM) + jump->target_satp = target_satp; +#endif emit4(state, 0); } #elif defined(__aarch64__) @@ -591,7 +606,7 @@ static inline void emit_jump_target_offset(struct jit_state *state, uint32_t jump_state_offset) { struct jump *jump = &state->jumps[state->n_jumps++]; - assert(state->n_jumps < MAX_JUMPS); + assert(state->n_jumps <= MAX_JUMPS); jump->offset_loc = jump_loc; jump->target_offset = jump_state_offset; } @@ -968,17 +983,22 @@ static inline void emit_store(struct jit_state *state, set_dirty(src, false); } -static inline void emit_jmp(struct jit_state *state, uint32_t target_pc) +static inline void emit_jmp(struct jit_state *state, + uint32_t target_pc, + uint32_t target_satp) { #if defined(__x86_64__) emit1(state, 0xe9); - emit_jump_target_address(state, target_pc); + emit_jump_target_address(state, target_pc, target_satp); #elif defined(__aarch64__) struct jump *jump = &state->jumps[state->n_jumps++]; - assert(state->n_jumps < MAX_JUMPS); + assert(state->n_jumps <= MAX_JUMPS); jump->offset_loc = state->offset; jump->target_pc = target_pc; emit_a64(state, UBR_B); +#if RV32_HAS(SYSTEM) + jump->target_satp = target_satp; +#endif #endif } @@ -1017,7 +1037,7 @@ static inline void emit_exit(struct jit_state *state) emit_jump_target_offset(state, state->offset, state->exit_loc); emit4(state, 0); #elif defined(__aarch64__) - emit_jmp(state, TARGET_PC_EXIT); + emit_jmp(state, TARGET_PC_EXIT, 0); #endif } @@ -1233,6 +1253,211 @@ static void muldivmod(struct jit_state *state, } #endif /* RV32_HAS(EXT_M) */ +#if RV32_HAS(SYSTEM) +uint32_t jit_mmio_read_wrapper(riscv_t *rv, uint32_t addr) +{ + MMIO_READ(); + __UNREACHABLE; +} + +void jit_mmu_handler(riscv_t *rv, uint32_t vreg_idx) +{ + assert(vreg_idx < 32); + + uint32_t addr = rv->jit_mmu.vaddr; + + if (!rv->csr_satp) { + rv->jit_mmu.paddr = addr; + return; + } + + bool ok; + uint32_t level, *pte = mmu_walk(rv, addr, &level); + + if (rv->jit_mmu.type == rv_insn_sb || rv->jit_mmu.type == rv_insn_sh || + rv->jit_mmu.type == rv_insn_sw) + ok = mmu_write_fault_check(rv, pte, addr, PTE_W); + else + ok = mmu_read_fault_check(rv, pte, addr, PTE_R); + + if (unlikely(!ok)) + pte = mmu_walk(rv, addr, &level); + + get_ppn_and_offset(); + addr = ppn | offset; + + if (likely(addr < PRIV(rv)->mem->mem_size)) { + rv->jit_mmu.is_mmio = 0; + rv->jit_mmu.paddr = addr; + return; + } + + uint32_t val; + rv->jit_mmu.is_mmio = 1; + + switch (rv->jit_mmu.type) { + case rv_insn_sb: + val = rv->X[vreg_idx] & 0xff; + MMIO_WRITE(); + break; + case rv_insn_sh: + val = rv->X[vreg_idx] & 0xffff; + MMIO_WRITE(); + break; + case rv_insn_sw: + val = rv->X[vreg_idx]; + MMIO_WRITE(); + break; + case rv_insn_lb: + rv->X[vreg_idx] = (int8_t) jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lh: + rv->X[vreg_idx] = (int16_t) jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lw: + rv->X[vreg_idx] = jit_mmio_read_wrapper(rv, addr); + 
break; + case rv_insn_lbu: + rv->X[vreg_idx] = (uint8_t) jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lhu: + rv->X[vreg_idx] = (uint16_t) jit_mmio_read_wrapper(rv, addr); + break; + default: + assert(NULL); + __UNREACHABLE; + } +} + +void emit_jit_mmu_handler(struct jit_state *state, uint8_t vreg_idx) +{ + assert(vreg_idx < 32); + +#if defined(__x86_64__) + /* push $rdi */ + emit1(state, 0xff); + emit_modrm(state, 0x3 << 6, 0x6, parameter_reg[0]); + + /* mov $vreg_idx, %rsi */ + emit1(state, 0xbe); + emit4(state, vreg_idx); + + /* call jit_mmu_handler */ + emit_load_imm(state, temp_reg, (uintptr_t) &jit_mmu_handler); + emit1(state, 0xff); + emit_modrm(state, 0x3 << 6, 0x2, temp_reg); + + /* pop rv to $rdi */ + emit1(state, 0x8f); + emit_modrm(state, 0x3 << 6, 0x0, parameter_reg[0]); +#elif defined(__aarch64__) + uint32_t insn; + + /* push rv into stack */ + insn = (0xf81f0fe << 4) | R0; + emit_a64(state, insn); + + emit_movewide_imm(state, false, R1, vreg_idx); + + /* blr jit_mmu_handler */ + emit_movewide_imm(state, true, temp_reg, (uintptr_t) &jit_mmu_handler); + insn = (0xd63f << 16) | (temp_reg << 5); + emit_a64(state, insn); + + /* pop from stack */ + insn = (0xf84107e << 4) | R0; + emit_a64(state, insn); +#endif +} + +void emit_jit_mmio_escape_load(struct jit_state *state) +{ +#if defined(__x86_64__) + /* JE */ + emit1(state, 0x0f); + emit1(state, 0x84); + + /* pre-calculated jump offset */ + emit4(state, 0xb); + return; +#elif defined(__aarch64__) + /* b.eq (3 instructions) */ + emit_a64(state, (0x54 << 24) | (0x3 << 5)); +#endif +} + +void emit_jit_mmio_escape_load_end(struct jit_state *state, + int rv_insn_type UNUSED) +{ +#if defined(__x86_64__) + /* JMP */ + emit1(state, 0xe9); + + /* pre-calculated jump offset */ + switch (rv_insn_type) { + case rv_insn_sb: + case rv_insn_sh: + emit4(state, 0x1c); + return; + case rv_insn_sw: + emit4(state, 0x1b); + return; + case rv_insn_lb: + case rv_insn_lh: + case rv_insn_lbu: + case rv_insn_lhu: + emit4(state, 0x16); + return; + case rv_insn_lw: + emit4(state, 0x15); + return; + default: + assert(NULL); + __UNREACHABLE; + } +#elif defined(__aarch64__) + /* b (5 instructions) */ + emit_a64(state, (0x54 << 24) | (0x6 << 5) | 0xe); +#endif +} + +void emit_jit_mmio_escape_store(struct jit_state *state, + int rv_insn_type UNUSED) +{ +#if defined(__x86_64__) + /* JE */ + emit1(state, 0x0f); + emit1(state, 0x84); + + /* pre-calculated jump offset */ + switch (rv_insn_type) { + case rv_insn_sb: + case rv_insn_sh: + emit4(state, 0x1c); + return; + case rv_insn_sw: + emit4(state, 0x1b); + return; + case rv_insn_lb: + case rv_insn_lh: + case rv_insn_lbu: + case rv_insn_lhu: + emit4(state, 0x16); + return; + case rv_insn_lw: + emit4(state, 0x15); + return; + default: + assert(NULL); + __UNREACHABLE; + } +#elif defined(__aarch64__) + /* b.eq (7 instructions) */ + emit_a64(state, (0x54 << 24) | (0x7 << 5)); +#endif +} +#endif + static void prepare_translate(struct jit_state *state) { #if defined(__x86_64__) @@ -1734,7 +1959,9 @@ static void ra_load2_sext(struct jit_state *state, } } -void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir) +void parse_branch_history_table(struct jit_state *state, + riscv_t *rv UNUSED, + rv_insn_t *ir) { int max_idx = 0; branch_history_table_t *bt = ir->branch_table; @@ -1745,14 +1972,21 @@ void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir) max_idx = i; } if (bt->PC[max_idx] && bt->times[max_idx] >= IN_JUMP_THRESHOLD) { - save_reg(state, 0); - unmap_vm_reg(0); - 
emit_load_imm(state, register_map[0].reg_idx, bt->PC[max_idx]); - emit_cmp32(state, temp_reg, register_map[0].reg_idx); - uint32_t jump_loc = state->offset; - emit_jcc_offset(state, 0x85); - emit_jmp(state, bt->PC[max_idx]); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + IIF(RV32_HAS(SYSTEM))(if (bt->satp[max_idx] == rv->csr_satp), ) + { + save_reg(state, 0); + unmap_vm_reg(0); + emit_load_imm(state, register_map[0].reg_idx, bt->PC[max_idx]); + emit_cmp32(state, temp_reg, register_map[0].reg_idx); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x85); +#if RV32_HAS(SYSTEM) + emit_jmp(state, bt->PC[max_idx], bt->satp[max_idx]); +#else + emit_jmp(state, bt->PC[max_idx], 0); +#endif + emit_jump_target_offset(state, JUMP_LOC, state->offset); + } } } @@ -1914,8 +2148,12 @@ static void resolve_jumps(struct jit_state *state) target_loc = jump.offset_loc + sizeof(uint32_t); for (int i = 0; i < state->n_blocks; i++) { if (jump.target_pc == state->offset_map[i].pc) { - target_loc = state->offset_map[i].offset; - break; + IIF(RV32_HAS(SYSTEM)) + (if (jump.target_satp == state->offset_map[i].satp), ) + { + target_loc = state->offset_map[i].offset; + break; + } } } } @@ -1936,11 +2174,14 @@ static void translate_chained_block(struct jit_state *state, riscv_t *rv, block_t *block) { - if (set_has(&state->set, block->pc_start)) + if (set_has(&state->set, COMPOSED_KEY(block))) return; - set_add(&state->set, block->pc_start); - offset_map_insert(state, block->pc_start); + if (state->n_blocks == MAX_BLOCKS) + return; + + assert(set_add(&state->set, COMPOSED_KEY(block))); + offset_map_insert(state, block); translate(state, rv, block); if (unlikely(should_flush)) return; @@ -1948,15 +2189,22 @@ static void translate_chained_block(struct jit_state *state, if (ir->branch_untaken && !set_has(&state->set, ir->branch_untaken->pc)) { block_t *block1 = cache_get(rv->block_cache, ir->branch_untaken->pc, false); - if (block1->translatable) - translate_chained_block(state, rv, block1); + if (block1->translatable) { + IIF(RV32_HAS(SYSTEM)) + (if (block1->satp == rv->csr_satp), ) + translate_chained_block(state, rv, block1); + } } if (ir->branch_taken && !set_has(&state->set, ir->branch_taken->pc)) { block_t *block1 = cache_get(rv->block_cache, ir->branch_taken->pc, false); - if (block1->translatable) - translate_chained_block(state, rv, block1); + if (block1->translatable) { + IIF(RV32_HAS(SYSTEM)) + (if (block1->satp == rv->csr_satp), ) + translate_chained_block(state, rv, block1); + } } + branch_history_table_t *bt = ir->branch_table; if (bt) { int max_idx = 0; @@ -1968,10 +2216,16 @@ static void translate_chained_block(struct jit_state *state, } if (bt->PC[max_idx] && bt->times[max_idx] >= IN_JUMP_THRESHOLD && !set_has(&state->set, bt->PC[max_idx])) { - block_t *block1 = - cache_get(rv->block_cache, bt->PC[max_idx], false); - if (block1 && block1->translatable) - translate_chained_block(state, rv, block1); + IIF(RV32_HAS(SYSTEM))(if (bt->satp[max_idx] == rv->csr_satp), ) + { + block_t *block1 = + cache_get(rv->block_cache, bt->PC[max_idx], false); + if (block1 && block1->translatable) { + IIF(RV32_HAS(SYSTEM)) + (if (block1->satp == rv->csr_satp), ) + translate_chained_block(state, rv, block1); + } + } } } } @@ -1979,18 +2233,23 @@ static void translate_chained_block(struct jit_state *state, void jit_translate(riscv_t *rv, block_t *block) { struct jit_state *state = rv->jit_state; - if (set_has(&state->set, block->pc_start)) { + if (set_has(&state->set, COMPOSED_KEY(block))) { for (int i = 
0; i < state->n_blocks; i++) {
-            if (block->pc_start == state->offset_map[i].pc) {
+            if (block->pc_start == state->offset_map[i].pc
+#if RV32_HAS(SYSTEM)
+                && block->satp == state->offset_map[i].satp
+#endif
+            ) {
                 block->offset = state->offset_map[i].offset;
                 block->hot = true;
                 return;
             }
         }
+        assert(NULL);
         __UNREACHABLE;
     }
 restart:
-    memset(state->jumps, 0, 1024 * sizeof(struct jump));
+    memset(state->jumps, 0, MAX_JUMPS * sizeof(struct jump));
     state->n_jumps = 0;
     block->offset = state->offset;
     translate_chained_block(state, rv, block);
diff --git a/src/jit.h b/src/jit.h
index 3967a1df..4bbafa2f 100644
--- a/src/jit.h
+++ b/src/jit.h
@@ -14,11 +14,17 @@
 struct jump {
     uint32_t offset_loc;
     uint32_t target_pc;
     uint32_t target_offset;
+#if RV32_HAS(SYSTEM)
+    uint32_t target_satp;
+#endif
 };

 struct offset_map {
     uint32_t pc;
     uint32_t offset;
+#if RV32_HAS(SYSTEM)
+    uint32_t satp;
+#endif
 };

 struct jit_state {
diff --git a/src/riscv.c b/src/riscv.c
index a06a2f33..8e1d103b 100644
--- a/src/riscv.c
+++ b/src/riscv.c
@@ -463,7 +463,7 @@ riscv_t *rv_create(riscv_user_t rv_attr)
     rv->jit_state = jit_state_init(CODE_CACHE_SIZE);
     rv->block_cache = cache_create(BLOCK_MAP_CAPACITY_BITS);
     assert(rv->block_cache);
-#if RV32_HAS(T2C)
+#if RV32_HAS(T2C) && !RV32_HAS(SYSTEM)
     rv->quit = false;
     rv->jit_cache = jit_cache_init();
     /* prepare wait queue. */
@@ -566,8 +566,10 @@ bool rv_has_halted(riscv_t *rv)
 void rv_delete(riscv_t *rv)
 {
     assert(rv);
-#if !RV32_HAS(JIT)
+#if !RV32_HAS(JIT) || (RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER))
     vm_attr_t *attr = PRIV(rv);
+#endif
+#if !RV32_HAS(JIT)
     map_delete(attr->fd_map);
     memory_delete(attr->mem);
     block_map_destroy(rv);
diff --git a/src/riscv_private.h b/src/riscv_private.h
index 0ae6f279..684426dd 100644
--- a/src/riscv_private.h
+++ b/src/riscv_private.h
@@ -90,6 +90,9 @@ typedef struct block {
     bool translatable; /**< Determine the block has RV32AF insturctions or not */
     bool has_loops;    /**< Determine the block has loop or not */
+#if RV32_HAS(SYSTEM)
+    uint32_t satp;
+#endif
 #if RV32_HAS(T2C)
     bool compiled; /**< The T2C request is enqueued or not */
 #endif
@@ -126,6 +129,18 @@ struct riscv_internal {
     riscv_word_t X[N_RV_REGS];
     riscv_word_t PC;

+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /*
+     * The AArch64 encoder accepts only a 9-bit signed offset, so this
+     * structure must stay near the top; do not move it toward the bottom.
+     */
+    struct {
+        uint32_t is_mmio; /* whether the access is MMIO or not */
+        uint32_t type;    /* the rv_insn_* opcode of the memory access */
+        uint32_t vaddr;
+        uint32_t paddr;
+    } jit_mmu;
+#endif
     /* user provided data */
     riscv_user_t data;
diff --git a/src/rv32_jit.c b/src/rv32_jit.c
index 4c1dad9f..50ec8c03 100644
--- a/src/rv32_jit.c
+++ b/src/rv32_jit.c
@@ -13,7 +13,7 @@ GEN(jal, {
         emit_load_imm(state, vm_reg[0], ir->pc + 4);
     }
     store_back(state);
-    emit_jmp(state, ir->pc + ir->imm);
+    emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
@@ -28,7 +28,7 @@ GEN(jalr, {
         emit_load_imm(state, vm_reg[1], ir->pc + 4);
     }
     store_back(state);
-    parse_branch_history_table(state, ir);
+    parse_branch_history_table(state, rv, ir);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
 })
@@ -39,14 +39,14 @@ GEN(beq, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x84);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -59,14 +59,14 @@ GEN(bne, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x85);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -79,14 +79,14 @@ GEN(blt, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x8c);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -99,14 +99,14 @@ GEN(bge, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x8d);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -119,14 +119,14 @@ GEN(bltu, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state,
0x82);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -139,14 +139,14 @@ GEN(bgeu, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x83);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -155,66 +155,366 @@ GEN(bgeu, {
 GEN(lb, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load_sext(state, S8, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lb);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, assign the value read by the handler to
+             * the host register; otherwise, load from the emulated memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lb);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load_sext(state, S8, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load_sext(state, S8, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lh, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load_sext(state, S16, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lh);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, assign the value read by the handler to
+             * the host register; otherwise, load from the emulated memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lh);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load_sext(state, S16, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load_sext(state, S16, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lw, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load(state, S32, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lw);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, assign the value read by the handler to
+             * the host register; otherwise, load from the emulated memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lw);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load(state, S32, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load(state, S32, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lbu, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load(state, S8, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lbu);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, assign the value read by the handler to
+             * the host register; otherwise, load from the emulated memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lbu);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load(state, S8, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load(state, S8, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lhu, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load(state, S16, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lhu);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, assign the value read by the handler to
+             * the host register; otherwise, load from the emulated memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular loading */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lhu);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load(state, S16, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load(state, S16, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(sb, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = ra_load(state, ir->rs2);
-    emit_store(state, S8, vm_reg[1], temp_reg, 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_sb);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rs2);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, the store has already been performed by
+             * the MMIO handler; otherwise, store the value into the emulated
+             * memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 1);
+            emit_jit_mmio_escape_store(state, rv_insn_sb);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S8, vm_reg[1], temp_reg, 0);
+            reset_reg();
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S8, vm_reg[1], temp_reg, 0);
+        })
 })
 GEN(sh, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = ra_load(state, ir->rs2);
-    emit_store(state, S16, vm_reg[1], temp_reg, 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_sh);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rs2);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, the store has already been performed by
+             * the MMIO handler; otherwise, store the value into the emulated
+             * memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 1);
+            emit_jit_mmio_escape_store(state, rv_insn_sh);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S16, vm_reg[1], temp_reg, 0);
+            reset_reg();
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S16, vm_reg[1], temp_reg, 0);
+        })
 })
 GEN(sw, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = ra_load(state, ir->rs2);
-    emit_store(state, S32, vm_reg[1], temp_reg, 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_sw);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rs2);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, the store has already been performed by
+             * the MMIO handler; otherwise, store the value into the emulated
+             * memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 1);
+            emit_jit_mmio_escape_store(state, rv_insn_sw);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S32, vm_reg[1], temp_reg, 0);
+            reset_reg();
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S32, vm_reg[1], temp_reg, 0);
+        })
 })
 GEN(addi, {
     vm_reg[0] = ra_load(state, ir->rs1);
@@ -388,7 +688,9 @@ GEN(ebreak, {
 })
 GEN(wfi, { assert(NULL); })
 GEN(uret, { assert(NULL); })
+#if RV32_HAS(SYSTEM)
 GEN(sret, { assert(NULL); })
+#endif
 GEN(hret, { assert(NULL); })
 GEN(mret, { assert(NULL); })
 GEN(sfencevma, { assert(NULL); })
@@ -539,7 +841,7 @@ GEN(cjal, {
     vm_reg[0] = map_vm_reg(state, rv_reg_ra);
     emit_load_imm(state, vm_reg[0], ir->pc + 2);
     store_back(state);
-    emit_jmp(state, ir->pc + ir->imm);
+    emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
@@ -598,7 +900,7 @@ GEN(cand, {
 })
 GEN(cj, {
     store_back(state);
-    emit_jmp(state, ir->pc + ir->imm);
+    emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
@@ -610,14 +912,14 @@ GEN(cbeqz, {
     uint32_t jump_loc = state->offset;
    emit_jcc_offset(state, 0x84);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 2);
+        emit_jmp(state, ir->pc + 2, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 2);
     emit_store(state, S32, temp_reg,
parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -630,14 +932,14 @@ GEN(cbnez, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x85);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 2);
+        emit_jmp(state, ir->pc + 2, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 2);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -659,7 +961,7 @@ GEN(cjr, {
     vm_reg[0] = ra_load(state, ir->rs1);
     emit_mov(state, vm_reg[0], temp_reg);
     store_back(state);
-    parse_branch_history_table(state, ir);
+    parse_branch_history_table(state, rv, ir);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
 })
@@ -685,7 +987,7 @@ GEN(cjalr, {
     vm_reg[1] = map_vm_reg(state, rv_reg_ra);
     emit_load_imm(state, vm_reg[1], ir->pc + 2);
     store_back(state);
-    parse_branch_history_table(state, ir);
+    parse_branch_history_table(state, rv, ir);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
 })
diff --git a/src/rv32_template.c b/src/rv32_template.c
index e0e41cf6..30d5b14e 100644
--- a/src/rv32_template.c
+++ b/src/rv32_template.c
@@ -167,11 +167,18 @@ RVOP(
     struct rv_insn *taken = ir->branch_taken;
     if (taken) {
 #if RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))(if (!rv->is_trapped && !reloc_enable_mmu), )
+        {
+            IIF(RV32_HAS(SYSTEM))
+            (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+            IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+            {
+                if (!set_add(&pc_set, PC))
+                    has_loops = true;
+                if (cache_hot(rv->block_cache, PC))
+                    goto end_op;
+            }
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
@@ -244,32 +251,45 @@ RVOP(
     }                                                                     \
 }
 #else
-#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE()                            \
-    block_t *block = cache_get(rv->block_cache, PC, true);                 \
-    if (block) {                                                           \
-        for (int i = 0; i < HISTORY_SIZE; i++) {                           \
-            if (ir->branch_table->PC[i] == PC) {                           \
-                ir->branch_table->times[i]++;                              \
-                if (cache_hot(rv->block_cache, PC))                        \
-                    goto end_op;                                           \
-            }                                                              \
-        }                                                                  \
-        /* update branch history table */                                  \
-        int min_idx = 0;                                                   \
-        for (int i = 0; i < HISTORY_SIZE; i++) {                           \
-            if (!ir->branch_table->times[i]) {                             \
-                min_idx = i;                                               \
-                break;                                                     \
-            } else if (ir->branch_table->times[min_idx] >                  \
-                       ir->branch_table->times[i]) {                       \
-                min_idx = i;                                               \
-            }                                                              \
-        }                                                                  \
-        ir->branch_table->times[min_idx] = 1;                              \
-        ir->branch_table->PC[min_idx] = PC;                                \
-        if (cache_hot(rv->block_cache, PC))                                \
-            goto end_op;                                                   \
-        MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, PC); \
+#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE()                               \
+    IIF(RV32_HAS(SYSTEM))(if (!rv->is_trapped && !reloc_enable_mmu), )        \
+    {                                                                         \
+        block_t *block = cache_get(rv->block_cache, PC, true);                \
+        if (block) {                                                          \
+            IIF(RV32_HAS(SYSTEM))(if (block->satp == rv->csr_satp), )         \
+            {                                                                 \
+                for (int i = 0; i < HISTORY_SIZE; i++) {                      \
+
if (ir->branch_table->PC[i] == PC) { \ + IIF(RV32_HAS(SYSTEM)) \ + (if (ir->branch_table->satp[i] == rv->csr_satp), ) \ + { \ + ir->branch_table->times[i]++; \ + if (cache_hot(rv->block_cache, PC)) \ + goto end_op; \ + } \ + } \ + } \ + /* update branch history table */ \ + int min_idx = 0; \ + for (int i = 0; i < HISTORY_SIZE; i++) { \ + if (!ir->branch_table->times[i]) { \ + min_idx = i; \ + break; \ + } else if (ir->branch_table->times[min_idx] > \ + ir->branch_table->times[i]) { \ + min_idx = i; \ + } \ + } \ + ir->branch_table->times[min_idx] = 1; \ + ir->branch_table->PC[min_idx] = PC; \ + IIF(RV32_HAS(SYSTEM)) \ + (ir->branch_table->satp[min_idx] = rv->csr_satp, ); \ + if (cache_hot(rv->block_cache, PC)) \ + goto end_op; \ + MUST_TAIL return block->ir_head->impl(rv, block->ir_head, \ + cycle, PC); \ + } \ + } \ } #endif @@ -359,11 +379,14 @@ RVOP( IIF(RV32_HAS(JIT)) \ ( \ { \ - cache_get(rv->block_cache, PC + 4, true); \ - if (!set_add(&pc_set, PC + 4)) \ - has_loops = true; \ - if (cache_hot(rv->block_cache, PC + 4)) \ - goto nextop; \ + block_t *next = cache_get(rv->block_cache, PC + 4, true); \ + if (next IIF(RV32_HAS(SYSTEM))( \ + &&next->satp == rv->csr_satp, )) { \ + if (!set_add(&pc_set, PC + 4)) \ + has_loops = true; \ + if (cache_hot(rv->block_cache, PC + 4)) \ + goto nextop; \ + } \ }, ); \ PC += 4; \ IIF(RV32_HAS(SYSTEM)) \ @@ -393,11 +416,14 @@ RVOP( IIF(RV32_HAS(JIT)) \ ( \ { \ - cache_get(rv->block_cache, PC, true); \ - if (!set_add(&pc_set, PC)) \ - has_loops = true; \ - if (cache_hot(rv->block_cache, PC)) \ - goto end_op; \ + block_t *next = cache_get(rv->block_cache, PC, true); \ + if (next IIF(RV32_HAS(SYSTEM))( \ + &&next->satp == rv->csr_satp, )) { \ + if (!set_add(&pc_set, PC)) \ + has_loops = true; \ + if (cache_hot(rv->block_cache, PC)) \ + goto end_op; \ + } \ }, ); \ IIF(RV32_HAS(SYSTEM)) \ ( \ @@ -2079,11 +2105,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC, true); - if (!set_add(&pc_set, PC)) - has_loops = true; - if (cache_hot(rv->block_cache, PC)) - goto end_op; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC)) + has_loops = true; + if (cache_hot(rv->block_cache, PC)) + goto end_op; + } #endif #if RV32_HAS(SYSTEM) @@ -2246,11 +2276,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC, true); - if (!set_add(&pc_set, PC)) - has_loops = true; - if (cache_hot(rv->block_cache, PC)) - goto end_op; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC)) + has_loops = true; + if (cache_hot(rv->block_cache, PC)) + goto end_op; + } #endif #if RV32_HAS(SYSTEM) if (!rv->is_trapped) @@ -2284,11 +2318,15 @@ RVOP( if (!untaken) goto nextop; #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC + 2, true); - if (!set_add(&pc_set, PC + 2)) - has_loops = true; - if (cache_hot(rv->block_cache, PC + 2)) - goto nextop; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC + 2, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC + 2)) + has_loops = true; + if (cache_hot(rv->block_cache, PC + 2)) + goto nextop; + } #endif PC += 2; #if RV32_HAS(SYSTEM) @@ -2306,11 +2344,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if 
RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))
+        (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+        IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+        {
+            if (!set_add(&pc_set, PC))
+                has_loops = true;
+            if (cache_hot(rv->block_cache, PC))
+                goto end_op;
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
@@ -2353,11 +2395,15 @@ RVOP(
     if (!untaken)
         goto nextop;
 #if RV32_HAS(JIT)
-    cache_get(rv->block_cache, PC + 2, true);
-    if (!set_add(&pc_set, PC + 2))
-        has_loops = true;
-    if (cache_hot(rv->block_cache, PC + 2))
-        goto nextop;
+    IIF(RV32_HAS(SYSTEM))
+    (block_t *next =, ) cache_get(rv->block_cache, PC + 2, true);
+    IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+    {
+        if (!set_add(&pc_set, PC + 2))
+            has_loops = true;
+        if (cache_hot(rv->block_cache, PC + 2))
+            goto nextop;
+    }
 #endif
     PC += 2;
 #if RV32_HAS(SYSTEM)
@@ -2375,11 +2421,15 @@ RVOP(
     struct rv_insn *taken = ir->branch_taken;
     if (taken) {
 #if RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))
+        (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+        IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+        {
+            if (!set_add(&pc_set, PC))
+                has_loops = true;
+            if (cache_hot(rv->block_cache, PC))
+                goto end_op;
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
diff --git a/src/utils.c b/src/utils.c
index 3199235d..13696855 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -3,6 +3,7 @@
  * "LICENSE" for information on usage and redistribution of this file.
  */

+#include <assert.h>
 #include
 #include
 #include
@@ -174,7 +175,11 @@ char *sanitize_path(const char *input)
     return ret;
 }

+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+HASH_FUNC_IMPL_64(set_hash_64, SET_SIZE_BITS, 1 << SET_SIZE_BITS);
+#else
 HASH_FUNC_IMPL(set_hash, SET_SIZE_BITS, 1 << SET_SIZE_BITS);
+#endif

 void set_reset(set_t *set)
 {
@@ -186,15 +191,25 @@ void set_reset(set_t *set)
 * @set: a pointer points to target set
 * @key: the key of the inserted entry
 */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_add(set_t *set, uint64_t key)
+#else
 bool set_add(set_t *set, uint32_t key)
+#endif
 {
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+    const uint64_t index = set_hash_64(key);
+#else
    const uint32_t index = set_hash(key);
+#endif
+
     uint8_t count = 0;
-    while (set->table[index][count]) {
+    while (count < SET_SLOTS_SIZE && set->table[index][count]) {
         if (set->table[index][count++] == key)
             return false;
     }

+    assert(count < SET_SLOTS_SIZE);
     set->table[index][count] = key;
     return true;
 }
@@ -204,10 +219,19 @@ bool set_add(set_t *set, uint32_t key)
 * @set: a pointer points to target set
 * @key: the key of the inserted entry
 */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_has(set_t *set, uint64_t key)
+#else
 bool set_has(set_t *set, uint32_t key)
+#endif
 {
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+    const uint64_t index = set_hash_64(key);
+#else
     const uint32_t index = set_hash(key);
-    for (uint8_t count = 0; set->table[index][count]; count++) {
+#endif
+    for (uint8_t count = 0; count < SET_SLOTS_SIZE && set->table[index][count];
+         count++) {
         if (set->table[index][count] == key)
             return true;
     }
diff --git a/src/utils.h b/src/utils.h
index 78f68985..e6f42228 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -24,6 +24,14 @@ void rv_clock_gettime(struct timespec *tp);
         return (val * 0x61C88647 >> (32 - size_bits)) & ((size) - (1));    \
     }

+#define HASH_FUNC_IMPL_64(name, size_bits, size)                           \
+    FORCE_INLINE uint64_t name(uint64_t val)                               \
+    {                                                                      \
+        /* 0x61c8864680b583eb is the 64-bit golden ratio */                \
+        return (val * 0x61c8864680b583ebull >> (64 - size_bits)) &         \
+               ((size) - (1));                                             \
+    }
+
 /* sanitize_path returns the shortest path name equivalent to path
  * by purely lexical processing. It applies the following rules
  * iteratively until no further processing can be done:
@@ -133,11 +141,26 @@ static inline void list_del_init(struct list_head *node)
 #define SET_SIZE (1 << SET_SIZE_BITS)
 #define SET_SLOTS_SIZE 32

+/*
+ * Use a composed key in JIT system simulation. The upper 32 bits store the
+ * value of the supervisor address translation and protection (SATP)
+ * register, and the lower 32 bits store the program counter (PC), just as
+ * in user-space simulation.
+ */
+#define COMPOSED_KEY(block)                                           \
+    IIF(RV32_HAS(SYSTEM))                                             \
+    (((((uint64_t) block->satp) << 32) | (uint64_t) block->pc_start), \
+     (uint32_t) block->pc_start)
+
 /* The set consists of SET_SIZE buckets, with each bucket containing
  * SET_SLOTS_SIZE slots.
  */
 typedef struct {
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+    uint64_t table[SET_SIZE][SET_SLOTS_SIZE];
+#else
     uint32_t table[SET_SIZE][SET_SLOTS_SIZE];
+#endif
 } set_t;

 /**
@@ -151,11 +174,19 @@ void set_reset(set_t *set);
 * @set: a pointer points to target set
 * @key: the key of the inserted entry
 */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_add(set_t *set, uint64_t key);
+#else
 bool set_add(set_t *set, uint32_t key);
+#endif

 /**
 * set_has - check whether the element exist in the set or not
 * @set: a pointer points to target set
 * @key: the key of the inserted entry
 */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_has(set_t *set, uint64_t key);
+#else
 bool set_has(set_t *set, uint32_t key);
+#endif
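
The following standalone C sketch (not part of the patch; every demo_* name
is hypothetical) illustrates the composed-key scheme that COMPOSED_KEY and
HASH_FUNC_IMPL_64 above implement: the SATP CSR occupies the upper 32 bits
of the key, the PC the lower 32 bits, and a lookup only hits when both
match, which is the invariant block_find_or_translate() now enforces.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_BITS 10
#define DEMO_SIZE (1u << DEMO_BITS)

/* 64-bit golden-ratio hash, shaped like HASH_FUNC_IMPL_64 */
static inline uint64_t demo_hash64(uint64_t key)
{
    return (key * 0x61c8864680b583ebull >> (64 - DEMO_BITS)) & (DEMO_SIZE - 1);
}

typedef struct {
    uint32_t pc_start;
    uint32_t satp;
    bool valid;
} demo_block_t;

static demo_block_t demo_cache[DEMO_SIZE];

/* Compose the key the way COMPOSED_KEY does in system-mode JIT. */
static inline uint64_t demo_key(uint32_t satp, uint32_t pc)
{
    return ((uint64_t) satp << 32) | pc;
}

/* A hit requires both PC and SATP to match; a block translated for another
 * address space (a different satp) is treated as a miss. */
static demo_block_t *demo_lookup(uint32_t satp, uint32_t pc)
{
    demo_block_t *blk = &demo_cache[demo_hash64(demo_key(satp, pc))];
    if (blk->valid && blk->pc_start == pc && blk->satp == satp)
        return blk;
    return NULL;
}

int main(void)
{
    demo_block_t *slot = &demo_cache[demo_hash64(demo_key(0x80001, 0x10000))];
    *slot = (demo_block_t){.pc_start = 0x10000, .satp = 0x80001, .valid = true};

    /* same PC, different satp: must miss */
    printf("%p\n", (void *) demo_lookup(0x80002, 0x10000)); /* (nil) */
    printf("%p\n", (void *) demo_lookup(0x80001, 0x10000)); /* hit   */
    return 0;
}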
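
A second sketch (also not part of the patch; names and the identity address
translation are simplifications) models the handshake that jit_mmu_handler()
establishes with the emitted load/store code: the generated code stores the
virtual address and access type, calls the handler, then consults is_mmio
and paddr to choose between a host-memory access and a value the handler
already placed in the register file.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

enum demo_access { DEMO_LW, DEMO_SW };

typedef struct {
    uint32_t X[32];       /* guest register file */
    uint8_t mem[1 << 16]; /* emulated RAM */
    struct {
        uint32_t is_mmio;
        uint32_t type; /* one of enum demo_access */
        uint32_t vaddr;
        uint32_t paddr;
    } jit_mmu;
} demo_rv_t;

/* Stand-in for jit_mmu_handler(): identity translation for RAM, and one
 * memory-mapped device register for any address beyond RAM. */
static void demo_mmu_handler(demo_rv_t *rv, uint32_t reg_idx)
{
    uint32_t addr = rv->jit_mmu.vaddr;
    if (addr < sizeof(rv->mem)) {
        rv->jit_mmu.is_mmio = 0;
        rv->jit_mmu.paddr = addr;
        return;
    }
    rv->jit_mmu.is_mmio = 1;
    if (rv->jit_mmu.type == DEMO_LW)
        rv->X[reg_idx] = 0xdeadbeef; /* value "read" from the device */
    /* a store to MMIO would be forwarded to the device model here */
}

/* What the code emitted for "lw rd, imm(rs1)" effectively does. */
static void demo_lw(demo_rv_t *rv, uint32_t rd, uint32_t rs1, int32_t imm)
{
    rv->jit_mmu.vaddr = rv->X[rs1] + imm;
    rv->jit_mmu.type = DEMO_LW;
    demo_mmu_handler(rv, rd);
    if (rv->jit_mmu.is_mmio)
        return; /* X[rd] was already written by the handler */
    memcpy(&rv->X[rd], &rv->mem[rv->jit_mmu.paddr], 4);
}

int main(void)
{
    static demo_rv_t rv;
    memcpy(&rv.mem[0x100], &(uint32_t){42}, 4);

    rv.X[1] = 0x100;
    demo_lw(&rv, 2, 1, 0);          /* RAM path  */
    printf("x2 = %u\n", rv.X[2]);   /* 42 */

    rv.X[1] = 0xF0000000u;
    demo_lw(&rv, 3, 1, 0);          /* MMIO path */
    printf("x3 = 0x%x\n", rv.X[3]); /* 0xdeadbeef */
    return 0;
}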