diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 0da26503..1d78f9a2 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -79,7 +79,7 @@ VL_FLAGS += -DNOPAE CFLAGS += -DNOPAE # use DPI FPU -VL_FLAGS += -DFPU_FAST +#VL_FLAGS += -DFPU_FAST RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index acbcfd44..3ed54b96 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -4,15 +4,15 @@ CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw # control RTL debug print states +DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK -DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA -DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM -DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE +DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP +DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE @@ -65,7 +65,7 @@ else endif # use DPI FPU -VL_FLAGS += -DFPU_FAST +#VL_FLAGS += -DFPU_FAST PROJECT = libvortex.so # PROJECT = libvortex.dylib diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index dcb4ad9e..8140eeb1 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -98,7 +98,7 @@ module VX_alu_unit #( VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33) - ) alu_reg ( + ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall_out), diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index cdf2a3f7..e39820e4 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -94,7 +94,7 @@ module VX_csr_unit #( VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32) - ) csr_reg ( + ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall), diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index 4bbf3e93..79e0a63b 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -131,7 +131,7 @@ module VX_fpu_unit #( VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS)) - ) fpu_reg ( + ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall_out), diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 0c11b9eb..cea69845 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -77,7 +77,7 @@ module VX_gpu_unit #( VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE) - ) csr_reg ( + ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall), diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 0c89ec7d..1f94d47f 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -75,7 +75,7 @@ module VX_lsu_unit #( VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))) - ) req_reg ( + ) pipe_reg0 ( .clk (clk), .reset (reset), .stall (stall_in), @@ -181,7 +181,7 @@ module VX_lsu_unit #( VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)) - ) rsp_reg ( + ) pipe_reg1 ( .clk (clk), .reset (reset), .stall (stall_out), diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index a85e17e8..733c0896 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -41,7 +41,9 @@ module VX_mem_unit # ( ) core_dcache_rsp_qual_if(), core_smem_rsp_if(); // select shared memory bus - wire is_smem_addr = (({core_dcache_req_if.addr[0], 2'b0} - `SHARED_MEM_BASE_ADDR) <= `SCACHE_SIZE); + wire is_smem_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `SHARED_MEM_BASE_ADDR) + && ({core_dcache_req_if.addr[0], 2'b0} < (`SHARED_MEM_BASE_ADDR + `SCACHE_SIZE)); + wire smem_req_select = (| core_dcache_req_if.valid) ? is_smem_addr : 0; wire smem_rsp_select = (| core_smem_rsp_if.valid); diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index 52ae629c..49c0541e 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -145,7 +145,7 @@ module VX_mul_unit #( VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)) - ) mul_reg ( + ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall_out), diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index dc620ab2..12fdf6b8 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -239,7 +239,7 @@ module VX_warp_sched #( VX_generic_register #( .N(1 + `NUM_THREADS + 32 + `NW_BITS) - ) fetch_reg ( + ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall_out), diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index c5228257..3d763781 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -77,7 +77,7 @@ module VX_writeback #( VX_generic_register #( .N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)) - ) wb_reg ( + ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index c24a0eef..f1d7ede9 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -869,7 +869,7 @@ module VX_bank #( $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0); end if (reqq_pop) begin - $display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, reqq_byteen_st0, debug_wid_st0, debug_pc_st0); + $display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, reqq_byteen_st0, reqq_writeword_st0, debug_wid_st0, debug_pc_st0); end if (snrq_pop) begin $display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0); diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index a7c6e201..3529eeda 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -94,7 +94,7 @@ module VX_cache_core_rsp_merge #( VX_generic_register #( .N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)), .PASSTHRU(NUM_BANKS <= 2) - ) core_wb_reg ( + ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall), diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index d0284445..ed8b84ef 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -127,7 +127,7 @@ module VX_data_access #( if (is_fill_in) begin $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), dirtyb_out, addrline, use_write_data); end else begin - $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, writeword_in); + $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, byte_enable, dirtyb_out, addrline, wordsel_in, writeword_in); end end else begin $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, qual_read_data); diff --git a/hw/rtl/fp_cores/VX_fp_noncomp.v b/hw/rtl/fp_cores/VX_fp_noncomp.v index ba11dd67..107cac5f 100644 --- a/hw/rtl/fp_cores/VX_fp_noncomp.v +++ b/hw/rtl/fp_cores/VX_fp_noncomp.v @@ -38,6 +38,8 @@ module VX_fp_noncomp #( SIG_NAN = 32'h00000100, QUT_NAN = 32'h00000200; + reg valid_in_r; + reg [TAGW-1:0] tag_in_r; reg [`FPU_BITS-1:0] op_type_r; reg [`FRM_BITS-1:0] frm_r; @@ -87,7 +89,7 @@ module VX_fp_noncomp #( VX_generic_register #( .N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1) - ) fnc1_reg ( + ) pipe_reg0 ( .clk (clk), .reset (reset), .stall (stall), @@ -98,14 +100,14 @@ module VX_fp_noncomp #( end VX_generic_register #( - .N(`FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)) - ) fnc2_reg ( + .N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)) + ) pipe_reg1 ( .clk (clk), .reset (reset), .stall (stall), .flush (1'b0), - .in ({op_type, frm, dataa, datab}), - .out ({op_type_r, frm_r, dataa_r, datab_r}) + .in ({valid_in, tag_in, op_type, frm, dataa, datab}), + .out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r}) ); // FCLASS @@ -155,7 +157,7 @@ module VX_fp_noncomp #( for (genvar i = 0; i < LANES; i++) begin always @(*) begin case (frm_r) - 0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]}; + 0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]}; 1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]}; 2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]}; default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value @@ -249,13 +251,13 @@ module VX_fp_noncomp #( VX_generic_register #( .N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)) - ) nc_reg ( + ) pipe_reg2 ( .clk (clk), .reset (reset), .stall (stall), .flush (1'b0), - .in ({valid_in, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}), - .out ({valid_out, tag_out, result, has_fflags, fflags}) + .in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}), + .out ({valid_out, tag_out, result, has_fflags, fflags}) ); assign ready_in = ~stall;