diff --git a/driver/opae/scope.cpp b/driver/opae/scope.cpp index 34874437..9f601749 100644 --- a/driver/opae/scope.cpp +++ b/driver/opae/scope.cpp @@ -37,9 +37,9 @@ static const scope_signal_t scope_signals[] = { { 32, "dram_req_addr" }, { 1, "dram_req_rw" }, { 16, "dram_req_byteen" }, - { 32, "dram_req_data" }, + { 128, "dram_req_data" }, { 29, "dram_req_tag" }, - { 32, "dram_rsp_data" }, + { 128, "dram_rsp_data" }, { 29, "dram_rsp_tag" }, { 32, "snp_req_addr" }, @@ -55,12 +55,12 @@ static const scope_signal_t scope_signals[] = { { NW_BITS, "dcache_req_warp_num" }, { 32, "dcache_req_curr_PC" }, - { 32, "dcache_req_addr" }, + { 64, "dcache_req_addr" }, { 1, "dcache_req_rw" }, - { 4, "dcache_req_byteen" }, - { 32, "dcache_req_data" }, + { 8, "dcache_req_byteen" }, + { 64, "dcache_req_data" }, { NW_BITS, "dcache_req_tag" }, - { 32, "dcache_rsp_data" }, + { 64, "dcache_rsp_data" }, { NW_BITS, "dcache_rsp_tag" }, { NW_BITS, "decode_warp_num" }, @@ -70,14 +70,26 @@ static const scope_signal_t scope_signals[] = { { 5, "decode_rs2" }, { NW_BITS, "execute_warp_num" }, + { 32, "execute_curr_PC" }, { 5, "execute_rd" }, - { 32, "execute_a" }, - { 32, "execute_b" }, + { 64, "execute_a" }, + { 64, "execute_b" }, { NW_BITS, "writeback_warp_num" }, + { 32, "writeback_curr_PC" }, { 2, "writeback_wb" }, { 5, "writeback_rd" }, - { 32, "writeback_data" }, + { 64, "writeback_data" }, + + { 32, "bank_addr_st0" }, + { 32, "bank_addr_st1" }, + { 32, "bank_addr_st2" }, + { 1, "scope_bank_is_mrvq_st1" }, + { 1, "scope_bank_miss_st1" }, + { 1, "scope_bank_dirty_st1" }, + { 1, "scope_bank_tag_valid_st1" }, + { 1, "scope_bank_tag_match_st1" }, + { 1, "scope_bank_force_miss_st1" }, /////////////////////////////////////////////////////////////////////////// @@ -103,12 +115,18 @@ static const scope_signal_t scope_signals[] = { { NUM_THREADS, "decode_valid" }, { NUM_THREADS, "execute_valid" }, - { NUM_THREADS, "writeback_valid" }, + { NUM_THREADS, "writeback_valid" }, + { 1, "schedule_delay" }, { 1, "memory_delay" }, { 1, "exec_delay" }, { 1, "gpr_stage_delay" }, { 1, "busy" }, + + { 1, "bank_valid_st0" }, + { 1, "bank_valid_st1" }, + { 1, "bank_valid_st2" }, + { 1, "bank_stall_pipe" }, }; static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 2f7282fe..ff020636 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -306,7 +306,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si auto ls_shift = (int)std::log2(line_size); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr >> ls_shift) )); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE)); @@ -349,7 +349,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, auto ls_shift = (int)std::log2(line_size); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr) >> ls_shift)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ)); diff --git a/hw/modelsim/vortex_tb.v b/hw/modelsim/vortex_tb.v index 086aeaf3..38b4acad 100644 --- a/hw/modelsim/vortex_tb.v +++ b/hw/modelsim/vortex_tb.v @@ -140,7 +140,7 @@ reg[31:0] io_data; clk = 0; end - #5 clk <= ~clk; + #5 clk <= !clk; end endmodule diff --git a/hw/opae/README b/hw/opae/README index 853e01f7..b0c1e371 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -88,4 +88,7 @@ kill -9 lsof +D build_ase_1c # quick off cache synthesis -make -C cache > cache/build.log 2>&1 & \ No newline at end of file +make -C pipeline > pipeline/build.log 2>&1 & +make -C cache > cache/build.log 2>&1 & +make -C vortex > vortex/build.log 2>&1 & +make -C pipeline > pipeline/build.log 2>&1 & \ No newline at end of file diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index a39b9691..0650c0ac 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -83,6 +83,10 @@ typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data; state_t state; +`ifdef SCOPE +`SCOPE_SIGNALS_DECL +`endif + // Vortex ports /////////////////////////////////////////////////////////////// logic vx_dram_req_valid; @@ -384,19 +388,19 @@ assign cci_dram_wr_req_enable = (state == STATE_WRITE) && (cci_dram_wr_req_ctr < csr_data_size); assign vx_dram_req_enable = vortex_enabled && (avs_pending_reads < AVS_RD_QUEUE_SIZE); -assign vx_dram_rd_req_enable = vx_dram_req_enable && vx_dram_req_valid && ~vx_dram_req_rw; +assign vx_dram_rd_req_enable = vx_dram_req_enable && vx_dram_req_valid && !vx_dram_req_rw; assign vx_dram_wr_req_enable = vx_dram_req_enable && vx_dram_req_valid && vx_dram_req_rw; -assign cci_dram_rd_req_fire = cci_dram_rd_req_enable && ~avs_waitrequest; -assign cci_dram_wr_req_fire = cci_dram_wr_req_enable && ~avs_waitrequest; +assign cci_dram_rd_req_fire = cci_dram_rd_req_enable && !avs_waitrequest; +assign cci_dram_wr_req_fire = cci_dram_wr_req_enable && !avs_waitrequest; -assign vx_dram_rd_req_fire = vx_dram_rd_req_enable && ~avs_waitrequest; -assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && ~avs_waitrequest; +assign vx_dram_rd_req_fire = vx_dram_rd_req_enable && !avs_waitrequest; +assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && !avs_waitrequest; assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready; assign avs_pending_reads_next = avs_pending_reads - + (((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && ~avs_rdq_pop) ? 1 : + + (((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 : (~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0); if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin @@ -575,10 +579,10 @@ assign cci_rdq_push = cci_rd_rsp_fire; assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)}; assign cci_pending_reads_next = cci_pending_reads - + ((cci_rd_req_fire && ~cci_rdq_pop) ? 1 : - (~cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); + + ((cci_rd_req_fire && !cci_rdq_pop) ? 1 : + (!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); -assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && ~cci_rd_req_wait; +assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait; // Send read requests to CCI always_ff @(posedge clk) @@ -672,12 +676,12 @@ assign cci_wr_req_fire = af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull; assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; assign cci_pending_writes_next = cci_pending_writes - + ((cci_wr_req_fire && ~cci_wr_rsp_fire) ? 1 : - (~cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0); + + ((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 : + (!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0); assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); -assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && ~avs_rdq_empty; +assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty; // Send write requests to CCI always_ff @(posedge clk) @@ -798,87 +802,6 @@ begin end end -// SCOPE ////////////////////////////////////////////////////////////////////// - -`ifdef SCOPE - -`SCOPE_SIGNALS_DECL -localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}); -localparam SCOPE_SR_DEPTH = 2; - -`SCOPE_ASSIGN(scope_dram_req_valid, vx_dram_req_valid); -`SCOPE_ASSIGN(scope_dram_req_addr, {vx_dram_req_addr, 4'b0}); -`SCOPE_ASSIGN(scope_dram_req_rw, vx_dram_req_rw); -`SCOPE_ASSIGN(scope_dram_req_byteen,vx_dram_req_byteen); -`SCOPE_ASSIGN(scope_dram_req_data, vx_dram_req_data[31:0]); -`SCOPE_ASSIGN(scope_dram_req_tag, vx_dram_req_tag); -`SCOPE_ASSIGN(scope_dram_req_ready, vx_dram_req_ready); - -`SCOPE_ASSIGN(scope_dram_rsp_valid, vx_dram_rsp_valid); -`SCOPE_ASSIGN(scope_dram_rsp_data, vx_dram_rsp_data[31:0]); -`SCOPE_ASSIGN(scope_dram_rsp_tag, vx_dram_rsp_tag); -`SCOPE_ASSIGN(scope_dram_rsp_ready, vx_dram_rsp_ready); - -`SCOPE_ASSIGN(scope_snp_req_valid, vx_snp_req_valid); -`SCOPE_ASSIGN(scope_snp_req_addr, {vx_snp_req_addr, 4'b0}); -`SCOPE_ASSIGN(scope_snp_req_invalidate, vx_snp_req_invalidate); -`SCOPE_ASSIGN(scope_snp_req_tag, vx_snp_req_tag); -`SCOPE_ASSIGN(scope_snp_req_ready, vx_snp_req_ready); - -`SCOPE_ASSIGN(scope_snp_rsp_valid, vx_snp_rsp_valid); -`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag); -`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready); - -wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) - || (scope_icache_rsp_valid && scope_icache_rsp_ready) - || ((| scope_dcache_req_valid) && scope_dcache_req_ready) - || ((| scope_dcache_rsp_valid) && scope_dcache_rsp_ready) - || (scope_dram_req_valid && scope_dram_req_ready) - || (scope_dram_rsp_valid && scope_dram_rsp_ready) - || (scope_snp_req_valid && scope_snp_req_ready) - || (scope_snp_rsp_valid && scope_snp_rsp_ready); - -wire scope_start = vx_reset; - -wire [SCOPE_DATAW+1:0] scope_data_in_st[SCOPE_SR_DEPTH-1:0]; -wire [SCOPE_DATAW+1:0] scope_data_in; -assign scope_data_in_st[0] = {`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST, scope_changed, scope_start}; -assign scope_data_in = scope_data_in_st[SCOPE_SR_DEPTH-1]; - -genvar i; -for (i = 1; i < SCOPE_SR_DEPTH; i++) begin - VX_generic_register #( - .N (SCOPE_DATAW+2) - ) scope_sr ( - .clk (clk), - .reset (SoftReset), - .stall (0), - .flush (0), - .in (scope_data_in_st[i-1]), - .out (scope_data_in_st[i]) - ); -end - -VX_scope #( - .DATAW (SCOPE_DATAW), - .BUSW (64), - .SIZE (4096), - .UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST})) -) scope ( - .clk (clk), - .reset (SoftReset), - .start (scope_data_in[0]), - .stop (0), - .changed (scope_data_in[1]), - .data_in (scope_data_in[SCOPE_DATAW+1:2]), - .bus_in (csr_scope_cmd), - .bus_out (csr_scope_data), - .bus_read (csr_scope_read), - .bus_write(csr_scope_write) -); - -`endif - // Vortex ///////////////////////////////////////////////////////////////////// assign cmd_run_done = !vx_busy; @@ -887,7 +810,7 @@ Vortex #() vortex ( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND `SCOPE_SIGNALS_CORE_BIND - `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_CACHE_BIND `SCOPE_SIGNALS_PIPELINE_BIND `SCOPE_SIGNALS_BE_BIND @@ -941,4 +864,92 @@ Vortex #() vortex ( `UNUSED_PIN (ebreak) ); +// SCOPE ////////////////////////////////////////////////////////////////////// + +`ifdef SCOPE + +localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}); +localparam SCOPE_SR_DEPTH = 2; + +`SCOPE_ASSIGN(scope_dram_req_valid, vx_dram_req_valid); +`SCOPE_ASSIGN(scope_dram_req_addr, {vx_dram_req_addr, 4'b0}); +`SCOPE_ASSIGN(scope_dram_req_rw, vx_dram_req_rw); +`SCOPE_ASSIGN(scope_dram_req_byteen,vx_dram_req_byteen); +`SCOPE_ASSIGN(scope_dram_req_data, vx_dram_req_data); +`SCOPE_ASSIGN(scope_dram_req_tag, vx_dram_req_tag); +`SCOPE_ASSIGN(scope_dram_req_ready, vx_dram_req_ready); + +`SCOPE_ASSIGN(scope_dram_rsp_valid, vx_dram_rsp_valid); +`SCOPE_ASSIGN(scope_dram_rsp_data, vx_dram_rsp_data); +`SCOPE_ASSIGN(scope_dram_rsp_tag, vx_dram_rsp_tag); +`SCOPE_ASSIGN(scope_dram_rsp_ready, vx_dram_rsp_ready); + +`SCOPE_ASSIGN(scope_snp_req_valid, vx_snp_req_valid); +`SCOPE_ASSIGN(scope_snp_req_addr, {vx_snp_req_addr, 4'b0}); +`SCOPE_ASSIGN(scope_snp_req_invalidate, vx_snp_req_invalidate); +`SCOPE_ASSIGN(scope_snp_req_tag, vx_snp_req_tag); +`SCOPE_ASSIGN(scope_snp_req_ready, vx_snp_req_ready); + +`SCOPE_ASSIGN(scope_snp_rsp_valid, vx_snp_rsp_valid); +`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag); +`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready); + +`SCOPE_ASSIGN(scope_snp_rsp_valid, vx_snp_rsp_valid); +`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag); +`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready); + +wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) + || (scope_icache_rsp_valid && scope_icache_rsp_ready) + || ((| scope_dcache_req_valid) && scope_dcache_req_ready) + || ((| scope_dcache_rsp_valid) && scope_dcache_rsp_ready) + || (scope_dram_req_valid && scope_dram_req_ready) + || (scope_dram_rsp_valid && scope_dram_rsp_ready) + || (scope_snp_req_valid && scope_snp_req_ready) + || (scope_snp_rsp_valid && scope_snp_rsp_ready) + || scope_bank_valid_st0 + || scope_bank_valid_st1 + || scope_bank_valid_st2 + || scope_bank_stall_pipe; + +wire scope_start = vx_reset; + +wire [SCOPE_DATAW+1:0] scope_data_in_st[SCOPE_SR_DEPTH-1:0]; +wire [SCOPE_DATAW+1:0] scope_data_in_ste; +assign scope_data_in_st[0] = {`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST, scope_changed, scope_start}; +assign scope_data_in_ste = scope_data_in_st[SCOPE_SR_DEPTH-1]; + +genvar i; +for (i = 1; i < SCOPE_SR_DEPTH; i++) begin + VX_generic_register #( + .N (SCOPE_DATAW+2) + ) scope_sr ( + .clk (clk), + .reset (SoftReset), + .stall (0), + .flush (0), + .in (scope_data_in_st[i-1]), + .out (scope_data_in_st[i]) + ); +end + +VX_scope #( + .DATAW (SCOPE_DATAW), + .BUSW (64), + .SIZE (4096), + .UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST})) +) scope ( + .clk (clk), + .reset (SoftReset), + .start (scope_data_in_ste[0]), + .stop (0), + .changed (scope_data_in_ste[1]), + .data_in (scope_data_in_ste[SCOPE_DATAW+1:2]), + .bus_in (csr_scope_cmd), + .bus_out (csr_scope_data), + .bus_read (csr_scope_read), + .bus_write(csr_scope_write) +); + +`endif + endmodule \ No newline at end of file diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 51e1bb65..2581967b 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -30,11 +30,11 @@ module VX_alu_unit ( VX_divide #( .WIDTHN(32), .WIDTHD(32), - .SPEED("HIGHEST"), + .REP("UNSIGNED"), .PIPELINE(DIV_PIPELINE_LEN) ) unsigned_div ( - .clock(clk), - .aclr(1'b0), + .clk(clk), + .reset(reset), .clken(1'b1), // TODO this could be disabled on inactive instructions .numer(ALU_in1), .denom(ALU_in2), @@ -45,13 +45,11 @@ module VX_alu_unit ( VX_divide #( .WIDTHN(32), .WIDTHD(32), - .NREP("SIGNED"), - .DREP("SIGNED"), - .SPEED("HIGHEST"), + .REP("SIGNED"), .PIPELINE(DIV_PIPELINE_LEN) ) signed_div ( - .clock(clk), - .aclr(1'b0), + .clk(clk), + .reset(reset), .clken(1'b1), // TODO this could be disabled on inactive instructions .numer(ALU_in1), .denom(ALU_in2), @@ -63,12 +61,11 @@ module VX_alu_unit ( .WIDTHA(64), .WIDTHB(64), .WIDTHP(64), - .SPEED("HIGHEST"), - .FORCE_LE("YES"), + .REP("UNSIGNED"), .PIPELINE(MUL_PIPELINE_LEN) ) multiplier ( - .clock(clk), - .aclr(1'b0), + .clk(clk), + .reset(reset), .clken(1'b1), // TODO this could be disabled on inactive instructions .dataa(mul_data_a), .datab(mul_data_b), diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index f16abc46..960e691e 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -133,14 +133,16 @@ module VX_back_end #( `SCOPE_ASSIGN(scope_execute_valid, exec_unit_req_if.valid); `SCOPE_ASSIGN(scope_execute_warp_num, exec_unit_req_if.warp_num); + `SCOPE_ASSIGN(scope_execute_curr_PC, exec_unit_req_if.curr_PC); `SCOPE_ASSIGN(scope_execute_rd, exec_unit_req_if.rd); - `SCOPE_ASSIGN(scope_execute_a, exec_unit_req_if.a_reg_data[0]); - `SCOPE_ASSIGN(scope_execute_b, exec_unit_req_if.b_reg_data[0]); + `SCOPE_ASSIGN(scope_execute_a, exec_unit_req_if.a_reg_data); + `SCOPE_ASSIGN(scope_execute_b, exec_unit_req_if.b_reg_data); - `SCOPE_ASSIGN(scope_writeback_valid, writeback_if.valid); - `SCOPE_ASSIGN(scope_writeback_wb, writeback_if.wb); + `SCOPE_ASSIGN(scope_writeback_valid, writeback_if.valid); `SCOPE_ASSIGN(scope_writeback_warp_num, writeback_if.warp_num); + `SCOPE_ASSIGN(scope_writeback_curr_PC, writeback_if.curr_PC); + `SCOPE_ASSIGN(scope_writeback_wb, writeback_if.wb); `SCOPE_ASSIGN(scope_writeback_rd, writeback_if.rd); - `SCOPE_ASSIGN(scope_writeback_data, writeback_if.data[0]); + `SCOPE_ASSIGN(scope_writeback_data, writeback_if.data); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index cb7ff4db..d9539660 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -6,7 +6,7 @@ module VX_cluster #( `SCOPE_SIGNALS_ISTAGE_IO `SCOPE_SIGNALS_LSU_IO `SCOPE_SIGNALS_CORE_IO - `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_CACHE_IO `SCOPE_SIGNALS_PIPELINE_IO `SCOPE_SIGNALS_BE_IO @@ -120,7 +120,7 @@ module VX_cluster #( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND `SCOPE_SIGNALS_CORE_BIND - `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_CACHE_BIND `SCOPE_SIGNALS_PIPELINE_BIND `SCOPE_SIGNALS_BE_BIND @@ -319,7 +319,9 @@ module VX_cluster #( .NUM_SNP_REQUESTS (`NUM_CORES), .SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH), .SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH) - ) gpu_l2cache ( + ) l2cache ( + `SCOPE_SIGNALS_CACHE_UNBIND + .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 2eb2ff5e..0478a981 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -36,7 +36,7 @@ `endif `ifndef SHARED_MEM_BASE_ADDR -`define SHARED_MEM_BASE_ADDR 32'hFE000000 +`define SHARED_MEM_BASE_ADDR 32'h6FFFF000 `endif `ifndef STACK_BASE_ADDR diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index 7ce25bbd..33ae9635 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -6,7 +6,7 @@ module VX_core #( `SCOPE_SIGNALS_ISTAGE_IO `SCOPE_SIGNALS_LSU_IO `SCOPE_SIGNALS_CORE_IO - `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_CACHE_IO `SCOPE_SIGNALS_PIPELINE_IO `SCOPE_SIGNALS_BE_IO @@ -237,7 +237,7 @@ module VX_core #( VX_mem_unit #( .CORE_ID(CORE_ID) ) mem_unit ( - `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_CACHE_BIND .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_dcache_arb.v b/hw/rtl/VX_dcache_arb.v index d7057fc0..264c91e1 100644 --- a/hw/rtl/VX_dcache_arb.v +++ b/hw/rtl/VX_dcache_arb.v @@ -43,6 +43,6 @@ module VX_dcache_arb ( assign out_core_rsp_if.core_rsp_data = rsp_select0 ? in0_core_rsp_if.core_rsp_data : in1_core_rsp_if.core_rsp_data; assign out_core_rsp_if.core_rsp_tag = rsp_select0 ? in0_core_rsp_if.core_rsp_tag : in1_core_rsp_if.core_rsp_tag; assign in0_core_rsp_if.core_rsp_ready = out_core_rsp_if.core_rsp_ready && rsp_select0; - assign in1_core_rsp_if.core_rsp_ready = out_core_rsp_if.core_rsp_ready && ~rsp_select0; + assign in1_core_rsp_if.core_rsp_ready = out_core_rsp_if.core_rsp_ready && !rsp_select0; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 91f04ca3..3985fe05 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -56,14 +56,14 @@ module VX_icache_stage #( end // Icache Request - assign icache_req_if.core_req_valid = valid_inst && ~mrq_full; + assign icache_req_if.core_req_valid = valid_inst && !mrq_full; assign icache_req_if.core_req_rw = 0; assign icache_req_if.core_req_byteen = 4'b1111; assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc[31:2]; assign icache_req_if.core_req_data = 0; // Can't accept new request - assign icache_stage_delay = mrq_full || ~icache_req_if.core_req_ready; + assign icache_stage_delay = mrq_full || !icache_req_if.core_req_ready; `ifdef DBG_CORE_REQ_INFO assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num, mrq_write_addr}; @@ -78,7 +78,7 @@ module VX_icache_stage #( assign icache_stage_wid = fe_inst_meta_id.warp_num; // Can't accept new response - assign icache_rsp_if.core_rsp_ready = ~total_freeze; + assign icache_rsp_if.core_rsp_ready = !total_freeze; `SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.core_req_valid); `SCOPE_ASSIGN(scope_icache_req_warp_num, fe_inst_meta_fi.warp_num); diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 9800b5f8..05a4c952 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -132,7 +132,7 @@ module VX_lsu_unit #( `endif // Can't accept new request - assign delay = mrq_full || ~dcache_req_if.core_req_ready; + assign delay = mrq_full || !dcache_req_if.core_req_ready; // Core Response @@ -156,20 +156,20 @@ module VX_lsu_unit #( assign mem_wb_if.data = core_rsp_data; // Can't accept new response - assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem; + assign dcache_rsp_if.core_rsp_ready = !no_slot_mem; `SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.core_req_valid); `SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num); `SCOPE_ASSIGN(scope_dcache_req_curr_PC, use_pc); - `SCOPE_ASSIGN(scope_dcache_req_addr, {dcache_req_if.core_req_addr[0], 2'b0}); + `SCOPE_ASSIGN(scope_dcache_req_addr, use_address); `SCOPE_ASSIGN(scope_dcache_req_rw, core_req_rw); - `SCOPE_ASSIGN(scope_dcache_req_byteen,dcache_req_if.core_req_byteen[0]); - `SCOPE_ASSIGN(scope_dcache_req_data, dcache_req_if.core_req_data[0]); + `SCOPE_ASSIGN(scope_dcache_req_byteen,dcache_req_if.core_req_byteen); + `SCOPE_ASSIGN(scope_dcache_req_data, dcache_req_if.core_req_data); `SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_if.core_req_tag); `SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.core_req_ready); `SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_if.core_rsp_valid); - `SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.core_rsp_data[0]); + `SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.core_rsp_data); `SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_if.core_rsp_tag); `SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.core_rsp_ready); diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 8c06438f..6b3c3888 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -3,7 +3,7 @@ module VX_mem_unit # ( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_CACHE_IO input wire clk, input wire reset, @@ -78,6 +78,8 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) ) smem ( + `SCOPE_SIGNALS_CACHE_UNBIND + .clk (clk), .reset (reset), @@ -161,6 +163,8 @@ module VX_mem_unit # ( .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), .SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH) ) dcache ( + `SCOPE_SIGNALS_CACHE_BIND + .clk (clk), .reset (reset), @@ -243,7 +247,7 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) ) icache ( - `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_CACHE_UNBIND .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index d3d97a35..fb31cbd9 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -55,7 +55,7 @@ module VX_scheduler ( wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid; - reg [CTVW-1:0] count_valid_next = (acquire_rd && ~(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) : + reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) : (~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) : count_valid; diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index d2c9dcd0..8a75da9c 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -35,13 +35,24 @@ scope_decode_rs1, \ scope_decode_rs2, \ scope_execute_warp_num, \ + scope_execute_curr_PC, \ scope_execute_rd, \ scope_execute_a, \ scope_execute_b, \ scope_writeback_warp_num, \ + scope_writeback_curr_PC, \ scope_writeback_wb, \ scope_writeback_rd, \ - scope_writeback_data, + scope_writeback_data, \ + scope_bank_addr_st0, \ + scope_bank_addr_st1, \ + scope_bank_addr_st2, \ + scope_bank_is_mrvq_st1, \ + scope_bank_miss_st1, \ + scope_bank_dirty_st1, \ + scope_bank_tag_valid_st1, \ + scope_bank_tag_match_st1, \ + scope_bank_force_miss_st1, `define SCOPE_SIGNALS_UPD_LIST \ @@ -68,18 +79,22 @@ scope_memory_delay, \ scope_exec_delay, \ scope_gpr_stage_delay, \ - scope_busy + scope_busy, \ + scope_bank_valid_st0, \ + scope_bank_valid_st1, \ + scope_bank_valid_st2, \ + scope_bank_stall_pipe `define SCOPE_SIGNALS_DECL \ wire scope_dram_req_valid; \ wire [31:0] scope_dram_req_addr; \ wire scope_dram_req_rw; \ wire [15:0] scope_dram_req_byteen; \ - wire [31:0] scope_dram_req_data; \ + wire [127:0] scope_dram_req_data; \ wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \ wire scope_dram_req_ready; \ wire scope_dram_rsp_valid; \ - wire [31:0] scope_dram_rsp_data; \ + wire [127:0] scope_dram_rsp_data; \ wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \ wire scope_dram_rsp_ready; \ wire scope_snp_req_valid; \ @@ -101,14 +116,14 @@ wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \ wire [`NW_BITS-1:0] scope_dcache_req_warp_num; \ wire [31:0] scope_dcache_req_curr_PC; \ - wire [31:0] scope_dcache_req_addr; \ + wire [63:0] scope_dcache_req_addr; \ wire scope_dcache_req_rw; \ - wire [3:0] scope_dcache_req_byteen; \ - wire [31:0] scope_dcache_req_data; \ + wire [7:0] scope_dcache_req_byteen; \ + wire [63:0] scope_dcache_req_data; \ wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \ wire scope_dcache_req_ready; \ wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \ - wire [31:0] scope_dcache_rsp_data; \ + wire [63:0] scope_dcache_rsp_data; \ wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \ wire scope_dcache_rsp_ready; \ wire scope_busy; \ @@ -125,14 +140,29 @@ wire [4:0] scope_decode_rs2; \ wire [`NUM_THREADS-1:0] scope_execute_valid; \ wire [`NW_BITS-1:0] scope_execute_warp_num; \ + wire [31:0] scope_execute_curr_PC; \ wire [4:0] scope_execute_rd; \ - wire [31:0] scope_execute_a; \ - wire [31:0] scope_execute_b; \ + wire [63:0] scope_execute_a; \ + wire [63:0] scope_execute_b; \ wire [`NUM_THREADS-1:0] scope_writeback_valid; \ wire [`NW_BITS-1:0] scope_writeback_warp_num; \ + wire [31:0] scope_writeback_curr_PC; \ wire [1:0] scope_writeback_wb; \ wire [4:0] scope_writeback_rd; \ - wire [31:0] scope_writeback_data; + wire [63:0] scope_writeback_data; \ + wire scope_bank_valid_st0; \ + wire scope_bank_valid_st1; \ + wire scope_bank_valid_st2; \ + wire [31:0] scope_bank_addr_st0; \ + wire [31:0] scope_bank_addr_st1; \ + wire [31:0] scope_bank_addr_st2; \ + wire scope_bank_is_mrvq_st1; \ + wire scope_bank_miss_st1; \ + wire scope_bank_dirty_st1; \ + wire scope_bank_tag_valid_st1; \ + wire scope_bank_tag_match_st1; \ + wire scope_bank_force_miss_st1; \ + wire scope_bank_stall_pipe; `define SCOPE_SIGNALS_ISTAGE_IO \ output wire scope_icache_req_valid, \ @@ -149,20 +179,33 @@ output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \ output wire [`NW_BITS-1:0] scope_dcache_req_warp_num, \ output wire [31:0] scope_dcache_req_curr_PC, \ - output wire [31:0] scope_dcache_req_addr, \ + output wire [63:0] scope_dcache_req_addr, \ output wire scope_dcache_req_rw, \ - output wire [3:0] scope_dcache_req_byteen, \ - output wire [31:0] scope_dcache_req_data, \ + output wire [7:0] scope_dcache_req_byteen, \ + output wire [63:0] scope_dcache_req_data, \ output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \ output wire scope_dcache_req_ready, \ output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \ - output wire [31:0] scope_dcache_rsp_data, \ + output wire [63:0] scope_dcache_rsp_data, \ output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \ output wire scope_dcache_rsp_ready, `define SCOPE_SIGNALS_CORE_IO \ - `define SCOPE_SIGNALS_ICACHE_IO \ + `define SCOPE_SIGNALS_CACHE_IO \ + output wire scope_bank_valid_st0, \ + output wire scope_bank_valid_st1, \ + output wire scope_bank_valid_st2, \ + output wire [31:0] scope_bank_addr_st0, \ + output wire [31:0] scope_bank_addr_st1, \ + output wire [31:0] scope_bank_addr_st2, \ + output wire scope_bank_is_mrvq_st1, \ + output wire scope_bank_miss_st1, \ + output wire scope_bank_dirty_st1, \ + output wire scope_bank_tag_valid_st1, \ + output wire scope_bank_tag_match_st1, \ + output wire scope_bank_force_miss_st1, \ + output wire scope_bank_stall_pipe, `define SCOPE_SIGNALS_PIPELINE_IO \ output wire scope_busy, \ @@ -172,22 +215,24 @@ output wire scope_gpr_stage_delay, `define SCOPE_SIGNALS_BE_IO \ - output wire [`NUM_THREADS-1:0] scope_decode_valid, \ - output wire [`NW_BITS-1:0] scope_decode_warp_num, \ + output wire [`NUM_THREADS-1:0] scope_decode_valid, \ + output wire [`NW_BITS-1:0] scope_decode_warp_num, \ output wire [31:0] scope_decode_curr_PC, \ output wire scope_decode_is_jal, \ output wire [4:0] scope_decode_rs1, \ output wire [4:0] scope_decode_rs2, \ output wire [`NUM_THREADS-1:0] scope_execute_valid, \ output wire [`NW_BITS-1:0] scope_execute_warp_num, \ + output wire [31:0] scope_execute_curr_PC, \ output wire [4:0] scope_execute_rd, \ - output wire [31:0] scope_execute_a, \ - output wire [31:0] scope_execute_b, \ + output wire [63:0] scope_execute_a, \ + output wire [63:0] scope_execute_b, \ output wire [`NUM_THREADS-1:0] scope_writeback_valid, \ output wire [`NW_BITS-1:0] scope_writeback_warp_num, \ + output wire [31:0] scope_writeback_curr_PC, \ output wire [1:0] scope_writeback_wb, \ output wire [4:0] scope_writeback_rd, \ - output wire [31:0] scope_writeback_data, + output wire [63:0] scope_writeback_data, `define SCOPE_SIGNALS_ISTAGE_BIND \ .scope_icache_req_valid (scope_icache_req_valid), \ @@ -217,8 +262,83 @@ `define SCOPE_SIGNALS_CORE_BIND \ - `define SCOPE_SIGNALS_ICACHE_BIND \ + `define SCOPE_SIGNALS_CACHE_BIND \ + .scope_bank_valid_st0 (scope_bank_valid_st0), \ + .scope_bank_valid_st1 (scope_bank_valid_st1), \ + .scope_bank_valid_st2 (scope_bank_valid_st2), \ + .scope_bank_addr_st0 (scope_bank_addr_st0), \ + .scope_bank_addr_st1 (scope_bank_addr_st1), \ + .scope_bank_addr_st2 (scope_bank_addr_st2), \ + .scope_bank_is_mrvq_st1 (scope_bank_is_mrvq_st1), \ + .scope_bank_miss_st1 (scope_bank_miss_st1), \ + .scope_bank_dirty_st1 (scope_bank_dirty_st1), \ + .scope_bank_tag_valid_st1 (scope_bank_tag_valid_st1), \ + .scope_bank_tag_match_st1 (scope_bank_tag_match_st1), \ + .scope_bank_force_miss_st1 (scope_bank_force_miss_st1), \ + .scope_bank_stall_pipe (scope_bank_stall_pipe), + `define SCOPE_SIGNALS_CACHE_UNBIND \ + /* verilator lint_off PINCONNECTEMPTY */ \ + .scope_bank_valid_st0 (), \ + .scope_bank_valid_st1 (), \ + .scope_bank_valid_st2 (), \ + .scope_bank_addr_st0 (), \ + .scope_bank_addr_st1 (), \ + .scope_bank_addr_st2 (), \ + .scope_bank_is_mrvq_st1 (), \ + .scope_bank_miss_st1 (), \ + .scope_bank_dirty_st1 (), \ + .scope_bank_tag_valid_st1 (), \ + .scope_bank_tag_match_st1 (), \ + .scope_bank_force_miss_st1 (), \ + .scope_bank_stall_pipe (), \ + /* verilator lint_on PINCONNECTEMPTY */ + + `define SCOPE_SIGNALS_CACHE_BANK_SELECT \ + /* verilator lint_off UNUSED */ \ + wire [NUM_BANKS-1:0] scope_per_bank_valid_st0; \ + wire [NUM_BANKS-1:0] scope_per_bank_valid_st1; \ + wire [NUM_BANKS-1:0] scope_per_bank_valid_st2; \ + wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st0; \ + wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st1; \ + wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st2; \ + wire [NUM_BANKS-1:0] scope_per_bank_is_mrvq_st1; \ + wire [NUM_BANKS-1:0] scope_per_bank_miss_st1; \ + wire [NUM_BANKS-1:0] scope_per_bank_dirty_st1; \ + wire [NUM_BANKS-1:0] scope_per_bank_tag_valid_st1; \ + wire [NUM_BANKS-1:0] scope_per_bank_tag_match_st1; \ + wire [NUM_BANKS-1:0] scope_per_bank_force_miss_st1; \ + wire [NUM_BANKS-1:0] scope_per_bank_stall_pipe; \ + /* verilator lint_on UNUSED */ \ + assign scope_bank_valid_st0 = scope_per_bank_valid_st0[0]; \ + assign scope_bank_valid_st1 = scope_per_bank_valid_st1[0]; \ + assign scope_bank_valid_st2 = scope_per_bank_valid_st2[0]; \ + assign scope_bank_addr_st0 = scope_per_bank_addr_st0[0]; \ + assign scope_bank_addr_st1 = scope_per_bank_addr_st1[0]; \ + assign scope_bank_addr_st2 = scope_per_bank_addr_st2[0]; \ + assign scope_bank_is_mrvq_st1 = scope_per_bank_is_mrvq_st1[0]; \ + assign scope_bank_miss_st1 = scope_per_bank_miss_st1[0]; \ + assign scope_bank_dirty_st1 = scope_per_bank_dirty_st1[0]; \ + assign scope_bank_tag_valid_st1 = scope_per_bank_tag_valid_st1[0]; \ + assign scope_bank_tag_match_st1 = scope_per_bank_tag_match_st1[0]; \ + assign scope_bank_force_miss_st1 = scope_per_bank_force_miss_st1[0]; \ + assign scope_bank_stall_pipe = scope_per_bank_stall_pipe[0]; + + `define SCOPE_SIGNALS_CACHE_BANK_BIND \ + .scope_bank_valid_st0 (scope_per_bank_valid_st0[i]), \ + .scope_bank_valid_st1 (scope_per_bank_valid_st1[i]), \ + .scope_bank_valid_st2 (scope_per_bank_valid_st2[i]), \ + .scope_bank_addr_st0 (scope_per_bank_addr_st0[i]), \ + .scope_bank_addr_st1 (scope_per_bank_addr_st1[i]), \ + .scope_bank_addr_st2 (scope_per_bank_addr_st2[i]), \ + .scope_bank_is_mrvq_st1 (scope_per_bank_is_mrvq_st1[i]), \ + .scope_bank_miss_st1 (scope_per_bank_miss_st1[i]), \ + .scope_bank_dirty_st1 (scope_per_bank_dirty_st1[i]), \ + .scope_bank_tag_valid_st1 (scope_per_bank_tag_valid_st1[i]), \ + .scope_bank_tag_match_st1 (scope_per_bank_tag_match_st1[i]), \ + .scope_bank_force_miss_st1 (scope_per_bank_force_miss_st1[i]), \ + .scope_bank_stall_pipe (scope_per_bank_stall_pipe[i]), + `define SCOPE_SIGNALS_PIPELINE_BIND \ .scope_busy (scope_busy), \ .scope_schedule_delay (scope_schedule_delay), \ @@ -235,11 +355,13 @@ .scope_decode_rs2 (scope_decode_rs2), \ .scope_execute_valid (scope_execute_valid), \ .scope_execute_warp_num (scope_execute_warp_num), \ + .scope_execute_curr_PC (scope_execute_curr_PC), \ .scope_execute_rd (scope_execute_rd), \ .scope_execute_a (scope_execute_a), \ .scope_execute_b (scope_execute_b), \ .scope_writeback_valid (scope_writeback_valid), \ .scope_writeback_warp_num (scope_writeback_warp_num), \ + .scope_writeback_curr_PC(scope_writeback_curr_PC), \ .scope_writeback_wb (scope_writeback_wb), \ .scope_writeback_rd (scope_writeback_rd), \ .scope_writeback_data (scope_writeback_data), @@ -249,17 +371,21 @@ `define SCOPE_SIGNALS_ISTAGE_IO `define SCOPE_SIGNALS_LSU_IO `define SCOPE_SIGNALS_CORE_IO - `define SCOPE_SIGNALS_ICACHE_IO + `define SCOPE_SIGNALS_CACHE_IO `define SCOPE_SIGNALS_PIPELINE_IO `define SCOPE_SIGNALS_BE_IO `define SCOPE_SIGNALS_ISTAGE_BIND `define SCOPE_SIGNALS_LSU_BIND `define SCOPE_SIGNALS_CORE_BIND - `define SCOPE_SIGNALS_ICACHE_BIND + `define SCOPE_SIGNALS_CACHE_BIND `define SCOPE_SIGNALS_PIPELINE_BIND `define SCOPE_SIGNALS_BE_BIND + `define SCOPE_SIGNALS_CACHE_UNBIND + `define SCOPE_SIGNALS_CACHE_BANK_SELECT + `define SCOPE_SIGNALS_CACHE_BANK_BIND + `define SCOPE_ASSIGN(d,s) `endif diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index a4a961b3..09e381a0 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -4,7 +4,7 @@ module Vortex ( `SCOPE_SIGNALS_ISTAGE_IO `SCOPE_SIGNALS_LSU_IO `SCOPE_SIGNALS_CORE_IO - `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_CACHE_IO `SCOPE_SIGNALS_PIPELINE_IO `SCOPE_SIGNALS_BE_IO @@ -66,7 +66,7 @@ module Vortex ( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND `SCOPE_SIGNALS_CORE_BIND - `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_CACHE_BIND `SCOPE_SIGNALS_PIPELINE_BIND `SCOPE_SIGNALS_BE_BIND @@ -162,7 +162,7 @@ module Vortex ( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND `SCOPE_SIGNALS_CORE_BIND - `SCOPE_SIGNALS_ICACHE_BIND + `SCOPE_SIGNALS_CACHE_BIND `SCOPE_SIGNALS_PIPELINE_BIND `SCOPE_SIGNALS_BE_BIND @@ -332,7 +332,9 @@ module Vortex ( .NUM_SNP_REQUESTS (`NUM_CLUSTERS), .SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH), .SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH) - ) gpu_l3cache ( + ) l3cache ( + `SCOPE_SIGNALS_CACHE_UNBIND + .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 30179004..180c863a 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -52,6 +52,8 @@ module VX_bank #( // Snooping request tag width parameter SNP_REQ_TAG_WIDTH = 0 ) ( + `SCOPE_SIGNALS_CACHE_IO + input wire clk, input wire reset, @@ -153,7 +155,7 @@ module VX_bank #( `UNUSED_PIN (size) ); - assign snp_req_ready = ~snrq_full; + assign snp_req_ready = !snrq_full; wire dfpq_pop; wire dfpq_empty; @@ -223,7 +225,7 @@ module VX_bank #( .reqq_full (reqq_full) ); - assign core_req_ready = ~reqq_full; + assign core_req_ready = !reqq_full; assign reqq_push = (| core_req_valid) && core_req_ready; wire mrvq_pop; @@ -291,7 +293,7 @@ module VX_bank #( wire qual_valid_st0; wire [`LINE_ADDR_WIDTH-1:0] qual_addr_st0; wire [`UP(`WORD_SELECT_WIDTH)-1:0] qual_wsel_st0; - wire qual_from_mrvq_st0; + wire qual_is_mrvq_st0; wire [`WORD_WIDTH-1:0] qual_writeword_st0; wire [`BANK_LINE_WIDTH-1:0] qual_writedata_st0; @@ -308,7 +310,7 @@ module VX_bank #( wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0]; wire is_snp_st1 [STAGE_1_CYCLES-1:0]; wire snp_invalidate_st1 [STAGE_1_CYCLES-1:0]; - wire from_mrvq_st1 [STAGE_1_CYCLES-1:0]; + wire is_mrvq_st1 [STAGE_1_CYCLES-1:0]; assign qual_is_fill_st0 = dfpq_pop_unqual; @@ -352,7 +354,7 @@ module VX_bank #( reqq_pop_unqual ? reqq_req_writeword_st0 : 0; - assign qual_from_mrvq_st0 = mrvq_pop_unqual; + assign qual_is_mrvq_st0 = mrvq_pop_unqual; `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin @@ -367,8 +369,8 @@ module VX_bank #( .reset (reset), .stall (stall_bank_pipe), .flush (1'b0), - .in ({qual_from_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), - .out ({from_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]}) + .in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), + .out ({is_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]}) ); genvar i; @@ -380,8 +382,8 @@ module VX_bank #( .reset (reset), .stall (stall_bank_pipe), .flush (1'b0), - .in ({from_mrvq_st1[i-1], is_snp_st1[i-1], snp_invalidate_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}), - .out ({from_mrvq_st1[i] , is_snp_st1[i], snp_invalidate_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]}) + .in ({is_mrvq_st1[i-1], is_snp_st1[i-1], snp_invalidate_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}), + .out ({is_mrvq_st1[i] , is_snp_st1[i], snp_invalidate_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]}) ); end @@ -404,26 +406,31 @@ module VX_bank #( wire mrvq_init_ready_state_st1e; wire miss_add_because_miss; wire valid_st1e; - wire from_mrvq_st1e; + wire is_mrvq_st1e; wire mrvq_recover_ready_state_st1e; + wire[`LINE_ADDR_WIDTH-1:0] addr_st1e; - assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1]; + wire tag_valid_st1e; + wire tag_match_st1e; + + assign is_mrvq_st1e = is_mrvq_st1[STAGE_1_CYCLES-1]; assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1]; assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1]; assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1]; + assign addr_st1e = addr_st1[STAGE_1_CYCLES-1]; assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; assign st2_pending_hazard_st1e = (miss_add_because_miss) - && ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2); + && ((addr_st2 == addr_st1e) && !is_fill_st2); - assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) - || (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2); + assign force_request_miss_st1e = (valid_st1e && !is_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) + || (valid_st1e && is_mrvq_st1e && recover_mrvq_state_st2); assign mrvq_recover_ready_state_st1e = valid_st1e - && from_mrvq_st1e + && is_mrvq_st1e && recover_mrvq_state_st2 - && (addr_st2 == addr_st1[STAGE_1_CYCLES-1]); + && (addr_st2 == addr_st1e); VX_tag_data_access #( .CACHE_SIZE (CACHE_SIZE), @@ -447,7 +454,7 @@ module VX_bank #( // Actual Read/Write .valid_req_st1e (valid_st1e), .writefill_st1e (is_fill_st1[STAGE_1_CYCLES-1]), - .writeaddr_st1e (addr_st1[STAGE_1_CYCLES-1]), + .writeaddr_st1e (addr_st1e), .wordsel_st1e (wsel_st1[STAGE_1_CYCLES-1]), .writeword_st1e (writeword_st1[STAGE_1_CYCLES-1]), .writedata_st1e (writedata_st1[STAGE_1_CYCLES-1]), @@ -467,7 +474,10 @@ module VX_bank #( .dirtyb_st1e (dirtyb_st1e), .fill_saw_dirty_st1e (fill_saw_dirty_st1e), .snp_to_mrvq_st1e (snp_to_mrvq_st1e), - .mrvq_init_ready_state_st1e(mrvq_init_ready_state_st1e) + .mrvq_init_ready_state_st1e(mrvq_init_ready_state_st1e), + + .tag_valid_st1e (tag_valid_st1e), + .tag_match_st1e (tag_match_st1e) ); `ifdef DBG_CORE_REQ_INFO @@ -476,8 +486,8 @@ module VX_bank #( end `endif - wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1]; - wire from_mrvq_st1e_st2 = from_mrvq_st1e; + wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1]; + wire is_mrvq_st1e_st2 = is_mrvq_st1e; wire valid_st2; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2; @@ -493,7 +503,7 @@ module VX_bank #( wire is_snp_st2; wire snp_invalidate_st2; wire snp_to_mrvq_st2; - wire from_mrvq_st2; + wire is_mrvq_st2; wire mrvq_init_ready_state_st2; wire mrvq_recover_ready_state_st2; wire mrvq_init_ready_state_unqual_st2; @@ -507,8 +517,8 @@ module VX_bank #( .reset (reset), .stall (stall_bank_pipe), .flush (1'b0), - .in ({mrvq_recover_ready_state_st1e, from_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), - .out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 }) + .in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), + .out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 }) ); `ifdef DBG_CORE_REQ_INFO @@ -530,7 +540,7 @@ module VX_bank #( || dwbq_push_stall || dram_fill_req_stall); - assign recover_mrvq_state_st2 = miss_add && from_mrvq_st2; + assign recover_mrvq_state_st2 = miss_add && is_mrvq_st2; wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2; wire [`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel = wsel_st2; @@ -539,10 +549,10 @@ module VX_bank #( wire miss_add_is_snp = is_snp_st2; wire miss_add_snp_invalidate = snp_invalidate_st2; - wire miss_add_from_mrvq = valid_st2 && from_mrvq_st2 && !stall_bank_pipe; + wire miss_add_is_mrvq = valid_st2 && is_mrvq_st2 && !stall_bank_pipe; - assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 ); - assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]); + assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0); + assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1e); assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 || mrvq_init_ready_state_hazard_st0_st1 @@ -564,7 +574,7 @@ module VX_bank #( // Enqueue .miss_add (miss_add), - .from_mrvq (miss_add_from_mrvq), + .is_mrvq (miss_add_is_mrvq), .miss_add_addr (miss_add_addr), .miss_add_wsel (miss_add_wsel), .miss_add_data (miss_add_data), @@ -580,7 +590,7 @@ module VX_bank #( // Broadcast .is_fill_st1 (is_fill_st1[STAGE_1_CYCLES-1]), - .fill_addr_st1 (addr_st1[STAGE_1_CYCLES-1]), + .fill_addr_st1 (addr_st1e), .pending_hazard (mrvq_pending_hazard_st1e), // Dequeue @@ -641,7 +651,7 @@ module VX_bank #( wire dram_fill_req_unqual = miss_add_unqual && (!mrvq_init_ready_state_st2 - || (from_mrvq_st2 && !mrvq_recover_ready_state_st2)); + || (is_mrvq_st2 && !mrvq_recover_ready_state_st2)); assign dram_fill_req_valid = dram_fill_req_unqual && !(dwbq_push_stall @@ -649,7 +659,7 @@ module VX_bank #( || cwbq_push_stall); assign dram_fill_req_addr = addr_st2; - assign dram_fill_req_stall = dram_fill_req_unqual && ~dram_fill_req_ready; + assign dram_fill_req_stall = dram_fill_req_unqual && !dram_fill_req_ready; // Enqueue DRAM writeback request @@ -706,11 +716,11 @@ module VX_bank #( end // when both dwb and snp are asserted, first release the cwb, then release the snp. - assign dram_wb_req_valid = ~dwbq_empty && dwbq_is_dwb_out && (~dwbq_is_snp_out || dwbq_dual_valid_sel == 0); - assign snp_rsp_valid = ~dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dwb_out || dwbq_dual_valid_sel == 1); + assign dram_wb_req_valid = !dwbq_empty && dwbq_is_dwb_out && (~dwbq_is_snp_out || dwbq_dual_valid_sel == 0); + assign snp_rsp_valid = !dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dwb_out || dwbq_dual_valid_sel == 1); - assign dwbq_pop = (dwbq_is_dwb_out && ~dwbq_is_snp_out && dram_wb_req_fire) - || (dwbq_is_snp_out && ~dwbq_is_dwb_out && snp_rsp_fire) + assign dwbq_pop = (dwbq_is_dwb_out && !dwbq_is_snp_out && dram_wb_req_fire) + || (dwbq_is_snp_out && !dwbq_is_dwb_out && snp_rsp_fire) || (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire); // bank pipeline stall @@ -745,4 +755,20 @@ module VX_bank #( end `endif +`SCOPE_ASSIGN(scope_bank_valid_st0, qual_valid_st0); +`SCOPE_ASSIGN(scope_bank_valid_st1, valid_st1e); +`SCOPE_ASSIGN(scope_bank_valid_st2, valid_st2); + +`SCOPE_ASSIGN(scope_bank_is_mrvq_st1, is_mrvq_st1e); +`SCOPE_ASSIGN(scope_bank_miss_st1, miss_st1e); +`SCOPE_ASSIGN(scope_bank_dirty_st1, dirty_st1e); +`SCOPE_ASSIGN(scope_bank_tag_valid_st1, tag_valid_st1e); +`SCOPE_ASSIGN(scope_bank_tag_match_st1, tag_match_st1e); +`SCOPE_ASSIGN(scope_bank_force_miss_st1, force_request_miss_st1e); +`SCOPE_ASSIGN(scope_bank_stall_pipe, stall_bank_pipe); + +`SCOPE_ASSIGN(scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID)); +`SCOPE_ASSIGN(scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1e, BANK_ID)); +`SCOPE_ASSIGN(scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); + endmodule : VX_bank \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index b3b56741..99055572 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -65,7 +65,7 @@ module VX_cache #( // Snooping forward tag width parameter SNP_FWD_TAG_WIDTH = 1 ) ( - `SCOPE_SIGNALS_ICACHE_IO + `SCOPE_SIGNALS_CACHE_IO input wire clk, input wire reset, @@ -167,6 +167,8 @@ module VX_cache #( wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag; wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready; + `SCOPE_SIGNALS_CACHE_BANK_SELECT + wire snp_req_valid_qual; wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual; wire snp_req_invalidate_qual; @@ -352,28 +354,30 @@ module VX_cache #( assign curr_bank_snp_rsp_ready = per_bank_snp_rsp_ready[i]; VX_bank #( - .BANK_ID (i), - .CACHE_ID (CACHE_ID), - .CACHE_SIZE (CACHE_SIZE), - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS), - .STAGE_1_CYCLES (STAGE_1_CYCLES), - .CREQ_SIZE (CREQ_SIZE), - .MRVQ_SIZE (MRVQ_SIZE), - .DFPQ_SIZE (DFPQ_SIZE), - .SNRQ_SIZE (SNRQ_SIZE), - .CWBQ_SIZE (CWBQ_SIZE), - .DWBQ_SIZE (DWBQ_SIZE), - .DFQQ_SIZE (DFQQ_SIZE), - .DRAM_ENABLE (DRAM_ENABLE), - .WRITE_ENABLE (WRITE_ENABLE), - .SNOOP_FORWARDING (SNOOP_FORWARDING), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), - .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) + .BANK_ID (i), + .CACHE_ID (CACHE_ID), + .CACHE_SIZE (CACHE_SIZE), + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE), + .NUM_REQUESTS (NUM_REQUESTS), + .STAGE_1_CYCLES (STAGE_1_CYCLES), + .CREQ_SIZE (CREQ_SIZE), + .MRVQ_SIZE (MRVQ_SIZE), + .DFPQ_SIZE (DFPQ_SIZE), + .SNRQ_SIZE (SNRQ_SIZE), + .CWBQ_SIZE (CWBQ_SIZE), + .DWBQ_SIZE (DWBQ_SIZE), + .DFQQ_SIZE (DFQQ_SIZE), + .DRAM_ENABLE (DRAM_ENABLE), + .WRITE_ENABLE (WRITE_ENABLE), + .SNOOP_FORWARDING (SNOOP_FORWARDING), + .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), + .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) ) bank ( + `SCOPE_SIGNALS_CACHE_BANK_BIND + .clk (clk), .reset (reset), // Core request @@ -452,11 +456,11 @@ module VX_cache #( ); VX_cache_core_rsp_merge #( - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE), + .NUM_REQUESTS (NUM_REQUESTS), + .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) ) cache_core_rsp_merge ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v index a13e367f..8c38452a 100644 --- a/hw/rtl/cache/VX_cache_dram_req_arb.v +++ b/hw/rtl/cache/VX_cache_dram_req_arb.v @@ -56,7 +56,7 @@ module VX_cache_dram_req_arb #( .clk (clk), .reset (reset), - .dram_req (dram_req_valid && ~dram_req_rw), + .dram_req (dram_req_valid && !dram_req_rw), .dram_req_addr(dram_req_addr), .pref_pop (pref_pop), @@ -91,7 +91,7 @@ module VX_cache_dram_req_arb #( .dfqq_full (dfqq_full) ); - assign dram_fill_req_ready = ~dfqq_full; + assign dram_fill_req_ready = !dfqq_full; wire [`BANK_BITS-1:0] dwb_bank; diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 25fc234b..d4e6d5ee 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -23,7 +23,7 @@ module VX_cache_miss_resrv #( // Miss enqueue input wire miss_add, - input wire from_mrvq, + input wire is_mrvq, input wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr, input wire[`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel, input wire[`WORD_WIDTH-1:0] miss_add_data, @@ -102,11 +102,11 @@ module VX_cache_miss_resrv #( miss_resrv_is_snp_st0, miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index]; - wire mrvq_push = miss_add && enqueue_possible && !from_mrvq; + wire mrvq_push = miss_add && enqueue_possible && !is_mrvq; wire mrvq_pop = miss_resrv_pop && dequeue_possible; - wire recover_state = miss_add && from_mrvq; - wire increment_head = !miss_add && from_mrvq; + wire recover_state = miss_add && is_mrvq; + wire increment_head = !miss_add && is_mrvq; wire update_ready = (|make_ready); diff --git a/hw/rtl/cache/VX_prefetcher.v b/hw/rtl/cache/VX_prefetcher.v index f1fafe8e..ab8267ce 100644 --- a/hw/rtl/cache/VX_prefetcher.v +++ b/hw/rtl/cache/VX_prefetcher.v @@ -28,7 +28,7 @@ module VX_prefetcher #( wire current_full; wire current_empty; - assign current_valid = ~current_empty; + assign current_valid = !current_empty; wire update_use = ((use_valid == 0) || ((use_valid-1) == 0)) && current_valid; diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index 388e3cf6..8d70a1f1 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -50,14 +50,17 @@ module VX_tag_data_access #( output wire[BANK_LINE_SIZE-1:0] dirtyb_st1e, output wire fill_saw_dirty_st1e, output wire snp_to_mrvq_st1e, - output wire mrvq_init_ready_state_st1e + output wire mrvq_init_ready_state_st1e, + + output wire tag_valid_st1e, + output wire tag_match_st1e ); - reg read_valid_st1c[STAGE_1_CYCLES-1:0]; - reg read_dirty_st1c[STAGE_1_CYCLES-1:0]; - reg[BANK_LINE_SIZE-1:0] read_dirtyb_st1c[STAGE_1_CYCLES-1:0]; - reg[`TAG_SELECT_BITS-1:0] read_tag_st1c [STAGE_1_CYCLES-1:0]; - reg[`BANK_LINE_WIDTH-1:0] read_data_st1c [STAGE_1_CYCLES-1:0]; + wire read_valid_st1c[STAGE_1_CYCLES-1:0]; + wire read_dirty_st1c[STAGE_1_CYCLES-1:0]; + wire[BANK_LINE_SIZE-1:0] read_dirtyb_st1c[STAGE_1_CYCLES-1:0]; + wire[`TAG_SELECT_BITS-1:0] read_tag_st1c [STAGE_1_CYCLES-1:0]; + wire[`BANK_LINE_WIDTH-1:0] read_data_st1c [STAGE_1_CYCLES-1:0]; wire qual_read_valid_st1; wire qual_read_dirty_st1; @@ -135,7 +138,7 @@ module VX_tag_data_access #( ); end - assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || ~DRAM_ENABLE; // If shared memory, always valid + assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || !DRAM_ENABLE; // If shared memory, always valid assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writetag_st1e; // Tag is always the same in SM assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1]; @@ -177,11 +180,8 @@ module VX_tag_data_access #( wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && !force_request_miss_st1e; wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e; wire req_miss = valid_req_st1e && !is_snp_st1e && use_read_valid_st1e && !writefill_st1e && !tags_match; - wire real_miss = req_invalid || req_miss; - - wire force_core_miss = (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e && !real_miss); - + wire force_core_miss = (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e && !real_miss); assign snp_to_mrvq_st1e = valid_req_st1e && is_snp_st1e && force_request_miss_st1e; // The second term is basically saying always make an entry ready if there's already antoher entry waiting, even if you yourself see a miss @@ -197,6 +197,9 @@ module VX_tag_data_access #( assign fill_saw_dirty_st1e = real_writefill && dirty_st1e; assign invalidate_line = snoop_hit_no_pending; + assign tag_valid_st1e = use_read_valid_st1e; + assign tag_match_st1e = tags_match; + endmodule diff --git a/hw/rtl/libs/VX_divide.v b/hw/rtl/libs/VX_divide.v index 4862a542..9e2369e0 100644 --- a/hw/rtl/libs/VX_divide.v +++ b/hw/rtl/libs/VX_divide.v @@ -1,14 +1,14 @@ `include "VX_define.vh" module VX_divide #( - parameter WIDTHN=1, - parameter WIDTHD=1, - parameter NREP="UNSIGNED", - parameter DREP="UNSIGNED", - parameter SPEED="MIXED", // "MIXED" or "HIGHEST" - parameter PIPELINE=0 + parameter WIDTHN = 1, + parameter WIDTHD = 1, + parameter REP = "UNSIGNED", + parameter PIPELINE = 0 ) ( - input clock, aclr, clken, + input wire clk, + input wire reset, + input wire clken, input [WIDTHN-1:0] numer, input [WIDTHD-1:0] denom, @@ -17,105 +17,86 @@ module VX_divide #( output reg [WIDTHD-1:0] remainder ); - generate +`ifdef QUARTUS - if (NREP != DREP) begin - different_nrep_drep_not_yet_supported non_existing_module(); - end + lpm_divide #( + .LPM_WIDTHN(WIDTHN), + .LPM_WIDTHD(WIDTHD), + .LPM_NREPRESENTATION(REP), + .LPM_DREPRESENTATION(REP), + .LPM_PIPELINE(PIPELINE), + .DSP_BLOCK_BALANCING("LOGIC ELEMENTS"), + .MAXIMIZE_SPEED(9) + ) quartus_divider ( + .clock(clk), + .aclr(reset), + .clken(clken), + .numer(numer), + .denom(denom), + .quotient(quotient), + .remain(remainder) + ); - `ifdef QUARTUS +`else - localparam lpm_speed=SPEED == "HIGHEST" ? 9 : 5; + wire [WIDTHN-1:0] numer_pipe_end; + wire [WIDTHD-1:0] denom_pipe_end; - lpm_divide #( - .LPM_WIDTHN(WIDTHN), - .LPM_WIDTHD(WIDTHD), - .LPM_NREPRESENTATION(NREP), - .LPM_DREPRESENTATION(DREP), - .LPM_PIPELINE(PIPELINE), - .LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator - .MAXIMIZE_SPEED(lpm_speed) - ) quartus_divider ( - .clock(clock), - .aclr(aclr), - .clken(clken), - .numer(numer), - .denom(denom), - .quotient(quotient), - .remain(remainder) - ); + if (PIPELINE == 0) begin + assign numer_pipe_end = numer; + assign denom_pipe_end = denom; + end else begin + reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1]; + reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1]; - `else - - wire [WIDTHN-1:0] numer_pipe_end; - wire [WIDTHD-1:0] denom_pipe_end; - - if (PIPELINE == 0) begin - assign numer_pipe_end = numer; - assign denom_pipe_end = denom; - end else begin - reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1]; - reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1]; - - genvar i; - for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages - always @(posedge clock or posedge aclr) begin - if (aclr) begin - numer_pipe[i+1] <= 0; - denom_pipe[i+1] <= 0; - end - else if (clken) begin - numer_pipe[i+1] <= numer_pipe[i]; - denom_pipe[i+1] <= denom_pipe[i]; - end - end - end - - always @(posedge clock or posedge aclr) begin - if (aclr) begin - numer_pipe[0] <= 0; - denom_pipe[0] <= 0; + genvar i; + for (i = 0; i < PIPELINE; i++) begin + always @(posedge clk) begin + if (reset) begin + numer_pipe[i] <= 0; + denom_pipe[i] <= 0; end else if (clken) begin - numer_pipe[0] <= numer; - denom_pipe[0] <= denom; - end - end - - assign numer_pipe_end = numer_pipe[PIPELINE-1]; - assign denom_pipe_end = denom_pipe[PIPELINE-1]; - end - - /* * * * * * * * * * * * * * * * * * * * * * */ - /* Do the actual fallback computation here */ - /* * * * * * * * * * * * * * * * * * * * * * */ - - if (NREP == "SIGNED") begin - always @(*) begin - if (denom_pipe_end == 0) begin - quotient = 32'hffffffff; - remainder = numer_pipe_end; - end - else if (denom_pipe_end == 32'hffffffff - && numer_pipe_end == 32'h80000000) begin - // this edge case kills verilator in some cases by causing a division - // overflow exception. INT_MIN / -1 (on x86) - quotient = 0; - remainder = 0; - end - else begin - quotient = $signed(numer_pipe_end) / $signed(denom_pipe_end); - remainder = $signed(numer_pipe_end) % $signed(denom_pipe_end); + if (i == 0) begin + numer_pipe[0] <= 0; + denom_pipe[0] <= 0; + end else begin + numer_pipe[i] <= numer_pipe[i-1]; + denom_pipe[i] <= denom_pipe[i-1]; + end end end end - else begin - assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end; - assign remainder = (denom_pipe_end == 0) ? numer_pipe_end : numer_pipe_end%denom_pipe_end; - end + assign numer_pipe_end = numer_pipe[PIPELINE-1]; + assign denom_pipe_end = denom_pipe[PIPELINE-1]; + end + + always @(*) begin + if (denom_pipe_end == 0) begin + quotient = {WIDTHN{1'b1}}; + remainder = numer_pipe_end; + end + `ifndef SYNTHESIS + // this edge case kills verilator in some cases by causing a division + // overflow exception. INT_MIN / -1 (on x86) + else if (numer_pipe_end == {1'b1, (WIDTHN-1)'(0)} + && denom_pipe_end == {WIDTHD{1'b1}}) begin + quotient = 0; + remainder = 0; + end `endif + else begin + if (REP == "SIGNED") begin + quotient = $signed(numer_pipe_end) / $signed(denom_pipe_end); + remainder = $signed(numer_pipe_end) % $signed(denom_pipe_end); + end else begin + quotient = numer_pipe_end / denom_pipe_end; + remainder = numer_pipe_end % denom_pipe_end; + end + end + end - endgenerate +`endif endmodule : VX_divide diff --git a/hw/rtl/libs/VX_indexable_queue.v b/hw/rtl/libs/VX_indexable_queue.v index 64c3fe9b..b6749e81 100644 --- a/hw/rtl/libs/VX_indexable_queue.v +++ b/hw/rtl/libs/VX_indexable_queue.v @@ -28,8 +28,8 @@ module VX_indexable_queue #( assign empty = (wr_ptr == rd_ptr); assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SIZE)] != rd_ptr[`LOG2UP(SIZE)]); - assign enqueue = push && ~full; - assign dequeue = ~empty && ~valid[rd_a]; // auto-remove when head is invalid + assign enqueue = push && !full; + assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/libs/VX_matrix_arbiter.v b/hw/rtl/libs/VX_matrix_arbiter.v index 6878cfed..1d578166 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.v +++ b/hw/rtl/libs/VX_matrix_arbiter.v @@ -30,17 +30,17 @@ module VX_matrix_arbiter #( for (i = 0; i < N; ++i) begin for (j = 0; j < N; ++j) begin if (j > i) begin - assign pri[j][i] = requests[i] & state[i][j]; + assign pri[j][i] = requests[i] && state[i][j]; end else if (j < i) begin - assign pri[j][i] = requests[i] & ~state[j][i]; + assign pri[j][i] = requests[i] && !state[j][i]; end else begin assign pri[j][i] = 0; end end - assign grant_onehot[i] = requests[i] & ~(| pri[i]); + assign grant_onehot[i] = requests[i] && !(| pri[i]); end for (i = 0; i < N; ++i) begin @@ -50,7 +50,7 @@ module VX_matrix_arbiter #( state[i][j] <= 0; end else begin - state[i][j] <= (state[i][j] || grant_onehot[j]) && ~grant_onehot[i]; + state[i][j] <= (state[i][j] || grant_onehot[j]) && !grant_onehot[i]; end end end diff --git a/hw/rtl/libs/VX_mult.v b/hw/rtl/libs/VX_mult.v index 03187b8a..e337b1c6 100644 --- a/hw/rtl/libs/VX_mult.v +++ b/hw/rtl/libs/VX_mult.v @@ -1,16 +1,14 @@ `include "VX_define.vh" module VX_mult #( - parameter WIDTHA=1, - parameter WIDTHB=1, - parameter WIDTHP=1, - parameter REP="UNSIGNED", - parameter SPEED="MIXED", // "MIXED" or "HIGHEST" - parameter PIPELINE=0, - parameter FORCE_LE="NO" + parameter WIDTHA = 1, + parameter WIDTHB = 1, + parameter WIDTHP = 1, + parameter REP = "UNSIGNED", + parameter PIPELINE = 0 ) ( - input clock, - input aclr, + input clk, + input reset, input clken, input [WIDTHA-1:0] dataa, @@ -19,102 +17,67 @@ module VX_mult #( output reg [WIDTHP-1:0] result ); - generate +`ifdef QUARTUS - `ifdef QUARTUS + lpm_mult #( + .LPM_WIDTHA(WIDTHA), + .LPM_WIDTHB(WIDTHB), + .LPM_WIDTHP(WIDTHP), + .LPM_REPRESENTATION(REP), + .LPM_PIPELINE(PIPELINE), + .DSP_BLOCK_BALANCING("LOGIC ELEMENTS"), + .MAXIMIZE_SPEED(9) + ) quartus_mult ( + .clock(clk), + .aclr(reset), + .clken(clken), + .dataa(dataa), + .datab(datab), + .result(result) + ); - localparam lpm_speed = (SPEED == "HIGHEST") ? 10 : 5; +`else + + wire [WIDTHA-1:0] dataa_pipe_end; + wire [WIDTHB-1:0] datab_pipe_end; - if (FORCE_LE == "YES") begin - lpm_mult #( - .LPM_WIDTHA(WIDTHA), - .LPM_WIDTHB(WIDTHB), - .LPM_WIDTHP(WIDTHP), - .LPM_REPRESENTATION(REP), - .LPM_PIPELINE(PIPELINE), - .DSP_BLOCK_BALANCING("LOGIC ELEMENTS"), - .MAXIMIZE_SPEED(lpm_speed) - ) quartus_mult ( - .clock(clock), - .aclr(aclr), - .clken(clken), - .dataa(dataa), - .datab(datab), - .result(result) - ); - end - else begin - lpm_mult#( - .LPM_WIDTHA(WIDTHA), - .LPM_WIDTHB(WIDTHB), - .LPM_WIDTHP(WIDTHP), - .LPM_REPRESENTATION(REP), - .LPM_PIPELINE(PIPELINE), - .MAXIMIZE_SPEED(lpm_speed) - ) quartus_mult( - .clock(clock), - .aclr(aclr), - .clken(clken), - .dataa(dataa), - .datab(datab), - .result(result) - ); - end + if (PIPELINE == 0) begin + assign dataa_pipe_end = dataa; + assign datab_pipe_end = datab; + end else begin + reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1]; + reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1]; - `else - - wire [WIDTHA-1:0] dataa_pipe_end; - wire [WIDTHB-1:0] datab_pipe_end; - - if (PIPELINE == 0) begin - assign dataa_pipe_end = dataa; - assign datab_pipe_end = datab; - end else begin - reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1]; - reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1]; - - genvar i; - for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages - always @(posedge clock or posedge aclr) begin - if (aclr) begin - dataa_pipe[i+1] <= 0; - datab_pipe[i+1] <= 0; - end - else if (clken) begin - dataa_pipe[i+1] <= dataa_pipe[i]; - datab_pipe[i+1] <= datab_pipe[i]; - end - end - end - - always @(posedge clock or posedge aclr) begin - if (aclr) begin - dataa_pipe[0] <= 0; - datab_pipe[0] <= 0; + genvar i; + for (i = 0; i < PIPELINE; i++) begin + always @(posedge clk) begin + if (reset) begin + dataa_pipe[i] <= 0; + datab_pipe[i] <= 0; end else if (clken) begin - dataa_pipe[0] <= dataa; - datab_pipe[0] <= datab; + if (i == 0) begin + dataa_pipe[0] <= dataa; + datab_pipe[0] <= datab; + end else begin + dataa_pipe[i] <= dataa_pipe[i-1]; + datab_pipe[i] <= datab_pipe[i-1]; + end end end - - assign dataa_pipe_end = dataa_pipe[PIPELINE-1]; - assign datab_pipe_end = datab_pipe[PIPELINE-1]; end - /* * * * * * * * * * * * * * * * * * * * * * */ - /* Do the actual fallback computation here */ - /* * * * * * * * * * * * * * * * * * * * * * */ + assign dataa_pipe_end = dataa_pipe[PIPELINE-1]; + assign datab_pipe_end = datab_pipe[PIPELINE-1]; + end - if (REP == "SIGNED") begin - assign result = $signed(dataa_pipe_end) * $signed(datab_pipe_end); - end - else begin - assign result = dataa_pipe_end * datab_pipe_end; - end + if (REP == "SIGNED") begin + assign result = $signed(dataa_pipe_end) * $signed(datab_pipe_end); + end + else begin + assign result = dataa_pipe_end * datab_pipe_end; + end - `endif - - endgenerate +`endif endmodule: VX_mult diff --git a/hw/unit_tests/VX_divide_tb.v b/hw/unit_tests/VX_divide_tb.v index 1aa85c7c..02e63e1f 100644 --- a/hw/unit_tests/VX_divide_tb.v +++ b/hw/unit_tests/VX_divide_tb.v @@ -155,6 +155,6 @@ module VX_tb_divide(); end always #1 - clk = ~clk; + clk = !clk; endmodule: VX_tb_divide \ No newline at end of file