From d7737542e4c57fe564c01b76457defc1068ec78e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 9 Dec 2021 20:43:22 -0500 Subject: [PATCH] cache uuid support --- hw/rtl/VX_alu_unit.sv | 6 +-- hw/rtl/VX_csr_data.sv | 5 ++- hw/rtl/VX_csr_unit.sv | 2 +- hw/rtl/VX_define.vh | 12 +++--- hw/rtl/VX_dispatch.sv | 10 ++--- hw/rtl/VX_fpu_unit.sv | 6 +-- hw/rtl/VX_gpu_unit.sv | 4 +- hw/rtl/VX_ibuffer.sv | 2 +- hw/rtl/VX_icache_stage.sv | 27 +++---------- hw/rtl/VX_lsu_unit.sv | 47 ++++++++--------------- hw/rtl/VX_muldiv.sv | 18 ++++----- hw/rtl/VX_warp_sched.sv | 6 +-- hw/rtl/cache/VX_bank.sv | 12 +++--- hw/rtl/cache/VX_cache_define.vh | 2 +- hw/rtl/cache/VX_data_access.sv | 4 +- hw/rtl/cache/VX_miss_resrv.sv | 12 +++--- hw/rtl/cache/VX_shared_mem.sv | 8 ++-- hw/rtl/cache/VX_tag_access.sv | 4 +- hw/rtl/interfaces/VX_alu_req_if.sv | 2 +- hw/rtl/interfaces/VX_commit_if.sv | 2 +- hw/rtl/interfaces/VX_csr_req_if.sv | 2 +- hw/rtl/interfaces/VX_decode_if.sv | 2 +- hw/rtl/interfaces/VX_fpu_req_if.sv | 2 +- hw/rtl/interfaces/VX_gpu_req_if.sv | 2 +- hw/rtl/interfaces/VX_ibuffer_if.sv | 2 +- hw/rtl/interfaces/VX_ifetch_req_if.sv | 2 +- hw/rtl/interfaces/VX_ifetch_rsp_if.sv | 2 +- hw/rtl/interfaces/VX_lsu_req_if.sv | 2 +- hw/rtl/interfaces/VX_tex_csr_if.sv | 7 +++- hw/rtl/interfaces/VX_tex_req_if.sv | 2 +- hw/rtl/interfaces/VX_tex_rsp_if.sv | 2 +- hw/rtl/interfaces/VX_writeback_if.sv | 2 +- hw/rtl/tex_unit/VX_tex_define.vh | 16 ++++++++ hw/rtl/tex_unit/VX_tex_mem.sv | 52 +++++++++++-------------- hw/rtl/tex_unit/VX_tex_unit.sv | 55 +++++++++------------------ hw/scripts/scope.json | 16 ++++---- 36 files changed, 159 insertions(+), 200 deletions(-) diff --git a/hw/rtl/VX_alu_unit.sv b/hw/rtl/VX_alu_unit.sv index da20eb6d..72d36184 100644 --- a/hw/rtl/VX_alu_unit.sv +++ b/hw/rtl/VX_alu_unit.sv @@ -96,7 +96,7 @@ module VX_alu_unit #( wire alu_ready_in; wire alu_valid_out; wire alu_ready_out; - wire [63:0] alu_uuid; + wire [`UUID_BITS-1:0] alu_uuid; wire [`NW_BITS-1:0] alu_wid; wire [`NUM_THREADS-1:0] alu_tmask; wire [31:0] alu_PC; @@ -113,7 +113,7 @@ module VX_alu_unit #( assign alu_ready_in = alu_ready_out || ~alu_valid_out; VX_pipe_register #( - .DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32), + .DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32), .RESETW (1) ) pipe_reg ( .clk (clk), @@ -139,7 +139,7 @@ module VX_alu_unit #( wire mul_ready_in; wire mul_valid_out; wire mul_ready_out; - wire [63:0] mul_uuid; + wire [`UUID_BITS-1:0] mul_uuid; wire [`NW_BITS-1:0] mul_wid; wire [`NUM_THREADS-1:0] mul_tmask; wire [31:0] mul_PC; diff --git a/hw/rtl/VX_csr_data.sv b/hw/rtl/VX_csr_data.sv index d63d1b7d..6d4a82c9 100644 --- a/hw/rtl/VX_csr_data.sv +++ b/hw/rtl/VX_csr_data.sv @@ -25,13 +25,13 @@ module VX_csr_data #( `endif input wire read_enable, - input wire [63:0] read_uuid, + input wire [`UUID_BITS-1:0] read_uuid, input wire[`CSR_ADDR_BITS-1:0] read_addr, input wire[`NW_BITS-1:0] read_wid, output wire[31:0] read_data, input wire write_enable, - input wire [63:0] write_uuid, + input wire [`UUID_BITS-1:0] write_uuid, input wire[`CSR_ADDR_BITS-1:0] write_addr, input wire[`NW_BITS-1:0] write_wid, input wire[31:0] write_data, @@ -100,6 +100,7 @@ module VX_csr_data #( assign tex_csr_if.write_enable = write_enable; assign tex_csr_if.write_addr = write_addr; assign tex_csr_if.write_data = write_data; + assign tex_csr_if.write_uuid = write_uuid; `endif always @(posedge clk) begin diff --git a/hw/rtl/VX_csr_unit.sv b/hw/rtl/VX_csr_unit.sv index 6f7b35c9..9186586a 100644 --- a/hw/rtl/VX_csr_unit.sv +++ b/hw/rtl/VX_csr_unit.sv @@ -110,7 +110,7 @@ module VX_csr_unit #( wire stall_out = ~csr_commit_if.ready && csr_commit_if.valid; VX_pipe_register #( - .DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32), + .DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32), .RESETW (1) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index d4cf83fa..2badf7f8 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -34,6 +34,8 @@ `define PERF_CTR_BITS 44 +`define UUID_BITS 44 + /////////////////////////////////////////////////////////////////////////////// `define EX_NOP 3'h0 @@ -239,10 +241,6 @@ /////////////////////////////////////////////////////////////////////////////// -// cache request identifier -`define DBG_CACHE_REQ_IDW 48 -`define DBG_CACHE_REQ_ID(type, ctr) {4'(type), {`DBG_CACHE_REQ_IDW-4{1'b0}}} + ctr - // non-cacheable tag bits `define NC_TAG_BIT 1 @@ -267,7 +265,7 @@ `define ICACHE_CORE_TAG_ID_BITS `NW_BITS // Core request tag bits -`define ICACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_IDW + `ICACHE_CORE_TAG_ID_BITS) +`define ICACHE_CORE_TAG_WIDTH (`UUID_BITS + `ICACHE_CORE_TAG_ID_BITS) // Memory request data bits `define ICACHE_MEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8) @@ -293,13 +291,13 @@ `define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE) `ifdef EXT_TEX_ENABLE `define LSU_TAG_ID_BITS `MAX(`LSUQ_ADDR_BITS, 2) -`define LSU_TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_IDW + `LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS) +`define LSU_TEX_DCACHE_TAG_BITS (`UUID_BITS + `LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS) `define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS + `TEX_TAG_BIT) `else `define LSU_TAG_ID_BITS `LSUQ_ADDR_BITS `define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS) `endif -`define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_IDW + `DCACHE_CORE_TAG_ID_BITS) +`define DCACHE_CORE_TAG_WIDTH (`UUID_BITS + `DCACHE_CORE_TAG_ID_BITS) // Memory request data bits `define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8) diff --git a/hw/rtl/VX_dispatch.sv b/hw/rtl/VX_dispatch.sv index 5715d14b..9b8b88c8 100644 --- a/hw/rtl/VX_dispatch.sv +++ b/hw/rtl/VX_dispatch.sv @@ -42,7 +42,7 @@ module VX_dispatch ( wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(ibuffer_if.op_type); VX_skid_buffer #( - .DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)), + .DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)), .OUT_REG (1) ) alu_buffer ( .clk (clk), @@ -63,7 +63,7 @@ module VX_dispatch ( wire lsu_is_prefetch = `INST_LSU_IS_PREFETCH(ibuffer_if.op_mod); VX_skid_buffer #( - .DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32) + 1), + .DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32) + 1), .OUT_REG (1) ) lsu_buffer ( .clk (clk), @@ -85,7 +85,7 @@ module VX_dispatch ( wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid]; VX_skid_buffer #( - .DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32), + .DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32), .OUT_REG (1) ) csr_buffer ( .clk (clk), @@ -105,7 +105,7 @@ module VX_dispatch ( wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(ibuffer_if.op_type); VX_skid_buffer #( - .DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)), + .DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)), .OUT_REG (1) ) fpu_buffer ( .clk (clk), @@ -127,7 +127,7 @@ module VX_dispatch ( wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(ibuffer_if.op_type); VX_skid_buffer #( - .DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32)), + .DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32)), .OUT_REG (1) ) gpu_buffer ( .clk (clk), diff --git a/hw/rtl/VX_fpu_unit.sv b/hw/rtl/VX_fpu_unit.sv index 84af116b..342bf36d 100644 --- a/hw/rtl/VX_fpu_unit.sv +++ b/hw/rtl/VX_fpu_unit.sv @@ -22,7 +22,7 @@ module VX_fpu_unit #( wire valid_out; wire ready_out; - wire [63:0] rsp_uuid; + wire [`UUID_BITS-1:0] rsp_uuid; wire [`NW_BITS-1:0] rsp_wid; wire [`NUM_THREADS-1:0] rsp_tmask; wire [31:0] rsp_PC; @@ -40,7 +40,7 @@ module VX_fpu_unit #( wire fpuq_pop = valid_out && ready_out; VX_index_buffer #( - .DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), + .DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .SIZE (`FPUQ_SIZE) ) req_metadata ( .clk (clk), @@ -181,7 +181,7 @@ module VX_fpu_unit #( wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid; VX_pipe_register #( - .DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS), + .DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS), .RESETW (1) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/VX_gpu_unit.sv b/hw/rtl/VX_gpu_unit.sv index 6db637a2..b4047830 100644 --- a/hw/rtl/VX_gpu_unit.sv +++ b/hw/rtl/VX_gpu_unit.sv @@ -33,7 +33,7 @@ module VX_gpu_unit #( localparam RSP_DATAW = `MAX(`NUM_THREADS * 32, WCTL_DATAW); wire rsp_valid; - wire [63:0] rsp_uuid; + wire [`UUID_BITS-1:0] rsp_uuid; wire [`NW_BITS-1:0] rsp_wid; wire [`NUM_THREADS-1:0] rsp_tmask; wire [31:0] rsp_PC; @@ -187,7 +187,7 @@ module VX_gpu_unit #( assign stall_out = ~gpu_commit_if.ready && gpu_commit_if.valid; VX_pipe_register #( - .DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1), + .DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1), .RESETW (1) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/VX_ibuffer.sv b/hw/rtl/VX_ibuffer.sv index 953f1426..6231ac5f 100644 --- a/hw/rtl/VX_ibuffer.sv +++ b/hw/rtl/VX_ibuffer.sv @@ -15,7 +15,7 @@ module VX_ibuffer #( `UNUSED_PARAM (CORE_ID) - localparam DATAW = 64 + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1; + localparam DATAW = `UUID_BITS + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1; localparam ADDRW = $clog2(`IBUF_SIZE+1); localparam NWARPSW = $clog2(`NUM_WARPS+1); diff --git a/hw/rtl/VX_icache_stage.sv b/hw/rtl/VX_icache_stage.sv index 77a20b47..be096c5f 100644 --- a/hw/rtl/VX_icache_stage.sv +++ b/hw/rtl/VX_icache_stage.sv @@ -24,24 +24,19 @@ module VX_icache_stage #( localparam OUT_REG = 0; - reg [`DBG_CACHE_REQ_IDW-1:0] req_id; - wire [`DBG_CACHE_REQ_IDW-1:0] rsp_id; wire [`NW_BITS-1:0] req_tag, rsp_tag; - `UNUSED_VAR (rsp_id) - wire icache_req_fire = icache_req_if.valid && icache_req_if.ready; assign req_tag = ifetch_req_if.wid; assign rsp_tag = icache_rsp_if.tag[`NW_BITS-1:0]; - assign rsp_id = icache_rsp_if.tag[`NW_BITS +: `DBG_CACHE_REQ_IDW]; - wire [63:0] rsp_uuid; + wire [`UUID_BITS-1:0] rsp_uuid; wire [31:0] rsp_PC; wire [`NUM_THREADS-1:0] rsp_tmask; VX_dp_ram #( - .DATAW (32 + `NUM_THREADS + 64), + .DATAW (32 + `NUM_THREADS + `UUID_BITS), .SIZE (`NUM_WARPS), .LUTRAM (1) ) req_metadata ( @@ -59,17 +54,7 @@ module VX_icache_stage #( // Icache Request assign icache_req_if.valid = ifetch_req_if.valid; assign icache_req_if.addr = ifetch_req_if.PC[31:2]; - assign icache_req_if.tag = {req_id, req_tag}; - - always @(posedge clk) begin - if (reset) begin - req_id <= `DBG_CACHE_REQ_ID(0, 0); - end else begin - if (icache_req_fire) begin - req_id <= req_id + 1; - end - end - end + assign icache_req_if.tag = {ifetch_req_if.uuid, req_tag}; // Can accept new request? assign ifetch_req_if.ready = icache_req_if.ready; @@ -79,7 +64,7 @@ module VX_icache_stage #( wire stall_out = ~ifetch_rsp_if.ready && (0 == OUT_REG && ifetch_rsp_if.valid); VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + 64), + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `UUID_BITS), .RESETW (1), .DEPTH (OUT_REG) ) pipe_reg ( @@ -106,10 +91,10 @@ module VX_icache_stage #( `ifdef DBG_TRACE_CORE_ICACHE always @(posedge clk) begin if (icache_req_fire) begin - dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h, req_id=%0h (#%0d)\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, req_id, ifetch_req_if.uuid); + dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h (#%0d)\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, ifetch_req_if.uuid); end if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin - dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, req_id=%0h, data=%0h (#%0d)\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, rsp_id, ifetch_rsp_if.data, ifetch_rsp_if.uuid); + dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, data=%0h (#%0d)\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.data, ifetch_rsp_if.uuid); end end `endif diff --git a/hw/rtl/VX_lsu_unit.sv b/hw/rtl/VX_lsu_unit.sv index ec8fca80..5116035f 100644 --- a/hw/rtl/VX_lsu_unit.sv +++ b/hw/rtl/VX_lsu_unit.sv @@ -28,7 +28,7 @@ module VX_lsu_unit #( `STATIC_ASSERT(`SMEM_SIZE == `MEM_BLOCK_SIZE * (`SMEM_SIZE / `MEM_BLOCK_SIZE), ("invalid parameter")) wire req_valid; - wire [63:0] req_uuid; + wire [`UUID_BITS-1:0] req_uuid; wire [`NUM_THREADS-1:0] req_tmask; wire [`NUM_THREADS-1:0][31:0] req_addr; wire [`INST_LSU_BITS-1:0] req_type; @@ -82,7 +82,7 @@ module VX_lsu_unit #( wire lsu_wb = lsu_req_if.wb | lsu_req_if.is_prefetch; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 64 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .DATAW (1 + 1 + 1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)), .RESETW (1) ) req_pipe_reg ( .clk (clk), @@ -95,7 +95,7 @@ module VX_lsu_unit #( // Can accept new request? assign lsu_req_if.ready = ~stall_in && ~fence_wait; - wire [63:0] rsp_uuid; + wire [`UUID_BITS-1:0] rsp_uuid; wire [`NW_BITS-1:0] rsp_wid; wire [31:0] rsp_pc; wire [`NR_BITS-1:0] rsp_rd; @@ -108,8 +108,6 @@ module VX_lsu_unit #( wire [`NUM_THREADS-1:0] rsp_rem_mask_n; wire [`NUM_THREADS-1:0] rsp_tmask; - reg [`DBG_CACHE_REQ_IDW-1:0] req_id; - wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id; reg [`NUM_THREADS-1:0] req_sent_mask; reg is_req_start; @@ -118,7 +116,6 @@ module VX_lsu_unit #( `UNUSED_VAR (rsp_type) `UNUSED_VAR (rsp_is_prefetch) - `UNUSED_VAR (rsp_req_id) wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_offset, rsp_offset; for (genvar i = 0; i < `NUM_THREADS; i++) begin @@ -127,8 +124,6 @@ module VX_lsu_unit #( wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; - wire dcache_req_fire_any = (| dcache_req_fire); - wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready; wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1}; @@ -141,14 +136,13 @@ module VX_lsu_unit #( wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n); assign mbuf_raddr = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: `LSUQ_ADDR_BITS]; - assign rsp_req_id = dcache_rsp_if.tag[(`CACHE_ADDR_TYPE_BITS + `LSU_TAG_ID_BITS) +: `DBG_CACHE_REQ_IDW]; `UNUSED_VAR (dcache_rsp_if.tag) // do not writeback from software prefetch wire req_wb2 = req_wb && ~req_is_prefetch; VX_index_buffer #( - .DATAW (64 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1 + 1), + .DATAW (`UUID_BITS + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1 + 1), .SIZE (`LSUQ_SIZE) ) req_metadata ( .clk (clk), @@ -241,19 +235,9 @@ module VX_lsu_unit #( assign dcache_req_if.addr[i] = req_addr[i][31:2]; assign dcache_req_if.byteen[i] = mem_req_byteen; assign dcache_req_if.data[i] = mem_req_data; - assign dcache_req_if.tag[i] = {req_id, `LSU_TAG_ID_BITS'(req_tag), req_addr_type[i]}; + assign dcache_req_if.tag[i] = {req_uuid, `LSU_TAG_ID_BITS'(req_tag), req_addr_type[i]}; end - always @(posedge clk) begin - if (reset) begin - req_id <= `DBG_CACHE_REQ_ID(1, 0); - end else begin - if (dcache_req_fire_any) begin - req_id <= req_id + 1; - end - end - end - assign ready_in = req_dep_ready && dcache_req_ready; // send store commit @@ -298,7 +282,7 @@ module VX_lsu_unit #( wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid; VX_pipe_register #( - .DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1), + .DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1), .RESETW (1) ) rsp_pipe_reg ( .clk (clk), @@ -325,7 +309,7 @@ module VX_lsu_unit #( `SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr); `ifndef SYNTHESIS - reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + `NR_BITS + 64 + 64 + 1)-1:0] pending_reqs; + reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + `NR_BITS + `UUID_BITS + 64 + 1)-1:0] pending_reqs; wire [63:0] delay_timeout = 10000 * (1 ** (`L2_ENABLE + `L3_ENABLE)); always @(posedge clk) begin @@ -344,23 +328,24 @@ module VX_lsu_unit #( if (pending_reqs[i][0]) begin `ASSERT(($time - pending_reqs[i][1 +: 64]) < delay_timeout, ("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d (#%0d)", - $time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+64+32+`NR_BITS +: `NW_BITS], - pending_reqs[i][1+64+64+`NR_BITS +: 32], - pending_reqs[i][1+64+64 +: `NR_BITS], - pending_reqs[i][1+64 +: 64])); + $time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+`UUID_BITS+`NR_BITS+32 +: `NW_BITS], + pending_reqs[i][1+64+`UUID_BITS+`NR_BITS +: 32], + pending_reqs[i][1+64+`UUID_BITS +: `NR_BITS], + pending_reqs[i][1+64 +: `UUID_BITS])); end end end `endif `ifdef DBG_TRACE_CORE_DCACHE + wire dcache_req_fire_any = (| dcache_req_fire); always @(posedge clk) begin if (lsu_req_if.valid && fence_wait) begin dpi_trace("%d: *** D$%0d fence wait\n", $time, CORE_ID); end if (dcache_req_fire_any) begin if (dcache_req_if.rw[0]) begin - dpi_trace("%d: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_id); + dpi_trace("%d: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire); `TRACE_ARRAY1D(req_addr, `NUM_THREADS); dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen); `TRACE_ARRAY1D(req_addr_type, `NUM_THREADS); @@ -368,7 +353,7 @@ module VX_lsu_unit #( `TRACE_ARRAY1D(dcache_req_if.data, `NUM_THREADS); dpi_trace(", (#%0d)\n", req_uuid); end else begin - dpi_trace("%d: D$%0d Rd Req: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, req_id=%0h, addr=", $time, CORE_ID, req_is_prefetch, req_wid, req_pc, dcache_req_fire, req_id); + dpi_trace("%d: D$%0d Rd Req: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_is_prefetch, req_wid, req_pc, dcache_req_fire); `TRACE_ARRAY1D(req_addr, `NUM_THREADS); dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen); `TRACE_ARRAY1D(req_addr_type, `NUM_THREADS); @@ -376,8 +361,8 @@ module VX_lsu_unit #( end end if (dcache_rsp_fire) begin - dpi_trace("%d: D$%0d Rsp: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, rd=%0d, data=", - $time, CORE_ID, rsp_is_prefetch, rsp_wid, rsp_pc, dcache_rsp_if.tmask, rsp_req_id, mbuf_raddr, rsp_rd); + dpi_trace("%d: D$%0d Rsp: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, tag=%0h, rd=%0d, data=", + $time, CORE_ID, rsp_is_prefetch, rsp_wid, rsp_pc, dcache_rsp_if.tmask, mbuf_raddr, rsp_rd); `TRACE_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS); dpi_trace(", is_dup=%b (#%0d)\n", rsp_is_dup, rsp_uuid); end diff --git a/hw/rtl/VX_muldiv.sv b/hw/rtl/VX_muldiv.sv index c4dda93b..ea992825 100644 --- a/hw/rtl/VX_muldiv.sv +++ b/hw/rtl/VX_muldiv.sv @@ -6,7 +6,7 @@ module VX_muldiv ( // Inputs input wire [`INST_MUL_BITS-1:0] alu_op, - input wire [63:0] uuid_in, + input wire [`UUID_BITS-1:0] uuid_in, input wire [`NW_BITS-1:0] wid_in, input wire [`NUM_THREADS-1:0] tmask_in, input wire [31:0] PC_in, @@ -16,7 +16,7 @@ module VX_muldiv ( input wire [`NUM_THREADS-1:0][31:0] alu_in2, // Outputs - output wire [63:0] uuid_out, + output wire [`UUID_BITS-1:0] uuid_out, output wire [`NW_BITS-1:0] wid_out, output wire [`NUM_THREADS-1:0] tmask_out, output wire [31:0] PC_out, @@ -34,7 +34,7 @@ module VX_muldiv ( wire is_div_op = `INST_MUL_IS_DIV(alu_op); wire [`NUM_THREADS-1:0][31:0] mul_result; - wire [63:0] mul_uuid_out; + wire [`UUID_BITS-1:0] mul_uuid_out; wire [`NW_BITS-1:0] mul_wid_out; wire [`NUM_THREADS-1:0] mul_tmask_out; wire [31:0] mul_PC_out; @@ -66,7 +66,7 @@ module VX_muldiv ( end VX_shift_register #( - .DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .DEPTH (`LATENCY_IMUL), .RESETW (1) ) mul_shift_reg ( @@ -106,7 +106,7 @@ module VX_muldiv ( end VX_shift_register #( - .DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1), + .DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1), .DEPTH (`LATENCY_IMUL), .RESETW (1) ) mul_shift_reg ( @@ -122,7 +122,7 @@ module VX_muldiv ( /////////////////////////////////////////////////////////////////////////// wire [`NUM_THREADS-1:0][31:0] div_result; - wire [63:0] div_uuid_out; + wire [`UUID_BITS-1:0] div_uuid_out; wire [`NW_BITS-1:0] div_wid_out; wire [`NUM_THREADS-1:0] div_tmask_out; wire [31:0] div_PC_out; @@ -151,7 +151,7 @@ module VX_muldiv ( end VX_shift_register #( - .DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .DEPTH (`LATENCY_IMUL), .RESETW (1) ) div_shift_reg ( @@ -199,7 +199,7 @@ module VX_muldiv ( /////////////////////////////////////////////////////////////////////////// wire rsp_valid = mul_valid_out || div_valid_out; - wire [63:0] rsp_uuid = mul_valid_out ? mul_uuid_out : div_uuid_out; + wire [`UUID_BITS-1:0] rsp_uuid = mul_valid_out ? mul_uuid_out : div_uuid_out; wire [`NW_BITS-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out; wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out; wire [31:0] rsp_PC = mul_valid_out ? mul_PC_out : div_PC_out; @@ -210,7 +210,7 @@ module VX_muldiv ( assign stall_out = ~ready_out && valid_out; VX_pipe_register #( - .DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .RESETW (1) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/VX_warp_sched.sv b/hw/rtl/VX_warp_sched.sv index b8ec17bf..dda8600b 100644 --- a/hw/rtl/VX_warp_sched.sv +++ b/hw/rtl/VX_warp_sched.sv @@ -46,7 +46,7 @@ module VX_warp_sched #( wire schedule_valid; wire warp_scheduled; - reg [63:0] issued_instrs; + reg [`UUID_BITS-1:0] issued_instrs; wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready; @@ -228,10 +228,10 @@ module VX_warp_sched #( assign warp_scheduled = schedule_valid && ~stall_out; - wire [63:0] instr_uuid = (issued_instrs * `NUM_CORES * `NUM_CLUSTERS) + 64'(CORE_ID); + wire [`UUID_BITS-1:0] instr_uuid = (issued_instrs * `NUM_CORES * `NUM_CLUSTERS) + `UUID_BITS'(CORE_ID); VX_pipe_register #( - .DATAW (1 + 64 + `NUM_THREADS + 32 + `NW_BITS), + .DATAW (1 + `UUID_BITS + `NUM_THREADS + 32 + `NW_BITS), .RESETW (1) ) pipe_reg ( .clk (clk), diff --git a/hw/rtl/cache/VX_bank.sv b/hw/rtl/cache/VX_bank.sv index 9e1f3552..22e5887b 100644 --- a/hw/rtl/cache/VX_bank.sv +++ b/hw/rtl/cache/VX_bank.sv @@ -488,22 +488,22 @@ module VX_bank #( dpi_trace("%d: cache%0d:%0d fill-rsp: addr=%0h, id=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data); end if (mshr_fire) begin - dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, req_id_sel); + dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, req_id_sel); end if (creq_fire) begin if (creq_rw) - dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, req_id_sel); + dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, req_id_sel); else - dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, req_id_sel); + dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, req_id_sel); end if (crsq_fire) begin - dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, req_id_st1); + dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, req_id_st1); end if (mreq_push) begin if (is_write_st1) - dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, req_id_st1); + dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, req_id_st1); else - dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, req_id_st1); + dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, req_id_st1); end end `endif diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index b8f2fdbc..647ea0be 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -4,7 +4,7 @@ `include "VX_platform.vh" // cache request identifier -`define DBG_CACHE_REQ_IDW 48 +`define DBG_CACHE_REQ_IDW 44 `define REQS_BITS `LOG2UP(NUM_REQS) diff --git a/hw/rtl/cache/VX_data_access.sv b/hw/rtl/cache/VX_data_access.sv index 887b4095..f5809644 100644 --- a/hw/rtl/cache/VX_data_access.sv +++ b/hw/rtl/cache/VX_data_access.sv @@ -122,10 +122,10 @@ module VX_data_access #( dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data); end if (read && ~stall) begin - dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, req_id=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, line_addr, read_data); + dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, blk_addr=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, read_data, req_id); end if (write && ~stall) begin - dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, req_id=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, byteen, line_addr, write_data); + dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, byteen=%b, blk_addr=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), byteen, line_addr, write_data, req_id); end end `endif diff --git a/hw/rtl/cache/VX_miss_resrv.sv b/hw/rtl/cache/VX_miss_resrv.sv index 08b76add..b9081fdd 100644 --- a/hw/rtl/cache/VX_miss_resrv.sv +++ b/hw/rtl/cache/VX_miss_resrv.sv @@ -201,22 +201,22 @@ module VX_miss_resrv #( always @(posedge clk) begin if (allocate_fire || fill_valid || dequeue_fire || lookup_replay || lookup_valid || release_valid) begin if (allocate_fire) - dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, + dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_req_id); if (fill_valid) dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d, addr=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id, `LINE_TO_BYTE_ADDR(fill_addr, BANK_ID)); if (dequeue_fire) - dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, + dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_id); if (lookup_replay) - dpi_trace("%d: cache%0d:%0d mshr-replay: addr=%0h, id=%0d\n", $time, CACHE_ID, BANK_ID, - `LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id); + dpi_trace("%d: cache%0d:%0d mshr-replay: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, + `LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lkp_req_id); if (lookup_valid) - dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b, req_id=%0h\n", $time, CACHE_ID, BANK_ID, + dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lookup_match, lkp_req_id); if (release_valid) - dpi_trace("%d: cache%0d:%0d mshr-release id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, release_id, rel_req_id); + dpi_trace("%d: cache%0d:%0d mshr-release id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, release_id, rel_req_id); dpi_trace("%d: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID); for (integer i = 0; i < MSHR_SIZE; ++i) begin if (valid_table[i]) begin diff --git a/hw/rtl/cache/VX_shared_mem.sv b/hw/rtl/cache/VX_shared_mem.sv index 971795e0..7d6eb275 100644 --- a/hw/rtl/cache/VX_shared_mem.sv +++ b/hw/rtl/cache/VX_shared_mem.sv @@ -306,10 +306,10 @@ module VX_shared_mem #( for (integer i = 0; i < NUM_BANKS; ++i) begin if (per_bank_core_req_valid_unqual[i]) begin if (per_bank_core_req_rw_unqual[i]) begin - dpi_trace("%d: smem%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, req_id=%0h\n", + dpi_trace("%d: smem%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h (#%0d)\n", $time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i], req_id_st0[i]); end else begin - dpi_trace("%d: smem%0d:%0d core-rd-req: addr=%0h, tag=%0h, req_id=%0h\n", + dpi_trace("%d: smem%0d:%0d core-rd-req: addr=%0h, tag=%0h (#%0d)\n", $time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], req_id_st0[i]); end end @@ -319,10 +319,10 @@ module VX_shared_mem #( for (integer i = 0; i < NUM_BANKS; ++i) begin if (per_bank_core_req_valid[i]) begin if (per_bank_core_req_rw[i]) begin - dpi_trace("%d: smem%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, data=%0h, req_id=%0h\n", + dpi_trace("%d: smem%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, data=%0h (#%0d)\n", $time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_data[i], req_id_st1[i]); end else begin - dpi_trace("%d: smem%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, data=%0h, req_id=%0h\n", + dpi_trace("%d: smem%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, data=%0h (#%0d)\n", $time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_rsp_data[i], req_id_st1[i]); end end diff --git a/hw/rtl/cache/VX_tag_access.sv b/hw/rtl/cache/VX_tag_access.sv index 808008d5..d8d2a4db 100644 --- a/hw/rtl/cache/VX_tag_access.sv +++ b/hw/rtl/cache/VX_tag_access.sv @@ -68,9 +68,9 @@ module VX_tag_access #( end if (lookup && ~stall) begin if (tag_match) begin - dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, req_id=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, line_addr, line_tag); + dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, blk_addr=%0d, tag_id=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag, req_id); end else begin - dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, req_id=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, line_addr, line_tag, read_tag); + dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag, read_tag, req_id); end end end diff --git a/hw/rtl/interfaces/VX_alu_req_if.sv b/hw/rtl/interfaces/VX_alu_req_if.sv index 35049542..f6818e7d 100644 --- a/hw/rtl/interfaces/VX_alu_req_if.sv +++ b/hw/rtl/interfaces/VX_alu_req_if.sv @@ -6,7 +6,7 @@ interface VX_alu_req_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_commit_if.sv b/hw/rtl/interfaces/VX_commit_if.sv index e85d310f..ddbd9600 100644 --- a/hw/rtl/interfaces/VX_commit_if.sv +++ b/hw/rtl/interfaces/VX_commit_if.sv @@ -6,7 +6,7 @@ interface VX_commit_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_csr_req_if.sv b/hw/rtl/interfaces/VX_csr_req_if.sv index 0639f3aa..c8eef24a 100644 --- a/hw/rtl/interfaces/VX_csr_req_if.sv +++ b/hw/rtl/interfaces/VX_csr_req_if.sv @@ -6,7 +6,7 @@ interface VX_csr_req_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_decode_if.sv b/hw/rtl/interfaces/VX_decode_if.sv index 23039847..5c00fb0f 100644 --- a/hw/rtl/interfaces/VX_decode_if.sv +++ b/hw/rtl/interfaces/VX_decode_if.sv @@ -6,7 +6,7 @@ interface VX_decode_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_fpu_req_if.sv b/hw/rtl/interfaces/VX_fpu_req_if.sv index 2b7d69f0..62ea9255 100644 --- a/hw/rtl/interfaces/VX_fpu_req_if.sv +++ b/hw/rtl/interfaces/VX_fpu_req_if.sv @@ -6,7 +6,7 @@ interface VX_fpu_req_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_gpu_req_if.sv b/hw/rtl/interfaces/VX_gpu_req_if.sv index 06ef6cc7..027f7a2b 100644 --- a/hw/rtl/interfaces/VX_gpu_req_if.sv +++ b/hw/rtl/interfaces/VX_gpu_req_if.sv @@ -6,7 +6,7 @@ interface VX_gpu_req_if(); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_ibuffer_if.sv b/hw/rtl/interfaces/VX_ibuffer_if.sv index a436ae7b..2f9c17b6 100644 --- a/hw/rtl/interfaces/VX_ibuffer_if.sv +++ b/hw/rtl/interfaces/VX_ibuffer_if.sv @@ -6,7 +6,7 @@ interface VX_ibuffer_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_ifetch_req_if.sv b/hw/rtl/interfaces/VX_ifetch_req_if.sv index 4132f90b..95e88223 100644 --- a/hw/rtl/interfaces/VX_ifetch_req_if.sv +++ b/hw/rtl/interfaces/VX_ifetch_req_if.sv @@ -6,7 +6,7 @@ interface VX_ifetch_req_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NUM_THREADS-1:0] tmask; wire [`NW_BITS-1:0] wid; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_ifetch_rsp_if.sv b/hw/rtl/interfaces/VX_ifetch_rsp_if.sv index 350af081..f47e8749 100644 --- a/hw/rtl/interfaces/VX_ifetch_rsp_if.sv +++ b/hw/rtl/interfaces/VX_ifetch_rsp_if.sv @@ -6,7 +6,7 @@ interface VX_ifetch_rsp_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NUM_THREADS-1:0] tmask; wire [`NW_BITS-1:0] wid; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_lsu_req_if.sv b/hw/rtl/interfaces/VX_lsu_req_if.sv index 128b3c20..f52b22da 100644 --- a/hw/rtl/interfaces/VX_lsu_req_if.sv +++ b/hw/rtl/interfaces/VX_lsu_req_if.sv @@ -6,7 +6,7 @@ interface VX_lsu_req_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_tex_csr_if.sv b/hw/rtl/interfaces/VX_tex_csr_if.sv index a83c9479..e0c626a5 100644 --- a/hw/rtl/interfaces/VX_tex_csr_if.sv +++ b/hw/rtl/interfaces/VX_tex_csr_if.sv @@ -8,17 +8,20 @@ interface VX_tex_csr_if (); wire write_enable; wire [`CSR_ADDR_BITS-1:0] write_addr; wire [31:0] write_data; + wire [`UUID_BITS-1:0] write_uuid; modport master ( output write_enable, output write_addr, - output write_data + output write_data, + output write_uuid ); modport slave ( input write_enable, input write_addr, - input write_data + input write_data, + input write_uuid ); endinterface diff --git a/hw/rtl/interfaces/VX_tex_req_if.sv b/hw/rtl/interfaces/VX_tex_req_if.sv index 0059de59..a3fec613 100644 --- a/hw/rtl/interfaces/VX_tex_req_if.sv +++ b/hw/rtl/interfaces/VX_tex_req_if.sv @@ -6,7 +6,7 @@ interface VX_tex_req_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_tex_rsp_if.sv b/hw/rtl/interfaces/VX_tex_rsp_if.sv index 5966124c..b6fe625a 100644 --- a/hw/rtl/interfaces/VX_tex_rsp_if.sv +++ b/hw/rtl/interfaces/VX_tex_rsp_if.sv @@ -6,7 +6,7 @@ interface VX_tex_rsp_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NW_BITS-1:0] wid; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; diff --git a/hw/rtl/interfaces/VX_writeback_if.sv b/hw/rtl/interfaces/VX_writeback_if.sv index 00cab3b8..6b93a04f 100644 --- a/hw/rtl/interfaces/VX_writeback_if.sv +++ b/hw/rtl/interfaces/VX_writeback_if.sv @@ -6,7 +6,7 @@ interface VX_writeback_if (); wire valid; - wire [63:0] uuid; + wire [`UUID_BITS-1:0] uuid; wire [`NUM_THREADS-1:0] tmask; wire [`NW_BITS-1:0] wid; wire [31:0] PC; diff --git a/hw/rtl/tex_unit/VX_tex_define.vh b/hw/rtl/tex_unit/VX_tex_define.vh index a3e1a926..381069fc 100644 --- a/hw/rtl/tex_unit/VX_tex_define.vh +++ b/hw/rtl/tex_unit/VX_tex_define.vh @@ -32,4 +32,20 @@ `define TEX_FORMAT_L8 `TEX_FORMAT_BITS'(5) `define TEX_FORMAT_A8 `TEX_FORMAT_BITS'(6) +task trace_tex_state ( + input [`CSR_ADDR_BITS-1:0] state +); + case (state) + `CSR_TEX_ADDR: dpi_trace("ADDR"); + `CSR_TEX_WIDTH: dpi_trace("WIDTH"); + `CSR_TEX_HEIGHT: dpi_trace("HEIGHT"); + `CSR_TEX_FORMAT: dpi_trace("FORMAT"); + `CSR_TEX_FILTER: dpi_trace("FILTER"); + `CSR_TEX_WRAPU: dpi_trace("WRAPU"); + `CSR_TEX_WRAPV: dpi_trace("WRAPV"); + //`CSR_TEX_MIPOFF + default: dpi_trace("MIPOFF"); + endcase +endtask + `endif \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_mem.sv b/hw/rtl/tex_unit/VX_tex_mem.sv index dd9878a2..73f9367c 100644 --- a/hw/rtl/tex_unit/VX_tex_mem.sv +++ b/hw/rtl/tex_unit/VX_tex_mem.sv @@ -75,6 +75,9 @@ module VX_tex_mem #( wire [`TEX_LGSTRIDE_BITS-1:0] q_req_lgstride; wire [3:0][NUM_REQS-1:0][1:0] q_align_offs; wire [3:0] q_dup_reqs; + wire [`NW_BITS-1:0] q_req_wid; + wire [31:0] q_req_PC; + wire [`UUID_BITS-1:0] q_req_uuid; assign reqq_push = req_valid && req_ready; @@ -105,12 +108,8 @@ module VX_tex_mem #( wire sent_all_ready, last_texel_sent; wire req_texel_dup; wire [NUM_REQS-1:0][29:0] req_texel_addr; - reg [`DBG_CACHE_REQ_IDW-1:0] req_id; - wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id; reg [1:0] req_texel_idx; reg req_texels_done; - - `UNUSED_VAR (rsp_req_id) always @(posedge clk) begin if (reset || last_texel_sent) begin @@ -156,22 +155,16 @@ module VX_tex_mem #( wire [NUM_REQS-1:0] req_dup_mask = {{(NUM_REQS-1){~req_texel_dup}}, 1'b1}; + assign {q_req_wid, q_req_PC, q_req_uuid} = q_req_info[`NW_BITS+32+`UUID_BITS-1:0]; + `UNUSED_VAR (q_req_wid) + `UNUSED_VAR (q_req_PC) + assign dcache_req_if.valid = {NUM_REQS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask; assign dcache_req_if.rw = {NUM_REQS{1'b0}}; assign dcache_req_if.addr = req_texel_addr; assign dcache_req_if.byteen = {NUM_REQS{4'b0}}; assign dcache_req_if.data = 'x; - assign dcache_req_if.tag = {NUM_REQS{req_id, `LSU_TAG_ID_BITS'(req_texel_idx), `CACHE_ADDR_TYPE_BITS'(0)}}; - - always @(posedge clk) begin - if (reset) begin - req_id <= `DBG_CACHE_REQ_ID(2, 0); - end else begin - if (dcache_req_fire_any) begin - req_id <= req_id + 1; - end - end - end + assign dcache_req_if.tag = {NUM_REQS{q_req_uuid, `LSU_TAG_ID_BITS'(req_texel_idx), `CACHE_ADDR_TYPE_BITS'(0)}}; // Dcache Response @@ -188,7 +181,6 @@ module VX_tex_mem #( wire rsp_texel_dup; assign rsp_texel_idx = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: 2]; - assign rsp_req_id = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS + `LSU_TAG_ID_BITS +: `DBG_CACHE_REQ_IDW]; `UNUSED_VAR (dcache_rsp_if.tag) assign rsp_texel_dup = q_dup_reqs[rsp_texel_idx]; @@ -285,25 +277,25 @@ module VX_tex_mem #( // Can accept new cache response? assign dcache_rsp_if.ready = ~(is_last_rsp && stall_out); -`ifdef DBG_TRACE_TEX - wire [`NW_BITS-1:0] q_req_wid, req_wid, rsp_wid; - wire [31:0] q_req_PC, req_PC, rsp_PC; - assign {q_req_wid, q_req_PC} = q_req_info[`NW_BITS+32-1:0]; - assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0]; - assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0]; +`ifdef DBG_TRACE_TEX + wire [`NW_BITS-1:0] req_wid, rsp_wid; + wire [31:0] req_PC, rsp_PC; + wire [`UUID_BITS-1:0] req_uuid, rsp_uuid; + assign {req_wid, req_PC, req_uuid} = req_info[`NW_BITS+32+`UUID_BITS-1:0]; + assign {rsp_wid, rsp_PC, rsp_uuid} = rsp_info[`NW_BITS+32+`UUID_BITS-1:0]; always @(posedge clk) begin if (dcache_req_fire_any) begin - dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, addr=", - $time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_id, req_texel_idx); + dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=", + $time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_texel_idx); `TRACE_ARRAY1D(req_texel_addr, NUM_REQS); - dpi_trace(", is_dup=%b\n", req_texel_dup); + dpi_trace(", is_dup=%b (#%0d)\n", req_texel_dup, q_req_uuid); end if (dcache_rsp_fire) begin - dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, data=", - $time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_req_id, rsp_texel_idx); + dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=", + $time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_texel_idx); `TRACE_ARRAY1D(dcache_rsp_if.data, NUM_REQS); - dpi_trace("\n"); + dpi_trace(" (#%0d)\n", q_req_uuid); end if (req_valid && req_ready) begin dpi_trace("%d: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, lgstride=%0d, baseaddr=", @@ -311,13 +303,13 @@ module VX_tex_mem #( `TRACE_ARRAY1D(req_baseaddr, NUM_REQS); dpi_trace(", addr="); `TRACE_ARRAY2D(req_addr, 4, NUM_REQS); - dpi_trace("\n"); + dpi_trace(" (#%0d)\n", req_uuid); end if (rsp_valid && rsp_ready) begin dpi_trace("%d: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, tmask=%b, data=", $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask); `TRACE_ARRAY2D(rsp_data, 4, NUM_REQS); - dpi_trace("\n"); + dpi_trace(" (#%0d)\n", rsp_uuid); end end `endif diff --git a/hw/rtl/tex_unit/VX_tex_unit.sv b/hw/rtl/tex_unit/VX_tex_unit.sv index c10cdf64..9045c5aa 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.sv +++ b/hw/rtl/tex_unit/VX_tex_unit.sv @@ -23,11 +23,11 @@ module VX_tex_unit #( VX_tex_rsp_if.master tex_rsp_if ); - localparam REQ_INFO_W = 64 + `NR_BITS + 1 + `NW_BITS + 32; + localparam REQ_INFO_W = `NR_BITS + 1 + `NW_BITS + 32 + `UUID_BITS; localparam BLEND_FRAC_W = (2 * `NUM_THREADS * `TEX_BLEND_FRAC); reg [$clog2(`NUM_TEX_UNITS)-1:0] csr_tex_unit; - reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][`TEX_LOD_MAX+1-1:0]; + reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(`TEX_LOD_MAX+1)-1:0]; reg [1:0][`TEX_LOD_BITS-1:0] tex_logdims [`NUM_TEX_UNITS-1:0]; reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0]; reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0]; @@ -36,9 +36,6 @@ module VX_tex_unit #( // CSRs programming - reg csrs_dirty [`NUM_TEX_UNITS-1:0]; - `UNUSED_VAR (csrs_dirty) - always @(posedge clk) begin if (tex_csr_if.write_enable) begin case (tex_csr_if.write_addr) @@ -47,50 +44,39 @@ module VX_tex_unit #( end `CSR_TEX_ADDR: begin tex_baddr[csr_tex_unit] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0]; - csrs_dirty[csr_tex_unit] <= 1; end `CSR_TEX_FORMAT: begin tex_format[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0]; - csrs_dirty[csr_tex_unit] <= 1; end `CSR_TEX_WRAPU: begin tex_wraps[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0]; - csrs_dirty[csr_tex_unit] <= 1; end `CSR_TEX_WRAPV: begin tex_wraps[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0]; - csrs_dirty[csr_tex_unit] <= 1; end `CSR_TEX_FILTER: begin - tex_filter[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0]; - csrs_dirty[csr_tex_unit] <= 1; + tex_filter[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0]; end `CSR_TEX_WIDTH: begin tex_logdims[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0]; - csrs_dirty[csr_tex_unit] <= 1; end `CSR_TEX_HEIGHT: begin tex_logdims[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0]; - csrs_dirty[csr_tex_unit] <= 1; end default: begin for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin `IGNORE_WARNINGS_BEGIN if (tex_csr_if.write_addr == `CSR_TEX_MIPOFF(j)) begin `IGNORE_WARNINGS_END - tex_mipoff[csr_tex_unit][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0]; - csrs_dirty[csr_tex_unit] <= 1; + tex_mipoff[csr_tex_unit][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0]; end end end endcase end - if (reset || (tex_req_if.valid && tex_req_if.ready)) begin - for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin - csrs_dirty[i] <= 0; - end - end end + wire [`UUID_BITS-1:0] write_uuid = tex_csr_if.write_uuid; + `UNUSED_VAR (write_uuid); // mipmap attributes @@ -136,7 +122,7 @@ module VX_tex_unit #( .mip_level (mip_level), .req_mipoff (sel_mipoff), .req_logdims(sel_logdims), - .req_info ({tex_format[tex_req_if.unit], tex_req_if.uuid, tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}), + .req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC, tex_req_if.uuid}), .req_ready (tex_req_if.ready), .rsp_valid (mem_req_valid), @@ -211,9 +197,9 @@ module VX_tex_unit #( .rsp_valid (tex_rsp_if.valid), .rsp_tmask (tex_rsp_if.tmask), .rsp_data (tex_rsp_if.data), - .rsp_info ({tex_rsp_if.uuid, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC}), + .rsp_info ({tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.uuid}), .rsp_ready (tex_rsp_if.ready) - ); + ); `ifdef PERF_ENABLE wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_req_per_cycle; @@ -255,31 +241,24 @@ module VX_tex_unit #( `ifdef DBG_TRACE_TEX always @(posedge clk) begin + if (tex_csr_if.write_enable) begin + dpi_trace("%d: core%0d-tex-csr: unit=%0d, state=", $time, CORE_ID, csr_tex_unit); + trace_tex_state(tex_csr_if.write_addr); + dpi_trace(", data=%0h (#%0d)\n", tex_csr_if.write_data, tex_csr_if.write_uuid); + end if (tex_req_if.valid && tex_req_if.ready) begin - for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin - if (csrs_dirty[i]) begin - dpi_trace("%d: core%0d-tex-csr: tex%0d_addr=%0h\n", $time, CORE_ID, i, tex_baddr[i]); - dpi_trace("%d: core%0d-tex-csr: tex%0d_logwidth=%0h\n", $time, CORE_ID, i, tex_logdims[i][0]); - dpi_trace("%d: core%0d-tex-csr: tex%0d_logheight=%0h\n", $time, CORE_ID, i, tex_logdims[i][1]); - dpi_trace("%d: core%0d-tex-csr: tex%0d_format=%0h\n", $time, CORE_ID, i, tex_format[i]); - dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_u=%0h\n", $time, CORE_ID, i, tex_wraps[i][0]); - dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_v=%0h\n", $time, CORE_ID, i, tex_wraps[i][1]); - dpi_trace("%d: core%0d-tex-csr: tex%0d_filter=%0h\n", $time, CORE_ID, i, tex_filter[i]); - end - end - dpi_trace("%d: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=", - $time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod); + $time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod); `TRACE_ARRAY1D(tex_req_if.coords[0], `NUM_THREADS); dpi_trace(", v="); `TRACE_ARRAY1D(tex_req_if.coords[1], `NUM_THREADS); - dpi_trace("\n"); + dpi_trace(" (#%0d)\n", tex_req_if.uuid); end if (tex_rsp_if.valid && tex_rsp_if.ready) begin dpi_trace("%d: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=", $time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask); `TRACE_ARRAY1D(tex_rsp_if.data, `NUM_THREADS); - dpi_trace("\n"); + dpi_trace(" (#%0d)\n", tex_rsp_if.uuid); end end `endif diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index c9c49ebe..d6cfd609 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -142,7 +142,7 @@ }, "afu/vortex/cluster/core/pipeline/fetch/warp_sched": { "?wsched_scheduled": 1, - "wsched_schedule_uuid": 64, + "wsched_schedule_uuid": "`UUID_BITS", "wsched_active_warps": "`NUM_WARPS", "wsched_stalled_warps": "`NUM_WARPS", "wsched_schedule_tmask": "`NUM_THREADS", @@ -151,17 +151,17 @@ }, "afu/vortex/cluster/core/pipeline/fetch/icache_stage": { "?icache_req_fire": 1, - "icache_req_uuid": 64, + "icache_req_uuid": "`UUID_BITS", "icache_req_addr": 32, "icache_req_tag":"`ICACHE_CORE_TAG_ID_BITS", "?icache_rsp_fire": 1, - "icache_rsp_uuid": 64, + "icache_rsp_uuid": "`UUID_BITS", "icache_rsp_data": 32, "icache_rsp_tag":"`ICACHE_CORE_TAG_ID_BITS" }, "afu/vortex/cluster/core/pipeline/issue": { "?issue_fire": 1, - "issue_uuid": 64, + "issue_uuid": "`UUID_BITS", "issue_tmask":"`NUM_THREADS", "issue_ex_type":"`EX_BITS", "issue_op_type":"`INST_OP_BITS", @@ -178,7 +178,7 @@ "gpr_rs2":"`NUM_THREADS * 32", "gpr_rs3":"`NUM_THREADS * 32", "?writeback_valid": 1, - "writeback_uuid": 64, + "writeback_uuid": "`UUID_BITS", "writeback_tmask":"`NUM_THREADS", "writeback_rd":"`NR_BITS", "writeback_data":"`NUM_THREADS * 32", @@ -188,20 +188,20 @@ }, "afu/vortex/cluster/core/pipeline/execute/lsu_unit": { "?dcache_req_fire":"`NUM_THREADS", - "dcache_req_uuid": 64, + "dcache_req_uuid": "`UUID_BITS", "dcache_req_addr":"`NUM_THREADS * 32", "dcache_req_rw": 1, "dcache_req_byteen":"`NUM_THREADS * 4", "dcache_req_data":"`NUM_THREADS * 32", "dcache_req_tag":"`LSUQ_ADDR_BITS", "?dcache_rsp_fire":"`NUM_THREADS", - "dcache_rsp_uuid": 64, + "dcache_rsp_uuid": "`UUID_BITS", "dcache_rsp_data":"`NUM_THREADS * 32", "dcache_rsp_tag":"`LSUQ_ADDR_BITS" }, "afu/vortex/cluster/core/pipeline/execute/gpu_unit": { "?gpu_rsp_valid": 1, - "gpu_rsp_uuid": 64, + "gpu_rsp_uuid": "`UUID_BITS", "gpu_rsp_tmc": 1, "gpu_rsp_wspawn": 1, "gpu_rsp_split": 1,