// Patch overview and review notes. This text sits ahead of the first
// "diff --git" header, so it is outside every hunk.
//
// NOTE(review): this copy of the patch has its line breaks collapsed, so the
// column-0 context / '-' / '+' prefixes of the hunks are not preserved.
//
// driver/common/vx_utils.cpp, driver/include/vortex.h
//   - vx_get_perf(): the two output pointers trade places, from
//     (cycles, instrs) to (instrs, cycles), and the `if (cycles)` read
//     sequence moves below the `if (instrs)` one.
//   - For cycles, the counter is built from two vx_csr_get() reads:
//     CSR_CYCLE_H first, then CSR_CYCLE, combined as (high << 32) | low.
//     The visible instrs lines (CSR_INSTRET_H read, final shift/OR) follow the
//     same shape. Return codes are OR-ed into `ret`, which is returned.
//   - NOTE(review): both pointers are size_t*, so a caller that still passes
//     (&cycles, &instrs) compiles unchanged and receives swapped values --
//     audit call sites outside this patch.
//   - NOTE(review): `*cycles << 32` / `*instrs << 32` shift a size_t by 32;
//     presumably only hosts with a 64-bit size_t are targeted -- confirm.
//   - NOTE(review): `value` is declared without an initializer; whether
//     vx_csr_get() writes it on failure is not visible here -- confirm.
//
// hw/rtl/VX_commit.v
//   - Removes exu_counter / lsu_counter and the old num_commits expression
//     {exu_cmt_r, `NT_BITS'(0)} + NCMTW'(lsu_cmt_r).
//   - Adds one <unit>_commit_fire = valid && ready per unit (alu, lsu, csr,
//     mul, fpu, gpu); commit_fire is the OR of the six.
//   - commit_tmask is selected by priority: alu, lsu, csr, mul, fpu, otherwise
//     gpu. A single VX_countones (N = `NUM_THREADS) produces commit_size from
//     it; commit_size is registered and drives cmt_to_csr_if.commit_size.
//   - csr_update_r now follows commit_fire (valid && ready) instead of the
//     units' valid bits alone.
//   - NOTE(review): only one unit's tmask is counted per cycle; confirm that at
//     most one *_commit_fire can be high in a cycle, otherwise the other
//     commits are not added to csr_instret.
//   - NOTE(review): has_fflags_r loses its `fpu_commit_if.valid &&` qualifier.
//     VX_csr_data now checks cmt_to_csr_if.valid && has_fflags, but that valid
//     is set by any unit's commit. If fpu_commit_if.has_fflags can be high
//     while the FPU is not committing, fflags would be written for
//     fpu_commit_if.wid -- confirm, or qualify with fpu_commit_fire.
//
// hw/rtl/VX_csr_data.v
//   - The csr_fflags / csr_fcsr update is now qualified by cmt_to_csr_if.valid.
//   - csr_instret adds 64'(cmt_to_csr_if.commit_size) when valid is set.
//   - The remaining -/+ pairs in the read-address case and the frm assign show
//     the same tokens on both sides (alignment changes only).
//
// hw/rtl/VX_writeback.v
//   - Moves `always @(*) assert(writeback_if.ready);` below the `stall` wire;
//     the comment about missing backpressure appears next to it.
//
// hw/rtl/cache/VX_bank.v, hw/rtl/cache/VX_cache.v
//   - The -/+ pairs show the same tokens on both sides (whitespace only, as far
//     as the collapsed text shows).
//
// hw/rtl/interfaces/VX_cmt_to_csr_if.v
//   - num_commits [$clog2(`NUM_EXS*`NUM_THREADS+1)-1:0] is replaced by
//     commit_size [$clog2(`NUM_THREADS+1)-1:0].
diff --git a/driver/common/vx_utils.cpp b/driver/common/vx_utils.cpp index 0e590415..dbe1d3e1 100644 --- a/driver/common/vx_utils.cpp +++ b/driver/common/vx_utils.cpp @@ -91,17 +91,10 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) { return err; } -extern int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs) { +extern int vx_get_perf(vx_device_h device, int core_id, size_t* instrs, size_t* cycles) { int ret = 0; unsigned value; - - if (cycles) { - ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value); - *cycles = value; - ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value); - *cycles = (*cycles << 32) | value; - } if (instrs) { ret |= vx_csr_get(device, core_id, CSR_INSTRET_H, &value); @@ -110,5 +103,12 @@ extern int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* *instrs = (*instrs << 32) | value; } + if (cycles) { + ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value); + *cycles = value; + ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value); + *cycles = (*cycles << 32) | value; + } + return ret; } \ No newline at end of file diff --git a/driver/include/vortex.h b/driver/include/vortex.h index 36b11a53..e115834d 100644 --- a/driver/include/vortex.h +++ b/driver/include/vortex.h @@ -72,7 +72,7 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) int vx_upload_kernel_file(vx_device_h device, const char* filename); // get performance counters -int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs); +int vx_get_perf(vx_device_h device, int core_id, size_t* instrs, size_t* cycles); #ifdef __cplusplus } diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index c40469e8..732e36b4 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -18,35 +18,38 @@ module VX_commit #( VX_writeback_if writeback_if, VX_cmt_to_csr_if cmt_to_csr_if ); - localparam NCMTW = $clog2(`NUM_EXS*`NUM_THREADS+1); + localparam CMTW = 
$clog2(`NUM_THREADS+1); // CSRs update - wire [`NUM_EXS-1-1:0] exu_committed; - wire [`NUM_THREADS-1:0] lsu_committed; - wire [$clog2(`NUM_EXS-1+1)-1:0] exu_commits; - wire [$clog2(`NUM_THREADS+1)-1:0] lsu_commits; + wire alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready; + wire lsu_commit_fire = lsu_commit_if.valid && lsu_commit_if.ready; + wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready; + wire mul_commit_fire = mul_commit_if.valid && mul_commit_if.ready; + wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready; + wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready; - assign exu_committed = {alu_commit_if.valid, - csr_commit_if.valid, - mul_commit_if.valid, - fpu_commit_if.valid, - gpu_commit_if.valid}; + wire commit_fire = alu_commit_fire + || lsu_commit_fire + || csr_commit_fire + || mul_commit_fire + || fpu_commit_fire + || gpu_commit_fire; - assign lsu_committed = {`NUM_THREADS{lsu_commit_if.valid}} & lsu_commit_if.tmask; + wire [`NUM_THREADS-1:0] commit_tmask = alu_commit_fire ? alu_commit_if.tmask: + lsu_commit_fire ? lsu_commit_if.tmask: + csr_commit_fire ? csr_commit_if.tmask: + mul_commit_fire ? mul_commit_if.tmask: + fpu_commit_fire ? 
fpu_commit_if.tmask: + gpu_commit_if.tmask; - VX_countones #( - .N(`NUM_EXS-1) - ) exu_counter ( - .valids(exu_committed), - .count (exu_commits) - ); + wire [CMTW-1:0] commit_size; VX_countones #( .N(`NUM_THREADS) - ) lsu_counter ( - .valids(lsu_committed), - .count (lsu_commits) + ) commit_ctr ( + .valids(commit_tmask), + .count (commit_size) ); fflags_t fflags; @@ -63,25 +66,23 @@ module VX_commit #( end end - fflags_t fflags_r; - reg has_fflags_r; - reg [`NW_BITS-1:0] wid_r; - reg [$clog2(`NUM_EXS-1+1)-1:0] exu_cmt_r; - reg [$clog2(`NUM_THREADS+1)-1:0] lsu_cmt_r; reg csr_update_r; + reg [`NW_BITS-1:0] wid_r; + reg [CMTW-1:0] commit_size_r; + reg has_fflags_r; + fflags_t fflags_r; always @(posedge clk) begin - csr_update_r <= (| exu_committed) | lsu_commit_if.valid; - fflags_r <= fflags; - has_fflags_r <= fpu_commit_if.valid && fpu_commit_if.has_fflags; - wid_r <= fpu_commit_if.wid; - exu_cmt_r <= exu_commits; - lsu_cmt_r <= lsu_commits; + csr_update_r <= commit_fire; + wid_r <= fpu_commit_if.wid; + commit_size_r <= commit_size; + has_fflags_r <= fpu_commit_if.has_fflags; + fflags_r <= fflags; end assign cmt_to_csr_if.valid = csr_update_r; assign cmt_to_csr_if.wid = wid_r; - assign cmt_to_csr_if.num_commits = {exu_cmt_r, `NT_BITS'(0)} + NCMTW'(lsu_cmt_r); + assign cmt_to_csr_if.commit_size = commit_size_r; assign cmt_to_csr_if.has_fflags = has_fflags_r; assign cmt_to_csr_if.fflags = fflags_r; diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index 7ceb83fa..655f155c 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -40,7 +40,7 @@ module VX_csr_data #( reg [31:0] read_data_r; always @(posedge clk) begin - if (cmt_to_csr_if.has_fflags) begin + if (cmt_to_csr_if.valid && cmt_to_csr_if.has_fflags) begin csr_fflags[cmt_to_csr_if.wid] <= cmt_to_csr_if.fflags; csr_fcsr[cmt_to_csr_if.wid][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags; end @@ -92,7 +92,7 @@ module VX_csr_data #( csr_cycle <= csr_cycle + 1; end if (cmt_to_csr_if.valid) begin - csr_instret <= 
csr_instret + 64'(cmt_to_csr_if.num_commits); + csr_instret <= csr_instret + 64'(cmt_to_csr_if.commit_size); end end end @@ -100,42 +100,42 @@ module VX_csr_data #( always @(*) begin read_data_r = 'x; case (read_addr) - `CSR_FFLAGS : read_data_r = 32'(csr_fflags[read_wid]); - `CSR_FRM : read_data_r = 32'(csr_frm[read_wid]); - `CSR_FCSR : read_data_r = 32'(csr_fcsr[read_wid]); + `CSR_FFLAGS : read_data_r = 32'(csr_fflags[read_wid]); + `CSR_FRM : read_data_r = 32'(csr_frm[read_wid]); + `CSR_FCSR : read_data_r = 32'(csr_fcsr[read_wid]); - `CSR_LWID : read_data_r = 32'(read_wid); - `CSR_LTID , - `CSR_GTID , - `CSR_MHARTID , - `CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid); - `CSR_GCID : read_data_r = CORE_ID; - `CSR_NT : read_data_r = `NUM_THREADS; - `CSR_NW : read_data_r = `NUM_WARPS; - `CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS; + `CSR_LWID : read_data_r = 32'(read_wid); + `CSR_LTID , + `CSR_GTID , + `CSR_MHARTID , + `CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid); + `CSR_GCID : read_data_r = CORE_ID; + `CSR_NT : read_data_r = `NUM_THREADS; + `CSR_NW : read_data_r = `NUM_WARPS; + `CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS; - `CSR_SATP : read_data_r = 32'(csr_satp); + `CSR_SATP : read_data_r = 32'(csr_satp); - `CSR_MSTATUS : read_data_r = 32'(csr_mstatus); - `CSR_MISA : read_data_r = `ISA_CODE; - `CSR_MEDELEG : read_data_r = 32'(csr_medeleg); - `CSR_MIDELEG : read_data_r = 32'(csr_mideleg); - `CSR_MIE : read_data_r = 32'(csr_mie); - `CSR_MTVEC : read_data_r = 32'(csr_mtvec); + `CSR_MSTATUS : read_data_r = 32'(csr_mstatus); + `CSR_MISA : read_data_r = `ISA_CODE; + `CSR_MEDELEG : read_data_r = 32'(csr_medeleg); + `CSR_MIDELEG : read_data_r = 32'(csr_mideleg); + `CSR_MIE : read_data_r = 32'(csr_mie); + `CSR_MTVEC : read_data_r = 32'(csr_mtvec); - `CSR_MEPC : read_data_r = 32'(csr_mepc); + `CSR_MEPC : read_data_r = 32'(csr_mepc); - `CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]); - `CSR_PMPADDR0: read_data_r = 
32'(csr_pmpaddr[0]); + `CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]); + `CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]); - `CSR_CYCLE : read_data_r = csr_cycle[31:0]; - `CSR_CYCLE_H : read_data_r = csr_cycle[63:32]; - `CSR_INSTRET : read_data_r = csr_instret[31:0]; - `CSR_INSTRET_H:read_data_r = csr_instret[63:32]; + `CSR_CYCLE : read_data_r = csr_cycle[31:0]; + `CSR_CYCLE_H : read_data_r = csr_cycle[63:32]; + `CSR_INSTRET : read_data_r = csr_instret[31:0]; + `CSR_INSTRET_H : read_data_r = csr_instret[63:32]; - `CSR_MVENDORID:read_data_r = `VENDOR_ID; - `CSR_MARCHID : read_data_r = `ARCHITECTURE_ID; - `CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID; + `CSR_MVENDORID : read_data_r = `VENDOR_ID; + `CSR_MARCHID : read_data_r = `ARCHITECTURE_ID; + `CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID; default: begin assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr); @@ -144,6 +144,6 @@ module VX_csr_data #( end assign read_data = read_data_r; - assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid]; + assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid]; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 55df0466..c5228257 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -72,8 +72,8 @@ module VX_writeback #( fpu_valid ? 
fpu_commit_if.data : 0; - always @(*) assert(writeback_if.ready); wire stall =~writeback_if.ready && writeback_if.valid; + always @(*) assert(writeback_if.ready); // the writeback currently has no backpressure from issue stage VX_generic_register #( .N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)) diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 0d63743d..27d58a09 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -97,7 +97,7 @@ module VX_bank #( input wire snp_rsp_ready, // Misses - output wire misses + output wire misses ); `ifdef DBG_CORE_REQ_INFO diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index fe7c9600..b4efedae 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -392,12 +392,12 @@ module VX_cache #( .snp_req_ready (curr_bank_snp_req_ready), // Snoop response - .snp_rsp_valid (curr_bank_snp_rsp_valid), - .snp_rsp_tag (curr_bank_snp_rsp_tag), - .snp_rsp_ready (curr_bank_snp_rsp_ready), + .snp_rsp_valid (curr_bank_snp_rsp_valid), + .snp_rsp_tag (curr_bank_snp_rsp_tag), + .snp_rsp_ready (curr_bank_snp_rsp_ready), //Misses - .misses (curr_bank_miss) + .misses (curr_bank_miss) ); end diff --git a/hw/rtl/interfaces/VX_cmt_to_csr_if.v b/hw/rtl/interfaces/VX_cmt_to_csr_if.v index 7845a969..563e1b28 100644 --- a/hw/rtl/interfaces/VX_cmt_to_csr_if.v +++ b/hw/rtl/interfaces/VX_cmt_to_csr_if.v @@ -9,7 +9,7 @@ interface VX_cmt_to_csr_if (); wire [`NW_BITS-1:0] wid; - wire [$clog2(`NUM_EXS*`NUM_THREADS+1)-1:0] num_commits; + wire [$clog2(`NUM_THREADS+1)-1:0] commit_size; wire has_fflags; fflags_t fflags;