diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 51cc7c58..8848318e 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -10,7 +10,7 @@ module VX_alu_unit #( VX_alu_req_if alu_req_if, // Outputs - VX_wb_if alu_wb_if + VX_commit_if alu_commit_if ); wire [`NUM_THREADS-1:0][31:0] alu_result; wire [`NUM_THREADS-1:0][32:0] sub_result; @@ -48,7 +48,7 @@ module VX_alu_unit #( end end - wire stall = ~alu_wb_if.ready && (| alu_wb_if.valid); + wire stall = ~alu_commit_if.ready && (| alu_commit_if.valid); VX_generic_register #( .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)), @@ -57,8 +57,8 @@ module VX_alu_unit #( .reset (reset), .stall (stall), .flush (0), - .in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_result}), - .out ({alu_wb_if.valid, alu_wb_if.warp_num, alu_wb_if.curr_PC, alu_wb_if.rd, alu_wb_if.wb, alu_wb_if.data}) + .in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_result}), + .out ({alu_commit_if.valid, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data}) ); assign alu_req_if.ready = ~stall; diff --git a/hw/rtl/VX_branch_unit.v b/hw/rtl/VX_branch_unit.v index d1bd8eca..a708e1da 100644 --- a/hw/rtl/VX_branch_unit.v +++ b/hw/rtl/VX_branch_unit.v @@ -10,8 +10,8 @@ module VX_branch_unit #( VX_branch_req_if branch_req_if, // Outputs - VX_branch_rsp_if branch_rsp_if, - VX_wb_if branch_wb_if + VX_branch_ctl_if branch_ctl_if, + VX_commit_if branch_commit_if ); wire [`NT_BITS-1:0] br_result_index; @@ -19,7 +19,7 @@ module VX_branch_unit #( VX_priority_encoder #( .N(`NUM_THREADS) ) choose_alu_result ( - .data_in (alu_req_if.valid), + .data_in (branch_req_if.valid), .data_out (br_result_index), `UNUSED_PIN (valid_out) ); @@ -53,7 +53,7 @@ module VX_branch_unit #( wire [31:0] base_addr = (br_op == `BR_JALR) ? rs1_data : branch_req_if.curr_PC; wire [31:0] br_dest = $signed(base_addr) + $signed(branch_req_if.offset); - wire stall = (~branch_wb_if.ready && (| branch_wb_if.valid)); + wire stall = (~branch_commit_if.ready && (| branch_commit_if.valid)); VX_generic_register #( .N(1 + `NW_BITS + 1 + 32) @@ -63,7 +63,7 @@ module VX_branch_unit #( .stall (stall), .flush (0), .in ({in_valid, branch_req_if.warp_num, br_taken, br_dest}), - .out ({branch_rsp_if.valid, branch_rsp_if.warp_num, branch_rsp_if.taken, branch_rsp_if.dest}) + .out ({branch_ctl_if.valid, branch_ctl_if.warp_num, branch_ctl_if.taken, branch_ctl_if.dest}) ); VX_generic_register #( @@ -74,7 +74,7 @@ module VX_branch_unit #( .stall (stall), .flush (0), .in ({branch_req_if.valid, branch_req_if.warp_num, branch_req_if.curr_PC, branch_req_if.rd, branch_req_if.wb, {`NUM_THREADS{branch_req_if.next_PC}}}), - .out ({branch_wb_if.valid, branch_wb_if.warp_num, branch_wb_if.curr_PC, branch_wb_if.rd, branch_wb_if.wb, branch_wb_if.data}) + .out ({branch_commit_if.valid, branch_commit_if.warp_num, branch_commit_if.curr_PC, branch_commit_if.rd, branch_commit_if.wb, branch_commit_if.data}) ); assign branch_req_if.ready = ~stall; diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v new file mode 100644 index 00000000..6c0b2fb2 --- /dev/null +++ b/hw/rtl/VX_commit.v @@ -0,0 +1,105 @@ +`include "VX_define.vh" + +module VX_commit #( + parameter CORE_ID = 0 +) ( + input wire clk, + input wire reset, + + // inputs + VX_commit_if alu_commit_if, + VX_commit_if branch_commit_if, + VX_commit_if lsu_commit_if, + VX_commit_if mul_commit_if, + VX_commit_if csr_commit_if, + VX_commit_if gpu_commit_if, + + // outputs + VX_wb_if writeback_if, + VX_perf_cntrs_if perf_cntrs_if +); + + wire [`NUM_EXS-1:0] commited_mask; + assign commited_mask = {((| alu_commit_if.valid) && alu_commit_if.ready), + ((| branch_commit_if.valid) && branch_commit_if.ready), + ((| lsu_commit_if.valid) && lsu_commit_if.ready), + ((| mul_commit_if.valid) && mul_commit_if.ready), + ((| csr_commit_if.valid) && csr_commit_if.ready), + ((| gpu_commit_if.valid) && gpu_commit_if.ready)}; + + wire [`NE_BITS:0] num_commits; + + VX_countones #( + .N(`NUM_EXS) + ) valids_counter ( + .valids(commited_mask), + .count (num_commits) + ); + + wire has_committed = (| commited_mask); + + reg [63:0] total_cycles, total_instrs; + + always @(posedge clk) begin + if (reset) begin + total_cycles <= 0; + total_instrs <= 0; + end else begin + total_cycles <= total_cycles + 1; + if (has_committed) begin + total_instrs <= total_instrs + 64'(num_commits); + end + end + end + + assign perf_cntrs_if.total_cycles = total_cycles; + assign perf_cntrs_if.total_instrs = total_instrs; + + assign gpu_commit_if.ready = 1'b1; // doesn't writeback + + VX_writeback #( + .CORE_ID(CORE_ID) + ) writeback ( + .clk (clk), + .reset (reset), + + .alu_commit_if (alu_commit_if), + .branch_commit_if(branch_commit_if), + .lsu_commit_if (lsu_commit_if), + .csr_commit_if (csr_commit_if), + .mul_commit_if (mul_commit_if), + + .writeback_if (writeback_if) + ); + +`ifdef DBG_PRINT_PIPELINE + always @(posedge clk) begin + if ((| alu_commit_if.valid) && alu_commit_if.ready) begin + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data); + end + if ((| branch_commit_if.valid) && branch_commit_if.ready) begin + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, branch_commit_if.warp_num, branch_commit_if.curr_PC, branch_commit_if.wb, branch_commit_if.rd, branch_commit_if.data); + end + if ((| lsu_commit_if.valid) && lsu_commit_if.ready) begin + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data); + end + if ((| mul_commit_if.valid) && mul_commit_if.ready) begin + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data); + end + if ((| csr_commit_if.valid) && csr_commit_if.ready) begin + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data); + end + if ((| gpu_commit_if.valid) && gpu_commit_if.ready) begin + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data); + end + end +`endif + +endmodule + + + + + + + diff --git a/hw/rtl/VX_csr_arb.v b/hw/rtl/VX_csr_arb.v index 2db5c7a8..7ed17e52 100644 --- a/hw/rtl/VX_csr_arb.v +++ b/hw/rtl/VX_csr_arb.v @@ -12,11 +12,11 @@ module VX_csr_arb ( VX_csr_req_if csr_req_if, // input - VX_wb_if csr_rsp_if, + VX_commit_if csr_rsp_if, // outputs VX_csr_io_rsp_if csr_io_rsp_if, - VX_wb_if csr_wb_if + VX_commit_if csr_commit_if ); `UNUSED_VAR (clk) @@ -42,13 +42,13 @@ module VX_csr_arb ( assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & csr_rsp_if.is_io; assign csr_io_rsp_if.data = csr_rsp_if.data[0]; - assign csr_wb_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}}; - assign csr_wb_if.warp_num = csr_rsp_if.warp_num; - assign csr_wb_if.curr_PC = csr_rsp_if.curr_PC; - assign csr_wb_if.data = csr_rsp_if.data; - assign csr_wb_if.rd = csr_rsp_if.rd; - assign csr_wb_if.wb = csr_rsp_if.wb; + assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}}; + assign csr_commit_if.warp_num = csr_rsp_if.warp_num; + assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC; + assign csr_commit_if.data = csr_rsp_if.data; + assign csr_commit_if.rd = csr_rsp_if.rd; + assign csr_commit_if.wb = csr_rsp_if.wb; - assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_wb_if.ready; + assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_commit_if.ready; endmodule diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index cace905f..abe7f7e8 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -3,8 +3,7 @@ module VX_csr_data #( parameter CORE_ID = 0 ) ( - input wire clk, // Clock - input wire reset, + input wire clk, input wire[`CSR_ADDR_SIZE-1:0] read_addr, output reg[31:0] read_data, @@ -15,29 +14,18 @@ module VX_csr_data #( `IGNORE_WARNINGS_END input wire[`CSR_WIDTH-1:0] write_data, input wire[`NW_BITS-1:0] warp_num, - input wire notify_commit + VX_perf_cntrs_if perf_cntrs_if ); reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0]; - reg [63:0] num_cycles, num_instrs; - // cast address to physical CSR range wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr; assign rd_addr = $size(rd_addr)'(read_addr); - assign wr_addr = $size(wr_addr)'(write_addr); + assign wr_addr = $size(wr_addr)'(write_addr); always @(posedge clk) begin - if (reset) begin - num_cycles <= 0; - num_instrs <= 0; - end else begin - if (write_enable) begin - csr_table[wr_addr] <= write_data; - end - num_cycles <= num_cycles + 1; - if (notify_commit) begin - num_instrs <= num_instrs + 1; - end + if (write_enable) begin + csr_table[wr_addr] <= write_data; end end @@ -50,10 +38,10 @@ module VX_csr_data #( `CSR_NT : read_data = `NUM_THREADS; `CSR_NW : read_data = `NUM_WARPS; `CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS; - `CSR_CYCLE_L : read_data = num_cycles[31:0]; - `CSR_CYCLE_H : read_data = num_cycles[63:32]; - `CSR_INSTR_L : read_data = num_instrs[31:0]; - `CSR_INSTR_H : read_data = num_instrs[63:32]; + `CSR_CYCLE_L : read_data = perf_cntrs_if.total_cycles[31:0]; + `CSR_CYCLE_H : read_data = perf_cntrs_if.total_cycles[63:32]; + `CSR_INSTR_L : read_data = perf_cntrs_if.total_instrs[31:0]; + `CSR_INSTR_H : read_data = perf_cntrs_if.total_instrs[63:32]; `CSR_VEND_ID : read_data = `VENDOR_ID; `CSR_ARCH_ID : read_data = `ARCHITECTURE_ID; `CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID; diff --git a/hw/rtl/VX_csr_pipe.v b/hw/rtl/VX_csr_pipe.v index a329d258..b8f88481 100644 --- a/hw/rtl/VX_csr_pipe.v +++ b/hw/rtl/VX_csr_pipe.v @@ -5,14 +5,17 @@ module VX_csr_pipe #( ) ( input wire clk, input wire reset, - VX_csr_req_if csr_req_if, - VX_csr_io_req_if csr_io_req_if, - VX_wb_if csr_wb_if, + + VX_perf_cntrs_if perf_cntrs_if, + + VX_csr_io_req_if csr_io_req_if, VX_csr_io_rsp_if csr_io_rsp_if, - input wire notify_commit + + VX_csr_req_if csr_req_if, + VX_commit_if csr_commit_if ); - VX_csr_req_if csr_pipe_req_if(); - VX_wb_if csr_pipe_wb_if(); + VX_csr_req_if csr_pipe_req_if(); + VX_commit_if csr_pipe_commit_if(); VX_csr_arb csr_arb ( .clk (clk), @@ -20,9 +23,9 @@ module VX_csr_pipe #( .csr_core_req_if (csr_req_if), .csr_io_req_if (csr_io_req_if), .csr_req_if (csr_pipe_req_if), - .csr_rsp_if (csr_pipe_wb_if), + .csr_rsp_if (csr_pipe_commit_if), .csr_io_rsp_if (csr_io_rsp_if), - .csr_wb_if (csr_wb_if) + .csr_commit_if (csr_commit_if) ); wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2; @@ -30,24 +33,23 @@ module VX_csr_pipe #( wire [31:0] csr_updated_data_s2; wire [31:0] csr_read_data_unqual; - wire is_csr_s2 = (| csr_pipe_wb_if.valid); + wire is_csr_s2 = (| csr_pipe_commit_if.valid); VX_csr_data #( .CORE_ID(CORE_ID) ) csr_data ( .clk (clk), - .reset (reset), .read_addr (csr_pipe_req_if.csr_addr), .read_data (csr_read_data_unqual), .write_enable (is_csr_s2), .write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]), .write_addr (csr_addr_s2), .warp_num (csr_pipe_req_if.warp_num), - .notify_commit (notify_commit) + .perf_cntrs_if (perf_cntrs_if) ); wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr) - && (csr_pipe_wb_if.warp_num == csr_pipe_req_if.warp_num) + && (csr_pipe_commit_if.warp_num == csr_pipe_req_if.warp_num) && is_csr_s2; wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual; @@ -63,7 +65,7 @@ module VX_csr_pipe #( endcase end - wire stall = ~csr_pipe_wb_if.ready && (| csr_pipe_wb_if.valid); + wire stall = ~csr_pipe_commit_if.ready && (| csr_pipe_commit_if.valid); VX_generic_register #( .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32) @@ -73,12 +75,12 @@ module VX_csr_pipe #( .stall (stall), .flush (0), .in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}), - .out ({csr_pipe_wb_if.valid, csr_pipe_wb_if.warp_num, csr_pipe_wb_if.curr_PC, csr_pipe_wb_if.rd, csr_pipe_wb_if.wb, csr_addr_s2, csr_pipe_wb_if.is_io, csr_read_data_s2, csr_updated_data_s2}) + .out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, csr_pipe_commit_if.is_io, csr_read_data_s2, csr_updated_data_s2}) ); genvar i; for (i = 0; i < `NUM_THREADS; i++) begin - assign csr_pipe_wb_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i : + assign csr_pipe_commit_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i : (csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) : csr_read_data_s2; end diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 2597b298..775028e5 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -173,11 +173,13 @@ `define EX_BR 3'h2 `define EX_MUL 3'h3 `define EX_LSU 3'h4 -`define EX_FPU 3'h5 -`define EX_CSR 3'h6 -`define EX_GPU 3'h7 +`define EX_CSR 3'h5 +`define EX_GPU 3'h6 `define EX_BITS 3 +`define NUM_EXS 6 +`define NE_BITS `LOG2UP(`NUM_EXS) + `define WB_NO 2'h0 `define WB_ALU 2'h1 `define WB_MEM 2'h2 @@ -374,7 +376,6 @@ task print_ex_type; `EX_LSU: $write("LSU"); `EX_CSR: $write("CSR"); `EX_MUL: $write("MUL"); - `EX_FPU: $write("FPU"); `EX_GPU: $write("GPU"); default: $write("NOP"); endcase diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 7b424431..166e5c53 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -17,43 +17,29 @@ module VX_execute #( VX_cache_core_req_if dcache_req_if, VX_cache_core_rsp_if dcache_rsp_if, - // inputs - VX_execute_if execute_if, - VX_wb_if writeback_if, + // perf + VX_perf_cntrs_if perf_cntrs_if, + + // inputs + VX_alu_req_if alu_req_if, + VX_branch_req_if branch_req_if, + VX_lsu_req_if lsu_req_if, + VX_csr_req_if csr_req_if, + VX_mul_req_if mul_req_if, + VX_gpu_req_if gpu_req_if, // outputs - VX_branch_rsp_if branch_rsp_if, + VX_branch_ctl_if branch_ctl_if, VX_warp_ctl_if warp_ctl_if, - VX_wb_if alu_wb_if, - VX_wb_if branch_wb_if, - VX_wb_if lsu_wb_if, - VX_wb_if csr_wb_if, - VX_wb_if mul_wb_if, - - input wire notify_commit, + VX_commit_if alu_commit_if, + VX_commit_if branch_commit_if, + VX_commit_if lsu_commit_if, + VX_commit_if csr_commit_if, + VX_commit_if mul_commit_if, + VX_commit_if gpu_commit_if, + output wire ebreak ); - VX_alu_req_if alu_req_if(); - VX_branch_req_if branch_req_if(); - VX_csr_req_if csr_req_if(); - VX_lsu_req_if lsu_req_if(); - VX_mul_req_if mul_req_if(); - VX_gpu_req_if gpu_req_if(); - - VX_gpr_stage #( - .CORE_ID(CORE_ID) - ) gpr_stage ( - .clk (clk), - .reset (reset), - .writeback_if (writeback_if), - .execute_if (execute_if), - .alu_req_if (alu_req_if), - .branch_req_if (branch_req_if), - .lsu_req_if (lsu_req_if), - .csr_req_if (csr_req_if), - .mul_req_if (mul_req_if), - .gpu_req_if (gpu_req_if) - ); VX_alu_unit #( .CORE_ID(CORE_ID) @@ -61,7 +47,7 @@ module VX_execute #( .clk (clk), .reset (reset), .alu_req_if (alu_req_if), - .alu_wb_if (alu_wb_if) + .alu_commit_if (alu_commit_if) ); VX_branch_unit #( @@ -70,8 +56,8 @@ module VX_execute #( .clk (clk), .reset (reset), .branch_req_if (branch_req_if), - .branch_rsp_if (branch_rsp_if), - .branch_wb_if (branch_wb_if) + .branch_ctl_if (branch_ctl_if), + .branch_commit_if(branch_commit_if) ); VX_lsu_unit #( @@ -83,19 +69,19 @@ module VX_execute #( .dcache_req_if (dcache_req_if), .dcache_rsp_if (dcache_rsp_if), .lsu_req_if (lsu_req_if), - .lsu_wb_if (lsu_wb_if) + .lsu_commit_if (lsu_commit_if) ); VX_csr_pipe #( .CORE_ID(CORE_ID) ) csr_pipe ( .clk (clk), - .reset (reset), - .csr_req_if (csr_req_if), - .csr_io_req_if (csr_io_req_if), - .csr_wb_if (csr_wb_if), + .reset (reset), + .perf_cntrs_if (perf_cntrs_if), + .csr_io_req_if (csr_io_req_if), .csr_io_rsp_if (csr_io_rsp_if), - .notify_commit (notify_commit) + .csr_req_if (csr_req_if), + .csr_commit_if (csr_commit_if) ); VX_mul_unit #( @@ -104,14 +90,15 @@ module VX_execute #( .clk (clk), .reset (reset), .mul_req_if (mul_req_if), - .mul_wb_if (mul_wb_if) + .mul_commit_if (mul_commit_if) ); VX_gpu_unit #( .CORE_ID(CORE_ID) ) gpu_unit ( .gpu_req_if (gpu_req_if), - .warp_ctl_if (warp_ctl_if) + .warp_ctl_if (warp_ctl_if), + .gpu_commit_if (gpu_commit_if) ); assign ebreak = (| branch_req_if.valid) && (branch_req_if.br_op == `BR_EBREAK || branch_req_if.br_op == `BR_ECALL); diff --git a/hw/rtl/VX_fetch.v b/hw/rtl/VX_fetch.v index 2f38adac..88ac1e54 100644 --- a/hw/rtl/VX_fetch.v +++ b/hw/rtl/VX_fetch.v @@ -13,7 +13,7 @@ module VX_fetch #( // inputs VX_wstall_if wstall_if, VX_join_if join_if, - VX_branch_rsp_if branch_rsp_if, + VX_branch_ctl_if branch_ctl_if, VX_warp_ctl_if warp_ctl_if, // outputs @@ -32,7 +32,7 @@ module VX_fetch #( .warp_ctl_if (warp_ctl_if), .wstall_if (wstall_if), .join_if (join_if), - .branch_rsp_if (branch_rsp_if), + .branch_ctl_if (branch_ctl_if), .ifetch_req_if (ifetch_req_if), .ifetch_rsp_if (ifetch_rsp_if), .busy (busy) diff --git a/hw/rtl/VX_gpr_mux.v b/hw/rtl/VX_gpr_mux.v index 6bec71da..4b5d9615 100644 --- a/hw/rtl/VX_gpr_mux.v +++ b/hw/rtl/VX_gpr_mux.v @@ -2,7 +2,7 @@ module VX_gpr_mux ( // inputs - VX_execute_if execute_if, + VX_execute_if execute_if, input wire [`NUM_THREADS-1:0][31:0] rs1_data, input wire [`NUM_THREADS-1:0][31:0] rs2_data, @@ -80,9 +80,10 @@ module VX_gpr_mux ( // GPU unit assign gpu_req_if.valid = execute_if.valid & is_gpu; assign gpu_req_if.warp_num = execute_if.warp_num; - assign gpu_req_if.next_PC = execute_if.next_PC; + assign gpu_req_if.curr_PC = execute_if.curr_PC; assign gpu_req_if.gpu_op = `GPU_OP(execute_if.instr_op); assign gpu_req_if.rs1_data = rs1_data; assign gpu_req_if.rs2_data = rs2_data[0]; + assign gpu_req_if.next_PC = execute_if.next_PC; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index fa3fc004..78e1ea90 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -12,17 +12,17 @@ module VX_gpr_ram ( ); `ifndef ASIC - reg [`NUM_THREADS-1:0][3:0][7:0] ram [31:0]; + reg [`NUM_THREADS-1:0][3:0][7:0] ram [`NUM_REGS-1:0]; integer i; initial begin // initialize r0 to 0 for (i = 0; i < `NUM_THREADS; i++) begin - ram[i][0] = 0; - ram[i][1] = 0; - ram[i][2] = 0; - ram[i][3] = 0; + ram[0][i][0] = 0; + ram[0][i][1] = 0; + ram[0][i][2] = 0; + ram[0][i][3] = 0; end end diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 85c40d02..bfa46e58 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -6,9 +6,9 @@ module VX_gpr_stage #( input wire clk, input wire reset, - // inputs - VX_execute_if execute_if, + // inputs VX_wb_if writeback_if, + VX_execute_if execute_if, // outputs VX_alu_req_if alu_req_if, @@ -38,7 +38,7 @@ module VX_gpr_stage #( generate for (i = 0; i < `NUM_WARPS; i++) begin - assign we[i] = writeback_if.valid & {`NUM_THREADS{(writeback_if.wb != 0) && (i == writeback_if.warp_num)}}; + assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}}; VX_gpr_ram gpr_ram ( .clk (clk), .we (we[i]), diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 292b1f23..3cab2bee 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -7,7 +7,8 @@ module VX_gpu_unit #( VX_gpu_req_if gpu_req_if, // Output - VX_warp_ctl_if warp_ctl_if + VX_warp_ctl_if warp_ctl_if, + VX_commit_if gpu_commit_if ); wire [`NUM_THREADS-1:0] curr_valids = gpu_req_if.valid; wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN); @@ -76,4 +77,10 @@ module VX_gpu_unit #( assign gpu_req_if.ready = 1'b1; // has no stalls + // commit + assign gpu_commit_if.valid = gpu_req_if.valid; + assign gpu_commit_if.warp_num = gpu_req_if.warp_num; + assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC; + assign gpu_commit_if.wb = `WB_NO; + endmodule \ No newline at end of file diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 145ac917..125b070b 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -1,6 +1,6 @@ `include "VX_define.vh" -module VX_issue #( +module VX_issue #( parameter CORE_ID = 0 ) ( input wire clk, @@ -9,79 +9,41 @@ module VX_issue #( VX_decode_if decode_if, VX_wb_if writeback_if, - VX_execute_if execute_if, - - output wire is_empty + VX_alu_req_if alu_req_if, + VX_branch_req_if branch_req_if, + VX_lsu_req_if lsu_req_if, + VX_csr_req_if csr_req_if, + VX_mul_req_if mul_req_if, + VX_gpu_req_if gpu_req_if ); - localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1); + VX_execute_if execute_if(); - reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; - reg [CTVW-1:0] count_valid; - - wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0); - wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0); - wire rd_rename = (rename_table[decode_if.warp_num][decode_if.rd ] != 0); + VX_scheduler #( + .CORE_ID(CORE_ID) + ) scheduler ( + .clk (clk), + .reset (reset), + .decode_if (decode_if), + .writeback_if (writeback_if), + .execute_if (execute_if), + `UNUSED_PIN (is_empty) + ); - wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1); - wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2); - wire rd_rename_qual = (rd_rename) && (decode_if.wb != 0); + VX_gpr_stage #( + .CORE_ID(CORE_ID) + ) gpr_stage ( + .clk (clk), + .reset (reset), + + .execute_if (execute_if), + .writeback_if (writeback_if), - wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual); - - wire ex_stalled = (| decode_if.valid) - && ((!execute_if.alu_ready && (decode_if.ex_type == `EX_ALU)) - || (!execute_if.br_ready && (decode_if.ex_type == `EX_BR)) - || (!execute_if.lsu_ready && (decode_if.ex_type == `EX_LSU)) - || (!execute_if.csr_ready && (decode_if.ex_type == `EX_CSR)) - || (!execute_if.mul_ready && (decode_if.ex_type == `EX_MUL)) - || (!execute_if.gpu_ready && (decode_if.ex_type == `EX_GPU))); - - wire stall = rename_valid || ex_stalled; - - wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && (decode_if.rd != 0) && ~stall; - - wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0); - - wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid; - - reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) : - (~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) : - count_valid; - integer i, w; - - always @(posedge clk) begin - if (reset) begin - for (w = 0; w < `NUM_WARPS; w++) begin - for (i = 0; i < 32; i++) begin - rename_table[w][i] <= 0; - end - end - count_valid <= 0; - end else begin - if (acquire_rd) begin - rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid; - end - if (release_rd) begin - assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0); - rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask; - end - count_valid <= count_valid_next; - end - end - - VX_generic_register #( - .N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS), - ) schedule_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (0), - .in ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.instr_op, decode_if.wb}), - .out ({execute_if.valid, execute_if.warp_num, execute_if.curr_PC, execute_if.next_PC, execute_if.rd, execute_if.rs1, execute_if.rs2, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.ex_type, execute_if.instr_op, execute_if.wb}) - ); - - assign decode_if.ready = ~stall; - - assign is_empty = (0 == count_valid); + .alu_req_if (alu_req_if), + .branch_req_if (branch_req_if), + .lsu_req_if (lsu_req_if), + .csr_req_if (csr_req_if), + .mul_req_if (mul_req_if), + .gpu_req_if (gpu_req_if) + ); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 09add222..fb0fe514 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -16,7 +16,7 @@ module VX_lsu_unit #( VX_lsu_req_if lsu_req_if, // outputs - VX_wb_if lsu_wb_if + VX_commit_if lsu_commit_if ); wire [`NUM_THREADS-1:0] use_valid; @@ -108,7 +108,7 @@ module VX_lsu_unit #( .full (mrq_full), .pop (mrq_pop), .read_addr (mrq_read_addr), - .read_data ({dbg_mrq_write_addr, lsu_wb_if.curr_PC, lsu_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_wb_if.rd, lsu_wb_if.warp_num}), + .read_data ({dbg_mrq_write_addr, lsu_commit_if.curr_PC, lsu_commit_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_commit_if.rd, lsu_commit_if.warp_num}), `UNUSED_PIN (empty) ); @@ -151,11 +151,11 @@ module VX_lsu_unit #( end end - assign lsu_wb_if.valid = dcache_rsp_if.valid; - assign lsu_wb_if.data = core_rsp_data; + assign lsu_commit_if.valid = dcache_rsp_if.valid; + assign lsu_commit_if.data = core_rsp_data; // Can accept new cache response - assign dcache_rsp_if.ready = lsu_wb_if.ready; + assign dcache_rsp_if.ready = lsu_commit_if.ready; `SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid); `SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num); @@ -180,7 +180,7 @@ module VX_lsu_unit #( end if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin $display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h", - $time, CORE_ID, lsu_wb_if.valid, lsu_wb_if.warp_num, lsu_wb_if.curr_PC, mrq_read_addr, lsu_wb_if.rd, lsu_wb_if.data); + $time, CORE_ID, lsu_commit_if.valid, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, mrq_read_addr, lsu_commit_if.rd, lsu_commit_if.data); end end `endif diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index c0a80de1..7f349f95 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -10,7 +10,7 @@ module VX_mul_unit #( VX_mul_req_if mul_req_if, // Outputs - VX_wb_if mul_wb_if + VX_commit_if mul_commit_if ); wire [`NUM_THREADS-1:0][31:0] alu_result; wire [`NUM_THREADS-1:0][63:0] mul_result; @@ -71,7 +71,7 @@ module VX_mul_unit #( `MUL_DIV, `MUL_DIVU: alu_result[i] = (alu_in2[i] == 0) ? 32'hffffffff : div_result[i]; `MUL_REM, - `MUL_REMU: alu_result[i] = (alu_in2 == 0) ? alu_in1[i] : rem_result[i]; + `MUL_REMU: alu_result[i] = (alu_in2[i] == 0) ? alu_in1[i] : rem_result[i]; default: alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC, FENCE endcase end @@ -104,7 +104,7 @@ module VX_mul_unit #( wire pipeline_stall = ~result_avail && (| mul_req_if.valid); - wire stall = (~mul_wb_if.ready && (| mul_wb_if.valid)) + wire stall = (~mul_commit_if.ready && (| mul_commit_if.valid)) || pipeline_stall; VX_generic_register #( @@ -115,7 +115,7 @@ module VX_mul_unit #( .stall (stall), .flush (0), .in ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb, alu_result}), - .out ({mul_wb_if.valid, mul_wb_if.warp_num, mul_wb_if.curr_PC, mul_wb_if.rd, mul_wb_if.wb, mul_wb_if.data}) + .out ({mul_commit_if.valid, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data}) ); assign mul_req_if.ready = ~stall; diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 7fee578b..2f007b35 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -101,22 +101,27 @@ module VX_pipeline #( assign csr_io_rsp_data = csr_io_rsp_if.data; assign csr_io_rsp_if.ready = csr_io_rsp_ready; + VX_perf_cntrs_if perf_cntrs_if(); VX_decode_if decode_if(); - VX_execute_if execute_if(); - VX_branch_rsp_if branch_rsp_if(); + VX_branch_ctl_if branch_ctl_if(); VX_warp_ctl_if warp_ctl_if(); VX_ifetch_rsp_if ifetch_rsp_if(); + VX_alu_req_if alu_req_if(); + VX_branch_req_if branch_req_if(); + VX_lsu_req_if lsu_req_if(); + VX_csr_req_if csr_req_if(); + VX_mul_req_if mul_req_if(); + VX_gpu_req_if gpu_req_if(); VX_wb_if writeback_if(); VX_wstall_if wstall_if(); VX_join_if join_if(); - VX_wb_if alu_wb_if(); - VX_wb_if branch_wb_if(); - VX_wb_if lsu_wb_if(); - VX_wb_if csr_wb_if(); - VX_wb_if mul_wb_if(); + VX_commit_if alu_commit_if(); + VX_commit_if branch_commit_if(); + VX_commit_if lsu_commit_if(); + VX_commit_if csr_commit_if(); + VX_commit_if mul_commit_if(); + VX_commit_if gpu_commit_if(); - wire notify_commit; - VX_fetch #( .CORE_ID(CORE_ID) ) fetch ( @@ -127,7 +132,7 @@ module VX_pipeline #( .wstall_if (wstall_if), .join_if (join_if), .warp_ctl_if (warp_ctl_if), - .branch_rsp_if (branch_rsp_if), + .branch_ctl_if (branch_ctl_if), .ifetch_rsp_if (ifetch_rsp_if), .busy (busy) ); @@ -148,10 +153,16 @@ module VX_pipeline #( ) issue ( .clk (clk), .reset (reset), + .decode_if (decode_if), .writeback_if (writeback_if), - .execute_if (execute_if), - `UNUSED_PIN (is_empty) + + .alu_req_if (alu_req_if), + .branch_req_if (branch_req_if), + .lsu_req_if (lsu_req_if), + .csr_req_if (csr_req_if), + .mul_req_if (mul_req_if), + .gpu_req_if (gpu_req_if) ); VX_execute #( @@ -160,35 +171,49 @@ module VX_pipeline #( `SCOPE_SIGNALS_LSU_BIND .clk (clk), .reset (reset), + .dcache_req_if (core_dcache_req_if), .dcache_rsp_if (core_dcache_rsp_if), + .csr_io_req_if (csr_io_req_if), - .csr_io_rsp_if (csr_io_rsp_if), - .execute_if (execute_if), - .writeback_if (writeback_if), + .csr_io_rsp_if (csr_io_rsp_if), + + .perf_cntrs_if (perf_cntrs_if), + + .alu_req_if (alu_req_if), + .branch_req_if (branch_req_if), + .lsu_req_if (lsu_req_if), + .csr_req_if (csr_req_if), + .mul_req_if (mul_req_if), + .gpu_req_if (gpu_req_if), + .warp_ctl_if (warp_ctl_if), - .branch_rsp_if (branch_rsp_if), - .alu_wb_if (alu_wb_if), - .branch_wb_if (branch_wb_if), - .lsu_wb_if (lsu_wb_if), - .csr_wb_if (csr_wb_if), - .mul_wb_if (mul_wb_if), - .notify_commit (notify_commit), + .branch_ctl_if (branch_ctl_if), + .alu_commit_if (alu_commit_if), + .branch_commit_if(branch_commit_if), + .lsu_commit_if (lsu_commit_if), + .csr_commit_if (csr_commit_if), + .mul_commit_if (mul_commit_if), + .gpu_commit_if (gpu_commit_if), + .ebreak (ebreak) ); - VX_writeback #( + VX_commit #( .CORE_ID(CORE_ID) - ) writeback ( + ) commit ( .clk (clk), .reset (reset), - .alu_wb_if (alu_wb_if), - .branch_wb_if (branch_wb_if), - .lsu_wb_if (lsu_wb_if), - .csr_wb_if (csr_wb_if), - .mul_wb_if (mul_wb_if), + + .alu_commit_if (alu_commit_if), + .branch_commit_if(branch_commit_if), + .lsu_commit_if (lsu_commit_if), + .csr_commit_if (csr_commit_if), + .mul_commit_if (mul_commit_if), + .gpu_commit_if (gpu_commit_if), + .writeback_if (writeback_if), - .notify_commit (notify_commit) + .perf_cntrs_if (perf_cntrs_if) ); assign dcache_req_valid = core_dcache_req_if.valid; @@ -223,12 +248,4 @@ module VX_pipeline #( `SCOPE_ASSIGN(scope_exec_delay, exec_delay); `SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_delay); -`ifdef DBG_PRINT_PIPELINE - always @(posedge clk) begin - if ((| execute_if.valid) && (~execute_if.alu_ready || ~execute_if.br_ready || ~execute_if.lsu_ready || ~execute_if.csr_ready || ~execute_if.mul_ready || ~execute_if.gpu_ready)) begin - $display("%t: Core%0d-stall: warp=%0d, PC=%0h, alu=%b, br=%b, lsu=%b, csr=%b, mul=%b, gpu=%b", $time, CORE_ID, execute_if.warp_num, execute_if.curr_PC, ~execute_if.alu_ready, ~execute_if.br_ready, ~execute_if.lsu_ready, ~execute_if.csr_ready, ~execute_if.mul_ready, ~execute_if.gpu_ready); - end - end -`endif - endmodule diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v new file mode 100644 index 00000000..0b90695c --- /dev/null +++ b/hw/rtl/VX_scheduler.v @@ -0,0 +1,86 @@ +`include "VX_define.vh" + +module VX_scheduler #( + parameter CORE_ID = 0 +) ( + input wire clk, + input wire reset, + + VX_decode_if decode_if, + VX_wb_if writeback_if, + + VX_execute_if execute_if, + output wire is_empty +); + localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1); + + reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; + reg [CTVW-1:0] count_valid; + + wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0); + wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0); + wire rd_rename = (rename_table[decode_if.warp_num][decode_if.rd ] != 0); + + wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1); + wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2); + wire rd_rename_qual = (rd_rename) && (decode_if.wb != 0); + + wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual); + + wire ex_stalled = (| decode_if.valid) + && ((!execute_if.alu_ready && (decode_if.ex_type == `EX_ALU)) + || (!execute_if.br_ready && (decode_if.ex_type == `EX_BR)) + || (!execute_if.lsu_ready && (decode_if.ex_type == `EX_LSU)) + || (!execute_if.csr_ready && (decode_if.ex_type == `EX_CSR)) + || (!execute_if.mul_ready && (decode_if.ex_type == `EX_MUL)) + || (!execute_if.gpu_ready && (decode_if.ex_type == `EX_GPU))); + + wire stall = rename_valid || ex_stalled; + + wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && ~stall; + + wire release_rd = (| writeback_if.valid); + + wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid; + + reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) : + (~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) : + count_valid; + integer i, w; + + always @(posedge clk) begin + if (reset) begin + for (w = 0; w < `NUM_WARPS; w++) begin + for (i = 0; i < 32; i++) begin + rename_table[w][i] <= 0; + end + end + count_valid <= 0; + end else begin + if (acquire_rd) begin + rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid; + end + if (release_rd) begin + assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0); + rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask; + end + count_valid <= count_valid_next; + end + end + + VX_generic_register #( + .N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS), + ) schedule_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (0), + .in ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.instr_op, decode_if.wb}), + .out ({execute_if.valid, execute_if.warp_num, execute_if.curr_PC, execute_if.next_PC, execute_if.rd, execute_if.rs1, execute_if.rs2, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.ex_type, execute_if.instr_op, execute_if.wb}) + ); + + assign decode_if.ready = ~stall; + + assign is_empty = (0 == count_valid); + +endmodule \ No newline at end of file diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 09ec8564..aa34593d 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -9,7 +9,7 @@ module VX_warp_sched #( VX_warp_ctl_if warp_ctl_if, VX_wstall_if wstall_if, VX_join_if join_if, - VX_branch_rsp_if branch_rsp_if, + VX_branch_ctl_if branch_ctl_if, VX_ifetch_rsp_if ifetch_rsp_if, VX_ifetch_req_if ifetch_req_if, @@ -158,11 +158,11 @@ module VX_warp_sched #( end // Branch - if (branch_rsp_if.valid) begin - if (branch_rsp_if.taken) begin - warp_pcs[branch_rsp_if.warp_num] <= branch_rsp_if.dest; + if (branch_ctl_if.valid) begin + if (branch_ctl_if.taken) begin + warp_pcs[branch_ctl_if.warp_num] <= branch_ctl_if.dest; end - warp_stalled[branch_rsp_if.warp_num] <= 0; + warp_stalled[branch_ctl_if.warp_num] <= 0; end // Lock/Release @@ -230,7 +230,7 @@ module VX_warp_sched #( ); end - wire should_bra = (branch_rsp_if.valid && branch_rsp_if.taken && (warp_to_schedule == branch_rsp_if.warp_num)); + wire should_bra = (branch_ctl_if.valid && branch_ctl_if.taken && (warp_to_schedule == branch_ctl_if.warp_num)); assign hazard = should_bra && schedule; @@ -244,7 +244,7 @@ module VX_warp_sched #( assign warp_pc = real_use_wspawn ? use_wspawn_pc : warp_pcs[warp_to_schedule]; - assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]); + assign thread_mask = global_stall ? 0 : (real_use_wspawn ? `NUM_THREADS'(1) : thread_masks[warp_to_schedule]); assign warp_num = warp_to_schedule; diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index cd642c3d..0d4cef3f 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -3,109 +3,84 @@ module VX_writeback #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // inputs - VX_wb_if alu_wb_if, - VX_wb_if branch_wb_if, - VX_wb_if lsu_wb_if, - VX_wb_if mul_wb_if, - VX_wb_if csr_wb_if, + VX_commit_if alu_commit_if, + VX_commit_if branch_commit_if, + VX_commit_if lsu_commit_if, + VX_commit_if mul_commit_if, + VX_commit_if csr_commit_if, // outputs - VX_wb_if writeback_if, - output wire notify_commit + VX_wb_if writeback_if ); - wire br_valid = (| branch_wb_if.valid); - wire lsu_valid = (| lsu_wb_if.valid); - wire mul_valid = (| mul_wb_if.valid); - wire alu_valid = (| alu_wb_if.valid); - wire csr_valid = (| csr_wb_if.valid); + wire br_valid = (| branch_commit_if.valid) && (branch_commit_if.wb != `WB_NO); + wire lsu_valid = (| lsu_commit_if.valid) && (lsu_commit_if.wb != `WB_NO); + wire mul_valid = (| mul_commit_if.valid) && (mul_commit_if.wb != `WB_NO); + wire alu_valid = (| alu_commit_if.valid) && (alu_commit_if.wb != `WB_NO); + wire csr_valid = (| csr_commit_if.valid) && (csr_commit_if.wb != `WB_NO); VX_wb_if writeback_tmp_if(); - assign writeback_tmp_if.valid = br_valid ? branch_wb_if.valid : - lsu_valid ? lsu_wb_if.valid : - mul_valid ? mul_wb_if.valid : - alu_valid ? alu_wb_if.valid : - csr_valid ? csr_wb_if.valid : + assign writeback_tmp_if.valid = br_valid ? branch_commit_if.valid : + lsu_valid ? lsu_commit_if.valid : + mul_valid ? mul_commit_if.valid : + alu_valid ? alu_commit_if.valid : + csr_valid ? csr_commit_if.valid : 0; - assign writeback_tmp_if.warp_num = br_valid ? branch_wb_if.warp_num : - lsu_valid ? lsu_wb_if.warp_num : - mul_valid ? mul_wb_if.warp_num : - alu_valid ? alu_wb_if.warp_num : - csr_valid ? csr_wb_if.warp_num : + assign writeback_tmp_if.warp_num = br_valid ? branch_commit_if.warp_num : + lsu_valid ? lsu_commit_if.warp_num : + mul_valid ? mul_commit_if.warp_num : + alu_valid ? alu_commit_if.warp_num : + csr_valid ? csr_commit_if.warp_num : - 0; - - assign writeback_tmp_if.curr_PC = br_valid ? branch_wb_if.curr_PC : - lsu_valid ? lsu_wb_if.curr_PC : - mul_valid ? mul_wb_if.curr_PC : - alu_valid ? alu_wb_if.curr_PC : - csr_valid ? csr_wb_if.curr_PC : - 0; - - assign writeback_tmp_if.data = br_valid ? branch_wb_if.data : - lsu_valid ? lsu_wb_if.data : - mul_valid ? mul_wb_if.data : - alu_valid ? alu_wb_if.data : - csr_valid ? csr_wb_if.data : - 0; - - assign writeback_tmp_if.rd = br_valid ? branch_wb_if.rd : - lsu_valid ? lsu_wb_if.rd : - mul_valid ? mul_wb_if.rd : - alu_valid ? alu_wb_if.rd : - csr_valid ? csr_wb_if.rd : - 0; - - assign writeback_tmp_if.wb = br_valid ? branch_wb_if.wb : - lsu_valid ? lsu_wb_if.wb : - alu_valid ? alu_wb_if.wb : - csr_valid ? csr_wb_if.wb : - mul_valid ? mul_wb_if.wb : 0; + assign writeback_tmp_if.data = br_valid ? branch_commit_if.data : + lsu_valid ? lsu_commit_if.data : + mul_valid ? mul_commit_if.data : + alu_valid ? alu_commit_if.data : + csr_valid ? csr_commit_if.data : + 0; + + assign writeback_tmp_if.rd = br_valid ? branch_commit_if.rd : + lsu_valid ? lsu_commit_if.rd : + mul_valid ? mul_commit_if.rd : + alu_valid ? alu_commit_if.rd : + csr_valid ? csr_commit_if.rd : + 0; + wire stall = ~writeback_if.ready && (| writeback_if.valid); VX_generic_register #( - .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + (`NUM_THREADS * 32) + `WB_BITS) + .N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32)) ) wb_reg ( .clk (clk), .reset (reset), .stall (stall), .flush (0), - .in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.rd, writeback_tmp_if.data, writeback_tmp_if.wb}), - .out ({writeback_if.valid, writeback_if.warp_num, writeback_if.curr_PC, writeback_if.rd, writeback_if.data, writeback_if.wb}) + .in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data}), + .out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data}) ); - assign branch_wb_if.ready = !stall; - assign lsu_wb_if.ready = !stall && !br_valid; - assign mul_wb_if.ready = !stall && !br_valid && !lsu_valid; - assign alu_wb_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid; - assign csr_wb_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid && !alu_valid; + assign branch_commit_if.ready = !stall; + assign lsu_commit_if.ready = !stall && !br_valid; + assign mul_commit_if.ready = !stall && !br_valid && !lsu_valid; + assign alu_commit_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid; + assign csr_commit_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid && !alu_valid; - assign notify_commit = (| writeback_tmp_if.valid) && ~stall; - // special workaround to control RISC-V benchmarks termination on Verilator reg [31:0] last_data_wb /* verilator public */; always @(posedge clk) begin - if (notify_commit && (writeback_tmp_if.wb != 0) && (writeback_tmp_if.rd == 28)) begin + if ((| writeback_tmp_if.valid) && ~stall && (writeback_tmp_if.rd == 28)) begin last_data_wb <= writeback_tmp_if.data[0]; end end -`ifdef DBG_PRINT_PIPELINE - always @(posedge clk) begin - if ((| writeback_tmp_if.valid) && ~stall) begin - $display("%t: Core%0d-WB: warp=%0d, PC=%0h, rd=%0d, wb=%0d, data=%0h", $time, CORE_ID, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.data); - end - end -`endif - endmodule diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 24e40f91..4a72e717 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -46,7 +46,7 @@ module VX_cache_core_rsp_merge #( reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; reg [NUM_BANKS-1:0] core_rsp_bank_select; - wire stall = ~core_rsp_ready; + wire stall = ~core_rsp_ready && (| core_rsp_valid); integer i; diff --git a/hw/rtl/interfaces/VX_branch_ctl_if.v b/hw/rtl/interfaces/VX_branch_ctl_if.v new file mode 100644 index 00000000..325b2671 --- /dev/null +++ b/hw/rtl/interfaces/VX_branch_ctl_if.v @@ -0,0 +1,15 @@ +`ifndef VX_BRANCH_RSP_IF +`define VX_BRANCH_RSP_IF + +`include "VX_define.vh" + +interface VX_branch_ctl_if (); + + wire valid; + wire [`NW_BITS-1:0] warp_num; + wire taken; + wire [31:0] dest; + +endinterface + +`endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_commit_if.v b/hw/rtl/interfaces/VX_commit_if.v new file mode 100644 index 00000000..6e969c77 --- /dev/null +++ b/hw/rtl/interfaces/VX_commit_if.v @@ -0,0 +1,19 @@ +`ifndef VX_COMMIT_IF +`define VX_COMMIT_IF + +`include "VX_define.vh" + +interface VX_commit_if (); + + wire [`NUM_THREADS-1:0] valid; + wire [`NW_BITS-1:0] warp_num; + wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0][31:0] data; + wire [`NR_BITS-1:0] rd; + wire [`WB_BITS-1:0] wb; + wire is_io; + wire ready; + +endinterface + +`endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_execute_if.v b/hw/rtl/interfaces/VX_execute_if.v index ae1fe68b..8188c0e7 100644 --- a/hw/rtl/interfaces/VX_execute_if.v +++ b/hw/rtl/interfaces/VX_execute_if.v @@ -3,7 +3,7 @@ `include "VX_define.vh" -interface VX_execute_if(); +interface VX_execute_if (); wire [`NUM_THREADS-1:0] valid; wire [`NW_BITS-1:0] warp_num; diff --git a/hw/rtl/interfaces/VX_gpu_req_if.v b/hw/rtl/interfaces/VX_gpu_req_if.v index 13adb788..468fc073 100644 --- a/hw/rtl/interfaces/VX_gpu_req_if.v +++ b/hw/rtl/interfaces/VX_gpu_req_if.v @@ -7,12 +7,13 @@ interface VX_gpu_req_if(); wire [`NUM_THREADS-1:0] valid; wire [`NW_BITS-1:0] warp_num; - wire [31:0] next_PC; + wire [31:0] curr_PC; wire [`GPU_BITS-1:0] gpu_op; wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [31:0] rs2_data; + wire [31:0] next_PC; wire ready; diff --git a/hw/rtl/interfaces/VX_perf_cntrs_if.v b/hw/rtl/interfaces/VX_perf_cntrs_if.v new file mode 100644 index 00000000..daad411f --- /dev/null +++ b/hw/rtl/interfaces/VX_perf_cntrs_if.v @@ -0,0 +1,13 @@ +`ifndef VX_PERF_CNTRS_IF +`define VX_PERF_CNTRS_IF + +`include "VX_define.vh" + +interface VX_perf_cntrs_if (); + + wire [63:0] total_cycles; + wire [63:0] total_instrs; + +endinterface + +`endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_wb_if.v b/hw/rtl/interfaces/VX_wb_if.v index 4fb23d73..ceba5506 100644 --- a/hw/rtl/interfaces/VX_wb_if.v +++ b/hw/rtl/interfaces/VX_wb_if.v @@ -6,12 +6,9 @@ interface VX_wb_if (); wire [`NUM_THREADS-1:0] valid; - wire [`NW_BITS-1:0] warp_num; - wire [31:0] curr_PC; + wire [`NW_BITS-1:0] warp_num; wire [`NUM_THREADS-1:0][31:0] data; wire [`NR_BITS-1:0] rd; - wire [`WB_BITS-1:0] wb; - wire is_io; wire ready; endinterface diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index d3c164b1..213712a0 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -257,12 +257,12 @@ bool Simulator::run() { // check riscv-tests PASSED/FAILED status #if (NUM_CLUSTERS == 1 && NUM_CORES == 1) - int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf; + int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf; #else #if (NUM_CLUSTERS == 1) - int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf; + int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf; #else - int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf; + int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf; #endif #endif diff --git a/hw/simulate/testbench.cpp b/hw/simulate/testbench.cpp index 8515f1fd..51134059 100644 --- a/hw/simulate/testbench.cpp +++ b/hw/simulate/testbench.cpp @@ -22,7 +22,7 @@ int main(int argc, char **argv) "../../../benchmarks/riscv_tests/rv32ui-p-bltu.hex", "../../../benchmarks/riscv_tests/rv32ui-p-bne.hex", "../../../benchmarks/riscv_tests/rv32ui-p-jal.hex", - "../../../benchmarks/riscv_tests/rv32ui-p-jalr.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-jalr.hex", "../../../benchmarks/riscv_tests/rv32ui-p-lb.hex", "../../../benchmarks/riscv_tests/rv32ui-p-lbu.hex", "../../../benchmarks/riscv_tests/rv32ui-p-lh.hex",