diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 6a63e434..916bd884 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -100,12 +100,12 @@ module VX_alu_unit #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33), .R(1) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall_out), - .flush (1'b0), - .in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}), - .out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r}) + .clk (clk), + .reset (reset), + .stall (stall_out), + .flush (1'b0), + .data_in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}), + .data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r}) ); wire is_less = cmp_result_r[32]; diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index 1a53ab55..9c1b2cb4 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -64,12 +64,12 @@ module VX_commit #( .N(1 + CMTW), .R(1) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (1'b0), - .flush (1'b0), - .in ({commit_fire, commit_size}), - .out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size}) + .clk (clk), + .reset (reset), + .stall (1'b0), + .flush (1'b0), + .data_in ({commit_fire, commit_size}), + .data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size}) ); // Writeback diff --git a/hw/rtl/VX_csr_arb.v b/hw/rtl/VX_csr_arb.v index d2b00e16..3b203b22 100644 --- a/hw/rtl/VX_csr_arb.v +++ b/hw/rtl/VX_csr_arb.v @@ -1,57 +1,59 @@ `include "VX_define.vh" module VX_csr_arb ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // bus select - input wire select_io_req, - input wire select_io_rsp, + input wire select_io_req, + input wire select_io_rsp, // input requets - VX_csr_req_if csr_core_req_if, - VX_csr_io_req_if csr_io_req_if, + VX_csr_req_if csr_core_req_if, + VX_csr_io_req_if csr_io_req_if, // output request - VX_csr_req_if csr_req_if, + VX_csr_pipe_req_if csr_pipe_req_if, // input response - VX_commit_if csr_rsp_if, + VX_commit_if csr_pipe_rsp_if, // outputs responses - VX_commit_if csr_commit_if, - VX_csr_io_rsp_if csr_io_rsp_if + VX_commit_if csr_commit_if, + VX_csr_io_rsp_if csr_io_rsp_if ); `UNUSED_VAR (clk) `UNUSED_VAR (reset) - // requests - assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid; - assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0; - assign csr_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0; - assign csr_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0; - assign csr_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS); - assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr; - assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0); - assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0; - assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0; - assign csr_req_if.is_io = select_io_req; + wire [31:0] csr_core_req_mask = csr_core_req_if.rs2_is_imm ? 32'(csr_core_req_if.rs1) : csr_core_req_if.rs1_data; - assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req); - assign csr_io_req_if.ready = csr_req_if.ready && select_io_req; + // requests + assign csr_pipe_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid; + assign csr_pipe_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0; + assign csr_pipe_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0; + assign csr_pipe_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0; + assign csr_pipe_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS); + assign csr_pipe_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr; + assign csr_pipe_req_if.csr_mask = (~select_io_req) ? csr_core_req_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0); + assign csr_pipe_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0; + assign csr_pipe_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0; + assign csr_pipe_req_if.is_io = select_io_req; + + assign csr_core_req_if.ready = csr_pipe_req_if.ready && (~select_io_req); + assign csr_io_req_if.ready = csr_pipe_req_if.ready && select_io_req; // responses - assign csr_io_rsp_if.valid = csr_rsp_if.valid & select_io_rsp; - assign csr_io_rsp_if.data = csr_rsp_if.data[0]; + assign csr_io_rsp_if.valid = csr_pipe_rsp_if.valid & select_io_rsp; + assign csr_io_rsp_if.data = csr_pipe_rsp_if.data[0]; - assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp; - assign csr_commit_if.wid = csr_rsp_if.wid; - assign csr_commit_if.tmask = csr_rsp_if.tmask; - assign csr_commit_if.PC = csr_rsp_if.PC; - assign csr_commit_if.rd = csr_rsp_if.rd; - assign csr_commit_if.wb = csr_rsp_if.wb; - assign csr_commit_if.data = csr_rsp_if.data; + assign csr_commit_if.valid = csr_pipe_rsp_if.valid & ~select_io_rsp; + assign csr_commit_if.wid = csr_pipe_rsp_if.wid; + assign csr_commit_if.tmask = csr_pipe_rsp_if.tmask; + assign csr_commit_if.PC = csr_pipe_rsp_if.PC; + assign csr_commit_if.rd = csr_pipe_rsp_if.rd; + assign csr_commit_if.wb = csr_pipe_rsp_if.wb; + assign csr_commit_if.data = csr_pipe_rsp_if.data; - assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready; + assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready; endmodule diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 325c2d7c..a8d4f050 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -12,15 +12,15 @@ module VX_csr_unit #( VX_csr_io_req_if csr_io_req_if, VX_csr_io_rsp_if csr_io_rsp_if, - VX_csr_req_if csr_req_if, + VX_csr_req_if csr_req_if, VX_commit_if csr_commit_if, input wire busy, input wire[`NUM_WARPS-1:0] fpu_pending, output wire[`NUM_WARPS-1:0] pending ); - VX_csr_req_if csr_pipe_req_if(); - VX_commit_if csr_pipe_rsp_if(); + VX_csr_pipe_req_if csr_pipe_req_if(); + VX_commit_if csr_pipe_rsp_if(); wire select_io_req = csr_io_req_if.valid; wire select_io_rsp; @@ -34,9 +34,9 @@ module VX_csr_unit #( .csr_core_req_if (csr_req_if), .csr_io_req_if (csr_io_req_if), - .csr_req_if (csr_pipe_req_if), + .csr_pipe_req_if (csr_pipe_req_if), - .csr_rsp_if (csr_pipe_rsp_if), + .csr_pipe_rsp_if (csr_pipe_rsp_if), .csr_io_rsp_if (csr_io_rsp_if), .csr_commit_if (csr_commit_if) ); @@ -105,12 +105,12 @@ module VX_csr_unit #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32), .R(1) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall_out), - .flush (1'b0), - .in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}), - .out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1}) + .clk (clk), + .reset (reset), + .stall (stall_out), + .flush (1'b0), + .data_in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}), + .data_out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1}) ); for (genvar i = 0; i < `NUM_THREADS; i++) begin diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 9683c8f8..ac4f45d7 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -347,11 +347,9 @@ module VX_decode #( assign decode_if.rd = rd; assign decode_if.rs1 = rs1_qual; assign decode_if.rs2 = rs2; - assign decode_if.rs3 = 0; + assign decode_if.rs3 = rs3; `endif - assign decode_if.use_rs3 = use_rs3; - assign decode_if.used_regs = ((`NUM_REGS)'(use_rd) << decode_if.rd) | ((`NUM_REGS)'(use_rs1) << decode_if.rs1) | ((`NUM_REGS)'(use_rs2) << decode_if.rs2) diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index c4ae3292..2d9f98f2 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -117,6 +117,8 @@ module VX_execute #( .pending (fpu_pending) ); `else + `UNUSED_VAR (csr_pending) + `UNUSED_VAR (fpu_to_csr_if.read_frm) assign fpu_req_if.ready = 0; assign fpu_commit_if.valid = 0; assign fpu_commit_if.wid = 0; @@ -124,9 +126,12 @@ module VX_execute #( assign fpu_commit_if.tmask = 0; assign fpu_commit_if.wb = 0; assign fpu_commit_if.rd = 0; - assign fpu_commit_if.data = 0; - assign fpu_commit_if.has_fflags = 0; - assign fpu_commit_if.fflags = 0; + assign fpu_commit_if.data = 0; + assign fpu_to_csr_if.write_enable = 0; + assign fpu_to_csr_if.write_wid = 0; + assign fpu_to_csr_if.write_fflags = 0; + assign fpu_to_csr_if.read_wid = 0; + assign fpu_pending = 0; `endif VX_gpu_unit #( diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index 7dc95815..d44e8966 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -153,12 +153,12 @@ module VX_fpu_unit #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS), .R(1) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall_out), - .flush (1'b0), - .in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}), - .out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r}) + .clk (clk), + .reset (reset), + .stall (stall_out), + .flush (1'b0), + .data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}), + .data_out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r}) ); assign ready_out = ~stall_out; diff --git a/hw/rtl/VX_gpr_bypass.v b/hw/rtl/VX_gpr_bypass.v deleted file mode 100644 index e96f3862..00000000 --- a/hw/rtl/VX_gpr_bypass.v +++ /dev/null @@ -1,78 +0,0 @@ -`include "VX_platform.vh" - -module VX_gpr_bypass #( - parameter DATAW = 1, - parameter PASSTHRU = 0 -) ( - input wire clk, - input wire reset, - input wire push, - input wire pop, - input wire [DATAW-1:0] data_in, - output wire [DATAW-1:0] data_out -); - if (PASSTHRU) begin - reg delayed_push; - - always @(posedge clk) begin - if (reset) begin - delayed_push <= 0; - end else begin - delayed_push <= push; - assert(!delayed_push || pop); - end - end - - assign data_out = data_in; - - end else begin - - reg [DATAW-1:0] buffer, buffer2; - reg use_buffer, use_buffer2; - reg delayed_push; - - always @(posedge clk) begin - if (reset) begin - delayed_push <= 0; - use_buffer <= 0; - use_buffer2 <= 0; - end else begin - delayed_push <= push; - assert(!use_buffer2 || use_buffer); - if (pop) begin - use_buffer <= use_buffer2; - use_buffer2 <= 0; - end - if (delayed_push) begin - if (use_buffer) begin - assert(!use_buffer2); // full! - use_buffer <= 1; - if (!pop) begin - use_buffer2 <= 1; - end - end else if (!pop) begin - use_buffer <= 1; - end - end - end - - if (pop) begin - buffer <= buffer2; - end - if (delayed_push) begin - if (use_buffer) begin - if (pop) begin - buffer <= data_in; - end else begin - buffer2 <= data_in; - end - end else if (!pop) begin - buffer <= data_in; - end - end - end - - assign data_out = use_buffer ? buffer : data_in; - end - -endmodule \ No newline at end of file diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index ede625a9..3c25de66 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -4,33 +4,79 @@ module VX_gpr_ram ( input wire clk, - input wire [`NUM_THREADS-1:0] we, + input wire wren, + input wire [`NUM_THREADS-1:0] tmask, input wire [`NW_BITS+`NR_BITS-1:0] waddr, input wire [`NUM_THREADS-1:0][31:0] wdata, - input wire [`NW_BITS+`NR_BITS-1:0] rs1, - input wire [`NW_BITS+`NR_BITS-1:0] rs2, - output wire [`NUM_THREADS-1:0][31:0] rs1_data, - output wire [`NUM_THREADS-1:0][31:0] rs2_data + input wire [`NW_BITS+`NR_BITS-1:0] raddr1, + input wire [`NW_BITS+`NR_BITS-1:0] raddr2, + input wire [`NW_BITS+`NR_BITS-1:0] raddr3, + output wire [`NUM_THREADS-1:0][31:0] rdata1, + output wire [`NUM_THREADS-1:0][31:0] rdata2, + output wire [`NUM_THREADS-1:0][31:0] rdata3 ); + localparam RAM_DATAW = `NUM_THREADS * 32; + localparam RAM_ADDRW = `NW_BITS + `NR_BITS; + localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; + localparam RAM_BYTEEN = `NUM_THREADS * 4; - reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0]; - reg [`NUM_THREADS-1:0][31:0] q1, q2; - - always @(posedge clk) begin - for (integer i = 0; i < `NUM_THREADS; i++) begin - if (we[i]) begin - mem[waddr][i][0] <= wdata[i][07:00]; - mem[waddr][i][1] <= wdata[i][15:08]; - mem[waddr][i][2] <= wdata[i][23:16]; - mem[waddr][i][3] <= wdata[i][31:24]; + `UNUSED_VAR (raddr3) + +`ifdef EXT_F_ENABLE + + for (genvar i = 0; i < `NUM_THREADS; ++i) begin + + reg [31:0] mem_i [(RAM_DEPTH/2)-1:0]; + reg [31:0] mem_f [(RAM_DEPTH/2)-1:0]; + + initial mem_i = '{default: 0}; + + wire waddr_is_fp = waddr[RAM_ADDRW-1]; + wire raddr1_is_fp = raddr1[RAM_ADDRW-1]; + wire raddr2_is_fp = raddr2[RAM_ADDRW-1]; + + wire [RAM_ADDRW-2:0] waddr_qual = waddr[RAM_ADDRW-2:0]; + wire [RAM_ADDRW-2:0] raddr1_qual = raddr1[RAM_ADDRW-2:0]; + wire [RAM_ADDRW-2:0] raddr2_qual = raddr2[RAM_ADDRW-2:0]; + wire [RAM_ADDRW-2:0] raddr3_qual = raddr3[RAM_ADDRW-2:0]; + + always @(posedge clk) begin + if (wren && tmask[i] && !waddr_is_fp) begin + mem_i[waddr_qual] <= wdata[i]; end end - q1 <= mem[rs1]; - q2 <= mem[rs2]; + + always @(posedge clk) begin + if (wren && tmask[i] && waddr_is_fp) begin + mem_f[waddr_qual] <= wdata[i]; + end + end + + assign rdata1[i] = raddr1_is_fp ? mem_f[raddr1_qual] : mem_i[raddr1_qual]; + assign rdata2[i] = raddr2_is_fp ? mem_f[raddr2_qual] : mem_i[raddr2_qual]; + assign rdata3[i] = mem_f[raddr3_qual]; end - assign rs1_data = q1; - assign rs2_data = q2; +`else + + for (genvar i = 0; i < `NUM_THREADS; ++i) begin + + reg [31:0] mem [RAM_DEPTH-1:0]; + + initial mem = '{default: 0}; + + always @(posedge clk) begin + if (wren && tmask[i]) begin + mem[waddr] <= wdata[i]; + end + end + + assign rdata1[i] = mem[raddr1]; + assign rdata2[i] = mem[raddr2]; + assign rdata3[i] = 0; + end + +`endif endmodule diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 6a39fcd4..5a0af76e 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -15,91 +15,38 @@ module VX_gpr_stage #( ); `UNUSED_VAR (reset) - reg rsp_valid; - reg [`NW_BITS-1:0] rsp_wid; - reg [31:0] rsp_pc; - reg rs1_is_zero, rs2_is_zero; - - wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data; - wire [`NW_BITS+`NR_BITS-1:0] raddr1, raddr2; + wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3; + wire [`NW_BITS+`NR_BITS-1:0] waddr, raddr1, raddr2, raddr3; +`ifdef EXT_F_ENABLE + assign waddr = {writeback_if.rd[`NR_BITS-1], writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]}; + assign raddr1 = {gpr_req_if.rs1[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]}; + assign raddr2 = {gpr_req_if.rs2[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]}; + assign raddr3 = {gpr_req_if.rs3[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]}; +`else + assign waddr = {writeback_if.wid, writeback_if.rd}; + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; + assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3}; +`endif VX_gpr_ram gpr_ram ( - .clk (clk), - .we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.tmask), - .waddr ({writeback_if.wid, writeback_if.rd}), - .wdata (writeback_if.data), - .rs1 (raddr1), - .rs2 (raddr2), - .rs1_data (rs1_data), - .rs2_data (rs2_data) - ); - - always @(posedge clk) begin - if (reset) begin - rsp_valid <= 0; - end else begin - rsp_valid <= gpr_req_if.valid; - end - - rsp_wid <= gpr_req_if.wid; - rsp_pc <= gpr_req_if.PC; - rs1_is_zero <= (0 == gpr_req_if.rs1); - rs2_is_zero <= (0 == gpr_req_if.rs2); - end - -`ifdef EXT_F_ENABLE - - reg [`NUM_THREADS-1:0][31:0] rs3_data; - reg read_rs3, save_rs3; - - wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && !read_rs3; - wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready; - - always @(posedge clk) begin - if (reset) begin - read_rs3 <= 0; - end else begin - if (rs3_delay) begin - read_rs3 <= 1; - end else if (read_fire) begin - read_rs3 <= 0; - end - assert(!read_rs3 || rsp_wid == gpr_req_if.wid); - end - - if (rs3_delay) begin - save_rs3 <= 1; - end - if (save_rs3) begin - rs3_data <= rs1_data; - save_rs3 <= 0; - end - end - - assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)}; - assign gpr_req_if.ready = ~rs3_delay; - assign gpr_rsp_if.rs3_data = rs3_data; - -`else - - assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; - assign gpr_req_if.ready = 1; - assign gpr_rsp_if.rs3_data = 0; - - `UNUSED_VAR (gpr_req_if.valid); - `UNUSED_VAR (gpr_req_if.rs3); - `UNUSED_VAR (gpr_req_if.use_rs3); - `UNUSED_VAR (gpr_rsp_if.ready); + .clk (clk), + .wren (writeback_if.valid), + .tmask (writeback_if.tmask), + .waddr (waddr), + .wdata (writeback_if.data), + .raddr1 (raddr1), + .raddr2 (raddr2), + .raddr3 (raddr3), + .rdata1 (rdata1), + .rdata2 (rdata2), + .rdata3 (rdata3) + ); -`endif - - assign gpr_rsp_if.rs1_data = rs1_is_zero ? (`NUM_THREADS*32)'(0) : rs1_data; - assign gpr_rsp_if.rs2_data = rs2_is_zero ? (`NUM_THREADS*32)'(0) : rs2_data; - assign gpr_rsp_if.valid = rsp_valid; - assign gpr_rsp_if.wid = rsp_wid; - assign gpr_rsp_if.PC = rsp_pc; + assign gpr_rsp_if.rs1_data = rdata1; + assign gpr_rsp_if.rs2_data = rdata2; + assign gpr_rsp_if.rs3_data = rdata3; assign writeback_if.ready = 1'b1; diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 26905ce1..3564d851 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -79,12 +79,12 @@ module VX_gpu_unit #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE), .R(1) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}), - .out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}), + .data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier}) ); assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready; diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 1859a95f..4488323c 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -14,7 +14,7 @@ module VX_ibuffer #( output wire [`NW_BITS-1:0] deq_wid_next, VX_decode_if ibuf_deq_if ); - localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + 1 + `NUM_REGS; + localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS; localparam SIZE = `IBUF_SIZE; localparam SIZEW = $clog2(SIZE+1); localparam ADDRW = $clog2(SIZE); @@ -192,8 +192,7 @@ module VX_ibuffer #( ibuf_enq_if.rs3, ibuf_enq_if.imm, ibuf_enq_if.rs1_is_PC, - ibuf_enq_if.rs2_is_imm, - ibuf_enq_if.use_rs3, + ibuf_enq_if.rs2_is_imm, ibuf_enq_if.used_regs}; assign ibuf_deq_if.valid = deq_valid; @@ -211,7 +210,6 @@ module VX_ibuffer #( ibuf_deq_if.imm, ibuf_deq_if.rs1_is_PC, ibuf_deq_if.rs2_is_imm, - ibuf_deq_if.use_rs3, ibuf_deq_if.used_regs} = deq_instr; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index 299c4682..67bc3eb3 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -30,94 +30,72 @@ module VX_instr_demux ( // ALU unit wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU); - wire alu_req_ready; + wire alu_stall = alu_req_if.valid && ~alu_req_if.ready; + wire is_br_op = `IS_BR_MOD(execute_if.op_mod); - VX_opd_collect #( - .INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS), - .OPDSW (2 * `NUM_THREADS * 32), - .PASSTHRU (1) // ALU has no backpressure - ) alu_opc ( - .clk (clk), - .reset (reset), - .ready_in (alu_req_ready), - .valid_in (alu_req_valid), - .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}), - .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), - .data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}), - .ready_out (alu_req_if.ready), - .valid_out (alu_req_if.valid) + VX_generic_register #( + .N (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)), + .R (1) + ) alu_pipe ( + .clk (clk), + .reset (reset), + .stall (alu_stall), + .flush (1'b0), + .data_in ({alu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}) ); // lsu unit wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU); - wire lsu_req_ready; + wire lsu_stall = lsu_req_if.valid && ~lsu_req_if.ready; - VX_opd_collect #( - .INSTW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1), - .OPDSW (2 * `NUM_THREADS * 32) - ) lsu_opc ( + VX_generic_register #( + .N (1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)), + .R (1) + ) lsu_pipe ( .clk (clk), .reset (reset), - .ready_in (lsu_req_ready), - .valid_in (lsu_req_valid), - .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}), - .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), - .data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}), - .ready_out (lsu_req_if.ready), - .valid_out (lsu_req_if.valid) + .stall (lsu_stall), + .flush (1'b0), + .data_in ({lsu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}) ); // csr unit wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR); - wire csr_req_ready; + wire csr_stall = csr_req_if.valid && ~csr_req_if.ready; - reg tmp_rs2_is_imm; - reg [`NR_BITS-1:0] tmp_rs1; - - always @(posedge clk) begin - tmp_rs2_is_imm <= execute_if.rs2_is_imm; - tmp_rs1 <= execute_if.rs1; - end - - wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 32'(tmp_rs1) : gpr_rsp_if.rs1_data[0]; - - VX_opd_collect #( - .INSTW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1), - .OPDSW (32) - ) csr_opc ( + VX_generic_register #( + .N (1 + `NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32), + .R (1) + ) csr_pipe ( .clk (clk), .reset (reset), - .ready_in (csr_req_ready), - .valid_in (csr_req_valid), - .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}), - .opds_in ({csr_req_mask}), - .data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io, csr_req_if.csr_mask}), - .ready_out (csr_req_if.ready), - .valid_out (csr_req_if.valid) + .stall (csr_stall), + .flush (1'b0), + .data_in ({csr_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, execute_if.rs2_is_imm, execute_if.rs1, gpr_rsp_if.rs1_data[0]}), + .data_out ({csr_req_if.valid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.rs2_is_imm, csr_req_if.rs1, csr_req_if.rs1_data}) ); // mul unit `ifdef EXT_M_ENABLE wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL); - wire mul_req_ready; + wire mul_stall = mul_req_if.valid && ~mul_req_if.ready; - VX_opd_collect #( - .INSTW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1), - .OPDSW (2 * `NUM_THREADS * 32) - ) mul_opc ( + VX_generic_register #( + .N (1 + `NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)), + .R (1) + ) mul_pipe ( .clk (clk), .reset (reset), - .ready_in (mul_req_ready), - .valid_in (mul_req_valid), - .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), - .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), - .data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}), - .ready_out (mul_req_if.ready), - .valid_out (mul_req_if.valid) + .stall (mul_stall), + .flush (1'b0), + .data_in ({mul_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({mul_req_if.valid, mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}) ); `endif @@ -125,54 +103,50 @@ module VX_instr_demux ( `ifdef EXT_F_ENABLE wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU); - wire fpu_req_ready; + wire fpu_stall = fpu_req_if.valid && ~fpu_req_if.ready; - VX_opd_collect #( - .INSTW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1), - .OPDSW (3 * `NUM_THREADS * 32) - ) fpu_opc ( + VX_generic_register #( + .N (1 + `NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)), + .R (1) + ) fpu_pipe ( .clk (clk), .reset (reset), - .ready_in (fpu_req_ready), - .valid_in (fpu_req_valid), - .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb}), - .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}), - .data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}), - .ready_out (fpu_req_if.ready), - .valid_out (fpu_req_if.valid) + .stall (fpu_stall), + .flush (1'b0), + .data_in ({fpu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}), + .data_out ({fpu_req_if.valid, fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}) ); +`else + `UNUSED_VAR (gpr_rsp_if.rs3_data) `endif // gpu unit wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU); - wire gpu_req_ready; + wire gpu_stall = gpu_req_if.valid && ~gpu_req_if.ready; - VX_opd_collect #( - .INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1), - .OPDSW (`NUM_THREADS * 32 + 32) - ) gpu_opc ( + VX_generic_register #( + .N (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)), + .R (1) + ) gpu_pipe ( .clk (clk), .reset (reset), - .ready_in (gpu_req_ready), - .valid_in (gpu_req_valid), - .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), - .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}), - .data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}), - .ready_out (gpu_req_if.ready), - .valid_out (gpu_req_if.valid) + .stall (gpu_stall), + .flush (1'b0), + .data_in ({gpu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}), + .data_out ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}) ); // can take next request? - assign execute_if.ready = (alu_req_ready && (execute_if.ex_type == `EX_ALU)) - || (lsu_req_ready && (execute_if.ex_type == `EX_LSU)) - || (csr_req_ready && (execute_if.ex_type == `EX_CSR)) + assign execute_if.ready = (!alu_stall && (execute_if.ex_type == `EX_ALU)) + || (!lsu_stall && (execute_if.ex_type == `EX_LSU)) + || (!csr_stall && (execute_if.ex_type == `EX_CSR)) `ifdef EXT_M_ENABLE - || (mul_req_ready && (execute_if.ex_type == `EX_MUL)) + || (!mul_stall && (execute_if.ex_type == `EX_MUL)) `endif `ifdef EXT_F_ENABLE - || (fpu_req_ready && (execute_if.ex_type == `EX_FPU)) + || (!fpu_stall && (execute_if.ex_type == `EX_FPU)) `endif - || (gpu_req_ready && (execute_if.ex_type == `EX_GPU)); + || (!gpu_stall && (execute_if.ex_type == `EX_GPU)); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 03a75ca8..eab416d8 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -5,23 +5,23 @@ module VX_issue #( ) ( `SCOPE_IO_VX_issue - input wire clk, - input wire reset, + input wire clk, + input wire reset, - VX_decode_if decode_if, - VX_writeback_if writeback_if, + VX_decode_if decode_if, + VX_writeback_if writeback_if, - VX_alu_req_if alu_req_if, - VX_lsu_req_if lsu_req_if, - VX_csr_req_if csr_req_if, - VX_mul_req_if mul_req_if, - VX_fpu_req_if fpu_req_if, - VX_gpu_req_if gpu_req_if + VX_alu_req_if alu_req_if, + VX_lsu_req_if lsu_req_if, + VX_csr_req_if csr_req_if, + VX_mul_req_if mul_req_if, + VX_fpu_req_if fpu_req_if, + VX_gpu_req_if gpu_req_if ); - VX_decode_if ibuf_deq_if(); - VX_decode_if execute_if(); - VX_gpr_req_if gpr_req_if(); - VX_gpr_rsp_if gpr_rsp_if(); + VX_decode_if ibuf_deq_if(); + VX_decode_if execute_if(); + VX_gpr_req_if gpr_req_if(); + VX_gpr_rsp_if gpr_rsp_if(); wire scoreboard_delay; wire [`NW_BITS-1:0] deq_wid_next; @@ -29,49 +29,42 @@ module VX_issue #( VX_ibuffer #( .CORE_ID(CORE_ID) ) ibuffer ( - .clk (clk), - .reset (reset), - .freeze (~gpr_req_if.ready), - .ibuf_enq_if (decode_if), - .deq_wid_next (deq_wid_next), - .ibuf_deq_if (ibuf_deq_if) + .clk (clk), + .reset (reset), + .freeze (1'b0), + .ibuf_enq_if (decode_if), + .deq_wid_next (deq_wid_next), + .ibuf_deq_if (ibuf_deq_if) ); VX_scoreboard #( .CORE_ID(CORE_ID) ) scoreboard ( - .clk (clk), - .reset (reset), - .ibuf_deq_if (ibuf_deq_if), - .writeback_if (writeback_if), - .deq_wid_next (deq_wid_next), - .exe_delay (~execute_if.ready), - .gpr_delay (~gpr_req_if.ready), - .delay (scoreboard_delay) + .clk (clk), + .reset (reset), + .ibuf_deq_if (ibuf_deq_if), + .writeback_if (writeback_if), + .deq_wid_next (deq_wid_next), + .exe_delay (~execute_if.ready), + .delay (scoreboard_delay) ); - assign gpr_req_if.valid = ibuf_deq_if.valid && ~scoreboard_delay; - assign gpr_req_if.wid = ibuf_deq_if.wid; - assign gpr_req_if.PC = ibuf_deq_if.PC; - assign gpr_req_if.rs1 = ibuf_deq_if.rs1; - assign gpr_req_if.rs2 = ibuf_deq_if.rs2; - assign gpr_req_if.rs3 = ibuf_deq_if.rs3; - assign gpr_req_if.use_rs3 = ibuf_deq_if.use_rs3; - assign gpr_rsp_if.ready = execute_if.ready; + assign gpr_req_if.wid = ibuf_deq_if.wid; + assign gpr_req_if.rs1 = ibuf_deq_if.rs1; + assign gpr_req_if.rs2 = ibuf_deq_if.rs2; + assign gpr_req_if.rs3 = ibuf_deq_if.rs3; VX_gpr_stage #( .CORE_ID(CORE_ID) ) gpr_stage ( - .clk (clk), - .reset (reset), - .writeback_if (writeback_if), - .gpr_req_if (gpr_req_if), - .gpr_rsp_if (gpr_rsp_if) + .clk (clk), + .reset (reset), + .writeback_if (writeback_if), + .gpr_req_if (gpr_req_if), + .gpr_rsp_if (gpr_rsp_if) ); - `UNUSED_VAR (gpr_rsp_if.valid); - - assign execute_if.valid = ibuf_deq_if.valid && gpr_req_if.ready && ~scoreboard_delay; + assign execute_if.valid = ibuf_deq_if.valid && ~scoreboard_delay; assign execute_if.wid = ibuf_deq_if.wid; assign execute_if.tmask = ibuf_deq_if.tmask; assign execute_if.PC = ibuf_deq_if.PC; @@ -83,19 +76,19 @@ module VX_issue #( assign execute_if.rs1 = ibuf_deq_if.rs1; assign execute_if.imm = ibuf_deq_if.imm; assign execute_if.rs1_is_PC = ibuf_deq_if.rs1_is_PC; - assign execute_if.rs2_is_imm = ibuf_deq_if.rs2_is_imm; + assign execute_if.rs2_is_imm= ibuf_deq_if.rs2_is_imm; VX_instr_demux instr_demux ( - .clk (clk), - .reset (reset), - .execute_if (execute_if), - .gpr_rsp_if (gpr_rsp_if), - .alu_req_if (alu_req_if), - .lsu_req_if (lsu_req_if), - .csr_req_if (csr_req_if), - .mul_req_if (mul_req_if), - .fpu_req_if (fpu_req_if), - .gpu_req_if (gpu_req_if) + .clk (clk), + .reset (reset), + .execute_if (execute_if), + .gpr_rsp_if (gpr_rsp_if), + .alu_req_if (alu_req_if), + .lsu_req_if (lsu_req_if), + .csr_req_if (csr_req_if), + .mul_req_if (mul_req_if), + .fpu_req_if (fpu_req_if), + .gpu_req_if (gpu_req_if) ); `SCOPE_ASSIGN (issue_fire, ibuf_deq_if.valid && ibuf_deq_if.ready); @@ -115,12 +108,8 @@ module VX_issue #( `SCOPE_ASSIGN (issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm); `SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay); - `SCOPE_ASSIGN (gpr_delay, ~gpr_req_if.ready); `SCOPE_ASSIGN (execute_delay, ~execute_if.ready); - - `SCOPE_ASSIGN (gpr_rsp_valid, gpr_rsp_if.valid); - `SCOPE_ASSIGN (gpr_rsp_wid, gpr_rsp_if.wid); - `SCOPE_ASSIGN (gpr_rsp_pc, gpr_rsp_if.PC); + `SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data); `SCOPE_ASSIGN (gpr_rsp_b, gpr_rsp_if.rs2_data); `SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data); @@ -140,7 +129,7 @@ module VX_issue #( $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); end if (csr_req_if.valid && csr_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.rs1_data); end if (mul_req_if.valid && mul_req_if.ready) begin $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data); diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index d54cb853..a67a6a38 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -19,6 +19,7 @@ module VX_lsu_unit #( VX_commit_if ld_commit_if, VX_commit_if st_commit_if ); + wire req_valid; wire [`NUM_THREADS-1:0] req_tmask; wire req_rw; wire [`NUM_THREADS-1:0][29:0] req_addr; @@ -71,19 +72,18 @@ module VX_lsu_unit #( reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags; `IGNORE_WARNINGS_END - wire valid_in; wire stall_in; VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))), .R(1) ) pipe_reg0 ( - .clk (clk), - .reset (reset), - .stall (stall_in), - .flush (1'b0), - .in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}), - .out ({valid_in, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data}) + .clk (clk), + .reset (reset), + .stall (stall_in), + .flush (1'b0), + .data_in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}), + .data_out ({req_valid, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data}) ); wire [`NW_BITS-1:0] rsp_wid; @@ -136,11 +136,11 @@ module VX_lsu_unit #( end end - wire stall_out = ~ld_commit_if.ready && ld_commit_if.valid; - wire store_stall = valid_in && req_rw && stall_out; + wire load_req_stall = req_valid && !req_rw && lsuq_full; + wire store_req_stall = req_valid && req_rw && !st_commit_if.ready; // Core Request - assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~lsuq_full && ~store_stall}} & req_tmask; + assign dcache_req_if.valid = {`NUM_THREADS{req_valid && !load_req_stall && !store_req_stall}} & req_tmask; assign dcache_req_if.rw = req_rw; assign dcache_req_if.byteen = req_byteen; assign dcache_req_if.addr = req_addr; @@ -152,7 +152,9 @@ module VX_lsu_unit #( assign dcache_req_if.tag = req_tag; `endif - assign stall_in = ~dcache_req_if.ready || lsuq_full || store_stall; + assign stall_in = ~dcache_req_if.ready + || load_req_stall + || store_req_stall; // Can accept new request? assign lsu_req_if.ready = ~stall_in; @@ -171,7 +173,7 @@ module VX_lsu_unit #( // send store commit - wire is_store_rsp = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready; + wire is_store_rsp = req_valid && req_rw && dcache_req_if.ready; assign st_commit_if.valid = is_store_rsp; assign st_commit_if.wid = req_wid; @@ -180,26 +182,27 @@ module VX_lsu_unit #( assign st_commit_if.rd = 0; assign st_commit_if.wb = 0; assign st_commit_if.data = 0; - `UNUSED_VAR (st_commit_if.ready) // send load commit - wire is_load_rsp = (| dcache_rsp_if.valid); + wire is_load_rsp = (| dcache_rsp_if.valid); + + wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid; VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .R(1) ) pipe_reg1 ( - .clk (clk), - .reset (reset), - .stall (stall_out), - .flush (1'b0), - .in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}), - .out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data}) + .clk (clk), + .reset (reset), + .stall (load_rsp_stall), + .flush (1'b0), + .data_in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}), + .data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data}) ); // Can accept new cache response? - assign dcache_rsp_if.ready = ~stall_out; + assign dcache_rsp_if.ready = ~load_rsp_stall; // scope registration `SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}}); diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index 1592d4f8..14a46e54 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -147,12 +147,12 @@ module VX_mul_unit #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .R(1) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall_out), - .flush (1'b0), - .in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}), - .out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data}) + .clk (clk), + .reset (reset), + .stall (stall_out), + .flush (1'b0), + .data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}), + .data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data}) ); // can accept new request? diff --git a/hw/rtl/VX_opd_collect.v b/hw/rtl/VX_opd_collect.v deleted file mode 100644 index 1c71ef84..00000000 --- a/hw/rtl/VX_opd_collect.v +++ /dev/null @@ -1,65 +0,0 @@ -`include "VX_platform.vh" - -module VX_opd_collect #( - parameter INSTW = 1, - parameter OPDSW = 1, - parameter PASSTHRU = 0 -) ( - input wire clk, - input wire reset, - - input wire valid_in, - output wire ready_in, - input wire [INSTW-1:0] inst_in, - input wire [OPDSW-1:0] opds_in, - - output wire [INSTW+OPDSW-1:0] data_out, - output wire valid_out, - input wire ready_out -); - wire [INSTW-1:0] inst_out; - wire [OPDSW-1:0] opds_out; - wire valid_out_tmp, ready_out_tmp; - - VX_skid_buffer #( - .DATAW (INSTW) - ) skid_buffer ( - .clk (clk), - .reset (reset), - .valid_in (valid_in), - .ready_in (ready_in), - .data_in (inst_in), - .data_out (inst_out), - .valid_out (valid_out_tmp), - .ready_out (ready_out_tmp) - ); - - VX_gpr_bypass #( - .DATAW (OPDSW), - .PASSTHRU (PASSTHRU) - ) gpr_bypass ( - .clk (clk), - .reset (reset), - .push (valid_in && ready_in), - .pop (valid_out_tmp && ready_out_tmp), - .data_in (opds_in), - .data_out (opds_out) - ); - - wire stall_out = valid_out && ~ready_out; - - VX_generic_register #( - .N(1 + INSTW + OPDSW), - .R(1) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall_out), - .flush (1'b0), - .in ({valid_out_tmp, inst_out, opds_out}), - .out ({valid_out, data_out}) - ); - - assign ready_out_tmp = ~stall_out; - -endmodule \ No newline at end of file diff --git a/hw/rtl/VX_scoreboard.v b/hw/rtl/VX_scoreboard.v index 58c73b50..4b4f4138 100644 --- a/hw/rtl/VX_scoreboard.v +++ b/hw/rtl/VX_scoreboard.v @@ -10,7 +10,6 @@ module VX_scoreboard #( VX_writeback_if writeback_if, input wire [`NW_BITS-1:0] deq_wid_next, input wire exe_delay, - input wire gpr_delay, output wire delay ); @@ -63,14 +62,14 @@ module VX_scoreboard #( end // issue the instruction - assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay); + assign ibuf_deq_if.ready = ~(delay || exe_delay); `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin - $display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", + $display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b", $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, - inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay); + inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay); end end `endif @@ -81,9 +80,9 @@ module VX_scoreboard #( stall_ctr <= 0; end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin stall_ctr <= stall_ctr + 1; - assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", + assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b", $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, - inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay); + inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay); end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin stall_ctr <= 0; end diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index a1e503de..9c9fd6c0 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -241,12 +241,12 @@ module VX_warp_sched #( .N(1 + `NUM_THREADS + 32 + `NW_BITS), .R(1) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall_out), - .flush (1'b0), - .in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}), - .out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid}) + .clk (clk), + .reset (reset), + .stall (stall_out), + .flush (1'b0), + .data_in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}), + .data_out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid}) ); assign busy = (active_warps != 0); diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 98befe09..7581f1c3 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -78,12 +78,12 @@ module VX_writeback #( .N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)), .R(1) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}), - .out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}), + .data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data}) ); assign alu_commit_if.ready = !stall; diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index caeb10a1..54b02c02 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -435,12 +435,12 @@ if (DRAM_ENABLE) begin .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH), .R(1) ) pipe_reg0 ( - .clk (clk), - .reset (reset), - .stall (pipeline_stall), - .flush (1'b0), - .in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}), - .out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) + .clk (clk), + .reset (reset), + .stall (pipeline_stall), + .flush (1'b0), + .data_in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}), + .data_out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) ); `ifdef DBG_CACHE_REQ_INFO @@ -508,12 +508,12 @@ if (DRAM_ENABLE) begin .N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH), .R(1) ) pipe_reg1 ( - .clk (clk), - .reset (reset), - .stall (pipeline_stall), - .flush (1'b0), - .in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}), - .out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) + .clk (clk), + .reset (reset), + .stall (pipeline_stall), + .flush (1'b0), + .data_in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}), + .data_out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) ); end else begin @@ -650,12 +650,12 @@ end .N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH), .R(1) ) pipe_reg2 ( - .clk (clk), - .reset (reset), - .stall (pipeline_stall), - .flush (1'b0), - .in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}), - .out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) + .clk (clk), + .reset (reset), + .stall (pipeline_stall), + .flush (1'b0), + .data_in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}), + .data_out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) ); `ifdef DBG_CACHE_REQ_INFO diff --git a/hw/rtl/cache/VX_bank_core_req_arb.v b/hw/rtl/cache/VX_bank_core_req_arb.v index d3816c45..42d1b72f 100644 --- a/hw/rtl/cache/VX_bank_core_req_arb.v +++ b/hw/rtl/cache/VX_bank_core_req_arb.v @@ -57,7 +57,8 @@ module VX_bank_core_req_arb #( VX_generic_queue #( .DATAW($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(writedata_in)), - .SIZE(CREQ_SIZE) + .SIZE(CREQ_SIZE), + .BUFFERED(1) ) req_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 56c28612..fd6b9adc 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -92,12 +92,12 @@ module VX_cache_core_rsp_merge #( .R(NUM_REQS), .PASSTHRU(NUM_BANKS <= 2) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}), - .out ({core_rsp_valid, core_rsp_data, core_rsp_tag}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}), + .data_out ({core_rsp_valid, core_rsp_data, core_rsp_tag}) ); for (genvar i = 0; i < NUM_BANKS; i++) begin diff --git a/hw/rtl/fp_cores/VX_fp_noncomp.v b/hw/rtl/fp_cores/VX_fp_noncomp.v index 74f43f54..ea2cb3af 100644 --- a/hw/rtl/fp_cores/VX_fp_noncomp.v +++ b/hw/rtl/fp_cores/VX_fp_noncomp.v @@ -91,12 +91,12 @@ module VX_fp_noncomp #( .N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1), .R(0) ) pipe_reg0 ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}), - .out ({a_sign[i], b_sign[i], a_exponent[i], a_mantissa[i], a_type[i], b_type[i], a_smaller[i], ab_equal[i]}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .data_in ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}), + .data_out ({a_sign[i], b_sign[i], a_exponent[i], a_mantissa[i], a_type[i], b_type[i], a_smaller[i], ab_equal[i]}) ); end @@ -104,12 +104,12 @@ module VX_fp_noncomp #( .N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)), .R(1) ) pipe_reg1 ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({valid_in, tag_in, op_type, frm, dataa, datab}), - .out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .data_in ({valid_in, tag_in, op_type, frm, dataa, datab}), + .data_out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r}) ); // FCLASS @@ -255,12 +255,12 @@ module VX_fp_noncomp #( .N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)), .R(1) ) pipe_reg2 ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}), - .out ({valid_out, tag_out, result, has_fflags, fflags}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .data_in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}), + .data_out ({valid_out, tag_out, result, has_fflags, fflags}) ); assign ready_in = ~stall; diff --git a/hw/rtl/interfaces/VX_csr_pipe_req_if.v b/hw/rtl/interfaces/VX_csr_pipe_req_if.v new file mode 100644 index 00000000..16e368a2 --- /dev/null +++ b/hw/rtl/interfaces/VX_csr_pipe_req_if.v @@ -0,0 +1,24 @@ +`ifndef VX_CSR_PIPE_REQ_IF +`define VX_CSR_PIPE_REQ_IF + +`include "VX_define.vh" + +interface VX_csr_pipe_req_if (); + + wire valid; + + wire [`NW_BITS-1:0] wid; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; + wire [`CSR_BITS-1:0] op_type; + wire [`CSR_ADDR_BITS-1:0] csr_addr; + wire [31:0] csr_mask; + wire [`NR_BITS-1:0] rd; + wire wb; + wire is_io; + + wire ready; + +endinterface + +`endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_csr_req_if.v b/hw/rtl/interfaces/VX_csr_req_if.v index 2c4a79a2..9bc17668 100644 --- a/hw/rtl/interfaces/VX_csr_req_if.v +++ b/hw/rtl/interfaces/VX_csr_req_if.v @@ -12,10 +12,11 @@ interface VX_csr_req_if (); wire [31:0] PC; wire [`CSR_BITS-1:0] op_type; wire [`CSR_ADDR_BITS-1:0] csr_addr; - wire [31:0] csr_mask; + wire [31:0] rs1_data; + wire rs2_is_imm; + wire [`NR_BITS-1:0] rs1; wire [`NR_BITS-1:0] rd; wire wb; - wire is_io; wire ready; diff --git a/hw/rtl/interfaces/VX_decode_if.v b/hw/rtl/interfaces/VX_decode_if.v index 692e2073..b864efa9 100644 --- a/hw/rtl/interfaces/VX_decode_if.v +++ b/hw/rtl/interfaces/VX_decode_if.v @@ -20,8 +20,7 @@ interface VX_decode_if (); wire [`NR_BITS-1:0] rs3; wire [31:0] imm; wire rs1_is_PC; - wire rs2_is_imm; - wire use_rs3; + wire rs2_is_imm; wire [`NUM_REGS-1:0] used_regs; wire ready; diff --git a/hw/rtl/interfaces/VX_gpr_req_if.v b/hw/rtl/interfaces/VX_gpr_req_if.v index 2e2e6e98..0f818ed7 100644 --- a/hw/rtl/interfaces/VX_gpr_req_if.v +++ b/hw/rtl/interfaces/VX_gpr_req_if.v @@ -4,17 +4,11 @@ `include "VX_define.vh" interface VX_gpr_req_if (); - - wire valid; - - wire [`NW_BITS-1:0] wid; - wire [31:0] PC; - wire [`NR_BITS-1:0] rs1; - wire [`NR_BITS-1:0] rs2; - wire [`NR_BITS-1:0] rs3; - wire use_rs3; - - wire ready; + + wire [`NW_BITS-1:0] wid; + wire [`NR_BITS-1:0] rs1; + wire [`NR_BITS-1:0] rs2; + wire [`NR_BITS-1:0] rs3; endinterface diff --git a/hw/rtl/interfaces/VX_gpr_rsp_if.v b/hw/rtl/interfaces/VX_gpr_rsp_if.v index 0c86bef8..b8e6f0df 100644 --- a/hw/rtl/interfaces/VX_gpr_rsp_if.v +++ b/hw/rtl/interfaces/VX_gpr_rsp_if.v @@ -4,17 +4,11 @@ `include "VX_define.vh" interface VX_gpr_rsp_if (); - wire valid; -`IGNORE_WARNINGS_BEGIN - wire [`NW_BITS-1:0] wid; - wire [31:0] PC; -`IGNORE_WARNINGS_END + wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs2_data; wire [`NUM_THREADS-1:0][31:0] rs3_data; - wire ready; - endinterface `endif \ No newline at end of file diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index 88156528..6ab88847 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -5,7 +5,7 @@ module VX_dp_ram #( parameter DATAW = 1, parameter SIZE = 1, parameter BYTEENW = 1, - parameter BUFFERED = 1, + parameter BUFFERED = 0, parameter RWCHECK = 1, parameter ADDRW = $clog2(SIZE), parameter SIZEW = $clog2(SIZE+1), @@ -26,8 +26,10 @@ module VX_dp_ram #( localparam DATA32W = DATAW / 32; localparam BYTEEN32W = BYTEENW / 4; - if (FASTRAM) begin - if (BUFFERED) begin +//`ifndef QUARTUS + + if (FASTRAM) begin + if (BUFFERED) begin reg [DATAW-1:0] dout_r; if (BYTEENW > 1) begin @@ -207,5 +209,95 @@ module VX_dp_ram #( end end +/*`else + + localparam OUTDATA_REG_B = BUFFERED ? "CLOCK0" : "UNREGISTERED"; + localparam RAM_BLOCK_TYPE = FASTRAM ? "MLAB" : "AUTO"; + + if (RWCHECK) begin + + altsyncram #( + .init_file (), + .operation_mode ("DUAL_PORT"), + .numwords_a (SIZE), + .numwords_b (SIZE), + .widthad_a (ADDRW), + .widthad_b (ADDRW), + .width_a (DATAW), + .width_b (DATAW), + .width_byteena_a(BYTEENW), + .address_reg_b ("CLOCK0"), + .outdata_reg_b (OUTDATA_REG_B), + .ram_block_type (RAM_BLOCK_TYPE) + ) mem ( + .clocken0 (1'b1), + .clocken1 (), + .clocken2 (), + .clocken3 (), + .clock0 (clk), + .clock1 (), + .address_a (waddr), + .address_b (raddr), + .byteena_a (byteen), + .byteena_b (1'b1), + .wren_a (wren), + .wren_b (1'b0), + .data_a (din), + .data_b (), + .rden_a (), + .rden_b (1'b1), + .q_a (), + .q_b (dout), + .addressstall_a (1'b0), + .addressstall_b (1'b0), + .aclr0 (1'b0), + .aclr1 (1'b0), + .eccstatus () + ); + + end else begin + + `NO_RW_RAM_CHECK altsyncram #( + .init_file (), + .operation_mode ("DUAL_PORT"), + .numwords_a (SIZE), + .numwords_b (SIZE), + .widthad_a (ADDRW), + .widthad_b (ADDRW), + .width_a (DATAW), + .width_b (DATAW), + .width_byteena_a(BYTEENW), + .outdata_reg_b (OUTDATA_REG_B), + .ram_block_type (RAM_BLOCK_TYPE) + ) mem ( + .clocken0 (1'b1), + .clocken1 (1'b1), + .clocken2 (1'b1), + .clocken3 (1'b1), + .clock0 (clk), + .clock1 (clk), + .address_a (waddr), + .address_b (raddr), + .byteena_a (byteen), + .byteena_b (1'b1), + .wren_a (wren), + .wren_b (1'b0), + .data_a (din), + .data_b (), + .rden_a (), + .rden_b (1'b1), + .q_a (), + .q_b (dout), + .addressstall_a (1'b0), + .addressstall_b (1'b0), + .aclr0 (1'b0), + .aclr1 (1'b0), + .eccstatus () + ); + + end + +`endif*/ + endmodule `TRACING_ON \ No newline at end of file diff --git a/hw/rtl/libs/VX_generic_register.v b/hw/rtl/libs/VX_generic_register.v index 169a7ad5..d29ec108 100644 --- a/hw/rtl/libs/VX_generic_register.v +++ b/hw/rtl/libs/VX_generic_register.v @@ -5,25 +5,25 @@ module VX_generic_register #( parameter R = N, parameter PASSTHRU = 0 ) ( - input wire clk, - input wire reset, - input wire stall, - input wire flush, - input wire[N-1:0] in, - output wire[N-1:0] out + input wire clk, + input wire reset, + input wire stall, + input wire flush, + input wire[N-1:0] data_in, + output wire[N-1:0] data_out ); if (PASSTHRU) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (stall) - assign out = flush ? N'(0) : in; + assign data_out = flush ? N'(0) : data_in; end else begin reg [N-1:0] value; if (R != 0) begin always @(posedge clk) begin if (~stall) begin - value <= in; + value <= data_in; end if (reset || flush) begin value[N-1:N-R] <= R'(0); @@ -34,12 +34,12 @@ module VX_generic_register #( `UNUSED_VAR (flush) always @(posedge clk) begin if (~stall) begin - value <= in; + value <= data_in; end end end - assign out = value; + assign data_out = value; end endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_stream_arbiter.v b/hw/rtl/libs/VX_stream_arbiter.v index 5950d42e..26a704a5 100644 --- a/hw/rtl/libs/VX_stream_arbiter.v +++ b/hw/rtl/libs/VX_stream_arbiter.v @@ -106,12 +106,12 @@ module VX_stream_arbiter #( .N(1 + DATAW), .R(1) ) pipe_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (1'b0), - .in ({sel_valid, data_in[sel_idx]}), - .out ({valid_out, data_out}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .data_in ({sel_valid, data_in[sel_idx]}), + .data_out ({valid_out, data_out}) ); for (genvar i = 0; i < NUM_REQS; i++) begin diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index 26a24154..32b5d965 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -184,13 +184,9 @@ "issue_imm": 32, "issue_rs1_is_pc": 1, "issue_rs2_is_imm": 1, - "?gpr_rsp_valid": 1, - "gpr_rsp_wid":"`NW_BITS", - "gpr_rsp_pc": 32, "gpr_rsp_a":"`NUM_THREADS * 32", "gpr_rsp_b":"`NUM_THREADS * 32", "gpr_rsp_c":"`NUM_THREADS * 32", - "!gpr_delay": 1, "?writeback_valid": 1, "writeback_wid":"`NW_BITS", "writeback_pc": 32,