diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index b4798870..dcb4ad9e 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -29,8 +29,8 @@ module VX_alu_unit #( wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data; wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data; - wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1; - wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2; + wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1; + wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2; wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && !is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2; for (genvar i = 0; i < `NUM_THREADS; i++) begin @@ -40,7 +40,7 @@ module VX_alu_unit #( end for (genvar i = 0; i < `NUM_THREADS; i++) begin - wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]}; + wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]}; wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]}; always @(*) begin sub_result[i] = $signed(sub_in1) - $signed(sub_in2); diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index 73fdc38c..7ceb83fa 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -7,7 +7,7 @@ module VX_csr_data #( input wire reset, VX_cmt_to_csr_if cmt_to_csr_if, - VX_csr_to_issue_if csr_to_issue_if, + VX_csr_to_fpu_if csr_to_fpu_if, input wire read_enable, input wire[`CSR_ADDR_BITS-1:0] read_addr, @@ -144,6 +144,6 @@ module VX_csr_data #( end assign read_data = read_data_r; - assign csr_to_issue_if.frm = csr_frm[csr_to_issue_if.wid]; + assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid]; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index c249b7fb..cdf2a3f7 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -7,7 +7,7 @@ module VX_csr_unit #( input wire reset, VX_cmt_to_csr_if cmt_to_csr_if, - VX_csr_to_issue_if csr_to_issue_if, + VX_csr_to_fpu_if csr_to_fpu_if, VX_csr_io_req_if csr_io_req_if, VX_csr_io_rsp_if csr_io_rsp_if, @@ -47,7 +47,7 @@ module VX_csr_unit #( .clk (clk), .reset (reset), .cmt_to_csr_if (cmt_to_csr_if), - .csr_to_issue_if(csr_to_issue_if), + .csr_to_fpu_if (csr_to_fpu_if), .read_enable (csr_pipe_req_if.valid), .read_addr (csr_pipe_req_if.csr_addr), .read_wid (csr_pipe_req_if.wid), diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 6cf8c263..db27c44d 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -28,7 +28,6 @@ module VX_execute #( VX_gpu_req_if gpu_req_if, // outputs - VX_csr_to_issue_if csr_to_issue_if, VX_branch_ctl_if branch_ctl_if, VX_warp_ctl_if warp_ctl_if, VX_exu_to_cmt_if alu_commit_if, @@ -41,6 +40,7 @@ module VX_execute #( input wire busy, output wire ebreak ); + VX_csr_to_fpu_if csr_to_fpu_if(); VX_alu_unit #( .CORE_ID(CORE_ID) @@ -70,7 +70,7 @@ module VX_execute #( .clk (clk), .reset (reset), .cmt_to_csr_if (cmt_to_csr_if), - .csr_to_issue_if(csr_to_issue_if), + .csr_to_fpu_if (csr_to_fpu_if), .csr_io_req_if (csr_io_req_if), .csr_io_rsp_if (csr_io_rsp_if), .csr_req_if (csr_req_if), @@ -104,7 +104,8 @@ module VX_execute #( ) fpu_unit ( .clk (clk), .reset (reset), - .fpu_req_if (fpu_req_if), + .fpu_req_if (fpu_req_if), + .csr_to_fpu_if (csr_to_fpu_if), .fpu_commit_if (fpu_commit_if) ); `else diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index dbd78efd..4bbf3e93 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -9,6 +9,7 @@ module VX_fpu_unit #( // inputs VX_fpu_req_if fpu_req_if, + VX_csr_to_fpu_if csr_to_fpu_if, // outputs VX_fpu_to_cmt_if fpu_commit_if @@ -56,6 +57,10 @@ module VX_fpu_unit #( wire valid_in = fpu_req_if.valid && ~fpuq_full; + // resolve dynamic FRM + assign csr_to_fpu_if.wid = fpu_req_if.wid; + wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.op_mod; + `ifdef FPU_FAST VX_fp_fpga #( @@ -70,7 +75,7 @@ module VX_fpu_unit #( .tag_in (tag_in), .op_type (fpu_req_if.op_type), - .frm (fpu_req_if.frm), + .frm (fpu_frm), .dataa (fpu_req_if.rs1_data), .datab (fpu_req_if.rs2_data), @@ -104,7 +109,7 @@ module VX_fpu_unit #( .tag_in (tag_in), .op_type (fpu_req_if.op_type), - .frm (fpu_req_if.frm), + .frm (fpu_frm), .dataa (fpu_req_if.rs1_data), .datab (fpu_req_if.rs2_data), diff --git a/hw/rtl/VX_gpr_bypass.v b/hw/rtl/VX_gpr_bypass.v index 65d13339..50abd8ed 100644 --- a/hw/rtl/VX_gpr_bypass.v +++ b/hw/rtl/VX_gpr_bypass.v @@ -40,27 +40,25 @@ module VX_gpr_bypass #( buffer2 <= 0; end else begin delayed_push <= push; - assert(!use_buffer2 || use_buffer); + assert(!use_buffer2 || use_buffer); if (pop) begin - if (use_buffer) begin - buffer <= buffer2; - use_buffer <= use_buffer2; - use_buffer2 <= 0; - end + buffer <= buffer2; + use_buffer <= use_buffer2; + use_buffer2 <= 0; end if (delayed_push) begin if (use_buffer) begin - assert(!use_buffer2); // queue full! + assert(!use_buffer2); // full! + use_buffer <= 1; if (pop) begin buffer <= data_in; end else begin buffer2 <= data_in; use_buffer2 <= 1; - end - use_buffer <= 1; + end end else if (!pop) begin buffer <= data_in; - use_buffer <= 1; + use_buffer <= 1; end end end diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index e7a280d7..c545659b 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -7,7 +7,6 @@ module VX_instr_demux ( // inputs VX_decode_if execute_if, VX_gpr_rsp_if gpr_rsp_if, - VX_csr_to_issue_if csr_to_issue_if, // outputs VX_alu_req_if alu_req_if, @@ -34,78 +33,47 @@ module VX_instr_demux ( wire alu_req_ready; wire is_br_op = `IS_BR_MOD(execute_if.op_mod); - VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS) - ) alu_reg ( + VX_opd_collect #( + .INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS), + .OPDSW (2 * `NUM_THREADS * 32), + .PASSTHRU (1) // ALU has no backpressure + ) alu_opc ( .clk (clk), .reset (reset), .ready_in (alu_req_ready), .valid_in (alu_req_valid), - .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}), - .data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid}), + .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}), + .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}), .ready_out (alu_req_if.ready), .valid_out (alu_req_if.valid) ); - VX_gpr_bypass #( - .DATAW (2 * `NUM_THREADS * 32), - .PASSTHRU (1) // ALU has no back-pressure, bypass not needed - ) alu_bypass ( - .clk (clk), - .reset (reset), - .push (alu_req_valid && alu_req_ready), - .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), - .data_out ({alu_req_if.rs1_data, alu_req_if.rs2_data}), - .pop (alu_req_if.valid && alu_req_if.ready) - ); - // lsu unit wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU); wire lsu_req_ready; - VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1) - ) lsu_reg ( + VX_opd_collect #( + .INSTW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1), + .OPDSW (2 * `NUM_THREADS * 32) + ) lsu_opc ( .clk (clk), .reset (reset), .ready_in (lsu_req_ready), .valid_in (lsu_req_valid), - .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}), - .data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb}), + .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}), + .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}), .ready_out (lsu_req_if.ready), .valid_out (lsu_req_if.valid) ); - VX_gpr_bypass #( - .DATAW ((2 * `NUM_THREADS * 32)) - ) lsu_bypass ( - .clk (clk), - .reset (reset), - .push (lsu_req_valid && lsu_req_ready), - .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), - .data_out ({lsu_req_if.base_addr, lsu_req_if.store_data}), - .pop (lsu_req_if.valid && lsu_req_if.ready) - ); - // csr unit wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR); wire csr_req_ready; - VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1) - ) csr_reg ( - .clk (clk), - .reset (reset), - .ready_in (csr_req_ready), - .valid_in (csr_req_valid), - .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}), - .data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io}), - .ready_out (csr_req_if.ready), - .valid_out (csr_req_if.valid) - ); - reg tmp_rs2_is_imm; reg [`NR_BITS-1:0] tmp_rs1; @@ -116,15 +84,19 @@ module VX_instr_demux ( wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 32'(tmp_rs1) : gpr_rsp_if.rs1_data[0]; - VX_gpr_bypass #( - .DATAW (32) - ) csr_bypass ( + VX_opd_collect #( + .INSTW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1), + .OPDSW (32) + ) csr_opc ( .clk (clk), .reset (reset), - .push (csr_req_valid && csr_req_ready), - .data_in (csr_req_mask), - .data_out (csr_req_if.csr_mask), - .pop (csr_req_if.valid && csr_req_if.ready) + .ready_in (csr_req_ready), + .valid_in (csr_req_valid), + .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}), + .opds_in ({csr_req_mask}), + .data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io, csr_req_if.csr_mask}), + .ready_out (csr_req_if.ready), + .valid_out (csr_req_if.valid) ); // mul unit @@ -133,29 +105,20 @@ module VX_instr_demux ( wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL); wire mul_req_ready; - VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1) - ) mul_reg ( + VX_opd_collect #( + .INSTW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1), + .OPDSW (2 * `NUM_THREADS * 32) + ) mul_opc ( .clk (clk), .reset (reset), .ready_in (mul_req_ready), .valid_in (mul_req_valid), - .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), - .data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb}), + .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), + .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}), .ready_out (mul_req_if.ready), .valid_out (mul_req_if.valid) ); - - VX_gpr_bypass #( - .DATAW ((2 * `NUM_THREADS * 32)) - ) mul_bypass ( - .clk (clk), - .reset (reset), - .push (mul_req_valid && mul_req_ready), - .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), - .data_out ({mul_req_if.rs1_data, mul_req_if.rs2_data}), - .pop (mul_req_if.valid && mul_req_if.ready) - ); `endif // fpu unit @@ -164,33 +127,20 @@ module VX_instr_demux ( wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU); wire fpu_req_ready; - // resolve dynamic FRM - assign csr_to_issue_if.wid = execute_if.wid; - wire [`FRM_BITS-1:0] fpu_frm = (execute_if.op_mod == `FRM_DYN) ? csr_to_issue_if.frm : execute_if.op_mod; - - VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `FRM_BITS + `NR_BITS + 1) - ) fpu_reg ( + VX_opd_collect #( + .INSTW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1), + .OPDSW (3 * `NUM_THREADS * 32) + ) fpu_opc ( .clk (clk), .reset (reset), .ready_in (fpu_req_ready), .valid_in (fpu_req_valid), - .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), fpu_frm, execute_if.rd, execute_if.wb}), - .data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.frm, fpu_req_if.rd, fpu_req_if.wb}), + .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb}), + .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}), + .data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}), .ready_out (fpu_req_if.ready), .valid_out (fpu_req_if.valid) ); - - VX_gpr_bypass #( - .DATAW ((3 * `NUM_THREADS * 32)) - ) fpu_bypass ( - .clk (clk), - .reset (reset), - .push (fpu_req_valid && fpu_req_ready), - .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}), - .data_out ({fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}), - .pop (fpu_req_if.valid && fpu_req_if.ready) - ); `endif // gpu unit @@ -198,30 +148,21 @@ module VX_instr_demux ( wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU); wire gpu_req_ready; - VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1) - ) gpu_reg ( + VX_opd_collect #( + .INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1), + .OPDSW (`NUM_THREADS * 32 + 32) + ) gpu_opc ( .clk (clk), .reset (reset), .ready_in (gpu_req_ready), .valid_in (gpu_req_valid), - .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), - .data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb}), + .inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), + .opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}), + .data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}), .ready_out (gpu_req_if.ready), .valid_out (gpu_req_if.valid) ); - VX_gpr_bypass #( - .DATAW ((`NUM_THREADS * 32) + 32) - ) gpu_bypass ( - .clk (clk), - .reset (reset), - .push (gpu_req_valid && gpu_req_ready), - .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}), - .data_out ({gpu_req_if.rs1_data, gpu_req_if.rs2_data}), - .pop (gpu_req_if.valid && gpu_req_if.ready) - ); - // can take next request? assign execute_if.ready = (alu_req_ready && (execute_if.ex_type == `EX_ALU)) || (lsu_req_ready && (execute_if.ex_type == `EX_LSU)) diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 91070f3e..03a75ca8 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -10,7 +10,6 @@ module VX_issue #( VX_decode_if decode_if, VX_writeback_if writeback_if, - VX_csr_to_issue_if csr_to_issue_if, VX_alu_req_if alu_req_if, VX_lsu_req_if lsu_req_if, @@ -71,7 +70,7 @@ module VX_issue #( ); `UNUSED_VAR (gpr_rsp_if.valid); - + assign execute_if.valid = ibuf_deq_if.valid && gpr_req_if.ready && ~scoreboard_delay; assign execute_if.wid = ibuf_deq_if.wid; assign execute_if.tmask = ibuf_deq_if.tmask; @@ -91,7 +90,6 @@ module VX_issue #( .reset (reset), .execute_if (execute_if), .gpr_rsp_if (gpr_rsp_if), - .csr_to_issue_if(csr_to_issue_if), .alu_req_if (alu_req_if), .lsu_req_if (lsu_req_if), .csr_req_if (csr_req_if), @@ -136,22 +134,22 @@ module VX_issue #( `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin if (alu_req_if.valid && alu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rs1_data, alu_req_if.rs2_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data); end if (lsu_req_if.valid && lsu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); end if (csr_req_if.valid && csr_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.csr_addr, csr_req_if.csr_mask); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask); end if (mul_req_if.valid && mul_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rs1_data, mul_req_if.rs2_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data); end if (fpu_req_if.valid && fpu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data); end if (gpu_req_if.valid && gpu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rs1_data, gpu_req_if.rs2_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data); end end `endif diff --git a/hw/rtl/VX_opd_collect.v b/hw/rtl/VX_opd_collect.v new file mode 100644 index 00000000..30f35448 --- /dev/null +++ b/hw/rtl/VX_opd_collect.v @@ -0,0 +1,62 @@ +`include "VX_platform.vh" + +module VX_opd_collect #( + parameter INSTW = 1, + parameter OPDSW = 1, + parameter PASSTHRU = 0 +) ( + input wire clk, + input wire reset, + input wire valid_in, + output wire ready_in, + input wire [INSTW-1:0] inst_in, + input wire [OPDSW-1:0] opds_in, + output wire [INSTW+OPDSW-1:0] data_out, + output wire valid_out, + input wire ready_out +); + wire [INSTW-1:0] inst_out; + wire [OPDSW-1:0] opds_out; + wire valid_out_tmp, ready_out_tmp; + + VX_skid_buffer #( + .DATAW (INSTW) + ) skid_buffer ( + .clk (clk), + .reset (reset), + .valid_in (valid_in), + .ready_in (ready_in), + .data_in (inst_in), + .data_out (inst_out), + .valid_out (valid_out_tmp), + .ready_out (ready_out_tmp) + ); + + VX_gpr_bypass #( + .DATAW (OPDSW), + .PASSTHRU (PASSTHRU) + ) gpr_bypass ( + .clk (clk), + .reset (reset), + .push (valid_in && ready_in), + .pop (valid_out_tmp && ready_out_tmp), + .data_in (opds_in), + .data_out (opds_out) + ); + + wire stall_out = valid_out && ~ready_out; + + VX_generic_register #( + .N(1+INSTW+OPDSW) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .stall (stall_out), + .flush (1'b0), + .in ({valid_out_tmp, inst_out, opds_out}), + .out ({valid_out, data_out}) + ); + + assign ready_out_tmp = ~stall_out; + +endmodule \ No newline at end of file diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index efb75497..e528a411 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -98,7 +98,6 @@ module VX_pipeline #( assign csr_io_rsp_data = csr_io_rsp_if.data; assign csr_io_rsp_if.ready = csr_io_rsp_ready; - VX_csr_to_issue_if csr_to_issue_if(); VX_cmt_to_csr_if cmt_to_csr_if(); VX_decode_if decode_if(); VX_branch_ctl_if branch_ctl_if(); @@ -157,7 +156,6 @@ module VX_pipeline #( .decode_if (decode_if), .writeback_if (writeback_if), - .csr_to_issue_if(csr_to_issue_if), .alu_req_if (alu_req_if), .lsu_req_if (lsu_req_if), @@ -181,7 +179,6 @@ module VX_pipeline #( .csr_io_req_if (csr_io_req_if), .csr_io_rsp_if (csr_io_rsp_if), - .csr_to_issue_if(csr_to_issue_if), .cmt_to_csr_if (cmt_to_csr_if), .alu_req_if (alu_req_if), diff --git a/hw/rtl/interfaces/VX_csr_to_issue_if.v b/hw/rtl/interfaces/VX_csr_to_fpu_if.v similarity index 64% rename from hw/rtl/interfaces/VX_csr_to_issue_if.v rename to hw/rtl/interfaces/VX_csr_to_fpu_if.v index f222370c..2b1aac5a 100644 --- a/hw/rtl/interfaces/VX_csr_to_issue_if.v +++ b/hw/rtl/interfaces/VX_csr_to_fpu_if.v @@ -1,5 +1,5 @@ -`ifndef VX_CSR_TO_ISSUE_IF -`define VX_CSR_TO_ISSUE_IF +`ifndef VX_CSR_TO_FPU_IF +`define VX_CSR_TO_FPU_IF `include "VX_define.vh" @@ -7,7 +7,7 @@ `IGNORE_WARNINGS_BEGIN `endif -interface VX_csr_to_issue_if (); +interface VX_csr_to_fpu_if (); wire [`NW_BITS-1:0] wid; wire [`FRM_BITS-1:0] frm; diff --git a/hw/rtl/interfaces/VX_fpu_req_if.v b/hw/rtl/interfaces/VX_fpu_req_if.v index bb0ee172..369a5cea 100644 --- a/hw/rtl/interfaces/VX_fpu_req_if.v +++ b/hw/rtl/interfaces/VX_fpu_req_if.v @@ -15,7 +15,7 @@ interface VX_fpu_req_if (); wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; wire [`FPU_BITS-1:0] op_type; - wire [`FRM_BITS-1:0] frm; + wire [`MOD_BITS-1:0] op_mod; wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs2_data; wire [`NUM_THREADS-1:0][31:0] rs3_data; diff --git a/hw/rtl/libs/VX_skid_buffer.v b/hw/rtl/libs/VX_skid_buffer.v index d179ed31..cbd9ae8c 100644 --- a/hw/rtl/libs/VX_skid_buffer.v +++ b/hw/rtl/libs/VX_skid_buffer.v @@ -17,24 +17,26 @@ module VX_skid_buffer #( reg valid_out_r; reg use_buffer; + wire push = valid_in && ready_in; + always @(posedge clk) begin if (reset) begin data_out_r <= 0; buffer <= 0; use_buffer <= 0; valid_out_r <= 0; - end else begin - if (valid_in && ready_in && valid_out && !ready_out) begin - assert(!use_buffer); - use_buffer <= 1; - end + end else begin if (ready_out) begin use_buffer <= 0; end - if (valid_in && ready_in) begin + if (push) begin buffer <= data_in; + if (valid_out_r && !ready_out) begin + assert(!use_buffer); + use_buffer <= 1; + end end - if (!valid_out || ready_out) begin + if (!valid_out_r || ready_out) begin valid_out_r <= valid_in || use_buffer; data_out_r <= use_buffer ? buffer : data_in; end