From 49b86c4b2aa233bba3aa211c5aeaae87cbda979f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 5 Sep 2020 10:52:59 -0700 Subject: [PATCH] SCOPE update --- driver/opae/scope.cpp | 85 +++++++---- hw/opae/Makefile | 12 +- hw/opae/README | 3 +- hw/opae/sources_1c.txt | 2 +- hw/opae/vortex_afu.sv | 17 ++- hw/rtl/VX_alu_unit.v | 6 +- hw/rtl/VX_cluster.v | 10 +- hw/rtl/VX_commit.v | 16 +-- hw/rtl/VX_config.vh | 2 - hw/rtl/VX_core.v | 9 +- hw/rtl/VX_csr_arb.v | 22 +-- hw/rtl/VX_csr_unit.v | 4 +- hw/rtl/VX_decode.v | 16 +-- hw/rtl/VX_execute.v | 41 +++--- hw/rtl/VX_fpu_unit.v | 12 +- hw/rtl/VX_gpr_fp_ctrl.v | 59 +++++--- hw/rtl/VX_gpr_stage.v | 55 +++++--- hw/rtl/VX_gpu_unit.v | 20 +-- hw/rtl/VX_ibuffer.v | 8 +- hw/rtl/VX_icache_stage.v | 30 ++-- hw/rtl/VX_instr_demux.v | 50 +++---- hw/rtl/VX_issue.v | 82 ++++++++--- hw/rtl/VX_lsu_unit.v | 38 ++--- hw/rtl/VX_mul_unit.v | 12 +- hw/rtl/VX_pipeline.v | 10 +- hw/rtl/VX_scope.vh | 201 ++++++++++++++++++--------- hw/rtl/VX_scoreboard.v | 6 +- hw/rtl/VX_types.vh | 2 +- hw/rtl/VX_warp_sched.v | 14 +- hw/rtl/VX_writeback.v | 14 +- hw/rtl/Vortex.v | 15 +- hw/rtl/interfaces/VX_alu_req_if.v | 4 +- hw/rtl/interfaces/VX_csr_req_if.v | 4 +- hw/rtl/interfaces/VX_decode_if.v | 4 +- hw/rtl/interfaces/VX_exu_to_cmt_if.v | 4 +- hw/rtl/interfaces/VX_fpu_req_if.v | 4 +- hw/rtl/interfaces/VX_fpu_to_cmt_if.v | 4 +- hw/rtl/interfaces/VX_gpr_read_if.v | 27 ---- hw/rtl/interfaces/VX_gpr_req_if.v | 21 +++ hw/rtl/interfaces/VX_gpr_rsp_if.v | 21 +++ hw/rtl/interfaces/VX_gpu_req_if.v | 4 +- hw/rtl/interfaces/VX_ifetch_req_if.v | 4 +- hw/rtl/interfaces/VX_ifetch_rsp_if.v | 4 +- hw/rtl/interfaces/VX_lsu_req_if.v | 4 +- hw/rtl/interfaces/VX_mul_req_if.v | 4 +- hw/rtl/interfaces/VX_writeback_if.v | 4 +- 46 files changed, 587 insertions(+), 403 deletions(-) delete mode 100644 hw/rtl/interfaces/VX_gpr_read_if.v create mode 100644 hw/rtl/interfaces/VX_gpr_req_if.v create mode 100644 hw/rtl/interfaces/VX_gpr_rsp_if.v diff --git a/driver/opae/scope.cpp 
b/driver/opae/scope.cpp index 9181b748..6b12a7ab 100644 --- a/driver/opae/scope.cpp +++ b/driver/opae/scope.cpp @@ -18,6 +18,12 @@ return -1; \ } while (false) + +template <int N> +constexpr bool static_print() { + return (0 < N < 100); +} + #define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4) #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) @@ -27,7 +33,8 @@ struct scope_signal_t { }; constexpr int ilog2(int n) { - return (n > 1) ? 1 + ilog2(n >> 1) : 0; + return (n > 1) ? 1 + + ilog2(n >> 1) : 0; } static constexpr int NW_BITS = ilog2(NUM_WARPS); @@ -38,7 +45,14 @@ static constexpr int NR_BITS = ilog2(64); static constexpr int NR_BITS = ilog2(32); #endif -static const scope_signal_t scope_signals[] = { +static constexpr int EX_BITS = 3; +static constexpr int OP_BITS = 4; +static constexpr int MOD_BITS = 3; + +static constexpr int ICORE_TAG_WIDTH = NW_BITS; +static constexpr int DCORE_TAG_WIDTH = ilog2(LSUQ_SIZE); + +static constexpr scope_signal_t scope_signals[] = { { 32, "dram_req_addr" }, { 1, "dram_req_rw" }, @@ -55,28 +69,43 @@ static const scope_signal_t scope_signals[] = { { NW_BITS, "icache_req_wid" }, { 32, "icache_req_addr" }, - { NW_BITS, "icache_req_tag" }, + { ICORE_TAG_WIDTH, "icache_req_tag" }, { 32, "icache_rsp_data" }, - { NW_BITS, "icache_rsp_tag" }, + { ICORE_TAG_WIDTH, "icache_rsp_tag" }, { NW_BITS, "dcache_req_wid" }, - { 32, "dcache_req_PC" }, + { 32, "dcache_req_pc" }, { NUM_THREADS * 32, "dcache_req_addr" }, { 1, "dcache_req_rw" }, { NUM_THREADS * 4, "dcache_req_byteen" }, { NUM_THREADS * 32, "dcache_req_data" }, - { NW_BITS, "dcache_req_tag" }, + { DCORE_TAG_WIDTH, "dcache_req_tag" }, { NUM_THREADS * 32, "dcache_rsp_data" }, - { NW_BITS, "dcache_rsp_tag" }, + { DCORE_TAG_WIDTH, "dcache_rsp_tag" }, + + { NW_BITS, "issue_wid" }, + { NUM_THREADS, "issue_tmask" }, + { 32, "issue_pc" }, + { EX_BITS, "issue_ex_type" }, + { OP_BITS, "issue_op_type" }, + { MOD_BITS, "issue_op_mod" }, + { 1, "issue_wb" }, + { NR_BITS, "issue_rd" }, + { 
NR_BITS, "issue_rs1" }, + { NR_BITS, "issue_rs2" }, + { NR_BITS, "issue_rs3" }, + { 32, "issue_imm" }, + { 1, "issue_rs1_is_pc" }, + { 1, "issue_rs2_is_imm" }, - { NW_BITS, "alu_req_wid" }, - { 32, "alu_req_PC" }, - { NR_BITS, "alu_req_rd" }, - { NUM_THREADS * 32, "alu_req_a" }, - { NUM_THREADS * 32, "alu_req_b" }, + { NW_BITS, "gpr_rsp_wid" }, + { 32, "gpr_rsp_pc" }, + { NUM_THREADS * 32, "gpr_rsp_a" }, + { NUM_THREADS * 32, "gpr_rsp_b" }, + { NUM_THREADS * 32, "gpr_rsp_c" }, { NW_BITS, "writeback_wid" }, - { 32, "writeback_PC" }, + { 32, "writeback_pc" }, { NR_BITS, "writeback_rd" }, { NUM_THREADS * 32, "writeback_data" }, @@ -110,19 +139,29 @@ static const scope_signal_t scope_signals[] = { { NUM_THREADS, "dcache_rsp_valid" }, { 1, "dcache_rsp_ready" }, - { NUM_THREADS, "decode_valid" }, - { NUM_THREADS, "alu_req_valid" }, - { NUM_THREADS, "writeback_valid" }, - - { 1, "busy" }, - { 1, "bank_valid_st0" }, { 1, "bank_valid_st1" }, { 1, "bank_valid_st2" }, { 1, "bank_stall_pipe" }, + + { 1, "issue_valid" }, + { 1, "issue_ready" }, + { 1, "gpr_rsp_valid" }, + { 1, "writeback_valid" }, + { 1, "scoreboard_delay" }, + { 1, "gpr_delay" }, + { 1, "execute_delay" }, + { 1, "busy" }, }; -static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); +static constexpr int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); + +constexpr int calcFrameWidth(int index = 0) { + return (index < num_signals) ? 
(scope_signals[index].width + calcFrameWidth(index + 1)) : 0; +} + +static constexpr int fwidth = calcFrameWidth(); +static_assert(fwidth == 1766, "invalid size"); int vx_scope_start(fpga_handle hfpga, uint64_t delay) { if (nullptr == hfpga) @@ -154,12 +193,6 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { ofs << "$timescale 1 ns $end" << std::endl; ofs << "$var reg 1 0 clk $end" << std::endl; - int fwidth = 0; - for (int i = 0; i < num_signals; ++i) { - ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end" << std::endl; - fwidth += scope_signals[i].width; - } - ofs << "enddefinitions $end" << std::endl; uint64_t frame_width, max_frames, data_valid; diff --git a/hw/opae/Makefile b/hw/opae/Makefile index 57d242fc..6c57a567 100644 --- a/hw/opae/Makefile +++ b/hw/opae/Makefile @@ -9,15 +9,15 @@ sources.txt: gen_sources: sources.txt -ase-1c: setup-ase-1c gen_sources +ase-1c: gen_sources setup-ase-1c make -C $(ASE_BUILD_DIR)_1c cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_1c/work -ase-2c: setup-ase-2c gen_sources +ase-2c: gen_sources setup-ase-2c make -C $(ASE_BUILD_DIR)_2c cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_2c/work -ase-4c: setup-ase-4c gen_sources +ase-4c: gen_sources setup-ase-4c make -C $(ASE_BUILD_DIR)_4c cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_3c/work @@ -36,15 +36,15 @@ $(ASE_BUILD_DIR)_2c/Makefile: sources.txt $(ASE_BUILD_DIR)_4c/Makefile: sources.txt afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c -fpga-1c: setup-fpga-1c gen_sources +fpga-1c: gen_sources setup-fpga-1c cd $(FPGA_BUILD_DIR)_1c && qsub-synth cp ../rtl/fp_cores/altera/*.hex $(FPGA_BUILD_DIR)_1c -fpga-2c: setup-fpga-2c gen_sources +fpga-2c: gen_sources setup-fpga-2c cd $(FPGA_BUILD_DIR)_2c && qsub-synth cp ../rtl/fp_cores/altera/*.hex $(FPGA_BUILD_DIR)_2c -fpga-4c: setup-fpga-4c gen_sources +fpga-4c: gen_sources setup-fpga-4c cd $(FPGA_BUILD_DIR)_4c && qsub-synth cp ../rtl/fp_cores/altera/*.hex 
$(FPGA_BUILD_DIR)_4c diff --git a/hw/opae/README b/hw/opae/README index 54038744..611adec4 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -78,8 +78,7 @@ tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd tar -zcvf trace.vcd.tar.gz trace.vcd tar -zcvf run.log.tar.gz run.log -tar -cvjf run.log.tar.bz2 run.log - +tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd # decompress VCD trace tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz diff --git a/hw/opae/sources_1c.txt b/hw/opae/sources_1c.txt index f3b46403..897468c2 100644 --- a/hw/opae/sources_1c.txt +++ b/hw/opae/sources_1c.txt @@ -1,9 +1,9 @@ +define+NUM_CORES=1 -#+define+SCOPE +define+SYNTHESIS +define+QUARTUS +define+FPU_FAST +#+define+SCOPE #+define+DBG_PRINT_CORE_ICACHE #+define+DBG_PRINT_CORE_DCACHE diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index a21d6a26..63ae563c 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -913,10 +913,9 @@ assign cmd_run_done = !vx_busy; Vortex #() vortex ( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_CORE_BIND `SCOPE_SIGNALS_CACHE_BIND - `SCOPE_SIGNALS_PIPELINE_BIND - `SCOPE_SIGNALS_EX_BIND + `SCOPE_SIGNALS_ISSUE_BIND + `SCOPE_SIGNALS_EXECUTE_BIND .clk (clk), .reset (SoftReset | vx_reset), @@ -988,6 +987,8 @@ Vortex #() vortex ( localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}); localparam SCOPE_SR_DEPTH = 2; +`STATIC_ASSERT(SCOPE_DATAW == 1766, "invalid size") + `SCOPE_ASSIGN (scope_dram_req_valid, vx_dram_req_valid); `SCOPE_ASSIGN (scope_dram_req_addr, {vx_dram_req_addr, 4'b0}); `SCOPE_ASSIGN (scope_dram_req_rw, vx_dram_req_rw); @@ -1015,6 +1016,8 @@ localparam SCOPE_SR_DEPTH = 2; `SCOPE_ASSIGN (scope_snp_rsp_tag, vx_snp_rsp_tag); `SCOPE_ASSIGN (scope_snp_rsp_ready, vx_snp_rsp_ready); +`SCOPE_ASSIGN (scope_busy, vx_busy); + wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) || 
(scope_icache_rsp_valid && scope_icache_rsp_ready) || ((| scope_dcache_req_valid) && scope_dcache_req_ready) @@ -1023,10 +1026,16 @@ wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_dram_rsp_valid && scope_dram_rsp_ready) || (scope_snp_req_valid && scope_snp_req_ready) || (scope_snp_rsp_valid && scope_snp_rsp_ready) + || (scope_issue_valid && scope_issue_ready) + || scope_gpr_rsp_valid || scope_bank_valid_st0 || scope_bank_valid_st1 || scope_bank_valid_st2 - || scope_bank_stall_pipe; + || scope_bank_stall_pipe + || scope_scoreboard_delay + || scope_gpr_delay + || scope_execute_delay + || scope_busy; wire scope_start = vx_reset; diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 930535cc..a2257e15 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -29,7 +29,7 @@ module VX_alu_unit #( wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data; wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data; - wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.curr_PC}} : alu_in1; + wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1; wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2; wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && ~is_br_op) ? 
{`NUM_THREADS{alu_req_if.imm}} : alu_in2; @@ -103,8 +103,8 @@ module VX_alu_unit #( .reset (reset), .stall (stall_out), .flush (1'b0), - .in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.thread_mask, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}), - .out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.thread_mask, alu_commit_if.curr_PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r}) + .in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}), + .out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r}) ); wire is_less = cmp_result_r[32]; diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index a39fda95..2d767cd7 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -5,10 +5,9 @@ module VX_cluster #( ) ( `SCOPE_SIGNALS_ISTAGE_IO `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_CORE_IO `SCOPE_SIGNALS_CACHE_IO - `SCOPE_SIGNALS_PIPELINE_IO - `SCOPE_SIGNALS_EX_IO + `SCOPE_SIGNALS_ISSUE_IO + `SCOPE_SIGNALS_EXECUTE_IO // Clock input wire clk, @@ -141,10 +140,9 @@ module VX_cluster #( ) core ( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_CORE_BIND `SCOPE_SIGNALS_CACHE_BIND - `SCOPE_SIGNALS_PIPELINE_BIND - `SCOPE_SIGNALS_EX_BIND + `SCOPE_SIGNALS_ISSUE_BIND + `SCOPE_SIGNALS_EXECUTE_BIND .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index b09b82f7..6735c694 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -41,7 +41,7 @@ module VX_commit #( always @(*) begin fflags = 0; for (integer i = 0; i < `NUM_THREADS; i++) begin - if (fpu_commit_if.thread_mask[i]) begin + if (fpu_commit_if.tmask[i]) begin fflags.NX |= fpu_commit_if.fflags[i].NX; 
fflags.UF |= fpu_commit_if.fflags[i].UF; fflags.OF |= fpu_commit_if.fflags[i].OF; @@ -92,26 +92,26 @@ module VX_commit #( `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin if (alu_commit_if.valid && alu_commit_if.ready) begin - $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.curr_PC, alu_commit_if.thread_mask, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data); + $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data); end if (lsu_commit_if.valid && lsu_commit_if.ready) begin - $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.wid, lsu_commit_if.curr_PC, lsu_commit_if.thread_mask, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data); + $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.wid, lsu_commit_if.PC, lsu_commit_if.tmask, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data); end if (csr_commit_if.valid && csr_commit_if.ready) begin - $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.curr_PC, csr_commit_if.thread_mask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data); + $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data); end if (mul_commit_if.valid && mul_commit_if.ready) begin - $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=MUL, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.wid, mul_commit_if.curr_PC, mul_commit_if.thread_mask, mul_commit_if.wb, 
mul_commit_if.rd, mul_commit_if.data); + $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=MUL, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.wid, mul_commit_if.PC, mul_commit_if.tmask, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data); end if (fpu_commit_if.valid && fpu_commit_if.ready) begin - $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.curr_PC, fpu_commit_if.thread_mask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data); + $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data); end if (gpu_commit_if.valid && gpu_commit_if.ready) begin - $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.curr_PC, gpu_commit_if.thread_mask, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data); + $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data); end end `else - `UNUSED_VAR(fpu_commit_if.curr_PC) + `UNUSED_VAR(fpu_commit_if.PC) `endif endmodule diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 5d3db338..7e01fe70 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -59,8 +59,6 @@ `define EXT_F_ENABLE `endif -`define FPU_FAST - // Device identification `define VENDOR_ID 0 `define ARCHITECTURE_ID 0 diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index c5d20b83..a72833f2 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -5,10 +5,9 @@ module VX_core #( ) ( `SCOPE_SIGNALS_ISTAGE_IO `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_CORE_IO `SCOPE_SIGNALS_CACHE_IO - `SCOPE_SIGNALS_PIPELINE_IO - `SCOPE_SIGNALS_EX_IO + 
`SCOPE_SIGNALS_ISSUE_IO + `SCOPE_SIGNALS_EXECUTE_IO // Clock input wire clk, @@ -182,8 +181,8 @@ module VX_core #( ) pipeline ( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_PIPELINE_BIND - `SCOPE_SIGNALS_EX_BIND + `SCOPE_SIGNALS_ISSUE_BIND + `SCOPE_SIGNALS_EXECUTE_BIND .clk(clk), .reset(reset), diff --git a/hw/rtl/VX_csr_arb.v b/hw/rtl/VX_csr_arb.v index 944a8b8f..c059113e 100644 --- a/hw/rtl/VX_csr_arb.v +++ b/hw/rtl/VX_csr_arb.v @@ -19,15 +19,15 @@ module VX_csr_arb ( input wire select_io_rsp ); // requests - assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid; - assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0; - assign csr_req_if.thread_mask = (~select_io_req) ? csr_core_req_if.thread_mask : 0; - assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0; - assign csr_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS); - assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr; - assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0); - assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0; - assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0; + assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid; + assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0; + assign csr_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0; + assign csr_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0; + assign csr_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS); + assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr; + assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? 
csr_io_req_if.data : 32'b0); + assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0; + assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0; assign csr_req_if.is_io = select_io_req; assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req); @@ -39,8 +39,8 @@ module VX_csr_arb ( assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp; assign csr_commit_if.wid = csr_rsp_if.wid; - assign csr_commit_if.thread_mask = csr_rsp_if.thread_mask; - assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC; + assign csr_commit_if.tmask = csr_rsp_if.tmask; + assign csr_commit_if.PC = csr_rsp_if.PC; assign csr_commit_if.rd = csr_rsp_if.rd; assign csr_commit_if.wb = csr_rsp_if.wb; assign csr_commit_if.data = csr_rsp_if.data; diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 7b4bfa2c..f299bb0c 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -96,8 +96,8 @@ module VX_csr_unit #( .reset (reset), .stall (stall), .flush (1'b0), - .in ({csr_pipe_req_if.valid, csr_pipe_req_if.wid, csr_pipe_req_if.thread_mask, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}), - .out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.thread_mask, csr_pipe_rsp_if.curr_PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1}) + .in ({csr_pipe_req_if.valid, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}), + .out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1}) ); for (genvar i = 0; i < `NUM_THREADS; i++) begin diff --git a/hw/rtl/VX_decode.v 
b/hw/rtl/VX_decode.v index 07ac9b0b..17c3d7dd 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -261,7 +261,7 @@ module VX_decode #( wire [2:0] frm = 0; always @(*) begin - fpu_op = `FPU_OTHER; + fpu_op = `FPU_MISC; end `endif @@ -307,12 +307,12 @@ module VX_decode #( /////////////////////////////////////////////////////////////////////////// - assign decode_if.valid = ifetch_rsp_if.valid - && (decode_if.ex_type != `EX_NOP); // skip noop + assign decode_if.valid = ifetch_rsp_if.valid + && (decode_if.ex_type != `EX_NOP); // skip noop - assign decode_if.wid = ifetch_rsp_if.wid; - assign decode_if.thread_mask = ifetch_rsp_if.thread_mask; - assign decode_if.curr_PC = ifetch_rsp_if.curr_PC; + assign decode_if.wid = ifetch_rsp_if.wid; + assign decode_if.tmask = ifetch_rsp_if.tmask; + assign decode_if.PC = ifetch_rsp_if.PC; assign decode_if.ex_type = is_lsu ? `EX_LSU : is_csr ? `EX_CSR : @@ -389,11 +389,11 @@ module VX_decode #( `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin if (decode_if.valid && decode_if.ready) begin - $write("%t: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.curr_PC); + $write("%t: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.PC); print_ex_type(decode_if.ex_type); $write(", op="); print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod); - $write(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b\n", decode_if.op_mod, decode_if.thread_mask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm); + $write(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b\n", decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm); end end `endif diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v 
index 5f8f312f..f542d3ce 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -4,7 +4,7 @@ module VX_execute #( parameter CORE_ID = 0 ) ( `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_EX_IO + `SCOPE_SIGNALS_EXECUTE_IO input wire clk, input wire reset, @@ -87,10 +87,14 @@ module VX_execute #( .mul_commit_if (mul_commit_if) ); `else - assign mul_req_if.ready = 0; - assign mul_commit_if.valid = 0; - assign mul_commit_if.issue_tag = 0; - assign mul_commit_if.data = 0; + assign mul_req_if.ready = 0; + assign mul_commit_if.valid = 0; + assign mul_commit_if.wid = 0; + assign mul_commit_if.PC = 0; + assign mul_commit_if.tmask = 0; + assign mul_commit_if.wb = 0; + assign mul_commit_if.rd = 0; + assign mul_commit_if.data = 0; `endif `ifdef EXT_F_ENABLE @@ -103,12 +107,16 @@ module VX_execute #( .fpu_commit_if (fpu_commit_if) ); `else - assign fpu_req_if.ready = 0; - assign fpu_commit_if.valid = 0; - assign fpu_commit_if.issue_tag = 0; - assign fpu_commit_if.data = 0; + assign fpu_req_if.ready = 0; + assign fpu_commit_if.valid = 0; + assign fpu_commit_if.wid = 0; + assign fpu_commit_if.PC = 0; + assign fpu_commit_if.tmask = 0; + assign fpu_commit_if.wb = 0; + assign fpu_commit_if.rd = 0; + assign fpu_commit_if.data = 0; assign fpu_commit_if.has_fflags = 0; - assign fpu_commit_if.fflags = 0; + assign fpu_commit_if.fflags = 0; `endif VX_gpu_unit #( @@ -126,17 +134,4 @@ module VX_execute #( && (`BR_OP(alu_req_if.op_type) == `BR_EBREAK || `BR_OP(alu_req_if.op_type) == `BR_ECALL); - `SCOPE_ASSIGN (scope_alu_req_valid, alu_req_if.valid); - `SCOPE_ASSIGN (scope_alu_req_wid, alu_req_if.wid); - `SCOPE_ASSIGN (scope_alu_req_PC, alu_req_if.curr_PC); - `SCOPE_ASSIGN (scope_alu_req_rd, alu_req_if.rd); - `SCOPE_ASSIGN (scope_alu_req_a, alu_req_if.rs1_data); - `SCOPE_ASSIGN (scope_alu_req_b, alu_req_if.rs2_data); - - `SCOPE_ASSIGN (scope_writeback_valid, writeback_if.valid); - `SCOPE_ASSIGN (scope_writeback_wid, writeback_if.wid); - `SCOPE_ASSIGN (scope_writeback_PC, 
writeback_if.curr_PC); - `SCOPE_ASSIGN (scope_writeback_rd, writeback_if.rd); - `SCOPE_ASSIGN (scope_writeback_data, writeback_if.data); - endmodule diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index bb31f75f..6e84c5e1 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -20,8 +20,8 @@ module VX_fpu_unit #( wire ready_out; wire [`NW_BITS-1:0] rsp_wid; - wire [`NUM_THREADS-1:0] rsp_thread_mask; - wire [31:0] rsp_curr_PC; + wire [`NUM_THREADS-1:0] rsp_tmask; + wire [31:0] rsp_PC; wire [`NR_BITS-1:0] rsp_rd; wire rsp_wb; @@ -45,8 +45,8 @@ module VX_fpu_unit #( .write_addr (tag_in), .read_addr (tag_out), .release_addr (tag_out), - .write_data ({fpu_req_if.wid, fpu_req_if.thread_mask, fpu_req_if.curr_PC, fpu_req_if.rd, fpu_req_if.wb}), - .read_data ({rsp_wid, rsp_thread_mask, rsp_curr_PC, rsp_rd, rsp_wb}), + .write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}), + .read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}), .release_slot (fpuq_pop), .full (fpuq_full) ); @@ -131,8 +131,8 @@ module VX_fpu_unit #( .reset (reset), .stall (stall_out), .flush (1'b0), - .in ({valid_out, rsp_wid, rsp_thread_mask, rsp_curr_PC, rsp_rd, rsp_wb, result, has_fflags, fflags}), - .out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.thread_mask, fpu_commit_if.curr_PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, fpu_commit_if.has_fflags, fpu_commit_if.fflags}) + .in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, fflags}), + .out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, fpu_commit_if.has_fflags, fpu_commit_if.fflags}) ); assign ready_out = ~stall_out; diff --git a/hw/rtl/VX_gpr_fp_ctrl.v b/hw/rtl/VX_gpr_fp_ctrl.v index 38552a37..1d7224ab 100644 --- a/hw/rtl/VX_gpr_fp_ctrl.v +++ b/hw/rtl/VX_gpr_fp_ctrl.v @@ -8,41 +8,50 @@ module VX_gpr_fp_ctrl ( input wire [`NUM_THREADS-1:0][31:0] 
rs1_data, input wire [`NUM_THREADS-1:0][31:0] rs2_data, + VX_gpr_req_if gpr_req_if, // outputs output wire [`NW_BITS+`NR_BITS-1:0] raddr1, - VX_gpr_read_if gpr_read_if + VX_gpr_rsp_if gpr_rsp_if ); - reg [`NUM_THREADS-1:0][31:0] rs1_tmp_data, rs2_tmp_data, rs3_tmp_data; + reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data, rsp_rs3_data; + reg rsp_valid; + reg [31:0] rsp_pc; + reg [`NW_BITS-1:0] rsp_wid; reg read_rs1; - reg [`NW_BITS-1:0] rs3_wid; - wire rs3_delay = gpr_read_if.valid && gpr_read_if.use_rs3 && read_rs1; - wire read_fire = gpr_read_if.valid && gpr_read_if.ready_out; + wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && read_rs1; + wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready; always @(posedge clk) begin if (reset) begin - rs1_tmp_data <= 0; - rs2_tmp_data <= 0; - rs3_tmp_data <= 0; - read_rs1 <= 1; - rs3_wid <= 0; + rsp_valid <= 0; + rsp_pc <= 0; + rsp_rs1_data <= 0; + rsp_rs2_data <= 0; + rsp_rs3_data <= 0; + rsp_wid <= 0; + read_rs1 <= 1; end else begin if (rs3_delay) begin read_rs1 <= 0; - rs3_wid <= gpr_read_if.wid; + rsp_wid <= gpr_req_if.wid; end else if (read_fire) begin read_rs1 <= 1; end - if (read_rs1) begin - rs1_tmp_data <= rs1_data; - end - rs2_tmp_data <= rs2_data; - rs3_tmp_data <= rs1_data; + rsp_valid <= gpr_req_if.valid; + rsp_wid <= gpr_req_if.wid; + rsp_pc <= gpr_req_if.PC; - assert(read_rs1 || rs3_wid == gpr_read_if.wid); + if (read_rs1) begin + rsp_rs1_data <= rs1_data; + end + rsp_rs2_data <= rs2_data; + rsp_rs3_data <= rs1_data; + + assert(read_rs1 || rsp_wid == gpr_req_if.wid); end end @@ -51,11 +60,15 @@ module VX_gpr_fp_ctrl ( end // outputs - wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_read_if.rs1 : gpr_read_if.rs3; - assign raddr1 = {gpr_read_if.wid, rs1}; - assign gpr_read_if.ready_in = ~rs3_delay; - assign gpr_read_if.rs1_data = rs1_tmp_data; - assign gpr_read_if.rs2_data = rs2_tmp_data; - assign gpr_read_if.rs3_data = rs3_tmp_data; + wire [`NR_BITS-1:0] rs1 = read_rs1 ? 
gpr_req_if.rs1 : gpr_req_if.rs3; + assign raddr1 = {gpr_req_if.wid, rs1}; + assign gpr_req_if.ready = ~rs3_delay; + + assign gpr_rsp_if.valid = rsp_valid; + assign gpr_rsp_if.wid = rsp_wid; + assign gpr_rsp_if.PC = rsp_pc; + assign gpr_rsp_if.rs1_data = rsp_rs1_data; + assign gpr_rsp_if.rs2_data = rsp_rs2_data; + assign gpr_rsp_if.rs3_data = rsp_rs3_data; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 86f17ad4..129da4c0 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -8,9 +8,10 @@ module VX_gpr_stage #( // inputs VX_writeback_if writeback_if, + VX_gpr_req_if gpr_req_if, // outputs - VX_gpr_read_if gpr_read_if + VX_gpr_rsp_if gpr_rsp_if ); `UNUSED_VAR (reset) @@ -20,11 +21,11 @@ module VX_gpr_stage #( VX_gpr_ram gpr_ram ( .clk (clk), - .we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.thread_mask), + .we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.tmask), .waddr ({writeback_if.wid, writeback_if.rd}), .wdata (writeback_if.data), .rs1 (raddr1), - .rs2 ({gpr_read_if.wid, gpr_read_if.rs2}), + .rs2 ({gpr_req_if.wid, gpr_req_if.rs2}), .rs1_data (rs1_data), .rs2_data (rs2_data) ); @@ -36,26 +37,46 @@ module VX_gpr_stage #( .rs1_data (rs1_data), .rs2_data (rs2_data), .raddr1 (raddr1), - .gpr_read_if(gpr_read_if) + .gpr_req_if (gpr_req_if), + .gpr_rsp_if (gpr_rsp_if) ); `else - reg [`NUM_THREADS-1:0][31:0] rs1_tmp_data, rs2_tmp_data; + reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data; + reg rsp_valid; + reg [`NW_BITS-1:0] rsp_wid; + reg [31:0] rsp_pc; always @(posedge clk) begin - rs1_tmp_data <= rs1_data; - rs2_tmp_data <= rs2_data; + if (reset) begin + rsp_valid <= 0; + rsp_wid <= 0; + rsp_pc <= 0; + rsp_rs1_data <= 0; + rsp_rs2_data <= 0; + end else begin + rsp_valid <= gpr_req_if.valid; + rsp_wid <= gpr_req_if.wid; + rsp_pc <= gpr_req_if.PC; + rsp_rs1_data <= rs1_data; + rsp_rs2_data <= rs2_data; + end end - assign raddr1 = {gpr_read_if.wid, gpr_read_if.rs1}; - assign 
gpr_read_if.rs1_data = rs1_tmp_data; - assign gpr_read_if.rs2_data = rs2_tmp_data; - assign gpr_read_if.rs3_data = 0; - assign gpr_read_if.ready_in = 1; - - `UNUSED_VAR (gpr_read_if.valid); - `UNUSED_VAR (gpr_read_if.use_rs3); - `UNUSED_VAR (gpr_read_if.rs3); - `UNUSED_VAR (gpr_read_if.ready_out); + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; + + assign gpr_req_if.ready = 1; + + assign gpr_rsp_if.valid = rsp_valid; + assign gpr_rsp_if.wid = rsp_wid; + assign gpr_rsp_if.PC = rsp_pc; + assign gpr_rsp_if.rs1_data = rsp_rs1_data; + assign gpr_rsp_if.rs2_data = rsp_rs2_data; + assign gpr_rsp_if.rs3_data = 0; + + `UNUSED_VAR (gpr_req_if.valid); + `UNUSED_VAR (gpr_req_if.rs3); + `UNUSED_VAR (gpr_req_if.use_rs3); + `UNUSED_VAR (gpr_rsp_if.ready); `endif assign writeback_if.ready = 1'b1; diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 3f39d874..ac6550a3 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -32,8 +32,8 @@ module VX_gpu_unit #( for (genvar i = 0; i < `NUM_THREADS; i++) begin assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]); end - assign tmc.valid = is_tmc; - assign tmc.thread_mask = tmc_new_mask; + assign tmc.valid = is_tmc; + assign tmc.tmask = tmc_new_mask; // wspawn @@ -53,8 +53,8 @@ module VX_gpu_unit #( for (genvar i = 0; i < `NUM_THREADS; i++) begin wire taken = gpu_req_if.rs1_data[i][0]; - assign split_then_mask[i] = gpu_req_if.thread_mask[i] & taken; - assign split_else_mask[i] = gpu_req_if.thread_mask[i] & ~taken; + assign split_then_mask[i] = gpu_req_if.tmask[i] & taken; + assign split_else_mask[i] = gpu_req_if.tmask[i] & ~taken; end assign split.valid = is_split; @@ -78,12 +78,12 @@ module VX_gpu_unit #( assign warp_ctl_if.split = split; assign warp_ctl_if.barrier = barrier; - assign gpu_commit_if.valid = gpu_req_if.valid; - assign gpu_commit_if.wid = gpu_req_if.wid; - assign gpu_commit_if.thread_mask = gpu_req_if.thread_mask; - assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC; - assign gpu_commit_if.rd = 
gpu_req_if.rd; - assign gpu_commit_if.wb = gpu_req_if.wb; + assign gpu_commit_if.valid = gpu_req_if.valid; + assign gpu_commit_if.wid = gpu_req_if.wid; + assign gpu_commit_if.tmask = gpu_req_if.tmask; + assign gpu_commit_if.PC = gpu_req_if.PC; + assign gpu_commit_if.rd = gpu_req_if.rd; + assign gpu_commit_if.wb = gpu_req_if.wb; // can accept new request? assign gpu_req_if.ready = gpu_commit_if.ready; diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index abce644d..2d7d9db2 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -175,8 +175,8 @@ module VX_ibuffer #( assign deq_wid_next = deq_wid_n; assign ibuf_enq_if.ready = ~q_full[ibuf_enq_if.wid]; - assign q_data_in = {ibuf_enq_if.thread_mask, - ibuf_enq_if.curr_PC, + assign q_data_in = {ibuf_enq_if.tmask, + ibuf_enq_if.PC, ibuf_enq_if.ex_type, ibuf_enq_if.op_type, ibuf_enq_if.op_mod, @@ -193,8 +193,8 @@ module VX_ibuffer #( assign ibuf_deq_if.valid = deq_valid; assign ibuf_deq_if.wid = deq_wid; - assign {ibuf_deq_if.thread_mask, - ibuf_deq_if.curr_PC, + assign {ibuf_deq_if.tmask, + ibuf_deq_if.PC, ibuf_deq_if.ex_type, ibuf_deq_if.op_type, ibuf_deq_if.op_mod, diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 5cccaa74..1bb61f92 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -20,8 +20,8 @@ module VX_icache_stage #( ); `UNUSED_VAR (reset) - reg [31:0] rsp_curr_PC_buf [`NUM_WARPS-1:0]; - reg [`NUM_THREADS-1:0] rsp_thread_mask_buf [`NUM_WARPS-1:0]; + reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0]; + reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0]; wire icache_req_fire = icache_req_if.valid && icache_req_if.ready; @@ -30,8 +30,8 @@ module VX_icache_stage #( always @(posedge clk) begin if (icache_req_fire) begin - rsp_curr_PC_buf[req_tag] <= ifetch_req_if.curr_PC; - rsp_thread_mask_buf[req_tag] <= ifetch_req_if.thread_mask; + rsp_PC_buf[req_tag] <= ifetch_req_if.PC; + rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask; end end @@ -39,23 +39,23 @@ module VX_icache_stage 
#( assign icache_req_if.valid = ifetch_req_if.valid; assign icache_req_if.rw = 0; assign icache_req_if.byteen = 4'b1111; - assign icache_req_if.addr = ifetch_req_if.curr_PC[31:2]; + assign icache_req_if.addr = ifetch_req_if.PC[31:2]; assign icache_req_if.data = 0; // Can accept new request? assign ifetch_req_if.ready = icache_req_if.ready; `ifdef DBG_CORE_REQ_INFO - assign icache_req_if.tag = {ifetch_req_if.curr_PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag}; + assign icache_req_if.tag = {ifetch_req_if.PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag}; `else assign icache_req_if.tag = req_tag; `endif - assign ifetch_rsp_if.valid = icache_rsp_if.valid; - assign ifetch_rsp_if.wid = rsp_tag; - assign ifetch_rsp_if.thread_mask = rsp_thread_mask_buf[rsp_tag]; - assign ifetch_rsp_if.curr_PC = rsp_curr_PC_buf[rsp_tag]; - assign ifetch_rsp_if.instr = icache_rsp_if.data[0]; + assign ifetch_rsp_if.valid = icache_rsp_if.valid; + assign ifetch_rsp_if.wid = rsp_tag; + assign ifetch_rsp_if.tmask = rsp_tmask_buf[rsp_tag]; + assign ifetch_rsp_if.PC = rsp_PC_buf[rsp_tag]; + assign ifetch_rsp_if.instr = icache_rsp_if.data[0]; // Can accept new response? 
assign icache_rsp_if.ready = ifetch_rsp_if.ready; @@ -63,21 +63,21 @@ module VX_icache_stage #( `SCOPE_ASSIGN (scope_icache_req_valid, icache_req_if.valid); `SCOPE_ASSIGN (scope_icache_req_wid, ifetch_req_if.wid); `SCOPE_ASSIGN (scope_icache_req_addr, {icache_req_if.addr, 2'b0}); - `SCOPE_ASSIGN (scope_icache_req_tag, icache_req_if.tag); + `SCOPE_ASSIGN (scope_icache_req_tag, req_tag); `SCOPE_ASSIGN (scope_icache_req_ready, icache_req_if.ready); `SCOPE_ASSIGN (scope_icache_rsp_valid, icache_rsp_if.valid); `SCOPE_ASSIGN (scope_icache_rsp_data, icache_rsp_if.data); - `SCOPE_ASSIGN (scope_icache_rsp_tag, icache_rsp_if.tag); + `SCOPE_ASSIGN (scope_icache_rsp_tag, rsp_tag); `SCOPE_ASSIGN (scope_icache_rsp_ready, icache_rsp_if.ready); `ifdef DBG_PRINT_CORE_ICACHE always @(posedge clk) begin if (icache_req_if.valid && icache_req_if.ready) begin - $display("%t: I$%0d req: wid=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.curr_PC); + $display("%t: I$%0d req: wid=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC); end if (icache_rsp_if.valid && icache_rsp_if.ready) begin - $display("%t: I$%0d rsp: wid=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.curr_PC, ifetch_rsp_if.instr); + $display("%t: I$%0d rsp: wid=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.instr); end end `endif diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index 713ada77..e7a280d7 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -6,7 +6,7 @@ module VX_instr_demux ( // inputs VX_decode_if execute_if, - VX_gpr_read_if gpr_read_if, + VX_gpr_rsp_if gpr_rsp_if, VX_csr_to_issue_if csr_to_issue_if, // outputs @@ -21,12 +21,12 @@ module VX_instr_demux ( VX_priority_encoder #( .N(`NUM_THREADS) ) tid_select ( - .data_in (execute_if.thread_mask), + .data_in (execute_if.tmask), .data_out (tid), `UNUSED_PIN (valid_out) ); - wire [31:0] next_PC = execute_if.curr_PC + 4; + wire 
[31:0] next_PC = execute_if.PC + 4; // ALU unit @@ -41,8 +41,8 @@ module VX_instr_demux ( .reset (reset), .ready_in (alu_req_ready), .valid_in (alu_req_valid), - .data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}), - .data_out ({alu_req_if.wid, alu_req_if.thread_mask, alu_req_if.curr_PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid}), + .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}), + .data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid}), .ready_out (alu_req_if.ready), .valid_out (alu_req_if.valid) ); @@ -54,8 +54,8 @@ module VX_instr_demux ( .clk (clk), .reset (reset), .push (alu_req_valid && alu_req_ready), - .data_in ({gpr_read_if.rs1_data, gpr_read_if.rs2_data}), - .data_out ({alu_req_if.rs1_data, alu_req_if.rs2_data}), + .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({alu_req_if.rs1_data, alu_req_if.rs2_data}), .pop (alu_req_if.valid && alu_req_if.ready) ); @@ -71,8 +71,8 @@ module VX_instr_demux ( .reset (reset), .ready_in (lsu_req_ready), .valid_in (lsu_req_valid), - .data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}), - .data_out ({lsu_req_if.wid, lsu_req_if.thread_mask, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb}), + .data_in 
({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}), + .data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb}), .ready_out (lsu_req_if.ready), .valid_out (lsu_req_if.valid) ); @@ -83,7 +83,7 @@ module VX_instr_demux ( .clk (clk), .reset (reset), .push (lsu_req_valid && lsu_req_ready), - .data_in ({gpr_read_if.rs1_data, gpr_read_if.rs2_data}), + .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), .data_out ({lsu_req_if.base_addr, lsu_req_if.store_data}), .pop (lsu_req_if.valid && lsu_req_if.ready) ); @@ -100,8 +100,8 @@ module VX_instr_demux ( .reset (reset), .ready_in (csr_req_ready), .valid_in (csr_req_valid), - .data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}), - .data_out ({csr_req_if.wid, csr_req_if.thread_mask, csr_req_if.curr_PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io}), + .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}), + .data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io}), .ready_out (csr_req_if.ready), .valid_out (csr_req_if.valid) ); @@ -114,7 +114,7 @@ module VX_instr_demux ( tmp_rs1 <= execute_if.rs1; end - wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 32'(tmp_rs1) : gpr_read_if.rs1_data[0]; + wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 
32'(tmp_rs1) : gpr_rsp_if.rs1_data[0]; VX_gpr_bypass #( .DATAW (32) @@ -140,8 +140,8 @@ module VX_instr_demux ( .reset (reset), .ready_in (mul_req_ready), .valid_in (mul_req_valid), - .data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), - .data_out ({mul_req_if.wid, mul_req_if.thread_mask, mul_req_if.curr_PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb}), + .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), + .data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb}), .ready_out (mul_req_if.ready), .valid_out (mul_req_if.valid) ); @@ -152,8 +152,8 @@ module VX_instr_demux ( .clk (clk), .reset (reset), .push (mul_req_valid && mul_req_ready), - .data_in ({gpr_read_if.rs1_data, gpr_read_if.rs2_data}), - .data_out ({mul_req_if.rs1_data, mul_req_if.rs2_data}), + .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({mul_req_if.rs1_data, mul_req_if.rs2_data}), .pop (mul_req_if.valid && mul_req_if.ready) ); `endif @@ -175,8 +175,8 @@ module VX_instr_demux ( .reset (reset), .ready_in (fpu_req_ready), .valid_in (fpu_req_valid), - .data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `FPU_OP(execute_if.op_type), fpu_frm, execute_if.rd, execute_if.wb}), - .data_out ({fpu_req_if.wid, fpu_req_if.thread_mask, fpu_req_if.curr_PC, fpu_req_if.op_type, fpu_req_if.frm, fpu_req_if.rd, fpu_req_if.wb}), + .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), fpu_frm, execute_if.rd, execute_if.wb}), + .data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.frm, fpu_req_if.rd, fpu_req_if.wb}), .ready_out (fpu_req_if.ready), .valid_out (fpu_req_if.valid) ); @@ -187,8 +187,8 @@ module VX_instr_demux ( .clk (clk), .reset (reset), .push (fpu_req_valid && fpu_req_ready), - .data_in 
({gpr_read_if.rs1_data, gpr_read_if.rs2_data, gpr_read_if.rs3_data}), - .data_out ({fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}), + .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}), + .data_out ({fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}), .pop (fpu_req_if.valid && fpu_req_if.ready) ); `endif @@ -205,8 +205,8 @@ module VX_instr_demux ( .reset (reset), .ready_in (gpu_req_ready), .valid_in (gpu_req_valid), - .data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), - .data_out ({gpu_req_if.wid, gpu_req_if.thread_mask, gpu_req_if.curr_PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb}), + .data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), + .data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb}), .ready_out (gpu_req_if.ready), .valid_out (gpu_req_if.valid) ); @@ -217,8 +217,8 @@ module VX_instr_demux ( .clk (clk), .reset (reset), .push (gpu_req_valid && gpu_req_ready), - .data_in ({gpr_read_if.rs1_data, gpr_read_if.rs2_data[0]}), - .data_out ({gpu_req_if.rs1_data, gpu_req_if.rs2_data}), + .data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}), + .data_out ({gpu_req_if.rs1_data, gpu_req_if.rs2_data}), .pop (gpu_req_if.valid && gpu_req_if.ready) ); diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index b8b8aa41..1c1e4f8a 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -3,6 +3,8 @@ module VX_issue #( parameter CORE_ID = 0 ) ( + `SCOPE_SIGNALS_ISSUE_IO + input wire clk, input wire reset, @@ -19,7 +21,8 @@ module VX_issue #( ); VX_decode_if ibuf_deq_if(); VX_decode_if execute_if(); - VX_gpr_read_if gpr_read_if(); + VX_gpr_req_if gpr_req_if(); + VX_gpr_rsp_if gpr_rsp_if(); wire scoreboard_delay; wire [`NW_BITS-1:0] deq_wid_next; @@ -29,7 
+32,7 @@ module VX_issue #( ) ibuffer ( .clk (clk), .reset (reset), - .freeze (~gpr_read_if.ready_in), + .freeze (~gpr_req_if.ready), .ibuf_enq_if (decode_if), .deq_wid_next (deq_wid_next), .ibuf_deq_if (ibuf_deq_if) @@ -44,17 +47,18 @@ module VX_issue #( .writeback_if (writeback_if), .deq_wid_next (deq_wid_next), .exe_delay (~execute_if.ready), - .gpr_delay (~gpr_read_if.ready_in), + .gpr_delay (~gpr_req_if.ready), .delay (scoreboard_delay) ); - assign gpr_read_if.valid = ibuf_deq_if.valid && ~scoreboard_delay; - assign gpr_read_if.wid = ibuf_deq_if.wid; - assign gpr_read_if.rs1 = ibuf_deq_if.rs1; - assign gpr_read_if.rs2 = ibuf_deq_if.rs2; - assign gpr_read_if.rs3 = ibuf_deq_if.rs3; - assign gpr_read_if.use_rs3 = ibuf_deq_if.use_rs3; - assign gpr_read_if.ready_out = execute_if.ready; + assign gpr_req_if.valid = ibuf_deq_if.valid && ~scoreboard_delay; + assign gpr_req_if.wid = ibuf_deq_if.wid; + assign gpr_req_if.PC = ibuf_deq_if.PC; + assign gpr_req_if.rs1 = ibuf_deq_if.rs1; + assign gpr_req_if.rs2 = ibuf_deq_if.rs2; + assign gpr_req_if.rs3 = ibuf_deq_if.rs3; + assign gpr_req_if.use_rs3 = ibuf_deq_if.use_rs3; + assign gpr_rsp_if.ready = execute_if.ready; VX_gpr_stage #( .CORE_ID(CORE_ID) @@ -62,13 +66,16 @@ module VX_issue #( .clk (clk), .reset (reset), .writeback_if (writeback_if), - .gpr_read_if (gpr_read_if) + .gpr_req_if (gpr_req_if), + .gpr_rsp_if (gpr_rsp_if) ); + + `UNUSED_VAR (gpr_rsp_if.valid); - assign execute_if.valid = ibuf_deq_if.valid && gpr_read_if.ready_in && ~scoreboard_delay; + assign execute_if.valid = ibuf_deq_if.valid && gpr_req_if.ready && ~scoreboard_delay; assign execute_if.wid = ibuf_deq_if.wid; - assign execute_if.thread_mask = ibuf_deq_if.thread_mask; - assign execute_if.curr_PC = ibuf_deq_if.curr_PC; + assign execute_if.tmask = ibuf_deq_if.tmask; + assign execute_if.PC = ibuf_deq_if.PC; assign execute_if.ex_type = ibuf_deq_if.ex_type; assign execute_if.op_type = ibuf_deq_if.op_type; assign execute_if.op_mod = ibuf_deq_if.op_mod; @@ 
-83,7 +90,7 @@ module VX_issue #( .clk (clk), .reset (reset), .execute_if (execute_if), - .gpr_read_if (gpr_read_if), + .gpr_rsp_if (gpr_rsp_if), .csr_to_issue_if(csr_to_issue_if), .alu_req_if (alu_req_if), .lsu_req_if (lsu_req_if), @@ -93,25 +100,58 @@ module VX_issue #( .gpu_req_if (gpu_req_if) ); + `SCOPE_ASSIGN (scope_issue_valid, ibuf_deq_if.valid); + `SCOPE_ASSIGN (scope_issue_wid, ibuf_deq_if.wid); + `SCOPE_ASSIGN (scope_issue_tmask, ibuf_deq_if.tmask); + `SCOPE_ASSIGN (scope_issue_pc, ibuf_deq_if.PC); + `SCOPE_ASSIGN (scope_issue_ex_type, ibuf_deq_if.ex_type); + `SCOPE_ASSIGN (scope_issue_op_type, ibuf_deq_if.op_type); + `SCOPE_ASSIGN (scope_issue_op_mod, ibuf_deq_if.op_mod); + `SCOPE_ASSIGN (scope_issue_wb, ibuf_deq_if.wb); + `SCOPE_ASSIGN (scope_issue_rd, ibuf_deq_if.rd); + `SCOPE_ASSIGN (scope_issue_rs1, ibuf_deq_if.rs1); + `SCOPE_ASSIGN (scope_issue_rs2, ibuf_deq_if.rs2); + `SCOPE_ASSIGN (scope_issue_rs3, ibuf_deq_if.rs3); + `SCOPE_ASSIGN (scope_issue_imm, ibuf_deq_if.imm); + `SCOPE_ASSIGN (scope_issue_rs1_is_pc, ibuf_deq_if.rs1_is_PC); + `SCOPE_ASSIGN (scope_issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm); + `SCOPE_ASSIGN (scope_issue_ready, ibuf_deq_if.ready); + `SCOPE_ASSIGN (scope_scoreboard_delay, scoreboard_delay); + `SCOPE_ASSIGN (scope_gpr_delay, ~gpr_req_if.ready); + `SCOPE_ASSIGN (scope_execute_delay, ~execute_if.ready); + + `SCOPE_ASSIGN (scope_gpr_rsp_valid, gpr_rsp_if.valid); + `SCOPE_ASSIGN (scope_gpr_rsp_wid, gpr_rsp_if.wid); + `SCOPE_ASSIGN (scope_gpr_rsp_pc, gpr_rsp_if.PC); + `SCOPE_ASSIGN (scope_gpr_rsp_a, gpr_rsp_if.rs1_data); + `SCOPE_ASSIGN (scope_gpr_rsp_b, gpr_rsp_if.rs2_data); + `SCOPE_ASSIGN (scope_gpr_rsp_c, gpr_rsp_if.rs3_data); + + `SCOPE_ASSIGN (scope_writeback_valid, writeback_if.valid); + `SCOPE_ASSIGN (scope_writeback_wid, writeback_if.wid); + `SCOPE_ASSIGN (scope_writeback_pc, writeback_if.PC); + `SCOPE_ASSIGN (scope_writeback_rd, writeback_if.rd); + `SCOPE_ASSIGN (scope_writeback_data, writeback_if.data); + `ifdef 
DBG_PRINT_PIPELINE always @(posedge clk) begin if (alu_req_if.valid && alu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.curr_PC, alu_req_if.thread_mask, alu_req_if.rs1_data, alu_req_if.rs2_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rs1_data, alu_req_if.rs2_data); end if (lsu_req_if.valid && lsu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.curr_PC, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); end if (csr_req_if.valid && csr_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.curr_PC, csr_req_if.thread_mask, csr_req_if.csr_addr, csr_req_if.csr_mask); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.csr_addr, csr_req_if.csr_mask); end if (mul_req_if.valid && mul_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.curr_PC, mul_req_if.thread_mask, mul_req_if.rs1_data, mul_req_if.rs2_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, 
mul_req_if.tmask, mul_req_if.rs1_data, mul_req_if.rs2_data); end if (fpu_req_if.valid && fpu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.curr_PC, fpu_req_if.thread_mask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data); end if (gpu_req_if.valid && gpu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.curr_PC, gpu_req_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rs1_data, gpu_req_if.rs2_data); end end `endif diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 2401d559..86ebcd3b 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -18,7 +18,7 @@ module VX_lsu_unit #( // outputs VX_exu_to_cmt_if lsu_commit_if ); - wire [`NUM_THREADS-1:0] req_thread_mask; + wire [`NUM_THREADS-1:0] req_tmask; wire req_rw; wire [`NUM_THREADS-1:0][29:0] req_addr; wire [`NUM_THREADS-1:0][1:0] req_offset; @@ -28,7 +28,7 @@ module VX_lsu_unit #( wire [`NR_BITS-1:0] req_rd; wire req_wb; wire [`NW_BITS-1:0] req_wid; - wire [31:0] req_curr_PC; + wire [31:0] req_pc; wire [`NUM_THREADS-1:0][31:0] full_address; for (genvar i = 0; i < `NUM_THREADS; i++) begin @@ -79,12 +79,12 @@ module VX_lsu_unit #( .reset (reset), .stall (stall_in), .flush (1'b0), - .in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.thread_mask, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, 
mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}), - .out ({valid_in, req_wid, req_thread_mask, req_curr_PC, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data}) + .in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}), + .out ({valid_in, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data}) ); wire [`NW_BITS-1:0] rsp_wid; - wire [31:0] rsp_curr_PC; + wire [31:0] rsp_pc; wire [`NR_BITS-1:0] rsp_rd; wire rsp_wb; wire [`NUM_THREADS-1:0][1:0] rsp_offset; @@ -116,8 +116,8 @@ module VX_lsu_unit #( .write_addr (req_tag), .acquire_slot (lsuq_push), .read_addr (rsp_tag), - .write_data ({req_wid, req_curr_PC, req_rd, req_wb, req_offset, req_sext}), - .read_data ({rsp_wid, rsp_curr_PC, rsp_rd, rsp_wb, rsp_offset, rsp_sext}), + .write_data ({req_wid, req_pc, req_rd, req_wb, req_offset, req_sext}), + .read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_offset, rsp_sext}), .release_addr (rsp_tag), .release_slot (lsuq_pop), .full (lsuq_full) @@ -125,7 +125,7 @@ module VX_lsu_unit #( always @(posedge clk) begin if (lsuq_push) begin - mem_rsp_mask[req_tag] <= req_thread_mask; + mem_rsp_mask[req_tag] <= req_tmask; end if (lsuq_pop_part) begin mem_rsp_mask[rsp_tag] <= mem_rsp_mask_n; @@ -136,14 +136,14 @@ module VX_lsu_unit #( wire store_stall = valid_in && req_rw && stall_out; // Core Request - assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~lsuq_full && ~store_stall}} & req_thread_mask; + assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~lsuq_full && ~store_stall}} & req_tmask; assign dcache_req_if.rw = {`NUM_THREADS{req_rw}}; assign dcache_req_if.byteen = req_byteen; assign dcache_req_if.addr = req_addr; assign dcache_req_if.data = req_data; `ifdef DBG_CORE_REQ_INFO - assign dcache_req_if.tag = 
{req_curr_PC, req_rd, req_wid, req_tag}; + assign dcache_req_if.tag = {req_pc, req_rd, req_wid, req_tag}; `else assign dcache_req_if.tag = req_tag; `endif @@ -172,8 +172,8 @@ module VX_lsu_unit #( wire arb_valid = is_store_req || is_load_rsp; wire [`NW_BITS-1:0] arb_wid = is_store_req ? req_wid : rsp_wid; - wire [`NUM_THREADS-1:0] arb_tmask = is_store_req ? req_thread_mask : dcache_rsp_if.valid; - wire [31:0] arb_curr_PC = is_store_req ? req_curr_PC : rsp_curr_PC; + wire [`NUM_THREADS-1:0] arb_tmask = is_store_req ? req_tmask : dcache_rsp_if.valid; + wire [31:0] arb_PC = is_store_req ? req_pc : rsp_pc; wire [`NR_BITS-1:0] arb_rd = is_store_req ? 0 : rsp_rd; wire arb_wb = is_store_req ? 0 : rsp_wb; @@ -184,8 +184,8 @@ module VX_lsu_unit #( .reset (reset), .stall (stall_out), .flush (1'b0), - .in ({arb_valid, arb_wid, arb_tmask, arb_curr_PC, arb_rd, arb_wb, rsp_data}), - .out ({lsu_commit_if.valid, lsu_commit_if.wid, lsu_commit_if.thread_mask, lsu_commit_if.curr_PC, lsu_commit_if.rd, lsu_commit_if.wb, lsu_commit_if.data}) + .in ({arb_valid, arb_wid, arb_tmask, arb_PC, arb_rd, arb_wb, rsp_data}), + .out ({lsu_commit_if.valid, lsu_commit_if.wid, lsu_commit_if.tmask, lsu_commit_if.PC, lsu_commit_if.rd, lsu_commit_if.wb, lsu_commit_if.data}) ); // Can accept new cache response? 
@@ -197,25 +197,25 @@ module VX_lsu_unit #( `SCOPE_ASSIGN (scope_dcache_req_rw, req_rw); `SCOPE_ASSIGN (scope_dcache_req_byteen,dcache_req_if.byteen); `SCOPE_ASSIGN (scope_dcache_req_data, dcache_req_if.data); - `SCOPE_ASSIGN (scope_dcache_req_tag, dcache_req_if.tag); + `SCOPE_ASSIGN (scope_dcache_req_tag, req_tag); `SCOPE_ASSIGN (scope_dcache_req_ready, dcache_req_if.ready); `SCOPE_ASSIGN (scope_dcache_req_wid, req_wid); - `SCOPE_ASSIGN (scope_dcache_req_PC, req_curr_PC); + `SCOPE_ASSIGN (scope_dcache_req_pc, req_pc); `SCOPE_ASSIGN (scope_dcache_rsp_valid, dcache_rsp_if.valid); `SCOPE_ASSIGN (scope_dcache_rsp_data, dcache_rsp_if.data); - `SCOPE_ASSIGN (scope_dcache_rsp_tag, dcache_rsp_if.tag); + `SCOPE_ASSIGN (scope_dcache_rsp_tag, rsp_tag); `SCOPE_ASSIGN (scope_dcache_rsp_ready, dcache_rsp_if.ready); `ifdef DBG_PRINT_CORE_DCACHE always @(posedge clk) begin if ((| dcache_req_if.valid) && dcache_req_if.ready) begin $display("%t: D$%0d req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, rd=%0d, rw=%0b, byteen=%0h, data=%0h", - $time, CORE_ID, req_wid, req_curr_PC, dcache_req_if.valid, req_address, dcache_req_if.tag, req_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data); + $time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, req_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data); end if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin $display("%t: D$%0d rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h", - $time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_curr_PC, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data); + $time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data); end end `endif diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index 9fa861ee..fb679451 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -20,8 +20,8 @@ module VX_mul_unit #( wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data; wire [`NW_BITS-1:0] 
rsp_wid; - wire [`NUM_THREADS-1:0] rsp_thread_mask; - wire [31:0] rsp_curr_PC; + wire [`NUM_THREADS-1:0] rsp_tmask; + wire [31:0] rsp_PC; wire [`NR_BITS-1:0] rsp_rd; wire rsp_wb; wire [MULQ_BITS-1:0] tag_in, tag_out; @@ -42,8 +42,8 @@ module VX_mul_unit #( .write_addr (tag_in), .read_addr (tag_out), .release_addr (tag_out), - .write_data ({mul_req_if.wid, mul_req_if.thread_mask, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb}), - .read_data ({rsp_wid, rsp_thread_mask, rsp_curr_PC, rsp_rd, rsp_wb}), + .write_data ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb}), + .read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}), .release_slot (mulq_pop), .full (mulq_full) ); @@ -155,8 +155,8 @@ module VX_mul_unit #( .reset (reset), .stall (stall_out), .flush (1'b0), - .in ({valid_out, rsp_wid, rsp_thread_mask, rsp_curr_PC, rsp_rd, rsp_wb, result}), - .out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.thread_mask, mul_commit_if.curr_PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data}) + .in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}), + .out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data}) ); // can accept new request? 
diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 00b2c5c3..df290d5d 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -5,8 +5,8 @@ module VX_pipeline #( ) ( `SCOPE_SIGNALS_ISTAGE_IO `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_PIPELINE_IO - `SCOPE_SIGNALS_EX_IO + `SCOPE_SIGNALS_ISSUE_IO + `SCOPE_SIGNALS_EXECUTE_IO // Clock input wire clk, @@ -153,6 +153,8 @@ module VX_pipeline #( VX_issue #( .CORE_ID(CORE_ID) ) issue ( + `SCOPE_SIGNALS_ISSUE_BIND + .clk (clk), .reset (reset), @@ -172,7 +174,7 @@ module VX_pipeline #( .CORE_ID(CORE_ID) ) execute ( `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_EX_BIND + `SCOPE_SIGNALS_EXECUTE_BIND .clk (clk), .reset (reset), @@ -247,6 +249,4 @@ module VX_pipeline #( assign core_icache_rsp_if.tag = icache_rsp_tag; assign icache_rsp_ready = core_icache_rsp_if.ready; - `SCOPE_ASSIGN (scope_busy, busy); - endmodule diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index aef5eb95..fa595c56 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -21,7 +21,7 @@ scope_icache_rsp_data, \ scope_icache_rsp_tag, \ scope_dcache_req_wid, \ - scope_dcache_req_PC, \ + scope_dcache_req_pc, \ scope_dcache_req_addr, \ scope_dcache_req_rw, \ scope_dcache_req_byteen, \ @@ -29,13 +29,27 @@ scope_dcache_req_tag, \ scope_dcache_rsp_data, \ scope_dcache_rsp_tag, \ - scope_alu_req_wid, \ - scope_alu_req_PC, \ - scope_alu_req_rd, \ - scope_alu_req_a, \ - scope_alu_req_b, \ + scope_issue_wid, \ + scope_issue_tmask, \ + scope_issue_pc, \ + scope_issue_ex_type, \ + scope_issue_op_type, \ + scope_issue_op_mod, \ + scope_issue_wb, \ + scope_issue_rd, \ + scope_issue_rs1, \ + scope_issue_rs2, \ + scope_issue_rs3, \ + scope_issue_imm, \ + scope_issue_rs1_is_pc, \ + scope_issue_rs2_is_imm, \ + scope_gpr_rsp_wid, \ + scope_gpr_rsp_pc, \ + scope_gpr_rsp_a, \ + scope_gpr_rsp_b, \ + scope_gpr_rsp_c, \ scope_writeback_wid, \ - scope_writeback_PC, \ + scope_writeback_pc, \ scope_writeback_rd, \ scope_writeback_data, \ scope_bank_addr_st0, \ @@ -45,7 
+59,6 @@ scope_bank_miss_st1, \ scope_bank_dirty_st1, \ scope_bank_force_miss_st1, - `define SCOPE_SIGNALS_UPD_LIST \ scope_dram_req_valid, \ @@ -64,13 +77,18 @@ scope_dcache_req_ready, \ scope_dcache_rsp_valid, \ scope_dcache_rsp_ready, \ - scope_alu_req_valid, \ - scope_writeback_valid, \ - scope_busy, \ scope_bank_valid_st0, \ scope_bank_valid_st1, \ scope_bank_valid_st2, \ - scope_bank_stall_pipe + scope_bank_stall_pipe, \ + scope_issue_valid, \ + scope_issue_ready, \ + scope_gpr_rsp_valid, \ + scope_writeback_valid, \ + scope_scoreboard_delay, \ + scope_gpr_delay, \ + scope_execute_delay, \ + scope_busy `define SCOPE_SIGNALS_DECL \ wire scope_dram_req_valid; \ @@ -94,36 +112,49 @@ wire scope_icache_req_valid; \ wire [`NW_BITS-1:0] scope_icache_req_wid; \ wire [31:0] scope_icache_req_addr; \ - wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \ + wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag; \ wire scope_icache_req_ready; \ wire scope_icache_rsp_valid; \ wire [31:0] scope_icache_rsp_data; \ - wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \ + wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag; \ wire scope_icache_rsp_ready; \ wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \ wire [`NW_BITS-1:0] scope_dcache_req_wid; \ - wire [31:0] scope_dcache_req_PC; \ + wire [31:0] scope_dcache_req_pc; \ wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr; \ wire scope_dcache_req_rw; \ wire [`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen; \ wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data; \ - wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \ + wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag; \ wire scope_dcache_req_ready; \ wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \ wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data; \ - wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \ + wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag; \ wire scope_dcache_rsp_ready; \ - wire scope_busy; \ wire scope_snp_rsp_ready; \ - wire scope_alu_req_valid; \ - wire 
[`NW_BITS-1:0] scope_alu_req_wid; \ - wire [31:0] scope_alu_req_PC; \ - wire [`NR_BITS-1:0] scope_alu_req_rd; \ - wire [`NUM_THREADS-1:0][31:0] scope_alu_req_a; \ - wire [`NUM_THREADS-1:0][31:0] scope_alu_req_b; \ + wire [`NW_BITS-1:0] scope_issue_wid; \ + wire [`NUM_THREADS-1:0] scope_issue_tmask; \ + wire [31:0] scope_issue_pc; \ + wire [`EX_BITS-1:0] scope_issue_ex_type; \ + wire [`OP_BITS-1:0] scope_issue_op_type; \ + wire [`MOD_BITS-1:0] scope_issue_op_mod; \ + wire scope_issue_wb; \ + wire [`NR_BITS-1:0] scope_issue_rd; \ + wire [`NR_BITS-1:0] scope_issue_rs1; \ + wire [`NR_BITS-1:0] scope_issue_rs2; \ + wire [`NR_BITS-1:0] scope_issue_rs3; \ + wire [31:0] scope_issue_imm; \ + wire scope_issue_rs1_is_pc; \ + wire scope_issue_rs2_is_imm; \ + wire scope_gpr_rsp_valid; \ + wire [`NW_BITS-1:0] scope_gpr_rsp_wid; \ + wire [31:0] scope_gpr_rsp_pc; \ + wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a; \ + wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b; \ + wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c; \ wire scope_writeback_valid; \ wire [`NW_BITS-1:0] scope_writeback_wid; \ - wire [31:0] scope_writeback_PC; \ + wire [31:0] scope_writeback_pc; \ wire [`NR_BITS-1:0] scope_writeback_rd; \ wire [`NUM_THREADS-1:0][31:0] scope_writeback_data; \ wire scope_bank_valid_st0; \ @@ -136,35 +167,39 @@ wire scope_bank_miss_st1; \ wire scope_bank_dirty_st1; \ wire scope_bank_force_miss_st1; \ - wire scope_bank_stall_pipe; + wire scope_bank_stall_pipe; \ + wire scope_issue_valid; \ + wire scope_issue_ready; \ + wire scope_scoreboard_delay; \ + wire scope_gpr_delay; \ + wire scope_execute_delay; \ + wire scope_busy; `define SCOPE_SIGNALS_ISTAGE_IO \ output wire scope_icache_req_valid, \ output wire [`NW_BITS-1:0] scope_icache_req_wid, \ output wire [31:0] scope_icache_req_addr, \ - output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \ + output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag, \ output wire scope_icache_req_ready, \ output wire scope_icache_rsp_valid, \ output 
wire [31:0] scope_icache_rsp_data, \ - output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \ + output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag, \ output wire scope_icache_rsp_ready, `define SCOPE_SIGNALS_LSU_IO \ output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \ output wire [`NW_BITS-1:0] scope_dcache_req_wid, \ - output wire [31:0] scope_dcache_req_PC, \ + output wire [31:0] scope_dcache_req_pc, \ output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr, \ output wire scope_dcache_req_rw, \ output wire [`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen, \ output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data, \ - output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \ + output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag, \ output wire scope_dcache_req_ready, \ output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \ output wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data, \ - output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \ + output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag, \ output wire scope_dcache_rsp_ready, - - `define SCOPE_SIGNALS_CORE_IO \ `define SCOPE_SIGNALS_CACHE_IO \ output wire scope_bank_valid_st0, \ @@ -179,22 +214,40 @@ output wire scope_bank_force_miss_st1, \ output wire scope_bank_stall_pipe, - `define SCOPE_SIGNALS_PIPELINE_IO \ - output wire scope_busy, - - `define SCOPE_SIGNALS_EX_IO \ - output wire scope_alu_req_valid, \ - output wire [`NW_BITS-1:0] scope_alu_req_wid, \ - output wire [31:0] scope_alu_req_PC, \ - output wire [`NR_BITS-1:0] scope_alu_req_rd, \ - output wire [`NUM_THREADS-1:0][31:0] scope_alu_req_a, \ - output wire [`NUM_THREADS-1:0][31:0] scope_alu_req_b, \ + `define SCOPE_SIGNALS_ISSUE_IO \ + output wire scope_issue_valid, \ + output wire [`NW_BITS-1:0] scope_issue_wid, \ + output wire [`NUM_THREADS-1:0] scope_issue_tmask, \ + output wire [31:0] scope_issue_pc, \ + output wire [`EX_BITS-1:0] scope_issue_ex_type, \ + output wire [`OP_BITS-1:0] scope_issue_op_type, \ + 
output wire [`MOD_BITS-1:0] scope_issue_op_mod, \ + output wire scope_issue_wb, \ + output wire [`NR_BITS-1:0] scope_issue_rd, \ + output wire [`NR_BITS-1:0] scope_issue_rs1, \ + output wire [`NR_BITS-1:0] scope_issue_rs2, \ + output wire [`NR_BITS-1:0] scope_issue_rs3, \ + output wire [31:0] scope_issue_imm, \ + output wire scope_issue_rs1_is_pc, \ + output wire scope_issue_rs2_is_imm, \ output wire scope_writeback_valid, \ + output wire scope_gpr_rsp_valid, \ + output wire [`NW_BITS-1:0] scope_gpr_rsp_wid, \ + output wire [31:0] scope_gpr_rsp_pc, \ + output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a, \ + output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b, \ + output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c, \ output wire [`NW_BITS-1:0] scope_writeback_wid, \ - output wire [31:0] scope_writeback_PC, \ + output wire [31:0] scope_writeback_pc, \ output wire [`NR_BITS-1:0] scope_writeback_rd, \ - output wire [`NUM_THREADS-1:0][31:0] scope_writeback_data, + output wire [`NUM_THREADS-1:0][31:0] scope_writeback_data, \ + output wire scope_issue_ready, \ + output wire scope_scoreboard_delay, \ + output wire scope_gpr_delay, \ + output wire scope_execute_delay, + `define SCOPE_SIGNALS_EXECUTE_IO + `define SCOPE_SIGNALS_ISTAGE_BIND \ .scope_icache_req_valid (scope_icache_req_valid), \ .scope_icache_req_wid (scope_icache_req_wid), \ @@ -209,7 +262,7 @@ `define SCOPE_SIGNALS_LSU_BIND \ .scope_dcache_req_valid (scope_dcache_req_valid), \ .scope_dcache_req_wid (scope_dcache_req_wid), \ - .scope_dcache_req_PC (scope_dcache_req_PC), \ + .scope_dcache_req_pc (scope_dcache_req_pc), \ .scope_dcache_req_addr (scope_dcache_req_addr), \ .scope_dcache_req_rw (scope_dcache_req_rw), \ .scope_dcache_req_byteen(scope_dcache_req_byteen), \ @@ -221,8 +274,6 @@ .scope_dcache_rsp_tag (scope_dcache_rsp_tag), \ .scope_dcache_rsp_ready (scope_dcache_rsp_ready), - `define SCOPE_SIGNALS_CORE_BIND \ - `define SCOPE_SIGNALS_CACHE_BIND \ .scope_bank_valid_st0 (scope_bank_valid_st0), \ 
.scope_bank_valid_st1 (scope_bank_valid_st1), \ @@ -233,7 +284,7 @@ .scope_bank_is_mrvq_st1 (scope_bank_is_mrvq_st1), \ .scope_bank_miss_st1 (scope_bank_miss_st1), \ .scope_bank_dirty_st1 (scope_bank_dirty_st1), \ - .scope_bank_force_miss_st1 (scope_bank_force_miss_st1), \ + .scope_bank_force_miss_st1(scope_bank_force_miss_st1), \ .scope_bank_stall_pipe (scope_bank_stall_pipe), `define SCOPE_SIGNALS_CACHE_UNBIND \ @@ -290,37 +341,53 @@ .scope_bank_force_miss_st1 (scope_per_bank_force_miss_st1[i]), \ .scope_bank_stall_pipe (scope_per_bank_stall_pipe[i]), - `define SCOPE_SIGNALS_PIPELINE_BIND \ - .scope_busy (scope_busy), - - `define SCOPE_SIGNALS_EX_BIND \ - .scope_alu_req_valid (scope_alu_req_valid), \ - .scope_alu_req_wid (scope_alu_req_wid), \ - .scope_alu_req_PC (scope_alu_req_PC), \ - .scope_alu_req_rd (scope_alu_req_rd), \ - .scope_alu_req_a (scope_alu_req_a), \ - .scope_alu_req_b (scope_alu_req_b), \ + `define SCOPE_SIGNALS_ISSUE_BIND \ + .scope_issue_valid (scope_issue_valid), \ + .scope_issue_wid (scope_issue_wid), \ + .scope_issue_tmask (scope_issue_tmask), \ + .scope_issue_pc (scope_issue_pc), \ + .scope_issue_ex_type (scope_issue_ex_type), \ + .scope_issue_op_type (scope_issue_op_type), \ + .scope_issue_op_mod (scope_issue_op_mod), \ + .scope_issue_wb (scope_issue_wb), \ + .scope_issue_rd (scope_issue_rd), \ + .scope_issue_rs1 (scope_issue_rs1), \ + .scope_issue_rs2 (scope_issue_rs2), \ + .scope_issue_rs3 (scope_issue_rs3), \ + .scope_issue_imm (scope_issue_imm), \ + .scope_issue_rs1_is_pc (scope_issue_rs1_is_pc), \ + .scope_issue_rs2_is_imm (scope_issue_rs2_is_imm), \ .scope_writeback_valid (scope_writeback_valid), \ .scope_writeback_wid (scope_writeback_wid), \ - .scope_writeback_PC (scope_writeback_PC), \ + .scope_writeback_pc (scope_writeback_pc), \ .scope_writeback_rd (scope_writeback_rd), \ - .scope_writeback_data (scope_writeback_data), + .scope_writeback_data (scope_writeback_data), \ + .scope_issue_ready (scope_issue_ready), \ + 
.scope_gpr_rsp_valid (scope_gpr_rsp_valid), \ + .scope_gpr_rsp_wid (scope_gpr_rsp_wid), \ + .scope_gpr_rsp_pc (scope_gpr_rsp_pc), \ + .scope_gpr_rsp_a (scope_gpr_rsp_a), \ + .scope_gpr_rsp_b (scope_gpr_rsp_b), \ + .scope_gpr_rsp_c (scope_gpr_rsp_c), \ + .scope_scoreboard_delay (scope_scoreboard_delay), \ + .scope_gpr_delay (scope_gpr_delay), \ + .scope_execute_delay (scope_execute_delay), \ + + `define SCOPE_SIGNALS_EXECUTE_BIND `define SCOPE_ASSIGN(d,s) assign d = s `else `define SCOPE_SIGNALS_ISTAGE_IO `define SCOPE_SIGNALS_LSU_IO - `define SCOPE_SIGNALS_CORE_IO `define SCOPE_SIGNALS_CACHE_IO - `define SCOPE_SIGNALS_PIPELINE_IO - `define SCOPE_SIGNALS_EX_IO + `define SCOPE_SIGNALS_ISSUE_IO + `define SCOPE_SIGNALS_EXECUTE_IO `define SCOPE_SIGNALS_ISTAGE_BIND `define SCOPE_SIGNALS_LSU_BIND - `define SCOPE_SIGNALS_CORE_BIND `define SCOPE_SIGNALS_CACHE_BIND - `define SCOPE_SIGNALS_PIPELINE_BIND - `define SCOPE_SIGNALS_EX_BIND + `define SCOPE_SIGNALS_ISSUE_BIND + `define SCOPE_SIGNALS_EXECUTE_BIND `define SCOPE_SIGNALS_CACHE_UNBIND `define SCOPE_SIGNALS_CACHE_BANK_SELECT diff --git a/hw/rtl/VX_scoreboard.v b/hw/rtl/VX_scoreboard.v index 239af67b..2370745c 100644 --- a/hw/rtl/VX_scoreboard.v +++ b/hw/rtl/VX_scoreboard.v @@ -26,7 +26,7 @@ module VX_scoreboard #( wire release_reg = writeback_if.valid && writeback_if.ready; - wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.wid, writeback_if.rd}] & ~writeback_if.thread_mask; + wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.wid, writeback_if.rd}] & ~writeback_if.tmask; always @(*) begin inuse_reg_mask_n = inuse_reg_mask; @@ -48,7 +48,7 @@ module VX_scoreboard #( end end else begin if (reserve_reg) begin - inuse_registers[{ibuf_deq_if.wid, ibuf_deq_if.rd}] <= ibuf_deq_if.thread_mask; + inuse_registers[{ibuf_deq_if.wid, ibuf_deq_if.rd}] <= ibuf_deq_if.tmask; end if (release_reg) begin assert(inuse_reg_mask[writeback_if.wid][writeback_if.rd] != 0); @@ -67,7 +67,7 @@ module 
VX_scoreboard #( always @(posedge clk) begin if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin $display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", - $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.curr_PC, ibuf_deq_if.rd, ibuf_deq_if.wb, + $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay); end end diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 92c3cb9e..dd7d22b5 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -25,7 +25,7 @@ typedef struct packed { typedef struct packed { logic valid; - logic [`NUM_THREADS-1:0] thread_mask; + logic [`NUM_THREADS-1:0] tmask; } gpu_tmc_t; typedef struct packed { diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 93f0dd59..5234f74a 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -50,7 +50,7 @@ module VX_warp_sched #( schedule_table_n = schedule_table; if (warp_ctl_if.valid && warp_ctl_if.tmc.valid - && (0 == warp_ctl_if.tmc.thread_mask)) begin + && (0 == warp_ctl_if.tmc.tmask)) begin schedule_table_n[warp_ctl_if.wid] = 0; end if (scheduled_warp) begin // remove scheduled warp (round-robin) @@ -95,9 +95,9 @@ module VX_warp_sched #( barrier_stall_mask[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1; end end else if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin - thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.thread_mask; + thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask; stalled_warps[warp_ctl_if.wid] <= 0; - if (0 == warp_ctl_if.tmc.thread_mask) begin + if (0 == warp_ctl_if.tmc.tmask) begin active_warps[warp_ctl_if.wid] <= 0; end end else if (join_if.valid && !didnt_split) begin @@ -140,7 +140,7 @@ module VX_warp_sched #( end if (ifetch_rsp_fire) begin fetch_lock[ifetch_rsp_if.wid] <= 0; - warp_pcs[ifetch_rsp_if.wid] <= ifetch_rsp_if.curr_PC + 4; + 
warp_pcs[ifetch_rsp_if.wid] <= ifetch_rsp_if.PC + 4; end // reset 'schedule_table' when it goes to zero @@ -173,7 +173,7 @@ module VX_warp_sched #( // split/join stack management wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0]; - wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.wid]}; + wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.wid]}; wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split.pc, warp_ctl_if.split.else_mask}; assign {join_fall, join_pc, join_tm} = ipdom[join_if.wid]; @@ -236,8 +236,8 @@ module VX_warp_sched #( .reset (reset), .stall (stall_out), .flush (1'b0), - .in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}), - .out ({ifetch_req_if.valid, ifetch_req_if.thread_mask, ifetch_req_if.curr_PC, ifetch_req_if.wid}) + .in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}), + .out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid}) ); assign busy = (active_warps != 0); diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index eab9af89..58e01f55 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -43,11 +43,11 @@ module VX_writeback #( fpu_valid ? fpu_commit_if.wid : 0; - assign wb_tmask = alu_valid ? alu_commit_if.thread_mask : - lsu_valid ? lsu_commit_if.thread_mask : - csr_valid ? csr_commit_if.thread_mask : - mul_valid ? mul_commit_if.thread_mask : - fpu_valid ? fpu_commit_if.thread_mask : + assign wb_tmask = alu_valid ? alu_commit_if.tmask : + lsu_valid ? lsu_commit_if.tmask : + csr_valid ? csr_commit_if.tmask : + mul_valid ? mul_commit_if.tmask : + fpu_valid ? fpu_commit_if.tmask : 0; assign wb_rd = alu_valid ? 
alu_commit_if.rd : @@ -74,8 +74,8 @@ module VX_writeback #( .reset (reset), .stall (stall), .flush (1'b0), - .in ({wb_valid, wb_wid, wb_tmask, wb_rd, wb_data}), - .out ({writeback_if.valid, writeback_if.wid, writeback_if.thread_mask, writeback_if.rd, writeback_if.data}) + .in ({wb_valid, wb_wid, wb_tmask, wb_rd, wb_data}), + .out ({writeback_if.valid, writeback_if.wid, writeback_if.tmask, writeback_if.rd, writeback_if.data}) ); assign alu_commit_if.ready = !stall; diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 3fd90221..5f055aeb 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -3,10 +3,9 @@ module Vortex ( `SCOPE_SIGNALS_ISTAGE_IO `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_CORE_IO `SCOPE_SIGNALS_CACHE_IO - `SCOPE_SIGNALS_PIPELINE_IO - `SCOPE_SIGNALS_EX_IO + `SCOPE_SIGNALS_ISSUE_IO + `SCOPE_SIGNALS_EXECUTE_IO // Clock input wire clk, @@ -78,10 +77,9 @@ module Vortex ( ) cluster ( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_CORE_BIND `SCOPE_SIGNALS_CACHE_BIND - `SCOPE_SIGNALS_PIPELINE_BIND - `SCOPE_SIGNALS_EX_BIND + `SCOPE_SIGNALS_ISSUE_BIND + `SCOPE_SIGNALS_EXECUTE_BIND .clk (clk), .reset (reset), @@ -197,10 +195,9 @@ module Vortex ( ) cluster ( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_CORE_BIND `SCOPE_SIGNALS_CACHE_BIND - `SCOPE_SIGNALS_PIPELINE_BIND - `SCOPE_SIGNALS_EX_BIND + `SCOPE_SIGNALS_ISSUE_BIND + `SCOPE_SIGNALS_EXECUTE_BIND .clk (clk), .reset (reset), diff --git a/hw/rtl/interfaces/VX_alu_req_if.v b/hw/rtl/interfaces/VX_alu_req_if.v index bb964249..d940cc7a 100644 --- a/hw/rtl/interfaces/VX_alu_req_if.v +++ b/hw/rtl/interfaces/VX_alu_req_if.v @@ -8,8 +8,8 @@ interface VX_alu_req_if (); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; wire [31:0] next_PC; wire [`ALU_BR_BITS-1:0] op_type; wire is_br_op; diff --git a/hw/rtl/interfaces/VX_csr_req_if.v b/hw/rtl/interfaces/VX_csr_req_if.v index 
afefc12d..2c4a79a2 100644 --- a/hw/rtl/interfaces/VX_csr_req_if.v +++ b/hw/rtl/interfaces/VX_csr_req_if.v @@ -8,8 +8,8 @@ interface VX_csr_req_if (); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; wire [`CSR_BITS-1:0] op_type; wire [`CSR_ADDR_BITS-1:0] csr_addr; wire [31:0] csr_mask; diff --git a/hw/rtl/interfaces/VX_decode_if.v b/hw/rtl/interfaces/VX_decode_if.v index 21a25970..b253ecbe 100644 --- a/hw/rtl/interfaces/VX_decode_if.v +++ b/hw/rtl/interfaces/VX_decode_if.v @@ -8,8 +8,8 @@ interface VX_decode_if (); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; wire [`EX_BITS-1:0] ex_type; wire [`OP_BITS-1:0] op_type; diff --git a/hw/rtl/interfaces/VX_exu_to_cmt_if.v b/hw/rtl/interfaces/VX_exu_to_cmt_if.v index 8ade9994..86763e2a 100644 --- a/hw/rtl/interfaces/VX_exu_to_cmt_if.v +++ b/hw/rtl/interfaces/VX_exu_to_cmt_if.v @@ -7,8 +7,8 @@ interface VX_exu_to_cmt_if (); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; wire [`NUM_THREADS-1:0][31:0] data; wire [`NR_BITS-1:0] rd; wire wb; diff --git a/hw/rtl/interfaces/VX_fpu_req_if.v b/hw/rtl/interfaces/VX_fpu_req_if.v index a146cd94..bb0ee172 100644 --- a/hw/rtl/interfaces/VX_fpu_req_if.v +++ b/hw/rtl/interfaces/VX_fpu_req_if.v @@ -12,8 +12,8 @@ interface VX_fpu_req_if (); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; wire [`FPU_BITS-1:0] op_type; wire [`FRM_BITS-1:0] frm; wire [`NUM_THREADS-1:0][31:0] rs1_data; diff --git a/hw/rtl/interfaces/VX_fpu_to_cmt_if.v b/hw/rtl/interfaces/VX_fpu_to_cmt_if.v index e0b857d0..066949d1 100644 --- a/hw/rtl/interfaces/VX_fpu_to_cmt_if.v +++ b/hw/rtl/interfaces/VX_fpu_to_cmt_if.v @@ 
-7,8 +7,8 @@ interface VX_fpu_to_cmt_if (); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; wire [`NUM_THREADS-1:0][31:0] data; wire [`NR_BITS-1:0] rd; wire wb; diff --git a/hw/rtl/interfaces/VX_gpr_read_if.v b/hw/rtl/interfaces/VX_gpr_read_if.v deleted file mode 100644 index c9675bee..00000000 --- a/hw/rtl/interfaces/VX_gpr_read_if.v +++ /dev/null @@ -1,27 +0,0 @@ -`ifndef VX_GPR_READ_IF -`define VX_GPR_READ_IF - -`include "VX_define.vh" - -interface VX_gpr_read_if (); - - wire valid; - - wire [`NW_BITS-1:0] wid; - - wire [`NR_BITS-1:0] rs1; - wire [`NR_BITS-1:0] rs2; - wire [`NR_BITS-1:0] rs3; - - wire use_rs3; - - wire [`NUM_THREADS-1:0][31:0] rs1_data; - wire [`NUM_THREADS-1:0][31:0] rs2_data; - wire [`NUM_THREADS-1:0][31:0] rs3_data; - - wire ready_in; - wire ready_out; - -endinterface - -`endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_gpr_req_if.v b/hw/rtl/interfaces/VX_gpr_req_if.v new file mode 100644 index 00000000..2e2e6e98 --- /dev/null +++ b/hw/rtl/interfaces/VX_gpr_req_if.v @@ -0,0 +1,21 @@ +`ifndef VX_GPR_REQ_IF +`define VX_GPR_REQ_IF + +`include "VX_define.vh" + +interface VX_gpr_req_if (); + + wire valid; + + wire [`NW_BITS-1:0] wid; + wire [31:0] PC; + wire [`NR_BITS-1:0] rs1; + wire [`NR_BITS-1:0] rs2; + wire [`NR_BITS-1:0] rs3; + wire use_rs3; + + wire ready; + +endinterface + +`endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_gpr_rsp_if.v b/hw/rtl/interfaces/VX_gpr_rsp_if.v new file mode 100644 index 00000000..4b953e06 --- /dev/null +++ b/hw/rtl/interfaces/VX_gpr_rsp_if.v @@ -0,0 +1,21 @@ +`ifndef VX_GPR_RSP_IF +`define VX_GPR_RSP_IF + +`include "VX_define.vh" + +interface VX_gpr_rsp_if (); + wire valid; +`IGNORE_WARNINGS_BEGIN + wire [`NW_BITS-1:0] wid; + wire [31:0] PC; +`IGNORE_WARNINGS_END + + wire [`NUM_THREADS-1:0][31:0] rs1_data; + wire [`NUM_THREADS-1:0][31:0] rs2_data; + wire 
[`NUM_THREADS-1:0][31:0] rs3_data; + + wire ready; + +endinterface + +`endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_gpu_req_if.v b/hw/rtl/interfaces/VX_gpu_req_if.v index ef55c442..5f024ae9 100644 --- a/hw/rtl/interfaces/VX_gpu_req_if.v +++ b/hw/rtl/interfaces/VX_gpu_req_if.v @@ -8,8 +8,8 @@ interface VX_gpu_req_if(); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; wire [31:0] next_PC; wire [`GPU_BITS-1:0] op_type; wire [`NUM_THREADS-1:0][31:0] rs1_data; diff --git a/hw/rtl/interfaces/VX_ifetch_req_if.v b/hw/rtl/interfaces/VX_ifetch_req_if.v index a2469cb3..c4f34bcb 100644 --- a/hw/rtl/interfaces/VX_ifetch_req_if.v +++ b/hw/rtl/interfaces/VX_ifetch_req_if.v @@ -6,9 +6,9 @@ interface VX_ifetch_req_if (); wire valid; - wire [`NUM_THREADS-1:0] thread_mask; + wire [`NUM_THREADS-1:0] tmask; wire [`NW_BITS-1:0] wid; - wire [31:0] curr_PC; + wire [31:0] PC; wire ready; endinterface diff --git a/hw/rtl/interfaces/VX_ifetch_rsp_if.v b/hw/rtl/interfaces/VX_ifetch_rsp_if.v index f9918a03..7a8c14fd 100644 --- a/hw/rtl/interfaces/VX_ifetch_rsp_if.v +++ b/hw/rtl/interfaces/VX_ifetch_rsp_if.v @@ -6,9 +6,9 @@ interface VX_ifetch_rsp_if (); wire valid; - wire [`NUM_THREADS-1:0] thread_mask; + wire [`NUM_THREADS-1:0] tmask; wire [`NW_BITS-1:0] wid; - wire [31:0] curr_PC; + wire [31:0] PC; wire [31:0] instr; wire ready; diff --git a/hw/rtl/interfaces/VX_lsu_req_if.v b/hw/rtl/interfaces/VX_lsu_req_if.v index 9d682af6..bff05720 100644 --- a/hw/rtl/interfaces/VX_lsu_req_if.v +++ b/hw/rtl/interfaces/VX_lsu_req_if.v @@ -8,8 +8,8 @@ interface VX_lsu_req_if (); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; wire rw; wire [`BYTEEN_BITS-1:0] byteen; diff --git a/hw/rtl/interfaces/VX_mul_req_if.v b/hw/rtl/interfaces/VX_mul_req_if.v index f3cd80f3..713761eb 100644 --- 
a/hw/rtl/interfaces/VX_mul_req_if.v +++ b/hw/rtl/interfaces/VX_mul_req_if.v @@ -12,8 +12,8 @@ interface VX_mul_req_if (); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; wire [`MUL_BITS-1:0] op_type; wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs2_data; diff --git a/hw/rtl/interfaces/VX_writeback_if.v b/hw/rtl/interfaces/VX_writeback_if.v index 466c7398..5cb412be 100644 --- a/hw/rtl/interfaces/VX_writeback_if.v +++ b/hw/rtl/interfaces/VX_writeback_if.v @@ -6,11 +6,11 @@ interface VX_writeback_if (); wire valid; - wire [`NUM_THREADS-1:0] thread_mask; + wire [`NUM_THREADS-1:0] tmask; wire [`NW_BITS-1:0] wid; `IGNORE_WARNINGS_BEGIN - wire [31:0] curr_PC; + wire [31:0] PC; `IGNORE_WARNINGS_END wire [`NR_BITS-1:0] rd;