Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
This commit is contained in:
172
hw/rtl/core/VX_alu_unit.sv
Normal file
172
hw/rtl/core/VX_alu_unit.sv
Normal file
@@ -0,0 +1,172 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS]
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_ALU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) execute_if[BLOCK_SIZE]();
|
||||
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (PARTIAL_BW ? 1 : 0)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.dispatch_if(dispatch_if),
|
||||
.execute_if (execute_if)
|
||||
);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_block_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
wire is_muldiv_op;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_execute_if();
|
||||
|
||||
assign int_execute_if.valid = execute_if[block_idx].valid && ~is_muldiv_op;
|
||||
assign int_execute_if.data = execute_if[block_idx].data;
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_commit_if();
|
||||
|
||||
`RESET_RELAY (int_reset, reset);
|
||||
|
||||
VX_int_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.BLOCK_IDX (block_idx),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_unit (
|
||||
.clk (clk),
|
||||
.reset (int_reset),
|
||||
.execute_if (int_execute_if),
|
||||
.branch_ctl_if (branch_ctl_if[block_idx]),
|
||||
.commit_if (int_commit_if)
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
assign is_muldiv_op = `INST_ALU_IS_M(execute_if[block_idx].data.op_mod);
|
||||
|
||||
`RESET_RELAY (mdv_reset, reset);
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_execute_if();
|
||||
|
||||
assign mdv_execute_if.valid = execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign mdv_execute_if.data = execute_if[block_idx].data;
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_commit_if();
|
||||
|
||||
VX_muldiv_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_unit (
|
||||
.clk (clk),
|
||||
.reset (mdv_reset),
|
||||
.execute_if (mdv_execute_if),
|
||||
.commit_if (mdv_commit_if)
|
||||
);
|
||||
|
||||
assign execute_if[block_idx].ready = is_muldiv_op ? mdv_execute_if.ready : int_execute_if.ready;
|
||||
|
||||
`else
|
||||
|
||||
assign is_muldiv_op = 0;
|
||||
assign execute_if[block_idx].ready = int_execute_if.ready;
|
||||
|
||||
`endif
|
||||
|
||||
// send response
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_REG (PARTIAL_BW ? 1 : 3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.valid,
|
||||
`endif
|
||||
int_commit_if.valid
|
||||
}),
|
||||
.ready_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.ready,
|
||||
`endif
|
||||
int_commit_if.ready
|
||||
}),
|
||||
.data_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.data,
|
||||
`endif
|
||||
int_commit_if.data
|
||||
}),
|
||||
.data_out (commit_block_if[block_idx].data),
|
||||
.valid_out (commit_block_if[block_idx].valid),
|
||||
.ready_out (commit_block_if[block_idx].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
end
|
||||
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (PARTIAL_BW ? 3 : 0)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.commit_in_if (commit_block_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
endmodule
|
||||
226
hw/rtl/core/VX_commit.sv
Normal file
226
hw/rtl/core/VX_commit.sv
Normal file
@@ -0,0 +1,226 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave alu_commit_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.slave lsu_commit_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.slave fpu_commit_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
VX_commit_if.slave sfu_commit_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_writeback_if.master writeback_if [`ISSUE_WIDTH],
|
||||
VX_commit_csr_if.master commit_csr_if,
|
||||
VX_commit_sched_if.master commit_sched_if,
|
||||
|
||||
// simulation helper signals
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
||||
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
|
||||
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
|
||||
|
||||
// commit arbitration
|
||||
|
||||
VX_commit_if commit_if[`ISSUE_WIDTH]();
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] commit_fire;
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] commit_wid;
|
||||
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] commit_tmask;
|
||||
wire [`ISSUE_WIDTH-1:0] commit_eop;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (`NUM_EX_UNITS),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG (1)
|
||||
) commit_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.valid_in ({
|
||||
sfu_commit_if[i].valid,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if[i].valid,
|
||||
`endif
|
||||
alu_commit_if[i].valid,
|
||||
lsu_commit_if[i].valid
|
||||
}),
|
||||
.ready_in ({
|
||||
sfu_commit_if[i].ready,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if[i].ready,
|
||||
`endif
|
||||
alu_commit_if[i].ready,
|
||||
lsu_commit_if[i].ready
|
||||
}),
|
||||
.data_in ({
|
||||
sfu_commit_if[i].data,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if[i].data,
|
||||
`endif
|
||||
alu_commit_if[i].data,
|
||||
lsu_commit_if[i].data
|
||||
}),
|
||||
.data_out (commit_if[i].data),
|
||||
.valid_out (commit_if[i].valid),
|
||||
.ready_out (commit_if[i].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
assign commit_fire[i] = commit_if[i].valid && commit_if[i].ready;
|
||||
assign commit_tmask[i] = {`NUM_THREADS{commit_fire[i]}} & commit_if[i].data.tmask;
|
||||
assign commit_wid[i] = commit_if[i].data.wid;
|
||||
assign commit_eop[i] = commit_if[i].data.eop;
|
||||
end
|
||||
|
||||
// CSRs update
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0][COMMIT_SIZEW-1:0] commit_size, commit_size_r;
|
||||
wire [COMMIT_ALL_SIZEW-1:0] commit_size_all, commit_size_all_r;
|
||||
wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr;
|
||||
|
||||
assign commit_fire_any = (| commit_fire);
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [COMMIT_SIZEW-1:0] pop_count;
|
||||
`POP_COUNT(pop_count, commit_tmask[i]);
|
||||
assign commit_size[i] = pop_count;
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `ISSUE_WIDTH * COMMIT_SIZEW),
|
||||
.RESETW (1)
|
||||
) commit_size_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({commit_fire_any, commit_size}),
|
||||
.data_out ({commit_fire_any_r, commit_size_r})
|
||||
);
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (COMMIT_SIZEW),
|
||||
.DATAW_OUT (COMMIT_ALL_SIZEW),
|
||||
.N (`ISSUE_WIDTH),
|
||||
.OP ("+")
|
||||
) commit_size_reduce (
|
||||
.data_in (commit_size_r),
|
||||
.data_out (commit_size_all)
|
||||
);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + COMMIT_ALL_SIZEW),
|
||||
.RESETW (1)
|
||||
) commit_size_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({commit_fire_any_r, commit_size_all}),
|
||||
.data_out ({commit_fire_any_rr, commit_size_all_r})
|
||||
);
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] instret;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
instret <= '0;
|
||||
end else begin
|
||||
if (commit_fire_any_rr) begin
|
||||
instret <= instret + `PERF_CTR_BITS'(commit_size_all_r);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign commit_csr_if.instret = instret;
|
||||
|
||||
// Committed instructions
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (`ISSUE_WIDTH * (1 + `NW_WIDTH)),
|
||||
.RESETW (`ISSUE_WIDTH)
|
||||
) committed_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({(commit_fire & commit_eop), commit_wid}),
|
||||
.data_out ({commit_sched_if.committed, commit_sched_if.committed_wid})
|
||||
);
|
||||
|
||||
// Writeback
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign writeback_if[i].valid = commit_if[i].valid && commit_if[i].data.wb;
|
||||
assign writeback_if[i].data.uuid = commit_if[i].data.uuid;
|
||||
assign writeback_if[i].data.wis = wid_to_wis(commit_if[i].data.wid);
|
||||
assign writeback_if[i].data.PC = commit_if[i].data.PC;
|
||||
assign writeback_if[i].data.tmask = commit_if[i].data.tmask;
|
||||
assign writeback_if[i].data.rd = commit_if[i].data.rd;
|
||||
assign writeback_if[i].data.data = commit_if[i].data.data;
|
||||
assign writeback_if[i].data.sop = commit_if[i].data.sop;
|
||||
assign writeback_if[i].data.eop = commit_if[i].data.eop;
|
||||
assign commit_if[i].ready = 1'b1;
|
||||
end
|
||||
|
||||
// simulation helper signal to get RISC-V tests Pass/Fail status
|
||||
reg [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value_r;
|
||||
always @(posedge clk) begin
|
||||
if (writeback_if[0].valid) begin
|
||||
sim_wb_value_r[writeback_if[0].data.rd] <= writeback_if[0].data.data[0];
|
||||
end
|
||||
end
|
||||
assign sim_wb_value = sim_wb_value_r;
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (alu_commit_if[i].valid && alu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, alu_commit_if[i].data.wid, alu_commit_if[i].data.PC, alu_commit_if[i].data.tmask, alu_commit_if[i].data.wb, alu_commit_if[i].data.rd, alu_commit_if[i].data.sop, alu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, alu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", alu_commit_if[i].data.uuid));
|
||||
end
|
||||
if (lsu_commit_if[i].valid && lsu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, lsu_commit_if[i].data.wid, lsu_commit_if[i].data.PC, lsu_commit_if[i].data.tmask, lsu_commit_if[i].data.wb, lsu_commit_if[i].data.rd, lsu_commit_if[i].data.sop, lsu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, lsu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", lsu_commit_if[i].data.uuid));
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_commit_if[i].valid && fpu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, fpu_commit_if[i].data.wid, fpu_commit_if[i].data.PC, fpu_commit_if[i].data.tmask, fpu_commit_if[i].data.wb, fpu_commit_if[i].data.rd, fpu_commit_if[i].data.sop, fpu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, fpu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", fpu_commit_if[i].data.uuid));
|
||||
end
|
||||
`endif
|
||||
if (sfu_commit_if[i].valid && sfu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=SFU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, sfu_commit_if[i].data.wid, sfu_commit_if[i].data.PC, sfu_commit_if[i].data.tmask, sfu_commit_if[i].data.wb, sfu_commit_if[i].data.rd, sfu_commit_if[i].data.sop, sfu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, sfu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", sfu_commit_if[i].data.uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
469
hw/rtl/core/VX_core.sv
Normal file
469
hw/rtl/core/VX_core.sv
Normal file
@@ -0,0 +1,469 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`include "VX_fpu_define.vh"
|
||||
`endif
|
||||
|
||||
module VX_core import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.slave mem_perf_if,
|
||||
`endif
|
||||
|
||||
VX_dcr_bus_if.slave dcr_bus_if,
|
||||
|
||||
VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS],
|
||||
|
||||
VX_mem_bus_if.master icache_bus_if,
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if.master gbar_bus_if,
|
||||
`endif
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak,
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
VX_schedule_if schedule_if();
|
||||
VX_fetch_if fetch_if();
|
||||
VX_decode_if decode_if();
|
||||
VX_sched_csr_if sched_csr_if();
|
||||
VX_decode_sched_if decode_sched_if();
|
||||
VX_commit_sched_if commit_sched_if();
|
||||
VX_commit_csr_if commit_csr_if();
|
||||
VX_branch_ctl_if branch_ctl_if[`NUM_ALU_BLOCKS]();
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
|
||||
VX_dispatch_if alu_dispatch_if[`ISSUE_WIDTH]();
|
||||
VX_commit_if alu_commit_if[`ISSUE_WIDTH]();
|
||||
|
||||
VX_dispatch_if lsu_dispatch_if[`ISSUE_WIDTH]();
|
||||
VX_commit_if lsu_commit_if[`ISSUE_WIDTH]();
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_dispatch_if fpu_dispatch_if[`ISSUE_WIDTH]();
|
||||
VX_commit_if fpu_commit_if[`ISSUE_WIDTH]();
|
||||
`endif
|
||||
VX_dispatch_if sfu_dispatch_if[`ISSUE_WIDTH]();
|
||||
VX_commit_if sfu_commit_if[`ISSUE_WIDTH]();
|
||||
|
||||
VX_writeback_if writeback_if[`ISSUE_WIDTH]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_bus_tmp_if[DCACHE_NUM_REQS]();
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_tmp_if();
|
||||
VX_pipeline_perf_if pipeline_perf_if();
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (dcr_data_reset, reset);
|
||||
`RESET_RELAY (schedule_reset, reset);
|
||||
`RESET_RELAY (fetch_reset, reset);
|
||||
`RESET_RELAY (decode_reset, reset);
|
||||
`RESET_RELAY (issue_reset, reset);
|
||||
`RESET_RELAY (execute_reset, reset);
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
base_dcrs_t base_dcrs;
|
||||
|
||||
VX_dcr_data dcr_data (
|
||||
.clk (clk),
|
||||
.reset (dcr_data_reset),
|
||||
.dcr_bus_if (dcr_bus_if),
|
||||
.base_dcrs (base_dcrs)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (3)
|
||||
|
||||
VX_schedule #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) schedule (
|
||||
.clk (clk),
|
||||
.reset (schedule_reset),
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.decode_sched_if(decode_sched_if),
|
||||
.commit_sched_if(commit_sched_if),
|
||||
|
||||
.schedule_if (schedule_if),
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (gbar_bus_if),
|
||||
`endif
|
||||
.sched_csr_if (sched_csr_if),
|
||||
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) fetch (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (fetch_reset),
|
||||
.icache_bus_if (icache_bus_if),
|
||||
.schedule_if (schedule_if),
|
||||
.fetch_if (fetch_if)
|
||||
);
|
||||
|
||||
VX_decode #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (decode_reset),
|
||||
.fetch_if (fetch_if),
|
||||
.decode_if (decode_if),
|
||||
.decode_sched_if(decode_sched_if)
|
||||
);
|
||||
|
||||
VX_issue #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) issue (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
.clk (clk),
|
||||
.reset (issue_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_issue_if (pipeline_perf_if.issue),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
|
||||
.alu_dispatch_if(alu_dispatch_if),
|
||||
.lsu_dispatch_if(lsu_dispatch_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_dispatch_if(fpu_dispatch_if),
|
||||
`endif
|
||||
.sfu_dispatch_if(sfu_dispatch_if)
|
||||
);
|
||||
|
||||
VX_execute #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) execute (
|
||||
`SCOPE_IO_BIND (2)
|
||||
|
||||
.clk (clk),
|
||||
.reset (execute_reset),
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_tmp_if),
|
||||
.pipeline_perf_if(pipeline_perf_if),
|
||||
`endif
|
||||
|
||||
.dcache_bus_if (dcache_bus_tmp_if),
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_dispatch_if(fpu_dispatch_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.sched_csr_if (sched_csr_if),
|
||||
|
||||
.alu_dispatch_if(alu_dispatch_if),
|
||||
.lsu_dispatch_if(lsu_dispatch_if),
|
||||
.sfu_dispatch_if(sfu_dispatch_if),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.sfu_commit_if (sfu_commit_if),
|
||||
|
||||
.sim_ebreak (sim_ebreak)
|
||||
);
|
||||
|
||||
VX_commit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) commit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.sfu_commit_if (sfu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if),
|
||||
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.commit_sched_if(commit_sched_if),
|
||||
|
||||
.sim_wb_value (sim_wb_value)
|
||||
);
|
||||
|
||||
VX_smem_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) smem_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_in_if (mem_perf_if),
|
||||
.mem_perf_out_if (mem_perf_tmp_if),
|
||||
`endif
|
||||
.dcache_bus_in_if (dcache_bus_tmp_if),
|
||||
.dcache_bus_out_if (dcache_bus_if)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_wr_req_per_cycle;
|
||||
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rsp_per_cycle;
|
||||
|
||||
wire perf_icache_pending_read_cycle;
|
||||
wire [`CLOG2(DCACHE_NUM_REQS+1)+1-1:0] perf_dcache_pending_read_cycle;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_icache_pending_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_dcache_pending_reads;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_ifetches;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_loads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_stores;
|
||||
|
||||
wire perf_icache_req_fire = icache_bus_if.req_valid & icache_bus_if.req_ready;
|
||||
wire perf_icache_rsp_fire = icache_bus_if.rsp_valid & icache_bus_if.rsp_ready;
|
||||
|
||||
wire [DCACHE_NUM_REQS-1:0] perf_dcache_rd_req_fire, perf_dcache_wr_req_fire, perf_dcache_rsp_fire;
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
assign perf_dcache_rd_req_fire[i] = dcache_bus_if[i].req_valid && ~dcache_bus_if[i].req_data.rw && dcache_bus_if[i].req_ready;
|
||||
assign perf_dcache_wr_req_fire[i] = dcache_bus_if[i].req_valid && dcache_bus_if[i].req_data.rw && dcache_bus_if[i].req_ready;
|
||||
assign perf_dcache_rsp_fire[i] = dcache_bus_if[i].rsp_valid && dcache_bus_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
`POP_COUNT(perf_dcache_rd_req_per_cycle, perf_dcache_rd_req_fire);
|
||||
`POP_COUNT(perf_dcache_wr_req_per_cycle, perf_dcache_wr_req_fire);
|
||||
`POP_COUNT(perf_dcache_rsp_per_cycle, perf_dcache_rsp_fire);
|
||||
|
||||
assign perf_icache_pending_read_cycle = perf_icache_req_fire - perf_icache_rsp_fire;
|
||||
assign perf_dcache_pending_read_cycle = perf_dcache_rd_req_per_cycle - perf_dcache_rsp_per_cycle;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_icache_pending_reads <= '0;
|
||||
perf_dcache_pending_reads <= '0;
|
||||
end else begin
|
||||
perf_icache_pending_reads <= $signed(perf_icache_pending_reads) + `PERF_CTR_BITS'($signed(perf_icache_pending_read_cycle));
|
||||
perf_dcache_pending_reads <= $signed(perf_dcache_pending_reads) + `PERF_CTR_BITS'($signed(perf_dcache_pending_read_cycle));
|
||||
end
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_icache_lat;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_dcache_lat;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_ifetches <= '0;
|
||||
perf_loads <= '0;
|
||||
perf_stores <= '0;
|
||||
perf_icache_lat <= '0;
|
||||
perf_dcache_lat <= '0;
|
||||
end else begin
|
||||
perf_ifetches <= perf_ifetches + `PERF_CTR_BITS'(perf_icache_req_fire);
|
||||
perf_loads <= perf_loads + `PERF_CTR_BITS'(perf_dcache_rd_req_per_cycle);
|
||||
perf_stores <= perf_stores + `PERF_CTR_BITS'(perf_dcache_wr_req_per_cycle);
|
||||
perf_icache_lat <= perf_icache_lat + perf_icache_pending_reads;
|
||||
perf_dcache_lat <= perf_dcache_lat + perf_dcache_pending_reads;
|
||||
end
|
||||
end
|
||||
|
||||
assign pipeline_perf_if.ifetches = perf_ifetches;
|
||||
assign pipeline_perf_if.loads = perf_loads;
|
||||
assign pipeline_perf_if.stores = perf_stores;
|
||||
assign pipeline_perf_if.load_latency = perf_dcache_lat;
|
||||
assign pipeline_perf_if.ifetch_latency = perf_icache_lat;
|
||||
assign pipeline_perf_if.load_latency = perf_dcache_lat;
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module VX_core_top
|
||||
import VX_gpu_pkg::*;
|
||||
#(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire dcr_write_valid,
|
||||
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_write_addr,
|
||||
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_write_data,
|
||||
|
||||
output wire [DCACHE_NUM_REQS-1:0] dcache_req_valid,
|
||||
output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready,
|
||||
|
||||
input wire [DCACHE_NUM_REQS-1:0] dcache_rsp_valid,
|
||||
input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_rsp_data,
|
||||
input wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
output wire [DCACHE_NUM_REQS-1:0] dcache_rsp_ready,
|
||||
|
||||
output wire icache_req_valid,
|
||||
output wire icache_req_rw,
|
||||
output wire [ICACHE_WORD_SIZE-1:0] icache_req_byteen,
|
||||
output wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr,
|
||||
output wire [ICACHE_WORD_SIZE*8-1:0] icache_req_data,
|
||||
output wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag,
|
||||
input wire icache_req_ready,
|
||||
|
||||
input wire icache_rsp_valid,
|
||||
input wire [ICACHE_WORD_SIZE*8-1:0] icache_rsp_data,
|
||||
input wire [ICACHE_TAG_WIDTH-1:0] icache_rsp_tag,
|
||||
output wire icache_rsp_ready,
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
output wire gbar_req_valid,
|
||||
output wire [`NB_WIDTH-1:0] gbar_req_id,
|
||||
output wire [`NC_WIDTH-1:0] gbar_req_size_m1,
|
||||
output wire [`NC_WIDTH-1:0] gbar_req_core_id,
|
||||
input wire gbar_req_ready,
|
||||
input wire gbar_rsp_valid,
|
||||
input wire [`NB_WIDTH-1:0] gbar_rsp_id,
|
||||
`endif
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak,
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if gbar_bus_if();
|
||||
|
||||
assign gbar_req_valid = gbar_bus_if.req_valid;
|
||||
assign gbar_req_id = gbar_bus_if.req_id;
|
||||
assign gbar_req_size_m1 = gbar_bus_if.req_size_m1;
|
||||
assign gbar_req_core_id = gbar_bus_if.req_core_id;
|
||||
assign gbar_bus_if.req_ready = gbar_req_ready;
|
||||
assign gbar_bus_if.rsp_valid = gbar_rsp_valid;
|
||||
assign gbar_bus_if.rsp_id = gbar_rsp_id;
|
||||
`endif
|
||||
|
||||
VX_dcr_bus_if dcr_bus_if();
|
||||
|
||||
assign dcr_bus_if.write_valid = dcr_write_valid;
|
||||
assign dcr_bus_if.write_addr = dcr_write_addr;
|
||||
assign dcr_bus_if.write_data = dcr_write_data;
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
|
||||
) dcache_bus_if[DCACHE_NUM_REQS]();
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
assign dcache_req_valid[i] = dcache_bus_if[i].req_valid;
|
||||
assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw;
|
||||
assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen;
|
||||
assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr;
|
||||
assign dcache_req_data[i] = dcache_bus_if[i].req_data.data;
|
||||
assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag;
|
||||
assign dcache_bus_if[i].req_ready = dcache_req_ready[i];
|
||||
|
||||
assign dcache_bus_if[i].rsp_valid = dcache_rsp_valid[i];
|
||||
assign dcache_bus_if[i].rsp_data.tag = dcache_rsp_tag[i];
|
||||
assign dcache_bus_if[i].rsp_data.data = dcache_rsp_data[i];
|
||||
assign dcache_rsp_ready[i] = dcache_bus_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (ICACHE_TAG_WIDTH)
|
||||
) icache_bus_if();
|
||||
|
||||
assign icache_req_valid = icache_bus_if.req_valid;
|
||||
assign icache_req_rw = icache_bus_if.req_data.rw;
|
||||
assign icache_req_byteen = icache_bus_if.req_data.byteen;
|
||||
assign icache_req_addr = icache_bus_if.req_data.addr;
|
||||
assign icache_req_data = icache_bus_if.req_data.data;
|
||||
assign icache_req_tag = icache_bus_if.req_data.tag;
|
||||
assign icache_bus_if.req_ready = icache_req_ready;
|
||||
|
||||
assign icache_bus_if.rsp_valid = icache_rsp_valid;
|
||||
assign icache_bus_if.rsp_data.tag = icache_rsp_tag;
|
||||
assign icache_bus_if.rsp_data.data = icache_rsp_data;
|
||||
assign icache_rsp_ready = icache_bus_if.rsp_ready;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
`endif
|
||||
|
||||
`ifdef SCOPE
|
||||
wire [0:0] scope_reset_w = 1'b0;
|
||||
wire [0:0] scope_bus_in_w = 1'b0;
|
||||
wire [0:0] scope_bus_out_w;
|
||||
`UNUSED_VAR (scope_bus_out_w)
|
||||
`endif
|
||||
|
||||
VX_core #(
|
||||
.CORE_ID (0)
|
||||
) core (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
`endif
|
||||
|
||||
.dcr_bus_if (dcr_bus_if),
|
||||
|
||||
.dcache_bus_if (dcache_bus_if),
|
||||
|
||||
.icache_bus_if (icache_bus_if),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (gbar_bus_if),
|
||||
`endif
|
||||
|
||||
.sim_ebreak (sim_ebreak),
|
||||
.sim_wb_value (sim_wb_value),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
endmodule
|
||||
304
hw/rtl/core/VX_csr_data.sv
Normal file
304
hw/rtl/core/VX_csr_data.sv
Normal file
@@ -0,0 +1,304 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`include "VX_fpu_define.vh"
|
||||
`endif
|
||||
|
||||
module VX_csr_data
|
||||
import VX_gpu_pkg::*;
|
||||
`ifdef EXT_F_ENABLE
|
||||
import VX_fpu_pkg::*;
|
||||
`endif
|
||||
#(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input base_dcrs_t base_dcrs,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.slave mem_perf_if,
|
||||
VX_pipeline_perf_if.slave pipeline_perf_if,
|
||||
VX_sfu_perf_if.slave sfu_perf_if,
|
||||
`endif
|
||||
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if [`NUM_FPU_BLOCKS],
|
||||
`endif
|
||||
|
||||
input wire [`PERF_CTR_BITS-1:0] cycles,
|
||||
input wire [`NUM_WARPS-1:0] active_warps,
|
||||
input wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks,
|
||||
|
||||
input wire read_enable,
|
||||
input wire [`UUID_WIDTH-1:0] read_uuid,
|
||||
input wire [`NW_WIDTH-1:0] read_wid,
|
||||
input wire [`VX_CSR_ADDR_BITS-1:0] read_addr,
|
||||
output wire [31:0] read_data_ro,
|
||||
output wire [31:0] read_data_rw,
|
||||
|
||||
input wire write_enable,
|
||||
input wire [`UUID_WIDTH-1:0] write_uuid,
|
||||
input wire [`NW_WIDTH-1:0] write_wid,
|
||||
input wire [`VX_CSR_ADDR_BITS-1:0] write_addr,
|
||||
input wire [31:0] write_data
|
||||
);
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (write_wid)
|
||||
`UNUSED_VAR (write_data)
|
||||
|
||||
// CSRs Write /////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FP_FLAGS_BITS-1:0] fcsr, fcsr_n;
|
||||
wire [`NUM_FPU_BLOCKS-1:0] fpu_write_enable;
|
||||
wire [`NUM_FPU_BLOCKS-1:0][`NW_WIDTH-1:0] fpu_write_wid;
|
||||
fflags_t [`NUM_FPU_BLOCKS-1:0] fpu_write_fflags;
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
assign fpu_write_enable[i] = fpu_to_csr_if[i].write_enable;
|
||||
assign fpu_write_wid[i] = fpu_to_csr_if[i].write_wid;
|
||||
assign fpu_write_fflags[i] = fpu_to_csr_if[i].write_fflags;
|
||||
end
|
||||
always @(*) begin
|
||||
fcsr_n = fcsr;
|
||||
for (integer i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
if (fpu_write_enable[i]) begin
|
||||
fcsr_n[fpu_write_wid[i]][`FP_FLAGS_BITS-1:0] = fcsr[fpu_write_wid[i]][`FP_FLAGS_BITS-1:0]
|
||||
| fpu_write_fflags[i];
|
||||
end
|
||||
end
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`VX_CSR_FFLAGS: fcsr_n[write_wid][`FP_FLAGS_BITS-1:0] = write_data[`FP_FLAGS_BITS-1:0];
|
||||
`VX_CSR_FRM: fcsr_n[write_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS] = write_data[`INST_FRM_BITS-1:0];
|
||||
`VX_CSR_FCSR: fcsr_n[write_wid] = write_data[`FP_FLAGS_BITS+`INST_FRM_BITS-1:0];
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
assign fpu_to_csr_if[i].read_frm = fcsr[fpu_to_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS];
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
fcsr <= '0;
|
||||
end else begin
|
||||
fcsr <= fcsr_n;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`ifdef EXT_F_ENABLE
|
||||
`VX_CSR_FFLAGS,
|
||||
`VX_CSR_FRM,
|
||||
`VX_CSR_FCSR,
|
||||
`endif
|
||||
`VX_CSR_SATP,
|
||||
`VX_CSR_MSTATUS,
|
||||
`VX_CSR_MNSTATUS,
|
||||
`VX_CSR_MEDELEG,
|
||||
`VX_CSR_MIDELEG,
|
||||
`VX_CSR_MIE,
|
||||
`VX_CSR_MTVEC,
|
||||
`VX_CSR_MEPC,
|
||||
`VX_CSR_PMPCFG0,
|
||||
`VX_CSR_PMPADDR0: /* do nothing!*/;
|
||||
default: begin
|
||||
`ASSERT(0, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// CSRs read //////////////////////////////////////////////////////////////
|
||||
|
||||
reg [31:0] read_data_ro_r;
|
||||
reg [31:0] read_data_rw_r;
|
||||
reg read_addr_valid_r;
|
||||
|
||||
always @(*) begin
|
||||
read_data_ro_r = '0;
|
||||
read_data_rw_r = '0;
|
||||
read_addr_valid_r = 1;
|
||||
case (read_addr)
|
||||
`VX_CSR_MVENDORID : read_data_ro_r = 32'(`VENDOR_ID);
|
||||
`VX_CSR_MARCHID : read_data_ro_r = 32'(`ARCHITECTURE_ID);
|
||||
`VX_CSR_MIMPID : read_data_ro_r = 32'(`IMPLEMENTATION_ID);
|
||||
`VX_CSR_MISA : read_data_ro_r = (((`CLOG2(`XLEN)-4) << (`XLEN-2)) | `MISA_STD);
|
||||
`ifdef EXT_F_ENABLE
|
||||
`VX_CSR_FFLAGS : read_data_rw_r = 32'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`VX_CSR_FRM : read_data_rw_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`VX_CSR_FCSR : read_data_rw_r = 32'(fcsr[read_wid]);
|
||||
`endif
|
||||
`VX_CSR_WARP_ID : read_data_ro_r = 32'(read_wid);
|
||||
`VX_CSR_CORE_ID : read_data_ro_r = 32'(CORE_ID);
|
||||
`VX_CSR_THREAD_MASK: read_data_ro_r = 32'(thread_masks[read_wid]);
|
||||
`VX_CSR_WARP_MASK : read_data_ro_r = 32'(active_warps);
|
||||
`VX_CSR_NUM_THREADS: read_data_ro_r = 32'(`NUM_THREADS);
|
||||
`VX_CSR_NUM_WARPS : read_data_ro_r = 32'(`NUM_WARPS);
|
||||
`VX_CSR_NUM_CORES : read_data_ro_r = 32'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
`VX_CSR_MCYCLE : read_data_ro_r = 32'(cycles[31:0]);
|
||||
`VX_CSR_MCYCLE_H : read_data_ro_r = 32'(cycles[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_RESERVED : read_data_ro_r = 'x;
|
||||
`VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
|
||||
`VX_CSR_MINSTRET : read_data_ro_r = 32'(commit_csr_if.instret[31:0]);
|
||||
`VX_CSR_MINSTRET_H : read_data_ro_r = 32'(commit_csr_if.instret[`PERF_CTR_BITS-1:32]);
|
||||
|
||||
`VX_CSR_SATP,
|
||||
`VX_CSR_MSTATUS,
|
||||
`VX_CSR_MNSTATUS,
|
||||
`VX_CSR_MEDELEG,
|
||||
`VX_CSR_MIDELEG,
|
||||
`VX_CSR_MIE,
|
||||
`VX_CSR_MTVEC,
|
||||
`VX_CSR_MEPC,
|
||||
`VX_CSR_PMPCFG0,
|
||||
`VX_CSR_PMPADDR0 : read_data_ro_r = 32'(0);
|
||||
|
||||
default: begin
|
||||
read_addr_valid_r = 0;
|
||||
if ((read_addr >= `VX_CSR_MPM_BASE && read_addr < (`VX_CSR_MPM_BASE + 32))
|
||||
|| (read_addr >= `VX_CSR_MPM_BASE_H && read_addr < (`VX_CSR_MPM_BASE_H + 32))) begin
|
||||
read_addr_valid_r = 1;
|
||||
`ifdef PERF_ENABLE
|
||||
case (base_dcrs.mpm_class)
|
||||
`VX_DCR_MPM_CLASS_CORE: begin
|
||||
case (read_addr)
|
||||
// PERF: pipeline
|
||||
`VX_CSR_MPM_IBUF_ST : read_data_ro_r = pipeline_perf_if.ibf_stalls[31:0];
|
||||
`VX_CSR_MPM_IBUF_ST_H : read_data_ro_r = 32'(pipeline_perf_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SCRB_ST : read_data_ro_r = pipeline_perf_if.scb_stalls[31:0];
|
||||
`VX_CSR_MPM_SCRB_ST_H : read_data_ro_r = 32'(pipeline_perf_if.scb_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_ALU_ST : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_ALU][31:0];
|
||||
`VX_CSR_MPM_ALU_ST_H : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_ALU][`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_LSU_ST : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_LSU][31:0];
|
||||
`VX_CSR_MPM_LSU_ST_H : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_LSU][`PERF_CTR_BITS-1:32]);
|
||||
`ifdef EXT_F_ENABLE
|
||||
`VX_CSR_MPM_FPU_ST : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_FPU][31:0];
|
||||
`VX_CSR_MPM_FPU_ST_H : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_FPU][`PERF_CTR_BITS-1:32]);
|
||||
`else
|
||||
`VX_CSR_MPM_FPU_ST : read_data_ro_r = '0;
|
||||
`VX_CSR_MPM_FPU_ST_H : read_data_ro_r = '0;
|
||||
`endif
|
||||
`VX_CSR_MPM_SFU_ST : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_SFU][31:0];
|
||||
`VX_CSR_MPM_SFU_ST_H : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_SFU][`PERF_CTR_BITS-1:32]);
|
||||
// PERF: memory
|
||||
`VX_CSR_MPM_IFETCHES : read_data_ro_r = pipeline_perf_if.ifetches[31:0];
|
||||
`VX_CSR_MPM_IFETCHES_H : read_data_ro_r = 32'(pipeline_perf_if.ifetches[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_LOADS : read_data_ro_r = pipeline_perf_if.loads[31:0];
|
||||
`VX_CSR_MPM_LOADS_H : read_data_ro_r = 32'(pipeline_perf_if.loads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_STORES : read_data_ro_r = pipeline_perf_if.stores[31:0];
|
||||
`VX_CSR_MPM_STORES_H : read_data_ro_r = 32'(pipeline_perf_if.stores[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_IFETCH_LAT : read_data_ro_r = pipeline_perf_if.ifetch_latency[31:0];
|
||||
`VX_CSR_MPM_IFETCH_LAT_H : read_data_ro_r = 32'(pipeline_perf_if.ifetch_latency[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_LOAD_LAT : read_data_ro_r = pipeline_perf_if.load_latency[31:0];
|
||||
`VX_CSR_MPM_LOAD_LAT_H : read_data_ro_r = 32'(pipeline_perf_if.load_latency[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`VX_DCR_MPM_CLASS_MEM: begin
|
||||
case (read_addr)
|
||||
// PERF: icache
|
||||
`VX_CSR_MPM_ICACHE_READS : read_data_ro_r = mem_perf_if.icache_reads[31:0];
|
||||
`VX_CSR_MPM_ICACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.icache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_ICACHE_MISS_R : read_data_ro_r = mem_perf_if.icache_read_misses[31:0];
|
||||
`VX_CSR_MPM_ICACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: dcache
|
||||
`VX_CSR_MPM_DCACHE_READS : read_data_ro_r = mem_perf_if.dcache_reads[31:0];
|
||||
`VX_CSR_MPM_DCACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.dcache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_WRITES : read_data_ro_r = mem_perf_if.dcache_writes[31:0];
|
||||
`VX_CSR_MPM_DCACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.dcache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_MISS_R : read_data_ro_r = mem_perf_if.dcache_read_misses[31:0];
|
||||
`VX_CSR_MPM_DCACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_MISS_W : read_data_ro_r = mem_perf_if.dcache_write_misses[31:0];
|
||||
`VX_CSR_MPM_DCACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_BANK_ST : read_data_ro_r = mem_perf_if.dcache_bank_stalls[31:0];
|
||||
`VX_CSR_MPM_DCACHE_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_DCACHE_MSHR_ST : read_data_ro_r = mem_perf_if.dcache_mshr_stalls[31:0];
|
||||
`VX_CSR_MPM_DCACHE_MSHR_ST_H : read_data_ro_r = 32'(mem_perf_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: smem
|
||||
`VX_CSR_MPM_SMEM_READS : read_data_ro_r = mem_perf_if.smem_reads[31:0];
|
||||
`VX_CSR_MPM_SMEM_READS_H : read_data_ro_r = 32'(mem_perf_if.smem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SMEM_WRITES : read_data_ro_r = mem_perf_if.smem_writes[31:0];
|
||||
`VX_CSR_MPM_SMEM_WRITES_H : read_data_ro_r = 32'(mem_perf_if.smem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_SMEM_BANK_ST : read_data_ro_r = mem_perf_if.smem_bank_stalls[31:0];
|
||||
`VX_CSR_MPM_SMEM_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: l2cache
|
||||
`VX_CSR_MPM_L2CACHE_READS : read_data_ro_r = mem_perf_if.l2cache_reads[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.l2cache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_WRITES : read_data_ro_r = mem_perf_if.l2cache_writes[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.l2cache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_MISS_R : read_data_ro_r = mem_perf_if.l2cache_read_misses[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.l2cache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_MISS_W : read_data_ro_r = mem_perf_if.l2cache_write_misses[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.l2cache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_BANK_ST : read_data_ro_r = mem_perf_if.l2cache_bank_stalls[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.l2cache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L2CACHE_MSHR_ST : read_data_ro_r = mem_perf_if.l2cache_mshr_stalls[31:0];
|
||||
`VX_CSR_MPM_L2CACHE_MSHR_ST_H : read_data_ro_r = 32'(mem_perf_if.l2cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: l3cache
|
||||
`VX_CSR_MPM_L3CACHE_READS : read_data_ro_r = mem_perf_if.l3cache_reads[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_READS_H : read_data_ro_r = 32'(mem_perf_if.l3cache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_WRITES : read_data_ro_r = mem_perf_if.l3cache_writes[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_WRITES_H : read_data_ro_r = 32'(mem_perf_if.l3cache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_MISS_R : read_data_ro_r = mem_perf_if.l3cache_read_misses[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_MISS_R_H : read_data_ro_r = 32'(mem_perf_if.l3cache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_MISS_W : read_data_ro_r = mem_perf_if.l3cache_write_misses[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_MISS_W_H : read_data_ro_r = 32'(mem_perf_if.l3cache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_BANK_ST : read_data_ro_r = mem_perf_if.l3cache_bank_stalls[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_BANK_ST_H : read_data_ro_r = 32'(mem_perf_if.l3cache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_L3CACHE_MSHR_ST : read_data_ro_r = mem_perf_if.l3cache_mshr_stalls[31:0];
|
||||
`VX_CSR_MPM_L3CACHE_MSHR_ST_H : read_data_ro_r = 32'(mem_perf_if.l3cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: memory
|
||||
`VX_CSR_MPM_MEM_READS : read_data_ro_r = mem_perf_if.mem_reads[31:0];
|
||||
`VX_CSR_MPM_MEM_READS_H : read_data_ro_r = 32'(mem_perf_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_MEM_WRITES : read_data_ro_r = mem_perf_if.mem_writes[31:0];
|
||||
`VX_CSR_MPM_MEM_WRITES_H : read_data_ro_r = 32'(mem_perf_if.mem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`VX_CSR_MPM_MEM_LAT : read_data_ro_r = mem_perf_if.mem_latency[31:0];
|
||||
`VX_CSR_MPM_MEM_LAT_H : read_data_ro_r = 32'(mem_perf_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
`endif
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign read_data_ro = read_data_ro_r;
|
||||
assign read_data_rw = read_data_rw_r;
|
||||
|
||||
`UNUSED_VAR (base_dcrs)
|
||||
|
||||
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [`PERF_CTR_BITS-1:0] perf_wctl_stalls = sfu_perf_if.wctl_stalls;
|
||||
`UNUSED_VAR (perf_wctl_stalls);
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
181
hw/rtl/core/VX_csr_unit.sv
Normal file
181
hw/rtl/core/VX_csr_unit.sv
Normal file
@@ -0,0 +1,181 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input base_dcrs_t base_dcrs,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.slave mem_perf_if,
|
||||
VX_pipeline_perf_if.slave pipeline_perf_if,
|
||||
VX_sfu_perf_if.slave sfu_perf_if,
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if [`NUM_FPU_BLOCKS],
|
||||
`endif
|
||||
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
VX_sched_csr_if.slave sched_csr_if,
|
||||
VX_execute_if.slave execute_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * 32 + PID_WIDTH + 1 + 1;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
reg [NUM_LANES-1:0][31:0] csr_read_data;
|
||||
reg [31:0] csr_write_data;
|
||||
wire [31:0] csr_read_data_ro, csr_read_data_rw;
|
||||
wire [31:0] csr_req_data;
|
||||
reg csr_rd_enable;
|
||||
wire csr_wr_enable;
|
||||
wire csr_req_ready;
|
||||
|
||||
// wait for all pending instructions to complete
|
||||
assign sched_csr_if.alm_empty_wid = execute_if.data.wid;
|
||||
wire no_pending_instr = sched_csr_if.alm_empty;
|
||||
|
||||
wire csr_req_valid = execute_if.valid && no_pending_instr;
|
||||
assign execute_if.ready = csr_req_ready && no_pending_instr;
|
||||
|
||||
wire [`VX_CSR_ADDR_BITS-1:0] csr_addr = execute_if.data.imm[`VX_CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = execute_if.data.imm[`VX_CSR_ADDR_BITS +: `NRI_BITS];
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] rs1_data;
|
||||
`UNUSED_VAR (rs1_data)
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign rs1_data[i] = execute_if.data.rs1_data[i][31:0];
|
||||
end
|
||||
|
||||
wire csr_write_enable = (execute_if.data.op_type == `INST_SFU_CSRRW);
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
.pipeline_perf_if(pipeline_perf_if),
|
||||
.sfu_perf_if (sfu_perf_if),
|
||||
`endif
|
||||
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.cycles (sched_csr_if.cycles),
|
||||
.active_warps (sched_csr_if.active_warps),
|
||||
.thread_masks (sched_csr_if.thread_masks),
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`endif
|
||||
|
||||
.read_enable (csr_req_valid && csr_rd_enable),
|
||||
.read_uuid (execute_if.data.uuid),
|
||||
.read_wid (execute_if.data.wid),
|
||||
.read_addr (csr_addr),
|
||||
.read_data_ro (csr_read_data_ro),
|
||||
.read_data_rw (csr_read_data_rw),
|
||||
|
||||
.write_enable (csr_req_valid && csr_wr_enable),
|
||||
.write_uuid (execute_if.data.uuid),
|
||||
.write_wid (execute_if.data.wid),
|
||||
.write_addr (csr_addr),
|
||||
.write_data (csr_write_data)
|
||||
);
|
||||
|
||||
// CSR read
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] wtid, gtid;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
if (PID_BITS != 0) begin
|
||||
assign wtid[i] = 32'(execute_if.data.pid * NUM_LANES + i);
|
||||
end else begin
|
||||
assign wtid[i] = 32'(i);
|
||||
end
|
||||
assign gtid[i] = (32'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (32'(execute_if.data.wid) << `NT_BITS) + wtid[i];
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
csr_rd_enable = 0;
|
||||
case (csr_addr)
|
||||
`VX_CSR_THREAD_ID : csr_read_data = wtid;
|
||||
`VX_CSR_MHARTID : csr_read_data = gtid;
|
||||
default : begin
|
||||
csr_read_data = {NUM_LANES{csr_read_data_ro | csr_read_data_rw}};
|
||||
csr_rd_enable = 1;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// CSR write
|
||||
|
||||
assign csr_req_data = execute_if.data.use_imm ? 32'(csr_imm) : rs1_data[0];
|
||||
|
||||
assign csr_wr_enable = (csr_write_enable || (| csr_req_data));
|
||||
|
||||
always @(*) begin
|
||||
case (execute_if.data.op_type)
|
||||
`INST_SFU_CSRRW: begin
|
||||
csr_write_data = csr_req_data;
|
||||
end
|
||||
`INST_SFU_CSRRS: begin
|
||||
csr_write_data = csr_read_data_rw | csr_req_data;
|
||||
end
|
||||
//`INST_SFU_CSRRC
|
||||
default: begin
|
||||
csr_write_data = csr_read_data_rw & ~csr_req_data;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// unlock the warp
|
||||
assign sched_csr_if.unlock_warp = csr_req_valid && csr_req_ready && execute_if.data.eop;
|
||||
assign sched_csr_if.unlock_wid = execute_if.data.wid;
|
||||
|
||||
// send response
|
||||
wire [NUM_LANES-1:0][31:0] csr_commit_data;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, csr_read_data, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, csr_commit_data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = `XLEN'(csr_commit_data[i]);
|
||||
end
|
||||
|
||||
endmodule
|
||||
57
hw/rtl/core/VX_dcr_data.sv
Normal file
57
hw/rtl/core/VX_dcr_data.sv
Normal file
@@ -0,0 +1,57 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_dcr_data import VX_gpu_pkg::*; (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_dcr_bus_if.slave dcr_bus_if,
|
||||
|
||||
// Outputs
|
||||
output base_dcrs_t base_dcrs
|
||||
);
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
base_dcrs_t dcrs;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (dcr_bus_if.write_valid) begin
|
||||
case (dcr_bus_if.write_addr)
|
||||
`VX_DCR_BASE_STARTUP_ADDR0 : dcrs.startup_addr[31:0] <= dcr_bus_if.write_data;
|
||||
`ifdef XLEN_64
|
||||
`VX_DCR_BASE_STARTUP_ADDR1 : dcrs.startup_addr[63:32] <= dcr_bus_if.write_data;
|
||||
`endif
|
||||
`VX_DCR_BASE_MPM_CLASS : dcrs.mpm_class <= dcr_bus_if.write_data[7:0];
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign base_dcrs = dcrs;
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (dcr_bus_if.write_valid) begin
|
||||
`TRACE(1, ("%d: base-dcr: state=", $time));
|
||||
trace_base_dcr(1, dcr_bus_if.write_addr);
|
||||
`TRACE(1, (", data=0x%0h\n", dcr_bus_if.write_data));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
552
hw/rtl/core/VX_decode.sv
Normal file
552
hw/rtl/core/VX_decode.sv
Normal file
@@ -0,0 +1,552 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define USED_IREG(x) \
|
||||
x``_r = {1'b0, ``x}; \
|
||||
use_``x = 1
|
||||
|
||||
`define USED_FREG(x) \
|
||||
x``_r = {1'b1, ``x}; \
|
||||
use_``x = 1
|
||||
`else
|
||||
`define USED_IREG(x) \
|
||||
x``_r = ``x; \
|
||||
use_``x = 1
|
||||
`endif
|
||||
|
||||
module VX_decode #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_fetch_if.slave fetch_if,
|
||||
|
||||
// outputs
|
||||
VX_decode_if.master decode_if,
|
||||
VX_decode_sched_if.master decode_sched_if
|
||||
);
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
reg [`INST_MOD_BITS-1:0] op_mod;
|
||||
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [`XLEN-1:0] imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
|
||||
reg is_wstall;
|
||||
|
||||
wire [31:0] instr = fetch_if.data.instr;
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
wire [1:0] func2 = instr[26:25];
|
||||
wire [2:0] func3 = instr[14:12];
|
||||
wire [4:0] func5 = instr[31:27];
|
||||
wire [6:0] func7 = instr[31:25];
|
||||
wire [11:0] u_12 = instr[31:20];
|
||||
|
||||
wire [4:0] rd = instr[11:7];
|
||||
wire [4:0] rs1 = instr[19:15];
|
||||
wire [4:0] rs2 = instr[24:20];
|
||||
wire [4:0] rs3 = instr[31:27];
|
||||
|
||||
`UNUSED_VAR (func2)
|
||||
`UNUSED_VAR (func5)
|
||||
`UNUSED_VAR (rs3)
|
||||
`UNUSED_VAR (use_rd)
|
||||
`UNUSED_VAR (use_rs1)
|
||||
`UNUSED_VAR (use_rs2)
|
||||
`UNUSED_VAR (use_rs3)
|
||||
|
||||
wire is_itype_sh = func3[0] && ~func3[1];
|
||||
|
||||
wire [19:0] ui_imm = instr[31:12];
|
||||
`ifdef XLEN_64
|
||||
wire [11:0] i_imm = is_itype_sh ? {6'b0, instr[25:20]} : u_12;
|
||||
wire [11:0] iw_imm = is_itype_sh ? {7'b0, instr[24:20]} : u_12;
|
||||
`else
|
||||
wire [11:0] i_imm = is_itype_sh ? {7'b0, instr[24:20]} : u_12;
|
||||
`endif
|
||||
wire [11:0] s_imm = {func7, rd};
|
||||
wire [12:0] b_imm = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
|
||||
reg [`INST_ALU_BITS-1:0] r_type;
|
||||
always @(*) begin
|
||||
case (func3)
|
||||
3'h0: r_type = (opcode[5] && func7[5]) ? `INST_ALU_SUB : `INST_ALU_ADD;
|
||||
3'h1: r_type = `INST_ALU_SLL;
|
||||
3'h2: r_type = `INST_ALU_SLT;
|
||||
3'h3: r_type = `INST_ALU_SLTU;
|
||||
3'h4: r_type = `INST_ALU_XOR;
|
||||
3'h5: r_type = func7[5] ? `INST_ALU_SRA : `INST_ALU_SRL;
|
||||
3'h6: r_type = `INST_ALU_OR;
|
||||
3'h7: r_type = `INST_ALU_AND;
|
||||
endcase
|
||||
end
|
||||
|
||||
reg [`INST_BR_BITS-1:0] b_type;
|
||||
always @(*) begin
|
||||
case (func3)
|
||||
3'h0: b_type = `INST_BR_EQ;
|
||||
3'h1: b_type = `INST_BR_NE;
|
||||
3'h4: b_type = `INST_BR_LT;
|
||||
3'h5: b_type = `INST_BR_GE;
|
||||
3'h6: b_type = `INST_BR_LTU;
|
||||
3'h7: b_type = `INST_BR_GEU;
|
||||
default: b_type = 'x;
|
||||
endcase
|
||||
end
|
||||
|
||||
reg [`INST_BR_BITS-1:0] s_type;
|
||||
always @(*) begin
|
||||
case (u_12)
|
||||
12'h000: s_type = `INST_OP_BITS'(`INST_BR_ECALL);
|
||||
12'h001: s_type = `INST_OP_BITS'(`INST_BR_EBREAK);
|
||||
12'h002: s_type = `INST_OP_BITS'(`INST_BR_URET);
|
||||
12'h102: s_type = `INST_OP_BITS'(`INST_BR_SRET);
|
||||
12'h302: s_type = `INST_OP_BITS'(`INST_BR_MRET);
|
||||
default: s_type = 'x;
|
||||
endcase
|
||||
end
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
reg [`INST_M_BITS-1:0] m_type;
|
||||
always @(*) begin
|
||||
case (func3)
|
||||
3'h0: m_type = `INST_M_MUL;
|
||||
3'h1: m_type = `INST_M_MULH;
|
||||
3'h2: m_type = `INST_M_MULHSU;
|
||||
3'h3: m_type = `INST_M_MULHU;
|
||||
3'h4: m_type = `INST_M_DIV;
|
||||
3'h5: m_type = `INST_M_DIVU;
|
||||
3'h6: m_type = `INST_M_REM;
|
||||
3'h7: m_type = `INST_M_REMU;
|
||||
endcase
|
||||
end
|
||||
`endif
|
||||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = '0;
|
||||
op_type = 'x;
|
||||
op_mod = '0;
|
||||
rd_r = '0;
|
||||
rs1_r = '0;
|
||||
rs2_r = '0;
|
||||
rs3_r = '0;
|
||||
imm = 'x;
|
||||
use_imm = 0;
|
||||
use_PC = 0;
|
||||
use_rd = 0;
|
||||
use_rs1 = 0;
|
||||
use_rs2 = 0;
|
||||
use_rs3 = 0;
|
||||
is_wstall = 0;
|
||||
|
||||
case (opcode)
|
||||
`INST_I: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{(`XLEN-12){i_imm[11]}}, i_imm};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_R: begin
|
||||
ex_type = `EX_ALU;
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (func7[0]) begin
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod[1] = 1;
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
end
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
`INST_I_W: begin
|
||||
// ADDIW, SLLIW, SRLIW, SRAIW
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
op_mod[2] = 1;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{(`XLEN-12){iw_imm[11]}}, iw_imm};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_R_W: begin
|
||||
ex_type = `EX_ALU;
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (func7[0]) begin
|
||||
// MULW, DIVW, DIVUW, REMW, REMUW
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod[1] = 1;
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
// ADDW, SUBW, SLLW, SRLW, SRAW
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
end
|
||||
op_mod[2] = 1;
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
`endif
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_LUI);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{`XLEN-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_AUIPC: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
imm = {{`XLEN-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JAL: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JAL);
|
||||
op_mod[0] = 1;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{(`XLEN-21){jal_imm[20]}}, jal_imm};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JALR: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JALR);
|
||||
op_mod[0] = 1;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{(`XLEN-12){u_12[11]}}, u_12};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_B: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(b_type);
|
||||
op_mod[0] = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{(`XLEN-13){b_imm[12]}}, b_imm};
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
`INST_FENCE: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_LSU_FENCE;
|
||||
end
|
||||
`INST_SYS : begin
|
||||
if (func3[1:0] != 0) begin
|
||||
ex_type = `EX_SFU;
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_CSR(func3[1:0]));
|
||||
use_rd = 1;
|
||||
is_wstall = 1;
|
||||
use_imm = func3[2];
|
||||
imm[`VX_CSR_ADDR_BITS-1:0] = u_12; // addr
|
||||
`USED_IREG (rd);
|
||||
if (func3[2]) begin
|
||||
imm[`VX_CSR_ADDR_BITS +: `NRI_BITS] = rs1; // imm
|
||||
end else begin
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
end else begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(s_type);
|
||||
op_mod[0] = 1;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = `XLEN'd4;
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FL,
|
||||
`endif
|
||||
`INST_L: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b0, func3});
|
||||
use_rd = 1;
|
||||
imm = {{(`XLEN-12){u_12[11]}}, u_12};
|
||||
use_imm = 1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (opcode[2]) begin
|
||||
`USED_FREG (rd);
|
||||
end else
|
||||
`endif
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FS,
|
||||
`endif
|
||||
`INST_S: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b1, func3});
|
||||
imm = {{(`XLEN-12){s_imm[11]}}, s_imm};
|
||||
use_imm = 1;
|
||||
`USED_IREG (rs1);
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (opcode[2]) begin
|
||||
`USED_FREG (rs2);
|
||||
end else
|
||||
`endif
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FMADD,
|
||||
`INST_FMSUB,
|
||||
`INST_FNMSUB,
|
||||
`INST_FNMADD: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_type = `INST_OP_BITS'({2'b11, opcode[3:2]});
|
||||
op_mod = `INST_MOD_BITS'(func3);
|
||||
imm[0] = func2[0]; // destination is double?
|
||||
use_rd = 1;
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
`USED_FREG (rs3);
|
||||
end
|
||||
`INST_FCI: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_mod = `INST_MOD_BITS'(func3);
|
||||
`ifdef FLEN_64
|
||||
imm[0] = func2[0]; // destination is double?
|
||||
`endif
|
||||
use_rd = 1;
|
||||
case (func5)
|
||||
5'b00000, // FADD
|
||||
5'b00001, // FSUB
|
||||
5'b00010, // FMUL
|
||||
5'b00011: begin // FDIV
|
||||
op_type = `INST_OP_BITS'(func5[1:0]);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
5'b00100: begin
|
||||
// NCP: FSGNJ=0, FSGNJN=1, FSGNJX=2
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = `INST_MOD_BITS'(func3[1:0]);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
5'b00101: begin
|
||||
// NCP: FMIN=6, FMAX=7
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = func3[0] ? 7 : 6;
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
`ifdef FLEN_64
|
||||
5'b01000: begin
|
||||
// CVT.S.D, CVT.D.S
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_F2F);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
`endif
|
||||
5'b01011: begin
|
||||
// SQRT
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b10100: begin
|
||||
// CMP
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_CMP);
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
5'b11000: begin
|
||||
// CVT.W.X, CVT.WU.X
|
||||
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_F2U) : `INST_OP_BITS'(`INST_FPU_F2I);
|
||||
`ifdef XLEN_64
|
||||
imm[1] = rs2[1]; // is 64-bit integer
|
||||
`endif
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b11010: begin
|
||||
// CVT.X.W, CVT.X.WU
|
||||
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_U2F) : `INST_OP_BITS'(`INST_FPU_I2F);
|
||||
`ifdef XLEN_64
|
||||
imm[1] = rs2[1]; // is 64-bit integer
|
||||
`endif
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
5'b11100: begin
|
||||
if (func3[0]) begin
|
||||
// NCP: FCLASS=3
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 3;
|
||||
end else begin
|
||||
// NCP: FMV.X.W=4
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 4;
|
||||
end
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b11110: begin
|
||||
// NCP: FMV.W.X=5
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 5;
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`endif
|
||||
`INST_EXT1: begin
|
||||
case (func7)
|
||||
7'h00: begin
|
||||
ex_type = `EX_SFU;
|
||||
is_wstall = 1;
|
||||
case (func3)
|
||||
3'h0: begin // TMC
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_TMC);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
3'h1: begin // WSPAWN
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_WSPAWN);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
3'h2: begin // SPLIT
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_SPLIT);
|
||||
use_rd = 1;
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
3'h3: begin // JOIN
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_JOIN);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
3'h4: begin // BAR
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_BAR);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
3'h5: begin // PRED
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_PRED);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_EXT2: begin
|
||||
case (func3)
|
||||
3'h1: begin
|
||||
case (func2)
|
||||
2'h0: begin // CMOV
|
||||
ex_type = `EX_SFU;
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_CMOV);
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
`USED_IREG (rs3);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
// disable write to integer register r0
|
||||
wire wb = use_rd && (rd_r != 0);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (0)
|
||||
) req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (fetch_if.valid),
|
||||
.ready_in (fetch_if.ready),
|
||||
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_mod, use_PC, imm, use_imm, wb, rd_r, rs1_r, rs2_r, rs3_r}),
|
||||
.data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod, decode_if.data.use_PC, decode_if.data.imm, decode_if.data.use_imm, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}),
|
||||
.valid_out (decode_if.valid),
|
||||
.ready_out (decode_if.ready)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
|
||||
assign decode_sched_if.valid = fetch_fire;
|
||||
assign decode_sched_if.wid = fetch_if.data.wid;
|
||||
assign decode_sched_if.is_wstall = is_wstall;
|
||||
|
||||
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, decode_if.data.PC, instr));
|
||||
trace_ex_type(1, decode_if.data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod, decode_if.data.rd, decode_if.data.rs2, decode_if.data.use_imm, decode_if.data.imm);
|
||||
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=0x%0h, opds=%b%b%b%b, use_pc=%b, use_imm=%b (#%0d)\n",
|
||||
decode_if.data.op_mod, decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, decode_if.data.imm, use_rd, use_rs1, use_rs2, use_rs3, decode_if.data.use_PC, decode_if.data.use_imm, decode_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
227
hw/rtl/core/VX_dispatch.sv
Normal file
227
hw/rtl/core/VX_dispatch.sv
Normal file
@@ -0,0 +1,227 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS],
|
||||
`endif
|
||||
// inputs
|
||||
VX_operands_if.slave operands_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_dispatch_if.master alu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master lsu_dispatch_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_dispatch_if.master fpu_dispatch_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0][`NT_WIDTH-1:0] last_active_tid;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign tids[i] = `NT_WIDTH'(i);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_find_first #(
|
||||
.N (`NUM_THREADS),
|
||||
.DATAW (`NT_WIDTH),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.valid_in (operands_if[i].data.tmask),
|
||||
.data_in (tids),
|
||||
.data_out (last_active_tid[i]),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
end
|
||||
|
||||
// ALU dispatch
|
||||
|
||||
VX_operands_if alu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign alu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_ALU);
|
||||
assign alu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (alu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
.valid_in (alu_operands_if[i].valid),
|
||||
.ready_in (alu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(alu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (alu_dispatch_if[i].data),
|
||||
.valid_out (alu_dispatch_if[i].valid),
|
||||
.ready_out (alu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
// LSU dispatch
|
||||
|
||||
VX_operands_if lsu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign lsu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_LSU);
|
||||
assign lsu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (lsu_reset),
|
||||
.valid_in (lsu_operands_if[i].valid),
|
||||
.ready_in (lsu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(lsu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (lsu_dispatch_if[i].data),
|
||||
.valid_out (lsu_dispatch_if[i].valid),
|
||||
.ready_out (lsu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
// FPU dispatch
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
||||
VX_operands_if fpu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign fpu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_FPU);
|
||||
assign fpu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (fpu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.valid_in (fpu_operands_if[i].valid),
|
||||
.ready_in (fpu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(fpu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (fpu_dispatch_if[i].data),
|
||||
.valid_out (fpu_dispatch_if[i].valid),
|
||||
.ready_out (fpu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
`endif
|
||||
|
||||
// SFU dispatch
|
||||
|
||||
VX_operands_if sfu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign sfu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_SFU);
|
||||
assign sfu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) sfu_buffer (
|
||||
.clk (clk),
|
||||
.reset (sfu_reset),
|
||||
.valid_in (sfu_operands_if[i].valid),
|
||||
.ready_in (sfu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(sfu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (sfu_dispatch_if[i].data),
|
||||
.valid_out (sfu_dispatch_if[i].valid),
|
||||
.ready_out (sfu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
// can take next request?
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign operands_if[i].ready = (alu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_ALU))
|
||||
|| (lsu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_LSU))
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| (fpu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_FPU))
|
||||
`endif
|
||||
|| (sfu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_SFU));
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_n, perf_stalls_r;
|
||||
wire [`ISSUE_WIDTH-1:0] operands_stall;
|
||||
wire [`ISSUE_WIDTH-1:0][`EX_BITS-1:0] operands_ex_type;
|
||||
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign operands_stall[i] = operands_if[i].valid && ~operands_if[i].ready;
|
||||
assign operands_ex_type[i] = operands_if[i].data.ex_type;
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
perf_stalls_n = perf_stalls_r;
|
||||
for (integer i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
if (operands_stall[i]) begin
|
||||
perf_stalls_n[operands_ex_type[i]] += `PERF_CTR_BITS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_stalls_r <= '0;
|
||||
end else begin
|
||||
perf_stalls_r <= perf_stalls_n;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
|
||||
assign perf_stalls[i] = perf_stalls_r[i];
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), operands_if[i].data.PC));
|
||||
trace_ex_type(1, operands_if[i].data.ex_type);
|
||||
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.op_mod, operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
|
||||
`TRACE_ARRAY1D(1, operands_if[i].data.rs1_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs2_data="));
|
||||
`TRACE_ARRAY1D(1, operands_if[i].data.rs2_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs3_data="));
|
||||
`TRACE_ARRAY1D(1, operands_if[i].data.rs3_data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
256
hw/rtl/core/VX_dispatch_unit.sv
Normal file
256
hw/rtl/core/VX_dispatch_unit.sv
Normal file
@@ -0,0 +1,256 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||
parameter BLOCK_SIZE = 1,
|
||||
parameter NUM_LANES = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_execute_if.master execute_if [BLOCK_SIZE]
|
||||
|
||||
);
|
||||
`STATIC_ASSERT ((`NUM_THREADS == NUM_LANES * (`NUM_THREADS / NUM_LANES)), ("invalid parameter"))
|
||||
localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
|
||||
localparam NUM_PACKETS = `NUM_THREADS / NUM_LANES;
|
||||
localparam PID_BITS = `CLOG2(NUM_PACKETS);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam BATCH_COUNT = `ISSUE_WIDTH / BLOCK_SIZE;
|
||||
localparam BATCH_COUNT_W= `LOG2UP(BATCH_COUNT);
|
||||
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
||||
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT/2));
|
||||
|
||||
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
|
||||
localparam DATA_REGS_OFF = 0;
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] dispatch_valid;
|
||||
wire [`ISSUE_WIDTH-1:0][IN_DATAW-1:0] dispatch_data;
|
||||
wire [`ISSUE_WIDTH-1:0] dispatch_ready;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign dispatch_valid[i] = dispatch_if[i].valid;
|
||||
assign dispatch_data[i] = dispatch_if[i].data;
|
||||
assign dispatch_if[i].ready = dispatch_ready[i];
|
||||
end
|
||||
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices;
|
||||
wire [BLOCK_SIZE-1:0] block_ready;
|
||||
wire [BLOCK_SIZE-1:0][NUM_LANES-1:0] block_tmask;
|
||||
wire [BLOCK_SIZE-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] block_regs;
|
||||
wire [BLOCK_SIZE-1:0][PID_WIDTH-1:0] block_pid;
|
||||
wire [BLOCK_SIZE-1:0] block_sop;
|
||||
wire [BLOCK_SIZE-1:0] block_eop;
|
||||
wire [BLOCK_SIZE-1:0] block_done;
|
||||
|
||||
wire batch_done = (& block_done);
|
||||
|
||||
logic [BATCH_COUNT_W-1:0] batch_idx;
|
||||
if (BATCH_COUNT != 1) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
batch_idx <= '0;
|
||||
end else if (batch_done) begin
|
||||
batch_idx <= batch_idx + BATCH_COUNT_W'(1);
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
assign batch_idx = 0;
|
||||
`UNUSED_VAR (batch_done)
|
||||
end
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
||||
assign issue_indices[block_idx] = issue_idx;
|
||||
|
||||
wire valid_p, ready_p;
|
||||
|
||||
if (`NUM_THREADS != NUM_LANES) begin
|
||||
reg [NUM_PACKETS-1:0] sent_mask_p;
|
||||
wire [PID_WIDTH-1:0] start_p_n, start_p, end_p;
|
||||
wire dispatch_valid_r;
|
||||
reg is_first_p;
|
||||
|
||||
wire fire_p = valid_p && ready_p;
|
||||
|
||||
wire is_last_p = (start_p == end_p);
|
||||
|
||||
wire fire_eop = fire_p && is_last_p;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
sent_mask_p <= '0;
|
||||
is_first_p <= 1;
|
||||
end else begin
|
||||
if ((BATCH_COUNT != 1) ? batch_done : fire_eop) begin
|
||||
sent_mask_p <= '0;
|
||||
is_first_p <= 1;
|
||||
end else if (fire_p) begin
|
||||
sent_mask_p[start_p] <= 1;
|
||||
is_first_p <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_PACKETS-1:0][NUM_LANES-1:0] per_packet_tmask;
|
||||
wire [NUM_PACKETS-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] per_packet_regs;
|
||||
|
||||
wire [`NUM_THREADS-1:0] dispatch_tmask = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS];
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data = dispatch_data[issue_idx][DATA_REGS_OFF + 1 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data = dispatch_data[issue_idx][DATA_REGS_OFF + 0 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
for (genvar j = 0; j < NUM_LANES; ++j) begin
|
||||
localparam k = i * NUM_LANES + j;
|
||||
assign per_packet_tmask[i][j] = dispatch_tmask[k];
|
||||
assign per_packet_regs[i][0][j] = dispatch_rs1_data[k];
|
||||
assign per_packet_regs[i][1][j] = dispatch_rs2_data[k];
|
||||
assign per_packet_regs[i][2][j] = dispatch_rs3_data[k];
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_PACKETS-1:0] packet_valids;
|
||||
wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids;
|
||||
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
assign packet_valids[i] = (| per_packet_tmask[i]);
|
||||
assign packet_ids[i] = PID_WIDTH'(i);
|
||||
end
|
||||
|
||||
VX_find_first #(
|
||||
.N (NUM_PACKETS),
|
||||
.DATAW (PID_WIDTH),
|
||||
.REVERSE (0)
|
||||
) find_first (
|
||||
.valid_in (packet_valids & ~sent_mask_p),
|
||||
.data_in (packet_ids),
|
||||
.data_out (start_p_n),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
VX_find_first #(
|
||||
.N (NUM_PACKETS),
|
||||
.DATAW (PID_WIDTH),
|
||||
.REVERSE (1)
|
||||
) find_last (
|
||||
.valid_in (packet_valids),
|
||||
.data_in (packet_ids),
|
||||
.data_out (end_p),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + PID_WIDTH),
|
||||
.RESETW (1),
|
||||
.DEPTH (FANOUT_ENABLE ? 1 : 0)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset || fire_p), // should flush on fire
|
||||
.enable (1'b1),
|
||||
.data_in ({dispatch_valid[issue_idx], start_p_n}),
|
||||
.data_out ({dispatch_valid_r, start_p})
|
||||
);
|
||||
|
||||
wire [NUM_LANES-1:0] tmask_p = per_packet_tmask[start_p];
|
||||
wire [2:0][NUM_LANES-1:0][`XLEN-1:0] regs_p = per_packet_regs[start_p];
|
||||
|
||||
wire block_enable = (BATCH_COUNT == 1 || ~(& sent_mask_p));
|
||||
|
||||
assign valid_p = dispatch_valid_r && block_enable;
|
||||
assign block_tmask[block_idx] = tmask_p;
|
||||
assign block_regs[block_idx] = regs_p;
|
||||
assign block_pid[block_idx] = start_p;
|
||||
assign block_sop[block_idx] = is_first_p;
|
||||
assign block_eop[block_idx] = is_last_p;
|
||||
if (FANOUT_ENABLE) begin
|
||||
assign block_ready[block_idx] = dispatch_valid_r && ready_p && block_enable;
|
||||
end else begin
|
||||
assign block_ready[block_idx] = ready_p && block_enable;
|
||||
end
|
||||
assign block_done[block_idx] = ~dispatch_valid[issue_idx] || fire_eop;
|
||||
end else begin
|
||||
assign valid_p = dispatch_valid[issue_idx];
|
||||
assign block_tmask[block_idx] = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS];
|
||||
assign block_regs[block_idx][0] = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
assign block_regs[block_idx][1] = dispatch_data[issue_idx][DATA_REGS_OFF + 1 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
assign block_regs[block_idx][2] = dispatch_data[issue_idx][DATA_REGS_OFF + 0 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
assign block_pid[block_idx] = '0;
|
||||
assign block_sop[block_idx] = 1'b1;
|
||||
assign block_eop[block_idx] = 1'b1;
|
||||
assign block_ready[block_idx] = ready_p;
|
||||
assign block_done[block_idx] = ~valid_p || ready_p;
|
||||
end
|
||||
|
||||
wire [ISSUE_IDX_W-1:0] wsi;
|
||||
if (BATCH_COUNT != 1) begin
|
||||
if (BLOCK_SIZE != 1) begin
|
||||
assign wsi = {batch_idx, BLOCK_SIZE_W'(block_idx)};
|
||||
end else begin
|
||||
assign wsi = batch_idx;
|
||||
end
|
||||
end else begin
|
||||
assign wsi = block_idx;
|
||||
end
|
||||
|
||||
`RESET_RELAY(buf_out_reset, reset);
|
||||
|
||||
wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], wsi);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (OUT_DATAW),
|
||||
.SIZE (`OUT_REG_TO_EB_SIZE(OUT_REG)),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(OUT_REG))
|
||||
) buf_out (
|
||||
.clk (clk),
|
||||
.reset (buf_out_reset),
|
||||
.valid_in (valid_p),
|
||||
.ready_in (ready_p),
|
||||
.data_in ({
|
||||
dispatch_data[issue_idx][IN_DATAW-1 : DATA_TMASK_OFF+`NUM_THREADS+ISSUE_WIS_W],
|
||||
block_wid,
|
||||
block_tmask[block_idx],
|
||||
dispatch_data[issue_idx][DATA_TMASK_OFF-1 : DATA_REGS_OFF + 3 * `NUM_THREADS * `XLEN],
|
||||
block_regs[block_idx][0],
|
||||
block_regs[block_idx][1],
|
||||
block_regs[block_idx][2],
|
||||
block_pid[block_idx],
|
||||
block_sop[block_idx],
|
||||
block_eop[block_idx]}),
|
||||
.data_out (execute_if[block_idx].data),
|
||||
.valid_out (execute_if[block_idx].valid),
|
||||
.ready_out (execute_if[block_idx].ready)
|
||||
);
|
||||
end
|
||||
|
||||
reg [`ISSUE_WIDTH-1:0] ready_in;
|
||||
always @(*) begin
|
||||
ready_in = 0;
|
||||
for (integer i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
ready_in[issue_indices[i]] = block_ready[i] && block_eop[i];
|
||||
end
|
||||
end
|
||||
assign dispatch_ready = ready_in;
|
||||
|
||||
endmodule
|
||||
137
hw/rtl/core/VX_execute.sv
Normal file
137
hw/rtl/core/VX_execute.sv
Normal file
@@ -0,0 +1,137 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_execute import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input base_dcrs_t base_dcrs,
|
||||
|
||||
// Dcache interface
|
||||
VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS],
|
||||
|
||||
// commit interface
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
|
||||
// fetch interface
|
||||
VX_sched_csr_if.slave sched_csr_if,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.slave mem_perf_if,
|
||||
VX_pipeline_perf_if.slave pipeline_perf_if,
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_dispatch_if.slave fpu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master fpu_commit_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
|
||||
VX_dispatch_if.slave alu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master alu_commit_if [`ISSUE_WIDTH],
|
||||
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS],
|
||||
|
||||
VX_dispatch_if.slave lsu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master lsu_commit_if [`ISSUE_WIDTH],
|
||||
|
||||
VX_dispatch_if.slave sfu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master sfu_commit_if [`ISSUE_WIDTH],
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak
|
||||
);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if fpu_to_csr_if[`NUM_FPU_BLOCKS]();
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (alu_reset, reset);
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
.dispatch_if (alu_dispatch_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.commit_if (alu_commit_if)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (1)
|
||||
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) lsu_unit (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
.reset (lsu_reset),
|
||||
.cache_bus_if (dcache_bus_if),
|
||||
.dispatch_if (lsu_dispatch_if),
|
||||
.commit_if (lsu_commit_if)
|
||||
);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`RESET_RELAY (fpu_reset, reset);
|
||||
|
||||
VX_fpu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.dispatch_if (fpu_dispatch_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.commit_if (fpu_commit_if)
|
||||
);
|
||||
`endif
|
||||
|
||||
VX_sfu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) sfu_unit (
|
||||
.clk (clk),
|
||||
.reset (sfu_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
.pipeline_perf_if (pipeline_perf_if),
|
||||
`endif
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
|
||||
.dispatch_if (sfu_dispatch_if),
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`endif
|
||||
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.commit_if (sfu_commit_if)
|
||||
);
|
||||
|
||||
// simulation helper signal to get RISC-V tests Pass/Fail status
|
||||
assign sim_ebreak = alu_dispatch_if[0].valid && alu_dispatch_if[0].ready
|
||||
&& alu_dispatch_if[0].data.wis == 0
|
||||
&& `INST_ALU_IS_BR(alu_dispatch_if[0].data.op_mod)
|
||||
&& (`INST_BR_BITS'(alu_dispatch_if[0].data.op_type) == `INST_BR_EBREAK
|
||||
|| `INST_BR_BITS'(alu_dispatch_if[0].data.op_type) == `INST_BR_ECALL);
|
||||
|
||||
endmodule
|
||||
184
hw/rtl/core/VX_fetch.sv
Normal file
184
hw/rtl/core/VX_fetch.sv
Normal file
@@ -0,0 +1,184 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_fetch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Icache interface
|
||||
VX_mem_bus_if.master icache_bus_if,
|
||||
|
||||
// inputs
|
||||
VX_schedule_if.slave schedule_if,
|
||||
|
||||
// outputs
|
||||
VX_fetch_if.master fetch_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);
|
||||
|
||||
wire icache_req_valid;
|
||||
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
|
||||
wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag;
|
||||
wire icache_req_ready;
|
||||
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [`NW_WIDTH-1:0] req_tag, rsp_tag;
|
||||
|
||||
wire icache_req_fire = icache_req_valid && icache_req_ready;
|
||||
|
||||
wire [ISW_WIDTH-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
|
||||
|
||||
assign req_tag = schedule_if.data.wid;
|
||||
|
||||
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;
|
||||
|
||||
wire [`XLEN-1:0] rsp_PC;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (`XLEN + `NUM_THREADS),
|
||||
.SIZE (`NUM_WARPS),
|
||||
.LUTRAM (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (icache_req_fire),
|
||||
`UNUSED_PIN (wren),
|
||||
.waddr (req_tag),
|
||||
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
|
||||
.raddr (rsp_tag),
|
||||
.rdata ({rsp_PC, rsp_tmask})
|
||||
);
|
||||
|
||||
// Ensure that the ibuffer doesn't fill up.
|
||||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
|
||||
// This issue is particularly prevalent when the icache and dcache is disabled and both requests share the same bus.
|
||||
wire [`ISSUE_WIDTH-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_pending_size #(
|
||||
.SIZE (`IBUF_SIZE)
|
||||
) pending_reads (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (icache_req_fire && schedule_isw == i),
|
||||
.decr (fetch_if.ibuf_pop[i]),
|
||||
.full (pending_ibuf_full[i]),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
|
||||
// Icache Request
|
||||
|
||||
wire ibuf_ready = ~pending_ibuf_full[schedule_isw];
|
||||
assign icache_req_valid = schedule_if.valid && ibuf_ready;
|
||||
assign icache_req_addr = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2];
|
||||
assign icache_req_tag = {schedule_if.data.uuid, req_tag};
|
||||
assign schedule_if.ready = icache_req_ready && ibuf_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (ICACHE_ADDR_WIDTH + ICACHE_TAG_WIDTH),
|
||||
.SIZE (2),
|
||||
.OUT_REG (1) // external bus should be registered
|
||||
) req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (icache_req_valid),
|
||||
.ready_in (icache_req_ready),
|
||||
.data_in ({icache_req_addr, icache_req_tag}),
|
||||
.data_out ({icache_bus_if.req_data.addr, icache_bus_if.req_data.tag}),
|
||||
.valid_out (icache_bus_if.req_valid),
|
||||
.ready_out (icache_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign icache_bus_if.req_data.rw = 0;
|
||||
assign icache_bus_if.req_data.byteen = 4'b1111;
|
||||
assign icache_bus_if.req_data.data = '0;
|
||||
|
||||
// Icache Response
|
||||
|
||||
assign fetch_if.valid = icache_bus_if.rsp_valid;
|
||||
assign fetch_if.data.tmask = rsp_tmask;
|
||||
assign fetch_if.data.wid = rsp_tag;
|
||||
assign fetch_if.data.PC = rsp_PC;
|
||||
assign fetch_if.data.instr = icache_bus_if.rsp_data.data;
|
||||
assign fetch_if.data.uuid = rsp_uuid;
|
||||
assign icache_bus_if.rsp_ready = fetch_if.ready;
|
||||
|
||||
`ifdef DBG_SCOPE_FETCH
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (3*`UUID_WIDTH + 108)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
schedule_fire,
|
||||
icache_req_fire,
|
||||
icache_rsp_fire
|
||||
}),
|
||||
.probes({
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
|
||||
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
|
||||
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_ICACHE
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
always @(posedge clk) begin
|
||||
if (schedule_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, schedule_if.data.PC, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
end
|
||||
if (fetch_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, fetch_if.data.PC, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
129
hw/rtl/core/VX_gather_unit.sv
Normal file
129
hw/rtl/core/VX_gather_unit.sv
Normal file
@@ -0,0 +1,129 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_gather_unit import VX_gpu_pkg::*; #(
|
||||
parameter BLOCK_SIZE = 1,
|
||||
parameter NUM_LANES = 1,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave commit_in_if [BLOCK_SIZE],
|
||||
|
||||
// outputs
|
||||
VX_commit_if.master commit_out_if [`ISSUE_WIDTH]
|
||||
|
||||
);
|
||||
localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam DATA_WIS_OFF = DATAW - (`UUID_WIDTH + `NW_WIDTH);
|
||||
|
||||
wire [BLOCK_SIZE-1:0] commit_in_valid;
|
||||
wire [BLOCK_SIZE-1:0][DATAW-1:0] commit_in_data;
|
||||
wire [BLOCK_SIZE-1:0] commit_in_ready;
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_IDX_W-1:0] commit_in_wsi;
|
||||
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
assign commit_in_valid[i] = commit_in_if[i].valid;
|
||||
assign commit_in_data[i] = commit_in_if[i].data;
|
||||
assign commit_in_if[i].ready = commit_in_ready[i];
|
||||
if (BLOCK_SIZE != `ISSUE_WIDTH) begin
|
||||
if (BLOCK_SIZE != 1) begin
|
||||
assign commit_in_wsi[i] = {commit_in_data[i][DATA_WIS_OFF+BLOCK_SIZE_W +: (ISSUE_IDX_W-BLOCK_SIZE_W)], BLOCK_SIZE_W'(i)};
|
||||
end else begin
|
||||
assign commit_in_wsi[i] = commit_in_data[i][DATA_WIS_OFF +: ISSUE_IDX_W];
|
||||
end
|
||||
end else begin
|
||||
assign commit_in_wsi[i] = BLOCK_SIZE_W'(i);
|
||||
end
|
||||
end
|
||||
|
||||
reg [`ISSUE_WIDTH-1:0] commit_out_valid;
|
||||
reg [`ISSUE_WIDTH-1:0][DATAW-1:0] commit_out_data;
|
||||
wire [`ISSUE_WIDTH-1:0] commit_out_ready;
|
||||
|
||||
always @(*) begin
|
||||
commit_out_valid = '0;
|
||||
for (integer i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
commit_out_data[i] = 'x;
|
||||
end
|
||||
for (integer i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
commit_out_valid[commit_in_wsi[i]] = commit_in_valid[i];
|
||||
commit_out_data[commit_in_wsi[i]] = commit_in_data[i];
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
assign commit_in_ready[i] = commit_out_ready[commit_in_wsi[i]];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_tmp_if();
|
||||
|
||||
`RESET_RELAY(commit_out_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`OUT_REG_TO_EB_SIZE(OUT_REG)),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(OUT_REG))
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (commit_out_reset),
|
||||
.valid_in (commit_out_valid[i]),
|
||||
.ready_in (commit_out_ready[i]),
|
||||
.data_in (commit_out_data[i]),
|
||||
.data_out (commit_tmp_if.data),
|
||||
.valid_out (commit_tmp_if.valid),
|
||||
.ready_out (commit_tmp_if.ready)
|
||||
);
|
||||
|
||||
logic [`NUM_THREADS-1:0] commit_tmask_r;
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_r;
|
||||
if (PID_BITS != 0) begin
|
||||
always @(*) begin
|
||||
commit_tmask_r = '0;
|
||||
commit_data_r = 'x;
|
||||
for (integer j = 0; j < NUM_LANES; ++j) begin
|
||||
commit_tmask_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j];
|
||||
commit_data_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
assign commit_tmask_r = commit_tmp_if.data.tmask;
|
||||
assign commit_data_r = commit_tmp_if.data.data;
|
||||
end
|
||||
|
||||
assign commit_out_if[i].valid = commit_tmp_if.valid;
|
||||
assign commit_out_if[i].data = {
|
||||
commit_tmp_if.data.uuid,
|
||||
commit_tmp_if.data.wid,
|
||||
commit_tmask_r,
|
||||
commit_tmp_if.data.PC,
|
||||
commit_tmp_if.data.wb,
|
||||
commit_tmp_if.data.rd,
|
||||
commit_data_r,
|
||||
1'b0, // PID
|
||||
commit_tmp_if.data.sop,
|
||||
commit_tmp_if.data.eop
|
||||
};
|
||||
assign commit_tmp_if.ready = commit_out_if[i].ready;
|
||||
end
|
||||
|
||||
endmodule
|
||||
73
hw/rtl/core/VX_ibuffer.sv
Normal file
73
hw/rtl/core/VX_ibuffer.sv
Normal file
@@ -0,0 +1,73 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_ibuffer import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_decode_if.slave decode_if,
|
||||
|
||||
// outputs
|
||||
VX_ibuffer_if.master ibuffer_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4);
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] ibuf_ready_in;
|
||||
|
||||
wire [ISW_WIDTH-1:0] decode_isw = wid_to_isw(decode_if.data.wid);
|
||||
wire [ISSUE_WIS_W-1:0] decode_wis = wid_to_wis(decode_if.data.wid);
|
||||
|
||||
assign decode_if.ready = ibuf_ready_in[decode_isw];
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`IBUF_SIZE),
|
||||
.OUT_REG (1)
|
||||
) instr_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (decode_if.valid && decode_isw == i),
|
||||
.ready_in (ibuf_ready_in[i]),
|
||||
.data_in ({
|
||||
decode_if.data.uuid,
|
||||
decode_wis,
|
||||
decode_if.data.tmask,
|
||||
decode_if.data.ex_type,
|
||||
decode_if.data.op_type,
|
||||
decode_if.data.op_mod,
|
||||
decode_if.data.wb,
|
||||
decode_if.data.use_PC,
|
||||
decode_if.data.use_imm,
|
||||
decode_if.data.PC,
|
||||
decode_if.data.imm,
|
||||
decode_if.data.rd,
|
||||
decode_if.data.rs1,
|
||||
decode_if.data.rs2,
|
||||
decode_if.data.rs3}),
|
||||
.data_out(ibuffer_if[i].data),
|
||||
.valid_out (ibuffer_if[i].valid),
|
||||
.ready_out(ibuffer_if[i].ready)
|
||||
);
|
||||
|
||||
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
||||
end
|
||||
|
||||
endmodule
|
||||
191
hw/rtl/core/VX_int_unit.sv
Normal file
191
hw/rtl/core/VX_int_unit.sv
Normal file
@@ -0,0 +1,191 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_int_unit #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter BLOCK_IDX = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if,
|
||||
VX_branch_ctl_if.master branch_ctl_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam LANE_WIDTH = `UP(LANE_BITS);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam SHIFT_IMM_BITS = `CLOG2(`XLEN);
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] add_result;
|
||||
wire [NUM_LANES-1:0][`XLEN:0] sub_result; // +1 bit for branch compare
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] shr_result;
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] msc_result;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] add_result_w;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] sub_result_w;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] shr_result_w;
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] msc_result_w;
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] alu_result;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_result_r;
|
||||
|
||||
`ifdef XLEN_64
|
||||
wire is_alu_w = `INST_ALU_IS_W(execute_if.data.op_mod);
|
||||
`else
|
||||
wire is_alu_w = 0;
|
||||
`endif
|
||||
|
||||
`UNUSED_VAR (execute_if.data.op_mod)
|
||||
|
||||
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(execute_if.data.op_type);
|
||||
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_BITS'(execute_if.data.op_type);
|
||||
wire is_br_op = `INST_ALU_IS_BR(execute_if.data.op_mod);
|
||||
wire is_sub_op = `INST_ALU_IS_SUB(alu_op);
|
||||
wire is_signed = `INST_ALU_SIGNED(alu_op);
|
||||
wire [1:0] op_class = is_br_op ? `INST_BR_CLASS(alu_op) : `INST_ALU_CLASS(alu_op);
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1 = execute_if.data.rs1_data;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2 = execute_if.data.rs2_data;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1_PC = execute_if.data.use_PC ? {NUM_LANES{execute_if.data.PC}} : alu_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.use_imm ? {NUM_LANES{execute_if.data.imm}} : alu_in2;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.use_imm && ~is_br_op) ? {NUM_LANES{execute_if.data.imm}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
assign add_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN:0] sub_in1 = {is_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
wire [`XLEN:0] sub_in2 = {is_signed & alu_in2_br[i][`XLEN-1], alu_in2_br[i]};
|
||||
assign sub_result[i] = sub_in1 - sub_in2;
|
||||
assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
assign shr_result[i] = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
|
||||
wire [32:0] shr_in1_w = {is_signed && alu_in1[i][31], alu_in1[i][31:0]};
|
||||
wire [31:0] shr_res_w = 32'($signed(shr_in1_w) >>> alu_in2_imm[i][4:0]);
|
||||
assign shr_result_w[i] = `XLEN'($signed(shr_res_w));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
always @(*) begin
|
||||
case (alu_op[1:0])
|
||||
2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; // AND
|
||||
2'b01: msc_result[i] = alu_in1[i] | alu_in2_imm[i]; // OR
|
||||
2'b10: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i]; // XOR
|
||||
2'b11: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]; // SLL
|
||||
endcase
|
||||
end
|
||||
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] slt_br_result = `XLEN'({is_br_op && ~(| sub_result[i][`XLEN-1:0]), sub_result[i][`XLEN]});
|
||||
wire [`XLEN-1:0] sub_slt_br_result = (is_sub_op && ~is_br_op) ? sub_result[i][`XLEN-1:0] : slt_br_result;
|
||||
always @(*) begin
|
||||
case ({is_alu_w, op_class})
|
||||
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR*
|
||||
3'b010: alu_result[i] = shr_result[i]; // SRL, SRA, SRLI, SRAI
|
||||
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
|
||||
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
|
||||
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
|
||||
3'b110: alu_result[i] = shr_result_w[i]; // SRLW, SRAW, SRLIW, SRAIW
|
||||
3'b111: alu_result[i] = msc_result_w[i]; // SLLW
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// branch
|
||||
|
||||
wire [`XLEN-1:0] PC_r, imm_r;
|
||||
wire [`INST_BR_BITS-1:0] br_op_r;
|
||||
wire [LANE_WIDTH-1:0] tid, tid_r;
|
||||
wire is_br_op_r;
|
||||
|
||||
if (LANE_BITS != 0) begin
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
end else begin
|
||||
assign tid = 0;
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + (NUM_LANES * `XLEN) + `XLEN + `XLEN + 1 + `INST_BR_BITS + LANE_WIDTH)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (execute_if.valid),
|
||||
.ready_in (execute_if.ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, execute_if.data.imm, is_br_op, br_op, tid}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, alu_result_r, PC_r, imm_r, is_br_op_r, br_op_r, tid_r}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (br_op_r)
|
||||
wire is_br_neg = `INST_BR_IS_NEG(br_op_r);
|
||||
wire is_br_less = `INST_BR_IS_LESS(br_op_r);
|
||||
wire is_br_static = `INST_BR_IS_STATIC(br_op_r);
|
||||
wire [`XLEN-1:0] br_result = alu_result_r[tid_r];
|
||||
|
||||
wire is_less = br_result[0];
|
||||
wire is_equal = br_result[1];
|
||||
|
||||
wire br_enable = is_br_op_r && commit_if.valid && commit_if.ready && commit_if.data.eop;
|
||||
wire br_taken = ((is_br_less ? is_less : is_equal) ^ is_br_neg) | is_br_static;
|
||||
wire [`XLEN-1:0] br_dest = is_br_static ? br_result : (PC_r + imm_r);
|
||||
wire [`NW_WIDTH-1:0] br_wid;
|
||||
`ASSIGN_BLOCKED_WID (br_wid, commit_if.data.wid, BLOCK_IDX, `NUM_ALU_BLOCKS)
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_WIDTH + 1 + `XLEN)
|
||||
) branch_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({br_enable, br_wid, br_taken, br_dest}),
|
||||
.data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? (PC_r + 4) : alu_result_r[i];
|
||||
end
|
||||
|
||||
assign commit_if.data.PC = PC_r;
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
`TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, commit_if.data.PC, branch_ctl_if.taken, branch_ctl_if.dest, commit_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
108
hw/rtl/core/VX_ipdom_stack.sv
Normal file
108
hw/rtl/core/VX_ipdom_stack.sv
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_ipdom_stack #(
|
||||
parameter WIDTH = 1,
|
||||
parameter DEPTH = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter ADDRW = `LOG2UP(DEPTH)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [WIDTH-1:0] q0,
|
||||
input wire [WIDTH-1:0] q1,
|
||||
output wire [WIDTH-1:0] d,
|
||||
output wire d_set,
|
||||
input wire push,
|
||||
input wire pop,
|
||||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
reg slot_set [DEPTH-1:0];
|
||||
|
||||
reg [ADDRW-1:0] rd_ptr, wr_ptr;
|
||||
|
||||
reg empty_r, full_r;
|
||||
|
||||
wire [WIDTH-1:0] d0, d1;
|
||||
|
||||
wire d_set_n = slot_set[rd_ptr];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr <= '0;
|
||||
wr_ptr <= '0;
|
||||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
end else begin
|
||||
`ASSERT(~push || ~full, ("runtime error: writing to a full stack!"));
|
||||
`ASSERT(~pop || ~empty, ("runtime error: reading an empty stack!"));
|
||||
`ASSERT(~push || ~pop, ("runtime error: push and pop in same cycle not supported!"));
|
||||
if (push) begin
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
empty_r <= 0;
|
||||
full_r <= (ADDRW'(DEPTH-1) == wr_ptr);
|
||||
end else if (pop) begin
|
||||
wr_ptr <= wr_ptr - ADDRW'(d_set_n);
|
||||
rd_ptr <= rd_ptr - ADDRW'(d_set_n);
|
||||
empty_r <= (rd_ptr == 0) && (d_set_n == 1);
|
||||
full_r <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (WIDTH * 2),
|
||||
.SIZE (DEPTH),
|
||||
.OUT_REG (OUT_REG ? 1 : 0),
|
||||
.LUTRAM (OUT_REG ? 0 : 1)
|
||||
) store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q1, q0}),
|
||||
.raddr (rd_ptr),
|
||||
.rdata ({d1, d0})
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
slot_set[wr_ptr] <= 0;
|
||||
end else if (pop) begin
|
||||
slot_set[rd_ptr] <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire d_set_r;
|
||||
VX_pipe_register #(
|
||||
.DATAW (1),
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in (d_set_n),
|
||||
.data_out (d_set_r)
|
||||
);
|
||||
|
||||
assign d = d_set_r ? d0 : d1;
|
||||
assign d_set = ~d_set_r;
|
||||
assign empty = empty_r;
|
||||
assign full = full_r;
|
||||
|
||||
endmodule
|
||||
180
hw/rtl/core/VX_issue.sv
Normal file
180
hw/rtl/core/VX_issue.sv
Normal file
@@ -0,0 +1,180 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_issue #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_pipeline_perf_if.issue perf_issue_if,
|
||||
`endif
|
||||
|
||||
VX_decode_if.slave decode_if,
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
|
||||
VX_dispatch_if.master alu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master lsu_dispatch_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_dispatch_if.master fpu_dispatch_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
||||
);
|
||||
VX_ibuffer_if ibuffer_if [`ISSUE_WIDTH]();
|
||||
VX_ibuffer_if scoreboard_if [`ISSUE_WIDTH]();
|
||||
VX_operands_if operands_if [`ISSUE_WIDTH]();
|
||||
|
||||
`RESET_RELAY (ibuf_reset, reset);
|
||||
`RESET_RELAY (scoreboard_reset, reset);
|
||||
`RESET_RELAY (operands_reset, reset);
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_ibuffer #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (ibuf_reset),
|
||||
.decode_if (decode_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (scoreboard_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.scoreboard_if (scoreboard_if)
|
||||
);
|
||||
|
||||
VX_operands #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (operands_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.operands_if (operands_if)
|
||||
);
|
||||
|
||||
VX_dispatch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (perf_issue_if.dsp_stalls),
|
||||
`endif
|
||||
.operands_if (operands_if),
|
||||
.alu_dispatch_if(alu_dispatch_if),
|
||||
.lsu_dispatch_if(lsu_dispatch_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_dispatch_if(fpu_dispatch_if),
|
||||
`endif
|
||||
.sfu_dispatch_if(sfu_dispatch_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire operands_if_fire = operands_if[0].valid && operands_if[0].ready;
|
||||
wire operands_if_not_ready = ~operands_if[0].ready;
|
||||
wire writeback_if_valid = writeback_if[0].valid;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (2),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS +
|
||||
1 + `NR_BITS + `XLEN + 1 + 1 + (`NUM_THREADS * 3 * `XLEN) +
|
||||
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
operands_if_fire,
|
||||
operands_if_not_ready,
|
||||
writeback_if_valid
|
||||
}),
|
||||
.probes({
|
||||
operands_if[0].data.uuid,
|
||||
operands_if[0].data.tmask,
|
||||
operands_if[0].data.ex_type,
|
||||
operands_if[0].data.op_type,
|
||||
operands_if[0].data.op_mod,
|
||||
operands_if[0].data.wb,
|
||||
operands_if[0].data.rd,
|
||||
operands_if[0].data.imm,
|
||||
operands_if[0].data.use_PC,
|
||||
operands_if[0].data.use_imm,
|
||||
operands_if[0].data.rs1_data,
|
||||
operands_if[0].data.rs2_data,
|
||||
operands_if[0].data.rs3_data,
|
||||
writeback_if[0].data.uuid,
|
||||
writeback_if[0].data.tmask,
|
||||
writeback_if[0].data.rd,
|
||||
writeback_if[0].data.data,
|
||||
writeback_if[0].data.eop
|
||||
}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_issue ila_issue_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({operands_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, operands_if.PC, operands_if.tmask, operands_if.wid, operands_if.ex_type, operands_if.op_type, operands_if.ready, operands_if.valid}),
|
||||
.probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_scb_stalls;
|
||||
|
||||
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] scoreboard_stalls_per_cycle;
|
||||
reg [`ISSUE_WIDTH-1:0] scoreboard_stalls;
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign scoreboard_stalls[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready;
|
||||
end
|
||||
`POP_COUNT(scoreboard_stalls_per_cycle, scoreboard_stalls);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_ibf_stalls <= '0;
|
||||
perf_scb_stalls <= '0;
|
||||
end else begin
|
||||
if (decode_if.valid && ~decode_if.ready) begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(1);
|
||||
end
|
||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(scoreboard_stalls_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
|
||||
assign perf_issue_if.scb_stalls = perf_scb_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
647
hw/rtl/core/VX_lsu_unit.sv
Normal file
647
hw/rtl/core/VX_lsu_unit.sv
Normal file
@@ -0,0 +1,647 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_mem_bus_if.master cache_bus_if [DCACHE_NUM_REQS],
|
||||
|
||||
// inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH]
|
||||
);
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_SIZE);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = `XLEN - MEM_ASHIFT;
|
||||
localparam REQ_ASHIFT = `CLOG2(DCACHE_WORD_SIZE);
|
||||
localparam CACHE_TAG_WIDTH = `UUID_WIDTH + (NUM_LANES * `CACHE_ADDR_TYPE_BITS) + LSUQ_TAG_BITS;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) execute_if[BLOCK_SIZE]();
|
||||
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (1)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.dispatch_if(dispatch_if),
|
||||
.execute_if (execute_if)
|
||||
);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_st_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_ld_if();
|
||||
|
||||
`UNUSED_VAR (execute_if[0].data.op_mod)
|
||||
`UNUSED_VAR (execute_if[0].data.use_PC)
|
||||
`UNUSED_VAR (execute_if[0].data.use_imm)
|
||||
`UNUSED_VAR (execute_if[0].data.rs3_data)
|
||||
`UNUSED_VAR (execute_if[0].data.tid)
|
||||
|
||||
`ifdef SM_ENABLE
|
||||
`STATIC_ASSERT((1 << `SMEM_LOG_SIZE) == `MEM_BLOCK_SIZE * ((1 << `SMEM_LOG_SIZE) / `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % (1 << `SMEM_LOG_SIZE)), ("invalid parameter"))
|
||||
localparam SMEM_START_B = MEM_ADDRW'(`XLEN'(`SMEM_BASE_ADDR) >> MEM_ASHIFT);
|
||||
localparam SMEM_END_B = MEM_ADDRW'((`XLEN'(`SMEM_BASE_ADDR) + (1 << `SMEM_LOG_SIZE)) >> MEM_ASHIFT);
|
||||
`endif
|
||||
|
||||
// tag = uuid + addr_type + wid + PC + tmask + rd + op_type + align + is_dup + pid + pkt_addr
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + (NUM_LANES * `CACHE_ADDR_TYPE_BITS) + `NW_WIDTH + `XLEN + NUM_LANES + `NR_BITS + `INST_LSU_BITS + (NUM_LANES * (REQ_ASHIFT)) + `LSU_DUP_ENABLED + PID_WIDTH + LSUQ_SIZEW;
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
wire [NUM_LANES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] lsu_addr_type;
|
||||
|
||||
// full address calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] full_addr;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign full_addr[i] = execute_if[0].data.rs1_data[i][`XLEN-1:0] + execute_if[0].data.imm;
|
||||
end
|
||||
|
||||
// detect duplicate addresses
|
||||
|
||||
wire lsu_is_dup;
|
||||
`ifdef LSU_DUP
|
||||
if (NUM_LANES > 1) begin
|
||||
wire [NUM_LANES-2:0] addr_matches;
|
||||
for (genvar i = 0; i < (NUM_LANES-1); ++i) begin
|
||||
assign addr_matches[i] = (execute_if[0].data.rs1_data[i+1] == execute_if[0].data.rs1_data[0]) || ~execute_if[0].data.tmask[i+1];
|
||||
end
|
||||
assign lsu_is_dup = execute_if[0].data.tmask[0] && (& addr_matches);
|
||||
end else begin
|
||||
assign lsu_is_dup = 0;
|
||||
end
|
||||
`else
|
||||
assign lsu_is_dup = 0;
|
||||
`endif
|
||||
|
||||
// detect address type
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [MEM_ADDRW-1:0] full_addr_b = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
// is non-cacheable I/O address
|
||||
wire is_addr_io = (full_addr_b >= MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT));
|
||||
`ifdef SM_ENABLE
|
||||
// is shared memory address
|
||||
wire is_addr_sm = (full_addr_b >= SMEM_START_B) && (full_addr_b < SMEM_END_B);
|
||||
assign lsu_addr_type[i] = {is_addr_io, is_addr_sm};
|
||||
`else
|
||||
assign lsu_addr_type[i] = is_addr_io;
|
||||
`endif
|
||||
end
|
||||
|
||||
wire mem_req_empty;
|
||||
wire st_rsp_ready;
|
||||
wire lsu_valid, lsu_ready;
|
||||
|
||||
// fence: stall the pipeline until all pending requests are sent
|
||||
wire is_fence = `INST_LSU_IS_FENCE(execute_if[0].data.op_type);
|
||||
wire fence_wait = is_fence && ~mem_req_empty;
|
||||
|
||||
assign lsu_valid = execute_if[0].valid && ~fence_wait;
|
||||
assign execute_if[0].ready = lsu_ready && ~fence_wait;
|
||||
|
||||
// schedule memory request
|
||||
|
||||
wire mem_req_valid;
|
||||
wire [NUM_LANES-1:0] mem_req_mask;
|
||||
wire mem_req_rw;
|
||||
wire [NUM_LANES-1:0][`MEM_ADDR_WIDTH-REQ_ASHIFT-1:0] mem_req_addr;
|
||||
reg [NUM_LANES-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen;
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] mem_req_data;
|
||||
wire [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_rsp_valid;
|
||||
wire [NUM_LANES-1:0] mem_rsp_mask;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] mem_rsp_data;
|
||||
wire [TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_sop;
|
||||
wire mem_rsp_eop;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
assign mem_req_valid = lsu_valid;
|
||||
assign lsu_ready = mem_req_ready
|
||||
&& (~mem_req_rw || st_rsp_ready); // writes commit directly
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign mem_req_mask[i] = execute_if[0].data.tmask[i] && (~lsu_is_dup || (i == 0));
|
||||
end
|
||||
|
||||
assign mem_req_rw = ~execute_if[0].data.wb;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
// address formatting
|
||||
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0];
|
||||
assign mem_req_addr[i] = full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT];
|
||||
end
|
||||
|
||||
// byte enable formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
always @(*) begin
|
||||
mem_req_byteen[i] = '0;
|
||||
case (`INST_LSU_WSIZE(execute_if[0].data.op_type))
|
||||
0: begin // 8-bit
|
||||
mem_req_byteen[i][req_align[i]] = 1'b1;
|
||||
end
|
||||
1: begin // 16 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
2: begin // 32 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
end
|
||||
`endif
|
||||
default : mem_req_byteen[i] = {DCACHE_WORD_SIZE{1'b1}};
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// memory misalignment not supported!
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire lsu_req_fire = execute_if[0].valid && execute_if[0].ready;
|
||||
`RUNTIME_ASSERT((~lsu_req_fire || ~execute_if[0].data.tmask[i] || is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if[0].data.op_type))) == 0),
|
||||
("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
execute_if[0].data.wid, execute_if[0].data.PC, full_addr[i], `INST_LSU_WSIZE(execute_if[0].data.op_type), execute_if[0].data.uuid));
|
||||
end
|
||||
|
||||
// store data formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
always @(*) begin
|
||||
mem_req_data[i] = execute_if[0].data.rs2_data[i];
|
||||
case (req_align[i])
|
||||
1: mem_req_data[i][`XLEN-1:8] = execute_if[0].data.rs2_data[i][`XLEN-9:0];
|
||||
2: mem_req_data[i][`XLEN-1:16] = execute_if[0].data.rs2_data[i][`XLEN-17:0];
|
||||
3: mem_req_data[i][`XLEN-1:24] = execute_if[0].data.rs2_data[i][`XLEN-25:0];
|
||||
`ifdef XLEN_64
|
||||
4: mem_req_data[i][`XLEN-1:32] = execute_if[0].data.rs2_data[i][`XLEN-33:0];
|
||||
5: mem_req_data[i][`XLEN-1:40] = execute_if[0].data.rs2_data[i][`XLEN-41:0];
|
||||
6: mem_req_data[i][`XLEN-1:48] = execute_if[0].data.rs2_data[i][`XLEN-49:0];
|
||||
7: mem_req_data[i][`XLEN-1:56] = execute_if[0].data.rs2_data[i][`XLEN-57:0];
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// track SOP/EOP for out-of-order memory responses
|
||||
|
||||
wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr;
|
||||
wire mem_rsp_sop_pkt, mem_rsp_eop_pkt;
|
||||
|
||||
if (PID_BITS != 0) begin
|
||||
reg [`LSUQ_SIZE-1:0][PID_BITS:0] pkt_ctr;
|
||||
reg [`LSUQ_SIZE-1:0] pkt_sop, pkt_eop;
|
||||
|
||||
wire mem_req_rd_fire = mem_req_fire && execute_if[0].data.wb;
|
||||
wire mem_req_rd_sop_fire = mem_req_rd_fire && execute_if[0].data.sop;
|
||||
wire mem_req_rd_eop_fire = mem_req_rd_fire && execute_if[0].data.eop;
|
||||
wire mem_rsp_eop_fire = mem_rsp_fire && mem_rsp_eop;
|
||||
wire full;
|
||||
|
||||
VX_allocator #(
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) pkt_allocator (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_en (mem_req_rd_eop_fire),
|
||||
.acquire_addr(pkt_waddr),
|
||||
.release_en (mem_rsp_eop_pkt),
|
||||
.release_addr(pkt_raddr),
|
||||
`UNUSED_PIN (empty),
|
||||
.full (full)
|
||||
);
|
||||
|
||||
wire rd_during_wr = mem_req_rd_fire && mem_rsp_eop_fire && (pkt_raddr == pkt_waddr);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pkt_ctr <= '0;
|
||||
pkt_sop <= '0;
|
||||
pkt_eop <= '0;
|
||||
end else begin
|
||||
if (mem_req_rd_sop_fire) begin
|
||||
pkt_sop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (mem_req_rd_eop_fire) begin
|
||||
pkt_eop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
pkt_sop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (mem_rsp_eop_pkt) begin
|
||||
pkt_eop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (~rd_during_wr) begin
|
||||
if (mem_req_rd_fire) begin
|
||||
pkt_ctr[pkt_waddr] <= pkt_ctr[pkt_waddr] + PID_BITS'(1);
|
||||
end
|
||||
if (mem_rsp_eop_fire) begin
|
||||
pkt_ctr[pkt_raddr] <= pkt_ctr[pkt_raddr] - PID_BITS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr];
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1);
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("allocator full!"))
|
||||
`RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("Oops!"))
|
||||
`UNUSED_VAR (mem_rsp_sop)
|
||||
end else begin
|
||||
assign pkt_waddr = 0;
|
||||
assign mem_rsp_sop_pkt = mem_rsp_sop;
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop;
|
||||
`UNUSED_VAR (pkt_raddr)
|
||||
end
|
||||
|
||||
assign mem_req_tag = {
|
||||
execute_if[0].data.uuid, lsu_addr_type, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, execute_if[0].data.rd, execute_if[0].data.op_type, req_align, execute_if[0].data.pid, pkt_waddr
|
||||
`ifdef LSU_DUP
|
||||
, lsu_is_dup
|
||||
`endif
|
||||
};
|
||||
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_req_valid;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_req_rw;
|
||||
wire [DCACHE_NUM_REQS-1:0][(`XLEN/8)-1:0] cache_req_byteen;
|
||||
wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] cache_req_addr;
|
||||
wire [DCACHE_NUM_REQS-1:0][`XLEN-1:0] cache_req_data;
|
||||
wire [DCACHE_NUM_REQS-1:0][CACHE_TAG_WIDTH-1:0] cache_req_tag;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_req_ready;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_rsp_valid;
|
||||
wire [DCACHE_NUM_REQS-1:0][`XLEN-1:0] cache_rsp_data;
|
||||
wire [DCACHE_NUM_REQS-1:0][CACHE_TAG_WIDTH-1:0] cache_rsp_tag;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_rsp_ready;
|
||||
|
||||
`RESET_RELAY (mem_scheduler_reset, reset);
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched", CORE_ID)),
|
||||
.NUM_REQS (LSU_MEM_REQS),
|
||||
.NUM_BANKS (DCACHE_NUM_REQS),
|
||||
.ADDR_WIDTH (DCACHE_ADDR_WIDTH),
|
||||
.DATA_WIDTH (`XLEN),
|
||||
.QUEUE_SIZE (`LSUQ_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.MEM_TAG_ID (`UUID_WIDTH + (NUM_LANES * `CACHE_ADDR_TYPE_BITS)),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.RSP_PARTIAL (1),
|
||||
.MEM_OUT_REG (2)
|
||||
) mem_scheduler (
|
||||
.clk (clk),
|
||||
.reset (mem_scheduler_reset),
|
||||
|
||||
// Input request
|
||||
.req_valid (mem_req_valid),
|
||||
.req_rw (mem_req_rw),
|
||||
.req_mask (mem_req_mask),
|
||||
.req_byteen (mem_req_byteen),
|
||||
.req_addr (mem_req_addr),
|
||||
.req_data (mem_req_data),
|
||||
.req_tag (mem_req_tag),
|
||||
.req_empty (mem_req_empty),
|
||||
.req_ready (mem_req_ready),
|
||||
`UNUSED_PIN (write_notify),
|
||||
|
||||
// Output response
|
||||
.rsp_valid (mem_rsp_valid),
|
||||
.rsp_mask (mem_rsp_mask),
|
||||
.rsp_data (mem_rsp_data),
|
||||
.rsp_tag (mem_rsp_tag),
|
||||
.rsp_sop (mem_rsp_sop),
|
||||
.rsp_eop (mem_rsp_eop),
|
||||
.rsp_ready (mem_rsp_ready),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (cache_req_valid),
|
||||
.mem_req_rw (cache_req_rw),
|
||||
.mem_req_byteen (cache_req_byteen),
|
||||
.mem_req_addr (cache_req_addr),
|
||||
.mem_req_data (cache_req_data),
|
||||
.mem_req_tag (cache_req_tag),
|
||||
.mem_req_ready (cache_req_ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (cache_rsp_valid),
|
||||
.mem_rsp_data (cache_rsp_data),
|
||||
.mem_rsp_tag (cache_rsp_tag),
|
||||
.mem_rsp_ready (cache_rsp_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
assign cache_bus_if[i].req_valid = cache_req_valid[i];
|
||||
assign cache_bus_if[i].req_data.rw = cache_req_rw[i];
|
||||
assign cache_bus_if[i].req_data.byteen = cache_req_byteen[i];
|
||||
assign cache_bus_if[i].req_data.addr = cache_req_addr[i];
|
||||
assign cache_bus_if[i].req_data.data = cache_req_data[i];
|
||||
assign cache_req_ready[i] = cache_bus_if[i].req_ready;
|
||||
|
||||
assign cache_rsp_valid[i] = cache_bus_if[i].rsp_valid;
|
||||
assign cache_rsp_data[i] = cache_bus_if[i].rsp_data.data;
|
||||
assign cache_bus_if[i].rsp_ready = cache_rsp_ready[i];
|
||||
end
|
||||
|
||||
// cache tag formatting: <uuid, tag, type>
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
wire [`UUID_WIDTH-1:0] cache_req_uuid, cache_rsp_uuid;
|
||||
wire [NUM_LANES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type, cache_rsp_type;
|
||||
wire [`CLOG2(`LSUQ_SIZE)-1:0] cache_req_tag_x, cache_rsp_tag_x;
|
||||
if (DCACHE_NUM_BATCHES > 1) begin
|
||||
|
||||
wire [DCACHE_NUM_BATCHES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type_b, cache_rsp_type_b;
|
||||
wire [`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type_bi, cache_rsp_type_bi;
|
||||
wire [DCACHE_BATCH_SEL_BITS-1:0] cache_req_bid, cache_rsp_bid;
|
||||
|
||||
assign {cache_req_uuid, cache_req_type, cache_req_bid, cache_req_tag_x} = cache_req_tag[i];
|
||||
assign cache_req_type_bi = cache_req_type_b[cache_req_bid];
|
||||
assign cache_bus_if[i].req_data.tag = {cache_req_uuid, cache_req_bid, cache_req_tag_x, cache_req_type_bi};
|
||||
|
||||
assign {cache_rsp_uuid, cache_rsp_bid, cache_rsp_tag_x, cache_rsp_type_bi} = cache_bus_if[i].rsp_data.tag;
|
||||
assign cache_rsp_type_b = {DCACHE_NUM_BATCHES{cache_rsp_type_bi}};
|
||||
assign cache_rsp_tag[i] = {cache_rsp_uuid, cache_rsp_type, cache_rsp_bid, cache_rsp_tag_x};
|
||||
|
||||
for (genvar j = 0; j < DCACHE_NUM_BATCHES; ++j) begin
|
||||
localparam k = j * DCACHE_NUM_REQS + i;
|
||||
if (k < NUM_LANES) begin
|
||||
assign cache_req_type_b[j] = cache_req_type[k];
|
||||
assign cache_rsp_type[k] = cache_rsp_type_b[j];
|
||||
end else begin
|
||||
assign cache_req_type_b[j] = '0;
|
||||
`UNUSED_VAR (cache_rsp_type_b[j])
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
assign {cache_req_uuid, cache_req_type, cache_req_tag_x} = cache_req_tag[i];
|
||||
assign cache_bus_if[i].req_data.tag = {cache_req_uuid, cache_req_tag_x, cache_req_type[i]};
|
||||
|
||||
assign {cache_rsp_uuid, cache_rsp_tag_x, cache_rsp_type[i]} = cache_bus_if[i].rsp_data.tag;
|
||||
assign cache_rsp_tag[i] = {cache_rsp_uuid, cache_rsp_type, cache_rsp_tag_x};
|
||||
|
||||
for (genvar j = 0; j < DCACHE_NUM_REQS; ++j) begin
|
||||
if (i != j) begin
|
||||
`UNUSED_VAR (cache_req_type[j])
|
||||
assign cache_rsp_type[j] = '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [NUM_LANES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] rsp_addr_type;
|
||||
wire [`NW_WIDTH-1:0] rsp_wid;
|
||||
wire [NUM_LANES-1:0] rsp_tmask_uq;
|
||||
wire [`XLEN-1:0] rsp_pc;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire [`INST_LSU_BITS-1:0] rsp_op_type;
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] rsp_align;
|
||||
wire [PID_WIDTH-1:0] rsp_pid;
|
||||
wire rsp_is_dup;
|
||||
|
||||
`ifndef LSU_DUP
|
||||
assign rsp_is_dup = 0;
|
||||
`endif
|
||||
|
||||
assign {
|
||||
rsp_uuid, rsp_addr_type, rsp_wid, rsp_tmask_uq, rsp_pc, rsp_rd, rsp_op_type, rsp_align, rsp_pid, pkt_raddr
|
||||
`ifdef LSU_DUP
|
||||
, rsp_is_dup
|
||||
`endif
|
||||
} = mem_rsp_tag;
|
||||
`UNUSED_VAR (rsp_addr_type)
|
||||
`UNUSED_VAR (rsp_op_type)
|
||||
|
||||
// load response formatting
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] rsp_data;
|
||||
wire [NUM_LANES-1:0] rsp_tmask;
|
||||
|
||||
`ifdef XLEN_64
|
||||
`ifdef EXT_F_ENABLE
|
||||
// apply nan-boxing to flw outputs
|
||||
wire rsp_is_float = rsp_rd[5];
|
||||
`else
|
||||
wire rsp_is_float = 0;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; i++) begin
|
||||
`ifdef XLEN_64
|
||||
wire [63:0] rsp_data64 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? (rsp_align[0][2] ? mem_rsp_data[0][63:32] : mem_rsp_data[0][31:0]) :
|
||||
(rsp_align[i][2] ? mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]);
|
||||
`else
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
|
||||
`endif
|
||||
wire [15:0] rsp_data16 = rsp_align[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
|
||||
wire [7:0] rsp_data8 = rsp_align[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
||||
always @(*) begin
|
||||
case (`INST_LSU_FMT(rsp_op_type))
|
||||
`INST_FMT_B: rsp_data[i] = `XLEN'(signed'(rsp_data8));
|
||||
`INST_FMT_H: rsp_data[i] = `XLEN'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = `XLEN'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = `XLEN'(unsigned'(rsp_data16));
|
||||
`ifdef XLEN_64
|
||||
`INST_FMT_W: rsp_data[i] = rsp_is_float ? (`XLEN'(rsp_data32) | 64'hffffffff00000000) : `XLEN'(signed'(rsp_data32));
|
||||
`INST_FMT_WU: rsp_data[i] = `XLEN'(unsigned'(rsp_data32));
|
||||
`INST_FMT_D: rsp_data[i] = `XLEN'(signed'(rsp_data64));
|
||||
`else
|
||||
`INST_FMT_W: rsp_data[i] = `XLEN'(signed'(rsp_data32));
|
||||
`endif
|
||||
default: rsp_data[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign rsp_tmask = rsp_is_dup ? rsp_tmask_uq : mem_rsp_mask;
|
||||
|
||||
// load commit
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) ld_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_rsp_valid),
|
||||
.ready_in (mem_rsp_ready),
|
||||
.data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
||||
.data_out ({commit_ld_if.data.uuid, commit_ld_if.data.wid, commit_ld_if.data.tmask, commit_ld_if.data.PC, commit_ld_if.data.rd, commit_ld_if.data.data, commit_ld_if.data.pid, commit_ld_if.data.sop, commit_ld_if.data.eop}),
|
||||
.valid_out (commit_ld_if.valid),
|
||||
.ready_out (commit_ld_if.ready)
|
||||
);
|
||||
|
||||
assign commit_ld_if.data.wb = 1'b1;
|
||||
|
||||
// store commit
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) st_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_fire && mem_req_rw),
|
||||
.ready_in (st_rsp_ready),
|
||||
.data_in ({execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, execute_if[0].data.pid, execute_if[0].data.sop, execute_if[0].data.eop}),
|
||||
.data_out ({commit_st_if.data.uuid, commit_st_if.data.wid, commit_st_if.data.tmask, commit_st_if.data.PC, commit_st_if.data.pid, commit_st_if.data.sop, commit_st_if.data.eop}),
|
||||
.valid_out (commit_st_if.valid),
|
||||
.ready_out (commit_st_if.ready)
|
||||
);
|
||||
assign commit_st_if.data.rd = '0;
|
||||
assign commit_st_if.data.wb = 1'b0;
|
||||
assign commit_st_if.data.data = commit_ld_if.data.data; // force arbiter passthru
|
||||
|
||||
// lsu commit
|
||||
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_arb_if[1]();
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_REG (1)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.valid_in ({commit_st_if.valid, commit_ld_if.valid}),
|
||||
.ready_in ({commit_st_if.ready, commit_ld_if.ready}),
|
||||
.data_in ({commit_st_if.data, commit_ld_if.data}),
|
||||
.data_out (commit_arb_if[0].data),
|
||||
.valid_out (commit_arb_if[0].valid),
|
||||
.ready_out (commit_arb_if[0].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (3)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.commit_in_if (commit_arb_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes({execute_if[0].data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
wire [31:0] full_addr_0 = full_addr[0];
|
||||
wire [31:0] mem_req_data_0 = mem_req_data[0];
|
||||
wire [31:0] rsp_data_0 = rsp_data[0];
|
||||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({mem_req_data_0, execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
|
||||
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, rsp_tmask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
|
||||
.probe2 ({cache_bus_if.req_data.data, cache_bus_if.req_data.tag, cache_bus_if.req_data.byteen, cache_bus_if.req_data.addr, cache_bus_if.req_data.rw, cache_bus_if.req_ready, cache_bus_if.req_valid}),
|
||||
.probe3 ({cache_bus_if.rsp_data.data, cache_bus_if.rsp_data.tag, cache_bus_if.rsp_ready, cache_bus_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
if (execute_if[0].valid && fence_wait) begin
|
||||
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID));
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, type=", mem_req_tag, mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, lsu_addr_type, NUM_LANES);
|
||||
`TRACE(1, (", data="));
|
||||
`TRACE_ARRAY1D(1, mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", is_dup=%b (#%0d)\n", lsu_is_dup, execute_if[0].data.uuid));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, type=", mem_req_tag, mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, lsu_addr_type, NUM_LANES);
|
||||
`TRACE(1, (", rd=%0d, is_dup=%b (#%0d)\n", execute_if[0].data.rd, lsu_is_dup, execute_if[0].data.uuid));
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, tag=0x%0h, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, rsp_pc, mem_rsp_mask, mem_rsp_tag, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", is_dup=%b (#%0d)\n", rsp_is_dup, rsp_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
336
hw/rtl/core/VX_muldiv_unit.sv
Normal file
336
hw/rtl/core/VX_muldiv_unit.sv
Normal file
@@ -0,0 +1,336 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_muldiv_unit #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam TAGW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
wire [`INST_M_BITS-1:0] muldiv_op = `INST_M_BITS'(execute_if.data.op_type);
|
||||
|
||||
wire is_mulx_op = `INST_M_IS_MULX(muldiv_op);
|
||||
wire is_signed_op = `INST_M_SIGNED(muldiv_op);
|
||||
`ifdef XLEN_64
|
||||
wire is_alu_w = `INST_ALU_IS_W(execute_if.data.op_mod);
|
||||
`else
|
||||
wire is_alu_w = 0;
|
||||
`endif
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] mul_result_out;
|
||||
wire [`UUID_WIDTH-1:0] mul_uuid_out;
|
||||
wire [`NW_WIDTH-1:0] mul_wid_out;
|
||||
wire [NUM_LANES-1:0] mul_tmask_out;
|
||||
wire [`XLEN-1:0] mul_PC_out;
|
||||
wire [`NR_BITS-1:0] mul_rd_out;
|
||||
wire mul_wb_out;
|
||||
wire [PID_WIDTH-1:0] mul_pid_out;
|
||||
wire mul_sop_out, mul_eop_out;
|
||||
|
||||
wire mul_valid_in = execute_if.valid && is_mulx_op;
|
||||
wire mul_ready_in;
|
||||
wire mul_valid_out;
|
||||
wire mul_ready_out;
|
||||
|
||||
wire is_mulh_in = `INST_M_IS_MULH(muldiv_op);
|
||||
wire is_signed_mul_a = `INST_M_SIGNED_A(muldiv_op);
|
||||
wire is_signed_mul_b = is_signed_op;
|
||||
|
||||
`ifdef IMUL_DPI
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] mul_result_tmp;
|
||||
|
||||
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
|
||||
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
|
||||
always @(*) begin
|
||||
dpi_imul (mul_fire_in, is_signed_mul_a, is_signed_mul_b, mul_in1, mul_in2, mul_resultl, mul_resulth);
|
||||
end
|
||||
assign mul_result_tmp[i] = is_mulh_in ? mul_resulth : (is_alu_w ? `XLEN'($signed(mul_resultl[31:0])) : mul_resultl);
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + (NUM_LANES * `XLEN)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
|
||||
.data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out})
|
||||
);
|
||||
|
||||
assign mul_ready_in = mul_ready_out || ~mul_valid_out;
|
||||
|
||||
`else
|
||||
|
||||
wire [NUM_LANES-1:0][2*(`XLEN+1)-1:0] mul_result_tmp;
|
||||
wire is_mulh_out;
|
||||
wire is_mul_w_out;
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN:0] mul_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN:0] mul_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]} : {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
assign mul_in2[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]} : {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
end
|
||||
|
||||
wire mul_strode;
|
||||
wire mul_busy;
|
||||
|
||||
VX_elastic_adapter mul_elastic_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mul_valid_in),
|
||||
.ready_in (mul_ready_in),
|
||||
.valid_out (mul_valid_out),
|
||||
.ready_out (mul_ready_out),
|
||||
.strobe (mul_strode),
|
||||
.busy (mul_busy)
|
||||
);
|
||||
|
||||
VX_serial_mul #(
|
||||
.A_WIDTH (`XLEN+1),
|
||||
.LANES (NUM_LANES),
|
||||
.SIGNED (1)
|
||||
) serial_mul (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.strobe (mul_strode),
|
||||
.busy (mul_busy),
|
||||
|
||||
.dataa (mul_in1),
|
||||
.datab (mul_in2),
|
||||
.result (mul_result_tmp)
|
||||
);
|
||||
|
||||
reg [TAGW+2-1:0] mul_tag_r;
|
||||
always @(posedge clk) begin
|
||||
if (mul_valid_in && mul_ready_in) begin
|
||||
mul_tag_r <= {execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, is_mulh_in, is_alu_w, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop};
|
||||
end
|
||||
end
|
||||
|
||||
assign {mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out, is_mul_w_out, mul_pid_out, mul_sop_out, mul_eop_out} = mul_tag_r;
|
||||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN:0] mul_in1 = {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
|
||||
VX_multiplier #(
|
||||
.A_WIDTH (`XLEN+1),
|
||||
.B_WIDTH (`XLEN+1),
|
||||
.R_WIDTH (2*(`XLEN+1)),
|
||||
.SIGNED (1),
|
||||
.LATENCY (`LATENCY_IMUL)
|
||||
) multiplier (
|
||||
.clk (clk),
|
||||
.enable (mul_ready_in),
|
||||
.dataa (mul_in1),
|
||||
.datab (mul_in2),
|
||||
.result (mul_result_tmp[i])
|
||||
);
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, is_mulh_in, is_alu_w}),
|
||||
.data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, is_mulh_out, is_mul_w_out})
|
||||
);
|
||||
|
||||
assign mul_ready_in = mul_ready_out || ~mul_valid_out;
|
||||
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef XLEN_64
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] :
|
||||
(is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) :
|
||||
mul_result_tmp[i][`XLEN-1:0]);
|
||||
`else
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] : mul_result_tmp[i][`XLEN-1:0];
|
||||
`UNUSED_VAR (is_mul_w_out)
|
||||
`endif
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_result_out;
|
||||
wire [`UUID_WIDTH-1:0] div_uuid_out;
|
||||
wire [`NW_WIDTH-1:0] div_wid_out;
|
||||
wire [NUM_LANES-1:0] div_tmask_out;
|
||||
wire [`XLEN-1:0] div_PC_out;
|
||||
wire [`NR_BITS-1:0] div_rd_out;
|
||||
wire div_wb_out;
|
||||
wire [PID_WIDTH-1:0] div_pid_out;
|
||||
wire div_sop_out, div_eop_out;
|
||||
|
||||
wire is_rem_op = `INST_M_IS_REM(muldiv_op);
|
||||
|
||||
wire div_valid_in = execute_if.valid && ~is_mulx_op;
|
||||
wire div_ready_in;
|
||||
wire div_valid_out;
|
||||
wire div_ready_out;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign div_in1[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]}: execute_if.data.rs1_data[i];
|
||||
assign div_in2[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]}: execute_if.data.rs2_data[i];
|
||||
end
|
||||
|
||||
`ifdef IDIV_DPI
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_result_in;
|
||||
wire div_fire_in = div_valid_in && div_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
|
||||
end
|
||||
assign div_result_in[i] = is_rem_op ? (is_alu_w ? `XLEN'($signed(div_remainder[31:0])) : div_remainder) :
|
||||
(is_alu_w ? `XLEN'($signed(div_quotient[31:0])) : div_quotient);
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + (NUM_LANES * `XLEN)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) div_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (div_ready_in),
|
||||
.data_in ({div_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, div_result_in}),
|
||||
.data_out ({div_valid_out, div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_pid_out, div_sop_out, div_eop_out, div_result_out})
|
||||
);
|
||||
|
||||
assign div_ready_in = div_ready_out || ~div_valid_out;
|
||||
|
||||
`else
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_quotient, div_remainder;
|
||||
wire is_rem_op_out;
|
||||
wire is_div_w_out;
|
||||
wire div_strode;
|
||||
wire div_busy;
|
||||
|
||||
VX_elastic_adapter div_elastic_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (div_valid_in),
|
||||
.ready_in (div_ready_in),
|
||||
.valid_out (div_valid_out),
|
||||
.ready_out (div_ready_out),
|
||||
.strobe (div_strode),
|
||||
.busy (div_busy)
|
||||
);
|
||||
|
||||
VX_serial_div #(
|
||||
.WIDTHN (`XLEN),
|
||||
.WIDTHD (`XLEN),
|
||||
.WIDTHQ (`XLEN),
|
||||
.WIDTHR (`XLEN),
|
||||
.LANES (NUM_LANES)
|
||||
) serial_div (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.strobe (div_strode),
|
||||
.busy (div_busy),
|
||||
|
||||
.is_signed (is_signed_op),
|
||||
.numer (div_in1),
|
||||
.denom (div_in2),
|
||||
|
||||
.quotient (div_quotient),
|
||||
.remainder (div_remainder)
|
||||
);
|
||||
|
||||
reg [TAGW+2-1:0] div_tag_r;
|
||||
always @(posedge clk) begin
|
||||
if (div_valid_in && div_ready_in) begin
|
||||
div_tag_r <= {execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, is_rem_op, is_alu_w, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop};
|
||||
end
|
||||
end
|
||||
|
||||
assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out, div_pid_out, div_sop_out, div_eop_out} = div_tag_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef XLEN_64
|
||||
assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) :
|
||||
(is_div_w_out ? `XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]);
|
||||
`else
|
||||
assign div_result_out[i] = is_rem_op_out ? div_remainder[i] : div_quotient[i];
|
||||
`UNUSED_VAR (is_div_w_out)
|
||||
`endif
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
// can accept new request?
|
||||
assign execute_if.ready = is_mulx_op ? mul_ready_in : div_ready_in;
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (TAGW + (NUM_LANES * `XLEN)),
|
||||
.OUT_REG (1)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({div_valid_out, mul_valid_out}),
|
||||
.ready_in ({div_ready_out, mul_ready_out}),
|
||||
.data_in ({{div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_pid_out, div_sop_out, div_eop_out, div_result_out},
|
||||
{mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out}}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, commit_if.data.data}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
endmodule
|
||||
302
hw/rtl/core/VX_operands.sv
Normal file
302
hw/rtl/core/VX_operands.sv
Normal file
@@ -0,0 +1,302 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_operands import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter CACHE_ENABLE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.master operands_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + `NR_BITS;
|
||||
|
||||
localparam STATE_IDLE = 2'd0;
|
||||
localparam STATE_FETCH1 = 2'd1;
|
||||
localparam STATE_FETCH2 = 2'd2;
|
||||
localparam STATE_FETCH3 = 2'd3;
|
||||
localparam STATE_BITS = 2;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
|
||||
reg [`NR_BITS-1:0] gpr_rd_rid, gpr_rd_rid_n;
|
||||
reg [ISSUE_WIS_W-1:0] gpr_rd_wis, gpr_rd_wis_n;
|
||||
|
||||
reg [ISSUE_RATIO-1:0][`NUM_THREADS-1:0][`XLEN-1:0] cache_data, cache_data_n;
|
||||
reg [ISSUE_RATIO-1:0][`NR_BITS-1:0] cache_reg, cache_reg_n;
|
||||
reg [ISSUE_RATIO-1:0][`NUM_THREADS-1:0] cache_tmask, cache_tmask_n;
|
||||
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
||||
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
||||
|
||||
reg [STATE_BITS-1:0] state, state_n;
|
||||
reg [`NR_BITS-1:0] rs2, rs2_n;
|
||||
reg [`NR_BITS-1:0] rs3, rs3_n;
|
||||
reg rs2_ready, rs2_ready_n;
|
||||
reg rs3_ready, rs3_ready_n;
|
||||
reg data_ready, data_ready_n;
|
||||
|
||||
wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0);
|
||||
wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0);
|
||||
wire is_rs3_zero = (scoreboard_if[i].data.rs3 == 0);
|
||||
|
||||
VX_operands_if staging_if();
|
||||
|
||||
always @(*) begin
|
||||
state_n = state;
|
||||
rs2_n = rs2;
|
||||
rs3_n = rs3;
|
||||
rs2_ready_n = rs2_ready;
|
||||
rs3_ready_n = rs3_ready;
|
||||
rs1_data_n = rs1_data;
|
||||
rs2_data_n = rs2_data;
|
||||
rs3_data_n = rs3_data;
|
||||
cache_data_n = cache_data;
|
||||
cache_reg_n = cache_reg;
|
||||
cache_tmask_n= cache_tmask;
|
||||
cache_eop_n = cache_eop;
|
||||
gpr_rd_rid_n = gpr_rd_rid;
|
||||
gpr_rd_wis_n = gpr_rd_wis;
|
||||
data_ready_n = data_ready;
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (staging_if.valid && staging_if.ready) begin
|
||||
data_ready_n = 0;
|
||||
end
|
||||
if (scoreboard_if[i].valid && data_ready_n == 0) begin
|
||||
data_ready_n = 1;
|
||||
if (is_rs3_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if[i].data.rs3 == cache_reg[scoreboard_if[i].data.wis] &&
|
||||
(scoreboard_if[i].data.tmask & cache_tmask[scoreboard_if[i].data.wis]) == scoreboard_if[i].data.tmask)) begin
|
||||
rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if[i].data.wis];
|
||||
rs3_ready_n = 1;
|
||||
end else begin
|
||||
rs3_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if[i].data.rs3;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH3;
|
||||
end
|
||||
if (is_rs2_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if[i].data.rs2 == cache_reg[scoreboard_if[i].data.wis] &&
|
||||
(scoreboard_if[i].data.tmask & cache_tmask[scoreboard_if[i].data.wis]) == scoreboard_if[i].data.tmask)) begin
|
||||
rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if[i].data.wis];
|
||||
rs2_ready_n = 1;
|
||||
end else begin
|
||||
rs2_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if[i].data.rs2;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH2;
|
||||
end
|
||||
if (is_rs1_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if[i].data.rs1 == cache_reg[scoreboard_if[i].data.wis] &&
|
||||
(scoreboard_if[i].data.tmask & cache_tmask[scoreboard_if[i].data.wis]) == scoreboard_if[i].data.tmask)) begin
|
||||
rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if[i].data.wis];
|
||||
end else begin
|
||||
gpr_rd_rid_n = scoreboard_if[i].data.rs1;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH1;
|
||||
end
|
||||
end
|
||||
gpr_rd_wis_n = scoreboard_if[i].data.wis;
|
||||
rs2_n = scoreboard_if[i].data.rs2;
|
||||
rs3_n = scoreboard_if[i].data.rs3;
|
||||
end
|
||||
STATE_FETCH1: begin
|
||||
rs1_data_n = gpr_rd_data;
|
||||
if (~rs2_ready) begin
|
||||
gpr_rd_rid_n = rs2;
|
||||
state_n = STATE_FETCH2;
|
||||
end else if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FETCH2: begin
|
||||
rs2_data_n = gpr_rd_data;
|
||||
if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FETCH3: begin
|
||||
rs3_data_n = gpr_rd_data;
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
|
||||
if (CACHE_ENABLE != 0 && writeback_if[i].valid) begin
|
||||
if ((cache_reg[writeback_if[i].data.wis] == writeback_if[i].data.rd)
|
||||
|| (cache_eop[writeback_if[i].data.wis] && writeback_if[i].data.sop)) begin
|
||||
for (integer j = 0; j < `NUM_THREADS; ++j) begin
|
||||
if (writeback_if[i].data.tmask[j]) begin
|
||||
cache_data_n[writeback_if[i].data.wis][j] = writeback_if[i].data.data[j];
|
||||
end
|
||||
end
|
||||
cache_reg_n[writeback_if[i].data.wis] = writeback_if[i].data.rd;
|
||||
cache_eop_n[writeback_if[i].data.wis] = writeback_if[i].data.eop;
|
||||
if (writeback_if[i].data.sop) begin
|
||||
cache_tmask_n[writeback_if[i].data.wis] = writeback_if[i].data.tmask;
|
||||
end else begin
|
||||
cache_tmask_n[writeback_if[i].data.wis] |= writeback_if[i].data.tmask;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
gpr_rd_rid <= '0;
|
||||
gpr_rd_wis <= '0;
|
||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
cache_reg <= '0;
|
||||
data_ready <= 0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
rs2 <= rs2_n;
|
||||
rs3 <= rs3_n;
|
||||
rs2_ready <= rs2_ready_n;
|
||||
rs3_ready <= rs3_ready_n;
|
||||
rs1_data <= rs1_data_n;
|
||||
rs2_data <= rs2_data_n;
|
||||
rs3_data <= rs3_data_n;
|
||||
gpr_rd_rid <= gpr_rd_rid_n;
|
||||
gpr_rd_wis <= gpr_rd_wis_n;
|
||||
cache_data <= cache_data_n;
|
||||
cache_reg <= cache_reg_n;
|
||||
cache_tmask <= cache_tmask_n;
|
||||
cache_eop <= cache_eop_n;
|
||||
data_ready <= data_ready_n;
|
||||
end
|
||||
end
|
||||
|
||||
// GPR banks
|
||||
|
||||
`ifdef GPR_RESET
|
||||
reg wr_enabled = 0;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_enabled <= 1;
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire wr_enabled = 1;
|
||||
`endif
|
||||
|
||||
for (genvar j = 0; j < `NUM_THREADS; ++j) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (`XLEN),
|
||||
.SIZE (`NUM_REGS * ISSUE_RATIO),
|
||||
`ifdef GPR_RESET
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
`UNUSED_PIN (wren),
|
||||
.write (wr_enabled && writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||
.waddr (wis_to_addr(writeback_if[i].data.rd, writeback_if[i].data.wis)),
|
||||
.wdata (writeback_if[i].data.data[j]),
|
||||
.raddr (wis_to_addr(gpr_rd_rid, gpr_rd_wis)),
|
||||
.rdata (gpr_rd_data[j])
|
||||
);
|
||||
end
|
||||
|
||||
// staging buffer
|
||||
|
||||
`RESET_RELAY (stg_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) stg_buf (
|
||||
.clk (clk),
|
||||
.reset (stg_buf_reset),
|
||||
.valid_in (scoreboard_if[i].valid),
|
||||
.ready_in (scoreboard_if[i].ready),
|
||||
.data_in ({
|
||||
scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.wis,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd}),
|
||||
.data_out ({
|
||||
staging_if.data.uuid,
|
||||
staging_if.data.wis,
|
||||
staging_if.data.tmask,
|
||||
staging_if.data.PC,
|
||||
staging_if.data.wb,
|
||||
staging_if.data.ex_type,
|
||||
staging_if.data.op_type,
|
||||
staging_if.data.op_mod,
|
||||
staging_if.data.use_PC,
|
||||
staging_if.data.use_imm,
|
||||
staging_if.data.imm,
|
||||
staging_if.data.rd}),
|
||||
.valid_out (staging_if.valid),
|
||||
.ready_out (staging_if.ready)
|
||||
);
|
||||
|
||||
assign staging_if.data.rs1_data = rs1_data;
|
||||
assign staging_if.data.rs2_data = rs2_data;
|
||||
assign staging_if.data.rs3_data = rs3_data;
|
||||
|
||||
// output buffer
|
||||
|
||||
wire valid_stg, ready_stg;
|
||||
assign valid_stg = staging_if.valid && data_ready;
|
||||
assign staging_if.ready = ready_stg && data_ready;
|
||||
|
||||
`RESET_RELAY (out_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW + (3 * `NUM_THREADS * `XLEN)),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.valid_in (valid_stg),
|
||||
.ready_in (ready_stg),
|
||||
.data_in (staging_if.data),
|
||||
.data_out (operands_if[i].data),
|
||||
.valid_out (operands_if[i].valid),
|
||||
.ready_out (operands_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
endmodule
|
||||
79
hw/rtl/core/VX_pending_instr.sv
Normal file
79
hw/rtl/core/VX_pending_instr.sv
Normal file
@@ -0,0 +1,79 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_pending_instr #(
|
||||
parameter CTR_WIDTH = 12,
|
||||
parameter ALM_EMPTY = 1,
|
||||
parameter DECR_COUNT = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire incr,
|
||||
input wire [`NW_WIDTH-1:0] incr_wid,
|
||||
input wire [DECR_COUNT-1:0] decr,
|
||||
input wire [DECR_COUNT-1:0][`NW_WIDTH-1:0] decr_wid,
|
||||
input wire [`NW_WIDTH-1:0] alm_empty_wid,
|
||||
output wire empty,
|
||||
output wire alm_empty
|
||||
);
|
||||
localparam COUNTW = `CLOG2(DECR_COUNT+1);
|
||||
|
||||
reg [`NUM_WARPS-1:0][CTR_WIDTH-1:0] pending_instrs;
|
||||
reg [`NUM_WARPS-1:0][COUNTW-1:0] decr_cnt;
|
||||
reg [`NUM_WARPS-1:0][DECR_COUNT-1:0] decr_mask;
|
||||
reg [`NUM_WARPS-1:0] incr_cnt, incr_cnt_n;
|
||||
reg [`NUM_WARPS-1:0] alm_empty_r, empty_r;
|
||||
|
||||
always @(*) begin
|
||||
incr_cnt_n = 0;
|
||||
decr_mask = 0;
|
||||
if (incr) begin
|
||||
incr_cnt_n[incr_wid] = 1;
|
||||
end
|
||||
for (integer i = 0; i < DECR_COUNT; ++i) begin
|
||||
if (decr[i]) begin
|
||||
decr_mask[decr_wid[i]][i] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
wire [COUNTW-1:0] decr_cnt_n;
|
||||
`POP_COUNT(decr_cnt_n, decr_mask[i]);
|
||||
|
||||
wire [CTR_WIDTH-1:0] pending_instrs_n = pending_instrs[i] + CTR_WIDTH'(incr_cnt[i]) - CTR_WIDTH'(decr_cnt[i]);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
incr_cnt[i] <= '0;
|
||||
decr_cnt[i] <= '0;
|
||||
pending_instrs[i] <= '0;
|
||||
alm_empty_r[i] <= 0;
|
||||
empty_r[i] <= 1;
|
||||
end else begin
|
||||
incr_cnt[i] <= incr_cnt_n[i];
|
||||
decr_cnt[i] <= decr_cnt_n;
|
||||
pending_instrs[i] <= pending_instrs_n;
|
||||
alm_empty_r[i] <= (pending_instrs_n == ALM_EMPTY);
|
||||
empty_r[i] <= (pending_instrs_n == 0);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign alm_empty = alm_empty_r[alm_empty_wid];
|
||||
assign empty = (& empty_r);
|
||||
|
||||
endmodule
|
||||
379
hw/rtl/core/VX_schedule.sv
Normal file
379
hw/rtl/core/VX_schedule.sv
Normal file
@@ -0,0 +1,379 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_schedule import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// configuration
|
||||
input base_dcrs_t base_dcrs,
|
||||
|
||||
// inputsdecode_if
|
||||
VX_warp_ctl_if.slave warp_ctl_if,
|
||||
VX_branch_ctl_if.slave branch_ctl_if [`NUM_ALU_BLOCKS],
|
||||
VX_decode_sched_if.slave decode_sched_if,
|
||||
VX_commit_sched_if.slave commit_sched_if,
|
||||
|
||||
// outputs
|
||||
VX_schedule_if.master schedule_if,
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if.master gbar_bus_if,
|
||||
`endif
|
||||
VX_sched_csr_if.master sched_csr_if,
|
||||
|
||||
// status
|
||||
output wire busy
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled
|
||||
reg [`NUM_WARPS-1:0] stalled_warps, stalled_warps_n; // set when branch/gpgpu instructions are issued
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks, thread_masks_n;
|
||||
reg [`NUM_WARPS-1:0][`XLEN-1:0] warp_pcs, warp_pcs_n;
|
||||
|
||||
wire [`NW_WIDTH-1:0] schedule_wid;
|
||||
wire [`NUM_THREADS-1:0] schedule_tmask;
|
||||
wire [`XLEN-1:0] schedule_pc;
|
||||
wire schedule_valid;
|
||||
wire schedule_ready;
|
||||
|
||||
// split/join
|
||||
wire join_valid;
|
||||
wire join_is_dvg;
|
||||
wire join_is_else;
|
||||
wire [`NW_WIDTH-1:0] join_wid;
|
||||
wire [`NUM_THREADS-1:0] join_tmask;
|
||||
wire [`XLEN-1:0] join_pc;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] cycles;
|
||||
|
||||
reg [`NUM_WARPS-1:0][`UUID_WIDTH-1:0] issued_instrs;
|
||||
|
||||
wire schedule_fire = schedule_valid && schedule_ready;
|
||||
wire schedule_if_fire = schedule_if.valid && schedule_if.ready;
|
||||
|
||||
// branch
|
||||
wire [`NUM_ALU_BLOCKS-1:0] branch_valid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0] branch_wid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0] branch_taken;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`XLEN-1:0] branch_dest;
|
||||
for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin
|
||||
assign branch_valid[i] = branch_ctl_if[i].valid;
|
||||
assign branch_wid[i] = branch_ctl_if[i].wid;
|
||||
assign branch_taken[i] = branch_ctl_if[i].taken;
|
||||
assign branch_dest[i] = branch_ctl_if[i].dest;
|
||||
end
|
||||
|
||||
// barriers
|
||||
reg [`NUM_BARRIERS-1:0][`NUM_WARPS-1:0] barrier_masks, barrier_masks_n;
|
||||
reg [`NUM_WARPS-1:0] barrier_stalls, barrier_stalls_n;
|
||||
wire [`CLOG2(`NUM_WARPS+1)-1:0] active_barrier_count;
|
||||
wire [`NUM_WARPS-1:0] curr_barrier_mask;
|
||||
`ifdef GBAR_ENABLE
|
||||
reg [`NUM_WARPS-1:0] curr_barrier_mask_n;
|
||||
reg gbar_req_valid;
|
||||
reg [`NB_WIDTH-1:0] gbar_req_id;
|
||||
reg [`NC_WIDTH-1:0] gbar_req_size_m1;
|
||||
`endif
|
||||
|
||||
assign curr_barrier_mask = barrier_masks[warp_ctl_if.barrier.id];
|
||||
`POP_COUNT(active_barrier_count, curr_barrier_mask);
|
||||
`UNUSED_VAR (active_barrier_count)
|
||||
|
||||
always @(*) begin
|
||||
active_warps_n = active_warps;
|
||||
stalled_warps_n = stalled_warps;
|
||||
thread_masks_n = thread_masks;
|
||||
barrier_masks_n = barrier_masks;
|
||||
barrier_stalls_n= barrier_stalls;
|
||||
warp_pcs_n = warp_pcs;
|
||||
|
||||
// wspawn handling
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
active_warps_n |= warp_ctl_if.wspawn.wmask;
|
||||
for (integer i = 0; i < `NUM_WARPS; ++i) begin
|
||||
if (warp_ctl_if.wspawn.wmask[i]) begin
|
||||
thread_masks_n[i][0] = 1;
|
||||
warp_pcs_n[i] = warp_ctl_if.wspawn.pc;
|
||||
end
|
||||
end
|
||||
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
|
||||
end
|
||||
|
||||
// TMC handling
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
active_warps_n[warp_ctl_if.wid] = (warp_ctl_if.tmc.tmask != 0);
|
||||
thread_masks_n[warp_ctl_if.wid] = warp_ctl_if.tmc.tmask;
|
||||
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
|
||||
end
|
||||
|
||||
// split handling
|
||||
if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
|
||||
if (warp_ctl_if.split.is_dvg) begin
|
||||
thread_masks_n[warp_ctl_if.wid] = warp_ctl_if.split.then_tmask;
|
||||
end
|
||||
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
|
||||
end
|
||||
|
||||
// join handling
|
||||
if (join_valid) begin
|
||||
if (join_is_dvg) begin
|
||||
if (join_is_else) begin
|
||||
warp_pcs_n[join_wid] = join_pc;
|
||||
end
|
||||
thread_masks_n[join_wid] = join_tmask;
|
||||
end
|
||||
stalled_warps_n[join_wid] = 0; // unlock warp
|
||||
end
|
||||
|
||||
// barrier handling
|
||||
`ifdef GBAR_ENABLE
|
||||
curr_barrier_mask_n = curr_barrier_mask;
|
||||
curr_barrier_mask_n[warp_ctl_if.wid] = 1;
|
||||
`endif
|
||||
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid) begin
|
||||
if (~warp_ctl_if.barrier.is_global
|
||||
&& (active_barrier_count[`NW_WIDTH-1:0] == warp_ctl_if.barrier.size_m1[`NW_WIDTH-1:0])) begin
|
||||
barrier_masks_n[warp_ctl_if.barrier.id] = '0;
|
||||
barrier_stalls_n &= ~barrier_masks[warp_ctl_if.barrier.id];
|
||||
end else begin
|
||||
barrier_masks_n[warp_ctl_if.barrier.id][warp_ctl_if.wid] = 1;
|
||||
barrier_stalls_n[warp_ctl_if.wid] = 1;
|
||||
end
|
||||
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
|
||||
end
|
||||
`ifdef GBAR_ENABLE
|
||||
if (gbar_bus_if.rsp_valid && (gbar_req_id == gbar_bus_if.rsp_id)) begin
|
||||
barrier_masks_n[gbar_bus_if.rsp_id] = '0;
|
||||
barrier_stalls_n = '0; // unlock all warps
|
||||
end
|
||||
`endif
|
||||
|
||||
// Branch handling
|
||||
for (integer i = 0; i < `NUM_ALU_BLOCKS; ++i) begin
|
||||
if (branch_valid[i]) begin
|
||||
if (branch_taken[i]) begin
|
||||
warp_pcs_n[branch_wid[i]] = branch_dest[i];
|
||||
end
|
||||
stalled_warps_n[branch_wid[i]] = 0; // unlock warp
|
||||
end
|
||||
end
|
||||
|
||||
// decode unlock
|
||||
if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin
|
||||
stalled_warps_n[decode_sched_if.wid] = 0;
|
||||
end
|
||||
|
||||
// CSR unlock
|
||||
if (sched_csr_if.unlock_warp) begin
|
||||
stalled_warps_n[sched_csr_if.unlock_wid] = 0;
|
||||
end
|
||||
|
||||
// stall the warp until decode stage
|
||||
if (schedule_fire) begin
|
||||
stalled_warps_n[schedule_wid] = 1;
|
||||
end
|
||||
|
||||
// advance PC
|
||||
if (schedule_if_fire) begin
|
||||
warp_pcs_n[schedule_if.data.wid] = schedule_if.data.PC + 4;
|
||||
end
|
||||
end
|
||||
|
||||
`UNUSED_VAR (base_dcrs)
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
barrier_masks <= '0;
|
||||
`ifdef GBAR_ENABLE
|
||||
gbar_req_valid <= 0;
|
||||
`endif
|
||||
stalled_warps <= '0;
|
||||
warp_pcs <= '0;
|
||||
active_warps <= '0;
|
||||
thread_masks <= '0;
|
||||
barrier_stalls <= '0;
|
||||
issued_instrs <= '0;
|
||||
cycles <= '0;
|
||||
|
||||
// activate first warp
|
||||
warp_pcs[0] <= base_dcrs.startup_addr;
|
||||
active_warps[0] <= 1;
|
||||
thread_masks[0][0] <= 1;
|
||||
end else begin
|
||||
active_warps <= active_warps_n;
|
||||
stalled_warps <= stalled_warps_n;
|
||||
thread_masks <= thread_masks_n;
|
||||
warp_pcs <= warp_pcs_n;
|
||||
barrier_masks <= barrier_masks_n;
|
||||
barrier_stalls <= barrier_stalls_n;
|
||||
|
||||
// global barrier scheduling
|
||||
`ifdef GBAR_ENABLE
|
||||
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid
|
||||
&& warp_ctl_if.barrier.is_global
|
||||
&& (curr_barrier_mask_n == active_warps)) begin
|
||||
gbar_req_valid <= 1;
|
||||
gbar_req_id <= warp_ctl_if.barrier.id;
|
||||
gbar_req_size_m1 <= warp_ctl_if.barrier.size_m1[`NC_WIDTH-1:0];
|
||||
end
|
||||
if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin
|
||||
gbar_req_valid <= 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
if (schedule_if_fire) begin
|
||||
issued_instrs[schedule_if.data.wid] <= issued_instrs[schedule_if.data.wid] + `UUID_WIDTH'(1);
|
||||
end
|
||||
|
||||
if (busy) begin
|
||||
cycles <= cycles + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// barrier handling
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
assign gbar_bus_if.req_valid = gbar_req_valid;
|
||||
assign gbar_bus_if.req_id = gbar_req_id;
|
||||
assign gbar_bus_if.req_size_m1 = gbar_req_size_m1;
|
||||
assign gbar_bus_if.req_core_id = `NC_WIDTH'(CORE_ID % `NUM_CORES);
|
||||
`endif
|
||||
|
||||
// split/join handling
|
||||
|
||||
`RESET_RELAY (split_join_reset, reset);
|
||||
|
||||
VX_split_join #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) split_join (
|
||||
.clk (clk),
|
||||
.reset (split_join_reset),
|
||||
.valid (warp_ctl_if.valid),
|
||||
.wid (warp_ctl_if.wid),
|
||||
.split (warp_ctl_if.split),
|
||||
.sjoin (warp_ctl_if.sjoin),
|
||||
.join_valid (join_valid),
|
||||
.join_is_dvg (join_is_dvg),
|
||||
.join_is_else (join_is_else),
|
||||
.join_wid (join_wid),
|
||||
.join_tmask (join_tmask),
|
||||
.join_pc (join_pc)
|
||||
);
|
||||
|
||||
// schedule the next ready warp
|
||||
|
||||
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~(stalled_warps | barrier_stalls);
|
||||
|
||||
VX_lzc #(
|
||||
.N (`NUM_WARPS),
|
||||
.REVERSE (1)
|
||||
) wid_select (
|
||||
.data_in (ready_warps),
|
||||
.data_out (schedule_wid),
|
||||
.valid_out (schedule_valid)
|
||||
);
|
||||
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + `XLEN)-1:0] schedule_data;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign schedule_data[i] = {thread_masks[i], warp_pcs[i]};
|
||||
end
|
||||
|
||||
assign {schedule_tmask, schedule_pc} = {
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `XLEN)-1:(`NUM_THREADS + `XLEN)-4],
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `XLEN)-5:0]
|
||||
};
|
||||
|
||||
`ifndef NDEBUG
|
||||
localparam GNW_WIDTH = `LOG2UP(`NUM_CLUSTERS * `NUM_CORES * `NUM_WARPS);
|
||||
reg [`UUID_WIDTH-1:0] instr_uuid;
|
||||
wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(schedule_wid);
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(1, 0, 0));
|
||||
end else if (schedule_fire) begin
|
||||
instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(0, 32'(g_wid), 64'(schedule_pc)));
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire [`UUID_WIDTH-1:0] instr_uuid = '0;
|
||||
`endif
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`NUM_THREADS + `XLEN + `NW_WIDTH)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (schedule_valid),
|
||||
.ready_in (schedule_ready),
|
||||
.data_in ({schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid}),
|
||||
.valid_out (schedule_if.valid),
|
||||
.ready_out (schedule_if.ready)
|
||||
);
|
||||
|
||||
assign schedule_if.data.uuid = instr_uuid;
|
||||
|
||||
`RESET_RELAY (pending_instr_reset, reset);
|
||||
|
||||
wire no_pending_instr;
|
||||
VX_pending_instr #(
|
||||
.CTR_WIDTH (12),
|
||||
.DECR_COUNT (`ISSUE_WIDTH),
|
||||
.ALM_EMPTY (1)
|
||||
) pending_instr(
|
||||
.clk (clk),
|
||||
.reset (pending_instr_reset),
|
||||
.incr (schedule_if_fire),
|
||||
.incr_wid (schedule_if.data.wid),
|
||||
.decr (commit_sched_if.committed),
|
||||
.decr_wid (commit_sched_if.committed_wid),
|
||||
.alm_empty_wid (sched_csr_if.alm_empty_wid),
|
||||
.alm_empty (sched_csr_if.alm_empty),
|
||||
.empty (no_pending_instr)
|
||||
);
|
||||
|
||||
`BUFFER_BUSY (busy, (active_warps != 0 || ~no_pending_instr), 1);
|
||||
|
||||
// export CSRs
|
||||
assign sched_csr_if.cycles = cycles;
|
||||
assign sched_csr_if.active_warps = active_warps;
|
||||
assign sched_csr_if.thread_masks = thread_masks;
|
||||
|
||||
// timeout handling
|
||||
reg [31:0] timeout_ctr;
|
||||
reg timeout_enable;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
timeout_ctr <= '0;
|
||||
timeout_enable <= 0;
|
||||
end else begin
|
||||
if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin
|
||||
timeout_enable <= 1;
|
||||
end
|
||||
if (timeout_enable && active_warps !=0 && active_warps == stalled_warps) begin
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (active_warps == 0 || active_warps != stalled_warps) begin
|
||||
timeout_ctr <= '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps));
|
||||
|
||||
endmodule
|
||||
139
hw/rtl/core/VX_scoreboard.sv
Normal file
139
hw/rtl/core/VX_scoreboard.sv
Normal file
@@ -0,0 +1,139 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave ibuffer_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.master scoreboard_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4) + 1;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
reg [3:0] ready_masks, ready_masks_n;
|
||||
VX_ibuffer_if staging_if();
|
||||
|
||||
wire writeback_fire = writeback_if[i].valid && writeback_if[i].data.eop;
|
||||
|
||||
always @(*) begin
|
||||
inuse_regs_n = inuse_regs;
|
||||
ready_masks_n = ready_masks;
|
||||
if (writeback_fire) begin
|
||||
inuse_regs_n[writeback_if[i].data.wis][writeback_if[i].data.rd] = 0;
|
||||
ready_masks_n |= {4{(ISSUE_RATIO == 0) || writeback_if[i].data.wis == staging_if.data.wis}}
|
||||
& {(writeback_if[i].data.rd == staging_if.data.rd),
|
||||
(writeback_if[i].data.rd == staging_if.data.rs1),
|
||||
(writeback_if[i].data.rd == staging_if.data.rs2),
|
||||
(writeback_if[i].data.rd == staging_if.data.rs3)};
|
||||
end
|
||||
if (staging_if.valid && staging_if.ready && staging_if.data.wb) begin
|
||||
inuse_regs_n[staging_if.data.wis][staging_if.data.rd] = 1;
|
||||
ready_masks_n = '0;
|
||||
end
|
||||
if (ibuffer_if[i].valid && ibuffer_if[i].ready) begin
|
||||
ready_masks_n = ~{inuse_regs_n[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd],
|
||||
inuse_regs_n[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1],
|
||||
inuse_regs_n[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2],
|
||||
inuse_regs_n[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]};
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
inuse_regs <= '0;
|
||||
ready_masks <= '0;
|
||||
end else begin
|
||||
inuse_regs <= inuse_regs_n;
|
||||
ready_masks <= ready_masks_n;
|
||||
end
|
||||
end
|
||||
|
||||
// staging buffer
|
||||
|
||||
`RESET_RELAY (stg_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) stg_buf (
|
||||
.clk (clk),
|
||||
.reset (stg_buf_reset),
|
||||
.valid_in (ibuffer_if[i].valid),
|
||||
.ready_in (ibuffer_if[i].ready),
|
||||
.data_in (ibuffer_if[i].data),
|
||||
.data_out (staging_if.data),
|
||||
.valid_out (staging_if.valid),
|
||||
.ready_out (staging_if.ready)
|
||||
);
|
||||
|
||||
// output buffer
|
||||
|
||||
wire valid_stg, ready_stg;
|
||||
wire regs_ready = (& ready_masks);
|
||||
assign valid_stg = staging_if.valid && regs_ready;
|
||||
assign staging_if.ready = ready_stg && regs_ready;
|
||||
|
||||
`RESET_RELAY (out_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.valid_in (valid_stg),
|
||||
.ready_in (ready_stg),
|
||||
.data_in (staging_if.data),
|
||||
.data_out (scoreboard_if[i].data),
|
||||
.valid_out (scoreboard_if[i].valid),
|
||||
.ready_out (scoreboard_if[i].ready)
|
||||
);
|
||||
|
||||
reg [31:0] timeout_ctr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
timeout_ctr <= '0;
|
||||
end else begin
|
||||
if (staging_if.valid && ~regs_ready) begin
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, CORE_ID, wis_to_wid(staging_if.data.wis, i), staging_if.data.PC, staging_if.data.tmask, timeout_ctr,
|
||||
~ready_masks, staging_if.data.uuid));
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (staging_if.valid && staging_if.ready) begin
|
||||
timeout_ctr <= '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, CORE_ID, wis_to_wid(staging_if.data.wis, i), staging_if.data.PC, staging_if.data.tmask, timeout_ctr,
|
||||
~ready_masks, staging_if.data.uuid));
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, wis_to_wid(writeback_if[i].data.wis, i), writeback_if[i].data.PC, writeback_if[i].data.tmask, writeback_if[i].data.rd, writeback_if[i].data.uuid));
|
||||
end
|
||||
|
||||
endmodule
|
||||
209
hw/rtl/core/VX_sfu_unit.sv
Normal file
209
hw/rtl/core/VX_sfu_unit.sv
Normal file
@@ -0,0 +1,209 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_sfu_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.slave mem_perf_if,
|
||||
VX_pipeline_perf_if.slave pipeline_perf_if,
|
||||
`endif
|
||||
|
||||
input base_dcrs_t base_dcrs,
|
||||
|
||||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if [`NUM_FPU_BLOCKS],
|
||||
`endif
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
VX_sched_csr_if.slave sched_csr_if,
|
||||
VX_warp_ctl_if.master warp_ctl_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + 1;
|
||||
localparam RSP_ARB_IDX_WCTL = 0;
|
||||
localparam RSP_ARB_IDX_CSR = 1;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) execute_if[BLOCK_SIZE]();
|
||||
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (1)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.dispatch_if(dispatch_if),
|
||||
.execute_if (execute_if)
|
||||
);
|
||||
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in;
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in;
|
||||
wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_sfu_perf_if sfu_perf_if();
|
||||
`endif
|
||||
|
||||
// Warp control block
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_execute_if();
|
||||
VX_commit_if#(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_commit_if();
|
||||
|
||||
assign wctl_execute_if.valid = execute_if[0].valid && `INST_SFU_IS_WCTL(execute_if[0].data.op_type);
|
||||
assign wctl_execute_if.data = execute_if[0].data;
|
||||
|
||||
`RESET_RELAY (wctl_reset, reset);
|
||||
|
||||
VX_wctl_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_unit (
|
||||
.clk (clk),
|
||||
.reset (wctl_reset),
|
||||
.execute_if (wctl_execute_if),
|
||||
.warp_ctl_if(warp_ctl_if),
|
||||
.commit_if (wctl_commit_if)
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.data;
|
||||
assign wctl_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_WCTL];
|
||||
|
||||
// CSR unit
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_execute_if();
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_commit_if();
|
||||
|
||||
assign csr_execute_if.valid = execute_if[0].valid && `INST_SFU_IS_CSR(execute_if[0].data.op_type);
|
||||
assign csr_execute_if.data = execute_if[0].data;
|
||||
|
||||
`RESET_RELAY (csr_reset, reset);
|
||||
|
||||
VX_csr_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_unit (
|
||||
.clk (clk),
|
||||
.reset (csr_reset),
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
.execute_if (csr_execute_if),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
.pipeline_perf_if(pipeline_perf_if),
|
||||
.sfu_perf_if (sfu_perf_if),
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`endif
|
||||
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.commit_if (csr_commit_if)
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_CSR] = csr_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_CSR] = csr_commit_if.data;
|
||||
assign csr_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_CSR];
|
||||
|
||||
// can accept new request?
|
||||
|
||||
reg sfu_req_ready;
|
||||
always @(*) begin
|
||||
case (execute_if[0].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_req_ready = csr_execute_if.ready;
|
||||
default: sfu_req_ready = wctl_execute_if.ready;
|
||||
endcase
|
||||
end
|
||||
assign execute_if[0].ready = sfu_req_ready;
|
||||
|
||||
// response arbitration
|
||||
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) arb_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG (1)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.valid_in (rsp_arb_valid_in),
|
||||
.ready_in (rsp_arb_ready_in),
|
||||
.data_in (rsp_arb_data_in),
|
||||
.data_out (arb_commit_if[0].data),
|
||||
.valid_out (arb_commit_if[0].valid),
|
||||
.ready_out (arb_commit_if[0].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (3)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.commit_in_if (arb_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_wctl_stalls;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_wctl_stalls <= '0;
|
||||
end else begin
|
||||
perf_wctl_stalls <= perf_wctl_stalls + `PERF_CTR_BITS'(wctl_execute_if.valid && ~wctl_execute_if.ready);
|
||||
end
|
||||
end
|
||||
assign sfu_perf_if.wctl_stalls = perf_wctl_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
185
hw/rtl/core/VX_smem_unit.sv
Normal file
185
hw/rtl/core/VX_smem_unit.sv
Normal file
@@ -0,0 +1,185 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_smem_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.slave mem_perf_in_if,
|
||||
VX_mem_perf_if.master mem_perf_out_if,
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if.slave dcache_bus_in_if [DCACHE_NUM_REQS],
|
||||
VX_mem_bus_if.master dcache_bus_out_if [DCACHE_NUM_REQS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
`ifdef SM_ENABLE
|
||||
localparam SMEM_ADDR_WIDTH = `SMEM_LOG_SIZE - `CLOG2(DCACHE_WORD_SIZE);
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
|
||||
) switch_out_bus_if[2 * DCACHE_NUM_REQS]();
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_cache_perf_if perf_smem_if();
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (switch_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
VX_smem_switch #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (0),
|
||||
.ARBITER ("P"),
|
||||
.OUT_REG_REQ (2),
|
||||
.OUT_REG_RSP (2)
|
||||
) smem_switch (
|
||||
.clk (clk),
|
||||
.reset (switch_reset),
|
||||
.bus_in_if (dcache_bus_in_if[i]),
|
||||
.bus_out_if (switch_out_bus_if[i * 2 +: 2])
|
||||
);
|
||||
end
|
||||
|
||||
// this bus goes to the dcache
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_out_if[i], switch_out_bus_if[i * 2]);
|
||||
end
|
||||
|
||||
wire [DCACHE_NUM_REQS-1:0] smem_req_valid;
|
||||
wire [DCACHE_NUM_REQS-1:0] smem_req_rw;
|
||||
wire [DCACHE_NUM_REQS-1:0][SMEM_ADDR_WIDTH-1:0] smem_req_addr;
|
||||
wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] smem_req_byteen;
|
||||
wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] smem_req_data;
|
||||
wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] smem_req_tag;
|
||||
wire [DCACHE_NUM_REQS-1:0] smem_req_ready;
|
||||
wire [DCACHE_NUM_REQS-1:0] smem_rsp_valid;
|
||||
wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] smem_rsp_data;
|
||||
wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] smem_rsp_tag;
|
||||
wire [DCACHE_NUM_REQS-1:0] smem_rsp_ready;
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
|
||||
assign smem_req_valid[i] = switch_out_bus_if[i * 2 + 1].req_valid;
|
||||
assign smem_req_rw[i] = switch_out_bus_if[i * 2 + 1].req_data.rw;
|
||||
assign smem_req_byteen[i] = switch_out_bus_if[i * 2 + 1].req_data.byteen;
|
||||
assign smem_req_data[i] = switch_out_bus_if[i * 2 + 1].req_data.data;
|
||||
assign smem_req_tag[i] = switch_out_bus_if[i * 2 + 1].req_data.tag;
|
||||
assign switch_out_bus_if[i * 2 + 1].req_ready = smem_req_ready[i];
|
||||
|
||||
assign switch_out_bus_if[i * 2 + 1].rsp_valid = smem_rsp_valid[i];
|
||||
assign switch_out_bus_if[i * 2 + 1].rsp_data.data = smem_rsp_data[i];
|
||||
assign switch_out_bus_if[i * 2 + 1].rsp_data.tag = smem_rsp_tag[i];
|
||||
assign smem_rsp_ready[i] = switch_out_bus_if[i * 2 + 1].rsp_ready;
|
||||
|
||||
assign smem_req_addr[i] = switch_out_bus_if[i * 2 + 1].req_data.addr[SMEM_ADDR_WIDTH-1:0];
|
||||
end
|
||||
|
||||
`RESET_RELAY (smem_reset, reset);
|
||||
|
||||
VX_shared_mem #(
|
||||
.INSTANCE_ID($sformatf("core%0d-smem", CORE_ID)),
|
||||
.SIZE (1 << `SMEM_LOG_SIZE),
|
||||
.NUM_REQS (DCACHE_NUM_REQS),
|
||||
.NUM_BANKS (`SMEM_NUM_BANKS),
|
||||
.WORD_SIZE (DCACHE_WORD_SIZE),
|
||||
.ADDR_WIDTH (SMEM_ADDR_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
|
||||
) shared_mem (
|
||||
.clk (clk),
|
||||
.reset (smem_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf_if(perf_smem_if),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.req_valid (smem_req_valid),
|
||||
.req_rw (smem_req_rw),
|
||||
.req_byteen (smem_req_byteen),
|
||||
.req_addr (smem_req_addr),
|
||||
.req_data (smem_req_data),
|
||||
.req_tag (smem_req_tag),
|
||||
.req_ready (smem_req_ready),
|
||||
|
||||
// Core response
|
||||
.rsp_valid (smem_rsp_valid),
|
||||
.rsp_data (smem_rsp_data),
|
||||
.rsp_tag (smem_rsp_tag),
|
||||
.rsp_ready (smem_rsp_ready)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_out_if[i], dcache_bus_in_if[i]);
|
||||
end
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
assign mem_perf_out_if.icache_reads = mem_perf_in_if.icache_reads;
|
||||
assign mem_perf_out_if.icache_read_misses = mem_perf_in_if.icache_read_misses;
|
||||
|
||||
assign mem_perf_out_if.dcache_reads = mem_perf_in_if.dcache_reads;
|
||||
assign mem_perf_out_if.dcache_writes = mem_perf_in_if.dcache_writes;
|
||||
assign mem_perf_out_if.dcache_read_misses = mem_perf_in_if.dcache_read_misses;
|
||||
assign mem_perf_out_if.dcache_write_misses = mem_perf_in_if.dcache_write_misses;
|
||||
assign mem_perf_out_if.dcache_bank_stalls = mem_perf_in_if.dcache_bank_stalls;
|
||||
assign mem_perf_out_if.dcache_mshr_stalls = mem_perf_in_if.dcache_mshr_stalls;
|
||||
|
||||
assign mem_perf_out_if.l2cache_reads = mem_perf_in_if.l2cache_reads;
|
||||
assign mem_perf_out_if.l2cache_writes = mem_perf_in_if.l2cache_writes;
|
||||
assign mem_perf_out_if.l2cache_read_misses = mem_perf_in_if.l2cache_read_misses;
|
||||
assign mem_perf_out_if.l2cache_write_misses = mem_perf_in_if.l2cache_write_misses;
|
||||
assign mem_perf_out_if.l2cache_bank_stalls = mem_perf_in_if.l2cache_bank_stalls;
|
||||
assign mem_perf_out_if.l2cache_mshr_stalls = mem_perf_in_if.l2cache_mshr_stalls;
|
||||
|
||||
assign mem_perf_out_if.l3cache_reads = mem_perf_in_if.l3cache_reads;
|
||||
assign mem_perf_out_if.l3cache_writes = mem_perf_in_if.l3cache_writes;
|
||||
assign mem_perf_out_if.l3cache_read_misses = mem_perf_in_if.l3cache_read_misses;
|
||||
assign mem_perf_out_if.l3cache_write_misses = mem_perf_in_if.l3cache_write_misses;
|
||||
assign mem_perf_out_if.l3cache_bank_stalls = mem_perf_in_if.l3cache_bank_stalls;
|
||||
assign mem_perf_out_if.l3cache_mshr_stalls = mem_perf_in_if.l3cache_mshr_stalls;
|
||||
|
||||
assign mem_perf_out_if.mem_reads = mem_perf_in_if.mem_reads;
|
||||
assign mem_perf_out_if.mem_writes = mem_perf_in_if.mem_writes;
|
||||
assign mem_perf_out_if.mem_latency = mem_perf_in_if.mem_latency;
|
||||
|
||||
`ifdef SM_ENABLE
|
||||
assign mem_perf_out_if.smem_reads = perf_smem_if.reads;
|
||||
assign mem_perf_out_if.smem_writes = perf_smem_if.writes;
|
||||
assign mem_perf_out_if.smem_bank_stalls = perf_smem_if.bank_stalls;
|
||||
`else
|
||||
assign mem_perf_out_if.smem_reads = '0;
|
||||
assign mem_perf_out_if.smem_writes = '0;
|
||||
assign mem_perf_out_if.smem_bank_stalls = '0;
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
76
hw/rtl/core/VX_split_join.sv
Normal file
76
hw/rtl/core/VX_split_join.sv
Normal file
@@ -0,0 +1,76 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_split_join import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire valid,
|
||||
input wire [`NW_WIDTH-1:0] wid,
|
||||
input split_t split,
|
||||
input join_t sjoin,
|
||||
output wire join_valid,
|
||||
output wire join_is_dvg,
|
||||
output wire join_is_else,
|
||||
output wire [`NW_WIDTH-1:0] join_wid,
|
||||
output wire [`NUM_THREADS-1:0] join_tmask,
|
||||
output wire [`XLEN-1:0] join_pc
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire ipdom_set [`NUM_WARPS-1:0];
|
||||
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_q0 = {split.then_tmask | split.else_tmask, `XLEN'(0)};
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_q1 = {split.else_tmask, split.next_pc};
|
||||
|
||||
wire ipdom_push = valid && split.valid && split.is_dvg;
|
||||
wire ipdom_pop = valid && sjoin.valid && sjoin.is_dvg;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
`RESET_RELAY (ipdom_reset, reset);
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (`XLEN+`NUM_THREADS),
|
||||
.DEPTH (`UP(`NUM_THREADS-1))
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (ipdom_reset),
|
||||
.push (ipdom_push && (i == wid)),
|
||||
.pop (ipdom_pop && (i == wid)),
|
||||
.q0 (ipdom_q0),
|
||||
.q1 (ipdom_q1),
|
||||
.d (ipdom_data[i]),
|
||||
.d_set (ipdom_set[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full)
|
||||
);
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + `NW_WIDTH + 1 + `XLEN + `NUM_THREADS),
|
||||
.DEPTH (1),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({valid && sjoin.valid, sjoin.is_dvg, ipdom_set[wid], wid, ipdom_data[wid]}),
|
||||
.data_out ({join_valid, join_is_dvg, join_is_else, join_wid, join_tmask, join_pc})
|
||||
);
|
||||
|
||||
endmodule
|
||||
378
hw/rtl/core/VX_trace.vh
Normal file
378
hw/rtl/core/VX_trace.vh
Normal file
@@ -0,0 +1,378 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_TRACE_VH
|
||||
`define VX_TRACE_VH
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
|
||||
case (ex_type)
|
||||
`EX_ALU: `TRACE(level, ("ALU"));
|
||||
`EX_LSU: `TRACE(level, ("LSU"));
|
||||
`EX_FPU: `TRACE(level, ("FPU"));
|
||||
`EX_SFU: `TRACE(level, ("SFU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input [`INST_MOD_BITS-1:0] op_mod,
|
||||
`UNUSED_ARG(input [`NR_BITS-1:0] rd),
|
||||
`UNUSED_ARG(input [`NR_BITS-1:0] rs2),
|
||||
input use_imm,
|
||||
`UNUSED_ARG(input [`XLEN-1:0] imm)
|
||||
);
|
||||
|
||||
`ifdef FLEN_64
|
||||
logic fdst_d = imm[0];
|
||||
`else
|
||||
logic fdst_d = 0;
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
logic fcvt_l = imm[1];
|
||||
`else
|
||||
logic fcvt_l = 0;
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
logic rd_float = 1'(rd >> 5) || 1'(rs2 >> 5);
|
||||
`else
|
||||
logic rd_float = 0;
|
||||
`endif
|
||||
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
if (`INST_ALU_IS_BR(op_mod)) begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: `TRACE(level, ("BEQ"));
|
||||
`INST_BR_NE: `TRACE(level, ("BNE"));
|
||||
`INST_BR_LT: `TRACE(level, ("BLT"));
|
||||
`INST_BR_GE: `TRACE(level, ("BGE"));
|
||||
`INST_BR_LTU: `TRACE(level, ("BLTU"));
|
||||
`INST_BR_GEU: `TRACE(level, ("BGEU"));
|
||||
`INST_BR_JAL: `TRACE(level, ("JAL"));
|
||||
`INST_BR_JALR: `TRACE(level, ("JALR"));
|
||||
`INST_BR_ECALL: `TRACE(level, ("ECALL"));
|
||||
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
|
||||
`INST_BR_URET: `TRACE(level, ("URET"));
|
||||
`INST_BR_SRET: `TRACE(level, ("SRET"));
|
||||
`INST_BR_MRET: `TRACE(level, ("MRET"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else if (`INST_ALU_IS_M(op_mod)) begin
|
||||
if (`INST_ALU_IS_W(op_mod)) begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MULW"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIVW"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVUW"));
|
||||
`INST_M_REM: `TRACE(level, ("REMW"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMUW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MUL"));
|
||||
`INST_M_MULH: `TRACE(level, ("MULH"));
|
||||
`INST_M_MULHSU:`TRACE(level, ("MULHSU"));
|
||||
`INST_M_MULHU: `TRACE(level, ("MULHU"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIV"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVU"));
|
||||
`INST_M_REM: `TRACE(level, ("REM"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (`INST_ALU_IS_W(op_mod)) begin
|
||||
if (use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDIW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLIW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLIW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAIW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDW"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUBW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDI"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLI"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLI"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAI"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLTI"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XORI"));
|
||||
`INST_ALU_OR: `TRACE(level, ("ORI"));
|
||||
`INST_ALU_AND: `TRACE(level, ("ANDI"));
|
||||
`INST_ALU_LUI: `TRACE(level, ("LUI"));
|
||||
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADD"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUB"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLL"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRL"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRA"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLT"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XOR"));
|
||||
`INST_ALU_OR: `TRACE(level, ("OR"));
|
||||
`INST_ALU_AND: `TRACE(level, ("AND"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`EX_LSU: begin
|
||||
if (rd_float) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LW: `TRACE(level, ("FLW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("FLD"));
|
||||
`INST_LSU_SW: `TRACE(level, ("FSW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("FSD"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: `TRACE(level, ("LB"));
|
||||
`INST_LSU_LH: `TRACE(level, ("LH"));
|
||||
`INST_LSU_LW: `TRACE(level, ("LW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("LD"));
|
||||
`INST_LSU_LBU:`TRACE(level, ("LBU"));
|
||||
`INST_LSU_LHU:`TRACE(level, ("LHU"));
|
||||
`INST_LSU_LWU:`TRACE(level, ("LWU"));
|
||||
`INST_LSU_SB: `TRACE(level, ("SB"));
|
||||
`INST_LSU_SH: `TRACE(level, ("SH"));
|
||||
`INST_LSU_SW: `TRACE(level, ("SW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("SD"));
|
||||
`INST_LSU_FENCE:`TRACE(level,("FENCE"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_FPU: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FADD.S"));
|
||||
end
|
||||
`INST_FPU_SUB: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FSUB.S"));
|
||||
end
|
||||
`INST_FPU_MUL: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FMUL.D"));
|
||||
else
|
||||
`TRACE(level, ("FMUL.S"));
|
||||
end
|
||||
`INST_FPU_DIV: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FDIV.D"));
|
||||
else
|
||||
`TRACE(level, ("FDIV.S"));
|
||||
end
|
||||
`INST_FPU_SQRT: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FSQRT.D"));
|
||||
else
|
||||
`TRACE(level, ("FSQRT.S"));
|
||||
end
|
||||
`INST_FPU_MADD: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FMADD.S"));
|
||||
end
|
||||
`INST_FPU_MSUB: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FMSUB.S"));
|
||||
end
|
||||
`INST_FPU_NMADD: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FNMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMADD.S"));
|
||||
end
|
||||
`INST_FPU_NMSUB: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FNMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMSUB.S"));
|
||||
end
|
||||
`INST_FPU_CMP: begin
|
||||
if (fdst_d) begin
|
||||
case (op_mod[1:0])
|
||||
0: `TRACE(level, ("FLE.D"));
|
||||
1: `TRACE(level, ("FLT.D"));
|
||||
2: `TRACE(level, ("FEQ.D"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_mod[1:0])
|
||||
0: `TRACE(level, ("FLE.S"));
|
||||
1: `TRACE(level, ("FLT.S"));
|
||||
2: `TRACE(level, ("FEQ.S"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2F: begin
|
||||
if (fdst_d) begin
|
||||
`TRACE(level, ("FCVT.D.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.D"));
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2I: begin
|
||||
if (fdst_d) begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.L.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.D"));
|
||||
end
|
||||
end else begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.L.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2U: begin
|
||||
if (fdst_d) begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.LU.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.D"));
|
||||
end
|
||||
end else begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.LU.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_I2F: begin
|
||||
if (fdst_d) begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.D.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.W"));
|
||||
end
|
||||
end else begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.S.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.W"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_U2F: begin
|
||||
if (fdst_d) begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.D.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.WU"));
|
||||
end
|
||||
end else begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.S.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.WU"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MISC: begin
|
||||
if (fdst_d) begin
|
||||
case (op_mod)
|
||||
0: `TRACE(level, ("FSGNJ.D"));
|
||||
1: `TRACE(level, ("FSGNJN.D"));
|
||||
2: `TRACE(level, ("FSGNJX.D"));
|
||||
3: `TRACE(level, ("FCLASS.D"));
|
||||
4: `TRACE(level, ("FMV.X.D"));
|
||||
5: `TRACE(level, ("FMV.D.X"));
|
||||
6: `TRACE(level, ("FMIN.D"));
|
||||
7: `TRACE(level, ("FMAX.D"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_mod)
|
||||
0: `TRACE(level, ("FSGNJ.S"));
|
||||
1: `TRACE(level, ("FSGNJN.S"));
|
||||
2: `TRACE(level, ("FSGNJX.S"));
|
||||
3: `TRACE(level, ("FCLASS.S"));
|
||||
4: `TRACE(level, ("FMV.X.S"));
|
||||
5: `TRACE(level, ("FMV.S.X"));
|
||||
6: `TRACE(level, ("FMIN.S"));
|
||||
7: `TRACE(level, ("FMAX.S"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
`EX_SFU: begin
|
||||
case (`INST_SFU_BITS'(op_type))
|
||||
`INST_SFU_TMC: `TRACE(level, ("TMC"));
|
||||
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
|
||||
`INST_SFU_SPLIT: `TRACE(level, ("SPLIT"));
|
||||
`INST_SFU_JOIN: `TRACE(level, ("JOIN"));
|
||||
`INST_SFU_BAR: `TRACE(level, ("BAR"));
|
||||
`INST_SFU_PRED: `TRACE(level, ("PRED"));
|
||||
`INST_SFU_CSRRW: begin if (use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end
|
||||
`INST_SFU_CSRRS: begin if (use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end
|
||||
`INST_SFU_CSRRC: begin if (use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end
|
||||
`INST_SFU_TEX: `TRACE(level, ("TEX"));
|
||||
`INST_SFU_RASTER:`TRACE(level, ("RASTER"));
|
||||
`INST_SFU_ROP: `TRACE(level, ("ROP"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
|
||||
case (addr)
|
||||
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"));
|
||||
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"));
|
||||
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
`endif // VX_TRACE_VH
|
||||
157
hw/rtl/core/VX_wctl_unit.sv
Normal file
157
hw/rtl/core/VX_wctl_unit.sv
Normal file
@@ -0,0 +1,157 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_wctl_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
// Outputs
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam LANE_WIDTH = `UP(LANE_BITS);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam WCTL_WIDTH = $bits(tmc_t) + $bits(wspawn_t) + $bits(split_t) + $bits(join_t) + $bits(barrier_t);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + WCTL_WIDTH + PID_WIDTH + 1 + 1;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
tmc_t tmc, tmc_r;
|
||||
wspawn_t wspawn, wspawn_r;
|
||||
split_t split, split_r;
|
||||
join_t sjoin, sjoin_r;
|
||||
barrier_t barrier, barrier_r;
|
||||
|
||||
wire is_wspawn = (execute_if.data.op_type == `INST_SFU_WSPAWN);
|
||||
wire is_tmc = (execute_if.data.op_type == `INST_SFU_TMC);
|
||||
wire is_pred = (execute_if.data.op_type == `INST_SFU_PRED);
|
||||
wire is_split = (execute_if.data.op_type == `INST_SFU_SPLIT);
|
||||
wire is_join = (execute_if.data.op_type == `INST_SFU_JOIN);
|
||||
wire is_bar = (execute_if.data.op_type == `INST_SFU_BAR);
|
||||
|
||||
wire [LANE_WIDTH-1:0] tid;
|
||||
if (LANE_BITS != 0) begin
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
end else begin
|
||||
assign tid = 0;
|
||||
end
|
||||
|
||||
wire [`XLEN-1:0] rs1_data = execute_if.data.rs1_data[tid];
|
||||
wire [`XLEN-1:0] rs2_data = execute_if.data.rs2_data[tid];
|
||||
`UNUSED_VAR (rs1_data)
|
||||
|
||||
wire [NUM_LANES-1:0] taken;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign taken[i] = execute_if.data.rs1_data[i][0];
|
||||
end
|
||||
|
||||
reg [`NUM_THREADS-1:0] then_tmask_r, then_tmask_n;
|
||||
reg [`NUM_THREADS-1:0] else_tmask_r, else_tmask_n;
|
||||
always @(*) begin
|
||||
then_tmask_n = then_tmask_r;
|
||||
else_tmask_n = else_tmask_r;
|
||||
if (execute_if.data.sop) begin
|
||||
then_tmask_n = '0;
|
||||
else_tmask_n = '0;
|
||||
end
|
||||
then_tmask_n[execute_if.data.pid * NUM_LANES +: NUM_LANES] = taken & execute_if.data.tmask;
|
||||
else_tmask_n[execute_if.data.pid * NUM_LANES +: NUM_LANES] = ~taken & execute_if.data.tmask;
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (execute_if.valid) begin
|
||||
then_tmask_r <= then_tmask_n;
|
||||
else_tmask_r <= else_tmask_n;
|
||||
end
|
||||
end
|
||||
wire has_then = (then_tmask_n != 0);
|
||||
wire has_else = (else_tmask_n != 0);
|
||||
|
||||
// tmc / pred
|
||||
|
||||
wire [`NUM_THREADS-1:0] pred_mask = has_then ? then_tmask_n : rs2_data[`NUM_THREADS-1:0];
|
||||
assign tmc.valid = (is_tmc || is_pred);
|
||||
assign tmc.tmask = is_pred ? pred_mask : rs1_data[`NUM_THREADS-1:0];
|
||||
|
||||
// split
|
||||
|
||||
assign split.valid = is_split;
|
||||
assign split.is_dvg = has_then && has_else;
|
||||
assign split.then_tmask = then_tmask_n;
|
||||
assign split.else_tmask = else_tmask_n;
|
||||
assign split.next_pc = execute_if.data.PC + 4;
|
||||
|
||||
// join
|
||||
|
||||
assign sjoin.valid = is_join;
|
||||
assign sjoin.is_dvg = rs1_data[0];
|
||||
|
||||
// barrier
|
||||
assign barrier.valid = is_bar;
|
||||
assign barrier.id = rs1_data[`NB_WIDTH-1:0];
|
||||
`ifdef GBAR_ENABLE
|
||||
assign barrier.is_global = rs1_data[31];
|
||||
`else
|
||||
assign barrier.is_global = 1'b0;
|
||||
`endif
|
||||
assign barrier.size_m1 = rs2_data[$bits(barrier.size_m1)-1:0] - $bits(barrier.size_m1)'(1);
|
||||
|
||||
// wspawn
|
||||
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign wspawn_wmask[i] = (i < rs1_data[`NW_BITS:0]) && (i != execute_if.data.wid);
|
||||
end
|
||||
assign wspawn.valid = is_wspawn;
|
||||
assign wspawn.wmask = wspawn_wmask;
|
||||
assign wspawn.pc = rs2_data;
|
||||
|
||||
// response
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (execute_if.valid),
|
||||
.ready_in (execute_if.ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, {tmc, wspawn, split, sjoin, barrier}}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, {tmc_r, wspawn_r, split_r, sjoin_r, barrier_r}}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready)
|
||||
);
|
||||
|
||||
assign warp_ctl_if.valid = commit_if.valid && commit_if.ready && commit_if.data.eop;
|
||||
assign warp_ctl_if.wid = commit_if.data.wid;
|
||||
assign warp_ctl_if.tmc = tmc_r;
|
||||
assign warp_ctl_if.wspawn = wspawn_r;
|
||||
assign warp_ctl_if.split = split_r;
|
||||
assign warp_ctl_if.sjoin = sjoin_r;
|
||||
assign warp_ctl_if.barrier = barrier_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = `XLEN'(split_r.is_dvg);
|
||||
end
|
||||
|
||||
endmodule
|
||||
Reference in New Issue
Block a user