pipeline refactoring
This commit is contained in:
@@ -10,7 +10,7 @@ module VX_alu_unit #(
|
|||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
|
|
||||||
// Outputs
|
// Outputs
|
||||||
VX_wb_if alu_wb_if
|
VX_commit_if alu_commit_if
|
||||||
);
|
);
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
||||||
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
||||||
@@ -48,7 +48,7 @@ module VX_alu_unit #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
wire stall = ~alu_wb_if.ready && (| alu_wb_if.valid);
|
wire stall = ~alu_commit_if.ready && (| alu_commit_if.valid);
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
|
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
|
||||||
@@ -57,8 +57,8 @@ module VX_alu_unit #(
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_result}),
|
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_result}),
|
||||||
.out ({alu_wb_if.valid, alu_wb_if.warp_num, alu_wb_if.curr_PC, alu_wb_if.rd, alu_wb_if.wb, alu_wb_if.data})
|
.out ({alu_commit_if.valid, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign alu_req_if.ready = ~stall;
|
assign alu_req_if.ready = ~stall;
|
||||||
|
|||||||
@@ -10,8 +10,8 @@ module VX_branch_unit #(
|
|||||||
VX_branch_req_if branch_req_if,
|
VX_branch_req_if branch_req_if,
|
||||||
|
|
||||||
// Outputs
|
// Outputs
|
||||||
VX_branch_rsp_if branch_rsp_if,
|
VX_branch_ctl_if branch_ctl_if,
|
||||||
VX_wb_if branch_wb_if
|
VX_commit_if branch_commit_if
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`NT_BITS-1:0] br_result_index;
|
wire [`NT_BITS-1:0] br_result_index;
|
||||||
@@ -19,7 +19,7 @@ module VX_branch_unit #(
|
|||||||
VX_priority_encoder #(
|
VX_priority_encoder #(
|
||||||
.N(`NUM_THREADS)
|
.N(`NUM_THREADS)
|
||||||
) choose_alu_result (
|
) choose_alu_result (
|
||||||
.data_in (alu_req_if.valid),
|
.data_in (branch_req_if.valid),
|
||||||
.data_out (br_result_index),
|
.data_out (br_result_index),
|
||||||
`UNUSED_PIN (valid_out)
|
`UNUSED_PIN (valid_out)
|
||||||
);
|
);
|
||||||
@@ -53,7 +53,7 @@ module VX_branch_unit #(
|
|||||||
wire [31:0] base_addr = (br_op == `BR_JALR) ? rs1_data : branch_req_if.curr_PC;
|
wire [31:0] base_addr = (br_op == `BR_JALR) ? rs1_data : branch_req_if.curr_PC;
|
||||||
wire [31:0] br_dest = $signed(base_addr) + $signed(branch_req_if.offset);
|
wire [31:0] br_dest = $signed(base_addr) + $signed(branch_req_if.offset);
|
||||||
|
|
||||||
wire stall = (~branch_wb_if.ready && (| branch_wb_if.valid));
|
wire stall = (~branch_commit_if.ready && (| branch_commit_if.valid));
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + 1 + 32)
|
.N(1 + `NW_BITS + 1 + 32)
|
||||||
@@ -63,7 +63,7 @@ module VX_branch_unit #(
|
|||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({in_valid, branch_req_if.warp_num, br_taken, br_dest}),
|
.in ({in_valid, branch_req_if.warp_num, br_taken, br_dest}),
|
||||||
.out ({branch_rsp_if.valid, branch_rsp_if.warp_num, branch_rsp_if.taken, branch_rsp_if.dest})
|
.out ({branch_ctl_if.valid, branch_ctl_if.warp_num, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
@@ -74,7 +74,7 @@ module VX_branch_unit #(
|
|||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({branch_req_if.valid, branch_req_if.warp_num, branch_req_if.curr_PC, branch_req_if.rd, branch_req_if.wb, {`NUM_THREADS{branch_req_if.next_PC}}}),
|
.in ({branch_req_if.valid, branch_req_if.warp_num, branch_req_if.curr_PC, branch_req_if.rd, branch_req_if.wb, {`NUM_THREADS{branch_req_if.next_PC}}}),
|
||||||
.out ({branch_wb_if.valid, branch_wb_if.warp_num, branch_wb_if.curr_PC, branch_wb_if.rd, branch_wb_if.wb, branch_wb_if.data})
|
.out ({branch_commit_if.valid, branch_commit_if.warp_num, branch_commit_if.curr_PC, branch_commit_if.rd, branch_commit_if.wb, branch_commit_if.data})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign branch_req_if.ready = ~stall;
|
assign branch_req_if.ready = ~stall;
|
||||||
|
|||||||
105
hw/rtl/VX_commit.v
Normal file
105
hw/rtl/VX_commit.v
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
// Commit stage.
//
// Observes the commit interfaces of the six execute units, counts how many of
// them complete a valid/ready handshake each cycle, and maintains the 64-bit
// cycle and instruction counters exported through perf_cntrs_if. The ALU,
// branch, LSU, CSR and MUL commit interfaces are handed on to the VX_writeback
// instance, which drives writeback_if. The GPU commit interface is not
// connected to VX_writeback; its ready is tied high here.
module VX_commit #(
    parameter CORE_ID = 0
) (
    input wire          clk,
    input wire          reset,

    // inputs
    VX_commit_if        alu_commit_if,
    VX_commit_if        branch_commit_if,
    VX_commit_if        lsu_commit_if,
    VX_commit_if        mul_commit_if,
    VX_commit_if        csr_commit_if,
    VX_commit_if        gpu_commit_if,

    // outputs
    VX_wb_if            writeback_if,
    VX_perf_cntrs_if    perf_cntrs_if
);

    // A unit commits in a cycle when any bit of its valid vector is set and
    // its ready signal is asserted.
    wire alu_fire    = (| alu_commit_if.valid)    && alu_commit_if.ready;
    wire branch_fire = (| branch_commit_if.valid) && branch_commit_if.ready;
    wire lsu_fire    = (| lsu_commit_if.valid)    && lsu_commit_if.ready;
    wire mul_fire    = (| mul_commit_if.valid)    && mul_commit_if.ready;
    wire csr_fire    = (| csr_commit_if.valid)    && csr_commit_if.ready;
    wire gpu_fire    = (| gpu_commit_if.valid)    && gpu_commit_if.ready;

    // One bit per execute unit, MSB to LSB: ALU, branch, LSU, MUL, CSR, GPU.
    wire [`NUM_EXS-1:0] commit_fire;
    assign commit_fire = {alu_fire, branch_fire, lsu_fire, mul_fire, csr_fire, gpu_fire};

    wire any_commit = (| commit_fire);

    // Number of units committing this cycle (population count of commit_fire).
    wire [`NE_BITS:0] num_commits;

    VX_countones #(
        .N(`NUM_EXS)
    ) valids_counter (
        .valids (commit_fire),
        .count  (num_commits)
    );

    reg [63:0] total_cycles, total_instrs;

    // Cycle counter: cleared by reset, otherwise incremented every clock.
    always @(posedge clk) begin
        if (reset)
            total_cycles <= 0;
        else
            total_cycles <= total_cycles + 1;
    end

    // Instruction counter: cleared by reset, advanced by the number of units
    // that committed in cycles where at least one unit committed.
    always @(posedge clk) begin
        if (reset)
            total_instrs <= 0;
        else if (any_commit)
            total_instrs <= total_instrs + 64'(num_commits);
    end

    assign perf_cntrs_if.total_cycles = total_cycles;
    assign perf_cntrs_if.total_instrs = total_instrs;

    // The GPU unit doesn't writeback, so its commit is always accepted.
    assign gpu_commit_if.ready = 1'b1;

    VX_writeback #(
        .CORE_ID(CORE_ID)
    ) writeback (
        .clk              (clk),
        .reset            (reset),

        .alu_commit_if    (alu_commit_if),
        .branch_commit_if (branch_commit_if),
        .lsu_commit_if    (lsu_commit_if),
        .csr_commit_if    (csr_commit_if),
        .mul_commit_if    (mul_commit_if),

        .writeback_if     (writeback_if)
    );

`ifdef DBG_PRINT_PIPELINE
    // Trace every commit handshake, one line per unit, in a fixed unit order.
    always @(posedge clk) begin
        if (alu_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data);
        end
        if (branch_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, branch_commit_if.warp_num, branch_commit_if.curr_PC, branch_commit_if.wb, branch_commit_if.rd, branch_commit_if.data);
        end
        if (lsu_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
        end
        if (mul_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
        end
        if (csr_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
        end
        if (gpu_fire) begin
            $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);
        end
    end
`endif

endmodule
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -12,11 +12,11 @@ module VX_csr_arb (
|
|||||||
VX_csr_req_if csr_req_if,
|
VX_csr_req_if csr_req_if,
|
||||||
|
|
||||||
// input
|
// input
|
||||||
VX_wb_if csr_rsp_if,
|
VX_commit_if csr_rsp_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||||
VX_wb_if csr_wb_if
|
VX_commit_if csr_commit_if
|
||||||
);
|
);
|
||||||
|
|
||||||
`UNUSED_VAR (clk)
|
`UNUSED_VAR (clk)
|
||||||
@@ -42,13 +42,13 @@ module VX_csr_arb (
|
|||||||
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & csr_rsp_if.is_io;
|
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & csr_rsp_if.is_io;
|
||||||
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
|
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
|
||||||
|
|
||||||
assign csr_wb_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}};
|
assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}};
|
||||||
assign csr_wb_if.warp_num = csr_rsp_if.warp_num;
|
assign csr_commit_if.warp_num = csr_rsp_if.warp_num;
|
||||||
assign csr_wb_if.curr_PC = csr_rsp_if.curr_PC;
|
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
|
||||||
assign csr_wb_if.data = csr_rsp_if.data;
|
assign csr_commit_if.data = csr_rsp_if.data;
|
||||||
assign csr_wb_if.rd = csr_rsp_if.rd;
|
assign csr_commit_if.rd = csr_rsp_if.rd;
|
||||||
assign csr_wb_if.wb = csr_rsp_if.wb;
|
assign csr_commit_if.wb = csr_rsp_if.wb;
|
||||||
|
|
||||||
assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_wb_if.ready;
|
assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -3,8 +3,7 @@
|
|||||||
module VX_csr_data #(
|
module VX_csr_data #(
|
||||||
parameter CORE_ID = 0
|
parameter CORE_ID = 0
|
||||||
) (
|
) (
|
||||||
input wire clk, // Clock
|
input wire clk,
|
||||||
input wire reset,
|
|
||||||
|
|
||||||
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
|
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
|
||||||
output reg[31:0] read_data,
|
output reg[31:0] read_data,
|
||||||
@@ -15,29 +14,18 @@ module VX_csr_data #(
|
|||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
input wire[`CSR_WIDTH-1:0] write_data,
|
input wire[`CSR_WIDTH-1:0] write_data,
|
||||||
input wire[`NW_BITS-1:0] warp_num,
|
input wire[`NW_BITS-1:0] warp_num,
|
||||||
input wire notify_commit
|
VX_perf_cntrs_if perf_cntrs_if
|
||||||
);
|
);
|
||||||
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
|
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
|
||||||
|
|
||||||
reg [63:0] num_cycles, num_instrs;
|
|
||||||
|
|
||||||
// cast address to physical CSR range
|
// cast address to physical CSR range
|
||||||
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
|
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
|
||||||
assign rd_addr = $size(rd_addr)'(read_addr);
|
assign rd_addr = $size(rd_addr)'(read_addr);
|
||||||
assign wr_addr = $size(wr_addr)'(write_addr);
|
assign wr_addr = $size(wr_addr)'(write_addr);
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (write_enable) begin
|
||||||
num_cycles <= 0;
|
csr_table[wr_addr] <= write_data;
|
||||||
num_instrs <= 0;
|
|
||||||
end else begin
|
|
||||||
if (write_enable) begin
|
|
||||||
csr_table[wr_addr] <= write_data;
|
|
||||||
end
|
|
||||||
num_cycles <= num_cycles + 1;
|
|
||||||
if (notify_commit) begin
|
|
||||||
num_instrs <= num_instrs + 1;
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -50,10 +38,10 @@ module VX_csr_data #(
|
|||||||
`CSR_NT : read_data = `NUM_THREADS;
|
`CSR_NT : read_data = `NUM_THREADS;
|
||||||
`CSR_NW : read_data = `NUM_WARPS;
|
`CSR_NW : read_data = `NUM_WARPS;
|
||||||
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
|
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
|
||||||
`CSR_CYCLE_L : read_data = num_cycles[31:0];
|
`CSR_CYCLE_L : read_data = perf_cntrs_if.total_cycles[31:0];
|
||||||
`CSR_CYCLE_H : read_data = num_cycles[63:32];
|
`CSR_CYCLE_H : read_data = perf_cntrs_if.total_cycles[63:32];
|
||||||
`CSR_INSTR_L : read_data = num_instrs[31:0];
|
`CSR_INSTR_L : read_data = perf_cntrs_if.total_instrs[31:0];
|
||||||
`CSR_INSTR_H : read_data = num_instrs[63:32];
|
`CSR_INSTR_H : read_data = perf_cntrs_if.total_instrs[63:32];
|
||||||
`CSR_VEND_ID : read_data = `VENDOR_ID;
|
`CSR_VEND_ID : read_data = `VENDOR_ID;
|
||||||
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
|
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
|
||||||
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;
|
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;
|
||||||
|
|||||||
@@ -5,14 +5,17 @@ module VX_csr_pipe #(
|
|||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
VX_csr_req_if csr_req_if,
|
|
||||||
|
VX_perf_cntrs_if perf_cntrs_if,
|
||||||
|
|
||||||
VX_csr_io_req_if csr_io_req_if,
|
VX_csr_io_req_if csr_io_req_if,
|
||||||
VX_wb_if csr_wb_if,
|
|
||||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||||
input wire notify_commit
|
|
||||||
|
VX_csr_req_if csr_req_if,
|
||||||
|
VX_commit_if csr_commit_if
|
||||||
);
|
);
|
||||||
VX_csr_req_if csr_pipe_req_if();
|
VX_csr_req_if csr_pipe_req_if();
|
||||||
VX_wb_if csr_pipe_wb_if();
|
VX_commit_if csr_pipe_commit_if();
|
||||||
|
|
||||||
VX_csr_arb csr_arb (
|
VX_csr_arb csr_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
@@ -20,9 +23,9 @@ module VX_csr_pipe #(
|
|||||||
.csr_core_req_if (csr_req_if),
|
.csr_core_req_if (csr_req_if),
|
||||||
.csr_io_req_if (csr_io_req_if),
|
.csr_io_req_if (csr_io_req_if),
|
||||||
.csr_req_if (csr_pipe_req_if),
|
.csr_req_if (csr_pipe_req_if),
|
||||||
.csr_rsp_if (csr_pipe_wb_if),
|
.csr_rsp_if (csr_pipe_commit_if),
|
||||||
.csr_io_rsp_if (csr_io_rsp_if),
|
.csr_io_rsp_if (csr_io_rsp_if),
|
||||||
.csr_wb_if (csr_wb_if)
|
.csr_commit_if (csr_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
|
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
|
||||||
@@ -30,24 +33,23 @@ module VX_csr_pipe #(
|
|||||||
wire [31:0] csr_updated_data_s2;
|
wire [31:0] csr_updated_data_s2;
|
||||||
wire [31:0] csr_read_data_unqual;
|
wire [31:0] csr_read_data_unqual;
|
||||||
|
|
||||||
wire is_csr_s2 = (| csr_pipe_wb_if.valid);
|
wire is_csr_s2 = (| csr_pipe_commit_if.valid);
|
||||||
|
|
||||||
VX_csr_data #(
|
VX_csr_data #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
) csr_data (
|
) csr_data (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
|
||||||
.read_addr (csr_pipe_req_if.csr_addr),
|
.read_addr (csr_pipe_req_if.csr_addr),
|
||||||
.read_data (csr_read_data_unqual),
|
.read_data (csr_read_data_unqual),
|
||||||
.write_enable (is_csr_s2),
|
.write_enable (is_csr_s2),
|
||||||
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
||||||
.write_addr (csr_addr_s2),
|
.write_addr (csr_addr_s2),
|
||||||
.warp_num (csr_pipe_req_if.warp_num),
|
.warp_num (csr_pipe_req_if.warp_num),
|
||||||
.notify_commit (notify_commit)
|
.perf_cntrs_if (perf_cntrs_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
|
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
|
||||||
&& (csr_pipe_wb_if.warp_num == csr_pipe_req_if.warp_num)
|
&& (csr_pipe_commit_if.warp_num == csr_pipe_req_if.warp_num)
|
||||||
&& is_csr_s2;
|
&& is_csr_s2;
|
||||||
|
|
||||||
wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
|
wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||||
@@ -63,7 +65,7 @@ module VX_csr_pipe #(
|
|||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
wire stall = ~csr_pipe_wb_if.ready && (| csr_pipe_wb_if.valid);
|
wire stall = ~csr_pipe_commit_if.ready && (| csr_pipe_commit_if.valid);
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
|
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
|
||||||
@@ -73,12 +75,12 @@ module VX_csr_pipe #(
|
|||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
|
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
|
||||||
.out ({csr_pipe_wb_if.valid, csr_pipe_wb_if.warp_num, csr_pipe_wb_if.curr_PC, csr_pipe_wb_if.rd, csr_pipe_wb_if.wb, csr_addr_s2, csr_pipe_wb_if.is_io, csr_read_data_s2, csr_updated_data_s2})
|
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, csr_pipe_commit_if.is_io, csr_read_data_s2, csr_updated_data_s2})
|
||||||
);
|
);
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||||
assign csr_pipe_wb_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i :
|
assign csr_pipe_commit_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i :
|
||||||
(csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
|
(csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
|
||||||
csr_read_data_s2;
|
csr_read_data_s2;
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -173,11 +173,13 @@
|
|||||||
`define EX_BR 3'h2
|
`define EX_BR 3'h2
|
||||||
`define EX_MUL 3'h3
|
`define EX_MUL 3'h3
|
||||||
`define EX_LSU 3'h4
|
`define EX_LSU 3'h4
|
||||||
`define EX_FPU 3'h5
|
`define EX_CSR 3'h5
|
||||||
`define EX_CSR 3'h6
|
`define EX_GPU 3'h6
|
||||||
`define EX_GPU 3'h7
|
|
||||||
`define EX_BITS 3
|
`define EX_BITS 3
|
||||||
|
|
||||||
|
`define NUM_EXS 6
|
||||||
|
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||||
|
|
||||||
`define WB_NO 2'h0
|
`define WB_NO 2'h0
|
||||||
`define WB_ALU 2'h1
|
`define WB_ALU 2'h1
|
||||||
`define WB_MEM 2'h2
|
`define WB_MEM 2'h2
|
||||||
@@ -374,7 +376,6 @@ task print_ex_type;
|
|||||||
`EX_LSU: $write("LSU");
|
`EX_LSU: $write("LSU");
|
||||||
`EX_CSR: $write("CSR");
|
`EX_CSR: $write("CSR");
|
||||||
`EX_MUL: $write("MUL");
|
`EX_MUL: $write("MUL");
|
||||||
`EX_FPU: $write("FPU");
|
|
||||||
`EX_GPU: $write("GPU");
|
`EX_GPU: $write("GPU");
|
||||||
default: $write("NOP");
|
default: $write("NOP");
|
||||||
endcase
|
endcase
|
||||||
|
|||||||
@@ -17,43 +17,29 @@ module VX_execute #(
|
|||||||
VX_cache_core_req_if dcache_req_if,
|
VX_cache_core_req_if dcache_req_if,
|
||||||
VX_cache_core_rsp_if dcache_rsp_if,
|
VX_cache_core_rsp_if dcache_rsp_if,
|
||||||
|
|
||||||
|
// perf
|
||||||
|
VX_perf_cntrs_if perf_cntrs_if,
|
||||||
|
|
||||||
// inputs
|
// inputs
|
||||||
VX_execute_if execute_if,
|
VX_alu_req_if alu_req_if,
|
||||||
VX_wb_if writeback_if,
|
VX_branch_req_if branch_req_if,
|
||||||
|
VX_lsu_req_if lsu_req_if,
|
||||||
|
VX_csr_req_if csr_req_if,
|
||||||
|
VX_mul_req_if mul_req_if,
|
||||||
|
VX_gpu_req_if gpu_req_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_branch_rsp_if branch_rsp_if,
|
VX_branch_ctl_if branch_ctl_if,
|
||||||
VX_warp_ctl_if warp_ctl_if,
|
VX_warp_ctl_if warp_ctl_if,
|
||||||
VX_wb_if alu_wb_if,
|
VX_commit_if alu_commit_if,
|
||||||
VX_wb_if branch_wb_if,
|
VX_commit_if branch_commit_if,
|
||||||
VX_wb_if lsu_wb_if,
|
VX_commit_if lsu_commit_if,
|
||||||
VX_wb_if csr_wb_if,
|
VX_commit_if csr_commit_if,
|
||||||
VX_wb_if mul_wb_if,
|
VX_commit_if mul_commit_if,
|
||||||
|
VX_commit_if gpu_commit_if,
|
||||||
|
|
||||||
input wire notify_commit,
|
|
||||||
output wire ebreak
|
output wire ebreak
|
||||||
);
|
);
|
||||||
VX_alu_req_if alu_req_if();
|
|
||||||
VX_branch_req_if branch_req_if();
|
|
||||||
VX_csr_req_if csr_req_if();
|
|
||||||
VX_lsu_req_if lsu_req_if();
|
|
||||||
VX_mul_req_if mul_req_if();
|
|
||||||
VX_gpu_req_if gpu_req_if();
|
|
||||||
|
|
||||||
VX_gpr_stage #(
|
|
||||||
.CORE_ID(CORE_ID)
|
|
||||||
) gpr_stage (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.writeback_if (writeback_if),
|
|
||||||
.execute_if (execute_if),
|
|
||||||
.alu_req_if (alu_req_if),
|
|
||||||
.branch_req_if (branch_req_if),
|
|
||||||
.lsu_req_if (lsu_req_if),
|
|
||||||
.csr_req_if (csr_req_if),
|
|
||||||
.mul_req_if (mul_req_if),
|
|
||||||
.gpu_req_if (gpu_req_if)
|
|
||||||
);
|
|
||||||
|
|
||||||
VX_alu_unit #(
|
VX_alu_unit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
@@ -61,7 +47,7 @@ module VX_execute #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
.alu_wb_if (alu_wb_if)
|
.alu_commit_if (alu_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_branch_unit #(
|
VX_branch_unit #(
|
||||||
@@ -70,8 +56,8 @@ module VX_execute #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.branch_req_if (branch_req_if),
|
.branch_req_if (branch_req_if),
|
||||||
.branch_rsp_if (branch_rsp_if),
|
.branch_ctl_if (branch_ctl_if),
|
||||||
.branch_wb_if (branch_wb_if)
|
.branch_commit_if(branch_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_lsu_unit #(
|
VX_lsu_unit #(
|
||||||
@@ -83,7 +69,7 @@ module VX_execute #(
|
|||||||
.dcache_req_if (dcache_req_if),
|
.dcache_req_if (dcache_req_if),
|
||||||
.dcache_rsp_if (dcache_rsp_if),
|
.dcache_rsp_if (dcache_rsp_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
.lsu_wb_if (lsu_wb_if)
|
.lsu_commit_if (lsu_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_csr_pipe #(
|
VX_csr_pipe #(
|
||||||
@@ -91,11 +77,11 @@ module VX_execute #(
|
|||||||
) csr_pipe (
|
) csr_pipe (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.csr_req_if (csr_req_if),
|
.perf_cntrs_if (perf_cntrs_if),
|
||||||
.csr_io_req_if (csr_io_req_if),
|
.csr_io_req_if (csr_io_req_if),
|
||||||
.csr_wb_if (csr_wb_if),
|
|
||||||
.csr_io_rsp_if (csr_io_rsp_if),
|
.csr_io_rsp_if (csr_io_rsp_if),
|
||||||
.notify_commit (notify_commit)
|
.csr_req_if (csr_req_if),
|
||||||
|
.csr_commit_if (csr_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_mul_unit #(
|
VX_mul_unit #(
|
||||||
@@ -104,14 +90,15 @@ module VX_execute #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.mul_req_if (mul_req_if),
|
.mul_req_if (mul_req_if),
|
||||||
.mul_wb_if (mul_wb_if)
|
.mul_commit_if (mul_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_gpu_unit #(
|
VX_gpu_unit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
) gpu_unit (
|
) gpu_unit (
|
||||||
.gpu_req_if (gpu_req_if),
|
.gpu_req_if (gpu_req_if),
|
||||||
.warp_ctl_if (warp_ctl_if)
|
.warp_ctl_if (warp_ctl_if),
|
||||||
|
.gpu_commit_if (gpu_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign ebreak = (| branch_req_if.valid) && (branch_req_if.br_op == `BR_EBREAK || branch_req_if.br_op == `BR_ECALL);
|
assign ebreak = (| branch_req_if.valid) && (branch_req_if.br_op == `BR_EBREAK || branch_req_if.br_op == `BR_ECALL);
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ module VX_fetch #(
|
|||||||
// inputs
|
// inputs
|
||||||
VX_wstall_if wstall_if,
|
VX_wstall_if wstall_if,
|
||||||
VX_join_if join_if,
|
VX_join_if join_if,
|
||||||
VX_branch_rsp_if branch_rsp_if,
|
VX_branch_ctl_if branch_ctl_if,
|
||||||
VX_warp_ctl_if warp_ctl_if,
|
VX_warp_ctl_if warp_ctl_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
@@ -32,7 +32,7 @@ module VX_fetch #(
|
|||||||
.warp_ctl_if (warp_ctl_if),
|
.warp_ctl_if (warp_ctl_if),
|
||||||
.wstall_if (wstall_if),
|
.wstall_if (wstall_if),
|
||||||
.join_if (join_if),
|
.join_if (join_if),
|
||||||
.branch_rsp_if (branch_rsp_if),
|
.branch_ctl_if (branch_ctl_if),
|
||||||
.ifetch_req_if (ifetch_req_if),
|
.ifetch_req_if (ifetch_req_if),
|
||||||
.ifetch_rsp_if (ifetch_rsp_if),
|
.ifetch_rsp_if (ifetch_rsp_if),
|
||||||
.busy (busy)
|
.busy (busy)
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
module VX_gpr_mux (
|
module VX_gpr_mux (
|
||||||
// inputs
|
// inputs
|
||||||
VX_execute_if execute_if,
|
VX_execute_if execute_if,
|
||||||
input wire [`NUM_THREADS-1:0][31:0] rs1_data,
|
input wire [`NUM_THREADS-1:0][31:0] rs1_data,
|
||||||
input wire [`NUM_THREADS-1:0][31:0] rs2_data,
|
input wire [`NUM_THREADS-1:0][31:0] rs2_data,
|
||||||
|
|
||||||
@@ -80,9 +80,10 @@ module VX_gpr_mux (
|
|||||||
// GPU unit
|
// GPU unit
|
||||||
assign gpu_req_if.valid = execute_if.valid & is_gpu;
|
assign gpu_req_if.valid = execute_if.valid & is_gpu;
|
||||||
assign gpu_req_if.warp_num = execute_if.warp_num;
|
assign gpu_req_if.warp_num = execute_if.warp_num;
|
||||||
assign gpu_req_if.next_PC = execute_if.next_PC;
|
assign gpu_req_if.curr_PC = execute_if.curr_PC;
|
||||||
assign gpu_req_if.gpu_op = `GPU_OP(execute_if.instr_op);
|
assign gpu_req_if.gpu_op = `GPU_OP(execute_if.instr_op);
|
||||||
assign gpu_req_if.rs1_data = rs1_data;
|
assign gpu_req_if.rs1_data = rs1_data;
|
||||||
assign gpu_req_if.rs2_data = rs2_data[0];
|
assign gpu_req_if.rs2_data = rs2_data[0];
|
||||||
|
assign gpu_req_if.next_PC = execute_if.next_PC;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -12,17 +12,17 @@ module VX_gpr_ram (
|
|||||||
);
|
);
|
||||||
`ifndef ASIC
|
`ifndef ASIC
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0][3:0][7:0] ram [31:0];
|
reg [`NUM_THREADS-1:0][3:0][7:0] ram [`NUM_REGS-1:0];
|
||||||
|
|
||||||
integer i;
|
integer i;
|
||||||
|
|
||||||
initial begin
|
initial begin
|
||||||
// initialize r0 to 0
|
// initialize r0 to 0
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||||
ram[i][0] = 0;
|
ram[0][i][0] = 0;
|
||||||
ram[i][1] = 0;
|
ram[0][i][1] = 0;
|
||||||
ram[i][2] = 0;
|
ram[0][i][2] = 0;
|
||||||
ram[i][3] = 0;
|
ram[0][i][3] = 0;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ module VX_gpr_stage #(
|
|||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
// inputs
|
// inputs
|
||||||
VX_execute_if execute_if,
|
|
||||||
VX_wb_if writeback_if,
|
VX_wb_if writeback_if,
|
||||||
|
VX_execute_if execute_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
@@ -38,7 +38,7 @@ module VX_gpr_stage #(
|
|||||||
|
|
||||||
generate
|
generate
|
||||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||||
assign we[i] = writeback_if.valid & {`NUM_THREADS{(writeback_if.wb != 0) && (i == writeback_if.warp_num)}};
|
assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}};
|
||||||
VX_gpr_ram gpr_ram (
|
VX_gpr_ram gpr_ram (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.we (we[i]),
|
.we (we[i]),
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ module VX_gpu_unit #(
|
|||||||
VX_gpu_req_if gpu_req_if,
|
VX_gpu_req_if gpu_req_if,
|
||||||
|
|
||||||
// Output
|
// Output
|
||||||
VX_warp_ctl_if warp_ctl_if
|
VX_warp_ctl_if warp_ctl_if,
|
||||||
|
VX_commit_if gpu_commit_if
|
||||||
);
|
);
|
||||||
wire [`NUM_THREADS-1:0] curr_valids = gpu_req_if.valid;
|
wire [`NUM_THREADS-1:0] curr_valids = gpu_req_if.valid;
|
||||||
wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN);
|
wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN);
|
||||||
@@ -76,4 +77,10 @@ module VX_gpu_unit #(
|
|||||||
|
|
||||||
assign gpu_req_if.ready = 1'b1; // has no stalls
|
assign gpu_req_if.ready = 1'b1; // has no stalls
|
||||||
|
|
||||||
|
// commit
|
||||||
|
assign gpu_commit_if.valid = gpu_req_if.valid;
|
||||||
|
assign gpu_commit_if.warp_num = gpu_req_if.warp_num;
|
||||||
|
assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC;
|
||||||
|
assign gpu_commit_if.wb = `WB_NO;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
module VX_issue #(
|
module VX_issue #(
|
||||||
parameter CORE_ID = 0
|
parameter CORE_ID = 0
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
@@ -9,79 +9,41 @@ module VX_issue #(
|
|||||||
VX_decode_if decode_if,
|
VX_decode_if decode_if,
|
||||||
VX_wb_if writeback_if,
|
VX_wb_if writeback_if,
|
||||||
|
|
||||||
VX_execute_if execute_if,
|
VX_alu_req_if alu_req_if,
|
||||||
|
VX_branch_req_if branch_req_if,
|
||||||
output wire is_empty
|
VX_lsu_req_if lsu_req_if,
|
||||||
|
VX_csr_req_if csr_req_if,
|
||||||
|
VX_mul_req_if mul_req_if,
|
||||||
|
VX_gpu_req_if gpu_req_if
|
||||||
);
|
);
|
||||||
localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);
|
VX_execute_if execute_if();
|
||||||
|
|
||||||
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
VX_scheduler #(
|
||||||
reg [CTVW-1:0] count_valid;
|
.CORE_ID(CORE_ID)
|
||||||
|
) scheduler (
|
||||||
wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0);
|
.clk (clk),
|
||||||
wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0);
|
.reset (reset),
|
||||||
wire rd_rename = (rename_table[decode_if.warp_num][decode_if.rd ] != 0);
|
.decode_if (decode_if),
|
||||||
|
.writeback_if (writeback_if),
|
||||||
wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1);
|
.execute_if (execute_if),
|
||||||
wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2);
|
`UNUSED_PIN (is_empty)
|
||||||
wire rd_rename_qual = (rd_rename) && (decode_if.wb != 0);
|
|
||||||
|
|
||||||
wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual);
|
|
||||||
|
|
||||||
wire ex_stalled = (| decode_if.valid)
|
|
||||||
&& ((!execute_if.alu_ready && (decode_if.ex_type == `EX_ALU))
|
|
||||||
|| (!execute_if.br_ready && (decode_if.ex_type == `EX_BR))
|
|
||||||
|| (!execute_if.lsu_ready && (decode_if.ex_type == `EX_LSU))
|
|
||||||
|| (!execute_if.csr_ready && (decode_if.ex_type == `EX_CSR))
|
|
||||||
|| (!execute_if.mul_ready && (decode_if.ex_type == `EX_MUL))
|
|
||||||
|| (!execute_if.gpu_ready && (decode_if.ex_type == `EX_GPU)));
|
|
||||||
|
|
||||||
wire stall = rename_valid || ex_stalled;
|
|
||||||
|
|
||||||
wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && (decode_if.rd != 0) && ~stall;
|
|
||||||
|
|
||||||
wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);
|
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;
|
|
||||||
|
|
||||||
reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
|
|
||||||
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
|
|
||||||
count_valid;
|
|
||||||
integer i, w;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
for (w = 0; w < `NUM_WARPS; w++) begin
|
|
||||||
for (i = 0; i < 32; i++) begin
|
|
||||||
rename_table[w][i] <= 0;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
count_valid <= 0;
|
|
||||||
end else begin
|
|
||||||
if (acquire_rd) begin
|
|
||||||
rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
|
|
||||||
end
|
|
||||||
if (release_rd) begin
|
|
||||||
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
|
|
||||||
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
|
|
||||||
end
|
|
||||||
count_valid <= count_valid_next;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS),
|
|
||||||
) schedule_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (stall),
|
|
||||||
.flush (0),
|
|
||||||
.in ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.instr_op, decode_if.wb}),
|
|
||||||
.out ({execute_if.valid, execute_if.warp_num, execute_if.curr_PC, execute_if.next_PC, execute_if.rd, execute_if.rs1, execute_if.rs2, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.ex_type, execute_if.instr_op, execute_if.wb})
|
|
||||||
);
|
);
|
||||||
|
|
||||||
assign decode_if.ready = ~stall;
|
VX_gpr_stage #(
|
||||||
|
.CORE_ID(CORE_ID)
|
||||||
|
) gpr_stage (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
|
||||||
assign is_empty = (0 == count_valid);
|
.execute_if (execute_if),
|
||||||
|
.writeback_if (writeback_if),
|
||||||
|
|
||||||
|
.alu_req_if (alu_req_if),
|
||||||
|
.branch_req_if (branch_req_if),
|
||||||
|
.lsu_req_if (lsu_req_if),
|
||||||
|
.csr_req_if (csr_req_if),
|
||||||
|
.mul_req_if (mul_req_if),
|
||||||
|
.gpu_req_if (gpu_req_if)
|
||||||
|
);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -16,7 +16,7 @@ module VX_lsu_unit #(
|
|||||||
VX_lsu_req_if lsu_req_if,
|
VX_lsu_req_if lsu_req_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_wb_if lsu_wb_if
|
VX_commit_if lsu_commit_if
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] use_valid;
|
wire [`NUM_THREADS-1:0] use_valid;
|
||||||
@@ -108,7 +108,7 @@ module VX_lsu_unit #(
|
|||||||
.full (mrq_full),
|
.full (mrq_full),
|
||||||
.pop (mrq_pop),
|
.pop (mrq_pop),
|
||||||
.read_addr (mrq_read_addr),
|
.read_addr (mrq_read_addr),
|
||||||
.read_data ({dbg_mrq_write_addr, lsu_wb_if.curr_PC, lsu_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_wb_if.rd, lsu_wb_if.warp_num}),
|
.read_data ({dbg_mrq_write_addr, lsu_commit_if.curr_PC, lsu_commit_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_commit_if.rd, lsu_commit_if.warp_num}),
|
||||||
`UNUSED_PIN (empty)
|
`UNUSED_PIN (empty)
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -151,11 +151,11 @@ module VX_lsu_unit #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign lsu_wb_if.valid = dcache_rsp_if.valid;
|
assign lsu_commit_if.valid = dcache_rsp_if.valid;
|
||||||
assign lsu_wb_if.data = core_rsp_data;
|
assign lsu_commit_if.data = core_rsp_data;
|
||||||
|
|
||||||
// Can accept new cache response
|
// Can accept new cache response
|
||||||
assign dcache_rsp_if.ready = lsu_wb_if.ready;
|
assign dcache_rsp_if.ready = lsu_commit_if.ready;
|
||||||
|
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
|
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
|
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
|
||||||
@@ -180,7 +180,7 @@ module VX_lsu_unit #(
|
|||||||
end
|
end
|
||||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
||||||
$display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
$display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
||||||
$time, CORE_ID, lsu_wb_if.valid, lsu_wb_if.warp_num, lsu_wb_if.curr_PC, mrq_read_addr, lsu_wb_if.rd, lsu_wb_if.data);
|
$time, CORE_ID, lsu_commit_if.valid, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, mrq_read_addr, lsu_commit_if.rd, lsu_commit_if.data);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ module VX_mul_unit #(
|
|||||||
VX_mul_req_if mul_req_if,
|
VX_mul_req_if mul_req_if,
|
||||||
|
|
||||||
// Outputs
|
// Outputs
|
||||||
VX_wb_if mul_wb_if
|
VX_commit_if mul_commit_if
|
||||||
);
|
);
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
||||||
wire [`NUM_THREADS-1:0][63:0] mul_result;
|
wire [`NUM_THREADS-1:0][63:0] mul_result;
|
||||||
@@ -71,7 +71,7 @@ module VX_mul_unit #(
|
|||||||
`MUL_DIV,
|
`MUL_DIV,
|
||||||
`MUL_DIVU: alu_result[i] = (alu_in2[i] == 0) ? 32'hffffffff : div_result[i];
|
`MUL_DIVU: alu_result[i] = (alu_in2[i] == 0) ? 32'hffffffff : div_result[i];
|
||||||
`MUL_REM,
|
`MUL_REM,
|
||||||
`MUL_REMU: alu_result[i] = (alu_in2 == 0) ? alu_in1[i] : rem_result[i];
|
`MUL_REMU: alu_result[i] = (alu_in2[i] == 0) ? alu_in1[i] : rem_result[i];
|
||||||
default: alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC, FENCE
|
default: alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC, FENCE
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
@@ -104,7 +104,7 @@ module VX_mul_unit #(
|
|||||||
|
|
||||||
wire pipeline_stall = ~result_avail && (| mul_req_if.valid);
|
wire pipeline_stall = ~result_avail && (| mul_req_if.valid);
|
||||||
|
|
||||||
wire stall = (~mul_wb_if.ready && (| mul_wb_if.valid))
|
wire stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|
||||||
|| pipeline_stall;
|
|| pipeline_stall;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
@@ -115,7 +115,7 @@ module VX_mul_unit #(
|
|||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb, alu_result}),
|
.in ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb, alu_result}),
|
||||||
.out ({mul_wb_if.valid, mul_wb_if.warp_num, mul_wb_if.curr_PC, mul_wb_if.rd, mul_wb_if.wb, mul_wb_if.data})
|
.out ({mul_commit_if.valid, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign mul_req_if.ready = ~stall;
|
assign mul_req_if.ready = ~stall;
|
||||||
|
|||||||
@@ -101,21 +101,26 @@ module VX_pipeline #(
|
|||||||
assign csr_io_rsp_data = csr_io_rsp_if.data;
|
assign csr_io_rsp_data = csr_io_rsp_if.data;
|
||||||
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
|
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
|
||||||
|
|
||||||
|
VX_perf_cntrs_if perf_cntrs_if();
|
||||||
VX_decode_if decode_if();
|
VX_decode_if decode_if();
|
||||||
VX_execute_if execute_if();
|
VX_branch_ctl_if branch_ctl_if();
|
||||||
VX_branch_rsp_if branch_rsp_if();
|
|
||||||
VX_warp_ctl_if warp_ctl_if();
|
VX_warp_ctl_if warp_ctl_if();
|
||||||
VX_ifetch_rsp_if ifetch_rsp_if();
|
VX_ifetch_rsp_if ifetch_rsp_if();
|
||||||
|
VX_alu_req_if alu_req_if();
|
||||||
|
VX_branch_req_if branch_req_if();
|
||||||
|
VX_lsu_req_if lsu_req_if();
|
||||||
|
VX_csr_req_if csr_req_if();
|
||||||
|
VX_mul_req_if mul_req_if();
|
||||||
|
VX_gpu_req_if gpu_req_if();
|
||||||
VX_wb_if writeback_if();
|
VX_wb_if writeback_if();
|
||||||
VX_wstall_if wstall_if();
|
VX_wstall_if wstall_if();
|
||||||
VX_join_if join_if();
|
VX_join_if join_if();
|
||||||
VX_wb_if alu_wb_if();
|
VX_commit_if alu_commit_if();
|
||||||
VX_wb_if branch_wb_if();
|
VX_commit_if branch_commit_if();
|
||||||
VX_wb_if lsu_wb_if();
|
VX_commit_if lsu_commit_if();
|
||||||
VX_wb_if csr_wb_if();
|
VX_commit_if csr_commit_if();
|
||||||
VX_wb_if mul_wb_if();
|
VX_commit_if mul_commit_if();
|
||||||
|
VX_commit_if gpu_commit_if();
|
||||||
wire notify_commit;
|
|
||||||
|
|
||||||
VX_fetch #(
|
VX_fetch #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
@@ -127,7 +132,7 @@ module VX_pipeline #(
|
|||||||
.wstall_if (wstall_if),
|
.wstall_if (wstall_if),
|
||||||
.join_if (join_if),
|
.join_if (join_if),
|
||||||
.warp_ctl_if (warp_ctl_if),
|
.warp_ctl_if (warp_ctl_if),
|
||||||
.branch_rsp_if (branch_rsp_if),
|
.branch_ctl_if (branch_ctl_if),
|
||||||
.ifetch_rsp_if (ifetch_rsp_if),
|
.ifetch_rsp_if (ifetch_rsp_if),
|
||||||
.busy (busy)
|
.busy (busy)
|
||||||
);
|
);
|
||||||
@@ -148,10 +153,16 @@ module VX_pipeline #(
|
|||||||
) issue (
|
) issue (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
||||||
.decode_if (decode_if),
|
.decode_if (decode_if),
|
||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
.execute_if (execute_if),
|
|
||||||
`UNUSED_PIN (is_empty)
|
.alu_req_if (alu_req_if),
|
||||||
|
.branch_req_if (branch_req_if),
|
||||||
|
.lsu_req_if (lsu_req_if),
|
||||||
|
.csr_req_if (csr_req_if),
|
||||||
|
.mul_req_if (mul_req_if),
|
||||||
|
.gpu_req_if (gpu_req_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_execute #(
|
VX_execute #(
|
||||||
@@ -160,35 +171,49 @@ module VX_pipeline #(
|
|||||||
`SCOPE_SIGNALS_LSU_BIND
|
`SCOPE_SIGNALS_LSU_BIND
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
||||||
.dcache_req_if (core_dcache_req_if),
|
.dcache_req_if (core_dcache_req_if),
|
||||||
.dcache_rsp_if (core_dcache_rsp_if),
|
.dcache_rsp_if (core_dcache_rsp_if),
|
||||||
|
|
||||||
.csr_io_req_if (csr_io_req_if),
|
.csr_io_req_if (csr_io_req_if),
|
||||||
.csr_io_rsp_if (csr_io_rsp_if),
|
.csr_io_rsp_if (csr_io_rsp_if),
|
||||||
.execute_if (execute_if),
|
|
||||||
.writeback_if (writeback_if),
|
.perf_cntrs_if (perf_cntrs_if),
|
||||||
|
|
||||||
|
.alu_req_if (alu_req_if),
|
||||||
|
.branch_req_if (branch_req_if),
|
||||||
|
.lsu_req_if (lsu_req_if),
|
||||||
|
.csr_req_if (csr_req_if),
|
||||||
|
.mul_req_if (mul_req_if),
|
||||||
|
.gpu_req_if (gpu_req_if),
|
||||||
|
|
||||||
.warp_ctl_if (warp_ctl_if),
|
.warp_ctl_if (warp_ctl_if),
|
||||||
.branch_rsp_if (branch_rsp_if),
|
.branch_ctl_if (branch_ctl_if),
|
||||||
.alu_wb_if (alu_wb_if),
|
.alu_commit_if (alu_commit_if),
|
||||||
.branch_wb_if (branch_wb_if),
|
.branch_commit_if(branch_commit_if),
|
||||||
.lsu_wb_if (lsu_wb_if),
|
.lsu_commit_if (lsu_commit_if),
|
||||||
.csr_wb_if (csr_wb_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
.mul_wb_if (mul_wb_if),
|
.mul_commit_if (mul_commit_if),
|
||||||
.notify_commit (notify_commit),
|
.gpu_commit_if (gpu_commit_if),
|
||||||
|
|
||||||
.ebreak (ebreak)
|
.ebreak (ebreak)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_writeback #(
|
VX_commit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
) writeback (
|
) commit (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.alu_wb_if (alu_wb_if),
|
|
||||||
.branch_wb_if (branch_wb_if),
|
.alu_commit_if (alu_commit_if),
|
||||||
.lsu_wb_if (lsu_wb_if),
|
.branch_commit_if(branch_commit_if),
|
||||||
.csr_wb_if (csr_wb_if),
|
.lsu_commit_if (lsu_commit_if),
|
||||||
.mul_wb_if (mul_wb_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
|
.mul_commit_if (mul_commit_if),
|
||||||
|
.gpu_commit_if (gpu_commit_if),
|
||||||
|
|
||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
.notify_commit (notify_commit)
|
.perf_cntrs_if (perf_cntrs_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign dcache_req_valid = core_dcache_req_if.valid;
|
assign dcache_req_valid = core_dcache_req_if.valid;
|
||||||
@@ -223,12 +248,4 @@ module VX_pipeline #(
|
|||||||
`SCOPE_ASSIGN(scope_exec_delay, exec_delay);
|
`SCOPE_ASSIGN(scope_exec_delay, exec_delay);
|
||||||
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_delay);
|
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_delay);
|
||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if ((| execute_if.valid) && (~execute_if.alu_ready || ~execute_if.br_ready || ~execute_if.lsu_ready || ~execute_if.csr_ready || ~execute_if.mul_ready || ~execute_if.gpu_ready)) begin
|
|
||||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, alu=%b, br=%b, lsu=%b, csr=%b, mul=%b, gpu=%b", $time, CORE_ID, execute_if.warp_num, execute_if.curr_PC, ~execute_if.alu_ready, ~execute_if.br_ready, ~execute_if.lsu_ready, ~execute_if.csr_ready, ~execute_if.mul_ready, ~execute_if.gpu_ready);
|
|
||||||
end
|
|
||||||
end
|
|
||||||
`endif
|
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
86
hw/rtl/VX_scheduler.v
Normal file
86
hw/rtl/VX_scheduler.v
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
`include "VX_define.vh"

// Issue-side scheduler.
//
// Keeps, per warp and per architectural register, a thread mask of writes
// that have been issued but not yet written back (rename_table). A decoded
// instruction is held back (decode_if.ready low) while any register it uses
// still has a pending write, or while the execution unit selected by
// decode_if.ex_type reports not-ready. Otherwise the instruction is latched
// into execute_if through a pipeline register.
//
// Ports:
//   clk, reset    - clock and synchronous reset (reset clears the table and
//                   the pending-write counter).
//   decode_if     - decoded instruction in; `ready` is driven from here.
//   writeback_if  - register writeback stream; releases pending entries.
//   execute_if    - registered instruction out; per-unit *_ready are read.
//   is_empty      - high when no register write is outstanding.
module VX_scheduler #(
    parameter CORE_ID = 0
) (
    input wire      clk,
    input wire      reset,

    VX_decode_if    decode_if,
    VX_wb_if        writeback_if,

    VX_execute_if   execute_if,
    output wire     is_empty
);
    // Wide enough to count every register of every warp being in flight.
    localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);

    // rename_table[warp][reg] = mask of threads with a pending write to reg.
    reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
    // Number of registers that currently have a pending write.
    reg [CTVW-1:0] count_valid;

    wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0);
    wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0);
    wire rd_rename  = (rename_table[decode_if.warp_num][decode_if.rd ] != 0);

    // A pending entry only matters if the instruction actually uses it.
    wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1);
    wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2);
    wire rd_rename_qual  = (rd_rename)  && (decode_if.wb != 0);

    wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual);

    // The unit targeted by this instruction cannot accept it this cycle.
    wire ex_stalled = (| decode_if.valid)
                   && ((!execute_if.alu_ready && (decode_if.ex_type == `EX_ALU))
                    || (!execute_if.br_ready  && (decode_if.ex_type == `EX_BR))
                    || (!execute_if.lsu_ready && (decode_if.ex_type == `EX_LSU))
                    || (!execute_if.csr_ready && (decode_if.ex_type == `EX_CSR))
                    || (!execute_if.mul_ready && (decode_if.ex_type == `EX_MUL))
                    || (!execute_if.gpu_ready && (decode_if.ex_type == `EX_GPU)));

    wire stall = rename_valid || ex_stalled;

    // Issue of an instruction that writes a register marks rd as pending.
    wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && ~stall;

    // Any valid writeback clears the written threads from the rd entry.
    wire release_rd = (| writeback_if.valid);

    wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;

    // Must be a net: a `reg` declared with an initializer is assigned once at
    // simulation start, so the next-count value would never track
    // acquire/release and count_valid (hence is_empty) would be stuck.
    // The count changes only when an entry becomes pending or fully drains;
    // a simultaneous acquire and full release cancel out.
    wire [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
                                       (~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
                                                                                                   count_valid;
    integer i, w;

    always @(posedge clk) begin
        if (reset) begin
            for (w = 0; w < `NUM_WARPS; w++) begin
                for (i = 0; i < 32; i++) begin
                    rename_table[w][i] <= 0;
                end
            end
            count_valid <= 0;
        end else begin
            if (acquire_rd) begin
                rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
            end
            if (release_rd) begin
                // A writeback must correspond to a previously acquired entry.
                assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
                rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
            end
            count_valid <= count_valid_next;
        end
    end

    // Pipeline register between decode and execute; holds its value on stall.
    VX_generic_register #(
        .N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS)
    ) schedule_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (0),
        .in    ({decode_if.valid,  decode_if.warp_num,  decode_if.curr_PC,  decode_if.next_PC,  decode_if.rd,  decode_if.rs1,  decode_if.rs2,  decode_if.imm,  decode_if.rs1_is_PC,  decode_if.rs2_is_imm,  decode_if.ex_type,  decode_if.instr_op,  decode_if.wb}),
        .out   ({execute_if.valid, execute_if.warp_num, execute_if.curr_PC, execute_if.next_PC, execute_if.rd, execute_if.rs1, execute_if.rs2, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.ex_type, execute_if.instr_op, execute_if.wb})
    );

    assign decode_if.ready = ~stall;

    assign is_empty = (0 == count_valid);

endmodule
|
||||||
@@ -9,7 +9,7 @@ module VX_warp_sched #(
|
|||||||
VX_warp_ctl_if warp_ctl_if,
|
VX_warp_ctl_if warp_ctl_if,
|
||||||
VX_wstall_if wstall_if,
|
VX_wstall_if wstall_if,
|
||||||
VX_join_if join_if,
|
VX_join_if join_if,
|
||||||
VX_branch_rsp_if branch_rsp_if,
|
VX_branch_ctl_if branch_ctl_if,
|
||||||
|
|
||||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||||
VX_ifetch_req_if ifetch_req_if,
|
VX_ifetch_req_if ifetch_req_if,
|
||||||
@@ -158,11 +158,11 @@ module VX_warp_sched #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Branch
|
// Branch
|
||||||
if (branch_rsp_if.valid) begin
|
if (branch_ctl_if.valid) begin
|
||||||
if (branch_rsp_if.taken) begin
|
if (branch_ctl_if.taken) begin
|
||||||
warp_pcs[branch_rsp_if.warp_num] <= branch_rsp_if.dest;
|
warp_pcs[branch_ctl_if.warp_num] <= branch_ctl_if.dest;
|
||||||
end
|
end
|
||||||
warp_stalled[branch_rsp_if.warp_num] <= 0;
|
warp_stalled[branch_ctl_if.warp_num] <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Lock/Release
|
// Lock/Release
|
||||||
@@ -230,7 +230,7 @@ module VX_warp_sched #(
|
|||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
wire should_bra = (branch_rsp_if.valid && branch_rsp_if.taken && (warp_to_schedule == branch_rsp_if.warp_num));
|
wire should_bra = (branch_ctl_if.valid && branch_ctl_if.taken && (warp_to_schedule == branch_ctl_if.warp_num));
|
||||||
|
|
||||||
assign hazard = should_bra && schedule;
|
assign hazard = should_bra && schedule;
|
||||||
|
|
||||||
@@ -244,7 +244,7 @@ module VX_warp_sched #(
|
|||||||
|
|
||||||
assign warp_pc = real_use_wspawn ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
assign warp_pc = real_use_wspawn ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
||||||
|
|
||||||
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]);
|
assign thread_mask = global_stall ? 0 : (real_use_wspawn ? `NUM_THREADS'(1) : thread_masks[warp_to_schedule]);
|
||||||
|
|
||||||
assign warp_num = warp_to_schedule;
|
assign warp_num = warp_to_schedule;
|
||||||
|
|
||||||
|
|||||||
@@ -3,109 +3,84 @@
|
|||||||
module VX_writeback #(
|
module VX_writeback #(
|
||||||
parameter CORE_ID = 0
|
parameter CORE_ID = 0
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
// inputs
|
// inputs
|
||||||
VX_wb_if alu_wb_if,
|
VX_commit_if alu_commit_if,
|
||||||
VX_wb_if branch_wb_if,
|
VX_commit_if branch_commit_if,
|
||||||
VX_wb_if lsu_wb_if,
|
VX_commit_if lsu_commit_if,
|
||||||
VX_wb_if mul_wb_if,
|
VX_commit_if mul_commit_if,
|
||||||
VX_wb_if csr_wb_if,
|
VX_commit_if csr_commit_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_wb_if writeback_if,
|
VX_wb_if writeback_if
|
||||||
output wire notify_commit
|
|
||||||
);
|
);
|
||||||
|
|
||||||
wire br_valid = (| branch_wb_if.valid);
|
wire br_valid = (| branch_commit_if.valid) && (branch_commit_if.wb != `WB_NO);
|
||||||
wire lsu_valid = (| lsu_wb_if.valid);
|
wire lsu_valid = (| lsu_commit_if.valid) && (lsu_commit_if.wb != `WB_NO);
|
||||||
wire mul_valid = (| mul_wb_if.valid);
|
wire mul_valid = (| mul_commit_if.valid) && (mul_commit_if.wb != `WB_NO);
|
||||||
wire alu_valid = (| alu_wb_if.valid);
|
wire alu_valid = (| alu_commit_if.valid) && (alu_commit_if.wb != `WB_NO);
|
||||||
wire csr_valid = (| csr_wb_if.valid);
|
wire csr_valid = (| csr_commit_if.valid) && (csr_commit_if.wb != `WB_NO);
|
||||||
|
|
||||||
VX_wb_if writeback_tmp_if();
|
VX_wb_if writeback_tmp_if();
|
||||||
|
|
||||||
assign writeback_tmp_if.valid = br_valid ? branch_wb_if.valid :
|
assign writeback_tmp_if.valid = br_valid ? branch_commit_if.valid :
|
||||||
lsu_valid ? lsu_wb_if.valid :
|
lsu_valid ? lsu_commit_if.valid :
|
||||||
mul_valid ? mul_wb_if.valid :
|
mul_valid ? mul_commit_if.valid :
|
||||||
alu_valid ? alu_wb_if.valid :
|
alu_valid ? alu_commit_if.valid :
|
||||||
csr_valid ? csr_wb_if.valid :
|
csr_valid ? csr_commit_if.valid :
|
||||||
0;
|
0;
|
||||||
|
|
||||||
assign writeback_tmp_if.warp_num = br_valid ? branch_wb_if.warp_num :
|
assign writeback_tmp_if.warp_num = br_valid ? branch_commit_if.warp_num :
|
||||||
lsu_valid ? lsu_wb_if.warp_num :
|
lsu_valid ? lsu_commit_if.warp_num :
|
||||||
mul_valid ? mul_wb_if.warp_num :
|
mul_valid ? mul_commit_if.warp_num :
|
||||||
alu_valid ? alu_wb_if.warp_num :
|
alu_valid ? alu_commit_if.warp_num :
|
||||||
csr_valid ? csr_wb_if.warp_num :
|
csr_valid ? csr_commit_if.warp_num :
|
||||||
|
|
||||||
0;
|
0;
|
||||||
|
|
||||||
assign writeback_tmp_if.curr_PC = br_valid ? branch_wb_if.curr_PC :
|
assign writeback_tmp_if.data = br_valid ? branch_commit_if.data :
|
||||||
lsu_valid ? lsu_wb_if.curr_PC :
|
lsu_valid ? lsu_commit_if.data :
|
||||||
mul_valid ? mul_wb_if.curr_PC :
|
mul_valid ? mul_commit_if.data :
|
||||||
alu_valid ? alu_wb_if.curr_PC :
|
alu_valid ? alu_commit_if.data :
|
||||||
csr_valid ? csr_wb_if.curr_PC :
|
csr_valid ? csr_commit_if.data :
|
||||||
0;
|
0;
|
||||||
|
|
||||||
assign writeback_tmp_if.data = br_valid ? branch_wb_if.data :
|
assign writeback_tmp_if.rd = br_valid ? branch_commit_if.rd :
|
||||||
lsu_valid ? lsu_wb_if.data :
|
lsu_valid ? lsu_commit_if.rd :
|
||||||
mul_valid ? mul_wb_if.data :
|
mul_valid ? mul_commit_if.rd :
|
||||||
alu_valid ? alu_wb_if.data :
|
alu_valid ? alu_commit_if.rd :
|
||||||
csr_valid ? csr_wb_if.data :
|
csr_valid ? csr_commit_if.rd :
|
||||||
0;
|
|
||||||
|
|
||||||
assign writeback_tmp_if.rd = br_valid ? branch_wb_if.rd :
|
|
||||||
lsu_valid ? lsu_wb_if.rd :
|
|
||||||
mul_valid ? mul_wb_if.rd :
|
|
||||||
alu_valid ? alu_wb_if.rd :
|
|
||||||
csr_valid ? csr_wb_if.rd :
|
|
||||||
0;
|
|
||||||
|
|
||||||
assign writeback_tmp_if.wb = br_valid ? branch_wb_if.wb :
|
|
||||||
lsu_valid ? lsu_wb_if.wb :
|
|
||||||
alu_valid ? alu_wb_if.wb :
|
|
||||||
csr_valid ? csr_wb_if.wb :
|
|
||||||
mul_valid ? mul_wb_if.wb :
|
|
||||||
0;
|
0;
|
||||||
|
|
||||||
wire stall = ~writeback_if.ready && (| writeback_if.valid);
|
wire stall = ~writeback_if.ready && (| writeback_if.valid);
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + (`NUM_THREADS * 32) + `WB_BITS)
|
.N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32))
|
||||||
) wb_reg (
|
) wb_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.rd, writeback_tmp_if.data, writeback_tmp_if.wb}),
|
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data}),
|
||||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.curr_PC, writeback_if.rd, writeback_if.data, writeback_if.wb})
|
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign branch_wb_if.ready = !stall;
|
assign branch_commit_if.ready = !stall;
|
||||||
assign lsu_wb_if.ready = !stall && !br_valid;
|
assign lsu_commit_if.ready = !stall && !br_valid;
|
||||||
assign mul_wb_if.ready = !stall && !br_valid && !lsu_valid;
|
assign mul_commit_if.ready = !stall && !br_valid && !lsu_valid;
|
||||||
assign alu_wb_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid;
|
assign alu_commit_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid;
|
||||||
assign csr_wb_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid && !alu_valid;
|
assign csr_commit_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid && !alu_valid;
|
||||||
|
|
||||||
assign notify_commit = (| writeback_tmp_if.valid) && ~stall;
|
|
||||||
|
|
||||||
// special workaround to control RISC-V benchmarks termination on Verilator
|
// special workaround to control RISC-V benchmarks termination on Verilator
|
||||||
reg [31:0] last_data_wb /* verilator public */;
|
reg [31:0] last_data_wb /* verilator public */;
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (notify_commit && (writeback_tmp_if.wb != 0) && (writeback_tmp_if.rd == 28)) begin
|
if ((| writeback_tmp_if.valid) && ~stall && (writeback_tmp_if.rd == 28)) begin
|
||||||
last_data_wb <= writeback_tmp_if.data[0];
|
last_data_wb <= writeback_tmp_if.data[0];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if ((| writeback_tmp_if.valid) && ~stall) begin
|
|
||||||
$display("%t: Core%0d-WB: warp=%0d, PC=%0h, rd=%0d, wb=%0d, data=%0h", $time, CORE_ID, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.data);
|
|
||||||
end
|
|
||||||
end
|
|
||||||
`endif
|
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
2
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
2
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
@@ -46,7 +46,7 @@ module VX_cache_core_rsp_merge #(
|
|||||||
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||||
reg [NUM_BANKS-1:0] core_rsp_bank_select;
|
reg [NUM_BANKS-1:0] core_rsp_bank_select;
|
||||||
|
|
||||||
wire stall = ~core_rsp_ready;
|
wire stall = ~core_rsp_ready && (| core_rsp_valid);
|
||||||
|
|
||||||
integer i;
|
integer i;
|
||||||
|
|
||||||
|
|||||||
15
hw/rtl/interfaces/VX_branch_ctl_if.v
Normal file
15
hw/rtl/interfaces/VX_branch_ctl_if.v
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
// Include guard named after this interface. The previous guard reused
// VX_BRANCH_RSP_IF, which would collide with a VX_branch_rsp_if.v guard of
// the same name if that file is still part of the build.
`ifndef VX_BRANCH_CTL_IF
`define VX_BRANCH_CTL_IF

`include "VX_define.vh"

// Branch resolution feedback to the warp scheduler.
// VX_warp_sched consumes it as follows: when `valid` is set the warp
// `warp_num` is un-stalled, and if `taken` is also set its PC is replaced
// by `dest`.
interface VX_branch_ctl_if ();

    wire                valid;     // a branch has been resolved this cycle
    wire [`NW_BITS-1:0] warp_num;  // warp the branch belongs to
    wire                taken;     // branch redirects the warp PC
    wire [31:0]         dest;      // target PC, used when taken

endinterface

`endif
|
||||||
19
hw/rtl/interfaces/VX_commit_if.v
Normal file
19
hw/rtl/interfaces/VX_commit_if.v
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
`ifndef VX_COMMIT_IF
`define VX_COMMIT_IF

`include "VX_define.vh"

// Result channel from an execution unit towards commit/writeback.
// One instance exists per unit (alu, branch, lsu, csr, mul, gpu).
interface VX_commit_if ();

    // Handshake
    wire [`NUM_THREADS-1:0]         valid;     // one bit per thread
    wire                            ready;     // back-pressure from the consumer

    // Instruction identity
    wire [`NW_BITS-1:0]             warp_num;
    wire [31:0]                     curr_PC;

    // Register result
    wire [`NR_BITS-1:0]             rd;        // destination register
    wire [`WB_BITS-1:0]             wb;        // `WB_NO when nothing is written back
    wire [`NUM_THREADS-1:0][31:0]   data;      // one 32-bit value per thread

    // NOTE(review): no use of is_io is visible in this change; confirm its meaning.
    wire                            is_io;

endinterface

`endif
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
interface VX_execute_if();
|
interface VX_execute_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire [`NUM_THREADS-1:0] valid;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
|
|||||||
@@ -7,12 +7,13 @@ interface VX_gpu_req_if();
|
|||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire [`NUM_THREADS-1:0] valid;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [31:0] next_PC;
|
wire [31:0] curr_PC;
|
||||||
|
|
||||||
wire [`GPU_BITS-1:0] gpu_op;
|
wire [`GPU_BITS-1:0] gpu_op;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
wire [31:0] rs2_data;
|
wire [31:0] rs2_data;
|
||||||
|
wire [31:0] next_PC;
|
||||||
|
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
|
|||||||
13
hw/rtl/interfaces/VX_perf_cntrs_if.v
Normal file
13
hw/rtl/interfaces/VX_perf_cntrs_if.v
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
`ifndef VX_PERF_CNTRS_IF
`define VX_PERF_CNTRS_IF

`include "VX_define.vh"

// Performance counter bundle shared between the commit and execute stages
// (both are connected to it in VX_pipeline).
// NOTE(review): which side drives the counters is not visible here; confirm.
interface VX_perf_cntrs_if ();

    // 64-bit running totals.
    wire [63:0] total_cycles,
                total_instrs;

endinterface

`endif
|
||||||
@@ -7,11 +7,8 @@ interface VX_wb_if ();
|
|||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire [`NUM_THREADS-1:0] valid;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [31:0] curr_PC;
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] data;
|
wire [`NUM_THREADS-1:0][31:0] data;
|
||||||
wire [`NR_BITS-1:0] rd;
|
wire [`NR_BITS-1:0] rd;
|
||||||
wire [`WB_BITS-1:0] wb;
|
|
||||||
wire is_io;
|
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|||||||
@@ -257,12 +257,12 @@ bool Simulator::run() {
|
|||||||
|
|
||||||
// check riscv-tests PASSED/FAILED status
|
// check riscv-tests PASSED/FAILED status
|
||||||
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
|
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
|
||||||
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf;
|
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
|
||||||
#else
|
#else
|
||||||
#if (NUM_CLUSTERS == 1)
|
#if (NUM_CLUSTERS == 1)
|
||||||
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf;
|
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
|
||||||
#else
|
#else
|
||||||
int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf;
|
int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user