sync rf, x0 fix

This commit is contained in:
Richard Yan
2024-09-05 16:49:01 -07:00
parent 2b1a9b7c16
commit 3f8c28c7d6
6 changed files with 203 additions and 138 deletions

View File

@@ -84,7 +84,7 @@
#endif #endif
#ifndef NUM_CORES #ifndef NUM_CORES
#define NUM_CORES 4 #define NUM_CORES 8
#endif #endif
#ifndef NUM_WARPS #ifndef NUM_WARPS

View File

@@ -83,7 +83,7 @@
`endif `endif
`ifndef NUM_CORES `ifndef NUM_CORES
`define NUM_CORES 4 `define NUM_CORES 8
`endif `endif
`ifndef NUM_WARPS `ifndef NUM_WARPS
@@ -179,7 +179,7 @@
`endif `endif
`ifndef SMEM_LOG_SIZE `ifndef SMEM_LOG_SIZE
`define SMEM_LOG_SIZE 17 `define SMEM_LOG_SIZE 19
`endif `endif
`ifndef IO_BASE_ADDR `ifndef IO_BASE_ADDR

View File

@@ -33,7 +33,7 @@
`ifdef SYNTHESIS `ifdef SYNTHESIS
`define NUM_BARRIERS 8 `define NUM_BARRIERS 8
`define NUM_CORES 4 `define NUM_CORES 8
`define NUM_THREADS 8 `define NUM_THREADS 8
`define NUM_WARPS 8 `define NUM_WARPS 8
@@ -60,6 +60,8 @@
`endif `endif
`ifdef SYNTHESIS `ifdef SYNTHESIS
`define TRACE(level, args) $write args
`define TRACE_STARTTIME 32'd10
`define TRACING_ON `define TRACING_ON
`define TRACING_OFF `define TRACING_OFF
`ifndef NDEBUG `ifndef NDEBUG

View File

@@ -53,7 +53,7 @@ module VX_operands import VX_gpu_pkg::*; #(
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n; reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n; reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
reg [STATE_BITS-1:0] state, state_n; reg [STATE_BITS-1:0] state, state_n, state_p;
reg [`NR_BITS-1:0] rs2, rs2_n; reg [`NR_BITS-1:0] rs2, rs2_n;
reg [`NR_BITS-1:0] rs3, rs3_n; reg [`NR_BITS-1:0] rs3, rs3_n;
reg rs2_ready, rs2_ready_n; reg rs2_ready, rs2_ready_n;
@@ -175,10 +175,12 @@ module VX_operands import VX_gpu_pkg::*; #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= STATE_IDLE; state <= STATE_IDLE;
state_p <= STATE_IDLE;
cache_eop <= {ISSUE_RATIO{1'b1}}; cache_eop <= {ISSUE_RATIO{1'b1}};
data_ready <= 0; data_ready <= 0;
end else begin end else begin
state <= state_n; state <= state_n;
state_p <= state;
cache_eop <= cache_eop_n; cache_eop <= cache_eop_n;
data_ready <= data_ready_n; data_ready <= data_ready_n;
end end
@@ -190,7 +192,7 @@ module VX_operands import VX_gpu_pkg::*; #(
rs3 <= rs3_n; rs3 <= rs3_n;
rs1_data <= rs1_data_n; rs1_data <= rs1_data_n;
rs2_data <= rs2_data_n; rs2_data <= rs2_data_n;
rs3_data <= rs3_data_n; rs3_data <= rs3_data_n;
cache_data <= cache_data_n; cache_data <= cache_data_n;
cache_reg <= cache_reg_n; cache_reg <= cache_reg_n;
cache_tmask <= cache_tmask_n; cache_tmask <= cache_tmask_n;
@@ -242,9 +244,9 @@ module VX_operands import VX_gpu_pkg::*; #(
.ready_out (operands_if[i].ready) .ready_out (operands_if[i].ready)
); );
assign operands_if[i].data.rs1_data = rs1_data; assign operands_if[i].data.rs1_data = (state_p == STATE_FETCH1) ? gpr_rd_data : rs1_data;
assign operands_if[i].data.rs2_data = rs2_data; assign operands_if[i].data.rs2_data = (state_p == STATE_FETCH2) ? gpr_rd_data : rs2_data;
assign operands_if[i].data.rs3_data = rs3_data; assign operands_if[i].data.rs3_data = (state_p == STATE_FETCH3) ? gpr_rd_data : rs3_data;
// GPR banks // GPR banks
@@ -279,7 +281,8 @@ module VX_operands import VX_gpu_pkg::*; #(
.INIT_ENABLE (1), .INIT_ENABLE (1),
.INIT_VALUE (0), .INIT_VALUE (0),
`endif `endif
.NO_RWCHECK (1) .NO_RWCHECK (1),
.OUT_REG (1),
) gpr_ram ( ) gpr_ram (
.clk (clk), .clk (clk),
.read (1'b1), .read (1'b1),

View File

@@ -35,18 +35,26 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
logic [`ISSUE_WIDTH-1:0][`PERF_CTR_BITS-1:0] perf_rf_write_per_warp; logic [`ISSUE_WIDTH-1:0][`PERF_CTR_BITS-1:0] perf_rf_write_per_warp;
`endif `endif
logic [`ISSUE_WIDTH-1:0][DATAW-1:0] scoreboard_if_stored;
logic [`ISSUE_WIDTH-1:0] scoreboard_if_stored_valid;
logic [`ISSUE_WIDTH-1:0] full1;
logic [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] full2;
logic [`ISSUE_WIDTH-1:0] empty1;
logic [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] empty2;
logic [`ISSUE_WIDTH-1:0][2:0] size1;
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
VX_stream_buffer #(
.DATAW (DATAW) always @(posedge clk) begin
) staging_buffer ( if (reset) begin
.clk (clk), scoreboard_if_stored[i] <= '0;
.reset (reset), scoreboard_if_stored_valid[i] <= '0;
.valid_in (scoreboard_if[i].valid), end else begin
.data_in ({ scoreboard_if_stored[i] <= {
scoreboard_if[i].data.uuid, scoreboard_if[i].data.uuid,
scoreboard_if[i].data.wis, scoreboard_if[i].data.wis,
scoreboard_if[i].data.tmask, scoreboard_if[i].data.tmask,
scoreboard_if[i].data.PC, scoreboard_if[i].data.PC,
scoreboard_if[i].data.wb, scoreboard_if[i].data.wb,
scoreboard_if[i].data.ex_type, scoreboard_if[i].data.ex_type,
scoreboard_if[i].data.op_type, scoreboard_if[i].data.op_type,
@@ -55,14 +63,27 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
scoreboard_if[i].data.use_imm, scoreboard_if[i].data.use_imm,
scoreboard_if[i].data.imm, scoreboard_if[i].data.imm,
scoreboard_if[i].data.rd scoreboard_if[i].data.rd
}), };
.ready_in (scoreboard_if[i].ready), scoreboard_if_stored_valid[i] <= scoreboard_if[i].valid && scoreboard_if[i].ready;
.valid_out (operands_if[i].valid), end
.data_out ({ end
VX_fifo_queue #(
.DATAW (DATAW),
.DEPTH (4), // could be 3 but limited by power of 2
.OUT_REG (0),
.LUTRAM (0)
) fifo_queue (
.clk (clk),
.reset (reset),
.push (scoreboard_if_stored_valid[i]),
.pop (operands_if[i].ready && ~empty1[i]),
.data_in (scoreboard_if_stored[i]),
.data_out ({
operands_if[i].data.uuid, operands_if[i].data.uuid,
operands_if[i].data.wis, operands_if[i].data.wis,
operands_if[i].data.tmask, operands_if[i].data.tmask,
operands_if[i].data.PC, operands_if[i].data.PC,
operands_if[i].data.wb, operands_if[i].data.wb,
operands_if[i].data.ex_type, operands_if[i].data.ex_type,
operands_if[i].data.op_type, operands_if[i].data.op_type,
@@ -72,31 +93,52 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
operands_if[i].data.imm, operands_if[i].data.imm,
operands_if[i].data.rd operands_if[i].data.rd
}), }),
.ready_out (operands_if[i].ready) .empty (empty1[i]),
.full (full1[i]),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
.size (size1[i])
); );
assign operands_if[i].valid = ~empty1[i];
assign scoreboard_if[i].ready = (size1[i] < 2'd2);
// assert (full1[i] == full2[i]);
// assert (empty1[i] == empty2[i]);
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data; wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data; wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data; wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
reg [RAM_ADDRW-1:0] gpr_rd_addr_rs1_stored;
reg [RAM_ADDRW-1:0] gpr_rd_addr_rs2_stored;
reg [RAM_ADDRW-1:0] gpr_rd_addr_rs3_stored;
for (genvar j = 0; j < `NUM_THREADS; ++j) begin for (genvar j = 0; j < `NUM_THREADS; ++j) begin
VX_stream_buffer #( VX_fifo_queue #(
.DATAW (`XLEN + `XLEN + `XLEN) .DATAW (`XLEN + `XLEN + `XLEN),
) staging_data_buffer ( .DEPTH (4),
.clk (clk), .OUT_REG (0),
.reset (reset), .LUTRAM (0)
.valid_in (scoreboard_if[i].valid), ) fifo_queue (
.data_in ({ .clk (clk),
rs1_data[j], rs2_data[j], rs3_data[j] .reset (reset),
.push (scoreboard_if_stored_valid[i]),
.pop (operands_if[i].ready && ~empty2[i][0]),
.data_in ({
(gpr_rd_addr_rs1_stored == '0) ? 32'd0 : rs1_data[j],
(gpr_rd_addr_rs2_stored == '0) ? 32'd0 : rs2_data[j],
(gpr_rd_addr_rs3_stored == '0) ? 32'd0 : rs3_data[j]
}), }),
`UNUSED_PIN (ready_in), .data_out ({
`UNUSED_PIN (valid_out), operands_if[i].data.rs1_data[j],
.data_out ({ operands_if[i].data.rs2_data[j],
operands_if[i].data.rs1_data[j], operands_if[i].data.rs3_data[j]
operands_if[i].data.rs2_data[j],
operands_if[i].data.rs3_data[j]
}), }),
.ready_out (operands_if[i].ready) .empty (empty2[i][j]),
.full (full2[i][j]),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
); );
end end
@@ -106,6 +148,19 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
wire [RAM_ADDRW-1:0] gpr_rd_addr_rs2; wire [RAM_ADDRW-1:0] gpr_rd_addr_rs2;
wire [RAM_ADDRW-1:0] gpr_rd_addr_rs3; wire [RAM_ADDRW-1:0] gpr_rd_addr_rs3;
wire [RAM_ADDRW-1:0] gpr_wr_addr; wire [RAM_ADDRW-1:0] gpr_wr_addr;
always @(posedge clk) begin
if (reset) begin
gpr_rd_addr_rs1_stored <= '0;
gpr_rd_addr_rs2_stored <= '0;
gpr_rd_addr_rs3_stored <= '0;
end else begin
gpr_rd_addr_rs1_stored <= gpr_rd_addr_rs1;
gpr_rd_addr_rs2_stored <= gpr_rd_addr_rs2;
gpr_rd_addr_rs3_stored <= gpr_rd_addr_rs3;
end
end
if (ISSUE_WIS != 0) begin if (ISSUE_WIS != 0) begin
assign gpr_wr_addr = {writeback_if[i].data.wis, writeback_if[i].data.rd}; assign gpr_wr_addr = {writeback_if[i].data.wis, writeback_if[i].data.rd};
assign gpr_rd_addr_rs1 = {scoreboard_if[i].data.wis, scoreboard_if[i].data.rs1}; assign gpr_rd_addr_rs1 = {scoreboard_if[i].data.wis, scoreboard_if[i].data.rs1};
@@ -165,6 +220,7 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
VX_dp_ram #( VX_dp_ram #(
.DATAW (`XLEN), .DATAW (`XLEN),
.SIZE (`NUM_REGS * ISSUE_RATIO), .SIZE (`NUM_REGS * ISSUE_RATIO),
.OUT_REG (1),
`ifdef GPR_RESET `ifdef GPR_RESET
.INIT_ENABLE (1), .INIT_ENABLE (1),
.INIT_VALUE (0), .INIT_VALUE (0),
@@ -188,6 +244,7 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
VX_dp_ram #( VX_dp_ram #(
.DATAW (`XLEN), .DATAW (`XLEN),
.SIZE (`NUM_REGS * ISSUE_RATIO), .SIZE (`NUM_REGS * ISSUE_RATIO),
.OUT_REG (1),
`ifdef GPR_RESET `ifdef GPR_RESET
.INIT_ENABLE (1), .INIT_ENABLE (1),
.INIT_VALUE (0), .INIT_VALUE (0),
@@ -211,6 +268,7 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
VX_dp_ram #( VX_dp_ram #(
.DATAW (`XLEN), .DATAW (`XLEN),
.SIZE (`NUM_REGS * ISSUE_RATIO), .SIZE (`NUM_REGS * ISSUE_RATIO),
.OUT_REG (1),
`ifdef GPR_RESET `ifdef GPR_RESET
.INIT_ENABLE (1), .INIT_ENABLE (1),
.INIT_VALUE (0), .INIT_VALUE (0),

View File

@@ -161,75 +161,76 @@ module VX_dp_ram #(
end end
end else begin end else begin
`ifndef FIRESIM `ifndef FIRESIM
if (DATAW == 1024 && SIZE == 16) begin // dcache data // if (DATAW == 1024 && SIZE == 16) begin // dcache data
(* dont_touch = "yes" *) dcache_data ram ( // (* dont_touch = "yes" *) dcache_data ram (
// .R0_addr(raddr),
// .R0_clk(clk),
// .R0_data(/*rdata*/),
// .R0_en(read),
// .W0_addr(waddr),
// .W0_clk(clk),
// .W0_data(wdata),
// .W0_en(write),
// .W0_mask(wren)
// );
// end else if (DATAW == 305 && SIZE == 8) begin // mshr
// (* dont_touch = "yes" *) cache_mshr ram (
// .R0_addr(raddr),
// .R0_clk(clk),
// .R0_data(/*rdata*/),
// .R0_en(read),
// .W0_addr(waddr),
// .W0_clk(clk),
// .W0_data(wdata),
// .W0_en(write)
// );
// end else if (DATAW == 24 && SIZE == 16) begin // dcache tags
// (* dont_touch = "yes" *) dcache_tags ram (
// .R0_addr(raddr),
// .R0_clk(clk),
// .R0_data(/*rdata*/),
// .R0_en(read),
// .W0_addr(waddr),
// .W0_clk(clk),
// .W0_data(wdata),
// .W0_en(write)
// );
// end else if (DATAW == 1024 && SIZE == 128) begin // icache data
// (* dont_touch = "yes" *) icache_data ram (
// .R0_addr(raddr),
// .R0_clk(clk),
// .R0_data(/*rdata*/),
// .R0_en(read),
// .W0_addr(waddr),
// .W0_clk(clk),
// .W0_data(wdata),
// .W0_en(write),
// .W0_mask(wren)
// );
// end else if (DATAW == 21 && SIZE == 128) begin // icache tags
// (* dont_touch = "yes" *) icache_tags ram (
// .R0_addr(raddr),
// .R0_clk(clk),
// .R0_data(/*rdata*/),
// .R0_en(read),
// .W0_addr(waddr),
// .W0_clk(clk),
// .W0_data(wdata),
// .W0_en(write)
// );
// end else if (DATAW == 32 && SIZE == 64) begin // register file
if (DATAW == 32 && SIZE == 64) begin // register file
rf_bank ram (
.R0_addr(raddr), .R0_addr(raddr),
.R0_clk(clk), .R0_clk(clk),
.R0_data(/*rdata*/), .R0_data(rdata),
.R0_en(read),
.W0_addr(waddr),
.W0_clk(clk),
.W0_data(wdata),
.W0_en(write),
.W0_mask(wren)
);
end else if (DATAW == 305 && SIZE == 8) begin // mshr
(* dont_touch = "yes" *) cache_mshr ram (
.R0_addr(raddr),
.R0_clk(clk),
.R0_data(/*rdata*/),
.R0_en(read), .R0_en(read),
.W0_addr(waddr), .W0_addr(waddr),
.W0_clk(clk), .W0_clk(clk),
.W0_data(wdata), .W0_data(wdata),
.W0_en(write) .W0_en(write)
); );
end else if (DATAW == 24 && SIZE == 16) begin // dcache tags end else begin
(* dont_touch = "yes" *) dcache_tags ram (
.R0_addr(raddr),
.R0_clk(clk),
.R0_data(/*rdata*/),
.R0_en(read),
.W0_addr(waddr),
.W0_clk(clk),
.W0_data(wdata),
.W0_en(write)
);
end else if (DATAW == 1024 && SIZE == 128) begin // icache data
(* dont_touch = "yes" *) icache_data ram (
.R0_addr(raddr),
.R0_clk(clk),
.R0_data(/*rdata*/),
.R0_en(read),
.W0_addr(waddr),
.W0_clk(clk),
.W0_data(wdata),
.W0_en(write),
.W0_mask(wren)
);
end else if (DATAW == 21 && SIZE == 128) begin // icache tags
(* dont_touch = "yes" *) icache_tags ram (
.R0_addr(raddr),
.R0_clk(clk),
.R0_data(/*rdata*/),
.R0_en(read),
.W0_addr(waddr),
.W0_clk(clk),
.W0_data(wdata),
.W0_en(write)
);
end else if (DATAW == 32 && SIZE == 64) begin // register file
(* dont_touch = "yes" *) rf_bank ram (
.R0_addr(raddr),
.R0_clk(clk),
.R0_data(/*rdata*/),
.R0_en(read),
.W0_addr(waddr),
.W0_clk(clk),
.W0_data(wdata),
.W0_en(write)
);
end // else begin
`endif `endif
if (OUT_REG != 0) begin if (OUT_REG != 0) begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [SIZE-1:0];
@@ -275,7 +276,7 @@ module VX_dp_ram #(
end end
end end
`ifndef FIRESIM `ifndef FIRESIM
// end end
`endif `endif
end end
`endif `endif
@@ -304,51 +305,52 @@ module VX_dp_ram #(
assign rdata = ram[raddr]; assign rdata = ram[raddr];
end end
end else begin end else begin
if (DATAW == 305 && SIZE == 8) begin // mshr // if (DATAW == 305 && SIZE == 8) begin // mshr
(* dont_touch = "yes" *) cache_mshr ram ( // (* dont_touch = "yes" *) cache_mshr ram (
// .R0_addr(raddr),
// .R0_clk(clk),
// .R0_data(/*rdata*/),
// .R0_en(read),
// .W0_addr(waddr),
// .W0_clk(clk),
// .W0_data(wdata),
// .W0_en(write)
// );
// end else if (DATAW == 24 && SIZE == 16) begin // dcache tags
// (* dont_touch = "yes" *) dcache_tags ram (
// .R0_addr(raddr),
// .R0_clk(clk),
// .R0_data(/*rdata*/),
// .R0_en(read),
// .W0_addr(waddr),
// .W0_clk(clk),
// .W0_data(wdata),
// .W0_en(write)
// );
// end else if (DATAW == 21 && SIZE == 128) begin // icache tags
// (* dont_touch = "yes" *) icache_tags ram (
// .R0_addr(raddr),
// .R0_clk(clk),
// .R0_data(/*rdata*/),
// .R0_en(read),
// .W0_addr(waddr),
// .W0_clk(clk),
// .W0_data(wdata),
// .W0_en(write)
// );
// end else if (DATAW == 32 && SIZE == 64) begin // register file
if (DATAW == 32 && SIZE == 64) begin // register file
rf_bank ram (
.R0_addr(raddr), .R0_addr(raddr),
.R0_clk(clk), .R0_clk(clk),
.R0_data(/*rdata*/), .R0_data(rdata),
.R0_en(read), .R0_en(read),
.W0_addr(waddr), .W0_addr(waddr),
.W0_clk(clk), .W0_clk(clk),
.W0_data(wdata), .W0_data(wdata),
.W0_en(write) .W0_en(write)
); );
end else if (DATAW == 24 && SIZE == 16) begin // dcache tags end else
(* dont_touch = "yes" *) dcache_tags ram (
.R0_addr(raddr),
.R0_clk(clk),
.R0_data(/*rdata*/),
.R0_en(read),
.W0_addr(waddr),
.W0_clk(clk),
.W0_data(wdata),
.W0_en(write)
);
end else if (DATAW == 21 && SIZE == 128) begin // icache tags
(* dont_touch = "yes" *) icache_tags ram (
.R0_addr(raddr),
.R0_clk(clk),
.R0_data(/*rdata*/),
.R0_en(read),
.W0_addr(waddr),
.W0_clk(clk),
.W0_data(wdata),
.W0_en(write)
);
end else if (DATAW == 32 && SIZE == 64) begin // register file
(* dont_touch = "yes" *) rf_bank ram (
.R0_addr(raddr),
.R0_clk(clk),
.R0_data(/*rdata*/),
.R0_en(read),
.W0_addr(waddr),
.W0_clk(clk),
.W0_data(wdata),
.W0_en(write)
);
end // else begin
if (OUT_REG != 0) begin if (OUT_REG != 0) begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [SIZE-1:0];
reg [DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;