register file refactoring

This commit is contained in:
Blaise Tine
2020-12-05 01:40:50 -08:00
parent 478d971389
commit 13a5370254
33 changed files with 524 additions and 605 deletions

View File

@@ -100,12 +100,12 @@ module VX_alu_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33), .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33),
.R(1) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_out), .stall (stall_out),
.flush (1'b0), .flush (1'b0),
.in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}), .data_in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}),
.out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r}) .data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r})
); );
wire is_less = cmp_result_r[32]; wire is_less = cmp_result_r[32];

View File

@@ -64,12 +64,12 @@ module VX_commit #(
.N(1 + CMTW), .N(1 + CMTW),
.R(1) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (1'b0), .stall (1'b0),
.flush (1'b0), .flush (1'b0),
.in ({commit_fire, commit_size}), .data_in ({commit_fire, commit_size}),
.out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size}) .data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
); );
// Writeback // Writeback

View File

@@ -1,57 +1,59 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_csr_arb ( module VX_csr_arb (
input wire clk, input wire clk,
input wire reset, input wire reset,
// bus select // bus select
input wire select_io_req, input wire select_io_req,
input wire select_io_rsp, input wire select_io_rsp,
// input requests // input requests
VX_csr_req_if csr_core_req_if, VX_csr_req_if csr_core_req_if,
VX_csr_io_req_if csr_io_req_if, VX_csr_io_req_if csr_io_req_if,
// output request // output request
VX_csr_req_if csr_req_if, VX_csr_pipe_req_if csr_pipe_req_if,
// input response // input response
VX_commit_if csr_rsp_if, VX_commit_if csr_pipe_rsp_if,
// output responses // output responses
VX_commit_if csr_commit_if, VX_commit_if csr_commit_if,
VX_csr_io_rsp_if csr_io_rsp_if VX_csr_io_rsp_if csr_io_rsp_if
); );
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
// requests wire [31:0] csr_core_req_mask = csr_core_req_if.rs2_is_imm ? 32'(csr_core_req_if.rs1) : csr_core_req_if.rs1_data;
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
assign csr_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0;
assign csr_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0;
assign csr_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
assign csr_req_if.is_io = select_io_req;
assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req); // requests
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req; assign csr_pipe_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
assign csr_pipe_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
assign csr_pipe_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0;
assign csr_pipe_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0;
assign csr_pipe_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_pipe_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign csr_pipe_req_if.csr_mask = (~select_io_req) ? csr_core_req_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_pipe_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
assign csr_pipe_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
assign csr_pipe_req_if.is_io = select_io_req;
assign csr_core_req_if.ready = csr_pipe_req_if.ready && (~select_io_req);
assign csr_io_req_if.ready = csr_pipe_req_if.ready && select_io_req;
// responses // responses
assign csr_io_rsp_if.valid = csr_rsp_if.valid & select_io_rsp; assign csr_io_rsp_if.valid = csr_pipe_rsp_if.valid & select_io_rsp;
assign csr_io_rsp_if.data = csr_rsp_if.data[0]; assign csr_io_rsp_if.data = csr_pipe_rsp_if.data[0];
assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp; assign csr_commit_if.valid = csr_pipe_rsp_if.valid & ~select_io_rsp;
assign csr_commit_if.wid = csr_rsp_if.wid; assign csr_commit_if.wid = csr_pipe_rsp_if.wid;
assign csr_commit_if.tmask = csr_rsp_if.tmask; assign csr_commit_if.tmask = csr_pipe_rsp_if.tmask;
assign csr_commit_if.PC = csr_rsp_if.PC; assign csr_commit_if.PC = csr_pipe_rsp_if.PC;
assign csr_commit_if.rd = csr_rsp_if.rd; assign csr_commit_if.rd = csr_pipe_rsp_if.rd;
assign csr_commit_if.wb = csr_rsp_if.wb; assign csr_commit_if.wb = csr_pipe_rsp_if.wb;
assign csr_commit_if.data = csr_rsp_if.data; assign csr_commit_if.data = csr_pipe_rsp_if.data;
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready; assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
endmodule endmodule

View File

@@ -12,15 +12,15 @@ module VX_csr_unit #(
VX_csr_io_req_if csr_io_req_if, VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if, VX_csr_io_rsp_if csr_io_rsp_if,
VX_csr_req_if csr_req_if, VX_csr_req_if csr_req_if,
VX_commit_if csr_commit_if, VX_commit_if csr_commit_if,
input wire busy, input wire busy,
input wire[`NUM_WARPS-1:0] fpu_pending, input wire[`NUM_WARPS-1:0] fpu_pending,
output wire[`NUM_WARPS-1:0] pending output wire[`NUM_WARPS-1:0] pending
); );
VX_csr_req_if csr_pipe_req_if(); VX_csr_pipe_req_if csr_pipe_req_if();
VX_commit_if csr_pipe_rsp_if(); VX_commit_if csr_pipe_rsp_if();
wire select_io_req = csr_io_req_if.valid; wire select_io_req = csr_io_req_if.valid;
wire select_io_rsp; wire select_io_rsp;
@@ -34,9 +34,9 @@ module VX_csr_unit #(
.csr_core_req_if (csr_req_if), .csr_core_req_if (csr_req_if),
.csr_io_req_if (csr_io_req_if), .csr_io_req_if (csr_io_req_if),
.csr_req_if (csr_pipe_req_if), .csr_pipe_req_if (csr_pipe_req_if),
.csr_rsp_if (csr_pipe_rsp_if), .csr_pipe_rsp_if (csr_pipe_rsp_if),
.csr_io_rsp_if (csr_io_rsp_if), .csr_io_rsp_if (csr_io_rsp_if),
.csr_commit_if (csr_commit_if) .csr_commit_if (csr_commit_if)
); );
@@ -105,12 +105,12 @@ module VX_csr_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32), .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32),
.R(1) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_out), .stall (stall_out),
.flush (1'b0), .flush (1'b0),
.in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}), .data_in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
.out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1}) .data_out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
); );
for (genvar i = 0; i < `NUM_THREADS; i++) begin for (genvar i = 0; i < `NUM_THREADS; i++) begin

View File

@@ -347,11 +347,9 @@ module VX_decode #(
assign decode_if.rd = rd; assign decode_if.rd = rd;
assign decode_if.rs1 = rs1_qual; assign decode_if.rs1 = rs1_qual;
assign decode_if.rs2 = rs2; assign decode_if.rs2 = rs2;
assign decode_if.rs3 = 0; assign decode_if.rs3 = rs3;
`endif `endif
assign decode_if.use_rs3 = use_rs3;
assign decode_if.used_regs = ((`NUM_REGS)'(use_rd) << decode_if.rd) assign decode_if.used_regs = ((`NUM_REGS)'(use_rd) << decode_if.rd)
| ((`NUM_REGS)'(use_rs1) << decode_if.rs1) | ((`NUM_REGS)'(use_rs1) << decode_if.rs1)
| ((`NUM_REGS)'(use_rs2) << decode_if.rs2) | ((`NUM_REGS)'(use_rs2) << decode_if.rs2)

View File

@@ -117,6 +117,8 @@ module VX_execute #(
.pending (fpu_pending) .pending (fpu_pending)
); );
`else `else
`UNUSED_VAR (csr_pending)
`UNUSED_VAR (fpu_to_csr_if.read_frm)
assign fpu_req_if.ready = 0; assign fpu_req_if.ready = 0;
assign fpu_commit_if.valid = 0; assign fpu_commit_if.valid = 0;
assign fpu_commit_if.wid = 0; assign fpu_commit_if.wid = 0;
@@ -124,9 +126,12 @@ module VX_execute #(
assign fpu_commit_if.tmask = 0; assign fpu_commit_if.tmask = 0;
assign fpu_commit_if.wb = 0; assign fpu_commit_if.wb = 0;
assign fpu_commit_if.rd = 0; assign fpu_commit_if.rd = 0;
assign fpu_commit_if.data = 0; assign fpu_commit_if.data = 0;
assign fpu_commit_if.has_fflags = 0; assign fpu_to_csr_if.write_enable = 0;
assign fpu_commit_if.fflags = 0; assign fpu_to_csr_if.write_wid = 0;
assign fpu_to_csr_if.write_fflags = 0;
assign fpu_to_csr_if.read_wid = 0;
assign fpu_pending = 0;
`endif `endif
VX_gpu_unit #( VX_gpu_unit #(

View File

@@ -153,12 +153,12 @@ module VX_fpu_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS), .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS),
.R(1) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_out), .stall (stall_out),
.flush (1'b0), .flush (1'b0),
.in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}), .data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
.out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r}) .data_out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
); );
assign ready_out = ~stall_out; assign ready_out = ~stall_out;

View File

@@ -1,78 +0,0 @@
`include "VX_platform.vh"
module VX_gpr_bypass #(
parameter DATAW = 1,
parameter PASSTHRU = 0
) (
input wire clk,
input wire reset,
input wire push,
input wire pop,
input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out
);
if (PASSTHRU) begin
reg delayed_push;
always @(posedge clk) begin
if (reset) begin
delayed_push <= 0;
end else begin
delayed_push <= push;
assert(!delayed_push || pop);
end
end
assign data_out = data_in;
end else begin
reg [DATAW-1:0] buffer, buffer2;
reg use_buffer, use_buffer2;
reg delayed_push;
always @(posedge clk) begin
if (reset) begin
delayed_push <= 0;
use_buffer <= 0;
use_buffer2 <= 0;
end else begin
delayed_push <= push;
assert(!use_buffer2 || use_buffer);
if (pop) begin
use_buffer <= use_buffer2;
use_buffer2 <= 0;
end
if (delayed_push) begin
if (use_buffer) begin
assert(!use_buffer2); // full!
use_buffer <= 1;
if (!pop) begin
use_buffer2 <= 1;
end
end else if (!pop) begin
use_buffer <= 1;
end
end
end
if (pop) begin
buffer <= buffer2;
end
if (delayed_push) begin
if (use_buffer) begin
if (pop) begin
buffer <= data_in;
end else begin
buffer2 <= data_in;
end
end else if (!pop) begin
buffer <= data_in;
end
end
end
assign data_out = use_buffer ? buffer : data_in;
end
endmodule

View File

@@ -4,33 +4,79 @@
module VX_gpr_ram ( module VX_gpr_ram (
input wire clk, input wire clk,
input wire [`NUM_THREADS-1:0] we, input wire wren,
input wire [`NUM_THREADS-1:0] tmask,
input wire [`NW_BITS+`NR_BITS-1:0] waddr, input wire [`NW_BITS+`NR_BITS-1:0] waddr,
input wire [`NUM_THREADS-1:0][31:0] wdata, input wire [`NUM_THREADS-1:0][31:0] wdata,
input wire [`NW_BITS+`NR_BITS-1:0] rs1, input wire [`NW_BITS+`NR_BITS-1:0] raddr1,
input wire [`NW_BITS+`NR_BITS-1:0] rs2, input wire [`NW_BITS+`NR_BITS-1:0] raddr2,
output wire [`NUM_THREADS-1:0][31:0] rs1_data, input wire [`NW_BITS+`NR_BITS-1:0] raddr3,
output wire [`NUM_THREADS-1:0][31:0] rs2_data output wire [`NUM_THREADS-1:0][31:0] rdata1,
output wire [`NUM_THREADS-1:0][31:0] rdata2,
output wire [`NUM_THREADS-1:0][31:0] rdata3
); );
localparam RAM_DATAW = `NUM_THREADS * 32;
localparam RAM_ADDRW = `NW_BITS + `NR_BITS;
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
localparam RAM_BYTEEN = `NUM_THREADS * 4;
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0]; `UNUSED_VAR (raddr3)
reg [`NUM_THREADS-1:0][31:0] q1, q2;
`ifdef EXT_F_ENABLE
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin for (genvar i = 0; i < `NUM_THREADS; ++i) begin
if (we[i]) begin
mem[waddr][i][0] <= wdata[i][07:00]; reg [31:0] mem_i [(RAM_DEPTH/2)-1:0];
mem[waddr][i][1] <= wdata[i][15:08]; reg [31:0] mem_f [(RAM_DEPTH/2)-1:0];
mem[waddr][i][2] <= wdata[i][23:16];
mem[waddr][i][3] <= wdata[i][31:24]; initial mem_i = '{default: 0};
wire waddr_is_fp = waddr[RAM_ADDRW-1];
wire raddr1_is_fp = raddr1[RAM_ADDRW-1];
wire raddr2_is_fp = raddr2[RAM_ADDRW-1];
wire [RAM_ADDRW-2:0] waddr_qual = waddr[RAM_ADDRW-2:0];
wire [RAM_ADDRW-2:0] raddr1_qual = raddr1[RAM_ADDRW-2:0];
wire [RAM_ADDRW-2:0] raddr2_qual = raddr2[RAM_ADDRW-2:0];
wire [RAM_ADDRW-2:0] raddr3_qual = raddr3[RAM_ADDRW-2:0];
always @(posedge clk) begin
if (wren && tmask[i] && !waddr_is_fp) begin
mem_i[waddr_qual] <= wdata[i];
end end
end end
q1 <= mem[rs1];
q2 <= mem[rs2]; always @(posedge clk) begin
if (wren && tmask[i] && waddr_is_fp) begin
mem_f[waddr_qual] <= wdata[i];
end
end
assign rdata1[i] = raddr1_is_fp ? mem_f[raddr1_qual] : mem_i[raddr1_qual];
assign rdata2[i] = raddr2_is_fp ? mem_f[raddr2_qual] : mem_i[raddr2_qual];
assign rdata3[i] = mem_f[raddr3_qual];
end end
assign rs1_data = q1; `else
assign rs2_data = q2;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
reg [31:0] mem [RAM_DEPTH-1:0];
initial mem = '{default: 0};
always @(posedge clk) begin
if (wren && tmask[i]) begin
mem[waddr] <= wdata[i];
end
end
assign rdata1[i] = mem[raddr1];
assign rdata2[i] = mem[raddr2];
assign rdata3[i] = 0;
end
`endif
endmodule endmodule

View File

@@ -15,91 +15,38 @@ module VX_gpr_stage #(
); );
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
reg rsp_valid; wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
reg [`NW_BITS-1:0] rsp_wid; wire [`NW_BITS+`NR_BITS-1:0] waddr, raddr1, raddr2, raddr3;
reg [31:0] rsp_pc;
reg rs1_is_zero, rs2_is_zero;
wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data;
wire [`NW_BITS+`NR_BITS-1:0] raddr1, raddr2;
`ifdef EXT_F_ENABLE
assign waddr = {writeback_if.rd[`NR_BITS-1], writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]};
assign raddr1 = {gpr_req_if.rs1[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]};
assign raddr2 = {gpr_req_if.rs2[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]};
assign raddr3 = {gpr_req_if.rs3[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]};
`else
assign waddr = {writeback_if.wid, writeback_if.rd};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
`endif
VX_gpr_ram gpr_ram ( VX_gpr_ram gpr_ram (
.clk (clk), .clk (clk),
.we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.tmask), .wren (writeback_if.valid),
.waddr ({writeback_if.wid, writeback_if.rd}), .tmask (writeback_if.tmask),
.wdata (writeback_if.data), .waddr (waddr),
.rs1 (raddr1), .wdata (writeback_if.data),
.rs2 (raddr2), .raddr1 (raddr1),
.rs1_data (rs1_data), .raddr2 (raddr2),
.rs2_data (rs2_data) .raddr3 (raddr3),
); .rdata1 (rdata1),
.rdata2 (rdata2),
always @(posedge clk) begin .rdata3 (rdata3)
if (reset) begin );
rsp_valid <= 0;
end else begin
rsp_valid <= gpr_req_if.valid;
end
rsp_wid <= gpr_req_if.wid;
rsp_pc <= gpr_req_if.PC;
rs1_is_zero <= (0 == gpr_req_if.rs1);
rs2_is_zero <= (0 == gpr_req_if.rs2);
end
`ifdef EXT_F_ENABLE
reg [`NUM_THREADS-1:0][31:0] rs3_data;
reg read_rs3, save_rs3;
wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && !read_rs3;
wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;
always @(posedge clk) begin
if (reset) begin
read_rs3 <= 0;
end else begin
if (rs3_delay) begin
read_rs3 <= 1;
end else if (read_fire) begin
read_rs3 <= 0;
end
assert(!read_rs3 || rsp_wid == gpr_req_if.wid);
end
if (rs3_delay) begin
save_rs3 <= 1;
end
if (save_rs3) begin
rs3_data <= rs1_data;
save_rs3 <= 0;
end
end
assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)};
assign gpr_req_if.ready = ~rs3_delay;
assign gpr_rsp_if.rs3_data = rs3_data;
`else
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign gpr_req_if.ready = 1;
assign gpr_rsp_if.rs3_data = 0;
`UNUSED_VAR (gpr_req_if.valid);
`UNUSED_VAR (gpr_req_if.rs3);
`UNUSED_VAR (gpr_req_if.use_rs3);
`UNUSED_VAR (gpr_rsp_if.ready);
`endif assign gpr_rsp_if.rs1_data = rdata1;
assign gpr_rsp_if.rs2_data = rdata2;
assign gpr_rsp_if.rs1_data = rs1_is_zero ? (`NUM_THREADS*32)'(0) : rs1_data; assign gpr_rsp_if.rs3_data = rdata3;
assign gpr_rsp_if.rs2_data = rs2_is_zero ? (`NUM_THREADS*32)'(0) : rs2_data;
assign gpr_rsp_if.valid = rsp_valid;
assign gpr_rsp_if.wid = rsp_wid;
assign gpr_rsp_if.PC = rsp_pc;
assign writeback_if.ready = 1'b1; assign writeback_if.ready = 1'b1;

View File

@@ -79,12 +79,12 @@ module VX_gpu_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE), .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE),
.R(1) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (1'b0), .flush (1'b0),
.in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}), .data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
.out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier}) .data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
); );
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready; assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready;

View File

@@ -14,7 +14,7 @@ module VX_ibuffer #(
output wire [`NW_BITS-1:0] deq_wid_next, output wire [`NW_BITS-1:0] deq_wid_next,
VX_decode_if ibuf_deq_if VX_decode_if ibuf_deq_if
); );
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + 1 + `NUM_REGS; localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
localparam SIZE = `IBUF_SIZE; localparam SIZE = `IBUF_SIZE;
localparam SIZEW = $clog2(SIZE+1); localparam SIZEW = $clog2(SIZE+1);
localparam ADDRW = $clog2(SIZE); localparam ADDRW = $clog2(SIZE);
@@ -192,8 +192,7 @@ module VX_ibuffer #(
ibuf_enq_if.rs3, ibuf_enq_if.rs3,
ibuf_enq_if.imm, ibuf_enq_if.imm,
ibuf_enq_if.rs1_is_PC, ibuf_enq_if.rs1_is_PC,
ibuf_enq_if.rs2_is_imm, ibuf_enq_if.rs2_is_imm,
ibuf_enq_if.use_rs3,
ibuf_enq_if.used_regs}; ibuf_enq_if.used_regs};
assign ibuf_deq_if.valid = deq_valid; assign ibuf_deq_if.valid = deq_valid;
@@ -211,7 +210,6 @@ module VX_ibuffer #(
ibuf_deq_if.imm, ibuf_deq_if.imm,
ibuf_deq_if.rs1_is_PC, ibuf_deq_if.rs1_is_PC,
ibuf_deq_if.rs2_is_imm, ibuf_deq_if.rs2_is_imm,
ibuf_deq_if.use_rs3,
ibuf_deq_if.used_regs} = deq_instr; ibuf_deq_if.used_regs} = deq_instr;
endmodule endmodule

View File

@@ -30,94 +30,72 @@ module VX_instr_demux (
// ALU unit // ALU unit
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU); wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
wire alu_req_ready; wire alu_stall = alu_req_if.valid && ~alu_req_if.ready;
wire is_br_op = `IS_BR_MOD(execute_if.op_mod); wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
VX_opd_collect #( VX_generic_register #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS), .N (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.OPDSW (2 * `NUM_THREADS * 32), .R (1)
.PASSTHRU (1) // ALU has no backpressure ) alu_pipe (
) alu_opc ( .clk (clk),
.clk (clk), .reset (reset),
.reset (reset), .stall (alu_stall),
.ready_in (alu_req_ready), .flush (1'b0),
.valid_in (alu_req_valid), .data_in ({alu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}), .data_out ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data})
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
.ready_out (alu_req_if.ready),
.valid_out (alu_req_if.valid)
); );
// lsu unit // lsu unit
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU); wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
wire lsu_req_ready; wire lsu_stall = lsu_req_if.valid && ~lsu_req_if.ready;
VX_opd_collect #( VX_generic_register #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1), .N (1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.OPDSW (2 * `NUM_THREADS * 32) .R (1)
) lsu_opc ( ) lsu_pipe (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.ready_in (lsu_req_ready), .stall (lsu_stall),
.valid_in (lsu_req_valid), .flush (1'b0),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}), .data_in ({lsu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), .data_out ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data})
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
.ready_out (lsu_req_if.ready),
.valid_out (lsu_req_if.valid)
); );
// csr unit // csr unit
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR); wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
wire csr_req_ready; wire csr_stall = csr_req_if.valid && ~csr_req_if.ready;
reg tmp_rs2_is_imm; VX_generic_register #(
reg [`NR_BITS-1:0] tmp_rs1; .N (1 + `NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.R (1)
always @(posedge clk) begin ) csr_pipe (
tmp_rs2_is_imm <= execute_if.rs2_is_imm;
tmp_rs1 <= execute_if.rs1;
end
wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 32'(tmp_rs1) : gpr_rsp_if.rs1_data[0];
VX_opd_collect #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1),
.OPDSW (32)
) csr_opc (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.ready_in (csr_req_ready), .stall (csr_stall),
.valid_in (csr_req_valid), .flush (1'b0),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}), .data_in ({csr_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, execute_if.rs2_is_imm, execute_if.rs1, gpr_rsp_if.rs1_data[0]}),
.opds_in ({csr_req_mask}), .data_out ({csr_req_if.valid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.rs2_is_imm, csr_req_if.rs1, csr_req_if.rs1_data})
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io, csr_req_if.csr_mask}),
.ready_out (csr_req_if.ready),
.valid_out (csr_req_if.valid)
); );
// mul unit // mul unit
`ifdef EXT_M_ENABLE `ifdef EXT_M_ENABLE
wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL); wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL);
wire mul_req_ready; wire mul_stall = mul_req_if.valid && ~mul_req_if.ready;
VX_opd_collect #( VX_generic_register #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1), .N (1 + `NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.OPDSW (2 * `NUM_THREADS * 32) .R (1)
) mul_opc ( ) mul_pipe (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.ready_in (mul_req_ready), .stall (mul_stall),
.valid_in (mul_req_valid), .flush (1'b0),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), .data_in ({mul_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), .data_out ({mul_req_if.valid, mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data})
.data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}),
.ready_out (mul_req_if.ready),
.valid_out (mul_req_if.valid)
); );
`endif `endif
@@ -125,54 +103,50 @@ module VX_instr_demux (
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU); wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
wire fpu_req_ready; wire fpu_stall = fpu_req_if.valid && ~fpu_req_if.ready;
VX_opd_collect #( VX_generic_register #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1), .N (1 + `NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.OPDSW (3 * `NUM_THREADS * 32) .R (1)
) fpu_opc ( ) fpu_pipe (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.ready_in (fpu_req_ready), .stall (fpu_stall),
.valid_in (fpu_req_valid), .flush (1'b0),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb}), .data_in ({fpu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}), .data_out ({fpu_req_if.valid, fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data})
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
.ready_out (fpu_req_if.ready),
.valid_out (fpu_req_if.valid)
); );
`else
`UNUSED_VAR (gpr_rsp_if.rs3_data)
`endif `endif
// gpu unit // gpu unit
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU); wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
wire gpu_req_ready; wire gpu_stall = gpu_req_if.valid && ~gpu_req_if.ready;
VX_opd_collect #( VX_generic_register #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1), .N (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
.OPDSW (`NUM_THREADS * 32 + 32) .R (1)
) gpu_opc ( ) gpu_pipe (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.ready_in (gpu_req_ready), .stall (gpu_stall),
.valid_in (gpu_req_valid), .flush (1'b0),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}), .data_in ({gpu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}), .data_out ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data})
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
.ready_out (gpu_req_if.ready),
.valid_out (gpu_req_if.valid)
); );
// can take next request? // can take next request?
assign execute_if.ready = (alu_req_ready && (execute_if.ex_type == `EX_ALU)) assign execute_if.ready = (!alu_stall && (execute_if.ex_type == `EX_ALU))
|| (lsu_req_ready && (execute_if.ex_type == `EX_LSU)) || (!lsu_stall && (execute_if.ex_type == `EX_LSU))
|| (csr_req_ready && (execute_if.ex_type == `EX_CSR)) || (!csr_stall && (execute_if.ex_type == `EX_CSR))
`ifdef EXT_M_ENABLE `ifdef EXT_M_ENABLE
|| (mul_req_ready && (execute_if.ex_type == `EX_MUL)) || (!mul_stall && (execute_if.ex_type == `EX_MUL))
`endif `endif
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
|| (fpu_req_ready && (execute_if.ex_type == `EX_FPU)) || (!fpu_stall && (execute_if.ex_type == `EX_FPU))
`endif `endif
|| (gpu_req_ready && (execute_if.ex_type == `EX_GPU)); || (!gpu_stall && (execute_if.ex_type == `EX_GPU));
endmodule endmodule

View File

@@ -5,23 +5,23 @@ module VX_issue #(
) ( ) (
`SCOPE_IO_VX_issue `SCOPE_IO_VX_issue
input wire clk, input wire clk,
input wire reset, input wire reset,
VX_decode_if decode_if, VX_decode_if decode_if,
VX_writeback_if writeback_if, VX_writeback_if writeback_if,
VX_alu_req_if alu_req_if, VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if, VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if, VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if, VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if, VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if VX_gpu_req_if gpu_req_if
); );
VX_decode_if ibuf_deq_if(); VX_decode_if ibuf_deq_if();
VX_decode_if execute_if(); VX_decode_if execute_if();
VX_gpr_req_if gpr_req_if(); VX_gpr_req_if gpr_req_if();
VX_gpr_rsp_if gpr_rsp_if(); VX_gpr_rsp_if gpr_rsp_if();
wire scoreboard_delay; wire scoreboard_delay;
wire [`NW_BITS-1:0] deq_wid_next; wire [`NW_BITS-1:0] deq_wid_next;
@@ -29,49 +29,42 @@ module VX_issue #(
VX_ibuffer #( VX_ibuffer #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) ibuffer ( ) ibuffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.freeze (~gpr_req_if.ready), .freeze (1'b0),
.ibuf_enq_if (decode_if), .ibuf_enq_if (decode_if),
.deq_wid_next (deq_wid_next), .deq_wid_next (deq_wid_next),
.ibuf_deq_if (ibuf_deq_if) .ibuf_deq_if (ibuf_deq_if)
); );
VX_scoreboard #( VX_scoreboard #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) scoreboard ( ) scoreboard (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.ibuf_deq_if (ibuf_deq_if), .ibuf_deq_if (ibuf_deq_if),
.writeback_if (writeback_if), .writeback_if (writeback_if),
.deq_wid_next (deq_wid_next), .deq_wid_next (deq_wid_next),
.exe_delay (~execute_if.ready), .exe_delay (~execute_if.ready),
.gpr_delay (~gpr_req_if.ready), .delay (scoreboard_delay)
.delay (scoreboard_delay)
); );
assign gpr_req_if.valid = ibuf_deq_if.valid && ~scoreboard_delay; assign gpr_req_if.wid = ibuf_deq_if.wid;
assign gpr_req_if.wid = ibuf_deq_if.wid; assign gpr_req_if.rs1 = ibuf_deq_if.rs1;
assign gpr_req_if.PC = ibuf_deq_if.PC; assign gpr_req_if.rs2 = ibuf_deq_if.rs2;
assign gpr_req_if.rs1 = ibuf_deq_if.rs1; assign gpr_req_if.rs3 = ibuf_deq_if.rs3;
assign gpr_req_if.rs2 = ibuf_deq_if.rs2;
assign gpr_req_if.rs3 = ibuf_deq_if.rs3;
assign gpr_req_if.use_rs3 = ibuf_deq_if.use_rs3;
assign gpr_rsp_if.ready = execute_if.ready;
VX_gpr_stage #( VX_gpr_stage #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) gpr_stage ( ) gpr_stage (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.writeback_if (writeback_if), .writeback_if (writeback_if),
.gpr_req_if (gpr_req_if), .gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if) .gpr_rsp_if (gpr_rsp_if)
); );
`UNUSED_VAR (gpr_rsp_if.valid); assign execute_if.valid = ibuf_deq_if.valid && ~scoreboard_delay;
assign execute_if.valid = ibuf_deq_if.valid && gpr_req_if.ready && ~scoreboard_delay;
assign execute_if.wid = ibuf_deq_if.wid; assign execute_if.wid = ibuf_deq_if.wid;
assign execute_if.tmask = ibuf_deq_if.tmask; assign execute_if.tmask = ibuf_deq_if.tmask;
assign execute_if.PC = ibuf_deq_if.PC; assign execute_if.PC = ibuf_deq_if.PC;
@@ -83,19 +76,19 @@ module VX_issue #(
assign execute_if.rs1 = ibuf_deq_if.rs1; assign execute_if.rs1 = ibuf_deq_if.rs1;
assign execute_if.imm = ibuf_deq_if.imm; assign execute_if.imm = ibuf_deq_if.imm;
assign execute_if.rs1_is_PC = ibuf_deq_if.rs1_is_PC; assign execute_if.rs1_is_PC = ibuf_deq_if.rs1_is_PC;
assign execute_if.rs2_is_imm = ibuf_deq_if.rs2_is_imm; assign execute_if.rs2_is_imm= ibuf_deq_if.rs2_is_imm;
VX_instr_demux instr_demux ( VX_instr_demux instr_demux (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.execute_if (execute_if), .execute_if (execute_if),
.gpr_rsp_if (gpr_rsp_if), .gpr_rsp_if (gpr_rsp_if),
.alu_req_if (alu_req_if), .alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if), .lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if), .csr_req_if (csr_req_if),
.mul_req_if (mul_req_if), .mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if), .fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if) .gpu_req_if (gpu_req_if)
); );
`SCOPE_ASSIGN (issue_fire, ibuf_deq_if.valid && ibuf_deq_if.ready); `SCOPE_ASSIGN (issue_fire, ibuf_deq_if.valid && ibuf_deq_if.ready);
@@ -115,12 +108,8 @@ module VX_issue #(
`SCOPE_ASSIGN (issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm); `SCOPE_ASSIGN (issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm);
`SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay); `SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay);
`SCOPE_ASSIGN (gpr_delay, ~gpr_req_if.ready);
`SCOPE_ASSIGN (execute_delay, ~execute_if.ready); `SCOPE_ASSIGN (execute_delay, ~execute_if.ready);
`SCOPE_ASSIGN (gpr_rsp_valid, gpr_rsp_if.valid);
`SCOPE_ASSIGN (gpr_rsp_wid, gpr_rsp_if.wid);
`SCOPE_ASSIGN (gpr_rsp_pc, gpr_rsp_if.PC);
`SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data); `SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data);
`SCOPE_ASSIGN (gpr_rsp_b, gpr_rsp_if.rs2_data); `SCOPE_ASSIGN (gpr_rsp_b, gpr_rsp_if.rs2_data);
`SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data); `SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data);
@@ -140,7 +129,7 @@ module VX_issue #(
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
end end
if (csr_req_if.valid && csr_req_if.ready) begin if (csr_req_if.valid && csr_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask); $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.rs1_data);
end end
if (mul_req_if.valid && mul_req_if.ready) begin if (mul_req_if.valid && mul_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data); $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);

View File

@@ -19,6 +19,7 @@ module VX_lsu_unit #(
VX_commit_if ld_commit_if, VX_commit_if ld_commit_if,
VX_commit_if st_commit_if VX_commit_if st_commit_if
); );
wire req_valid;
wire [`NUM_THREADS-1:0] req_tmask; wire [`NUM_THREADS-1:0] req_tmask;
wire req_rw; wire req_rw;
wire [`NUM_THREADS-1:0][29:0] req_addr; wire [`NUM_THREADS-1:0][29:0] req_addr;
@@ -71,19 +72,18 @@ module VX_lsu_unit #(
reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags; reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags;
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
wire valid_in;
wire stall_in; wire stall_in;
VX_generic_register #( VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))), .N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))),
.R(1) .R(1)
) pipe_reg0 ( ) pipe_reg0 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_in), .stall (stall_in),
.flush (1'b0), .flush (1'b0),
.in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}), .data_in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}),
.out ({valid_in, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data}) .data_out ({req_valid, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
); );
wire [`NW_BITS-1:0] rsp_wid; wire [`NW_BITS-1:0] rsp_wid;
@@ -136,11 +136,11 @@ module VX_lsu_unit #(
end end
end end
wire stall_out = ~ld_commit_if.ready && ld_commit_if.valid; wire load_req_stall = req_valid && !req_rw && lsuq_full;
wire store_stall = valid_in && req_rw && stall_out; wire store_req_stall = req_valid && req_rw && !st_commit_if.ready;
// Core Request // Core Request
assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~lsuq_full && ~store_stall}} & req_tmask; assign dcache_req_if.valid = {`NUM_THREADS{req_valid && !load_req_stall && !store_req_stall}} & req_tmask;
assign dcache_req_if.rw = req_rw; assign dcache_req_if.rw = req_rw;
assign dcache_req_if.byteen = req_byteen; assign dcache_req_if.byteen = req_byteen;
assign dcache_req_if.addr = req_addr; assign dcache_req_if.addr = req_addr;
@@ -152,7 +152,9 @@ module VX_lsu_unit #(
assign dcache_req_if.tag = req_tag; assign dcache_req_if.tag = req_tag;
`endif `endif
assign stall_in = ~dcache_req_if.ready || lsuq_full || store_stall; assign stall_in = ~dcache_req_if.ready
|| load_req_stall
|| store_req_stall;
// Can accept new request? // Can accept new request?
assign lsu_req_if.ready = ~stall_in; assign lsu_req_if.ready = ~stall_in;
@@ -171,7 +173,7 @@ module VX_lsu_unit #(
// send store commit // send store commit
wire is_store_rsp = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready; wire is_store_rsp = req_valid && req_rw && dcache_req_if.ready;
assign st_commit_if.valid = is_store_rsp; assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid; assign st_commit_if.wid = req_wid;
@@ -180,26 +182,27 @@ module VX_lsu_unit #(
assign st_commit_if.rd = 0; assign st_commit_if.rd = 0;
assign st_commit_if.wb = 0; assign st_commit_if.wb = 0;
assign st_commit_if.data = 0; assign st_commit_if.data = 0;
`UNUSED_VAR (st_commit_if.ready)
// send load commit // send load commit
wire is_load_rsp = (| dcache_rsp_if.valid); wire is_load_rsp = (| dcache_rsp_if.valid);
wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid;
VX_generic_register #( VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.R(1) .R(1)
) pipe_reg1 ( ) pipe_reg1 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_out), .stall (load_rsp_stall),
.flush (1'b0), .flush (1'b0),
.in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}), .data_in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}),
.out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data}) .data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data})
); );
// Can accept new cache response? // Can accept new cache response?
assign dcache_rsp_if.ready = ~stall_out; assign dcache_rsp_if.ready = ~load_rsp_stall;
// scope registration // scope registration
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}}); `SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}});

View File

@@ -147,12 +147,12 @@ module VX_mul_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.R(1) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_out), .stall (stall_out),
.flush (1'b0), .flush (1'b0),
.in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}), .data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}),
.out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data}) .data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
); );
// can accept new request? // can accept new request?

View File

@@ -1,65 +0,0 @@
`include "VX_platform.vh"
// VX_opd_collect
// Pairs an instruction payload (inst_in) with its operand payload (opds_in)
// under a valid/ready handshake and presents both, concatenated, on data_out
// through a single output register.
//
// Parameters:
//   INSTW    - bit width of inst_in (instruction-side payload).
//   OPDSW    - bit width of opds_in (operand-side payload).
//   PASSTHRU - forwarded unchanged to VX_gpr_bypass; its exact effect is
//              defined in that module (not visible here).
//
// Ports:
//   valid_in / ready_in   - input-side handshake, wired directly to the skid buffer.
//   inst_in, opds_in      - payloads captured when the input is accepted.
//   data_out              - registered {inst, opds}; inst occupies the upper INSTW bits.
//   valid_out / ready_out - output-side handshake of the registered result.
module VX_opd_collect #(
parameter INSTW = 1,
parameter OPDSW = 1,
parameter PASSTHRU = 0
) (
input wire clk,
input wire reset,
input wire valid_in,
output wire ready_in,
input wire [INSTW-1:0] inst_in,
input wire [OPDSW-1:0] opds_in,
output wire [INSTW+OPDSW-1:0] data_out,
output wire valid_out,
input wire ready_out
);
// Intermediate payloads/handshake between the buffering stage and the output register.
wire [INSTW-1:0] inst_out;
wire [OPDSW-1:0] opds_out;
wire valid_out_tmp, ready_out_tmp;
// Instruction payload goes through a skid buffer; this instance also produces
// ready_in for the whole module.
// NOTE(review): buffering depth/latency is defined by VX_skid_buffer - confirm there.
VX_skid_buffer #(
.DATAW (INSTW)
) skid_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.data_in (inst_in),
.data_out (inst_out),
.valid_out (valid_out_tmp),
.ready_out (ready_out_tmp)
);
// Operand payload is tracked separately: pushed when the input handshake fires,
// popped when the skid buffer output is transferred into the output register.
// NOTE(review): presumably keeps opds_out aligned with inst_out - verify in VX_gpr_bypass.
VX_gpr_bypass #(
.DATAW (OPDSW),
.PASSTHRU (PASSTHRU)
) gpr_bypass (
.clk (clk),
.reset (reset),
.push (valid_in && ready_in),
.pop (valid_out_tmp && ready_out_tmp),
.data_in (opds_in),
.data_out (opds_out)
);
// Hold the output register only while it contains a valid entry that the
// consumer has not accepted yet.
wire stall_out = valid_out && ~ready_out;
// Output register over {valid, inst, opds}. R(1) selects the topmost bit
// (the valid flag) as the resettable part of the register.
// NOTE(review): uses the `in`/`out` port names of VX_generic_register.
VX_generic_register #(
.N(1 + INSTW + OPDSW),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({valid_out_tmp, inst_out, opds_out}),
.out ({valid_out, data_out})
);
// The buffering stage may advance whenever the output register is not stalled.
assign ready_out_tmp = ~stall_out;
endmodule

View File

@@ -10,7 +10,6 @@ module VX_scoreboard #(
VX_writeback_if writeback_if, VX_writeback_if writeback_if,
input wire [`NW_BITS-1:0] deq_wid_next, input wire [`NW_BITS-1:0] deq_wid_next,
input wire exe_delay, input wire exe_delay,
input wire gpr_delay,
output wire delay output wire delay
); );
@@ -63,14 +62,14 @@ module VX_scoreboard #(
end end
// issue the instruction // issue the instruction
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay); assign ibuf_deq_if.ready = ~(delay || exe_delay);
`ifdef DBG_PRINT_PIPELINE `ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", $display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay); inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay);
end end
end end
`endif `endif
@@ -81,9 +80,9 @@ module VX_scoreboard #(
stall_ctr <= 0; stall_ctr <= 0;
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
stall_ctr <= stall_ctr + 1; stall_ctr <= stall_ctr + 1;
assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay); inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay);
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
stall_ctr <= 0; stall_ctr <= 0;
end end

View File

@@ -241,12 +241,12 @@ module VX_warp_sched #(
.N(1 + `NUM_THREADS + 32 + `NW_BITS), .N(1 + `NUM_THREADS + 32 + `NW_BITS),
.R(1) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_out), .stall (stall_out),
.flush (1'b0), .flush (1'b0),
.in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}), .data_in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}),
.out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid}) .data_out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
); );
assign busy = (active_warps != 0); assign busy = (active_warps != 0);

View File

@@ -78,12 +78,12 @@ module VX_writeback #(
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)), .N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)),
.R(1) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (1'b0), .flush (1'b0),
.in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}), .data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data}) .data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
); );
assign alu_commit_if.ready = !stall; assign alu_commit_if.ready = !stall;

View File

@@ -435,12 +435,12 @@ if (DRAM_ENABLE) begin
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH), .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH),
.R(1) .R(1)
) pipe_reg0 ( ) pipe_reg0 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (pipeline_stall), .stall (pipeline_stall),
.flush (1'b0), .flush (1'b0),
.in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}), .data_in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}),
.out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) .data_out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
); );
`ifdef DBG_CACHE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
@@ -508,12 +508,12 @@ if (DRAM_ENABLE) begin
.N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH), .N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH),
.R(1) .R(1)
) pipe_reg1 ( ) pipe_reg1 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (pipeline_stall), .stall (pipeline_stall),
.flush (1'b0), .flush (1'b0),
.in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}), .data_in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}),
.out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) .data_out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
); );
end else begin end else begin
@@ -650,12 +650,12 @@ end
.N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH), .N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH),
.R(1) .R(1)
) pipe_reg2 ( ) pipe_reg2 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (pipeline_stall), .stall (pipeline_stall),
.flush (1'b0), .flush (1'b0),
.in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}), .data_in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}),
.out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) .data_out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
); );
`ifdef DBG_CACHE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO

View File

@@ -57,7 +57,8 @@ module VX_bank_core_req_arb #(
VX_generic_queue #( VX_generic_queue #(
.DATAW($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(writedata_in)), .DATAW($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(writedata_in)),
.SIZE(CREQ_SIZE) .SIZE(CREQ_SIZE),
.BUFFERED(1)
) req_queue ( ) req_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -92,12 +92,12 @@ module VX_cache_core_rsp_merge #(
.R(NUM_REQS), .R(NUM_REQS),
.PASSTHRU(NUM_BANKS <= 2) .PASSTHRU(NUM_BANKS <= 2)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (1'b0), .flush (1'b0),
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}), .data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag}) .data_out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
); );
for (genvar i = 0; i < NUM_BANKS; i++) begin for (genvar i = 0; i < NUM_BANKS; i++) begin

View File

@@ -91,12 +91,12 @@ module VX_fp_noncomp #(
.N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1), .N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1),
.R(0) .R(0)
) pipe_reg0 ( ) pipe_reg0 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (1'b0), .flush (1'b0),
.in ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}), .data_in ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
.out ({a_sign[i], b_sign[i], a_exponent[i], a_mantissa[i], a_type[i], b_type[i], a_smaller[i], ab_equal[i]}) .data_out ({a_sign[i], b_sign[i], a_exponent[i], a_mantissa[i], a_type[i], b_type[i], a_smaller[i], ab_equal[i]})
); );
end end
@@ -104,12 +104,12 @@ module VX_fp_noncomp #(
.N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)), .N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)),
.R(1) .R(1)
) pipe_reg1 ( ) pipe_reg1 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (1'b0), .flush (1'b0),
.in ({valid_in, tag_in, op_type, frm, dataa, datab}), .data_in ({valid_in, tag_in, op_type, frm, dataa, datab}),
.out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r}) .data_out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r})
); );
// FCLASS // FCLASS
@@ -255,12 +255,12 @@ module VX_fp_noncomp #(
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)), .N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
.R(1) .R(1)
) pipe_reg2 ( ) pipe_reg2 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (1'b0), .flush (1'b0),
.in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}), .data_in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
.out ({valid_out, tag_out, result, has_fflags, fflags}) .data_out ({valid_out, tag_out, result, has_fflags, fflags})
); );
assign ready_in = ~stall; assign ready_in = ~stall;

View File

@@ -0,0 +1,24 @@
`ifndef VX_CSR_PIPE_REQ_IF
`define VX_CSR_PIPE_REQ_IF
`include "VX_define.vh"
// VX_csr_pipe_req_if
// Signal bundle for a CSR request with a valid/ready handshake pair.
// Only wire declarations live here; drivers and consumers are defined by the
// modules that connect to this interface.
interface VX_csr_pipe_req_if ();
// Request qualifier; paired with `ready` below.
wire valid;
// NW_BITS wide - presumably the issuing warp id; confirm against VX_define.vh.
wire [`NW_BITS-1:0] wid;
// One bit per thread (NUM_THREADS wide) - presumably the active-thread mask.
wire [`NUM_THREADS-1:0] tmask;
// 32-bit program counter carried with the request.
wire [31:0] PC;
// CSR operation selector (CSR_BITS wide); encoding is defined outside this file.
wire [`CSR_BITS-1:0] op_type;
// Target CSR address (CSR_ADDR_BITS wide).
wire [`CSR_ADDR_BITS-1:0] csr_addr;
// 32-bit value accompanying the operation.
// NOTE(review): name suggests a write/set/clear mask - verify in the CSR unit.
wire [31:0] csr_mask;
// Destination register index (NR_BITS wide).
wire [`NR_BITS-1:0] rd;
// NOTE(review): presumably "result is written back to rd" - confirm with consumer.
wire wb;
// NOTE(review): presumably marks requests originating from the external CSR I/O path - confirm.
wire is_io;
// Back-pressure half of the handshake; presumably driven by the consumer.
wire ready;
endinterface
`endif

View File

@@ -12,10 +12,11 @@ interface VX_csr_req_if ();
wire [31:0] PC; wire [31:0] PC;
wire [`CSR_BITS-1:0] op_type; wire [`CSR_BITS-1:0] op_type;
wire [`CSR_ADDR_BITS-1:0] csr_addr; wire [`CSR_ADDR_BITS-1:0] csr_addr;
wire [31:0] csr_mask; wire [31:0] rs1_data;
wire rs2_is_imm;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rd; wire [`NR_BITS-1:0] rd;
wire wb; wire wb;
wire is_io;
wire ready; wire ready;

View File

@@ -20,8 +20,7 @@ interface VX_decode_if ();
wire [`NR_BITS-1:0] rs3; wire [`NR_BITS-1:0] rs3;
wire [31:0] imm; wire [31:0] imm;
wire rs1_is_PC; wire rs1_is_PC;
wire rs2_is_imm; wire rs2_is_imm;
wire use_rs3;
wire [`NUM_REGS-1:0] used_regs; wire [`NUM_REGS-1:0] used_regs;
wire ready; wire ready;

View File

@@ -4,17 +4,11 @@
`include "VX_define.vh" `include "VX_define.vh"
interface VX_gpr_req_if (); interface VX_gpr_req_if ();
wire valid; wire [`NW_BITS-1:0] wid;
wire [`NR_BITS-1:0] rs1;
wire [`NW_BITS-1:0] wid; wire [`NR_BITS-1:0] rs2;
wire [31:0] PC; wire [`NR_BITS-1:0] rs3;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3;
wire use_rs3;
wire ready;
endinterface endinterface

View File

@@ -4,17 +4,11 @@
`include "VX_define.vh" `include "VX_define.vh"
interface VX_gpr_rsp_if (); interface VX_gpr_rsp_if ();
wire valid;
`IGNORE_WARNINGS_BEGIN
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;
`IGNORE_WARNINGS_END
wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data; wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data; wire [`NUM_THREADS-1:0][31:0] rs3_data;
wire ready;
endinterface endinterface
`endif `endif

View File

@@ -5,7 +5,7 @@ module VX_dp_ram #(
parameter DATAW = 1, parameter DATAW = 1,
parameter SIZE = 1, parameter SIZE = 1,
parameter BYTEENW = 1, parameter BYTEENW = 1,
parameter BUFFERED = 1, parameter BUFFERED = 0,
parameter RWCHECK = 1, parameter RWCHECK = 1,
parameter ADDRW = $clog2(SIZE), parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1), parameter SIZEW = $clog2(SIZE+1),
@@ -26,8 +26,10 @@ module VX_dp_ram #(
localparam DATA32W = DATAW / 32; localparam DATA32W = DATAW / 32;
localparam BYTEEN32W = BYTEENW / 4; localparam BYTEEN32W = BYTEENW / 4;
if (FASTRAM) begin //`ifndef QUARTUS
if (BUFFERED) begin
if (FASTRAM) begin
if (BUFFERED) begin
reg [DATAW-1:0] dout_r; reg [DATAW-1:0] dout_r;
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
@@ -207,5 +209,95 @@ module VX_dp_ram #(
end end
end end
/*`else
localparam OUTDATA_REG_B = BUFFERED ? "CLOCK0" : "UNREGISTERED";
localparam RAM_BLOCK_TYPE = FASTRAM ? "MLAB" : "AUTO";
if (RWCHECK) begin
altsyncram #(
.init_file (),
.operation_mode ("DUAL_PORT"),
.numwords_a (SIZE),
.numwords_b (SIZE),
.widthad_a (ADDRW),
.widthad_b (ADDRW),
.width_a (DATAW),
.width_b (DATAW),
.width_byteena_a(BYTEENW),
.address_reg_b ("CLOCK0"),
.outdata_reg_b (OUTDATA_REG_B),
.ram_block_type (RAM_BLOCK_TYPE)
) mem (
.clocken0 (1'b1),
.clocken1 (),
.clocken2 (),
.clocken3 (),
.clock0 (clk),
.clock1 (),
.address_a (waddr),
.address_b (raddr),
.byteena_a (byteen),
.byteena_b (1'b1),
.wren_a (wren),
.wren_b (1'b0),
.data_a (din),
.data_b (),
.rden_a (),
.rden_b (1'b1),
.q_a (),
.q_b (dout),
.addressstall_a (1'b0),
.addressstall_b (1'b0),
.aclr0 (1'b0),
.aclr1 (1'b0),
.eccstatus ()
);
end else begin
`NO_RW_RAM_CHECK altsyncram #(
.init_file (),
.operation_mode ("DUAL_PORT"),
.numwords_a (SIZE),
.numwords_b (SIZE),
.widthad_a (ADDRW),
.widthad_b (ADDRW),
.width_a (DATAW),
.width_b (DATAW),
.width_byteena_a(BYTEENW),
.outdata_reg_b (OUTDATA_REG_B),
.ram_block_type (RAM_BLOCK_TYPE)
) mem (
.clocken0 (1'b1),
.clocken1 (1'b1),
.clocken2 (1'b1),
.clocken3 (1'b1),
.clock0 (clk),
.clock1 (clk),
.address_a (waddr),
.address_b (raddr),
.byteena_a (byteen),
.byteena_b (1'b1),
.wren_a (wren),
.wren_b (1'b0),
.data_a (din),
.data_b (),
.rden_a (),
.rden_b (1'b1),
.q_a (),
.q_b (dout),
.addressstall_a (1'b0),
.addressstall_b (1'b0),
.aclr0 (1'b0),
.aclr1 (1'b0),
.eccstatus ()
);
end
`endif*/
endmodule endmodule
`TRACING_ON `TRACING_ON

View File

@@ -5,25 +5,25 @@ module VX_generic_register #(
parameter R = N, parameter R = N,
parameter PASSTHRU = 0 parameter PASSTHRU = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire stall, input wire stall,
input wire flush, input wire flush,
input wire[N-1:0] in, input wire[N-1:0] data_in,
output wire[N-1:0] out output wire[N-1:0] data_out
); );
if (PASSTHRU) begin if (PASSTHRU) begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
`UNUSED_VAR (stall) `UNUSED_VAR (stall)
assign out = flush ? N'(0) : in; assign data_out = flush ? N'(0) : data_in;
end else begin end else begin
reg [N-1:0] value; reg [N-1:0] value;
if (R != 0) begin if (R != 0) begin
always @(posedge clk) begin always @(posedge clk) begin
if (~stall) begin if (~stall) begin
value <= in; value <= data_in;
end end
if (reset || flush) begin if (reset || flush) begin
value[N-1:N-R] <= R'(0); value[N-1:N-R] <= R'(0);
@@ -34,12 +34,12 @@ module VX_generic_register #(
`UNUSED_VAR (flush) `UNUSED_VAR (flush)
always @(posedge clk) begin always @(posedge clk) begin
if (~stall) begin if (~stall) begin
value <= in; value <= data_in;
end end
end end
end end
assign out = value; assign data_out = value;
end end
endmodule endmodule

View File

@@ -106,12 +106,12 @@ module VX_stream_arbiter #(
.N(1 + DATAW), .N(1 + DATAW),
.R(1) .R(1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (1'b0), .flush (1'b0),
.in ({sel_valid, data_in[sel_idx]}), .data_in ({sel_valid, data_in[sel_idx]}),
.out ({valid_out, data_out}) .data_out ({valid_out, data_out})
); );
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin

View File

@@ -184,13 +184,9 @@
"issue_imm": 32, "issue_imm": 32,
"issue_rs1_is_pc": 1, "issue_rs1_is_pc": 1,
"issue_rs2_is_imm": 1, "issue_rs2_is_imm": 1,
"?gpr_rsp_valid": 1,
"gpr_rsp_wid":"`NW_BITS",
"gpr_rsp_pc": 32,
"gpr_rsp_a":"`NUM_THREADS * 32", "gpr_rsp_a":"`NUM_THREADS * 32",
"gpr_rsp_b":"`NUM_THREADS * 32", "gpr_rsp_b":"`NUM_THREADS * 32",
"gpr_rsp_c":"`NUM_THREADS * 32", "gpr_rsp_c":"`NUM_THREADS * 32",
"!gpr_delay": 1,
"?writeback_valid": 1, "?writeback_valid": 1,
"writeback_wid":"`NW_BITS", "writeback_wid":"`NW_BITS",
"writeback_pc": 32, "writeback_pc": 32,