register file refactoring

This commit is contained in:
Blaise Tine
2020-12-05 01:40:50 -08:00
parent 478d971389
commit 13a5370254
33 changed files with 524 additions and 605 deletions

View File

@@ -100,12 +100,12 @@ module VX_alu_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}),
.out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r})
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.data_in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}),
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r})
);
wire is_less = cmp_result_r[32];

View File

@@ -64,12 +64,12 @@ module VX_commit #(
.N(1 + CMTW),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (1'b0),
.flush (1'b0),
.in ({commit_fire, commit_size}),
.out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
.clk (clk),
.reset (reset),
.stall (1'b0),
.flush (1'b0),
.data_in ({commit_fire, commit_size}),
.data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
);
// Writeback

View File

@@ -1,57 +1,59 @@
`include "VX_define.vh"
// VX_csr_arb: combinational mux that steers either the core CSR request or
// the IO CSR request into a single downstream request interface, and steers
// the single downstream response back to either the IO response port or the
// commit port.
//
// NOTE(review): this listing comes from a diff hunk (@@ -1,57 +1,59 @@) whose
// add/remove markers are not visible. Several ports and assigns appear twice,
// once using csr_req_if / csr_rsp_if and once using csr_pipe_req_if /
// csr_pipe_rsp_if; presumably one set is the pre-change text and the other
// the post-change text. Confirm against the real file before relying on it.
module VX_csr_arb (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// bus select
input wire select_io_req,
input wire select_io_rsp,
input wire select_io_req,
input wire select_io_rsp,
// input requests
VX_csr_req_if csr_core_req_if,
VX_csr_io_req_if csr_io_req_if,
VX_csr_req_if csr_core_req_if,
VX_csr_io_req_if csr_io_req_if,
// output request
VX_csr_req_if csr_req_if,
VX_csr_pipe_req_if csr_pipe_req_if,
// input response
VX_commit_if csr_rsp_if,
VX_commit_if csr_pipe_rsp_if,
// outputs responses
VX_commit_if csr_commit_if,
VX_csr_io_rsp_if csr_io_rsp_if
);
// clk and reset are not referenced by any logic below.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
// requests
// select_io_req == 0 forwards the core request fields unchanged.
// select_io_req == 1 builds a request from the IO port: wid/tmask/PC/rd/wb
// are driven to 0, op_type is `CSR_RW when csr_io_req_if.rw is set and
// `CSR_RS otherwise, and the mask carries the IO data only when rw is set.
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
assign csr_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0;
assign csr_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0;
assign csr_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
assign csr_req_if.is_io = select_io_req;
// Core-side mask operand: the rs1 field cast to 32 bits when rs2_is_imm is
// set, otherwise the rs1_data value carried on the core request.
wire [31:0] csr_core_req_mask = csr_core_req_if.rs2_is_imm ? 32'(csr_core_req_if.rs1) : csr_core_req_if.rs1_data;
// Downstream ready is handed only to the currently selected requester.
assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req);
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
// requests
// Same selection as above, driving csr_pipe_req_if; here the core-side mask
// comes from csr_core_req_mask rather than a csr_mask field on the request.
assign csr_pipe_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
assign csr_pipe_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
assign csr_pipe_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0;
assign csr_pipe_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0;
assign csr_pipe_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_pipe_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign csr_pipe_req_if.csr_mask = (~select_io_req) ? csr_core_req_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_pipe_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
assign csr_pipe_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
assign csr_pipe_req_if.is_io = select_io_req;
assign csr_core_req_if.ready = csr_pipe_req_if.ready && (~select_io_req);
assign csr_io_req_if.ready = csr_pipe_req_if.ready && select_io_req;
// responses
// The IO response is valid only while select_io_rsp is set and takes
// element [0] of the response data.
assign csr_io_rsp_if.valid = csr_rsp_if.valid & select_io_rsp;
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
assign csr_io_rsp_if.valid = csr_pipe_rsp_if.valid & select_io_rsp;
assign csr_io_rsp_if.data = csr_pipe_rsp_if.data[0];
// The commit port sees the response only while select_io_rsp is clear; all
// other response fields are passed through regardless of the select.
assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp;
assign csr_commit_if.wid = csr_rsp_if.wid;
assign csr_commit_if.tmask = csr_rsp_if.tmask;
assign csr_commit_if.PC = csr_rsp_if.PC;
assign csr_commit_if.rd = csr_rsp_if.rd;
assign csr_commit_if.wb = csr_rsp_if.wb;
assign csr_commit_if.data = csr_rsp_if.data;
assign csr_commit_if.valid = csr_pipe_rsp_if.valid & ~select_io_rsp;
assign csr_commit_if.wid = csr_pipe_rsp_if.wid;
assign csr_commit_if.tmask = csr_pipe_rsp_if.tmask;
assign csr_commit_if.PC = csr_pipe_rsp_if.PC;
assign csr_commit_if.rd = csr_pipe_rsp_if.rd;
assign csr_commit_if.wb = csr_pipe_rsp_if.wb;
assign csr_commit_if.data = csr_pipe_rsp_if.data;
// Response back-pressure comes from whichever consumer is selected.
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
endmodule

View File

@@ -12,15 +12,15 @@ module VX_csr_unit #(
VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if,
VX_csr_req_if csr_req_if,
VX_csr_req_if csr_req_if,
VX_commit_if csr_commit_if,
input wire busy,
input wire[`NUM_WARPS-1:0] fpu_pending,
output wire[`NUM_WARPS-1:0] pending
);
VX_csr_req_if csr_pipe_req_if();
VX_commit_if csr_pipe_rsp_if();
VX_csr_pipe_req_if csr_pipe_req_if();
VX_commit_if csr_pipe_rsp_if();
wire select_io_req = csr_io_req_if.valid;
wire select_io_rsp;
@@ -34,9 +34,9 @@ module VX_csr_unit #(
.csr_core_req_if (csr_req_if),
.csr_io_req_if (csr_io_req_if),
.csr_req_if (csr_pipe_req_if),
.csr_pipe_req_if (csr_pipe_req_if),
.csr_rsp_if (csr_pipe_rsp_if),
.csr_pipe_rsp_if (csr_pipe_rsp_if),
.csr_io_rsp_if (csr_io_rsp_if),
.csr_commit_if (csr_commit_if)
);
@@ -105,12 +105,12 @@ module VX_csr_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
.out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.data_in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
.data_out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
);
for (genvar i = 0; i < `NUM_THREADS; i++) begin

View File

@@ -347,11 +347,9 @@ module VX_decode #(
assign decode_if.rd = rd;
assign decode_if.rs1 = rs1_qual;
assign decode_if.rs2 = rs2;
assign decode_if.rs3 = 0;
assign decode_if.rs3 = rs3;
`endif
assign decode_if.use_rs3 = use_rs3;
assign decode_if.used_regs = ((`NUM_REGS)'(use_rd) << decode_if.rd)
| ((`NUM_REGS)'(use_rs1) << decode_if.rs1)
| ((`NUM_REGS)'(use_rs2) << decode_if.rs2)

View File

@@ -117,6 +117,8 @@ module VX_execute #(
.pending (fpu_pending)
);
`else
`UNUSED_VAR (csr_pending)
`UNUSED_VAR (fpu_to_csr_if.read_frm)
assign fpu_req_if.ready = 0;
assign fpu_commit_if.valid = 0;
assign fpu_commit_if.wid = 0;
@@ -124,9 +126,12 @@ module VX_execute #(
assign fpu_commit_if.tmask = 0;
assign fpu_commit_if.wb = 0;
assign fpu_commit_if.rd = 0;
assign fpu_commit_if.data = 0;
assign fpu_commit_if.has_fflags = 0;
assign fpu_commit_if.fflags = 0;
assign fpu_commit_if.data = 0;
assign fpu_to_csr_if.write_enable = 0;
assign fpu_to_csr_if.write_wid = 0;
assign fpu_to_csr_if.write_fflags = 0;
assign fpu_to_csr_if.read_wid = 0;
assign fpu_pending = 0;
`endif
VX_gpu_unit #(

View File

@@ -153,12 +153,12 @@ module VX_fpu_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
.out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
.data_out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
);
assign ready_out = ~stall_out;

View File

@@ -1,78 +0,0 @@
`include "VX_platform.vh"
// VX_gpr_bypass: holds data_in values that arrive one clock after `push`
// until the consumer signals `pop`.
//
// Parameters:
//   DATAW    - width in bits of data_in / data_out.
//   PASSTHRU - when non-zero, no storage is built and data_out is wired
//              straight to data_in.
// Ports:
//   push     - registered into delayed_push; data_in is captured on the
//              cycle delayed_push is high (presumably to match a one-cycle
//              read latency upstream - TODO confirm against the caller).
//   pop      - consumer takes the current data_out this cycle.
//   data_out - oldest buffered value when one exists, otherwise data_in.
//
// NOTE(review): the diff hunk header (@@ -1,78 +0,0 @@) indicates this whole
// file is removed by the commit being viewed.
module VX_gpr_bypass #(
parameter DATAW = 1,
parameter PASSTHRU = 0
) (
input wire clk,
input wire reset,
input wire push,
input wire pop,
input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out
);
if (PASSTHRU) begin
// Pass-through variant: only tracks delayed_push so the assertion can
// check that every delayed push is consumed by a pop in the same cycle.
reg delayed_push;
always @(posedge clk) begin
if (reset) begin
delayed_push <= 0;
end else begin
delayed_push <= push;
assert(!delayed_push || pop);
end
end
assign data_out = data_in;
end else begin
// Buffered variant: two storage slots. `buffer` is the entry presented
// on data_out, `buffer2` is the entry behind it; use_buffer/use_buffer2
// are their occupancy flags.
reg [DATAW-1:0] buffer, buffer2;
reg use_buffer, use_buffer2;
reg delayed_push;
always @(posedge clk) begin
if (reset) begin
delayed_push <= 0;
use_buffer <= 0;
use_buffer2 <= 0;
end else begin
delayed_push <= push;
// Slot 2 may only be occupied while slot 1 is occupied.
assert(!use_buffer2 || use_buffer);
// A pop shifts slot 2's occupancy into slot 1.
if (pop) begin
use_buffer <= use_buffer2;
use_buffer2 <= 0;
end
// The assignments below come after the pop shift in this block, so
// when both apply in the same cycle these later values are the ones
// that take effect.
if (delayed_push) begin
if (use_buffer) begin
assert(!use_buffer2); // full!
use_buffer <= 1;
if (!pop) begin
use_buffer2 <= 1;
end
end else if (!pop) begin
// Nothing buffered and nobody consuming: keep the incoming value.
use_buffer <= 1;
end
end
end
// Data registers are updated outside the reset branch, so they are not
// cleared by reset; only the occupancy flags are.
if (pop) begin
buffer <= buffer2;
end
if (delayed_push) begin
if (use_buffer) begin
if (pop) begin
// Head is leaving and slot 2 is asserted empty, so the incoming
// value becomes the new head (overrides the shift above).
buffer <= data_in;
end else begin
buffer2 <= data_in;
end
end else if (!pop) begin
buffer <= data_in;
end
end
end
// With nothing buffered, data_in is forwarded combinationally.
assign data_out = use_buffer ? buffer : data_in;
end
endmodule

View File

@@ -4,33 +4,79 @@
// VX_gpr_ram: register-file storage with one write port and multiple read
// ports, addressed by a combined warp/register index of width
// `NW_BITS + `NR_BITS and holding one 32-bit word per thread.
//
// NOTE(review): this listing comes from a diff hunk (@@ -4,33 +4,79 @@)
// whose add/remove markers are not visible. Two port sets appear
// (we/rs1/rs2/rs1_data/rs2_data and wren/tmask/raddr1..3/rdata1..3), and
// fragments of a byte-lane `mem` with registered q1/q2 outputs are mixed in
// with the per-thread generate loops; presumably the former are the
// pre-change lines. Confirm against the real file before relying on it.
module VX_gpr_ram (
input wire clk,
input wire [`NUM_THREADS-1:0] we,
input wire wren,
input wire [`NUM_THREADS-1:0] tmask,
input wire [`NW_BITS+`NR_BITS-1:0] waddr,
input wire [`NUM_THREADS-1:0][31:0] wdata,
input wire [`NW_BITS+`NR_BITS-1:0] rs1,
input wire [`NW_BITS+`NR_BITS-1:0] rs2,
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
output wire [`NUM_THREADS-1:0][31:0] rs2_data
input wire [`NW_BITS+`NR_BITS-1:0] raddr1,
input wire [`NW_BITS+`NR_BITS-1:0] raddr2,
input wire [`NW_BITS+`NR_BITS-1:0] raddr3,
output wire [`NUM_THREADS-1:0][31:0] rdata1,
output wire [`NUM_THREADS-1:0][31:0] rdata2,
output wire [`NUM_THREADS-1:0][31:0] rdata3
);
// Derived sizes. Only RAM_ADDRW and RAM_DEPTH are referenced in the code
// visible below.
localparam RAM_DATAW = `NUM_THREADS * 32;
localparam RAM_ADDRW = `NW_BITS + `NR_BITS;
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
localparam RAM_BYTEEN = `NUM_THREADS * 4;
// Single memory with per-thread, per-byte lanes; each thread's word is
// written one byte at a time when its `we` bit is set.
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
reg [`NUM_THREADS-1:0][31:0] q1, q2;
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (we[i]) begin
mem[waddr][i][0] <= wdata[i][07:00];
mem[waddr][i][1] <= wdata[i][15:08];
mem[waddr][i][2] <= wdata[i][23:16];
mem[waddr][i][3] <= wdata[i][31:24];
// NOTE(review): raddr3 is marked unused here but is read in the
// EXT_F_ENABLE branch below via raddr3_qual.
`UNUSED_VAR (raddr3)
`ifdef EXT_F_ENABLE
// One pair of memories per thread, each half of RAM_DEPTH. The top address
// bit selects mem_f (bit set) or mem_i (bit clear); the remaining bits
// index within the selected memory.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
reg [31:0] mem_i [(RAM_DEPTH/2)-1:0];
reg [31:0] mem_f [(RAM_DEPTH/2)-1:0];
// Only mem_i is given an initial value; mem_f has no initializer here.
initial mem_i = '{default: 0};
wire waddr_is_fp = waddr[RAM_ADDRW-1];
wire raddr1_is_fp = raddr1[RAM_ADDRW-1];
wire raddr2_is_fp = raddr2[RAM_ADDRW-1];
wire [RAM_ADDRW-2:0] waddr_qual = waddr[RAM_ADDRW-2:0];
wire [RAM_ADDRW-2:0] raddr1_qual = raddr1[RAM_ADDRW-2:0];
wire [RAM_ADDRW-2:0] raddr2_qual = raddr2[RAM_ADDRW-2:0];
wire [RAM_ADDRW-2:0] raddr3_qual = raddr3[RAM_ADDRW-2:0];
// Write to mem_i when the write is enabled for this thread and the top
// address bit is clear.
always @(posedge clk) begin
if (wren && tmask[i] && !waddr_is_fp) begin
mem_i[waddr_qual] <= wdata[i];
end
end
q1 <= mem[rs1];
q2 <= mem[rs2];
// Write to mem_f when the write is enabled for this thread and the top
// address bit is set.
always @(posedge clk) begin
if (wren && tmask[i] && waddr_is_fp) begin
mem_f[waddr_qual] <= wdata[i];
end
end
// Reads are continuous assigns straight from the arrays (no output
// register). Ports 1 and 2 pick a memory by the top address bit; port 3
// always reads mem_f and ignores raddr3's top bit.
assign rdata1[i] = raddr1_is_fp ? mem_f[raddr1_qual] : mem_i[raddr1_qual];
assign rdata2[i] = raddr2_is_fp ? mem_f[raddr2_qual] : mem_i[raddr2_qual];
assign rdata3[i] = mem_f[raddr3_qual];
end
assign rs1_data = q1;
assign rs2_data = q2;
`else
// Without EXT_F_ENABLE: one full-depth memory per thread, written when wren
// and the thread's tmask bit are set; read port 3 is tied to 0.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
reg [31:0] mem [RAM_DEPTH-1:0];
initial mem = '{default: 0};
always @(posedge clk) begin
if (wren && tmask[i]) begin
mem[waddr] <= wdata[i];
end
end
assign rdata1[i] = mem[raddr1];
assign rdata2[i] = mem[raddr2];
assign rdata3[i] = 0;
end
`endif
endmodule

View File

@@ -15,91 +15,38 @@ module VX_gpr_stage #(
);
`UNUSED_VAR (reset)
reg rsp_valid;
reg [`NW_BITS-1:0] rsp_wid;
reg [31:0] rsp_pc;
reg rs1_is_zero, rs2_is_zero;
wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data;
wire [`NW_BITS+`NR_BITS-1:0] raddr1, raddr2;
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
wire [`NW_BITS+`NR_BITS-1:0] waddr, raddr1, raddr2, raddr3;
`ifdef EXT_F_ENABLE
assign waddr = {writeback_if.rd[`NR_BITS-1], writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]};
assign raddr1 = {gpr_req_if.rs1[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]};
assign raddr2 = {gpr_req_if.rs2[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]};
assign raddr3 = {gpr_req_if.rs3[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]};
`else
assign waddr = {writeback_if.wid, writeback_if.rd};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
`endif
VX_gpr_ram gpr_ram (
.clk (clk),
.we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.tmask),
.waddr ({writeback_if.wid, writeback_if.rd}),
.wdata (writeback_if.data),
.rs1 (raddr1),
.rs2 (raddr2),
.rs1_data (rs1_data),
.rs2_data (rs2_data)
);
always @(posedge clk) begin
if (reset) begin
rsp_valid <= 0;
end else begin
rsp_valid <= gpr_req_if.valid;
end
rsp_wid <= gpr_req_if.wid;
rsp_pc <= gpr_req_if.PC;
rs1_is_zero <= (0 == gpr_req_if.rs1);
rs2_is_zero <= (0 == gpr_req_if.rs2);
end
`ifdef EXT_F_ENABLE
reg [`NUM_THREADS-1:0][31:0] rs3_data;
reg read_rs3, save_rs3;
wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && !read_rs3;
wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;
always @(posedge clk) begin
if (reset) begin
read_rs3 <= 0;
end else begin
if (rs3_delay) begin
read_rs3 <= 1;
end else if (read_fire) begin
read_rs3 <= 0;
end
assert(!read_rs3 || rsp_wid == gpr_req_if.wid);
end
if (rs3_delay) begin
save_rs3 <= 1;
end
if (save_rs3) begin
rs3_data <= rs1_data;
save_rs3 <= 0;
end
end
assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)};
assign gpr_req_if.ready = ~rs3_delay;
assign gpr_rsp_if.rs3_data = rs3_data;
`else
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign gpr_req_if.ready = 1;
assign gpr_rsp_if.rs3_data = 0;
`UNUSED_VAR (gpr_req_if.valid);
`UNUSED_VAR (gpr_req_if.rs3);
`UNUSED_VAR (gpr_req_if.use_rs3);
`UNUSED_VAR (gpr_rsp_if.ready);
.clk (clk),
.wren (writeback_if.valid),
.tmask (writeback_if.tmask),
.waddr (waddr),
.wdata (writeback_if.data),
.raddr1 (raddr1),
.raddr2 (raddr2),
.raddr3 (raddr3),
.rdata1 (rdata1),
.rdata2 (rdata2),
.rdata3 (rdata3)
);
`endif
assign gpr_rsp_if.rs1_data = rs1_is_zero ? (`NUM_THREADS*32)'(0) : rs1_data;
assign gpr_rsp_if.rs2_data = rs2_is_zero ? (`NUM_THREADS*32)'(0) : rs2_data;
assign gpr_rsp_if.valid = rsp_valid;
assign gpr_rsp_if.wid = rsp_wid;
assign gpr_rsp_if.PC = rsp_pc;
assign gpr_rsp_if.rs1_data = rdata1;
assign gpr_rsp_if.rs2_data = rdata2;
assign gpr_rsp_if.rs3_data = rdata3;
assign writeback_if.ready = 1'b1;

View File

@@ -79,12 +79,12 @@ module VX_gpu_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
.out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
);
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready;

View File

@@ -14,7 +14,7 @@ module VX_ibuffer #(
output wire [`NW_BITS-1:0] deq_wid_next,
VX_decode_if ibuf_deq_if
);
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + 1 + `NUM_REGS;
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
localparam SIZE = `IBUF_SIZE;
localparam SIZEW = $clog2(SIZE+1);
localparam ADDRW = $clog2(SIZE);
@@ -192,8 +192,7 @@ module VX_ibuffer #(
ibuf_enq_if.rs3,
ibuf_enq_if.imm,
ibuf_enq_if.rs1_is_PC,
ibuf_enq_if.rs2_is_imm,
ibuf_enq_if.use_rs3,
ibuf_enq_if.rs2_is_imm,
ibuf_enq_if.used_regs};
assign ibuf_deq_if.valid = deq_valid;
@@ -211,7 +210,6 @@ module VX_ibuffer #(
ibuf_deq_if.imm,
ibuf_deq_if.rs1_is_PC,
ibuf_deq_if.rs2_is_imm,
ibuf_deq_if.use_rs3,
ibuf_deq_if.used_regs} = deq_instr;
endmodule

View File

@@ -30,94 +30,72 @@ module VX_instr_demux (
// ALU unit
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
wire alu_req_ready;
wire alu_stall = alu_req_if.valid && ~alu_req_if.ready;
wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
VX_opd_collect #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS),
.OPDSW (2 * `NUM_THREADS * 32),
.PASSTHRU (1) // ALU has no backpressure
) alu_opc (
.clk (clk),
.reset (reset),
.ready_in (alu_req_ready),
.valid_in (alu_req_valid),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}),
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
.ready_out (alu_req_if.ready),
.valid_out (alu_req_if.valid)
VX_generic_register #(
.N (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.R (1)
) alu_pipe (
.clk (clk),
.reset (reset),
.stall (alu_stall),
.flush (1'b0),
.data_in ({alu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data})
);
// lsu unit
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
wire lsu_req_ready;
wire lsu_stall = lsu_req_if.valid && ~lsu_req_if.ready;
VX_opd_collect #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1),
.OPDSW (2 * `NUM_THREADS * 32)
) lsu_opc (
VX_generic_register #(
.N (1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.R (1)
) lsu_pipe (
.clk (clk),
.reset (reset),
.ready_in (lsu_req_ready),
.valid_in (lsu_req_valid),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}),
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
.ready_out (lsu_req_if.ready),
.valid_out (lsu_req_if.valid)
.stall (lsu_stall),
.flush (1'b0),
.data_in ({lsu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data})
);
// csr unit
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
wire csr_req_ready;
wire csr_stall = csr_req_if.valid && ~csr_req_if.ready;
reg tmp_rs2_is_imm;
reg [`NR_BITS-1:0] tmp_rs1;
always @(posedge clk) begin
tmp_rs2_is_imm <= execute_if.rs2_is_imm;
tmp_rs1 <= execute_if.rs1;
end
wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 32'(tmp_rs1) : gpr_rsp_if.rs1_data[0];
VX_opd_collect #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1),
.OPDSW (32)
) csr_opc (
VX_generic_register #(
.N (1 + `NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.R (1)
) csr_pipe (
.clk (clk),
.reset (reset),
.ready_in (csr_req_ready),
.valid_in (csr_req_valid),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}),
.opds_in ({csr_req_mask}),
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io, csr_req_if.csr_mask}),
.ready_out (csr_req_if.ready),
.valid_out (csr_req_if.valid)
.stall (csr_stall),
.flush (1'b0),
.data_in ({csr_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, execute_if.rs2_is_imm, execute_if.rs1, gpr_rsp_if.rs1_data[0]}),
.data_out ({csr_req_if.valid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.rs2_is_imm, csr_req_if.rs1, csr_req_if.rs1_data})
);
// mul unit
`ifdef EXT_M_ENABLE
wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL);
wire mul_req_ready;
wire mul_stall = mul_req_if.valid && ~mul_req_if.ready;
VX_opd_collect #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1),
.OPDSW (2 * `NUM_THREADS * 32)
) mul_opc (
VX_generic_register #(
.N (1 + `NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.R (1)
) mul_pipe (
.clk (clk),
.reset (reset),
.ready_in (mul_req_ready),
.valid_in (mul_req_valid),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}),
.ready_out (mul_req_if.ready),
.valid_out (mul_req_if.valid)
.stall (mul_stall),
.flush (1'b0),
.data_in ({mul_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({mul_req_if.valid, mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data})
);
`endif
@@ -125,54 +103,50 @@ module VX_instr_demux (
`ifdef EXT_F_ENABLE
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
wire fpu_req_ready;
wire fpu_stall = fpu_req_if.valid && ~fpu_req_if.ready;
VX_opd_collect #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1),
.OPDSW (3 * `NUM_THREADS * 32)
) fpu_opc (
VX_generic_register #(
.N (1 + `NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.R (1)
) fpu_pipe (
.clk (clk),
.reset (reset),
.ready_in (fpu_req_ready),
.valid_in (fpu_req_valid),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb}),
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
.ready_out (fpu_req_if.ready),
.valid_out (fpu_req_if.valid)
.stall (fpu_stall),
.flush (1'b0),
.data_in ({fpu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({fpu_req_if.valid, fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data})
);
`else
`UNUSED_VAR (gpr_rsp_if.rs3_data)
`endif
// gpu unit
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
wire gpu_req_ready;
wire gpu_stall = gpu_req_if.valid && ~gpu_req_if.ready;
VX_opd_collect #(
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1),
.OPDSW (`NUM_THREADS * 32 + 32)
) gpu_opc (
VX_generic_register #(
.N (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
.R (1)
) gpu_pipe (
.clk (clk),
.reset (reset),
.ready_in (gpu_req_ready),
.valid_in (gpu_req_valid),
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
.ready_out (gpu_req_if.ready),
.valid_out (gpu_req_if.valid)
.stall (gpu_stall),
.flush (1'b0),
.data_in ({gpu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
.data_out ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data})
);
// can take next request?
assign execute_if.ready = (alu_req_ready && (execute_if.ex_type == `EX_ALU))
|| (lsu_req_ready && (execute_if.ex_type == `EX_LSU))
|| (csr_req_ready && (execute_if.ex_type == `EX_CSR))
assign execute_if.ready = (!alu_stall && (execute_if.ex_type == `EX_ALU))
|| (!lsu_stall && (execute_if.ex_type == `EX_LSU))
|| (!csr_stall && (execute_if.ex_type == `EX_CSR))
`ifdef EXT_M_ENABLE
|| (mul_req_ready && (execute_if.ex_type == `EX_MUL))
|| (!mul_stall && (execute_if.ex_type == `EX_MUL))
`endif
`ifdef EXT_F_ENABLE
|| (fpu_req_ready && (execute_if.ex_type == `EX_FPU))
|| (!fpu_stall && (execute_if.ex_type == `EX_FPU))
`endif
|| (gpu_req_ready && (execute_if.ex_type == `EX_GPU));
|| (!gpu_stall && (execute_if.ex_type == `EX_GPU));
endmodule

View File

@@ -5,23 +5,23 @@ module VX_issue #(
) (
`SCOPE_IO_VX_issue
input wire clk,
input wire reset,
input wire clk,
input wire reset,
VX_decode_if decode_if,
VX_writeback_if writeback_if,
VX_decode_if decode_if,
VX_writeback_if writeback_if,
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
VX_decode_if ibuf_deq_if();
VX_decode_if execute_if();
VX_gpr_req_if gpr_req_if();
VX_gpr_rsp_if gpr_rsp_if();
VX_decode_if ibuf_deq_if();
VX_decode_if execute_if();
VX_gpr_req_if gpr_req_if();
VX_gpr_rsp_if gpr_rsp_if();
wire scoreboard_delay;
wire [`NW_BITS-1:0] deq_wid_next;
@@ -29,49 +29,42 @@ module VX_issue #(
VX_ibuffer #(
.CORE_ID(CORE_ID)
) ibuffer (
.clk (clk),
.reset (reset),
.freeze (~gpr_req_if.ready),
.ibuf_enq_if (decode_if),
.deq_wid_next (deq_wid_next),
.ibuf_deq_if (ibuf_deq_if)
.clk (clk),
.reset (reset),
.freeze (1'b0),
.ibuf_enq_if (decode_if),
.deq_wid_next (deq_wid_next),
.ibuf_deq_if (ibuf_deq_if)
);
VX_scoreboard #(
.CORE_ID(CORE_ID)
) scoreboard (
.clk (clk),
.reset (reset),
.ibuf_deq_if (ibuf_deq_if),
.writeback_if (writeback_if),
.deq_wid_next (deq_wid_next),
.exe_delay (~execute_if.ready),
.gpr_delay (~gpr_req_if.ready),
.delay (scoreboard_delay)
.clk (clk),
.reset (reset),
.ibuf_deq_if (ibuf_deq_if),
.writeback_if (writeback_if),
.deq_wid_next (deq_wid_next),
.exe_delay (~execute_if.ready),
.delay (scoreboard_delay)
);
assign gpr_req_if.valid = ibuf_deq_if.valid && ~scoreboard_delay;
assign gpr_req_if.wid = ibuf_deq_if.wid;
assign gpr_req_if.PC = ibuf_deq_if.PC;
assign gpr_req_if.rs1 = ibuf_deq_if.rs1;
assign gpr_req_if.rs2 = ibuf_deq_if.rs2;
assign gpr_req_if.rs3 = ibuf_deq_if.rs3;
assign gpr_req_if.use_rs3 = ibuf_deq_if.use_rs3;
assign gpr_rsp_if.ready = execute_if.ready;
assign gpr_req_if.wid = ibuf_deq_if.wid;
assign gpr_req_if.rs1 = ibuf_deq_if.rs1;
assign gpr_req_if.rs2 = ibuf_deq_if.rs2;
assign gpr_req_if.rs3 = ibuf_deq_if.rs3;
VX_gpr_stage #(
.CORE_ID(CORE_ID)
) gpr_stage (
.clk (clk),
.reset (reset),
.writeback_if (writeback_if),
.gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if)
.clk (clk),
.reset (reset),
.writeback_if (writeback_if),
.gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if)
);
`UNUSED_VAR (gpr_rsp_if.valid);
assign execute_if.valid = ibuf_deq_if.valid && gpr_req_if.ready && ~scoreboard_delay;
assign execute_if.valid = ibuf_deq_if.valid && ~scoreboard_delay;
assign execute_if.wid = ibuf_deq_if.wid;
assign execute_if.tmask = ibuf_deq_if.tmask;
assign execute_if.PC = ibuf_deq_if.PC;
@@ -83,19 +76,19 @@ module VX_issue #(
assign execute_if.rs1 = ibuf_deq_if.rs1;
assign execute_if.imm = ibuf_deq_if.imm;
assign execute_if.rs1_is_PC = ibuf_deq_if.rs1_is_PC;
assign execute_if.rs2_is_imm = ibuf_deq_if.rs2_is_imm;
assign execute_if.rs2_is_imm= ibuf_deq_if.rs2_is_imm;
VX_instr_demux instr_demux (
.clk (clk),
.reset (reset),
.execute_if (execute_if),
.gpr_rsp_if (gpr_rsp_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if)
.clk (clk),
.reset (reset),
.execute_if (execute_if),
.gpr_rsp_if (gpr_rsp_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if)
);
`SCOPE_ASSIGN (issue_fire, ibuf_deq_if.valid && ibuf_deq_if.ready);
@@ -115,12 +108,8 @@ module VX_issue #(
`SCOPE_ASSIGN (issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm);
`SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay);
`SCOPE_ASSIGN (gpr_delay, ~gpr_req_if.ready);
`SCOPE_ASSIGN (execute_delay, ~execute_if.ready);
`SCOPE_ASSIGN (gpr_rsp_valid, gpr_rsp_if.valid);
`SCOPE_ASSIGN (gpr_rsp_wid, gpr_rsp_if.wid);
`SCOPE_ASSIGN (gpr_rsp_pc, gpr_rsp_if.PC);
`SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data);
`SCOPE_ASSIGN (gpr_rsp_b, gpr_rsp_if.rs2_data);
`SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data);
@@ -140,7 +129,7 @@ module VX_issue #(
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
end
if (csr_req_if.valid && csr_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.rs1_data);
end
if (mul_req_if.valid && mul_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);

View File

@@ -19,6 +19,7 @@ module VX_lsu_unit #(
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if
);
wire req_valid;
wire [`NUM_THREADS-1:0] req_tmask;
wire req_rw;
wire [`NUM_THREADS-1:0][29:0] req_addr;
@@ -71,19 +72,18 @@ module VX_lsu_unit #(
reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags;
`IGNORE_WARNINGS_END
wire valid_in;
wire stall_in;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))),
.R(1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.stall (stall_in),
.flush (1'b0),
.in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}),
.out ({valid_in, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
.clk (clk),
.reset (reset),
.stall (stall_in),
.flush (1'b0),
.data_in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}),
.data_out ({req_valid, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
);
wire [`NW_BITS-1:0] rsp_wid;
@@ -136,11 +136,11 @@ module VX_lsu_unit #(
end
end
wire stall_out = ~ld_commit_if.ready && ld_commit_if.valid;
wire store_stall = valid_in && req_rw && stall_out;
wire load_req_stall = req_valid && !req_rw && lsuq_full;
wire store_req_stall = req_valid && req_rw && !st_commit_if.ready;
// Core Request
assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~lsuq_full && ~store_stall}} & req_tmask;
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && !load_req_stall && !store_req_stall}} & req_tmask;
assign dcache_req_if.rw = req_rw;
assign dcache_req_if.byteen = req_byteen;
assign dcache_req_if.addr = req_addr;
@@ -152,7 +152,9 @@ module VX_lsu_unit #(
assign dcache_req_if.tag = req_tag;
`endif
assign stall_in = ~dcache_req_if.ready || lsuq_full || store_stall;
assign stall_in = ~dcache_req_if.ready
|| load_req_stall
|| store_req_stall;
// Can accept new request?
assign lsu_req_if.ready = ~stall_in;
@@ -171,7 +173,7 @@ module VX_lsu_unit #(
// send store commit
wire is_store_rsp = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready;
wire is_store_rsp = req_valid && req_rw && dcache_req_if.ready;
assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid;
@@ -180,26 +182,27 @@ module VX_lsu_unit #(
assign st_commit_if.rd = 0;
assign st_commit_if.wb = 0;
assign st_commit_if.data = 0;
`UNUSED_VAR (st_commit_if.ready)
// send load commit
wire is_load_rsp = (| dcache_rsp_if.valid);
wire is_load_rsp = (| dcache_rsp_if.valid);
wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.R(1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}),
.out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data})
.clk (clk),
.reset (reset),
.stall (load_rsp_stall),
.flush (1'b0),
.data_in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}),
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data})
);
// Can accept new cache response?
assign dcache_rsp_if.ready = ~stall_out;
assign dcache_rsp_if.ready = ~load_rsp_stall;
// scope registration
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}});

View File

@@ -147,12 +147,12 @@ module VX_mul_unit #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}),
.out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}),
.data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
);
// can accept new request?

View File

@@ -1,65 +0,0 @@
`include "VX_platform.vh"
// VX_opd_collect: pairs an instruction word (inst_in) with its operand word
// (opds_in) and presents the two, concatenated, on a registered valid/ready
// output (valid_out / data_out / ready_out).
//
// Parameters:
//   INSTW    - width in bits of the instruction payload (inst_in).
//   OPDSW    - width in bits of the operand payload (opds_in).
//   PASSTHRU - forwarded unchanged to VX_gpr_bypass; its exact effect is
//              defined by that module (not visible here).
//
// Ports:
//   valid_in / ready_in   - input-side handshake, driven through the skid buffer.
//   inst_in, opds_in      - input payloads, accepted when valid_in && ready_in.
//   data_out              - {instruction, operands}, INSTW+OPDSW bits wide.
//   valid_out / ready_out - output-side handshake.
module VX_opd_collect #(
parameter INSTW = 1,
parameter OPDSW = 1,
parameter PASSTHRU = 0
) (
input wire clk,
input wire reset,
input wire valid_in,
output wire ready_in,
input wire [INSTW-1:0] inst_in,
input wire [OPDSW-1:0] opds_in,
output wire [INSTW+OPDSW-1:0] data_out,
output wire valid_out,
input wire ready_out
);
// Intermediate (pre-output-register) instruction and operand payloads.
wire [INSTW-1:0] inst_out;
wire [OPDSW-1:0] opds_out;
// Handshake between the skid buffer output and the output register.
wire valid_out_tmp, ready_out_tmp;
// Instruction path: inst_in goes through a valid/ready buffer.
// NOTE(review): VX_skid_buffer is defined elsewhere; presumably a standard
// skid buffer that decouples ready_in from ready_out_tmp - confirm.
VX_skid_buffer #(
.DATAW (INSTW)
) skid_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.data_in (inst_in),
.data_out (inst_out),
.valid_out (valid_out_tmp),
.ready_out (ready_out_tmp)
);
// Operand path: 'push' fires on the same cycle an instruction is accepted at
// the input, 'pop' on the cycle the skid buffer output is consumed, so the
// operand storage is advanced in step with the instruction buffer.
// NOTE(review): how VX_gpr_bypass stores/forwards opds_in is not visible
// here - confirm against that module.
VX_gpr_bypass #(
.DATAW (OPDSW),
.PASSTHRU (PASSTHRU)
) gpr_bypass (
.clk (clk),
.reset (reset),
.push (valid_in && ready_in),
.pop (valid_out_tmp && ready_out_tmp),
.data_in (opds_in),
.data_out (opds_out)
);
// Hold the output register only while it contains a valid entry that the
// consumer has not accepted yet.
wire stall_out = valid_out && ~ready_out;
// Output register: captures {valid, instruction, operands} when not stalled.
// R(1) selects the single most-significant bit (the valid flag) as the
// resettable part of the register.
VX_generic_register #(
.N(1 + INSTW + OPDSW),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({valid_out_tmp, inst_out, opds_out}),
.out ({valid_out, data_out})
);
// The upstream stage may advance whenever the output register is not stalled.
assign ready_out_tmp = ~stall_out;
endmodule

View File

@@ -10,7 +10,6 @@ module VX_scoreboard #(
VX_writeback_if writeback_if,
input wire [`NW_BITS-1:0] deq_wid_next,
input wire exe_delay,
input wire gpr_delay,
output wire delay
);
@@ -63,14 +62,14 @@ module VX_scoreboard #(
end
// issue the instruction
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
assign ibuf_deq_if.ready = ~(delay || exe_delay);
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay);
end
end
`endif
@@ -81,9 +80,9 @@ module VX_scoreboard #(
stall_ctr <= 0;
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
stall_ctr <= stall_ctr + 1;
assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay);
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
stall_ctr <= 0;
end

View File

@@ -241,12 +241,12 @@ module VX_warp_sched #(
.N(1 + `NUM_THREADS + 32 + `NW_BITS),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}),
.out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.data_in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}),
.data_out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
);
assign busy = (active_warps != 0);

View File

@@ -78,12 +78,12 @@ module VX_writeback #(
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
);
assign alu_commit_if.ready = !stall;

View File

@@ -435,12 +435,12 @@ if (DRAM_ENABLE) begin
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH),
.R(1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.stall (pipeline_stall),
.flush (1'b0),
.in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}),
.out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
.clk (clk),
.reset (reset),
.stall (pipeline_stall),
.flush (1'b0),
.data_in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}),
.data_out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
);
`ifdef DBG_CACHE_REQ_INFO
@@ -508,12 +508,12 @@ if (DRAM_ENABLE) begin
.N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH),
.R(1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.stall (pipeline_stall),
.flush (1'b0),
.in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}),
.out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
.clk (clk),
.reset (reset),
.stall (pipeline_stall),
.flush (1'b0),
.data_in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}),
.data_out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
);
end else begin
@@ -650,12 +650,12 @@ end
.N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH),
.R(1)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.stall (pipeline_stall),
.flush (1'b0),
.in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}),
.out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
.clk (clk),
.reset (reset),
.stall (pipeline_stall),
.flush (1'b0),
.data_in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}),
.data_out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
);
`ifdef DBG_CACHE_REQ_INFO

View File

@@ -57,7 +57,8 @@ module VX_bank_core_req_arb #(
VX_generic_queue #(
.DATAW($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(writedata_in)),
.SIZE(CREQ_SIZE)
.SIZE(CREQ_SIZE),
.BUFFERED(1)
) req_queue (
.clk (clk),
.reset (reset),

View File

@@ -92,12 +92,12 @@ module VX_cache_core_rsp_merge #(
.R(NUM_REQS),
.PASSTHRU(NUM_BANKS <= 2)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.data_out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);
for (genvar i = 0; i < NUM_BANKS; i++) begin

View File

@@ -91,12 +91,12 @@ module VX_fp_noncomp #(
.N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1),
.R(0)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
.out ({a_sign[i], b_sign[i], a_exponent[i], a_mantissa[i], a_type[i], b_type[i], a_smaller[i], ab_equal[i]})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
.data_out ({a_sign[i], b_sign[i], a_exponent[i], a_mantissa[i], a_type[i], b_type[i], a_smaller[i], ab_equal[i]})
);
end
@@ -104,12 +104,12 @@ module VX_fp_noncomp #(
.N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)),
.R(1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({valid_in, tag_in, op_type, frm, dataa, datab}),
.out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab}),
.data_out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r})
);
// FCLASS
@@ -255,12 +255,12 @@ module VX_fp_noncomp #(
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
.R(1)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
.out ({valid_out, tag_out, result, has_fflags, fflags})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
.data_out ({valid_out, tag_out, result, has_fflags, fflags})
);
assign ready_in = ~stall;

View File

@@ -0,0 +1,24 @@
`ifndef VX_CSR_PIPE_REQ_IF
`define VX_CSR_PIPE_REQ_IF
`include "VX_define.vh"
// VX_csr_pipe_req_if: signal bundle for a CSR request, consisting of a
// valid/ready pair plus the request fields below. No modports are declared,
// so signal direction is determined by the modules that connect to it.
// Field widths come from macros in VX_define.vh.
// NOTE(review): the per-field meanings below are inferred from the signal
// names only - confirm against the producer/consumer modules.
interface VX_csr_pipe_req_if ();
// Request-valid flag.
wire valid;
// Presumably the warp id of the requesting instruction.
wire [`NW_BITS-1:0] wid;
// One bit per thread; presumably the active-thread mask.
wire [`NUM_THREADS-1:0] tmask;
// 32-bit program counter value carried with the request.
wire [31:0] PC;
// CSR operation selector (encoding defined by `CSR_BITS users).
wire [`CSR_BITS-1:0] op_type;
// Address of the CSR being accessed.
wire [`CSR_ADDR_BITS-1:0] csr_addr;
// 32-bit operand for the CSR operation (presumably the write/set/clear mask).
wire [31:0] csr_mask;
// Presumably the destination register index.
wire [`NR_BITS-1:0] rd;
// Presumably set when the result must be written back to rd.
wire wb;
// Presumably marks a request that originates from the CSR I/O bus.
wire is_io;
// Back-pressure signal paired with 'valid'.
wire ready;
endinterface
`endif

View File

@@ -12,10 +12,11 @@ interface VX_csr_req_if ();
wire [31:0] PC;
wire [`CSR_BITS-1:0] op_type;
wire [`CSR_ADDR_BITS-1:0] csr_addr;
wire [31:0] csr_mask;
wire [31:0] rs1_data;
wire rs2_is_imm;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rd;
wire wb;
wire is_io;
wire ready;

View File

@@ -20,8 +20,7 @@ interface VX_decode_if ();
wire [`NR_BITS-1:0] rs3;
wire [31:0] imm;
wire rs1_is_PC;
wire rs2_is_imm;
wire use_rs3;
wire rs2_is_imm;
wire [`NUM_REGS-1:0] used_regs;
wire ready;

View File

@@ -4,17 +4,11 @@
`include "VX_define.vh"
interface VX_gpr_req_if ();
wire valid;
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3;
wire use_rs3;
wire ready;
wire [`NW_BITS-1:0] wid;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3;
endinterface

View File

@@ -4,17 +4,11 @@
`include "VX_define.vh"
interface VX_gpr_rsp_if ();
wire valid;
`IGNORE_WARNINGS_BEGIN
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;
`IGNORE_WARNINGS_END
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
wire ready;
endinterface
`endif

View File

@@ -5,7 +5,7 @@ module VX_dp_ram #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter BYTEENW = 1,
parameter BUFFERED = 1,
parameter BUFFERED = 0,
parameter RWCHECK = 1,
parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1),
@@ -26,8 +26,10 @@ module VX_dp_ram #(
localparam DATA32W = DATAW / 32;
localparam BYTEEN32W = BYTEENW / 4;
if (FASTRAM) begin
if (BUFFERED) begin
//`ifndef QUARTUS
if (FASTRAM) begin
if (BUFFERED) begin
reg [DATAW-1:0] dout_r;
if (BYTEENW > 1) begin
@@ -207,5 +209,95 @@ module VX_dp_ram #(
end
end
/*`else
localparam OUTDATA_REG_B = BUFFERED ? "CLOCK0" : "UNREGISTERED";
localparam RAM_BLOCK_TYPE = FASTRAM ? "MLAB" : "AUTO";
if (RWCHECK) begin
altsyncram #(
.init_file (),
.operation_mode ("DUAL_PORT"),
.numwords_a (SIZE),
.numwords_b (SIZE),
.widthad_a (ADDRW),
.widthad_b (ADDRW),
.width_a (DATAW),
.width_b (DATAW),
.width_byteena_a(BYTEENW),
.address_reg_b ("CLOCK0"),
.outdata_reg_b (OUTDATA_REG_B),
.ram_block_type (RAM_BLOCK_TYPE)
) mem (
.clocken0 (1'b1),
.clocken1 (),
.clocken2 (),
.clocken3 (),
.clock0 (clk),
.clock1 (),
.address_a (waddr),
.address_b (raddr),
.byteena_a (byteen),
.byteena_b (1'b1),
.wren_a (wren),
.wren_b (1'b0),
.data_a (din),
.data_b (),
.rden_a (),
.rden_b (1'b1),
.q_a (),
.q_b (dout),
.addressstall_a (1'b0),
.addressstall_b (1'b0),
.aclr0 (1'b0),
.aclr1 (1'b0),
.eccstatus ()
);
end else begin
`NO_RW_RAM_CHECK altsyncram #(
.init_file (),
.operation_mode ("DUAL_PORT"),
.numwords_a (SIZE),
.numwords_b (SIZE),
.widthad_a (ADDRW),
.widthad_b (ADDRW),
.width_a (DATAW),
.width_b (DATAW),
.width_byteena_a(BYTEENW),
.outdata_reg_b (OUTDATA_REG_B),
.ram_block_type (RAM_BLOCK_TYPE)
) mem (
.clocken0 (1'b1),
.clocken1 (1'b1),
.clocken2 (1'b1),
.clocken3 (1'b1),
.clock0 (clk),
.clock1 (clk),
.address_a (waddr),
.address_b (raddr),
.byteena_a (byteen),
.byteena_b (1'b1),
.wren_a (wren),
.wren_b (1'b0),
.data_a (din),
.data_b (),
.rden_a (),
.rden_b (1'b1),
.q_a (),
.q_b (dout),
.addressstall_a (1'b0),
.addressstall_b (1'b0),
.aclr0 (1'b0),
.aclr1 (1'b0),
.eccstatus ()
);
end
`endif*/
endmodule
`TRACING_ON

View File

@@ -5,25 +5,25 @@ module VX_generic_register #(
parameter R = N,
parameter PASSTHRU = 0
) (
input wire clk,
input wire reset,
input wire stall,
input wire flush,
input wire[N-1:0] in,
output wire[N-1:0] out
input wire clk,
input wire reset,
input wire stall,
input wire flush,
input wire[N-1:0] data_in,
output wire[N-1:0] data_out
);
if (PASSTHRU) begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (stall)
assign out = flush ? N'(0) : in;
assign data_out = flush ? N'(0) : data_in;
end else begin
reg [N-1:0] value;
if (R != 0) begin
always @(posedge clk) begin
if (~stall) begin
value <= in;
value <= data_in;
end
if (reset || flush) begin
value[N-1:N-R] <= R'(0);
@@ -34,12 +34,12 @@ module VX_generic_register #(
`UNUSED_VAR (flush)
always @(posedge clk) begin
if (~stall) begin
value <= in;
value <= data_in;
end
end
end
assign out = value;
assign data_out = value;
end
endmodule

View File

@@ -106,12 +106,12 @@ module VX_stream_arbiter #(
.N(1 + DATAW),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({sel_valid, data_in[sel_idx]}),
.out ({valid_out, data_out})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({sel_valid, data_in[sel_idx]}),
.data_out ({valid_out, data_out})
);
for (genvar i = 0; i < NUM_REQS; i++) begin

View File

@@ -184,13 +184,9 @@
"issue_imm": 32,
"issue_rs1_is_pc": 1,
"issue_rs2_is_imm": 1,
"?gpr_rsp_valid": 1,
"gpr_rsp_wid":"`NW_BITS",
"gpr_rsp_pc": 32,
"gpr_rsp_a":"`NUM_THREADS * 32",
"gpr_rsp_b":"`NUM_THREADS * 32",
"gpr_rsp_c":"`NUM_THREADS * 32",
"!gpr_delay": 1,
"?writeback_valid": 1,
"writeback_wid":"`NW_BITS",
"writeback_pc": 32,