register file refactoring
This commit is contained in:
@@ -100,12 +100,12 @@ module VX_alu_unit #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}),
|
||||
.out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.data_in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}),
|
||||
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r})
|
||||
);
|
||||
|
||||
wire is_less = cmp_result_r[32];
|
||||
|
||||
@@ -64,12 +64,12 @@ module VX_commit #(
|
||||
.N(1 + CMTW),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (1'b0),
|
||||
.flush (1'b0),
|
||||
.in ({commit_fire, commit_size}),
|
||||
.out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (1'b0),
|
||||
.flush (1'b0),
|
||||
.data_in ({commit_fire, commit_size}),
|
||||
.data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
|
||||
);
|
||||
|
||||
// Writeback
|
||||
|
||||
@@ -1,57 +1,59 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_arb (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// bus select
|
||||
input wire select_io_req,
|
||||
input wire select_io_rsp,
|
||||
input wire select_io_req,
|
||||
input wire select_io_rsp,
|
||||
|
||||
// input requests
|
||||
VX_csr_req_if csr_core_req_if,
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_req_if csr_core_req_if,
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
|
||||
// output request
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_csr_pipe_req_if csr_pipe_req_if,
|
||||
|
||||
// input response
|
||||
VX_commit_if csr_rsp_if,
|
||||
VX_commit_if csr_pipe_rsp_if,
|
||||
|
||||
// output responses
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if
|
||||
);
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
// requests
|
||||
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
|
||||
assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
|
||||
assign csr_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0;
|
||||
assign csr_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0;
|
||||
assign csr_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
|
||||
assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
|
||||
assign csr_req_if.is_io = select_io_req;
|
||||
wire [31:0] csr_core_req_mask = csr_core_req_if.rs2_is_imm ? 32'(csr_core_req_if.rs1) : csr_core_req_if.rs1_data;
|
||||
|
||||
assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req);
|
||||
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
|
||||
// requests
|
||||
assign csr_pipe_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
|
||||
assign csr_pipe_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
|
||||
assign csr_pipe_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0;
|
||||
assign csr_pipe_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0;
|
||||
assign csr_pipe_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_pipe_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_pipe_req_if.csr_mask = (~select_io_req) ? csr_core_req_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_pipe_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
|
||||
assign csr_pipe_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
|
||||
assign csr_pipe_req_if.is_io = select_io_req;
|
||||
|
||||
assign csr_core_req_if.ready = csr_pipe_req_if.ready && (~select_io_req);
|
||||
assign csr_io_req_if.ready = csr_pipe_req_if.ready && select_io_req;
|
||||
|
||||
// responses
|
||||
assign csr_io_rsp_if.valid = csr_rsp_if.valid & select_io_rsp;
|
||||
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
|
||||
assign csr_io_rsp_if.valid = csr_pipe_rsp_if.valid & select_io_rsp;
|
||||
assign csr_io_rsp_if.data = csr_pipe_rsp_if.data[0];
|
||||
|
||||
assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp;
|
||||
assign csr_commit_if.wid = csr_rsp_if.wid;
|
||||
assign csr_commit_if.tmask = csr_rsp_if.tmask;
|
||||
assign csr_commit_if.PC = csr_rsp_if.PC;
|
||||
assign csr_commit_if.rd = csr_rsp_if.rd;
|
||||
assign csr_commit_if.wb = csr_rsp_if.wb;
|
||||
assign csr_commit_if.data = csr_rsp_if.data;
|
||||
assign csr_commit_if.valid = csr_pipe_rsp_if.valid & ~select_io_rsp;
|
||||
assign csr_commit_if.wid = csr_pipe_rsp_if.wid;
|
||||
assign csr_commit_if.tmask = csr_pipe_rsp_if.tmask;
|
||||
assign csr_commit_if.PC = csr_pipe_rsp_if.PC;
|
||||
assign csr_commit_if.rd = csr_pipe_rsp_if.rd;
|
||||
assign csr_commit_if.wb = csr_pipe_rsp_if.wb;
|
||||
assign csr_commit_if.data = csr_pipe_rsp_if.data;
|
||||
|
||||
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||
assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -12,15 +12,15 @@ module VX_csr_unit #(
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
|
||||
input wire busy,
|
||||
input wire[`NUM_WARPS-1:0] fpu_pending,
|
||||
output wire[`NUM_WARPS-1:0] pending
|
||||
);
|
||||
VX_csr_req_if csr_pipe_req_if();
|
||||
VX_commit_if csr_pipe_rsp_if();
|
||||
VX_csr_pipe_req_if csr_pipe_req_if();
|
||||
VX_commit_if csr_pipe_rsp_if();
|
||||
|
||||
wire select_io_req = csr_io_req_if.valid;
|
||||
wire select_io_rsp;
|
||||
@@ -34,9 +34,9 @@ module VX_csr_unit #(
|
||||
|
||||
.csr_core_req_if (csr_req_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_req_if (csr_pipe_req_if),
|
||||
.csr_pipe_req_if (csr_pipe_req_if),
|
||||
|
||||
.csr_rsp_if (csr_pipe_rsp_if),
|
||||
.csr_pipe_rsp_if (csr_pipe_rsp_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_commit_if (csr_commit_if)
|
||||
);
|
||||
@@ -105,12 +105,12 @@ module VX_csr_unit #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||
.out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.data_in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||
.data_out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
@@ -347,11 +347,9 @@ module VX_decode #(
|
||||
assign decode_if.rd = rd;
|
||||
assign decode_if.rs1 = rs1_qual;
|
||||
assign decode_if.rs2 = rs2;
|
||||
assign decode_if.rs3 = 0;
|
||||
assign decode_if.rs3 = rs3;
|
||||
`endif
|
||||
|
||||
assign decode_if.use_rs3 = use_rs3;
|
||||
|
||||
assign decode_if.used_regs = ((`NUM_REGS)'(use_rd) << decode_if.rd)
|
||||
| ((`NUM_REGS)'(use_rs1) << decode_if.rs1)
|
||||
| ((`NUM_REGS)'(use_rs2) << decode_if.rs2)
|
||||
|
||||
@@ -117,6 +117,8 @@ module VX_execute #(
|
||||
.pending (fpu_pending)
|
||||
);
|
||||
`else
|
||||
`UNUSED_VAR (csr_pending)
|
||||
`UNUSED_VAR (fpu_to_csr_if.read_frm)
|
||||
assign fpu_req_if.ready = 0;
|
||||
assign fpu_commit_if.valid = 0;
|
||||
assign fpu_commit_if.wid = 0;
|
||||
@@ -124,9 +126,12 @@ module VX_execute #(
|
||||
assign fpu_commit_if.tmask = 0;
|
||||
assign fpu_commit_if.wb = 0;
|
||||
assign fpu_commit_if.rd = 0;
|
||||
assign fpu_commit_if.data = 0;
|
||||
assign fpu_commit_if.has_fflags = 0;
|
||||
assign fpu_commit_if.fflags = 0;
|
||||
assign fpu_commit_if.data = 0;
|
||||
assign fpu_to_csr_if.write_enable = 0;
|
||||
assign fpu_to_csr_if.write_wid = 0;
|
||||
assign fpu_to_csr_if.write_fflags = 0;
|
||||
assign fpu_to_csr_if.read_wid = 0;
|
||||
assign fpu_pending = 0;
|
||||
`endif
|
||||
|
||||
VX_gpu_unit #(
|
||||
|
||||
@@ -153,12 +153,12 @@ module VX_fpu_unit #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
|
||||
.out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
|
||||
.data_out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
|
||||
);
|
||||
|
||||
assign ready_out = ~stall_out;
|
||||
|
||||
@@ -1,78 +0,0 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// VX_gpr_bypass: small holding buffer placed on a data path of width DATAW.
//
// Parameters:
//   DATAW    - width in bits of data_in / data_out.
//   PASSTHRU - when non-zero, no storage is generated and data_out is wired
//              straight to data_in; otherwise up to two entries are held.
// Ports:
//   clk, reset - clock and synchronous reset (reset clears the control flags only).
//   push       - registered into delayed_push; data_in is captured on the cycle
//                where delayed_push is high, i.e. one cycle after push.
//                NOTE(review): presumably data_in comes from a source with one
//                cycle of read latency - confirm against the instantiating module.
//   pop        - consumer takes the current data_out this cycle.
//   data_in    - incoming data.
//   data_out   - buffered data when an entry is held, else data_in.
module VX_gpr_bypass #(
|
||||
parameter DATAW = 1,
|
||||
parameter PASSTHRU = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire push,
|
||||
input wire pop,
|
||||
input wire [DATAW-1:0] data_in,
|
||||
output wire [DATAW-1:0] data_out
|
||||
);
|
||||
// Pass-through variant: nothing is stored, only the protocol is checked.
if (PASSTHRU) begin
|
||||
reg delayed_push;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
delayed_push <= 0;
|
||||
end else begin
|
||||
delayed_push <= push;
|
||||
// With no storage, data that arrives (delayed_push) must be popped in the same cycle.
assert(!delayed_push || pop);
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = data_in;
|
||||
|
||||
// Buffered variant: two data registers with one "in use" flag each.
end else begin
|
||||
|
||||
// buffer is the entry presented on data_out; buffer2 is the entry behind it.
reg [DATAW-1:0] buffer, buffer2;
|
||||
reg use_buffer, use_buffer2;
|
||||
reg delayed_push;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
delayed_push <= 0;
|
||||
use_buffer <= 0;
|
||||
use_buffer2 <= 0;
|
||||
end else begin
|
||||
delayed_push <= push;
|
||||
// Invariant: buffer2 may only be occupied while buffer is occupied.
assert(!use_buffer2 || use_buffer);
|
||||
// A pop shifts the second entry's flag into the first.
if (pop) begin
|
||||
use_buffer <= use_buffer2;
|
||||
use_buffer2 <= 0;
|
||||
end
|
||||
// Arrival of new data: these non-blocking assignments come after the pop
// block above, so they override its flag updates when both fire.
if (delayed_push) begin
|
||||
if (use_buffer) begin
|
||||
assert(!use_buffer2); // full!
|
||||
// buffer stays occupied: either it keeps its entry (no pop) or it is
// refilled with the new data (pop).
use_buffer <= 1;
|
||||
if (!pop) begin
|
||||
use_buffer2 <= 1;
|
||||
end
|
||||
// Empty and not popped this cycle: the new data must be held in buffer.
end else if (!pop) begin
|
||||
use_buffer <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Data registers are updated outside the reset branch, so they are not cleared by reset.
if (pop) begin
|
||||
buffer <= buffer2;
|
||||
end
|
||||
// Mirrors the flag logic above; a write to buffer here overrides the
// shift from buffer2 when pop is also asserted.
if (delayed_push) begin
|
||||
if (use_buffer) begin
|
||||
if (pop) begin
|
||||
buffer <= data_in;
|
||||
end else begin
|
||||
buffer2 <= data_in;
|
||||
end
|
||||
end else if (!pop) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Present the held entry when there is one, otherwise bypass data_in directly.
assign data_out = use_buffer ? buffer : data_in;
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -4,33 +4,79 @@
|
||||
|
||||
module VX_gpr_ram (
|
||||
input wire clk,
|
||||
input wire [`NUM_THREADS-1:0] we,
|
||||
input wire wren,
|
||||
input wire [`NUM_THREADS-1:0] tmask,
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] waddr,
|
||||
input wire [`NUM_THREADS-1:0][31:0] wdata,
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] rs1,
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] rs2,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rs2_data
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] raddr1,
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] raddr2,
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] raddr3,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rdata1,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rdata2,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rdata3
|
||||
);
|
||||
localparam RAM_DATAW = `NUM_THREADS * 32;
|
||||
localparam RAM_ADDRW = `NW_BITS + `NR_BITS;
|
||||
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
|
||||
localparam RAM_BYTEEN = `NUM_THREADS * 4;
|
||||
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_THREADS-1:0][31:0] q1, q2;
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (we[i]) begin
|
||||
mem[waddr][i][0] <= wdata[i][07:00];
|
||||
mem[waddr][i][1] <= wdata[i][15:08];
|
||||
mem[waddr][i][2] <= wdata[i][23:16];
|
||||
mem[waddr][i][3] <= wdata[i][31:24];
|
||||
`UNUSED_VAR (raddr3)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
reg [31:0] mem_i [(RAM_DEPTH/2)-1:0];
|
||||
reg [31:0] mem_f [(RAM_DEPTH/2)-1:0];
|
||||
|
||||
initial mem_i = '{default: 0};
|
||||
|
||||
wire waddr_is_fp = waddr[RAM_ADDRW-1];
|
||||
wire raddr1_is_fp = raddr1[RAM_ADDRW-1];
|
||||
wire raddr2_is_fp = raddr2[RAM_ADDRW-1];
|
||||
|
||||
wire [RAM_ADDRW-2:0] waddr_qual = waddr[RAM_ADDRW-2:0];
|
||||
wire [RAM_ADDRW-2:0] raddr1_qual = raddr1[RAM_ADDRW-2:0];
|
||||
wire [RAM_ADDRW-2:0] raddr2_qual = raddr2[RAM_ADDRW-2:0];
|
||||
wire [RAM_ADDRW-2:0] raddr3_qual = raddr3[RAM_ADDRW-2:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren && tmask[i] && !waddr_is_fp) begin
|
||||
mem_i[waddr_qual] <= wdata[i];
|
||||
end
|
||||
end
|
||||
q1 <= mem[rs1];
|
||||
q2 <= mem[rs2];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren && tmask[i] && waddr_is_fp) begin
|
||||
mem_f[waddr_qual] <= wdata[i];
|
||||
end
|
||||
end
|
||||
|
||||
assign rdata1[i] = raddr1_is_fp ? mem_f[raddr1_qual] : mem_i[raddr1_qual];
|
||||
assign rdata2[i] = raddr2_is_fp ? mem_f[raddr2_qual] : mem_i[raddr2_qual];
|
||||
assign rdata3[i] = mem_f[raddr3_qual];
|
||||
end
|
||||
|
||||
assign rs1_data = q1;
|
||||
assign rs2_data = q2;
|
||||
`else
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
reg [31:0] mem [RAM_DEPTH-1:0];
|
||||
|
||||
initial mem = '{default: 0};
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren && tmask[i]) begin
|
||||
mem[waddr] <= wdata[i];
|
||||
end
|
||||
end
|
||||
|
||||
assign rdata1[i] = mem[raddr1];
|
||||
assign rdata2[i] = mem[raddr2];
|
||||
assign rdata3[i] = 0;
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
@@ -15,91 +15,38 @@ module VX_gpr_stage #(
|
||||
);
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg rsp_valid;
|
||||
reg [`NW_BITS-1:0] rsp_wid;
|
||||
reg [31:0] rsp_pc;
|
||||
reg rs1_is_zero, rs2_is_zero;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data;
|
||||
wire [`NW_BITS+`NR_BITS-1:0] raddr1, raddr2;
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
|
||||
wire [`NW_BITS+`NR_BITS-1:0] waddr, raddr1, raddr2, raddr3;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign waddr = {writeback_if.rd[`NR_BITS-1], writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]};
|
||||
assign raddr1 = {gpr_req_if.rs1[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]};
|
||||
assign raddr2 = {gpr_req_if.rs2[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]};
|
||||
assign raddr3 = {gpr_req_if.rs3[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]};
|
||||
`else
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||
`endif
|
||||
|
||||
VX_gpr_ram gpr_ram (
|
||||
.clk (clk),
|
||||
.we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.tmask),
|
||||
.waddr ({writeback_if.wid, writeback_if.rd}),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (raddr1),
|
||||
.rs2 (raddr2),
|
||||
.rs1_data (rs1_data),
|
||||
.rs2_data (rs2_data)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rsp_valid <= 0;
|
||||
end else begin
|
||||
rsp_valid <= gpr_req_if.valid;
|
||||
end
|
||||
|
||||
rsp_wid <= gpr_req_if.wid;
|
||||
rsp_pc <= gpr_req_if.PC;
|
||||
rs1_is_zero <= (0 == gpr_req_if.rs1);
|
||||
rs2_is_zero <= (0 == gpr_req_if.rs2);
|
||||
end
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
reg read_rs3, save_rs3;
|
||||
|
||||
wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && !read_rs3;
|
||||
wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
read_rs3 <= 0;
|
||||
end else begin
|
||||
if (rs3_delay) begin
|
||||
read_rs3 <= 1;
|
||||
end else if (read_fire) begin
|
||||
read_rs3 <= 0;
|
||||
end
|
||||
assert(!read_rs3 || rsp_wid == gpr_req_if.wid);
|
||||
end
|
||||
|
||||
if (rs3_delay) begin
|
||||
save_rs3 <= 1;
|
||||
end
|
||||
if (save_rs3) begin
|
||||
rs3_data <= rs1_data;
|
||||
save_rs3 <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)};
|
||||
assign gpr_req_if.ready = ~rs3_delay;
|
||||
assign gpr_rsp_if.rs3_data = rs3_data;
|
||||
|
||||
`else
|
||||
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign gpr_req_if.ready = 1;
|
||||
assign gpr_rsp_if.rs3_data = 0;
|
||||
|
||||
`UNUSED_VAR (gpr_req_if.valid);
|
||||
`UNUSED_VAR (gpr_req_if.rs3);
|
||||
`UNUSED_VAR (gpr_req_if.use_rs3);
|
||||
`UNUSED_VAR (gpr_rsp_if.ready);
|
||||
.clk (clk),
|
||||
.wren (writeback_if.valid),
|
||||
.tmask (writeback_if.tmask),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data),
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.raddr3 (raddr3),
|
||||
.rdata1 (rdata1),
|
||||
.rdata2 (rdata2),
|
||||
.rdata3 (rdata3)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rs1_is_zero ? (`NUM_THREADS*32)'(0) : rs1_data;
|
||||
assign gpr_rsp_if.rs2_data = rs2_is_zero ? (`NUM_THREADS*32)'(0) : rs2_data;
|
||||
assign gpr_rsp_if.valid = rsp_valid;
|
||||
assign gpr_rsp_if.wid = rsp_wid;
|
||||
assign gpr_rsp_if.PC = rsp_pc;
|
||||
assign gpr_rsp_if.rs1_data = rdata1;
|
||||
assign gpr_rsp_if.rs2_data = rdata2;
|
||||
assign gpr_rsp_if.rs3_data = rdata3;
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
|
||||
@@ -79,12 +79,12 @@ module VX_gpu_unit #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
|
||||
.out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
);
|
||||
|
||||
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
|
||||
@@ -14,7 +14,7 @@ module VX_ibuffer #(
|
||||
output wire [`NW_BITS-1:0] deq_wid_next,
|
||||
VX_decode_if ibuf_deq_if
|
||||
);
|
||||
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + 1 + `NUM_REGS;
|
||||
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
|
||||
localparam SIZE = `IBUF_SIZE;
|
||||
localparam SIZEW = $clog2(SIZE+1);
|
||||
localparam ADDRW = $clog2(SIZE);
|
||||
@@ -192,8 +192,7 @@ module VX_ibuffer #(
|
||||
ibuf_enq_if.rs3,
|
||||
ibuf_enq_if.imm,
|
||||
ibuf_enq_if.rs1_is_PC,
|
||||
ibuf_enq_if.rs2_is_imm,
|
||||
ibuf_enq_if.use_rs3,
|
||||
ibuf_enq_if.rs2_is_imm,
|
||||
ibuf_enq_if.used_regs};
|
||||
|
||||
assign ibuf_deq_if.valid = deq_valid;
|
||||
@@ -211,7 +210,6 @@ module VX_ibuffer #(
|
||||
ibuf_deq_if.imm,
|
||||
ibuf_deq_if.rs1_is_PC,
|
||||
ibuf_deq_if.rs2_is_imm,
|
||||
ibuf_deq_if.use_rs3,
|
||||
ibuf_deq_if.used_regs} = deq_instr;
|
||||
|
||||
endmodule
|
||||
@@ -30,94 +30,72 @@ module VX_instr_demux (
|
||||
// ALU unit
|
||||
|
||||
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
||||
wire alu_req_ready;
|
||||
wire alu_stall = alu_req_if.valid && ~alu_req_if.ready;
|
||||
|
||||
wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
|
||||
|
||||
VX_opd_collect #(
|
||||
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS),
|
||||
.OPDSW (2 * `NUM_THREADS * 32),
|
||||
.PASSTHRU (1) // ALU has no backpressure
|
||||
) alu_opc (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ready_in (alu_req_ready),
|
||||
.valid_in (alu_req_valid),
|
||||
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}),
|
||||
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.ready_out (alu_req_if.ready),
|
||||
.valid_out (alu_req_if.valid)
|
||||
VX_generic_register #(
|
||||
.N (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.R (1)
|
||||
) alu_pipe (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (alu_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({alu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data})
|
||||
);
|
||||
|
||||
// lsu unit
|
||||
|
||||
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
||||
wire lsu_req_ready;
|
||||
wire lsu_stall = lsu_req_if.valid && ~lsu_req_if.ready;
|
||||
|
||||
VX_opd_collect #(
|
||||
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1),
|
||||
.OPDSW (2 * `NUM_THREADS * 32)
|
||||
) lsu_opc (
|
||||
VX_generic_register #(
|
||||
.N (1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.R (1)
|
||||
) lsu_pipe (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ready_in (lsu_req_ready),
|
||||
.valid_in (lsu_req_valid),
|
||||
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}),
|
||||
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||
.ready_out (lsu_req_if.ready),
|
||||
.valid_out (lsu_req_if.valid)
|
||||
.stall (lsu_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({lsu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data})
|
||||
);
|
||||
|
||||
// csr unit
|
||||
|
||||
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
||||
wire csr_req_ready;
|
||||
wire csr_stall = csr_req_if.valid && ~csr_req_if.ready;
|
||||
|
||||
reg tmp_rs2_is_imm;
|
||||
reg [`NR_BITS-1:0] tmp_rs1;
|
||||
|
||||
always @(posedge clk) begin
|
||||
tmp_rs2_is_imm <= execute_if.rs2_is_imm;
|
||||
tmp_rs1 <= execute_if.rs1;
|
||||
end
|
||||
|
||||
wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 32'(tmp_rs1) : gpr_rsp_if.rs1_data[0];
|
||||
|
||||
VX_opd_collect #(
|
||||
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1),
|
||||
.OPDSW (32)
|
||||
) csr_opc (
|
||||
VX_generic_register #(
|
||||
.N (1 + `NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.R (1)
|
||||
) csr_pipe (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ready_in (csr_req_ready),
|
||||
.valid_in (csr_req_valid),
|
||||
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}),
|
||||
.opds_in ({csr_req_mask}),
|
||||
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io, csr_req_if.csr_mask}),
|
||||
.ready_out (csr_req_if.ready),
|
||||
.valid_out (csr_req_if.valid)
|
||||
.stall (csr_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({csr_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, execute_if.rs2_is_imm, execute_if.rs1, gpr_rsp_if.rs1_data[0]}),
|
||||
.data_out ({csr_req_if.valid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.rs2_is_imm, csr_req_if.rs1, csr_req_if.rs1_data})
|
||||
);
|
||||
|
||||
// mul unit
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL);
|
||||
wire mul_req_ready;
|
||||
wire mul_stall = mul_req_if.valid && ~mul_req_if.ready;
|
||||
|
||||
VX_opd_collect #(
|
||||
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1),
|
||||
.OPDSW (2 * `NUM_THREADS * 32)
|
||||
) mul_opc (
|
||||
VX_generic_register #(
|
||||
.N (1 + `NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.R (1)
|
||||
) mul_pipe (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ready_in (mul_req_ready),
|
||||
.valid_in (mul_req_valid),
|
||||
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
|
||||
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}),
|
||||
.ready_out (mul_req_if.ready),
|
||||
.valid_out (mul_req_if.valid)
|
||||
.stall (mul_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({mul_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({mul_req_if.valid, mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data})
|
||||
);
|
||||
`endif
|
||||
|
||||
@@ -125,54 +103,50 @@ module VX_instr_demux (
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
||||
wire fpu_req_ready;
|
||||
wire fpu_stall = fpu_req_if.valid && ~fpu_req_if.ready;
|
||||
|
||||
VX_opd_collect #(
|
||||
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1),
|
||||
.OPDSW (3 * `NUM_THREADS * 32)
|
||||
) fpu_opc (
|
||||
VX_generic_register #(
|
||||
.N (1 + `NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.R (1)
|
||||
) fpu_pipe (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ready_in (fpu_req_ready),
|
||||
.valid_in (fpu_req_valid),
|
||||
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb}),
|
||||
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.ready_out (fpu_req_if.ready),
|
||||
.valid_out (fpu_req_if.valid)
|
||||
.stall (fpu_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({fpu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.valid, fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data})
|
||||
);
|
||||
`else
|
||||
`UNUSED_VAR (gpr_rsp_if.rs3_data)
|
||||
`endif
|
||||
|
||||
// gpu unit
|
||||
|
||||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||
wire gpu_req_ready;
|
||||
wire gpu_stall = gpu_req_if.valid && ~gpu_req_if.ready;
|
||||
|
||||
VX_opd_collect #(
|
||||
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1),
|
||||
.OPDSW (`NUM_THREADS * 32 + 32)
|
||||
) gpu_opc (
|
||||
VX_generic_register #(
|
||||
.N (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
||||
.R (1)
|
||||
) gpu_pipe (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ready_in (gpu_req_ready),
|
||||
.valid_in (gpu_req_valid),
|
||||
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
|
||||
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.ready_out (gpu_req_if.ready),
|
||||
.valid_out (gpu_req_if.valid)
|
||||
.stall (gpu_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({gpu_req_valid, execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||
.data_out ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data})
|
||||
);
|
||||
|
||||
// can take next request?
|
||||
assign execute_if.ready = (alu_req_ready && (execute_if.ex_type == `EX_ALU))
|
||||
|| (lsu_req_ready && (execute_if.ex_type == `EX_LSU))
|
||||
|| (csr_req_ready && (execute_if.ex_type == `EX_CSR))
|
||||
assign execute_if.ready = (!alu_stall && (execute_if.ex_type == `EX_ALU))
|
||||
|| (!lsu_stall && (execute_if.ex_type == `EX_LSU))
|
||||
|| (!csr_stall && (execute_if.ex_type == `EX_CSR))
|
||||
`ifdef EXT_M_ENABLE
|
||||
|| (mul_req_ready && (execute_if.ex_type == `EX_MUL))
|
||||
|| (!mul_stall && (execute_if.ex_type == `EX_MUL))
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| (fpu_req_ready && (execute_if.ex_type == `EX_FPU))
|
||||
|| (!fpu_stall && (execute_if.ex_type == `EX_FPU))
|
||||
`endif
|
||||
|| (gpu_req_ready && (execute_if.ex_type == `EX_GPU));
|
||||
|| (!gpu_stall && (execute_if.ex_type == `EX_GPU));
|
||||
|
||||
endmodule
|
||||
@@ -5,23 +5,23 @@ module VX_issue #(
|
||||
) (
|
||||
`SCOPE_IO_VX_issue
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_writeback_if writeback_if,
|
||||
VX_decode_if decode_if,
|
||||
VX_writeback_if writeback_if,
|
||||
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
VX_decode_if ibuf_deq_if();
|
||||
VX_decode_if execute_if();
|
||||
VX_gpr_req_if gpr_req_if();
|
||||
VX_gpr_rsp_if gpr_rsp_if();
|
||||
VX_decode_if ibuf_deq_if();
|
||||
VX_decode_if execute_if();
|
||||
VX_gpr_req_if gpr_req_if();
|
||||
VX_gpr_rsp_if gpr_rsp_if();
|
||||
|
||||
wire scoreboard_delay;
|
||||
wire [`NW_BITS-1:0] deq_wid_next;
|
||||
@@ -29,49 +29,42 @@ module VX_issue #(
|
||||
VX_ibuffer #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.freeze (~gpr_req_if.ready),
|
||||
.ibuf_enq_if (decode_if),
|
||||
.deq_wid_next (deq_wid_next),
|
||||
.ibuf_deq_if (ibuf_deq_if)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.freeze (1'b0),
|
||||
.ibuf_enq_if (decode_if),
|
||||
.deq_wid_next (deq_wid_next),
|
||||
.ibuf_deq_if (ibuf_deq_if)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ibuf_deq_if (ibuf_deq_if),
|
||||
.writeback_if (writeback_if),
|
||||
.deq_wid_next (deq_wid_next),
|
||||
.exe_delay (~execute_if.ready),
|
||||
.gpr_delay (~gpr_req_if.ready),
|
||||
.delay (scoreboard_delay)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ibuf_deq_if (ibuf_deq_if),
|
||||
.writeback_if (writeback_if),
|
||||
.deq_wid_next (deq_wid_next),
|
||||
.exe_delay (~execute_if.ready),
|
||||
.delay (scoreboard_delay)
|
||||
);
|
||||
|
||||
assign gpr_req_if.valid = ibuf_deq_if.valid && ~scoreboard_delay;
|
||||
assign gpr_req_if.wid = ibuf_deq_if.wid;
|
||||
assign gpr_req_if.PC = ibuf_deq_if.PC;
|
||||
assign gpr_req_if.rs1 = ibuf_deq_if.rs1;
|
||||
assign gpr_req_if.rs2 = ibuf_deq_if.rs2;
|
||||
assign gpr_req_if.rs3 = ibuf_deq_if.rs3;
|
||||
assign gpr_req_if.use_rs3 = ibuf_deq_if.use_rs3;
|
||||
assign gpr_rsp_if.ready = execute_if.ready;
|
||||
assign gpr_req_if.wid = ibuf_deq_if.wid;
|
||||
assign gpr_req_if.rs1 = ibuf_deq_if.rs1;
|
||||
assign gpr_req_if.rs2 = ibuf_deq_if.rs2;
|
||||
assign gpr_req_if.rs3 = ibuf_deq_if.rs3;
|
||||
|
||||
VX_gpr_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) gpr_stage (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.writeback_if (writeback_if),
|
||||
.gpr_req_if (gpr_req_if),
|
||||
.gpr_rsp_if (gpr_rsp_if)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.writeback_if (writeback_if),
|
||||
.gpr_req_if (gpr_req_if),
|
||||
.gpr_rsp_if (gpr_rsp_if)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (gpr_rsp_if.valid);
|
||||
|
||||
assign execute_if.valid = ibuf_deq_if.valid && gpr_req_if.ready && ~scoreboard_delay;
|
||||
assign execute_if.valid = ibuf_deq_if.valid && ~scoreboard_delay;
|
||||
assign execute_if.wid = ibuf_deq_if.wid;
|
||||
assign execute_if.tmask = ibuf_deq_if.tmask;
|
||||
assign execute_if.PC = ibuf_deq_if.PC;
|
||||
@@ -83,19 +76,19 @@ module VX_issue #(
|
||||
assign execute_if.rs1 = ibuf_deq_if.rs1;
|
||||
assign execute_if.imm = ibuf_deq_if.imm;
|
||||
assign execute_if.rs1_is_PC = ibuf_deq_if.rs1_is_PC;
|
||||
assign execute_if.rs2_is_imm = ibuf_deq_if.rs2_is_imm;
|
||||
assign execute_if.rs2_is_imm= ibuf_deq_if.rs2_is_imm;
|
||||
|
||||
VX_instr_demux instr_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.execute_if (execute_if),
|
||||
.gpr_rsp_if (gpr_rsp_if),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.execute_if (execute_if),
|
||||
.gpr_rsp_if (gpr_rsp_if),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
`SCOPE_ASSIGN (issue_fire, ibuf_deq_if.valid && ibuf_deq_if.ready);
|
||||
@@ -115,12 +108,8 @@ module VX_issue #(
|
||||
`SCOPE_ASSIGN (issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm);
|
||||
|
||||
`SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay);
|
||||
`SCOPE_ASSIGN (gpr_delay, ~gpr_req_if.ready);
|
||||
`SCOPE_ASSIGN (execute_delay, ~execute_if.ready);
|
||||
|
||||
`SCOPE_ASSIGN (gpr_rsp_valid, gpr_rsp_if.valid);
|
||||
`SCOPE_ASSIGN (gpr_rsp_wid, gpr_rsp_if.wid);
|
||||
`SCOPE_ASSIGN (gpr_rsp_pc, gpr_rsp_if.PC);
|
||||
|
||||
`SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_b, gpr_rsp_if.rs2_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data);
|
||||
@@ -140,7 +129,7 @@ module VX_issue #(
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
end
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.rs1_data);
|
||||
end
|
||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
|
||||
@@ -19,6 +19,7 @@ module VX_lsu_unit #(
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if
|
||||
);
|
||||
wire req_valid;
|
||||
wire [`NUM_THREADS-1:0] req_tmask;
|
||||
wire req_rw;
|
||||
wire [`NUM_THREADS-1:0][29:0] req_addr;
|
||||
@@ -71,19 +72,18 @@ module VX_lsu_unit #(
|
||||
reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
wire valid_in;
|
||||
wire stall_in;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))),
|
||||
.R(1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_in),
|
||||
.flush (1'b0),
|
||||
.in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}),
|
||||
.out ({valid_in, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_in),
|
||||
.flush (1'b0),
|
||||
.data_in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}),
|
||||
.data_out ({req_valid, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
|
||||
);
|
||||
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
@@ -136,11 +136,11 @@ module VX_lsu_unit #(
|
||||
end
|
||||
end
|
||||
|
||||
wire stall_out = ~ld_commit_if.ready && ld_commit_if.valid;
|
||||
wire store_stall = valid_in && req_rw && stall_out;
|
||||
wire load_req_stall = req_valid && !req_rw && lsuq_full;
|
||||
wire store_req_stall = req_valid && req_rw && !st_commit_if.ready;
|
||||
|
||||
// Core Request
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~lsuq_full && ~store_stall}} & req_tmask;
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && !load_req_stall && !store_req_stall}} & req_tmask;
|
||||
assign dcache_req_if.rw = req_rw;
|
||||
assign dcache_req_if.byteen = req_byteen;
|
||||
assign dcache_req_if.addr = req_addr;
|
||||
@@ -152,7 +152,9 @@ module VX_lsu_unit #(
|
||||
assign dcache_req_if.tag = req_tag;
|
||||
`endif
|
||||
|
||||
assign stall_in = ~dcache_req_if.ready || lsuq_full || store_stall;
|
||||
assign stall_in = ~dcache_req_if.ready
|
||||
|| load_req_stall
|
||||
|| store_req_stall;
|
||||
|
||||
// Can accept new request?
|
||||
assign lsu_req_if.ready = ~stall_in;
|
||||
@@ -171,7 +173,7 @@ module VX_lsu_unit #(
|
||||
|
||||
// send store commit
|
||||
|
||||
wire is_store_rsp = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready;
|
||||
wire is_store_rsp = req_valid && req_rw && dcache_req_if.ready;
|
||||
|
||||
assign st_commit_if.valid = is_store_rsp;
|
||||
assign st_commit_if.wid = req_wid;
|
||||
@@ -180,26 +182,27 @@ module VX_lsu_unit #(
|
||||
assign st_commit_if.rd = 0;
|
||||
assign st_commit_if.wb = 0;
|
||||
assign st_commit_if.data = 0;
|
||||
`UNUSED_VAR (st_commit_if.ready)
|
||||
|
||||
// send load commit
|
||||
|
||||
wire is_load_rsp = (| dcache_rsp_if.valid);
|
||||
wire is_load_rsp = (| dcache_rsp_if.valid);
|
||||
|
||||
wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.R(1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}),
|
||||
.out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (load_rsp_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data})
|
||||
);
|
||||
|
||||
// Can accept new cache response?
|
||||
assign dcache_rsp_if.ready = ~stall_out;
|
||||
assign dcache_rsp_if.ready = ~load_rsp_stall;
|
||||
|
||||
// scope registration
|
||||
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}});
|
||||
|
||||
@@ -147,12 +147,12 @@ module VX_mul_unit #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}),
|
||||
.out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}),
|
||||
.data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
|
||||
@@ -1,65 +0,0 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// VX_opd_collect
// Pairs an instruction word (inst_in) with its operand word (opds_in) and
// presents the concatenation on data_out behind a valid/ready handshake.
//   * inst_in is buffered by a VX_skid_buffer instance of width INSTW.
//   * opds_in goes through a VX_gpr_bypass instance of width OPDSW, pushed
//     when the input handshake fires and popped when the buffered
//     instruction is consumed.
//   * {valid, inst, opds} are captured in a VX_generic_register that holds
//     its contents while the consumer is stalling.
// Parameters:
//   INSTW    - bit width of inst_in.
//   OPDSW    - bit width of opds_in.
//   PASSTHRU - forwarded unchanged to VX_gpr_bypass; its exact effect is
//              defined in that module, which is not visible here.
module VX_opd_collect #(
|
||||
parameter INSTW = 1,
|
||||
parameter OPDSW = 1,
|
||||
parameter PASSTHRU = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// input side: instruction + operands with valid/ready
input wire valid_in,
|
||||
output wire ready_in,
|
||||
input wire [INSTW-1:0] inst_in,
|
||||
input wire [OPDSW-1:0] opds_in,
|
||||
|
||||
// output side: data_out carries the instruction bits in the upper part and the operand bits in the lower part
output wire [INSTW+OPDSW-1:0] data_out,
|
||||
output wire valid_out,
|
||||
input wire ready_out
|
||||
);
|
||||
// intermediate stage between the two buffers and the output register
wire [INSTW-1:0] inst_out;
|
||||
wire [OPDSW-1:0] opds_out;
|
||||
wire valid_out_tmp, ready_out_tmp;
|
||||
|
||||
// instruction path: this instance drives ready_in and valid_out_tmp
VX_skid_buffer #(
|
||||
.DATAW (INSTW)
|
||||
) skid_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in (inst_in),
|
||||
.data_out (inst_out),
|
||||
.valid_out (valid_out_tmp),
|
||||
.ready_out (ready_out_tmp)
|
||||
);
|
||||
|
||||
// operand path: push/pop mirror the skid buffer's input and output handshakes
VX_gpr_bypass #(
|
||||
.DATAW (OPDSW),
|
||||
.PASSTHRU (PASSTHRU)
|
||||
) gpr_bypass (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (valid_in && ready_in),
|
||||
.pop (valid_out_tmp && ready_out_tmp),
|
||||
.data_in (opds_in),
|
||||
.data_out (opds_out)
|
||||
);
|
||||
|
||||
// hold the output register only when it has valid data the consumer has not accepted
wire stall_out = valid_out && ~ready_out;
|
||||
|
||||
// output register: one valid bit on top of INSTW + OPDSW payload bits.
// R = 1: going by the VX_generic_register listing in this commit, reset
// clears only the top R bits, which here is the valid bit.
VX_generic_register #(
|
||||
.N(1 + INSTW + OPDSW),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.in ({valid_out_tmp, inst_out, opds_out}),
|
||||
.out ({valid_out, data_out})
|
||||
);
|
||||
|
||||
// upstream buffers may advance whenever the output register is not stalled
assign ready_out_tmp = ~stall_out;
|
||||
|
||||
endmodule
|
||||
@@ -10,7 +10,6 @@ module VX_scoreboard #(
|
||||
VX_writeback_if writeback_if,
|
||||
input wire [`NW_BITS-1:0] deq_wid_next,
|
||||
input wire exe_delay,
|
||||
input wire gpr_delay,
|
||||
|
||||
output wire delay
|
||||
);
|
||||
@@ -63,14 +62,14 @@ module VX_scoreboard #(
|
||||
end
|
||||
|
||||
// issue the instruction
|
||||
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
|
||||
assign ibuf_deq_if.ready = ~(delay || exe_delay);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
|
||||
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b",
|
||||
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
@@ -81,9 +80,9 @@ module VX_scoreboard #(
|
||||
stall_ctr <= 0;
|
||||
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
stall_ctr <= stall_ctr + 1;
|
||||
assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
|
||||
assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b",
|
||||
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay);
|
||||
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
|
||||
stall_ctr <= 0;
|
||||
end
|
||||
|
||||
@@ -241,12 +241,12 @@ module VX_warp_sched #(
|
||||
.N(1 + `NUM_THREADS + 32 + `NW_BITS),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}),
|
||||
.out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.data_in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}),
|
||||
.data_out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
|
||||
);
|
||||
|
||||
assign busy = (active_warps != 0);
|
||||
|
||||
@@ -78,12 +78,12 @@ module VX_writeback #(
|
||||
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
|
||||
.out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
|
||||
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
|
||||
);
|
||||
|
||||
assign alu_commit_if.ready = !stall;
|
||||
|
||||
36
hw/rtl/cache/VX_bank.v
vendored
36
hw/rtl/cache/VX_bank.v
vendored
@@ -435,12 +435,12 @@ if (DRAM_ENABLE) begin
|
||||
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH),
|
||||
.R(1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipeline_stall),
|
||||
.flush (1'b0),
|
||||
.in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}),
|
||||
.out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipeline_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}),
|
||||
.data_out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
@@ -508,12 +508,12 @@ if (DRAM_ENABLE) begin
|
||||
.N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH),
|
||||
.R(1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipeline_stall),
|
||||
.flush (1'b0),
|
||||
.in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}),
|
||||
.out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipeline_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}),
|
||||
.data_out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
|
||||
);
|
||||
|
||||
end else begin
|
||||
@@ -650,12 +650,12 @@ end
|
||||
.N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH),
|
||||
.R(1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipeline_stall),
|
||||
.flush (1'b0),
|
||||
.in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}),
|
||||
.out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipeline_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}),
|
||||
.data_out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
|
||||
3
hw/rtl/cache/VX_bank_core_req_arb.v
vendored
3
hw/rtl/cache/VX_bank_core_req_arb.v
vendored
@@ -57,7 +57,8 @@ module VX_bank_core_req_arb #(
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(writedata_in)),
|
||||
.SIZE(CREQ_SIZE)
|
||||
.SIZE(CREQ_SIZE),
|
||||
.BUFFERED(1)
|
||||
) req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
12
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
12
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
@@ -92,12 +92,12 @@ module VX_cache_core_rsp_merge #(
|
||||
.R(NUM_REQS),
|
||||
.PASSTHRU(NUM_BANKS <= 2)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
|
||||
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
|
||||
.data_out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
|
||||
@@ -91,12 +91,12 @@ module VX_fp_noncomp #(
|
||||
.N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1),
|
||||
.R(0)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
|
||||
.out ({a_sign[i], b_sign[i], a_exponent[i], a_mantissa[i], a_type[i], b_type[i], a_smaller[i], ab_equal[i]})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
|
||||
.data_out ({a_sign[i], b_sign[i], a_exponent[i], a_mantissa[i], a_type[i], b_type[i], a_smaller[i], ab_equal[i]})
|
||||
);
|
||||
end
|
||||
|
||||
@@ -104,12 +104,12 @@ module VX_fp_noncomp #(
|
||||
.N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.R(1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({valid_in, tag_in, op_type, frm, dataa, datab}),
|
||||
.out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab}),
|
||||
.data_out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r})
|
||||
);
|
||||
|
||||
// FCLASS
|
||||
@@ -255,12 +255,12 @@ module VX_fp_noncomp #(
|
||||
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
|
||||
.R(1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.data_out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall;
|
||||
|
||||
24
hw/rtl/interfaces/VX_csr_pipe_req_if.v
Normal file
24
hw/rtl/interfaces/VX_csr_pipe_req_if.v
Normal file
@@ -0,0 +1,24 @@
|
||||
`ifndef VX_CSR_PIPE_REQ_IF
|
||||
`define VX_CSR_PIPE_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_csr_pipe_req_if
// Plain signal bundle for a CSR pipeline request. No modports are declared,
// so signal direction is not enforced by the interface itself.
// NOTE(review): valid/ready are presumably a request handshake pair, as with
// the other *_req_if bundles in this commit - confirm against the users.
interface VX_csr_pipe_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
// NW_BITS-wide id; presumably the issuing warp - TODO confirm
wire [`NW_BITS-1:0] wid;
|
||||
// one bit per thread (NUM_THREADS wide)
wire [`NUM_THREADS-1:0] tmask;
|
||||
// 32-bit program counter value carried with the request
wire [31:0] PC;
|
||||
// CSR operation selector, CSR_BITS wide; encoding is defined elsewhere
wire [`CSR_BITS-1:0] op_type;
|
||||
// CSR address, CSR_ADDR_BITS wide
wire [`CSR_ADDR_BITS-1:0] csr_addr;
|
||||
// 32-bit operand for the CSR operation; how it is applied is not visible here
wire [31:0] csr_mask;
|
||||
// destination register index, NR_BITS wide
wire [`NR_BITS-1:0] rd;
|
||||
// presumably the write-back enable for rd - TODO confirm
wire wb;
|
||||
// presumably marks a request from the IO bus rather than the core - TODO confirm
wire is_io;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -12,10 +12,11 @@ interface VX_csr_req_if ();
|
||||
wire [31:0] PC;
|
||||
wire [`CSR_BITS-1:0] op_type;
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr;
|
||||
wire [31:0] csr_mask;
|
||||
wire [31:0] rs1_data;
|
||||
wire rs2_is_imm;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire is_io;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
||||
@@ -20,8 +20,7 @@ interface VX_decode_if ();
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire [31:0] imm;
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
wire use_rs3;
|
||||
wire rs2_is_imm;
|
||||
wire [`NUM_REGS-1:0] used_regs;
|
||||
|
||||
wire ready;
|
||||
|
||||
@@ -4,17 +4,11 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_gpr_req_if
// Register-file read request bundle: an NW_BITS-wide id (wid) plus three
// NR_BITS-wide source register indices (rs1, rs2, rs3).
// NOTE(review): this listing appears to contain both the pre-change and the
// post-change declarations of the hunk - wid/rs1/rs2/rs3 are declared twice.
// The second group matches the four fields VX_issue still assigns after the
// change (wid, rs1, rs2, rs3); valid, PC, use_rs3 and ready look like the
// removed lines. Confirm against the actual file before relying on this.
interface VX_gpr_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire use_rs3;
|
||||
|
||||
wire ready;
|
||||
|
||||
// second group: same widths as above, without valid/PC/use_rs3/ready
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -4,17 +4,11 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_gpr_rsp_if
// Register-file read response bundle: three operand arrays, each holding one
// 32-bit word per thread (NUM_THREADS entries).
// NOTE(review): this listing likely mixes pre-change and post-change lines of
// the hunk; valid, wid, PC and ready may be the declarations this commit
// removes (VX_issue stops assigning gpr_rsp_if.ready in the same commit).
// Confirm against the actual file.
interface VX_gpr_rsp_if ();
|
||||
wire valid;
|
||||
// wid and PC are wrapped in the IGNORE_WARNINGS macros; presumably because
// not every consumer reads them - TODO confirm
`IGNORE_WARNINGS_BEGIN
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
// per-thread 32-bit values read for rs1 / rs2 / rs3
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -5,7 +5,7 @@ module VX_dp_ram #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter BYTEENW = 1,
|
||||
parameter BUFFERED = 1,
|
||||
parameter BUFFERED = 0,
|
||||
parameter RWCHECK = 1,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter SIZEW = $clog2(SIZE+1),
|
||||
@@ -26,8 +26,10 @@ module VX_dp_ram #(
|
||||
localparam DATA32W = DATAW / 32;
|
||||
localparam BYTEEN32W = BYTEENW / 4;
|
||||
|
||||
if (FASTRAM) begin
|
||||
if (BUFFERED) begin
|
||||
//`ifndef QUARTUS
|
||||
|
||||
if (FASTRAM) begin
|
||||
if (BUFFERED) begin
|
||||
reg [DATAW-1:0] dout_r;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
@@ -207,5 +209,95 @@ module VX_dp_ram #(
|
||||
end
|
||||
end
|
||||
|
||||
/*`else
|
||||
|
||||
localparam OUTDATA_REG_B = BUFFERED ? "CLOCK0" : "UNREGISTERED";
|
||||
localparam RAM_BLOCK_TYPE = FASTRAM ? "MLAB" : "AUTO";
|
||||
|
||||
if (RWCHECK) begin
|
||||
|
||||
altsyncram #(
|
||||
.init_file (),
|
||||
.operation_mode ("DUAL_PORT"),
|
||||
.numwords_a (SIZE),
|
||||
.numwords_b (SIZE),
|
||||
.widthad_a (ADDRW),
|
||||
.widthad_b (ADDRW),
|
||||
.width_a (DATAW),
|
||||
.width_b (DATAW),
|
||||
.width_byteena_a(BYTEENW),
|
||||
.address_reg_b ("CLOCK0"),
|
||||
.outdata_reg_b (OUTDATA_REG_B),
|
||||
.ram_block_type (RAM_BLOCK_TYPE)
|
||||
) mem (
|
||||
.clocken0 (1'b1),
|
||||
.clocken1 (),
|
||||
.clocken2 (),
|
||||
.clocken3 (),
|
||||
.clock0 (clk),
|
||||
.clock1 (),
|
||||
.address_a (waddr),
|
||||
.address_b (raddr),
|
||||
.byteena_a (byteen),
|
||||
.byteena_b (1'b1),
|
||||
.wren_a (wren),
|
||||
.wren_b (1'b0),
|
||||
.data_a (din),
|
||||
.data_b (),
|
||||
.rden_a (),
|
||||
.rden_b (1'b1),
|
||||
.q_a (),
|
||||
.q_b (dout),
|
||||
.addressstall_a (1'b0),
|
||||
.addressstall_b (1'b0),
|
||||
.aclr0 (1'b0),
|
||||
.aclr1 (1'b0),
|
||||
.eccstatus ()
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`NO_RW_RAM_CHECK altsyncram #(
|
||||
.init_file (),
|
||||
.operation_mode ("DUAL_PORT"),
|
||||
.numwords_a (SIZE),
|
||||
.numwords_b (SIZE),
|
||||
.widthad_a (ADDRW),
|
||||
.widthad_b (ADDRW),
|
||||
.width_a (DATAW),
|
||||
.width_b (DATAW),
|
||||
.width_byteena_a(BYTEENW),
|
||||
.outdata_reg_b (OUTDATA_REG_B),
|
||||
.ram_block_type (RAM_BLOCK_TYPE)
|
||||
) mem (
|
||||
.clocken0 (1'b1),
|
||||
.clocken1 (1'b1),
|
||||
.clocken2 (1'b1),
|
||||
.clocken3 (1'b1),
|
||||
.clock0 (clk),
|
||||
.clock1 (clk),
|
||||
.address_a (waddr),
|
||||
.address_b (raddr),
|
||||
.byteena_a (byteen),
|
||||
.byteena_b (1'b1),
|
||||
.wren_a (wren),
|
||||
.wren_b (1'b0),
|
||||
.data_a (din),
|
||||
.data_b (),
|
||||
.rden_a (),
|
||||
.rden_b (1'b1),
|
||||
.q_a (),
|
||||
.q_b (dout),
|
||||
.addressstall_a (1'b0),
|
||||
.addressstall_b (1'b0),
|
||||
.aclr0 (1'b0),
|
||||
.aclr1 (1'b0),
|
||||
.eccstatus ()
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
`endif*/
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
@@ -5,25 +5,25 @@ module VX_generic_register #(
|
||||
parameter R = N,
|
||||
parameter PASSTHRU = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall,
|
||||
input wire flush,
|
||||
input wire[N-1:0] in,
|
||||
output wire[N-1:0] out
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall,
|
||||
input wire flush,
|
||||
input wire[N-1:0] data_in,
|
||||
output wire[N-1:0] data_out
|
||||
);
|
||||
if (PASSTHRU) begin
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (stall)
|
||||
assign out = flush ? N'(0) : in;
|
||||
assign data_out = flush ? N'(0) : data_in;
|
||||
end else begin
|
||||
reg [N-1:0] value;
|
||||
|
||||
if (R != 0) begin
|
||||
always @(posedge clk) begin
|
||||
if (~stall) begin
|
||||
value <= in;
|
||||
value <= data_in;
|
||||
end
|
||||
if (reset || flush) begin
|
||||
value[N-1:N-R] <= R'(0);
|
||||
@@ -34,12 +34,12 @@ module VX_generic_register #(
|
||||
`UNUSED_VAR (flush)
|
||||
always @(posedge clk) begin
|
||||
if (~stall) begin
|
||||
value <= in;
|
||||
value <= data_in;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign out = value;
|
||||
assign data_out = value;
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -106,12 +106,12 @@ module VX_stream_arbiter #(
|
||||
.N(1 + DATAW),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({sel_valid, data_in[sel_idx]}),
|
||||
.out ({valid_out, data_out})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({sel_valid, data_in[sel_idx]}),
|
||||
.data_out ({valid_out, data_out})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
|
||||
@@ -184,13 +184,9 @@
|
||||
"issue_imm": 32,
|
||||
"issue_rs1_is_pc": 1,
|
||||
"issue_rs2_is_imm": 1,
|
||||
"?gpr_rsp_valid": 1,
|
||||
"gpr_rsp_wid":"`NW_BITS",
|
||||
"gpr_rsp_pc": 32,
|
||||
"gpr_rsp_a":"`NUM_THREADS * 32",
|
||||
"gpr_rsp_b":"`NUM_THREADS * 32",
|
||||
"gpr_rsp_c":"`NUM_THREADS * 32",
|
||||
"!gpr_delay": 1,
|
||||
"?writeback_valid": 1,
|
||||
"writeback_wid":"`NW_BITS",
|
||||
"writeback_pc": 32,
|
||||
|
||||
Reference in New Issue
Block a user