fixed FPU handshake, optimized writeback's critical path
This commit is contained in:
@@ -38,7 +38,7 @@ module VX_commit #(
|
||||
.count (num_commits)
|
||||
);
|
||||
|
||||
assign cmt_to_csr_if.valid = (| commited_mask);
|
||||
assign cmt_to_csr_if.valid = (| commited_mask);
|
||||
assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num;
|
||||
assign cmt_to_csr_if.num_commits = num_commits;
|
||||
|
||||
@@ -46,16 +46,16 @@ module VX_commit #(
|
||||
|
||||
integer i;
|
||||
|
||||
reg [`FFG_BITS-1:0] fflags;
|
||||
fflags_t fflags;
|
||||
always @(*) begin
|
||||
fflags = 0;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
|
||||
fflags[0] |= fpu_commit_if.fflags[i][0];
|
||||
fflags[1] |= fpu_commit_if.fflags[i][1];
|
||||
fflags[2] |= fpu_commit_if.fflags[i][2];
|
||||
fflags[3] |= fpu_commit_if.fflags[i][3];
|
||||
fflags[4] |= fpu_commit_if.fflags[i][4];
|
||||
fflags.NX |= fpu_commit_if.fflags[i].NX;
|
||||
fflags.UF |= fpu_commit_if.fflags[i].UF;
|
||||
fflags.OF |= fpu_commit_if.fflags[i].OF;
|
||||
fflags.DZ |= fpu_commit_if.fflags[i].DZ;
|
||||
fflags.NV |= fpu_commit_if.fflags[i].NV;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -71,8 +71,8 @@ module VX_csr_data #(
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
|
||||
|
||||
default: begin
|
||||
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
|
||||
end
|
||||
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define LATENCY_IDIV 22
|
||||
`define LATENCY_IDIV 23
|
||||
`define LATENCY_IMUL 2
|
||||
|
||||
`define LATENCY_FDIV 16
|
||||
@@ -201,13 +201,6 @@
|
||||
`define FRM_DYN 3'b111 // dynamic mode
|
||||
`define FRM_BITS 3
|
||||
|
||||
`define FFG_NX 0 // inexact
|
||||
`define FFG_UF 1 // underflow
|
||||
`define FFG_OF 2 // overflow
|
||||
`define FFG_DZ 3 // division by zero
|
||||
`define FFG_NV 4 // invalid
|
||||
`define FFG_BITS 5
|
||||
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
@@ -440,4 +433,14 @@ typedef struct packed {
|
||||
logic is_quiet;
|
||||
} fp_type_t;
|
||||
|
||||
// Floating-point exception flags, one bit per flag.
// In a packed struct the first member is the most significant bit, so the
// 5-bit vector layout is {NV, DZ, OF, UF, NX}: NV is bit 4 and NX is bit 0.
typedef struct packed {
|
||||
logic NV; // Invalid operation (bit 4, MSB)
|
||||
logic DZ; // Divide by zero (bit 3)
|
||||
logic OF; // Overflow (bit 2)
|
||||
logic UF; // Underflow (bit 1)
|
||||
logic NX; // Inexact (bit 0, LSB)
|
||||
} fflags_t;
|
||||
|
||||
`define FFG_BITS $bits(fflags_t)
|
||||
|
||||
`endif
|
||||
|
||||
@@ -28,13 +28,16 @@ module VX_issue #(
|
||||
|
||||
wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag;
|
||||
|
||||
wire gpr_busy = ~gpr_read_if.in_ready;
|
||||
wire alu_busy = ~alu_req_if.ready;
|
||||
wire lsu_busy = ~lsu_req_if.ready;
|
||||
wire csr_busy = ~csr_req_if.ready;
|
||||
wire mul_busy = ~mul_req_if.ready;
|
||||
wire fpu_busy = ~mul_req_if.ready;
|
||||
wire gpu_busy = ~gpu_req_if.ready;
|
||||
wire schedule_delay;
|
||||
|
||||
wire gpr_busy = ~gpr_read_if.in_ready;
|
||||
|
||||
wire ex_busy = (~alu_req_if.ready && (decode_if.ex_type == `EX_ALU))
|
||||
|| (~lsu_req_if.ready && (decode_if.ex_type == `EX_LSU))
|
||||
|| (~csr_req_if.ready && (decode_if.ex_type == `EX_CSR))
|
||||
|| (~mul_req_if.ready && (decode_if.ex_type == `EX_MUL))
|
||||
|| (~fpu_req_if.ready && (decode_if.ex_type == `EX_FPU))
|
||||
|| (~gpu_req_if.ready && (decode_if.ex_type == `EX_GPU));
|
||||
|
||||
VX_scheduler #(
|
||||
.CORE_ID(CORE_ID)
|
||||
@@ -44,14 +47,10 @@ module VX_issue #(
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
.gpr_busy (gpr_busy),
|
||||
.alu_busy (alu_busy),
|
||||
.lsu_busy (lsu_busy),
|
||||
.csr_busy (csr_busy),
|
||||
.mul_busy (mul_busy),
|
||||
.fpu_busy (fpu_busy),
|
||||
.gpu_busy (gpu_busy),
|
||||
.issue_tag (issue_tag)
|
||||
.ex_busy (ex_busy),
|
||||
.gpr_busy (gpr_busy),
|
||||
.issue_tag (issue_tag),
|
||||
.schedule_delay (schedule_delay)
|
||||
);
|
||||
|
||||
VX_gpr_stage #(
|
||||
@@ -66,8 +65,8 @@ module VX_issue #(
|
||||
VX_decode_if decode_tmp_if();
|
||||
VX_gpr_read_if gpr_read_tmp_if();
|
||||
|
||||
wire stall = ~alu_req_if.ready || ~decode_if.ready;
|
||||
wire flush = alu_req_if.ready && ~decode_if.ready;
|
||||
wire stall = schedule_delay;
|
||||
wire flush = schedule_delay && ~ex_busy;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
@@ -80,17 +79,19 @@ module VX_issue #(
|
||||
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_read_tmp_if.rs1_data, gpr_read_tmp_if.rs2_data, gpr_read_tmp_if.rs3_data})
|
||||
);
|
||||
|
||||
assign decode_if.ready = ~stall;
|
||||
|
||||
VX_issue_demux issue_demux (
|
||||
.decode_if (decode_tmp_if),
|
||||
.gpr_read_if (gpr_read_tmp_if),
|
||||
.issue_tag (issue_tmp_tag),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
.decode_if (decode_tmp_if),
|
||||
.gpr_read_if(gpr_read_tmp_if),
|
||||
.issue_tag (issue_tmp_tag),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
|
||||
@@ -8,64 +8,52 @@ module VX_scheduler #(
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
input wire ex_busy,
|
||||
input wire gpr_busy,
|
||||
input wire alu_busy,
|
||||
input wire lsu_busy,
|
||||
input wire csr_busy,
|
||||
input wire mul_busy,
|
||||
input wire fpu_busy,
|
||||
input wire gpu_busy,
|
||||
output wire [`ISTAG_BITS-1:0] issue_tag
|
||||
output wire [`ISTAG_BITS-1:0] issue_tag,
|
||||
output wire schedule_delay
|
||||
);
|
||||
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||
reg [`NUM_THREADS-1:0] inuse_registers [`NUM_WARPS-1:0][`NUM_REGS-1:0];
|
||||
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||
|
||||
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.warp_num] & decode_if.reg_use_mask;
|
||||
wire inuse_hazard = (inuse_mask != 0);
|
||||
|
||||
wire exu_stalled = (alu_busy && (decode_if.ex_type == `EX_ALU))
|
||||
|| (lsu_busy && (decode_if.ex_type == `EX_LSU))
|
||||
|| (csr_busy && (decode_if.ex_type == `EX_CSR))
|
||||
|| (mul_busy && (decode_if.ex_type == `EX_MUL))
|
||||
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|
||||
|| (gpu_busy && (decode_if.ex_type == `EX_GPU));
|
||||
|
||||
wire issue_buf_full;
|
||||
|
||||
wire stall = (gpr_busy || exu_stalled || inuse_hazard || issue_buf_full) && decode_if.valid;
|
||||
wire stall = gpr_busy || ex_busy || inuse_hazard || issue_buf_full;
|
||||
|
||||
wire acquire_rd = decode_if.valid && (decode_if.wb != 0) && ~stall;
|
||||
wire issue_fire = decode_if.valid && ~stall;
|
||||
|
||||
wire acquire_rd = issue_fire && (decode_if.wb != 0);
|
||||
|
||||
wire release_rd = writeback_if.valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.thread_mask;
|
||||
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.warp_num, writeback_if.rd}] & ~writeback_if.thread_mask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
integer i, w;
|
||||
for (w = 0; w < `NUM_WARPS; w++) begin
|
||||
for (i = 0; i < `NUM_REGS; i++) begin
|
||||
inuse_registers[w][i] <= 0;
|
||||
for (integer w = 0; w < `NUM_WARPS; w++) begin
|
||||
for (integer i = 0; i < `NUM_REGS; i++) begin
|
||||
inuse_registers[w * `NUM_REGS + i] <= 0;
|
||||
end
|
||||
inuse_reg_mask[w] <= `NUM_REGS'(0);
|
||||
end
|
||||
end else begin
|
||||
if (acquire_rd) begin
|
||||
inuse_registers[decode_if.warp_num][decode_if.rd] <= decode_if.thread_mask;
|
||||
inuse_registers[{decode_if.warp_num, decode_if.rd}] <= decode_if.thread_mask;
|
||||
inuse_reg_mask[decode_if.warp_num][decode_if.rd] <= 1;
|
||||
end
|
||||
if (release_rd) begin
|
||||
assert(inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||
inuse_registers[writeback_if.warp_num][writeback_if.rd] <= inuse_registers_n;
|
||||
inuse_registers[{writeback_if.warp_num, writeback_if.rd}] <= inuse_registers_n;
|
||||
inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] <= (| inuse_registers_n);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire issue_fire = decode_if.valid && ~stall;
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW ($bits(issue_data_t)),
|
||||
.SIZE (`ISSUEQ_SIZE),
|
||||
@@ -82,14 +70,14 @@ module VX_scheduler #(
|
||||
.full (issue_buf_full)
|
||||
);
|
||||
|
||||
assign decode_if.ready = ~stall;
|
||||
assign schedule_delay = stall;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (stall) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, gpr=%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b",
|
||||
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full, inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1],
|
||||
inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], gpr_busy, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
|
||||
if (decode_if.valid && stall) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b, gpr_busy=%b",
|
||||
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full,
|
||||
inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy, gpr_busy);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -18,113 +18,131 @@ module VX_writeback #(
|
||||
// outputs
|
||||
VX_wb_if writeback_if
|
||||
);
|
||||
reg [`ISSUEQ_SIZE-1:0] wb_valid_table, wb_valid_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] wb_data_table, wb_data_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NW_BITS-1:0] wb_warp_num_table, wb_warp_num_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] wb_thread_mask_table, wb_thread_mask_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][31:0] wb_curr_PC_table, wb_curr_PC_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NR_BITS-1:0] wb_rd_table, wb_rd_table_n;
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] wb_data_table [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NW_BITS-1:0] wb_warp_num_table [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NUM_THREADS-1:0] wb_thread_mask_table [`ISSUEQ_SIZE-1:0];
|
||||
reg [31:0] wb_curr_PC_table [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NR_BITS-1:0] wb_rd_table [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NUM_THREADS-1:0][31:0] wb_data, wb_data_n;
|
||||
reg [`NW_BITS-1:0] wb_warp_num, wb_warp_num_n;
|
||||
reg [`NUM_THREADS-1:0] wb_thread_mask, wb_thread_mask_n;
|
||||
reg [31:0] wb_curr_PC, wb_curr_PC_n;
|
||||
reg [`NR_BITS-1:0] wb_rd, wb_rd_n;
|
||||
|
||||
reg [`ISSUEQ_SIZE-1:0] wb_valid_table;
|
||||
reg [`ISSUEQ_SIZE-1:0] wb_valid_table_n;
|
||||
|
||||
reg [`ISTAG_BITS-1:0] wb_index;
|
||||
wire [`ISTAG_BITS-1:0] wb_index_n;
|
||||
reg [`ISTAG_BITS-1:0] wb_index;
|
||||
reg [`ISTAG_BITS-1:0] wb_index_n;
|
||||
|
||||
reg wb_valid;
|
||||
wire wb_valid_n;
|
||||
reg wb_valid_n;
|
||||
|
||||
always @(*) begin
|
||||
wb_valid_table_n = wb_valid_table;
|
||||
wb_valid_table_n = wb_valid_table;
|
||||
wb_warp_num_table_n = wb_warp_num_table;
|
||||
wb_thread_mask_table_n = wb_thread_mask_table;
|
||||
wb_curr_PC_table_n = wb_curr_PC_table;
|
||||
wb_rd_table_n = wb_rd_table;
|
||||
wb_data_table_n = wb_data_table;
|
||||
|
||||
if (wb_valid) begin
|
||||
wb_valid_table_n[wb_index] = 0;
|
||||
end
|
||||
|
||||
if (alu_commit_if.valid) begin
|
||||
wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb;
|
||||
wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb;
|
||||
wb_thread_mask_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.thread_mask;
|
||||
wb_data_table_n [alu_commit_if.issue_tag] = alu_commit_if.data;
|
||||
wb_warp_num_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.warp_num;
|
||||
wb_curr_PC_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.curr_PC;
|
||||
wb_rd_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.rd;
|
||||
end
|
||||
|
||||
if (lsu_commit_if.valid) begin
|
||||
wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb;
|
||||
wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb;
|
||||
wb_thread_mask_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.thread_mask;
|
||||
wb_data_table_n [lsu_commit_if.issue_tag] = lsu_commit_if.data;
|
||||
wb_warp_num_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.warp_num;
|
||||
wb_curr_PC_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.curr_PC;
|
||||
wb_rd_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.rd;
|
||||
end
|
||||
|
||||
if (csr_commit_if.valid) begin
|
||||
wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb;
|
||||
wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb;
|
||||
wb_thread_mask_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.thread_mask;
|
||||
wb_data_table_n [csr_commit_if.issue_tag] = csr_commit_if.data;
|
||||
wb_warp_num_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.warp_num;
|
||||
wb_curr_PC_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.curr_PC;
|
||||
wb_rd_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.rd;
|
||||
end
|
||||
|
||||
if (mul_commit_if.valid) begin
|
||||
wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb;
|
||||
wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb;
|
||||
wb_thread_mask_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.thread_mask;
|
||||
wb_data_table_n [mul_commit_if.issue_tag] = mul_commit_if.data;
|
||||
wb_warp_num_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.warp_num;
|
||||
wb_curr_PC_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.curr_PC;
|
||||
wb_rd_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.rd;
|
||||
end
|
||||
|
||||
if (fpu_commit_if.valid) begin
|
||||
wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb;
|
||||
wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb;
|
||||
wb_thread_mask_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.thread_mask;
|
||||
wb_data_table_n [fpu_commit_if.issue_tag] = fpu_commit_if.data;
|
||||
wb_warp_num_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.warp_num;
|
||||
wb_curr_PC_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.curr_PC;
|
||||
wb_rd_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.rd;
|
||||
end
|
||||
end
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N(`ISSUEQ_SIZE)
|
||||
) wb_select (
|
||||
.data_in (wb_valid_table_n),
|
||||
.data_out (wb_index_n),
|
||||
.valid_out (wb_valid_n)
|
||||
);
|
||||
integer i;
|
||||
|
||||
// Combinational priority select over the next-state writeback table.
// Scans wb_valid_table_n from the highest index down to 0; because a later
// loop iteration overrides an earlier one, the LOWEST valid index wins.
// Outputs the selected entry's index and payload for registering on the
// next clock edge.
always @(*) begin
    // Defaults for the "no pending writeback" case. Every *_n signal must be
    // assigned on all paths, otherwise this block infers latches for the
    // payload signals (wb_valid_n is 0 here, so the payload is don't-care).
    wb_index_n       = 0;
    wb_valid_n       = 0;
    wb_thread_mask_n = 0;
    wb_warp_num_n    = 0;
    wb_curr_PC_n     = 0;
    wb_rd_n          = 0;
    wb_data_n        = 0;

    for (i = `ISSUEQ_SIZE-1; i >= 0; i--) begin
        if (wb_valid_table_n[i]) begin
            wb_index_n       = `ISTAG_BITS'(i);
            wb_valid_n       = 1;
            wb_thread_mask_n = wb_thread_mask_table_n[i];
            wb_warp_num_n    = wb_warp_num_table_n[i];
            wb_curr_PC_n     = wb_curr_PC_table_n[i];
            wb_rd_n          = wb_rd_table_n[i];
            wb_data_n        = wb_data_table_n[i];
        end
    end
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wb_valid_table <= 0;
|
||||
wb_index <= 0;
|
||||
wb_valid <= 0;
|
||||
wb_index <= 0;
|
||||
wb_valid <= 0;
|
||||
end else begin
|
||||
if (alu_commit_if.valid) begin
|
||||
wb_data_table [alu_commit_if.issue_tag] <= alu_commit_if.data;
|
||||
wb_warp_num_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.warp_num;
|
||||
wb_thread_mask_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.thread_mask;
|
||||
wb_curr_PC_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.curr_PC;
|
||||
wb_rd_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd;
|
||||
end
|
||||
|
||||
if (lsu_commit_if.valid) begin
|
||||
wb_data_table [lsu_commit_if.issue_tag] <= lsu_commit_if.data;
|
||||
wb_warp_num_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.warp_num;
|
||||
wb_thread_mask_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.thread_mask;
|
||||
wb_curr_PC_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.curr_PC;
|
||||
wb_rd_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd;
|
||||
end
|
||||
|
||||
if (csr_commit_if.valid) begin
|
||||
wb_data_table [csr_commit_if.issue_tag] <= csr_commit_if.data;
|
||||
wb_warp_num_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.warp_num;
|
||||
wb_thread_mask_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.thread_mask;
|
||||
wb_curr_PC_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.curr_PC;
|
||||
wb_rd_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd;
|
||||
end
|
||||
|
||||
if (mul_commit_if.valid) begin
|
||||
wb_data_table [mul_commit_if.issue_tag] <= mul_commit_if.data;
|
||||
wb_warp_num_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.warp_num;
|
||||
wb_thread_mask_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.thread_mask;
|
||||
wb_curr_PC_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.curr_PC;
|
||||
wb_rd_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.rd;
|
||||
end
|
||||
wb_valid_table <= wb_valid_table_n;
|
||||
wb_thread_mask_table <= wb_thread_mask_table_n;
|
||||
wb_warp_num_table <= wb_warp_num_table_n;
|
||||
wb_curr_PC_table <= wb_curr_PC_table_n;
|
||||
wb_rd_table <= wb_rd_table_n;
|
||||
wb_data_table <= wb_data_table_n;
|
||||
|
||||
if (fpu_commit_if.valid) begin
|
||||
wb_data_table [fpu_commit_if.issue_tag] <= fpu_commit_if.data;
|
||||
wb_warp_num_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.warp_num;
|
||||
wb_thread_mask_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.thread_mask;
|
||||
wb_curr_PC_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.curr_PC;
|
||||
wb_rd_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd;
|
||||
end
|
||||
|
||||
wb_valid_table <= wb_valid_table_n;
|
||||
wb_index <= wb_index_n;
|
||||
wb_valid <= wb_valid_n && writeback_if.ready;
|
||||
wb_index <= wb_index_n;
|
||||
wb_valid <= wb_valid_n && writeback_if.ready;
|
||||
wb_thread_mask <= wb_thread_mask_n;
|
||||
wb_warp_num <= wb_warp_num_n;
|
||||
wb_curr_PC <= wb_curr_PC_n;
|
||||
wb_rd <= wb_rd_n;
|
||||
wb_data <= wb_data_n;
|
||||
end
|
||||
end
|
||||
|
||||
// writeback request
|
||||
assign writeback_if.valid = wb_valid;
|
||||
assign writeback_if.warp_num = wb_warp_num_table [wb_index];
|
||||
assign writeback_if.thread_mask = wb_thread_mask_table [wb_index];
|
||||
assign writeback_if.curr_PC = wb_curr_PC_table [wb_index];
|
||||
assign writeback_if.rd = wb_rd_table [wb_index];
|
||||
assign writeback_if.data = wb_data_table [wb_index];
|
||||
assign writeback_if.thread_mask = wb_thread_mask;
|
||||
assign writeback_if.warp_num = wb_warp_num;
|
||||
assign writeback_if.curr_PC = wb_curr_PC;
|
||||
assign writeback_if.rd = wb_rd;
|
||||
assign writeback_if.data = wb_data;
|
||||
|
||||
// commit back-pressure
|
||||
assign alu_commit_if.ready = 1'b1;
|
||||
|
||||
2
hw/rtl/cache/VX_tag_data_access.v
vendored
2
hw/rtl/cache/VX_tag_data_access.v
vendored
@@ -26,11 +26,13 @@ module VX_tag_data_access #(
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] debug_pc_st1e,
|
||||
input wire debug_wb_st1e,
|
||||
input wire[`NR_BITS-1:0] debug_rd_st1e,
|
||||
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
|
||||
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
|
||||
`IGNORE_WARNINGS_END
|
||||
`endif
|
||||
|
||||
input wire stall,
|
||||
|
||||
@@ -5,8 +5,8 @@ module VX_fp_fpga (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire in_ready,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
|
||||
@@ -19,7 +19,7 @@ module VX_fp_fpga (
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
|
||||
@@ -29,31 +29,30 @@ module VX_fp_fpga (
|
||||
localparam NUM_FPC = 12;
|
||||
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
|
||||
wire [NUM_FPC-1:0] core_in_ready;
|
||||
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] core_result;
|
||||
wire fpnew_has_fflags;
|
||||
wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fpnew_fflags;
|
||||
// Per-thread exception flags: one fflags_t per thread, matching the
// dimension of this module's `fflags` output port.
fflags_t [`NUM_THREADS-1:0] fpnew_fflags;
|
||||
wire [NUM_FPC-1:0][`ISTAG_BITS-1:0] core_out_tag;
|
||||
wire [NUM_FPC-1:0] core_out_ready;
|
||||
wire [NUM_FPC-1:0] core_out_valid;
|
||||
|
||||
reg negate_output;
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
reg fmadd_negate;
|
||||
|
||||
genvar i;
|
||||
|
||||
always @(*) begin
|
||||
core_select = 0;
|
||||
negate_output = 0;
|
||||
core_select = 0;
|
||||
fmadd_negate = 0;
|
||||
case (op)
|
||||
`FPU_ADD: core_select = 1;
|
||||
`FPU_SUB: core_select = 2;
|
||||
`FPU_MUL: core_select = 3;
|
||||
`FPU_MADD: core_select = 4;
|
||||
`FPU_MSUB: core_select = 5;
|
||||
`FPU_NMSUB: begin core_select = 4; negate_output = 1; end
|
||||
`FPU_NMADD: begin core_select = 5; negate_output = 1; end
|
||||
`FPU_NMSUB: begin core_select = 4; fmadd_negate = 1; end
|
||||
`FPU_NMADD: begin core_select = 5; fmadd_negate = 1; end
|
||||
`FPU_DIV: core_select = 6;
|
||||
`FPU_SQRT: core_select = 7;
|
||||
`FPU_CVTWS: core_select = 8;
|
||||
@@ -130,7 +129,7 @@ module VX_fp_fpga (
|
||||
.in_valid (in_valid && (core_select == 4)),
|
||||
.in_ready (core_in_ready[4]),
|
||||
.in_tag (in_tag),
|
||||
.negate (negate_output),
|
||||
.negate (fmadd_negate),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
@@ -146,7 +145,7 @@ module VX_fp_fpga (
|
||||
.in_valid (in_valid && (core_select == 5)),
|
||||
.in_ready (core_in_ready[5]),
|
||||
.in_tag (in_tag),
|
||||
.negate (negate_output),
|
||||
.negate (fmadd_negate),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
@@ -250,10 +249,21 @@ module VX_fp_fpga (
|
||||
assign core_out_ready[i] = out_ready && (i == fp_index);
|
||||
end
|
||||
|
||||
assign has_fflags = fpnew_has_fflags && (fp_index == 0);
|
||||
assign fflags = fpnew_fflags;
|
||||
assign out_tag = core_out_tag[fp_index];
|
||||
assign result = core_result[fp_index];
|
||||
assign out_valid = fp_valid;
|
||||
// Output stage: pick the response of the core selected by fp_index and
// register it before it drives the module outputs.
wire                          tmp_valid      = fp_valid;
wire [`ISTAG_BITS-1:0]        tmp_tag        = core_out_tag[fp_index];
wire [`NUM_THREADS-1:0][31:0] tmp_result     = core_result[fp_index];
// Flags are only reported as present when core index 0 is the one selected.
wire                          tmp_has_fflags = fpnew_has_fflags && (fp_index == 0);

// Driven with a continuous assign: an initializer on a variable declaration
// would only set the value once at time 0 instead of tracking fpnew_fflags.
fflags_t [`NUM_THREADS-1:0]   tmp_fflags;
assign tmp_fflags = fpnew_fflags;

// Register width must cover one fflags_t per thread (NUM_THREADS * FFG_BITS),
// since both tmp_fflags and the fflags port carry NUM_THREADS entries.
// NOTE(review): `stall` is not declared in the visible part of this module;
// confirm it is defined above (e.g. from out_ready/out_valid).
VX_generic_register #(
    .N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
) nc_reg (
    .clk   (clk),
    .reset (reset),
    .stall (stall),
    .flush (1'b0),
    .in    ({tmp_valid, tmp_tag, tmp_result, tmp_has_fflags, tmp_fflags}),
    .out   ({out_valid, out_tag, result,     has_fflags,     fflags})
);
|
||||
|
||||
endmodule
|
||||
@@ -17,7 +17,7 @@ module VX_fp_noncomp (
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
|
||||
@@ -178,7 +178,7 @@ module VX_fp_noncomp (
|
||||
|
||||
reg tmp_valid;
|
||||
reg tmp_has_fflags;
|
||||
reg [`NUM_THREADS-1:0][`FFG_BITS-1:0] tmp_fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] tmp_fflags;
|
||||
reg [`NUM_THREADS-1:0][31:0] tmp_result;
|
||||
|
||||
always @(*) begin
|
||||
@@ -199,27 +199,27 @@ module VX_fp_noncomp (
|
||||
case (op)
|
||||
`FPU_CLASS: begin
|
||||
tmp_result[i] = fclass_mask[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
`FPU_MVXW,`FPU_MVWX: begin
|
||||
tmp_result[i] = dataa[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
`FPU_MIN,`FPU_MAX: begin
|
||||
tmp_result[i] = fminmax_res[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = {a_type[i][0] | b_type[i][0], 4'h0};
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0};
|
||||
end
|
||||
`FPU_SGNJ,`FPU_SGNJN,`FPU_SGNJX: begin
|
||||
tmp_result[i] = fsgnj_res[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
`FPU_CMP: begin
|
||||
tmp_result[i] = fcmp_res[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = fcmp_excp[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = fcmp_excp[i];
|
||||
end
|
||||
default: begin
|
||||
tmp_result[i] = 32'hdeadbeaf;
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
tmp_valid = 1'b0;
|
||||
end
|
||||
endcase
|
||||
@@ -230,7 +230,7 @@ module VX_fp_noncomp (
|
||||
assign in_ready = ~stall;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + `FFG_BITS)
|
||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
|
||||
) nc_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -11,8 +11,8 @@ module VX_fpnew #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire in_ready,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
|
||||
@@ -25,7 +25,7 @@ module VX_fpnew #(
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
|
||||
@@ -75,7 +75,7 @@ module VX_fpnew #(
|
||||
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] fpu_result;
|
||||
fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1];
|
||||
fpnew_pkg::status_t [0:`NUM_THREADS-1] fpu_status;
|
||||
|
||||
wire is_class_op_i, is_class_op_o;
|
||||
assign is_class_op_i = (op == `FPU_CLASS);
|
||||
@@ -194,7 +194,8 @@ module VX_fpnew #(
|
||||
`ENABLE_TRACING
|
||||
|
||||
assign fpu_in_valid = in_valid;
|
||||
assign in_ready = fpu_in_ready;
|
||||
// Report ready whenever no request is being presented (in_valid low), in
// addition to the ready signal coming back from the FPU.
assign in_ready = fpu_in_ready
|
||||
|| ~in_valid; // fix fpnew's in_ready containing in_valid
|
||||
|
||||
assign fpu_in_tag = in_tag;
|
||||
assign out_tag = fpu_out_tag;
|
||||
@@ -202,14 +203,7 @@ module VX_fpnew #(
|
||||
assign result = fpu_result;
|
||||
|
||||
assign has_fflags = fpu_has_fflags_o;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign fflags[i][`FFG_NX] = fpu_status[i].NX;
|
||||
assign fflags[i][`FFG_UF] = fpu_status[i].UF;
|
||||
assign fflags[i][`FFG_OF] = fpu_status[i].OF;
|
||||
assign fflags[i][`FFG_DZ] = fpu_status[i].DZ;
|
||||
assign fflags[i][`FFG_NV] = fpu_status[i].NV;
|
||||
end
|
||||
assign fflags = fpu_status;
|
||||
|
||||
assign out_valid = fpu_out_valid;
|
||||
assign fpu_out_ready = out_ready;
|
||||
|
||||
@@ -12,7 +12,7 @@ interface VX_cmt_to_csr_if ();
|
||||
wire [`NE_BITS:0] num_commits;
|
||||
|
||||
wire has_fflags;
|
||||
wire [`FFG_BITS-1:0] fflags;
|
||||
fflags_t fflags;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@ interface VX_fpu_to_cmt_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire has_fflags;
|
||||
wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -15,7 +15,7 @@ module VX_index_queue #(
|
||||
input wire [`LOG2UP(SIZE)-1:0] read_addr,
|
||||
output wire [DATAW-1:0] read_data
|
||||
);
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0];
|
||||
reg [DATAW-1:0] entries [SIZE-1:0];
|
||||
reg [SIZE-1:0] valid;
|
||||
reg [`LOG2UP(SIZE):0] rd_ptr, wr_ptr;
|
||||
|
||||
@@ -38,7 +38,7 @@ module VX_index_queue #(
|
||||
valid <= 0;
|
||||
end else begin
|
||||
if (enqueue) begin
|
||||
data[wr_a] <= write_data;
|
||||
entries[wr_a] <= write_data;
|
||||
valid[wr_a] <= 1;
|
||||
wr_ptr <= wr_ptr + 1;
|
||||
end
|
||||
@@ -52,6 +52,6 @@ module VX_index_queue #(
|
||||
end
|
||||
|
||||
assign write_addr = wr_a;
|
||||
assign read_data = data[read_addr];
|
||||
assign read_data = entries[read_addr];
|
||||
|
||||
endmodule
|
||||
@@ -14,7 +14,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
|
||||
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
set_time_format -unit ns -decimal_places 3
|
||||
|
||||
create_clock -name {clk} -period "300 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||
create_clock -name {clk} -period "200 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||
|
||||
derive_pll_clocks -create_base_clocks
|
||||
derive_clock_uncertainty
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
project_open Vortex_Socket
|
||||
project_open VX_pipeline
|
||||
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
|
||||
|
||||
Reference in New Issue
Block a user