fp_noncomp fixes
This commit is contained in:
@@ -55,6 +55,8 @@
|
|||||||
|
|
||||||
`define EXT_F_ENABLE
|
`define EXT_F_ENABLE
|
||||||
|
|
||||||
|
`define IBUF_ENABLE
|
||||||
|
|
||||||
// Device identification
|
// Device identification
|
||||||
`define VENDOR_ID 0
|
`define VENDOR_ID 0
|
||||||
`define ARCHITECTURE_ID 0
|
`define ARCHITECTURE_ID 0
|
||||||
|
|||||||
@@ -111,16 +111,16 @@ module VX_lsu_unit #(
|
|||||||
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
|
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
|
||||||
.SIZE (`LSUQ_SIZE)
|
.SIZE (`LSUQ_SIZE)
|
||||||
) lsu_queue (
|
) lsu_queue (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.write_addr (req_tag),
|
.write_addr (req_tag),
|
||||||
.acquire_slot (lsuq_push),
|
.acquire_slot (lsuq_push),
|
||||||
.read_addr (rsp_tag),
|
.read_addr (rsp_tag),
|
||||||
.write_data ({req_wid, req_curr_PC, req_rd, req_wb, req_offset, req_sext}),
|
.write_data ({req_wid, req_curr_PC, req_rd, req_wb, req_offset, req_sext}),
|
||||||
.read_data ({rsp_wid, rsp_curr_PC, rsp_rd, rsp_wb, rsp_offset, rsp_sext}),
|
.read_data ({rsp_wid, rsp_curr_PC, rsp_rd, rsp_wb, rsp_offset, rsp_sext}),
|
||||||
.release_addr (rsp_tag),
|
.release_addr (rsp_tag),
|
||||||
.release_slot (lsuq_pop),
|
.release_slot (lsuq_pop),
|
||||||
.full (lsuq_full)
|
.full (lsuq_full)
|
||||||
);
|
);
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
@@ -170,12 +170,12 @@ module VX_lsu_unit #(
|
|||||||
wire stall_out = ~lsu_commit_if.ready && lsu_commit_if.valid;
|
wire stall_out = ~lsu_commit_if.ready && lsu_commit_if.valid;
|
||||||
wire mem_rsp_stall = is_load_rsp && is_store_req; // arbitration prioritizes stores
|
wire mem_rsp_stall = is_load_rsp && is_store_req; // arbitration prioritizes stores
|
||||||
|
|
||||||
wire arb_valid = is_store_req || is_load_rsp;
|
wire arb_valid = is_store_req || is_load_rsp;
|
||||||
wire [`NW_BITS-1:0] arb_wid = is_store_req ? req_wid : rsp_wid;
|
wire [`NW_BITS-1:0] arb_wid = is_store_req ? req_wid : rsp_wid;
|
||||||
wire [`NUM_THREADS-1:0] arb_thread_mask = is_store_req ? req_thread_mask : dcache_rsp_if.valid;
|
wire [`NUM_THREADS-1:0] arb_tmask = is_store_req ? req_thread_mask : dcache_rsp_if.valid;
|
||||||
wire [31:0] arb_curr_PC = is_store_req ? req_curr_PC : rsp_curr_PC;
|
wire [31:0] arb_curr_PC = is_store_req ? req_curr_PC : rsp_curr_PC;
|
||||||
wire [`NR_BITS-1:0] arb_rd = is_store_req ? 0 : rsp_rd;
|
wire [`NR_BITS-1:0] arb_rd = is_store_req ? 0 : rsp_rd;
|
||||||
wire arb_wb = is_store_req ? 0 : rsp_wb;
|
wire arb_wb = is_store_req ? 0 : rsp_wb;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
||||||
@@ -184,7 +184,7 @@ module VX_lsu_unit #(
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_out),
|
.stall (stall_out),
|
||||||
.flush (1'b0),
|
.flush (1'b0),
|
||||||
.in ({arb_valid, arb_wid, arb_thread_mask, arb_curr_PC, arb_rd, arb_wb, rsp_data}),
|
.in ({arb_valid, arb_wid, arb_tmask, arb_curr_PC, arb_rd, arb_wb, rsp_data}),
|
||||||
.out ({lsu_commit_if.valid, lsu_commit_if.wid, lsu_commit_if.thread_mask, lsu_commit_if.curr_PC, lsu_commit_if.rd, lsu_commit_if.wb, lsu_commit_if.data})
|
.out ({lsu_commit_if.valid, lsu_commit_if.wid, lsu_commit_if.thread_mask, lsu_commit_if.curr_PC, lsu_commit_if.rd, lsu_commit_if.wb, lsu_commit_if.data})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ module VX_scoreboard #(
|
|||||||
output wire delay
|
output wire delay
|
||||||
);
|
);
|
||||||
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
|
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||||
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||||
|
|
||||||
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[ibuf_deq_if.wid] & ibuf_deq_if.used_regs;
|
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[ibuf_deq_if.wid] & ibuf_deq_if.used_regs;
|
||||||
|
|
||||||
|
|||||||
@@ -286,6 +286,7 @@ module VX_fp_fpga #(
|
|||||||
assign per_core_ready_out[i] = ready_out && (i == fp_index);
|
assign per_core_ready_out[i] = ready_out && (i == fp_index);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
assign ready_in = (& per_core_ready_in);
|
||||||
assign valid_out = fp_valid;
|
assign valid_out = fp_valid;
|
||||||
assign tag_out = per_core_tag_out[fp_index];
|
assign tag_out = per_core_tag_out[fp_index];
|
||||||
assign result = per_core_result[fp_index];
|
assign result = per_core_result[fp_index];
|
||||||
|
|||||||
@@ -38,12 +38,17 @@ module VX_fp_noncomp #(
|
|||||||
SIG_NAN = 32'h00000100,
|
SIG_NAN = 32'h00000100,
|
||||||
QUT_NAN = 32'h00000200;
|
QUT_NAN = 32'h00000200;
|
||||||
|
|
||||||
wire [LANES-1:0] a_sign, b_sign;
|
reg [`FPU_BITS-1:0] op_r;
|
||||||
wire [LANES-1:0][7:0] a_exponent, b_exponent;
|
reg [`FRM_BITS-1:0] frm_r;
|
||||||
wire [LANES-1:0][22:0] a_mantissa, b_mantissa;
|
|
||||||
fp_type_t [LANES-1:0] a_type, b_type;
|
|
||||||
|
|
||||||
wire [LANES-1:0] a_smaller, ab_equal;
|
reg [LANES-1:0][31:0] dataa_r;
|
||||||
|
reg [LANES-1:0][31:0] datab_r;
|
||||||
|
|
||||||
|
reg [LANES-1:0] a_sign, b_sign;
|
||||||
|
reg [LANES-1:0][7:0] a_exponent, b_exponent;
|
||||||
|
reg [LANES-1:0][22:0] a_mantissa, b_mantissa;
|
||||||
|
fp_type_t [LANES-1:0] a_type, b_type;
|
||||||
|
reg [LANES-1:0] a_smaller, ab_equal;
|
||||||
|
|
||||||
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
|
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
|
||||||
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
|
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
|
||||||
@@ -51,30 +56,58 @@ module VX_fp_noncomp #(
|
|||||||
reg [LANES-1:0][31:0] fcmp_res; // result of comparison
|
reg [LANES-1:0][31:0] fcmp_res; // result of comparison
|
||||||
reg [LANES-1:0][ 4:0] fcmp_excp; // exception of comparison
|
reg [LANES-1:0][ 4:0] fcmp_excp; // exception of comparison
|
||||||
|
|
||||||
|
wire stall = ~ready_out && valid_out;
|
||||||
|
|
||||||
// Setup
|
// Setup
|
||||||
for (genvar i = 0; i < LANES; i++) begin
|
for (genvar i = 0; i < LANES; i++) begin
|
||||||
assign a_sign[i] = dataa[i][31];
|
wire tmp_a_sign = dataa[i][31];
|
||||||
assign a_exponent[i] = dataa[i][30:23];
|
wire [7:0] tmp_a_exponent = dataa[i][30:23];
|
||||||
assign a_mantissa[i] = dataa[i][22:0];
|
wire [22:0] tmp_a_mantissa = dataa[i][22:0];
|
||||||
|
|
||||||
assign b_sign[i] = datab[i][31];
|
wire tmp_b_sign = datab[i][31];
|
||||||
assign b_exponent[i] = datab[i][30:23];
|
wire [7:0] tmp_b_exponent = datab[i][30:23];
|
||||||
assign b_mantissa[i] = datab[i][22:0];
|
wire [22:0] tmp_b_mantissa = datab[i][22:0];
|
||||||
|
|
||||||
assign a_smaller[i] = (dataa[i] < datab[i]) ^ (a_sign[i] || b_sign[i]);
|
fp_type_t tmp_a_type, tmp_b_type;
|
||||||
assign ab_equal[i] = (dataa[i] == datab[i]) | (a_type[i][4] & b_type[i][4]);
|
|
||||||
|
|
||||||
VX_fp_type fp_type_a (
|
VX_fp_type fp_type_a ( // classifies operand A of lane i from its raw exponent/mantissa fields
|
||||||
.exponent(a_exponent[i]),
|
.exponent(tmp_a_exponent), // tmp_* wires are declared per lane inside this generate loop: pass the whole 8-bit field, not bit i
|
||||||
.mantissa(a_mantissa[i]),
|
.mantissa(tmp_a_mantissa), // whole 23-bit field
|
||||||
.o_type(a_type[i])
|
.o_type(tmp_a_type) // whole fp_type_t value; it is registered into a_type[i] further down
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_fp_type fp_type_b (
|
VX_fp_type fp_type_b ( // classifies operand B of lane i from its raw exponent/mantissa fields
|
||||||
.exponent(b_exponent[i]),
|
.exponent(tmp_b_exponent), // whole 8-bit field (was indexed with the lane number)
|
||||||
.mantissa(b_mantissa[i]),
|
.mantissa(tmp_b_mantissa), // whole 23-bit field
|
||||||
.o_type(b_type[i])
|
.o_type(tmp_b_type) // whole fp_type_t value; it is registered into b_type[i] further down
|
||||||
);
|
);
|
||||||
|
|
||||||
|
wire tmp_a_smaller = (dataa[i] < datab[i]) ^ (tmp_a_sign || tmp_b_sign);
|
||||||
|
wire tmp_ab_equal = (dataa[i] == datab[i]) | (tmp_a_type[4] & tmp_b_type[4]);
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (~stall) begin
|
||||||
|
a_sign[i] <= tmp_a_sign;
|
||||||
|
b_sign[i] <= tmp_b_sign;
|
||||||
|
a_exponent[i] <= tmp_a_exponent;
|
||||||
|
b_exponent[i] <= tmp_b_exponent;
|
||||||
|
a_mantissa[i] <= tmp_a_mantissa;
|
||||||
|
b_mantissa[i] <= tmp_b_mantissa;
|
||||||
|
a_type[i] <= tmp_a_type;
|
||||||
|
b_type[i] <= tmp_b_type;
|
||||||
|
a_smaller[i] <= tmp_a_smaller;
|
||||||
|
ab_equal[i] <= tmp_ab_equal;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (~stall) begin
|
||||||
|
op_r <= op;
|
||||||
|
frm_r <= frm;
|
||||||
|
dataa_r <= dataa;
|
||||||
|
datab_r <= datab;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
// FCLASS
|
// FCLASS
|
||||||
@@ -107,13 +140,13 @@ module VX_fp_noncomp #(
|
|||||||
if (a_type[i].is_nan && b_type[i].is_nan)
|
if (a_type[i].is_nan && b_type[i].is_nan)
|
||||||
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||||
else if (a_type[i].is_nan)
|
else if (a_type[i].is_nan)
|
||||||
fminmax_res[i] = datab[i];
|
fminmax_res[i] = datab_r[i];
|
||||||
else if (b_type[i].is_nan)
|
else if (b_type[i].is_nan)
|
||||||
fminmax_res[i] = dataa[i];
|
fminmax_res[i] = dataa_r[i];
|
||||||
else begin
|
else begin
|
||||||
case (op) // use LSB to distinguish MIN and MAX
|
case (op_r) // use LSB to distinguish MIN and MAX
|
||||||
`FPU_MIN: fminmax_res[i] = a_smaller[i] ? dataa[i] : datab[i];
|
`FPU_MIN: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
|
||||||
`FPU_MAX: fminmax_res[i] = a_smaller[i] ? datab[i] : dataa[i];
|
`FPU_MAX: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
|
||||||
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
|
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
@@ -123,7 +156,7 @@ module VX_fp_noncomp #(
|
|||||||
// Sign Injection
|
// Sign Injection
|
||||||
for (genvar i = 0; i < LANES; i++) begin
|
for (genvar i = 0; i < LANES; i++) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (op)
|
case (op_r)
|
||||||
`FPU_SGNJ: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
`FPU_SGNJ: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
`FPU_SGNJN: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
`FPU_SGNJN: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
`FPU_SGNJX: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
`FPU_SGNJX: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
@@ -135,7 +168,7 @@ module VX_fp_noncomp #(
|
|||||||
// Comparison
|
// Comparison
|
||||||
for (genvar i = 0; i < LANES; i++) begin
|
for (genvar i = 0; i < LANES; i++) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (frm)
|
case (frm_r)
|
||||||
`FRM_RNE: begin
|
`FRM_RNE: begin
|
||||||
if (a_type[i].is_nan || b_type[i].is_nan) begin
|
if (a_type[i].is_nan || b_type[i].is_nan) begin
|
||||||
fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN
|
fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN
|
||||||
@@ -183,7 +216,7 @@ module VX_fp_noncomp #(
|
|||||||
reg [LANES-1:0][31:0] tmp_result;
|
reg [LANES-1:0][31:0] tmp_result;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (op)
|
case (op_r)
|
||||||
`FPU_SGNJ: tmp_has_fflags = 0;
|
`FPU_SGNJ: tmp_has_fflags = 0;
|
||||||
`FPU_SGNJN: tmp_has_fflags = 0;
|
`FPU_SGNJN: tmp_has_fflags = 0;
|
||||||
`FPU_SGNJX: tmp_has_fflags = 0;
|
`FPU_SGNJX: tmp_has_fflags = 0;
|
||||||
@@ -197,7 +230,7 @@ module VX_fp_noncomp #(
|
|||||||
for (genvar i = 0; i < LANES; i++) begin
|
for (genvar i = 0; i < LANES; i++) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
tmp_valid = 1'b1;
|
tmp_valid = 1'b1;
|
||||||
case (op)
|
case (op_r)
|
||||||
`FPU_CLASS: begin
|
`FPU_CLASS: begin
|
||||||
tmp_result[i] = fclass_mask[i];
|
tmp_result[i] = fclass_mask[i];
|
||||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||||
@@ -227,9 +260,6 @@ module VX_fp_noncomp #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
wire stall = ~ready_out && valid_out;
|
|
||||||
assign ready_in = ~stall;
|
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
|
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
|
||||||
) nc_reg (
|
) nc_reg (
|
||||||
@@ -241,4 +271,6 @@ module VX_fp_noncomp #(
|
|||||||
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assign ready_in = ~stall;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
38
hw/rtl/interfaces/VX_issue_if.v
Normal file
38
hw/rtl/interfaces/VX_issue_if.v
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
`ifndef VX_ISSUE_IF
`define VX_ISSUE_IF

`include "VX_define.vh"

// Signal bundle describing one issued instruction.
// The interface only declares nets; it defines no modports and no logic,
// so signal direction is decided by whichever modules connect to it.
// All widths come from macros provided by VX_define.vh.
interface VX_issue_if ();

    wire                            valid;          // qualifies the remaining fields

    wire [`ITAG_BITS-1:0]           issue_tag;
    wire [`NW_BITS-1:0]             wid;            // presumably the warp id -- confirm against the producer
    wire [`NUM_THREADS-1:0]         thread_mask;    // one bit per thread
    wire [31:0]                     curr_PC;

    wire [`EX_BITS-1:0]             ex_type;
    wire [`OP_BITS-1:0]             ex_op;

    wire [`FRM_BITS-1:0]            frm;

    wire                            wb;

    wire [`NR_BITS-1:0]             rd;

    // Source operand values, one 32-bit word per thread.
    wire [`NUM_THREADS-1:0][31:0]   rs1_data;
    wire [`NUM_THREADS-1:0][31:0]   rs2_data;
    wire [`NUM_THREADS-1:0][31:0]   rs3_data;

    wire [`NR_BITS-1:0]             rs1;
    wire [31:0]                     imm;

    wire                            rs1_is_PC;
    wire                            rs2_is_imm;

    wire [`NT_BITS-1:0]             tid;            // presumably a thread index -- confirm against the producer

endinterface

`endif
|
||||||
47
hw/rtl/libs/VX_bypass_buffer.v
Normal file
47
hw/rtl/libs/VX_bypass_buffer.v
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
`include "VX_platform.vh"

// Single-entry bypass buffer for a valid/ready stream.
//
// Parameters:
//   DATAW    - width of the data word.
//   PASSTHRU - when non-zero the module is pure wiring (no storage).
//
// Behaviour when PASSTHRU == 0:
//   * While the slot is empty, data_in is forwarded combinationally to
//     data_out. If the consumer is not ready in that cycle the word is
//     captured into the slot instead.
//   * While the slot is full, data_out shows the stored word and ready_in
//     follows ready_out.
//
// A word offered while ready_in is low is NOT accepted; the producer is
// expected to keep it on data_in until ready_in goes high.
module VX_bypass_buffer #(
    parameter DATAW    = 1,
    parameter PASSTHRU = 0
) (
    input  wire             clk,
    input  wire             reset,
    input  wire             valid_in,
    output wire             ready_in,
    input  wire [DATAW-1:0] data_in,
    output wire [DATAW-1:0] data_out,
    input  wire             ready_out,
    output wire             valid_out
);
    if (PASSTHRU) begin
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        assign ready_in  = ready_out;
        assign valid_out = valid_in;
        assign data_out  = data_in;
    end else begin
        reg [DATAW-1:0] buffer;
        reg             buffer_valid;

        always @(posedge clk) begin
            if (reset) begin
                buffer_valid <= 0;
                buffer       <= 0;
            end else if (buffer_valid) begin
                if (ready_out) begin
                    // The stored word leaves through data_out this cycle.
                    // ready_in is high at the same time, so a word offered
                    // now has been accepted and must take over the slot;
                    // otherwise the slot becomes empty.
                    buffer_valid <= valid_in;
                    if (valid_in) begin
                        buffer <= data_in;
                    end
                end
                // Slot full and consumer stalled: ready_in is low, so the
                // producer may legitimately keep valid_in asserted. Hold the
                // stored word instead of overwriting it.
            end else if (valid_in && ~ready_out) begin
                // Slot empty and consumer stalled: park the incoming word.
                buffer       <= data_in;
                buffer_valid <= 1;
            end
        end

        assign ready_in  = ready_out || !buffer_valid;
        assign data_out  = buffer_valid ? buffer : data_in;
        assign valid_out = valid_in || buffer_valid;
    end

endmodule
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
set_time_format -unit ns -decimal_places 3
|
set_time_format -unit ns -decimal_places 3
|
||||||
|
|
||||||
create_clock -name {clk} -period "240 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
create_clock -name {clk} -period "200 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||||
|
|
||||||
derive_pll_clocks -create_base_clocks
|
derive_pll_clocks -create_base_clocks
|
||||||
derive_clock_uncertainty
|
derive_clock_uncertainty
|
||||||
|
|||||||
Reference in New Issue
Block a user