floating point support fixes
This commit is contained in:
@@ -57,7 +57,7 @@
|
||||
|
||||
`define EXT_M_ENABLE
|
||||
|
||||
// define EXT_F_ENABLE
|
||||
`define EXT_F_ENABLE
|
||||
|
||||
// Configuration Values =======================================================
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ module VX_decode #(
|
||||
|
||||
reg [`ALU_BITS-1:0] alu_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
wire [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`CSR_BITS-1:0] csr_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
reg [`FPU_BITS-1:0] fpu_op;
|
||||
@@ -71,10 +71,12 @@ module VX_decode #(
|
||||
wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12;
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_I: src2_imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
`INST_S: src2_imm = {{20{func7[6]}}, func7, rd};
|
||||
`INST_L: src2_imm = {{20{u_12[11]}}, u_12};
|
||||
`INST_B: src2_imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
`INST_I: src2_imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
`INST_S,
|
||||
`INST_FS: src2_imm = {{20{func7[6]}}, func7, rd};
|
||||
`INST_L,
|
||||
`INST_FL: src2_imm = {{20{u_12[11]}}, u_12};
|
||||
`INST_B: src2_imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
default: src2_imm = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
@@ -147,11 +149,6 @@ module VX_decode #(
|
||||
end
|
||||
end
|
||||
|
||||
// LSU
|
||||
|
||||
wire is_lsu = (is_ltype || is_stype);
|
||||
assign lsu_op = {is_stype, func3};
|
||||
|
||||
// CSR
|
||||
|
||||
wire is_csr_imm = is_csr && (func3[2] == 1);
|
||||
@@ -199,6 +196,11 @@ module VX_decode #(
|
||||
wire is_fmsub = (opcode == `INST_FMSUB);
|
||||
wire is_fnmsub = (opcode == `INST_FNMSUB);
|
||||
wire is_fnmadd = (opcode == `INST_FNMADD);
|
||||
|
||||
wire is_fcmp = is_fci && (func7 == 7'h50); // compare
|
||||
wire is_fcvti = is_fci && (func7 == 7'h60); // convert to int
|
||||
wire is_fcvtf = is_fci && (func7 == 7'h68); // convert to float
|
||||
wire is_fmvcls = is_fci && (func7 == 7'h70 || func7 == 7'h78); // move + class
|
||||
wire is_fr4 = is_fmadd || is_fmsub || is_fnmsub || is_fnmadd;
|
||||
wire is_fpu = (is_fl || is_fs || is_fci || is_fr4);
|
||||
|
||||
@@ -232,15 +234,29 @@ module VX_decode #(
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire is_fs = 0;
|
||||
wire is_fci = 0;
|
||||
wire is_fr4 = 0;
|
||||
wire is_fpu = 0;
|
||||
wire is_fl = 0;
|
||||
wire is_fs = 0;
|
||||
wire is_fci = 0;
|
||||
wire is_fcmp = 0;
|
||||
wire is_fcvti = 0;
|
||||
wire is_fcvtf = 0;
|
||||
wire is_fmvcls = 0;
|
||||
wire is_fr4 = 0;
|
||||
wire is_fpu = 0;
|
||||
always @(*) begin
|
||||
fpu_op = `FPU_OTHER;
|
||||
end
|
||||
`endif
|
||||
|
||||
// LSU
|
||||
|
||||
wire is_lsu = (is_ltype || is_stype || is_fl || is_fs);
|
||||
always @(*) begin
|
||||
lsu_op = {is_stype, func3};
|
||||
if (is_fl) lsu_op = `LSU_LW;
|
||||
if (is_fs) lsu_op = `LSU_SW;
|
||||
end
|
||||
|
||||
// GPU
|
||||
|
||||
always @(*) begin
|
||||
@@ -272,14 +288,14 @@ module VX_decode #(
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
|
||||
assign decode_tmp_if.instr_op = is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_fpu ? `OP_BITS'(fpu_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_br ? `OP_BITS'({1'b1, br_op}) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||
0;
|
||||
assign decode_tmp_if.ex_op = is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_fpu ? `OP_BITS'(fpu_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_br ? `OP_BITS'({1'b1, br_op}) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||
0;
|
||||
|
||||
assign decode_tmp_if.rd = rd;
|
||||
assign decode_tmp_if.rs1 = is_lui ? `NR_BITS'(0) : rs1;
|
||||
@@ -293,20 +309,25 @@ module VX_decode #(
|
||||
assign decode_tmp_if.rs1_is_PC = is_auipc;
|
||||
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
||||
|
||||
assign decode_tmp_if.use_rs1 = (decode_tmp_if.rs1 != 0)
|
||||
&& (is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || ~is_csr_imm || is_gpu);
|
||||
assign decode_tmp_if.use_rs1 = is_fpu
|
||||
|| is_gpu
|
||||
|| ((is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || ~is_csr_imm || is_gpu)
|
||||
&& (decode_tmp_if.rs1 != 0));
|
||||
|
||||
assign decode_tmp_if.use_rs2 = (decode_tmp_if.rs2 != 0)
|
||||
&& (is_btype || is_stype || is_rtype || (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN)));
|
||||
assign decode_tmp_if.use_rs2 = (is_fpu && ~(is_fl || (fpu_op == `FPU_SQRT) || is_fcvti || is_fcvtf || is_fmvcls))
|
||||
|| (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN))
|
||||
|| ((is_btype || is_stype || is_rtype)
|
||||
&& (decode_tmp_if.rs2 != 0));
|
||||
|
||||
assign decode_tmp_if.rd_is_fp = is_fpu && ~(is_fci && ((func7 == 7'h50) || (func7 == 7'h60) || (func7 == 7'h70)));
|
||||
assign decode_tmp_if.rs1_is_fp = is_fci && ((func7 != 7'h68) && (fpu_op != `FPU_MVWX)) || is_fr4;
|
||||
assign decode_tmp_if.rs2_is_fp = is_fs || (is_fci && ((func7 != 7'h60) && (func7 != 7'h68)) || is_fr4);
|
||||
assign decode_tmp_if.rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || (fpu_op == `FPU_MVXW || fpu_op == `FPU_CLASS));
|
||||
assign decode_tmp_if.rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || (fpu_op == `FPU_MVWX)));
|
||||
assign decode_tmp_if.rs2_is_fp = is_fs || is_fr4 || is_fci;
|
||||
assign decode_tmp_if.rs3 = rs3;
|
||||
assign decode_tmp_if.use_rs3 = is_fr4;
|
||||
assign decode_tmp_if.frm = func3;
|
||||
|
||||
assign decode_tmp_if.wb = is_fpu || ((rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
|
||||
assign decode_tmp_if.wb = (is_fl || is_fci || is_fr4)
|
||||
|| ((rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
|
||||
|
||||
assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
@@ -323,8 +344,8 @@ module VX_decode #(
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.rd_is_fp, decode_tmp_if.frm}),
|
||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.rd_is_fp, decode_if.frm})
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.rd_is_fp, decode_tmp_if.frm}),
|
||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.rd_is_fp, decode_if.frm})
|
||||
);
|
||||
|
||||
assign ifetch_rsp_if.ready = ~stall;
|
||||
@@ -335,13 +356,15 @@ module VX_decode #(
|
||||
$write("%t: Core%0d-Decode: warp=%0d, PC=%0h, ex=", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC);
|
||||
print_ex_type(decode_tmp_if.ex_type);
|
||||
$write(", op=");
|
||||
print_instr_op(decode_tmp_if.ex_type, decode_tmp_if.instr_op);
|
||||
$write(", tmask=%b, wb=%b, rd=%0d, rd_is_fp=%b, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b, use_rs3=%b\n", decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, decode_tmp_if.rd_is_fp, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.rs3, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.use_rs3);
|
||||
print_ex_op(decode_tmp_if.ex_type, decode_tmp_if.ex_op);
|
||||
$write(", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b, use_rs3=%b, rd_is_fp=%b, rs1_is_fp=%b, rs2_is_fp=%b, frm=", decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.rs3, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.use_rs3, decode_tmp_if.rd_is_fp,decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp);
|
||||
print_frm(decode_tmp_if.frm);
|
||||
$write("\n");
|
||||
|
||||
// trap unsupported instructions
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.instr_op) == `ALU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_CSR) && `CSR_OP(decode_tmp_if.instr_op) == `CSR_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_GPU) && `GPU_OP(decode_tmp_if.instr_op) == `GPU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.ex_op) == `ALU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_CSR) && `CSR_OP(decode_tmp_if.ex_op) == `CSR_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_GPU) && `GPU_OP(decode_tmp_if.ex_op) == `GPU_OTHER));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -101,21 +101,21 @@
|
||||
`define INST_AUIPC 7'b0010111
|
||||
`define INST_JAL 7'b1101111
|
||||
`define INST_JALR 7'b1100111
|
||||
`define INST_B 7'b1100011
|
||||
`define INST_L 7'b0000011
|
||||
`define INST_S 7'b0100011
|
||||
`define INST_I 7'b0010011
|
||||
`define INST_R 7'b0110011
|
||||
`define INST_F 7'b0001111
|
||||
`define INST_SYS 7'b1110011
|
||||
`define INST_B 7'b1100011 // branch instructions
|
||||
`define INST_L 7'b0000011 // load instructions
|
||||
`define INST_S 7'b0100011 // store instructions
|
||||
`define INST_I 7'b0010011 // immediate instructions
|
||||
`define INST_R 7'b0110011 // register instructions
|
||||
`define INST_F 7'b0001111 // Fence instructions
|
||||
`define INST_SYS 7'b1110011 // system instructions
|
||||
|
||||
`define INST_FL 7'b0000111
|
||||
`define INST_FS 7'b0100111
|
||||
`define INST_FCI 7'b1010011
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
|
||||
`define INST_GPU 7'b1101011
|
||||
|
||||
@@ -236,7 +236,7 @@
|
||||
`define FRM_RNE 3'b000
|
||||
`define FRM_RTZ 3'b001
|
||||
`define FRM_RDN 3'b010
|
||||
`define FRM_RUP 3'b011 // positive inf
|
||||
`define FRM_RUP 3'b011
|
||||
`define FRM_RMM 3'b100
|
||||
`define FRM_DYN 3'b111
|
||||
`define FRM_BITS 3
|
||||
@@ -464,13 +464,14 @@ task print_ex_type;
|
||||
`EX_LSU: $write("LSU");
|
||||
`EX_CSR: $write("CSR");
|
||||
`EX_MUL: $write("MUL");
|
||||
`EX_FPU: $write("FPU");
|
||||
`EX_GPU: $write("GPU");
|
||||
default: $write("NOP");
|
||||
endcase
|
||||
end
|
||||
endtask
|
||||
|
||||
task print_instr_op;
|
||||
task print_ex_op;
|
||||
input [`EX_BITS-1:0] ex;
|
||||
input [`OP_BITS-1:0] op;
|
||||
begin
|
||||
@@ -489,12 +490,12 @@ task print_instr_op;
|
||||
`ALU_AND: $write("AND");
|
||||
`ALU_LUI: $write("LUI");
|
||||
`ALU_AUIPC: $write("AUIPC");
|
||||
`ALU_BEQ: $write("EQ");
|
||||
`ALU_BNE: $write("NE");
|
||||
`ALU_BLT: $write("LT");
|
||||
`ALU_BGE: $write("GE");
|
||||
`ALU_BLTU: $write("LTU");
|
||||
`ALU_BGEU: $write("GEU");
|
||||
`ALU_BEQ: $write("BEQ");
|
||||
`ALU_BNE: $write("BNE");
|
||||
`ALU_BLT: $write("BLT");
|
||||
`ALU_BGE: $write("BGE");
|
||||
`ALU_BLTU: $write("BLTU");
|
||||
`ALU_BGEU: $write("BGEU");
|
||||
`ALU_JAL: $write("JAL");
|
||||
`ALU_JALR: $write("JALR");
|
||||
`ALU_ECALL: $write("ECALL");
|
||||
@@ -582,4 +583,19 @@ task print_instr_op;
|
||||
end
|
||||
endtask
|
||||
|
||||
task print_frm;
|
||||
input [`FRM_BITS-1:0] frm;
|
||||
begin
|
||||
case (frm)
|
||||
`FRM_RNE: $write("RNE");
|
||||
`FRM_RTZ: $write("RTZ");
|
||||
`FRM_RDN: $write("RDN");
|
||||
`FRM_RUP: $write("RUP");
|
||||
`FRM_RMM: $write("RMM");
|
||||
`FRM_DYN: $write("DYN");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
endtask
|
||||
|
||||
`endif
|
||||
|
||||
@@ -49,6 +49,7 @@ module VX_fpu_unit #(
|
||||
wire [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag;
|
||||
|
||||
wire [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
|
||||
assign fpu_operands = {fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data};
|
||||
|
||||
wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
|
||||
wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
|
||||
@@ -88,15 +89,13 @@ module VX_fpu_unit #(
|
||||
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::ADD; fpu_op_mod = 1; end
|
||||
`FPU_CVTSW: fpu_op = fpnew_pkg::I2F;
|
||||
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
|
||||
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; end
|
||||
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; end
|
||||
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_op_mod = 1; fpu_rnd = `FRM_RUP; end
|
||||
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_op_mod = 0; fpu_rnd = `FRM_RUP; end
|
||||
`FPU_CLASS: fpu_op = fpnew_pkg::CLASSIFY;
|
||||
`FPU_CMP: fpu_op = fpnew_pkg::CMP;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign fpu_operands = {fpu_req_if.rs3_data, fpu_req_if.rs2_data, fpu_req_if.rs1_data};
|
||||
end
|
||||
|
||||
`DISABLE_TRACING
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ module VX_gpr_fp_ctrl (
|
||||
if (reset) begin
|
||||
multi_cyc_state <= 0;
|
||||
end else if (!schedule_delay) begin
|
||||
multi_cyc_state <= decode_if.use_rs3 && (multi_cyc_state == 0);
|
||||
multi_cyc_state <= decode_if.use_rs3 && (0 == multi_cyc_state);
|
||||
end else begin
|
||||
multi_cyc_state <= 0;
|
||||
end
|
||||
@@ -70,9 +70,9 @@ module VX_gpr_fp_ctrl (
|
||||
|
||||
// outputs
|
||||
|
||||
assign gpr_delay = (multi_cyc_state == 0) && decode_if.use_rs3;
|
||||
assign gpr_delay = decode_if.use_rs3 && (0 == multi_cyc_state);
|
||||
|
||||
assign raddr1 = multi_cyc_state ? decode_if.rs3 : decode_if.rs1 ;
|
||||
assign raddr1 = multi_cyc_state ? decode_if.rs3 : decode_if.rs1;
|
||||
assign raddr2 = decode_if.rs2;
|
||||
|
||||
always @(*) begin
|
||||
|
||||
@@ -20,7 +20,6 @@ module VX_gpr_stage #(
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_int_data [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_int_data [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0];
|
||||
|
||||
wire [`NR_BITS-1:0] raddr1;
|
||||
wire [`NR_BITS-1:0] raddr2;
|
||||
@@ -28,10 +27,10 @@ module VX_gpr_stage #(
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign we[i] = writeback_if.thread_mask & {`NUM_THREADS{~writeback_if.rd_is_fp && (i == writeback_if.warp_num)}};
|
||||
wire [`NUM_WARPS-1:0] we = writeback_if.thread_mask & {`NUM_THREADS{writeback_if.valid && ~writeback_if.rd_is_fp && (i == writeback_if.warp_num)}};
|
||||
VX_gpr_ram gpr_int_ram (
|
||||
.clk (clk),
|
||||
.we (we[i]),
|
||||
.we (we),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (raddr1),
|
||||
@@ -47,10 +46,10 @@ module VX_gpr_stage #(
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_fp_data [`NUM_WARPS-1:0];
|
||||
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign we[i] = writeback_if.thread_mask & {`NUM_THREADS{writeback_if.rd_is_fp && (i == writeback_if.warp_num)}};
|
||||
wire [`NUM_WARPS-1:0] we = writeback_if.thread_mask & {`NUM_THREADS{writeback_if.valid && writeback_if.rd_is_fp && (i == writeback_if.warp_num)}};
|
||||
VX_gpr_ram gpr_fp_ram (
|
||||
.clk (clk),
|
||||
.we (we[i]),
|
||||
.we (we),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (raddr1),
|
||||
|
||||
@@ -77,7 +77,7 @@ module VX_icache_stage #(
|
||||
$display("%t: I$%0d req: warp=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.warp_num, ifetch_req_if.curr_PC);
|
||||
end
|
||||
if (icache_rsp_if.valid && icache_rsp_if.ready) begin
|
||||
$display("%t: I$%0d rsp: warp=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.warp_num, ifetch_req_if.curr_PC, ifetch_rsp_if.instr);
|
||||
$display("%t: I$%0d rsp: warp=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.warp_num, ifetch_rsp_if.curr_PC, ifetch_rsp_if.instr);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -74,8 +74,8 @@ module VX_issue #(
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm, gpr_data_if.rs1_data, gpr_data_if.rs2_data, gpr_data_if.rs3_data}),
|
||||
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.frm, gpr_data_tmp_if.rs1_data, gpr_data_tmp_if.rs2_data, gpr_data_tmp_if.rs3_data})
|
||||
.in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm, gpr_data_if.rs1_data, gpr_data_if.rs2_data, gpr_data_if.rs3_data}),
|
||||
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.frm, gpr_data_tmp_if.rs1_data, gpr_data_tmp_if.rs2_data, gpr_data_tmp_if.rs3_data})
|
||||
);
|
||||
|
||||
VX_issue_demux issue_demux (
|
||||
@@ -92,22 +92,22 @@ module VX_issue #(
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_req_if.valid && ~stall) begin
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
||||
end
|
||||
if (lsu_req_if.valid && ~stall) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, lsu_req_if.rw, decode_tmp_if.rd, decode_tmp_if.wb, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset);
|
||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset);
|
||||
end
|
||||
if (csr_req_if.valid && ~stall) begin
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
end
|
||||
if (mul_req_if.valid && ~stall) begin
|
||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
end
|
||||
if (fpu_req_if.valid && ~stall) begin
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%d, rd=%0d, frm=%0h, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, fpu_req_if.frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
end
|
||||
if (gpu_req_if.valid && ~stall) begin
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
end
|
||||
end
|
||||
|
||||
@@ -102,6 +102,22 @@ module VX_scheduler #(
|
||||
|
||||
wire ib_acquire = decode_if.valid && ~stall;
|
||||
|
||||
`DEBUG_BLOCK(
|
||||
wire [`NW_BITS-1:0] cis_alu_warp_num = commit_is_if.alu_data.warp_num;
|
||||
wire [`NUM_THREADS-1:0] cis_alu_thread_mask = commit_is_if.alu_data.thread_mask;
|
||||
wire [31:0] cis_alu_curr_PC = commit_is_if.alu_data.curr_PC;
|
||||
wire [`NR_BITS-1:0] cis_alu_rd = commit_is_if.alu_data.rd;
|
||||
wire cis_alu_rd_is_fp = commit_is_if.alu_data.rd_is_fp;
|
||||
wire cis_alu_wb = commit_is_if.alu_data.wb;
|
||||
|
||||
wire [`NW_BITS-1:0] cis_fpu_warp_num = commit_is_if.fpu_data.warp_num;
|
||||
wire [`NUM_THREADS-1:0] cis_fpu_thread_mask = commit_is_if.fpu_data.thread_mask;
|
||||
wire [31:0] cis_fpu_curr_PC = commit_is_if.fpu_data.curr_PC;
|
||||
wire [`NR_BITS-1:0] cis_fpu_rd = commit_is_if.fpu_data.rd;
|
||||
wire cis_fpu_rd_is_fp = commit_is_if.fpu_data.rd_is_fp;
|
||||
wire cis_fpu_wb = commit_is_if.fpu_data.wb;
|
||||
)
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW ($bits(is_data_t)),
|
||||
.SIZE (`ISSUEQ_SIZE),
|
||||
|
||||
@@ -39,6 +39,13 @@ module VX_writeback #(
|
||||
mul_valid ? commit_is_if.mul_data.warp_num :
|
||||
fpu_valid ? commit_is_if.fpu_data.warp_num :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.curr_PC = alu_valid ? commit_is_if.alu_data.curr_PC :
|
||||
lsu_valid ? commit_is_if.lsu_data.curr_PC :
|
||||
csr_valid ? commit_is_if.csr_data.curr_PC :
|
||||
mul_valid ? commit_is_if.mul_data.curr_PC :
|
||||
fpu_valid ? commit_is_if.fpu_data.curr_PC :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.thread_mask = alu_valid ? commit_is_if.alu_data.thread_mask :
|
||||
lsu_valid ? commit_is_if.lsu_data.thread_mask :
|
||||
@@ -55,7 +62,7 @@ module VX_writeback #(
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.rd_is_fp = alu_valid ? 0 :
|
||||
lsu_valid ? 0 :
|
||||
lsu_valid ? commit_is_if.lsu_data.rd_is_fp :
|
||||
csr_valid ? 0 :
|
||||
mul_valid ? 0 :
|
||||
fpu_valid ? commit_is_if.fpu_data.rd_is_fp :
|
||||
@@ -71,14 +78,14 @@ module VX_writeback #(
|
||||
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1)
|
||||
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1)
|
||||
) wb_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.thread_mask, writeback_tmp_if.rd, writeback_tmp_if.rd_is_fp, writeback_tmp_if.data}),
|
||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.thread_mask, writeback_if.rd, writeback_if.rd_is_fp, writeback_if.data})
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.thread_mask, writeback_tmp_if.rd, writeback_tmp_if.rd_is_fp, writeback_tmp_if.data}),
|
||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.curr_PC, writeback_if.thread_mask, writeback_if.rd, writeback_if.rd_is_fp, writeback_if.data})
|
||||
);
|
||||
|
||||
assign alu_commit_if.ready = !stall;
|
||||
|
||||
@@ -12,7 +12,7 @@ interface VX_decode_if ();
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] instr_op;
|
||||
wire [`OP_BITS-1:0] ex_op;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
|
||||
@@ -7,7 +7,12 @@ interface VX_wb_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [31:0] curr_PC;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire rd_is_fp;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
#include <vector>
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 100
|
||||
#define DRAM_LATENCY 4
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
|
||||
@@ -10,7 +10,8 @@ int main(int argc, char **argv)
|
||||
bool passed = true;
|
||||
|
||||
std::string tests[] = {
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-add.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-fadd.hex",
|
||||
/*"../../../benchmarks/riscv_tests/rv32ui-p-add.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-addi.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-and.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-andi.hex",
|
||||
@@ -48,6 +49,7 @@ int main(int argc, char **argv)
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-sw.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-xor.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-xori.hex",
|
||||
#ifdef EXT_M_ENABLE
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-div.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-divu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-mul.hex",
|
||||
@@ -55,7 +57,21 @@ int main(int argc, char **argv)
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-mulhsu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-mulhu.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-rem.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-remu.hex"
|
||||
"../../../benchmarks/riscv_tests/rv32um-p-remu.hex",
|
||||
#endif
|
||||
#ifdef EXT_F_ENABLE
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-fadd.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-fdiv.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-fmadd.hex"
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-fmin.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-fcmp.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-fclass.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-ldst.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-fcvt.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-fcvt_w.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-move.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32uf-p-recoding.hex",
|
||||
#endif*/
|
||||
};
|
||||
|
||||
for (std::string test : tests) {
|
||||
|
||||
Reference in New Issue
Block a user