decode op_mod optimization

This commit is contained in:
Blaise Tine
2020-08-24 02:55:14 -07:00
parent f292e5003d
commit 57971f6c76
25 changed files with 221 additions and 209 deletions

View File

@@ -32,9 +32,9 @@ module VX_alu_unit #(
wire [1:0] alu_op_class_r;
wire [31:0] next_PC_r;
wire is_br_op = `IS_BR_OP(alu_req_if.op);
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op);
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op);
wire is_br_op = alu_req_if.is_br_op;
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
wire alu_signed = `ALU_SIGNED(alu_op);
wire [1:0] alu_op_class = `ALU_OP_CLASS(alu_op);
wire is_sub = (alu_op == `ALU_SUB);

View File

@@ -23,7 +23,7 @@ module VX_csr_arb (
assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
assign csr_req_if.thread_mask = (~select_io_req) ? csr_core_req_if.thread_mask : 0;
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
assign csr_req_if.op = (~select_io_req) ? csr_core_req_if.op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;

View File

@@ -67,7 +67,7 @@ module VX_csr_unit #(
always @(*) begin
csr_we_s0_unqual = 0;
case (csr_pipe_req_if.op)
case (csr_pipe_req_if.op_type)
`CSR_RW: begin
csr_updated_data = csr_pipe_req_if.csr_mask;
csr_we_s0_unqual = 1;

View File

@@ -202,12 +202,16 @@ module VX_decode #(
wire is_fcmp = is_fci && (func7 == 7'h50); // compare
wire is_fcvti = is_fci && (func7 == 7'h60); // convert to int
wire is_fcvtf = is_fci && (func7 == 7'h68); // convert to float
wire is_fmvcls = is_fci && (func7 == 7'h70 || func7 == 7'h78); // move + class
wire is_fmvw_clss = is_fci && (func7 == 7'h70); // move to int + class
wire is_fmvx = is_fci && (func7 == 7'h78); // move to float
wire is_fr4 = is_fmadd || is_fmsub || is_fnmsub || is_fnmadd;
wire is_fpu = (is_fl || is_fs || is_fci || is_fr4);
reg [2:0] frm;
always @(*) begin
fpu_op = `FPU_OTHER;
fpu_op = `FPU_MISC;
frm = func3;
if (is_fr4) begin
case ({is_fmadd, is_fmsub, is_fnmsub, is_fnmadd})
4'b1000: fpu_op = `FPU_MADD;
@@ -223,27 +227,38 @@ module VX_decode #(
7'h04: fpu_op = `FPU_SUB;
7'h08: fpu_op = `FPU_MUL;
7'h0C: fpu_op = `FPU_DIV;
7'h10: fpu_op = (func3[1]) ? `FPU_SGNJX : ((func3[0]) ? `FPU_SGNJN : `FPU_SGNJ);
7'h14: fpu_op = (func3 == 3'h0) ? `FPU_MIN : `FPU_MAX;
7'h10: begin
fpu_op = `FPU_MISC;
frm = (func3[1]) ? 2 : ((func3[0]) ? 1 : 0);
end
7'h14: begin
fpu_op = `FPU_MISC;
frm = (func3 == 3'h0) ? 3 : 4;
end
7'h2C: fpu_op = `FPU_SQRT;
7'h50: fpu_op = `FPU_CMP; // wb to intReg
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
7'h70: fpu_op = (func3 == 3'h0) ? `FPU_MVXW : `FPU_CLASS; // both wb to intReg
7'h78: fpu_op = `FPU_MVWX;
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
7'h70: begin
fpu_op = (func3 == 3'h0) ? `FPU_MISC : `FPU_CLASS;
frm = (func3 == 3'h0) ? 5 : func3;
end
7'h78: begin fpu_op = `FPU_MISC; frm = 6; end
default:;
endcase
end
end
`else
wire is_fl = 0;
wire is_fs = 0;
wire is_fci = 0;
wire is_fcvti = 0;
wire is_fcvtf = 0;
wire is_fmvcls = 0;
wire is_fr4 = 0;
wire is_fpu = 0;
wire is_fl = 0;
wire is_fs = 0;
wire is_fci = 0;
wire is_fcvti = 0;
wire is_fcvtf = 0;
wire is_fmvw_clss = 0;
wire is_fmvx = 0;
wire is_fr4 = 0;
wire is_fpu = 0;
wire [2:0] frm = 0;
always @(*) begin
fpu_op = `FPU_OTHER;
@@ -282,7 +297,7 @@ module VX_decode #(
|| is_gpu
|| ((is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || ~is_csr_imm || is_gpu) && (rs1 != 0));
wire use_rs2 = (is_fpu && ~(is_fl || (fpu_op == `FPU_SQRT) || is_fcvti || is_fcvtf || is_fmvcls))
wire use_rs2 = (is_fpu && ~(is_fl || (fpu_op == `FPU_SQRT) || is_fcvti || is_fcvtf || is_fmvw_clss || is_fmvx))
|| (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN))
|| ((is_btype || is_stype || is_rtype) && (rs2 != 0));
@@ -308,20 +323,20 @@ module VX_decode #(
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
`EX_NOP;
assign decode_if.ex_op = is_lsu ? `OP_BITS'(lsu_op) :
assign decode_if.op_type = is_lsu ? `OP_BITS'(lsu_op) :
is_csr ? `OP_BITS'(csr_op) :
is_mul ? `OP_BITS'(mul_op) :
is_fpu ? `OP_BITS'(fpu_op) :
is_gpu ? `OP_BITS'(gpu_op) :
is_br ? `OP_BITS'({1'b1, br_op}) :
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'({1'b0, alu_op}) :
0;
is_br ? `OP_BITS'(br_op) :
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
0;
assign decode_if.wb = use_rd;
`ifdef EXT_F_ENABLE
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || (fpu_op == `FPU_MVXW || fpu_op == `FPU_CLASS));
wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || (fpu_op == `FPU_MVWX)));
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || is_fmvw_clss);
wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || is_fmvx));
wire rs2_is_fp = is_fs || is_fr4 || is_fci;
assign decode_if.rd = {rd_is_fp, rd};
@@ -350,7 +365,7 @@ module VX_decode #(
assign decode_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
assign decode_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm || is_br;
assign decode_if.frm = func3;
assign decode_if.op_mod = is_fpu ? frm : (is_br ? 1 : 0);
///////////////////////////////////////////////////////////////////////////
@@ -375,7 +390,7 @@ module VX_decode #(
$write("%t: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.curr_PC);
print_ex_type(decode_if.ex_type);
$write(", op=");
print_ex_op(decode_if.ex_type, decode_if.ex_op);
print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
$write(", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, frm=", decode_if.thread_mask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm);
print_frm(decode_if.frm);
$write("\n");

View File

@@ -108,7 +108,8 @@
///////////////////////////////////////////////////////////////////////////////
`define OP_BITS 5
`define OP_BITS 4
`define MOD_BITS 3
`define ALU_ADD 4'b0000
`define ALU_SUB 4'b0001
@@ -147,10 +148,9 @@
`define BR_NEG(x) x[1]
`define BR_LESS(x) x[2]
`define BR_STATIC(x) x[3]
`define ALU_BR_BITS 5
`define ALU_BR_BITS 4
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
`define IS_BR_OP(x) x[4]
`define IS_BR_MOD(x) x[0]
`define LSU_LB {1'b0, `BYTEEN_SB}
`define LSU_LH {1'b0, `BYTEEN_SH}
@@ -185,30 +185,23 @@
`define MUL_OP(x) x[`MUL_BITS-1:0]
`define IS_DIV_OP(x) x[2]
`define FPU_ADD 5'h00
`define FPU_SUB 5'h01
`define FPU_MUL 5'h02
`define FPU_DIV 5'h03
`define FPU_SQRT 5'h04
`define FPU_MADD 5'h05
`define FPU_MSUB 5'h06
`define FPU_NMSUB 5'h07
`define FPU_NMADD 5'h08
`define FPU_SGNJ 5'h09 // FSGNJ
`define FPU_SGNJN 5'h0A // FSGNJN
`define FPU_SGNJX 5'h0B // FSGNJX
`define FPU_MIN 5'h0C // FMIN.S
`define FPU_MAX 5'h0D // FMAX.S
`define FPU_CVTWS 5'h0E // FCVT.W.S
`define FPU_CVTWUS 5'h0F // FCVT.WU.S
`define FPU_CVTSW 5'h10 // FCVT.S.W
`define FPU_CVTSWU 5'h11 // FCVT.S.WU
`define FPU_MVXW 5'h12 // MOV FP from fpReg to integer reg
`define FPU_MVWX 5'h13 // MOV FP from integer reg to fpReg
`define FPU_CLASS 5'h14
`define FPU_CMP 5'h15
`define FPU_OTHER 5'h1f
`define FPU_BITS 5
// FPU operation encodings. Values are 4 bits wide (`FPU_BITS) and are
// extracted from the generic operation field with `FPU_OP(x).
//
// `FPU_MISC is a shared code for several simple operations; the specific
// operation is chosen by the 3-bit modifier that accompanies the opcode:
//   0 = SGNJ, 1 = SGNJN, 2 = SGNJX, 3 = MIN, 4 = MAX, 5 = MVXW, 6 = MVWX
`define FPU_ADD 4'h0
`define FPU_SUB 4'h1
`define FPU_MUL 4'h2
`define FPU_DIV 4'h3
`define FPU_SQRT 4'h4
`define FPU_MADD 4'h5
`define FPU_MSUB 4'h6
`define FPU_NMSUB 4'h7
`define FPU_NMADD 4'h8
`define FPU_CVTWS 4'h9 // FCVT.W.S
`define FPU_CVTWUS 4'hA // FCVT.WU.S
`define FPU_CVTSW 4'hB // FCVT.S.W
`define FPU_CVTSWU 4'hC // FCVT.S.WU
`define FPU_CLASS 4'hD
`define FPU_CMP 4'hE
`define FPU_MISC 4'hF // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
// Width of the FPU opcode and accessor that slices it out of a wider field.
`define FPU_BITS 4
`define FPU_OP(x) x[`FPU_BITS-1:0]
`define GPU_TMC 3'h0

View File

@@ -122,9 +122,9 @@ module VX_execute #(
);
assign ebreak = alu_req_if.valid
&& `IS_BR_OP(alu_req_if.op)
&& (`BR_OP(alu_req_if.op) == `BR_EBREAK
|| `BR_OP(alu_req_if.op) == `BR_ECALL);
&& alu_req_if.is_br_op
&& (`BR_OP(alu_req_if.op_type) == `BR_EBREAK
|| `BR_OP(alu_req_if.op_type) == `BR_ECALL);
`SCOPE_ASSIGN (scope_decode_valid, decode_if.valid);
`SCOPE_ASSIGN (scope_decode_wid, decode_if.wid);

View File

@@ -69,7 +69,7 @@ module VX_fpu_unit #(
.tag_in (tag_in),
.op (fpu_req_if.op),
.op_type (fpu_req_if.op_type),
.frm (fpu_req_if.frm),
.dataa (fpu_req_if.rs1_data),
@@ -103,7 +103,7 @@ module VX_fpu_unit #(
.tag_in (tag_in),
.op (fpu_req_if.op),
.op_type (fpu_req_if.op_type),
.frm (fpu_req_if.frm),
.dataa (fpu_req_if.rs1_data),

View File

@@ -32,18 +32,15 @@ module VX_gpr_fp_ctrl (
end else if (read_fire) begin
read_rs3 <= 0;
end
if (read_rs3) begin
assert(rs3_wid == gpr_read_if.wid);
if (~read_rs3) begin
rs1_tmp_data <= rs1_data;
end
end
end
rs2_tmp_data <= rs2_data;
rs3_tmp_data <= rs1_data;
always @(posedge clk) begin
if (~read_rs3) begin
rs1_tmp_data <= rs1_data;
end
rs2_tmp_data <= rs2_data;
rs3_tmp_data <= rs1_data;
assert(!read_rs3 || rs3_wid == gpr_read_if.wid);
end
end
// outputs

View File

@@ -18,10 +18,10 @@ module VX_gpu_unit #(
gpu_barrier_t barrier;
gpu_split_t split;
wire is_wspawn = (gpu_req_if.op == `GPU_WSPAWN);
wire is_tmc = (gpu_req_if.op == `GPU_TMC);
wire is_split = (gpu_req_if.op == `GPU_SPLIT);
wire is_bar = (gpu_req_if.op == `GPU_BAR);
wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN);
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
// tmc

View File

@@ -159,8 +159,8 @@ module VX_ibuffer #(
assign q_data_in = {ibuf_enq_if.thread_mask,
ibuf_enq_if.curr_PC,
ibuf_enq_if.ex_type,
ibuf_enq_if.ex_op,
ibuf_enq_if.frm,
ibuf_enq_if.op_type,
ibuf_enq_if.op_mod,
ibuf_enq_if.wb,
ibuf_enq_if.rd,
ibuf_enq_if.rs1,
@@ -177,8 +177,8 @@ module VX_ibuffer #(
assign {ibuf_deq_if.thread_mask,
ibuf_deq_if.curr_PC,
ibuf_deq_if.ex_type,
ibuf_deq_if.ex_op,
ibuf_deq_if.frm,
ibuf_deq_if.op_type,
ibuf_deq_if.op_mod,
ibuf_deq_if.wb,
ibuf_deq_if.rd,
ibuf_deq_if.rs1,

View File

@@ -21,6 +21,7 @@ module VX_instr_demux (
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
wire alu_req_ready;
wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
wire [`NT_BITS-1:0] tid;
VX_priority_encoder #(
@@ -32,14 +33,14 @@ module VX_instr_demux (
);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `ALU_BR_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS)
) alu_reg (
.clk (clk),
.reset (reset),
.ready_in (alu_req_ready),
.valid_in (alu_req_valid),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `ALU_BR_OP(execute_if.ex_op), execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}),
.data_out ({alu_req_if.wid, alu_req_if.thread_mask, alu_req_if.curr_PC, alu_req_if.op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid}),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}),
.data_out ({alu_req_if.wid, alu_req_if.thread_mask, alu_req_if.curr_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid}),
.ready_out (alu_req_if.ready),
.valid_out (alu_req_if.valid)
);
@@ -67,7 +68,7 @@ module VX_instr_demux (
.reset (reset),
.ready_in (lsu_req_ready),
.valid_in (lsu_req_valid),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `LSU_RW(execute_if.ex_op), `LSU_BE(execute_if.ex_op), execute_if.imm, execute_if.rd, execute_if.wb}),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}),
.data_out ({lsu_req_if.wid, lsu_req_if.thread_mask, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb}),
.ready_out (lsu_req_if.ready),
.valid_out (lsu_req_if.valid)
@@ -96,8 +97,8 @@ module VX_instr_demux (
.reset (reset),
.ready_in (csr_req_ready),
.valid_in (csr_req_valid),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `CSR_OP(execute_if.ex_op), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}),
.data_out ({csr_req_if.wid, csr_req_if.thread_mask, csr_req_if.curr_PC, csr_req_if.op, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io}),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}),
.data_out ({csr_req_if.wid, csr_req_if.thread_mask, csr_req_if.curr_PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io}),
.ready_out (csr_req_if.ready),
.valid_out (csr_req_if.valid)
);
@@ -136,8 +137,8 @@ module VX_instr_demux (
.reset (reset),
.ready_in (mul_req_ready),
.valid_in (mul_req_valid),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `MUL_OP(execute_if.ex_op), execute_if.rd, execute_if.wb}),
.data_out ({mul_req_if.wid, mul_req_if.thread_mask, mul_req_if.curr_PC, mul_req_if.op, mul_req_if.rd, mul_req_if.wb}),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
.data_out ({mul_req_if.wid, mul_req_if.thread_mask, mul_req_if.curr_PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb}),
.ready_out (mul_req_if.ready),
.valid_out (mul_req_if.valid)
);
@@ -162,7 +163,7 @@ module VX_instr_demux (
// resolve dynamic FRM
assign csr_to_issue_if.wid = execute_if.wid;
wire [`FRM_BITS-1:0] fpu_frm = (execute_if.frm == `FRM_DYN) ? csr_to_issue_if.frm : execute_if.frm;
wire [`FRM_BITS-1:0] fpu_frm = (execute_if.op_mod == `FRM_DYN) ? csr_to_issue_if.frm : execute_if.op_mod;
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `FRM_BITS + `NR_BITS + 1)
@@ -171,8 +172,8 @@ module VX_instr_demux (
.reset (reset),
.ready_in (fpu_req_ready),
.valid_in (fpu_req_valid),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `FPU_OP(execute_if.ex_op), fpu_frm, execute_if.rd, execute_if.wb}),
.data_out ({fpu_req_if.wid, fpu_req_if.thread_mask, fpu_req_if.curr_PC, fpu_req_if.op, fpu_req_if.frm, fpu_req_if.rd, fpu_req_if.wb}),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `FPU_OP(execute_if.op_type), fpu_frm, execute_if.rd, execute_if.wb}),
.data_out ({fpu_req_if.wid, fpu_req_if.thread_mask, fpu_req_if.curr_PC, fpu_req_if.op_type, fpu_req_if.frm, fpu_req_if.rd, fpu_req_if.wb}),
.ready_out (fpu_req_if.ready),
.valid_out (fpu_req_if.valid)
);
@@ -201,8 +202,8 @@ module VX_instr_demux (
.reset (reset),
.ready_in (gpu_req_ready),
.valid_in (gpu_req_valid),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `GPU_OP(execute_if.ex_op), execute_if.rd, execute_if.wb}),
.data_out ({gpu_req_if.wid, gpu_req_if.thread_mask, gpu_req_if.curr_PC, gpu_req_if.op, gpu_req_if.rd, gpu_req_if.wb}),
.data_in ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
.data_out ({gpu_req_if.wid, gpu_req_if.thread_mask, gpu_req_if.curr_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb}),
.ready_out (gpu_req_if.ready),
.valid_out (gpu_req_if.valid)
);

View File

@@ -67,8 +67,8 @@ module VX_issue #(
assign execute_if.thread_mask = ibuf_deq_if.thread_mask;
assign execute_if.curr_PC = ibuf_deq_if.curr_PC;
assign execute_if.ex_type = ibuf_deq_if.ex_type;
assign execute_if.ex_op = ibuf_deq_if.ex_op;
assign execute_if.frm = ibuf_deq_if.frm;
assign execute_if.op_type = ibuf_deq_if.op_type;
assign execute_if.op_mod = ibuf_deq_if.op_mod;
assign execute_if.wb = ibuf_deq_if.wb;
assign execute_if.rd = ibuf_deq_if.rd;
assign execute_if.rs1 = ibuf_deq_if.rs1;

View File

@@ -14,7 +14,7 @@ module VX_mul_unit #(
);
localparam MULQ_BITS = `LOG2UP(`MULQ_SIZE);
wire [`MUL_BITS-1:0] alu_op = mul_req_if.op;
wire [`MUL_BITS-1:0] alu_op = mul_req_if.op_type;
wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;

View File

@@ -19,13 +19,14 @@ task print_ex_type;
endtask
task print_ex_op;
input [`EX_BITS-1:0] ex;
input [`OP_BITS-1:0] op;
input [`EX_BITS-1:0] ex_type;
input [`OP_BITS-1:0] op_type;
input [`OP_BITS-1:0] op_mod;
begin
case (ex)
case (ex_type)
`EX_ALU: begin
if (`IS_BR_OP(op)) begin
case (`BR_BITS'(op))
if (`IS_BR_MOD(op_mod)) begin
case (`BR_BITS'(op_type))
`BR_EQ: $write("BEQ");
`BR_NE: $write("BNE");
`BR_LT: $write("BLT");
@@ -42,7 +43,7 @@ task print_ex_op;
default: $write("?");
endcase
end else begin
case (`ALU_BITS'(op))
case (`ALU_BITS'(op_type))
`ALU_ADD: $write("ADD");
`ALU_SUB: $write("SUB");
`ALU_SLL: $write("SLL");
@@ -60,7 +61,7 @@ task print_ex_op;
end
end
`EX_LSU: begin
case (`LSU_BITS'(op))
case (`LSU_BITS'(op_type))
`LSU_LB: $write("LB");
`LSU_LH: $write("LH");
`LSU_LW: $write("LW");
@@ -75,7 +76,7 @@ task print_ex_op;
endcase
end
`EX_CSR: begin
case (`CSR_BITS'(op))
case (`CSR_BITS'(op_type))
`CSR_RW: $write("CSRW");
`CSR_RS: $write("CSRS");
`CSR_RC: $write("CSRC");
@@ -83,7 +84,7 @@ task print_ex_op;
endcase
end
`EX_MUL: begin
case (`MUL_BITS'(op))
case (`MUL_BITS'(op_type))
`MUL_MUL: $write("MUL");
`MUL_MULH: $write("MULH");
`MUL_MULHSU:$write("MULHSU");
@@ -96,7 +97,7 @@ task print_ex_op;
endcase
end
`EX_FPU: begin
case (`FPU_BITS'(op))
case (`FPU_BITS'(op_type))
`FPU_ADD: $write("ADD");
`FPU_SUB: $write("SUB");
`FPU_MUL: $write("MUL");
@@ -104,25 +105,29 @@ task print_ex_op;
`FPU_SQRT: $write("SQRT");
`FPU_MADD: $write("MADD");
`FPU_NMSUB: $write("NMSUB");
`FPU_NMADD: $write("NMADD");
`FPU_SGNJ: $write("SGNJ");
`FPU_SGNJN: $write("SGNJN");
`FPU_SGNJX: $write("SGNJX");
`FPU_MIN: $write("MIN");
`FPU_MAX: $write("MAX");
`FPU_NMADD: $write("NMADD");
`FPU_CVTWS: $write("CVTWS");
`FPU_CVTWUS:$write("CVTWUS");
`FPU_CVTSW: $write("CVTSW");
`FPU_CVTSWU:$write("CVTSWU");
`FPU_MVXW: $write("MVXW");
`FPU_MVWX: $write("MVWX");
`FPU_CLASS: $write("CLASS");
`FPU_CMP: $write("CMP");
`FPU_MISC: begin
case (op_mod)
0: $write("SGNJ");
1: $write("SGNJN");
2: $write("SGNJX");
3: $write("MIN");
4: $write("MAX");
5: $write("MVXW");
6: $write("MVWX");
endcase
end
default: $write("?");
endcase
end
`EX_GPU: begin
case (`GPU_BITS'(op))
case (`GPU_BITS'(op_type))
`GPU_TMC: $write("TMC");
`GPU_WSPAWN:$write("WSPAWN");
`GPU_SPLIT: $write("SPLIT");
@@ -151,4 +156,4 @@ task print_frm;
end
endtask
`endif
`endif

View File

@@ -12,8 +12,8 @@ module VX_fp_fpga #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op,
input wire [`FRM_BITS-1:0] frm,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@@ -34,7 +34,7 @@ module VX_fp_fpga #(
wire [NUM_FPC-1:0] per_core_ready_in;
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] per_core_result;
wire [NUM_FPC-1:0][TAGW-1:0] per_core_tag_out;
wire [NUM_FPC-1:0] per_core_ready_out;
reg [NUM_FPC-1:0] per_core_ready_out;
wire [NUM_FPC-1:0] per_core_valid_out;
wire fpnew_has_fflags;
@@ -46,7 +46,7 @@ module VX_fp_fpga #(
always @(*) begin
core_select = 0;
fmadd_negate = 0;
case (op)
case (op_type)
`FPU_ADD: core_select = 1;
`FPU_SUB: core_select = 2;
`FPU_MUL: core_select = 3;
@@ -73,8 +73,8 @@ module VX_fp_fpga #(
.valid_in (valid_in && (core_select == 0)),
.ready_in (per_core_ready_in[0]),
.tag_in (tag_in),
.op (op),
.frm (frm),
.op_type (op_type),
.frm (op_mod),
.dataa (dataa),
.datab (datab),
.result (per_core_result[0]),
@@ -271,26 +271,34 @@ module VX_fp_fpga #(
.valid_out (per_core_valid_out[11])
);
wire [FPC_BITS-1:0] fp_index;
wire fp_valid;
VX_priority_encoder #(
.N(NUM_FPC)
) wb_select (
.data_in (per_core_valid_out),
.data_out (fp_index),
.valid_out (fp_valid)
);
reg valid_out_r;
reg has_fflags_r;
reg [`NUM_THREADS-1:0][31:0] result_r;
reg [TAGW-1:0] tag_out_r;
for (genvar i = 0; i < NUM_FPC; i++) begin
assign per_core_ready_out[i] = ready_out && (i == fp_index);
// Output selection: forward the response of the lowest-indexed functional
// unit that currently has a valid result (fixed priority, index 0 first).
// All other units see ready=0 and therefore hold their result.
always @(*) begin
    per_core_ready_out = 0;
    valid_out_r        = 1'b0;
    has_fflags_r       = 1'b0;
    result_r           = 'x;   // don't-care while no unit is valid
    tag_out_r          = 'x;
    for (integer i = 0; i < NUM_FPC; i++) begin
        if (per_core_valid_out[i]) begin
            // Only pop the selected unit when the downstream consumer
            // accepts the response; an unconditional 1 would discard the
            // result whenever ready_out is low.
            per_core_ready_out[i] = ready_out;
            // valid is a 1-bit flag: assigning the loop index would
            // truncate to its LSB and leave even-indexed units (including
            // unit 0) without a valid_out.
            valid_out_r  = 1'b1;
            // fflags presence is only reported through unit 0.
            has_fflags_r = fpnew_has_fflags && (i == 0);
            result_r     = per_core_result[i];
            tag_out_r    = per_core_tag_out[i];
            break;
        end
    end
end
assign ready_in = (& per_core_ready_in);
assign valid_out = fp_valid;
assign tag_out = per_core_tag_out[fp_index];
assign result = per_core_result[fp_index];
assign has_fflags = fpnew_has_fflags && (fp_index == 0);
assign valid_out = valid_out_r;
assign has_fflags = has_fflags_r;
assign tag_out = tag_out_r;
assign result = result_r;
assign fflags = fpnew_fflags;
endmodule

View File

@@ -12,7 +12,7 @@ module VX_fp_noncomp #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op,
input wire [`FPU_BITS-1:0] op_type,
input wire [`FRM_BITS-1:0] frm,
input wire [LANES-1:0][31:0] dataa,
@@ -38,7 +38,7 @@ module VX_fp_noncomp #(
SIG_NAN = 32'h00000100,
QUT_NAN = 32'h00000200;
reg [`FPU_BITS-1:0] op_r;
reg [`FPU_BITS-1:0] op_type_r;
reg [`FRM_BITS-1:0] frm_r;
reg [LANES-1:0][31:0] dataa_r;
@@ -103,10 +103,10 @@ module VX_fp_noncomp #(
always @(posedge clk) begin
if (~stall) begin
op_r <= op;
frm_r <= frm;
dataa_r <= dataa;
datab_r <= datab;
op_type_r <= op_type;
frm_r <= frm;
dataa_r <= dataa;
datab_r <= datab;
end
end
@@ -144,10 +144,10 @@ module VX_fp_noncomp #(
else if (b_type[i].is_nan)
fminmax_res[i] = dataa_r[i];
else begin
case (op_r) // use LSB to distinguish MIN and MAX
`FPU_MIN: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
`FPU_MAX: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
case (frm_r) // use LSB to distinguish MIN and MAX
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
endcase
end
end
@@ -156,11 +156,11 @@ module VX_fp_noncomp #(
// Sign Injection
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (op_r)
`FPU_SGNJ: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
`FPU_SGNJN: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
`FPU_SGNJX: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
case (frm_r)
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
endcase
end
end
@@ -210,56 +210,45 @@ module VX_fp_noncomp #(
// outputs
reg tmp_valid;
reg tmp_has_fflags;
fflags_t [LANES-1:0] tmp_fflags;
reg [LANES-1:0][31:0] tmp_result;
always @(*) begin
case (op_r)
`FPU_SGNJ: tmp_has_fflags = 0;
`FPU_SGNJN: tmp_has_fflags = 0;
`FPU_SGNJX: tmp_has_fflags = 0;
`FPU_MVXW: tmp_has_fflags = 0;
`FPU_MVWX: tmp_has_fflags = 0;
`FPU_CLASS: tmp_has_fflags = 0;
default: tmp_has_fflags = 1;
endcase
end
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
tmp_valid = 1'b1;
case (op_r)
tmp_result[i] = 32'hdeadbeaf;
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
case (op_type_r)
`FPU_CLASS: begin
tmp_result[i] = fclass_mask[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
end
`FPU_MVXW,`FPU_MVWX: begin
tmp_result[i] = dataa[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
end
`FPU_MIN,`FPU_MAX: begin
tmp_result[i] = fminmax_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0};
end
`FPU_SGNJ,`FPU_SGNJN,`FPU_SGNJX: begin
tmp_result[i] = fsgnj_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
end
end
`FPU_CMP: begin
tmp_result[i] = fcmp_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = fcmp_excp[i];
end
default: begin
tmp_result[i] = 32'hdeadbeaf;
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
tmp_valid = 1'b0;
end
end
`FPU_MISC: begin
case (frm)
0,1,2: begin
tmp_result[i] = fsgnj_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
end
3,4: begin
tmp_result[i] = fminmax_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0};
end
5,6: begin
tmp_result[i] = dataa[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
end
endcase
end
endcase
end
end
// Exception flags are only produced by MIN/MAX (FPU_MISC with modifier 3/4)
// and by CMP. The modifier is taken from frm_r, the copy registered together
// with op_type_r, so that both terms describe the same instruction; the raw
// frm input may already belong to the next request.
wire tmp_has_fflags = ((op_type_r == `FPU_MISC) && (frm_r == 3 || frm_r == 4)) // MIN/MAX
                   || (op_type_r == `FPU_CMP); // CMP
VX_generic_register #(
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
) nc_reg (
@@ -267,7 +256,7 @@ module VX_fp_noncomp #(
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({tmp_valid, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}),
.in ({valid_in, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}),
.out ({valid_out, tag_out, result, has_fflags, fflags})
);

View File

@@ -17,8 +17,8 @@ module VX_fpnew #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op,
input wire [`FRM_BITS-1:0] frm,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@@ -91,7 +91,7 @@ module VX_fpnew #(
fpu_operands[0] = dataa;
fpu_operands[1] = datab;
fpu_operands[2] = datac;
case (op)
case (op_type)
`FPU_ADD: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
@@ -110,19 +110,22 @@ module VX_fpnew #(
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
`FPU_MISC: begin
case (frm)
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
endcase
end
default:;
endcase
end

View File

@@ -10,7 +10,8 @@ interface VX_alu_req_if ();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] thread_mask;
wire [31:0] curr_PC;
wire [`ALU_BR_BITS-1:0] op;
wire [`ALU_BR_BITS-1:0] op_type;
wire is_br_op;
wire rs1_is_PC;
wire rs2_is_imm;
wire [31:0] imm;

View File

@@ -10,7 +10,7 @@ interface VX_csr_req_if ();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] thread_mask;
wire [31:0] curr_PC;
wire [`CSR_BITS-1:0] op;
wire [`CSR_BITS-1:0] op_type;
wire [`CSR_ADDR_BITS-1:0] csr_addr;
wire [31:0] csr_mask;
wire [`NR_BITS-1:0] rd;

View File

@@ -12,8 +12,8 @@ interface VX_decode_if ();
wire [31:0] curr_PC;
wire [`EX_BITS-1:0] ex_type;
wire [`OP_BITS-1:0] ex_op;
wire [`FRM_BITS-1:0] frm;
wire [`OP_BITS-1:0] op_type;
wire [`MOD_BITS-1:0] op_mod;
wire wb;
wire [`NR_BITS-1:0] rd;

View File

@@ -14,7 +14,7 @@ interface VX_fpu_req_if ();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] thread_mask;
wire [31:0] curr_PC;
wire [`FPU_BITS-1:0] op;
wire [`FPU_BITS-1:0] op_type;
wire [`FRM_BITS-1:0] frm;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;

View File

@@ -10,7 +10,7 @@ interface VX_gpu_req_if();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] thread_mask;
wire [31:0] curr_PC;
wire [`GPU_BITS-1:0] op;
wire [`GPU_BITS-1:0] op_type;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [31:0] rs2_data;
wire [`NR_BITS-1:0] rd;

View File

@@ -13,7 +13,7 @@ interface VX_issue_if ();
wire [31:0] curr_PC;
wire [`EX_BITS-1:0] ex_type;
wire [`OP_BITS-1:0] ex_op;
wire [`OP_BITS-1:0] op_type;
wire [`FRM_BITS-1:0] frm;

View File

@@ -14,7 +14,7 @@ interface VX_mul_req_if ();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] thread_mask;
wire [31:0] curr_PC;
wire [`MUL_BITS-1:0] op;
wire [`MUL_BITS-1:0] op_type;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NR_BITS-1:0] rd;

View File

@@ -3,7 +3,7 @@
#include <fstream>
#include <iomanip>
//#define ALL_TESTS
#define ALL_TESTS
int main(int argc, char **argv) {
if (argc == 1) {