fpu implementation (part1)

This commit is contained in:
Blaise Tine
2020-07-23 03:18:09 -07:00
parent 6836f397f8
commit 75e3c31b56
31 changed files with 662 additions and 159 deletions

View File

@@ -13,16 +13,19 @@ module VX_issue #(
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
VX_gpr_data_if gpr_data_if();
wire schedule_delay;
wire gpr_delay;
wire alu_busy = ~alu_req_if.ready/* && (| alu_req_if.valid)*/;
wire lsu_busy = ~lsu_req_if.ready/* && (| lsu_req_if.valid)*/;
wire csr_busy = ~csr_req_if.ready/* && (| csr_req_if.valid)*/;
wire mul_busy = ~mul_req_if.ready/* && (| mul_req_if.valid)*/;
wire gpu_busy = ~gpu_req_if.ready/* && (| gpu_req_if.valid)*/;
wire alu_busy = ~alu_req_if.ready;
wire lsu_busy = ~lsu_req_if.ready;
wire csr_busy = ~csr_req_if.ready;
wire mul_busy = ~mul_req_if.ready;
wire fpu_busy = ~mul_req_if.ready;
wire gpu_busy = ~gpu_req_if.ready;
VX_scheduler #(
.CORE_ID(CORE_ID)
@@ -31,10 +34,12 @@ module VX_issue #(
.reset (reset),
.decode_if (decode_if),
.writeback_if (writeback_if),
.gpr_busy (gpr_delay),
.alu_busy (alu_busy),
.lsu_busy (lsu_busy),
.csr_busy (csr_busy),
.mul_busy (mul_busy),
.fpu_busy (fpu_busy),
.gpu_busy (gpu_busy),
.schedule_delay (schedule_delay),
`UNUSED_PIN (is_empty)
@@ -43,16 +48,20 @@ module VX_issue #(
VX_gpr_stage #(
.CORE_ID(CORE_ID)
) gpr_stage (
.clk (clk),
.clk (clk),
.reset (reset),
.decode_if (decode_if),
.writeback_if (writeback_if),
.gpr_data_if (gpr_data_if)
.gpr_data_if (gpr_data_if),
.schedule_delay (schedule_delay),
.gpr_delay (gpr_delay)
);
VX_alu_req_if alu_req_tmp_if();
VX_lsu_req_if lsu_req_tmp_if();
VX_csr_req_if csr_req_tmp_if();
VX_mul_req_if mul_req_tmp_if();
VX_fpu_req_if fpu_req_tmp_if();
VX_gpu_req_if gpu_req_tmp_if();
VX_issue_mux issue_mux (
@@ -62,6 +71,7 @@ module VX_issue #(
.lsu_req_if (lsu_req_tmp_if),
.csr_req_if (csr_req_tmp_if),
.mul_req_if (mul_req_tmp_if),
.fpu_req_if (fpu_req_tmp_if),
.gpu_req_if (gpu_req_tmp_if)
);
@@ -69,16 +79,18 @@ module VX_issue #(
wire stall_lsu = ~lsu_req_if.ready || schedule_delay;
wire stall_csr = ~csr_req_if.ready || schedule_delay;
wire stall_mul = ~mul_req_if.ready || schedule_delay;
wire stall_fpu = ~fpu_req_if.ready || schedule_delay;
wire stall_gpu = ~gpu_req_if.ready || schedule_delay;
wire flush_alu = alu_req_if.ready && schedule_delay;
wire flush_lsu = lsu_req_if.ready && schedule_delay;
wire flush_csr = csr_req_if.ready && schedule_delay;
wire flush_mul = mul_req_if.ready && schedule_delay;
wire flush_fpu = fpu_req_if.ready && schedule_delay;
wire flush_gpu = gpu_req_if.ready && schedule_delay;
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
) alu_reg (
.clk (clk),
.reset (reset),
@@ -89,7 +101,7 @@ module VX_issue #(
);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
) lsu_reg (
.clk (clk),
.reset (reset),
@@ -100,7 +112,7 @@ module VX_issue #(
);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + `WB_BITS + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + 1 + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
) csr_reg (
.clk (clk),
.reset (reset),
@@ -110,8 +122,8 @@ module VX_issue #(
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.wb, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
);
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
) mul_reg (
.clk (clk),
.reset (reset),
@@ -121,6 +133,17 @@ module VX_issue #(
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.wb, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data})
);
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
) fpu_reg (
.clk (clk),
.reset (reset),
.stall (stall_fpu),
.flush (flush_fpu),
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
) gpu_reg (
@@ -140,6 +163,9 @@ module VX_issue #(
if ((| mul_req_tmp_if.valid) && ~stall_mul) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data);
end
if ((| fpu_req_tmp_if.valid) && ~stall_fpu) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data);
end
if ((| lsu_req_tmp_if.valid) && ~stall_lsu) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset);
end