add fpnew

This commit is contained in:
Blaise Tine
2020-07-23 06:23:05 -04:00
46 changed files with 867 additions and 341 deletions

View File

@@ -16,12 +16,12 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
DEBUG=1
#DEBUG=1
#AFU=1
CFLAGS += -fPIC

View File

@@ -4,13 +4,16 @@ FPGA_BUILD_DIR=build_fpga
all: ase-1c
ase-1c: setup-ase-1c
sources.txt:
./gen_sources.sh
ase-1c: setup-ase-1c sources.txt
make -C $(ASE_BUILD_DIR)_1c
ase-2c: setup-ase-2c
ase-2c: setup-ase-2c sources.txt
make -C $(ASE_BUILD_DIR)_2c
ase-4c: setup-ase-4c
ase-4c: setup-ase-4c sources.txt
make -C $(ASE_BUILD_DIR)_4c
setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile
@@ -28,13 +31,13 @@ $(ASE_BUILD_DIR)_2c/Makefile:
$(ASE_BUILD_DIR)_4c/Makefile:
afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c
fpga-1c: setup-fpga-1c
fpga-1c: setup-fpga-1c sources.txt
cd $(FPGA_BUILD_DIR)_1c && qsub-synth
fpga-2c: setup-fpga-2c
fpga-2c: setup-fpga-2c sources.txt
cd $(FPGA_BUILD_DIR)_2c && qsub-synth
fpga-4c: setup-fpga-4c
fpga-4c: setup-fpga-4c sources.txt
cd $(FPGA_BUILD_DIR)_4c && qsub-synth
setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf

View File

@@ -60,8 +60,8 @@ qsub-sim
make ase
# tests
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
# modify "vsim_run.tcl" to dump VCD trace

21
hw/opae/gen_sources.sh Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl'
inc_list=""
for dir in $dir_list; do
inc_list="$inc_list -I$dir"
done
echo "inc_list=$inc_list"
{
# read design sources
for dir in $dir_list; do
echo "+incdir+$dir"
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f)
do
echo $file
done
done
} > sources.txt

View File

@@ -1,46 +1,34 @@
vortex_afu.json
QI:vortex_afu.qsf
#+define+SCOPE
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE
#+define+DBG_PRINT_SCOPE
+incdir+.
+incdir+../rtl
+incdir+../rtl/interfaces
+incdir+../rtl/pipe_regs
+incdir+../rtl/cache
+incdir+../rtl/libs
../rtl/VX_user_config.vh
../rtl/VX_config.vh
../rtl/VX_define.vh
../rtl/cache/VX_cache_config.vh
../rtl/cache/VX_cache.v
../rtl/cache/VX_cache_core_rsp_merge.v
../rtl/cache/VX_cache_core_req_bank_sel.v
../rtl/cache/VX_cache_dram_req_arb.v
../rtl/cache/VX_cache_dram_fill_arb.v
../rtl/cache/VX_cache_miss_resrv.v
../rtl/libs/VX_countones.v
../rtl/libs/VX_divide.v
../rtl/libs/VX_fair_arbiter.v
../rtl/libs/VX_fixed_arbiter.v
../rtl/libs/VX_generic_queue.v
../rtl/libs/VX_generic_register.v
../rtl/libs/VX_generic_stack.v
../rtl/libs/VX_index_queue.v
../rtl/libs/VX_matrix_arbiter.v
../rtl/libs/VX_mult.v
../rtl/libs/VX_priority_encoder.v
../rtl/libs/VX_rr_arbiter.v
../rtl/libs/VX_onehot_encooder.v
+incdir+../rtl/cache
../rtl/cache/VX_bank.v
../rtl/cache/VX_bank_core_req_arb.v
../rtl/cache/VX_cache.v
../rtl/cache/VX_cache_core_req_bank_sel.v
../rtl/cache/VX_cache_core_rsp_merge.v
../rtl/cache/VX_cache_dram_fill_arb.v
../rtl/cache/VX_cache_dram_req_arb.v
../rtl/cache/VX_cache_miss_resrv.v
../rtl/cache/VX_prefetcher.v
../rtl/cache/VX_snp_forwarder.v
../rtl/cache/VX_snp_rsp_arb.v
../rtl/cache/VX_tag_data_access.v
../rtl/cache/VX_tag_data_structure.v
../rtl/cache/VX_snp_forwarder.v
../rtl/cache/VX_prefetcher.v
../rtl/interfaces/VX_branch_rsp_if.v
+incdir+../rtl/interfaces
../rtl/interfaces/VX_alu_req_if.v
../rtl/interfaces/VX_branch_ctl_if.v
../rtl/interfaces/VX_cache_core_req_if.v
../rtl/interfaces/VX_cache_core_rsp_if.v
../rtl/interfaces/VX_cache_dram_req_if.v
@@ -48,65 +36,46 @@ QI:vortex_afu.qsf
../rtl/interfaces/VX_cache_snp_req_if.v
../rtl/interfaces/VX_cache_snp_rsp_if.v
../rtl/interfaces/VX_csr_req_if.v
../rtl/interfaces/VX_commit_if.v
../rtl/interfaces/VX_csr_io_req_if.v
../rtl/interfaces/VX_csr_io_rsp_if.v
../rtl/interfaces/VX_exec_unit_req_if.v
../rtl/interfaces/VX_backend_req_if.v
../rtl/interfaces/VX_gpr_read_if.v
../rtl/interfaces/VX_gpu_inst_req_if.v
../rtl/interfaces/VX_inst_meta_if.v
../rtl/interfaces/VX_jal_rsp_if.v
../rtl/interfaces/VX_decode_if.v
../rtl/interfaces/VX_gpr_data_if.v
../rtl/interfaces/VX_gpu_req_if.v
../rtl/interfaces/VX_join_if.v
../rtl/interfaces/VX_lsu_req_if.v
../rtl/interfaces/VX_warp_ctl_if.v
../rtl/interfaces/VX_wb_if.v
../rtl/interfaces/VX_wstall_if.v
../rtl/libs/VX_generic_register.v
../rtl/libs/VX_mult.v
../rtl/libs/VX_divide.v
../rtl/libs/VX_generic_stack.v
../rtl/libs/VX_priority_encoder.v
../rtl/libs/VX_generic_queue.v
../rtl/libs/VX_indexable_queue.v
../rtl/libs/VX_fair_arbiter.v
../rtl/libs/VX_fixed_arbiter.v
../rtl/libs/VX_rr_arbiter.v
../rtl/libs/VX_countones.v
../rtl/libs/VX_scope.v
../rtl/Vortex.v
../rtl/interfaces/VX_csr_io_rsp_if.v
../rtl/interfaces/VX_ifetch_req_if.v
../rtl/interfaces/VX_ifetch_rsp_if.v
../rtl/interfaces/VX_mul_req_if.v
../rtl/interfaces/VX_perf_cntrs_if.v
+incdir+../rtl
../rtl/VX_alu_unit.v
../rtl/VX_commit.v
../rtl/VX_cluster.v
../rtl/VX_core.v
../rtl/VX_mem_unit.v
../rtl/VX_pipeline.v
../rtl/VX_front_end.v
../rtl/VX_back_end.v
../rtl/VX_fetch.v
../rtl/VX_scheduler.v
../rtl/VX_exec_unit.v
../rtl/VX_warp.v
../rtl/VX_icache_stage.v
../rtl/VX_gpr_wrapper.v
../rtl/VX_gpu_inst.v
../rtl/VX_writeback.v
../rtl/VX_csr_pipe.v
../rtl/VX_csr_data.v
../rtl/VX_csr_arb.v
../rtl/VX_dcache_arb.v
../rtl/VX_decode.v
../rtl/VX_csr_io_arb.v
../rtl/VX_warp_sched.v
../rtl/VX_fetch.v
../rtl/VX_csr_unit.v
../rtl/VX_gpr_ram.v
../rtl/VX_gpr_stage.v
../rtl/VX_alu_unit.v
../rtl/VX_execute.v
../rtl/VX_gpu_unit.v
../rtl/VX_icache_stage.v
../rtl/VX_issue.v
../rtl/VX_lsu_unit.v
../rtl/VX_decode.v
../rtl/VX_inst_multiplex.v
../rtl/VX_dcache_arb.v
../rtl/VX_mem_arb.v
../rtl/VX_f_d_reg.v
../rtl/VX_i_d_reg.v
../rtl/VX_d_e_reg.v
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
../rtl/VX_mem_unit.v
../rtl/VX_pipeline.v
../rtl/VX_scheduler.v
../rtl/VX_issue_mux.v
../rtl/VX_warp_sched.v
../rtl/VX_writeback.v
../rtl/Vortex.v
../rtl/VX_mul_unit.v

View File

@@ -1,3 +1,21 @@
+define+NUM_CORES=1
#+define+SCOPE
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE
#+define+DBG_PRINT_SCOPE
vortex_afu.json
QI:vortex_afu.qsf
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
C:sources.txt

View File

@@ -1,4 +1,10 @@
+define+NUM_CORES=2
+define+L2_ENABLE=0
vortex_afu.json
QI:vortex_afu.qsf
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
C:sources.txt

View File

@@ -1,4 +1,10 @@
+define+NUM_CORES=4
+define+L2_ENABLE=0
vortex_afu.json
QI:vortex_afu.qsf
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
C:sources.txt

View File

@@ -1,4 +1,6 @@
`include "VX_define.vh"
`include "fpnew_pkg.sv"
`include "defs_div_sqrt_mvp.sv"
module VX_alu_unit #(
parameter CORE_ID = 0
@@ -13,7 +15,7 @@ module VX_alu_unit #(
VX_branch_ctl_if branch_ctl_if,
VX_commit_if alu_commit_if
);
wire [`NUM_THREADS-1:0][31:0] alu_result;
reg [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][32:0] sub_result;
wire [`NUM_THREADS-1:0][32:0] shift_result;
@@ -99,7 +101,7 @@ module VX_alu_unit #(
);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
) alu_reg (
.clk (clk),
.reset (reset),

View File

@@ -11,6 +11,7 @@ module VX_commit #(
VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if,
// outputs
@@ -20,9 +21,10 @@ module VX_commit #(
wire [`NUM_EXS-1:0] commited_mask;
assign commited_mask = {((| alu_commit_if.valid) && alu_commit_if.ready),
((| lsu_commit_if.valid) && lsu_commit_if.ready),
((| mul_commit_if.valid) && mul_commit_if.ready),
((| lsu_commit_if.valid) && lsu_commit_if.ready),
((| csr_commit_if.valid) && csr_commit_if.ready),
((| mul_commit_if.valid) && mul_commit_if.ready),
((| fpu_commit_if.valid) && fpu_commit_if.ready),
((| gpu_commit_if.valid) && gpu_commit_if.ready)};
wire [`NE_BITS:0] num_commits;
@@ -65,6 +67,7 @@ module VX_commit #(
.lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.writeback_if (writeback_if)
);
@@ -77,11 +80,14 @@ module VX_commit #(
if ((| lsu_commit_if.valid) && lsu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
end
if ((| mul_commit_if.valid) && mul_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
end
if ((| csr_commit_if.valid) && csr_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
end
if ((| mul_commit_if.valid) && mul_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
end
if ((| fpu_commit_if.valid) && fpu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.warp_num, fpu_commit_if.curr_PC, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
end
if ((| gpu_commit_if.valid) && gpu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);

View File

@@ -15,40 +15,41 @@ module VX_csr_arb (
VX_commit_if csr_rsp_if,
// outputs
VX_csr_io_rsp_if csr_io_rsp_if,
VX_commit_if csr_commit_if
VX_commit_if csr_commit_if,
VX_csr_io_rsp_if csr_io_rsp_if,
input wire select_io_req,
input wire select_io_rsp
);
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
wire core_select = ~(| csr_io_req_if.valid);
// requests
assign csr_req_if.valid = core_select ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
assign csr_req_if.warp_num = core_select ? csr_core_req_if.warp_num : 0;
assign csr_req_if.curr_PC = core_select ? csr_core_req_if.curr_PC : 0;
assign csr_req_if.csr_op = core_select ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_req_if.csr_addr = core_select ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign csr_req_if.csr_mask = core_select ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_req_if.rd = core_select ? csr_core_req_if.rd : 0;
assign csr_req_if.wb = core_select ? csr_core_req_if.wb : 0;
assign csr_req_if.is_io = ~core_select;
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
assign csr_req_if.warp_num = (~select_io_req) ? csr_core_req_if.warp_num : 0;
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
assign csr_req_if.csr_op = (~select_io_req) ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
assign csr_req_if.is_io = select_io_req;
assign csr_core_req_if.ready = csr_req_if.ready && core_select;
assign csr_io_req_if.ready = csr_req_if.ready && ~core_select;
assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req);
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
// responses
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & csr_rsp_if.is_io;
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & select_io_rsp;
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}};
assign csr_commit_if.warp_num = csr_rsp_if.warp_num;
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
assign csr_commit_if.data = csr_rsp_if.data;
assign csr_commit_if.rd = csr_rsp_if.rd;
assign csr_commit_if.wb = csr_rsp_if.wb;
assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~select_io_rsp}};
assign csr_commit_if.warp_num = csr_rsp_if.warp_num;
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
assign csr_commit_if.data = csr_rsp_if.data;
assign csr_commit_if.rd = csr_rsp_if.rd;
assign csr_commit_if.wb = csr_rsp_if.wb;
assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_commit_if.ready;
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
endmodule

View File

@@ -6,7 +6,8 @@ module VX_csr_unit #(
input wire clk,
input wire reset,
VX_perf_cntrs_if perf_cntrs_if,
VX_perf_cntrs_if perf_cntrs_if,
VX_fpu_to_csr_if fpu_to_csr_if,
VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if,
@@ -17,15 +18,23 @@ module VX_csr_unit #(
VX_csr_req_if csr_pipe_req_if();
VX_commit_if csr_pipe_commit_if();
wire select_io_req = (| csr_io_req_if.valid);
wire select_io_rsp;
VX_csr_arb csr_arb (
.clk (clk),
.reset (reset),
.csr_core_req_if (csr_req_if),
.csr_io_req_if (csr_io_req_if),
.csr_req_if (csr_pipe_req_if),
.csr_rsp_if (csr_pipe_commit_if),
.csr_io_rsp_if (csr_io_rsp_if),
.csr_commit_if (csr_commit_if)
.csr_commit_if (csr_commit_if),
.select_io_req (select_io_req),
.select_io_rsp (select_io_rsp)
);
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
@@ -68,14 +77,14 @@ module VX_csr_unit #(
wire stall = ~csr_pipe_commit_if.ready && (| csr_pipe_commit_if.valid);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + `CSR_ADDR_SIZE + 1 + 32 + 32)
) csr_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, csr_pipe_commit_if.is_io, csr_read_data_s2, csr_updated_data_s2})
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, select_io_rsp, csr_read_data_s2, csr_updated_data_s2})
);
genvar i;

View File

@@ -19,10 +19,11 @@ module VX_decode #(
wire [31:0] instr = ifetch_rsp_if.instr;
reg [`ALU_BITS-1:0] alu_op;
reg [`BR_BITS-1:0] br_op;
reg [`MUL_BITS-1:0] mul_op;
reg [`BR_BITS-1:0] br_op;
wire [`LSU_BITS-1:0] lsu_op;
reg [`CSR_BITS-1:0] csr_op;
reg [`MUL_BITS-1:0] mul_op;
reg [`FPU_BITS-1:0] fpu_op;
reg [`GPU_BITS-1:0] gpu_op;
reg [19:0] upper_imm;
@@ -37,6 +38,7 @@ module VX_decode #(
wire [`NR_BITS-1:0] rd = instr[11:7];
wire [`NR_BITS-1:0] rs1 = instr[19:15];
wire [`NR_BITS-1:0] rs2 = instr[24:20];
wire [`NR_BITS-1:0] rs3 = instr[31:27];
// opcode types
wire is_rtype = (opcode == `INST_R);
@@ -51,10 +53,9 @@ module VX_decode #(
wire is_jals = (opcode == `INST_SYS) && (func3 == 0);
wire is_csr = (opcode == `INST_SYS) && (func3 != 0);
wire is_gpu = (opcode == `INST_GPU);
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
wire is_mul = is_rtype && (func7 == 7'h1);
// upper immediate
always @(*) begin
case (opcode)
`INST_LUI: upper_imm = {func7, rs2, rs1, func3};
@@ -63,20 +64,8 @@ module VX_decode #(
endcase
end
// JAL
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
wire [11:0] jalr_imm = {func7, rs2};
wire [31:0] jalr_offset = {{20{jalr_imm[11]}}, jalr_imm};
always @(*) begin
case (opcode)
`INST_JAL: jalx_offset = jal_offset;
`INST_JALR: jalx_offset = jalr_offset;
default: jalx_offset = 32'd4;
endcase
end
// I-type immediate
wire alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
wire [11:0] alu_shift_imm = {{7{1'b0}}, rs2};
wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12;
@@ -88,9 +77,26 @@ module VX_decode #(
`INST_B: src2_imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
default: src2_imm = 32'hdeadbeef;
endcase
end
end
// JAL
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
wire [11:0] jalr_imm = {func7, rs2};
wire [31:0] jalr_offset = {{20{jalr_imm[11]}}, jalr_imm};
always @(*) begin
case (opcode)
`INST_JAL: jalx_offset = jal_offset;
`INST_JALR: jalx_offset = jalr_offset;
default: jalx_offset = 32'd4;
endcase
end
// BRANCH
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
always @(*) begin
br_op = `BR_EQ;
case (opcode)
@@ -119,6 +125,7 @@ module VX_decode #(
end
// ALU
always @(*) begin
alu_op = `ALU_OTHER;
if (is_lui) begin
@@ -140,7 +147,29 @@ module VX_decode #(
end
end
// MUL
// LSU
wire is_lsu = (is_ltype || is_stype);
assign lsu_op = {is_stype, func3};
// CSR
wire is_csr_imm = is_csr && (func3[2] == 1);
always @(*) begin
csr_op = `CSR_OTHER;
case (func3[1:0])
2'h1: csr_op = `CSR_RW;
2'h2: csr_op = `CSR_RS;
2'h3: csr_op = `CSR_RC;
default:;
endcase
end
// MUL
wire is_mul = is_rtype && (func7 == 7'h1);
always @(*) begin
mul_op = `MUL_MUL;
case (func3)
@@ -156,23 +185,50 @@ module VX_decode #(
endcase
end
// LSU
wire is_lsu = (is_ltype || is_stype);
assign lsu_op = {is_stype, func3};
// FPU
// CSR
wire is_csr_imm = is_csr && (func3[2] == 1);
always @(*) begin
csr_op = `CSR_OTHER;
case (func3[1:0])
2'h1: csr_op = `CSR_RW;
2'h2: csr_op = `CSR_RS;
2'h3: csr_op = `CSR_RC;
default:;
endcase
wire is_fl = (opcode == `INST_FL) && ((func3 == 2));
wire is_fs = (opcode == `INST_FS) && ((func3 == 2));
wire is_fci = (opcode == `INST_FCI);
wire is_fmadd = (opcode == `INST_FMADD);
wire is_fmsub = (opcode == `INST_FMSUB);
wire is_fnmsub = (opcode == `INST_FNMSUB);
wire is_fnmadd = (opcode == `INST_FNMADD);
wire is_fr4 = is_fmadd || is_fmsub || is_fnmsub || is_fnmadd;
wire is_fpu = (is_fl || is_fs || is_fci || is_fr4);
always @(*) begin
fpu_op = `FPU_OTHER;
if (is_fr4) begin
case ({is_fmadd, is_fmsub, is_fnmsub, is_fnmadd})
4'b1000: fpu_op = `FPU_MADD;
4'b0100: fpu_op = `FPU_MSUB;
4'b0010: fpu_op = `FPU_NMSUB;
4'b0001: fpu_op = `FPU_NMADD;
default:;
endcase
end
else begin
case (func7)
7'h00: fpu_op = `FPU_ADD;
7'h04: fpu_op = `FPU_SUB;
7'h08: fpu_op = `FPU_MUL;
7'h0C: fpu_op = `FPU_DIV;
7'h2C: fpu_op = `FPU_SQRT;
7'h14: fpu_op = (func3 == 3'h0) ? `FPU_MIN : `FPU_MAX;
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
7'h70: fpu_op = (func3 == 3'h0) ? `FPU_MVXW : `FPU_CLASS; // both wb to intReg
7'h78: fpu_op = `FPU_MVWX;
7'h50: fpu_op = `FPU_CMP; // wb to intReg
7'h10: fpu_op = (func3[1]) ? `FPU_SGNJX : ((func3[0]) ? `FPU_SGNJN : `FPU_SGNJ);
default:;
endcase
end
end
// GPU
always @(*) begin
gpu_op = `GPU_OTHER;
case (func3)
@@ -195,23 +251,23 @@ module VX_decode #(
assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU :
is_csr ? `EX_CSR :
is_mul ? `EX_MUL :
is_gpu ? `EX_GPU :
is_br ? `EX_ALU :
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
`EX_NOP;
is_fpu ? `EX_FPU :
is_gpu ? `EX_GPU :
is_br ? `EX_ALU :
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
`EX_NOP;
assign decode_tmp_if.instr_op = is_lsu ? `OP_BITS'(lsu_op) :
is_csr ? `OP_BITS'(csr_op) :
is_mul ? `OP_BITS'(mul_op) :
is_gpu ? `OP_BITS'(gpu_op) :
is_br ? `OP_BITS'({1'b1, br_op}) :
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
0;
is_fpu ? `OP_BITS'(fpu_op) :
is_gpu ? `OP_BITS'(gpu_op) :
is_br ? `OP_BITS'({1'b1, br_op}) :
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
0;
assign decode_tmp_if.rd = rd;
assign decode_tmp_if.rs1 = is_lui ? `NR_BITS'(0) : rs1;
assign decode_tmp_if.rs2 = rs2;
assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
@@ -220,20 +276,22 @@ module VX_decode #(
src2_imm;
assign decode_tmp_if.rs1_is_PC = is_auipc;
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
assign decode_tmp_if.use_rs1 = (decode_tmp_if.rs1 != 0)
&& (is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || ~is_csr_imm || is_gpu);
assign decode_tmp_if.use_rs2 = (decode_tmp_if.rs2 != 0)
&& (is_btype || is_stype || is_rtype || (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN)));
assign decode_tmp_if.rs1_is_fp = (is_fci && ((func7 != 7'h68) && (fpu_op != `FPU_MVWX)) || is_fr4);
assign decode_tmp_if.rs2_is_fp = is_fs || (is_fci && ((func7 != 7'h60) && (func7 != 7'h68)) || is_fr4);
assign decode_tmp_if.rs3 = rs3;
assign decode_tmp_if.use_rs3 = is_fr4;
assign decode_tmp_if.frm = func3;
assign decode_tmp_if.wb = (rd == 0) ? `WB_NO : // disable writeback to r0
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
(is_jal || is_jalr || is_jals) ? `WB_JAL :
is_ltype ? `WB_MEM :
`WB_NO;
assign decode_tmp_if.wb = (is_fpu && (is_fl || (is_fci && ((func7 != 7'h50) || (func7 != 7'h70) || (func7 != 7'h60))) || is_fr4))
|| (~is_fpu && (rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN);
assign join_if.warp_num = ifetch_rsp_if.warp_num;
@@ -241,17 +299,17 @@ module VX_decode #(
assign wstall_if.wstall = in_valid && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
wire stall = ~decode_if.ready && (| decode_if.valid);
wire stall = ~decode_if.ready && (| decode_if.valid);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS)
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + 1 + `FRM_BITS)
) decode_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb}),
.out ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb})
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.frm}),
.out ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm})
);
assign ifetch_rsp_if.ready = ~stall;
@@ -263,9 +321,7 @@ module VX_decode #(
print_ex_type(decode_tmp_if.ex_type);
$write(", op=");
print_instr_op(decode_tmp_if.ex_type, decode_tmp_if.instr_op);
$write(", wb=");
print_wb(decode_tmp_if.wb);
$write(", rd=%0d, rs1=%0d, rs2=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b\n", decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2);
$write(", wb=%b, rd=%0d, rs1=%0d, rs2=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b\n", decode_tmp_if.wb, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2);
// trap unsupported instructions
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.instr_op) == `ALU_OTHER));

View File

@@ -76,7 +76,7 @@
`define CSR_WIDTH 12
`define DIV_LATENCY 2
`define DIV_LATENCY 21
`define MUL_LATENCY 2
@@ -390,6 +390,8 @@
///////////////////////////////////////////////////////////////////////////////
task print_ex_type;
input [`EX_BITS-1:0] ex;
begin

View File

@@ -19,12 +19,13 @@ module VX_execute #(
// perf
VX_perf_cntrs_if perf_cntrs_if,
// inputs
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if,
// outputs
@@ -34,10 +35,13 @@ module VX_execute #(
VX_commit_if lsu_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if,
output wire ebreak
);
VX_fpu_to_csr_if fpu_to_csr_if();
VX_fpu_from_csr_if fpu_from_csr_if();
VX_alu_unit #(
.CORE_ID(CORE_ID)
@@ -67,6 +71,7 @@ module VX_execute #(
.clk (clk),
.reset (reset),
.perf_cntrs_if (perf_cntrs_if),
.fpu_to_csr_if (fpu_to_csr_if),
.csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if),
.csr_req_if (csr_req_if),
@@ -82,6 +87,17 @@ module VX_execute #(
.mul_commit_if (mul_commit_if)
);
VX_fpu_unit #(
.CORE_ID(CORE_ID)
) fpu_unit (
.clk (clk),
.reset (reset),
.fpu_req_if (fpu_req_if),
.fpu_from_csr_if(fpu_from_csr_if),
.fpu_to_csr_if (fpu_to_csr_if),
.fpu_commit_if (fpu_commit_if)
);
VX_gpu_unit #(
.CORE_ID(CORE_ID)
) gpu_unit (

140
hw/rtl/VX_fpu_unit.v Normal file
View File

@@ -0,0 +1,140 @@
`include "VX_define.vh"
module VX_fpu_unit #(
parameter CORE_ID = 0
) (
// inputs
input wire clk,
input wire reset,
// inputs
VX_fpu_req_if fpu_req_if,
VX_fpu_from_csr_if fpu_from_csr_if,
// outputs
VX_commit_if fpu_commit_if,
VX_fpu_to_csr_if fpu_to_csr_if
);
localparam FOP_BITS = fpnew_pkg::OP_BITS;
localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
localparam int FPU_DPATHW = `NUM_THREADS * 32;
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: FPU_DPATHW,
EnableVectors: 1,
EnableNanBox: 1,
FpFmtMask: 5'b10000,
IntFmtMask: 4'b0010
};
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs:'{'{`LATENCY_FMULADD, 0, 0, 0, 0}, // ADDMUL
'{default: `LATENCY_FDIVSQRT}, // DIVSQRT
'{default: `LATENCY_FNONCOMP}, // NONCOMP
'{default: `LATENCY_FCONV}}, // CONV
UnitTypes:'{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
};
wire fpu_in_ready;
wire fpu_in_valid;
wire fpu_out_ready;
wire fpu_out_valid;
wire [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
assign fpu_in_valid = (| fpu_req_if.valid);
assign fpu_operands[0] = fpu_req_if.rs1_data;
assign fpu_operands[1] = fpu_req_if.rs2_data;
assign fpu_operands[2] = fpu_req_if.rs3_data;
assign fpu_req_if.ready = fpu_in_ready;
wire [`NUM_THREADS-1:0][31:0] fpu_result;
fpnew_pkg::status_t fpu_status;
reg [FOP_BITS-1:0] fpu_op;
reg [`FRM_BITS-1:0] fpu_rnd;
reg fpu_op_mod;
always @(*) begin
fpu_op = fpnew_pkg::SGNJ;
fpu_op_mod = 0;
fpu_rnd = fpu_req_if.frm;
case (fpu_req_if.fpu_op)
`FPU_ADD: fpu_op = fpnew_pkg::ADD;
`FPU_SUB: begin fpu_op = fpnew_pkg::ADD; fpu_op_mod = 1; end
`FPU_MUL: fpu_op = fpnew_pkg::MUL;
`FPU_DIV: fpu_op = fpnew_pkg::DIV;
`FPU_SQRT: fpu_op = fpnew_pkg::SQRT;
`FPU_MADD: fpu_op = fpnew_pkg::FMADD;
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMSUB: fpu_op = fpnew_pkg::FNMSUB;
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; end
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; end
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; end
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
`FPU_CVTWS: fpu_op = fpnew_pkg::F2I;
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::ADD; fpu_op_mod = 1; end
`FPU_CVTSW: fpu_op = fpnew_pkg::I2F;
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; end
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; end
`FPU_CLASS: fpu_op = fpnew_pkg::CLASSIFY;
`FPU_CMP: fpu_op = fpnew_pkg::CMP;
default:;
endcase
end
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic)
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.operands_i (fpu_operands),
.rnd_mode_i (fpu_rnd),
.op_i (fpu_op),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpu_src_fmt),
.dst_fmt_i (fpu_dst_fmt),
.int_fmt_i (fpu_int_fmt),
.vectorial_op_i (1'b1),
.tag_i (1'b0),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i (reset),
.result_o (fpu_result),
.status_o (fpu_status),
`UNUSED_PIN (tag_o),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
);
assign fpu_commit_if.valid = fpu_req_if.valid & {`NUM_THREADS{fpu_out_valid}};
assign fpu_commit_if.data = fpu_result;
assign fpu_commit_if.wb = fpu_req_if.wb;
assign fpu_commit_if.rd = fpu_req_if.rd;
assign fpu_out_ready = fpu_commit_if.ready;
assign fpu_to_csr_if.valid = fpu_out_valid;
assign fpu_to_csr_if.warp_num = fpu_req_if.warp_num;
assign fpu_to_csr_if.fflags_NV = fpu_status.NV;
assign fpu_to_csr_if.fflags_DZ = fpu_status.DZ;
assign fpu_to_csr_if.fflags_OF = fpu_status.OF;
assign fpu_to_csr_if.fflags_UF = fpu_status.UF;
assign fpu_to_csr_if.fflags_NX = fpu_status.NX;
endmodule

94
hw/rtl/VX_gpr_fp_ctrl.v Normal file
View File

@@ -0,0 +1,94 @@
`include "VX_define.vh"
// control module to support multi-cycle read for fp register
module VX_gpr_fp_ctrl (
input wire clk,
input wire reset,
VX_decode_if decode_if,
input wire [`NUM_THREADS-1:0][31:0] rs1_int_data,
input wire [`NUM_THREADS-1:0][31:0] rs2_int_data,
input wire [`NUM_THREADS-1:0][31:0] rs1_fp_data,
input wire [`NUM_THREADS-1:0][31:0] rs2_fp_data,
// outputs
output wire [`NR_BITS-1:0] raddr1,
output wire [`NR_BITS-1:0] raddr2,
VX_gpr_data_if gpr_data_if,
input wire schedule_delay,
output wire gpr_delay
);
// param
localparam GPR_DELAY_WID = 1;
reg [GPR_DELAY_WID-1:0] multi_cyc_state;
reg [`NUM_THREADS-1:0][31:0] tmp_rs1_data;
reg [`NUM_THREADS-1:0][31:0] tmp_rs2_data;
reg [`NUM_THREADS-1:0][31:0] rs1_data;
reg [`NUM_THREADS-1:0][31:0] rs2_data;
reg [`NUM_THREADS-1:0][31:0] rs3_data;
always @(posedge clk) begin
if (reset) begin
multi_cyc_state <= 0;
end else if (!schedule_delay) begin
multi_cyc_state <= decode_if.use_rs3 && (multi_cyc_state == 0);
end else begin
multi_cyc_state <= 0;
end
end
// select rs1 data
always @(posedge clk) begin
if (reset) begin
tmp_rs1_data <= 0;
end else begin
if (decode_if.rs1_is_fp) begin
tmp_rs1_data <= rs1_fp_data;
end else begin
tmp_rs1_data <= decode_if.rs1_is_PC ? {`NUM_THREADS{decode_if.curr_PC}} : rs1_int_data;
end
end
end
// select rs2 data
always @(posedge clk) begin
if(reset) begin
tmp_rs2_data <= 0;
end else begin
if (decode_if.rs2_is_fp) begin
tmp_rs2_data <= rs2_fp_data;
end else begin
tmp_rs2_data <= decode_if.rs2_is_imm ? {`NUM_THREADS{decode_if.imm}} : rs2_int_data;
end
end
end
// outputs
assign gpr_delay = (multi_cyc_state == 0) && decode_if.use_rs3;
assign raddr1 = multi_cyc_state ? decode_if.rs3 : decode_if.rs1 ;
assign raddr2 = decode_if.rs2;
always @(*) begin
if (decode_if.use_rs3) begin
rs1_data = tmp_rs1_data;
rs2_data = tmp_rs2_data;
rs3_data = rs1_fp_data;
end else begin
rs1_data = decode_if.rs1_is_fp ? rs1_fp_data : rs1_int_data;
rs2_data = decode_if.rs2_is_fp ? rs2_fp_data : rs2_int_data;
rs3_data = {`NUM_THREADS{32'h8000_0000}}; // default value: -0 in single fp
end
end
assign gpr_data_if.rs1_data = rs1_data;
assign gpr_data_if.rs2_data = rs2_data;
assign gpr_data_if.rs3_data = rs3_data;
endmodule

View File

@@ -4,42 +4,76 @@ module VX_gpr_stage #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// inputs
VX_wb_if writeback_if,
VX_decode_if decode_if,
VX_decode_if decode_if,
// outputs
VX_gpr_data_if gpr_data_if
VX_gpr_data_if gpr_data_if,
input wire schedule_delay,
output wire gpr_delay
);
wire [`NUM_THREADS-1:0][31:0] rs1_data_all [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0][31:0] rs2_data_all [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0][31:0] rs1_PC;
wire [`NUM_THREADS-1:0][31:0] rs2_imm;
wire [`NUM_THREADS-1:0][31:0] rs1_int_data [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0][31:0] rs2_int_data [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0][31:0] rs1_fp_data [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0][31:0] rs2_fp_data [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0];
wire [`NR_BITS-1:0] raddr1;
wire [`NR_BITS-1:0] raddr2;
genvar i;
for (i = 0; i < `NUM_THREADS; i++) begin
assign rs1_PC[i] = decode_if.curr_PC;
assign rs2_imm[i] = decode_if.imm;
end
assign gpr_data_if.rs1_data = decode_if.rs1_is_PC ? rs1_PC : rs1_data_all[decode_if.warp_num];
assign gpr_data_if.rs2_data = decode_if.rs2_is_imm ? rs2_imm : rs2_data_all[decode_if.warp_num];
for (i = 0; i < `NUM_WARPS; i++) begin
assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}};
VX_gpr_ram gpr_ram (
// Int GPRs
VX_gpr_ram gpr_int_ram (
.clk (clk),
.we (we[i]),
.we (we[i] & {`NUM_THREADS{~writeback_if.is_fp}}),
.waddr (writeback_if.rd),
.wdata (writeback_if.data),
.rs1 (decode_if.rs1),
.rs2 (decode_if.rs2),
.rs1_data (rs1_data_all[i]),
.rs2_data (rs2_data_all[i])
.rs1 (raddr1),
.rs2 (raddr2),
.rs1_data (rs1_int_data[i]),
.rs2_data (rs2_int_data[i])
);
// FP GPRs
VX_gpr_ram gpr_fp_ram (
.clk (clk),
.we (we[i] & {`NUM_THREADS{writeback_if.is_fp}}),
.waddr (writeback_if.rd),
.wdata (writeback_if.data),
.rs1 (raddr1),
.rs2 (raddr2),
.rs1_data (rs1_fp_data[i]),
.rs2_data (rs2_fp_data[i])
);
// controller for multi-cycle read
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
.clk (clk),
.reset (reset),
//inputs
.decode_if (decode_if),
.rs1_int_data (rs1_int_data[i]),
.rs2_int_data (rs2_int_data[i]),
.rs1_fp_data (rs1_fp_data[i]),
.rs2_fp_data (rs2_fp_data[i]),
// outputs
.raddr1 (raddr1),
.raddr2 (raddr2),
.gpr_data_if (gpr_data_if),
.schedule_delay (schedule_delay),
.gpr_delay (gpr_delay)
);
end
assign writeback_if.ready = 1'b1;

View File

@@ -79,7 +79,7 @@ module VX_gpu_unit #(
assign gpu_commit_if.valid = gpu_req_if.valid;
assign gpu_commit_if.warp_num = gpu_req_if.warp_num;
assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC;
assign gpu_commit_if.wb = `WB_NO;
assign gpu_commit_if.wb = 0;
assign gpu_commit_if.rd = 0;
assign gpu_commit_if.data = 0;

View File

@@ -13,16 +13,19 @@ module VX_issue #(
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
VX_gpr_data_if gpr_data_if();
wire schedule_delay;
wire gpr_delay;
wire alu_busy = ~alu_req_if.ready/* && (| alu_req_if.valid)*/;
wire lsu_busy = ~lsu_req_if.ready/* && (| lsu_req_if.valid)*/;
wire csr_busy = ~csr_req_if.ready/* && (| csr_req_if.valid)*/;
wire mul_busy = ~mul_req_if.ready/* && (| mul_req_if.valid)*/;
wire gpu_busy = ~gpu_req_if.ready/* && (| gpu_req_if.valid)*/;
wire alu_busy = ~alu_req_if.ready;
wire lsu_busy = ~lsu_req_if.ready;
wire csr_busy = ~csr_req_if.ready;
wire mul_busy = ~mul_req_if.ready;
wire fpu_busy = ~mul_req_if.ready;
wire gpu_busy = ~gpu_req_if.ready;
VX_scheduler #(
.CORE_ID(CORE_ID)
@@ -31,10 +34,12 @@ module VX_issue #(
.reset (reset),
.decode_if (decode_if),
.writeback_if (writeback_if),
.gpr_busy (gpr_delay),
.alu_busy (alu_busy),
.lsu_busy (lsu_busy),
.csr_busy (csr_busy),
.mul_busy (mul_busy),
.fpu_busy (fpu_busy),
.gpu_busy (gpu_busy),
.schedule_delay (schedule_delay),
`UNUSED_PIN (is_empty)
@@ -43,16 +48,20 @@ module VX_issue #(
VX_gpr_stage #(
.CORE_ID(CORE_ID)
) gpr_stage (
.clk (clk),
.clk (clk),
.reset (reset),
.decode_if (decode_if),
.writeback_if (writeback_if),
.gpr_data_if (gpr_data_if)
.gpr_data_if (gpr_data_if),
.schedule_delay (schedule_delay),
.gpr_delay (gpr_delay)
);
VX_alu_req_if alu_req_tmp_if();
VX_lsu_req_if lsu_req_tmp_if();
VX_csr_req_if csr_req_tmp_if();
VX_mul_req_if mul_req_tmp_if();
VX_fpu_req_if fpu_req_tmp_if();
VX_gpu_req_if gpu_req_tmp_if();
VX_issue_mux issue_mux (
@@ -62,6 +71,7 @@ module VX_issue #(
.lsu_req_if (lsu_req_tmp_if),
.csr_req_if (csr_req_tmp_if),
.mul_req_if (mul_req_tmp_if),
.fpu_req_if (fpu_req_tmp_if),
.gpu_req_if (gpu_req_tmp_if)
);
@@ -69,16 +79,18 @@ module VX_issue #(
wire stall_lsu = ~lsu_req_if.ready || schedule_delay;
wire stall_csr = ~csr_req_if.ready || schedule_delay;
wire stall_mul = ~mul_req_if.ready || schedule_delay;
wire stall_fpu = ~fpu_req_if.ready || schedule_delay;
wire stall_gpu = ~gpu_req_if.ready || schedule_delay;
wire flush_alu = alu_req_if.ready && schedule_delay;
wire flush_lsu = lsu_req_if.ready && schedule_delay;
wire flush_csr = csr_req_if.ready && schedule_delay;
wire flush_mul = mul_req_if.ready && schedule_delay;
wire flush_fpu = fpu_req_if.ready && schedule_delay;
wire flush_gpu = gpu_req_if.ready && schedule_delay;
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
) alu_reg (
.clk (clk),
.reset (reset),
@@ -89,7 +101,7 @@ module VX_issue #(
);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
) lsu_reg (
.clk (clk),
.reset (reset),
@@ -100,7 +112,7 @@ module VX_issue #(
);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + `WB_BITS + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + 1 + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
) csr_reg (
.clk (clk),
.reset (reset),
@@ -110,8 +122,8 @@ module VX_issue #(
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.wb, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
);
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
) mul_reg (
.clk (clk),
.reset (reset),
@@ -121,6 +133,17 @@ module VX_issue #(
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.wb, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data})
);
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
) fpu_reg (
.clk (clk),
.reset (reset),
.stall (stall_fpu),
.flush (flush_fpu),
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
) gpu_reg (
@@ -140,6 +163,9 @@ module VX_issue #(
if ((| mul_req_tmp_if.valid) && ~stall_mul) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data);
end
if ((| fpu_req_tmp_if.valid) && ~stall_fpu) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data);
end
if ((| lsu_req_tmp_if.valid) && ~stall_lsu) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset);
end

View File

@@ -10,6 +10,7 @@ module VX_issue_mux (
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
@@ -17,6 +18,7 @@ module VX_issue_mux (
wire[`NUM_THREADS-1:0] is_lsu = {`NUM_THREADS{decode_if.ex_type == `EX_LSU}};
wire[`NUM_THREADS-1:0] is_csr = {`NUM_THREADS{decode_if.ex_type == `EX_CSR}};
wire[`NUM_THREADS-1:0] is_mul = {`NUM_THREADS{decode_if.ex_type == `EX_MUL}};
wire[`NUM_THREADS-1:0] is_fpu = {`NUM_THREADS{decode_if.ex_type == `EX_FPU}};
wire[`NUM_THREADS-1:0] is_gpu = {`NUM_THREADS{decode_if.ex_type == `EX_GPU}};
// ALU unit
@@ -64,6 +66,18 @@ module VX_issue_mux (
assign mul_req_if.rd = decode_if.rd;
assign mul_req_if.wb = decode_if.wb;
// FPU unit
assign fpu_req_if.valid = decode_if.valid & is_fpu;
assign fpu_req_if.warp_num = decode_if.warp_num;
assign fpu_req_if.curr_PC = decode_if.curr_PC;
assign fpu_req_if.fpu_op = `FPU_OP(decode_if.instr_op);
assign fpu_req_if.rs1_data = gpr_data_if.rs1_data;
assign fpu_req_if.rs2_data = gpr_data_if.rs2_data;
assign fpu_req_if.rs3_data = gpr_data_if.rs3_data;
assign fpu_req_if.frm = decode_if.frm;
assign fpu_req_if.rd = decode_if.rd;
assign fpu_req_if.wb = decode_if.wb;
// GPU unit
assign gpu_req_if.valid = decode_if.valid & is_gpu;
assign gpu_req_if.warp_num = decode_if.warp_num;

View File

@@ -28,8 +28,9 @@ module VX_lsu_unit #(
wire [`BYTEEN_BITS-1:0] mem_byteen;
wire [`NR_BITS-1:0] use_rd;
wire [`NW_BITS-1:0] use_warp_num;
wire [`WB_BITS-1:0] use_wb;
wire use_wb;
wire [31:0] use_pc;
wire mrq_full;
genvar i;
@@ -68,7 +69,7 @@ module VX_lsu_unit #(
`IGNORE_WARNINGS_END
VX_generic_register #(
.N(`NUM_THREADS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + `NW_BITS + `WB_BITS + 32)
.N(`NUM_THREADS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + `NW_BITS + 1 + 32)
) mem_req_reg (
.clk (clk),
.reset (reset),
@@ -83,8 +84,7 @@ module VX_lsu_unit #(
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr;
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
wire [`BYTEEN_BITS-1:0] core_rsp_mem_read;
wire mrq_full;
wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready
&& (0 == use_req_rw); // only push read requests
@@ -97,7 +97,7 @@ module VX_lsu_unit #(
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd);
VX_index_queue #(
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + `WB_BITS + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 1 + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
.SIZE (`DCREQ_SIZE)
) mem_req_queue (
.clk (clk),

View File

@@ -12,7 +12,7 @@ module VX_mul_unit #(
// Outputs
VX_commit_if mul_commit_if
);
wire [`NUM_THREADS-1:0][31:0] alu_result;
reg [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][63:0] mul_result;
wire [`NUM_THREADS-1:0][31:0] div_result;
wire [`NUM_THREADS-1:0][31:0] rem_result;
@@ -36,7 +36,7 @@ module VX_mul_unit #(
.WIDTHB(33),
.WIDTHP(64),
.SIGNED(1),
.PIPELINE(`MUL_LATENCY)
.PIPELINE(`LATENCY_IMUL)
) multiplier (
.clk(clk),
.reset(reset),
@@ -52,7 +52,7 @@ module VX_mul_unit #(
.WIDTHR(32),
.NSIGNED(1),
.DSIGNED(1),
.PIPELINE(`DIV_LATENCY)
.PIPELINE(`LATENCY_IDIV)
) sdiv (
.clk(clk),
.reset(reset),
@@ -77,9 +77,11 @@ module VX_mul_unit #(
end
end
wire stall;
reg result_avail;
reg [4:0] pending_ctr;
wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `DIV_LATENCY : `MUL_LATENCY;
wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `LATENCY_IDIV : `LATENCY_IMUL;
always @(posedge clk) begin
if (reset) begin
@@ -104,13 +106,13 @@ module VX_mul_unit #(
wire pipeline_stall = ~result_avail && (| mul_req_if.valid);
wire stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|| pipeline_stall;
assign stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|| pipeline_stall;
wire flush = mul_commit_if.ready && pipeline_stall;
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
) mul_reg (
.clk (clk),
.reset (reset),

View File

@@ -110,6 +110,7 @@ module VX_pipeline #(
VX_lsu_req_if lsu_req_if();
VX_csr_req_if csr_req_if();
VX_mul_req_if mul_req_if();
VX_fpu_req_if fpu_req_if();
VX_gpu_req_if gpu_req_if();
VX_wb_if writeback_if();
VX_wstall_if wstall_if();
@@ -118,6 +119,7 @@ module VX_pipeline #(
VX_commit_if lsu_commit_if();
VX_commit_if csr_commit_if();
VX_commit_if mul_commit_if();
VX_commit_if fpu_commit_if();
VX_commit_if gpu_commit_if();
VX_fetch #(
@@ -159,6 +161,7 @@ module VX_pipeline #(
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if)
);
@@ -181,6 +184,7 @@ module VX_pipeline #(
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if),
.warp_ctl_if (warp_ctl_if),
@@ -189,6 +193,7 @@ module VX_pipeline #(
.lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
.ebreak (ebreak)
@@ -204,6 +209,7 @@ module VX_pipeline #(
.lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
.writeback_if (writeback_if),

View File

@@ -8,37 +8,43 @@ module VX_scheduler #(
VX_decode_if decode_if,
VX_wb_if writeback_if,
input wire gpr_busy,
input wire alu_busy,
input wire lsu_busy,
input wire csr_busy,
input wire mul_busy,
input wire fpu_busy,
input wire gpu_busy,
output wire schedule_delay,
output wire is_empty
);
localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
reg [`NUM_REGS-1:0][`NUM_THREADS-1:0] rename_table [`NUM_WARPS-1:0];
reg [`NUM_REGS-1:0] busy_table [`NUM_WARPS-1:0];
reg [CTVW-1:0] count_valid;
wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0);
wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0);
wire rd_rename = (rename_table[decode_if.warp_num][decode_if.rd ] != 0);
wire rs1_rename = busy_table[decode_if.warp_num][decode_if.rs1];
wire rs2_rename = busy_table[decode_if.warp_num][decode_if.rs2];
wire rs3_rename = busy_table[decode_if.warp_num][decode_if.rs3];
wire rd_rename = busy_table[decode_if.warp_num][decode_if.rd];
wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1);
wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2);
wire rd_rename_qual = (rd_rename) && (decode_if.wb != 0);
wire rs1_rename_qual = rs1_rename && decode_if.use_rs1;
wire rs2_rename_qual = rs2_rename && decode_if.use_rs2;
wire rs3_rename_qual = rs3_rename && decode_if.use_rs3;
wire rd_rename_qual = rd_rename && decode_if.wb;
wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual);
wire rename_valid = (rs1_rename_qual || rs2_rename_qual || rs3_rename_qual || rd_rename_qual);
wire ex_stalled = (| decode_if.valid)
&& ((alu_busy && (decode_if.ex_type == `EX_ALU))
wire ex_stalled = ((gpr_busy)
|| (alu_busy && (decode_if.ex_type == `EX_ALU))
|| (lsu_busy && (decode_if.ex_type == `EX_LSU))
|| (csr_busy && (decode_if.ex_type == `EX_CSR))
|| (mul_busy && (decode_if.ex_type == `EX_MUL))
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|| (gpu_busy && (decode_if.ex_type == `EX_GPU)));
wire stall = ex_stalled || rename_valid;
wire stall = (ex_stalled || rename_valid) && (| decode_if.valid);
wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && ~stall;
@@ -49,23 +55,25 @@ module VX_scheduler #(
reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
count_valid;
integer i, w;
always @(posedge clk) begin
always @(posedge clk) begin
if (reset) begin
integer i, w;
for (w = 0; w < `NUM_WARPS; w++) begin
for (i = 0; i < 32; i++) begin
rename_table[w][i] <= 0;
busy_table[w][i] <= 0;
end
end
count_valid <= 0;
end
count_valid <= 0;
end else begin
if (acquire_rd) begin
rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
busy_table[decode_if.warp_num][decode_if.rd] <= 1;
end
if (release_rd) begin
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
busy_table[writeback_if.warp_num][writeback_if.rd] <= (| valid_wb_new_mask);
end
count_valid <= count_valid_next;
end
@@ -80,7 +88,7 @@ module VX_scheduler #(
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (stall) begin
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, rename=%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, rd_rename_qual, rs1_rename_qual, rs2_rename_qual, alu_busy, lsu_busy, csr_busy, mul_busy, gpu_busy);
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, rename=%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, rd_rename_qual, rs1_rename_qual, rs2_rename_qual, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
end
end
`endif

View File

@@ -18,7 +18,6 @@ module VX_warp_sched #(
);
wire update_use_wspawn;
wire update_visible_active;
wire scheduled_warp;
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];

View File

@@ -9,17 +9,19 @@ module VX_writeback #(
// inputs
VX_commit_if alu_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if csr_commit_if,
// outputs
VX_wb_if writeback_if
);
wire lsu_valid = (| lsu_commit_if.valid) && (lsu_commit_if.wb != `WB_NO);
wire mul_valid = (| mul_commit_if.valid) && (mul_commit_if.wb != `WB_NO);
wire alu_valid = (| alu_commit_if.valid) && (alu_commit_if.wb != `WB_NO);
wire csr_valid = (| csr_commit_if.valid) && (csr_commit_if.wb != `WB_NO);
wire alu_valid = (| alu_commit_if.valid) && alu_commit_if.wb;
wire lsu_valid = (| lsu_commit_if.valid) && lsu_commit_if.wb;
wire csr_valid = (| csr_commit_if.valid) && csr_commit_if.wb;
wire mul_valid = (| mul_commit_if.valid) && mul_commit_if.wb;
wire fpu_valid = (| fpu_commit_if.valid) && fpu_commit_if.wb;
VX_wb_if writeback_tmp_if();
@@ -47,23 +49,26 @@ module VX_writeback #(
csr_valid ? csr_commit_if.rd :
0;
assign writeback_tmp_if.is_fp = fpu_valid && fpu_commit_if.ready;
wire stall = ~writeback_if.ready && (| writeback_if.valid);
VX_generic_register #(
.N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32))
.N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32) + 1)
) wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data}),
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data})
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data, writeback_tmp_if.is_fp}),
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data, writeback_if.is_fp})
);
assign lsu_commit_if.ready = !stall;
assign mul_commit_if.ready = !stall && !lsu_valid;
assign alu_commit_if.ready = !stall && !lsu_valid && !mul_valid;
assign csr_commit_if.ready = !stall && !lsu_valid && !mul_valid && !alu_valid;
assign fpu_commit_if.ready = !stall && !lsu_valid;
assign mul_commit_if.ready = !stall && !lsu_valid && !fpu_valid;
assign alu_commit_if.ready = !stall && !lsu_valid && !fpu_valid && !mul_valid;
assign csr_commit_if.ready = !stall && !lsu_valid && !fpu_valid && !mul_valid && !alu_valid;
// special workaround to control RISC-V benchmarks termination on Verilator
reg [31:0] last_data_wb /* verilator public */;

View File

@@ -139,54 +139,54 @@ module Vortex (
end else begin
wire per_cluster_dram_req_valid [`NUM_CLUSTERS-1:0];
wire per_cluster_dram_req_rw [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag [`NUM_CLUSTERS-1:0];
wire l3_core_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire l3_core_req_ready;
wire per_cluster_dram_rsp_valid [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag [`NUM_CLUSTERS-1:0];
wire per_cluster_dram_rsp_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire per_cluster_snp_req_valid [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr [`NUM_CLUSTERS-1:0];
wire per_cluster_snp_req_invalidate [`NUM_CLUSTERS-1:0];
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag [`NUM_CLUSTERS-1:0];
wire per_cluster_snp_req_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
wire per_cluster_snp_rsp_valid [`NUM_CLUSTERS-1:0];
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag [`NUM_CLUSTERS-1:0];
wire per_cluster_snp_rsp_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
wire per_cluster_io_req_valid [`NUM_CLUSTERS-1:0];
wire per_cluster_io_req_rw [`NUM_CLUSTERS-1:0];
wire [3:0] per_cluster_io_req_byteen [`NUM_CLUSTERS-1:0];
wire [29:0] per_cluster_io_req_addr [`NUM_CLUSTERS-1:0];
wire [31:0] per_cluster_io_req_data [`NUM_CLUSTERS-1:0];
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag [`NUM_CLUSTERS-1:0];
wire per_cluster_io_req_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
wire [`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen;
wire [`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready;
wire per_cluster_io_rsp_valid [`NUM_CLUSTERS-1:0];
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag [`NUM_CLUSTERS-1:0];
wire [31:0] per_cluster_io_rsp_data [`NUM_CLUSTERS-1:0];
wire per_cluster_io_rsp_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
wire per_cluster_csr_io_req_valid [`NUM_CLUSTERS-1:0];
wire [11:0] per_cluster_csr_io_req_addr [`NUM_CLUSTERS-1:0];
wire per_cluster_csr_io_req_rw [`NUM_CLUSTERS-1:0];
wire [31:0] per_cluster_csr_io_req_data [`NUM_CLUSTERS-1:0];
wire per_cluster_csr_io_req_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid;
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready;
wire per_cluster_csr_io_rsp_valid [`NUM_CLUSTERS-1:0];
wire [31:0] per_cluster_csr_io_rsp_data [`NUM_CLUSTERS-1:0];
wire per_cluster_csr_io_rsp_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready;
wire per_cluster_busy [`NUM_CLUSTERS-1:0];
wire per_cluster_ebreak [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak;
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
@@ -336,27 +336,27 @@ module Vortex (
// L3 Cache ///////////////////////////////////////////////////////////
wire l3_core_req_valid [`L3NUM_REQUESTS-1:0];
wire l3_core_req_rw [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag [`L3NUM_REQUESTS-1:0];
wire [`L3NUM_REQUESTS-1:0] l3_core_req_valid;
wire [`L3NUM_REQUESTS-1:0] l3_core_req_rw;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
wire l3_core_rsp_valid [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag [`L3NUM_REQUESTS-1:0];
wire l3_core_rsp_ready;
wire [`L3NUM_REQUESTS-1:0] l3_core_rsp_valid;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
wire l3_core_rsp_ready;
wire l3_snp_fwdout_valid [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr [`NUM_CLUSTERS-1:0];
wire l3_snp_fwdout_invalidate [`NUM_CLUSTERS-1:0];
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag [`NUM_CLUSTERS-1:0];
wire l3_snp_fwdout_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr;
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag;
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready;
wire l3_snp_fwdin_valid [`NUM_CLUSTERS-1:0];
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag [`NUM_CLUSTERS-1:0];
wire l3_snp_fwdin_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
for (i = 0; i < `L3NUM_REQUESTS; i++) begin
// Core Request

Submodule hw/rtl/fp_cores/fpu_div_sqrt_mvp added at d9a27f3c4e

View File

@@ -11,7 +11,7 @@ interface VX_alu_req_if ();
wire [`ALU_BITS-1:0] alu_op;
wire [`WB_BITS-1:0] wb;
wire wb;
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] rs1_data;

View File

@@ -10,8 +10,7 @@ interface VX_commit_if ();
wire [31:0] curr_PC;
wire [`NUM_THREADS-1:0][31:0] data;
wire [`NR_BITS-1:0] rd;
wire [`WB_BITS-1:0] wb;
wire is_io;
wire wb;
wire ready;
endinterface

View File

@@ -15,7 +15,7 @@ interface VX_csr_req_if ();
wire [31:0] csr_mask;
wire [`NR_BITS-1:0] rd;
wire [`WB_BITS-1:0] wb;
wire wb;
wire is_io;
wire ready;

View File

@@ -19,12 +19,19 @@ interface VX_decode_if ();
wire [31:0] imm;
wire rs1_is_PC;
wire rs2_is_imm;
wire rs2_is_imm;
wire use_rs1;
wire use_rs2;
wire [`WB_BITS-1:0] wb;
// FP states
wire [`NR_BITS-1:0] rs3;
wire use_rs3;
wire rs1_is_fp;
wire rs2_is_fp;
wire [`FRM_BITS-1:0] frm;
wire wb;
wire ready;

View File

@@ -0,0 +1,16 @@
`ifndef VX_FPU_FROM_CSR_IF
`define VX_FPU_FROM_CSR_IF
`include "VX_define.vh"
interface VX_fpu_from_csr_if ();
`IGNORE_WARNINGS_BEGIN
wire [`NUM_WARPS-1:0][`FRM_BITS-1:0] frm;
`IGNORE_WARNINGS_END
endinterface
`endif

View File

@@ -0,0 +1,26 @@
`ifndef VX_FPU_REQ_IF
`define VX_FPU_REQ_IF
`include "VX_define.vh"
interface VX_fpu_req_if ();
wire [`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;
wire [31:0] curr_PC;
wire [`FPU_BITS-1:0] fpu_op;
wire [`FRM_BITS-1:0] frm;
wire wb;
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
wire ready;
endinterface
`endif

View File

@@ -0,0 +1,23 @@
`ifndef VX_FPU_TO_CSR_IF
`define VX_FPU_TO_CSR_IF
`include "VX_define.vh"
interface VX_fpu_to_csr_if ();
`IGNORE_WARNINGS_BEGIN
wire valid;
wire [`NW_BITS-1:0] warp_num;
wire fflags_NV;
wire fflags_DZ;
wire fflags_OF;
wire fflags_UF;
wire fflags_NX;
`IGNORE_WARNINGS_END
endinterface
`endif

View File

@@ -7,6 +7,7 @@ interface VX_gpr_data_if ();
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
endinterface

View File

@@ -12,7 +12,7 @@ interface VX_lsu_req_if ();
wire rw;
wire [`BYTEEN_BITS-1:0] byteen;
wire [`WB_BITS-1:0] wb;
wire wb;
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] store_data;

View File

@@ -11,7 +11,7 @@ interface VX_mul_req_if ();
wire [`MUL_BITS-1:0] mul_op;
wire [`WB_BITS-1:0] wb;
wire wb;
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] rs1_data;

View File

@@ -9,6 +9,7 @@ interface VX_wb_if ();
wire [`NW_BITS-1:0] warp_num;
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] data;
wire is_fp;
wire ready;
endinterface

View File

@@ -2,7 +2,7 @@
module VX_tex_mgr (
input wire clk,
input wire reset,
input wire reset
);
//--

View File

@@ -11,7 +11,7 @@ module VX_tex_unit #(
parameter MAXAMW = 2,
parameter TAGW = 16,
parameter NUMCRQS = 32,
parameter NUMCRQS = 32
) (
input wire clk,
input wire reset,

View File

@@ -11,7 +11,7 @@ double sc_time_stamp() {
Simulator::Simulator() {
// force random values for unitialized signals
Verilated::randReset(2);
Verilated::randReset(1);
// Turn off assertion before reset
Verilated::assertOn(false);
@@ -105,9 +105,8 @@ void Simulator::eval_dram_bus() {
if (!dram_rsp_active_) {
if (dequeue_index != -1) {
vortex_->dram_rsp_valid = 1;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
free(dram_rsp_vec_[dequeue_index].data);
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].block.data(), GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
dram_rsp_active_ = true;
} else {
@@ -141,9 +140,8 @@ void Simulator::eval_dram_bus() {
} else {
dram_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
dram_req.data = (uint8_t*)malloc(GLOBAL_BLOCK_SIZE);
dram_req.tag = vortex_->dram_req_tag;
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.data);
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
dram_rsp_vec_.push_back(dram_req);
}
}

View File

@@ -21,7 +21,7 @@
typedef struct {
int cycles_left;
uint8_t *data;
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
unsigned tag;
} dram_req_t;

11
hw/simulate/verilator.vlt Normal file
View File

@@ -0,0 +1,11 @@
`verilator_config
lint_off -rule BLKANDNBLK -file "../rtl/fp_cores/fpnew/*"
lint_off -rule UNOPTFLAT -file "../rtl/fp_cores/fpnew/*"
lint_off -rule WIDTH -file "../rtl/fp_cores/fpnew/*"
lint_off -rule UNUSED -file "../rtl/fp_cores/fpnew/*"
lint_off -rule LITENDIAN -file "../rtl/fp_cores/fpnew/*"
lint_off -rule IMPORTSTAR -file "../rtl/fp_cores/fpnew/*"
lint_off -rule PINCONNECTEMPTY -file "../rtl/fp_cores/fpnew/*"
//lint_off -rule CASEINCOMPLETE -file "../rtl/fp_cores/fpnew/*"

View File

@@ -12,7 +12,7 @@ echo "inc_list=$inc_list"
{
# read design sources
for dir in $dir_list; do
for file in $(find $dir -name '*.v' -o -name '*.sv' -type f)
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f)
do
echo "read_verilog -sv $inc_list $file"
done