This commit is contained in:
trmontgomery
2020-09-05 17:12:45 -04:00
632 changed files with 106313 additions and 159060 deletions

View File

@@ -1,4 +1,7 @@
.PHONY: build_config
build_config:
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
# Remove the configuration headers that build_config generates.
# Declared phony so a stray file named "clean" cannot mask the target;
# -f keeps the target from failing when the headers were never generated.
.PHONY: clean
clean:
	rm -f ./rtl/VX_user_config.vh ./VX_config.h

View File

@@ -4,14 +4,22 @@ FPGA_BUILD_DIR=build_fpga
all: ase-1c
ase-1c: setup-ase-1c
sources.txt:
./gen_sources.sh > sources.txt
gen_sources: sources.txt
ase-1c: gen_sources setup-ase-1c
make -C $(ASE_BUILD_DIR)_1c
cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_1c/work
ase-2c: setup-ase-2c
ase-2c: gen_sources setup-ase-2c
make -C $(ASE_BUILD_DIR)_2c
cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_2c/work
ase-4c: setup-ase-4c
# Build the 4-core ASE simulation, then copy the Altera FP-core .hex files
# into that build's work directory.
# The copy destination must be the _4c build directory (the one built on the
# line above); $(MAKE) is used so flags and the jobserver reach the sub-make.
ase-4c: gen_sources setup-ase-4c
	$(MAKE) -C $(ASE_BUILD_DIR)_4c
	cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_4c/work
setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile
@@ -19,23 +27,26 @@ setup-ase-2c: $(ASE_BUILD_DIR)_2c/Makefile
setup-ase-4c: $(ASE_BUILD_DIR)_4c/Makefile
$(ASE_BUILD_DIR)_1c/Makefile:
$(ASE_BUILD_DIR)_1c/Makefile: sources.txt
afu_sim_setup -s sources_1c.txt $(ASE_BUILD_DIR)_1c
$(ASE_BUILD_DIR)_2c/Makefile:
$(ASE_BUILD_DIR)_2c/Makefile: sources.txt
afu_sim_setup -s sources_2c.txt $(ASE_BUILD_DIR)_2c
$(ASE_BUILD_DIR)_4c/Makefile:
$(ASE_BUILD_DIR)_4c/Makefile: sources.txt
afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c
fpga-1c: setup-fpga-1c
fpga-1c: gen_sources setup-fpga-1c
cd $(FPGA_BUILD_DIR)_1c && qsub-synth
cp ../rtl/fp_cores/altera/*.hex $(FPGA_BUILD_DIR)_1c
fpga-2c: setup-fpga-2c
fpga-2c: gen_sources setup-fpga-2c
cd $(FPGA_BUILD_DIR)_2c && qsub-synth
cp ../rtl/fp_cores/altera/*.hex $(FPGA_BUILD_DIR)_2c
fpga-4c: setup-fpga-4c
fpga-4c: gen_sources setup-fpga-4c
cd $(FPGA_BUILD_DIR)_4c && qsub-synth
cp ../rtl/fp_cores/altera/*.hex $(FPGA_BUILD_DIR)_4c
setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf
@@ -62,20 +73,20 @@ run-ase-4c:
cd $(ASE_BUILD_DIR)_4c && make sim
clean-ase-1c:
rm -rf $(ASE_BUILD_DIR)_1c
rm -rf $(ASE_BUILD_DIR)_1c sources.txt
clean-ase-2c:
rm -rf $(ASE_BUILD_DIR)_2c
rm -rf $(ASE_BUILD_DIR)_2c sources.txt
clean-ase-4c:
rm -rf $(ASE_BUILD_DIR)_4c
rm -rf $(ASE_BUILD_DIR)_4c sources.txt
clean-fpga-1c:
rm -rf $(FPGA_BUILD_DIR)_1c
rm -rf $(FPGA_BUILD_DIR)_1c sources.txt
clean-fpga-2c:
rm -rf $(FPGA_BUILD_DIR)_2c
rm -rf $(FPGA_BUILD_DIR)_2c sources.txt
clean-fpga-4c:
rm -rf $(FPGA_BUILD_DIR)_4c
rm -rf $(FPGA_BUILD_DIR)_4c sources.txt

View File

@@ -60,8 +60,9 @@ qsub-sim
make ase
# tests
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n1 -s4 -e4
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
# modify "vsim_run.tcl" to dump VCD trace
@@ -74,6 +75,10 @@ tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt
# compress VCD trace
tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd
tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd
tar -zcvf trace.vcd.tar.gz trace.vcd
tar -zcvf run.log.tar.gz run.log
tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd
# decompress VCD trace
tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz
@@ -89,8 +94,16 @@ kill -9 <pid>
lsof +D build_ase_1c
# quick off cache synthesis
make -C pipeline > pipeline/build.log 2>&1 &
make -C cache > cache/build.log 2>&1 &
make -C core > core/build.log 2>&1 &
make -C vortex > vortex/build.log 2>&1 &
make -C top > top/build.log 2>&1 &
make -C pipeline clean && make -C pipeline > pipeline/build.log 2>&1 &
make -C cache clean && make -C cache > cache/build.log 2>&1 &
make -C core clean && make -C core > core/build.log 2>&1 &
make -C vortex clean && make -C vortex > vortex/build.log 2>&1 &
make -C top clean && make -C top > top/build.log 2>&1 &
# How to calculate the maximum operating frequency?
200 MHz -> period = 1/(200x10^6) s = 5 ns
if slack = +1.664 ns -> minimal period = 5 - 1.664 = 3.336 ns -> fmax = 1/3.336 ns ~= 300 MHz
# build rtlsim from driver tests
make -C ../../rtlsim clean && reset && make -C ../../rtlsim

20
hw/opae/gen_sources.sh Executable file
View File

@@ -0,0 +1,20 @@
#!/bin/bash
#
# Print a source list on stdout.
#
# For every directory in dir_list this emits one "+incdir+<dir>" line followed
# by the path of each *.v / *.sv regular file located directly inside that
# directory (no recursion). Files whose path contains an entry of exclude_list
# are omitted.

dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl ../rtl/fp_cores/fpnew/src/common_cells/include ../rtl/fp_cores ../rtl/fp_cores/altera'

exclude_list='VX_fpnew.v'

# read design sources
for dir in $dir_list; do
    echo "+incdir+$dir"
    # The two -name tests are grouped so that -type f applies to both
    # extensions; without the parentheses it would only constrain '*.sv'.
    # Reading line by line keeps paths containing spaces intact.
    while IFS= read -r file; do
        exclude=0
        for fe in $exclude_list; do
            # Literal substring match: the quoted pattern prevents the '.'
            # in an excluded name from acting as a wildcard.
            if [[ $file == *"$fe"* ]]; then
                exclude=1
                break
            fi
        done
        if [[ $exclude == 0 ]]; then
            echo "$file"
        fi
    done < <(find "$dir" -maxdepth 1 \( -name '*.v' -o -name '*.sv' \) -type f)
done

View File

@@ -1,112 +0,0 @@
vortex_afu.json
QI:vortex_afu.qsf
#+define+SCOPE
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE
#+define+DBG_PRINT_SCOPE
+incdir+.
+incdir+../rtl
+incdir+../rtl/interfaces
+incdir+../rtl/pipe_regs
+incdir+../rtl/cache
+incdir+../rtl/libs
../rtl/VX_user_config.vh
../rtl/VX_config.vh
../rtl/VX_define.vh
../rtl/cache/VX_cache_config.vh
../rtl/cache/VX_cache.v
../rtl/cache/VX_cache_core_rsp_merge.v
../rtl/cache/VX_cache_core_req_bank_sel.v
../rtl/cache/VX_cache_dram_req_arb.v
../rtl/cache/VX_cache_dram_fill_arb.v
../rtl/cache/VX_cache_miss_resrv.v
../rtl/cache/VX_bank.v
../rtl/cache/VX_bank_core_req_arb.v
../rtl/cache/VX_snp_rsp_arb.v
../rtl/cache/VX_tag_data_access.v
../rtl/cache/VX_tag_data_structure.v
../rtl/cache/VX_snp_forwarder.v
../rtl/cache/VX_prefetcher.v
../rtl/interfaces/VX_branch_rsp_if.v
../rtl/interfaces/VX_cache_core_req_if.v
../rtl/interfaces/VX_cache_core_rsp_if.v
../rtl/interfaces/VX_cache_dram_req_if.v
../rtl/interfaces/VX_cache_dram_rsp_if.v
../rtl/interfaces/VX_cache_snp_req_if.v
../rtl/interfaces/VX_cache_snp_rsp_if.v
../rtl/interfaces/VX_csr_req_if.v
../rtl/interfaces/VX_csr_io_req_if.v
../rtl/interfaces/VX_csr_io_rsp_if.v
../rtl/interfaces/VX_exec_unit_req_if.v
../rtl/interfaces/VX_backend_req_if.v
../rtl/interfaces/VX_gpr_read_if.v
../rtl/interfaces/VX_gpu_inst_req_if.v
../rtl/interfaces/VX_inst_meta_if.v
../rtl/interfaces/VX_jal_rsp_if.v
../rtl/interfaces/VX_join_if.v
../rtl/interfaces/VX_lsu_req_if.v
../rtl/interfaces/VX_warp_ctl_if.v
../rtl/interfaces/VX_wb_if.v
../rtl/interfaces/VX_wstall_if.v
../rtl/libs/VX_generic_register.v
../rtl/libs/VX_mult.v
../rtl/libs/VX_divide.v
../rtl/libs/VX_generic_stack.v
../rtl/libs/VX_priority_encoder.v
../rtl/libs/VX_generic_queue.v
../rtl/libs/VX_indexable_queue.v
../rtl/libs/VX_fair_arbiter.v
../rtl/libs/VX_fixed_arbiter.v
../rtl/libs/VX_rr_arbiter.v
../rtl/libs/VX_countones.v
../rtl/libs/VX_scope.v
../rtl/Vortex.v
../rtl/VX_cluster.v
../rtl/VX_core.v
../rtl/VX_mem_unit.v
../rtl/VX_pipeline.v
../rtl/VX_front_end.v
../rtl/VX_back_end.v
../rtl/VX_fetch.v
../rtl/VX_scheduler.v
../rtl/VX_exec_unit.v
../rtl/VX_warp.v
../rtl/VX_icache_stage.v
../rtl/VX_gpr_wrapper.v
../rtl/VX_gpu_inst.v
../rtl/VX_writeback.v
../rtl/VX_csr_pipe.v
../rtl/VX_csr_data.v
../rtl/VX_csr_arb.v
../rtl/VX_csr_io_arb.v
../rtl/VX_warp_sched.v
../rtl/VX_gpr_ram.v
../rtl/VX_gpr_stage.v
../rtl/VX_alu_unit.v
../rtl/VX_lsu_unit.v
../rtl/VX_decode.v
../rtl/VX_inst_multiplex.v
../rtl/VX_dcache_arb.v
../rtl/VX_mem_arb.v
../rtl/VX_f_d_reg.v
../rtl/VX_i_d_reg.v
../rtl/VX_d_e_reg.v
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv

View File

@@ -1,3 +1,25 @@
+define+NUM_CORES=1
+define+SYNTHESIS
+define+QUARTUS
+define+FPU_FAST
#+define+SCOPE
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE
#+define+DBG_CORE_REQ_INFO
#+define+DBG_PRINT_SCOPE
vortex_afu.json
QI:vortex_afu.qsf
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
C:sources.txt

View File

@@ -1,4 +1,13 @@
+define+NUM_CORES=2
+define+L2_ENABLE=0
+define+SYNTHESIS
+define+QUARTUS
+define+FPU_FAST
vortex_afu.json
QI:vortex_afu.qsf
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
C:sources.txt

View File

@@ -1,4 +1,13 @@
+define+NUM_CORES=4
+define+L2_ENABLE=0
+define+SYNTHESIS
+define+QUARTUS
+define+FPU_FAST
vortex_afu.json
QI:vortex_afu.qsf
ccip_interface_reg.sv
ccip_std_afu.sv
vortex_afu.sv
C:sources.txt

View File

@@ -1,7 +1,9 @@
# Analysis & Synthesis Assignments
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
# set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name VERILOG_MACRO FPU_FAST

View File

@@ -913,10 +913,9 @@ assign cmd_run_done = !vx_busy;
Vortex #() vortex (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
.clk (clk),
.reset (SoftReset | vx_reset),
@@ -988,32 +987,36 @@ Vortex #() vortex (
localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST});
localparam SCOPE_SR_DEPTH = 2;
`SCOPE_ASSIGN(scope_dram_req_valid, vx_dram_req_valid);
`SCOPE_ASSIGN(scope_dram_req_addr, {vx_dram_req_addr, 4'b0});
`SCOPE_ASSIGN(scope_dram_req_rw, vx_dram_req_rw);
`SCOPE_ASSIGN(scope_dram_req_byteen,vx_dram_req_byteen);
`SCOPE_ASSIGN(scope_dram_req_data, vx_dram_req_data);
`SCOPE_ASSIGN(scope_dram_req_tag, vx_dram_req_tag);
`SCOPE_ASSIGN(scope_dram_req_ready, vx_dram_req_ready);
`STATIC_ASSERT(SCOPE_DATAW == 1766, "invalid size")
`SCOPE_ASSIGN(scope_dram_rsp_valid, vx_dram_rsp_valid);
`SCOPE_ASSIGN(scope_dram_rsp_data, vx_dram_rsp_data);
`SCOPE_ASSIGN(scope_dram_rsp_tag, vx_dram_rsp_tag);
`SCOPE_ASSIGN(scope_dram_rsp_ready, vx_dram_rsp_ready);
`SCOPE_ASSIGN (scope_dram_req_valid, vx_dram_req_valid);
`SCOPE_ASSIGN (scope_dram_req_addr, {vx_dram_req_addr, 4'b0});
`SCOPE_ASSIGN (scope_dram_req_rw, vx_dram_req_rw);
`SCOPE_ASSIGN (scope_dram_req_byteen,vx_dram_req_byteen);
`SCOPE_ASSIGN (scope_dram_req_data, vx_dram_req_data);
`SCOPE_ASSIGN (scope_dram_req_tag, vx_dram_req_tag);
`SCOPE_ASSIGN (scope_dram_req_ready, vx_dram_req_ready);
`SCOPE_ASSIGN(scope_snp_req_valid, vx_snp_req_valid);
`SCOPE_ASSIGN(scope_snp_req_addr, {vx_snp_req_addr, 4'b0});
`SCOPE_ASSIGN(scope_snp_req_invalidate, vx_snp_req_invalidate);
`SCOPE_ASSIGN(scope_snp_req_tag, vx_snp_req_tag);
`SCOPE_ASSIGN(scope_snp_req_ready, vx_snp_req_ready);
`SCOPE_ASSIGN (scope_dram_rsp_valid, vx_dram_rsp_valid);
`SCOPE_ASSIGN (scope_dram_rsp_data, vx_dram_rsp_data);
`SCOPE_ASSIGN (scope_dram_rsp_tag, vx_dram_rsp_tag);
`SCOPE_ASSIGN (scope_dram_rsp_ready, vx_dram_rsp_ready);
`SCOPE_ASSIGN(scope_snp_rsp_valid, vx_snp_rsp_valid);
`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag);
`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready);
`SCOPE_ASSIGN (scope_snp_req_valid, vx_snp_req_valid);
`SCOPE_ASSIGN (scope_snp_req_addr, {vx_snp_req_addr, 4'b0});
`SCOPE_ASSIGN (scope_snp_req_invalidate, vx_snp_req_invalidate);
`SCOPE_ASSIGN (scope_snp_req_tag, vx_snp_req_tag);
`SCOPE_ASSIGN (scope_snp_req_ready, vx_snp_req_ready);
`SCOPE_ASSIGN(scope_snp_rsp_valid, vx_snp_rsp_valid);
`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag);
`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready);
`SCOPE_ASSIGN (scope_snp_rsp_valid, vx_snp_rsp_valid);
`SCOPE_ASSIGN (scope_snp_rsp_tag, vx_snp_rsp_tag);
`SCOPE_ASSIGN (scope_snp_rsp_ready, vx_snp_rsp_ready);
`SCOPE_ASSIGN (scope_snp_rsp_valid, vx_snp_rsp_valid);
`SCOPE_ASSIGN (scope_snp_rsp_tag, vx_snp_rsp_tag);
`SCOPE_ASSIGN (scope_snp_rsp_ready, vx_snp_rsp_ready);
`SCOPE_ASSIGN (scope_busy, vx_busy);
wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|| (scope_icache_rsp_valid && scope_icache_rsp_ready)
@@ -1023,10 +1026,16 @@ wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|| (scope_dram_rsp_valid && scope_dram_rsp_ready)
|| (scope_snp_req_valid && scope_snp_req_ready)
|| (scope_snp_rsp_valid && scope_snp_rsp_ready)
|| (scope_issue_valid && scope_issue_ready)
|| scope_gpr_rsp_valid
|| scope_bank_valid_st0
|| scope_bank_valid_st1
|| scope_bank_valid_st2
|| scope_bank_stall_pipe;
|| scope_bank_stall_pipe
|| scope_scoreboard_delay
|| scope_gpr_delay
|| scope_execute_delay
|| scope_busy;
wire scope_start = vx_reset;
@@ -1035,8 +1044,7 @@ wire [SCOPE_DATAW+1:0] scope_data_in_ste;
assign scope_data_in_st[0] = {`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST, scope_changed, scope_start};
assign scope_data_in_ste = scope_data_in_st[SCOPE_SR_DEPTH-1];
genvar i;
for (i = 1; i < SCOPE_SR_DEPTH; i++) begin
for (genvar i = 1; i < SCOPE_SR_DEPTH; i++) begin
VX_generic_register #(
.N (SCOPE_DATAW+2)
) scope_sr (

View File

@@ -1,132 +1,125 @@
`include "VX_define.vh"
module VX_alu_unit (
input wire clk,
input wire reset,
input wire [31:0] src_a,
input wire [31:0] src_b,
input wire src_rs2,
input wire [31:0] itype_immed,
input wire [19:0] upper_immed,
input wire [4:0] alu_op,
input wire [31:0] curr_PC,
output reg [31:0] alu_result,
output reg alu_stall
);
wire[31:0] div_result_unsigned;
wire[31:0] div_result_signed;
wire[31:0] rem_result_unsigned;
wire[31:0] rem_result_signed;
wire[63:0] mul_result;
wire[31:0] alu_in1 = src_a;
wire[31:0] alu_in2 = (src_rs2 == `RS2_IMMED) ? itype_immed : src_b;
wire[31:0] upper_immed_s = {upper_immed, {12{1'b0}}};
module VX_alu_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
reg [7:0] inst_delay;
reg [7:0] curr_inst_delay;
always @(*) begin
case (alu_op)
`ALU_DIV,
`ALU_DIVU,
`ALU_REM,
`ALU_REMU: inst_delay = `DIV_LATENCY;
`ALU_MUL,
`ALU_MULH,
`ALU_MULHSU,
`ALU_MULHU: inst_delay = `MUL_LATENCY;
default: inst_delay = 0;
endcase
// Inputs
VX_alu_req_if alu_req_if,
// Outputs
VX_branch_ctl_if branch_ctl_if,
VX_exu_to_cmt_if alu_commit_if
);
reg [`NUM_THREADS-1:0][31:0] alu_result;
reg [`NUM_THREADS-1:0][31:0] add_result;
reg [`NUM_THREADS-1:0][32:0] sub_result;
reg [`NUM_THREADS-1:0][31:0] shr_result;
reg [`NUM_THREADS-1:0][31:0] msc_result;
wire is_br_op = alu_req_if.is_br_op;
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
wire alu_signed = `ALU_SIGNED(alu_op);
wire [1:0] alu_op_class = `ALU_OP_CLASS(alu_op);
wire is_sub = (alu_op == `ALU_SUB);
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1;
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
end
end
wire inst_stalled = (curr_inst_delay != inst_delay);
always @(posedge clk) begin
if (reset) begin
curr_inst_delay <= 0;
end else begin
curr_inst_delay <= inst_stalled ? (curr_inst_delay + 1) : 0;
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
always @(*) begin
sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
end
end
assign alu_stall = inst_stalled;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
`IGNORE_WARNINGS_BEGIN
wire [32:0] shr_value = $signed(shr_in1) >>> alu_in2_imm[i][4:0];
`IGNORE_WARNINGS_END
always @(*) begin
shr_result[i] = shr_value[31:0];
end
end
always @(*) begin
case (alu_op)
`ALU_ADD: alu_result = $signed(alu_in1) + $signed(alu_in2);
`ALU_SUB: alu_result = $signed(alu_in1) - $signed(alu_in2);
`ALU_SLLA: alu_result = alu_in1 << alu_in2[4:0];
`ALU_SLT: alu_result = ($signed(alu_in1) < $signed(alu_in2)) ? 32'h1 : 32'h0;
`ALU_SLTU: alu_result = alu_in1 < alu_in2 ? 32'h1 : 32'h0;
`ALU_XOR: alu_result = alu_in1 ^ alu_in2;
`ALU_SRL: alu_result = alu_in1 >> alu_in2[4:0];
`ALU_SRA: alu_result = $signed(alu_in1) >>> alu_in2[4:0];
`ALU_OR: alu_result = alu_in1 | alu_in2;
`ALU_AND: alu_result = alu_in2 & alu_in1;
`ALU_SUBU: alu_result = (alu_in1 >= alu_in2) ? 32'h0 : 32'hffffffff;
`ALU_LUI: alu_result = upper_immed_s;
`ALU_AUIPC: alu_result = $signed(curr_PC) + $signed(upper_immed_s);
`ALU_MUL: alu_result = mul_result[31:0];
`ALU_MULH: alu_result = mul_result[63:32];
`ALU_MULHSU: alu_result = mul_result[63:32];
`ALU_MULHU: alu_result = mul_result[63:32];
`ALU_DIV: alu_result = (alu_in2 == 0) ? 32'hffffffff : div_result_signed;
`ALU_DIVU: alu_result = (alu_in2 == 0) ? 32'hffffffff : div_result_unsigned;
`ALU_REM: alu_result = (alu_in2 == 0) ? alu_in1 : rem_result_signed;
`ALU_REMU: alu_result = (alu_in2 == 0) ? alu_in1 : rem_result_unsigned;
default: alu_result = 32'h0;
endcase
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
case (alu_op)
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
//`ALU_SLL,
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
endcase
end
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
case (alu_op_class)
0: alu_result[i] = add_result[i];
1: alu_result[i] = {31'b0, sub_result[i][32]};
2: alu_result[i] = is_sub ? sub_result[i][31:0] : shr_result[i];
default: alu_result[i] = msc_result[i];
endcase
end
end
wire is_jal = is_br_op && (br_op == `BR_JAL || br_op == `BR_JALR);
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.NSIGNED(0),
.DSIGNED(0),
.PIPELINE(`DIV_LATENCY)
) udiv (
.clk(clk),
.reset(reset),
.numer(alu_in1),
.denom(alu_in2),
.quotient(div_result_unsigned),
.remainder(rem_result_unsigned)
wire [31:0] br_dest = add_result[alu_req_if.tid];
wire [32:0] cmp_result = sub_result[alu_req_if.tid];
wire [32:0] cmp_result_r;
wire is_br_op_r;
`IGNORE_WARNINGS_BEGIN
wire [`BR_BITS-1:0] br_op_r;
`IGNORE_WARNINGS_END
// output
wire stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33)
) alu_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}),
.out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r})
);
wire is_less = cmp_result_r[32];
wire is_equal = ~(| cmp_result_r[31:0]);
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.NSIGNED(1),
.DSIGNED(1),
.PIPELINE(`DIV_LATENCY)
) sdiv (
.clk(clk),
.reset(reset),
.numer(alu_in1),
.denom(alu_in2),
.quotient(div_result_signed),
.remainder(rem_result_signed)
);
wire br_neg = `BR_NEG(br_op_r);
wire br_less = `BR_LESS(br_op_r);
wire br_static = `BR_STATIC(br_op_r);
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
wire [32:0] mul_dataa = {(alu_op == `ALU_MULHU) ? 1'b0 : alu_in1[31], alu_in1};
wire [32:0] mul_datab = {(alu_op == `ALU_MULHU || alu_op == `ALU_MULHSU) ? 1'b0 : alu_in2[31], alu_in2};
assign branch_ctl_if.valid = alu_commit_if.valid && alu_commit_if.ready && is_br_op_r;
assign branch_ctl_if.wid = alu_commit_if.wid;
assign branch_ctl_if.taken = br_taken;
VX_mult #(
.WIDTHA(33),
.WIDTHB(33),
.WIDTHP(64),
.SIGNED(1),
.PIPELINE(`MUL_LATENCY)
) multiplier (
.clk(clk),
.reset(reset),
.dataa(mul_dataa),
.datab(mul_datab),
.result(mul_result)
);
// can accept new request?
assign alu_req_if.ready = ~stall_out;
endmodule

View File

@@ -1,171 +0,0 @@
`include "VX_define.vh"
module VX_back_end #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_BE_IO
input wire clk,
input wire reset,
VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if,
input wire schedule_delay,
VX_cache_core_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if,
VX_backend_req_if bckE_req_if,
VX_wb_if writeback_if,
VX_warp_ctl_if warp_ctl_if,
output wire mem_delay,
output wire exec_delay,
output wire gpr_stage_delay,
output wire ebreak
);
wire no_slot_mem;
wire no_slot_exec;
// LSU input + output
VX_lsu_req_if lsu_req_if();
VX_wb_if mem_wb_if();
// Exec unit input + output
VX_exec_unit_req_if exec_unit_req_if();
VX_wb_if inst_exec_wb_if();
// GPU unit input
VX_gpu_inst_req_if gpu_inst_req_if();
// CSR unit inputs
VX_csr_req_if csr_req_if();
VX_wb_if csr_wb_if();
wire no_slot_csr;
wire stall_gpr_csr;
VX_gpr_stage gpr_stage (
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
.writeback_if (writeback_if),
.bckE_req_if (bckE_req_if),
// New
.exec_unit_req_if (exec_unit_req_if),
.lsu_req_if (lsu_req_if),
.gpu_inst_req_if (gpu_inst_req_if),
.csr_req_if (csr_req_if),
.stall_gpr_csr (stall_gpr_csr),
// End new
.memory_delay (mem_delay),
.exec_delay (exec_delay),
.delay (gpr_stage_delay)
);
assign ebreak = exec_unit_req_if.is_etype && (| exec_unit_req_if.valid);
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
`SCOPE_SIGNALS_LSU_BIND
.clk (clk),
.reset (reset),
.lsu_req_if (lsu_req_if),
.mem_wb_if (mem_wb_if),
.dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if),
.delay (mem_delay),
.no_slot_mem (no_slot_mem)
);
VX_exec_unit exec_unit (
.clk (clk),
.reset (reset),
.exec_unit_req_if(exec_unit_req_if),
.inst_exec_wb_if(inst_exec_wb_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.delay (exec_delay),
.no_slot_exec (no_slot_exec)
);
VX_gpu_inst gpu_inst (
.gpu_inst_req_if(gpu_inst_req_if),
.warp_ctl_if (warp_ctl_if)
);
VX_csr_req_if issued_csr_req_if();
VX_wb_if csr_pipe_rsp_if();
VX_csr_arb csr_arb (
.clk (clk),
.reset (reset),
.csr_pipe_stall (stall_gpr_csr),
.csr_core_req_if (csr_req_if),
.csr_io_req_if (csr_io_req_if),
.issued_csr_req_if(issued_csr_req_if),
.csr_pipe_rsp_if (csr_pipe_rsp_if),
.csr_wb_if (csr_wb_if),
.csr_io_rsp_if (csr_io_rsp_if)
);
VX_csr_pipe #(
.CORE_ID(CORE_ID)
) csr_pipe (
.clk (clk),
.reset (reset),
.no_slot_csr (no_slot_csr),
.csr_req_if (issued_csr_req_if),
.writeback_if (writeback_if),
.csr_wb_if (csr_pipe_rsp_if),
.stall_gpr_csr (stall_gpr_csr)
);
VX_writeback writeback (
.clk (clk),
.reset (reset),
.mem_wb_if (mem_wb_if),
.inst_exec_wb_if(inst_exec_wb_if),
.csr_wb_if (csr_wb_if),
.writeback_if (writeback_if),
.no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec),
.no_slot_csr (no_slot_csr)
);
`SCOPE_ASSIGN(scope_decode_valid, bckE_req_if.valid);
`SCOPE_ASSIGN(scope_decode_warp_num, bckE_req_if.warp_num);
`SCOPE_ASSIGN(scope_decode_curr_PC, bckE_req_if.curr_PC);
`SCOPE_ASSIGN(scope_decode_is_jal, bckE_req_if.is_jal);
`SCOPE_ASSIGN(scope_decode_rs1, bckE_req_if.rs1);
`SCOPE_ASSIGN(scope_decode_rs2, bckE_req_if.rs2);
`SCOPE_ASSIGN(scope_execute_valid, exec_unit_req_if.valid);
`SCOPE_ASSIGN(scope_execute_warp_num, exec_unit_req_if.warp_num);
`SCOPE_ASSIGN(scope_execute_curr_PC, exec_unit_req_if.curr_PC);
`SCOPE_ASSIGN(scope_execute_rd, exec_unit_req_if.rd);
`SCOPE_ASSIGN(scope_execute_a, exec_unit_req_if.a_reg_data);
`SCOPE_ASSIGN(scope_execute_b, exec_unit_req_if.b_reg_data);
`SCOPE_ASSIGN(scope_writeback_valid, writeback_if.valid);
`SCOPE_ASSIGN(scope_writeback_warp_num, writeback_if.warp_num);
`SCOPE_ASSIGN(scope_writeback_curr_PC, writeback_if.curr_PC);
`SCOPE_ASSIGN(scope_writeback_wb, writeback_if.wb);
`SCOPE_ASSIGN(scope_writeback_rd, writeback_if.rd);
`SCOPE_ASSIGN(scope_writeback_data, writeback_if.data);
endmodule

View File

@@ -5,10 +5,9 @@ module VX_cluster #(
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
// Clock
input wire clk,
@@ -135,18 +134,15 @@ module VX_cluster #(
wire [`NUM_CORES-1:0] per_core_busy;
wire [`NUM_CORES-1:0] per_core_ebreak;
genvar i;
for (i = 0; i < `NUM_CORES; i++) begin
for (genvar i = 0; i < `NUM_CORES; i++) begin
VX_core #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) core (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
.clk (clk),
.reset (reset),
@@ -316,7 +312,7 @@ module VX_cluster #(
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdin_tag;
wire[`NUM_CORES-1:0] l2_snp_fwdin_ready;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign l2_core_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
assign l2_core_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
@@ -367,7 +363,6 @@ module VX_cluster #(
.NUM_BANKS (`L2NUM_BANKS),
.WORD_SIZE (`L2WORD_SIZE),
.NUM_REQUESTS (`L2NUM_REQUESTS),
.STAGE_1_CYCLES (`L2STAGE_1_CYCLES),
.CREQ_SIZE (`L2CREQ_SIZE),
.MRVQ_SIZE (`L2MRVQ_SIZE),
.DFPQ_SIZE (`L2DFPQ_SIZE),
@@ -472,7 +467,7 @@ module VX_cluster #(
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdin_tag;
wire[`NUM_CORES-1:0] arb_snp_fwdin_ready;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign arb_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
assign arb_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];

124
hw/rtl/VX_commit.v Normal file
View File

@@ -0,0 +1,124 @@
`include "VX_define.vh"
// VX_commit: collects the commit interfaces of the six execution units
// (ALU, LSU, MUL, CSR, FPU, GPU), derives a registered per-cycle commit
// summary (commit count and merged FP exception flags) that is driven onto
// cmt_to_csr_if, and forwards all commit interfaces to a VX_writeback
// instance that drives writeback_if.
//
// Parameters:
//   CORE_ID - passed to VX_writeback and printed in the debug trace.
module VX_commit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// inputs
VX_exu_to_cmt_if alu_commit_if,
VX_exu_to_cmt_if lsu_commit_if,
VX_exu_to_cmt_if mul_commit_if,
VX_exu_to_cmt_if csr_commit_if,
VX_fpu_to_cmt_if fpu_commit_if,
VX_exu_to_cmt_if gpu_commit_if,
// outputs
VX_writeback_if writeback_if,
VX_cmt_to_csr_if cmt_to_csr_if
);
// CSRs update
// One bit per execution unit, set when that unit presents a valid commit.
// NOTE(review): the mask uses `valid` only, whereas the debug trace below
// requires `valid && ready` - confirm a stalled commit is not counted on
// several consecutive cycles.
wire [`NUM_EXS-1:0] commited_mask;
assign commited_mask = {alu_commit_if.valid,
lsu_commit_if.valid,
csr_commit_if.valid,
mul_commit_if.valid,
fpu_commit_if.valid,
gpu_commit_if.valid};
// Number of units committing this cycle (population count of the mask).
wire [$clog2(`NUM_EXS+1)-1:0] num_commits;
VX_countones #(
.N(`NUM_EXS)
) valids_counter (
.valids(commited_mask),
.count (num_commits)
);
// Merge the FPU exception flags: each flag is the OR of that flag over all
// threads enabled in the FPU commit's thread mask.
fflags_t fflags;
always @(*) begin
fflags = 0;
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (fpu_commit_if.tmask[i]) begin
fflags.NX |= fpu_commit_if.fflags[i].NX;
fflags.UF |= fpu_commit_if.fflags[i].UF;
fflags.OF |= fpu_commit_if.fflags[i].OF;
fflags.DZ |= fpu_commit_if.fflags[i].DZ;
fflags.NV |= fpu_commit_if.fflags[i].NV;
end
end
end
// Registered copies of the commit summary; cmt_to_csr_if therefore sees the
// information one clock after the commit. `reset` is not applied to these
// registers in this block.
fflags_t fflags_r;
reg has_fflags_r;
reg [`NW_BITS-1:0] wid_r;
reg [$clog2(`NUM_EXS+1)-1:0] num_commits_r;
reg csr_update_r;
always @(posedge clk) begin
csr_update_r <= (| commited_mask);
fflags_r <= fflags;
has_fflags_r <= fpu_commit_if.valid && fpu_commit_if.has_fflags;
// The warp id is always taken from the FPU commit interface.
wid_r <= fpu_commit_if.wid;
// NOTE(review): the count is shifted left by $clog2(`NUM_THREADS)
// (presumably scaling it by the thread count) but stored in a register that
// is only as wide as num_commits - confirm upper bits cannot be truncated.
num_commits_r <= (num_commits << $clog2(`NUM_THREADS));
end
assign cmt_to_csr_if.valid = csr_update_r;
assign cmt_to_csr_if.wid = wid_r;
assign cmt_to_csr_if.num_commits = num_commits_r;
assign cmt_to_csr_if.has_fflags = has_fflags_r;
assign cmt_to_csr_if.fflags = fflags_r;
// Writeback
// All six commit interfaces are handed to VX_writeback, which drives
// writeback_if.
VX_writeback #(
.CORE_ID(CORE_ID)
) writeback (
.clk (clk),
.reset (reset),
.alu_commit_if (alu_commit_if),
.lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
.writeback_if (writeback_if)
);
// Optional simulation trace: prints one line per unit for every commit that
// is accepted (valid && ready) on a clock edge.
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (alu_commit_if.valid && alu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data);
end
if (lsu_commit_if.valid && lsu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.wid, lsu_commit_if.PC, lsu_commit_if.tmask, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
end
if (csr_commit_if.valid && csr_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
end
if (mul_commit_if.valid && mul_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=MUL, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.wid, mul_commit_if.PC, mul_commit_if.tmask, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
end
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
end
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);
end
end
`else
// Without the trace, fpu_commit_if.PC is not read in this module, so it is
// explicitly marked as unused.
`UNUSED_VAR(fpu_commit_if.PC)
`endif
endmodule

View File

@@ -27,10 +27,6 @@
`define GLOBAL_BLOCK_SIZE 16
`endif
`ifndef NUM_CSRS
`define NUM_CSRS 1024
`endif
`ifndef STARTUP_ADDR
`define STARTUP_ADDR 32'h80000000
`endif
@@ -39,10 +35,6 @@
`define SHARED_MEM_BASE_ADDR 32'h6FFFF000
`endif
`ifndef STACK_BASE_ADDR
`define STACK_BASE_ADDR 20'h6FFFF
`endif
`ifndef IO_BUS_BASE_ADDR
`define IO_BUS_BASE_ADDR 32'hFFFFFF00
`endif
@@ -59,31 +51,121 @@
`define L3_ENABLE (`NUM_CLUSTERS > 1)
`endif
// Configuration Values =======================================================
`ifndef EXT_M_DISABLE
`define EXT_M_ENABLE
`endif
`ifndef EXT_F_DISABLE
`define EXT_F_ENABLE
`endif
// Device identification
`define VENDOR_ID 0
`define ARCHITECTURE_ID 0
`define IMPLEMENTATION_ID 0
// CSR Addresses ==============================================================
///////////////////////////////////////////////////////////////////////////////
`define CSR_VEND_ID 12'hF11
`define CSR_ARCH_ID 12'hF12
`define CSR_IMPL_ID 12'hF13
`define CSR_GTID 12'hF14
`ifndef LATENCY_IMUL
`define LATENCY_IMUL 3
`endif
`ifndef LATENCY_FNONCOMP
`define LATENCY_FNONCOMP 1
`endif
`ifndef LATENCY_FADDMUL
`define LATENCY_FADDMUL 3
`endif
`ifndef LATENCY_FMADD
`define LATENCY_FMADD 4
`endif
`ifndef LATENCY_FDIV
`define LATENCY_FDIV 15
`endif
`ifndef LATENCY_FSQRT
`define LATENCY_FSQRT 10
`endif
`ifndef LATENCY_ITOF
`define LATENCY_ITOF 7
`endif
`ifndef LATENCY_FTOI
`define LATENCY_FTOI 3
`endif
`ifndef LATENCY_FDIVSQRT
`define LATENCY_FDIVSQRT 10
`endif
`ifndef LATENCY_FCONV
`define LATENCY_FCONV 3
`endif
// CSR Addresses //////////////////////////////////////////////////////////////
`define CSR_FFLAGS 12'h001
`define CSR_FRM 12'h002
`define CSR_FCSR 12'h003
`define CSR_LTID 12'h020
`define CSR_LWID 12'h021
`define CSR_GTID 12'h022
`define CSR_GWID 12'h023
`define CSR_GCID 12'h024
`define CSR_NT 12'h025
`define CSR_NW 12'h026
`define CSR_NC 12'h027
`define CSR_CYCLE_L 12'hC00
`define CSR_SATP 12'h180
`define CSR_PMPCFG0 12'h3A0
`define CSR_PMPADDR0 12'h3B0
`define CSR_MSTATUS 12'h300
`define CSR_MISA 12'h301
`define CSR_MEDELEG 12'h302
`define CSR_MIDELEG 12'h303
`define CSR_MIE 12'h304
`define CSR_MTVEC 12'h305
`define CSR_MEPC 12'h341
`define CSR_CYCLE 12'hC00
`define CSR_CYCLE_H 12'hC80
`define CSR_INSTR_L 12'hC02
`define CSR_INSTR_H 12'hC82
`define CSR_INSTRET 12'hC02
`define CSR_INSTRET_H 12'hC82
`define CSR_MVENDORID 12'hF11
`define CSR_MARCHID 12'hF12
`define CSR_MIMPID 12'hF13
`define CSR_MHARTID 12'hF14
// Pipeline Queues ============================================================
// Size of instruction queue
`ifndef IBUF_SIZE
`define IBUF_SIZE 8
`endif
// Size of LSU Request Queue
`ifndef LSUQ_SIZE
`define LSUQ_SIZE 8
`endif
// Size of MUL Request Queue
`ifndef MULQ_SIZE
`define MULQ_SIZE 8
`endif
// Size of FPU Request Queue
`ifndef FPUQ_SIZE
`define FPUQ_SIZE 8
`endif
// Dcache Configurable Knobs ==================================================
@@ -107,11 +189,6 @@
`define DWORD_SIZE 4
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef DSTAGE_1_CYCLES
`define DSTAGE_1_CYCLES 1
`endif
// Core Request Queue Size
`ifndef DCREQ_SIZE
`define DCREQ_SIZE `NUM_WARPS
@@ -124,12 +201,12 @@
// Dram Fill Rsp Queue Size
`ifndef DDFPQ_SIZE
`define DDFPQ_SIZE 16
`define DDFPQ_SIZE 8
`endif
// Snoop Req Queue Size
`ifndef DSNRQ_SIZE
`define DSNRQ_SIZE 16
`define DSNRQ_SIZE 8
`endif
// Core Writeback Queue Size
@@ -149,7 +226,7 @@
// Prefetcher
`ifndef DPRFQ_SIZE
`define DPRFQ_SIZE 16
`define DPRFQ_SIZE 8
`endif
`ifndef DPRFQ_STRIDE
@@ -178,11 +255,6 @@
`define IWORD_SIZE 4
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef ISTAGE_1_CYCLES
`define ISTAGE_1_CYCLES 1
`endif
// Core Request Queue Size
`ifndef ICREQ_SIZE
`define ICREQ_SIZE `NUM_WARPS
@@ -195,7 +267,7 @@
// Dram Fill Rsp Queue Size
`ifndef IDFPQ_SIZE
`define IDFPQ_SIZE 16
`define IDFPQ_SIZE 8
`endif
// Core Writeback Queue Size
@@ -205,7 +277,7 @@
// Dram Writeback Queue Size
`ifndef IDWBQ_SIZE
`define IDWBQ_SIZE 16
`define IDWBQ_SIZE 8
`endif
// Dram Fill Req Queue Size
@@ -215,7 +287,7 @@
// Prefetcher
`ifndef IPRFQ_SIZE
`define IPRFQ_SIZE 16
`define IPRFQ_SIZE 8
`endif
`ifndef IPRFQ_STRIDE
@@ -244,11 +316,6 @@
`define SWORD_SIZE 4
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef SSTAGE_1_CYCLES
`define SSTAGE_1_CYCLES 1
`endif
// Core Request Queue Size
`ifndef SCREQ_SIZE
`define SCREQ_SIZE `NUM_WARPS
@@ -281,14 +348,9 @@
`define L2WORD_SIZE `L2BANK_LINE_SIZE
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef L2STAGE_1_CYCLES
`define L2STAGE_1_CYCLES 1
`endif
// Core Request Queue Size
`ifndef L2CREQ_SIZE
`define L2CREQ_SIZE 16
`define L2CREQ_SIZE 8
`endif
// Miss Reservation Queue Knob
@@ -298,12 +360,12 @@
// Dram Fill Rsp Queue Size
`ifndef L2DFPQ_SIZE
`define L2DFPQ_SIZE 16
`define L2DFPQ_SIZE 8
`endif
// Snoop Req Queue Size
`ifndef L2SNRQ_SIZE
`define L2SNRQ_SIZE 16
`define L2SNRQ_SIZE 8
`endif
// Core Writeback Queue Size
@@ -313,7 +375,7 @@
// Dram Writeback Queue Size
`ifndef L2DWBQ_SIZE
`define L2DWBQ_SIZE 16
`define L2DWBQ_SIZE 8
`endif
// Dram Fill Req Queue Size
@@ -323,7 +385,7 @@
// Prefetcher
`ifndef L2PRFQ_SIZE
`define L2PRFQ_SIZE 16
`define L2PRFQ_SIZE 8
`endif
`ifndef L2PRFQ_STRIDE
@@ -352,14 +414,9 @@
`define L3WORD_SIZE `L3BANK_LINE_SIZE
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef L3STAGE_1_CYCLES
`define L3STAGE_1_CYCLES 1
`endif
// Core Request Queue Size
`ifndef L3CREQ_SIZE
`define L3CREQ_SIZE 16
`define L3CREQ_SIZE 8
`endif
// Miss Reservation Queue Knob
@@ -369,12 +426,12 @@
// Dram Fill Rsp Queue Size
`ifndef L3DFPQ_SIZE
`define L3DFPQ_SIZE 16
`define L3DFPQ_SIZE 8
`endif
// Snoop Req Queue Size
`ifndef L3SNRQ_SIZE
`define L3SNRQ_SIZE 16
`define L3SNRQ_SIZE 8
`endif
// Core Writeback Queue Size
@@ -384,7 +441,7 @@
// Dram Writeback Queue Size
`ifndef L3DWBQ_SIZE
`define L3DWBQ_SIZE 16
`define L3DWBQ_SIZE 8
`endif
// Dram Fill Req Queue Size
@@ -394,12 +451,11 @@
// Prefetcher
`ifndef L3PRFQ_SIZE
`define L3PRFQ_SIZE 16
`define L3PRFQ_SIZE 8
`endif
`ifndef L3PRFQ_STRIDE
`define L3PRFQ_STRIDE 0
`endif
// VX_CONFIG
`endif

View File

@@ -5,10 +5,9 @@ module VX_core #(
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
// Clock
input wire clk,
@@ -166,15 +165,15 @@ module VX_core #(
VX_cache_core_req_if #(
.NUM_REQUESTS(`INUM_REQUESTS),
.WORD_SIZE(`IWORD_SIZE),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
.CORE_TAG_WIDTH(`ICORE_TAG_WIDTH),
.CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS)
) core_icache_req_if();
VX_cache_core_rsp_if #(
.NUM_REQUESTS(`INUM_REQUESTS),
.WORD_SIZE(`IWORD_SIZE),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
.CORE_TAG_WIDTH(`ICORE_TAG_WIDTH),
.CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS)
) core_icache_rsp_if();
VX_pipeline #(
@@ -182,8 +181,8 @@ module VX_core #(
) pipeline (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
.clk(clk),
.reset(reset),
@@ -250,7 +249,7 @@ module VX_core #(
assign dcache_snp_req_if.addr = snp_req_addr;
assign dcache_snp_req_if.invalidate = snp_req_invalidate;
assign dcache_snp_req_if.tag = snp_req_tag;
assign snp_req_ready = dcache_snp_req_if.ready;
assign snp_req_ready = dcache_snp_req_if.ready;
assign snp_rsp_valid = dcache_snp_rsp_if.valid;
assign snp_rsp_tag = dcache_snp_rsp_if.tag;
@@ -283,18 +282,20 @@ module VX_core #(
.icache_dram_rsp_if (icache_dram_rsp_if)
);
// select io address
// select io bus
wire is_io_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `IO_BUS_BASE_ADDR);
wire io_select = (| core_dcache_req_if.valid) ? is_io_addr : 0;
wire io_req_select = (| core_dcache_req_if.valid) ? is_io_addr : 0;
wire io_rsp_select = (| arb_io_rsp_if.valid);
VX_dcache_arb dcache_io_arb (
.req_select (io_select),
.in_core_req_if (core_dcache_req_if),
.out0_core_req_if (arb_dcache_req_if),
.out1_core_req_if (arb_io_req_if),
.in0_core_rsp_if (arb_dcache_rsp_if),
.in1_core_rsp_if (arb_io_rsp_if),
.out_core_rsp_if (core_dcache_rsp_if)
VX_dcache_arb dcache_io_arb (
.core_req_in_if (core_dcache_req_if),
.core_req_out0_if (arb_dcache_req_if),
.core_req_out1_if (arb_io_req_if),
.core_rsp_in0_if (arb_dcache_rsp_if),
.core_rsp_in1_if (arb_io_rsp_if),
.core_rsp_out_if (core_dcache_rsp_if),
.select_req (io_req_select),
.select_rsp (io_rsp_select)
);
endmodule

View File

@@ -1,51 +1,50 @@
`include "VX_define.vh"
module VX_csr_arb (
input wire clk,
input wire reset,
input wire csr_pipe_stall,
module VX_csr_arb (
// inputs
VX_csr_req_if csr_core_req_if,
VX_csr_io_req_if csr_io_req_if,
VX_csr_req_if issued_csr_req_if,
VX_wb_if csr_pipe_rsp_if,
VX_wb_if csr_wb_if,
VX_csr_io_rsp_if csr_io_rsp_if
// output
VX_csr_req_if csr_req_if,
// input
VX_exu_to_cmt_if csr_rsp_if,
// outputs
VX_exu_to_cmt_if csr_commit_if,
VX_csr_io_rsp_if csr_io_rsp_if,
input wire select_io_req,
input wire select_io_rsp
);
// requests
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
assign csr_req_if.tmask = (~select_io_req) ? csr_core_req_if.tmask : 0;
assign csr_req_if.PC = (~select_io_req) ? csr_core_req_if.PC : 0;
assign csr_req_if.op_type = (~select_io_req) ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
assign csr_req_if.is_io = select_io_req;
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
wire pick_core = (| csr_core_req_if.valid);
// Mux between core and io
assign issued_csr_req_if.valid = pick_core ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
assign issued_csr_req_if.is_csr = pick_core ? csr_core_req_if.is_csr : 1'b1;
assign issued_csr_req_if.alu_op = pick_core ? csr_core_req_if.alu_op : (csr_io_req_if.rw ? `ALU_CSR_RW : `ALU_CSR_RS);
assign issued_csr_req_if.csr_addr = pick_core ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
assign issued_csr_req_if.csr_immed = pick_core ? csr_core_req_if.csr_immed : 0;
assign issued_csr_req_if.csr_mask = pick_core ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign issued_csr_req_if.is_io = !pick_core;
assign issued_csr_req_if.warp_num = csr_core_req_if.warp_num;
assign issued_csr_req_if.rd = csr_core_req_if.rd;
assign issued_csr_req_if.wb = csr_core_req_if.wb;
assign csr_io_req_if.ready = !(csr_pipe_stall || pick_core);
// Core Writeback
assign csr_wb_if.valid = csr_pipe_rsp_if.valid & {`NUM_THREADS{~csr_pipe_rsp_if.is_io}};
assign csr_wb_if.data = csr_pipe_rsp_if.data;
assign csr_wb_if.warp_num = csr_pipe_rsp_if.warp_num;
assign csr_wb_if.rd = csr_pipe_rsp_if.rd;
assign csr_wb_if.wb = csr_pipe_rsp_if.wb;
assign csr_wb_if.curr_PC = csr_pipe_rsp_if.curr_PC;
assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req);
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
// CSR I/O response
assign csr_io_rsp_if.valid = csr_pipe_rsp_if.valid[0] & csr_pipe_rsp_if.is_io;
assign csr_io_rsp_if.data = csr_pipe_rsp_if.data[0];
wire x = csr_io_rsp_if.ready;
`UNUSED_VAR(x)
// responses
assign csr_io_rsp_if.valid = csr_rsp_if.valid & select_io_rsp;
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp;
assign csr_commit_if.wid = csr_rsp_if.wid;
assign csr_commit_if.tmask = csr_rsp_if.tmask;
assign csr_commit_if.PC = csr_rsp_if.PC;
assign csr_commit_if.rd = csr_rsp_if.rd;
assign csr_commit_if.wb = csr_rsp_if.wb;
assign csr_commit_if.data = csr_rsp_if.data;
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
endmodule

View File

@@ -3,62 +3,143 @@
module VX_csr_data #(
parameter CORE_ID = 0
) (
input wire clk, // Clock
input wire clk,
input wire reset,
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
output reg[31:0] read_data,
input wire write_enable,
`IGNORE_WARNINGS_BEGIN
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
input wire[`CSR_ADDR_SIZE-1:0] write_addr,
`IGNORE_WARNINGS_END
input wire[`CSR_WIDTH-1:0] write_data,
input wire[`NW_BITS-1:0] warp_num,
input wire wb_valid
VX_cmt_to_csr_if cmt_to_csr_if,
VX_csr_to_issue_if csr_to_issue_if,
input wire read_enable,
input wire[`CSR_ADDR_BITS-1:0] read_addr,
input wire[`NW_BITS-1:0] read_wid,
output wire[31:0] read_data,
input wire write_enable,
input wire[`CSR_ADDR_BITS-1:0] write_addr,
input wire[`NW_BITS-1:0] write_wid,
input wire[`CSR_WIDTH-1:0] write_data
);
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
reg [`CSR_WIDTH-1:0] csr_satp;
reg [`CSR_WIDTH-1:0] csr_mstatus;
reg [`CSR_WIDTH-1:0] csr_medeleg;
reg [`CSR_WIDTH-1:0] csr_mideleg;
reg [`CSR_WIDTH-1:0] csr_mie;
reg [`CSR_WIDTH-1:0] csr_mtvec;
reg [`CSR_WIDTH-1:0] csr_mepc;
reg [`CSR_WIDTH-1:0] csr_pmpcfg [0:0];
reg [`CSR_WIDTH-1:0] csr_pmpaddr [0:0];
reg [63:0] csr_cycle;
reg [63:0] csr_instret;
reg [`FFG_BITS-1:0] csr_fflags [`NUM_WARPS-1:0];
reg [`FRM_BITS-1:0] csr_frm [`NUM_WARPS-1:0];
reg [`FRM_BITS+`FFG_BITS-1:0] csr_fcsr [`NUM_WARPS-1:0]; // fflags + frm
reg [63:0] num_cycles, num_instrs;
reg [31:0] read_data_r;
// cast address to physical CSR range
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
assign rd_addr = $size(rd_addr)'(read_addr);
assign wr_addr = $size(wr_addr)'(write_addr);
always @(posedge clk) begin
if (cmt_to_csr_if.has_fflags) begin
csr_fflags[cmt_to_csr_if.wid] <= cmt_to_csr_if.fflags;
csr_fcsr[cmt_to_csr_if.wid][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
end
if (write_enable) begin
case (write_addr)
`CSR_FFLAGS: begin
csr_fcsr[write_wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
csr_fflags[write_wid] <= write_data[`FFG_BITS-1:0];
end
`CSR_FRM: begin
csr_fcsr[write_wid][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
csr_frm[write_wid] <= write_data[`FRM_BITS-1:0];
end
`CSR_FCSR: begin
csr_fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
csr_frm[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
csr_fflags[write_wid] <= write_data[`FFG_BITS-1:0];
end
`CSR_SATP: csr_satp <= write_data;
`CSR_MSTATUS: csr_mstatus <= write_data;
`CSR_MEDELEG: csr_medeleg <= write_data;
`CSR_MIDELEG: csr_mideleg <= write_data;
`CSR_MIE: csr_mie <= write_data;
`CSR_MTVEC: csr_mtvec <= write_data;
`CSR_MEPC: csr_mepc <= write_data;
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data;
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
default: begin
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
end
endcase
end
end
always @(posedge clk) begin
if (reset) begin
num_cycles <= 0;
num_instrs <= 0;
csr_cycle <= 0;
csr_instret <= 0;
end else begin
if (write_enable) begin
csr_table[wr_addr] <= write_data;
end
num_cycles <= num_cycles + 1;
if (wb_valid) begin
num_instrs <= num_instrs + 1;
csr_cycle <= csr_cycle + 1;
if (cmt_to_csr_if.valid) begin
csr_instret <= csr_instret + 64'(cmt_to_csr_if.num_commits);
end
end
end
always @(*) begin
read_data_r = 'x;
case (read_addr)
`CSR_LWID : read_data = 32'(warp_num);
`CSR_GTID ,
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
`CSR_GCID : read_data = CORE_ID;
`CSR_NT : read_data = `NUM_THREADS;
`CSR_NW : read_data = `NUM_WARPS;
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
`CSR_CYCLE_L : read_data = num_cycles[31:0];
`CSR_CYCLE_H : read_data = num_cycles[63:32];
`CSR_INSTR_L : read_data = num_instrs[31:0];
`CSR_INSTR_H : read_data = num_instrs[63:32];
`CSR_VEND_ID : read_data = `VENDOR_ID;
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;
default : read_data = 32'(csr_table[rd_addr]);
endcase
end
`CSR_FFLAGS : read_data_r = 32'(csr_fflags[read_wid]);
`CSR_FRM : read_data_r = 32'(csr_frm[read_wid]);
`CSR_FCSR : read_data_r = 32'(csr_fcsr[read_wid]);
endmodule
`CSR_LWID : read_data_r = 32'(read_wid);
`CSR_LTID ,
`CSR_GTID ,
`CSR_MHARTID ,
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
`CSR_GCID : read_data_r = CORE_ID;
`CSR_NT : read_data_r = `NUM_THREADS;
`CSR_NW : read_data_r = `NUM_WARPS;
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
`CSR_SATP : read_data_r = 32'(csr_satp);
`CSR_MSTATUS : read_data_r = 32'(csr_mstatus);
`CSR_MISA : read_data_r = `ISA_CODE;
`CSR_MEDELEG : read_data_r = 32'(csr_medeleg);
`CSR_MIDELEG : read_data_r = 32'(csr_mideleg);
`CSR_MIE : read_data_r = 32'(csr_mie);
`CSR_MTVEC : read_data_r = 32'(csr_mtvec);
`CSR_MEPC : read_data_r = 32'(csr_mepc);
`CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]);
`CSR_PMPADDR0: read_data_r = 32'(csr_pmpaddr[0]);
`CSR_CYCLE : read_data_r = csr_cycle[31:0];
`CSR_CYCLE_H : read_data_r = csr_cycle[63:32];
`CSR_INSTRET : read_data_r = csr_instret[31:0];
`CSR_INSTRET_H:read_data_r = csr_instret[63:32];
`CSR_MVENDORID:read_data_r = `VENDOR_ID;
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
default: begin
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
end
endcase
end
assign read_data = read_data_r;
assign csr_to_issue_if.frm = csr_frm[csr_to_issue_if.wid];
endmodule

View File

@@ -2,7 +2,7 @@
module VX_csr_io_arb #(
parameter NUM_REQUESTS = 1,
parameter REQS_BITS = `CLOG2(NUM_REQUESTS)
parameter REQS_BITS = `LOG2UP(NUM_REQUESTS)
) (
input wire clk,
input wire reset,
@@ -37,6 +37,7 @@ module VX_csr_io_arb #(
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (request_id)
assign out_csr_io_req_valid = in_csr_io_req_valid;
assign out_csr_io_req_rw = in_csr_io_req_rw;
@@ -50,9 +51,7 @@ module VX_csr_io_arb #(
end else begin
genvar i;
for (i = 0; i < NUM_REQUESTS; i++) begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign out_csr_io_req_valid[i] = in_csr_io_req_valid && (request_id == `REQS_BITS'(i));
assign out_csr_io_req_rw[i] = in_csr_io_req_rw;
assign out_csr_io_req_addr[i] = in_csr_io_req_addr;
@@ -77,7 +76,7 @@ module VX_csr_io_arb #(
assign out_csr_io_rsp_valid = in_csr_io_rsp_valid [bus_rsp_sel];
assign out_csr_io_rsp_data = in_csr_io_rsp_data [bus_rsp_sel];
for (i = 0; i < NUM_REQUESTS; i++) begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign in_csr_io_rsp_ready[i] = out_csr_io_rsp_ready && (bus_rsp_sel == `REQS_BITS'(i));
end

View File

@@ -1,81 +0,0 @@
`include "VX_define.vh"
// VX_csr_pipe
// Two-step CSR path: the first step reads the CSR store and computes the
// value to write back, the second step (behind `csr_reg_s2`) performs the
// store write and drives the write-back interface.
// NOTE(review): VX_csr_data, VX_generic_register and the interface types are
// defined elsewhere; their behavior is assumed from the port names only.
module VX_csr_pipe #(
    parameter CORE_ID = 0
) (
    input wire      clk,
    input wire      reset,
    input wire      no_slot_csr,    // holds the stage register (wired to its stall input)
    VX_csr_req_if   csr_req_if,     // incoming CSR request
    VX_wb_if        writeback_if,   // only its valid bits are used (instruction counter)
    VX_wb_if        csr_wb_if,      // CSR result towards write-back
    output wire     stall_gpr_csr
);
    // ---- step 2: registered copy of the request -----------------------------
    wire [`NUM_THREADS-1:0]     s2_valid;
    wire [`NW_BITS-1:0]         s2_warp_num;
    wire [4:0]                  s2_rd;
    wire [1:0]                  s2_wb;
    wire                        s2_is_csr;
    wire [`CSR_ADDR_SIZE-1:0]   s2_csr_addr;
    wire [31:0]                 s2_read_data;
    wire [31:0]                 s2_updated_data;

    // ---- step 1: read + modify ----------------------------------------------
    wire [31:0] raw_read_data;      // value straight out of the CSR store
    wire [31:0] fwd_read_data;      // value after forwarding from step 2
    reg  [31:0] next_csr_value;     // value this request wants to store

    // The request in step 2 is about to write the very CSR (same address, same
    // warp) that the current request reads: use the pending value instead.
    wire same_target   = (s2_csr_addr == csr_req_if.csr_addr)
                       & (s2_warp_num == csr_req_if.warp_num);
    wire write_pending = |(s2_valid) & s2_is_csr;
    wire fwd_hazard    = same_target & write_pending;

    assign fwd_read_data = fwd_hazard ? s2_updated_data : raw_read_data;

    VX_csr_data #(
        .CORE_ID (CORE_ID)
    ) csr_data (
        .clk          (clk),
        .reset        (reset),
        .read_addr    (csr_req_if.csr_addr),
        .read_data    (raw_read_data),
        .write_enable (s2_is_csr),
        .write_data   (s2_updated_data[`CSR_WIDTH-1:0]),
        .write_addr   (s2_csr_addr),
        .warp_num     (csr_req_if.warp_num),
        .wb_valid     (| writeback_if.valid)
    );

    // Read-write / read-set / read-clear; anything else yields a marker value.
    always @(*) begin
        case (csr_req_if.alu_op)
            `ALU_CSR_RW: next_csr_value = csr_req_if.csr_mask;
            `ALU_CSR_RS: next_csr_value = fwd_read_data | csr_req_if.csr_mask;
            `ALU_CSR_RC: next_csr_value = fwd_read_data & (32'hFFFFFFFF - csr_req_if.csr_mask);
            default:     next_csr_value = 32'hdeadbeef;
        endcase
    end

    // Stage register; field order of `in` and `out` must stay in lock-step.
    VX_generic_register #(
        .N(32 + 32 + 12 + 1 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)
    ) csr_reg_s2 (
        .clk   (clk),
        .reset (reset),
        .stall (no_slot_csr),
        .flush (1'b0),
        .in    ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_csr, csr_req_if.csr_addr, csr_req_if.is_io, fwd_read_data, next_csr_value}),
        .out   ({s2_valid,         s2_warp_num,         s2_rd,         s2_wb,         s2_is_csr,         s2_csr_addr,         csr_wb_if.is_io,  s2_read_data,  s2_updated_data})
    );

    // Per-thread result: the local thread id is the lane index, the global
    // thread id is derived from the value read out of the store, every other
    // CSR is broadcast unchanged to all lanes.
    genvar t;
    generate
        for (t = 0; t < `NUM_THREADS; t++) begin : g_wb_data
            assign csr_wb_if.data[t] = (s2_csr_addr == `CSR_LTID) ? t :
                                       (s2_csr_addr == `CSR_GTID) ? (s2_read_data * `NUM_THREADS + t) :
                                                                    s2_read_data;
        end
    endgenerate

    assign csr_wb_if.wb       = s2_wb;
    assign csr_wb_if.rd       = s2_rd;
    assign csr_wb_if.warp_num = s2_warp_num;
    assign csr_wb_if.valid    = s2_valid;

    // A valid CSR request is waiting while the stage register is held.
    assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && (| csr_req_if.valid);

endmodule

112
hw/rtl/VX_csr_unit.v Normal file
View File

@@ -0,0 +1,112 @@
`include "VX_define.vh"
// VX_csr_unit
// CSR execution unit. Requests coming from the core (csr_req_if) and from the
// external CSR I/O port (csr_io_req_if) are merged by VX_csr_arb into a single
// request stream. Stage 0 reads the CSR store and computes the updated value;
// stage 1 (behind `csr_reg`) writes the store and produces the response, which
// VX_csr_arb routes either to the commit interface or back to the I/O port.
// NOTE(review): VX_csr_arb, VX_csr_data, VX_generic_register and the interface
// types are defined elsewhere; their behavior is assumed from how they are
// wired here.
module VX_csr_unit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    VX_cmt_to_csr_if    cmt_to_csr_if,      // commit-side updates, passed through to the CSR store
    VX_csr_to_issue_if  csr_to_issue_if,    // CSR state exported to issue, passed through to the CSR store

    VX_csr_io_req_if    csr_io_req_if,      // external CSR access: request
    VX_csr_io_rsp_if    csr_io_rsp_if,      // external CSR access: response

    VX_csr_req_if       csr_req_if,         // CSR request from the core
    VX_exu_to_cmt_if    csr_commit_if       // CSR result towards commit
);
    VX_csr_req_if    csr_pipe_req_if();
    VX_exu_to_cmt_if csr_pipe_rsp_if();

    // A pending I/O request always wins the request mux.
    wire select_io_req = csr_io_req_if.valid;
    // Registered copy of the request's is_io flag; steers the response.
    wire select_io_rsp;

    VX_csr_arb csr_arb (
        .csr_core_req_if (csr_req_if),
        .csr_io_req_if   (csr_io_req_if),
        .csr_req_if      (csr_pipe_req_if),
        .csr_rsp_if      (csr_pipe_rsp_if),
        .csr_io_rsp_if   (csr_io_rsp_if),
        .csr_commit_if   (csr_commit_if),
        .select_io_req   (select_io_req),
        .select_io_rsp   (select_io_rsp)
    );

    wire                        csr_we_s1;
    wire [`CSR_ADDR_BITS-1:0]   csr_addr_s1;
    wire [31:0]                 csr_read_data, csr_read_data_s1;
    wire [31:0]                 csr_updated_data_s1;

    VX_csr_data #(
        .CORE_ID(CORE_ID)
    ) csr_data (
        .clk            (clk),
        .reset          (reset),
        .cmt_to_csr_if  (cmt_to_csr_if),
        .csr_to_issue_if(csr_to_issue_if),
        .read_enable    (csr_pipe_req_if.valid),
        .read_addr      (csr_pipe_req_if.csr_addr),
        .read_wid       (csr_pipe_req_if.wid),
        .read_data      (csr_read_data),
        .write_enable   (csr_we_s1),
        .write_addr     (csr_addr_s1),
        .write_wid      (csr_pipe_rsp_if.wid),
        .write_data     (csr_updated_data_s1[`CSR_WIDTH-1:0])
    );

    // Read-after-write forwarding from stage 1 to stage 0.
    // The forward is taken only when stage 1 actually writes (csr_we_s1).
    // Without that qualification a read-only access sitting in stage 1 would
    // shadow the store with its own, possibly outdated, snapshot (e.g.
    // back-to-back reads of a counter, or the placeholder value produced for
    // an undecoded op_type).
    // NOTE(review): the wid comparison also blocks forwarding between
    // different warps; confirm that is intended for CSRs that are not kept
    // per warp in VX_csr_data.
    wire csr_hazard = csr_we_s1
                   && (csr_addr_s1 == csr_pipe_req_if.csr_addr)
                   && (csr_pipe_rsp_if.wid == csr_pipe_req_if.wid)
                   && csr_pipe_rsp_if.valid;

    wire [31:0] csr_read_data_qual = csr_hazard ? csr_updated_data_s1 : csr_read_data;

    reg [31:0] csr_updated_data;
    reg        csr_we_s0_unqual;

    // Compute the value to store and whether a store is needed at all.
    // Set/clear with an all-zero mask leaves the CSR unchanged, so no write
    // is issued for it.
    always @(*) begin
        csr_we_s0_unqual = 0;
        case (csr_pipe_req_if.op_type)
            `CSR_RW: begin
                csr_updated_data = csr_pipe_req_if.csr_mask;
                csr_we_s0_unqual = 1;
            end
            `CSR_RS: begin
                csr_updated_data = csr_read_data_qual | csr_pipe_req_if.csr_mask;
                csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0);
            end
            `CSR_RC: begin
                csr_updated_data = csr_read_data_qual & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask);
                csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0);
            end
            // Undecoded operation: recognizable placeholder, write stays disabled.
            default: csr_updated_data = 32'hdeadbeef;
        endcase
    end

    // Only a valid request may write.
    wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid;

    // Hold stage 1 while its response is valid but not yet accepted.
    wire stall = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid;

    // Stage register; the field order of `in` and `out` must stay in lock-step.
    VX_generic_register #(
        .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
    ) csr_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (1'b0),
        .in    ({csr_pipe_req_if.valid, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
        .out   ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1,              select_io_rsp,         csr_read_data_s1,   csr_updated_data_s1})
    );

    // Per-thread response data: the local thread id is the lane index, the
    // global thread id is derived from the value read from the store, every
    // other CSR is broadcast unchanged to all lanes.
    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        assign csr_pipe_rsp_if.data[i] = (csr_addr_s1 == `CSR_LTID) ? i :
                                         (csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
                                                                      csr_read_data_s1;
    end

    // can accept new request?
    assign csr_pipe_req_if.ready = ~stall;

endmodule

View File

@@ -1,48 +1,50 @@
`include "VX_define.vh"
module VX_dcache_arb (
input wire req_select,
// input request
VX_cache_core_req_if in_core_req_if,
VX_cache_core_req_if core_req_in_if,
// output 0 request
VX_cache_core_req_if out0_core_req_if,
VX_cache_core_req_if core_req_out0_if,
// output 1 request
VX_cache_core_req_if out1_core_req_if,
VX_cache_core_req_if core_req_out1_if,
// input 0 response
VX_cache_core_rsp_if in0_core_rsp_if,
VX_cache_core_rsp_if core_rsp_in0_if,
// input 1 response
VX_cache_core_rsp_if in1_core_rsp_if,
VX_cache_core_rsp_if core_rsp_in1_if,
// output response
VX_cache_core_rsp_if out_core_rsp_if
VX_cache_core_rsp_if core_rsp_out_if,
// bus select
input wire select_req,
input wire select_rsp
);
assign out0_core_req_if.valid = in_core_req_if.valid & {`NUM_THREADS{~req_select}};
assign out0_core_req_if.rw = in_core_req_if.rw;
assign out0_core_req_if.byteen = in_core_req_if.byteen;
assign out0_core_req_if.addr = in_core_req_if.addr;
assign out0_core_req_if.data = in_core_req_if.data;
assign out0_core_req_if.tag = in_core_req_if.tag;
// select request
assign core_req_out0_if.valid = core_req_in_if.valid & {`NUM_THREADS{~select_req}};
assign core_req_out0_if.rw = core_req_in_if.rw;
assign core_req_out0_if.byteen = core_req_in_if.byteen;
assign core_req_out0_if.addr = core_req_in_if.addr;
assign core_req_out0_if.data = core_req_in_if.data;
assign core_req_out0_if.tag = core_req_in_if.tag;
assign out1_core_req_if.valid = in_core_req_if.valid & {`NUM_THREADS{req_select}};
assign out1_core_req_if.rw = in_core_req_if.rw;
assign out1_core_req_if.byteen = in_core_req_if.byteen;
assign out1_core_req_if.addr = in_core_req_if.addr;
assign out1_core_req_if.data = in_core_req_if.data;
assign out1_core_req_if.tag = in_core_req_if.tag;
assign core_req_out1_if.valid = core_req_in_if.valid & {`NUM_THREADS{select_req}};
assign core_req_out1_if.rw = core_req_in_if.rw;
assign core_req_out1_if.byteen = core_req_in_if.byteen;
assign core_req_out1_if.addr = core_req_in_if.addr;
assign core_req_out1_if.data = core_req_in_if.data;
assign core_req_out1_if.tag = core_req_in_if.tag;
assign in_core_req_if.ready = req_select ? out1_core_req_if.ready : out0_core_req_if.ready;
assign core_req_in_if.ready = select_req ? core_req_out1_if.ready : core_req_out0_if.ready;
wire rsp_select0 = (| in0_core_rsp_if.valid);
assign out_core_rsp_if.valid = rsp_select0 ? in0_core_rsp_if.valid : in1_core_rsp_if.valid;
assign out_core_rsp_if.data = rsp_select0 ? in0_core_rsp_if.data : in1_core_rsp_if.data;
assign out_core_rsp_if.tag = rsp_select0 ? in0_core_rsp_if.tag : in1_core_rsp_if.tag;
assign in0_core_rsp_if.ready = out_core_rsp_if.ready && rsp_select0;
assign in1_core_rsp_if.ready = out_core_rsp_if.ready && !rsp_select0;
// select response
assign core_rsp_out_if.valid = select_rsp ? core_rsp_in1_if.valid : core_rsp_in0_if.valid;
assign core_rsp_out_if.data = select_rsp ? core_rsp_in1_if.data : core_rsp_in0_if.data;
assign core_rsp_out_if.tag = select_rsp ? core_rsp_in1_if.tag : core_rsp_in0_if.tag;
assign core_rsp_in0_if.ready = core_rsp_out_if.ready && ~select_rsp;
assign core_rsp_in1_if.ready = core_rsp_out_if.ready && select_rsp;
endmodule

View File

@@ -1,321 +1,401 @@
`include "VX_define.vh"
`include "VX_print_instr.vh"
module VX_decode(
// Fetch Inputs
VX_inst_meta_if fd_inst_meta_de,
module VX_decode #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// Outputs
VX_backend_req_if frE_to_bckE_req_if,
VX_wstall_if wstall_if,
VX_join_if join_if
// inputs
VX_ifetch_rsp_if ifetch_rsp_if,
// outputs
VX_decode_if decode_if,
VX_wstall_if wstall_if,
VX_join_if join_if
);
wire in_valid = (| fd_inst_meta_de.valid);
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
wire[31:0] in_curr_PC = fd_inst_meta_de.curr_PC;
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
wire [31:0] instr = ifetch_rsp_if.instr;
wire[6:0] curr_opcode;
reg [`ALU_BITS-1:0] alu_op;
reg [`BR_BITS-1:0] br_op;
reg [`LSU_BITS-1:0] lsu_op;
reg [`CSR_BITS-1:0] csr_op;
reg [`MUL_BITS-1:0] mul_op;
reg [`FPU_BITS-1:0] fpu_op;
reg [`GPU_BITS-1:0] gpu_op;
wire is_itype;
wire is_rtype;
wire is_stype;
wire is_btype;
wire is_linst;
wire is_jal;
wire is_jalr;
wire is_lui;
wire is_auipc;
wire is_csr;
wire is_csr_immed;
wire is_etype;
reg [19:0] upper_imm;
reg [31:0] jalx_offset;
reg [31:0] src2_imm;
wire is_gpgpu;
wire is_wspawn;
wire is_tmc;
wire is_split;
wire is_join;
wire is_barrier;
wire [6:0] opcode = instr[6:0];
wire [2:0] func3 = instr[14:12];
wire [6:0] func7 = instr[31:25];
wire [11:0] u_12 = instr[31:20];
wire[2:0] func3;
wire[6:0] func7;
wire[11:0] u_12;
wire [4:0] rd = instr[11:7];
wire [4:0] rs1 = instr[19:15];
wire [4:0] rs2 = instr[24:20];
wire [4:0] rs3 = instr[31:27];
wire[7:0] jal_b_19_to_12;
wire jal_b_11;
wire[9:0] jal_b_10_to_1;
wire jal_b_20;
wire jal_b_0;
wire[20:0] jal_unsigned_offset;
wire[31:0] jal_1_offset;
// opcode types
wire is_rtype = (opcode == `INST_R);
wire is_ltype = (opcode == `INST_L);
wire is_itype = (opcode == `INST_I);
wire is_stype = (opcode == `INST_S);
wire is_btype = (opcode == `INST_B);
wire is_jal = (opcode == `INST_JAL);
wire is_jalr = (opcode == `INST_JALR);
wire is_lui = (opcode == `INST_LUI);
wire is_auipc = (opcode == `INST_AUIPC);
wire is_jals = (opcode == `INST_SYS) && (func3 == 0);
wire is_csr = (opcode == `INST_SYS) && (func3 != 0);
wire is_gpu = (opcode == `INST_GPU);
// upper immediate
wire[11:0] jalr_immed;
wire[31:0] jal_2_offset;
wire jal_sys_cond1;
wire jal_sys_cond2;
wire jal_sys_jal;
wire[31:0] jal_sys_off;
wire csr_cond1;
wire csr_cond2;
wire[11:0] alu_tempp;
wire alu_shift_i;
wire[11:0] alu_shift_i_immed;
wire[1:0] csr_type;
reg[4:0] csr_alu;
reg[4:0] alu_op;
reg[4:0] mul_alu;
reg[19:0] temp_upper_immed;
reg temp_jal;
reg[31:0] temp_jal_offset;
reg[31:0] temp_itype_immed;
reg[2:0] temp_branch_type;
reg temp_branch_stall;
assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid;
assign frE_to_bckE_req_if.warp_num = in_warp_num;
assign curr_opcode = in_instruction[6:0];
assign frE_to_bckE_req_if.rd = in_instruction[11:7];
assign frE_to_bckE_req_if.rs1 = in_instruction[19:15];
assign frE_to_bckE_req_if.rs2 = in_instruction[24:20];
assign func3 = in_instruction[14:12];
assign func7 = in_instruction[31:25];
assign u_12 = in_instruction[31:20];
assign frE_to_bckE_req_if.next_PC = in_curr_PC + 32'h4;
// Write Back sigal
assign is_rtype = (curr_opcode == `INST_R);
assign is_linst = (curr_opcode == `INST_L);
assign is_itype = (curr_opcode == `INST_ALU) || is_linst;
assign is_stype = (curr_opcode == `INST_S);
assign is_btype = (curr_opcode == `INST_B);
assign is_jal = (curr_opcode == `INST_JAL);
assign is_jalr = (curr_opcode == `INST_JALR);
assign is_lui = (curr_opcode == `INST_LUI);
assign is_auipc = (curr_opcode == `INST_AUIPC);
assign is_csr = (curr_opcode == `INST_SYS) && (func3 != 0);
assign is_csr_immed = is_csr && (func3[2] == 1);
assign is_gpgpu = (curr_opcode == `INST_GPGPU);
assign is_tmc = is_gpgpu && (func3 == 0); // Goes to BE
assign is_wspawn = is_gpgpu && (func3 == 1); // Goes to BE
assign is_barrier = is_gpgpu && (func3 == 4); // Goes to BE
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
assign join_if.is_join = is_join && in_valid;
assign join_if.warp_num = in_warp_num;
assign frE_to_bckE_req_if.is_wspawn = is_wspawn;
assign frE_to_bckE_req_if.is_tmc = is_tmc;
assign frE_to_bckE_req_if.is_split = is_split;
assign frE_to_bckE_req_if.is_barrier = is_barrier;
assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
assign frE_to_bckE_req_if.is_csr = is_csr;
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_etype) ? `WB_JAL :
is_linst ? `WB_MEM :
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
`WB_NO;
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
// MEM signals
assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `BYTE_EN_NO;
assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `BYTE_EN_NO;
// UPPER IMMEDIATE
always @(*) begin
case (curr_opcode)
`INST_LUI: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
`INST_AUIPC: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
default: temp_upper_immed = 20'h0;
endcase // curr_opcode
end
assign frE_to_bckE_req_if.upper_immed = temp_upper_immed;
assign jal_b_19_to_12 = in_instruction[19:12];
assign jal_b_11 = in_instruction[20];
assign jal_b_10_to_1 = in_instruction[30:21];
assign jal_b_20 = in_instruction[31];
assign jal_b_0 = 1'b0;
assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0};
assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset};
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
assign jal_sys_cond1 = (func3 == 3'h0);
assign jal_sys_cond2 = (u_12 < 12'h2);
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
// JAL
always @(*) begin
case (curr_opcode)
`INST_JAL:
begin
temp_jal = in_valid;
temp_jal_offset = jal_1_offset;
end
`INST_JALR:
begin
temp_jal = in_valid;
temp_jal_offset = jal_2_offset;
end
`INST_SYS:
begin
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid));
temp_jal = jal_sys_jal && in_valid;
temp_jal_offset = jal_sys_off;
end
default:
begin
temp_jal = 1'b0;
temp_jal_offset = 32'hdeadbeef;
end
case (opcode)
`INST_LUI: upper_imm = {func7, rs2, rs1, func3};
`INST_AUIPC: upper_imm = {func7, rs2, rs1, func3};
default: upper_imm = 20'h0;
endcase
end
end
assign frE_to_bckE_req_if.is_jal = is_jal;
assign frE_to_bckE_req_if.jal = temp_jal;
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;
// ecall/ebreak
assign is_etype = (curr_opcode == `INST_SYS) && jal_sys_jal;
assign frE_to_bckE_req_if.is_etype = is_etype;
// CSR
assign csr_cond1 = func3 != 3'h0;
assign csr_cond2 = u_12 >= 12'h2;
assign frE_to_bckE_req_if.csr_addr = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
// ITYPE IMEED
assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
assign alu_shift_i_immed = {{7{1'b0}}, frE_to_bckE_req_if.rs2};
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
// I-type immediate
wire alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
wire [11:0] alu_shift_imm = {{7{1'b0}}, rs2};
wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12;
always @(*) begin
case (curr_opcode)
`INST_ALU: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
`INST_S: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd};
`INST_L: temp_itype_immed = {{20{u_12[11]}}, u_12};
`INST_B: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
default: temp_itype_immed = 32'hdeadbeef;
case (opcode)
`INST_I: src2_imm = {{20{alu_imm[11]}}, alu_imm};
`INST_S,
`INST_FS: src2_imm = {{20{func7[6]}}, func7, rd};
`INST_L,
`INST_FL: src2_imm = {{20{u_12[11]}}, u_12};
`INST_B: src2_imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
default: src2_imm = 32'hdeadbeef;
endcase
end
end
assign frE_to_bckE_req_if.itype_immed = temp_itype_immed;
// JAL
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
wire [11:0] jalr_imm = {func7, rs2};
wire [31:0] jalr_offset = {{20{jalr_imm[11]}}, jalr_imm};
always @(*) begin
case (opcode)
`INST_JAL: jalx_offset = jal_offset;
`INST_JALR: jalx_offset = jalr_offset;
default: jalx_offset = 32'd4;
endcase
end
// BRANCH
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
always @(*) begin
case (curr_opcode)
br_op = `BR_OTHER;
case (opcode)
`INST_B: begin
// $display("BRANCH IN DECODE");
temp_branch_stall = in_valid;
case (func3)
3'h0: temp_branch_type = `BR_EQ;
3'h1: temp_branch_type = `BR_NE;
3'h4: temp_branch_type = `BR_LT;
3'h5: temp_branch_type = `BR_GT;
3'h6: temp_branch_type = `BR_LTU;
3'h7: temp_branch_type = `BR_GTU;
default: temp_branch_type = `BR_NO;
3'h0: br_op = `BR_EQ;
3'h1: br_op = `BR_NE;
3'h4: br_op = `BR_LT;
3'h5: br_op = `BR_GE;
3'h6: br_op = `BR_LTU;
3'h7: br_op = `BR_GEU;
default:;
endcase
end
`INST_JAL: begin
temp_branch_type = `BR_NO;
temp_branch_stall = in_valid;
end
`INST_JALR: begin
temp_branch_type = `BR_NO;
temp_branch_stall = in_valid;
end
default: begin
temp_branch_type = `BR_NO;
temp_branch_stall = 1'b0;
`INST_JAL: br_op = `BR_JAL;
`INST_JALR: br_op = `BR_JALR;
`INST_SYS: begin
if (is_jals && u_12 == 12'h000) br_op = `BR_ECALL;
if (is_jals && u_12 == 12'h001) br_op = `BR_EBREAK;
if (is_jals && u_12 == 12'h302) br_op = `BR_MRET;
if (is_jals && u_12 == 12'h102) br_op = `BR_SRET;
if (is_jals && u_12 == 12'h7B2) br_op = `BR_DRET;
end
default:;
endcase
end
// ALU
always @(*) begin
alu_op = `ALU_OTHER;
if (is_lui) begin
alu_op = `ALU_LUI;
end else if (is_auipc) begin
alu_op = `ALU_AUIPC;
end else if (is_itype || is_rtype) begin
case (func3)
3'h0: alu_op = (is_rtype && func7 == 7'h20) ? `ALU_SUB : `ALU_ADD;
3'h1: alu_op = `ALU_SLL;
3'h2: alu_op = `ALU_SLT;
3'h3: alu_op = `ALU_SLTU;
3'h4: alu_op = `ALU_XOR;
3'h5: alu_op = (func7 == 7'h0) ? `ALU_SRL : `ALU_SRA;
3'h6: alu_op = `ALU_OR;
3'h7: alu_op = `ALU_AND;
default:;
endcase
end
end
// CSR
wire is_csr_imm = is_csr && (func3[2] == 1);
always @(*) begin
csr_op = `CSR_OTHER;
case (func3[1:0])
2'h1: csr_op = `CSR_RW;
2'h2: csr_op = `CSR_RS;
2'h3: csr_op = `CSR_RC;
default:;
endcase
end
assign frE_to_bckE_req_if.branch_type = temp_branch_type;
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && in_valid;
assign wstall_if.warp_num = in_warp_num;
// MUL
`ifdef EXT_M_ENABLE
wire is_mul = is_rtype && (func7 == 7'h1);
always @(*) begin
// ALU OP
mul_op = `MUL_MUL;
case (func3)
3'h0: alu_op = (curr_opcode == `INST_ALU) ? `ALU_ADD : (func7 == 7'h0 ? `ALU_ADD : `ALU_SUB);
3'h1: alu_op = `ALU_SLLA;
3'h2: alu_op = `ALU_SLT;
3'h3: alu_op = `ALU_SLTU;
3'h4: alu_op = `ALU_XOR;
3'h5: alu_op = (func7 == 7'h0) ? `ALU_SRL : `ALU_SRA;
3'h6: alu_op = `ALU_OR;
3'h7: alu_op = `ALU_AND;
default: alu_op = `ALU_NO;
3'h0: mul_op = `MUL_MUL;
3'h1: mul_op = `MUL_MULH;
3'h2: mul_op = `MUL_MULHSU;
3'h3: mul_op = `MUL_MULHU;
3'h4: mul_op = `MUL_DIV;
3'h5: mul_op = `MUL_DIVU;
3'h6: mul_op = `MUL_REM;
3'h7: mul_op = `MUL_REMU;
default:;
endcase
end
`else
wire is_mul = 0;
always @(*) begin
mul_op = `MUL_MUL;
end
`endif
// FPU
`ifdef EXT_F_ENABLE
wire is_fl = (opcode == `INST_FL) && ((func3 == 2));
wire is_fs = (opcode == `INST_FS) && ((func3 == 2));
wire is_fci = (opcode == `INST_FCI);
wire is_fmadd = (opcode == `INST_FMADD);
wire is_fmsub = (opcode == `INST_FMSUB);
wire is_fnmsub = (opcode == `INST_FNMSUB);
wire is_fnmadd = (opcode == `INST_FNMADD);
wire is_fcmp = is_fci && (func7 == 7'h50); // compare
wire is_fcvti = is_fci && (func7 == 7'h60); // convert to int
wire is_fcvtf = is_fci && (func7 == 7'h68); // convert to float
wire is_fmvw_clss = is_fci && (func7 == 7'h70); // move to int + class
wire is_fmvx = is_fci && (func7 == 7'h78); // move to float
wire is_fr4 = is_fmadd || is_fmsub || is_fnmsub || is_fnmadd;
wire is_fpu = (is_fl || is_fs || is_fci || is_fr4);
reg [2:0] frm;
always @(*) begin
fpu_op = `FPU_MISC;
frm = func3;
if (is_fr4) begin
case ({is_fmadd, is_fmsub, is_fnmsub, is_fnmadd})
4'b1000: fpu_op = `FPU_MADD;
4'b0100: fpu_op = `FPU_MSUB;
4'b0010: fpu_op = `FPU_NMSUB;
4'b0001: fpu_op = `FPU_NMADD;
default:;
endcase
end
else begin
case (func7)
7'h00: fpu_op = `FPU_ADD;
7'h04: fpu_op = `FPU_SUB;
7'h08: fpu_op = `FPU_MUL;
7'h0C: fpu_op = `FPU_DIV;
7'h10: begin
fpu_op = `FPU_MISC;
frm = (func3[1]) ? 2 : ((func3[0]) ? 1 : 0);
end
7'h14: begin
fpu_op = `FPU_MISC;
frm = (func3 == 3'h0) ? 3 : 4;
end
7'h2C: fpu_op = `FPU_SQRT;
7'h50: fpu_op = `FPU_CMP; // wb to intReg
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
7'h70: begin
fpu_op = (func3 == 3'h0) ? `FPU_MISC : `FPU_CLASS;
frm = (func3 == 3'h0) ? 5 : func3;
end
7'h78: begin fpu_op = `FPU_MISC; frm = 6; end
default:;
endcase
end
end
`else
wire is_fl = 0;
wire is_fs = 0;
wire is_fci = 0;
wire is_fcvti = 0;
wire is_fcvtf = 0;
wire is_fmvw_clss = 0;
wire is_fmvx = 0;
wire is_fr4 = 0;
wire is_fpu = 0;
wire [2:0] frm = 0;
always @(*) begin
// ALU OP
fpu_op = `FPU_MISC;
end
`endif
// LSU
wire is_lsu = (is_ltype || is_stype || is_fl || is_fs);
always @(*) begin
lsu_op = {is_stype, func3};
if (is_fl) lsu_op = `LSU_LW;
if (is_fs) lsu_op = `LSU_SW;
end
// GPU
always @(*) begin
gpu_op = `GPU_OTHER;
case (func3)
3'h0: mul_alu = `ALU_MUL;
3'h1: mul_alu = `ALU_MULH;
3'h2: mul_alu = `ALU_MULHSU;
3'h3: mul_alu = `ALU_MULHU;
3'h4: mul_alu = `ALU_DIV;
3'h5: mul_alu = `ALU_DIVU;
3'h6: mul_alu = `ALU_REM;
3'h7: mul_alu = `ALU_REMU;
default: mul_alu = `ALU_NO;
3'h0: gpu_op = `GPU_TMC;
3'h1: gpu_op = `GPU_WSPAWN;
3'h2: gpu_op = `GPU_SPLIT;
3'h3: gpu_op = `GPU_JOIN;
3'h4: gpu_op = `GPU_BAR;
default:;
endcase
end
assign csr_type = func3[1:0];
///////////////////////////////////////////////////////////////////////////
always @(*) begin
case (csr_type)
2'h1: csr_alu = `ALU_CSR_RW;
2'h2: csr_alu = `ALU_CSR_RS;
2'h3: csr_alu = `ALU_CSR_RC;
default: csr_alu = `ALU_NO;
endcase
wire use_rd = (is_fl || is_fci || is_fr4)
|| ((rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
wire use_rs1 = is_fpu
|| is_gpu
|| ((is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || ~is_csr_imm || is_gpu) && (rs1 != 0));
wire use_rs2 = (is_fpu && ~(is_fl || (fpu_op == `FPU_SQRT) || is_fcvti || is_fcvtf || is_fmvw_clss || is_fmvx))
|| (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN))
|| ((is_btype || is_stype || is_rtype) && (rs2 != 0));
wire use_rs3 = is_fr4;
wire [4:0] rs1_qual = is_lui ? 5'h0 : rs1;
///////////////////////////////////////////////////////////////////////////
assign decode_if.valid = ifetch_rsp_if.valid
&& (decode_if.ex_type != `EX_NOP); // skip noop
assign decode_if.wid = ifetch_rsp_if.wid;
assign decode_if.tmask = ifetch_rsp_if.tmask;
assign decode_if.PC = ifetch_rsp_if.PC;
assign decode_if.ex_type = is_lsu ? `EX_LSU :
is_csr ? `EX_CSR :
is_mul ? `EX_MUL :
is_fpu ? `EX_FPU :
is_gpu ? `EX_GPU :
is_br ? `EX_ALU :
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
`EX_NOP;
assign decode_if.op_type = is_lsu ? `OP_BITS'(lsu_op) :
is_csr ? `OP_BITS'(csr_op) :
is_mul ? `OP_BITS'(mul_op) :
is_fpu ? `OP_BITS'(fpu_op) :
is_gpu ? `OP_BITS'(gpu_op) :
is_br ? `OP_BITS'(br_op) :
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
0;
assign decode_if.wb = use_rd;
`ifdef EXT_F_ENABLE
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || is_fmvw_clss);
wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || is_fmvx));
wire rs2_is_fp = is_fs || is_fr4 || is_fci;
assign decode_if.rd = {rd_is_fp, rd};
assign decode_if.rs1 = {rs1_is_fp, rs1_qual};
assign decode_if.rs2 = {rs2_is_fp, rs2};
assign decode_if.rs3 = {1'b1, rs3};
`else
assign decode_if.rd = rd;
assign decode_if.rs1 = rs1_qual;
assign decode_if.rs2 = rs2;
assign decode_if.rs3 = rs3;
`endif
assign decode_if.use_rs3 = use_rs3;
assign decode_if.used_regs = ((`NUM_REGS)'(use_rd) << decode_if.rd)
| ((`NUM_REGS)'(use_rs1) << decode_if.rs1)
| ((`NUM_REGS)'(use_rs2) << decode_if.rs2)
| ((`NUM_REGS)'(use_rs3) << decode_if.rs3);
assign decode_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
(is_jal || is_jalr || is_jals) ? jalx_offset :
is_csr ? 32'(u_12) :
src2_imm;
assign decode_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
assign decode_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm || is_br;
assign decode_if.op_mod = is_fpu ? frm : (is_br ? 1 : 0);
///////////////////////////////////////////////////////////////////////////
wire decode_fire = decode_if.valid && decode_if.ready;
assign join_if.valid = decode_fire && is_gpu && (gpu_op == `GPU_JOIN);
assign join_if.wid = ifetch_rsp_if.wid;
assign wstall_if.valid = decode_fire && (is_btype
|| is_jal
|| is_jalr
|| (is_gpu && (gpu_op == `GPU_TMC
|| gpu_op == `GPU_SPLIT
|| gpu_op == `GPU_BAR)));
assign wstall_if.wid = ifetch_rsp_if.wid;
///////////////////////////////////////////////////////////////////////////
assign ifetch_rsp_if.ready = decode_if.ready;
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (decode_if.valid && decode_if.ready) begin
$write("%t: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.PC);
print_ex_type(decode_if.ex_type);
$write(", op=");
print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
$write(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b\n", decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm);
end
end
`endif
wire[4:0] temp_final_alu;
assign temp_final_alu = is_btype ? ((frE_to_bckE_req_if.branch_type < `BR_LTU) ? `ALU_SUB : `ALU_SUBU) :
is_lui ? `ALU_LUI :
is_auipc ? `ALU_AUIPC :
is_csr ? csr_alu :
(is_stype || is_linst) ? `ALU_ADD :
alu_op;
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
endmodule
endmodule

View File

@@ -1,60 +1,13 @@
`ifndef VX_DEFINE
`define VX_DEFINE
`include "VX_platform.vh"
`include "VX_config.vh"
`include "VX_scope.vh"
`define QUEUE_FORCE_MLAB 1
// `define SYNTHESIS 1
// `define ASIC 1
///////////////////////////////////////////////////////////////////////////////
`ifndef NDEBUG
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
x \
/* verilator lint_on UNUSED */
`else
`define DEBUG_BLOCK(x)
`endif
`define DEBUG_BEGIN /* verilator lint_off UNUSED */
`define DEBUG_END /* verilator lint_on UNUSED */
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
/* verilator lint_off PINCONNECTEMPTY */ \
/* verilator lint_off DECLFILENAME */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
/* verilator lint_on DECLFILENAME */
`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \
wire [$bits(x)-1:0] __``x``__ = x; \
/* verilator lint_on UNUSED */
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
. x () \
/* verilator lint_on PINCONNECTEMPTY */
`define STRINGIFY(x) `"x`"
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error(msg); \
endgenerate
`define CLOG2(x) $clog2(x)
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
`define MIN(x, y) ((x < y) ? (x) : (y))
`define MAX(x, y) ((x > y) ? (x) : (y))
`define UP(x) (((x) > 0) ? x : 1)
// `define SYNTHESIS 1
// `define ASIC 1
///////////////////////////////////////////////////////////////////////////////
@@ -64,90 +17,232 @@
`define NC_BITS `LOG2UP(`NUM_CORES)
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
`define NUM_GPRS 32
`ifdef EXT_F_ENABLE
`define NUM_REGS 64
`else
`define NUM_REGS 32
`endif
`define CSR_ADDR_SIZE 12
`define NR_BITS `LOG2UP(`NUM_REGS)
`define CSR_ADDR_BITS 12
`define CSR_WIDTH 12
`define DIV_LATENCY 22
///////////////////////////////////////////////////////////////////////////////
`define MUL_LATENCY 2
`define INST_LUI 7'b0110111
`define INST_AUIPC 7'b0010111
`define INST_JAL 7'b1101111
`define INST_JALR 7'b1100111
`define INST_B 7'b1100011 // branch instructions
`define INST_L 7'b0000011 // load instructions
`define INST_S 7'b0100011 // store instructions
`define INST_I 7'b0010011 // immediate instructions
`define INST_R 7'b0110011 // register instructions
`define INST_F 7'b0001111 // Fence instructions
`define INST_SYS 7'b1110011 // system instructions
`define INST_FL 7'b0000111 // float load instruction
`define INST_FS 7'b0100111 // float store instruction
`define INST_FMADD 7'b1000011
`define INST_FMSUB 7'b1000111
`define INST_FNMSUB 7'b1001011
`define INST_FNMADD 7'b1001111
`define INST_FCI 7'b1010011 // float common instructions
`define INST_GPU 7'b1101011
///////////////////////////////////////////////////////////////////////////////
`define BYTE_EN_NO 3'h7
`define BYTE_EN_SB 3'h0
`define BYTE_EN_SH 3'h1
`define BYTE_EN_SW 3'h2
`define BYTE_EN_UB 3'h4
`define BYTE_EN_UH 3'h5
`define BYTE_EN_BITS 3
`define BYTEEN_SB 3'h0
`define BYTEEN_SH 3'h1
`define BYTEEN_SW 3'h2
`define BYTEEN_UB 3'h4
`define BYTEEN_UH 3'h5
`define BYTEEN_BITS 3
`define BYTEEN_TYPE(x) x[1:0]
`define FRM_RNE 3'b000 // round to nearest even
`define FRM_RTZ 3'b001 // round to zero
`define FRM_RDN 3'b010 // round to -inf
`define FRM_RUP 3'b011 // round to +inf
`define FRM_RMM 3'b100 // round to nearest max magnitude
`define FRM_DYN 3'b111 // dynamic mode
`define FRM_BITS 3
///////////////////////////////////////////////////////////////////////////////
`define INST_R 7'd051
`define INST_L 7'd003
`define INST_ALU 7'd019
`define INST_S 7'd035
`define INST_B 7'd099
`define INST_LUI 7'd055
`define INST_AUIPC 7'd023
`define INST_JAL 7'd111
`define INST_JALR 7'd103
`define INST_SYS 7'd115
`define INST_GPGPU 7'd107
`define EX_NOP 3'h0
`define EX_ALU 3'h1
`define EX_LSU 3'h2
`define EX_CSR 3'h3
`define EX_MUL 3'h4
`define EX_FPU 3'h5
`define EX_GPU 3'h6
`define EX_BITS 3
`define RS2_IMMED 1
`define RS2_REG 0
`define BR_NO 3'h0
`define BR_EQ 3'h1
`define BR_NE 3'h2
`define BR_LT 3'h3
`define BR_GT 3'h4
`define BR_LTU 3'h5
`define BR_GTU 3'h6
`define ALU_NO 5'd15
`define ALU_ADD 5'd00
`define ALU_SUB 5'd01
`define ALU_SLLA 5'd02
`define ALU_SLT 5'd03
`define ALU_SLTU 5'd04
`define ALU_XOR 5'd05
`define ALU_SRL 5'd06
`define ALU_SRA 5'd07
`define ALU_OR 5'd08
`define ALU_AND 5'd09
`define ALU_SUBU 5'd10
`define ALU_LUI 5'd11
`define ALU_AUIPC 5'd12
`define ALU_CSR_RW 5'd13
`define ALU_CSR_RS 5'd14
`define ALU_CSR_RC 5'd15
`define ALU_MUL 5'd16
`define ALU_MULH 5'd17
`define ALU_MULHSU 5'd18
`define ALU_MULHU 5'd19
`define ALU_DIV 5'd20
`define ALU_DIVU 5'd21
`define ALU_REM 5'd22
`define ALU_REMU 5'd23
`define WB_NO 2'h0
`define WB_ALU 2'h1
`define WB_MEM 2'h2
`define WB_JAL 2'h3
`define NUM_EXS 6
`define NE_BITS `LOG2UP(`NUM_EXS)
///////////////////////////////////////////////////////////////////////////////
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 2 + 5 + `NW_BITS)
// OP_BITS is the cast width the decoder applies to every unit-specific opcode
// before driving decode_if.op_type; MOD_BITS is the companion modifier width.
`define OP_BITS 4
`define MOD_BITS 3
// ALU operation encodings (ALU_BITS wide). ALU_OTHER is the decoder's default
// value when no ALU operation is selected.
`define ALU_ADD 4'b0000
`define ALU_LUI 4'b0010
`define ALU_AUIPC 4'b0011
`define ALU_SLTU 4'b0100
`define ALU_SLT 4'b0101
`define ALU_SRL 4'b1000
`define ALU_SRA 4'b1001
`define ALU_SUB 4'b1011
`define ALU_AND 4'b1100
`define ALU_OR 4'b1101
`define ALU_XOR 4'b1110
`define ALU_SLL 4'b1111
`define ALU_OTHER 4'b0111
`define ALU_BITS 4
// Field accessors: ALU_OP extracts the opcode bits, ALU_OP_CLASS its upper two
// bits and ALU_SIGNED its least-significant bit.
`define ALU_OP(x) x[`ALU_BITS-1:0]
`define ALU_OP_CLASS(x) x[3:2]
`define ALU_SIGNED(x) x[0]
// Branch operation encodings (BR_BITS wide).
// Conditional branches have bit 3 clear; within them bit 2 is set for the
// less-than/greater-or-equal compares, bit 1 is set for the negated form
// (NE, GEU, GE) and bit 0 is set for the signed compares (LT, GE).
`define BR_EQ 4'b0000
`define BR_NE 4'b0010
`define BR_LTU 4'b0100
`define BR_GEU 4'b0110
`define BR_LT 4'b0101
`define BR_GE 4'b0111
// Encodings with bit 3 set: jumps, system returns/traps, and the BR_OTHER default.
`define BR_JAL 4'b1000
`define BR_JALR 4'b1001
`define BR_ECALL 4'b1010
`define BR_EBREAK 4'b1011
`define BR_MRET 4'b1100
`define BR_SRET 4'b1101
`define BR_DRET 4'b1110
`define BR_OTHER 4'b1111
`define BR_BITS 4
// Field accessors for the bit positions described above.
`define BR_OP(x) x[`BR_BITS-1:0]
`define BR_NEG(x) x[1]
`define BR_LESS(x) x[2]
`define BR_STATIC(x) x[3]
// The ALU and branch opcodes share one ALU_BR_BITS-wide field; IS_BR_MOD reads
// bit 0 of the op modifier, which the decoder sets to 1 for branch-class
// instructions.
`define ALU_BR_BITS 4
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
`define IS_BR_MOD(x) x[0]
// Load/store operation encodings (LSU_BITS wide): the top bit is 0 for loads
// and 1 for stores, the low three bits are one of the BYTEEN_* codes.
`define LSU_LB {1'b0, `BYTEEN_SB}
`define LSU_LH {1'b0, `BYTEEN_SH}
`define LSU_LW {1'b0, `BYTEEN_SW}
`define LSU_LBU {1'b0, `BYTEEN_UB}
`define LSU_LHU {1'b0, `BYTEEN_UH}
`define LSU_SB {1'b1, `BYTEEN_SB}
`define LSU_SH {1'b1, `BYTEEN_SH}
`define LSU_SW {1'b1, `BYTEEN_SW}
`define LSU_SBU {1'b1, `BYTEEN_UB}
`define LSU_SHU {1'b1, `BYTEEN_UH}
`define LSU_BITS 4
// Field accessors: LSU_RW is the load/store bit, LSU_BE the byte-enable code.
`define LSU_RW(x) x[3]
`define LSU_BE(x) x[2:0]
// CSR operation encodings (CSR_BITS wide). The decoder selects RW/RS/RC from
// func3[1:0] values 1/2/3 and defaults to CSR_OTHER.
`define CSR_RW 2'h0
`define CSR_RS 2'h1
`define CSR_RC 2'h2
`define CSR_OTHER 2'h3
`define CSR_BITS 2
// Extracts the CSR opcode bits from a wider op field.
`define CSR_OP(x) x[`CSR_BITS-1:0]
// Multiply/divide operation encodings (MUL_BITS wide). The values equal the
// func3 value the decoder maps each operation from.
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3
`define MUL_DIV 3'h4
`define MUL_DIVU 3'h5
`define MUL_REM 3'h6
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
// Bit 2 is set for DIV, DIVU, REM and REMU and clear for the multiplies.
`define IS_DIV_OP(x) x[2]
// Floating-point operation encodings (FPU_BITS wide). Operations grouped under
// FPU_MISC are told apart by the modifier value the decoder places in frm.
`define FPU_ADD 4'h0
`define FPU_SUB 4'h1
`define FPU_MUL 4'h2
`define FPU_DIV 4'h3
`define FPU_SQRT 4'h4
`define FPU_MADD 4'h5
`define FPU_MSUB 4'h6
`define FPU_NMSUB 4'h7
`define FPU_NMADD 4'h8
`define FPU_CVTWS 4'h9 // FCVT.W.S
`define FPU_CVTWUS 4'hA // FCVT.WU.S
`define FPU_CVTSW 4'hB // FCVT.S.W
`define FPU_CVTSWU 4'hC // FCVT.S.WU
`define FPU_CLASS 4'hD
`define FPU_CMP 4'hE
`define FPU_MISC 4'hF // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
`define FPU_BITS 4
// Extracts the FPU opcode bits from a wider op field.
`define FPU_OP(x) x[`FPU_BITS-1:0]
// GPU (warp-control) operation encodings (GPU_BITS wide). Values 0-4 equal the
// func3 value the decoder maps them from; GPU_OTHER is the decoder default.
`define GPU_TMC 3'h0
`define GPU_WSPAWN 3'h1
`define GPU_SPLIT 3'h2
`define GPU_JOIN 3'h3
`define GPU_BAR 3'h4
`define GPU_OTHER 3'h7
`define GPU_BITS 3
// Extracts the GPU opcode bits from a wider op field.
`define GPU_OP(x) x[`GPU_BITS-1:0]
///////////////////////////////////////////////////////////////////////////////
`ifdef EXT_M_ENABLE
`define ISA_EXT_M (1 << 12)
`else
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
`define ISA_EXT_M 0
`endif
`ifdef EXT_F_ENABLE
`define ISA_EXT_F (1 << 5)
`else
`define ISA_EXT_F 0
`endif
// ISA extension bitmask: bit N stands for the Nth letter of the alphabet
// (A = bit 0 ... Z = bit 25). The F and M bits come from ISA_EXT_F / ISA_EXT_M,
// which are non-zero only when the matching EXT_*_ENABLE macro is defined.
// NOTE(review): the layout matches the RISC-V misa "Extensions" field;
// presumably this value is reported through that CSR - confirm at the use site.
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
| (0 << 1) // B - Tentatively reserved for Bit operations extension \
| (0 << 2) // C - Compressed extension \
| (0 << 3) // D - Double precision floating-point extension \
| (0 << 4) // E - RV32E base ISA \
|`ISA_EXT_F // F - Single precision floating-point extension \
| (0 << 6) // G - Additional standard extensions present \
| (0 << 7) // H - Hypervisor mode implemented \
| (1 << 8) // I - RV32I/64I/128I base ISA \
| (0 << 9) // J - Reserved \
| (0 << 10) // K - Reserved \
| (0 << 11) // L - Tentatively reserved for Decimal Floating-Point extension \
|`ISA_EXT_M // M - Integer Multiply/Divide extension \
| (0 << 13) // N - User level interrupts supported \
| (0 << 14) // O - Reserved \
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
| (0 << 16) // Q - Quad-precision floating-point extension \
| (0 << 17) // R - Reserved \
| (0 << 18) // S - Supervisor mode implemented \
| (0 << 19) // T - Tentatively reserved for Transactional Memory extension \
| (1 << 20) // U - User mode implemented \
| (0 << 21) // V - Tentatively reserved for Vector extension \
| (0 << 22) // W - Reserved \
| (1 << 23) // X - Non-standard extensions present \
| (0 << 24) // Y - Reserved \
| (0 << 25) // Z - Reserved
///////////////////////////////////////////////////////////////////////////////
`ifdef DBG_CORE_REQ_INFO // pc, rd, wid
`define DBG_CORE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
`else
`define DBG_CORE_REQ_MDATAW 0
`endif
////////////////////////// Dcache Configurable Knobs //////////////////////////
@@ -156,10 +251,10 @@
`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)
// TAG sharing enable
`define DCORE_TAG_ID_BITS `LOG2UP(`DCREQ_SIZE)
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
// Core request tag bits
`define DCORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `DCORE_TAG_ID_BITS)
`define DCORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
// DRAM request data bits
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
@@ -191,10 +286,10 @@
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE
// TAG sharing enable
`define ICORE_TAG_ID_BITS `LOG2UP(`ICREQ_SIZE)
`define ICORE_TAG_ID_BITS `NW_BITS
// Core request tag bits
`define ICORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `ICORE_TAG_ID_BITS)
`define ICORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
// DRAM request data bits
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
@@ -288,9 +383,10 @@
`define VX_DRAM_TAG_WIDTH `L3DRAM_TAG_WIDTH
`define VX_SNP_TAG_WIDTH `L3SNP_TAG_WIDTH
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
`define VX_CSR_ID_WIDTH `CLOG2(`NUM_CLUSTERS * `NUM_CORES)
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
`include "VX_types.vh"
// VX_DEFINE
`endif

View File

@@ -1,147 +0,0 @@
`include "VX_define.vh"
// VX_exec_unit: integer execute stage.
// Runs one VX_alu_unit per thread on the incoming request, drives the
// writeback interface with the results, and produces registered JAL and
// branch responses.
module VX_exec_unit (
input wire clk,
input wire reset,
// Request
VX_exec_unit_req_if exec_unit_req_if,
// Output
VX_wb_if inst_exec_wb_if,
VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if,
// External back-pressure input; OR-ed into `delay` below.
input wire no_slot_exec,
// Asserted when no_slot_exec is set or any per-thread ALU reports a stall.
output wire delay
);
// Local aliases for the request fields consumed by the ALUs and the JAL logic.
wire [`NUM_THREADS-1:0][31:0] in_a_reg_data;
wire [`NUM_THREADS-1:0][31:0] in_b_reg_data;
wire [4:0] in_alu_op;
wire in_rs2_src;
wire [31:0] in_itype_immed;
// in_branch_type is assigned but never read in this module (the branch logic
// reads exec_unit_req_if.branch_type directly), hence the lint guard.
`DEBUG_BEGIN
wire [2:0] in_branch_type;
`DEBUG_END
wire [19:0] in_upper_immed;
wire in_jal;
wire [31:0] in_jal_offset;
wire [31:0] in_curr_PC;
assign in_a_reg_data = exec_unit_req_if.a_reg_data;
assign in_b_reg_data = exec_unit_req_if.b_reg_data;
assign in_alu_op = exec_unit_req_if.alu_op;
assign in_rs2_src = exec_unit_req_if.rs2_src;
assign in_itype_immed = exec_unit_req_if.itype_immed;
assign in_branch_type = exec_unit_req_if.branch_type;
assign in_upper_immed = exec_unit_req_if.upper_immed;
assign in_jal = exec_unit_req_if.jal;
assign in_jal_offset = exec_unit_req_if.jal_offset;
assign in_curr_PC = exec_unit_req_if.curr_PC;
wire [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0] alu_stall;
// One ALU per thread: only the two register operands differ between threads;
// the opcode, immediates and PC are shared.
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i++) begin
VX_alu_unit alu_unit (
.clk (clk),
.reset (reset),
.src_a (in_a_reg_data[i]),
.src_b (in_b_reg_data[i]),
.src_rs2 (in_rs2_src),
.itype_immed (in_itype_immed),
.upper_immed (in_upper_immed),
.alu_op (in_alu_op),
.curr_PC (in_curr_PC),
.alu_result (alu_result[i]),
.alu_stall (alu_stall[i])
);
end
endgenerate
// A stall in any thread's ALU stalls the whole request.
wire internal_stall = (| alu_stall);
assign delay = no_slot_exec || internal_stall;
// Select one thread index from the valid mask; that thread's ALU result and
// rs1 value drive the branch direction and the JAL destination.
// NOTE(review): which set bit wins is decided inside VX_priority_encoder,
// which is not visible here.
wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index;
VX_priority_encoder #(
.N(`NUM_THREADS)
) choose_alu_result (
.data_in (exec_unit_req_if.valid),
.data_out (jal_branch_use_index),
`UNUSED_PIN (valid_out)
);
wire [31:0] branch_use_alu_result = alu_result[jal_branch_use_index];
// Branch direction from the selected thread's ALU result: EQ/NE test the
// result against zero, the ordered compares test its sign bit (bit 31).
// The decoder picks ALU_SUB / ALU_SUBU as the ALU op for branch instructions.
reg temp_branch_dir;
always @(*)
begin
case (exec_unit_req_if.branch_type)
`BR_EQ: temp_branch_dir = (branch_use_alu_result == 0);
`BR_NE: temp_branch_dir = (branch_use_alu_result != 0);
`BR_LT: temp_branch_dir = (branch_use_alu_result[31] != 0);
`BR_GT: temp_branch_dir = (branch_use_alu_result[31] == 0);
`BR_LTU: temp_branch_dir = (branch_use_alu_result[31] != 0);
`BR_GTU: temp_branch_dir = (branch_use_alu_result[31] == 0);
`BR_NO: temp_branch_dir = 0;
default: temp_branch_dir = 0;
endcase // in_branch_type
end
// next_PC replicated for every thread; written back instead of the ALU
// result when the request is a JAL (see inst_exec_wb_if.data below).
wire [`NUM_THREADS-1:0][31:0] duplicate_PC_data;
generate
for (i = 0; i < `NUM_THREADS; i++) begin
assign duplicate_PC_data[i] = exec_unit_req_if.next_PC;
end
endgenerate
// Unregistered responses; they are registered into the output interfaces
// by jal_reg / branch_reg at the bottom of the module.
VX_jal_rsp_if jal_rsp_temp_if();
VX_branch_rsp_if branch_rsp_temp_if();
// Actual Writeback
assign inst_exec_wb_if.rd = exec_unit_req_if.rd;
assign inst_exec_wb_if.wb = exec_unit_req_if.wb;
// Per-thread valid bits are all cleared while any ALU is stalled.
assign inst_exec_wb_if.valid = exec_unit_req_if.valid & {`NUM_THREADS{!internal_stall}};
assign inst_exec_wb_if.warp_num = exec_unit_req_if.warp_num;
assign inst_exec_wb_if.data = exec_unit_req_if.jal ? duplicate_PC_data : alu_result;
assign inst_exec_wb_if.curr_PC = in_curr_PC;
// Jal rsp
// Destination = selected thread's rs1 value + jal_offset.
assign jal_rsp_temp_if.valid = in_jal;
assign jal_rsp_temp_if.dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
assign jal_rsp_temp_if.warp_num = exec_unit_req_if.warp_num;
// Branch rsp
// Valid for any branch type other than BR_NO with at least one valid thread.
assign branch_rsp_temp_if.valid = (exec_unit_req_if.branch_type != `BR_NO) && (| exec_unit_req_if.valid);
assign branch_rsp_temp_if.dir = temp_branch_dir;
assign branch_rsp_temp_if.warp_num = exec_unit_req_if.warp_num;
assign branch_rsp_temp_if.dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset
// JAL response register: valid (1) + dest (32) + warp_num (NW_BITS);
// never stalled or flushed.
VX_generic_register #(
.N(33 + `NW_BITS-1 + 1)
) jal_reg (
.clk (clk),
.reset (reset),
.stall (1'b0),
.flush (1'b0),
.in ({jal_rsp_temp_if.valid, jal_rsp_temp_if.dest, jal_rsp_temp_if.warp_num}),
.out ({jal_rsp_if.valid , jal_rsp_if.dest , jal_rsp_if.warp_num})
);
// Branch response register: valid (1) + dir (1) + warp_num (NW_BITS) +
// dest (32); never stalled or flushed.
VX_generic_register #(
.N(34 + `NW_BITS-1 + 1)
) branch_reg (
.clk (clk),
.reset (reset),
.stall (1'b0),
.flush (1'b0),
.in ({branch_rsp_temp_if.valid, branch_rsp_temp_if.dir, branch_rsp_temp_if.warp_num, branch_rsp_temp_if.dest}),
.out ({branch_rsp_if.valid , branch_rsp_if.dir , branch_rsp_if.warp_num , branch_rsp_if.dest })
);
endmodule : VX_exec_unit

137
hw/rtl/VX_execute.v Normal file
View File

@@ -0,0 +1,137 @@
`include "VX_define.vh"
// VX_execute: execute-stage wrapper.
//
// Instantiates one functional unit per request interface (ALU, LSU, CSR,
// MUL, FPU, GPU) and routes each unit's result to its commit interface.
// The MUL and FPU units are only built when EXT_M_ENABLE / EXT_F_ENABLE are
// defined; otherwise their ready/commit signals are tied to zero.
// `ebreak` is raised while a valid ALU request carries an EBREAK or ECALL
// branch opcode.
module VX_execute #(
    parameter CORE_ID = 0
) (
    `SCOPE_SIGNALS_LSU_IO
    `SCOPE_SIGNALS_EXECUTE_IO

    input wire              clk,
    input wire              reset,

    // CSR I/O request/response interface
    VX_csr_io_req_if        csr_io_req_if,
    VX_csr_io_rsp_if        csr_io_rsp_if,

    // Data-cache request/response interface
    VX_cache_core_req_if    dcache_req_if,
    VX_cache_core_rsp_if    dcache_rsp_if,

    // Commit-stage information forwarded to the CSR unit
    VX_cmt_to_csr_if        cmt_to_csr_if,

    // Per-unit request inputs
    VX_alu_req_if           alu_req_if,
    VX_lsu_req_if           lsu_req_if,
    VX_csr_req_if           csr_req_if,
    VX_mul_req_if           mul_req_if,
    VX_fpu_req_if           fpu_req_if,
    VX_gpu_req_if           gpu_req_if,

    // Control outputs and per-unit commit outputs
    VX_csr_to_issue_if      csr_to_issue_if,
    VX_branch_ctl_if        branch_ctl_if,
    VX_warp_ctl_if          warp_ctl_if,
    VX_exu_to_cmt_if        alu_commit_if,
    VX_exu_to_cmt_if        lsu_commit_if,
    VX_exu_to_cmt_if        csr_commit_if,
    VX_exu_to_cmt_if        mul_commit_if,
    VX_fpu_to_cmt_if        fpu_commit_if,
    VX_exu_to_cmt_if        gpu_commit_if,

    output wire             ebreak
);

    // ---------------------------------------------------------------- ALU
    VX_alu_unit #(
        .CORE_ID (CORE_ID)
    ) alu_unit (
        .clk            (clk),
        .reset          (reset),
        .alu_req_if     (alu_req_if),
        .branch_ctl_if  (branch_ctl_if),
        .alu_commit_if  (alu_commit_if)
    );

    // ---------------------------------------------------------------- LSU
    VX_lsu_unit #(
        .CORE_ID (CORE_ID)
    ) lsu_unit (
        `SCOPE_SIGNALS_LSU_BIND
        .clk            (clk),
        .reset          (reset),
        .dcache_req_if  (dcache_req_if),
        .dcache_rsp_if  (dcache_rsp_if),
        .lsu_req_if     (lsu_req_if),
        .lsu_commit_if  (lsu_commit_if)
    );

    // ---------------------------------------------------------------- CSR
    VX_csr_unit #(
        .CORE_ID (CORE_ID)
    ) csr_unit (
        .clk             (clk),
        .reset           (reset),
        .cmt_to_csr_if   (cmt_to_csr_if),
        .csr_to_issue_if (csr_to_issue_if),
        .csr_io_req_if   (csr_io_req_if),
        .csr_io_rsp_if   (csr_io_rsp_if),
        .csr_req_if      (csr_req_if),
        .csr_commit_if   (csr_commit_if)
    );

    // ---------------------------------------------------------------- MUL
`ifdef EXT_M_ENABLE
    VX_mul_unit #(
        .CORE_ID (CORE_ID)
    ) mul_unit (
        .clk            (clk),
        .reset          (reset),
        .mul_req_if     (mul_req_if),
        .mul_commit_if  (mul_commit_if)
    );
`else
    // M extension disabled: never accept a request, never commit.
    assign mul_req_if.ready     = 0;
    assign mul_commit_if.valid  = 0;
    assign mul_commit_if.wid    = 0;
    assign mul_commit_if.PC     = 0;
    assign mul_commit_if.tmask  = 0;
    assign mul_commit_if.wb     = 0;
    assign mul_commit_if.rd     = 0;
    assign mul_commit_if.data   = 0;
`endif

    // ---------------------------------------------------------------- FPU
`ifdef EXT_F_ENABLE
    VX_fpu_unit #(
        .CORE_ID (CORE_ID)
    ) fpu_unit (
        .clk            (clk),
        .reset          (reset),
        .fpu_req_if     (fpu_req_if),
        .fpu_commit_if  (fpu_commit_if)
    );
`else
    // F extension disabled: never accept a request, never commit.
    assign fpu_req_if.ready         = 0;
    assign fpu_commit_if.valid      = 0;
    assign fpu_commit_if.wid        = 0;
    assign fpu_commit_if.PC         = 0;
    assign fpu_commit_if.tmask      = 0;
    assign fpu_commit_if.wb         = 0;
    assign fpu_commit_if.rd         = 0;
    assign fpu_commit_if.data       = 0;
    assign fpu_commit_if.has_fflags = 0;
    assign fpu_commit_if.fflags     = 0;
`endif

    // ---------------------------------------------------------------- GPU
    VX_gpu_unit #(
        .CORE_ID (CORE_ID)
    ) gpu_unit (
        .clk            (clk),
        .reset          (reset),
        .gpu_req_if     (gpu_req_if),
        .warp_ctl_if    (warp_ctl_if),
        .gpu_commit_if  (gpu_commit_if)
    );

    // ------------------------------------------------------------- ebreak
    // Both EBREAK and ECALL raise the flag, but only for a valid ALU request
    // that is marked as a branch-class operation.
    wire alu_br_request = alu_req_if.valid && alu_req_if.is_br_op;
    wire br_is_ebreak   = (`BR_OP(alu_req_if.op_type) == `BR_EBREAK);
    wire br_is_ecall    = (`BR_OP(alu_req_if.op_type) == `BR_ECALL);

    assign ebreak = alu_br_request && (br_is_ebreak || br_is_ecall);

endmodule

View File

@@ -1,99 +1,58 @@
`include "VX_define.vh"
module VX_fetch (
input wire clk,
input wire reset,
VX_wstall_if wstall_if,
VX_join_if join_if,
input wire schedule_delay,
input wire icache_stage_delay,
input wire[`NW_BITS-1:0] icache_stage_wid,
input wire icache_stage_response,
output wire busy,
VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if,
VX_inst_meta_if fe_inst_meta_fi,
VX_warp_ctl_if warp_ctl_if
module VX_fetch #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
input wire clk,
input wire reset,
// Icache interface
VX_cache_core_req_if icache_req_if,
VX_cache_core_rsp_if icache_rsp_if,
// inputs
VX_wstall_if wstall_if,
VX_join_if join_if,
VX_branch_ctl_if branch_ctl_if,
VX_warp_ctl_if warp_ctl_if,
// outputs
VX_ifetch_rsp_if ifetch_rsp_if,
output wire busy
);
wire[`NUM_THREADS-1:0] thread_mask;
wire[`NW_BITS-1:0] warp_num;
wire[31:0] warp_pc;
wire scheduled_warp;
VX_ifetch_req_if ifetch_req_if();
wire pipe_stall = schedule_delay || icache_stage_delay;
VX_warp_sched warp_sched (
VX_warp_sched #(
.CORE_ID(CORE_ID)
) warp_sched (
.clk (clk),
.reset (reset),
.stall (pipe_stall),
.reset (reset),
.warp_ctl_if (warp_ctl_if),
.wstall_if (wstall_if),
.join_if (join_if),
.branch_ctl_if (branch_ctl_if),
.ifetch_req_if (ifetch_req_if),
.ifetch_rsp_if (ifetch_rsp_if),
.busy (busy)
);
.is_barrier (warp_ctl_if.is_barrier),
.barrier_id (warp_ctl_if.barrier_id),
.num_warps (warp_ctl_if.num_warps),
.barrier_warp_num (warp_ctl_if.warp_num),
VX_icache_stage #(
.CORE_ID(CORE_ID)
) icache_stage (
`SCOPE_SIGNALS_ISTAGE_BIND
// Wspawn
.wspawn (warp_ctl_if.wspawn),
.wsapwn_pc (warp_ctl_if.wspawn_pc),
.wspawn_new_active(warp_ctl_if.wspawn_new_active),
.clk (clk),
.reset (reset),
// CTM
.ctm (warp_ctl_if.change_mask),
.ctm_mask (warp_ctl_if.thread_mask),
.ctm_warp_num (warp_ctl_if.warp_num),
.icache_rsp_if (icache_rsp_if),
.icache_req_if (icache_req_if),
// WHALT
.whalt (warp_ctl_if.whalt),
.whalt_warp_num (warp_ctl_if.warp_num),
// Wstall
.wstall (wstall_if.wstall),
.wstall_warp_num (wstall_if.warp_num),
// Lock/release Stuff
.icache_stage_response(icache_stage_response),
.icache_stage_wid (icache_stage_wid),
// Join
.is_join (join_if.is_join),
.join_warp_num (join_if.warp_num),
// Split
.is_split (warp_ctl_if.is_split),
.dont_split (warp_ctl_if.dont_split),
.split_new_mask (warp_ctl_if.split_new_mask),
.split_later_mask (warp_ctl_if.split_later_mask),
.split_save_pc (warp_ctl_if.split_save_pc),
.split_warp_num (warp_ctl_if.warp_num),
// JAL
.jal (jal_rsp_if.valid),
.dest (jal_rsp_if.dest),
.jal_warp_num (jal_rsp_if.warp_num),
// Branch
.branch_valid (branch_rsp_if.valid),
.branch_dir (branch_rsp_if.dir),
.branch_dest (branch_rsp_if.dest),
.branch_warp_num (branch_rsp_if.warp_num),
// Outputs
.thread_mask (thread_mask),
.warp_num (warp_num),
.warp_pc (warp_pc),
.busy (busy),
.scheduled_warp (scheduled_warp)
.ifetch_req_if (ifetch_req_if),
.ifetch_rsp_if (ifetch_rsp_if)
);
assign fe_inst_meta_fi.warp_num = warp_num;
assign fe_inst_meta_fi.valid = thread_mask;
assign fe_inst_meta_fi.instruction = 32'h0;
assign fe_inst_meta_fi.curr_PC = warp_pc;
`DEBUG_BEGIN
wire start_mat_add = scheduled_warp && (warp_pc == 32'h80000ed8) && (warp_num == 0);
wire end_mat_add = scheduled_warp && (warp_pc == 32'h80000fbc) && (warp_num == 0);
`DEBUG_END
endmodule

140
hw/rtl/VX_fpu_unit.v Normal file
View File

@@ -0,0 +1,140 @@
`include "VX_define.vh"
// Floating-point execution unit.
//
// A request accepted on fpu_req_if is handed to the FP core (VX_fp_fpga when
// FPU_FAST is defined, VX_fpnew otherwise). While the operation is in flight its
// metadata (wid, tmask, PC, rd, wb) is kept in a VX_cam_buffer: the slot is
// acquired when the request is accepted, addressed with tag_in on write and with
// the core's tag_out on read/release. The core's result, together with the
// metadata read back from the buffer, goes through one register stage before it
// is driven onto fpu_commit_if.
module VX_fpu_unit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // request side
    VX_fpu_req_if    fpu_req_if,

    // commit side
    VX_fpu_to_cmt_if fpu_commit_if
);
    localparam FPUQ_BITS = `LOG2UP(`FPUQ_SIZE);

    // bits of metadata stored per in-flight request: wid, tmask, PC, rd, wb
    localparam META_BITS = `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1;

    // FP core handshake and tags
    wire                 ready_in;
    wire                 valid_out;
    wire                 ready_out;
    wire [FPUQ_BITS-1:0] tag_in, tag_out;

    // FP core results
    wire [`NUM_THREADS-1:0][31:0] result;
    wire                          has_fflags;
    fflags_t [`NUM_THREADS-1:0]   fflags;

    // metadata read from the buffer at address tag_out
    wire [`NW_BITS-1:0]     rsp_wid;
    wire [`NUM_THREADS-1:0] rsp_tmask;
    wire [31:0]             rsp_PC;
    wire [`NR_BITS-1:0]     rsp_rd;
    wire                    rsp_wb;

    wire fpuq_full;

    // a request is only shown to the core while a metadata slot is available
    wire valid_in = fpu_req_if.valid && ~fpuq_full;

    // can accept new request?
    assign fpu_req_if.ready = ready_in && ~fpuq_full;

    // slot bookkeeping: acquire on an accepted request, release when a result
    // leaves the core
    wire slot_acquire = fpu_req_if.valid && fpu_req_if.ready;
    wire slot_release = valid_out && ready_out;

    VX_cam_buffer #(
        .DATAW (META_BITS),
        .SIZE  (`FPUQ_SIZE)
    ) fpu_cam (
        .clk          (clk),
        .reset        (reset),
        .acquire_slot (slot_acquire),
        .write_addr   (tag_in),
        .write_data   ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
        .read_addr    (tag_out),
        .read_data    ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
        .release_addr (tag_out),
        .release_slot (slot_release),
        .full         (fpuq_full)
    );

    // Both FP core implementations are connected identically, so only the module
    // name and its parameter list are selected by the preprocessor.
`ifdef FPU_FAST
    VX_fp_fpga #(
        .TAGW (FPUQ_BITS)
    )
`else
    VX_fpnew #(
        .FMULADD  (1),
        .FDIVSQRT (1),
        .FNONCOMP (1),
        .FCONV    (1),
        .TAGW     (FPUQ_BITS)
    )
`endif
    fp_core (
        .clk        (clk),
        .reset      (reset),

        .valid_in   (valid_in),
        .ready_in   (ready_in),
        .tag_in     (tag_in),

        .op_type    (fpu_req_if.op_type),
        .frm        (fpu_req_if.frm),
        .dataa      (fpu_req_if.rs1_data),
        .datab      (fpu_req_if.rs2_data),
        .datac      (fpu_req_if.rs3_data),

        .result     (result),
        .has_fflags (has_fflags),
        .fflags     (fflags),
        .tag_out    (tag_out),

        .ready_out  (ready_out),
        .valid_out  (valid_out)
    );

    // hold the output register while a valid commit has not been taken
    wire commit_stall = ~fpu_commit_if.ready && fpu_commit_if.valid;

    VX_generic_register #(
        .N(1 + META_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
    ) fpu_reg (
        .clk   (clk),
        .reset (reset),
        .stall (commit_stall),
        .flush (1'b0),
        .in    ({valid_out,
                 rsp_wid,
                 rsp_tmask,
                 rsp_PC,
                 rsp_rd,
                 rsp_wb,
                 result,
                 has_fflags,
                 fflags}),
        .out   ({fpu_commit_if.valid,
                 fpu_commit_if.wid,
                 fpu_commit_if.tmask,
                 fpu_commit_if.PC,
                 fpu_commit_if.rd,
                 fpu_commit_if.wb,
                 fpu_commit_if.data,
                 fpu_commit_if.has_fflags,
                 fpu_commit_if.fflags})
    );

    // the core may only hand over a result when the output register can move
    assign ready_out = ~commit_stall;

endmodule

View File

@@ -1,116 +0,0 @@
`include "VX_define.vh"
// Front end of the core: fetch -> (f_d_reg) -> icache stage -> (i_d_reg) ->
// decode -> (d_e_reg) -> back-end request interface.
//
// The three VX_generic_register instances are the pipeline registers between
// the stages. f_d_reg is stalled by icache_stage_delay; the other two are
// stalled by schedule_delay (named total_freeze here). None of them is flushed.
module VX_front_end #(
    parameter CORE_ID = 0
) (
    `SCOPE_SIGNALS_ISTAGE_IO

    input wire clk,
    input wire reset,

    input wire schedule_delay,

    VX_warp_ctl_if       warp_ctl_if,

    VX_cache_core_rsp_if icache_rsp_if,
    VX_cache_core_req_if icache_req_if,

    VX_jal_rsp_if        jal_rsp_if,
    VX_branch_rsp_if     branch_rsp_if,

    VX_backend_req_if    bckE_req_if,

    output wire busy
);
    // width of {instruction, curr_PC, warp_num, valid} as carried between stages
    localparam INST_META_BITS = 64 + `NW_BITS-1 + 1 + `NUM_THREADS;

    // stage-to-stage instruction metadata
    VX_inst_meta_if fe_inst_meta_fi();   // fetch output
    VX_inst_meta_if fe_inst_meta_fi2();  // icache stage input (after f_d_reg)
    VX_inst_meta_if fe_inst_meta_id();   // icache stage output
    VX_inst_meta_if fd_inst_meta_de();   // decode input (after i_d_reg)

    // decode output, before d_e_reg
    VX_backend_req_if frE_to_bckE_req_if();

    // decode -> fetch feedback
    VX_wstall_if wstall_if();
    VX_join_if   join_if();

    // icache stage -> fetch feedback
    wire                icache_stage_delay;
    wire [`NW_BITS-1:0] icache_stage_wid;
    wire                icache_stage_response;

    wire total_freeze = schedule_delay;

    VX_fetch fetch (
        .clk                   (clk),
        .reset                 (reset),
        .schedule_delay        (schedule_delay),
        .icache_stage_delay    (icache_stage_delay),
        .icache_stage_wid      (icache_stage_wid),
        .icache_stage_response (icache_stage_response),
        .warp_ctl_if           (warp_ctl_if),
        .wstall_if             (wstall_if),
        .join_if               (join_if),
        .jal_rsp_if            (jal_rsp_if),
        .branch_rsp_if         (branch_rsp_if),
        .busy                  (busy),
        .fe_inst_meta_fi       (fe_inst_meta_fi)
    );

    VX_generic_register #(
        .N(INST_META_BITS)
    ) f_d_reg (
        .clk   (clk),
        .reset (reset),
        .stall (icache_stage_delay),
        .flush (1'b0),
        .in    ({fe_inst_meta_fi.instruction,
                 fe_inst_meta_fi.curr_PC,
                 fe_inst_meta_fi.warp_num,
                 fe_inst_meta_fi.valid}),
        .out   ({fe_inst_meta_fi2.instruction,
                 fe_inst_meta_fi2.curr_PC,
                 fe_inst_meta_fi2.warp_num,
                 fe_inst_meta_fi2.valid})
    );

    VX_icache_stage #(
        .CORE_ID(CORE_ID)
    ) icache_stage (
        `SCOPE_SIGNALS_ISTAGE_BIND

        .clk                   (clk),
        .reset                 (reset),
        .total_freeze          (total_freeze),
        .icache_stage_delay    (icache_stage_delay),
        .icache_stage_response (icache_stage_response),
        .icache_stage_wid      (icache_stage_wid),
        .fe_inst_meta_fi       (fe_inst_meta_fi2),
        .fe_inst_meta_id       (fe_inst_meta_id),
        .icache_rsp_if         (icache_rsp_if),
        .icache_req_if         (icache_req_if)
    );

    VX_generic_register #(
        .N(INST_META_BITS)
    ) i_d_reg (
        .clk   (clk),
        .reset (reset),
        .stall (total_freeze),
        .flush (1'b0),
        .in    ({fe_inst_meta_id.instruction,
                 fe_inst_meta_id.curr_PC,
                 fe_inst_meta_id.warp_num,
                 fe_inst_meta_id.valid}),
        .out   ({fd_inst_meta_de.instruction,
                 fd_inst_meta_de.curr_PC,
                 fd_inst_meta_de.warp_num,
                 fd_inst_meta_de.valid})
    );

    VX_decode decode (
        .fd_inst_meta_de    (fd_inst_meta_de),
        .frE_to_bckE_req_if (frE_to_bckE_req_if),
        .wstall_if          (wstall_if),
        .join_if            (join_if)
    );

    // The field order of .in and .out must stay in lock-step: the register is a
    // flat bit vector and the fields are recovered purely by position.
    VX_generic_register #(
        .N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)
    ) d_e_reg (
        .clk   (clk),
        .reset (reset),
        .stall (total_freeze),
        .flush (1'b0),
        .in    ({frE_to_bckE_req_if.csr_addr,
                 frE_to_bckE_req_if.is_jal,
                 frE_to_bckE_req_if.is_etype,
                 frE_to_bckE_req_if.is_csr,
                 frE_to_bckE_req_if.csr_immed,
                 frE_to_bckE_req_if.csr_mask,
                 frE_to_bckE_req_if.rd,
                 frE_to_bckE_req_if.rs1,
                 frE_to_bckE_req_if.rs2,
                 frE_to_bckE_req_if.alu_op,
                 frE_to_bckE_req_if.wb,
                 frE_to_bckE_req_if.rs2_src,
                 frE_to_bckE_req_if.itype_immed,
                 frE_to_bckE_req_if.mem_read,
                 frE_to_bckE_req_if.mem_write,
                 frE_to_bckE_req_if.branch_type,
                 frE_to_bckE_req_if.upper_immed,
                 frE_to_bckE_req_if.curr_PC,
                 frE_to_bckE_req_if.jal,
                 frE_to_bckE_req_if.jal_offset,
                 frE_to_bckE_req_if.next_PC,
                 frE_to_bckE_req_if.valid,
                 frE_to_bckE_req_if.warp_num,
                 frE_to_bckE_req_if.is_wspawn,
                 frE_to_bckE_req_if.is_tmc,
                 frE_to_bckE_req_if.is_split,
                 frE_to_bckE_req_if.is_barrier}),
        .out   ({bckE_req_if.csr_addr,
                 bckE_req_if.is_jal,
                 bckE_req_if.is_etype,
                 bckE_req_if.is_csr,
                 bckE_req_if.csr_immed,
                 bckE_req_if.csr_mask,
                 bckE_req_if.rd,
                 bckE_req_if.rs1,
                 bckE_req_if.rs2,
                 bckE_req_if.alu_op,
                 bckE_req_if.wb,
                 bckE_req_if.rs2_src,
                 bckE_req_if.itype_immed,
                 bckE_req_if.mem_read,
                 bckE_req_if.mem_write,
                 bckE_req_if.branch_type,
                 bckE_req_if.upper_immed,
                 bckE_req_if.curr_PC,
                 bckE_req_if.jal,
                 bckE_req_if.jal_offset,
                 bckE_req_if.next_PC,
                 bckE_req_if.valid,
                 bckE_req_if.warp_num,
                 bckE_req_if.is_wspawn,
                 bckE_req_if.is_tmc,
                 bckE_req_if.is_split,
                 bckE_req_if.is_barrier})
    );

endmodule

72
hw/rtl/VX_gpr_bypass.v Normal file
View File

@@ -0,0 +1,72 @@
`include "VX_platform.vh"
// Bypass buffer placed on a data path whose producer signals `push` one cycle
// before the corresponding `data_in` is sampled (the module acts on
// `delayed_push`, i.e. `push` registered by one clock).
//
// Parameters:
//   DATAW    - width of data_in / data_out in bits.
//   PASSTHRU - when non-zero, no storage is instantiated and data_out is wired
//              straight to data_in; otherwise up to two words are buffered.
//
// Ports:
//   push     - announces a word; data_in is captured in the following cycle.
//   pop      - consumer takes the word currently shown on data_out.
//   data_out - oldest buffered word if one is held, else data_in.
module VX_gpr_bypass #(
parameter DATAW = 1,
parameter PASSTHRU = 0
) (
input wire clk,
input wire reset,
input wire push,
input wire pop,
input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out
);
// Pass-through variant: nothing is stored, so a word announced by push must be
// popped in the very next cycle (checked by the assertion below).
if (PASSTHRU) begin
reg delayed_push;
always @(posedge clk) begin
if (reset) begin
delayed_push <= 0;
end else begin
delayed_push <= push;
// a pushed word would be lost if it is not consumed immediately
assert(!delayed_push || pop);
end
end
assign data_out = data_in;
// Buffered variant: `buffer` is the head entry, `buffer2` the second entry.
end else begin
reg [DATAW-1:0] buffer, buffer2;
reg use_buffer, use_buffer2;
reg delayed_push;
always @(posedge clk) begin
if (reset) begin
delayed_push <= 0;
use_buffer <= 0;
use_buffer2 <= 0;
buffer <= 0;
buffer2 <= 0;
end else begin
delayed_push <= push;
// invariant: the second entry is only ever occupied while the head is
assert(!use_buffer2 || use_buffer);
// Pop with an occupied head: shift the second entry into the head.
// These assignments may be overridden by the push handling below, which
// comes later in the block and therefore wins for the same register.
if (pop) begin
if (use_buffer) begin
buffer <= buffer2;
use_buffer <= use_buffer2;
use_buffer2 <= 0;
end
end
// A word announced in the previous cycle is available on data_in now.
if (delayed_push) begin
if (use_buffer) begin
assert(!use_buffer2); // queue full!
if (pop) begin
// head is being consumed: the new word replaces it
buffer <= data_in;
end else begin
// head is kept: the new word goes into the second entry
buffer2 <= data_in;
use_buffer2 <= 1;
end
use_buffer <= 1;
end else if (!pop) begin
// empty and not consumed this cycle: store the word in the head;
// when pop is set the word is taken directly from data_in instead
buffer <= data_in;
use_buffer <= 1;
end
end
end
end
// show the buffered head when there is one, otherwise bypass data_in
assign data_out = use_buffer ? buffer : data_in;
end
endmodule

74
hw/rtl/VX_gpr_fp_ctrl.v Normal file
View File

@@ -0,0 +1,74 @@
`include "VX_define.vh"
// Control module to support multi-cycle register reads for floating-point
// instructions that need a third source operand (rs3).
//
// Only two read results are supplied per cycle (rs1_data, rs2_data). The address
// output raddr1 selects rs1 while read_rs1 is set and rs3 otherwise, so rs3 is
// fetched through the rs1 read result in a second cycle:
//   * when a valid request uses rs3 and read_rs1 is set, rs3_delay is asserted:
//     gpr_req_if.ready is dropped and read_rs1 is cleared for the next cycle;
//   * rsp_rs1_data is only captured while read_rs1 is set, so it keeps the rs1
//     value during the rs3 cycle;
//   * rsp_rs3_data captures rs1_data every cycle;
//   * read_rs1 is set again when the request fires (valid request and
//     gpr_rsp_if.ready) outside of an rs3_delay cycle.
//
// Ports:
//   rs1_data, rs2_data - read results, one 32-bit word per thread.
//   gpr_req_if         - register read request (wid, PC, rs1, rs3, use_rs3, valid, ready).
//   raddr1             - {wid, register index} address for the first read.
//   gpr_rsp_if         - registered response (valid, wid, PC, rs1/rs2/rs3 data).
module VX_gpr_fp_ctrl (
    input wire clk,
    input wire reset,

    input wire [`NUM_THREADS-1:0][31:0] rs1_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_data,

    VX_gpr_req_if gpr_req_if,

    // outputs
    output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
    VX_gpr_rsp_if gpr_rsp_if
);
    reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data, rsp_rs3_data;
    reg                          rsp_valid;
    reg [31:0]                   rsp_pc;
    reg [`NW_BITS-1:0]           rsp_wid;
    reg                          read_rs1;

    // first cycle of a request that also needs rs3: hold the request one more cycle
    wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && read_rs1;
    wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;

    always @(posedge clk) begin
        if (reset) begin
            rsp_valid    <= 0;
            rsp_pc       <= 0;
            rsp_rs1_data <= 0;
            rsp_rs2_data <= 0;
            rsp_rs3_data <= 0;
            rsp_wid      <= 0;
            read_rs1     <= 1;
        end else begin
            if (rs3_delay) begin
                read_rs1 <= 0;
            end else if (read_fire) begin
                read_rs1 <= 1;
            end

            // response metadata follows the request by one cycle
            rsp_valid <= gpr_req_if.valid;
            rsp_wid   <= gpr_req_if.wid;
            rsp_pc    <= gpr_req_if.PC;

            // keep the rs1 value while the first read result carries rs3
            if (read_rs1) begin
                rsp_rs1_data <= rs1_data;
            end
            rsp_rs2_data <= rs2_data;
            rsp_rs3_data <= rs1_data;

            // the request must not change warp between the rs1 and the rs3 cycle
            assert(read_rs1 || rsp_wid == gpr_req_if.wid);
        end
    end

    // outputs
    wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_req_if.rs1 : gpr_req_if.rs3;
    assign raddr1 = {gpr_req_if.wid, rs1};

    assign gpr_req_if.ready = ~rs3_delay;

    assign gpr_rsp_if.valid    = rsp_valid;
    assign gpr_rsp_if.wid      = rsp_wid;
    assign gpr_rsp_if.PC       = rsp_pc;
    assign gpr_rsp_if.rs1_data = rsp_rs1_data;
    assign gpr_rsp_if.rs2_data = rsp_rs2_data;
    assign gpr_rsp_if.rs3_data = rsp_rs3_data;

endmodule

View File

@@ -1,35 +1,30 @@
`include "VX_define.vh"
module VX_gpr_ram (
input wire clk,
input wire reset,
input wire write_ce,
VX_gpr_read_if gpr_read_if,
VX_wb_if writeback_if,
input wire clk,
input wire [`NUM_THREADS-1:0] we,
input wire [`NW_BITS+`NR_BITS-1:0] waddr,
input wire [`NUM_THREADS-1:0][31:0] wdata,
input wire [`NW_BITS+`NR_BITS-1:0] rs1,
input wire [`NW_BITS+`NR_BITS-1:0] rs2,
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
output wire [`NUM_THREADS-1:0][31:0] rs2_data
);
`ifndef ASIC
output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data,
output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data
);
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_unqual;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_unqual;
reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0];
assign a_reg_data = (gpr_read_if.rs1 != 0) ? a_reg_data_unqual : 0;
assign b_reg_data = (gpr_read_if.rs2 != 0) ? b_reg_data_unqual : 0;
wire [`NUM_THREADS-1:0] write_enable = writeback_if.valid & {`NUM_THREADS{write_ce && (writeback_if.wb != 0)}};
`ifndef ASIC
`UNUSED_VAR(reset)
reg [`NUM_THREADS-1:0][3:0][7:0] ram[31:0];
wire [4:0] waddr = writeback_if.rd;
wire [`NUM_THREADS-1:0][31:0] wdata = writeback_if.data;
initial begin // initialize ram: set r0 = 0
for (integer j = 0; j < `NUM_WARPS; j++) begin
for (integer i = 0; i < `NUM_REGS; i++) begin
ram[j * `NUM_REGS + i] = (i == 0) ? {`NUM_THREADS{32'h0}} : {`NUM_THREADS{32'hx}};
end
end
end
genvar i;
for (i = 0; i < `NUM_THREADS; i++) begin
always @(posedge clk) begin
if (write_enable[i]) begin
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (we[i]) begin
ram[waddr][i][0] <= wdata[i][07:00];
ram[waddr][i][1] <= wdata[i][15:08];
ram[waddr][i][2] <= wdata[i][23:16];
@@ -38,44 +33,36 @@ module VX_gpr_ram (
end
end
assign a_reg_data_unqual = ram[gpr_read_if.rs1];
assign b_reg_data_unqual = ram[gpr_read_if.rs2];
assign rs1_data = ram[rs1];
assign rs2_data = ram[rs2];
`else
wire going_to_write = write_enable & (| writeback_if.wb_valid);
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
genvar i;
for (i = 0; i < `NUM_THREADS; i++) begin
wire local_write = write_enable & writeback_if.wb_valid[i];
assign write_bit_mask[i] = {`NUM_GPRS{~local_write}};
for (integer i = 0; i < `NUM_THREADS; i++) begin
assign write_bit_mask[i] = {32{~we[i]}};
end
wire cenb = 0;
wire cena_1 = 0;
wire cena_2 = 0;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_a;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_b;
wire [`NUM_THREADS-1:0][31:0] tmp_a;
wire [`NUM_THREADS-1:0][31:0] tmp_b;
`ifndef SYNTHESIS
genvar j;
for (i = 0; i < `NUM_THREADS; i++) begin
for (j = 0; j < `NUM_GPRS; j++) begin
assign a_reg_data_unqual[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
assign b_reg_data_unqual[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
for (integer i = 0; i < `NUM_THREADS; i++) begin
for (integer j = 0; j < 32; j++) begin
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
end
end
`else
assign a_reg_data_unqual = tmp_a;
assign b_reg_data_unqual = tmp_b;
assign rs1_data = tmp_a;
assign rs2_data = tmp_b;
`endif
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = writeback_if.write_data;
for (i = 0; i < 'NT; i=i+4)
begin
for (integer i = 0; i < 'NT; i=i+4) begin
`IGNORE_WARNINGS_BEGIN
rf2_32x128_wm1 first_ram (
.CENYA(),
@@ -88,12 +75,12 @@ module VX_gpr_ram (
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(gpr_read_if.rs1[(i+3):(i)]),
.AA(rs1[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(writeback_if.rd[(i+3):(i)]),
.DB(to_write[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
@@ -125,12 +112,12 @@ module VX_gpr_ram (
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(gpr_read_if.rs2[(i+3):(i)]),
.AA(rs2[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(writeback_if.rd[(i+3):(i)]),
.DB(to_write[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),

View File

@@ -1,226 +1,84 @@
`include "VX_define.vh"
module VX_gpr_stage (
input wire clk,
input wire reset,
input wire schedule_delay,
module VX_gpr_stage #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire memory_delay,
input wire exec_delay,
input wire stall_gpr_csr,
output wire delay,
// inputs
VX_writeback_if writeback_if,
VX_gpr_req_if gpr_req_if,
// decodee inputs
VX_backend_req_if bckE_req_if,
// WriteBack inputs
VX_wb_if writeback_if,
// Outputs
VX_exec_unit_req_if exec_unit_req_if,
VX_lsu_req_if lsu_req_if,
VX_gpu_inst_req_if gpu_inst_req_if,
VX_csr_req_if csr_req_if
// outputs
VX_gpr_rsp_if gpr_rsp_if
);
`DEBUG_BEGIN
wire[31:0] curr_PC = bckE_req_if.curr_PC;
wire[2:0] branchType = bckE_req_if.branch_type;
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
wire is_jal = bckE_req_if.is_jal;
`DEBUG_END
`UNUSED_VAR (reset)
assign csr_req_if.is_io = 1'b0; // GPR only issues csr requests coming from core
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NW_BITS+`NR_BITS-1:0] raddr1;
VX_gpr_read_if gpr_read_if();
assign gpr_read_if.rs1 = bckE_req_if.rs1;
assign gpr_read_if.rs2 = bckE_req_if.rs2;
assign gpr_read_if.warp_num = bckE_req_if.warp_num;
VX_gpr_ram gpr_ram (
.clk (clk),
.we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.tmask),
.waddr ({writeback_if.wid, writeback_if.rd}),
.wdata (writeback_if.data),
.rs1 (raddr1),
.rs2 ({gpr_req_if.wid, gpr_req_if.rs2}),
.rs1_data (rs1_data),
.rs2_data (rs2_data)
);
`ifndef ASIC
assign gpr_read_if.is_jal = bckE_req_if.is_jal;
assign gpr_read_if.curr_PC = bckE_req_if.curr_PC;
`else
assign gpr_read_if.is_jal = exec_unit_req_if.is_jal;
assign gpr_read_if.curr_PC = exec_unit_req_if.curr_PC;
// Response path of the GPR stage.
// With EXT_F_ENABLE the request/response handling is delegated to
// VX_gpr_fp_ctrl, which also drives raddr1. Without it, the response is a plain
// one-cycle registered copy of the request and of the two read results.
`ifdef EXT_F_ENABLE
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
.clk (clk),
.reset (reset),
.rs1_data (rs1_data),
.rs2_data (rs2_data),
.raddr1 (raddr1),
.gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if)
);
`else
// registered response fields
reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data;
reg rsp_valid;
reg [`NW_BITS-1:0] rsp_wid;
reg [31:0] rsp_pc;
always @(posedge clk) begin
if (reset) begin
rsp_valid <= 0;
rsp_wid <= 0;
rsp_pc <= 0;
rsp_rs1_data <= 0;
rsp_rs2_data <= 0;
end else begin
// response follows the request by one cycle
rsp_valid <= gpr_req_if.valid;
rsp_wid <= gpr_req_if.wid;
rsp_pc <= gpr_req_if.PC;
rsp_rs1_data <= rs1_data;
rsp_rs2_data <= rs2_data;
end
end
// first read address is always {wid, rs1}: there is no rs3 read in this variant
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
// requests are always accepted
assign gpr_req_if.ready = 1;
assign gpr_rsp_if.valid = rsp_valid;
assign gpr_rsp_if.wid = rsp_wid;
assign gpr_rsp_if.PC = rsp_pc;
assign gpr_rsp_if.rs1_data = rsp_rs1_data;
assign gpr_rsp_if.rs2_data = rsp_rs2_data;
// no third operand in this variant
assign gpr_rsp_if.rs3_data = 0;
// interface signals this variant does not consume, marked unused
`UNUSED_VAR (gpr_req_if.valid);
`UNUSED_VAR (gpr_req_if.rs3);
`UNUSED_VAR (gpr_req_if.use_rs3);
`UNUSED_VAR (gpr_rsp_if.ready);
`endif
VX_gpr_wrapper grp_wrapper (
.clk (clk),
.reset (reset),
.writeback_if (writeback_if),
.gpr_read_if (gpr_read_if)
);
assign writeback_if.ready = 1'b1;
// Outputs
VX_exec_unit_req_if exec_unit_req_temp_if();
VX_lsu_req_if lsu_req_temp_if();
VX_gpu_inst_req_if gpu_inst_req_temp_if();
VX_csr_req_if csr_req_temp_if();
VX_inst_multiplex inst_mult(
.bckE_req_if (bckE_req_if),
.gpr_read_if (gpr_read_if),
.exec_unit_req_if (exec_unit_req_temp_if),
.lsu_req_if (lsu_req_temp_if),
.gpu_inst_req_if (gpu_inst_req_temp_if),
.csr_req_if (csr_req_temp_if)
);
`DEBUG_BEGIN
wire is_lsu = (| lsu_req_temp_if.valid);
`DEBUG_END
wire stall_rest = 0;
wire flush_rest = schedule_delay;
wire stall_lsu = memory_delay;
wire flush_lsu = schedule_delay && !stall_lsu;
wire stall_exec = exec_delay;
wire flush_exec = schedule_delay && !stall_exec;
wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (| bckE_req_if.valid);
assign delay = stall_lsu || stall_exec || stall_csr;
`ifdef ASIC
wire delayed_lsu_last_cycle;
VX_generic_register #(
.N(1)
) delayed_reg (
.clk (clk),
.reset (reset),
.stall (stall_rest),
.flush (stall_rest),
.in (stall_lsu),
.out (delayed_lsu_last_cycle),
`UNUSED_PIN (size)
);
wire [`NUM_THREADS-1:0][31:0] temp_store_data;
wire [`NUM_THREADS-1:0][31:0] temp_base_addr; // A reg data
wire [`NUM_THREADS-1:0][31:0] real_store_data;
wire [`NUM_THREADS-1:0][31:0] real_base_addr; // A reg data
wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;
VX_generic_register #(
.N(`NUM_THREADS*32*2)
) lsu_data (
.clk (clk),
.reset (reset),
.stall (!store_curr_real),
.flush (stall_rest),
.in ({real_store_data, real_base_addr}),
.out ({temp_store_data, temp_base_addr})
);
assign real_store_data = lsu_req_temp_if.store_data;
assign real_base_addr = lsu_req_temp_if.base_addr;
assign lsu_req_if.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
assign lsu_req_if.base_addr = (delayed_lsu_last_cycle) ? temp_base_addr : real_base_addr;
VX_generic_register #(
.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))
) lsu_reg (
.clk (clk),
.reset (reset),
.stall (stall_lsu),
.flush (flush_lsu),
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.curr_PC, lsu_req_temp_if.warp_num, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
.out ({lsu_req_if.valid , lsu_req_if.curr_PC ,lsu_req_if.warp_num , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
);
VX_generic_register #(
.N(224 + `NW_BITS-1 + 1 + (`NUM_THREADS))
) exec_unit_reg (
.clk (clk),
.reset (reset),
.stall (stall_exec),
.flush (flush_exec),
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.next_PC, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.is_jal, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_addr, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.next_PC , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.is_jal , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_addr , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
);
assign exec_unit_req_if.a_reg_data = real_base_addr;
assign exec_unit_req_if.b_reg_data = real_store_data;
VX_generic_register #(
.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))
) gpu_inst_reg (
.clk (clk),
.reset (reset),
.stall (stall_rest),
.flush (flush_rest),
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.next_PC}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.next_PC })
);
assign gpu_inst_req_if.a_reg_data = real_base_addr;
assign gpu_inst_req_if.rd2 = real_store_data;
VX_generic_register #(
.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
) csr_reg (
.clk (clk),
.reset (reset),
.stall (stall_gpr_csr),
.flush (flush_rest),
.in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_addr, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}),
.out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_addr , csr_req_if.csr_immed , csr_req_if.csr_mask })
);
`else
// 341
VX_generic_register #(
.N(77 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))
) lsu_reg (
.clk (clk),
.reset (reset),
.stall (stall_lsu),
.flush (flush_lsu),
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.curr_PC, lsu_req_temp_if.warp_num, lsu_req_temp_if.store_data, lsu_req_temp_if.base_addr, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
.out ({lsu_req_if.valid , lsu_req_if.curr_PC , lsu_req_if.warp_num , lsu_req_if.store_data , lsu_req_if.base_addr , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
);
VX_generic_register #(
.N(224 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))
) exec_unit_reg (
.clk (clk),
.reset (reset),
.stall (stall_exec),
.flush (flush_exec),
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.next_PC, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.is_jal, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_addr, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.next_PC , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.is_jal , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_addr , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
);
VX_generic_register #(
.N(68 + `NW_BITS-1 + 1 + 33*(`NUM_THREADS))
) gpu_inst_reg (
.clk (clk),
.reset (reset),
.stall (stall_rest),
.flush (flush_rest),
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.next_PC, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.next_PC , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 })
);
VX_generic_register #(
.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
) csr_reg (
.clk (clk),
.reset (reset),
.stall (stall_gpr_csr),
.flush (flush_rest),
.in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_addr, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}),
.out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_addr , csr_req_if.csr_immed , csr_req_if.csr_mask })
);
`endif
endmodule : VX_gpr_stage
endmodule

View File

@@ -1,60 +0,0 @@
`include "VX_define.vh"
// Wraps one VX_gpr_ram per warp and selects the read data of the requesting warp.
//
// Read side:
//   * a_reg_data is the selected warp's first read result, or the current PC
//     replicated for every thread when gpr_read_if.is_jal is set;
//   * b_reg_data is the selected warp's second read result.
//   Without ASIC the warp is selected with gpr_read_if.warp_num directly; with
//   ASIC the selection uses the warp number registered by one cycle.
// Write side:
//   * each per-warp RAM gets write_ce only when writeback_if.warp_num matches
//     its index.
module VX_gpr_wrapper (
    input wire clk,
    input wire reset,

    VX_wb_if       writeback_if,
    VX_gpr_read_if gpr_read_if
);
    wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_a_reg_data;
    wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_b_reg_data;

    // current PC replicated per thread, used as first operand for JAL
    wire [`NUM_THREADS-1:0][31:0] jal_data;

    genvar i;
    generate
        for (i = 0; i < `NUM_THREADS; i++) begin : jal_data_assign
            assign jal_data[i] = gpr_read_if.curr_PC;
        end
    endgenerate

`ifndef ASIC
    assign gpr_read_if.a_reg_data = gpr_read_if.is_jal ? jal_data : tmp_a_reg_data[gpr_read_if.warp_num];
    assign gpr_read_if.b_reg_data = tmp_b_reg_data[gpr_read_if.warp_num];
`else
    // warp number of the previous cycle, used to select the read data
    wire [`NW_BITS-1:0] old_warp_num;

    VX_generic_register #(
        .N(`NW_BITS-1+1)
    ) store_wn (
        .clk   (clk),
        .reset (reset),
        .stall (1'b0),
        .flush (1'b0),
        .in    (gpr_read_if.warp_num),
        .out   (old_warp_num)
    );

    // is_jal comes from gpr_read_if, the only interface of this module that
    // carries it (the previous reference to `gpr_jal_if` named no declared object)
    assign gpr_read_if.a_reg_data = gpr_read_if.is_jal ? jal_data : tmp_a_reg_data[old_warp_num];
    assign gpr_read_if.b_reg_data = tmp_b_reg_data[old_warp_num];
`endif

    generate
        for (i = 0; i < `NUM_WARPS; i++) begin : warp_gprs
            // only the RAM of the warp being written back is write-enabled
            wire write_ce = (i == writeback_if.warp_num);

            VX_gpr_ram gpr_ram(
                .clk          (clk),
                .reset        (reset),
                .write_ce     (write_ce),
                .gpr_read_if  (gpr_read_if),
                .writeback_if (writeback_if),
                .a_reg_data   (tmp_a_reg_data[i]),
                .b_reg_data   (tmp_b_reg_data[i])
            );
        end
    endgenerate

endmodule

View File

@@ -1,88 +0,0 @@
`include "VX_define.vh"
// Purely combinational translation of a GPU-control instruction request
// (tmc / wspawn / split / barrier) into warp-control signals.
//
// Operand usage, as read by the code below:
//   a_reg_data[0] - thread count (tmc), warp count (wspawn), barrier id (barrier)
//   a_reg_data[i] - per-thread predicate (split); a thread takes the "new" mask
//                   when its value equals 1
//   rd2           - start PC (wspawn); warp count whose value minus one is
//                   reported as num_warps
module VX_gpu_inst (
    // Input
    VX_gpu_inst_req_if gpu_inst_req_if,

    // Output
    VX_warp_ctl_if     warp_ctl_if
);
    genvar i;

    wire [`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid;

    // the request is live when at least one thread is valid
    wire valid_inst = (| curr_valids);

    assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;

    // ------------------------------------------------------------------ tmc
    // enable the first a_reg_data[0] threads (all of them when the count
    // exceeds NUM_THREADS)
    wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];

    wire [`NUM_THREADS-1:0] tmc_new_mask;
    generate
        for (i = 0; i < `NUM_THREADS; i++) begin : tmc_new_mask_init
            assign tmc_new_mask[i] = all_threads ? 1'b1 : (i < gpu_inst_req_if.a_reg_data[0]);
        end
    endgenerate

    assign warp_ctl_if.change_mask = gpu_inst_req_if.is_tmc && valid_inst;
    assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;

    // a tmc that leaves no thread enabled halts the warp
    assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (0 == warp_ctl_if.thread_mask);

    // --------------------------------------------------------------- wspawn
    // activate the first a_reg_data[0] warps (all of them when the count
    // exceeds NUM_WARPS), starting at the PC given in rd2
    wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];

    wire [`NUM_WARPS-1:0] wspawn_new_active;
    generate
        for (i = 0; i < `NUM_WARPS; i++) begin : wspawn_new_active_init
            assign wspawn_new_active[i] = all_active ? 1'b1 : (i < gpu_inst_req_if.a_reg_data[0]);
        end
    endgenerate

    wire [31:0] wspawn_pc = gpu_inst_req_if.rd2;

    assign warp_ctl_if.wspawn            = gpu_inst_req_if.is_wspawn && valid_inst;
    assign warp_ctl_if.wspawn_pc         = wspawn_pc;
    assign warp_ctl_if.wspawn_new_active = wspawn_new_active;

    // -------------------------------------------------------------- barrier
`DEBUG_BEGIN
    wire [31:0] num_warps_m1 = gpu_inst_req_if.rd2 - 1;
`DEBUG_END

    assign warp_ctl_if.is_barrier = gpu_inst_req_if.is_barrier && valid_inst;
    assign warp_ctl_if.barrier_id = gpu_inst_req_if.a_reg_data[0];
    assign warp_ctl_if.num_warps  = num_warps_m1[$clog2(`NUM_WARPS):0];

    // ---------------------------------------------------------------- split
    wire [`NUM_THREADS-1:0] split_new_use_mask;
    wire [`NUM_THREADS-1:0] split_new_later_mask;

    generate
        for (i = 0; i < `NUM_THREADS; i++) begin : masks_init
            wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1);
            assign split_new_use_mask[i]   = curr_valids[i] & (curr_bool);
            assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool);
        end
    endgenerate

    // number of valid threads; a split is only meaningful with more than one
    wire [$clog2(`NUM_THREADS):0] num_valids;

    VX_countones #(
        .N(`NUM_THREADS)
    ) valids_counter (
        .valids (curr_valids),
        .count  (num_valids)
    );

    assign warp_ctl_if.is_split = gpu_inst_req_if.is_split && (num_valids > 1);

    // no real divergence when the "use" mask is empty or all ones
    assign warp_ctl_if.dont_split = warp_ctl_if.is_split
                                 && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));

    assign warp_ctl_if.split_new_mask   = split_new_use_mask;
    assign warp_ctl_if.split_later_mask = split_new_later_mask;
    assign warp_ctl_if.split_save_pc    = gpu_inst_req_if.next_PC;
    assign warp_ctl_if.split_warp_num   = gpu_inst_req_if.warp_num;

endmodule

91
hw/rtl/VX_gpu_unit.v Normal file
View File

@@ -0,0 +1,91 @@
`include "VX_define.vh"
// GPU-control execution unit (tmc / wspawn / split / barrier).
//
// Fully combinational: the request is decoded into the four warp-control
// records and forwarded unchanged to the commit interface in the same cycle.
// The warp-control output is only flagged valid when the commit side is ready,
// and the request is accepted exactly when the commit side is ready.
module VX_gpu_unit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // Input
    VX_gpu_req_if    gpu_req_if,

    // Output
    VX_warp_ctl_if   warp_ctl_if,
    VX_exu_to_cmt_if gpu_commit_if
);
    `UNUSED_VAR (clk)
    `UNUSED_VAR (reset)

    // per-operation control records handed to the warp scheduler
    gpu_tmc_t     tmc_ctl;
    gpu_wspawn_t  wspawn_ctl;
    gpu_split_t   split_ctl;
    gpu_barrier_t barrier_ctl;

    // per-thread masks: tmc enables threads below rs1_data[0]; split partitions
    // the active threads by bit 0 of each thread's rs1 value
    wire [`NUM_THREADS-1:0] tmc_new_mask;
    wire [`NUM_THREADS-1:0] split_then_mask;
    wire [`NUM_THREADS-1:0] split_else_mask;

    for (genvar t = 0; t < `NUM_THREADS; t++) begin
        wire taken = gpu_req_if.rs1_data[t][0];

        assign tmc_new_mask[t]    = (t < gpu_req_if.rs1_data[0]);
        assign split_then_mask[t] = gpu_req_if.tmask[t] & taken;
        assign split_else_mask[t] = gpu_req_if.tmask[t] & ~taken;
    end

    // per-warp mask: wspawn activates warps below rs1_data[0]
    wire [`NUM_WARPS-1:0] wspawn_wmask;

    for (genvar w = 0; w < `NUM_WARPS; w++) begin
        assign wspawn_wmask[w] = (w < gpu_req_if.rs1_data[0]);
    end

    wire [31:0] spawn_pc = gpu_req_if.rs2_data;

    // tmc
    assign tmc_ctl.valid = (gpu_req_if.op_type == `GPU_TMC);
    assign tmc_ctl.tmask = tmc_new_mask;

    // wspawn
    assign wspawn_ctl.valid = (gpu_req_if.op_type == `GPU_WSPAWN);
    assign wspawn_ctl.wmask = wspawn_wmask;
    assign wspawn_ctl.pc    = spawn_pc;

    // split: diverged only when both partitions are non-empty
    assign split_ctl.valid     = (gpu_req_if.op_type == `GPU_SPLIT);
    assign split_ctl.diverged  = (| split_then_mask) && (| split_else_mask);
    assign split_ctl.then_mask = split_then_mask;
    assign split_ctl.else_mask = split_else_mask;
    assign split_ctl.pc        = gpu_req_if.next_PC;

    // barrier
    assign barrier_ctl.valid   = (gpu_req_if.op_type == `GPU_BAR);
    assign barrier_ctl.id      = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
    assign barrier_ctl.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data - 1);

    // warp-control output
    assign warp_ctl_if.valid   = gpu_req_if.valid && gpu_commit_if.ready;
    assign warp_ctl_if.wid     = gpu_commit_if.wid;
    assign warp_ctl_if.tmc     = tmc_ctl;
    assign warp_ctl_if.wspawn  = wspawn_ctl;
    assign warp_ctl_if.split   = split_ctl;
    assign warp_ctl_if.barrier = barrier_ctl;

    // commit output: the request fields are passed through as they are
    assign gpu_commit_if.valid = gpu_req_if.valid;
    assign gpu_commit_if.wid   = gpu_req_if.wid;
    assign gpu_commit_if.tmask = gpu_req_if.tmask;
    assign gpu_commit_if.PC    = gpu_req_if.PC;
    assign gpu_commit_if.rd    = gpu_req_if.rd;
    assign gpu_commit_if.wb    = gpu_req_if.wb;

    // can accept new request?
    assign gpu_req_if.ready = gpu_commit_if.ready;

endmodule

212
hw/rtl/VX_ibuffer.v Normal file
View File

@@ -0,0 +1,212 @@
`include "VX_define.vh"
// Per-warp instruction buffer with issue scheduling.
//
// Decoded instructions arrive on ibuf_enq_if and are queued per warp. Each
// warp queue keeps its head record in a register (q_data_out) and any further
// records in the `entries` memory. A scheduler selects which warp's head is
// presented on ibuf_deq_if in the following cycle; that selection is also
// exported combinationally on deq_wid_next.
//
// Parameters:
//   CORE_ID - core identifier (not referenced inside this module).
// Ports:
//   freeze       - keep issuing from the current warp instead of rotating.
//   ibuf_enq_if  - incoming decoded instruction (valid/ready handshake).
//   deq_wid_next - warp id that will be presented on ibuf_deq_if next cycle.
//   ibuf_deq_if  - outgoing instruction (valid/ready handshake).
module VX_ibuffer #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // inputs
    input wire freeze,      // keep current warp
    VX_decode_if ibuf_enq_if,

    // outputs
    output wire [`NW_BITS-1:0] deq_wid_next,
    VX_decode_if ibuf_deq_if
);
    // Width of one packed instruction record; it has to equal the sum of the
    // widths of the fields concatenated into q_data_in at the bottom of the module.
    localparam DATAW   = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + 1 + `NUM_REGS;
    localparam SIZE    = `IBUF_SIZE;
    localparam SIZEW   = $clog2(SIZE+1);
    localparam ADDRW   = $clog2(SIZE);
    localparam NWARPSW = $clog2(`NUM_WARPS+1);

    `USE_FAST_BRAM reg [DATAW-1:0] entries [`NUM_WARPS-1:0][SIZE-1:0];
    reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
    reg [ADDRW:0] rd_ptr_r [`NUM_WARPS-1:0];
    reg [ADDRW:0] wr_ptr_r [`NUM_WARPS-1:0];

    wire [`NUM_WARPS-1:0] q_full;
    wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size;
    wire [DATAW-1:0] q_data_in;
    wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
    reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;

    wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready;
    wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready;

    // One queue per warp.
    for (genvar i = 0; i < `NUM_WARPS; ++i) begin

        wire writing = enq_fire && (i == ibuf_enq_if.wid);
        wire reading = deq_fire && (i == ibuf_deq_if.wid);

        // The pointers carry one extra bit; only the low ADDRW bits address the memory.
        wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[i][ADDRW-1:0];
        wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[i][ADDRW-1:0];

        always @(posedge clk) begin
            if (reset) begin
                rd_ptr_r[i] <= 0;
                wr_ptr_r[i] <= 0;
                size_r[i]   <= 0;
            end else begin
                if (writing) begin
                    // The new record becomes the head directly when the queue is
                    // empty, or when its only record is being read this cycle.
                    if ((0 == size_r[i]) || ((1 == size_r[i]) && reading)) begin
                        q_data_out[i] <= q_data_in;
                    end else begin
                        entries[i][wr_ptr_a] <= q_data_in;
                        wr_ptr_r[i] <= wr_ptr_r[i] + ADDRW'(1);
                    end
                    if (!reading) begin
                        size_r[i] <= size_r[i] + SIZEW'(1);
                    end
                end
                if (reading) begin
                    // With more than one record queued, the next one moves
                    // from the memory into the head register.
                    if (size_r[i] != 1) begin
                        q_data_out[i] <= q_data_prev[i];
                        rd_ptr_r[i]   <= rd_ptr_r[i] + ADDRW'(1);
                    end
                    if (!writing) begin
                        size_r[i] <= size_r[i] - SIZEW'(1);
                    end
                end
            end
        end

        assign q_data_prev[i] = entries[i][rd_ptr_a];
        assign q_full[i]      = (size_r[i] == SIZE);
        assign q_size[i]      = size_r[i];
    end

    ///////////////////////////////////////////////////////////////////////////

    reg [`NUM_WARPS-1:0] valid_table, valid_table_n;
    reg [`NUM_WARPS-1:0] schedule_table, schedule_table_n;
    reg [NWARPSW-1:0] num_warps;
    reg [`NW_BITS-1:0] deq_wid, deq_wid_n;
    reg deq_valid, deq_valid_n;
    reg [DATAW-1:0] deq_instr, deq_instr_n;

    // valid_table: one bit per warp, set while that warp has queued records.
    always @(*) begin
        valid_table_n = valid_table;
        if (deq_fire) begin
            valid_table_n[deq_wid] = (q_size[deq_wid] != SIZEW'(1));
        end
        if (enq_fire) begin
            valid_table_n[ibuf_enq_if.wid] = 1;
        end
    end

    // schedule the next instruction to issue
    // does round-robin scheduling when multiple warps are present
    always @(*) begin
        deq_valid_n = 0;
        deq_wid_n   = 'x;
        deq_instr_n = 'x;
        schedule_table_n = schedule_table;

        if (0 == num_warps) begin
            // Nothing buffered: an incoming instruction is forwarded directly.
            deq_valid_n = enq_fire;
            deq_wid_n   = ibuf_enq_if.wid;
            deq_instr_n = q_data_in;
        end else if ((1 == num_warps) || freeze) begin
            // Stay on the current warp. If its last record is leaving this
            // cycle, fall over to the warp that is being enqueued (if any).
            deq_valid_n = (!deq_fire || (q_size[deq_wid] != SIZEW'(1))) || enq_fire;
            deq_wid_n   = (!deq_fire || (q_size[deq_wid] != SIZEW'(1))) ? deq_wid : ibuf_enq_if.wid;
            deq_instr_n = deq_fire ? ((q_size[deq_wid] != SIZEW'(1)) ? q_data_prev[deq_wid] : q_data_in) : q_data_out[deq_wid];
        end else begin
            // Take the lowest-index warp still pending in this round and
            // remove it from the round.
            for (integer i = 0; i < `NUM_WARPS; i++) begin
                if (schedule_table_n[i]) begin
                    deq_valid_n = 1;
                    deq_wid_n   = `NW_BITS'(i);
                    deq_instr_n = q_data_out[i];
                    schedule_table_n[i] = 0;
                    break;
                end
            end
        end
    end

    // A warp starts to count when a record is enqueued into its empty queue;
    // it stops counting when its last record is dequeued without a
    // same-cycle enqueue for that warp.
    wire warp_added   = enq_fire && (0 == q_size[ibuf_enq_if.wid]);
    wire warp_removed = deq_fire && ~(enq_fire && ibuf_enq_if.wid == deq_wid) && ~(q_size[deq_wid] != SIZEW'(1));

    always @(posedge clk) begin
        if (reset) begin
            valid_table    <= 0;
            schedule_table <= 0;
            deq_valid      <= 0;
            num_warps      <= 0;
        end else begin
            valid_table <= valid_table_n;

            // When the current round is exhausted, start a new one from the
            // valid warps, leaving out the warp that was just selected.
            if ((| schedule_table_n)) begin
                schedule_table <= schedule_table_n;
            end else begin
                schedule_table <= valid_table_n;
                schedule_table[deq_wid_n] <= 0;
            end

            deq_valid <= deq_valid_n;
            deq_wid   <= deq_wid_n;
            deq_instr <= deq_instr_n;

            if (warp_added && !warp_removed) begin
                num_warps <= num_warps + NWARPSW'(1);
            end else if (warp_removed && !warp_added) begin
                num_warps <= num_warps - NWARPSW'(1);
            end

        `ifdef VERILATOR
            /*if (enq_fire || deq_fire || deq_valid) begin
                $display("*** %t: cur=%b(%0d), nxt=%b(%0d), enq=%b(%0d), deq=%b(%0d), nw=%0d(%0d,%0d,%0d,%0d), sched=%b, sched_n=%b",
                    $time, deq_valid, deq_wid, deq_valid_n, deq_wid_n, enq_fire, ibuf_enq_if.wid, deq_fire, ibuf_deq_if.wid, num_warps, size_r[0], size_r[1], size_r[2], size_r[3], schedule_table, schedule_table_n);
            end*/
            begin // verify 'num_warps'
                integer nw;
                // Restart the count explicitly on every clock edge. A declaration
                // initializer on a block-local static variable is only guaranteed
                // to run once at the start of simulation, which would let the
                // count accumulate across cycles.
                nw = 0;
                for (integer i = 0; i < `NUM_WARPS; i++) begin
                    nw += 32'(q_size[i] != 0);
                end
                assert(nw == 32'(num_warps)) else $display("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw);
                assert(~deq_valid || (q_size[deq_wid] != 0)) else $display("%t: error: invalid schedule: wid=%0d", $time, deq_wid);
                assert(~deq_fire || (q_size[deq_wid] != 0)) else $display("%t: error: invalid dequeue: wid=%0d", $time, deq_wid);
            end
        `endif
        end
    end

    assign deq_wid_next = deq_wid_n;

    // Back-pressure is per warp: only the addressed warp's queue has to have room.
    assign ibuf_enq_if.ready = ~q_full[ibuf_enq_if.wid];

    // Pack the decoded instruction; the field order must mirror the unpack below.
    assign q_data_in = {ibuf_enq_if.tmask,
                        ibuf_enq_if.PC,
                        ibuf_enq_if.ex_type,
                        ibuf_enq_if.op_type,
                        ibuf_enq_if.op_mod,
                        ibuf_enq_if.wb,
                        ibuf_enq_if.rd,
                        ibuf_enq_if.rs1,
                        ibuf_enq_if.rs2,
                        ibuf_enq_if.rs3,
                        ibuf_enq_if.imm,
                        ibuf_enq_if.rs1_is_PC,
                        ibuf_enq_if.rs2_is_imm,
                        ibuf_enq_if.use_rs3,
                        ibuf_enq_if.used_regs};

    assign ibuf_deq_if.valid = deq_valid;
    assign ibuf_deq_if.wid   = deq_wid;
    assign {ibuf_deq_if.tmask,
            ibuf_deq_if.PC,
            ibuf_deq_if.ex_type,
            ibuf_deq_if.op_type,
            ibuf_deq_if.op_mod,
            ibuf_deq_if.wb,
            ibuf_deq_if.rd,
            ibuf_deq_if.rs1,
            ibuf_deq_if.rs2,
            ibuf_deq_if.rs3,
            ibuf_deq_if.imm,
            ibuf_deq_if.rs1_is_PC,
            ibuf_deq_if.rs2_is_imm,
            ibuf_deq_if.use_rs3,
            ibuf_deq_if.used_regs} = deq_instr;

endmodule

View File

@@ -7,97 +7,77 @@ module VX_icache_stage #(
input wire clk,
input wire reset,
input wire total_freeze,
output wire icache_stage_delay,
output wire[`NW_BITS-1:0] icache_stage_wid,
output wire icache_stage_response,
VX_inst_meta_if fe_inst_meta_fi,
VX_inst_meta_if fe_inst_meta_id,
// Icache interface
VX_cache_core_req_if icache_req_if,
VX_cache_core_rsp_if icache_rsp_if
VX_cache_core_rsp_if icache_rsp_if,
// request
VX_ifetch_req_if ifetch_req_if,
// response
VX_ifetch_rsp_if ifetch_rsp_if
);
`UNUSED_VAR (reset)
reg [`NUM_THREADS-1:0] valid_threads [`NUM_WARPS-1:0];
reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
wire valid_inst = (| fe_inst_meta_fi.valid);
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
wire mrq_full;
wire mrq_push = icache_req_if.valid && icache_req_if.ready;
wire mrq_pop = icache_rsp_if.valid && icache_rsp_if.ready;
assign mrq_read_addr = icache_rsp_if.tag[0][`LOG2UP(`ICREQ_SIZE)-1:0];
VX_indexable_queue #(
.DATAW (`LOG2UP(`ICREQ_SIZE) + 32 + `NW_BITS),
.SIZE (`ICREQ_SIZE)
) mem_req_queue (
.clk (clk),
.reset (reset),
.write_data ({mrq_write_addr, fe_inst_meta_fi.curr_PC, fe_inst_meta_fi.warp_num}),
.write_addr (mrq_write_addr),
.push (mrq_push),
.full (mrq_full),
.pop (mrq_pop),
.read_addr (mrq_read_addr),
.read_data ({dbg_mrq_write_addr, fe_inst_meta_id.curr_PC, fe_inst_meta_id.warp_num}),
`UNUSED_PIN (empty)
);
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
wire [`NW_BITS-1:0] req_tag = ifetch_req_if.wid;
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
always @(posedge clk) begin
if (mrq_push) begin
valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
end
if (mrq_pop) begin
assert(mrq_read_addr == dbg_mrq_write_addr);
end
end
if (icache_req_fire) begin
rsp_PC_buf[req_tag] <= ifetch_req_if.PC;
rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask;
end
end
// Icache Request
assign icache_req_if.valid = valid_inst && !mrq_full;
assign icache_req_if.valid = ifetch_req_if.valid;
assign icache_req_if.rw = 0;
assign icache_req_if.byteen = 4'b1111;
assign icache_req_if.addr = fe_inst_meta_fi.curr_PC[31:2];
assign icache_req_if.addr = ifetch_req_if.PC[31:2];
assign icache_req_if.data = 0;
// Can't accept new request
assign icache_stage_delay = mrq_full || !icache_req_if.ready;
// Can accept new request?
assign ifetch_req_if.ready = icache_req_if.ready;
`ifdef DBG_CORE_REQ_INFO
assign icache_req_if.tag = {fe_inst_meta_fi.curr_PC, 2'b1, 5'b0, fe_inst_meta_fi.warp_num, mrq_write_addr};
assign icache_req_if.tag = {ifetch_req_if.PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag};
`else
assign icache_req_if.tag = mrq_write_addr;
assign icache_req_if.tag = req_tag;
`endif
assign fe_inst_meta_id.instruction = icache_rsp_if.valid ? icache_rsp_if.data[0] : 0;
assign fe_inst_meta_id.valid = icache_rsp_if.valid ? valid_threads[fe_inst_meta_id.warp_num] : 0;
assign icache_stage_response = mrq_pop;
assign icache_stage_wid = fe_inst_meta_id.warp_num;
assign ifetch_rsp_if.valid = icache_rsp_if.valid;
assign ifetch_rsp_if.wid = rsp_tag;
assign ifetch_rsp_if.tmask = rsp_tmask_buf[rsp_tag];
assign ifetch_rsp_if.PC = rsp_PC_buf[rsp_tag];
assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
// Can't accept new response
assign icache_rsp_if.ready = !total_freeze;
// Can accept new response?
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
`SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.valid);
`SCOPE_ASSIGN(scope_icache_req_warp_num, fe_inst_meta_fi.warp_num);
`SCOPE_ASSIGN(scope_icache_req_addr, {icache_req_if.addr, 2'b0});
`SCOPE_ASSIGN(scope_icache_req_tag, icache_req_if.tag);
`SCOPE_ASSIGN(scope_icache_req_ready, icache_req_if.ready);
`SCOPE_ASSIGN (scope_icache_req_valid, icache_req_if.valid);
`SCOPE_ASSIGN (scope_icache_req_wid, ifetch_req_if.wid);
`SCOPE_ASSIGN (scope_icache_req_addr, {icache_req_if.addr, 2'b0});
`SCOPE_ASSIGN (scope_icache_req_tag, req_tag);
`SCOPE_ASSIGN (scope_icache_req_ready, icache_req_if.ready);
`SCOPE_ASSIGN(scope_icache_rsp_valid, icache_rsp_if.valid);
`SCOPE_ASSIGN(scope_icache_rsp_data, icache_rsp_if.data);
`SCOPE_ASSIGN(scope_icache_rsp_tag, icache_rsp_if.tag);
`SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_if.ready);
`SCOPE_ASSIGN (scope_icache_rsp_valid, icache_rsp_if.valid);
`SCOPE_ASSIGN (scope_icache_rsp_data, icache_rsp_if.data);
`SCOPE_ASSIGN (scope_icache_rsp_tag, rsp_tag);
`SCOPE_ASSIGN (scope_icache_rsp_ready, icache_rsp_if.ready);
`ifdef DBG_PRINT_CORE_ICACHE
always @(posedge clk) begin
if (icache_req_if.valid && icache_req_if.ready) begin
$display("%t: I%0d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, fe_inst_meta_fi.curr_PC, fe_inst_meta_fi.warp_num);
$display("%t: I$%0d req: wid=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC);
end
if (icache_rsp_if.valid && icache_rsp_if.ready) begin
$display("%t: I%0d$ rsp: tag=%0h, pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, mrq_read_addr, fe_inst_meta_id.curr_PC, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
$display("%t: I$%0d rsp: wid=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.instr);
end
end
`endif

View File

@@ -1,90 +0,0 @@
`include "VX_define.vh"
// Steers one back-end request, together with its register read data, to the
// LSU, execute, GPU or CSR request interface. The payload is forwarded to
// the units unconditionally; only the per-thread valid bits are gated by the
// instruction class.
module VX_inst_multiplex (
    // Inputs
    VX_backend_req_if   bckE_req_if,
    VX_gpr_read_if      gpr_read_if,

    // Outputs
    VX_exec_unit_req_if exec_unit_req_if,
    VX_lsu_req_if       lsu_req_if,
    VX_gpu_inst_req_if  gpu_inst_req_if,
    VX_csr_req_if       csr_req_if
);
    // Instruction class of the current request.
    wire is_mem = (bckE_req_if.mem_write != `BYTE_EN_NO) || (bckE_req_if.mem_read != `BYTE_EN_NO);
    wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
    wire is_csr = bckE_req_if.is_csr;

    // Class flags replicated to one bit per thread so they can gate the valid mask.
    wire [`NUM_THREADS-1:0] is_mem_mask = {`NUM_THREADS{is_mem}};
    wire [`NUM_THREADS-1:0] is_gpu_mask = {`NUM_THREADS{is_gpu}};
    wire [`NUM_THREADS-1:0] is_csr_mask = {`NUM_THREADS{is_csr}};

    // Per-unit valid masks; the execute unit takes whatever no other class claims.
    assign lsu_req_if.valid       = bckE_req_if.valid & is_mem_mask;
    assign gpu_inst_req_if.valid  = bckE_req_if.valid & is_gpu_mask;
    assign csr_req_if.valid       = bckE_req_if.valid & is_csr_mask;
    assign exec_unit_req_if.valid = bckE_req_if.valid & ~(is_mem_mask | is_gpu_mask | is_csr_mask);

    // LSU request payload
    assign lsu_req_if.warp_num   = bckE_req_if.warp_num;
    assign lsu_req_if.curr_PC    = bckE_req_if.curr_PC;
    assign lsu_req_if.rd         = bckE_req_if.rd;
    assign lsu_req_if.wb         = bckE_req_if.wb;
    assign lsu_req_if.mem_read   = bckE_req_if.mem_read;
    assign lsu_req_if.mem_write  = bckE_req_if.mem_write;
    assign lsu_req_if.offset     = bckE_req_if.itype_immed;
    assign lsu_req_if.base_addr  = gpr_read_if.a_reg_data;
    assign lsu_req_if.store_data = gpr_read_if.b_reg_data;

    // Execute unit request payload
    assign exec_unit_req_if.warp_num    = bckE_req_if.warp_num;
    assign exec_unit_req_if.curr_PC     = bckE_req_if.curr_PC;
    assign exec_unit_req_if.next_PC     = bckE_req_if.next_PC;
    assign exec_unit_req_if.rd          = bckE_req_if.rd;
    assign exec_unit_req_if.wb          = bckE_req_if.wb;
    assign exec_unit_req_if.rs1         = bckE_req_if.rs1;
    assign exec_unit_req_if.rs2         = bckE_req_if.rs2;
    assign exec_unit_req_if.rs2_src     = bckE_req_if.rs2_src;
    assign exec_unit_req_if.a_reg_data  = gpr_read_if.a_reg_data;
    assign exec_unit_req_if.b_reg_data  = gpr_read_if.b_reg_data;
    assign exec_unit_req_if.alu_op      = bckE_req_if.alu_op;
    assign exec_unit_req_if.itype_immed = bckE_req_if.itype_immed;
    assign exec_unit_req_if.upper_immed = bckE_req_if.upper_immed;
    assign exec_unit_req_if.branch_type = bckE_req_if.branch_type;
    assign exec_unit_req_if.is_jal      = bckE_req_if.is_jal;
    assign exec_unit_req_if.jal         = bckE_req_if.jal;
    assign exec_unit_req_if.jal_offset  = bckE_req_if.jal_offset;
    assign exec_unit_req_if.is_etype    = bckE_req_if.is_etype;

    // GPU request payload; only the first thread's second operand is forwarded as rd2.
    assign gpu_inst_req_if.warp_num   = bckE_req_if.warp_num;
    assign gpu_inst_req_if.next_PC    = bckE_req_if.next_PC;
    assign gpu_inst_req_if.is_wspawn  = bckE_req_if.is_wspawn;
    assign gpu_inst_req_if.is_tmc     = bckE_req_if.is_tmc;
    assign gpu_inst_req_if.is_split   = bckE_req_if.is_split;
    assign gpu_inst_req_if.is_barrier = bckE_req_if.is_barrier;
    assign gpu_inst_req_if.a_reg_data = gpr_read_if.a_reg_data;
    assign gpu_inst_req_if.rd2        = gpr_read_if.b_reg_data[0];

    // CSR request payload
    assign csr_req_if.warp_num  = bckE_req_if.warp_num;
    assign csr_req_if.rd        = bckE_req_if.rd;
    assign csr_req_if.wb        = bckE_req_if.wb;
    assign csr_req_if.alu_op    = bckE_req_if.alu_op;
    assign csr_req_if.is_csr    = bckE_req_if.is_csr;
    assign csr_req_if.csr_addr  = bckE_req_if.csr_addr;
    assign csr_req_if.csr_immed = bckE_req_if.csr_immed;
    assign csr_req_if.csr_mask  = bckE_req_if.csr_mask;

endmodule

237
hw/rtl/VX_instr_demux.v Normal file
View File

@@ -0,0 +1,237 @@
`include "VX_define.vh"
// Instruction demultiplexer: routes the instruction on execute_if to the
// request interface of the unit selected by execute_if.ex_type (ALU, LSU,
// CSR, MUL, FPU or GPU).
// Each unit has a VX_skid_buffer carrying the control fields and (except CSR,
// which only carries a 32-bit mask) a VX_gpr_bypass carrying the register
// operands taken from gpr_rsp_if.
// NOTE(review): when EXT_M_ENABLE / EXT_F_ENABLE are not defined, nothing in
// this module drives mul_req_if / fpu_req_if (and csr_to_issue_if.wid) -
// confirm that this is handled elsewhere.
module VX_instr_demux (
input wire clk,
input wire reset,
// inputs
VX_decode_if execute_if,
VX_gpr_rsp_if gpr_rsp_if,
VX_csr_to_issue_if csr_to_issue_if,
// outputs
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
// Thread index derived from the thread mask; it is only forwarded to the ALU
// request (alu_req_if.tid). Which set bit is chosen is decided inside
// VX_priority_encoder, which is not visible here.
wire [`NT_BITS-1:0] tid;
VX_priority_encoder #(
.N(`NUM_THREADS)
) tid_select (
.data_in (execute_if.tmask),
.data_out (tid),
`UNUSED_PIN (valid_out)
);
// Address of the sequentially next instruction (forwarded to ALU and GPU units).
wire [31:0] next_PC = execute_if.PC + 4;
// ALU unit
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
wire alu_req_ready;
wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
// DATAW must equal the summed width of the fields in data_in/data_out, in the same order.
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS)
) alu_reg (
.clk (clk),
.reset (reset),
.ready_in (alu_req_ready),
.valid_in (alu_req_valid),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}),
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid}),
.ready_out (alu_req_if.ready),
.valid_out (alu_req_if.valid)
);
// rs1/rs2 operands for the ALU request.
VX_gpr_bypass #(
.DATAW (2 * `NUM_THREADS * 32),
.PASSTHRU (1) // ALU has no back-pressure, bypass not needed
) alu_bypass (
.clk (clk),
.reset (reset),
.push (alu_req_valid && alu_req_ready),
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({alu_req_if.rs1_data, alu_req_if.rs2_data}),
.pop (alu_req_if.valid && alu_req_if.ready)
);
// lsu unit
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
wire lsu_req_ready;
// The read/write flag and the byte-enable code are both extracted from op_type;
// the immediate becomes the address offset.
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1)
) lsu_reg (
.clk (clk),
.reset (reset),
.ready_in (lsu_req_ready),
.valid_in (lsu_req_valid),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}),
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb}),
.ready_out (lsu_req_if.ready),
.valid_out (lsu_req_if.valid)
);
// rs1 supplies the per-thread base address, rs2 the per-thread store data.
VX_gpr_bypass #(
.DATAW ((2 * `NUM_THREADS * 32))
) lsu_bypass (
.clk (clk),
.reset (reset),
.push (lsu_req_valid && lsu_req_ready),
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({lsu_req_if.base_addr, lsu_req_if.store_data}),
.pop (lsu_req_if.valid && lsu_req_if.ready)
);
// csr unit
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
wire csr_req_ready;
// The CSR address is taken from the low bits of the immediate; the trailing
// 1'b0 is delivered as csr_req_if.is_io, i.e. is_io is always 0 on this path.
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1)
) csr_reg (
.clk (clk),
.reset (reset),
.ready_in (csr_req_ready),
.valid_in (csr_req_valid),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}),
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io}),
.ready_out (csr_req_if.ready),
.valid_out (csr_req_if.valid)
);
// One-cycle delayed copies of rs2_is_imm and rs1 (no reset).
// NOTE(review): presumably the delay lines these fields up with the GPR
// response they are combined with below - confirm against VX_gpr_stage timing.
reg tmp_rs2_is_imm;
reg [`NR_BITS-1:0] tmp_rs1;
always @(posedge clk) begin
tmp_rs2_is_imm <= execute_if.rs2_is_imm;
tmp_rs1 <= execute_if.rs1;
end
// CSR mask operand: the rs1 field itself widened to 32 bits when rs2_is_imm is
// set, otherwise the rs1 register value of the first thread.
wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 32'(tmp_rs1) : gpr_rsp_if.rs1_data[0];
VX_gpr_bypass #(
.DATAW (32)
) csr_bypass (
.clk (clk),
.reset (reset),
.push (csr_req_valid && csr_req_ready),
.data_in (csr_req_mask),
.data_out (csr_req_if.csr_mask),
.pop (csr_req_if.valid && csr_req_if.ready)
);
// mul unit
`ifdef EXT_M_ENABLE
wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL);
wire mul_req_ready;
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1)
) mul_reg (
.clk (clk),
.reset (reset),
.ready_in (mul_req_ready),
.valid_in (mul_req_valid),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
.data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb}),
.ready_out (mul_req_if.ready),
.valid_out (mul_req_if.valid)
);
// rs1/rs2 operands for the MUL request.
VX_gpr_bypass #(
.DATAW ((2 * `NUM_THREADS * 32))
) mul_bypass (
.clk (clk),
.reset (reset),
.push (mul_req_valid && mul_req_ready),
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({mul_req_if.rs1_data, mul_req_if.rs2_data}),
.pop (mul_req_if.valid && mul_req_if.ready)
);
`endif
// fpu unit
`ifdef EXT_F_ENABLE
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
wire fpu_req_ready;
// resolve dynamic FRM
// (op_mod carries the rounding mode; `FRM_DYN selects the value supplied by
// csr_to_issue_if.frm for the warp given in csr_to_issue_if.wid)
assign csr_to_issue_if.wid = execute_if.wid;
wire [`FRM_BITS-1:0] fpu_frm = (execute_if.op_mod == `FRM_DYN) ? csr_to_issue_if.frm : execute_if.op_mod;
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `FRM_BITS + `NR_BITS + 1)
) fpu_reg (
.clk (clk),
.reset (reset),
.ready_in (fpu_req_ready),
.valid_in (fpu_req_valid),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), fpu_frm, execute_if.rd, execute_if.wb}),
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.frm, fpu_req_if.rd, fpu_req_if.wb}),
.ready_out (fpu_req_if.ready),
.valid_out (fpu_req_if.valid)
);
// The FPU is the only unit that also receives the rs3 operand.
VX_gpr_bypass #(
.DATAW ((3 * `NUM_THREADS * 32))
) fpu_bypass (
.clk (clk),
.reset (reset),
.push (fpu_req_valid && fpu_req_ready),
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
.pop (fpu_req_if.valid && fpu_req_if.ready)
);
`endif
// gpu unit
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
wire gpu_req_ready;
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1)
) gpu_reg (
.clk (clk),
.reset (reset),
.ready_in (gpu_req_ready),
.valid_in (gpu_req_valid),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb}),
.ready_out (gpu_req_if.ready),
.valid_out (gpu_req_if.valid)
);
// rs1 is forwarded for every thread, rs2 only for the first thread (32 bits).
VX_gpr_bypass #(
.DATAW ((`NUM_THREADS * 32) + 32)
) gpu_bypass (
.clk (clk),
.reset (reset),
.push (gpu_req_valid && gpu_req_ready),
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
.data_out ({gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
.pop (gpu_req_if.valid && gpu_req_if.ready)
);
// can take next request?
// (ready reflects only the buffer of the unit addressed by ex_type)
assign execute_if.ready = (alu_req_ready && (execute_if.ex_type == `EX_ALU))
|| (lsu_req_ready && (execute_if.ex_type == `EX_LSU))
|| (csr_req_ready && (execute_if.ex_type == `EX_CSR))
`ifdef EXT_M_ENABLE
|| (mul_req_ready && (execute_if.ex_type == `EX_MUL))
`endif
`ifdef EXT_F_ENABLE
|| (fpu_req_ready && (execute_if.ex_type == `EX_FPU))
`endif
|| (gpu_req_ready && (execute_if.ex_type == `EX_GPU));
endmodule

49
hw/rtl/VX_ipdom_stack.v Normal file
View File

@@ -0,0 +1,49 @@
`include "VX_platform.vh"
// Two-value stack: every push stores a pair (q1, q2) in one slot, and each
// slot is popped twice. While the slot's is_part flag is clear, `d` shows q2
// and a pop only sets the flag; once it is set, `d` shows q1 and a pop
// releases the slot.
//
// Parameters:
//   WIDTH - bit width of each stored value.
//   DEPTH - pointer width; the storage has 2**DEPTH slots.
// `full` is raised when the write pointer reaches the last slot index.
module VX_ipdom_stack #(
    parameter WIDTH = 1,
    parameter DEPTH = 1
) (
    input wire                 clk,
    input wire                 reset,
    input wire [WIDTH - 1:0]   q1,
    input wire [WIDTH - 1:0]   q2,
    output wire [WIDTH - 1:0]  d,
    input wire                 push,
    input wire                 pop,
    output wire                empty,
    output wire                full
);
    localparam STACK_SIZE = 2 ** DEPTH;

    `USE_FAST_BRAM reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
    `USE_FAST_BRAM reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
    `USE_FAST_BRAM reg is_part [0:STACK_SIZE-1];

    reg [DEPTH-1:0] rd_ptr, wr_ptr;

    // Flag of the slot currently on top: set once that slot has been popped once.
    wire top_is_part = is_part[rd_ptr];

    // Both pointers step back by one only on the second pop of a slot.
    wire [DEPTH-1:0] pop_step = DEPTH'(top_is_part);

    always @(posedge clk) begin
        if (reset) begin
            // only the write pointer is reset
            wr_ptr <= 0;
        end else if (push) begin
            // push takes priority over pop
            stack_1[wr_ptr] <= q1;
            stack_2[wr_ptr] <= q2;
            is_part[wr_ptr] <= 0;
            rd_ptr          <= wr_ptr;
            wr_ptr          <= wr_ptr + DEPTH'(1);
        end else if (pop) begin
            is_part[rd_ptr] <= 1;
            rd_ptr          <= rd_ptr - pop_step;
            wr_ptr          <= wr_ptr - pop_step;
        end
    end

    assign d     = top_is_part ? stack_1[rd_ptr] : stack_2[rd_ptr];
    assign empty = (wr_ptr == 0);
    assign full  = (wr_ptr == (STACK_SIZE-1));

endmodule

159
hw/rtl/VX_issue.v Normal file
View File

@@ -0,0 +1,159 @@
`include "VX_define.vh"
// Issue stage. Decoded instructions are queued in VX_ibuffer; the dequeued
// instruction is checked by VX_scoreboard, its operands are requested from
// VX_gpr_stage, and it is handed to VX_instr_demux, which drives the
// per-unit request interfaces (ALU, LSU, CSR, MUL, FPU, GPU).
//
// Parameters:
//   CORE_ID - core identifier, passed to the sub-modules and used in debug prints.
module VX_issue #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISSUE_IO
input wire clk,
input wire reset,
VX_decode_if decode_if,
VX_writeback_if writeback_if,
VX_csr_to_issue_if csr_to_issue_if,
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_mul_req_if mul_req_if,
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
// Internal interfaces between the sub-modules of this stage.
VX_decode_if ibuf_deq_if();
VX_decode_if execute_if();
VX_gpr_req_if gpr_req_if();
VX_gpr_rsp_if gpr_rsp_if();
wire scoreboard_delay;
wire [`NW_BITS-1:0] deq_wid_next;
// Instruction buffer; it is frozen on the current warp while the GPR stage
// is not ready.
VX_ibuffer #(
.CORE_ID(CORE_ID)
) ibuffer (
.clk (clk),
.reset (reset),
.freeze (~gpr_req_if.ready),
.ibuf_enq_if (decode_if),
.deq_wid_next (deq_wid_next),
.ibuf_deq_if (ibuf_deq_if)
);
// Scoreboard; its `delay` output gates both the GPR request and execute_if.valid.
// NOTE(review): ibuf_deq_if.ready is not assigned in this module; presumably it
// is driven inside VX_scoreboard - confirm.
VX_scoreboard #(
.CORE_ID(CORE_ID)
) scoreboard (
.clk (clk),
.reset (reset),
.ibuf_deq_if (ibuf_deq_if),
.writeback_if (writeback_if),
.deq_wid_next (deq_wid_next),
.exe_delay (~execute_if.ready),
.gpr_delay (~gpr_req_if.ready),
.delay (scoreboard_delay)
);
// Operand read request for the dequeued instruction; raised only when an
// instruction is available and the scoreboard does not delay it.
assign gpr_req_if.valid = ibuf_deq_if.valid && ~scoreboard_delay;
assign gpr_req_if.wid = ibuf_deq_if.wid;
assign gpr_req_if.PC = ibuf_deq_if.PC;
assign gpr_req_if.rs1 = ibuf_deq_if.rs1;
assign gpr_req_if.rs2 = ibuf_deq_if.rs2;
assign gpr_req_if.rs3 = ibuf_deq_if.rs3;
assign gpr_req_if.use_rs3 = ibuf_deq_if.use_rs3;
// The operand response is consumed whenever the demux accepts the instruction.
assign gpr_rsp_if.ready = execute_if.ready;
VX_gpr_stage #(
.CORE_ID(CORE_ID)
) gpr_stage (
.clk (clk),
.reset (reset),
.writeback_if (writeback_if),
.gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if)
);
`UNUSED_VAR (gpr_rsp_if.valid);
// Instruction forwarded to the demux. In addition to the GPR request
// conditions, valid requires the GPR stage to be ready. Only the fields
// listed here are driven; rs2, rs3, use_rs3 and used_regs are not forwarded.
assign execute_if.valid = ibuf_deq_if.valid && gpr_req_if.ready && ~scoreboard_delay;
assign execute_if.wid = ibuf_deq_if.wid;
assign execute_if.tmask = ibuf_deq_if.tmask;
assign execute_if.PC = ibuf_deq_if.PC;
assign execute_if.ex_type = ibuf_deq_if.ex_type;
assign execute_if.op_type = ibuf_deq_if.op_type;
assign execute_if.op_mod = ibuf_deq_if.op_mod;
assign execute_if.wb = ibuf_deq_if.wb;
assign execute_if.rd = ibuf_deq_if.rd;
assign execute_if.rs1 = ibuf_deq_if.rs1;
assign execute_if.imm = ibuf_deq_if.imm;
assign execute_if.rs1_is_PC = ibuf_deq_if.rs1_is_PC;
assign execute_if.rs2_is_imm = ibuf_deq_if.rs2_is_imm;
// Routes execute_if plus the operand data to the unit request interfaces.
VX_instr_demux instr_demux (
.clk (clk),
.reset (reset),
.execute_if (execute_if),
.gpr_rsp_if (gpr_rsp_if),
.csr_to_issue_if(csr_to_issue_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if)
);
// Scope taps: dequeued instruction, stall causes, GPR response and writeback.
`SCOPE_ASSIGN (scope_issue_valid, ibuf_deq_if.valid);
`SCOPE_ASSIGN (scope_issue_wid, ibuf_deq_if.wid);
`SCOPE_ASSIGN (scope_issue_tmask, ibuf_deq_if.tmask);
`SCOPE_ASSIGN (scope_issue_pc, ibuf_deq_if.PC);
`SCOPE_ASSIGN (scope_issue_ex_type, ibuf_deq_if.ex_type);
`SCOPE_ASSIGN (scope_issue_op_type, ibuf_deq_if.op_type);
`SCOPE_ASSIGN (scope_issue_op_mod, ibuf_deq_if.op_mod);
`SCOPE_ASSIGN (scope_issue_wb, ibuf_deq_if.wb);
`SCOPE_ASSIGN (scope_issue_rd, ibuf_deq_if.rd);
`SCOPE_ASSIGN (scope_issue_rs1, ibuf_deq_if.rs1);
`SCOPE_ASSIGN (scope_issue_rs2, ibuf_deq_if.rs2);
`SCOPE_ASSIGN (scope_issue_rs3, ibuf_deq_if.rs3);
`SCOPE_ASSIGN (scope_issue_imm, ibuf_deq_if.imm);
`SCOPE_ASSIGN (scope_issue_rs1_is_pc, ibuf_deq_if.rs1_is_PC);
`SCOPE_ASSIGN (scope_issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm);
`SCOPE_ASSIGN (scope_issue_ready, ibuf_deq_if.ready);
`SCOPE_ASSIGN (scope_scoreboard_delay, scoreboard_delay);
`SCOPE_ASSIGN (scope_gpr_delay, ~gpr_req_if.ready);
`SCOPE_ASSIGN (scope_execute_delay, ~execute_if.ready);
`SCOPE_ASSIGN (scope_gpr_rsp_valid, gpr_rsp_if.valid);
`SCOPE_ASSIGN (scope_gpr_rsp_wid, gpr_rsp_if.wid);
`SCOPE_ASSIGN (scope_gpr_rsp_pc, gpr_rsp_if.PC);
`SCOPE_ASSIGN (scope_gpr_rsp_a, gpr_rsp_if.rs1_data);
`SCOPE_ASSIGN (scope_gpr_rsp_b, gpr_rsp_if.rs2_data);
`SCOPE_ASSIGN (scope_gpr_rsp_c, gpr_rsp_if.rs3_data);
`SCOPE_ASSIGN (scope_writeback_valid, writeback_if.valid);
`SCOPE_ASSIGN (scope_writeback_wid, writeback_if.wid);
`SCOPE_ASSIGN (scope_writeback_pc, writeback_if.PC);
`SCOPE_ASSIGN (scope_writeback_rd, writeback_if.rd);
`SCOPE_ASSIGN (scope_writeback_data, writeback_if.data);
// Optional trace: prints one line per request accepted by an execution unit.
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (alu_req_if.valid && alu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rs1_data, alu_req_if.rs2_data);
end
if (lsu_req_if.valid && lsu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
end
if (csr_req_if.valid && csr_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.csr_addr, csr_req_if.csr_mask);
end
if (mul_req_if.valid && mul_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rs1_data, mul_req_if.rs2_data);
end
if (fpu_req_if.valid && fpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
end
if (gpu_req_if.valid && gpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
end
end
`endif
endmodule

View File

@@ -5,203 +5,217 @@ module VX_lsu_unit #(
) (
`SCOPE_SIGNALS_LSU_IO
input wire clk,
input wire reset,
input wire no_slot_mem,
VX_lsu_req_if lsu_req_if,
// Write back to GPR
VX_wb_if mem_wb_if,
input wire clk,
input wire reset,
// Dcache interface
VX_cache_core_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
output wire delay
// inputs
VX_lsu_req_if lsu_req_if,
// outputs
VX_exu_to_cmt_if lsu_commit_if
);
wire [`NUM_THREADS-1:0] req_tmask;
wire req_rw;
wire [`NUM_THREADS-1:0][29:0] req_addr;
wire [`NUM_THREADS-1:0][1:0] req_offset;
wire [`NUM_THREADS-1:0][3:0] req_byteen;
wire [`NUM_THREADS-1:0][31:0] req_data;
wire [1:0] req_sext;
wire [`NR_BITS-1:0] req_rd;
wire req_wb;
wire [`NW_BITS-1:0] req_wid;
wire [31:0] req_pc;
VX_wb_if mem_wb_unqual_if();
wire [`NUM_THREADS-1:0] use_valid;
wire use_req_rw;
wire [`NUM_THREADS-1:0][29:0] use_req_addr;
wire [`NUM_THREADS-1:0][1:0] use_req_offset;
wire [`NUM_THREADS-1:0][3:0] use_req_byteen;
wire [`NUM_THREADS-1:0][31:0] use_req_data;
wire [`BYTE_EN_BITS-1:0] use_mem_read;
wire [4:0] use_rd;
wire [`NW_BITS-1:0] use_warp_num;
wire [1:0] use_wb;
wire [31:0] use_pc;
genvar i;
// Generate Full Addresses
wire[`NUM_THREADS-1:0][31:0] full_address;
for (i = 0; i < `NUM_THREADS; i++) begin
wire [`NUM_THREADS-1:0][31:0] full_address;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
end
wire mem_req_rw = (lsu_req_if.mem_write != `BYTE_EN_NO);
reg [3:0] wmask;
reg [1:0] mem_req_sext;
always @(*) begin
case ((mem_req_rw ? lsu_req_if.mem_write[1:0] : lsu_req_if.mem_read[1:0]))
0: wmask = 4'b0001;
1: wmask = 4'b0011;
default : wmask = 4'b1111;
case (lsu_req_if.byteen)
`BYTEEN_SB: mem_req_sext = 2'h1;
`BYTEEN_SH: mem_req_sext = 2'h2;
default: mem_req_sext = 2'h0;
endcase
end
wire [`NUM_THREADS-1:0][29:0] mem_req_addr;
wire [`NUM_THREADS-1:0][1:0] mem_req_offset;
wire [`NUM_THREADS-1:0][3:0] mem_req_byteen;
wire [`NUM_THREADS-1:0][3:0] mem_req_byteen;
wire [`NUM_THREADS-1:0][31:0] mem_req_data;
for (i = 0; i < `NUM_THREADS; i++) begin
reg [3:0] wmask;
always @(*) begin
case (`BYTEEN_TYPE(lsu_req_if.byteen))
0: wmask = 4'b0001;
1: wmask = 4'b0011;
default: wmask = 4'b1111;
endcase
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign mem_req_addr[i] = full_address[i][31:2];
assign mem_req_offset[i] = full_address[i][1:0];
assign mem_req_byteen[i] = wmask << full_address[i][1:0];
assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0};
end
end
`IGNORE_WARNINGS_BEGIN
wire[`NUM_THREADS-1:0][31:0] use_address;
wire [`NUM_THREADS-1:0][31:0] req_address;
`IGNORE_WARNINGS_END
wire valid_in;
wire stall_in;
VX_generic_register #(
.N((`NUM_THREADS * 1) + (`NUM_THREADS * 32) + `BYTE_EN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + 5 + `NW_BITS + 2 + 32)
) lsu_buffer (
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32)))
) lsu_req_reg (
.clk (clk),
.reset (reset),
.stall (delay),
.stall (stall_in),
.flush (1'b0),
.in ({lsu_req_if.valid, full_address, lsu_req_if.mem_read, mem_req_rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.curr_PC}),
.out ({use_valid , use_address, use_mem_read , use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd , use_warp_num , use_wb , use_pc})
.in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}),
.out ({valid_in, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
);
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
wire [`BYTE_EN_BITS-1:0] core_rsp_mem_read;
wire [`NW_BITS-1:0] rsp_wid;
wire [31:0] rsp_pc;
wire [`NR_BITS-1:0] rsp_rd;
wire rsp_wb;
wire [`NUM_THREADS-1:0][1:0] rsp_offset;
wire [1:0] rsp_sext;
reg [`NUM_THREADS-1:0][31:0] rsp_data;
reg [`NUM_THREADS-1:0] mem_rsp_mask[`DCREQ_SIZE-1:0];
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask;
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
wire mrq_full;
wire [`DCORE_TAG_ID_BITS-1:0] req_tag, rsp_tag;
wire lsuq_full;
wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready
&& (0 == use_req_rw); // only push read requests
wire lsuq_push = (| dcache_req_if.valid) && dcache_req_if.ready
&& (0 == req_rw); // loads only
wire mrq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
wire lsuq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
assign mrq_read_addr = dcache_rsp_if.tag[0][`LOG2UP(`DCREQ_SIZE)-1:0];
assign rsp_tag = dcache_rsp_if.tag[0][`DCORE_TAG_ID_BITS-1:0];
wire [`NUM_THREADS-1:0] mem_rsp_mask_upd = mem_rsp_mask[mrq_read_addr] & ~dcache_rsp_if.valid;
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask[rsp_tag] & ~dcache_rsp_if.valid;
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd);
wire lsuq_pop = lsuq_pop_part && (0 == mem_rsp_mask_n);
VX_indexable_queue #(
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 2 + (`NUM_THREADS * 2) + `BYTE_EN_BITS + 5 + `NW_BITS),
.SIZE (`DCREQ_SIZE)
) mem_req_queue (
.clk (clk),
.reset (reset),
.write_data ({mrq_write_addr, use_pc, use_wb, use_req_offset, use_mem_read, use_rd, use_warp_num}),
.write_addr (mrq_write_addr),
.push (mrq_push),
.full (mrq_full),
.pop (mrq_pop),
.read_addr (mrq_read_addr),
.read_data ({dbg_mrq_write_addr, mem_wb_unqual_if.curr_PC, mem_wb_unqual_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_unqual_if.rd, mem_wb_unqual_if.warp_num}),
`UNUSED_PIN (empty)
VX_cam_buffer #(
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
.SIZE (`LSUQ_SIZE)
) lsu_cam (
.clk (clk),
.reset (reset),
.write_addr (req_tag),
.acquire_slot (lsuq_push),
.read_addr (rsp_tag),
.write_data ({req_wid, req_pc, req_rd, req_wb, req_offset, req_sext}),
.read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_offset, rsp_sext}),
.release_addr (rsp_tag),
.release_slot (lsuq_pop),
.full (lsuq_full)
);
always @(posedge clk) begin
if (mrq_push) begin
mem_rsp_mask[mrq_write_addr] <= use_valid;
if (lsuq_push) begin
mem_rsp_mask[req_tag] <= req_tmask;
end
if (mrq_pop_part) begin
mem_rsp_mask[mrq_read_addr] <= mem_rsp_mask_upd;
assert(($time < 2) || mrq_read_addr == dbg_mrq_write_addr);
if (lsuq_pop_part) begin
mem_rsp_mask[rsp_tag] <= mem_rsp_mask_n;
end
end
// Core Request
wire stall_out = ~lsu_commit_if.ready && lsu_commit_if.valid;
wire store_stall = valid_in && req_rw && stall_out;
assign dcache_req_if.valid = use_valid & {`NUM_THREADS{~mrq_full}};
assign dcache_req_if.rw = {`NUM_THREADS{use_req_rw}};
assign dcache_req_if.byteen = use_req_byteen;
assign dcache_req_if.addr = use_req_addr;
assign dcache_req_if.data = use_req_data;
// Core Request
assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~lsuq_full && ~store_stall}} & req_tmask;
assign dcache_req_if.rw = {`NUM_THREADS{req_rw}};
assign dcache_req_if.byteen = req_byteen;
assign dcache_req_if.addr = req_addr;
assign dcache_req_if.data = req_data;
`ifdef DBG_CORE_REQ_INFO
assign dcache_req_if.tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr};
assign dcache_req_if.tag = {req_pc, req_rd, req_wid, req_tag};
`else
assign dcache_req_if.tag = mrq_write_addr;
assign dcache_req_if.tag = req_tag;
`endif
// Can't accept new request
assign delay = mrq_full || !dcache_req_if.ready;
assign stall_in = ~dcache_req_if.ready || lsuq_full || store_stall;
// Core Response
// Can accept new request?
assign lsu_req_if.ready = ~stall_in;
reg [`NUM_THREADS-1:0][31:0] core_rsp_data;
wire [`NUM_THREADS-1:0][31:0] rsp_data_shifted;
for (i = 0; i < `NUM_THREADS; i++) begin
assign rsp_data_shifted[i] = dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0};
// Core Response
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [31:0] rsp_data_shifted = dcache_rsp_if.data[i] >> {rsp_offset[i], 3'b0};
always @(*) begin
case (core_rsp_mem_read)
`BYTE_EN_SB: core_rsp_data[i] = {{24{rsp_data_shifted[i][7]}}, rsp_data_shifted[i][7:0]};
`BYTE_EN_SH: core_rsp_data[i] = {{16{rsp_data_shifted[i][15]}}, rsp_data_shifted[i][15:0]};
`BYTE_EN_UB: core_rsp_data[i] = 32'(rsp_data_shifted[i][7:0]);
`BYTE_EN_UH: core_rsp_data[i] = 32'(rsp_data_shifted[i][15:0]);
default : core_rsp_data[i] = rsp_data_shifted[i];
case (rsp_sext)
1: rsp_data[i] = {{24{rsp_data_shifted[7]}}, rsp_data_shifted[7:0]};
2: rsp_data[i] = {{16{rsp_data_shifted[15]}}, rsp_data_shifted[15:0]};
default: rsp_data[i] = rsp_data_shifted;
endcase
end
end
end
assign mem_wb_unqual_if.valid = dcache_rsp_if.valid;
assign mem_wb_unqual_if.data = core_rsp_data;
wire is_store_req = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready;
wire is_load_rsp = (| dcache_rsp_if.valid);
// Can't accept new response
assign dcache_rsp_if.ready = !(no_slot_mem & (|mem_wb_if.valid));
wire mem_rsp_stall = is_load_rsp && is_store_req; // arbitration prioritizes stores
// From LSU to WB
localparam WB_REQ_SIZE = (`NUM_THREADS) + (`NUM_THREADS * 32) + (`NW_BITS) + (5) + (2) + 32;
VX_generic_register #(.N(WB_REQ_SIZE)) lsu_to_wb (
wire arb_valid = is_store_req || is_load_rsp;
wire [`NW_BITS-1:0] arb_wid = is_store_req ? req_wid : rsp_wid;
wire [`NUM_THREADS-1:0] arb_tmask = is_store_req ? req_tmask : dcache_rsp_if.valid;
wire [31:0] arb_PC = is_store_req ? req_pc : rsp_pc;
wire [`NR_BITS-1:0] arb_rd = is_store_req ? 0 : rsp_rd;
wire arb_wb = is_store_req ? 0 : rsp_wb;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
) lsu_rsp_reg (
.clk (clk),
.reset (reset),
.stall (no_slot_mem),
.stall (stall_out),
.flush (1'b0),
.in ({mem_wb_unqual_if.valid, mem_wb_unqual_if.data, mem_wb_unqual_if.warp_num, mem_wb_unqual_if.rd, mem_wb_unqual_if.wb, mem_wb_unqual_if.curr_PC}),
.out ({mem_wb_if.valid, mem_wb_if.data, mem_wb_if.warp_num, mem_wb_if.rd, mem_wb_if.wb, mem_wb_if.curr_PC})
.in ({arb_valid, arb_wid, arb_tmask, arb_PC, arb_rd, arb_wb, rsp_data}),
.out ({lsu_commit_if.valid, lsu_commit_if.wid, lsu_commit_if.tmask, lsu_commit_if.PC, lsu_commit_if.rd, lsu_commit_if.wb, lsu_commit_if.data})
);
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
`SCOPE_ASSIGN(scope_dcache_req_curr_PC, use_pc);
`SCOPE_ASSIGN(scope_dcache_req_addr, use_address);
`SCOPE_ASSIGN(scope_dcache_req_rw, core_req_rw);
`SCOPE_ASSIGN(scope_dcache_req_byteen,dcache_req_if.byteen);
`SCOPE_ASSIGN(scope_dcache_req_data, dcache_req_if.data);
`SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_if.tag);
`SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.ready);
// Can accept new cache response?
assign dcache_rsp_if.ready = ~(stall_out || mem_rsp_stall);
`SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_if.valid);
`SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.data);
`SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_if.tag);
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.ready);
// scope registration
`SCOPE_ASSIGN (scope_dcache_req_valid, dcache_req_if.valid);
`SCOPE_ASSIGN (scope_dcache_req_addr, req_address);
`SCOPE_ASSIGN (scope_dcache_req_rw, req_rw);
`SCOPE_ASSIGN (scope_dcache_req_byteen,dcache_req_if.byteen);
`SCOPE_ASSIGN (scope_dcache_req_data, dcache_req_if.data);
`SCOPE_ASSIGN (scope_dcache_req_tag, req_tag);
`SCOPE_ASSIGN (scope_dcache_req_ready, dcache_req_if.ready);
`SCOPE_ASSIGN (scope_dcache_req_wid, req_wid);
`SCOPE_ASSIGN (scope_dcache_req_pc, req_pc);
`SCOPE_ASSIGN (scope_dcache_rsp_valid, dcache_rsp_if.valid);
`SCOPE_ASSIGN (scope_dcache_rsp_data, dcache_rsp_if.data);
`SCOPE_ASSIGN (scope_dcache_rsp_tag, rsp_tag);
`SCOPE_ASSIGN (scope_dcache_rsp_ready, dcache_rsp_if.ready);
`ifdef DBG_PRINT_CORE_DCACHE
always @(posedge clk) begin
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
$display("%t: D%0d$ req: valid=%b, addr=%0h, tag=%0h, rw=%0b, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h",
$time, CORE_ID, use_valid, use_address, mrq_write_addr, use_req_rw, use_pc, use_rd, use_warp_num, use_req_byteen, use_req_data);
$display("%t: D$%0d req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, rd=%0d, rw=%0b, byteen=%0h, data=%0h",
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, req_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data);
end
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
$display("%t: D%0d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h",
$time, CORE_ID, mem_wb_unqual_if.valid, mrq_read_addr, mem_wb_unqual_if.curr_PC, mem_wb_unqual_if.rd, mem_wb_unqual_if.warp_num, mem_wb_unqual_if.data);
$display("%t: D$%0d rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data);
end
end
`endif

View File

@@ -83,15 +83,13 @@ module VX_mem_arb #(
assign out_mem_req_data = in_mem_req_data [bus_req_sel];
assign out_mem_req_tag = {in_mem_req_tag [bus_req_sel], REQS_BITS'(bus_req_sel)};
genvar i;
for (i = 0; i < NUM_REQUESTS; i++) begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign in_mem_req_ready[i] = out_mem_req_ready && (bus_req_sel == REQS_BITS'(i));
end
wire [REQS_BITS-1:0] bus_rsp_sel = out_mem_rsp_tag[REQS_BITS-1:0];
for (i = 0; i < NUM_REQUESTS; i++) begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign in_mem_rsp_valid[i] = out_mem_rsp_valid && (bus_rsp_sel == REQS_BITS'(i));
assign in_mem_rsp_data[i] = out_mem_rsp_data;
assign in_mem_rsp_tag[i] = out_mem_rsp_tag[REQS_BITS +: TAG_IN_WIDTH];

View File

@@ -40,18 +40,20 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS)
) core_dcache_rsp_qual_if(), core_smem_rsp_if();
// select shared memory address
wire is_smem_addr = (({core_dcache_req_if.addr[0], 2'b0} - `SHARED_MEM_BASE_ADDR) <= `SCACHE_SIZE);
wire smem_select = (| core_dcache_req_if.valid) ? is_smem_addr : 0;
// select shared memory bus
wire is_smem_addr = (({core_dcache_req_if.addr[0], 2'b0} - `SHARED_MEM_BASE_ADDR) <= `SCACHE_SIZE);
wire smem_req_select = (| core_dcache_req_if.valid) ? is_smem_addr : 0;
wire smem_rsp_select = (| core_smem_rsp_if.valid);
VX_dcache_arb dcache_smem_arb (
.req_select (smem_select),
.in_core_req_if (core_dcache_req_if),
.out0_core_req_if (core_dcache_req_qual_if),
.out1_core_req_if (core_smem_req_if),
.in0_core_rsp_if (core_dcache_rsp_qual_if),
.in1_core_rsp_if (core_smem_rsp_if),
.out_core_rsp_if (core_dcache_rsp_if)
VX_dcache_arb dcache_smem_arb (
.core_req_in_if (core_dcache_req_if),
.core_req_out0_if (core_dcache_req_qual_if),
.core_req_out1_if (core_smem_req_if),
.core_rsp_in0_if (core_dcache_rsp_qual_if),
.core_rsp_in1_if (core_smem_rsp_if),
.core_rsp_out_if (core_dcache_rsp_if),
.select_req (smem_req_select),
.select_rsp (smem_rsp_select)
);
VX_cache #(
@@ -61,7 +63,6 @@ module VX_mem_unit # (
.NUM_BANKS (`SNUM_BANKS),
.WORD_SIZE (`SWORD_SIZE),
.NUM_REQUESTS (`SNUM_REQUESTS),
.STAGE_1_CYCLES (`SSTAGE_1_CYCLES),
.CREQ_SIZE (`SCREQ_SIZE),
.MRVQ_SIZE (8),
.DFPQ_SIZE (1),
@@ -145,7 +146,6 @@ module VX_mem_unit # (
.NUM_BANKS (`DNUM_BANKS),
.WORD_SIZE (`DWORD_SIZE),
.NUM_REQUESTS (`DNUM_REQUESTS),
.STAGE_1_CYCLES (`DSTAGE_1_CYCLES),
.CREQ_SIZE (`DCREQ_SIZE),
.MRVQ_SIZE (`DMRVQ_SIZE),
.DFPQ_SIZE (`DDFPQ_SIZE),
@@ -230,7 +230,6 @@ module VX_mem_unit # (
.NUM_BANKS (`INUM_BANKS),
.WORD_SIZE (`IWORD_SIZE),
.NUM_REQUESTS (`INUM_REQUESTS),
.STAGE_1_CYCLES (`ISTAGE_1_CYCLES),
.CREQ_SIZE (`ICREQ_SIZE),
.MRVQ_SIZE (`IMRVQ_SIZE),
.DFPQ_SIZE (`IDFPQ_SIZE),
@@ -243,8 +242,8 @@ module VX_mem_unit # (
.SNOOP_FORWARDING (0),
.DRAM_ENABLE (1),
.WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
) icache (
`SCOPE_SIGNALS_CACHE_UNBIND

165
hw/rtl/VX_mul_unit.v Normal file
View File

@@ -0,0 +1,165 @@
`include "VX_define.vh"
// Integer multiply/divide execution unit.
// Requests arriving on mul_req_if are routed either to a fixed-latency
// multiplier pipeline (one VX_multiplier per thread) or to a shared
// VX_serial_div instance. Request metadata (wid, tmask, PC, rd, wb) is parked
// in a CAM buffer and looked up again by tag when a result comes out; the
// selected result is registered onto mul_commit_if.
module VX_mul_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// Inputs
VX_mul_req_if mul_req_if,
// Outputs
VX_exu_to_cmt_if mul_commit_if
);
// Width of a CAM slot index (tag) for a queue of MULQ_SIZE entries.
localparam MULQ_BITS = `LOG2UP(`MULQ_SIZE);
wire [`MUL_BITS-1:0] alu_op = mul_req_if.op_type;
// Decoded from the request currently presented on mul_req_if (not from in-flight operations).
wire is_div_op = `IS_DIV_OP(alu_op);
wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;
// Metadata read back from the CAM for the result selected by tag_out.
wire [`NW_BITS-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_PC;
wire [`NR_BITS-1:0] rsp_rd;
wire rsp_wb;
// tag_in: connected to the CAM write_addr port and attached to each accepted request
// (presumably the slot index allocated by the CAM - confirm against VX_cam_buffer).
// tag_out: tag of the result being committed this cycle.
wire [MULQ_BITS-1:0] tag_in, tag_out;
wire valid_out;
wire stall_out;
wire mulq_full;
// A slot is acquired when a request handshake completes and released when a
// result is accepted by the output register (valid and not stalled).
wire mulq_push = mul_req_if.valid && mul_req_if.ready;
wire mulq_pop = valid_out && ~stall_out;
VX_cam_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
.SIZE (`MULQ_SIZE)
) mul_cam (
.clk (clk),
.reset (reset),
.acquire_slot (mulq_push),
.write_addr (tag_in),
.read_addr (tag_out),
.release_addr (tag_out),
.write_data ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb}),
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
.release_slot (mulq_pop),
.full (mulq_full)
);
///////////////////////////////////////////////////////////////////////////
// Multiplier path: per-thread 33x33 signed multiply with LATENCY_IMUL stages.
wire [`NUM_THREADS-1:0][31:0] mul_result;
wire is_mul_in = (alu_op == `MUL_MUL);
wire is_mul_out;
wire stall_mul;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
// Operands are widened to 33 bits so one signed multiplier covers all variants:
// operand 1 is zero-extended only for MULHU, operand 2 is zero-extended for
// MULHU and MULHSU; otherwise bit 31 is replicated as the sign bit.
wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]};
wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]};
`IGNORE_WARNINGS_BEGIN
wire [65:0] mul_result_tmp;
`IGNORE_WARNINGS_END
VX_multiplier #(
.WIDTHA(33),
.WIDTHB(33),
.WIDTHP(66),
.SIGNED(1),
.LATENCY(`LATENCY_IMUL)
) multiplier (
.clk(clk),
.enable(~stall_mul),
.dataa(mul_in1),
.datab(mul_in2),
.result(mul_result_tmp)
);
// MUL returns the low word of the product; every other variant returns bits [63:32].
assign mul_result[i] = is_mul_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
end
wire [MULQ_BITS-1:0] mul_tag;
wire mul_valid_out;
// Only non-divide requests that complete the handshake enter the multiplier pipeline.
wire mul_fire = mul_req_if.valid && mul_req_if.ready && !is_div_op;
// Side-band pipeline with the same depth and enable as the multipliers; it carries
// the valid bit, the CAM tag and the low/high word selector next to the data.
VX_shift_register #(
.DATAW(1 + MULQ_BITS + 1),
.DEPTH(`LATENCY_IMUL)
) mul_shift_reg (
.clk(clk),
.reset(reset),
.enable(~stall_mul),
.in({mul_fire, tag_in, is_mul_in}),
.out({mul_valid_out, mul_tag, is_mul_out})
);
///////////////////////////////////////////////////////////////////////////
// Divider path: one VX_serial_div instance with NUM_THREADS lanes.
wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp;
// DIV/DIVU want the quotient; the remaining divide-class ops take the remainder.
wire is_div_only = (alu_op == `MUL_DIV) || (alu_op == `MUL_DIVU);
wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM);
// NOTE(review): div_valid_in is not qualified by mulq_full, while mul_req_if.ready is.
// Confirm VX_serial_div cannot accept a request that was not acknowledged (and
// therefore has no CAM slot behind tag_in).
wire div_valid_in = mul_req_if.valid && is_div_op;
wire div_ready_in;
wire div_ready_out;
wire div_valid_out;
wire is_div_out;
wire [MULQ_BITS-1:0] div_tag;
VX_serial_div #(
.WIDTHN(32),
.WIDTHD(32),
.WIDTHQ(32),
.WIDTHR(32),
.LANES(`NUM_THREADS),
.TAGW(MULQ_BITS + 1)
) divide (
.clk(clk),
.reset(reset),
.ready_in(div_ready_in),
.valid_in(div_valid_in),
.signed_mode(is_signed_div),
.tag_in({tag_in, is_div_only}),
.numer(alu_in1),
.denom(alu_in2),
.quotient(div_result_tmp),
.remainder(rem_result_tmp),
.ready_out(div_ready_out),
.valid_out(div_valid_out),
.tag_out({div_tag, is_div_out})
);
// The quotient/remainder selector travels through the divider inside the tag.
wire [`NUM_THREADS-1:0][31:0] div_result = is_div_out ? div_result_tmp : rem_result_tmp;
///////////////////////////////////////////////////////////////////////////
// Output arbitration and back-pressure.
// Both paths can present a result in the same cycle; the divider is then held off.
wire arbiter_hazard = mul_valid_out && div_valid_out;
// The output register holds its value while the commit side has not taken a valid result.
assign stall_out = ~mul_commit_if.ready && mul_commit_if.valid;
// NOTE(review): stall_mul gates the multiplier pipeline enable, yet it is masked by
// is_div_op, which describes the *incoming* request. With stall_out high and a divide
// op at the input the pipeline keeps advancing while mul_reg is stalled, so an
// in-flight multiply result looks like it could be skipped - confirm.
assign stall_mul = (stall_out && !is_div_op) || mulq_full;
assign div_ready_out = ~stall_out && ~arbiter_hazard; // arbitration prioritizes MUL
wire stall_in = stall_mul || ~div_ready_in;
assign valid_out = mul_valid_out || div_valid_out;
assign tag_out = mul_valid_out ? mul_tag : div_tag;
wire [`NUM_THREADS-1:0][31:0] result = mul_valid_out ? mul_result : div_result;
// Output register: merges the CAM metadata with the selected result and drives the commit interface.
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
) mul_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result}),
.out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
);
// can accept new request?
assign mul_req_if.ready = ~stall_in;
endmodule

View File

@@ -5,8 +5,8 @@ module VX_pipeline #(
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
// Clock
input wire clk,
@@ -58,16 +58,6 @@ module VX_pipeline #(
output wire busy,
output wire ebreak
);
`DEBUG_BEGIN
wire scheduler_empty;
`DEBUG_END
wire memory_delay;
wire exec_delay;
wire gpr_stage_delay;
wire schedule_delay;
// Dcache
VX_cache_core_req_if #(
.NUM_REQUESTS(`NUM_THREADS),
@@ -98,7 +88,6 @@ module VX_pipeline #(
.CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS)
) core_icache_rsp_if();
// CSR I/O
VX_csr_io_req_if csr_io_req_if();
assign csr_io_req_if.valid = csr_io_req_valid;
@@ -112,69 +101,127 @@ module VX_pipeline #(
assign csr_io_rsp_data = csr_io_rsp_if.data;
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
// Front-end to Back-end
VX_backend_req_if bckE_req_if();
// Back-end to Front-end
VX_wb_if writeback_if();
VX_branch_rsp_if branch_rsp_if();
VX_jal_rsp_if jal_rsp_if();
// Warp controls
VX_csr_to_issue_if csr_to_issue_if();
VX_cmt_to_csr_if cmt_to_csr_if();
VX_decode_if decode_if();
VX_branch_ctl_if branch_ctl_if();
VX_warp_ctl_if warp_ctl_if();
VX_ifetch_rsp_if ifetch_rsp_if();
VX_alu_req_if alu_req_if();
VX_lsu_req_if lsu_req_if();
VX_csr_req_if csr_req_if();
VX_mul_req_if mul_req_if();
VX_fpu_req_if fpu_req_if();
VX_gpu_req_if gpu_req_if();
VX_writeback_if writeback_if();
VX_wstall_if wstall_if();
VX_join_if join_if();
VX_exu_to_cmt_if alu_commit_if();
VX_exu_to_cmt_if lsu_commit_if();
VX_exu_to_cmt_if csr_commit_if();
VX_exu_to_cmt_if mul_commit_if();
VX_fpu_to_cmt_if fpu_commit_if();
VX_exu_to_cmt_if gpu_commit_if();
VX_front_end #(
VX_fetch #(
.CORE_ID(CORE_ID)
) front_end (
) fetch (
`SCOPE_SIGNALS_ISTAGE_BIND
.clk (clk),
.reset (reset),
.warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if),
.schedule_delay (schedule_delay),
.icache_rsp_if (core_icache_rsp_if),
.icache_req_if (core_icache_req_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.icache_rsp_if (core_icache_rsp_if),
.wstall_if (wstall_if),
.join_if (join_if),
.warp_ctl_if (warp_ctl_if),
.branch_ctl_if (branch_ctl_if),
.ifetch_rsp_if (ifetch_rsp_if),
.busy (busy)
);
VX_scheduler scheduler (
VX_decode #(
.CORE_ID(CORE_ID)
) decode (
.clk (clk),
.reset (reset),
.ifetch_rsp_if (ifetch_rsp_if),
.decode_if (decode_if),
.wstall_if (wstall_if),
.join_if (join_if)
);
VX_issue #(
.CORE_ID(CORE_ID)
) issue (
`SCOPE_SIGNALS_ISSUE_BIND
.clk (clk),
.reset (reset),
.decode_if (decode_if),
.writeback_if (writeback_if),
.csr_to_issue_if(csr_to_issue_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if)
);
VX_execute #(
.CORE_ID(CORE_ID)
) execute (
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
.clk (clk),
.reset (reset),
.dcache_req_if (core_dcache_req_if),
.dcache_rsp_if (core_dcache_rsp_if),
.csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if),
.csr_to_issue_if(csr_to_issue_if),
.cmt_to_csr_if (cmt_to_csr_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
.mul_req_if (mul_req_if),
.fpu_req_if (fpu_req_if),
.gpu_req_if (gpu_req_if),
.warp_ctl_if (warp_ctl_if),
.branch_ctl_if (branch_ctl_if),
.alu_commit_if (alu_commit_if),
.lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
.ebreak (ebreak)
);
VX_commit #(
.CORE_ID(CORE_ID)
) commit (
.clk (clk),
.reset (reset),
.memory_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay(gpr_stage_delay),
.bckE_req_if (bckE_req_if),
.alu_commit_if (alu_commit_if),
.lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
.writeback_if (writeback_if),
.schedule_delay (schedule_delay),
.is_empty (scheduler_empty)
);
VX_back_end #(
.CORE_ID(CORE_ID)
) back_end (
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk),
.reset (reset),
.csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if),
.schedule_delay (schedule_delay),
.warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.dcache_req_if (core_dcache_req_if),
.dcache_rsp_if (core_dcache_rsp_if),
.writeback_if (writeback_if),
.mem_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay),
.ebreak (ebreak)
);
.cmt_to_csr_if (cmt_to_csr_if)
);
assign dcache_req_valid = core_dcache_req_if.valid;
assign dcache_req_rw = core_dcache_req_if.rw;
@@ -202,21 +249,4 @@ module VX_pipeline #(
assign core_icache_rsp_if.tag = icache_rsp_tag;
assign icache_rsp_ready = core_icache_rsp_if.ready;
`SCOPE_ASSIGN(scope_busy, busy);
`SCOPE_ASSIGN(scope_schedule_delay, schedule_delay);
`SCOPE_ASSIGN(scope_memory_delay, memory_delay);
`SCOPE_ASSIGN(scope_exec_delay, exec_delay);
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay);
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin
$display("%t: Core%0d-WB: warp=%0d, rd=%0d, data=%0h", $time, CORE_ID, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
end
if (schedule_delay || memory_delay || exec_delay || gpr_stage_delay) begin
$display("%t: Core%0d-Delay: sched=%b, mem=%b, exec=%b, gpr=%b ", $time, CORE_ID, schedule_delay, memory_delay, exec_delay, gpr_stage_delay);
end
end
`endif
endmodule

67
hw/rtl/VX_platform.vh Normal file
View File

@@ -0,0 +1,67 @@
`ifndef VX_PLATFORM
`define VX_PLATFORM
`include "VX_scope.vh"
///////////////////////////////////////////////////////////////////////////////
// DEBUG_BLOCK(x): emits x with the Verilator UNUSED lint disabled around it.
// When NDEBUG is defined the macro expands to nothing, so x is dropped.
`ifndef NDEBUG
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
x \
/* verilator lint_on UNUSED */
`else
`define DEBUG_BLOCK(x)
`endif
// DEBUG_BEGIN / DEBUG_END: bracket a region in which the UNUSED lint is disabled.
`define DEBUG_BEGIN /* verilator lint_off UNUSED */
`define DEBUG_END /* verilator lint_on UNUSED */
// IGNORE_WARNINGS_BEGIN / IGNORE_WARNINGS_END: bracket a region in which the listed
// Verilator lints (UNUSED, PINCONNECTEMPTY, WIDTH, UNOPTFLAT, UNDRIVEN,
// DECLFILENAME) are switched off and then back on.
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
/* verilator lint_off PINCONNECTEMPTY */ \
/* verilator lint_off WIDTH */ \
/* verilator lint_off UNOPTFLAT */ \
/* verilator lint_off UNDRIVEN */ \
/* verilator lint_off DECLFILENAME */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
/* verilator lint_on WIDTH */ \
/* verilator lint_on UNOPTFLAT */ \
/* verilator lint_on UNDRIVEN */ \
/* verilator lint_on DECLFILENAME */
// UNUSED_VAR(x): references x from the sensitivity list of an empty always block.
`define UNUSED_VAR(x) always @(x) begin end
// UNUSED_PIN(x): leaves port x unconnected in an instantiation, with the
// PINCONNECTEMPTY lint disabled for that connection.
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
. x () \
/* verilator lint_on PINCONNECTEMPTY */
// STRINGIFY(x): turns the macro argument text into a string literal.
`define STRINGIFY(x) `"x`"
// STATIC_ASSERT(cond, msg): generate block that calls $error(msg) when cond is false.
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error(msg); \
endgenerate
// ENABLE_TRACING / DISABLE_TRACING: Verilator tracing_on / tracing_off pragmas.
`define ENABLE_TRACING /* verilator tracing_on */
`define DISABLE_TRACING /* verilator tracing_off */
///////////////////////////////////////////////////////////////////////////////
// Attribute shorthands that set syn_ramstyle to "mlab" and "no_rw_check" respectively.
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)
`define RELAX_BRAM_RW (* syn_ramstyle = "no_rw_check" *)
///////////////////////////////////////////////////////////////////////////////
// Integer helper macros. Every argument use is parenthesized so that an
// expression argument (e.g. "a | b" or "n - 1") keeps its intended grouping
// after textual substitution.
// CLOG2(x): ceiling of log2(x).
`define CLOG2(x) $clog2(x)
// FLOG2(x): floor of log2(x), derived from $clog2 by subtracting one when x is not a power of two.
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
// LOG2UP(x): $clog2(x), but never less than 1 (usable as a vector width).
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
// ISPOW2(x): true when x is non-zero and has a single bit set.
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
// MIN / MAX: smaller / larger of the two arguments.
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
`define MAX(x, y) (((x) > (y)) ? (x) : (y))
// UP(x): x when it is positive, otherwise 1.
`define UP(x) (((x) > 0) ? (x) : 1)
`endif

144
hw/rtl/VX_print_instr.vh Normal file
View File

@@ -0,0 +1,144 @@
`ifndef VX_PRINT_INSTR
`define VX_PRINT_INSTR
`include "VX_define.vh"
// Writes the short mnemonic of an execution unit selector to the simulation
// output, without a trailing newline. A selector that matches none of the six
// known EX_* codes is reported as "NOP".
task print_ex_type;
    input [`EX_BITS-1:0] ex;
    begin
        // case-equality keeps the exact matching rules of a case statement
        if      (ex === `EX_ALU) $write("ALU");
        else if (ex === `EX_LSU) $write("LSU");
        else if (ex === `EX_CSR) $write("CSR");
        else if (ex === `EX_MUL) $write("MUL");
        else if (ex === `EX_FPU) $write("FPU");
        else if (ex === `EX_GPU) $write("GPU");
        else                     $write("NOP");
    end
endtask
// Writes the mnemonic of a decoded operation to the simulation output,
// without a trailing newline.
//   ex_type : execution unit selector (EX_* code); picks the decoder below
//   op_type : operation code, truncated to the width used by the selected unit
//   op_mod  : modifier; distinguishes branch from plain ALU operations and
//             selects the FPU_MISC sub-operation
// An operation code a decoder does not know is printed as "?"; an unknown
// ex_type prints nothing.
task print_ex_op;
    input [`EX_BITS-1:0] ex_type;
    input [`OP_BITS-1:0] op_type;
    input [`MOD_BITS-1:0] op_mod;
    begin
        case (ex_type)
        `EX_ALU: begin
            if (`IS_BR_MOD(op_mod)) begin
                case (`BR_BITS'(op_type))
                `BR_EQ:    $write("BEQ");
                `BR_NE:    $write("BNE");
                `BR_LT:    $write("BLT");
                `BR_GE:    $write("BGE");
                `BR_LTU:   $write("BLTU");
                `BR_GEU:   $write("BGEU");
                `BR_JAL:   $write("JAL");
                `BR_JALR:  $write("JALR");
                `BR_ECALL: $write("ECALL");
                `BR_EBREAK:$write("EBREAK");
                `BR_MRET:  $write("MRET");
                `BR_SRET:  $write("SRET");
                `BR_DRET:  $write("DRET");
                default:   $write("?");
                endcase
            end else begin
                case (`ALU_BITS'(op_type))
                `ALU_ADD:   $write("ADD");
                `ALU_SUB:   $write("SUB");
                `ALU_SLL:   $write("SLL");
                `ALU_SRL:   $write("SRL");
                `ALU_SRA:   $write("SRA");
                `ALU_SLT:   $write("SLT");
                `ALU_SLTU:  $write("SLTU");
                `ALU_XOR:   $write("XOR");
                `ALU_OR:    $write("OR");
                `ALU_AND:   $write("AND");
                `ALU_LUI:   $write("LUI");
                `ALU_AUIPC: $write("AUIPC");
                default:    $write("?");
                endcase
            end
        end
        `EX_LSU: begin
            case (`LSU_BITS'(op_type))
            `LSU_LB:  $write("LB");
            `LSU_LH:  $write("LH");
            `LSU_LW:  $write("LW");
            `LSU_LBU: $write("LBU");
            `LSU_LHU: $write("LHU");
            `LSU_SB:  $write("SB");
            `LSU_SH:  $write("SH");
            `LSU_SW:  $write("SW");
            `LSU_SBU: $write("SBU");
            `LSU_SHU: $write("SHU");
            default:  $write("?");
            endcase
        end
        `EX_CSR: begin
            case (`CSR_BITS'(op_type))
            `CSR_RW: $write("CSRW");
            `CSR_RS: $write("CSRS");
            `CSR_RC: $write("CSRC");
            default: $write("?");
            endcase
        end
        `EX_MUL: begin
            case (`MUL_BITS'(op_type))
            `MUL_MUL:   $write("MUL");
            `MUL_MULH:  $write("MULH");
            `MUL_MULHSU:$write("MULHSU");
            `MUL_MULHU: $write("MULHU");
            `MUL_DIV:   $write("DIV");
            `MUL_DIVU:  $write("DIVU");
            `MUL_REM:   $write("REM");
            `MUL_REMU:  $write("REMU");
            default:    $write("?");
            endcase
        end
        `EX_FPU: begin
            case (`FPU_BITS'(op_type))
            `FPU_ADD:   $write("ADD");
            `FPU_SUB:   $write("SUB");
            `FPU_MUL:   $write("MUL");
            `FPU_DIV:   $write("DIV");
            `FPU_SQRT:  $write("SQRT");
            `FPU_MADD:  $write("MADD");
            `FPU_NMSUB: $write("NMSUB");
            `FPU_NMADD: $write("NMADD");
            `FPU_CVTWS: $write("CVTWS");
            `FPU_CVTWUS:$write("CVTWUS");
            `FPU_CVTSW: $write("CVTSW");
            `FPU_CVTSWU:$write("CVTSWU");
            `FPU_CLASS: $write("CLASS");
            `FPU_CMP:   $write("CMP");
            `FPU_MISC: begin
                case (op_mod)
                0: $write("SGNJ");
                1: $write("SGNJN");
                2: $write("SGNJX");
                3: $write("MIN");
                4: $write("MAX");
                5: $write("MVXW");
                6: $write("MVWX");
                // unknown modifier: report it like every other decoder does
                // instead of silently printing nothing
                default: $write("?");
                endcase
            end
            default: $write("?");
            endcase
        end
        `EX_GPU: begin
            case (`GPU_BITS'(op_type))
            `GPU_TMC:   $write("TMC");
            `GPU_WSPAWN:$write("WSPAWN");
            `GPU_SPLIT: $write("SPLIT");
            `GPU_JOIN:  $write("JOIN");
            `GPU_BAR:   $write("BAR");
            default:    $write("?");
            endcase
        end
        default:;
        endcase
    end
endtask
`endif

View File

@@ -1,83 +0,0 @@
`include "VX_define.vh"
// Scoreboard-style scheduler.
// Tracks, per warp and per destination register, which threads still have a
// write-back outstanding (rename_table) and stalls the incoming back-end
// request while one of its registers is pending or while the execution stage
// it targets reports a delay. count_valid counts the registers that currently
// have a write-back outstanding; is_empty is asserted when that count is zero.
module VX_scheduler (
    input wire clk,
    input wire reset,
    input wire memory_delay,
    input wire exec_delay,
    input wire gpr_stage_delay,
    VX_backend_req_if bckE_req_if,
    VX_wb_if writeback_if,
    output wire schedule_delay,
    output wire is_empty
);
    // Wide enough to count every register of every warp (NUM_WARPS * 32) plus zero.
    localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);

    // rename_table[warp][reg] holds the thread mask still waiting for a write-back.
    reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
    reg [CTVW-1:0] count_valid;

    // Classification of the incoming request.
    wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
    wire is_load  = (bckE_req_if.mem_read != `BYTE_EN_NO);
    wire is_mem   = (is_store || is_load);
    wire is_gpu   = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
    wire is_csr   = bckE_req_if.is_csr;
    wire is_exec  = !is_mem && !is_gpu && !is_csr;

    // rs2 only creates a dependency for the request kinds that read it.
    wire using_rs2 = is_store
                  || (bckE_req_if.rs2_src == `RS2_REG)
                  || bckE_req_if.is_barrier
                  || bckE_req_if.is_wspawn;

    wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0);
    wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0);
    wire rd_rename  = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0);

    // Register 0 never creates a dependency.
    wire rs1_rename_qual = (rs1_rename) && (bckE_req_if.rs1 != 0);
    wire rs2_rename_qual = (rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2);
    wire rd_rename_qual  = (rd_rename) && (bckE_req_if.rd != 0);

    wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;

    assign schedule_delay = (| bckE_req_if.valid)
                         && ((rename_valid)
                          || (memory_delay && is_mem)
                          || (gpr_stage_delay && (is_mem || is_exec))
                          || (exec_delay && is_exec));

    assign is_empty = (count_valid == 0);

    integer i, w;

    wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0) && !schedule_delay;
    wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);

    // Threads of the written-back register that are still pending after this write-back.
    wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;

    // A register is fully released once its last pending thread has written back.
    wire release_full = release_rd && (0 == valid_wb_new_mask);

    // This must be a net with a continuous assignment: a "reg x = expr"
    // declaration only evaluates expr once as an initial value, which would
    // freeze the counter at its time-zero value.
    wire [CTVW-1:0] count_valid_next = (acquire_rd && !release_full) ? (count_valid + 1) :
                                       (~acquire_rd && release_full) ? (count_valid - 1) :
                                                                       count_valid;

    always @(posedge clk) begin
        if (reset) begin
            for (w = 0; w < `NUM_WARPS; w++) begin
                for (i = 0; i < 32; i++) begin
                    rename_table[w][i] <= 0;
                end
            end
            count_valid <= 0;
        end else begin
            if (acquire_rd) begin
                rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
            end
            if (release_rd) begin
                assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
                rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
            end
            count_valid <= count_valid_next;
        end
    end
endmodule

View File

@@ -15,13 +15,13 @@
scope_snp_req_invalidate, \
scope_snp_req_tag, \
scope_snp_rsp_tag, \
scope_icache_req_warp_num, \
scope_icache_req_wid, \
scope_icache_req_addr, \
scope_icache_req_tag, \
scope_icache_rsp_data, \
scope_icache_rsp_tag, \
scope_dcache_req_warp_num, \
scope_dcache_req_curr_PC, \
scope_dcache_req_wid, \
scope_dcache_req_pc, \
scope_dcache_req_addr, \
scope_dcache_req_rw, \
scope_dcache_req_byteen, \
@@ -29,19 +29,27 @@
scope_dcache_req_tag, \
scope_dcache_rsp_data, \
scope_dcache_rsp_tag, \
scope_decode_warp_num, \
scope_decode_curr_PC, \
scope_decode_is_jal, \
scope_decode_rs1, \
scope_decode_rs2, \
scope_execute_warp_num, \
scope_execute_curr_PC, \
scope_execute_rd, \
scope_execute_a, \
scope_execute_b, \
scope_writeback_warp_num, \
scope_writeback_curr_PC, \
scope_writeback_wb, \
scope_issue_wid, \
scope_issue_tmask, \
scope_issue_pc, \
scope_issue_ex_type, \
scope_issue_op_type, \
scope_issue_op_mod, \
scope_issue_wb, \
scope_issue_rd, \
scope_issue_rs1, \
scope_issue_rs2, \
scope_issue_rs3, \
scope_issue_imm, \
scope_issue_rs1_is_pc, \
scope_issue_rs2_is_imm, \
scope_gpr_rsp_wid, \
scope_gpr_rsp_pc, \
scope_gpr_rsp_a, \
scope_gpr_rsp_b, \
scope_gpr_rsp_c, \
scope_writeback_wid, \
scope_writeback_pc, \
scope_writeback_rd, \
scope_writeback_data, \
scope_bank_addr_st0, \
@@ -51,7 +59,6 @@
scope_bank_miss_st1, \
scope_bank_dirty_st1, \
scope_bank_force_miss_st1,
`define SCOPE_SIGNALS_UPD_LIST \
scope_dram_req_valid, \
@@ -70,18 +77,18 @@
scope_dcache_req_ready, \
scope_dcache_rsp_valid, \
scope_dcache_rsp_ready, \
scope_decode_valid, \
scope_execute_valid, \
scope_writeback_valid, \
scope_schedule_delay, \
scope_memory_delay, \
scope_exec_delay, \
scope_gpr_stage_delay, \
scope_busy, \
scope_bank_valid_st0, \
scope_bank_valid_st1, \
scope_bank_valid_st2, \
scope_bank_stall_pipe
scope_bank_stall_pipe, \
scope_issue_valid, \
scope_issue_ready, \
scope_gpr_rsp_valid, \
scope_writeback_valid, \
scope_scoreboard_delay, \
scope_gpr_delay, \
scope_execute_delay, \
scope_busy
`define SCOPE_SIGNALS_DECL \
wire scope_dram_req_valid; \
@@ -103,51 +110,53 @@
wire scope_snp_rsp_valid; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
wire scope_icache_req_valid; \
wire [`NW_BITS-1:0] scope_icache_req_warp_num; \
wire [`NW_BITS-1:0] scope_icache_req_wid; \
wire [31:0] scope_icache_req_addr; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag; \
wire scope_icache_req_ready; \
wire scope_icache_rsp_valid; \
wire [31:0] scope_icache_rsp_data; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag; \
wire scope_icache_rsp_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \
wire [`NW_BITS-1:0] scope_dcache_req_warp_num; \
wire [31:0] scope_dcache_req_curr_PC; \
wire [63:0] scope_dcache_req_addr; \
wire [`NW_BITS-1:0] scope_dcache_req_wid; \
wire [31:0] scope_dcache_req_pc; \
wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr; \
wire scope_dcache_req_rw; \
wire [7:0] scope_dcache_req_byteen; \
wire [63:0] scope_dcache_req_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \
wire [`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen; \
wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data; \
wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag; \
wire scope_dcache_req_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \
wire [63:0] scope_dcache_rsp_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \
wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data; \
wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag; \
wire scope_dcache_rsp_ready; \
wire scope_busy; \
wire scope_snp_rsp_ready; \
wire scope_schedule_delay; \
wire scope_memory_delay; \
wire scope_exec_delay; \
wire scope_gpr_stage_delay; \
wire [`NUM_THREADS-1:0] scope_decode_valid; \
wire [`NW_BITS-1:0] scope_decode_warp_num; \
wire [31:0] scope_decode_curr_PC; \
wire scope_decode_is_jal; \
wire [4:0] scope_decode_rs1; \
wire [4:0] scope_decode_rs2; \
wire [`NUM_THREADS-1:0] scope_execute_valid; \
wire [`NW_BITS-1:0] scope_execute_warp_num; \
wire [31:0] scope_execute_curr_PC; \
wire [4:0] scope_execute_rd; \
wire [63:0] scope_execute_a; \
wire [63:0] scope_execute_b; \
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
wire [31:0] scope_writeback_curr_PC; \
wire [1:0] scope_writeback_wb; \
wire [4:0] scope_writeback_rd; \
wire [63:0] scope_writeback_data; \
wire [`NW_BITS-1:0] scope_issue_wid; \
wire [`NUM_THREADS-1:0] scope_issue_tmask; \
wire [31:0] scope_issue_pc; \
wire [`EX_BITS-1:0] scope_issue_ex_type; \
wire [`OP_BITS-1:0] scope_issue_op_type; \
wire [`MOD_BITS-1:0] scope_issue_op_mod; \
wire scope_issue_wb; \
wire [`NR_BITS-1:0] scope_issue_rd; \
wire [`NR_BITS-1:0] scope_issue_rs1; \
wire [`NR_BITS-1:0] scope_issue_rs2; \
wire [`NR_BITS-1:0] scope_issue_rs3; \
wire [31:0] scope_issue_imm; \
wire scope_issue_rs1_is_pc; \
wire scope_issue_rs2_is_imm; \
wire scope_gpr_rsp_valid; \
wire [`NW_BITS-1:0] scope_gpr_rsp_wid; \
wire [31:0] scope_gpr_rsp_pc; \
wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a; \
wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b; \
wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c; \
wire scope_writeback_valid; \
wire [`NW_BITS-1:0] scope_writeback_wid; \
wire [31:0] scope_writeback_pc; \
wire [`NR_BITS-1:0] scope_writeback_rd; \
wire [`NUM_THREADS-1:0][31:0] scope_writeback_data; \
wire scope_bank_valid_st0; \
wire scope_bank_valid_st1; \
wire scope_bank_valid_st2; \
@@ -158,35 +167,39 @@
wire scope_bank_miss_st1; \
wire scope_bank_dirty_st1; \
wire scope_bank_force_miss_st1; \
wire scope_bank_stall_pipe;
wire scope_bank_stall_pipe; \
wire scope_issue_valid; \
wire scope_issue_ready; \
wire scope_scoreboard_delay; \
wire scope_gpr_delay; \
wire scope_execute_delay; \
wire scope_busy;
`define SCOPE_SIGNALS_ISTAGE_IO \
output wire scope_icache_req_valid, \
output wire [`NW_BITS-1:0] scope_icache_req_warp_num, \
output wire [`NW_BITS-1:0] scope_icache_req_wid, \
output wire [31:0] scope_icache_req_addr, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag, \
output wire scope_icache_req_ready, \
output wire scope_icache_rsp_valid, \
output wire [31:0] scope_icache_rsp_data, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \
output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag, \
output wire scope_icache_rsp_ready,
`define SCOPE_SIGNALS_LSU_IO \
output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \
output wire [`NW_BITS-1:0] scope_dcache_req_warp_num, \
output wire [31:0] scope_dcache_req_curr_PC, \
output wire [63:0] scope_dcache_req_addr, \
output wire [`NW_BITS-1:0] scope_dcache_req_wid, \
output wire [31:0] scope_dcache_req_pc, \
output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr, \
output wire scope_dcache_req_rw, \
output wire [7:0] scope_dcache_req_byteen, \
output wire [63:0] scope_dcache_req_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \
output wire [`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen, \
output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data, \
output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag, \
output wire scope_dcache_req_ready, \
output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \
output wire [63:0] scope_dcache_rsp_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \
output wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data, \
output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag, \
output wire scope_dcache_rsp_ready,
`define SCOPE_SIGNALS_CORE_IO \
`define SCOPE_SIGNALS_CACHE_IO \
output wire scope_bank_valid_st0, \
@@ -201,36 +214,43 @@
output wire scope_bank_force_miss_st1, \
output wire scope_bank_stall_pipe,
`define SCOPE_SIGNALS_PIPELINE_IO \
output wire scope_busy, \
output wire scope_schedule_delay, \
output wire scope_memory_delay, \
output wire scope_exec_delay, \
output wire scope_gpr_stage_delay,
`define SCOPE_SIGNALS_BE_IO \
output wire [`NUM_THREADS-1:0] scope_decode_valid, \
output wire [`NW_BITS-1:0] scope_decode_warp_num, \
output wire [31:0] scope_decode_curr_PC, \
output wire scope_decode_is_jal, \
output wire [4:0] scope_decode_rs1, \
output wire [4:0] scope_decode_rs2, \
output wire [`NUM_THREADS-1:0] scope_execute_valid, \
output wire [`NW_BITS-1:0] scope_execute_warp_num, \
output wire [31:0] scope_execute_curr_PC, \
output wire [4:0] scope_execute_rd, \
output wire [63:0] scope_execute_a, \
output wire [63:0] scope_execute_b, \
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
output wire [31:0] scope_writeback_curr_PC, \
output wire [1:0] scope_writeback_wb, \
output wire [4:0] scope_writeback_rd, \
output wire [63:0] scope_writeback_data,
`define SCOPE_SIGNALS_ISSUE_IO \
output wire scope_issue_valid, \
output wire [`NW_BITS-1:0] scope_issue_wid, \
output wire [`NUM_THREADS-1:0] scope_issue_tmask, \
output wire [31:0] scope_issue_pc, \
output wire [`EX_BITS-1:0] scope_issue_ex_type, \
output wire [`OP_BITS-1:0] scope_issue_op_type, \
output wire [`MOD_BITS-1:0] scope_issue_op_mod, \
output wire scope_issue_wb, \
output wire [`NR_BITS-1:0] scope_issue_rd, \
output wire [`NR_BITS-1:0] scope_issue_rs1, \
output wire [`NR_BITS-1:0] scope_issue_rs2, \
output wire [`NR_BITS-1:0] scope_issue_rs3, \
output wire [31:0] scope_issue_imm, \
output wire scope_issue_rs1_is_pc, \
output wire scope_issue_rs2_is_imm, \
output wire scope_writeback_valid, \
output wire scope_gpr_rsp_valid, \
output wire [`NW_BITS-1:0] scope_gpr_rsp_wid, \
output wire [31:0] scope_gpr_rsp_pc, \
output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a, \
output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b, \
output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c, \
output wire [`NW_BITS-1:0] scope_writeback_wid, \
output wire [31:0] scope_writeback_pc, \
output wire [`NR_BITS-1:0] scope_writeback_rd, \
output wire [`NUM_THREADS-1:0][31:0] scope_writeback_data, \
output wire scope_issue_ready, \
output wire scope_scoreboard_delay, \
output wire scope_gpr_delay, \
output wire scope_execute_delay,
`define SCOPE_SIGNALS_EXECUTE_IO
`define SCOPE_SIGNALS_ISTAGE_BIND \
.scope_icache_req_valid (scope_icache_req_valid), \
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
.scope_icache_req_wid (scope_icache_req_wid), \
.scope_icache_req_addr (scope_icache_req_addr), \
.scope_icache_req_tag (scope_icache_req_tag), \
.scope_icache_req_ready (scope_icache_req_ready), \
@@ -241,8 +261,8 @@
`define SCOPE_SIGNALS_LSU_BIND \
.scope_dcache_req_valid (scope_dcache_req_valid), \
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
.scope_dcache_req_wid (scope_dcache_req_wid), \
.scope_dcache_req_pc (scope_dcache_req_pc), \
.scope_dcache_req_addr (scope_dcache_req_addr), \
.scope_dcache_req_rw (scope_dcache_req_rw), \
.scope_dcache_req_byteen(scope_dcache_req_byteen), \
@@ -254,8 +274,6 @@
.scope_dcache_rsp_tag (scope_dcache_rsp_tag), \
.scope_dcache_rsp_ready (scope_dcache_rsp_ready),
`define SCOPE_SIGNALS_CORE_BIND \
`define SCOPE_SIGNALS_CACHE_BIND \
.scope_bank_valid_st0 (scope_bank_valid_st0), \
.scope_bank_valid_st1 (scope_bank_valid_st1), \
@@ -266,7 +284,7 @@
.scope_bank_is_mrvq_st1 (scope_bank_is_mrvq_st1), \
.scope_bank_miss_st1 (scope_bank_miss_st1), \
.scope_bank_dirty_st1 (scope_bank_dirty_st1), \
.scope_bank_force_miss_st1 (scope_bank_force_miss_st1), \
.scope_bank_force_miss_st1(scope_bank_force_miss_st1), \
.scope_bank_stall_pipe (scope_bank_stall_pipe),
`define SCOPE_SIGNALS_CACHE_UNBIND \
@@ -311,60 +329,65 @@
assign scope_bank_stall_pipe = scope_per_bank_stall_pipe[0];
`define SCOPE_SIGNALS_CACHE_BANK_BIND \
.scope_bank_valid_st0 (scope_per_bank_valid_st0[i]), \
.scope_bank_valid_st1 (scope_per_bank_valid_st1[i]), \
.scope_bank_valid_st2 (scope_per_bank_valid_st2[i]), \
.scope_bank_addr_st0 (scope_per_bank_addr_st0[i]), \
.scope_bank_addr_st1 (scope_per_bank_addr_st1[i]), \
.scope_bank_addr_st2 (scope_per_bank_addr_st2[i]), \
.scope_bank_valid_st0 (scope_per_bank_valid_st0[i]), \
.scope_bank_valid_st1 (scope_per_bank_valid_st1[i]), \
.scope_bank_valid_st2 (scope_per_bank_valid_st2[i]), \
.scope_bank_addr_st0 (scope_per_bank_addr_st0[i]), \
.scope_bank_addr_st1 (scope_per_bank_addr_st1[i]), \
.scope_bank_addr_st2 (scope_per_bank_addr_st2[i]), \
.scope_bank_is_mrvq_st1 (scope_per_bank_is_mrvq_st1[i]), \
.scope_bank_miss_st1 (scope_per_bank_miss_st1[i]), \
.scope_bank_dirty_st1 (scope_per_bank_dirty_st1[i]), \
.scope_bank_miss_st1 (scope_per_bank_miss_st1[i]), \
.scope_bank_dirty_st1 (scope_per_bank_dirty_st1[i]), \
.scope_bank_force_miss_st1 (scope_per_bank_force_miss_st1[i]), \
.scope_bank_stall_pipe (scope_per_bank_stall_pipe[i]),
.scope_bank_stall_pipe (scope_per_bank_stall_pipe[i]),
`define SCOPE_SIGNALS_PIPELINE_BIND \
.scope_busy (scope_busy), \
.scope_schedule_delay (scope_schedule_delay), \
.scope_memory_delay (scope_memory_delay), \
.scope_exec_delay (scope_exec_delay), \
.scope_gpr_stage_delay (scope_gpr_stage_delay),
`define SCOPE_SIGNALS_BE_BIND \
.scope_decode_valid (scope_decode_valid), \
.scope_decode_warp_num (scope_decode_warp_num), \
.scope_decode_curr_PC (scope_decode_curr_PC), \
.scope_decode_is_jal (scope_decode_is_jal), \
.scope_decode_rs1 (scope_decode_rs1), \
.scope_decode_rs2 (scope_decode_rs2), \
.scope_execute_valid (scope_execute_valid), \
.scope_execute_warp_num (scope_execute_warp_num), \
.scope_execute_curr_PC (scope_execute_curr_PC), \
.scope_execute_rd (scope_execute_rd), \
.scope_execute_a (scope_execute_a), \
.scope_execute_b (scope_execute_b), \
`define SCOPE_SIGNALS_ISSUE_BIND \
.scope_issue_valid (scope_issue_valid), \
.scope_issue_wid (scope_issue_wid), \
.scope_issue_tmask (scope_issue_tmask), \
.scope_issue_pc (scope_issue_pc), \
.scope_issue_ex_type (scope_issue_ex_type), \
.scope_issue_op_type (scope_issue_op_type), \
.scope_issue_op_mod (scope_issue_op_mod), \
.scope_issue_wb (scope_issue_wb), \
.scope_issue_rd (scope_issue_rd), \
.scope_issue_rs1 (scope_issue_rs1), \
.scope_issue_rs2 (scope_issue_rs2), \
.scope_issue_rs3 (scope_issue_rs3), \
.scope_issue_imm (scope_issue_imm), \
.scope_issue_rs1_is_pc (scope_issue_rs1_is_pc), \
.scope_issue_rs2_is_imm (scope_issue_rs2_is_imm), \
.scope_writeback_valid (scope_writeback_valid), \
.scope_writeback_warp_num (scope_writeback_warp_num), \
.scope_writeback_curr_PC(scope_writeback_curr_PC), \
.scope_writeback_wb (scope_writeback_wb), \
.scope_writeback_wid (scope_writeback_wid), \
.scope_writeback_pc (scope_writeback_pc), \
.scope_writeback_rd (scope_writeback_rd), \
.scope_writeback_data (scope_writeback_data),
.scope_writeback_data (scope_writeback_data), \
.scope_issue_ready (scope_issue_ready), \
.scope_gpr_rsp_valid (scope_gpr_rsp_valid), \
.scope_gpr_rsp_wid (scope_gpr_rsp_wid), \
.scope_gpr_rsp_pc (scope_gpr_rsp_pc), \
.scope_gpr_rsp_a (scope_gpr_rsp_a), \
.scope_gpr_rsp_b (scope_gpr_rsp_b), \
.scope_gpr_rsp_c (scope_gpr_rsp_c), \
.scope_scoreboard_delay (scope_scoreboard_delay), \
.scope_gpr_delay (scope_gpr_delay), \
.scope_execute_delay (scope_execute_delay), \
`define SCOPE_SIGNALS_EXECUTE_BIND
`define SCOPE_ASSIGN(d,s) assign d = s
`else
`define SCOPE_SIGNALS_ISTAGE_IO
`define SCOPE_SIGNALS_LSU_IO
`define SCOPE_SIGNALS_CORE_IO
`define SCOPE_SIGNALS_CACHE_IO
`define SCOPE_SIGNALS_PIPELINE_IO
`define SCOPE_SIGNALS_BE_IO
`define SCOPE_SIGNALS_ISSUE_IO
`define SCOPE_SIGNALS_EXECUTE_IO
`define SCOPE_SIGNALS_ISTAGE_BIND
`define SCOPE_SIGNALS_LSU_BIND
`define SCOPE_SIGNALS_CORE_BIND
`define SCOPE_SIGNALS_CACHE_BIND
`define SCOPE_SIGNALS_PIPELINE_BIND
`define SCOPE_SIGNALS_BE_BIND
`define SCOPE_SIGNALS_ISSUE_BIND
`define SCOPE_SIGNALS_EXECUTE_BIND
`define SCOPE_SIGNALS_CACHE_UNBIND
`define SCOPE_SIGNALS_CACHE_BANK_SELECT

76
hw/rtl/VX_scoreboard.v Normal file
View File

@@ -0,0 +1,76 @@
`include "VX_define.vh"
// VX_scoreboard: register scoreboard placed at the dequeue side of the
// instruction buffer.
//
// It records, per warp and per register, which destination registers have
// been reserved by an issued instruction and not yet fully written back.
// `delay` is raised when the instruction presented on ibuf_deq_if uses a
// register that is still marked in use; `ibuf_deq_if.ready` is de-asserted
// whenever `delay`, `exe_delay` or `gpr_delay` is set.
//
// Parameters:
//   CORE_ID      - only used in the DBG_PRINT_PIPELINE trace message.
// Ports:
//   clk, reset   - clock and synchronous reset (reset is sampled on posedge clk).
//   ibuf_deq_if  - instruction being dequeued; this module reads valid, wid,
//                  rd, wb, tmask, used_regs (plus PC/rs1/rs2/rs3 for the debug
//                  trace) and drives ready.
//   writeback_if - completed results; valid, ready, wid, rd and tmask are read.
//   deq_wid_next - warp id used to pre-select the in-use row that is registered
//                  into deq_used_regs (presumably the warp that will be at the
//                  head of the buffer in the next cycle -- confirm against the
//                  instantiating module).
//   exe_delay, gpr_delay - external stall requests, OR-ed into the ready logic.
//   delay        - scoreboard hazard indication (any needed register in use).
module VX_scoreboard #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
VX_decode_if ibuf_deq_if,
VX_writeback_if writeback_if,
input wire [`NW_BITS-1:0] deq_wid_next,
input wire exe_delay,
input wire gpr_delay,
output wire delay
);
// Per (warp, register) mask of threads whose write is still outstanding.
// Indexed by the concatenation {wid, rd}; the reset loop uses the equivalent
// linear index w * `NUM_REGS + i.
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
// One bit per (warp, register): set while the register is reserved.
// inuse_reg_mask_n is the combinational next-state value.
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_reg_mask, inuse_reg_mask_n;
// Registered copy of the next-state in-use row selected by deq_wid_next.
reg [`NUM_REGS-1:0] deq_used_regs;
// Registers the current instruction uses that are still in use.
wire [`NUM_REGS-1:0] inuse_regs = deq_used_regs & ibuf_deq_if.used_regs;
assign delay = (| inuse_regs);
// Reserve the destination register when an instruction that writes back
// (wb != 0) is actually accepted (valid && ready).
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && (ibuf_deq_if.wb != 0);
// Release on every accepted writeback.
wire release_reg = writeback_if.valid && writeback_if.ready;
// Outstanding-thread mask of the written-back register after clearing the
// threads covered by this writeback; the register stays in use until this
// mask reaches zero.
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.wid, writeback_if.rd}] & ~writeback_if.tmask;
// Next-state computation of the per-warp in-use bitmap.
// The release assignment comes last, so it wins if a reserve and a release
// address the same (warp, register) bit in the same cycle.
always @(*) begin
inuse_reg_mask_n = inuse_reg_mask;
if (reserve_reg) begin
inuse_reg_mask_n[ibuf_deq_if.wid][ibuf_deq_if.rd] = 1;
end
if (release_reg) begin
inuse_reg_mask_n[writeback_if.wid][writeback_if.rd] = (| inuse_registers_n);
end
end
always @(posedge clk) begin
if (reset) begin
// Clear every thread mask and every in-use bit.
for (integer w = 0; w < `NUM_WARPS; w++) begin
for (integer i = 0; i < `NUM_REGS; i++) begin
inuse_registers[w * `NUM_REGS + i] <= 0;
end
inuse_reg_mask[w] <= `NUM_REGS'(0);
end
end else begin
if (reserve_reg) begin
// All threads active in the issuing instruction are now pending.
inuse_registers[{ibuf_deq_if.wid, ibuf_deq_if.rd}] <= ibuf_deq_if.tmask;
end
if (release_reg) begin
// A writeback must target a register that was previously reserved.
assert(inuse_reg_mask[writeback_if.wid][writeback_if.rd] != 0);
// Written after the reserve above: for the same entry in the same cycle
// this non-blocking assignment takes precedence, matching the
// combinational block.
inuse_registers[{writeback_if.wid, writeback_if.rd}] <= inuse_registers_n;
end
inuse_reg_mask <= inuse_reg_mask_n;
end
// Updated on every clock edge, including while reset is asserted; it samples
// the next-state mask so that it already reflects this cycle's
// reserve/release.
deq_used_regs <= inuse_reg_mask_n[deq_wid_next];
end
// issue the instruction: accept it only when there is no scoreboard hazard
// and neither the execute nor the GPR stage requests a stall
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
// Optional simulation trace: prints one line per cycle in which a valid
// instruction is held back, showing the in-use status of rd/rs1/rs2/rs3 and
// the external delay inputs.
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
end
end
`endif
endmodule

51
hw/rtl/VX_types.vh Normal file
View File

@@ -0,0 +1,51 @@
`ifndef VX_TYPES
`define VX_TYPES
`include "VX_define.vh"
// Seven one-bit flags describing a floating-point value. The struct is
// packed, so is_normal is the most significant bit and is_quiet the least
// significant one.
// NOTE(review): judging by the field names these classify an operand as
// normal / zero / subnormal / infinity / NaN (signaling or quiet); the logic
// that fills them in is not part of this file -- confirm at the point of use.
typedef struct packed {
logic is_normal;
logic is_zero;
logic is_subnormal;
logic is_inf;
logic is_nan;
logic is_signaling;
logic is_quiet;
} fp_type_t;
// Floating-point exception flags, one bit per condition. Packed, so NV is the
// most significant bit and NX the least significant one.
// NOTE(review): the NV/DZ/OF/UF/NX ordering looks like the RISC-V fcsr
// "fflags" field layout -- confirm before relying on bit positions.
typedef struct packed {
logic NV; // Invalid
logic DZ; // Divide by zero
logic OF; // Overflow
logic UF; // Underflow
logic NX; // Inexact
} fflags_t;
// Width in bits of fflags_t (5 with the fields declared above).
`define FFG_BITS $bits(fflags_t)
// Thread-mask request: a valid bit plus one mask bit per thread.
// NOTE(review): presumably the type of warp_ctl_if.tmc, which VX_warp_sched
// uses to overwrite a warp's thread mask (an all-zero tmask also deactivates
// the warp there) -- confirm against VX_warp_ctl_if.
typedef struct packed {
logic valid;
logic [`NUM_THREADS-1:0] tmask;
} gpu_tmc_t;
// Warp-spawn request: a valid bit, one mask bit per warp and a 32-bit PC.
// NOTE(review): presumably the type of warp_ctl_if.wspawn; VX_warp_sched
// copies wmask into its active-warp set and uses pc as the start address of
// the spawned warps -- confirm against VX_warp_ctl_if.
typedef struct packed {
logic valid;
logic [`NUM_WARPS-1:0] wmask;
logic [31:0] pc;
} gpu_wspawn_t;
// Split (divergence) request: valid bit, a flag telling whether the threads
// actually diverged, the two per-thread masks and a 32-bit PC.
// NOTE(review): presumably the type of warp_ctl_if.split; in VX_warp_sched a
// diverged split makes then_mask the warp's current thread mask and pushes
// {pc, else_mask} on the warp's IPDOM stack -- confirm against VX_warp_ctl_if.
typedef struct packed {
logic valid;
logic diverged;
logic [`NUM_THREADS-1:0] then_mask;
logic [`NUM_THREADS-1:0] else_mask;
logic [31:0] pc;
} gpu_split_t;
// Barrier request: valid bit, barrier identifier (`NB_BITS wide) and a
// `NW_BITS-wide size field.
// NOTE(review): presumably the type of warp_ctl_if.barrier; VX_warp_sched
// compares size_m1 with the number of warps already waiting on barrier `id`,
// which suggests it holds "participating warps minus one" -- confirm.
typedef struct packed {
logic valid;
logic [`NB_BITS-1:0] id;
logic [`NW_BITS-1:0] size_m1;
} gpu_barrier_t;
`endif

View File

@@ -1,69 +0,0 @@
`include "VX_define.vh"
// VX_warp: per-warp program counter and thread-enable state.
//
// Thread mask:
//   * reset leaves only thread 0 (the LSB) enabled;
//   * `remove` clears the stored mask, `change_mask` replaces it with
//     `thread_mask` (reset > remove > change_mask).
//   The `valid` output bypasses the stored mask with `thread_mask` while
//   `change_mask` is asserted, and is forced to zero while `stall` is high.
//
// Program counter:
//   * `PC` is combinational: `dest` on `jal`, else `branch_dest` on
//     `branch_dir`, else the stored sequential PC;
//   * the stored PC resets to 0, is loaded with `wspawn_pc` on `wspawn`,
//     otherwise follows `PC` (+4 when not stalled, unchanged when stalled).
module VX_warp (
    input  wire                    clk,
    input  wire                    reset,
    input  wire                    stall,
    input  wire                    remove,
    input  wire[`NUM_THREADS-1:0]  thread_mask,
    input  wire                    change_mask,
    input  wire                    jal,
    input  wire[31:0]              dest,
    input  wire                    branch_dir,
    input  wire[31:0]              branch_dest,
    input  wire                    wspawn,
    input  wire[31:0]              wspawn_pc,
    output wire[31:0]              PC,
    output wire[`NUM_THREADS-1:0]  valid
);

    reg [`NUM_THREADS-1:0] active_mask; // stored thread-enable mask
    reg [31:0]             seq_pc;      // stored (sequential) program counter
    reg [31:0]             sel_pc;      // PC selected for the current cycle

    // ---------------------------------------------------------------
    // Thread mask register
    // ---------------------------------------------------------------
    always @(posedge clk) begin
        if (reset) begin
            active_mask <= {{(`NUM_THREADS-1){1'b0}},1'b1}; // only thread 0 enabled
        end else if (remove) begin
            active_mask <= 0;
        end else if (change_mask) begin
            active_mask <= thread_mask;
        end
    end

    // A new mask is visible immediately; a stalled warp reports no valid threads.
    assign valid = change_mask ? thread_mask
                 : stall       ? {`NUM_THREADS{1'b0}}
                 :               active_mask;

    // ---------------------------------------------------------------
    // PC selection: later statements override earlier ones, giving the
    // priority jal > taken branch > sequential PC.
    // ---------------------------------------------------------------
    always @(*) begin
        sel_pc = seq_pc;
        if (branch_dir) begin
            sel_pc = branch_dest;
        end
        if (jal == 1'b1) begin
            sel_pc = dest;
        end
    end

    assign PC = sel_pc;

    // ---------------------------------------------------------------
    // Sequential PC register: hold the selected PC, advance by 4 when
    // not stalled, and let a warp spawn override both.
    // ---------------------------------------------------------------
    always @(posedge clk) begin
        if (reset) begin
            seq_pc <= 0;
        end else begin
            seq_pc <= sel_pc;
            if (!stall) begin
                seq_pc <= sel_pc + 32'h4;
            end
            if (wspawn) begin
                seq_pc <= wspawn_pc;
            end
        end
    end

endmodule

View File

@@ -1,327 +1,245 @@
`include "VX_define.vh"
module VX_warp_sched (
input wire clk,
input wire reset,
input wire stall,
module VX_warp_sched #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// Wspawn
input wire wspawn,
input wire[31:0] wsapwn_pc,
input wire[`NUM_WARPS-1:0] wspawn_new_active,
VX_warp_ctl_if warp_ctl_if,
VX_wstall_if wstall_if,
VX_join_if join_if,
VX_branch_ctl_if branch_ctl_if,
// CTM
input wire ctm,
input wire[`NUM_THREADS-1:0] ctm_mask,
input wire[`NW_BITS-1:0] ctm_warp_num,
VX_ifetch_rsp_if ifetch_rsp_if,
VX_ifetch_req_if ifetch_req_if,
// WHALT
input wire whalt,
input wire[`NW_BITS-1:0] whalt_warp_num,
input wire is_barrier,
`DEBUG_BEGIN
input wire[31:0] barrier_id,
`DEBUG_END
input wire[$clog2(`NUM_WARPS):0] num_warps,
input wire[`NW_BITS-1:0] barrier_warp_num,
// WSTALL
input wire wstall,
input wire [`NW_BITS-1:0] wstall_warp_num,
// Split
input wire is_split,
input wire dont_split,
input wire [`NUM_THREADS-1:0] split_new_mask,
input wire [`NUM_THREADS-1:0] split_later_mask,
input wire [31:0] split_save_pc,
input wire [`NW_BITS-1:0] split_warp_num,
// Join
input wire is_join,
input wire [`NW_BITS-1:0] join_warp_num,
// JAL
input wire jal,
input wire [31:0] dest,
input wire [`NW_BITS-1:0] jal_warp_num,
// Branch
input wire branch_valid,
input wire branch_dir,
input wire [31:0] branch_dest,
input wire [`NW_BITS-1:0] branch_warp_num,
output wire [`NUM_THREADS-1:0] thread_mask,
output wire [`NW_BITS-1:0] warp_num,
output wire [31:0] warp_pc,
output wire busy,
output wire scheduled_warp,
input wire [`NW_BITS-1:0] icache_stage_wid,
input wire icache_stage_response
output wire busy
);
wire update_use_wspawn;
wire update_visible_active;
wire join_fall;
wire [31:0] join_pc;
wire [`NUM_THREADS-1:0] join_tm;
wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
wire join_fall;
wire[31:0] join_pc;
wire[`NUM_THREADS-1:0] join_tm;
`DEBUG_BEGIN
wire in_wspawn = wspawn;
wire in_ctm = ctm;
wire in_whalt = whalt;
wire in_wstall = wstall;
`DEBUG_END
reg[`NUM_WARPS-1:0] warp_active;
reg[`NUM_WARPS-1:0] warp_stalled;
reg [`NUM_WARPS-1:0] visible_active;
wire[`NUM_WARPS-1:0] use_active;
reg [`NUM_WARPS-1:0] warp_lock;
wire wstall_this_cycle;
reg [`NUM_WARPS-1:0] active_warps; // real active warps (updated when a warp is activated or disabled)
reg [`NUM_WARPS-1:0] stalled_warps; // asserted when a branch/gpgpu instructions are issued
reg [`NUM_WARPS-1:0] schedule_table, schedule_table_n; // enforces round-robin, barrier, and non-speculating branches
// Lock warp until instruction decode to resolve branches
reg [`NUM_WARPS-1:0] fetch_lock;
reg [`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
reg [31:0] warp_pcs[`NUM_WARPS-1:0];
// barriers
reg [`NUM_WARPS-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
wire reached_barrier_limit;
wire [`NUM_WARPS-1:0] b_mask;
wire [$clog2(`NUM_WARPS):0] b_count;
// wsapwn
reg [31:0] use_wsapwn_pc;
reg [`NUM_WARPS-1:0] use_wsapwn;
wire [`NW_BITS-1:0] warp_to_schedule;
wire schedule;
wire hazard;
wire global_stall;
wire real_schedule;
wire [31:0] new_pc;
reg [`NUM_WARPS-1:0] total_barrier_stall;
reg didnt_split;
integer w, b;
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0]; // warps waiting on barrier
wire reached_barrier_limit; // the expected number of warps reached the barrier
// wspawn
reg [31:0] use_wspawn_pc;
reg [`NUM_WARPS-1:0] use_wspawn;
reg [31:0] warp_pc;
reg [`NW_BITS-1:0] warp_to_schedule;
wire scheduled_warp;
reg didnt_split;
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
always @(*) begin
schedule_table_n = schedule_table;
if (warp_ctl_if.valid
&& warp_ctl_if.tmc.valid
&& (0 == warp_ctl_if.tmc.tmask)) begin
schedule_table_n[warp_ctl_if.wid] = 0;
end
if (scheduled_warp) begin // remove scheduled warp (round-robin)
schedule_table_n[warp_to_schedule] = 0;
end
end
always @(posedge clk) begin
if (reset) begin
for (b = 0; b < `NUM_BARRIERS; b=b+1) begin
barrier_stall_mask[b] <= 0;
end
use_wsapwn_pc <= 0;
use_wsapwn <= 0;
warp_pcs[0] <= `STARTUP_ADDR;
warp_active[0] <= 1; // Activating first warp
visible_active[0] <= 1; // Activating first warp
thread_masks[0] <= 1; // Activating first thread in first warp
warp_stalled <= 0;
didnt_split <= 0;
warp_lock <= 0;
// total_barrier_stall = 0;
for (w = 1; w < `NUM_WARPS; w=w+1) begin
warp_pcs[w] <= 0;
warp_active[w] <= 0; // Activating first warp
visible_active[w] <= 0; // Activating first warp
thread_masks[w] <= 1; // Activating first thread in first warp
for (integer i = 0; i < `NUM_BARRIERS; i++) begin
barrier_stall_mask[i] <= 0;
end
end else begin
// Wsapwning warps
if (wspawn) begin
warp_active <= wspawn_new_active;
use_wsapwn_pc <= wsapwn_pc;
use_wsapwn <= wspawn_new_active & (~`NUM_WARPS'b1);
end
if (is_barrier) begin
warp_stalled[barrier_warp_num] <= 0;
if (reached_barrier_limit) begin
barrier_stall_mask[barrier_id] <= 0;
end else begin
barrier_stall_mask[barrier_id][barrier_warp_num] <= 1;
end
end else if (ctm) begin
thread_masks[ctm_warp_num] <= ctm_mask;
warp_stalled[ctm_warp_num] <= 0;
end else if (is_join && !didnt_split) begin
if (!join_fall) begin
warp_pcs[join_warp_num] <= join_pc;
end
thread_masks[join_warp_num] <= join_tm;
didnt_split <= 0;
end else if (is_split) begin
warp_stalled[split_warp_num] <= 0;
if (!dont_split) begin
thread_masks[split_warp_num] <= split_new_mask;
didnt_split <= 0;
end else begin
didnt_split <= 1;
end
end
use_wspawn_pc <= 0;
use_wspawn <= 0;
warp_pcs[0] <= `STARTUP_ADDR;
active_warps[0] <= 1; // Activating first warp
schedule_table[0] <= 1; // set first warp as ready
thread_masks[0] <= 1; // Activating first thread in first warp
stalled_warps <= 0;
didnt_split <= 0;
fetch_lock <= 0;
if (whalt) begin
warp_active[whalt_warp_num] <= 0;
visible_active[whalt_warp_num] <= 0;
for (integer i = 1; i < `NUM_WARPS; i++) begin
warp_pcs[i] <= 0;
active_warps[i] <= 0;
schedule_table[i] <= 0;
thread_masks[i] <= 0;
end
end else begin
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
active_warps <= warp_ctl_if.wspawn.wmask;
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
use_wspawn_pc <= warp_ctl_if.wspawn.pc;
end
if (update_use_wspawn) begin
use_wsapwn[warp_to_schedule] <= 0;
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid) begin
stalled_warps[warp_ctl_if.wid] <= 0;
if (reached_barrier_limit) begin
barrier_stall_mask[warp_ctl_if.barrier.id] <= 0;
end else begin
barrier_stall_mask[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
end
end else if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
stalled_warps[warp_ctl_if.wid] <= 0;
if (0 == warp_ctl_if.tmc.tmask) begin
active_warps[warp_ctl_if.wid] <= 0;
end
end else if (join_if.valid && !didnt_split) begin
if (!join_fall) begin
warp_pcs[join_if.wid] <= join_pc;
end
thread_masks[join_if.wid] <= join_tm;
didnt_split <= 0;
end else if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
stalled_warps[warp_ctl_if.wid] <= 0;
if (warp_ctl_if.split.diverged) begin
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_mask;
didnt_split <= 0;
end else begin
didnt_split <= 1;
end
end
if (use_wspawn[warp_to_schedule] && scheduled_warp) begin
use_wspawn[warp_to_schedule] <= 0;
thread_masks[warp_to_schedule] <= 1;
end
// Stalling the scheduling of warps
if (wstall) begin
warp_stalled[wstall_warp_num] <= 1;
visible_active[wstall_warp_num] <= 0;
end
// Refilling active warps
if (update_visible_active) begin
visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall) & ~warp_lock;
end
// Don't change state if stall
if (!global_stall && real_schedule && (thread_mask != 0)) begin
visible_active[warp_to_schedule] <= 0;
warp_pcs[warp_to_schedule] <= new_pc;
end
// Jal
if (jal) begin
warp_pcs[jal_warp_num] <= dest;
warp_stalled[jal_warp_num] <= 0;
if (wstall_if.valid) begin
stalled_warps[wstall_if.wid] <= 1;
end
// Branch
if (branch_valid) begin
if (branch_dir) begin
warp_pcs[branch_warp_num] <= branch_dest;
if (branch_ctl_if.valid) begin
if (branch_ctl_if.taken) begin
warp_pcs[branch_ctl_if.wid] <= branch_ctl_if.dest;
end
warp_stalled[branch_warp_num] <= 0;
stalled_warps[branch_ctl_if.wid] <= 0;
end
// Lock/Release
if (scheduled_warp && !stall) begin
warp_lock[warp_num] <= 1'b1;
// Lock warp until instruction decode to resolve branches
if (scheduled_warp) begin
fetch_lock[warp_to_schedule] <= 1;
end
if (icache_stage_response) begin
warp_lock[icache_stage_wid] <= 1'b0;
if (ifetch_rsp_fire) begin
fetch_lock[ifetch_rsp_if.wid] <= 0;
warp_pcs[ifetch_rsp_if.wid] <= ifetch_rsp_if.PC + 4;
end
// reset 'schedule_table' when it goes to zero
schedule_table <= (| schedule_table_n) ? schedule_table_n : active_warps;
end
end
// calculate active barrier status
`IGNORE_WARNINGS_BEGIN
wire [`NW_BITS:0] active_barrier_count;
`IGNORE_WARNINGS_END
VX_countones #(
.N(`NUM_WARPS)
) barrier_count (
.valids(b_mask),
.count (b_count)
);
.valids(barrier_stall_mask[warp_ctl_if.barrier.id]),
.count (active_barrier_count)
);
wire [$clog2(`NUM_WARPS):0] count_visible_active;
assign reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
VX_countones #(
.N(`NUM_WARPS)
) num_visible (
.valids(visible_active),
.count (count_visible_active)
);
reg [`NUM_WARPS-1:0] total_barrier_stall;
always @(*) begin
total_barrier_stall = barrier_stall_mask[0];
for (integer i = 1; i < `NUM_BARRIERS; ++i) begin
total_barrier_stall |= barrier_stall_mask[i];
end
end
// assign b_count = $countones(b_mask);
// split/join stack management
assign b_mask = barrier_stall_mask[barrier_id][`NUM_WARPS-1:0];
assign reached_barrier_limit = b_count == (num_warps);
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.wid]};
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split.pc, warp_ctl_if.split.else_mask};
assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug
assign {join_fall, join_pc, join_tm} = ipdom[join_if.wid];
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
for (genvar i = 0; i < `NUM_WARPS; i++) begin
wire push = warp_ctl_if.valid
&& warp_ctl_if.split.valid
&& warp_ctl_if.split.diverged
&& (i == warp_ctl_if.wid);
assign update_visible_active = (0 == count_visible_active) && !(stall || wstall_this_cycle || hazard || is_join);
wire pop = join_if.valid && (i == join_if.wid);
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc, split_later_mask};
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
genvar i;
generate
for (i = 0; i < `NUM_WARPS; i++) begin : stacks
wire correct_warp_s = (i == split_warp_num);
wire correct_warp_j = (i == join_warp_num);
wire push = (is_split && !dont_split) && correct_warp_s;
wire pop = is_join && correct_warp_j;
VX_generic_stack #(
VX_ipdom_stack #(
.WIDTH(1+32+`NUM_THREADS),
.DEPTH($clog2(`NUM_THREADS)+1)
) ipdom_stack(
.DEPTH(`NT_BITS+1)
) ipdom_stack (
.clk (clk),
.reset(reset),
.push (push),
.pop (pop),
.d (d[i]),
.q1 (q1),
.q2 (q2)
.q2 (q2),
.d (ipdom[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (full)
);
end
endgenerate
wire should_jal = (jal && (warp_to_schedule == jal_warp_num));
wire should_bra = (branch_valid && branch_dir && (warp_to_schedule == branch_warp_num));
// calculate next warp schedule
assign hazard = (should_jal || should_bra) && schedule;
reg schedule_valid;
reg [`NUM_THREADS-1:0] thread_mask;
wire [`NUM_WARPS-1:0] schedule_ready = schedule_table & ~(stalled_warps | total_barrier_stall | fetch_lock);
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule] && !warp_lock[0];
// Combinational warp selection: scan the warps in ascending index order and
// take the first one whose schedule_ready bit is set, so the lowest-numbered
// ready warp always wins.
always @(*) begin
// Defaults for the case where no warp is ready: nothing is valid and the
// remaining outputs are left as don't-care ('x).
schedule_valid = 0;
thread_mask = 'x;
warp_pc = 'x;
warp_to_schedule = 'x;
for (integer i = 0; i < `NUM_WARPS; ++i) begin
if (schedule_ready[i]) begin
schedule_valid = 1;
// When use_wspawn[i] is set, the warp is issued with only bit 0 of the
// thread mask enabled and with use_wspawn_pc as its PC; otherwise the
// warp's stored thread mask and PC are used.
thread_mask = use_wspawn[i] ? `NUM_THREADS'(1) : thread_masks[i];
warp_pc = use_wspawn[i] ? use_wspawn_pc : warp_pcs[i];
warp_to_schedule = `NW_BITS'(i);
// Stop at the first ready warp so higher indices cannot override it.
break;
end
end
end
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || is_join) && !reset;
assign scheduled_warp = schedule_valid && ~stall_out;
wire real_use_wspawn = use_wsapwn[warp_to_schedule];
assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]);
assign warp_num = warp_to_schedule;
assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;
assign new_pc = warp_pc + 4;
assign use_active = (count_visible_active != 0) ? visible_active : (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock));
// Choosing a warp to schedule
VX_fixed_arbiter #(
.N(`NUM_WARPS)
) choose_schedule (
.clk (clk),
.reset (reset),
.requests (use_active),
.grant_index (warp_to_schedule),
.grant_valid (schedule),
`UNUSED_PIN (grant_onehot)
VX_generic_register #(
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
) fetch_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}),
.out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
);
// always @(*) begin
// $display("WarpPC: %h",warp_pc);
// $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]);
// end
assign busy = (warp_active != 0);
assign busy = (active_warps != 0);
endmodule

View File

@@ -1,93 +1,96 @@
`include "VX_define.vh"
module VX_writeback (
module VX_writeback #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// Mem WB info
VX_wb_if mem_wb_if,
// inputs
VX_exu_to_cmt_if alu_commit_if,
VX_exu_to_cmt_if lsu_commit_if,
VX_exu_to_cmt_if csr_commit_if,
VX_exu_to_cmt_if mul_commit_if,
VX_fpu_to_cmt_if fpu_commit_if,
VX_exu_to_cmt_if gpu_commit_if,
// EXEC Unit WB info
VX_wb_if inst_exec_wb_if,
// CSR Unit WB info
VX_wb_if csr_wb_if,
// Actual WB to GPR
VX_wb_if writeback_if,
output wire no_slot_mem,
output wire no_slot_exec,
output wire no_slot_csr
// outputs
VX_writeback_if writeback_if
);
// Per-unit writeback requests: a unit requests the writeback port only when
// its commit interface is valid and its wb flag is set.
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
wire lsu_valid = lsu_commit_if.valid && lsu_commit_if.wb;
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
VX_wb_if writeback_tmp_if();
wire wb_valid;
wire [`NW_BITS-1:0] wb_wid;
wire [`NUM_THREADS-1:0] wb_tmask;
wire [`NR_BITS-1:0] wb_rd;
wire [`NUM_THREADS-1:0][31:0] wb_data;
assign wb_valid = alu_valid ? alu_commit_if.valid :
lsu_valid ? lsu_commit_if.valid :
csr_valid ? csr_commit_if.valid :
mul_valid ? mul_commit_if.valid :
fpu_valid ? fpu_commit_if.valid :
0;
wire exec_wb = (inst_exec_wb_if.wb != 0) && (| inst_exec_wb_if.valid);
wire mem_wb = (mem_wb_if.wb != 0) && (| mem_wb_if.valid);
wire csr_wb = (csr_wb_if.wb != 0) && (| csr_wb_if.valid);
assign wb_wid = alu_valid ? alu_commit_if.wid :
lsu_valid ? lsu_commit_if.wid :
csr_valid ? csr_commit_if.wid :
mul_valid ? mul_commit_if.wid :
fpu_valid ? fpu_commit_if.wid :
0;
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
lsu_valid ? lsu_commit_if.tmask :
csr_valid ? csr_commit_if.tmask :
mul_valid ? mul_commit_if.tmask :
fpu_valid ? fpu_commit_if.tmask :
0;
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
assign no_slot_csr = csr_wb && exec_wb;
assign no_slot_exec = 0;
assign wb_rd = alu_valid ? alu_commit_if.rd :
lsu_valid ? lsu_commit_if.rd :
csr_valid ? csr_commit_if.rd :
mul_valid ? mul_commit_if.rd :
fpu_valid ? fpu_commit_if.rd :
0;
assign writeback_tmp_if.data = exec_wb ? inst_exec_wb_if.data :
csr_wb ? csr_wb_if.data :
mem_wb ? mem_wb_if.data :
0;
assign writeback_tmp_if.valid = exec_wb ? inst_exec_wb_if.valid :
csr_wb ? csr_wb_if.valid :
mem_wb ? mem_wb_if.valid :
0;
assign writeback_tmp_if.rd = exec_wb ? inst_exec_wb_if.rd :
csr_wb ? csr_wb_if.rd :
mem_wb ? mem_wb_if.rd :
0;
assign writeback_tmp_if.wb = exec_wb ? inst_exec_wb_if.wb :
csr_wb ? csr_wb_if.wb :
mem_wb ? mem_wb_if.wb :
0;
assign writeback_tmp_if.warp_num = exec_wb ? inst_exec_wb_if.warp_num :
csr_wb ? csr_wb_if.warp_num :
mem_wb ? mem_wb_if.warp_num :
0;
assign writeback_tmp_if.curr_PC = exec_wb ? inst_exec_wb_if.curr_PC :
csr_wb ? 32'hdeadbeef :
mem_wb ? mem_wb_if.curr_PC :
32'hdeadbeef;
wire [`NUM_THREADS-1:0][31:0] use_wb_data;
assign wb_data = alu_valid ? alu_commit_if.data :
lsu_valid ? lsu_commit_if.data :
csr_valid ? csr_commit_if.data :
mul_valid ? mul_commit_if.data :
fpu_valid ? fpu_commit_if.data :
0;
always @(*) assert(writeback_if.ready);
wire stall = 0/*~writeback_if.ready && writeback_if.valid*/;
VX_generic_register #(
.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)
) wb_register (
.clk (clk),
.reset(reset),
.stall(1'b0),
.flush(1'b0),
.in ({writeback_tmp_if.data, writeback_tmp_if.valid, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC}),
.out ({use_wb_data, writeback_if.valid, writeback_if.rd, writeback_if.wb, writeback_if.warp_num, writeback_if.curr_PC})
.N(1 + `NW_BITS + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
) wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({wb_valid, wb_wid, wb_tmask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.tmask, writeback_if.rd, writeback_if.data})
);
reg [31:0] last_data_wb /* verilator public */;
// Back-pressure towards the commit interfaces, using a fixed priority order
// alu > lsu > csr > mul > fpu: a unit is ready only when the stage is not
// stalled and no unit earlier in that order has a valid writeback request.
// gpu_commit_if is unconditionally ready.
assign alu_commit_if.ready = !stall;
assign lsu_commit_if.ready = !stall && !alu_valid;
assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid;
assign mul_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid;
assign fpu_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid && !mul_valid;
assign gpu_commit_if.ready = 1'b1;
// special workaround to get RISC-V tests Pass/Fail status
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
always @(posedge clk) begin
if ( (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
last_data_wb <= use_wb_data[0];
if (writeback_if.valid && writeback_if.ready) begin
last_wb_value[writeback_if.rd] <= writeback_if.data[0];
end
end
assign writeback_if.data = use_wb_data;
endmodule : VX_writeback
endmodule

View File

@@ -3,10 +3,9 @@
module Vortex (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
// Clock
input wire clk,
@@ -78,10 +77,9 @@ module Vortex (
) cluster (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
.clk (clk),
.reset (reset),
@@ -139,69 +137,67 @@ module Vortex (
end else begin
wire per_cluster_dram_req_valid [`NUM_CLUSTERS-1:0];
wire per_cluster_dram_req_rw [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag [`NUM_CLUSTERS-1:0];
wire l3_core_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire l3_core_req_ready;
wire per_cluster_dram_rsp_valid [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag [`NUM_CLUSTERS-1:0];
wire per_cluster_dram_rsp_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire per_cluster_snp_req_valid [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr [`NUM_CLUSTERS-1:0];
wire per_cluster_snp_req_invalidate [`NUM_CLUSTERS-1:0];
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag [`NUM_CLUSTERS-1:0];
wire per_cluster_snp_req_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
wire per_cluster_snp_rsp_valid [`NUM_CLUSTERS-1:0];
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag [`NUM_CLUSTERS-1:0];
wire per_cluster_snp_rsp_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
wire per_cluster_io_req_valid [`NUM_CLUSTERS-1:0];
wire per_cluster_io_req_rw [`NUM_CLUSTERS-1:0];
wire [3:0] per_cluster_io_req_byteen [`NUM_CLUSTERS-1:0];
wire [29:0] per_cluster_io_req_addr [`NUM_CLUSTERS-1:0];
wire [31:0] per_cluster_io_req_data [`NUM_CLUSTERS-1:0];
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag [`NUM_CLUSTERS-1:0];
wire per_cluster_io_req_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
wire [`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen;
wire [`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready;
wire per_cluster_io_rsp_valid [`NUM_CLUSTERS-1:0];
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag [`NUM_CLUSTERS-1:0];
wire [31:0] per_cluster_io_rsp_data [`NUM_CLUSTERS-1:0];
wire per_cluster_io_rsp_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
wire per_cluster_csr_io_req_valid [`NUM_CLUSTERS-1:0];
wire [11:0] per_cluster_csr_io_req_addr [`NUM_CLUSTERS-1:0];
wire per_cluster_csr_io_req_rw [`NUM_CLUSTERS-1:0];
wire [31:0] per_cluster_csr_io_req_data [`NUM_CLUSTERS-1:0];
wire per_cluster_csr_io_req_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid;
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready;
wire per_cluster_csr_io_rsp_valid [`NUM_CLUSTERS-1:0];
wire [31:0] per_cluster_csr_io_rsp_data [`NUM_CLUSTERS-1:0];
wire per_cluster_csr_io_rsp_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready;
wire per_cluster_busy [`NUM_CLUSTERS-1:0];
wire per_cluster_ebreak [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak;
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
genvar i;
for (i = 0; i < `NUM_CLUSTERS; i++) begin
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
VX_cluster #(
.CLUSTER_ID(i)
) cluster (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
.clk (clk),
.reset (reset),
@@ -336,29 +332,29 @@ module Vortex (
// L3 Cache ///////////////////////////////////////////////////////////
wire l3_core_req_valid [`L3NUM_REQUESTS-1:0];
wire l3_core_req_rw [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag [`L3NUM_REQUESTS-1:0];
wire [`L3NUM_REQUESTS-1:0] l3_core_req_valid;
wire [`L3NUM_REQUESTS-1:0] l3_core_req_rw;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
wire l3_core_rsp_valid [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data [`L3NUM_REQUESTS-1:0];
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag [`L3NUM_REQUESTS-1:0];
wire l3_core_rsp_ready;
wire [`L3NUM_REQUESTS-1:0] l3_core_rsp_valid;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data;
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
wire l3_core_rsp_ready;
wire l3_snp_fwdout_valid [`NUM_CLUSTERS-1:0];
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr [`NUM_CLUSTERS-1:0];
wire l3_snp_fwdout_invalidate [`NUM_CLUSTERS-1:0];
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag [`NUM_CLUSTERS-1:0];
wire l3_snp_fwdout_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr;
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag;
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready;
wire l3_snp_fwdin_valid [`NUM_CLUSTERS-1:0];
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag [`NUM_CLUSTERS-1:0];
wire l3_snp_fwdin_ready [`NUM_CLUSTERS-1:0];
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
for (i = 0; i < `L3NUM_REQUESTS; i++) begin
for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin
// Core Request
assign l3_core_req_valid [i] = per_cluster_dram_req_valid [i];
assign l3_core_req_rw [i] = per_cluster_dram_req_rw [i];
@@ -394,7 +390,6 @@ module Vortex (
.NUM_BANKS (`L3NUM_BANKS),
.WORD_SIZE (`L3WORD_SIZE),
.NUM_REQUESTS (`L3NUM_REQUESTS),
.STAGE_1_CYCLES (`L3STAGE_1_CYCLES),
.CREQ_SIZE (`L3CREQ_SIZE),
.MRVQ_SIZE (`L3MRVQ_SIZE),
.DFPQ_SIZE (`L3DFPQ_SIZE),

283
hw/rtl/cache/VX_bank.v vendored
View File

@@ -13,8 +13,6 @@ module VX_bank #(
parameter WORD_SIZE = 0,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 0,
// Number of cycles to complete i 1 (read from memory)
parameter STAGE_1_CYCLES = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
@@ -105,28 +103,25 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
/* verilator lint_off UNUSED */
wire[31:0] debug_use_pc_st0;
wire[1:0] debug_wb_st0;
wire[4:0] debug_rd_st0;
wire[`NW_BITS-1:0] debug_warp_num_st0;
wire[31:0] debug_pc_st0;
wire[`NR_BITS-1:0] debug_rd_st0;
wire[`NW_BITS-1:0] debug_wid_st0;
wire debug_rw_st0;
wire[WORD_SIZE-1:0] debug_byteen_st0;
wire[`REQS_BITS-1:0] debug_tid_st0;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
wire[31:0] debug_use_pc_st1e;
wire[1:0] debug_wb_st1e;
wire[4:0] debug_rd_st1e;
wire[`NW_BITS-1:0] debug_warp_num_st1e;
wire debug_rw_st1e;
wire[WORD_SIZE-1:0] debug_byteen_st1e;
wire[`REQS_BITS-1:0] debug_tid_st1e;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
wire[31:0] debug_pc_st1;
wire[`NR_BITS-1:0] debug_rd_st1;
wire[`NW_BITS-1:0] debug_wid_st1;
wire debug_rw_st1;
wire[WORD_SIZE-1:0] debug_byteen_st1;
wire[`REQS_BITS-1:0] debug_tid_st1;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1;
wire[31:0] debug_use_pc_st2;
wire[1:0] debug_wb_st2;
wire[4:0] debug_rd_st2;
wire[`NW_BITS-1:0] debug_warp_num_st2;
wire[31:0] debug_pc_st2;
wire[`NR_BITS-1:0] debug_rd_st2;
wire[`NW_BITS-1:0] debug_wid_st2;
wire debug_rw_st2;
wire[WORD_SIZE-1:0] debug_byteen_st2;
wire[`REQS_BITS-1:0] debug_tid_st2;
@@ -244,9 +239,9 @@ module VX_bank #(
wire mrvq_is_snp_st0;
wire mrvq_snp_invalidate_st0;
wire mrvq_pending_hazard_st1e;
wire st2_pending_hazard_st1e;
wire force_request_miss_st1e;
wire mrvq_pending_hazard_st1;
wire st2_pending_hazard_st1;
wire force_request_miss_st1;
wire[`REQS_BITS-1:0] miss_add_tid;
wire[`REQ_TAG_WIDTH-1:0] miss_add_tag;
@@ -263,27 +258,15 @@ module VX_bank #(
wire dwbq_push_stall;
wire dram_fill_req_stall;
wire stall_bank_pipe;
reg is_fill_in_pipe;
wire is_fill_st1 [STAGE_1_CYCLES-1:0];
wire is_fill_st1;
`DEBUG_BEGIN
wire going_to_write_st1 [STAGE_1_CYCLES-1:0];
wire going_to_write_st1;
`DEBUG_END
integer j;
always @(*) begin
is_fill_in_pipe = 0;
for (j = 0; j < STAGE_1_CYCLES; j++) begin
if (is_fill_st1[j]) begin
is_fill_in_pipe = 1;
end
end
end
wire mrvq_pop_unqual = mrvq_valid_st0;
wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty;
wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1[0] && !is_fill_in_pipe;
wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1 && !is_fill_st1;
wire snrq_pop_unqual = !mrvq_stop && !reqq_pop_unqual && !reqq_pop_unqual && !mrvq_pop_unqual && !dfpq_pop_unqual && !snrq_empty && !reqq_req_st0; // if there's any reqq_req, don't schedule snrq.
assign mrvq_pop = mrvq_pop_unqual && !stall_bank_pipe && !recover_mrvq_state_st2;
@@ -304,15 +287,15 @@ module VX_bank #(
wire qual_is_snp_st0;
wire qual_snp_invalidate_st0;
wire valid_st1 [STAGE_1_CYCLES-1:0];
wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0];
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1 [STAGE_1_CYCLES-1:0];
wire [`WORD_WIDTH-1:0] writeword_st1 [STAGE_1_CYCLES-1:0];
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0];
wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0];
wire is_snp_st1 [STAGE_1_CYCLES-1:0];
wire snp_invalidate_st1 [STAGE_1_CYCLES-1:0];
wire is_mrvq_st1 [STAGE_1_CYCLES-1:0];
wire valid_st1;
wire [`LINE_ADDR_WIDTH-1:0] addr_st1;
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1;
wire [`WORD_WIDTH-1:0] writeword_st1;
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1;
wire [`BANK_LINE_WIDTH-1:0] writedata_st1;
wire is_snp_st1;
wire snp_invalidate_st1;
wire is_mrvq_st1;
assign qual_is_fill_st0 = dfpq_pop_unqual;
@@ -360,130 +343,115 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
end
`endif
VX_generic_register #(
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
) s0_1_c0 (
) pipe_reg0 (
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (1'b0),
.flush (0),
.in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({is_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
.out ({is_mrvq_st1 , is_snp_st1, snp_invalidate_st1, going_to_write_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
);
genvar i;
for (i = 1; i < STAGE_1_CYCLES; i++) begin
VX_generic_register #(
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
) s0_1_cc (
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (1'b0),
.in ({is_mrvq_st1[i-1], is_snp_st1[i-1], snp_invalidate_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
.out ({is_mrvq_st1[i] , is_snp_st1[i], snp_invalidate_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
);
end
wire[`WORD_WIDTH-1:0] readword_st1e;
wire[`BANK_LINE_WIDTH-1:0] readdata_st1e;
wire[`TAG_SELECT_BITS-1:0] readtag_st1e;
wire miss_st1e;
wire dirty_st1e;
wire[BANK_LINE_SIZE-1:0] dirtyb_st1e;
wire[`WORD_WIDTH-1:0] readword_st1;
wire[`BANK_LINE_WIDTH-1:0] readdata_st1;
wire[`TAG_SELECT_BITS-1:0] readtag_st1;
wire miss_st1;
wire dirty_st1;
wire[BANK_LINE_SIZE-1:0] dirtyb_st1;
`DEBUG_BEGIN
wire [`REQ_TAG_WIDTH-1:0] tag_st1e;
wire [`REQS_BITS-1:0] tid_st1e;
wire [`REQ_TAG_WIDTH-1:0] tag_st1;
wire [`REQS_BITS-1:0] tid_st1;
`DEBUG_END
wire mem_rw_st1e;
wire [WORD_SIZE-1:0] mem_byteen_st1e;
wire fill_saw_dirty_st1e;
wire is_snp_st1e;
wire snp_invalidate_st1e;
wire snp_to_mrvq_st1e;
wire mrvq_init_ready_state_st1e;
wire mem_rw_st1;
wire [WORD_SIZE-1:0] mem_byteen_st1;
wire fill_saw_dirty_st1;
wire snp_to_mrvq_st1;
wire mrvq_init_ready_state_st1;
wire miss_add_because_miss;
wire valid_st1e;
wire is_mrvq_st1e;
wire mrvq_recover_ready_state_st1e;
wire[`LINE_ADDR_WIDTH-1:0] addr_st1e;
wire mrvq_recover_ready_state_st1;
assign is_mrvq_st1e = is_mrvq_st1[STAGE_1_CYCLES-1];
assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1];
assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1];
assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1];
assign addr_st1e = addr_st1[STAGE_1_CYCLES-1];
assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1;
assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
assign st2_pending_hazard_st1 = (miss_add_because_miss)
&& ((addr_st2 == addr_st1) && !is_fill_st2);
assign st2_pending_hazard_st1e = (miss_add_because_miss)
&& ((addr_st2 == addr_st1e) && !is_fill_st2);
assign force_request_miss_st1 = (valid_st1 && !is_mrvq_st1 && (mrvq_pending_hazard_st1 || st2_pending_hazard_st1))
|| (valid_st1 && is_mrvq_st1 && recover_mrvq_state_st2);
assign force_request_miss_st1e = (valid_st1e && !is_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e))
|| (valid_st1e && is_mrvq_st1e && recover_mrvq_state_st2);
assign mrvq_recover_ready_state_st1e = valid_st1e
&& is_mrvq_st1e
assign mrvq_recover_ready_state_st1 = valid_st1
&& is_mrvq_st1
&& recover_mrvq_state_st2
&& (addr_st2 == addr_st1e);
&& (addr_st2 == addr_st1);
VX_tag_data_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CORE_TAG_ID_BITS(CORE_TAG_ID_BITS),
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE)
) tag_data_access (
.clk (clk),
.reset (reset),
`ifdef DBG_CORE_REQ_INFO
.debug_pc_st1 (debug_pc_st1),
.debug_rd_st1 (debug_rd_st1),
.debug_wid_st1 (debug_wid_st1),
.debug_tagid_st1(debug_tagid_st1),
`endif
.stall (stall_bank_pipe),
.stall_bank_pipe(stall_bank_pipe),
.force_request_miss_st1e(force_request_miss_st1e),
.force_request_miss_st1(force_request_miss_st1),
// Initial Read
.readaddr_st10(addr_st1[0][`LINE_SELECT_BITS-1:0]),
.readaddr_st1(addr_st1[`LINE_SELECT_BITS-1:0]),
// Actual Read/Write
.valid_req_st1e (valid_st1e),
.writefill_st1e (is_fill_st1[STAGE_1_CYCLES-1]),
.writeaddr_st1e (addr_st1e),
.wordsel_st1e (wsel_st1[STAGE_1_CYCLES-1]),
.writeword_st1e (writeword_st1[STAGE_1_CYCLES-1]),
.writedata_st1e (writedata_st1[STAGE_1_CYCLES-1]),
.valid_req_st1 (valid_st1),
.writefill_st1 (is_fill_st1),
.writeaddr_st1 (addr_st1),
.wordsel_st1 (wsel_st1),
.writeword_st1 (writeword_st1),
.writedata_st1 (writedata_st1),
.mem_rw_st1e (mem_rw_st1e),
.mem_byteen_st1e (mem_byteen_st1e),
.mem_rw_st1 (mem_rw_st1),
.mem_byteen_st1 (mem_byteen_st1),
.is_snp_st1e (is_snp_st1e),
.snp_invalidate_st1e (snp_invalidate_st1e),
.is_snp_st1 (is_snp_st1),
.snp_invalidate_st1(snp_invalidate_st1),
// Read Data
.readword_st1e (readword_st1e),
.readdata_st1e (readdata_st1e),
.readtag_st1e (readtag_st1e),
.miss_st1e (miss_st1e),
.dirty_st1e (dirty_st1e),
.dirtyb_st1e (dirtyb_st1e),
.fill_saw_dirty_st1e (fill_saw_dirty_st1e),
.snp_to_mrvq_st1e (snp_to_mrvq_st1e),
.mrvq_init_ready_state_st1e(mrvq_init_ready_state_st1e)
.readword_st1 (readword_st1),
.readdata_st1 (readdata_st1),
.readtag_st1 (readtag_st1),
.miss_st1 (miss_st1),
.dirty_st1 (dirty_st1),
.dirtyb_st1 (dirtyb_st1),
.fill_saw_dirty_st1(fill_saw_dirty_st1),
.snp_to_mrvq_st1(snp_to_mrvq_st1),
.mrvq_init_ready_state_st1(mrvq_init_ready_state_st1)
);
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
end
`endif
wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1];
wire is_mrvq_st1e_st2 = is_mrvq_st1e;
wire qual_valid_st1_2 = valid_st1 && !is_fill_st1;
wire is_mrvq_st1_st2 = is_mrvq_st1;
wire valid_st2;
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2;
@@ -504,22 +472,22 @@ module VX_bank #(
wire mrvq_recover_ready_state_st2;
wire mrvq_init_ready_state_unqual_st2;
wire mrvq_init_ready_state_hazard_st0_st1;
wire mrvq_init_ready_state_hazard_st1e_st1;
wire mrvq_init_ready_state_hazard_st1_st1;
VX_generic_register #(
.N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH)
) st_1e_2 (
) pipe_reg1 (
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (1'b0),
.in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
.flush (0),
.in ({mrvq_recover_ready_state_st1, is_mrvq_st1_st2, mrvq_init_ready_state_st1, snp_to_mrvq_st1, is_snp_st1, snp_invalidate_st1, fill_saw_dirty_st1, is_fill_st1, qual_valid_st1_2, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}),
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2})
);
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end
`endif
@@ -531,10 +499,10 @@ module VX_bank #(
assign mrvq_push_stall = miss_add_unqual && mrvq_full;
wire miss_add = miss_add_unqual
&& !mrvq_full
&& !(cwbq_push_stall
|| dwbq_push_stall
|| dram_fill_req_stall);
&& !mrvq_full
&& !(cwbq_push_stall
|| dwbq_push_stall
|| dram_fill_req_stall);
assign recover_mrvq_state_st2 = miss_add_unqual && is_mrvq_st2; // Doesn't need to include the stalls
@@ -548,11 +516,11 @@ module VX_bank #(
wire miss_add_is_mrvq = valid_st2 && is_mrvq_st2 && !stall_bank_pipe;
assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == dfpq_addr_st0); // Doesn't need to be muxed to qual, only care about fills
assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1e);
assign mrvq_init_ready_state_hazard_st1_st1 = miss_add_unqual && is_fill_st1 && (miss_add_addr == addr_st1);
assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 // When req was in st1e, either matched with an mrvq entry OR mrvq recovering state
|| mrvq_init_ready_state_hazard_st0_st1 // If there's a fill in st0 that has the same address as miss_add_addr
|| mrvq_init_ready_state_hazard_st1e_st1; // If there's a fill in st1 that has the same address as miss_add_addr
|| mrvq_init_ready_state_hazard_st1_st1; // If there's a fill in st1 that has the same address as miss_add_addr
VX_cache_miss_resrv #(
.BANK_ID (BANK_ID),
@@ -585,9 +553,9 @@ module VX_bank #(
.mrvq_init_ready_state (mrvq_init_ready_state_st2),
// Broadcast
.is_fill_st1 (is_fill_st1[STAGE_1_CYCLES-1]),
.fill_addr_st1 (addr_st1e),
.pending_hazard (mrvq_pending_hazard_st1e),
.is_fill_st1 (is_fill_st1),
.fill_addr_st1 (addr_st1),
.pending_hazard_st1 (mrvq_pending_hazard_st1),
// Dequeue
.miss_resrv_pop (mrvq_pop),
@@ -644,6 +612,7 @@ module VX_bank #(
assign core_rsp_valid = !cwbq_empty;
// Enqueue DRAM fill request
wire dram_fill_req_fast = miss_add_unqual; // Completely unqualified hint that we might send a dram_fill_req
wire dram_fill_req_unqual = dram_fill_req_fast
&& (!mrvq_init_ready_state_st2
@@ -706,7 +675,9 @@ module VX_bank #(
always @(posedge clk) begin
if (reset) begin
dwbq_dual_valid_sel <= 0;
end else if (dwbq_is_dwb_out && dwbq_is_snp_out && (dram_wb_req_fire || snp_rsp_fire)) begin
end else if (dwbq_is_dwb_out
&& dwbq_is_snp_out
&& (dram_wb_req_fire || snp_rsp_fire)) begin
dwbq_dual_valid_sel <= ~dwbq_dual_valid_sel;
end
end
@@ -728,41 +699,41 @@ module VX_bank #(
`ifdef DBG_PRINT_CACHE_BANK
always @(posedge clk) begin
if ((|core_req_valid) && core_req_ready) begin
$display("%t: bank%0d:%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr[0], BANK_ID), core_req_tag);
$display("%t: cache%0d:%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr[0], BANK_ID), core_req_tag);
end
if (core_rsp_valid && core_rsp_ready) begin
$display("%t: bank%0d:%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
$display("%t: cache%0d:%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
end
if (dram_fill_req_valid && dram_fill_req_ready) begin
$display("%t: bank%0d:%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
$display("%t: cache%0d:%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
end
if (dram_wb_req_valid && dram_wb_req_ready) begin
$display("%t: bank%0d:%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
$display("%t: cache%0d:%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
end
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
$display("%t: bank%0d:%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
$display("%t: cache%0d:%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
end
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%0d:%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
$display("%t: cache%0d:%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%0d:%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
$display("%t: cache%0d:%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
end
end
`endif
`SCOPE_ASSIGN(scope_bank_valid_st0, qual_valid_st0);
`SCOPE_ASSIGN(scope_bank_valid_st1, valid_st1e);
`SCOPE_ASSIGN(scope_bank_valid_st2, valid_st2);
`SCOPE_ASSIGN (scope_bank_valid_st0, qual_valid_st0);
`SCOPE_ASSIGN (scope_bank_valid_st1, valid_st1);
`SCOPE_ASSIGN (scope_bank_valid_st2, valid_st2);
`SCOPE_ASSIGN(scope_bank_is_mrvq_st1, is_mrvq_st1e);
`SCOPE_ASSIGN(scope_bank_miss_st1, miss_st1e);
`SCOPE_ASSIGN(scope_bank_dirty_st1, dirty_st1e);
`SCOPE_ASSIGN(scope_bank_force_miss_st1, force_request_miss_st1e);
`SCOPE_ASSIGN(scope_bank_stall_pipe, stall_bank_pipe);
`SCOPE_ASSIGN (scope_bank_is_mrvq_st1, is_mrvq_st1);
`SCOPE_ASSIGN (scope_bank_miss_st1, miss_st1);
`SCOPE_ASSIGN (scope_bank_dirty_st1, dirty_st1);
`SCOPE_ASSIGN (scope_bank_force_miss_st1, force_request_miss_st1);
`SCOPE_ASSIGN (scope_bank_stall_pipe, stall_bank_pipe);
`SCOPE_ASSIGN(scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
`SCOPE_ASSIGN(scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1e, BANK_ID));
`SCOPE_ASSIGN(scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
`SCOPE_ASSIGN (scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
`SCOPE_ASSIGN (scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`SCOPE_ASSIGN (scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
endmodule : VX_bank
endmodule

View File

@@ -12,8 +12,6 @@ module VX_cache #(
parameter WORD_SIZE = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 4,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 1,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
@@ -130,14 +128,13 @@ module VX_cache #(
`ifdef DBG_CORE_REQ_INFO
/* verilator lint_off UNUSED */
wire[31:0] debug_core_req_use_pc;
wire[1:0] debug_core_req_wb;
wire[4:0] debug_core_req_rd;
wire[`NW_BITS-1:0] debug_core_req_warp_num;
wire[`LOG2UP(CREQ_SIZE)-1:0] debug_core_req_idx;
wire[`NR_BITS-1:0] debug_core_req_rd;
wire[`NW_BITS-1:0] debug_core_req_wid;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx;
/* verilator lint_on UNUSED */
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0];
assign {debug_core_req_use_pc, debug_core_req_rd, debug_core_req_wid, debug_core_req_idx} = core_req_tag[0];
end
`endif
@@ -246,190 +243,185 @@ module VX_cache #(
assign dram_req_tag = dram_req_addr;
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
genvar i;
generate
for (i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
wire curr_bank_core_rsp_valid;
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready;
wire curr_bank_core_rsp_valid;
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready;
wire curr_bank_dram_fill_rsp_valid;
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr;
wire curr_bank_dram_fill_rsp_ready;
wire curr_bank_dram_fill_rsp_valid;
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr;
wire curr_bank_dram_fill_rsp_ready;
wire curr_bank_dram_fill_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr;
wire curr_bank_dram_fill_req_ready;
wire curr_bank_dram_fill_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr;
wire curr_bank_dram_fill_req_ready;
wire curr_bank_dram_wb_req_valid;
wire [BANK_LINE_SIZE-1:0] curr_bank_dram_wb_req_byteen;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr;
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data;
wire curr_bank_dram_wb_req_ready;
wire curr_bank_dram_wb_req_valid;
wire [BANK_LINE_SIZE-1:0] curr_bank_dram_wb_req_byteen;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr;
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data;
wire curr_bank_dram_wb_req_ready;
wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_invalidate;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready;
wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_invalidate;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready;
wire curr_bank_snp_rsp_valid;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire curr_bank_snp_rsp_ready;
wire curr_bank_snp_rsp_valid;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire curr_bank_snp_rsp_ready;
wire curr_bank_core_req_ready;
wire curr_bank_core_req_ready;
// Core Req
assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQUESTS{core_req_ready}});
assign curr_bank_core_req_addr = core_req_addr;
assign curr_bank_core_req_rw = core_req_rw;
assign curr_bank_core_req_byteen = core_req_byteen;
assign curr_bank_core_req_data = core_req_data;
assign curr_bank_core_req_tag = core_req_tag;
assign per_bank_core_req_ready[i] = curr_bank_core_req_ready;
// Core Req
assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQUESTS{core_req_ready}});
assign curr_bank_core_req_addr = core_req_addr;
assign curr_bank_core_req_rw = core_req_rw;
assign curr_bank_core_req_byteen = core_req_byteen;
assign curr_bank_core_req_data = core_req_data;
assign curr_bank_core_req_tag = core_req_tag;
assign per_bank_core_req_ready[i] = curr_bank_core_req_ready;
// Core WB
assign curr_bank_core_rsp_ready = per_bank_core_rsp_ready[i];
assign per_bank_core_rsp_valid [i] = curr_bank_core_rsp_valid;
assign per_bank_core_rsp_tid [i] = curr_bank_core_rsp_tid;
assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag;
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
// Core WB
assign curr_bank_core_rsp_ready = per_bank_core_rsp_ready[i];
assign per_bank_core_rsp_valid [i] = curr_bank_core_rsp_valid;
assign per_bank_core_rsp_tid [i] = curr_bank_core_rsp_tid;
assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag;
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
// Dram fill request
assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid;
if (NUM_BANKS == 1) begin
assign per_bank_dram_fill_req_addr[i] = curr_bank_dram_fill_req_addr;
end else begin
assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i);
end
assign curr_bank_dram_fill_req_ready = dram_fill_req_ready;
// Dram fill request
assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid;
if (NUM_BANKS == 1) begin
assign per_bank_dram_fill_req_addr[i] = curr_bank_dram_fill_req_addr;
end else begin
assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i);
end
assign curr_bank_dram_fill_req_ready = dram_fill_req_ready;
// Dram fill response
if (NUM_BANKS == 1) begin
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid;
assign curr_bank_dram_fill_rsp_addr = dram_rsp_tag;
end else begin
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i);
assign curr_bank_dram_fill_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag);
end
assign curr_bank_dram_fill_rsp_data = dram_rsp_data;
assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready;
// Dram writeback request
assign per_bank_dram_wb_req_valid[i] = curr_bank_dram_wb_req_valid;
assign per_bank_dram_wb_req_byteen[i] = curr_bank_dram_wb_req_byteen;
if (NUM_BANKS == 1) begin
assign per_bank_dram_wb_req_addr[i] = curr_bank_dram_wb_req_addr;
end else begin
assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i);
end
assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data;
assign curr_bank_dram_wb_req_ready = per_bank_dram_wb_req_ready[i];
// Snoop request
if (NUM_BANKS == 1) begin
assign curr_bank_snp_req_valid = snp_req_valid_qual;
assign curr_bank_snp_req_addr = snp_req_addr_qual;
end else begin
assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i);
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual);
end
assign curr_bank_snp_req_invalidate = snp_req_invalidate_qual;
assign curr_bank_snp_req_tag = snp_req_tag_qual;
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
// Snoop response
assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid;
assign per_bank_snp_rsp_tag[i] = curr_bank_snp_rsp_tag;
assign curr_bank_snp_rsp_ready = per_bank_snp_rsp_ready[i];
VX_bank #(
.BANK_ID (i),
.CACHE_ID (CACHE_ID),
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.CREQ_SIZE (CREQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE),
.SNOOP_FORWARDING (SNOOP_FORWARDING),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) bank (
`SCOPE_SIGNALS_CACHE_BANK_BIND
.clk (clk),
.reset (reset),
// Core request
.core_req_valid (curr_bank_core_req_valid),
.core_req_rw (curr_bank_core_req_rw),
.core_req_byteen (curr_bank_core_req_byteen),
.core_req_addr (curr_bank_core_req_addr),
.core_req_data (curr_bank_core_req_data),
.core_req_tag (curr_bank_core_req_tag),
.core_req_ready (curr_bank_core_req_ready),
// Core response
.core_rsp_valid (curr_bank_core_rsp_valid),
.core_rsp_tid (curr_bank_core_rsp_tid),
.core_rsp_data (curr_bank_core_rsp_data),
.core_rsp_tag (curr_bank_core_rsp_tag),
.core_rsp_ready (curr_bank_core_rsp_ready),
// Dram fill request
.dram_fill_req_valid (curr_bank_dram_fill_req_valid),
.dram_fill_req_addr (curr_bank_dram_fill_req_addr),
.dram_fill_req_ready (curr_bank_dram_fill_req_ready),
// Dram fill response
if (NUM_BANKS == 1) begin
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid;
assign curr_bank_dram_fill_rsp_addr = dram_rsp_tag;
end else begin
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i);
assign curr_bank_dram_fill_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag);
end
assign curr_bank_dram_fill_rsp_data = dram_rsp_data;
assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready;
.dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid),
.dram_fill_rsp_data (curr_bank_dram_fill_rsp_data),
.dram_fill_rsp_addr (curr_bank_dram_fill_rsp_addr),
.dram_fill_rsp_ready (curr_bank_dram_fill_rsp_ready),
// Dram writeback request
assign per_bank_dram_wb_req_valid[i] = curr_bank_dram_wb_req_valid;
assign per_bank_dram_wb_req_byteen[i] = curr_bank_dram_wb_req_byteen;
if (NUM_BANKS == 1) begin
assign per_bank_dram_wb_req_addr[i] = curr_bank_dram_wb_req_addr;
end else begin
assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i);
end
assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data;
assign curr_bank_dram_wb_req_ready = per_bank_dram_wb_req_ready[i];
// Dram writeback request
.dram_wb_req_valid (curr_bank_dram_wb_req_valid),
.dram_wb_req_byteen (curr_bank_dram_wb_req_byteen),
.dram_wb_req_addr (curr_bank_dram_wb_req_addr),
.dram_wb_req_data (curr_bank_dram_wb_req_data),
.dram_wb_req_ready (curr_bank_dram_wb_req_ready),
// Snoop request
if (NUM_BANKS == 1) begin
assign curr_bank_snp_req_valid = snp_req_valid_qual;
assign curr_bank_snp_req_addr = snp_req_addr_qual;
end else begin
assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i);
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual);
end
assign curr_bank_snp_req_invalidate = snp_req_invalidate_qual;
assign curr_bank_snp_req_tag = snp_req_tag_qual;
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
.snp_req_valid (curr_bank_snp_req_valid),
.snp_req_addr (curr_bank_snp_req_addr),
.snp_req_invalidate (curr_bank_snp_req_invalidate),
.snp_req_tag (curr_bank_snp_req_tag),
.snp_req_ready (curr_bank_snp_req_ready),
// Snoop response
assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid;
assign per_bank_snp_rsp_tag[i] = curr_bank_snp_rsp_tag;
assign curr_bank_snp_rsp_ready = per_bank_snp_rsp_ready[i];
VX_bank #(
.BANK_ID (i),
.CACHE_ID (CACHE_ID),
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.CREQ_SIZE (CREQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE),
.SNOOP_FORWARDING (SNOOP_FORWARDING),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) bank (
`SCOPE_SIGNALS_CACHE_BANK_BIND
.clk (clk),
.reset (reset),
// Core request
.core_req_valid (curr_bank_core_req_valid),
.core_req_rw (curr_bank_core_req_rw),
.core_req_byteen (curr_bank_core_req_byteen),
.core_req_addr (curr_bank_core_req_addr),
.core_req_data (curr_bank_core_req_data),
.core_req_tag (curr_bank_core_req_tag),
.core_req_ready (curr_bank_core_req_ready),
// Core response
.core_rsp_valid (curr_bank_core_rsp_valid),
.core_rsp_tid (curr_bank_core_rsp_tid),
.core_rsp_data (curr_bank_core_rsp_data),
.core_rsp_tag (curr_bank_core_rsp_tag),
.core_rsp_ready (curr_bank_core_rsp_ready),
// Dram fill request
.dram_fill_req_valid (curr_bank_dram_fill_req_valid),
.dram_fill_req_addr (curr_bank_dram_fill_req_addr),
.dram_fill_req_ready (curr_bank_dram_fill_req_ready),
// Dram fill response
.dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid),
.dram_fill_rsp_data (curr_bank_dram_fill_rsp_data),
.dram_fill_rsp_addr (curr_bank_dram_fill_rsp_addr),
.dram_fill_rsp_ready (curr_bank_dram_fill_rsp_ready),
// Dram writeback request
.dram_wb_req_valid (curr_bank_dram_wb_req_valid),
.dram_wb_req_byteen (curr_bank_dram_wb_req_byteen),
.dram_wb_req_addr (curr_bank_dram_wb_req_addr),
.dram_wb_req_data (curr_bank_dram_wb_req_data),
.dram_wb_req_ready (curr_bank_dram_wb_req_ready),
// Snoop request
.snp_req_valid (curr_bank_snp_req_valid),
.snp_req_addr (curr_bank_snp_req_addr),
.snp_req_invalidate (curr_bank_snp_req_invalidate),
.snp_req_tag (curr_bank_snp_req_tag),
.snp_req_ready (curr_bank_snp_req_ready),
// Snoop response
.snp_rsp_valid (curr_bank_snp_rsp_valid),
.snp_rsp_tag (curr_bank_snp_rsp_tag),
.snp_rsp_ready (curr_bank_snp_rsp_ready)
);
end
endgenerate
// Snoop response
.snp_rsp_valid (curr_bank_snp_rsp_valid),
.snp_rsp_tag (curr_bank_snp_rsp_tag),
.snp_rsp_ready (curr_bank_snp_rsp_ready)
);
end
VX_cache_dram_req_arb #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),

View File

@@ -1,10 +1,17 @@
`ifndef VX_CACHE_CONFIG
`define VX_CACHE_CONFIG
`include "VX_platform.vh"
`include "VX_scope.vh"
`ifdef DBG_CORE_REQ_INFO
`include "VX_define.vh"
`endif
`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_REQ_TAG_WIDTH)
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
// tag rw byteen tid
`define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS)
@@ -74,4 +81,6 @@
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
`endif

View File

@@ -1,4 +1,3 @@
`include "VX_cache_config.vh"
module VX_cache_core_req_bank_sel #(
@@ -15,31 +14,33 @@ module VX_cache_core_req_bank_sel #(
`IGNORE_WARNINGS_BEGIN
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
`IGNORE_WARNINGS_END
input wire [NUM_BANKS-1:0] per_bank_ready,
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
input wire [NUM_BANKS-1:0] per_bank_ready,
output wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
output wire core_req_ready
);
integer i;
reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r;
if (NUM_BANKS == 1) begin
always @(*) begin
per_bank_valid = 0;
for (i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid[0][i] = core_req_valid[i];
per_bank_valid_r = 0;
for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid_r[0][i] = core_req_valid[i];
end
end
assign core_req_ready = per_bank_ready;
end else begin
reg [NUM_BANKS-1:0] per_bank_ready_sel;
always @(*) begin
per_bank_valid = 0;
per_bank_valid_r = 0;
per_bank_ready_sel = {NUM_BANKS{1'b1}};
for (i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0;
end
end
assign core_req_ready = & (per_bank_ready | per_bank_ready_sel);
end
assign per_bank_valid = per_bank_valid_r;
endmodule

View File

@@ -23,9 +23,9 @@ module VX_cache_core_rsp_merge #(
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
// Core Writeback
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [NUM_REQUESTS-1:0] core_rsp_valid,
output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready
);
@@ -41,47 +41,61 @@ module VX_cache_core_rsp_merge #(
`UNUSED_PIN (grant_onehot)
);
reg [NUM_BANKS-1:0] per_bank_core_rsp_pop_unqual;
reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual;
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_BANKS-1:0] core_rsp_bank_select;
assign per_bank_core_rsp_ready = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}};
integer i;
wire stall = ~core_rsp_ready && (| core_rsp_valid);
if (CORE_TAG_ID_BITS != 0) begin
assign core_rsp_tag = per_bank_core_rsp_tag[main_bank_index];
always @(*) begin
core_rsp_valid = 0;
core_rsp_data = 0;
for (i = 0; i < NUM_BANKS; i++) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 0;
core_rsp_tag_unqual = per_bank_core_rsp_tag[main_bank_index];
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
per_bank_core_rsp_pop_unqual[i] = 1;
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_bank_select[i] = 1;
end else begin
per_bank_core_rsp_pop_unqual[i] = 0;
core_rsp_bank_select[i] = 0;
end
end
end
end else begin
always @(*) begin
core_rsp_valid = 0;
core_rsp_data = 0;
core_rsp_tag = 0;
for (i = 0; i < NUM_BANKS; i++) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 0;
core_rsp_tag_unqual = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
&& !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `BANK_BITS'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
per_bank_core_rsp_pop_unqual[i] = 1;
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_bank_select[i] = 1;
end else begin
per_bank_core_rsp_pop_unqual[i] = 0;
core_rsp_bank_select[i] = 0;
end
end
end
end
// Register stage on the merged core response. N is the packed width of
// {valid, data, tag}, matching the concatenations wired to .in and .out.
// NOTE(review): presumably the register holds its contents while stall is high -- confirm in VX_generic_register.
VX_generic_register #(
.N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH))
) core_wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);
// A bank's response is accepted only when that bank was selected this cycle and the stage is not stalled.
assign per_bank_core_rsp_ready = core_rsp_bank_select & {NUM_BANKS{~stall}};
endmodule

View File

@@ -106,8 +106,7 @@ module VX_cache_dram_req_arb #(
`UNUSED_PIN (grant_onehot)
);
genvar i;
for (i = 0; i < NUM_BANKS; i++) begin
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_dram_wb_req_ready[i] = dram_req_ready && (dwb_bank == `BANK_BITS'(i));
end

View File

@@ -41,7 +41,7 @@ module VX_cache_miss_resrv #(
input wire is_fill_st1,
input wire[`LINE_ADDR_WIDTH-1:0] fill_addr_st1,
output wire pending_hazard,
output wire pending_hazard_st1,
// Miss dequeue
input wire miss_resrv_pop,
@@ -77,16 +77,13 @@ module VX_cache_miss_resrv #(
reg [MRVQ_SIZE-1:0] make_ready;
reg [MRVQ_SIZE-1:0] make_ready_push;
reg [MRVQ_SIZE-1:0] valid_address_match;
for (genvar i = 0; i < MRVQ_SIZE; i++) begin
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
end
genvar i;
generate
for (i = 0; i < MRVQ_SIZE; i++) begin
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
end
endgenerate
assign pending_hazard = |(valid_address_match);
assign pending_hazard_st1 = |(valid_address_match);
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
@@ -123,7 +120,6 @@ module VX_cache_miss_resrv #(
head_ptr <= 0;
tail_ptr <= 0;
end else begin
if (mrvq_push) begin
valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= mrvq_init_ready_state;
@@ -159,11 +155,10 @@ module VX_cache_miss_resrv #(
end
`ifdef DBG_PRINT_CACHE_MSRQ
integer j;
always @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
$write("%t: bank%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
for (j = 0; j < MRVQ_SIZE; j++) begin
$write("%t: cache%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
for (integer j = 0; j < MRVQ_SIZE; j++) begin
if (valid_table[j]) begin
$write(" ");
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");

View File

@@ -41,8 +41,8 @@ module VX_snp_forwarder #(
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr, dbg_sfq_write_addr;
wire sfq_push, sfq_pop, sfq_full;
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr;
wire sfq_acquire, sfq_release, sfq_full;
wire fwdin_valid;
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag;
@@ -56,38 +56,35 @@ module VX_snp_forwarder #(
assign sfq_read_addr = fwdin_tag;
assign sfq_push = snp_req_valid && !sfq_full && fwdout_ready;
assign sfq_pop = snp_rsp_valid;
assign sfq_acquire = snp_req_valid && !sfq_full && fwdout_ready;
assign sfq_release = snp_rsp_valid;
VX_indexable_queue #(
.DATAW (`LOG2UP(SNRQ_SIZE) + 1 +`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH),
VX_cam_buffer #(
.DATAW (`DRAM_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
.SIZE (SNRQ_SIZE)
) snp_fwd_queue (
.clk (clk),
.reset (reset),
.write_data ({sfq_write_addr, snp_req_addr, snp_req_invalidate, snp_req_tag}),
.write_addr (sfq_write_addr),
.push (sfq_push),
.pop (sfq_pop),
.full (sfq_full),
.read_addr (sfq_read_addr),
.read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
`UNUSED_PIN (empty)
) snp_fwd_cam (
.clk (clk),
.reset (reset),
.write_addr (sfq_write_addr),
.acquire_slot (sfq_acquire),
.read_addr (sfq_read_addr),
.write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
.read_data ({snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
.release_addr (sfq_read_addr),
.release_slot (sfq_release),
.full (sfq_full)
);
always @(posedge clk) begin
if (sfq_push) begin
if (sfq_acquire) begin
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS;
end
if (fwdin_fire) begin
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
assert(sfq_read_addr == dbg_sfq_write_addr);
end
end
genvar i;
for (i = 0; i < NUM_REQUESTS; i++) begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdout_valid[i] = snp_req_valid && snp_req_ready;
assign snp_fwdout_addr[i] = snp_req_addr;
assign snp_fwdout_invalidate[i] = snp_req_invalidate;
@@ -112,7 +109,7 @@ module VX_snp_forwarder #(
assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
for (i = 0; i < NUM_REQUESTS; i++) begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
end

View File

@@ -34,8 +34,7 @@ module VX_snp_rsp_arb #(
assign snp_rsp_valid = fsq_valid;
assign snp_rsp_tag = per_bank_snp_rsp_tag[fsq_bank];
genvar i;
for (i = 0; i < NUM_BANKS; i++) begin
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (fsq_bank == `BANK_BITS'(i));
end

View File

@@ -1,75 +1,80 @@
`include "VX_cache_config.vh"
module VX_tag_data_access #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
parameter CORE_TAG_ID_BITS = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 0,
parameter CACHE_SIZE = 0,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 0,
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 0,
parameter NUM_BANKS = 0,
// Size of a word in bytes
parameter WORD_SIZE = 0,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 0,
parameter WORD_SIZE = 0,
// Enable cache writeable
parameter WRITE_ENABLE = 0,
parameter WRITE_ENABLE = 0,
// Enable dram update
parameter DRAM_ENABLE = 0
parameter DRAM_ENABLE = 0
) (
input wire clk,
input wire reset,
`ifdef DBG_CORE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc_st1,
input wire[`NR_BITS-1:0] debug_rd_st1,
input wire[`NW_BITS-1:0] debug_wid_st1,
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1,
`IGNORE_WARNINGS_END
`endif
input wire stall,
input wire is_snp_st1e,
input wire snp_invalidate_st1e,
input wire is_snp_st1,
input wire snp_invalidate_st1,
input wire stall_bank_pipe,
input wire force_request_miss_st1e,
input wire force_request_miss_st1,
input wire[`LINE_SELECT_BITS-1:0] readaddr_st10,
input wire[`LINE_ADDR_WIDTH-1:0] writeaddr_st1e,
input wire[`LINE_SELECT_BITS-1:0] readaddr_st1,
input wire[`LINE_ADDR_WIDTH-1:0] writeaddr_st1,
input wire valid_req_st1e,
input wire writefill_st1e,
input wire[`WORD_WIDTH-1:0] writeword_st1e,
input wire[`BANK_LINE_WIDTH-1:0] writedata_st1e,
input wire valid_req_st1,
input wire writefill_st1,
input wire[`WORD_WIDTH-1:0] writeword_st1,
input wire[`BANK_LINE_WIDTH-1:0] writedata_st1,
`IGNORE_WARNINGS_BEGIN
input wire mem_rw_st1e,
input wire[WORD_SIZE-1:0] mem_byteen_st1e,
input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_st1e,
input wire mem_rw_st1,
input wire[WORD_SIZE-1:0] mem_byteen_st1,
input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_st1,
`IGNORE_WARNINGS_END
output wire[`WORD_WIDTH-1:0] readword_st1e,
output wire[`BANK_LINE_WIDTH-1:0] readdata_st1e,
output wire[`TAG_SELECT_BITS-1:0] readtag_st1e,
output wire miss_st1e,
output wire dirty_st1e,
output wire[BANK_LINE_SIZE-1:0] dirtyb_st1e,
output wire fill_saw_dirty_st1e,
output wire snp_to_mrvq_st1e,
output wire mrvq_init_ready_state_st1e
output wire[`WORD_WIDTH-1:0] readword_st1,
output wire[`BANK_LINE_WIDTH-1:0] readdata_st1,
output wire[`TAG_SELECT_BITS-1:0] readtag_st1,
output wire miss_st1,
output wire dirty_st1,
output wire[BANK_LINE_SIZE-1:0] dirtyb_st1,
output wire fill_saw_dirty_st1,
output wire snp_to_mrvq_st1,
output wire mrvq_init_ready_state_st1
);
wire read_valid_st1c[STAGE_1_CYCLES-1:0];
wire read_dirty_st1c[STAGE_1_CYCLES-1:0];
wire[BANK_LINE_SIZE-1:0] read_dirtyb_st1c[STAGE_1_CYCLES-1:0];
wire[`TAG_SELECT_BITS-1:0] read_tag_st1c [STAGE_1_CYCLES-1:0];
wire[`BANK_LINE_WIDTH-1:0] read_data_st1c [STAGE_1_CYCLES-1:0];
`UNUSED_VAR (stall)
wire qual_read_valid_st1;
wire qual_read_dirty_st1;
wire[BANK_LINE_SIZE-1:0] qual_read_dirtyb_st1;
wire[`TAG_SELECT_BITS-1:0] qual_read_tag_st1;
wire[`BANK_LINE_WIDTH-1:0] qual_read_data_st1;
wire use_read_valid_st1e;
wire use_read_dirty_st1e;
wire[BANK_LINE_SIZE-1:0] use_read_dirtyb_st1e;
wire[`TAG_SELECT_BITS-1:0] use_read_tag_st1e;
wire[`BANK_LINE_WIDTH-1:0] use_read_data_st1e;
wire use_read_valid_st1;
wire use_read_dirty_st1;
wire[BANK_LINE_SIZE-1:0] use_read_dirtyb_st1;
wire[`TAG_SELECT_BITS-1:0] use_read_tag_st1;
wire[`BANK_LINE_WIDTH-1:0] use_read_data_st1;
wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] use_write_enable;
wire[`BANK_LINE_WIDTH-1:0] use_write_data;
@@ -77,23 +82,23 @@ module VX_tag_data_access #(
wire invalidate_line;
wire tags_match;
wire real_writefill = valid_req_st1e && writefill_st1e
&& ((!use_read_valid_st1e) || (use_read_valid_st1e && !tags_match));
wire real_writefill = valid_req_st1 && writefill_st1
&& ((~use_read_valid_st1) || (use_read_valid_st1 && ~tags_match));
wire[`TAG_SELECT_BITS-1:0] writetag_st1e = writeaddr_st1e[`TAG_LINE_ADDR_RNG];
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
wire[`TAG_SELECT_BITS-1:0] writetag_st1 = writeaddr_st1[`TAG_LINE_ADDR_RNG];
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1 = writeaddr_st1[`LINE_SELECT_BITS-1:0];
VX_tag_data_structure #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE)
) tag_data_structure (
VX_tag_data_store #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE)
) tag_data_store (
.clk (clk),
.reset (reset),
.stall_bank_pipe(stall_bank_pipe),
.read_addr (readaddr_st10),
.read_addr (readaddr_st1),
.read_valid (qual_read_valid_st1),
.read_dirty (qual_read_dirty_st1),
.read_dirtyb (qual_read_dirtyb_st1),
@@ -103,98 +108,93 @@ module VX_tag_data_access #(
.invalidate (invalidate_line),
.write_enable(use_write_enable),
.write_fill (real_writefill),
.write_addr (writeladdr_st1e),
.tag_index (writetag_st1e),
.write_addr (writeladdr_st1),
.tag_index (writetag_st1),
.write_data (use_write_data),
.fill_sent (fill_sent)
);
VX_generic_register #(
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH),
.PASSTHRU(1)
) s0_1_c0 (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({qual_read_valid_st1, qual_read_dirty_st1, qual_read_dirtyb_st1, qual_read_tag_st1, qual_read_data_st1}),
.out ({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
);
genvar i;
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
VX_generic_register #(
.N( 1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
) s0_1_cc (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_dirtyb_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}),
.out ({read_valid_st1c[i], read_dirty_st1c[i], read_dirtyb_st1c[i], read_tag_st1c[i], read_data_st1c[i]})
);
end
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || !DRAM_ENABLE; // If shared memory, always valid
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writetag_st1e; // Tag is always the same in SM
assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1];
assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1];
assign use_read_valid_st1 = qual_read_valid_st1 || !DRAM_ENABLE; // If shared memory, always valid
assign use_read_dirty_st1 = qual_read_dirty_st1 && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache
assign use_read_tag_st1 = DRAM_ENABLE ? qual_read_tag_st1 : writetag_st1; // Tag is always the same in SM
assign use_read_dirtyb_st1= qual_read_dirtyb_st1;
assign use_read_data_st1 = qual_read_data_st1;
if (`WORD_SELECT_WIDTH != 0) begin
assign readword_st1e = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
wire [`WORD_WIDTH-1:0] readword = use_read_data_st1[wordsel_st1 * `WORD_WIDTH +: `WORD_WIDTH];
for (genvar i = 0; i < WORD_SIZE; i++) begin
assign readword_st1[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1[i]}};
end
end else begin
assign readword_st1e = use_read_data_st1e;
for (genvar i = 0; i < WORD_SIZE; i++) begin
assign readword_st1[i * 8 +: 8] = use_read_data_st1[i * 8 +: 8] & {8{mem_byteen_st1[i]}};
end
end
wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] we;
wire [`BANK_LINE_WIDTH-1:0] data_write;
wire should_write = mem_rw_st1e
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e
&& !real_writefill;
wire should_write = mem_rw_st1
&& valid_req_st1
&& use_read_valid_st1
&& ~miss_st1
&& ~is_snp_st1
&& ~real_writefill;
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i)))
for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1 == `UP(`WORD_SELECT_WIDTH)'(i)))
&& should_write;
assign we[i] = real_writefill ? {WORD_SIZE{1'b1}} :
normal_write ? mem_byteen_st1e :
normal_write ? mem_byteen_st1 :
{WORD_SIZE{1'b0}};
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1e;
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1;
end
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;
assign use_write_enable = (writefill_st1 && ~real_writefill) ? 0 : we;
assign use_write_data = data_write;
// use "case equality" to handle uninitialized tag when block entry is not valid
assign tags_match = (writetag_st1e === use_read_tag_st1e);
assign tags_match = (writetag_st1 === use_read_tag_st1);
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && !force_request_miss_st1e;
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;
wire req_miss = valid_req_st1e && !is_snp_st1e && use_read_valid_st1e && !writefill_st1e && !tags_match;
wire snoop_hit_no_pending = valid_req_st1 && is_snp_st1 && use_read_valid_st1 && tags_match && (use_read_dirty_st1 || snp_invalidate_st1) && ~force_request_miss_st1;
wire req_invalid = valid_req_st1 && ~is_snp_st1 && ~use_read_valid_st1 && ~writefill_st1;
wire req_miss = valid_req_st1 && ~is_snp_st1 && use_read_valid_st1 && ~writefill_st1 && ~tags_match;
wire real_miss = req_invalid || req_miss;
wire force_core_miss = (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e && !real_miss);
assign snp_to_mrvq_st1e = valid_req_st1e && is_snp_st1e && force_request_miss_st1e;
wire force_core_miss = (force_request_miss_st1 && ~is_snp_st1 && ~writefill_st1 && valid_req_st1 && ~real_miss);
assign snp_to_mrvq_st1 = valid_req_st1 && is_snp_st1 && force_request_miss_st1;
// The second term is basically saying always make an entry ready if there's already another entry waiting, even if you yourself see a miss
assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e
|| (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e);
assign miss_st1e = real_miss || snoop_hit_no_pending || force_core_miss;
assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e;
assign dirtyb_st1e = use_read_dirtyb_st1e;
assign readdata_st1e = use_read_data_st1e;
assign readtag_st1e = use_read_tag_st1e;
assign fill_sent = miss_st1e;
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
assign invalidate_line = snoop_hit_no_pending;
endmodule
assign mrvq_init_ready_state_st1 = snp_to_mrvq_st1
|| (force_request_miss_st1 && ~is_snp_st1 && ~writefill_st1 && valid_req_st1);
assign miss_st1 = real_miss || snoop_hit_no_pending || force_core_miss;
assign dirty_st1 = valid_req_st1 && use_read_valid_st1 && use_read_dirty_st1;
assign dirtyb_st1 = use_read_dirtyb_st1;
assign readdata_st1 = use_read_data_st1;
assign readtag_st1 = use_read_tag_st1;
assign fill_sent = miss_st1;
assign fill_saw_dirty_st1 = real_writefill && dirty_st1;
assign invalidate_line = snoop_hit_no_pending;
`ifdef DBG_PRINT_CACHE_BANK
always @(posedge clk) begin
if (valid_req_st1) begin
if ((| use_write_enable)) begin
if (writefill_st1) begin
$display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
end else begin
$display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
end
end else
if (miss_st1) begin
$display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
end else begin
$display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
end
end
end
`endif
endmodule

View File

@@ -1,6 +1,6 @@
`include "VX_cache_config.vh"
module VX_tag_data_structure #(
module VX_tag_data_store #(
// Size of cache in bytes
parameter CACHE_SIZE = 0,
// Size of line inside a bank in bytes
@@ -44,10 +44,9 @@ module VX_tag_data_structure #(
wire do_write = (| write_enable);
integer i, j;
always @(posedge clk) begin
if (reset) begin
for (i = 0; i < `BANK_LINE_COUNT; i++) begin
for (integer i = 0; i < `BANK_LINE_COUNT; i++) begin
valid[i] <= 0;
dirty[i] <= 0;
end
@@ -71,10 +70,10 @@ module VX_tag_data_structure #(
valid[write_addr] <= 0;
end
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
for (j = 0; j < WORD_SIZE; j++) begin
if (write_enable[i][j]) begin
data[write_addr][i][j] <= write_data[i * `WORD_WIDTH + j * 8 +: 8];
for (integer j = 0; j < `BANK_LINE_WORDS; j++) begin
for (integer i = 0; i < WORD_SIZE; i++) begin
if (write_enable[j][i]) begin
data[write_addr][j][i] <= write_data[j * `WORD_WIDTH + i * 8 +: 8];
end
end
end

View File

@@ -0,0 +1,187 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
// Multi-lane single-precision add / subtract / multiply unit.
//
// Every lane computes all three results in parallel; the operation selects
// (do_sub, do_mul) travel with the tag and valid bit through a shift register
// of depth `LATENCY_FADDMUL, and the delayed selects pick which result is
// driven onto `result`.
//
// Under QUARTUS the arithmetic is done by twentynm_fp_mac primitives,
// otherwise by the dpi_fadd / dpi_fsub / dpi_fmul DPI calls.
// NOTE(review): the arithmetic latency is presumably equal to
// `LATENCY_FADDMUL so that data and tag line up - confirm against the
// primitive configuration / DPI model.
module VX_fp_addmul #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire do_sub,
    input wire do_mul,

    input wire [LANES-1:0][31:0] dataa,
    input wire [LANES-1:0][31:0] datab,
    output wire [LANES-1:0][31:0] result,

    output wire [TAGW-1:0] tag_out,

    input wire ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is held up by a consumer
    // that is not ready.
    wire pipe_en = ready_out || ~valid_out;

    // Operation selects, delayed by the shift register below.
    reg sel_sub, sel_mul;

    for (genvar lane = 0; lane < LANES; lane++) begin

        wire [31:0] sum;
        wire [31:0] diff;
        wire [31:0] prod;

    `ifdef QUARTUS

        // ---- adder: operands on ax / ay ----
        twentynm_fp_mac mac_fp_add (
            // operands
            .ax                 (dataa[lane]),
            .ay                 (datab[lane]),
            .az                 (),
            // clocking
            .clk                ({2'b00, clk}),
            .ena                ({2'b11, pipe_en}),
            .aclr               (2'b00),
            // unused inputs
            .accumulate         (),
            .chainin            (),
            .chainin_overflow   (),
            .chainin_invalid    (),
            .chainin_underflow  (),
            .chainin_inexact    (),
            // result
            .resulta            (sum),
            // unused outputs
            .overflow           (),
            .invalid            (),
            .underflow          (),
            .inexact            (),
            .chainout           (),
            .chainout_overflow  (),
            .chainout_invalid   (),
            .chainout_underflow (),
            .chainout_inexact   ()
        );
        defparam mac_fp_add.operation_mode       = "sp_add";
        defparam mac_fp_add.use_chainin          = "false";
        defparam mac_fp_add.adder_subtract       = "false";
        defparam mac_fp_add.ax_clock             = "0";
        defparam mac_fp_add.ay_clock             = "0";
        defparam mac_fp_add.az_clock             = "none";
        defparam mac_fp_add.output_clock         = "0";
        defparam mac_fp_add.accumulate_clock     = "none";
        defparam mac_fp_add.ax_chainin_pl_clock  = "none";
        defparam mac_fp_add.accum_pipeline_clock = "none";
        defparam mac_fp_add.mult_pipeline_clock  = "none";
        defparam mac_fp_add.adder_input_clock    = "0";
        defparam mac_fp_add.accum_adder_clock    = "none";

        // ---- subtractor: same wiring as the adder, adder_subtract = "true" ----
        twentynm_fp_mac mac_fp_sub (
            // operands
            .ax                 (dataa[lane]),
            .ay                 (datab[lane]),
            .az                 (),
            // clocking
            .clk                ({2'b00, clk}),
            .ena                ({2'b11, pipe_en}),
            .aclr               (2'b00),
            // unused inputs
            .accumulate         (),
            .chainin            (),
            .chainin_overflow   (),
            .chainin_invalid    (),
            .chainin_underflow  (),
            .chainin_inexact    (),
            // result
            .resulta            (diff),
            // unused outputs
            .overflow           (),
            .invalid            (),
            .underflow          (),
            .inexact            (),
            .chainout           (),
            .chainout_overflow  (),
            .chainout_invalid   (),
            .chainout_underflow (),
            .chainout_inexact   ()
        );
        defparam mac_fp_sub.operation_mode       = "sp_add";
        defparam mac_fp_sub.use_chainin          = "false";
        defparam mac_fp_sub.adder_subtract       = "true";
        defparam mac_fp_sub.ax_clock             = "0";
        defparam mac_fp_sub.ay_clock             = "0";
        defparam mac_fp_sub.az_clock             = "none";
        defparam mac_fp_sub.output_clock         = "0";
        defparam mac_fp_sub.accumulate_clock     = "none";
        defparam mac_fp_sub.ax_chainin_pl_clock  = "none";
        defparam mac_fp_sub.accum_pipeline_clock = "none";
        defparam mac_fp_sub.mult_pipeline_clock  = "none";
        defparam mac_fp_sub.adder_input_clock    = "0";
        defparam mac_fp_sub.accum_adder_clock    = "none";

        // ---- multiplier: operands on ay / az, ax left unconnected ----
        twentynm_fp_mac mac_fp_mul (
            // operands
            .ax                 (),
            .ay                 (datab[lane]),
            .az                 (dataa[lane]),
            // clocking
            .clk                ({2'b00, clk}),
            .ena                ({2'b11, pipe_en}),
            .aclr               (2'b00),
            // unused inputs
            .accumulate         (),
            .chainin            (),
            .chainin_overflow   (),
            .chainin_invalid    (),
            .chainin_underflow  (),
            .chainin_inexact    (),
            // result
            .resulta            (prod),
            // unused outputs
            .overflow           (),
            .invalid            (),
            .underflow          (),
            .inexact            (),
            .chainout           (),
            .chainout_overflow  (),
            .chainout_invalid   (),
            .chainout_underflow (),
            .chainout_inexact   ()
        );
        defparam mac_fp_mul.operation_mode       = "sp_mult";
        defparam mac_fp_mul.use_chainin          = "false";
        defparam mac_fp_mul.adder_subtract       = "false";
        defparam mac_fp_mul.ax_clock             = "none";
        defparam mac_fp_mul.ay_clock             = "0";
        defparam mac_fp_mul.az_clock             = "0";
        defparam mac_fp_mul.output_clock         = "0";
        defparam mac_fp_mul.accumulate_clock     = "none";
        defparam mac_fp_mul.ax_chainin_pl_clock  = "none";
        defparam mac_fp_mul.accum_pipeline_clock = "none";
        defparam mac_fp_mul.mult_pipeline_clock  = "0";
        defparam mac_fp_mul.adder_input_clock    = "none";
        defparam mac_fp_mul.accum_adder_clock    = "none";

    `else

        // Simulation model: one DPI call per operation. The first argument
        // is an index built from a per-operation constant and the lane number.
        always @(posedge clk) begin
            dpi_fadd(0 * LANES + lane, pipe_en, dataa[lane], datab[lane], sum);
            dpi_fsub(1 * LANES + lane, pipe_en, dataa[lane], datab[lane], diff);
            dpi_fmul(2 * LANES + lane, pipe_en, dataa[lane], datab[lane], prod);
        end

    `endif

        // Multiply takes priority over subtract, subtract over add.
        wire [31:0] addsub = sel_sub ? diff : sum;
        assign result[lane] = sel_mul ? prod : addsub;
    end

    // Tag, valid and the operation selects ride alongside the data.
    VX_shift_register #(
        .DATAW (TAGW + 3),
        .DEPTH (`LATENCY_FADDMUL)
    ) shift_reg (
        .clk    (clk),
        .reset  (reset),
        .enable (pipe_en),
        .in     ({tag_in,  valid_in,  do_sub,  do_mul}),
        .out    ({tag_out, valid_out, sel_sub, sel_mul})
    );

    // New inputs are accepted whenever the pipeline is free to advance.
    assign ready_in = pipe_en;

endmodule

View File

@@ -0,0 +1,61 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
// Multi-lane single-precision divider (dataa / datab operands per lane).
//
// Under QUARTUS each lane instantiates an acl_fdiv core; otherwise the
// dpi_fdiv DPI call is used. The tag and valid bit are delayed by a shift
// register of depth `LATENCY_FDIV.
// NOTE(review): the divider latency is presumably equal to `LATENCY_FDIV -
// confirm against the acl_fdiv core / DPI model.
module VX_fp_div #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [LANES-1:0][31:0] dataa,
    input wire [LANES-1:0][31:0] datab,
    output wire [LANES-1:0][31:0] result,

    output wire [TAGW-1:0] tag_out,

    input wire ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is held up by a consumer
    // that is not ready.
    wire pipe_en = ready_out || ~valid_out;

    for (genvar lane = 0; lane < LANES; lane++) begin
    `ifdef QUARTUS
        acl_fdiv fdiv (
            .clk    (clk),
            .areset (1'b0),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .b      (datab[lane]),
            .q      (result[lane])
        );
    `else
        // Simulation model; the first argument is an index built from a
        // per-operation constant and the lane number.
        always @(posedge clk) begin
            dpi_fdiv(8 * LANES + lane, pipe_en, dataa[lane], datab[lane], result[lane]);
        end
    `endif
    end

    // Tag and valid ride alongside the data.
    VX_shift_register #(
        .DATAW (TAGW + 1),
        .DEPTH (`LATENCY_FDIV)
    ) shift_reg (
        .clk    (clk),
        .reset  (reset),
        .enable (pipe_en),
        .in     ({tag_in,  valid_in}),
        .out    ({tag_out, valid_out})
    );

    // New inputs are accepted whenever the pipeline is free to advance.
    assign ready_in = pipe_en;

endmodule

View File

@@ -0,0 +1,244 @@
`include "VX_define.vh"
// Floating-point unit wrapper built from eight sub-units ("cores").
//
// op_type is decoded into a core index plus per-core modifiers; valid_in is
// forwarded only to the selected core. On the output side the lowest-indexed
// core with a valid result is forwarded and is the only one that sees
// ready_out.
//
// NOTE(review): `frm` is declared with `MOD_BITS here, while VX_fp_noncomp
// (which receives it) declares the same port with `FRM_BITS - confirm both
// macros have the same value.
module VX_fp_fpga #(
    parameter TAGW = 1
) (
    input wire clk,
    input wire reset,

    input wire valid_in,
    output wire ready_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`FPU_BITS-1:0] op_type,
    input wire [`MOD_BITS-1:0] frm,

    input wire [`NUM_THREADS-1:0][31:0] dataa,
    input wire [`NUM_THREADS-1:0][31:0] datab,
    input wire [`NUM_THREADS-1:0][31:0] datac,
    output wire [`NUM_THREADS-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [`NUM_THREADS-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire ready_out,
    output wire valid_out
);
    localparam NUM_FPC  = 8;
    localparam FPC_BITS = `LOG2UP(NUM_FPC);

    // Index of each sub-unit inside the per_core_* vectors.
    localparam CORE_NONCOMP = 0;
    localparam CORE_ADDMUL  = 1;
    localparam CORE_MADD    = 2;
    localparam CORE_NMADD   = 3;
    localparam CORE_DIV     = 4;
    localparam CORE_SQRT    = 5;
    localparam CORE_FTOI    = 6;
    localparam CORE_ITOF    = 7;

    wire [NUM_FPC-1:0]                          per_core_ready_in;
    wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0]  per_core_result;
    wire [NUM_FPC-1:0][TAGW-1:0]                per_core_tag_out;
    reg  [NUM_FPC-1:0]                          per_core_ready_out;
    wire [NUM_FPC-1:0]                          per_core_valid_out;

    wire                          fpnew_has_fflags;
    fflags_t [`NUM_THREADS-1:0]   fpnew_fflags;

    reg [FPC_BITS-1:0] core_select;
    reg                do_sub, do_mul;
    reg                is_signed;

    // ---- operation decode ----
    // Modifiers that a given operation does not use are left at 'x.
    always @(*) begin
        is_signed   = 'x;
        do_mul      = 'x;
        do_sub      = 'x;
        core_select = 'x;
        case (op_type)
            `FPU_ADD: begin
                core_select = CORE_ADDMUL;
                do_sub      = 0;
                do_mul      = 0;
            end
            `FPU_SUB: begin
                core_select = CORE_ADDMUL;
                do_sub      = 1;
                do_mul      = 0;
            end
            `FPU_MUL: begin
                core_select = CORE_ADDMUL;
                do_sub      = 0;
                do_mul      = 1;
            end
            `FPU_MADD: begin
                core_select = CORE_MADD;
                do_sub      = 0;
            end
            `FPU_MSUB: begin
                core_select = CORE_MADD;
                do_sub      = 1;
            end
            `FPU_NMADD: begin
                core_select = CORE_NMADD;
                do_sub      = 0;
            end
            `FPU_NMSUB: begin
                core_select = CORE_NMADD;
                do_sub      = 1;
            end
            `FPU_DIV: begin
                core_select = CORE_DIV;
            end
            `FPU_SQRT: begin
                core_select = CORE_SQRT;
            end
            `FPU_CVTWS: begin
                core_select = CORE_FTOI;
                is_signed   = 1;
            end
            `FPU_CVTWUS: begin
                core_select = CORE_FTOI;
                is_signed   = 0;
            end
            `FPU_CVTSW: begin
                core_select = CORE_ITOF;
                is_signed   = 1;
            end
            `FPU_CVTSWU: begin
                core_select = CORE_ITOF;
                is_signed   = 0;
            end
            // Every other op_type goes to the non-computational unit.
            default: begin
                core_select = CORE_NONCOMP;
            end
        endcase
    end

    // ---- core 0: non-computational operations (only source of fflags) ----
    VX_fp_noncomp #(
        .TAGW  (TAGW),
        .LANES (`NUM_THREADS)
    ) fp_noncomp (
        .clk        (clk),
        .reset      (reset),
        .valid_in   (valid_in && (core_select == CORE_NONCOMP)),
        .ready_in   (per_core_ready_in[CORE_NONCOMP]),
        .tag_in     (tag_in),
        .op_type    (op_type),
        .frm        (frm),
        .dataa      (dataa),
        .datab      (datab),
        .result     (per_core_result[CORE_NONCOMP]),
        .has_fflags (fpnew_has_fflags),
        .fflags     (fpnew_fflags),
        .tag_out    (per_core_tag_out[CORE_NONCOMP]),
        .ready_out  (per_core_ready_out[CORE_NONCOMP]),
        .valid_out  (per_core_valid_out[CORE_NONCOMP])
    );

    // ---- core 1: add / sub / mul ----
    VX_fp_addmul #(
        .TAGW  (TAGW),
        .LANES (`NUM_THREADS)
    ) fp_addmul (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (valid_in && (core_select == CORE_ADDMUL)),
        .ready_in  (per_core_ready_in[CORE_ADDMUL]),
        .tag_in    (tag_in),
        .do_sub    (do_sub),
        .do_mul    (do_mul),
        .dataa     (dataa),
        .datab     (datab),
        .result    (per_core_result[CORE_ADDMUL]),
        .tag_out   (per_core_tag_out[CORE_ADDMUL]),
        .ready_out (per_core_ready_out[CORE_ADDMUL]),
        .valid_out (per_core_valid_out[CORE_ADDMUL])
    );

    // ---- core 2: madd / msub ----
    VX_fp_madd #(
        .TAGW  (TAGW),
        .LANES (`NUM_THREADS)
    ) fp_madd (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (valid_in && (core_select == CORE_MADD)),
        .ready_in  (per_core_ready_in[CORE_MADD]),
        .tag_in    (tag_in),
        .do_sub    (do_sub),
        .dataa     (dataa),
        .datab     (datab),
        .datac     (datac),
        .result    (per_core_result[CORE_MADD]),
        .tag_out   (per_core_tag_out[CORE_MADD]),
        .ready_out (per_core_ready_out[CORE_MADD]),
        .valid_out (per_core_valid_out[CORE_MADD])
    );

    // ---- core 3: nmadd / nmsub ----
    VX_fp_nmadd #(
        .TAGW  (TAGW),
        .LANES (`NUM_THREADS)
    ) fp_nmadd (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (valid_in && (core_select == CORE_NMADD)),
        .ready_in  (per_core_ready_in[CORE_NMADD]),
        .tag_in    (tag_in),
        .do_sub    (do_sub),
        .dataa     (dataa),
        .datab     (datab),
        .datac     (datac),
        .result    (per_core_result[CORE_NMADD]),
        .tag_out   (per_core_tag_out[CORE_NMADD]),
        .ready_out (per_core_ready_out[CORE_NMADD]),
        .valid_out (per_core_valid_out[CORE_NMADD])
    );

    // ---- core 4: divide ----
    VX_fp_div #(
        .TAGW  (TAGW),
        .LANES (`NUM_THREADS)
    ) fp_div (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (valid_in && (core_select == CORE_DIV)),
        .ready_in  (per_core_ready_in[CORE_DIV]),
        .tag_in    (tag_in),
        .dataa     (dataa),
        .datab     (datab),
        .result    (per_core_result[CORE_DIV]),
        .tag_out   (per_core_tag_out[CORE_DIV]),
        .ready_out (per_core_ready_out[CORE_DIV]),
        .valid_out (per_core_valid_out[CORE_DIV])
    );

    // ---- core 5: square root ----
    VX_fp_sqrt #(
        .TAGW  (TAGW),
        .LANES (`NUM_THREADS)
    ) fp_sqrt (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (valid_in && (core_select == CORE_SQRT)),
        .ready_in  (per_core_ready_in[CORE_SQRT]),
        .tag_in    (tag_in),
        .dataa     (dataa),
        .result    (per_core_result[CORE_SQRT]),
        .tag_out   (per_core_tag_out[CORE_SQRT]),
        .ready_out (per_core_ready_out[CORE_SQRT]),
        .valid_out (per_core_valid_out[CORE_SQRT])
    );

    // ---- core 6: float -> integer ----
    VX_fp_ftoi #(
        .TAGW  (TAGW),
        .LANES (`NUM_THREADS)
    ) fp_ftoi (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (valid_in && (core_select == CORE_FTOI)),
        .ready_in  (per_core_ready_in[CORE_FTOI]),
        .tag_in    (tag_in),
        .is_signed (is_signed),
        .dataa     (dataa),
        .result    (per_core_result[CORE_FTOI]),
        .tag_out   (per_core_tag_out[CORE_FTOI]),
        .ready_out (per_core_ready_out[CORE_FTOI]),
        .valid_out (per_core_valid_out[CORE_FTOI])
    );

    // ---- core 7: integer -> float ----
    VX_fp_itof #(
        .TAGW  (TAGW),
        .LANES (`NUM_THREADS)
    ) fp_itof (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (valid_in && (core_select == CORE_ITOF)),
        .ready_in  (per_core_ready_in[CORE_ITOF]),
        .tag_in    (tag_in),
        .is_signed (is_signed),
        .dataa     (dataa),
        .result    (per_core_result[CORE_ITOF]),
        .tag_out   (per_core_tag_out[CORE_ITOF]),
        .ready_out (per_core_ready_out[CORE_ITOF]),
        .valid_out (per_core_valid_out[CORE_ITOF])
    );

    // ---- output selection ----
    reg                           rsp_valid;
    reg                           rsp_has_fflags;
    reg [`NUM_THREADS-1:0][31:0]  rsp_result;
    reg [TAGW-1:0]                rsp_tag;

    // Fixed priority: scan the cores from index 0 upward and forward the
    // first one with a valid result. Only that core is handed ready_out; all
    // other cores see ready_out = 0 for this cycle.
    always @(*) begin
        rsp_tag            = 'x;
        rsp_result         = 'x;
        rsp_has_fflags     = 'x;
        rsp_valid          = 0;
        per_core_ready_out = 0;
        for (integer c = 0; c < NUM_FPC; c++) begin
            if (per_core_valid_out[c]) begin
                rsp_valid             = 1;
                rsp_tag               = per_core_tag_out[c];
                rsp_result            = per_core_result[c];
                // fflags are only reported for core 0.
                rsp_has_fflags        = fpnew_has_fflags && (c == 0);
                per_core_ready_out[c] = ready_out;
                break;
            end
        end
    end

    // A new request is accepted only when every core can accept input.
    assign ready_in   = (& per_core_ready_in);

    assign valid_out  = rsp_valid;
    assign has_fflags = rsp_has_fflags;
    assign tag_out    = rsp_tag;
    assign result     = rsp_result;
    assign fflags     = fpnew_fflags;

endmodule

View File

@@ -0,0 +1,78 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
// Multi-lane float-to-integer converter.
//
// Each lane produces both a signed and an unsigned conversion; is_signed is
// delayed together with the tag and valid bit through a shift register of
// depth `LATENCY_FTOI and then selects which conversion drives `result`.
// Under QUARTUS the acl_ftoi / acl_ftou cores are used, otherwise the
// dpi_ftoi / dpi_ftou DPI calls.
module VX_fp_ftoi #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire is_signed,

    input wire [LANES-1:0][31:0] dataa,
    output wire [LANES-1:0][31:0] result,

    output wire [TAGW-1:0] tag_out,

    input wire ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is held up by a consumer
    // that is not ready.
    wire pipe_en = ready_out || ~valid_out;

    // is_signed, delayed by the shift register below.
    reg signed_sel;

    for (genvar lane = 0; lane < LANES; lane++) begin

        wire [31:0] as_signed;
        wire [31:0] as_unsigned;

    `ifdef QUARTUS
        acl_ftoi ftoi (
            .clk    (clk),
            .areset (1'b0),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .q      (as_signed)
        );

        acl_ftou ftou (
            .clk    (clk),
            .areset (1'b0),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .q      (as_unsigned)
        );
    `else
        // Simulation model; the first argument is an index built from a
        // per-operation constant and the lane number.
        always @(posedge clk) begin
            dpi_ftoi(10 * LANES + lane, pipe_en, dataa[lane], as_signed);
            dpi_ftou(11 * LANES + lane, pipe_en, dataa[lane], as_unsigned);
        end
    `endif

        assign result[lane] = signed_sel ? as_signed : as_unsigned;
    end

    // Tag, valid and the signedness select ride alongside the data.
    VX_shift_register #(
        .DATAW (TAGW + 2),
        .DEPTH (`LATENCY_FTOI)
    ) shift_reg (
        .clk    (clk),
        .reset  (reset),
        .enable (pipe_en),
        .in     ({tag_in,  valid_in,  is_signed}),
        .out    ({tag_out, valid_out, signed_sel})
    );

    // New inputs are accepted whenever the pipeline is free to advance.
    assign ready_in = pipe_en;

endmodule

View File

@@ -0,0 +1,78 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
// Multi-lane integer-to-float converter.
//
// Each lane produces both a signed-source and an unsigned-source conversion;
// is_signed is delayed together with the tag and valid bit through a shift
// register of depth `LATENCY_ITOF and then selects which conversion drives
// `result`. Under QUARTUS the acl_itof / acl_utof cores are used, otherwise
// the dpi_itof / dpi_utof DPI calls.
module VX_fp_itof #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire is_signed,

    input wire [LANES-1:0][31:0] dataa,
    output wire [LANES-1:0][31:0] result,

    output wire [TAGW-1:0] tag_out,

    input wire ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is held up by a consumer
    // that is not ready.
    wire pipe_en = ready_out || ~valid_out;

    // is_signed, delayed by the shift register below.
    reg signed_sel;

    for (genvar lane = 0; lane < LANES; lane++) begin

        wire [31:0] from_signed;
        wire [31:0] from_unsigned;

    `ifdef QUARTUS
        acl_itof itof (
            .clk    (clk),
            .areset (1'b0),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .q      (from_signed)
        );

        acl_utof utof (
            .clk    (clk),
            .areset (1'b0),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .q      (from_unsigned)
        );
    `else
        // Simulation model; the first argument is an index built from a
        // per-operation constant and the lane number.
        always @(posedge clk) begin
            dpi_itof(12 * LANES + lane, pipe_en, dataa[lane], from_signed);
            dpi_utof(13 * LANES + lane, pipe_en, dataa[lane], from_unsigned);
        end
    `endif

        assign result[lane] = signed_sel ? from_signed : from_unsigned;
    end

    // Tag, valid and the signedness select ride alongside the data.
    VX_shift_register #(
        .DATAW (TAGW + 2),
        .DEPTH (`LATENCY_ITOF)
    ) shift_reg (
        .clk    (clk),
        .reset  (reset),
        .enable (pipe_en),
        .in     ({tag_in,  valid_in,  is_signed}),
        .out    ({tag_out, valid_out, signed_sel})
    );

    // New inputs are accepted whenever the pipeline is free to advance.
    assign ready_in = pipe_en;

endmodule

View File

@@ -0,0 +1,145 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
// Multi-lane single-precision fused multiply-add / multiply-subtract unit.
//
// Each lane computes both variants in parallel; do_sub is delayed together
// with the tag and valid bit through a shift register of depth
// `LATENCY_FMADD and then selects which variant drives `result`.
// Under QUARTUS twentynm_fp_mac primitives in "sp_mult_add" mode are used,
// otherwise the dpi_fmadd / dpi_fmsub DPI calls.
module VX_fp_madd #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire do_sub,

    input wire [LANES-1:0][31:0] dataa,
    input wire [LANES-1:0][31:0] datab,
    input wire [LANES-1:0][31:0] datac,
    output wire [LANES-1:0][31:0] result,

    output wire [TAGW-1:0] tag_out,

    input wire ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is held up by a consumer
    // that is not ready.
    wire pipe_en = ready_out || ~valid_out;

    // do_sub, delayed by the shift register below.
    reg sel_sub;

    for (genvar lane = 0; lane < LANES; lane++) begin

        wire [31:0] fma_add;
        wire [31:0] fma_sub;

    `ifdef QUARTUS

        // ---- multiply-add: datac on ax, datab on ay, dataa on az ----
        twentynm_fp_mac mac_fp_madd (
            // operands
            .ax                 (datac[lane]),
            .ay                 (datab[lane]),
            .az                 (dataa[lane]),
            // clocking
            .clk                ({2'b00, clk}),
            .ena                ({2'b11, pipe_en}),
            .aclr               (2'b00),
            // unused inputs
            .accumulate         (),
            .chainin            (),
            .chainin_overflow   (),
            .chainin_invalid    (),
            .chainin_underflow  (),
            .chainin_inexact    (),
            // result
            .resulta            (fma_add),
            // unused outputs
            .overflow           (),
            .invalid            (),
            .underflow          (),
            .inexact            (),
            .chainout           (),
            .chainout_overflow  (),
            .chainout_invalid   (),
            .chainout_underflow (),
            .chainout_inexact   ()
        );
        defparam mac_fp_madd.operation_mode       = "sp_mult_add";
        defparam mac_fp_madd.use_chainin          = "false";
        defparam mac_fp_madd.adder_subtract       = "false";
        defparam mac_fp_madd.ax_clock             = "0";
        defparam mac_fp_madd.ay_clock             = "0";
        defparam mac_fp_madd.az_clock             = "0";
        defparam mac_fp_madd.output_clock         = "0";
        defparam mac_fp_madd.accumulate_clock     = "none";
        defparam mac_fp_madd.ax_chainin_pl_clock  = "0";
        defparam mac_fp_madd.accum_pipeline_clock = "none";
        defparam mac_fp_madd.mult_pipeline_clock  = "0";
        defparam mac_fp_madd.adder_input_clock    = "0";
        defparam mac_fp_madd.accum_adder_clock    = "none";

        // ---- multiply-subtract: same wiring, adder_subtract = "true" ----
        twentynm_fp_mac mac_fp_msub (
            // operands
            .ax                 (datac[lane]),
            .ay                 (datab[lane]),
            .az                 (dataa[lane]),
            // clocking
            .clk                ({2'b00, clk}),
            .ena                ({2'b11, pipe_en}),
            .aclr               (2'b00),
            // unused inputs
            .accumulate         (),
            .chainin            (),
            .chainin_overflow   (),
            .chainin_invalid    (),
            .chainin_underflow  (),
            .chainin_inexact    (),
            // result
            .resulta            (fma_sub),
            // unused outputs
            .overflow           (),
            .invalid            (),
            .underflow          (),
            .inexact            (),
            .chainout           (),
            .chainout_overflow  (),
            .chainout_invalid   (),
            .chainout_underflow (),
            .chainout_inexact   ()
        );
        defparam mac_fp_msub.operation_mode       = "sp_mult_add";
        defparam mac_fp_msub.use_chainin          = "false";
        defparam mac_fp_msub.adder_subtract       = "true";
        defparam mac_fp_msub.ax_clock             = "0";
        defparam mac_fp_msub.ay_clock             = "0";
        defparam mac_fp_msub.az_clock             = "0";
        defparam mac_fp_msub.output_clock         = "0";
        defparam mac_fp_msub.accumulate_clock     = "none";
        defparam mac_fp_msub.ax_chainin_pl_clock  = "0";
        defparam mac_fp_msub.accum_pipeline_clock = "none";
        defparam mac_fp_msub.mult_pipeline_clock  = "0";
        defparam mac_fp_msub.adder_input_clock    = "0";
        defparam mac_fp_msub.accum_adder_clock    = "none";

    `else

        // Simulation model; the first argument is an index built from a
        // per-operation constant and the lane number.
        always @(posedge clk) begin
            dpi_fmadd(3 * LANES + lane, pipe_en, dataa[lane], datab[lane], datac[lane], fma_add);
            dpi_fmsub(4 * LANES + lane, pipe_en, dataa[lane], datab[lane], datac[lane], fma_sub);
        end

    `endif

        assign result[lane] = sel_sub ? fma_sub : fma_add;
    end

    // Tag, valid and the subtract select ride alongside the data.
    VX_shift_register #(
        .DATAW (TAGW + 2),
        .DEPTH (`LATENCY_FMADD)
    ) shift_reg (
        .clk    (clk),
        .reset  (reset),
        .enable (pipe_en),
        .in     ({tag_in,  valid_in,  do_sub}),
        .out    ({tag_out, valid_out, sel_sub})
    );

    // New inputs are accepted whenever the pipeline is free to advance.
    assign ready_in = pipe_en;

endmodule

View File

@@ -0,0 +1,197 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
// Multi-lane single-precision negated multiply-add / multiply-subtract unit.
//
// Two stages per lane:
//   stage 0 - multiply-add and multiply-subtract are computed in parallel;
//             do_sub, delayed by `LATENCY_FMADD (shift_reg0), selects one of
//             them as result_st0.
//   stage 1 - result_st0 is fed, together with a constant zero, to a
//             subtract operation whose output drives `result`.
//             NOTE(review): this is presumably 0 - result_st0 (negation),
//             matching the operand order of the dpi_fsub call - confirm the
//             operand order of the MAC primitive in subtract mode.
//
// The tag and valid bit are delayed by `LATENCY_FMADD + `LATENCY_FADDMUL
// (shift_reg1) to cover both stages.
//
// All pipeline elements share the single `enable` signal. The original code
// wired the clock-enables of mac_fp_msub and mac_fp_neg to `enable0` and
// `enable1`, which are not declared anywhere in this module; they would be
// undriven implicit nets (or a compile error under `default_nettype none).
// Both now use `enable`, consistent with mac_fp_madd, the DPI model and the
// two shift registers.
module VX_fp_nmadd #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire do_sub,

    input wire [LANES-1:0][31:0] dataa,
    input wire [LANES-1:0][31:0] datab,
    input wire [LANES-1:0][31:0] datac,
    output wire [LANES-1:0][31:0] result,

    output wire [TAGW-1:0] tag_out,

    input wire ready_out,
    output wire valid_out
);
    // Stall while a valid result is held up by a consumer that is not ready.
    wire stall  = ~ready_out && valid_out;
    wire enable = ~stall;

    // do_sub, delayed to the end of stage 0 by shift_reg0.
    reg do_sub_r;

    for (genvar i = 0; i < LANES; i++) begin

        wire [31:0] result_madd;
        wire [31:0] result_msub;

        // Stage-0 result handed to the second stage.
        wire [31:0] result_st0 = do_sub_r ? result_msub : result_madd;

    `ifdef QUARTUS

        // ---- stage 0: multiply-add ----
        twentynm_fp_mac mac_fp_madd (
            // inputs
            .accumulate(),
            .chainin_overflow(),
            .chainin_invalid(),
            .chainin_underflow(),
            .chainin_inexact(),
            .ax(datac[i]),
            .ay(datab[i]),
            .az(dataa[i]),
            .clk({2'b00,clk}),
            .ena({2'b11,enable}),
            .aclr(2'b00),
            .chainin(),
            // outputs
            .overflow(),
            .invalid(),
            .underflow(),
            .inexact(),
            .chainout_overflow(),
            .chainout_invalid(),
            .chainout_underflow(),
            .chainout_inexact(),
            .resulta(result_madd),
            .chainout()
        );
        defparam mac_fp_madd.operation_mode = "sp_mult_add";
        defparam mac_fp_madd.use_chainin = "false";
        defparam mac_fp_madd.adder_subtract = "false";
        defparam mac_fp_madd.ax_clock = "0";
        defparam mac_fp_madd.ay_clock = "0";
        defparam mac_fp_madd.az_clock = "0";
        defparam mac_fp_madd.output_clock = "0";
        defparam mac_fp_madd.accumulate_clock = "none";
        defparam mac_fp_madd.ax_chainin_pl_clock = "0";
        defparam mac_fp_madd.accum_pipeline_clock = "none";
        defparam mac_fp_madd.mult_pipeline_clock = "0";
        defparam mac_fp_madd.adder_input_clock = "0";
        defparam mac_fp_madd.accum_adder_clock = "none";

        // ---- stage 0: multiply-subtract ----
        twentynm_fp_mac mac_fp_msub (
            // inputs
            .accumulate(),
            .chainin_overflow(),
            .chainin_invalid(),
            .chainin_underflow(),
            .chainin_inexact(),
            .ax(datac[i]),
            .ay(datab[i]),
            .az(dataa[i]),
            .clk({2'b00,clk}),
            .ena({2'b11,enable}),   // was the undeclared `enable0`
            .aclr(2'b00),
            .chainin(),
            // outputs
            .overflow(),
            .invalid(),
            .underflow(),
            .inexact(),
            .chainout_overflow(),
            .chainout_invalid(),
            .chainout_underflow(),
            .chainout_inexact(),
            .resulta(result_msub),
            .chainout()
        );
        defparam mac_fp_msub.operation_mode = "sp_mult_add";
        defparam mac_fp_msub.use_chainin = "false";
        defparam mac_fp_msub.adder_subtract = "true";
        defparam mac_fp_msub.ax_clock = "0";
        defparam mac_fp_msub.ay_clock = "0";
        defparam mac_fp_msub.az_clock = "0";
        defparam mac_fp_msub.output_clock = "0";
        defparam mac_fp_msub.accumulate_clock = "none";
        defparam mac_fp_msub.ax_chainin_pl_clock = "0";
        defparam mac_fp_msub.accum_pipeline_clock = "none";
        defparam mac_fp_msub.mult_pipeline_clock = "0";
        defparam mac_fp_msub.adder_input_clock = "0";
        defparam mac_fp_msub.accum_adder_clock = "none";

        // ---- stage 1: subtract involving zero and result_st0 ----
        twentynm_fp_mac mac_fp_neg (
            // inputs
            .accumulate(),
            .chainin_overflow(),
            .chainin_invalid(),
            .chainin_underflow(),
            .chainin_inexact(),
            .ax(32'h0),
            .ay(result_st0),
            .az(),
            .clk({2'b00,clk}),
            .ena({2'b11,enable}),   // was the undeclared `enable1`
            .aclr(2'b00),
            .chainin(),
            // outputs
            .overflow(),
            .invalid(),
            .underflow(),
            .inexact(),
            .chainout_overflow(),
            .chainout_invalid(),
            .chainout_underflow(),
            .chainout_inexact(),
            .resulta(result[i]),
            .chainout()
        );
        defparam mac_fp_neg.operation_mode = "sp_add";
        defparam mac_fp_neg.use_chainin = "false";
        defparam mac_fp_neg.adder_subtract = "true";
        defparam mac_fp_neg.ax_clock = "0";
        defparam mac_fp_neg.ay_clock = "0";
        defparam mac_fp_neg.az_clock = "none";
        defparam mac_fp_neg.output_clock = "0";
        defparam mac_fp_neg.accumulate_clock = "none";
        defparam mac_fp_neg.ax_chainin_pl_clock = "none";
        defparam mac_fp_neg.accum_pipeline_clock = "none";
        defparam mac_fp_neg.mult_pipeline_clock = "none";
        defparam mac_fp_neg.adder_input_clock = "0";
        defparam mac_fp_neg.accum_adder_clock = "none";

    `else

        // Simulation model; the first argument of each call is an index built
        // from a per-operation constant and the lane number.
        always @(posedge clk) begin
            dpi_fmadd(5*LANES+i, enable, dataa[i], datab[i], datac[i], result_madd);
            dpi_fmsub(6*LANES+i, enable, dataa[i], datab[i], datac[i], result_msub);
            dpi_fsub(7*LANES+i, enable, 32'b0, result_st0, result[i]);
        end

    `endif
    end

    // do_sub is only needed at the boundary between stage 0 and stage 1.
    VX_shift_register #(
        .DATAW(1),
        .DEPTH(`LATENCY_FMADD)
    ) shift_reg0 (
        .clk(clk),
        .reset(reset),
        .enable(enable),
        .in({do_sub}),
        .out({do_sub_r})
    );

    // Tag and valid span both stages.
    VX_shift_register #(
        .DATAW(TAGW + 1),
        .DEPTH(`LATENCY_FMADD + `LATENCY_FADDMUL)
    ) shift_reg1 (
        .clk(clk),
        .reset(reset),
        .enable(enable),
        .in({tag_in, valid_in}),
        .out({tag_out, valid_out})
    );

    // New inputs are accepted whenever the pipeline is free to advance.
    assign ready_in = enable;

endmodule

View File

@@ -0,0 +1,263 @@
`include "VX_define.vh"
// Non-computational floating-point unit (single precision, LANES parallel lanes).
//
// Operation selection (as decoded below):
//   op_type == `FPU_CLASS : 10-bit classification mask of dataa
//   op_type == `FPU_CMP   : frm == `FRM_RNE -> (a <= b)
//                           frm == `FRM_RTZ -> (a <  b)
//                           frm == `FRM_RDN -> (a == b)
//   otherwise (FPU_MISC)  : frm 0/1/2 -> sign injection (copy / negate / xor)
//                           frm 3/4   -> min / max
//                           others    -> pass dataa through unchanged
//
// Pipeline: two register stages.
//   stage 1 (fnc1_reg / fnc2_reg): operands, classification, compare bits,
//                                  plus valid/tag/op_type/frm
//   stage 2 (nc_reg)             : selected result, flags, valid/tag
// Every signal that feeds stage 2 is taken from stage 1 so that valid_out,
// tag_out, result and fflags always describe the same request.
module VX_fp_noncomp #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    // input handshake
    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`FPU_BITS-1:0] op_type,
    input wire [`FRM_BITS-1:0] frm,

    input wire [LANES-1:0][31:0]  dataa,
    input wire [LANES-1:0][31:0]  datab,
    output wire [LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    // output handshake
    input wire  ready_out,
    output wire valid_out
);
    // FCLASS result bits
    localparam  NEG_INF     = 32'h00000001,
                NEG_NORM    = 32'h00000002,
                NEG_SUBNORM = 32'h00000004,
                NEG_ZERO    = 32'h00000008,
                POS_ZERO    = 32'h00000010,
                POS_SUBNORM = 32'h00000020,
                POS_NORM    = 32'h00000040,
                POS_INF     = 32'h00000080,
                SIG_NAN     = 32'h00000100,
                QUT_NAN     = 32'h00000200;

    // Stage-1 registered copies of the request
    reg                     valid_in_r;
    reg [TAGW-1:0]          tag_in_r;
    reg [`FPU_BITS-1:0]     op_type_r;
    reg [`FRM_BITS-1:0]     frm_r;
    reg [LANES-1:0][31:0]   dataa_r;
    reg [LANES-1:0][31:0]   datab_r;

    // Stage-1 registered per-lane decode
    reg [LANES-1:0]         a_sign, b_sign;
    reg [LANES-1:0][7:0]    a_exponent;
    reg [LANES-1:0][22:0]   a_mantissa;
    fp_type_t [LANES-1:0]   a_type, b_type;
    reg [LANES-1:0]         a_smaller, ab_equal;

    reg [LANES-1:0][31:0]   fclass_mask;  // generate a 10-bit mask for integer reg
    reg [LANES-1:0][31:0]   fminmax_res;  // result of fmin/fmax
    reg [LANES-1:0][31:0]   fsgnj_res;    // result of sign injection
    reg [LANES-1:0][31:0]   fcmp_res;     // result of comparison
    reg [LANES-1:0][ 4:0]   fcmp_excp;    // exception of comparison

    // Hold the whole pipeline while a valid result is not being accepted.
    wire stall = ~ready_out && valid_out;

    // Setup: split operands into fields, classify them, pre-compute compares
    for (genvar i = 0; i < LANES; i++) begin
        wire        tmp_a_sign     = dataa[i][31];
        wire [7:0]  tmp_a_exponent = dataa[i][30:23];
        wire [22:0] tmp_a_mantissa = dataa[i][22:0];

        wire        tmp_b_sign     = datab[i][31];
        wire [7:0]  tmp_b_exponent = datab[i][30:23];
        wire [22:0] tmp_b_mantissa = datab[i][22:0];

        fp_type_t tmp_a_type, tmp_b_type;

        VX_fp_type fp_type_a (
            .exponent (tmp_a_exponent),
            .mantissa (tmp_a_mantissa),
            .o_type   (tmp_a_type)
        );

        VX_fp_type fp_type_b (
            .exponent (tmp_b_exponent),
            .mantissa (tmp_b_mantissa),
            .o_type   (tmp_b_type)
        );

        // A signed integer compare of the raw bits orders sign-magnitude
        // values correctly unless both operands are negative, where a larger
        // magnitude means a smaller value; flip the result in that case.
        // NaN operands are filtered out by the consumers of a_smaller.
        wire tmp_both_neg  = tmp_a_sign & tmp_b_sign;
        wire tmp_a_smaller = ($signed(dataa[i]) < $signed(datab[i]))
                           ^ (tmp_both_neg & (dataa[i] != datab[i]));

        // Bit-identical operands are equal, and so are +0 and -0.
        // Named struct fields are used so this does not depend on the bit
        // layout of fp_type_t.
        wire tmp_ab_equal  = (dataa[i] == datab[i])
                           | (tmp_a_type.is_zero & tmp_b_type.is_zero);

        VX_generic_register #(
            .N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1)
        ) fnc1_reg (
            .clk   (clk),
            .reset (reset),
            .stall (stall),
            .flush (1'b0),
            .in    ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
            .out   ({a_sign[i],  b_sign[i],  a_exponent[i],  a_mantissa[i],  a_type[i],  b_type[i],  a_smaller[i],  ab_equal[i]})
        );
    end

    // Request metadata travels through stage 1 together with the operands.
    // Width is sized from LANES (the width of dataa/datab), not `NUM_THREADS.
    // NOTE(review): valid_in_r relies on VX_generic_register clearing its
    // contents on reset, as valid_out already does — confirm.
    VX_generic_register #(
        .N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * LANES * 32))
    ) fnc2_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (1'b0),
        .in    ({valid_in,   tag_in,   op_type,   frm,   dataa,   datab}),
        .out   ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r})
    );

    // FCLASS
    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin
            if (a_type[i].is_normal) begin
                fclass_mask[i] = a_sign[i] ? NEG_NORM : POS_NORM;
            end
            else if (a_type[i].is_inf) begin
                fclass_mask[i] = a_sign[i] ? NEG_INF : POS_INF;
            end
            else if (a_type[i].is_zero) begin
                fclass_mask[i] = a_sign[i] ? NEG_ZERO : POS_ZERO;
            end
            else if (a_type[i].is_subnormal) begin
                fclass_mask[i] = a_sign[i] ? NEG_SUBNORM : POS_SUBNORM;
            end
            else if (a_type[i].is_nan) begin
                // bit 9 = quiet NaN, bit 8 = signaling NaN
                fclass_mask[i] = {22'h0, a_type[i].is_quiet, a_type[i].is_signaling, 8'h0};
            end
            else begin
                fclass_mask[i] = QUT_NAN;
            end
        end
    end

    // Min/Max
    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin
            if (a_type[i].is_nan && b_type[i].is_nan)
                fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
            else if (a_type[i].is_nan)
                fminmax_res[i] = datab_r[i];
            else if (b_type[i].is_nan)
                fminmax_res[i] = dataa_r[i];
            else begin
                case (frm_r) // 3 selects MIN, 4 selects MAX
                    3:       fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
                    4:       fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
                    default: fminmax_res[i] = 32'hdeadbeaf;  // don't care value
                endcase
            end
        end
    end

    // Sign Injection
    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin
            case (frm_r)
                0:       fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
                1:       fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
                2:       fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
                default: fsgnj_res[i] = 32'hdeadbeaf;  // don't care value
            endcase
        end
    end

    // Comparison
    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin
            case (frm_r)
                `FRM_RNE: begin // a <= b
                    if (a_type[i].is_nan || b_type[i].is_nan) begin
                        fcmp_res[i]  = 32'h0;        // result is 0 when either operand is NaN
                        fcmp_excp[i] = {1'b1, 4'h0}; // raise NV flag when either operand is NaN
                    end
                    else begin
                        fcmp_res[i]  = {31'h0, (a_smaller[i] | ab_equal[i])};
                        fcmp_excp[i] = 5'h0;
                    end
                end
                `FRM_RTZ: begin // a < b
                    if (a_type[i].is_nan || b_type[i].is_nan) begin
                        fcmp_res[i]  = 32'h0;        // result is 0 when either operand is NaN
                        fcmp_excp[i] = {1'b1, 4'h0}; // raise NV flag when either operand is NaN
                    end
                    else begin
                        fcmp_res[i]  = {31'h0, (a_smaller[i] & ~ab_equal[i])};
                        fcmp_excp[i] = 5'h0;
                    end
                end
                `FRM_RDN: begin // a == b
                    if (a_type[i].is_nan || b_type[i].is_nan) begin
                        fcmp_res[i]  = 32'h0;        // result is 0 when either operand is NaN
                        // ** FEQS only raise NV flag when either operand is signaling NaN
                        fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
                    end
                    else begin
                        fcmp_res[i]  = {31'h0, ab_equal[i]};
                        fcmp_excp[i] = 5'h0;
                    end
                end
                default: begin
                    fcmp_res[i]  = 32'hdeadbeaf;  // don't care value
                    fcmp_excp[i] = 5'h0;
                end
            endcase
        end
    end

    // outputs: select the result/flags for the operation held in stage 1
    fflags_t [LANES-1:0]  tmp_fflags;
    reg [LANES-1:0][31:0] tmp_result;

    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin
            case (op_type_r)
                `FPU_CLASS: begin
                    tmp_result[i] = fclass_mask[i];
                    {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
                end
                `FPU_CMP: begin
                    tmp_result[i] = fcmp_res[i];
                    {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = fcmp_excp[i];
                end
                //`FPU_MISC:
                default: begin
                    // frm_r (not frm): the sub-operation must come from the
                    // same request as op_type_r and the stage-1 operands.
                    case (frm_r)
                        0,1,2: begin
                            tmp_result[i] = fsgnj_res[i];
                            {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
                        end
                        3,4: begin
                            tmp_result[i] = fminmax_res[i];
                            // NV is raised only for a signaling NaN operand
                            {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i].is_signaling | b_type[i].is_signaling, 4'h0};
                        end
                        //5,6,7:
                        default: begin
                            tmp_result[i] = dataa_r[i];
                            {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
                        end
                    endcase
                end
            endcase
        end
    end

    wire tmp_has_fflags = ((op_type_r == `FPU_MISC) && (frm_r == 3 || frm_r == 4)) // MIN/MAX
                       || (op_type_r == `FPU_CMP);                                 // CMP

    VX_generic_register #(
        .N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
    ) nc_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (1'b0),
        .in    ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
        .out   ({valid_out,  tag_out,  result,     has_fflags,     fflags})
    );

    assign ready_in = ~stall;
endmodule

View File

@@ -0,0 +1,59 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
// Single-precision square root, one core per lane.
// Under QUARTUS an acl_fsqrt instance is used per lane; otherwise the result
// is produced by the dpi_fsqrt DPI call on each clock edge. The tag and valid
// bit are delayed by `LATENCY_FSQRT stages in a shift register alongside.
module VX_fp_sqrt #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [LANES-1:0][31:0]  dataa,
    output wire [LANES-1:0][31:0] result,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is waiting on a consumer
    // that is not ready.
    wire enable = ready_out || ~valid_out;

    assign ready_in = enable;

    // Tag and valid travel next to the data path.
    VX_shift_register #(
        .DATAW (TAGW + 1),
        .DEPTH (`LATENCY_FSQRT)
    ) shift_reg (
        .clk    (clk),
        .reset  (reset),
        .enable (enable),
        .in     ({tag_in,  valid_in}),
        .out    ({tag_out, valid_out})
    );

    for (genvar i = 0; i < LANES; i++) begin
    `ifdef QUARTUS
        acl_fsqrt fsqrt (
            .clk    (clk),
            .areset (1'b0),
            .en     (enable),
            .a      (dataa[i]),
            .q      (result[i])
        );
    `else
        // Simulation model; the first argument is a per-lane instance id.
        always @(posedge clk) begin
            dpi_fsqrt(9*LANES+i, enable, dataa[i], result[i]);
        end
    `endif
    end
endmodule

View File

@@ -0,0 +1,27 @@
`include "VX_define.vh"
// Classifies a single-precision value from its exponent and mantissa fields.
// Exactly one of normal / zero / subnormal / inf / nan is set; when nan is
// set, exactly one of signaling / quiet is set as well.
module VX_fp_type (
    // inputs
    input  [7:0]  exponent,
    input  [22:0] mantissa,
    // outputs
    output fp_type_t o_type
);
    // Shared field tests
    wire exp_is_min  = (exponent == 8'd0);
    wire exp_is_max  = (exponent == 8'hff);
    wire man_is_zero = (mantissa == 23'd0);

    // NaN: all-ones exponent with a non-zero mantissa
    wire nan = exp_is_max && !man_is_zero;
    // A NaN is signaling when the mantissa MSB is clear
    wire snan = nan && (mantissa[22] == 1'b0);

    assign o_type.is_normal    = !exp_is_min && !exp_is_max;
    assign o_type.is_zero      = exp_is_min && man_is_zero;
    assign o_type.is_subnormal = exp_is_min && !(exp_is_min && man_is_zero);
    assign o_type.is_inf       = exp_is_max && man_is_zero;
    assign o_type.is_nan       = nan;
    assign o_type.is_signaling = snan;
    assign o_type.is_quiet     = nan && !snan;
endmodule

209
hw/rtl/fp_cores/VX_fpnew.v Normal file
View File

@@ -0,0 +1,209 @@
`include "VX_define.vh"
`include "fpnew_pkg.sv"
`include "defs_div_sqrt_mvp.sv"
// Wrapper that maps the core's FPU request interface onto one fpnew_top
// instance per thread.
//
// All lanes receive the same operation, rounding mode and valid signal.
// Lane 0 additionally carries {tag, has_fflags} through the pipeline and
// provides the ready/valid handshake; the handshake and tag outputs of the
// other lanes are left unconnected.
//
// Parameters FMULADD / FDIVSQRT / FNONCOMP / FCONV enable (non-zero) or
// disable (zero) the corresponding fpnew operation group.
module VX_fpnew #(
    parameter TAGW     = 1,
    parameter FMULADD  = 1,
    parameter FDIVSQRT = 1,
    parameter FNONCOMP = 1,
    parameter FCONV    = 1
) (
    input wire clk,
    input wire reset,

    input wire  valid_in,
    output wire ready_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`FPU_BITS-1:0] op_type,
    input wire [`MOD_BITS-1:0] frm,

    input wire [`NUM_THREADS-1:0][31:0]  dataa,
    input wire [`NUM_THREADS-1:0][31:0]  datab,
    input wire [`NUM_THREADS-1:0][31:0]  datac,
    output wire [`NUM_THREADS-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [`NUM_THREADS-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    localparam UNIT_FMULADD  = FMULADD  ? fpnew_pkg::PARALLEL : fpnew_pkg::DISABLED;
    localparam UNIT_FDIVSQRT = FDIVSQRT ? fpnew_pkg::MERGED   : fpnew_pkg::DISABLED;
    localparam UNIT_FNONCOMP = FNONCOMP ? fpnew_pkg::PARALLEL : fpnew_pkg::DISABLED;
    localparam UNIT_FCONV    = FCONV    ? fpnew_pkg::MERGED   : fpnew_pkg::DISABLED;

    localparam FOP_BITS  = fpnew_pkg::OP_BITS;
    localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
    localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);

    localparam FPU_DPATHW = 32'd32;

    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
        Width:         FPU_DPATHW,
        EnableVectors: 1'b0,
        EnableNanBox:  1'b1,
        FpFmtMask:     5'b10000,
        IntFmtMask:    4'b0010
    };

    localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
        PipeRegs:'{'{`LATENCY_FADDMUL, 0, 0, 0, 0}, // ADDMUL
                   '{default: `LATENCY_FDIVSQRT},   // DIVSQRT
                   '{default: `LATENCY_FNONCOMP},   // NONCOMP
                   '{default: `LATENCY_FCONV}},     // CONV
        UnitTypes:'{'{default: UNIT_FMULADD},       // ADDMUL
                    '{default: UNIT_FDIVSQRT},      // DIVSQRT
                    '{default: UNIT_FNONCOMP},      // NONCOMP
                    '{default: UNIT_FCONV}},        // CONV
        PipeConfig: fpnew_pkg::DISTRIBUTED
    };

    wire fpu_ready_in, fpu_valid_in;
    wire fpu_ready_out, fpu_valid_out;

    reg [TAGW-1:0] fpu_tag_in, fpu_tag_out;

    reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;

    wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
    wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
    wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;

    wire [`NUM_THREADS-1:0][31:0] fpu_result;

    // Declared with the same (descending) packed range as `fflags`: the bulk
    // assignment `fflags = fpu_status` below copies bits positionally, so an
    // ascending range here would deliver lane 0's status to the last lane.
    fpnew_pkg::status_t [`NUM_THREADS-1:0] fpu_status;

    reg [FOP_BITS-1:0]  fpu_op;
    reg [`FRM_BITS-1:0] fpu_rnd;
    reg fpu_op_mod;
    reg fpu_has_fflags, fpu_has_fflags_out;

    // Translate op_type/frm into fpnew's operation, modifier, rounding mode
    // and operand placement.
    always @(*) begin
        // defaults, overridden per operation below
        fpu_op          = fpnew_pkg::SGNJ;
        fpu_rnd         = frm;
        fpu_op_mod      = 0;
        fpu_has_fflags  = 1;
        fpu_operands[0] = dataa;
        fpu_operands[1] = datab;
        fpu_operands[2] = datac;

        case (op_type)
            `FPU_ADD: begin
                    // the two addends are presented on operand slots 1 and 2
                    fpu_op = fpnew_pkg::ADD;
                    fpu_operands[1] = dataa;
                    fpu_operands[2] = datab;
                end
            `FPU_SUB: begin
                    fpu_op = fpnew_pkg::ADD;
                    fpu_operands[1] = dataa;
                    fpu_operands[2] = datab;
                    fpu_op_mod = 1;
                end
            `FPU_MUL:   begin fpu_op = fpnew_pkg::MUL; end
            `FPU_DIV:   begin fpu_op = fpnew_pkg::DIV; end
            `FPU_SQRT:  begin fpu_op = fpnew_pkg::SQRT; end
            `FPU_MADD:  begin fpu_op = fpnew_pkg::FMADD; end
            `FPU_MSUB:  begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
            `FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
            `FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
            `FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
            `FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
            `FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
            `FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
            `FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
            `FPU_CMP:   begin fpu_op = fpnew_pkg::CMP; end
            `FPU_MISC:  begin
                    // for MISC, frm encodes the sub-operation and is remapped
                    // onto the rounding-mode input of the selected fpnew op
                    case (frm)
                        0: begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
                        1: begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
                        2: begin fpu_op = fpnew_pkg::SGNJ;   fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
                        3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
                        4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
                        default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
                    endcase
                end
            default:;
        endcase
    end

`DISABLE_TRACING

    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        if (0 == i) begin
            // Lane 0: carries {tag, has_fflags} and drives the handshake.
            // rst_ni is tied inactive; reset is applied through flush_i.
            fpnew_top #(
                .Features       (FPU_FEATURES),
                .Implementation (FPU_IMPLEMENTATION),
                .TagType        (logic[TAGW+1+1-1:0])
            ) fpnew_core (
                .clk_i          (clk),
                .rst_ni         (1'b1),
                .operands_i     ({fpu_operands[2][0], fpu_operands[1][0], fpu_operands[0][0]}),
                .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
                .op_i           (fpnew_pkg::operation_e'(fpu_op)),
                .op_mod_i       (fpu_op_mod),
                .src_fmt_i      (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
                .dst_fmt_i      (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
                .int_fmt_i      (fpnew_pkg::int_format_e'(fpu_int_fmt)),
                .vectorial_op_i (1'b0),
                .tag_i          ({fpu_tag_in, fpu_has_fflags}),
                .in_valid_i     (fpu_valid_in),
                .in_ready_o     (fpu_ready_in),
                .flush_i        (reset),
                .result_o       (fpu_result[0]),
                .status_o       (fpu_status[0]),
                .tag_o          ({fpu_tag_out, fpu_has_fflags_out}),
                .out_valid_o    (fpu_valid_out),
                .out_ready_i    (fpu_ready_out),
                `UNUSED_PIN     (busy_o)
            );
        end else begin
            // Remaining lanes: data path only, with a 1-bit dummy tag.
            fpnew_top #(
                .Features       (FPU_FEATURES),
                .Implementation (FPU_IMPLEMENTATION),
                .TagType        (logic)
            ) fpnew_core (
                .clk_i          (clk),
                .rst_ni         (1'b1),
                .operands_i     ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}),
                .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
                .op_i           (fpnew_pkg::operation_e'(fpu_op)),
                .op_mod_i       (fpu_op_mod),
                .src_fmt_i      (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
                .dst_fmt_i      (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
                .int_fmt_i      (fpnew_pkg::int_format_e'(fpu_int_fmt)),
                .vectorial_op_i (1'b0),
                .tag_i          (1'b0),
                .in_valid_i     (fpu_valid_in),
                `UNUSED_PIN     (in_ready_o),
                .flush_i        (reset),
                .result_o       (fpu_result[i]),
                .status_o       (fpu_status[i]),
                `UNUSED_PIN     (tag_o),
                `UNUSED_PIN     (out_valid_o),
                .out_ready_i    (fpu_ready_out),
                `UNUSED_PIN     (busy_o)
            );
        end
    end

`ENABLE_TRACING

    assign fpu_valid_in = valid_in;
    assign ready_in     = fpu_ready_in;

    assign fpu_tag_in   = tag_in;
    assign tag_out      = fpu_tag_out;

    assign result       = fpu_result;

    assign has_fflags   = fpu_has_fflags_out;
    assign fflags       = fpu_status;

    assign valid_out     = fpu_valid_out;
    assign fpu_ready_out = ready_out;
endmodule

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,514 @@
:020000040000FA
:0400000020000004D8
:040001001FF00800E4
:040002001FE01FE4F8
:040003001FD047992A
:040004001FC07F0694
:040005001FB0C6154D
:040006001FA11CAE6C
:040007001F9182BA09
:040008001F81F82339
:040009001F727CD214
:04000A001F6310B1AF
:04000B001F53B3A824
:04000C001F4465A286
:04000D001F352689EC
:04000E001F25F6486C
:04000F001F16D4C81C
:040010001F07C1F411
:040011001EF8BDB761
:040012001EE9C7FC20
:040013001EDAE0AE63
:040014001ECC07B740
:040015001EBD3D03CC
:040016001EAE807F1B
:040017001E9FD21442
:040018001E9131B054
:040019001E829F3E66
:04001A001E741AA98D
:04001B001E65A3E0DB
:04001C001E573ACD64
:04001D001E48DF5D3D
:04001E001E3A917E77
:04001F001E2C511B27
:040020001E1E1E2260
:040021001E0FF88036
:040022001E01E022B9
:040023001DF3D4F500
:040024001DE5D6E818
:040025001DD7E5E717
:040026001DCA01E10D
:040027001DBC2AC210
:040028001DAE607B2E
:040029001DA0A2F77D
:04002A001D92F2270A
:04002B001D854DF8EA
:04002C001D77B6592D
:04002D001D6A2B38E5
:04002E001D5CAC8425
:04002F001D4F3A2DFA
:040030001D41D42179
:040031001D347A50B0
:040032001D272CA8B2
:040033001D19EB198F
:040034001D0CB59357
:040035001CFF8C061A
:040036001CF26E60EA
:040037001CE55C93D5
:040038001CD8568DED
:040039001CCB5C3F41
:04003A001CBE6D9AE1
:04003B001CB18A8DDD
:04003C001CA4B30944
:04003D001C97E6FF27
:04003E001C8B265F92
:04003F001C7E711A98
:040040001C71C72048
:040041001C652864AE
:040042001C5894D5DD
:040043001C4C0C65E0
:040044001C3F8F06C8
:040045001C331CA8A4
:040046001C26B53D82
:040047001C1A58B770
:040048001C0E07077C
:040049001C01C020B6
:04004A001BF583F22D
:04004B001BE95271EA
:04004C001BDD2B8EFF
:04004D001BD10F3A7A
:04004E001BC4FD6969
:04004F001BB8F60DD7
:040050001BACF919D3
:040051001BA1067E6B
:040052001B951E2FAD
:040053001B89401FA6
:040054001B7D6C4262
:040055001B71A289F0
:040056001B65E2E85C
:040057001B5A2D51B2
:040058001B4E81B901
:040059001B42E01254
:04005A001B37484FB9
:04005B001B2BBA643D
:04005C001B203644EB
:04005D001B14BBE4D1
:04005E001B094B36F9
:04005F001AFDE42E74
:040060001AF286C149
:040061001AE732E187
:040062001ADBE88439
:040063001AD0A79C6C
:040064001AC5701F2A
:040065001ABA420081
:040066001AAF1D337D
:040067001AA401AE28
:040068001A98EF648F
:040069001A8DE64ABC
:04006A001A82E655BB
:04006B001A77EF7998
:04006C001A6D01AB5D
:04006D001A621CDF18
:04006E001A57410BD1
:04006F001A4C6E2495
:040070001A41A41E6F
:040071001A36E2EF6A
:040072001A2C2A8C8E
:040073001A217AE9EB
:040074001A16D3FD88
:040075001A0C35BD6F
:040076001A01A01EAD
:0400770019F713154D
:0400780019EC8E9958
:0400790019E2129ED8
:04007A0019D79F1BD8
:04007B0019CD340661
:04007C0019C2D15381
:04007D0019B876F93F
:04007E0019AE24EEA5
:04007F0019A3DB28BE
:040080001999999E93
:04008100198F60442F
:0400820019852F129B
:04008300197B05FDE3
:040084001970E4FC0F
:040085001966CC0626
:04008600195CBB1036
:040087001952B21147
:040088001948B10161
:04008900193EB7D590
:04008A001934C684DB
:04008B00192ADD044D
:04008C001920FB4EEE
:04008D0019172157C7
:04008E00190D4F16E3
:04008F00190384834A
:0400900018F9C19406
:0400910018F006401D
:0400920018E6527F9B
:0400930018DCA64887
:0400940018D30191EB
:0400950018C96453CF
:0400960018BFCE843D
:0400970018B6401D3A
:0400980018ACB913D4
:0400990018A339600F
:04009A001899C0FAF7
:04009B0018904FD991
:04009C001886E5F5E8
:04009D00187D834502
:04009E00187427C1EA
:04009F00186AD361A7
:0400A0001861861C41
:0400A10018583FECC0
:0400A200184F00C62D
:0400A3001845C8A58F
:0400A400183C977FEE
:0400A50018336D4C53
:0400A600182A4A05C5
:0400A70018212DA34C
:0400A8001818181CF0
:0400A900180F096AB9
:0400AA0018060184AF
:0400AB0017FD0064D9
:0400AC0017F406013E
:0400AD0017EB1254E7
:0400AE0017E22555DB
:0400AF0017D93EFE21
:0400B00017D05F45C1
:0400B10017C78625C2
:0400B20017BEB3962C
:0400B30017B5E79006
:0400B40017AD220D55
:0400B50017A4630425
:0400B600179BAA707A
:0400B7001792F8485C
:0400B800178A4C85D2
:0400B9001781A722E2
:0400BA001779081694
:0400BB0017706F5AF1
:0400BC001767DCE8FE
:0400BD00175F50B9C0
:0400BE001756CAC641
:0400BF00174E4B0885
:0400C0001745D17897
:0400C100173D5E1079
:0400C2001734F0C936
:0400C300172C899CD1
:0400C4001724288352
:0400C500171BCD77C1
:0400C6001713787222
:0400C700170B296C7E
:0400C8001702E060DB
:0400C90016FA9D473F
:0400CA0016F2601BAF
:0400CB0016EA28D534
:0400CC0016E1F76FD3
:0400CD0016D9CBE392
:0400CE0016D1A62A77
:0400CF0016C9863F89
:0400D00016C16C1BCE
:0400D10016B957B74E
:0400D20016B1490F0B
:0400D30016A9401B0F
:0400D40016A13CD560
:0400D50016993F3900
:0400D6001691473FF9
:0400D700168954E151
:0400D8001681681A0B
:0400D900167980E52F
:0400DA0016719F3AC2
:0400DB001669C314CB
:0400DC001661EC6E4F
:0400DD00165A1B4252
:0400DE0016524F89DE
:0400DF00164A893FF5
:0400E0001642C85D9F
:0400E100163B0CDEE0
:0400E200163356BDBE
:0400E300162BA5F340
:0400E4001623FA7B6A
:0400E500161C545041
:0400E6001614B36CCD
:0400E700160D17CA11
:0400E8001605816414
:0400E90015FDF035DC
:0400EA0015F664386B
:0400EB0015EEDD67CA
:0400EC0015E75BBDFC
:0400ED0015DFDF3408
:0400EE0015D867C8F2
:0400EF0015D0F573C0
:0400F00015C9883076
:0400F10015C21FF91C
:0400F20015BABCCAB5
:0400F30015B35E9E45
:0400F40015AC056FD3
:0400F50015A4B13865
:0400F600159D61F5FE
:0400F700159617A0A3
:0400F800158ED2345B
:0400F900158791AD29
:0400FA001580560512
:0400FB0015791F381C
:0400FC001571ED404D
:0400FD00156AC019A7
:0400FE00156397BE31
:0400FF00155C742BED
:0401000015555559E3
:04010100154E3B4616
:04010200154725EB8D
:04010300154015444A
:040104001539094D53
:0401050015320201AC
:04010600152AFF5B5C
:040107001524015664
:04010800151D07EFCB
:040109001516132094
:04010A00150F22E5C6
:04010B001508373A62
:04010C001501501970
:04010D0014FA6D7FF4
:04010E0014F38F67F0
:04010F0014ECB5CC6B
:0401100014E5E0AB67
:0401110014DF0FFFE9
:0401120014D843C3F7
:0401130014D17BF395
:0401140014CAB88BC6
:0401150014C3F9878F
:0401160014BD3EE2F4
:0401170014B68898FA
:0401180014AFD6A4A6
:0401190014A92904F8
:04011A0014A27FB1FB
:04011B00149BDAA9AE
:04011C00149539E815
:04011D00148E9D6837
:04011E001488052616
:04011F001481711EB8
:04012000147AE14C20
:04012100147455AB52
:04012200146DCE3852
:0401230014674AEF24
:040124001460CBCCCC
:04012500145A50CA4E
:040126001453D9E7AE
:04012700144D671DEF
:040128001446F86918
:0401290014408DC829
:04012A00143A273527
:04012B001433C4AC19
:04012C00142D662AFE
:04012D0014270BABDD
:04012E001420B52ABA
:04012F00141A62A597
:040130001414141877
:04013100140DC97E62
:04013200140782D557
:04013300140140185B
:0401340013FB014474
:0401350013F4C654A5
:0401360013EE8F47EE
:0401370013E85C1756
:0401380013E22CC1E1
:0401390013DC014290
:04013A0013D5D9966A
:04013B0013CFB5B970
:04013C0013C995A8A6
:04013D0013C379600F
:04013E0013BD60DDB0
:04013F0013B74C1B8B
:0401400013B13B18A4
:0401410013AB2DCF00
:0401420013A5243CA1
:04014300139F1E5E8A
:0401440013991C30BF
:0401450013931DAF44
:04014600138D22D71C
:0401470013872BA649
:0401480013813817D0
:04014900137B4829B3
:04014A0013755BD6F8
:04014B00136F731C9F
:04014C0013698DF8AE
:04014D001363AC6626
:04014E00135DCE640B
:04014F001357F3ED62
:0401500013521CFF2B
:04015100134C49976B
:04015200134679B126
:040153001340AD4A5E
:04015400133AE45F17
:0401550013351EED53
:04015600132F5CF116
:0401570013299E6862
:040158001323E34E3C
:04015900131E2BA1A5
:04015A001318775DA2
:04015B001312C67F36
:04015C00130D190561
:04015D0013076EEB2B
:04015E001301C82F92
:04015F0012FC24CD9D
:0401600012F684C24D
:0401610012F0E80BA5
:0401620012EB4EA6A8
:0401630012E5B88F5A
:0401640012E025C4BC
:0401650012DA9642D2
:0401660012D50A059F
:0401670012CF810B27
:0401680012C9FB516C
:0401690012C478D56F
:04016A0012BEF99236
:04016B0012B97D87C1
:04016C0012B404B114
:04016D0012AE8F0C33
:04016E0012A91C971F
:04016F0012A3AD4EDC
:04017000129E412E6C
:040171001298D835D3
:040172001293726012
:04017300128E0FAC2D
:040174001288B01726
:040175001283539D01
:04017600127DFA3DBF
:040177001278A3F364
:04017800127350BCF2
:04017900126E00976B
:04017A001268B381D3
:04017B00126369762C
:04017C00125E227479
:04017D001258DE7ABC
:04017E0012539D83F8
:04017F00124E5F8D30
:040180001249249666
:040181001243EC9C9D
:04018200123EB79BD7
:040183001239859216
:040184001234567D5E
:04018500122F2A5AB1
:04018600122A012711
:040187001224DAE183
:04018800121FB78506
:04018900121A97129D
:04018A00121579844D
:04018B0012105EDA16
:04018C00120B4710FB
:04018D0012063225FF
:04018E001201201624
:04018F0011FC10E06F
:0401900011F70482DD
:0401910011F1FAF876
:0401920011ECF44038
:0401930011E7F05927
:0401940011E2EF3F46
:0401950011DDF0F197
:0401960011D8F56B1C
:0401970011D3FCACD8
:0401980011CF06B2CB
:0401990011CA1379FB
:04019A0011C5230068
:04019B0011C0354515
:04019C0011BB4A4405
:04019D0011B661FD39
:04019E0011B17C6CB3
:04019F0011AC998F77
:0401A00011A7B96585
:0401A10011A2DBEBE1
:0401A200119E011E8B
:0401A300119928FD89
:0401A40011945385DA
:0401A500118F80B482
:0401A600118AB08882
:0401A7001185E2FEDE
:0401A8001181181594
:0401A900117C4FCBAB
:0401AA0011778A1D22
:0401AB001172C709FD
:0401AC00116E068D3D
:0401AD00116948A7E5
:0401AE0011648D55F6
:0401AF00115FD49474
:0401B000115B1E635E
:0401B10011566AC0B9
:0401B2001151B9A886
:0401B300114D0B19C6
:0401B40011485F127D
:0401B5001143B590AD
:0401B600113F0E9156
:0401B700113A6A147B
:0401B8001135C81520
:0401B9001131289444
:0401BA00112C8B8EEB
:0401BB001127F10116
:0401BC00112358EBC8
:0401BD00111EC34B01
:0401BE00111A301EC4
:0401BF0011159F6215
:0401C00011111115F3
:0401C100110C853662
:0401C2001107FBC264
:0401C300110374B8F8
:0401C40010FEF01524
:0401C50010FA6DD8E7
:0401C60010F5EDFF44
:0401C70010F170873C
:0401C80010ECF570D2
:0401C90010E87CB707
:0401CA0010E40659DE
:0401CB0010DF925758
:0401CC0010DB20AD77
:0401CD0010D6B1593E
:0401CE0010D2445AAD
:0401CF0010CDD9AEC8
:0401D00010C971548D
:0401D10010C50B4802
:0401D20010C0A78B27
:0401D30010BC4618FE
:0401D40010B7E6F08A
:0401D50010B38A10C9
:0401D60010AF2F76C1
:0401D70010AAD72172
:0401D80010A6810EDE
:0401D90010A22D3D06
:0401DA00109DDBABEE
:0401DB0010998C5695
:0401DC0010953F3DFE
:0401DD001090F45E2C
:0401DE00108CABB71F
:0401DF0010886547D8
:0401E0001084210C5A
:0401E100107FDF04A8
:0401E200107B9F2EC1
:0401E30010776187A9
:0401E4001073260E60
:0401E500106EECC2EA
:0401E600106AB5A046
:0401E700106680A876
:0401E80010624DD77D
:0401E900105E1D2C5B
:0401EA001059EEA416
:0401EB001055C240A9
:0401EC00105197FC1B
:0401ED00104D6FD76B
:0401EE00104949D09B
:0401EF00104525E5AD
:0401F00010410414A2
:0401F100103CE45C7E
:0401F2001038C6BC3F
:0401F3001034AB30E9
:0401F400103091B97D
:0401F500102C7A54FC
:0401F6001028650068
:0401F700102451BCC3
:0401F800102040850E
:0401F900101C315A4B
:0401FA001018243A7B
:0401FB0010141923A0
:0401FC0010101014BB
:0401FD00100C090BCE
:0401FE0010080406DB
:0401FF0010040104E3
:00000001ff

View File

@@ -0,0 +1,514 @@
:020000040000FA
:03000000100002EB
:03000100100FF6E7
:03000200101FD2FA
:03000300102F9526
:03000400103F4466
:03000500104ED9C1
:03000600105E5831
:03000700106DC1B8
:03000800107D1454
:03000900108C4D0B
:03000A00109B6FD9
:03000B0010AA7FB9
:03000C0010B977B1
:03000D0010C85BBD
:03000E0010D726E2
:03000F0010E5DC1D
:0300100010F47E6B
:0300110011030CCC
:0300120011118148
:03001300111FE1D9
:03001400112E2F7B
:03001500113C6A31
:03001600114A8DFF
:0300170011589FDE
:03001800116699D5
:03001900117480DF
:03001A00118258F8
:03001B001190162B
:03001C00119DC370
:03001D0011AB5FC5
:03001E0011B8E333
:03001F0011C657B0
:0300200011D3B841
:0300210011E105E5
:0300220011EE3F9D
:0300230011FB6965
:0300240012087D42
:0300250012158130
:0300260012227033
:03002700122F5342
:03002800123C1D6A
:030029001248DD9D
:03002A00125585E7
:03002B0012621D41
:03002C00126EA3AE
:03002D00127B192A
:03002E00128781B5
:03002F001293D455
:0300300012A01605
:0300310012AC47C7
:0300320012B86A97
:0300330012C47C78
:0300340012D07C6B
:0300350012DC6A70
:0300360012E84B82
:0300370012F419A7
:0300380012FFDBD9
:03003900130B8C1A
:03003A0013172B6E
:03003B001322BDD0
:03003C00132E3E42
:03003D001339AFC5
:03003E0013451156
:03003F00135063F8
:03004000135BAAA5
:030041001366DD66
:0300420013720531
:03004300137D1D0D
:03004400138824FA
:0300450013931FF3
:03004600139E0CFA
:0300470013A8E813
:0300480013B3BA35
:0300490013BE786B
:03004A0013C92EA9
:03004B0013D3D0FC
:03004C0013DE6759
:03004D0013E8F2C3
:03004E0013F36E3B
:03004F0013FDDDC1
:0300500014083A57
:0300510014128EF8
:03005200141CD4A7
:0300530014270E61
:030054001431372D
:03005500143B5603
:03005600144567E7
:03005700144F6DD6
:03005800145962D6
:0300590014634DE0
:03005A00146D2CF6
:03005B001476FD1B
:03005C001480C449
:03005D00148A7B87
:03005E00149428CF
:03005F00149DC825
:0300600014A75B87
:0300610014B0E4F4
:0300620014BA5F6E
:0300630014C3D1F2
:0300640014CD3484
:0300650014D68C22
:0300660014DFDCC8
:0300670014E91D7C
:0300680014F2523D
:0300690014FB7E07
:03006A0015049BDF
:03006B00150DADC3
:03006C001516B6B0
:03006D00151FB5A7
:03006E001528A7AB
:03006F0015318DBB
:03007000153A69D5
:03007100154339FB
:03007200154BFE2D
:030073001554BE63
:03007400155D6FA8
:03007500156614F9
:03007600156EAE56
:03007700157742B8
:03007800157FC62B
:03007900158846A1
:03007A001590B727
:03007B0015991DB7
:03007C0015A17C4F
:03007D0015A9D1F1
:03007E0015B21C9C
:03007F0015BA5B54
:0300800015C28F17
:0300810015CABBE2
:0300820015D2DDB7
:0300830015DAF794
:0300840015E3067B
:0300850015EB0B6D
:0300860015F30768
:0300870015FAFC6B
:030088001602E37A
:03008900160AC292
:03008A00161298B3
:03008B00161A69D9
:03008C0016222A0F
:03008D001629E54C
:03008E0016319890
:03008F0016393FE0
:030090001640DF38
:0300910016487896
:0300920016500500
:0300930016578A73
:03009400165F09EB
:0300950016667C70
:03009600166DE8FC
:0300970016754893
:03009800167CA52E
:030099001683F5D6
:03009A00168B3F83
:03009B0016927F3B
:03009C001699B6FC
:03009D0016A0E6C4
:03009E0016A80F92
:03009F0016AF2F6A
:0300A00016B64849
:0300A10016BD5534
:0300A20016C46021
:0300A30016CB5D1C
:0300A40016D2551C
:0300A50016D94524
:0300A60016E02F32
:0300A70016E70C4D
:0300A80016EDE66C
:0300A90016F4B694
:0300AA0016FB80C2
:0300AB00170240F9
:0300AC001708FA38
:0300AD00170FAD7D
:0300AE00171657CB
:0300AF00171CF823
:0300B0001723987B
:0300B100172A2CDF
:0300B2001730B94B
:0300B30017373EBE
:0300B400173DBC39
:0300B500174434B9
:0300B600174AA244
:0300B70017510BD3
:0300B80017577067
:0300B900175DC808
:0300BA0017641DAB
:0300BB00176A6C55
:0300BC001770B208
:0300BD001776F1C2
:0300BE00177D2982
:0300BF0017835B49
:0300C00017898815
:0300C100178FACEA
:0300C2001795C9C6
:0300C300179BE1A7
:0300C40017A1F091
:0300C50017A7F981
:0300C60017ADFB78
:0300C70017B3F973
:0300C80017B9F075
:0300C90017BFDF7F
:0300CA0017C5C98E
:0300CB0017CBADA3
:0300CC0017D18ABF
:0300CD0017D75FE3
:0300CE0017DD3209
:0300CF0017E2FA3B
:0300D00017E8BD71
:0300D10017EE7CAB
:0300D20017F431EF
:0300D30017F9E436
:0300D40017FF9182
:0300D500180534D7
:0300D600180AD431
:0300D70018106F8F
:0300D800181605F2
:0300D900181B9061
:0300DA0018211AD0
:0300DB0018269E46
:0300DC00182C19C4
:0300DD0018318F48
:0300DE00183701CF
:0300DF00183C6B5F
:0300E0001841D1F3
:0300E1001847318C
:0300E200184C892E
:0300E3001851DFD2
:0300E4001857307A
:0300E500185C7A2A
:0300E6001861BFDF
:0300E7001866FE9A
:0300E800186C3958
:0300E90018716D1E
:0300EA0018769BEA
:0300EB00187BC3BC
:0300EC001880E891
:0300ED001886076B
:0300EE00188B204C
:0300EF0018903432
:0300F0001895431D
:0300F100189A500A
:0300F200189F55FF
:0300F30018A453FB
:0300F40018A94FF9
:0300F50018AE46FC
:0300F60018B33606
:0300F70018B82115
:0300F80018BD0A26
:0300F90018C1EB40
:0300FA0018C6C85D
:0300FB0018CB9F80
:0300FC0018D074A5
:0300FD0018D543D0
:0300FE0018DA0B02
:0300FF0018DECE3A
:0301000018E38F72
:0301010018E848B3
:0301020018ED00F5
:0301030018F1B33D
:0301040018F65F8B
:0301050018FB06DE
:0301060018FFAB34
:0301070019044C8C
:030108001908E6ED
:03010900190D7C51
:03010A0019120DBA
:03010B0019169A28
:03010C00191B2498
:03010D00191FA80F
:03010E0019242889
:03010F001928A507
:03011000192D1B8B
:0301110019318C15
:030112001935FBA1
:03011300193A6630
:03011400193ECBC6
:0301150019432B60
:03011600194787FF
:03011700194BE1A0
:0301180019503843
:03011900195486F0
:03011A001958D79A
:03011B00195D1E4D
:03011C0019615F07
:03011D001965A0C1
:03011E001969DC80
:03011F00196E1343
:030120001972470A
:03012100197678D4
:03012200197AA5A2
:03012300197ECC76
:030124001982EF4E
:0301250019871027
:03012600198B2A08
:03012700198F43EA
:03012800199358D0
:03012900199766BD
:03012A00199B72AC
:03012B00199F7B9E
:03012C0019A37F95
:03012D0019A7808F
:03012E0019AB808A
:03012F0019AF7A8B
:0301300019B36D93
:0301310019B7619A
:0301320019BB4EA8
:0301330019BF38B9
:0301340019C31DCF
:0301350019C703E4
:0301360019CAE003
:0301370019CEBD21
:0301380019D29643
:0301390019D66A6A
:03013A0019DA3B94
:03013B0019DE0BBF
:03013C0019E1D7EF
:03013D0019E59D24
:03013E0019E9605C
:03013F0019ED1F98
:0301400019F0D8DB
:0301410019F4911D
:0301420019F84B5E
:0301430019FBFBAA
:0301440019FFAAF6
:030145001A035545
:030146001A06FE98
:030147001A0AA0F1
:030148001A0E4547
:030149001A11DEAA
:03014A001A157A09
:03014B001A19126C
:03014C001A1CA6D4
:03014D001A20373E
:03014E001A23C3AE
:03014F001A274F1D
:030150001A2AD692
:030151001A2E590A
:030152001A31DA85
:030153001A355901
:030154001A38D581
:030155001A3C4C05
:030156001A3FC08D
:030157001A432F19
:030158001A469EA6
:030159001A4A0738
:03015A001A4D70CB
:03015B001A50D760
:03015C001A5438FA
:03015D001A579698
:03015E001A5AF03A
:03015F001A5E48DD
:030160001A619F82
:030161001A64F32A
:030162001A6843D5
:030163001A6B9282
:030164001A6EDB35
:030165001A7221EA
:030166001A7567A0
:030167001A78A95A
:030168001A7BE718
:030169001A7F21D9
:03016A001A825C9A
:03016B001A859161
:03016C001A88C42A
:03016D001A8BF5F5
:03016E001A8F21C4
:03016F001A924B96
:030170001A957469
:030171001A989841
:030172001A9BBB1A
:030173001A9EDBF6
:030174001AA1F8D5
:030175001AA513B5
:030176001AA82A9A
:030177001AAB3F81
:030178001AAE5666
:030179001AB16454
:03017A001AB46F45
:03017B001AB77A36
:03017C001ABA8527
:03017D001ABD8523
:03017E001AC0891B
:03017F001AC38B15
:030180001AC68913
:030181001AC98315
:030182001ACC7B19
:030183001ACF6F21
:030184001AD26329
:030185001AD55434
:030186001AD84242
:030187001ADB2E52
:030188001ADE1A62
:030189001AE10078
:03018A001AE3E68F
:03018B001AE6C7AA
:03018C001AE9A8C5
:03018D001AEC84E5
:03018E001AEF5F06
:03018F001AF2372A
:030190001AF50B52
:030191001AF7DF7B
:030192001AFAB3A3
:030193001AFD81D1
:030194001B004D00
:030195001B031633
:030196001B05DF67
:030197001B08A49E
:030198001B0B66D8
:030199001B0E2812
:03019A001B10E651
:03019B001B13A291
:03019C001B165DD2
:03019D001B191417
:03019E001B1BCA5E
:03019F001B1E7FA5
:0301A0001B212EF2
:0301A1001B23DB42
:0301A2001B268891
:0301A3001B2932E3
:0301A4001B2BDA38
:0301A5001B2E808E
:0301A6001B3125E5
:0301A7001B33C93E
:0301A8001B36699A
:0301A9001B3905FA
:0301AA001B3BA05C
:0301AB001B3E3ABE
:0301AC001B40D124
:0301AD001B43658C
:0301AE001B45F8F6
:0301AF001B488A60
:0301B0001B4B18CE
:0301B1001B4DA340
:0301B2001B502EB1
:0301B3001B52B725
:0301B4001B553C9C
:0301B5001B57C015
:0301B6001B5A428F
:0301B7001B5CC10D
:0301B8001B5F4189
:0301B9001B61BD0A
:0301BA001B64378C
:0301BB001B66B010
:0301BC001B692795
:0301BD001B6B9920
:0301BE001B6E0AAB
:0301BF001B707B37
:0301C0001B72EAC5
:0301C1001B755655
:0301C2001B77C1E7
:0301C3001B7A297B
:0301C4001B7C9110
:0301C5001B7EF5A9
:0301C6001B815842
:0301C7001B83BADD
:0301C8001B86177C
:0301C9001B88731D
:0301CA001B8AD2BB
:0301CB001B8D2861
:0301CC001B8F8006
:0301CD001B91D8AB
:0301CE001B942D52
:0301CF001B967EFE
:0301D0001B98CDAC
:0301D1001B9B1E57
:0301D2001B9D670B
:0301D3001B9FB5BA
:0301D4001BA1FD6F
:0301D5001BA44424
:0301D6001BA689DC
:0301D7001BA8CD95
:0301D8001BAB104E
:0301D9001BAD4E0D
:0301DA001BAF8CCC
:0301DB001BB1C98C
:0301DC001BB4054C
:0301DD001BB63F0F
:0301DE001BB877D4
:0301DF001BBAAD9B
:0301E0001BBCE065
:0301E1001BBF122F
:0301E2001BC141FD
:0301E3001BC371CA
:0301E4001BC59F99
:0301E5001BC7CA6B
:0301E6001BC9F53D
:0301E7001BCC1C12
:0301E8001BCE41EA
:0301E9001BD065C3
:0301EA001BD28C99
:0301EB001BD4AB77
:0301EC001BD6CC53
:0301ED001BD8EB31
:0301EE001BDB0810
:0301EF001BDD23F2
:0301F0001BDF3DD5
:0301F1001BE156B9
:0301F2001BE369A3
:0301F3001BE58188
:0301F4001BE79373
:0301F5001BE9A65D
:0301F6001BEBB64A
:0301F7001BEDC23B
:0301F8001BEFD02A
:0301F9001BF1DD1A
:0301FA001BF3E70D
:0301FB001BF5EF02
:0301FC001BF7F4FA
:0301FD001BF9F9F2
:0301FE001BFBFEEA
:0301FF001BFE00E4
:00000001ff

View File

@@ -0,0 +1,514 @@
:020000040000FA
:0200000007FAFD
:0200010007EE08
:0200020007E312
:0200030007D81C
:0200040007CB28
:0200050007C032
:0200060007B53C
:0200070007A947
:02000800079B54
:0200090007925C
:02000A00078865
:02000B00077B71
:02000C0007717A
:02000D00076486
:02000E00075A8F
:02000F00075197
:020010000746A1
:020011000739AD
:020012000731B4
:020013000728BC
:02001400071EC5
:020015000712D0
:020016000708D9
:0200170006FDE4
:0200180006F5EB
:0200190006ECF3
:02001A0006DFFF
:02001B0006D706
:02001C0006CE0E
:02001D0006C219
:02001E0006BA20
:02001F0006B029
:0200200006A632
:02002100069D3A
:02002200069541
:02002300068A4B
:02002400068252
:0200250006795A
:02002600067260
:0200270006666B
:02002800066070
:0200290006537C
:02002A00064C82
:02002B00064489
:02002C00063C90
:02002D00063497
:02002E000628A2
:02002F000620A9
:020030000619AF
:020031000611B6
:020032000607BF
:0200330005FEC8
:0200340005F7CE
:0200350005F0D4
:0200360005E8DB
:0200370005E1E1
:0200380005D7EA
:0200390005CFF1
:02003A0005C8F7
:02003B0005BFFF
:02003C0005B805
:02003D0005B10B
:02003E0005AA11
:02003F0005A416
:02004000059A1F
:02004100059424
:02004200058B2C
:02004300058432
:02004400057E37
:0200450005763E
:02004600056E45
:02004700056949
:02004800055F52
:02004900055B55
:02004A0005515E
:02004B00054D61
:02004C00054568
:02004D00053E6E
:02004E00053774
:02004F00052F7B
:02005000052B7E
:02005100052385
:02005200051D8A
:02005300051591
:02005400051095
:0200550005099B
:020056000502A1
:0200570004FBA8
:0200580004F7AB
:0200590004F0B1
:02005A0004E9B7
:02005B0004E4BB
:02005C0004DCC2
:02005D0004D7C6
:02005E0004D0CC
:02005F0004CBD0
:0200600004C5D5
:0200610004BFDA
:0200620004B9DF
:0200630004B2E5
:0200640004ADE9
:0200650004A8ED
:02006600049FF5
:020067000499FA
:020068000495FD
:02006900048D04
:02006A00048907
:02006B0004850A
:02006C00047E10
:02006D00047716
:02006E0004721A
:02006F00046D1E
:02007000046822
:02007100046425
:02007200046028
:02007300045631
:02007400045135
:02007500044D38
:02007600044A3A
:02007700044241
:02007800044042
:0200790004374A
:02007A0004344C
:02007B0004304F
:02007C00042A54
:02007D00042459
:02007E00041E5E
:02007F00041B60
:02008000041763
:02008100041366
:02008200040E6A
:0200830004086F
:02008400040472
:0200850003FF77
:0200860003FB7A
:0200870003F480
:0200880003F182
:0200890003ED85
:02008A0003E988
:02008B0003E18F
:02008C0003DF90
:02008D0003DA94
:02008E0003D598
:02008F0003D29A
:0200900003CD9E
:0200910003C7A3
:0200920003C4A5
:0200930003BFA9
:0200940003B9AE
:0200950003B5B1
:0200960003B1B4
:0200970003AFB5
:0200980003A8BB
:0200990003A5BD
:02009A0003A0C1
:02009B00039DC3
:02009C00039AC5
:02009D000396C8
:02009E000391CC
:02009F00038CD0
:0200A0000388D3
:0200A1000386D4
:0200A200037FDA
:0200A300037DDB
:0200A4000378DF
:0200A5000375E1
:0200A600036FE6
:0200A700036DE7
:0200A8000368EB
:0200A9000364EE
:0200AA000360F1
:0200AB00035DF3
:0200AC000359F6
:0200AD000354FA
:0200AE000352FB
:0200AF000350FC
:0200B000034803
:0200B100034505
:0200B200034108
:0200B300033F09
:0200B400033B0C
:0200B50003380E
:0200B60003360F
:0200B700033311
:0200B800032D16
:0200B900032C16
:0200BA0003271A
:0200BB0003221E
:0200BC00031F20
:0200BD00031C22
:0200BE00031924
:0200BF00031527
:0200C00003102B
:0200C100030D2D
:0200C200030A2F
:0200C300030632
:0200C400030433
:0200C500030234
:0200C60002FF37
:0200C70002FB3A
:0200C80002F73D
:0200C90002F53E
:0200CA0002F141
:0200CB0002ED44
:0200CC0002EA46
:0200CD0002E946
:0200CE0002E34B
:0200CF0002E24B
:0200D00002DF4D
:0200D10002DC4F
:0200D20002DB4F
:0200D30002D653
:0200D40002D355
:0200D50002D255
:0200D60002CE58
:0200D70002CB5A
:0200D80002C65E
:0200D90002C55E
:0200DA0002C062
:0200DB0002BC65
:0200DC0002BB65
:0200DD0002B867
:0200DE0002B569
:0200DF0002B36A
:0200E00002B06C
:0200E10002AE6D
:0200E20002AD6D
:0200E30002A970
:0200E40002A573
:0200E50002A275
:0200E600029F77
:0200E700029C79
:0200E80002987C
:0200E90002967D
:0200EA0002947E
:0200EB0002937E
:0200EC00028F81
:0200ED00028D82
:0200EE00028B83
:0200EF00028984
:0200F000028686
:0200F10002818A
:0200F200027F8B
:0200F300027E8B
:0200F400027A8E
:0200F500027790
:0200F600027591
:0200F700027491
:0200F800026F95
:0200F900026D96
:0200FA00026B97
:0200FB00026998
:0200FC0002659B
:0200FD0002629D
:0200FE0002629C
:0200FF0002609D
:02010000025E9D
:02010100025D9D
:020102000259A0
:020103000256A2
:020104000255A2
:020105000254A2
:020106000250A5
:02010700024DA7
:02010800024BA8
:020109000249A9
:02010A000248A9
:02010B000245AB
:02010C000242AD
:02010D000240AE
:02010E00023DB0
:02010F00023AB2
:020110000239B2
:020111000238B2
:020112000235B4
:020113000232B6
:020114000231B6
:020115000230B6
:02011600022FB6
:02011700022BB9
:020118000228BB
:020119000228BA
:02011A000221C0
:02011B000221BF
:02011C000221BE
:02011D00021EC0
:02011E00021CC1
:02011F00021BC1
:020120000218C3
:020121000215C5
:020122000212C7
:020123000211C7
:02012400020FC8
:02012500020DC9
:02012600020CC9
:020127000209CB
:020128000207CC
:020129000207CB
:02012A000205CC
:02012B000203CD
:02012C000202CD
:02012D0001FFD0
:02012E0001FBD3
:02012F0001F9D4
:0201300001FAD2
:0201310001F6D5
:0201320001F5D5
:0201330001F4D5
:0201340001F4D4
:0201350001F0D7
:0201360001F0D6
:0201370001EDD8
:0201380001EBD9
:0201390001EAD9
:02013A0001E8DA
:02013B0001E4DD
:02013C0001E1DF
:02013D0001E0DF
:02013E0001DEE0
:02013F0001DEDF
:0201400001DFDD
:0201410001DDDE
:0201420001D7E3
:0201430001D7E2
:0201440001D5E3
:0201450001D3E4
:0201460001D1E5
:0201470001D2E3
:0201480001CCE8
:0201490001CFE4
:02014A0001CCE6
:02014B0001CAE7
:02014C0001C8E8
:02014D0001C7E8
:02014E0001C6E8
:02014F0001C3EA
:0201500001C2EA
:0201510001C1EA
:0201520001BFEB
:0201530001BCED
:0201540001B9EF
:0201550001B8EF
:0201560001B7EF
:0201570001B7EE
:0201580001B5EF
:0201590001B5EE
:02015A0001B2F0
:02015B0001AFF2
:02015C0001AEF2
:02015D0001AEF1
:02015E0001AEF0
:02015F0001ADF0
:0201600001AAF2
:0201610001A8F3
:0201620001A6F4
:0201630001A3F6
:0201640001A3F5
:0201650001A2F5
:02016600019FF7
:02016700019DF8
:02016800019DF7
:02016900019CF7
:02016A000199F9
:02016B000199F8
:02016C000197F9
:02016D000196F9
:02016E000196F8
:02016F000195F8
:020170000193F9
:020171000193F8
:020172000191F9
:020173000190F9
:02017400018EFA
:02017500018DFA
:02017600018CFA
:02017700018BFA
:020178000185FF
:020179000186FD
:02017A000186FC
:02017B000185FC
:02017C00018000
:02017D000184FB
:02017E000181FD
:02017F00017EFF
:02018000017DFF
:02018100017CFF
:02018200017CFE
:02018300017CFD
:02018400017AFE
:020185000179FE
:020186000178FE
:020187000176FF
:02018800017301
:02018900017201
:02018A00017002
:02018B00016F02
:02018C00016D03
:02018D00016D02
:02018E00016B03
:02018F00016B02
:02019000016B01
:02019100016A01
:02019200016604
:02019300016504
:02019400016503
:02019500016403
:02019600016204
:02019700016203
:02019800016103
:02019900015F04
:02019A00015F03
:02019B00015D04
:02019C00015C04
:02019D00015B04
:02019E00015905
:02019F00015706
:0201A000015804
:0201A100015803
:0201A200015703
:0201A300015603
:0201A400015503
:0201A500015403
:0201A600015105
:0201A700014E07
:0201A800014D07
:0201A900014D06
:0201AA00014C06
:0201AB00014A07
:0201AC00014907
:0201AD00014906
:0201AE00014707
:0201AF00014508
:0201B000014507
:0201B100014506
:0201B200014307
:0201B300014207
:0201B400014206
:0201B500014106
:0201B600014105
:0201B700014005
:0201B800013E06
:0201B900013D06
:0201BA00013C06
:0201BB00013A07
:0201BC00013907
:0201BD00013A05
:0201BE00013A04
:0201BF00013805
:0201C000013705
:0201C100013605
:0201C200013505
:0201C300013405
:0201C400013206
:0201C500013205
:0201C600013006
:0201C700012F06
:0201C800013004
:0201C900013003
:0201CA00012C06
:0201CB00012E03
:0201CC00012C04
:0201CD00012906
:0201CE00012707
:0201CF00012805
:0201D000012705
:0201D100012407
:0201D200012604
:0201D300012306
:0201D400012305
:0201D500012205
:0201D600012204
:0201D700012005
:0201D800011F05
:0201D900012003
:0201DA00011F03
:0201DB00011E03
:0201DC00011C04
:0201DD00011A05
:0201DE00011905
:0201DF00011805
:0201E000011804
:0201E100011803
:0201E200011802
:0201E300011603
:0201E400011503
:0201E500011403
:0201E600011303
:0201E700011302
:0201E800011400
:0201E9000114FF
:0201EA00011002
:0201EB00011100
:0201EC00010F01
:0201ED00010E01
:0201EE00010D01
:0201EF00010C01
:0201F000010B01
:0201F100010902
:0201F200010BFF
:0201F300010801
:0201F400010800
:0201F500010601
:0201F600010600
:0201F7000107FE
:0201F8000105FF
:0201F900010300
:0201FA00010200
:0201FB000102FF
:0201FC000103FD
:0201FD000102FD
:0201FE000100FE
:0201FF000100FD
:00000001ff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,258 @@
:020000040000FA
:0400000008000004F0
:040001000807FC08E8
:04000200080FF024CF
:040003000817DC6F8F
:04000400081FC0FF12
:0400050008279DEC3F
:04000600082F734C00
:040007000837413342
:04000800083F07B7EF
:040009000846C6EEF1
:04000A00084E7EEB33
:04000B0008562FC2A2
:04000C00085DD9882A
:04000D0008657C4FB7
:04000E00086D182B36
:04000F000874AD2E96
:04001000087C3B6BC2
:040011000883C2F3AB
:04001200088B43D83C
:040013000892BE2D64
:04001400089A320113
:0400150008A19F6639
:0400160008A9066DC2
:0400170008B06725A1
:0400180008B7C19EC6
:0400190008BF15E91E
:04001A0008C664159B
:04001B0008CDAC312F
:04001C0008D4EE4CCA
:04001D0008DC2A755C
:04001E0008E360BAD9
:04001F0008EA912931
:0400200008F1BBD256
:0400210008F8E0C13A
:0400220009000004CD
:04002300090719A907
:04002400090E2DBCD8
:0400250009153C4C31
:04002600091C456408
:04002700092349114F
:04002800092A4761F9
:040029000931405EFB
:04002A000938341548
:04002B00093F2292D5
:04002C0009460BE195
:04002D00094CF00D7D
:04002E000953CF2182
:04002F00095AA92998
:0400300009617E31B3
:0400310009684E42CA
:04003200096F1967D2
:040033000975DFACC0
:04003400097CA11A88
:0400350009835DBC22
:04003600098A159C82
:040037000990C8C59F
:040038000997773F6E
:04003900099E2115E6
:04003A0009A4C650FF
:04003B0009AB66FAAD
:04003C0009B2031CE6
:04003D0009B89AC0A4
:04003E0009BF2DEEDB
:04003F0009C5BCB083
:0400400009CC470E92
:0400410009D2CD1102
:0400420009D94EC3C7
:0400430009DFCC2ADB
:0400440009E6455133
:0400450009ECBA3FC9
:0400460009F32AFC94
:0400470009F997908C
:040048000A000004A6
:040049000A06645FE0
:04004A000A0CC4AA2E
:04004B000A1320EC88
:04004C000A19792CE8
:04004D000A1FCD7247
:04004E000A261DC69B
:04004F000A2C6A2FDE
:040050000A32B2B40A
:040051000A38F75C16
:040052000A3F382EFB
:040053000A457532B3
:040054000A4BAE6F36
:040055000A51E3EA7F
:040056000A5815AC83
:040057000A5E43BA40
:040058000A646E1BAD
:040059000A6A94D6C5
:04005A000A70B7F180
:04005B000A76D773D7
:04005C000A7CF362C5
:04005D000A830BC443
:04005E000A89209F4C
:04005F000A8F31F9DA
:040060000A953FD9E5
:040061000A9B4A4468
:040062000AA151405E
:040063000AA754D4C0
:040064000AAD550488
:040065000AB351D7B2
:040066000AB94B5236
:040067000ABF417A11
:040068000AC534563B
:040069000ACB23EBB0
:04006A000AD1103E69
:04006B000AD6F95464
:04006C000ADCDF3398
:04006D000AE2C1E002
:04006E000AE8A15F9C
:04006F000AEE7DB860
:040070000AF456ED4B
:040071000AFA2D0555
:040072000B0000047B
:040073000B05CFEFBB
:040074000B0B9CCC0A
:040075000B11669E67
:040076000B172D6BCC
:040077000B1CF13736
:040078000B22B2079E
:040079000B286FDF02
:04007A000B2E2AC55A
:04007B000B33E2BDA4
:04007C000B3997CBDA
:04007D000B3F49F3F9
:04007E000B44F93AFC
:04007F000B4AA5A5DE
:040080000B504F3C96
:040081000B5B99E894
:040082000B66D966CA
:040083000B720DD619
:040084000B7D375861
:040085000B88560E80
:040086000B936A145A
:040087000B9E738ACF
:040088000BA97290BE
:040089000BB4673E0F
:04008A000BBF51B6A1
:04008B000BCA321456
:04008C000BD5087018
:04008D000BDFD4EAC7
:04008E000BEA979A48
:04008F000BF5509C81
:040090000C00000858
:040091000C0AA5FAB6
:040092000C1542887F
:040093000C1FD5CE9B
:040094000C2A5FE2F1
:040095000C34E0DC6B
:040096000C3F58D4EF
:040097000C49C7E267
:040098000C542E1ABC
:040099000C5E8B96D8
:04009A000C68E068A6
:04009B000C732CA610
:04009C000C7D7068FF
:04009D000C87ABC25F
:04009E000C91DEC81B
:04009F000C9C098C20
:0400A0000CA62C2658
:0400A1000CB046A6B3
:0400A2000CBA592219
:0400A3000CC463AC7A
:0400A4000CCE6654C4
:0400A5000CD86132E0
:0400A6000CE25454C0
:0400A7000CEC3FCC52
:0400A8000CF623AE81
:0400A9000D0000083E
:0400AA000D09D4EE7A
:0400AB000D13A2701F
:0400AC000D1D689E20
:0400AD000D2727886C
:0400AE000D30DF3EF4
:0400AF000D3A8FD2A5
:0400B0000D44395270
:0400B1000D4DDBCE48
:0400B2000D5777541B
:0400B3000D610BF4DC
:0400B4000D6A99BC7C
:0400B5000D7420BCEA
:0400B6000D7DA10219
:0400B7000D871A9CFB
:0400B8000D908D9684
:0400B9000D99FA00A3
:0400BA000DA35FE84B
:0400BB000DACBF5A6F
:0400BC000DB6186401
:0400BD000DBF6B12F6
:0400BE000DC8B7743E
:0400BF000DD1FD92D0
:0400C0000DDB3D7C9B
:0400C1000DE4773E95
:0400C2000DEDAAE2B4
:0400C3000DF6D878E6
:0400C4000E00000822
:0400C5000E0921A05F
:0400C6000E123D4C8D
:0400C7000E1B5314A5
:0400C8000E24630897
:0400C9000E2D6D305B
:0400CA000E36719AE3
:0400CB000E3F704E26
:0400CC000E4869561B
:0400CD000E515CC0B4
:0400CE000E5A4A96E6
:0400CF000E6332E0AA
:0400D0000E6C15AAF3
:0400D1000E74F2FEB9
:0400D2000E7DCAE6EF
:0400D3000E869D6C8C
:0400D4000E8F6A9889
:0400D5000E983276D9
:0400D6000EA0F51073
:0400D7000EA9B26C50
:0400D8000EB26A9862
:0400D9000EBB1D98A5
:0400DA000EC3CB7A0C
:0400DB000ECC74448F
:0400DC000ED5180025
:0400DD000EDDB6B6C8
:0400DE000EE650706A
:0400DF000EEEE53606
:0400E0000EF7751092
:0400E1000F00000804
:0400E2000F08862459
:0400E3000F11076E84
:0400E4000F1983EE7F
:0400E5000F21FBAC40
:0400E6000F2A6EAEC1
:0400E7000F32DD00F7
:0400E8000F3B46A4E0
:0400E9000F43ABA86E
:0400EA000F4C0C109B
:0400EB000F5467E463
:0400EC000F5CBF2ABC
:0400ED000F6511EE9C
:0400EE000F6D603200
:0400EF000F75AA02DD
:0400F0000F7DEF6031
:0400F1000F863058EE
:0400F2000F8E6CEE13
:0400F3000F96A52A95
:0400F4000F9ED9146E
:0400F5000FA708B099
:0400F6000FAF34060E
:0400F7000FB75B1EC6
:0400F8000FBF7DFEBB
:0400F9000FC79CACE5
:0400FA000FCFB72C41
:0400FB000FD7CD8AC4
:0400FC000FDFDFC86B
:0400FD000FE7EDEC30
:0400FE000FEFF80008
:0400FF000FF7FE08F1
:00000001ff

View File

@@ -0,0 +1,258 @@
:020000040000FA
:0300000007FFFEF9
:0300010007F809F4
:0300020007F02DD7
:0300030007E866A5
:0300040007E0BA58
:0300050007D922F6
:0300060007D19C83
:0300070007CA31F4
:0300080007C2D953
:0300090007BB949E
:03000A0007B465D3
:03000B0007AD4AF4
:03000C0007A64103
:03000D00079F4DFD
:03000E0007986AE6
:03000F0007919BBB
:03001000078ADB81
:030011000784322F
:03001200077D9ACD
:0300130007770F5D
:03001400077099D9
:03001500076A3344
:030016000763D9A4
:03001700075D94EE
:0300180007575F28
:0300190007513854
:03001A00074B1F72
:03001B0007451680
:03001C00073F1E7D
:03001D000739316F
:03001E000733564F
:03001F00072D8A20
:030020000727C7E8
:030021000722159E
:03002200071C7048
:030023000716D9E4
:0300240007115071
:03002500070BD0F6
:030026000706606A
:030027000700FED1
:0300280006FBA133
:0300290006F65583
:03002A0006F116C6
:03002B0006EBE2FF
:03002C0006E6B82D
:03002D0006E19C4D
:03002E0006DC8D60
:03002F0006D7866B
:0300300006D2886D
:0300310006CD9861
:0300320006C8B34A
:0300330006C3D62B
:0300340006BF06FE
:0300350006BA40C8
:0300360006B58488
:0300370006B0CD43
:0300380006AC24EF
:0300390006A78592
:03003A0006A2F02B
:03003B00069E64BA
:03003C000699E141
:03003D00069566BF
:03003E000690F633
:03003F00068C8D9F
:0300400006882F00
:030041000683D95A
:03004200067F8AAC
:03004300067B46F3
:0300440006770834
:030045000672D16F
:03004600066EA59E
:03004700066A82C4
:03004800066666E3
:03004900066253F9
:03004A00065E440B
:03004B00065A3E14
:03004C0006564114
:03004D0006524C0C
:03004E00064E5CFF
:03004F00064A73EB
:03005000064693CE
:030051000642BBA9
:03005200063EEB7C
:03005300063B1F4A
:0300540006375A12
:0300550006339DD2
:03005600062FE58D
:03005700062C363E
:0300580006288DEA
:030059000624EB8F
:03005A0006214F2D
:03005B00061DB7C8
:03005C00061A265B
:03005D0006169BE9
:03005E000613186E
:03005F00060F9CED
:03006000060C2467
:030061000608B3DB
:0300620006054848
:030063000601E0B3
:0300640005FE8016
:0300650005FB2573
:0300660005F7D0CB
:0300670005F4831A
:0300680005F13867
:0300690005EDF1B1
:03006A0005EAB1F3
:03006B0005E7782E
:03006C0005E44266
:03006D0005E11298
:03006E0005DDEDC0
:03006F0005DAC4EB
:0300700005D7A60B
:0300710005D48A29
:0300720005D17441
:0300730005CE6552
:0300740005CB5465
:0300750005C84D6E
:0300760005C54974
:0300770005C24B74
:0300780005BF536E
:0300790005BC6063
:03007A0005B96E57
:03007B0005B68047
:03007C0005B39831
:03007D0005B0B714
:03007E0005ADD9F4
:03007F0005AAFED1
:030080000B504AD8
:030081000B450C20
:030082000B39EE49
:030083000B2EF24F
:030084000B241634
:030085000B1956FE
:030086000B0EB8A6
:030087000B043A2D
:030088000AF9D0A2
:030089000AEF90EB
:03008A000AE5661E
:03008B000ADB5637
:03008C000AD1682E
:03008D000AC78E11
:03008E000ABDD2D6
:03008F000AB42E82
:030090000AAAA811
:030091000AA13889
:030092000A97E4E6
:030093000A8EA42E
:030094000A85805A
:030095000A7C766C
:030096000A737E6C
:030097000A6A9E54
:030098000A61D822
:030099000A591CE5
:03009A000A508287
:03009B000A480010
:03009C000A3F8A8E
:03009D000A372CF3
:03009E000A2EDE49
:03009F000A26AA84
:0300A0000A1E86AF
:0300A1000A167AC2
:0300A2000A0E7EC5
:0300A3000A0692B8
:0300A40009FEC092
:0300A50009F6F861
:0300A60009EF4619
:0300A70009E7AABC
:0300A80009E01656
:0300A90009D89CD7
:0300AA0009D12E4B
:0300AB0009C9D0B0
:0300AC0009C28402
:0300AD0009BB4C40
:0300AE0009B4246E
:0300AF0009AD0692
:0300B00009A5F8A7
:0300B100099EFAAB
:0300B20009980E9C
:0300B3000991327E
:0300B400098A6452
:0300B5000983A21A
:0300B600097CECD6
:0300B70009764483
:0300B800096FB21B
:0300B900096928AA
:0300BA000962AA2E
:0300BB00095C3CA1
:0300BC000955DA09
:0300BD00094F8662
:0300BE0009493AB3
:0300BF00094302F0
:0300C000093CD226
:0300C1000936B04D
:0300C20009309C66
:0300C300092A8E79
:0300C4000924927A
:0300C500091EA071
:0300C6000918B462
:0300C7000912DE3D
:0300C800090D0C13
:0300C900090746DE
:0300CA00090188A1
:0300CB0008FBDA55
:0300CC0008F63AF9
:0300CD0008F09E9A
:0300CE0008EB0C30
:0300CF0008E588B9
:0300D00008E00E37
:0300D10008DA9CAE
:0300D20008D5321C
:0300D30008CFD47F
:0300D40008CA84D3
:0300D50008C53A21
:0300D60008BFF66A
:0300D70008BAC4A0
:0300D80008B590D8
:0300D90008B070FC
:0300DA0008AB521E
:0300DB0008A64034
:0300DC0008A13840
:0300DD00089C3844
:0300DE0008974040
:0300DF0008925034
:0300E000088D6A1E
:0300E10008888804
:0300E2000883B4DC
:0300E300087EE6AE
:0300E400087A1C7B
:0300E50008755E3D
:0300E6000870ACF3
:0300E700086BF6AD
:0300E80008675452
:0300E9000862B2F8
:0300EA00085E1895
:0300EB0008598A27
:0300EC00085504B0
:0300ED0008508038
:0300EE00084C0CAF
:0300EF000847942B
:0300F00008432C96
:0300F100083EC600
:0300F200083A6A5F
:0300F300083612BA
:0300F4000831C010
:0300F500082D7A59
:0300F60008293A9C
:0300F7000824FCDE
:0300F8000820C21B
:0300F900081C944C
:0300FA0008187073
:0300FB000814489E
:0300FC00081030B9
:0300FD00080C1ECE
:0300FE0008080EE1
:0300FF000803FEF5
:00000001ff

View File

@@ -0,0 +1,258 @@
:020000040000FA
:020000000C06EC
:020001000C13DE
:020002000C1DD3
:020003000C2AC5
:020004000C33BB
:020005000C3DB0
:020006000C4BA1
:020007000C5398
:020008000C5E8C
:020009000C6980
:02000A000C7276
:02000B000C7C6B
:02000C000C8660
:02000D000C8F56
:02000E000C994B
:02000F000CA142
:020010000CAD35
:020011000CB32E
:020012000CBB25
:020013000CC51A
:020014000CCC12
:020015000CD30A
:020016000CDFFD
:020017000CE5F6
:020018000CECEE
:020019000CF4E5
:02001A000CFDDB
:02001B000D05D1
:02001C000D0ACB
:02001D000D14C0
:02001E000D19BA
:02001F000D1FB3
:020020000D28A9
:020021000D2EA2
:020022000D359A
:020023000D3A94
:020024000D408D
:020025000D4884
:020026000D4D7E
:020027000D5179
:020028000D5C6D
:020029000D6266
:02002A000D6760
:02002B000D6D59
:02002C000D7451
:02002D000D784C
:02002E000D7B48
:02002F000D8240
:020030000D8938
:020031000D8D33
:020032000D922D
:020033000D9826
:020034000D9C21
:020035000DA01C
:020036000DA417
:020037000DAD0D
:020038000DB207
:020039000DB602
:02003A000DBAFD
:02003B000DBEF8
:02003C000DC3F2
:02003D000DC8EC
:02003E000DCCE7
:02003F000DD1E1
:020040000DD4DD
:020041000DD9D7
:020042000DDDD2
:020043000DE1CD
:020044000DE5C8
:020045000DECC0
:020046000DEFBC
:020047000DF2B8
:020048000DF5B4
:020049000DF8B0
:02004A000DFEA9
:02004B000E02A3
:02004C000E059F
:02004D000E089B
:02004E000E0D95
:02004F000E128F
:020050000E158B
:020051000E1788
:020052000E1985
:020053000E1E7F
:020054000E217B
:020055000E2576
:020056000E2971
:020057000E2B6E
:020058000E2E6A
:020059000E3067
:02005A000E3363
:02005B000E385D
:02005C000E3C58
:02005D000E4053
:02005E000E4250
:02005F000E444D
:020060000E4749
:020061000E4946
:020062000E4C42
:020063000E503D
:020064000E5339
:020065000E5635
:020066000E5832
:020067000E5930
:020068000E5D2B
:020069000E6225
:02006A000E6521
:02006B000E671E
:02006C000E6B19
:02006D000E6E15
:02006E000E6B17
:02006F000E7110
:020070000E720E
:020071000E750A
:020072000E7707
:020073000E7706
:020074000E7EFE
:020075000E80FB
:020076000E83F7
:020077000E85F4
:020078000E85F3
:020079000E86F1
:02007A000E8AEC
:02007B000E8EE7
:02007C000E90E4
:02007D000E90E3
:02007E000E92E0
:02007F000E94DD
:020080000A6212
:020081000A7201
:020082000A82F0
:020083000A90E1
:020084000AA0D0
:020085000AB0BF
:020086000ABEB0
:020087000ACAA3
:020088000ADE8E
:020089000AE883
:02008A000AF872
:02008B000B0662
:02008C000B1255
:02008D000B2244
:02008E000B3035
:02008F000B3E26
:020090000B4A19
:020091000B560C
:020092000B62FF
:020093000B70F0
:020094000B7AE5
:020095000B82DC
:020096000B90CD
:020097000B9AC2
:020098000BA2B9
:020099000BB6A4
:02009A000BBC9D
:02009B000BC296
:02009C000BD087
:02009D000BD87E
:02009E000BE66F
:02009F000BF064
:0200A0000BFA59
:0200A1000C024F
:0200A2000C0A46
:0200A3000C1639
:0200A4000C1E30
:0200A5000C2A23
:0200A6000C321A
:0200A7000C3615
:0200A8000C4406
:0200A9000C4AFF
:0200AA000C54F4
:0200AB000C5EE9
:0200AC000C66E0
:0200AD000C6ADB
:0200AE000C70D4
:0200AF000C7AC9
:0200B0000C84BE
:0200B1000C8CB5
:0200B2000C92AE
:0200B3000C96A9
:0200B4000C9CA2
:0200B5000CA499
:0200B6000CAE8E
:0200B7000CB685
:0200B8000CB882
:0200B9000CC079
:0200BA000CC870
:0200BB000CCE69
:0200BC000CD462
:0200BD000CDC59
:0200BE000CE450
:0200BF000CE84B
:0200C0000CF042
:0200C1000CF43D
:0200C2000CFA36
:0200C3000D022C
:0200C4000D0627
:0200C5000D0A22
:0200C6000D1417
:0200C7000D1614
:0200C8000D1C0D
:0200C9000D2404
:0200CA000D2CFB
:0200CB000D2EF8
:0200CC000D30F5
:0200CD000D38EC
:0200CE000D3EE5
:0200CF000D42E0
:0200D0000D46DB
:0200D1000D4CD4
:0200D2000D54CB
:0200D3000D58C6
:0200D4000D5AC3
:0200D5000D60BC
:0200D6000D66B5
:0200D7000D66B4
:0200D8000D70A9
:0200D9000D72A6
:0200DA000D789F
:0200DB000D7C9A
:0200DC000D7E97
:0200DD000D8292
:0200DE000D868D
:0200DF000D8A88
:0200E0000D8E83
:0200E1000D947C
:0200E2000D9679
:0200E3000D9A74
:0200E4000DA26B
:0200E5000DA468
:0200E6000DA467
:0200E7000DAE5C
:0200E8000DB059
:0200E9000DB652
:0200EA000DBC4B
:0200EB000DBC4A
:0200EC000DC045
:0200ED000DC440
:0200EE000DC241
:0200EF000DCA38
:0200F0000DCC35
:0200F1000DD030
:0200F2000DD22D
:0200F3000DD826
:0200F4000DDC21
:0200F5000DDC20
:0200F6000DDE1D
:0200F7000DE416
:0200F8000DEC0D
:0200F9000DEC0C
:0200FA000DEE09
:0200FB000DF600
:0200FC000DF401
:0200FD000DF6FE
:0200FE000DF8FB
:0200FF000E02EF
:00000001ff

View File

@@ -0,0 +1,508 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 17.1 (Release Build #273)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2017 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_ftoi
// SystemVerilog created on Wed Sep 2 07:11:09 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
module acl_ftoi (
input wire [31:0] a,
input wire [0:0] en,
output wire [31:0] q,
input wire clk,
input wire areset
);
wire [0:0] GND_q;
wire [7:0] cstAllOWE_uid6_fpToFxPTest_q;
wire [22:0] cstZeroWF_uid7_fpToFxPTest_q;
wire [7:0] cstAllZWE_uid8_fpToFxPTest_q;
wire [7:0] exp_x_uid9_fpToFxPTest_b;
wire [22:0] frac_x_uid10_fpToFxPTest_b;
wire [0:0] excZ_x_uid11_fpToFxPTest_qi;
reg [0:0] excZ_x_uid11_fpToFxPTest_q;
wire [0:0] expXIsMax_uid12_fpToFxPTest_qi;
reg [0:0] expXIsMax_uid12_fpToFxPTest_q;
wire [0:0] fracXIsZero_uid13_fpToFxPTest_qi;
reg [0:0] fracXIsZero_uid13_fpToFxPTest_q;
wire [0:0] fracXIsNotZero_uid14_fpToFxPTest_q;
wire [0:0] excI_x_uid15_fpToFxPTest_q;
wire [0:0] excN_x_uid16_fpToFxPTest_q;
wire [0:0] invExcXZ_uid22_fpToFxPTest_q;
wire [23:0] oFracX_uid23_fpToFxPTest_q;
wire [0:0] signX_uid25_fpToFxPTest_b;
wire [8:0] ovfExpVal_uid26_fpToFxPTest_q;
wire [10:0] ovfExpRange_uid27_fpToFxPTest_a;
wire [10:0] ovfExpRange_uid27_fpToFxPTest_b;
logic [10:0] ovfExpRange_uid27_fpToFxPTest_o;
wire [0:0] ovfExpRange_uid27_fpToFxPTest_n;
wire [7:0] udfExpVal_uid28_fpToFxPTest_q;
wire [10:0] udf_uid29_fpToFxPTest_a;
wire [10:0] udf_uid29_fpToFxPTest_b;
logic [10:0] udf_uid29_fpToFxPTest_o;
wire [0:0] udf_uid29_fpToFxPTest_n;
wire [8:0] ovfExpVal_uid30_fpToFxPTest_q;
wire [10:0] shiftValE_uid31_fpToFxPTest_a;
wire [10:0] shiftValE_uid31_fpToFxPTest_b;
logic [10:0] shiftValE_uid31_fpToFxPTest_o;
wire [9:0] shiftValE_uid31_fpToFxPTest_q;
wire [5:0] shiftValRaw_uid32_fpToFxPTest_in;
wire [5:0] shiftValRaw_uid32_fpToFxPTest_b;
wire [5:0] maxShiftCst_uid33_fpToFxPTest_q;
wire [11:0] shiftOutOfRange_uid34_fpToFxPTest_a;
wire [11:0] shiftOutOfRange_uid34_fpToFxPTest_b;
logic [11:0] shiftOutOfRange_uid34_fpToFxPTest_o;
wire [0:0] shiftOutOfRange_uid34_fpToFxPTest_n;
wire [0:0] shiftVal_uid35_fpToFxPTest_s;
reg [5:0] shiftVal_uid35_fpToFxPTest_q;
wire [31:0] shifterIn_uid37_fpToFxPTest_q;
wire [31:0] maxPosValueS_uid39_fpToFxPTest_q;
wire [31:0] maxNegValueS_uid40_fpToFxPTest_q;
wire [32:0] zRightShiferNoStickyOut_uid41_fpToFxPTest_q;
wire [32:0] xXorSignE_uid42_fpToFxPTest_b;
wire [32:0] xXorSignE_uid42_fpToFxPTest_q;
wire [2:0] d0_uid43_fpToFxPTest_q;
wire [33:0] sPostRndFull_uid44_fpToFxPTest_a;
wire [33:0] sPostRndFull_uid44_fpToFxPTest_b;
logic [33:0] sPostRndFull_uid44_fpToFxPTest_o;
wire [33:0] sPostRndFull_uid44_fpToFxPTest_q;
wire [32:0] sPostRnd_uid45_fpToFxPTest_in;
wire [31:0] sPostRnd_uid45_fpToFxPTest_b;
wire [34:0] sPostRnd_uid46_fpToFxPTest_in;
wire [33:0] sPostRnd_uid46_fpToFxPTest_b;
wire [35:0] rndOvfPos_uid47_fpToFxPTest_a;
wire [35:0] rndOvfPos_uid47_fpToFxPTest_b;
logic [35:0] rndOvfPos_uid47_fpToFxPTest_o;
wire [0:0] rndOvfPos_uid47_fpToFxPTest_c;
wire [0:0] ovfPostRnd_uid48_fpToFxPTest_q;
wire [2:0] muxSelConc_uid49_fpToFxPTest_q;
reg [1:0] muxSel_uid50_fpToFxPTest_q;
wire [31:0] maxNegValueU_uid51_fpToFxPTest_q;
wire [1:0] finalOut_uid52_fpToFxPTest_s;
reg [31:0] finalOut_uid52_fpToFxPTest_q;
wire [15:0] rightShiftStage0Idx1Rng16_uid56_rightShiferNoStickyOut_uid38_fpToFxPTest_b;
wire [15:0] rightShiftStage0Idx1Pad16_uid57_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [31:0] rightShiftStage0Idx1_uid58_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [1:0] rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_s;
reg [31:0] rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [27:0] rightShiftStage1Idx1Rng4_uid63_rightShiferNoStickyOut_uid38_fpToFxPTest_b;
wire [3:0] rightShiftStage1Idx1Pad4_uid64_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [31:0] rightShiftStage1Idx1_uid65_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [23:0] rightShiftStage1Idx2Rng8_uid66_rightShiferNoStickyOut_uid38_fpToFxPTest_b;
wire [31:0] rightShiftStage1Idx2_uid68_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [19:0] rightShiftStage1Idx3Rng12_uid69_rightShiferNoStickyOut_uid38_fpToFxPTest_b;
wire [11:0] rightShiftStage1Idx3Pad12_uid70_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [31:0] rightShiftStage1Idx3_uid71_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [1:0] rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_s;
reg [31:0] rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [30:0] rightShiftStage2Idx1Rng1_uid74_rightShiferNoStickyOut_uid38_fpToFxPTest_b;
wire [31:0] rightShiftStage2Idx1_uid76_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [29:0] rightShiftStage2Idx2Rng2_uid77_rightShiferNoStickyOut_uid38_fpToFxPTest_b;
wire [1:0] rightShiftStage2Idx2Pad2_uid78_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [31:0] rightShiftStage2Idx2_uid79_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [28:0] rightShiftStage2Idx3Rng3_uid80_rightShiferNoStickyOut_uid38_fpToFxPTest_b;
wire [2:0] rightShiftStage2Idx3Pad3_uid81_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [31:0] rightShiftStage2Idx3_uid82_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [1:0] rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_s;
reg [31:0] rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
wire [1:0] rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select_b;
wire [1:0] rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select_c;
wire [1:0] rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select_d;
reg [31:0] redist0_sPostRnd_uid45_fpToFxPTest_b_1_q;
reg [5:0] redist1_shiftValRaw_uid32_fpToFxPTest_b_1_q;
reg [0:0] redist2_udf_uid29_fpToFxPTest_n_3_q;
reg [0:0] redist3_ovfExpRange_uid27_fpToFxPTest_n_3_q;
reg [0:0] redist4_signX_uid25_fpToFxPTest_b_2_q;
reg [0:0] redist5_signX_uid25_fpToFxPTest_b_3_q;
reg [0:0] redist6_fracXIsZero_uid13_fpToFxPTest_q_2_q;
reg [0:0] redist7_expXIsMax_uid12_fpToFxPTest_q_3_q;
reg [22:0] redist8_frac_x_uid10_fpToFxPTest_b_1_q;
// maxNegValueU_uid51_fpToFxPTest(CONSTANT,50)
assign maxNegValueU_uid51_fpToFxPTest_q = 32'b00000000000000000000000000000000;
// maxNegValueS_uid40_fpToFxPTest(CONSTANT,39)
assign maxNegValueS_uid40_fpToFxPTest_q = 32'b10000000000000000000000000000000;
// maxPosValueS_uid39_fpToFxPTest(CONSTANT,38)
assign maxPosValueS_uid39_fpToFxPTest_q = 32'b01111111111111111111111111111111;
// d0_uid43_fpToFxPTest(CONSTANT,42)
assign d0_uid43_fpToFxPTest_q = 3'b001;
// signX_uid25_fpToFxPTest(BITSELECT,24)@0
assign signX_uid25_fpToFxPTest_b = a[31:31];
// redist4_signX_uid25_fpToFxPTest_b_2(DELAY,90)
dspba_delay_ver #( .width(1), .depth(2), .reset_kind("ASYNC") )
redist4_signX_uid25_fpToFxPTest_b_2 ( .xin(signX_uid25_fpToFxPTest_b), .xout(redist4_signX_uid25_fpToFxPTest_b_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// GND(CONSTANT,0)
assign GND_q = 1'b0;
// ---- Shifter stage 2 candidates: stage-1 output shifted right by 3, 2 and 1
// ---- bits with zero fill. Each candidate is {zero pad, upper bits}.
// rightShiftStage2Idx3Pad3_uid81_rightShiferNoStickyOut_uid38_fpToFxPTest(CONSTANT,80)
assign rightShiftStage2Idx3Pad3_uid81_rightShiferNoStickyOut_uid38_fpToFxPTest_q = 3'b000;
// rightShiftStage2Idx3Rng3_uid80_rightShiferNoStickyOut_uid38_fpToFxPTest(BITSELECT,79)@1
assign rightShiftStage2Idx3Rng3_uid80_rightShiferNoStickyOut_uid38_fpToFxPTest_b = rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q[31:3];
// rightShiftStage2Idx3_uid82_rightShiferNoStickyOut_uid38_fpToFxPTest(BITJOIN,81)@1
// Shift right by 3.
assign rightShiftStage2Idx3_uid82_rightShiferNoStickyOut_uid38_fpToFxPTest_q = {rightShiftStage2Idx3Pad3_uid81_rightShiferNoStickyOut_uid38_fpToFxPTest_q, rightShiftStage2Idx3Rng3_uid80_rightShiferNoStickyOut_uid38_fpToFxPTest_b};
// rightShiftStage2Idx2Pad2_uid78_rightShiferNoStickyOut_uid38_fpToFxPTest(CONSTANT,77)
assign rightShiftStage2Idx2Pad2_uid78_rightShiferNoStickyOut_uid38_fpToFxPTest_q = 2'b00;
// rightShiftStage2Idx2Rng2_uid77_rightShiferNoStickyOut_uid38_fpToFxPTest(BITSELECT,76)@1
assign rightShiftStage2Idx2Rng2_uid77_rightShiferNoStickyOut_uid38_fpToFxPTest_b = rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q[31:2];
// rightShiftStage2Idx2_uid79_rightShiferNoStickyOut_uid38_fpToFxPTest(BITJOIN,78)@1
// Shift right by 2.
assign rightShiftStage2Idx2_uid79_rightShiferNoStickyOut_uid38_fpToFxPTest_q = {rightShiftStage2Idx2Pad2_uid78_rightShiferNoStickyOut_uid38_fpToFxPTest_q, rightShiftStage2Idx2Rng2_uid77_rightShiferNoStickyOut_uid38_fpToFxPTest_b};
// rightShiftStage2Idx1Rng1_uid74_rightShiferNoStickyOut_uid38_fpToFxPTest(BITSELECT,73)@1
assign rightShiftStage2Idx1Rng1_uid74_rightShiferNoStickyOut_uid38_fpToFxPTest_b = rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q[31:1];
// rightShiftStage2Idx1_uid76_rightShiferNoStickyOut_uid38_fpToFxPTest(BITJOIN,75)@1
// Shift right by 1; GND_q supplies the single zero-fill bit.
assign rightShiftStage2Idx1_uid76_rightShiferNoStickyOut_uid38_fpToFxPTest_q = {GND_q, rightShiftStage2Idx1Rng1_uid74_rightShiferNoStickyOut_uid38_fpToFxPTest_b};
// ---- Shifter stage 1 candidates: stage-0 output shifted right by 12, 8 and
// ---- 4 bits with zero fill.
// rightShiftStage1Idx3Pad12_uid70_rightShiferNoStickyOut_uid38_fpToFxPTest(CONSTANT,69)
assign rightShiftStage1Idx3Pad12_uid70_rightShiferNoStickyOut_uid38_fpToFxPTest_q = 12'b000000000000;
// rightShiftStage1Idx3Rng12_uid69_rightShiferNoStickyOut_uid38_fpToFxPTest(BITSELECT,68)@1
assign rightShiftStage1Idx3Rng12_uid69_rightShiferNoStickyOut_uid38_fpToFxPTest_b = rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q[31:12];
// rightShiftStage1Idx3_uid71_rightShiferNoStickyOut_uid38_fpToFxPTest(BITJOIN,70)@1
// Shift right by 12.
assign rightShiftStage1Idx3_uid71_rightShiferNoStickyOut_uid38_fpToFxPTest_q = {rightShiftStage1Idx3Pad12_uid70_rightShiferNoStickyOut_uid38_fpToFxPTest_q, rightShiftStage1Idx3Rng12_uid69_rightShiferNoStickyOut_uid38_fpToFxPTest_b};
// cstAllZWE_uid8_fpToFxPTest(CONSTANT,7)
// Eight zero bits. Shared constant: compared against the exponent field
// (excZ_x), reused as the 8-bit zero pad for the shift-by-8 candidate below,
// and as the low padding of shifterIn.
assign cstAllZWE_uid8_fpToFxPTest_q = 8'b00000000;
// rightShiftStage1Idx2Rng8_uid66_rightShiferNoStickyOut_uid38_fpToFxPTest(BITSELECT,65)@1
assign rightShiftStage1Idx2Rng8_uid66_rightShiferNoStickyOut_uid38_fpToFxPTest_b = rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q[31:8];
// rightShiftStage1Idx2_uid68_rightShiferNoStickyOut_uid38_fpToFxPTest(BITJOIN,67)@1
// Shift right by 8.
assign rightShiftStage1Idx2_uid68_rightShiferNoStickyOut_uid38_fpToFxPTest_q = {cstAllZWE_uid8_fpToFxPTest_q, rightShiftStage1Idx2Rng8_uid66_rightShiferNoStickyOut_uid38_fpToFxPTest_b};
// rightShiftStage1Idx1Pad4_uid64_rightShiferNoStickyOut_uid38_fpToFxPTest(CONSTANT,63)
assign rightShiftStage1Idx1Pad4_uid64_rightShiferNoStickyOut_uid38_fpToFxPTest_q = 4'b0000;
// rightShiftStage1Idx1Rng4_uid63_rightShiferNoStickyOut_uid38_fpToFxPTest(BITSELECT,62)@1
assign rightShiftStage1Idx1Rng4_uid63_rightShiferNoStickyOut_uid38_fpToFxPTest_b = rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q[31:4];
// rightShiftStage1Idx1_uid65_rightShiferNoStickyOut_uid38_fpToFxPTest(BITJOIN,64)@1
// Shift right by 4.
assign rightShiftStage1Idx1_uid65_rightShiferNoStickyOut_uid38_fpToFxPTest_q = {rightShiftStage1Idx1Pad4_uid64_rightShiferNoStickyOut_uid38_fpToFxPTest_q, rightShiftStage1Idx1Rng4_uid63_rightShiferNoStickyOut_uid38_fpToFxPTest_b};
// ---- Shifter stage 0 candidate: shifter input shifted right by 16 bits with
// ---- zero fill (upper 16 bits of shifterIn moved to the lower half).
// rightShiftStage0Idx1Pad16_uid57_rightShiferNoStickyOut_uid38_fpToFxPTest(CONSTANT,56)
assign rightShiftStage0Idx1Pad16_uid57_rightShiferNoStickyOut_uid38_fpToFxPTest_q = 16'b0000000000000000;
// rightShiftStage0Idx1Rng16_uid56_rightShiferNoStickyOut_uid38_fpToFxPTest(BITSELECT,55)@1
assign rightShiftStage0Idx1Rng16_uid56_rightShiferNoStickyOut_uid38_fpToFxPTest_b = shifterIn_uid37_fpToFxPTest_q[31:16];
// rightShiftStage0Idx1_uid58_rightShiferNoStickyOut_uid38_fpToFxPTest(BITJOIN,57)@1
assign rightShiftStage0Idx1_uid58_rightShiferNoStickyOut_uid38_fpToFxPTest_q = {rightShiftStage0Idx1Pad16_uid57_rightShiferNoStickyOut_uid38_fpToFxPTest_q, rightShiftStage0Idx1Rng16_uid56_rightShiferNoStickyOut_uid38_fpToFxPTest_b};
// ---- Operand unpacking and construction of the 32-bit shifter input.
// exp_x_uid9_fpToFxPTest(BITSELECT,8)@0
// 8-bit exponent field of the input.
assign exp_x_uid9_fpToFxPTest_b = a[30:23];
// excZ_x_uid11_fpToFxPTest(LOGICAL,10)@0 + 1
// 1 when the exponent field is all zeros; the flag is passed through a
// depth-1 delay instance before use.
assign excZ_x_uid11_fpToFxPTest_qi = exp_x_uid9_fpToFxPTest_b == cstAllZWE_uid8_fpToFxPTest_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
excZ_x_uid11_fpToFxPTest_delay ( .xin(excZ_x_uid11_fpToFxPTest_qi), .xout(excZ_x_uid11_fpToFxPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// invExcXZ_uid22_fpToFxPTest(LOGICAL,21)@1
// Leading significand bit: 1 unless the exponent field was zero.
assign invExcXZ_uid22_fpToFxPTest_q = ~ (excZ_x_uid11_fpToFxPTest_q);
// frac_x_uid10_fpToFxPTest(BITSELECT,9)@0
// 23-bit fraction field of the input.
assign frac_x_uid10_fpToFxPTest_b = a[22:0];
// redist8_frac_x_uid10_fpToFxPTest_b_1(DELAY,94)
// Fraction delayed by a depth-1 instance so it lines up with the delayed
// excZ_x flag.
dspba_delay_ver #( .width(23), .depth(1), .reset_kind("ASYNC") )
redist8_frac_x_uid10_fpToFxPTest_b_1 ( .xin(frac_x_uid10_fpToFxPTest_b), .xout(redist8_frac_x_uid10_fpToFxPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// oFracX_uid23_fpToFxPTest(BITJOIN,22)@1
// 24-bit significand: {leading bit, fraction}.
assign oFracX_uid23_fpToFxPTest_q = {invExcXZ_uid22_fpToFxPTest_q, redist8_frac_x_uid10_fpToFxPTest_b_1_q};
// shifterIn_uid37_fpToFxPTest(BITJOIN,36)@1
// Significand left-aligned in 32 bits (8 zero bits appended below it).
assign shifterIn_uid37_fpToFxPTest_q = {oFracX_uid23_fpToFxPTest_q, cstAllZWE_uid8_fpToFxPTest_q};
// rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest(MUX,61)@1
// First (coarse) stage of the right shifter, combinational. The select is
// shiftVal[5:4] (see the merged_bit_select_b assignment further down):
//   00 -> no shift
//   01 -> shift right by 16
//   1x -> maxNegValueU constant (defined above this chunk; presumably all
//         zeros, i.e. everything shifted out - confirm at its definition)
// Note: 'en' is in the sensitivity list but is not read inside the block.
assign rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_s = rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select_b;
always @(rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_s or en or shifterIn_uid37_fpToFxPTest_q or rightShiftStage0Idx1_uid58_rightShiferNoStickyOut_uid38_fpToFxPTest_q or maxNegValueU_uid51_fpToFxPTest_q)
begin
unique case (rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_s)
2'b00 : rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q = shifterIn_uid37_fpToFxPTest_q;
2'b01 : rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q = rightShiftStage0Idx1_uid58_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
2'b10 : rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q = maxNegValueU_uid51_fpToFxPTest_q;
2'b11 : rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q = maxNegValueU_uid51_fpToFxPTest_q;
// Only reached when the select contains X/Z bits.
default : rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q = 32'b0;
endcase
end
// rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest(MUX,72)@1
// Second stage of the right shifter, combinational. The select is
// shiftVal[3:2] (merged_bit_select_c):
//   00 -> no additional shift
//   01 -> shift right by 4
//   10 -> shift right by 8
//   11 -> shift right by 12
// Note: 'en' is in the sensitivity list but is not read inside the block.
assign rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_s = rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select_c;
always @(rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_s or en or rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q or rightShiftStage1Idx1_uid65_rightShiferNoStickyOut_uid38_fpToFxPTest_q or rightShiftStage1Idx2_uid68_rightShiferNoStickyOut_uid38_fpToFxPTest_q or rightShiftStage1Idx3_uid71_rightShiferNoStickyOut_uid38_fpToFxPTest_q)
begin
unique case (rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_s)
2'b00 : rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q = rightShiftStage0_uid62_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
2'b01 : rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q = rightShiftStage1Idx1_uid65_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
2'b10 : rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q = rightShiftStage1Idx2_uid68_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
2'b11 : rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q = rightShiftStage1Idx3_uid71_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
// Only reached when the select contains X/Z bits.
default : rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q = 32'b0;
endcase
end
// ---- Shift-amount computation: shiftValE = 157 - exponent field.
// maxShiftCst_uid33_fpToFxPTest(CONSTANT,32)
// 32: shift amount used when the computed shift is out of range.
assign maxShiftCst_uid33_fpToFxPTest_q = 6'b100000;
// ovfExpVal_uid30_fpToFxPTest(CONSTANT,29)
// 157 (9'b010011101): exponent field value that yields a shift of zero.
assign ovfExpVal_uid30_fpToFxPTest_q = 9'b010011101;
// shiftValE_uid31_fpToFxPTest(SUB,30)@0
// Both operands are widened to 11 bits (constant sign-extended, exponent
// zero-extended) and subtracted as signed values; the low 10 bits are kept,
// so the result is negative when the exponent field exceeds 157.
assign shiftValE_uid31_fpToFxPTest_a = {{2{ovfExpVal_uid30_fpToFxPTest_q[8]}}, ovfExpVal_uid30_fpToFxPTest_q};
assign shiftValE_uid31_fpToFxPTest_b = {3'b000, exp_x_uid9_fpToFxPTest_b};
assign shiftValE_uid31_fpToFxPTest_o = $signed(shiftValE_uid31_fpToFxPTest_a) - $signed(shiftValE_uid31_fpToFxPTest_b);
assign shiftValE_uid31_fpToFxPTest_q = shiftValE_uid31_fpToFxPTest_o[9:0];
// shiftValRaw_uid32_fpToFxPTest(BITSELECT,31)@0
// Low 6 bits of the difference: the shift amount when it is in range.
assign shiftValRaw_uid32_fpToFxPTest_in = shiftValE_uid31_fpToFxPTest_q[5:0];
assign shiftValRaw_uid32_fpToFxPTest_b = shiftValRaw_uid32_fpToFxPTest_in[5:0];
// redist1_shiftValRaw_uid32_fpToFxPTest_b_1(DELAY,87)
// Depth-1 delay so the raw shift lines up with the registered
// shiftOutOfRange compare result in the shiftVal mux.
dspba_delay_ver #( .width(6), .depth(1), .reset_kind("ASYNC") )
redist1_shiftValRaw_uid32_fpToFxPTest_b_1 ( .xin(shiftValRaw_uid32_fpToFxPTest_b), .xout(redist1_shiftValRaw_uid32_fpToFxPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// shiftOutOfRange_uid34_fpToFxPTest(COMPARE,33)@0 + 1
// Registered signed comparison "shiftValE >= 32", implemented as a 12-bit
// subtraction shiftValE - 32 whose sign bit is inspected:
//   _a : shiftValE sign-extended from 10 to 12 bits
//   _b : maxShiftCst (32) zero-extended to 12 bits
//   _n : 1 when the difference is non-negative (sign bit clear)
// The difference register is cleared asynchronously by areset and updates on
// the rising clock edge only while en is 1.
assign shiftOutOfRange_uid34_fpToFxPTest_a = {{2{shiftValE_uid31_fpToFxPTest_q[9]}}, shiftValE_uid31_fpToFxPTest_q};
assign shiftOutOfRange_uid34_fpToFxPTest_b = {6'b000000, maxShiftCst_uid33_fpToFxPTest_q};
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
shiftOutOfRange_uid34_fpToFxPTest_o <= 12'b0;
end
else if (en == 1'b1)
begin
shiftOutOfRange_uid34_fpToFxPTest_o <= $signed(shiftOutOfRange_uid34_fpToFxPTest_a) - $signed(shiftOutOfRange_uid34_fpToFxPTest_b);
end
end
assign shiftOutOfRange_uid34_fpToFxPTest_n[0] = ~ (shiftOutOfRange_uid34_fpToFxPTest_o[11]);
// shiftVal_uid35_fpToFxPTest(MUX,34)@1
// Final 6-bit shift amount (combinational): the delayed raw shift when it is
// in range, otherwise clamped to maxShiftCst (32).
// Note: 'en' is in the sensitivity list but is not read inside the block.
assign shiftVal_uid35_fpToFxPTest_s = shiftOutOfRange_uid34_fpToFxPTest_n;
always @(shiftVal_uid35_fpToFxPTest_s or en or redist1_shiftValRaw_uid32_fpToFxPTest_b_1_q or maxShiftCst_uid33_fpToFxPTest_q)
begin
unique case (shiftVal_uid35_fpToFxPTest_s)
1'b0 : shiftVal_uid35_fpToFxPTest_q = redist1_shiftValRaw_uid32_fpToFxPTest_b_1_q;
1'b1 : shiftVal_uid35_fpToFxPTest_q = maxShiftCst_uid33_fpToFxPTest_q;
// Only reached when the select is X/Z.
default : shiftVal_uid35_fpToFxPTest_q = 6'b0;
endcase
end
// rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select(BITSELECT,85)@1
// Split the shift amount into three 2-bit selects, one per shifter stage:
//   _b = bits [5:4] -> stage 0 (0 / 16 / out of range)
//   _c = bits [3:2] -> stage 1 (0 / 4 / 8 / 12)
//   _d = bits [1:0] -> stage 2 (0 / 1 / 2 / 3)
assign rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select_b = shiftVal_uid35_fpToFxPTest_q[5:4];
assign rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select_c = shiftVal_uid35_fpToFxPTest_q[3:2];
assign rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select_d = shiftVal_uid35_fpToFxPTest_q[1:0];
// rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest(MUX,83)@1 + 1
// Last (fine) stage of the right shifter. Unlike stages 0 and 1 this mux is
// registered: the output is cleared asynchronously by areset and captures a
// new value on the rising clock edge only while en is 1.
// The select is shiftVal[1:0] (merged_bit_select_d):
//   00 -> no additional shift
//   01 -> shift right by 1
//   10 -> shift right by 2
//   11 -> shift right by 3
assign rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_s = rightShiftStageSel5Dto4_uid61_rightShiferNoStickyOut_uid38_fpToFxPTest_merged_bit_select_d;
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_q <= 32'b0;
end
else if (en == 1'b1)
begin
unique case (rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_s)
2'b00 : rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_q <= rightShiftStage1_uid73_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
2'b01 : rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_q <= rightShiftStage2Idx1_uid76_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
2'b10 : rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_q <= rightShiftStage2Idx2_uid79_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
2'b11 : rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_q <= rightShiftStage2Idx3_uid82_rightShiferNoStickyOut_uid38_fpToFxPTest_q;
// Only reached when the select contains X/Z bits.
default : rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_q <= 32'b0;
endcase
end
end
// ---- Sign application and rounding of the shifted magnitude.
// zRightShiferNoStickyOut_uid41_fpToFxPTest(BITJOIN,40)@2
// Shifter output widened to 33 bits with a leading zero.
assign zRightShiferNoStickyOut_uid41_fpToFxPTest_q = {GND_q, rightShiftStage2_uid84_rightShiferNoStickyOut_uid38_fpToFxPTest_q};
// xXorSignE_uid42_fpToFxPTest(LOGICAL,41)@2
// XOR with the delayed sign replicated across all 33 bits: inverts every bit
// when the input was negative, passes the value through otherwise.
assign xXorSignE_uid42_fpToFxPTest_b = {{32{redist4_signX_uid25_fpToFxPTest_b_2_q[0]}}, redist4_signX_uid25_fpToFxPTest_b_2_q};
assign xXorSignE_uid42_fpToFxPTest_q = zRightShiferNoStickyOut_uid41_fpToFxPTest_q ^ xXorSignE_uid42_fpToFxPTest_b;
// sPostRndFull_uid44_fpToFxPTest(ADD,43)@2
// 34-bit signed addition of the constant 1 (d0). For a negative input the
// inversion above plus this +1 forms the two's complement negation; for a
// non-negative input the +1 lands on bit 0, which is dropped by the
// sPostRnd select below, so it acts as a rounding increment.
assign sPostRndFull_uid44_fpToFxPTest_a = {{1{xXorSignE_uid42_fpToFxPTest_q[32]}}, xXorSignE_uid42_fpToFxPTest_q};
assign sPostRndFull_uid44_fpToFxPTest_b = {{31{d0_uid43_fpToFxPTest_q[2]}}, d0_uid43_fpToFxPTest_q};
assign sPostRndFull_uid44_fpToFxPTest_o = $signed(sPostRndFull_uid44_fpToFxPTest_a) + $signed(sPostRndFull_uid44_fpToFxPTest_b);
assign sPostRndFull_uid44_fpToFxPTest_q = sPostRndFull_uid44_fpToFxPTest_o[33:0];
// sPostRnd_uid45_fpToFxPTest(BITSELECT,44)@2
// 32-bit result candidate: bits [32:1] of the sum (bit 0 discarded).
assign sPostRnd_uid45_fpToFxPTest_in = sPostRndFull_uid44_fpToFxPTest_q[32:0];
assign sPostRnd_uid45_fpToFxPTest_b = sPostRnd_uid45_fpToFxPTest_in[32:1];
// redist0_sPostRnd_uid45_fpToFxPTest_b_1(DELAY,86)
// Depth-1 delay of the result candidate; its output feeds the finalOut mux.
dspba_delay_ver #( .width(32), .depth(1), .reset_kind("ASYNC") )
redist0_sPostRnd_uid45_fpToFxPTest_b_1 ( .xin(sPostRnd_uid45_fpToFxPTest_b), .xout(redist0_sPostRnd_uid45_fpToFxPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist5_signX_uid25_fpToFxPTest_b_3(DELAY,91)
// One more depth-1 delay of the sign (on top of redist4) for use in
// muxSelConc.
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist5_signX_uid25_fpToFxPTest_b_3 ( .xin(redist4_signX_uid25_fpToFxPTest_b_2_q), .xout(redist5_signX_uid25_fpToFxPTest_b_3_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// udfExpVal_uid28_fpToFxPTest(CONSTANT,27)
// 125 (8'b01111101): largest exponent field value flagged by the udf compare.
assign udfExpVal_uid28_fpToFxPTest_q = 8'b01111101;
// udf_uid29_fpToFxPTest(COMPARE,28)@0 + 1
// Registered comparison "exponent field <= 125", implemented as the 11-bit
// signed subtraction 125 - exp whose sign bit is inspected:
//   _a : udfExpVal widened to 11 bits (its MSB is 0, so this zero-extends)
//   _b : exponent field zero-extended to 11 bits
//   _n : 1 when the difference is non-negative (sign bit clear)
// The difference register is cleared asynchronously by areset and updates on
// the rising clock edge only while en is 1.
assign udf_uid29_fpToFxPTest_a = {{3{udfExpVal_uid28_fpToFxPTest_q[7]}}, udfExpVal_uid28_fpToFxPTest_q};
assign udf_uid29_fpToFxPTest_b = {3'b000, exp_x_uid9_fpToFxPTest_b};
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
udf_uid29_fpToFxPTest_o <= 11'b0;
end
else if (en == 1'b1)
begin
udf_uid29_fpToFxPTest_o <= $signed(udf_uid29_fpToFxPTest_a) - $signed(udf_uid29_fpToFxPTest_b);
end
end
assign udf_uid29_fpToFxPTest_n[0] = ~ (udf_uid29_fpToFxPTest_o[10]);
// redist2_udf_uid29_fpToFxPTest_n_3(DELAY,88)
// Depth-2 delay of the flag; the delayed value is the middle bit of
// muxSelConc.
dspba_delay_ver #( .width(1), .depth(2), .reset_kind("ASYNC") )
redist2_udf_uid29_fpToFxPTest_n_3 ( .xin(udf_uid29_fpToFxPTest_n), .xout(redist2_udf_uid29_fpToFxPTest_n_3_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// sPostRnd_uid46_fpToFxPTest(BITSELECT,45)@2
// Wider (34-bit) view of the rounded value used only for overflow detection:
// the 34-bit sum is sign-extended by one bit and bit 0 is dropped.
assign sPostRnd_uid46_fpToFxPTest_in = {{1{sPostRndFull_uid44_fpToFxPTest_q[33]}}, sPostRndFull_uid44_fpToFxPTest_q};
assign sPostRnd_uid46_fpToFxPTest_b = sPostRnd_uid46_fpToFxPTest_in[34:1];
// rndOvfPos_uid47_fpToFxPTest(COMPARE,46)@2 + 1
// Registered comparison "rounded value > 0x7FFFFFFF", implemented as the
// 36-bit signed subtraction maxPosValueS - value whose sign bit is inspected:
//   _a : maxPosValueS zero-extended to 36 bits
//   _b : rounded value sign-extended from 34 to 36 bits
//   _c : 1 when the difference is negative (sign bit set)
// The difference register is cleared asynchronously by areset and updates on
// the rising clock edge only while en is 1.
assign rndOvfPos_uid47_fpToFxPTest_a = {4'b0000, maxPosValueS_uid39_fpToFxPTest_q};
assign rndOvfPos_uid47_fpToFxPTest_b = {{2{sPostRnd_uid46_fpToFxPTest_b[33]}}, sPostRnd_uid46_fpToFxPTest_b};
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
rndOvfPos_uid47_fpToFxPTest_o <= 36'b0;
end
else if (en == 1'b1)
begin
rndOvfPos_uid47_fpToFxPTest_o <= $signed(rndOvfPos_uid47_fpToFxPTest_a) - $signed(rndOvfPos_uid47_fpToFxPTest_b);
end
end
assign rndOvfPos_uid47_fpToFxPTest_c[0] = rndOvfPos_uid47_fpToFxPTest_o[35];
// ovfExpVal_uid26_fpToFxPTest(CONSTANT,25)
// 158 (9'b010011110): smallest exponent field value flagged as overflow.
assign ovfExpVal_uid26_fpToFxPTest_q = 9'b010011110;
// ovfExpRange_uid27_fpToFxPTest(COMPARE,26)@0 + 1
// Registered comparison "exponent field >= 158", implemented as the 11-bit
// signed subtraction exp - 158 whose sign bit is inspected:
//   _a : exponent field zero-extended to 11 bits
//   _b : ovfExpVal widened to 11 bits (its MSB is 0, so this zero-extends)
//   _n : 1 when the difference is non-negative (sign bit clear)
// The difference register is cleared asynchronously by areset and updates on
// the rising clock edge only while en is 1.
assign ovfExpRange_uid27_fpToFxPTest_a = {3'b000, exp_x_uid9_fpToFxPTest_b};
assign ovfExpRange_uid27_fpToFxPTest_b = {{2{ovfExpVal_uid26_fpToFxPTest_q[8]}}, ovfExpVal_uid26_fpToFxPTest_q};
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
ovfExpRange_uid27_fpToFxPTest_o <= 11'b0;
end
else if (en == 1'b1)
begin
ovfExpRange_uid27_fpToFxPTest_o <= $signed(ovfExpRange_uid27_fpToFxPTest_a) - $signed(ovfExpRange_uid27_fpToFxPTest_b);
end
end
assign ovfExpRange_uid27_fpToFxPTest_n[0] = ~ (ovfExpRange_uid27_fpToFxPTest_o[10]);
// redist3_ovfExpRange_uid27_fpToFxPTest_n_3(DELAY,89)
// Depth-2 delay of the flag; the delayed value is one term of ovfPostRnd.
dspba_delay_ver #( .width(1), .depth(2), .reset_kind("ASYNC") )
redist3_ovfExpRange_uid27_fpToFxPTest_n_3 ( .xin(ovfExpRange_uid27_fpToFxPTest_n), .xout(redist3_ovfExpRange_uid27_fpToFxPTest_n_3_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// ---- Field tests feeding the special-value detection (excI_x / excN_x).
// cstZeroWF_uid7_fpToFxPTest(CONSTANT,6)
// 23 zero bits, compared against the fraction field.
assign cstZeroWF_uid7_fpToFxPTest_q = 23'b00000000000000000000000;
// fracXIsZero_uid13_fpToFxPTest(LOGICAL,12)@1 + 1
// 1 when the (already once-delayed) fraction field is all zeros; the flag
// then passes through a depth-1 delay instance.
assign fracXIsZero_uid13_fpToFxPTest_qi = cstZeroWF_uid7_fpToFxPTest_q == redist8_frac_x_uid10_fpToFxPTest_b_1_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
fracXIsZero_uid13_fpToFxPTest_delay ( .xin(fracXIsZero_uid13_fpToFxPTest_qi), .xout(fracXIsZero_uid13_fpToFxPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist6_fracXIsZero_uid13_fpToFxPTest_q_2(DELAY,92)
// Additional depth-1 delay before the flag is used in excI_x / excN_x.
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist6_fracXIsZero_uid13_fpToFxPTest_q_2 ( .xin(fracXIsZero_uid13_fpToFxPTest_q), .xout(redist6_fracXIsZero_uid13_fpToFxPTest_q_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// cstAllOWE_uid6_fpToFxPTest(CONSTANT,5)
// Eight one bits, compared against the exponent field.
assign cstAllOWE_uid6_fpToFxPTest_q = 8'b11111111;
// expXIsMax_uid12_fpToFxPTest(LOGICAL,11)@0 + 1
// 1 when the exponent field is all ones; the flag then passes through a
// depth-1 delay instance.
assign expXIsMax_uid12_fpToFxPTest_qi = exp_x_uid9_fpToFxPTest_b == cstAllOWE_uid6_fpToFxPTest_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
expXIsMax_uid12_fpToFxPTest_delay ( .xin(expXIsMax_uid12_fpToFxPTest_qi), .xout(expXIsMax_uid12_fpToFxPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist7_expXIsMax_uid12_fpToFxPTest_q_3(DELAY,93)
// Additional depth-2 delay before the flag is used in excI_x / excN_x.
dspba_delay_ver #( .width(1), .depth(2), .reset_kind("ASYNC") )
redist7_expXIsMax_uid12_fpToFxPTest_q_3 ( .xin(expXIsMax_uid12_fpToFxPTest_q), .xout(redist7_expXIsMax_uid12_fpToFxPTest_q_3_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// ---- Special-value and overflow classification; builds the 3-bit key for
// ---- the muxSel lookup.
// excI_x_uid15_fpToFxPTest(LOGICAL,14)@3
// Exponent all ones and fraction zero (the infinity encoding of IEEE-754
// single precision).
assign excI_x_uid15_fpToFxPTest_q = redist7_expXIsMax_uid12_fpToFxPTest_q_3_q & redist6_fracXIsZero_uid13_fpToFxPTest_q_2_q;
// fracXIsNotZero_uid14_fpToFxPTest(LOGICAL,13)@3
assign fracXIsNotZero_uid14_fpToFxPTest_q = ~ (redist6_fracXIsZero_uid13_fpToFxPTest_q_2_q);
// excN_x_uid16_fpToFxPTest(LOGICAL,15)@3
// Exponent all ones and fraction non-zero (the NaN encoding).
assign excN_x_uid16_fpToFxPTest_q = redist7_expXIsMax_uid12_fpToFxPTest_q_3_q & fracXIsNotZero_uid14_fpToFxPTest_q;
// ovfPostRnd_uid48_fpToFxPTest(LOGICAL,47)@3
// Overflow if the input is NaN or infinity, if the exponent field is >= 158
// (ovfExpRange), or if the rounded value exceeds 0x7FFFFFFF (rndOvfPos).
assign ovfPostRnd_uid48_fpToFxPTest_q = excN_x_uid16_fpToFxPTest_q | excI_x_uid15_fpToFxPTest_q | redist3_ovfExpRange_uid27_fpToFxPTest_n_3_q | rndOvfPos_uid47_fpToFxPTest_c;
// muxSelConc_uid49_fpToFxPTest(BITJOIN,48)@3
// Lookup key, MSB to LSB: {sign, udf flag, overflow flag}.
assign muxSelConc_uid49_fpToFxPTest_q = {redist5_signX_uid25_fpToFxPTest_b_3_q, redist2_udf_uid29_fpToFxPTest_n_3_q, ovfPostRnd_uid48_fpToFxPTest_q};
// muxSel_uid50_fpToFxPTest(LOOKUP,49)@3
// Maps the key {sign, udf, ovf} to the 2-bit select of the finalOut mux:
//   udf = 1 (any sign, any ovf)  -> 11
//   udf = 0, ovf = 0 (any sign)  -> 00
//   udf = 0, ovf = 1, sign = 0   -> 01
//   udf = 0, ovf = 1, sign = 1   -> 10
// All eight key values are listed, so the default arm is only reached when
// the key contains X/Z bits; it then drives X on the select.
always @(muxSelConc_uid49_fpToFxPTest_q)
begin
// Begin reserved scope level
unique case (muxSelConc_uid49_fpToFxPTest_q)
3'b000 : muxSel_uid50_fpToFxPTest_q = 2'b00;
3'b001 : muxSel_uid50_fpToFxPTest_q = 2'b01;
3'b010 : muxSel_uid50_fpToFxPTest_q = 2'b11;
3'b011 : muxSel_uid50_fpToFxPTest_q = 2'b11;
3'b100 : muxSel_uid50_fpToFxPTest_q = 2'b00;
3'b101 : muxSel_uid50_fpToFxPTest_q = 2'b10;
3'b110 : muxSel_uid50_fpToFxPTest_q = 2'b11;
3'b111 : muxSel_uid50_fpToFxPTest_q = 2'b11;
default : begin
// unreachable
muxSel_uid50_fpToFxPTest_q = 2'bxx;
end
endcase
// End reserved scope level
end
// finalOut_uid52_fpToFxPTest(MUX,51)@3
// Output selection (combinational), driven by the muxSel lookup:
//   00 -> delayed rounded conversion result
//   01 -> maxPosValueS (0x7FFFFFFF), positive saturation
//   10 -> maxNegValueS (defined above this chunk; presumably the most
//         negative signed value - confirm at its definition)
//   11 -> maxNegValueU (defined above this chunk; presumably zero, used for
//         the udf case - confirm at its definition)
// Note: 'en' is in the sensitivity list but is not read inside the block.
assign finalOut_uid52_fpToFxPTest_s = muxSel_uid50_fpToFxPTest_q;
always @(finalOut_uid52_fpToFxPTest_s or en or redist0_sPostRnd_uid45_fpToFxPTest_b_1_q or maxPosValueS_uid39_fpToFxPTest_q or maxNegValueS_uid40_fpToFxPTest_q or maxNegValueU_uid51_fpToFxPTest_q)
begin
unique case (finalOut_uid52_fpToFxPTest_s)
2'b00 : finalOut_uid52_fpToFxPTest_q = redist0_sPostRnd_uid45_fpToFxPTest_b_1_q;
2'b01 : finalOut_uid52_fpToFxPTest_q = maxPosValueS_uid39_fpToFxPTest_q;
2'b10 : finalOut_uid52_fpToFxPTest_q = maxNegValueS_uid40_fpToFxPTest_q;
2'b11 : finalOut_uid52_fpToFxPTest_q = maxNegValueU_uid51_fpToFxPTest_q;
// Only reached when the select contains X/Z bits (muxSel default arm).
default : finalOut_uid52_fpToFxPTest_q = 32'b0;
endcase
end
// xOut(GPOUT,4)@3
// Module output: the selected 32-bit value, driven without a further
// register in this block.
assign q = finalOut_uid52_fpToFxPTest_q;
endmodule

View File

@@ -0,0 +1,493 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 17.1 (Release Build #273)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2017 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_ftou
// SystemVerilog created on Wed Sep 2 07:11:09 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
module acl_ftou (
input wire [31:0] a,
input wire [0:0] en,
output wire [31:0] q,
input wire clk,
input wire areset
);
wire [0:0] GND_q;
wire [0:0] VCC_q;
wire [7:0] cstAllOWE_uid6_fpToFxPTest_q;
wire [22:0] cstZeroWF_uid7_fpToFxPTest_q;
wire [7:0] cstAllZWE_uid8_fpToFxPTest_q;
wire [7:0] exp_x_uid9_fpToFxPTest_b;
wire [22:0] frac_x_uid10_fpToFxPTest_b;
wire [0:0] excZ_x_uid11_fpToFxPTest_qi;
reg [0:0] excZ_x_uid11_fpToFxPTest_q;
wire [0:0] expXIsMax_uid12_fpToFxPTest_qi;
reg [0:0] expXIsMax_uid12_fpToFxPTest_q;
wire [0:0] fracXIsZero_uid13_fpToFxPTest_qi;
reg [0:0] fracXIsZero_uid13_fpToFxPTest_q;
wire [0:0] fracXIsNotZero_uid14_fpToFxPTest_q;
wire [0:0] excI_x_uid15_fpToFxPTest_q;
wire [0:0] excN_x_uid16_fpToFxPTest_q;
wire [0:0] invExcXZ_uid22_fpToFxPTest_q;
wire [23:0] oFracX_uid23_fpToFxPTest_q;
wire [0:0] signX_uid25_fpToFxPTest_b;
wire [8:0] ovfExpVal_uid26_fpToFxPTest_q;
wire [10:0] ovf_uid27_fpToFxPTest_a;
wire [10:0] ovf_uid27_fpToFxPTest_b;
logic [10:0] ovf_uid27_fpToFxPTest_o;
wire [0:0] ovf_uid27_fpToFxPTest_n;
wire [0:0] negOrOvf_uid28_fpToFxPTest_q;
wire [7:0] udfExpVal_uid29_fpToFxPTest_q;
wire [10:0] udf_uid30_fpToFxPTest_a;
wire [10:0] udf_uid30_fpToFxPTest_b;
logic [10:0] udf_uid30_fpToFxPTest_o;
wire [0:0] udf_uid30_fpToFxPTest_n;
wire [8:0] ovfExpVal_uid31_fpToFxPTest_q;
wire [10:0] shiftValE_uid32_fpToFxPTest_a;
wire [10:0] shiftValE_uid32_fpToFxPTest_b;
logic [10:0] shiftValE_uid32_fpToFxPTest_o;
wire [9:0] shiftValE_uid32_fpToFxPTest_q;
wire [5:0] shiftValRaw_uid33_fpToFxPTest_in;
wire [5:0] shiftValRaw_uid33_fpToFxPTest_b;
wire [5:0] maxShiftCst_uid34_fpToFxPTest_q;
wire [11:0] shiftOutOfRange_uid35_fpToFxPTest_a;
wire [11:0] shiftOutOfRange_uid35_fpToFxPTest_b;
logic [11:0] shiftOutOfRange_uid35_fpToFxPTest_o;
wire [0:0] shiftOutOfRange_uid35_fpToFxPTest_n;
wire [0:0] shiftVal_uid36_fpToFxPTest_s;
reg [5:0] shiftVal_uid36_fpToFxPTest_q;
wire [8:0] zPadd_uid37_fpToFxPTest_q;
wire [32:0] shifterIn_uid38_fpToFxPTest_q;
wire [31:0] maxPosValueU_uid40_fpToFxPTest_q;
wire [31:0] maxNegValueU_uid41_fpToFxPTest_q;
wire [33:0] zRightShiferNoStickyOut_uid43_fpToFxPTest_q;
wire [34:0] sPostRndFull_uid44_fpToFxPTest_a;
wire [34:0] sPostRndFull_uid44_fpToFxPTest_b;
logic [34:0] sPostRndFull_uid44_fpToFxPTest_o;
wire [34:0] sPostRndFull_uid44_fpToFxPTest_q;
wire [32:0] sPostRnd_uid45_fpToFxPTest_in;
wire [31:0] sPostRnd_uid45_fpToFxPTest_b;
wire [33:0] sPostRndFullMSBU_uid46_fpToFxPTest_in;
wire [0:0] sPostRndFullMSBU_uid46_fpToFxPTest_b;
wire [0:0] ovfPostRnd_uid47_fpToFxPTest_q;
wire [2:0] muxSelConc_uid48_fpToFxPTest_q;
reg [1:0] muxSel_uid49_fpToFxPTest_q;
wire [1:0] finalOut_uid51_fpToFxPTest_s;
reg [31:0] finalOut_uid51_fpToFxPTest_q;
wire [16:0] rightShiftStage0Idx1Rng16_uid55_rightShiferNoStickyOut_uid39_fpToFxPTest_b;
wire [15:0] rightShiftStage0Idx1Pad16_uid56_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [32:0] rightShiftStage0Idx1_uid57_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [0:0] rightShiftStage0Idx2Rng32_uid58_rightShiferNoStickyOut_uid39_fpToFxPTest_b;
wire [32:0] rightShiftStage0Idx2_uid60_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [32:0] rightShiftStage0Idx3_uid61_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [1:0] rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_s;
reg [32:0] rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [28:0] rightShiftStage1Idx1Rng4_uid64_rightShiferNoStickyOut_uid39_fpToFxPTest_b;
wire [3:0] rightShiftStage1Idx1Pad4_uid65_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [32:0] rightShiftStage1Idx1_uid66_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [24:0] rightShiftStage1Idx2Rng8_uid67_rightShiferNoStickyOut_uid39_fpToFxPTest_b;
wire [32:0] rightShiftStage1Idx2_uid69_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [20:0] rightShiftStage1Idx3Rng12_uid70_rightShiferNoStickyOut_uid39_fpToFxPTest_b;
wire [11:0] rightShiftStage1Idx3Pad12_uid71_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [32:0] rightShiftStage1Idx3_uid72_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [1:0] rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_s;
reg [32:0] rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [31:0] rightShiftStage2Idx1Rng1_uid75_rightShiferNoStickyOut_uid39_fpToFxPTest_b;
wire [32:0] rightShiftStage2Idx1_uid77_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [30:0] rightShiftStage2Idx2Rng2_uid78_rightShiferNoStickyOut_uid39_fpToFxPTest_b;
wire [1:0] rightShiftStage2Idx2Pad2_uid79_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [32:0] rightShiftStage2Idx2_uid80_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [29:0] rightShiftStage2Idx3Rng3_uid81_rightShiferNoStickyOut_uid39_fpToFxPTest_b;
wire [2:0] rightShiftStage2Idx3Pad3_uid82_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [32:0] rightShiftStage2Idx3_uid83_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [1:0] rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_s;
reg [32:0] rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
wire [1:0] rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select_b;
wire [1:0] rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select_c;
wire [1:0] rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select_d;
reg [31:0] redist0_sPostRnd_uid45_fpToFxPTest_b_1_q;
reg [5:0] redist1_shiftValRaw_uid33_fpToFxPTest_b_1_q;
reg [0:0] redist2_udf_uid30_fpToFxPTest_n_2_q;
reg [0:0] redist3_ovf_uid27_fpToFxPTest_n_2_q;
reg [0:0] redist4_signX_uid25_fpToFxPTest_b_2_q;
reg [0:0] redist5_expXIsMax_uid12_fpToFxPTest_q_2_q;
reg [22:0] redist6_frac_x_uid10_fpToFxPTest_b_1_q;
// maxNegValueU_uid41_fpToFxPTest(CONSTANT,40)
assign maxNegValueU_uid41_fpToFxPTest_q = 32'b00000000000000000000000000000000;
// maxPosValueU_uid40_fpToFxPTest(CONSTANT,39)
assign maxPosValueU_uid40_fpToFxPTest_q = 32'b11111111111111111111111111111111;
// VCC(CONSTANT,1)
assign VCC_q = 1'b1;
// GND(CONSTANT,0)
assign GND_q = 1'b0;
// rightShiftStage2Idx3Pad3_uid82_rightShiferNoStickyOut_uid39_fpToFxPTest(CONSTANT,81)
assign rightShiftStage2Idx3Pad3_uid82_rightShiferNoStickyOut_uid39_fpToFxPTest_q = 3'b000;
// rightShiftStage2Idx3Rng3_uid81_rightShiferNoStickyOut_uid39_fpToFxPTest(BITSELECT,80)@1
assign rightShiftStage2Idx3Rng3_uid81_rightShiferNoStickyOut_uid39_fpToFxPTest_b = rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q[32:3];
// rightShiftStage2Idx3_uid83_rightShiferNoStickyOut_uid39_fpToFxPTest(BITJOIN,82)@1
assign rightShiftStage2Idx3_uid83_rightShiferNoStickyOut_uid39_fpToFxPTest_q = {rightShiftStage2Idx3Pad3_uid82_rightShiferNoStickyOut_uid39_fpToFxPTest_q, rightShiftStage2Idx3Rng3_uid81_rightShiferNoStickyOut_uid39_fpToFxPTest_b};
// rightShiftStage2Idx2Pad2_uid79_rightShiferNoStickyOut_uid39_fpToFxPTest(CONSTANT,78)
assign rightShiftStage2Idx2Pad2_uid79_rightShiferNoStickyOut_uid39_fpToFxPTest_q = 2'b00;
// rightShiftStage2Idx2Rng2_uid78_rightShiferNoStickyOut_uid39_fpToFxPTest(BITSELECT,77)@1
assign rightShiftStage2Idx2Rng2_uid78_rightShiferNoStickyOut_uid39_fpToFxPTest_b = rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q[32:2];
// rightShiftStage2Idx2_uid80_rightShiferNoStickyOut_uid39_fpToFxPTest(BITJOIN,79)@1
assign rightShiftStage2Idx2_uid80_rightShiferNoStickyOut_uid39_fpToFxPTest_q = {rightShiftStage2Idx2Pad2_uid79_rightShiferNoStickyOut_uid39_fpToFxPTest_q, rightShiftStage2Idx2Rng2_uid78_rightShiferNoStickyOut_uid39_fpToFxPTest_b};
// rightShiftStage2Idx1Rng1_uid75_rightShiferNoStickyOut_uid39_fpToFxPTest(BITSELECT,74)@1
assign rightShiftStage2Idx1Rng1_uid75_rightShiferNoStickyOut_uid39_fpToFxPTest_b = rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q[32:1];
// rightShiftStage2Idx1_uid77_rightShiferNoStickyOut_uid39_fpToFxPTest(BITJOIN,76)@1
assign rightShiftStage2Idx1_uid77_rightShiferNoStickyOut_uid39_fpToFxPTest_q = {GND_q, rightShiftStage2Idx1Rng1_uid75_rightShiferNoStickyOut_uid39_fpToFxPTest_b};
// rightShiftStage1Idx3Pad12_uid71_rightShiferNoStickyOut_uid39_fpToFxPTest(CONSTANT,70)
assign rightShiftStage1Idx3Pad12_uid71_rightShiferNoStickyOut_uid39_fpToFxPTest_q = 12'b000000000000;
// rightShiftStage1Idx3Rng12_uid70_rightShiferNoStickyOut_uid39_fpToFxPTest(BITSELECT,69)@1
assign rightShiftStage1Idx3Rng12_uid70_rightShiferNoStickyOut_uid39_fpToFxPTest_b = rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q[32:12];
// rightShiftStage1Idx3_uid72_rightShiferNoStickyOut_uid39_fpToFxPTest(BITJOIN,71)@1
assign rightShiftStage1Idx3_uid72_rightShiferNoStickyOut_uid39_fpToFxPTest_q = {rightShiftStage1Idx3Pad12_uid71_rightShiferNoStickyOut_uid39_fpToFxPTest_q, rightShiftStage1Idx3Rng12_uid70_rightShiferNoStickyOut_uid39_fpToFxPTest_b};
// cstAllZWE_uid8_fpToFxPTest(CONSTANT,7)
assign cstAllZWE_uid8_fpToFxPTest_q = 8'b00000000;
// rightShiftStage1Idx2Rng8_uid67_rightShiferNoStickyOut_uid39_fpToFxPTest(BITSELECT,66)@1
assign rightShiftStage1Idx2Rng8_uid67_rightShiferNoStickyOut_uid39_fpToFxPTest_b = rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q[32:8];
// rightShiftStage1Idx2_uid69_rightShiferNoStickyOut_uid39_fpToFxPTest(BITJOIN,68)@1
assign rightShiftStage1Idx2_uid69_rightShiferNoStickyOut_uid39_fpToFxPTest_q = {cstAllZWE_uid8_fpToFxPTest_q, rightShiftStage1Idx2Rng8_uid67_rightShiferNoStickyOut_uid39_fpToFxPTest_b};
// rightShiftStage1Idx1Pad4_uid65_rightShiferNoStickyOut_uid39_fpToFxPTest(CONSTANT,64)
assign rightShiftStage1Idx1Pad4_uid65_rightShiferNoStickyOut_uid39_fpToFxPTest_q = 4'b0000;
// rightShiftStage1Idx1Rng4_uid64_rightShiferNoStickyOut_uid39_fpToFxPTest(BITSELECT,63)@1
assign rightShiftStage1Idx1Rng4_uid64_rightShiferNoStickyOut_uid39_fpToFxPTest_b = rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q[32:4];
// rightShiftStage1Idx1_uid66_rightShiferNoStickyOut_uid39_fpToFxPTest(BITJOIN,65)@1
assign rightShiftStage1Idx1_uid66_rightShiferNoStickyOut_uid39_fpToFxPTest_q = {rightShiftStage1Idx1Pad4_uid65_rightShiferNoStickyOut_uid39_fpToFxPTest_q, rightShiftStage1Idx1Rng4_uid64_rightShiferNoStickyOut_uid39_fpToFxPTest_b};
// rightShiftStage0Idx3_uid61_rightShiferNoStickyOut_uid39_fpToFxPTest(CONSTANT,60)
assign rightShiftStage0Idx3_uid61_rightShiferNoStickyOut_uid39_fpToFxPTest_q = 33'b000000000000000000000000000000000;
// rightShiftStage0Idx2Rng32_uid58_rightShiferNoStickyOut_uid39_fpToFxPTest(BITSELECT,57)@1
assign rightShiftStage0Idx2Rng32_uid58_rightShiferNoStickyOut_uid39_fpToFxPTest_b = shifterIn_uid38_fpToFxPTest_q[32:32];
// rightShiftStage0Idx2_uid60_rightShiferNoStickyOut_uid39_fpToFxPTest(BITJOIN,59)@1
assign rightShiftStage0Idx2_uid60_rightShiferNoStickyOut_uid39_fpToFxPTest_q = {maxNegValueU_uid41_fpToFxPTest_q, rightShiftStage0Idx2Rng32_uid58_rightShiferNoStickyOut_uid39_fpToFxPTest_b};
// rightShiftStage0Idx1Pad16_uid56_rightShiferNoStickyOut_uid39_fpToFxPTest(CONSTANT,55)
assign rightShiftStage0Idx1Pad16_uid56_rightShiferNoStickyOut_uid39_fpToFxPTest_q = 16'b0000000000000000;
// rightShiftStage0Idx1Rng16_uid55_rightShiferNoStickyOut_uid39_fpToFxPTest(BITSELECT,54)@1
assign rightShiftStage0Idx1Rng16_uid55_rightShiferNoStickyOut_uid39_fpToFxPTest_b = shifterIn_uid38_fpToFxPTest_q[32:16];
// rightShiftStage0Idx1_uid57_rightShiferNoStickyOut_uid39_fpToFxPTest(BITJOIN,56)@1
assign rightShiftStage0Idx1_uid57_rightShiferNoStickyOut_uid39_fpToFxPTest_q = {rightShiftStage0Idx1Pad16_uid56_rightShiferNoStickyOut_uid39_fpToFxPTest_q, rightShiftStage0Idx1Rng16_uid55_rightShiferNoStickyOut_uid39_fpToFxPTest_b};
// exp_x_uid9_fpToFxPTest(BITSELECT,8)@0
assign exp_x_uid9_fpToFxPTest_b = a[30:23];
// excZ_x_uid11_fpToFxPTest(LOGICAL,10)@0 + 1
assign excZ_x_uid11_fpToFxPTest_qi = exp_x_uid9_fpToFxPTest_b == cstAllZWE_uid8_fpToFxPTest_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
excZ_x_uid11_fpToFxPTest_delay ( .xin(excZ_x_uid11_fpToFxPTest_qi), .xout(excZ_x_uid11_fpToFxPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// invExcXZ_uid22_fpToFxPTest(LOGICAL,21)@1
assign invExcXZ_uid22_fpToFxPTest_q = ~ (excZ_x_uid11_fpToFxPTest_q);
// frac_x_uid10_fpToFxPTest(BITSELECT,9)@0
assign frac_x_uid10_fpToFxPTest_b = a[22:0];
// redist6_frac_x_uid10_fpToFxPTest_b_1(DELAY,93)
dspba_delay_ver #( .width(23), .depth(1), .reset_kind("ASYNC") )
redist6_frac_x_uid10_fpToFxPTest_b_1 ( .xin(frac_x_uid10_fpToFxPTest_b), .xout(redist6_frac_x_uid10_fpToFxPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// oFracX_uid23_fpToFxPTest(BITJOIN,22)@1
assign oFracX_uid23_fpToFxPTest_q = {invExcXZ_uid22_fpToFxPTest_q, redist6_frac_x_uid10_fpToFxPTest_b_1_q};
// zPadd_uid37_fpToFxPTest(CONSTANT,36)
assign zPadd_uid37_fpToFxPTest_q = 9'b000000000;
// shifterIn_uid38_fpToFxPTest(BITJOIN,37)@1
assign shifterIn_uid38_fpToFxPTest_q = {oFracX_uid23_fpToFxPTest_q, zPadd_uid37_fpToFxPTest_q};
// rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest(MUX,62)@1
assign rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_s = rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select_b;
always @(rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_s or en or shifterIn_uid38_fpToFxPTest_q or rightShiftStage0Idx1_uid57_rightShiferNoStickyOut_uid39_fpToFxPTest_q or rightShiftStage0Idx2_uid60_rightShiferNoStickyOut_uid39_fpToFxPTest_q or rightShiftStage0Idx3_uid61_rightShiferNoStickyOut_uid39_fpToFxPTest_q)
begin
unique case (rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_s)
2'b00 : rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q = shifterIn_uid38_fpToFxPTest_q;
2'b01 : rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q = rightShiftStage0Idx1_uid57_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
2'b10 : rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q = rightShiftStage0Idx2_uid60_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
2'b11 : rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q = rightShiftStage0Idx3_uid61_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
default : rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q = 33'b0;
endcase
end
// rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest(MUX,73)@1
assign rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_s = rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select_c;
always @(rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_s or en or rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q or rightShiftStage1Idx1_uid66_rightShiferNoStickyOut_uid39_fpToFxPTest_q or rightShiftStage1Idx2_uid69_rightShiferNoStickyOut_uid39_fpToFxPTest_q or rightShiftStage1Idx3_uid72_rightShiferNoStickyOut_uid39_fpToFxPTest_q)
begin
unique case (rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_s)
2'b00 : rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q = rightShiftStage0_uid63_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
2'b01 : rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q = rightShiftStage1Idx1_uid66_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
2'b10 : rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q = rightShiftStage1Idx2_uid69_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
2'b11 : rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q = rightShiftStage1Idx3_uid72_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
default : rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q = 33'b0;
endcase
end
// maxShiftCst_uid34_fpToFxPTest(CONSTANT,33)
assign maxShiftCst_uid34_fpToFxPTest_q = 6'b100001;
// ovfExpVal_uid31_fpToFxPTest(CONSTANT,30)
assign ovfExpVal_uid31_fpToFxPTest_q = 9'b010011110;
// shiftValE_uid32_fpToFxPTest(SUB,31)@0
assign shiftValE_uid32_fpToFxPTest_a = {{2{ovfExpVal_uid31_fpToFxPTest_q[8]}}, ovfExpVal_uid31_fpToFxPTest_q};
assign shiftValE_uid32_fpToFxPTest_b = {3'b000, exp_x_uid9_fpToFxPTest_b};
assign shiftValE_uid32_fpToFxPTest_o = $signed(shiftValE_uid32_fpToFxPTest_a) - $signed(shiftValE_uid32_fpToFxPTest_b);
assign shiftValE_uid32_fpToFxPTest_q = shiftValE_uid32_fpToFxPTest_o[9:0];
// shiftValRaw_uid33_fpToFxPTest(BITSELECT,32)@0
assign shiftValRaw_uid33_fpToFxPTest_in = shiftValE_uid32_fpToFxPTest_q[5:0];
assign shiftValRaw_uid33_fpToFxPTest_b = shiftValRaw_uid33_fpToFxPTest_in[5:0];
// redist1_shiftValRaw_uid33_fpToFxPTest_b_1(DELAY,88)
dspba_delay_ver #( .width(6), .depth(1), .reset_kind("ASYNC") )
redist1_shiftValRaw_uid33_fpToFxPTest_b_1 ( .xin(shiftValRaw_uid33_fpToFxPTest_b), .xout(redist1_shiftValRaw_uid33_fpToFxPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// shiftOutOfRange_uid35_fpToFxPTest(COMPARE,34)@0 + 1
assign shiftOutOfRange_uid35_fpToFxPTest_a = {{2{shiftValE_uid32_fpToFxPTest_q[9]}}, shiftValE_uid32_fpToFxPTest_q};
assign shiftOutOfRange_uid35_fpToFxPTest_b = {6'b000000, maxShiftCst_uid34_fpToFxPTest_q};
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
shiftOutOfRange_uid35_fpToFxPTest_o <= 12'b0;
end
else if (en == 1'b1)
begin
shiftOutOfRange_uid35_fpToFxPTest_o <= $signed(shiftOutOfRange_uid35_fpToFxPTest_a) - $signed(shiftOutOfRange_uid35_fpToFxPTest_b);
end
end
assign shiftOutOfRange_uid35_fpToFxPTest_n[0] = ~ (shiftOutOfRange_uid35_fpToFxPTest_o[11]);
// shiftVal_uid36_fpToFxPTest(MUX,35)@1
assign shiftVal_uid36_fpToFxPTest_s = shiftOutOfRange_uid35_fpToFxPTest_n;
always @(shiftVal_uid36_fpToFxPTest_s or en or redist1_shiftValRaw_uid33_fpToFxPTest_b_1_q or maxShiftCst_uid34_fpToFxPTest_q)
begin
unique case (shiftVal_uid36_fpToFxPTest_s)
1'b0 : shiftVal_uid36_fpToFxPTest_q = redist1_shiftValRaw_uid33_fpToFxPTest_b_1_q;
1'b1 : shiftVal_uid36_fpToFxPTest_q = maxShiftCst_uid34_fpToFxPTest_q;
default : shiftVal_uid36_fpToFxPTest_q = 6'b0;
endcase
end
// rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select(BITSELECT,86)@1
assign rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select_b = shiftVal_uid36_fpToFxPTest_q[5:4];
assign rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select_c = shiftVal_uid36_fpToFxPTest_q[3:2];
assign rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select_d = shiftVal_uid36_fpToFxPTest_q[1:0];
// rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest(MUX,84)@1 + 1
assign rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_s = rightShiftStageSel5Dto4_uid62_rightShiferNoStickyOut_uid39_fpToFxPTest_merged_bit_select_d;
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_q <= 33'b0;
end
else if (en == 1'b1)
begin
unique case (rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_s)
2'b00 : rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_q <= rightShiftStage1_uid74_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
2'b01 : rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_q <= rightShiftStage2Idx1_uid77_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
2'b10 : rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_q <= rightShiftStage2Idx2_uid80_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
2'b11 : rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_q <= rightShiftStage2Idx3_uid83_rightShiferNoStickyOut_uid39_fpToFxPTest_q;
default : rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_q <= 33'b0;
endcase
end
end
// zRightShiferNoStickyOut_uid43_fpToFxPTest(BITJOIN,42)@2
assign zRightShiferNoStickyOut_uid43_fpToFxPTest_q = {GND_q, rightShiftStage2_uid85_rightShiferNoStickyOut_uid39_fpToFxPTest_q};
// sPostRndFull_uid44_fpToFxPTest(ADD,43)@2
assign sPostRndFull_uid44_fpToFxPTest_a = {1'b0, zRightShiferNoStickyOut_uid43_fpToFxPTest_q};
assign sPostRndFull_uid44_fpToFxPTest_b = {34'b0000000000000000000000000000000000, VCC_q};
assign sPostRndFull_uid44_fpToFxPTest_o = $unsigned(sPostRndFull_uid44_fpToFxPTest_a) + $unsigned(sPostRndFull_uid44_fpToFxPTest_b);
assign sPostRndFull_uid44_fpToFxPTest_q = sPostRndFull_uid44_fpToFxPTest_o[34:0];
// sPostRnd_uid45_fpToFxPTest(BITSELECT,44)@2
assign sPostRnd_uid45_fpToFxPTest_in = sPostRndFull_uid44_fpToFxPTest_q[32:0];
assign sPostRnd_uid45_fpToFxPTest_b = sPostRnd_uid45_fpToFxPTest_in[32:1];
// redist0_sPostRnd_uid45_fpToFxPTest_b_1(DELAY,87)
dspba_delay_ver #( .width(32), .depth(1), .reset_kind("ASYNC") )
redist0_sPostRnd_uid45_fpToFxPTest_b_1 ( .xin(sPostRnd_uid45_fpToFxPTest_b), .xout(redist0_sPostRnd_uid45_fpToFxPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// signX_uid25_fpToFxPTest(BITSELECT,24)@0
assign signX_uid25_fpToFxPTest_b = a[31:31];
// redist4_signX_uid25_fpToFxPTest_b_2(DELAY,91)
dspba_delay_ver #( .width(1), .depth(2), .reset_kind("ASYNC") )
redist4_signX_uid25_fpToFxPTest_b_2 ( .xin(signX_uid25_fpToFxPTest_b), .xout(redist4_signX_uid25_fpToFxPTest_b_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// udfExpVal_uid29_fpToFxPTest(CONSTANT,28)
assign udfExpVal_uid29_fpToFxPTest_q = 8'b01111101;
// udf_uid30_fpToFxPTest(COMPARE,29)@0 + 1
assign udf_uid30_fpToFxPTest_a = {{3{udfExpVal_uid29_fpToFxPTest_q[7]}}, udfExpVal_uid29_fpToFxPTest_q};
assign udf_uid30_fpToFxPTest_b = {3'b000, exp_x_uid9_fpToFxPTest_b};
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
udf_uid30_fpToFxPTest_o <= 11'b0;
end
else if (en == 1'b1)
begin
udf_uid30_fpToFxPTest_o <= $signed(udf_uid30_fpToFxPTest_a) - $signed(udf_uid30_fpToFxPTest_b);
end
end
assign udf_uid30_fpToFxPTest_n[0] = ~ (udf_uid30_fpToFxPTest_o[10]);
// redist2_udf_uid30_fpToFxPTest_n_2(DELAY,89)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist2_udf_uid30_fpToFxPTest_n_2 ( .xin(udf_uid30_fpToFxPTest_n), .xout(redist2_udf_uid30_fpToFxPTest_n_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// sPostRndFullMSBU_uid46_fpToFxPTest(BITSELECT,45)@2
assign sPostRndFullMSBU_uid46_fpToFxPTest_in = sPostRndFull_uid44_fpToFxPTest_q[33:0];
assign sPostRndFullMSBU_uid46_fpToFxPTest_b = sPostRndFullMSBU_uid46_fpToFxPTest_in[33:33];
// ovfExpVal_uid26_fpToFxPTest(CONSTANT,25)
assign ovfExpVal_uid26_fpToFxPTest_q = 9'b010011111;
// ovf_uid27_fpToFxPTest(COMPARE,26)@0 + 1
assign ovf_uid27_fpToFxPTest_a = {3'b000, exp_x_uid9_fpToFxPTest_b};
assign ovf_uid27_fpToFxPTest_b = {{2{ovfExpVal_uid26_fpToFxPTest_q[8]}}, ovfExpVal_uid26_fpToFxPTest_q};
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
ovf_uid27_fpToFxPTest_o <= 11'b0;
end
else if (en == 1'b1)
begin
ovf_uid27_fpToFxPTest_o <= $signed(ovf_uid27_fpToFxPTest_a) - $signed(ovf_uid27_fpToFxPTest_b);
end
end
assign ovf_uid27_fpToFxPTest_n[0] = ~ (ovf_uid27_fpToFxPTest_o[10]);
// redist3_ovf_uid27_fpToFxPTest_n_2(DELAY,90)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist3_ovf_uid27_fpToFxPTest_n_2 ( .xin(ovf_uid27_fpToFxPTest_n), .xout(redist3_ovf_uid27_fpToFxPTest_n_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// negOrOvf_uid28_fpToFxPTest(LOGICAL,27)@2
assign negOrOvf_uid28_fpToFxPTest_q = redist4_signX_uid25_fpToFxPTest_b_2_q | redist3_ovf_uid27_fpToFxPTest_n_2_q;
// cstZeroWF_uid7_fpToFxPTest(CONSTANT,6)
assign cstZeroWF_uid7_fpToFxPTest_q = 23'b00000000000000000000000;
// fracXIsZero_uid13_fpToFxPTest(LOGICAL,12)@1 + 1
assign fracXIsZero_uid13_fpToFxPTest_qi = cstZeroWF_uid7_fpToFxPTest_q == redist6_frac_x_uid10_fpToFxPTest_b_1_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
fracXIsZero_uid13_fpToFxPTest_delay ( .xin(fracXIsZero_uid13_fpToFxPTest_qi), .xout(fracXIsZero_uid13_fpToFxPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// cstAllOWE_uid6_fpToFxPTest(CONSTANT,5)
assign cstAllOWE_uid6_fpToFxPTest_q = 8'b11111111;
// expXIsMax_uid12_fpToFxPTest(LOGICAL,11)@0 + 1
assign expXIsMax_uid12_fpToFxPTest_qi = exp_x_uid9_fpToFxPTest_b == cstAllOWE_uid6_fpToFxPTest_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
expXIsMax_uid12_fpToFxPTest_delay ( .xin(expXIsMax_uid12_fpToFxPTest_qi), .xout(expXIsMax_uid12_fpToFxPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist5_expXIsMax_uid12_fpToFxPTest_q_2(DELAY,92)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist5_expXIsMax_uid12_fpToFxPTest_q_2 ( .xin(expXIsMax_uid12_fpToFxPTest_q), .xout(redist5_expXIsMax_uid12_fpToFxPTest_q_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// excI_x_uid15_fpToFxPTest(LOGICAL,14)@2
assign excI_x_uid15_fpToFxPTest_q = redist5_expXIsMax_uid12_fpToFxPTest_q_2_q & fracXIsZero_uid13_fpToFxPTest_q;
// fracXIsNotZero_uid14_fpToFxPTest(LOGICAL,13)@2
assign fracXIsNotZero_uid14_fpToFxPTest_q = ~ (fracXIsZero_uid13_fpToFxPTest_q);
// excN_x_uid16_fpToFxPTest(LOGICAL,15)@2
assign excN_x_uid16_fpToFxPTest_q = redist5_expXIsMax_uid12_fpToFxPTest_q_2_q & fracXIsNotZero_uid14_fpToFxPTest_q;
// ovfPostRnd_uid47_fpToFxPTest(LOGICAL,46)@2
assign ovfPostRnd_uid47_fpToFxPTest_q = excN_x_uid16_fpToFxPTest_q | excI_x_uid15_fpToFxPTest_q | negOrOvf_uid28_fpToFxPTest_q | sPostRndFullMSBU_uid46_fpToFxPTest_b;
// muxSelConc_uid48_fpToFxPTest(BITJOIN,47)@2
assign muxSelConc_uid48_fpToFxPTest_q = {redist4_signX_uid25_fpToFxPTest_b_2_q, redist2_udf_uid30_fpToFxPTest_n_2_q, ovfPostRnd_uid47_fpToFxPTest_q};
// muxSel_uid49_fpToFxPTest(LOOKUP,48)@2 + 1
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
muxSel_uid49_fpToFxPTest_q <= 2'b00;
end
else if (en == 1'b1)
begin
unique case (muxSelConc_uid48_fpToFxPTest_q)
3'b000 : muxSel_uid49_fpToFxPTest_q <= 2'b00;
3'b001 : muxSel_uid49_fpToFxPTest_q <= 2'b01;
3'b010 : muxSel_uid49_fpToFxPTest_q <= 2'b11;
3'b011 : muxSel_uid49_fpToFxPTest_q <= 2'b00;
3'b100 : muxSel_uid49_fpToFxPTest_q <= 2'b10;
3'b101 : muxSel_uid49_fpToFxPTest_q <= 2'b10;
3'b110 : muxSel_uid49_fpToFxPTest_q <= 2'b10;
3'b111 : muxSel_uid49_fpToFxPTest_q <= 2'b10;
default : begin
// unreachable
muxSel_uid49_fpToFxPTest_q <= 2'bxx;
end
endcase
end
end
// finalOut_uid51_fpToFxPTest(MUX,50)@3
assign finalOut_uid51_fpToFxPTest_s = muxSel_uid49_fpToFxPTest_q;
always @(finalOut_uid51_fpToFxPTest_s or en or redist0_sPostRnd_uid45_fpToFxPTest_b_1_q or maxPosValueU_uid40_fpToFxPTest_q or maxNegValueU_uid41_fpToFxPTest_q)
begin
unique case (finalOut_uid51_fpToFxPTest_s)
2'b00 : finalOut_uid51_fpToFxPTest_q = redist0_sPostRnd_uid45_fpToFxPTest_b_1_q;
2'b01 : finalOut_uid51_fpToFxPTest_q = maxPosValueU_uid40_fpToFxPTest_q;
2'b10 : finalOut_uid51_fpToFxPTest_q = maxNegValueU_uid41_fpToFxPTest_q;
2'b11 : finalOut_uid51_fpToFxPTest_q = maxNegValueU_uid41_fpToFxPTest_q;
default : finalOut_uid51_fpToFxPTest_q = 32'b0;
endcase
end
// xOut(GPOUT,4)@3
assign q = finalOut_uid51_fpToFxPTest_q;
endmodule

View File

@@ -0,0 +1,169 @@
starting execution ...
build model options ...
argc=21
Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_fdiv
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 539, DSPs 5, RAMBits 32768, RAMBlocks 3
The pipeline depth of the block is 15 cycle(s)
@@start
@name FPDiv@
@latency 15@
@LUT 539@
@DSP 5@
@RAMBits 32768@
@RAMBlockUsage 3@
@enable 1@
@subnormals 0@
@error 1.00@
@rounding NA@
@method polynomial approximation@
@inPort 0 fpieee 8 23@
@inPort 1 fpieee 8 23@
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=20
Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_fsqrt
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 271, DSPs 3, RAMBits 15872, RAMBlocks 3
The pipeline depth of the block is 10 cycle(s)
@@start
@name FPSqrt@
@latency 10@
@LUT 271@
@DSP 3@
@RAMBits 15872@
@RAMBlockUsage 3@
@enable 1@
@subnormals 0@
@error 1.00@
@rounding NA@
@method polynomial approximation@
@inPort 0 fpieee 8 23@
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=23
Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_ftoi
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 327, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s)
@@start
@name FPToFXP@
@latency 3@
@LUT 327@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 1.00@
@rounding NA@
@method default@
@inPort 0 fpieee 8 23@
@outPort 0 fxp 32 0 1@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=23
Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_ftou
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 287, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s)
@@start
@name FPToFXP@
@latency 3@
@LUT 287@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 1.00@
@rounding NA@
@method default@
@inPort 0 fpieee 8 23@
@outPort 0 fxp 32 0 0@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=23
Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_itof
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 397, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 7 cycle(s)
@@start
@name FXPToFP@
@latency 7@
@LUT 397@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 1.00@
@rounding NA@
@method default@
@inPort 0 fxp 32 0 1@
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=23
Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_utof
Frequency 300MHz
Deployment FPGA Arria10
Estimated resources LUTs 363, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 7 cycle(s)
@@start
@name FXPToFP@
@latency 7@
@LUT 363@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 1.00@
@rounding NA@
@method default@
@inPort 0 fxp 32 0 0@
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end

View File

@@ -0,0 +1,25 @@
#!/bin/bash
#
# Regenerates the floating-point IP cores (acl_fdiv, acl_fsqrt, acl_ftoi,
# acl_ftou, acl_itof, acl_utof) by invoking cmdPolyEval once per core.
# Everything the tool prints (stdout and stderr) is captured in acl_gen.log
# in the current directory; generated files land wherever cmdPolyEval puts
# them (presumably the current directory -- confirm against the tool docs).
#
# Environment:
#   QUARTUS_HOME  root of the Quartus installation; cmdPolyEval is expected
#                 at $QUARTUS_HOME/dspba/backend/linux64/cmdPolyEval.
#
# Exit status: 0 when every core was generated, 1 when the environment is
# unusable or at least one cmdPolyEval invocation returned non-zero.

set -uo pipefail

if [[ -z "${QUARTUS_HOME:-}" ]]; then
    echo "error: QUARTUS_HOME is not set; cannot locate cmdPolyEval" >&2
    exit 1
fi

CMD_POLY_EVAL_PATH="$QUARTUS_HOME/dspba/backend/linux64"

if [[ ! -x "$CMD_POLY_EVAL_PATH/cmdPolyEval" ]]; then
    echo "error: $CMD_POLY_EVAL_PATH/cmdPolyEval is missing or not executable" >&2
    exit 1
fi

# Options shared by every core. Kept as an array so each option stays a
# separate word without relying on unquoted word splitting.
OPTIONS=(-target Arria10 -lang verilog -enableHardFP 1 -printMachineReadable
         -faithfulRounding -noChanValid -enable -speedgrade 2)

# Put the tool's own directory first on the loader path. The ':' separator is
# only emitted when LD_LIBRARY_PATH already has a value: a trailing empty
# element would make the dynamic loader search the current directory.
export LD_LIBRARY_PATH="$CMD_POLY_EVAL_PATH${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

CMD=("$CMD_POLY_EVAL_PATH/cmdPolyEval" "${OPTIONS[@]}")

# IEEE-754 single precision: 8 exponent bits + 23 mantissa bits + 1 sign bit.
EXP_BITS=8
MAN_BITS=23
FBITS="f$((EXP_BITS + MAN_BITS + 1))"

echo Generating IP cores for "$FBITS"

# Run every generator even if an earlier one fails, so acl_gen.log always
# holds the complete picture; remember whether anything went wrong.
failed=0
{
    "${CMD[@]}" -name acl_fdiv  -frequency 250 FPDiv   "$EXP_BITS" "$MAN_BITS" 0 || failed=1
    "${CMD[@]}" -name acl_fsqrt -frequency 250 FPSqrt  "$EXP_BITS" "$MAN_BITS" || failed=1
    # Trailing "32 0 1" / "32 0 0" describe the fixed-point port; the last
    # digit presumably selects signed (1) vs unsigned (0) -- confirm.
    "${CMD[@]}" -name acl_ftoi  -frequency 250 FPToFXP "$EXP_BITS" "$MAN_BITS" 32 0 1 || failed=1
    "${CMD[@]}" -name acl_ftou  -frequency 250 FPToFXP "$EXP_BITS" "$MAN_BITS" 32 0 0 || failed=1
    "${CMD[@]}" -name acl_itof  -frequency 250 FXPToFP 32 0 1 "$EXP_BITS" "$MAN_BITS" || failed=1
    # NOTE(review): acl_utof targets 300 MHz while every other core targets
    # 250 MHz. Left unchanged because the checked-in core was generated with
    # this value -- confirm whether the difference is intentional.
    "${CMD[@]}" -name acl_utof  -frequency 300 FXPToFP 32 0 0 "$EXP_BITS" "$MAN_BITS" || failed=1
} > acl_gen.log 2>&1

if (( failed )); then
    echo "error: at least one IP core failed to generate; see acl_gen.log" >&2
    exit 1
fi

# Optional: the generated cores instantiate dspba_delay_ver; its library
# source ships with Quartus and can be refreshed with:
#cp $QUARTUS_HOME/dspba/backend/Libraries/sv/base/dspba_library_ver.sv .

View File

@@ -0,0 +1,520 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 17.1 (Release Build #273)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2017 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_itof
// SystemVerilog created on Wed Sep 2 07:11:09 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
module acl_itof (
input wire [31:0] a,
input wire [0:0] en,
output wire [31:0] q,
input wire clk,
input wire areset
);
wire [0:0] GND_q;
wire [0:0] signX_uid6_fxpToFPTest_b;
wire [31:0] xXorSign_uid7_fxpToFPTest_b;
wire [31:0] xXorSign_uid7_fxpToFPTest_qi;
reg [31:0] xXorSign_uid7_fxpToFPTest_q;
wire [32:0] yE_uid8_fxpToFPTest_a;
wire [32:0] yE_uid8_fxpToFPTest_b;
logic [32:0] yE_uid8_fxpToFPTest_o;
wire [32:0] yE_uid8_fxpToFPTest_q;
wire [31:0] y_uid9_fxpToFPTest_in;
wire [31:0] y_uid9_fxpToFPTest_b;
wire [5:0] maxCount_uid11_fxpToFPTest_q;
wire [0:0] inIsZero_uid12_fxpToFPTest_qi;
reg [0:0] inIsZero_uid12_fxpToFPTest_q;
wire [7:0] msbIn_uid13_fxpToFPTest_q;
wire [8:0] expPreRnd_uid14_fxpToFPTest_a;
wire [8:0] expPreRnd_uid14_fxpToFPTest_b;
logic [8:0] expPreRnd_uid14_fxpToFPTest_o;
wire [8:0] expPreRnd_uid14_fxpToFPTest_q;
wire [32:0] expFracRnd_uid16_fxpToFPTest_q;
wire [0:0] sticky_uid20_fxpToFPTest_qi;
reg [0:0] sticky_uid20_fxpToFPTest_q;
wire [0:0] nr_uid21_fxpToFPTest_q;
wire [0:0] rnd_uid22_fxpToFPTest_q;
wire [34:0] expFracR_uid24_fxpToFPTest_a;
wire [34:0] expFracR_uid24_fxpToFPTest_b;
logic [34:0] expFracR_uid24_fxpToFPTest_o;
wire [33:0] expFracR_uid24_fxpToFPTest_q;
wire [23:0] fracR_uid25_fxpToFPTest_in;
wire [22:0] fracR_uid25_fxpToFPTest_b;
wire [9:0] expR_uid26_fxpToFPTest_b;
wire [11:0] udf_uid27_fxpToFPTest_a;
wire [11:0] udf_uid27_fxpToFPTest_b;
logic [11:0] udf_uid27_fxpToFPTest_o;
wire [0:0] udf_uid27_fxpToFPTest_n;
wire [7:0] expInf_uid28_fxpToFPTest_q;
wire [11:0] ovf_uid29_fxpToFPTest_a;
wire [11:0] ovf_uid29_fxpToFPTest_b;
logic [11:0] ovf_uid29_fxpToFPTest_o;
wire [0:0] ovf_uid29_fxpToFPTest_n;
wire [0:0] excSelector_uid30_fxpToFPTest_q;
wire [22:0] fracZ_uid31_fxpToFPTest_q;
wire [0:0] fracRPostExc_uid32_fxpToFPTest_s;
reg [22:0] fracRPostExc_uid32_fxpToFPTest_q;
wire [0:0] udfOrInZero_uid33_fxpToFPTest_q;
wire [1:0] excSelector_uid34_fxpToFPTest_q;
wire [7:0] expZ_uid37_fxpToFPTest_q;
wire [7:0] expR_uid38_fxpToFPTest_in;
wire [7:0] expR_uid38_fxpToFPTest_b;
wire [1:0] expRPostExc_uid39_fxpToFPTest_s;
reg [7:0] expRPostExc_uid39_fxpToFPTest_q;
wire [31:0] outRes_uid40_fxpToFPTest_q;
wire [31:0] zs_uid42_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_qi;
reg [0:0] vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_s;
reg [31:0] vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [15:0] zs_uid47_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [31:0] cStage_uid52_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_s;
reg [31:0] vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [31:0] cStage_uid59_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_s;
reg [31:0] vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [3:0] zs_uid61_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [31:0] cStage_uid66_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_s;
reg [31:0] vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [1:0] zs_uid68_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vCount_uid70_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [31:0] cStage_uid73_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_s;
reg [31:0] vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vCount_uid77_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [31:0] cStage_uid80_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [0:0] vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest_s;
reg [31:0] vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [5:0] vCount_uid82_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [7:0] vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_a;
wire [7:0] vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_b;
logic [7:0] vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_o;
wire [0:0] vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_c;
wire [0:0] vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_s;
reg [5:0] vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_q;
wire [1:0] l_uid17_fxpToFPTest_merged_bit_select_in;
wire [0:0] l_uid17_fxpToFPTest_merged_bit_select_b;
wire [0:0] l_uid17_fxpToFPTest_merged_bit_select_c;
wire [15:0] rVStage_uid48_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b;
wire [15:0] rVStage_uid48_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c;
wire [7:0] rVStage_uid55_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b;
wire [23:0] rVStage_uid55_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c;
wire [3:0] rVStage_uid62_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b;
wire [27:0] rVStage_uid62_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c;
wire [1:0] rVStage_uid69_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b;
wire [29:0] rVStage_uid69_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c;
wire [0:0] rVStage_uid76_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b;
wire [30:0] rVStage_uid76_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c;
wire [30:0] fracRnd_uid15_fxpToFPTest_merged_bit_select_in;
wire [23:0] fracRnd_uid15_fxpToFPTest_merged_bit_select_b;
wire [6:0] fracRnd_uid15_fxpToFPTest_merged_bit_select_c;
reg [23:0] redist0_fracRnd_uid15_fxpToFPTest_merged_bit_select_b_2_q;
reg [0:0] redist1_vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest_q_1_q;
reg [0:0] redist2_vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest_q_1_q;
reg [0:0] redist3_vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest_q_2_q;
reg [0:0] redist4_vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_q_3_q;
reg [9:0] redist5_expR_uid26_fxpToFPTest_b_1_q;
reg [22:0] redist6_fracR_uid25_fxpToFPTest_b_1_q;
reg [0:0] redist7_sticky_uid20_fxpToFPTest_q_2_q;
reg [0:0] redist8_inIsZero_uid12_fxpToFPTest_q_2_q;
reg [31:0] redist9_y_uid9_fxpToFPTest_b_1_q;
reg [0:0] redist10_signX_uid6_fxpToFPTest_b_1_q;
reg [0:0] redist11_signX_uid6_fxpToFPTest_b_7_q;
// signX_uid6_fxpToFPTest(BITSELECT,5)@0
assign signX_uid6_fxpToFPTest_b = a[31:31];
// redist10_signX_uid6_fxpToFPTest_b_1(DELAY,105)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist10_signX_uid6_fxpToFPTest_b_1 ( .xin(signX_uid6_fxpToFPTest_b), .xout(redist10_signX_uid6_fxpToFPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist11_signX_uid6_fxpToFPTest_b_7(DELAY,106)
dspba_delay_ver #( .width(1), .depth(6), .reset_kind("ASYNC") )
redist11_signX_uid6_fxpToFPTest_b_7 ( .xin(redist10_signX_uid6_fxpToFPTest_b_1_q), .xout(redist11_signX_uid6_fxpToFPTest_b_7_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// expInf_uid28_fxpToFPTest(CONSTANT,27)
assign expInf_uid28_fxpToFPTest_q = 8'b11111111;
// expZ_uid37_fxpToFPTest(CONSTANT,36)
assign expZ_uid37_fxpToFPTest_q = 8'b00000000;
// rVStage_uid76_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select(BITSELECT,93)@4
assign rVStage_uid76_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b = vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_q[31:31];
assign rVStage_uid76_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c = vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_q[30:0];
// GND(CONSTANT,0)
assign GND_q = 1'b0;
// cStage_uid80_lzcShifterZ1_uid10_fxpToFPTest(BITJOIN,79)@4
assign cStage_uid80_lzcShifterZ1_uid10_fxpToFPTest_q = {rVStage_uid76_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c, GND_q};
// rVStage_uid69_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select(BITSELECT,92)@4
assign rVStage_uid69_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b = vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_q[31:30];
assign rVStage_uid69_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c = vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_q[29:0];
// zs_uid68_lzcShifterZ1_uid10_fxpToFPTest(CONSTANT,67)
assign zs_uid68_lzcShifterZ1_uid10_fxpToFPTest_q = 2'b00;
// cStage_uid73_lzcShifterZ1_uid10_fxpToFPTest(BITJOIN,72)@4
assign cStage_uid73_lzcShifterZ1_uid10_fxpToFPTest_q = {rVStage_uid69_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c, zs_uid68_lzcShifterZ1_uid10_fxpToFPTest_q};
// rVStage_uid62_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select(BITSELECT,91)@3
assign rVStage_uid62_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b = vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_q[31:28];
assign rVStage_uid62_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c = vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_q[27:0];
// zs_uid61_lzcShifterZ1_uid10_fxpToFPTest(CONSTANT,60)
assign zs_uid61_lzcShifterZ1_uid10_fxpToFPTest_q = 4'b0000;
// cStage_uid66_lzcShifterZ1_uid10_fxpToFPTest(BITJOIN,65)@3
assign cStage_uid66_lzcShifterZ1_uid10_fxpToFPTest_q = {rVStage_uid62_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c, zs_uid61_lzcShifterZ1_uid10_fxpToFPTest_q};
// rVStage_uid55_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select(BITSELECT,90)@3
assign rVStage_uid55_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b = vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_q[31:24];
assign rVStage_uid55_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c = vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_q[23:0];
// cStage_uid59_lzcShifterZ1_uid10_fxpToFPTest(BITJOIN,58)@3
assign cStage_uid59_lzcShifterZ1_uid10_fxpToFPTest_q = {rVStage_uid55_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c, expZ_uid37_fxpToFPTest_q};
// rVStage_uid48_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select(BITSELECT,89)@2
assign rVStage_uid48_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b = vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_q[31:16];
assign rVStage_uid48_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c = vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_q[15:0];
// zs_uid47_lzcShifterZ1_uid10_fxpToFPTest(CONSTANT,46)
assign zs_uid47_lzcShifterZ1_uid10_fxpToFPTest_q = 16'b0000000000000000;
// cStage_uid52_lzcShifterZ1_uid10_fxpToFPTest(BITJOIN,51)@2
assign cStage_uid52_lzcShifterZ1_uid10_fxpToFPTest_q = {rVStage_uid48_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_c, zs_uid47_lzcShifterZ1_uid10_fxpToFPTest_q};
// zs_uid42_lzcShifterZ1_uid10_fxpToFPTest(CONSTANT,41)
assign zs_uid42_lzcShifterZ1_uid10_fxpToFPTest_q = 32'b00000000000000000000000000000000;
// xXorSign_uid7_fxpToFPTest(LOGICAL,6)@0 + 1
// First half of a conditional negate: bit 0 of signX_uid6 is replicated to fill the
// word and XORed with `a`, so `a` is bitwise-inverted when that bit is 1 and passed
// through unchanged when it is 0. The XOR result then goes through a depth-1
// dspba_delay_ver instance.
// NOTE(review): signX_uid6 is defined outside this excerpt; presumably it is the
// 1-bit sign of `a` - confirm.
assign xXorSign_uid7_fxpToFPTest_b = {{31{signX_uid6_fxpToFPTest_b[0]}}, signX_uid6_fxpToFPTest_b};
assign xXorSign_uid7_fxpToFPTest_qi = a ^ xXorSign_uid7_fxpToFPTest_b;
dspba_delay_ver #( .width(32), .depth(1), .reset_kind("ASYNC") )
xXorSign_uid7_fxpToFPTest_delay ( .xin(xXorSign_uid7_fxpToFPTest_qi), .xout(xXorSign_uid7_fxpToFPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// yE_uid8_fxpToFPTest(ADD,7)@1
// Second half: the XOR result is zero-extended by one bit and the redist10 copy of
// the sign bit (presumably signX delayed one cycle - confirm) is added in the LSB
// position, i.e. invert-and-add-one when the sign bit is 1.
assign yE_uid8_fxpToFPTest_a = {1'b0, xXorSign_uid7_fxpToFPTest_q};
assign yE_uid8_fxpToFPTest_b = {32'b00000000000000000000000000000000, redist10_signX_uid6_fxpToFPTest_b_1_q};
assign yE_uid8_fxpToFPTest_o = $unsigned(yE_uid8_fxpToFPTest_a) + $unsigned(yE_uid8_fxpToFPTest_b);
assign yE_uid8_fxpToFPTest_q = yE_uid8_fxpToFPTest_o[32:0];
// y_uid9_fxpToFPTest(BITSELECT,8)@1
// Keeps the low 32 bits of the sum; bit 32 (the carry out) is dropped.
assign y_uid9_fxpToFPTest_in = yE_uid8_fxpToFPTest_q[31:0];
assign y_uid9_fxpToFPTest_b = y_uid9_fxpToFPTest_in[31:0];
// redist9_y_uid9_fxpToFPTest_b_1(DELAY,104)
dspba_delay_ver #( .width(32), .depth(1), .reset_kind("ASYNC") )
redist9_y_uid9_fxpToFPTest_b_1 ( .xin(y_uid9_fxpToFPTest_b), .xout(redist9_y_uid9_fxpToFPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest(LOGICAL,43)@1 + 1
assign vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_qi = y_uid9_fxpToFPTest_b == zs_uid42_lzcShifterZ1_uid10_fxpToFPTest_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_delay ( .xin(vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_qi), .xout(vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest(MUX,45)@2
// Combinational 2:1 mux. The selector is vCount_uid44, the delayed result of
// comparing y_uid9 with the 32-bit zero constant. Selector 0 passes the depth-1
// delayed copy of y_uid9 through; selector 1 drives the 32-bit zero constant.
// `en` is listed in the sensitivity list but is never read in the body.
// The default arm is reachable only when the selector is X/Z in simulation.
assign vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_s = vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_q;
always @(vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_s or en or redist9_y_uid9_fxpToFPTest_b_1_q or zs_uid42_lzcShifterZ1_uid10_fxpToFPTest_q)
begin
unique case (vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_s)
1'b0 : vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_q = redist9_y_uid9_fxpToFPTest_b_1_q;
1'b1 : vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_q = zs_uid42_lzcShifterZ1_uid10_fxpToFPTest_q;
default : vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_q = 32'b0;
endcase
end
// vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest(LOGICAL,48)@2
assign vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest_q = rVStage_uid48_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b == zs_uid47_lzcShifterZ1_uid10_fxpToFPTest_q ? 1'b1 : 1'b0;
// vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest(MUX,52)@2 + 1
// Registered 2:1 mux. The selector vCount_uid49 is 1 when bits [31:16] of
// vStagei_uid46 equal the 16-bit zero constant. In that case the register loads
// cStage_uid52 (bits [15:0] of vStagei_uid46 followed by 16 zero bits, i.e. a left
// shift by 16); otherwise it loads vStagei_uid46 unchanged.
// The register clears asynchronously to 0 on areset and updates on a rising clk
// edge only while en is 1. The default arm covers an X/Z selector in simulation.
assign vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_s = vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest_q;
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_q <= 32'b0;
end
else if (en == 1'b1)
begin
unique case (vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_s)
1'b0 : vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_q <= vStagei_uid46_lzcShifterZ1_uid10_fxpToFPTest_q;
1'b1 : vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_q <= cStage_uid52_lzcShifterZ1_uid10_fxpToFPTest_q;
default : vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_q <= 32'b0;
endcase
end
end
// vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest(LOGICAL,55)@3
assign vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest_q = rVStage_uid55_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b == expZ_uid37_fxpToFPTest_q ? 1'b1 : 1'b0;
// vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest(MUX,59)@3
// Combinational 2:1 mux. The selector vCount_uid56 is 1 when bits [31:24] of
// vStagei_uid53 equal expZ_uid37. Selector 1 picks cStage_uid59 (bits [23:0] of
// vStagei_uid53 with expZ_uid37 appended below them); selector 0 passes
// vStagei_uid53 through unchanged.
// NOTE(review): expZ_uid37 is defined outside this excerpt; presumably it is the
// 8-bit zero constant, which would make this a left shift by 8 - confirm.
// `en` is listed in the sensitivity list but is never read in the body.
// The default arm is reachable only when the selector is X/Z in simulation.
assign vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_s = vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest_q;
always @(vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_s or en or vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_q or cStage_uid59_lzcShifterZ1_uid10_fxpToFPTest_q)
begin
unique case (vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_s)
1'b0 : vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_q = vStagei_uid53_lzcShifterZ1_uid10_fxpToFPTest_q;
1'b1 : vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_q = cStage_uid59_lzcShifterZ1_uid10_fxpToFPTest_q;
default : vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_q = 32'b0;
endcase
end
// vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest(LOGICAL,62)@3
assign vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest_q = rVStage_uid62_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b == zs_uid61_lzcShifterZ1_uid10_fxpToFPTest_q ? 1'b1 : 1'b0;
// vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest(MUX,66)@3 + 1
// Registered 2:1 mux. The selector vCount_uid63 is 1 when bits [31:28] of
// vStagei_uid60 equal 4'b0000. In that case the register loads cStage_uid66
// (bits [27:0] of vStagei_uid60 followed by four zero bits, i.e. a left shift
// by 4); otherwise it loads vStagei_uid60 unchanged.
// The register clears asynchronously to 0 on areset and updates on a rising clk
// edge only while en is 1. The default arm covers an X/Z selector in simulation.
assign vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_s = vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest_q;
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_q <= 32'b0;
end
else if (en == 1'b1)
begin
unique case (vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_s)
1'b0 : vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_q <= vStagei_uid60_lzcShifterZ1_uid10_fxpToFPTest_q;
1'b1 : vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_q <= cStage_uid66_lzcShifterZ1_uid10_fxpToFPTest_q;
default : vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_q <= 32'b0;
endcase
end
end
// vCount_uid70_lzcShifterZ1_uid10_fxpToFPTest(LOGICAL,69)@4
assign vCount_uid70_lzcShifterZ1_uid10_fxpToFPTest_q = rVStage_uid69_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b == zs_uid68_lzcShifterZ1_uid10_fxpToFPTest_q ? 1'b1 : 1'b0;
// vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest(MUX,73)@4
// Combinational 2:1 mux. The selector vCount_uid70 is 1 when rVStage_uid69's _b
// slice equals zs_uid68. Selector 1 picks cStage_uid73 (rVStage_uid69's _c slice
// with zs_uid68 appended below it); selector 0 passes vStagei_uid67 through.
// NOTE(review): rVStage_uid69 and zs_uid68 are defined outside this excerpt;
// presumably the top/bottom split of vStagei_uid67 and a zero constant - confirm.
// `en` is listed in the sensitivity list but is never read in the body.
// The default arm is reachable only when the selector is X/Z in simulation.
assign vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_s = vCount_uid70_lzcShifterZ1_uid10_fxpToFPTest_q;
always @(vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_s or en or vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_q or cStage_uid73_lzcShifterZ1_uid10_fxpToFPTest_q)
begin
unique case (vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_s)
1'b0 : vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_q = vStagei_uid67_lzcShifterZ1_uid10_fxpToFPTest_q;
1'b1 : vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_q = cStage_uid73_lzcShifterZ1_uid10_fxpToFPTest_q;
default : vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_q = 32'b0;
endcase
end
// vCount_uid77_lzcShifterZ1_uid10_fxpToFPTest(LOGICAL,76)@4
assign vCount_uid77_lzcShifterZ1_uid10_fxpToFPTest_q = rVStage_uid76_lzcShifterZ1_uid10_fxpToFPTest_merged_bit_select_b == GND_q ? 1'b1 : 1'b0;
// vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest(MUX,80)@4
// Combinational 2:1 mux, the last vStagei mux before the fracRnd bit select reads
// its output. The selector vCount_uid77 is 1 when rVStage_uid76's _b slice equals
// GND_q. Selector 1 picks cStage_uid80; selector 0 passes vStagei_uid74 through.
// NOTE(review): rVStage_uid76, cStage_uid80 and GND_q are defined outside this
// excerpt; presumably a 1-bit top slice, a shift-by-1 join and constant 0 - confirm.
// `en` is listed in the sensitivity list but is never read in the body.
// The default arm is reachable only when the selector is X/Z in simulation.
assign vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest_s = vCount_uid77_lzcShifterZ1_uid10_fxpToFPTest_q;
always @(vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest_s or en or vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_q or cStage_uid80_lzcShifterZ1_uid10_fxpToFPTest_q)
begin
unique case (vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest_s)
1'b0 : vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest_q = vStagei_uid74_lzcShifterZ1_uid10_fxpToFPTest_q;
1'b1 : vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest_q = cStage_uid80_lzcShifterZ1_uid10_fxpToFPTest_q;
default : vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest_q = 32'b0;
endcase
end
// fracRnd_uid15_fxpToFPTest_merged_bit_select(BITSELECT,94)@4
assign fracRnd_uid15_fxpToFPTest_merged_bit_select_in = vStagei_uid81_lzcShifterZ1_uid10_fxpToFPTest_q[30:0];
assign fracRnd_uid15_fxpToFPTest_merged_bit_select_b = fracRnd_uid15_fxpToFPTest_merged_bit_select_in[30:7];
assign fracRnd_uid15_fxpToFPTest_merged_bit_select_c = fracRnd_uid15_fxpToFPTest_merged_bit_select_in[6:0];
// sticky_uid20_fxpToFPTest(LOGICAL,19)@4 + 1
assign sticky_uid20_fxpToFPTest_qi = fracRnd_uid15_fxpToFPTest_merged_bit_select_c != 7'b0000000 ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
sticky_uid20_fxpToFPTest_delay ( .xin(sticky_uid20_fxpToFPTest_qi), .xout(sticky_uid20_fxpToFPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist7_sticky_uid20_fxpToFPTest_q_2(DELAY,102)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist7_sticky_uid20_fxpToFPTest_q_2 ( .xin(sticky_uid20_fxpToFPTest_q), .xout(redist7_sticky_uid20_fxpToFPTest_q_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// nr_uid21_fxpToFPTest(LOGICAL,20)@6
// Inverse of bit 0 of expFracRnd (the bit position that fracR_uid25 later drops).
assign nr_uid21_fxpToFPTest_q = ~ (l_uid17_fxpToFPTest_merged_bit_select_c);
// l_uid17_fxpToFPTest_merged_bit_select(BITSELECT,88)@6
// Splits the two lowest bits of expFracRnd: _b is bit 1, _c is bit 0.
assign l_uid17_fxpToFPTest_merged_bit_select_in = expFracRnd_uid16_fxpToFPTest_q[1:0];
assign l_uid17_fxpToFPTest_merged_bit_select_b = l_uid17_fxpToFPTest_merged_bit_select_in[1:1];
assign l_uid17_fxpToFPTest_merged_bit_select_c = l_uid17_fxpToFPTest_merged_bit_select_in[0:0];
// rnd_uid22_fxpToFPTest(LOGICAL,21)@6
// 1-bit increment that expFracR_uid24 adds to expFracRnd. It is 0 only when bit 0
// is 1, bit 1 is 0 and the delayed sticky flag is 0; in every other case it is 1.
// NOTE(review): because bit 0 is dropped after the add, this looks like
// round-to-nearest with ties-to-even - inferred from the logic, confirm.
assign rnd_uid22_fxpToFPTest_q = l_uid17_fxpToFPTest_merged_bit_select_b | nr_uid21_fxpToFPTest_q | redist7_sticky_uid20_fxpToFPTest_q_2_q;
// maxCount_uid11_fxpToFPTest(CONSTANT,10)
assign maxCount_uid11_fxpToFPTest_q = 6'b100000;
// redist4_vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_q_3(DELAY,99)
dspba_delay_ver #( .width(1), .depth(2), .reset_kind("ASYNC") )
redist4_vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_q_3 ( .xin(vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_q), .xout(redist4_vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_q_3_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist3_vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest_q_2(DELAY,98)
dspba_delay_ver #( .width(1), .depth(2), .reset_kind("ASYNC") )
redist3_vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest_q_2 ( .xin(vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest_q), .xout(redist3_vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest_q_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist2_vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest_q_1(DELAY,97)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist2_vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest_q_1 ( .xin(vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest_q), .xout(redist2_vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest_q_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist1_vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest_q_1(DELAY,96)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist1_vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest_q_1 ( .xin(vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest_q), .xout(redist1_vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest_q_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// vCount_uid82_lzcShifterZ1_uid10_fxpToFPTest(BITJOIN,81)@4
assign vCount_uid82_lzcShifterZ1_uid10_fxpToFPTest_q = {redist4_vCount_uid44_lzcShifterZ1_uid10_fxpToFPTest_q_3_q, redist3_vCount_uid49_lzcShifterZ1_uid10_fxpToFPTest_q_2_q, redist2_vCount_uid56_lzcShifterZ1_uid10_fxpToFPTest_q_1_q, redist1_vCount_uid63_lzcShifterZ1_uid10_fxpToFPTest_q_1_q, vCount_uid70_lzcShifterZ1_uid10_fxpToFPTest_q, vCount_uid77_lzcShifterZ1_uid10_fxpToFPTest_q};
// vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest(COMPARE,83)@4
// Unsigned subtract of the joined count vCount_uid82 from maxCount
// (6'b100000 = 32), each operand zero-extended by two bits. _c takes bit 7 of the
// difference, which is set when the subtraction goes negative, i.e. when
// vCount_uid82 is greater than 32.
// (Operand widths follow the concatenations here; the declarations are outside
// this excerpt.)
assign vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_a = {2'b00, maxCount_uid11_fxpToFPTest_q};
assign vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_b = {2'b00, vCount_uid82_lzcShifterZ1_uid10_fxpToFPTest_q};
assign vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_o = $unsigned(vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_a) - $unsigned(vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_b);
assign vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_c[0] = vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_o[7];
// vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest(MUX,85)@4 + 1
// Registered 2:1 mux that clamps the count: it loads maxCount (32) when the compare
// above flags vCount_uid82 > 32, and vCount_uid82 itself otherwise.
// The register clears asynchronously to 0 on areset and updates on a rising clk
// edge only while en is 1. The default arm covers an X/Z selector in simulation.
assign vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_s = vCountBig_uid84_lzcShifterZ1_uid10_fxpToFPTest_c;
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_q <= 6'b0;
end
else if (en == 1'b1)
begin
unique case (vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_s)
1'b0 : vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_q <= vCount_uid82_lzcShifterZ1_uid10_fxpToFPTest_q;
1'b1 : vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_q <= maxCount_uid11_fxpToFPTest_q;
default : vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_q <= 6'b0;
endcase
end
end
// msbIn_uid13_fxpToFPTest(CONSTANT,12)
// 8'b10011110 is 158 decimal.
// NOTE(review): presumably 127 + 31, the biased single-precision exponent of a
// value whose leading one sits in bit 31 - confirm.
assign msbIn_uid13_fxpToFPTest_q = 8'b10011110;
// expPreRnd_uid14_fxpToFPTest(SUB,13)@5 + 1
// Registered unsigned subtract: msbIn zero-extended by one bit minus
// vCountFinal_uid86 zero-extended by three bits.
// The register clears asynchronously to 0 on areset and updates on a rising clk
// edge only while en is 1. _q is the full 9-bit registered difference.
assign expPreRnd_uid14_fxpToFPTest_a = {1'b0, msbIn_uid13_fxpToFPTest_q};
assign expPreRnd_uid14_fxpToFPTest_b = {3'b000, vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_q};
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
expPreRnd_uid14_fxpToFPTest_o <= 9'b0;
end
else if (en == 1'b1)
begin
expPreRnd_uid14_fxpToFPTest_o <= $unsigned(expPreRnd_uid14_fxpToFPTest_a) - $unsigned(expPreRnd_uid14_fxpToFPTest_b);
end
end
assign expPreRnd_uid14_fxpToFPTest_q = expPreRnd_uid14_fxpToFPTest_o[8:0];
// redist0_fracRnd_uid15_fxpToFPTest_merged_bit_select_b_2(DELAY,95)
dspba_delay_ver #( .width(24), .depth(2), .reset_kind("ASYNC") )
redist0_fracRnd_uid15_fxpToFPTest_merged_bit_select_b_2 ( .xin(fracRnd_uid15_fxpToFPTest_merged_bit_select_b), .xout(redist0_fracRnd_uid15_fxpToFPTest_merged_bit_select_b_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// expFracRnd_uid16_fxpToFPTest(BITJOIN,15)@6
assign expFracRnd_uid16_fxpToFPTest_q = {expPreRnd_uid14_fxpToFPTest_q, redist0_fracRnd_uid15_fxpToFPTest_merged_bit_select_b_2_q};
// expFracR_uid24_fxpToFPTest(ADD,23)@6
assign expFracR_uid24_fxpToFPTest_a = {{2{expFracRnd_uid16_fxpToFPTest_q[32]}}, expFracRnd_uid16_fxpToFPTest_q};
assign expFracR_uid24_fxpToFPTest_b = {34'b0000000000000000000000000000000000, rnd_uid22_fxpToFPTest_q};
assign expFracR_uid24_fxpToFPTest_o = $signed(expFracR_uid24_fxpToFPTest_a) + $signed(expFracR_uid24_fxpToFPTest_b);
assign expFracR_uid24_fxpToFPTest_q = expFracR_uid24_fxpToFPTest_o[33:0];
// expR_uid26_fxpToFPTest(BITSELECT,25)@6
assign expR_uid26_fxpToFPTest_b = expFracR_uid24_fxpToFPTest_q[33:24];
// redist5_expR_uid26_fxpToFPTest_b_1(DELAY,100)
dspba_delay_ver #( .width(10), .depth(1), .reset_kind("ASYNC") )
redist5_expR_uid26_fxpToFPTest_b_1 ( .xin(expR_uid26_fxpToFPTest_b), .xout(redist5_expR_uid26_fxpToFPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// expR_uid38_fxpToFPTest(BITSELECT,37)@7
assign expR_uid38_fxpToFPTest_in = redist5_expR_uid26_fxpToFPTest_b_1_q[7:0];
assign expR_uid38_fxpToFPTest_b = expR_uid38_fxpToFPTest_in[7:0];
// ovf_uid29_fxpToFPTest(COMPARE,28)@7
// Signed compare of the delayed 10-bit expR against expInf. expR is sign-extended
// by two bits and expInf is zero-extended by four. _n is the inverted sign bit
// (bit 11) of expR - expInf, so _n is 1 when expR >= expInf.
// NOTE(review): expInf_uid28 is defined outside this excerpt; presumably an 8-bit
// all-ones constant - confirm.
assign ovf_uid29_fxpToFPTest_a = {{2{redist5_expR_uid26_fxpToFPTest_b_1_q[9]}}, redist5_expR_uid26_fxpToFPTest_b_1_q};
assign ovf_uid29_fxpToFPTest_b = {4'b0000, expInf_uid28_fxpToFPTest_q};
assign ovf_uid29_fxpToFPTest_o = $signed(ovf_uid29_fxpToFPTest_a) - $signed(ovf_uid29_fxpToFPTest_b);
assign ovf_uid29_fxpToFPTest_n[0] = ~ (ovf_uid29_fxpToFPTest_o[11]);
// inIsZero_uid12_fxpToFPTest(LOGICAL,11)@5 + 1
assign inIsZero_uid12_fxpToFPTest_qi = vCountFinal_uid86_lzcShifterZ1_uid10_fxpToFPTest_q == maxCount_uid11_fxpToFPTest_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
inIsZero_uid12_fxpToFPTest_delay ( .xin(inIsZero_uid12_fxpToFPTest_qi), .xout(inIsZero_uid12_fxpToFPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist8_inIsZero_uid12_fxpToFPTest_q_2(DELAY,103)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist8_inIsZero_uid12_fxpToFPTest_q_2 ( .xin(inIsZero_uid12_fxpToFPTest_q), .xout(redist8_inIsZero_uid12_fxpToFPTest_q_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// udf_uid27_fxpToFPTest(COMPARE,26)@7
// Signed compare of GND_q (zero-extended by eleven bits) against the delayed
// 10-bit expR (sign-extended by two bits). _n is the inverted sign bit (bit 11)
// of GND - expR, so _n is 1 when that difference is non-negative.
// NOTE(review): GND_q is defined outside this excerpt; if it is constant 0 as the
// name suggests, _n is 1 exactly when expR <= 0 - confirm.
assign udf_uid27_fxpToFPTest_a = {11'b00000000000, GND_q};
assign udf_uid27_fxpToFPTest_b = {{2{redist5_expR_uid26_fxpToFPTest_b_1_q[9]}}, redist5_expR_uid26_fxpToFPTest_b_1_q};
assign udf_uid27_fxpToFPTest_o = $signed(udf_uid27_fxpToFPTest_a) - $signed(udf_uid27_fxpToFPTest_b);
assign udf_uid27_fxpToFPTest_n[0] = ~ (udf_uid27_fxpToFPTest_o[11]);
// udfOrInZero_uid33_fxpToFPTest(LOGICAL,32)@7
// OR of the udf compare result and the delayed inIsZero flag.
assign udfOrInZero_uid33_fxpToFPTest_q = udf_uid27_fxpToFPTest_n | redist8_inIsZero_uid12_fxpToFPTest_q_2_q;
// excSelector_uid34_fxpToFPTest(BITJOIN,33)@7
// 2-bit selector: bit 1 is the ovf compare result, bit 0 is udfOrInZero.
assign excSelector_uid34_fxpToFPTest_q = {ovf_uid29_fxpToFPTest_n, udfOrInZero_uid33_fxpToFPTest_q};
// expRPostExc_uid39_fxpToFPTest(MUX,38)@7
// Combinational 4:1 mux producing the 8-bit exponent field used in outRes_uid40:
//   2'b00 (no flag set)          -> low 8 bits of the rounded exponent (expR_uid38)
//   2'b01 (udfOrInZero only)     -> expZ_uid37
//   2'b10 / 2'b11 (ovf set)      -> expInf_uid28, regardless of bit 0
// NOTE(review): expZ_uid37 and expInf_uid28 are defined outside this excerpt;
// presumably all-zeros and all-ones exponents - confirm.
// `en` is listed in the sensitivity list but is never read in the body.
// The default arm is reachable only when the selector contains X/Z in simulation.
assign expRPostExc_uid39_fxpToFPTest_s = excSelector_uid34_fxpToFPTest_q;
always @(expRPostExc_uid39_fxpToFPTest_s or en or expR_uid38_fxpToFPTest_b or expZ_uid37_fxpToFPTest_q or expInf_uid28_fxpToFPTest_q)
begin
unique case (expRPostExc_uid39_fxpToFPTest_s)
2'b00 : expRPostExc_uid39_fxpToFPTest_q = expR_uid38_fxpToFPTest_b;
2'b01 : expRPostExc_uid39_fxpToFPTest_q = expZ_uid37_fxpToFPTest_q;
2'b10 : expRPostExc_uid39_fxpToFPTest_q = expInf_uid28_fxpToFPTest_q;
2'b11 : expRPostExc_uid39_fxpToFPTest_q = expInf_uid28_fxpToFPTest_q;
default : expRPostExc_uid39_fxpToFPTest_q = 8'b0;
endcase
end
// fracZ_uid31_fxpToFPTest(CONSTANT,30)
assign fracZ_uid31_fxpToFPTest_q = 23'b00000000000000000000000;
// fracR_uid25_fxpToFPTest(BITSELECT,24)@6
assign fracR_uid25_fxpToFPTest_in = expFracR_uid24_fxpToFPTest_q[23:0];
assign fracR_uid25_fxpToFPTest_b = fracR_uid25_fxpToFPTest_in[23:1];
// redist6_fracR_uid25_fxpToFPTest_b_1(DELAY,101)
dspba_delay_ver #( .width(23), .depth(1), .reset_kind("ASYNC") )
redist6_fracR_uid25_fxpToFPTest_b_1 ( .xin(fracR_uid25_fxpToFPTest_b), .xout(redist6_fracR_uid25_fxpToFPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// excSelector_uid30_fxpToFPTest(LOGICAL,29)@7
// 1 when any of the delayed inIsZero flag, the ovf compare result or the udf
// compare result is 1.
assign excSelector_uid30_fxpToFPTest_q = redist8_inIsZero_uid12_fxpToFPTest_q_2_q | ovf_uid29_fxpToFPTest_n | udf_uid27_fxpToFPTest_n;
// fracRPostExc_uid32_fxpToFPTest(MUX,31)@7
// Combinational 2:1 mux producing the 23-bit fraction field used in outRes_uid40:
// the delayed rounded fraction when excSelector_uid30 is 0, the all-zero fraction
// constant fracZ_uid31 when it is 1.
// `en` is listed in the sensitivity list but is never read in the body.
// The default arm is reachable only when the selector is X/Z in simulation.
assign fracRPostExc_uid32_fxpToFPTest_s = excSelector_uid30_fxpToFPTest_q;
always @(fracRPostExc_uid32_fxpToFPTest_s or en or redist6_fracR_uid25_fxpToFPTest_b_1_q or fracZ_uid31_fxpToFPTest_q)
begin
unique case (fracRPostExc_uid32_fxpToFPTest_s)
1'b0 : fracRPostExc_uid32_fxpToFPTest_q = redist6_fracR_uid25_fxpToFPTest_b_1_q;
1'b1 : fracRPostExc_uid32_fxpToFPTest_q = fracZ_uid31_fxpToFPTest_q;
default : fracRPostExc_uid32_fxpToFPTest_q = 23'b0;
endcase
end
// outRes_uid40_fxpToFPTest(BITJOIN,39)@7
assign outRes_uid40_fxpToFPTest_q = {redist11_signX_uid6_fxpToFPTest_b_7_q, expRPostExc_uid39_fxpToFPTest_q, fracRPostExc_uid32_fxpToFPTest_q};
// xOut(GPOUT,4)@7
assign q = outRes_uid40_fxpToFPTest_q;
endmodule

View File

@@ -0,0 +1,486 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 17.1 (Release Build #273)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2017 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_utof
// SystemVerilog created on Wed Sep 2 07:11:09 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
module acl_utof (
input wire [31:0] a,
input wire [0:0] en,
output wire [31:0] q,
input wire clk,
input wire areset
);
wire [0:0] GND_q;
wire [5:0] maxCount_uid7_fxpToFPTest_q;
wire [0:0] inIsZero_uid8_fxpToFPTest_qi;
reg [0:0] inIsZero_uid8_fxpToFPTest_q;
wire [7:0] msbIn_uid9_fxpToFPTest_q;
wire [8:0] expPreRnd_uid10_fxpToFPTest_a;
wire [8:0] expPreRnd_uid10_fxpToFPTest_b;
logic [8:0] expPreRnd_uid10_fxpToFPTest_o;
wire [8:0] expPreRnd_uid10_fxpToFPTest_q;
wire [32:0] expFracRnd_uid12_fxpToFPTest_q;
wire [0:0] sticky_uid16_fxpToFPTest_qi;
reg [0:0] sticky_uid16_fxpToFPTest_q;
wire [0:0] nr_uid17_fxpToFPTest_q;
wire [0:0] rnd_uid18_fxpToFPTest_qi;
reg [0:0] rnd_uid18_fxpToFPTest_q;
wire [34:0] expFracR_uid20_fxpToFPTest_a;
wire [34:0] expFracR_uid20_fxpToFPTest_b;
logic [34:0] expFracR_uid20_fxpToFPTest_o;
wire [33:0] expFracR_uid20_fxpToFPTest_q;
wire [23:0] fracR_uid21_fxpToFPTest_in;
wire [22:0] fracR_uid21_fxpToFPTest_b;
wire [9:0] expR_uid22_fxpToFPTest_b;
wire [11:0] udf_uid23_fxpToFPTest_a;
wire [11:0] udf_uid23_fxpToFPTest_b;
logic [11:0] udf_uid23_fxpToFPTest_o;
wire [0:0] udf_uid23_fxpToFPTest_n;
wire [7:0] expInf_uid24_fxpToFPTest_q;
wire [11:0] ovf_uid25_fxpToFPTest_a;
wire [11:0] ovf_uid25_fxpToFPTest_b;
logic [11:0] ovf_uid25_fxpToFPTest_o;
wire [0:0] ovf_uid25_fxpToFPTest_n;
wire [0:0] excSelector_uid26_fxpToFPTest_q;
wire [22:0] fracZ_uid27_fxpToFPTest_q;
wire [0:0] fracRPostExc_uid28_fxpToFPTest_s;
reg [22:0] fracRPostExc_uid28_fxpToFPTest_q;
wire [0:0] udfOrInZero_uid29_fxpToFPTest_q;
wire [1:0] excSelector_uid30_fxpToFPTest_q;
wire [7:0] expZ_uid33_fxpToFPTest_q;
wire [7:0] expR_uid34_fxpToFPTest_in;
wire [7:0] expR_uid34_fxpToFPTest_b;
wire [1:0] expRPostExc_uid35_fxpToFPTest_s;
reg [7:0] expRPostExc_uid35_fxpToFPTest_q;
wire [31:0] outRes_uid36_fxpToFPTest_q;
wire [31:0] zs_uid38_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_qi;
reg [0:0] vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_s;
reg [31:0] vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [15:0] zs_uid43_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [31:0] cStage_uid48_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_s;
reg [31:0] vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [31:0] cStage_uid55_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_s;
reg [31:0] vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [3:0] zs_uid57_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [31:0] cStage_uid62_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_s;
reg [31:0] vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [1:0] zs_uid64_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [31:0] cStage_uid69_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_s;
reg [31:0] vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vCount_uid73_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [31:0] cStage_uid76_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [0:0] vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest_s;
reg [31:0] vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [5:0] vCount_uid78_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [7:0] vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_a;
wire [7:0] vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_b;
logic [7:0] vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_o;
wire [0:0] vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_c;
wire [0:0] vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_s;
reg [5:0] vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_q;
wire [1:0] l_uid13_fxpToFPTest_merged_bit_select_in;
wire [0:0] l_uid13_fxpToFPTest_merged_bit_select_b;
wire [0:0] l_uid13_fxpToFPTest_merged_bit_select_c;
wire [15:0] rVStage_uid44_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b;
wire [15:0] rVStage_uid44_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c;
wire [7:0] rVStage_uid51_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b;
wire [23:0] rVStage_uid51_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c;
wire [3:0] rVStage_uid58_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b;
wire [27:0] rVStage_uid58_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c;
wire [1:0] rVStage_uid65_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b;
wire [29:0] rVStage_uid65_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c;
wire [0:0] rVStage_uid72_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b;
wire [30:0] rVStage_uid72_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c;
wire [30:0] fracRnd_uid11_fxpToFPTest_merged_bit_select_in;
wire [23:0] fracRnd_uid11_fxpToFPTest_merged_bit_select_b;
wire [6:0] fracRnd_uid11_fxpToFPTest_merged_bit_select_c;
reg [23:0] redist0_fracRnd_uid11_fxpToFPTest_merged_bit_select_b_1_q;
reg [0:0] redist1_vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest_q_1_q;
reg [0:0] redist2_vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest_q_1_q;
reg [0:0] redist3_vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest_q_2_q;
reg [0:0] redist4_vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest_q_3_q;
reg [0:0] redist5_vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_q_4_q;
reg [9:0] redist6_expR_uid22_fxpToFPTest_b_1_q;
reg [22:0] redist7_fracR_uid21_fxpToFPTest_b_1_q;
reg [32:0] redist8_expFracRnd_uid12_fxpToFPTest_q_1_q;
reg [0:0] redist9_inIsZero_uid8_fxpToFPTest_q_2_q;
reg [31:0] redist10_xIn_a_1_q;
// GND(CONSTANT,0)
assign GND_q = 1'b0;
// expInf_uid24_fxpToFPTest(CONSTANT,23)
assign expInf_uid24_fxpToFPTest_q = 8'b11111111;
// expZ_uid33_fxpToFPTest(CONSTANT,32)
assign expZ_uid33_fxpToFPTest_q = 8'b00000000;
// rVStage_uid72_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select(BITSELECT,89)@4
assign rVStage_uid72_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b = vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_q[31:31];
assign rVStage_uid72_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c = vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_q[30:0];
// cStage_uid76_lzcShifterZ1_uid6_fxpToFPTest(BITJOIN,75)@4
assign cStage_uid76_lzcShifterZ1_uid6_fxpToFPTest_q = {rVStage_uid72_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c, GND_q};
// rVStage_uid65_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select(BITSELECT,88)@3
assign rVStage_uid65_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b = vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_q[31:30];
assign rVStage_uid65_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c = vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_q[29:0];
// zs_uid64_lzcShifterZ1_uid6_fxpToFPTest(CONSTANT,63)
assign zs_uid64_lzcShifterZ1_uid6_fxpToFPTest_q = 2'b00;
// cStage_uid69_lzcShifterZ1_uid6_fxpToFPTest(BITJOIN,68)@3
assign cStage_uid69_lzcShifterZ1_uid6_fxpToFPTest_q = {rVStage_uid65_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c, zs_uid64_lzcShifterZ1_uid6_fxpToFPTest_q};
// rVStage_uid58_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select(BITSELECT,87)@3
assign rVStage_uid58_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b = vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_q[31:28];
assign rVStage_uid58_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c = vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_q[27:0];
// zs_uid57_lzcShifterZ1_uid6_fxpToFPTest(CONSTANT,56)
assign zs_uid57_lzcShifterZ1_uid6_fxpToFPTest_q = 4'b0000;
// cStage_uid62_lzcShifterZ1_uid6_fxpToFPTest(BITJOIN,61)@3
assign cStage_uid62_lzcShifterZ1_uid6_fxpToFPTest_q = {rVStage_uid58_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c, zs_uid57_lzcShifterZ1_uid6_fxpToFPTest_q};
// rVStage_uid51_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select(BITSELECT,86)@2
assign rVStage_uid51_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b = vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_q[31:24];
assign rVStage_uid51_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c = vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_q[23:0];
// cStage_uid55_lzcShifterZ1_uid6_fxpToFPTest(BITJOIN,54)@2
assign cStage_uid55_lzcShifterZ1_uid6_fxpToFPTest_q = {rVStage_uid51_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c, expZ_uid33_fxpToFPTest_q};
// rVStage_uid44_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select(BITSELECT,85)@1
assign rVStage_uid44_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b = vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_q[31:16];
assign rVStage_uid44_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c = vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_q[15:0];
// zs_uid43_lzcShifterZ1_uid6_fxpToFPTest(CONSTANT,42)
assign zs_uid43_lzcShifterZ1_uid6_fxpToFPTest_q = 16'b0000000000000000;
// cStage_uid48_lzcShifterZ1_uid6_fxpToFPTest(BITJOIN,47)@1
assign cStage_uid48_lzcShifterZ1_uid6_fxpToFPTest_q = {rVStage_uid44_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_c, zs_uid43_lzcShifterZ1_uid6_fxpToFPTest_q};
// zs_uid38_lzcShifterZ1_uid6_fxpToFPTest(CONSTANT,37)
assign zs_uid38_lzcShifterZ1_uid6_fxpToFPTest_q = 32'b00000000000000000000000000000000;
// redist10_xIn_a_1(DELAY,101)
dspba_delay_ver #( .width(32), .depth(1), .reset_kind("ASYNC") )
redist10_xIn_a_1 ( .xin(a), .xout(redist10_xIn_a_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest(LOGICAL,39)@0 + 1
assign vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_qi = a == zs_uid38_lzcShifterZ1_uid6_fxpToFPTest_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_delay ( .xin(vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_qi), .xout(vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest(MUX,41)@1
// Combinational 2:1 mux. The selector is vCount_uid40, the depth-1 delayed result
// of comparing input `a` with the 32-bit zero constant. Selector 0 passes the
// depth-1 delayed copy of `a` (redist10_xIn_a_1_q) through; selector 1 drives the
// 32-bit zero constant.
// `en` is listed in the sensitivity list but is never read in the body.
// The default arm is reachable only when the selector is X/Z in simulation.
assign vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_s = vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_q;
always @(vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_s or en or redist10_xIn_a_1_q or zs_uid38_lzcShifterZ1_uid6_fxpToFPTest_q)
begin
unique case (vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_s)
1'b0 : vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_q = redist10_xIn_a_1_q;
1'b1 : vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_q = zs_uid38_lzcShifterZ1_uid6_fxpToFPTest_q;
default : vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_q = 32'b0;
endcase
end
// vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest(LOGICAL,44)@1
assign vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest_q = rVStage_uid44_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b == zs_uid43_lzcShifterZ1_uid6_fxpToFPTest_q ? 1'b1 : 1'b0;
// vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest(MUX,48)@1 + 1
// Registered 2:1 mux. The selector vCount_uid45 is 1 when bits [31:16] of
// vStagei_uid42 equal the 16-bit zero constant. In that case the register loads
// cStage_uid48 (bits [15:0] of vStagei_uid42 followed by 16 zero bits, i.e. a left
// shift by 16); otherwise it loads vStagei_uid42 unchanged.
// The register clears asynchronously to 0 on areset and updates on a rising clk
// edge only while en is 1. The default arm covers an X/Z selector in simulation.
assign vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_s = vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest_q;
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_q <= 32'b0;
end
else if (en == 1'b1)
begin
unique case (vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_s)
1'b0 : vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_q <= vStagei_uid42_lzcShifterZ1_uid6_fxpToFPTest_q;
1'b1 : vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_q <= cStage_uid48_lzcShifterZ1_uid6_fxpToFPTest_q;
default : vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_q <= 32'b0;
endcase
end
end
// vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest(LOGICAL,51)@2
assign vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest_q = rVStage_uid51_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b == expZ_uid33_fxpToFPTest_q ? 1'b1 : 1'b0;
// vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest(MUX,55)@2 + 1
// Registered 2:1 mux. The selector vCount_uid52 is 1 when bits [31:24] of
// vStagei_uid49 equal expZ_uid33 (8'b00000000, reused here as an 8-bit zero).
// In that case the register loads cStage_uid55 (bits [23:0] of vStagei_uid49
// followed by eight zero bits, i.e. a left shift by 8); otherwise it loads
// vStagei_uid49 unchanged.
// The register clears asynchronously to 0 on areset and updates on a rising clk
// edge only while en is 1. The default arm covers an X/Z selector in simulation.
assign vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_s = vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest_q;
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_q <= 32'b0;
end
else if (en == 1'b1)
begin
unique case (vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_s)
1'b0 : vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_q <= vStagei_uid49_lzcShifterZ1_uid6_fxpToFPTest_q;
1'b1 : vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_q <= cStage_uid55_lzcShifterZ1_uid6_fxpToFPTest_q;
default : vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_q <= 32'b0;
endcase
end
end
// vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest(LOGICAL,58)@3
assign vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest_q = rVStage_uid58_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b == zs_uid57_lzcShifterZ1_uid6_fxpToFPTest_q ? 1'b1 : 1'b0;
// vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest(MUX,62)@3
assign vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_s = vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest_q;
always @(vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_s or en or vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_q or cStage_uid62_lzcShifterZ1_uid6_fxpToFPTest_q)
begin
unique case (vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_s)
1'b0 : vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_q = vStagei_uid56_lzcShifterZ1_uid6_fxpToFPTest_q;
1'b1 : vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_q = cStage_uid62_lzcShifterZ1_uid6_fxpToFPTest_q;
default : vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_q = 32'b0;
endcase
end
// vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest(LOGICAL,65)@3
assign vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest_q = rVStage_uid65_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b == zs_uid64_lzcShifterZ1_uid6_fxpToFPTest_q ? 1'b1 : 1'b0;
// vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest(MUX,69)@3 + 1
assign vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_s = vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest_q;
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_q <= 32'b0;
end
else if (en == 1'b1)
begin
unique case (vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_s)
1'b0 : vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_q <= vStagei_uid63_lzcShifterZ1_uid6_fxpToFPTest_q;
1'b1 : vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_q <= cStage_uid69_lzcShifterZ1_uid6_fxpToFPTest_q;
default : vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_q <= 32'b0;
endcase
end
end
// vCount_uid73_lzcShifterZ1_uid6_fxpToFPTest(LOGICAL,72)@4
assign vCount_uid73_lzcShifterZ1_uid6_fxpToFPTest_q = rVStage_uid72_lzcShifterZ1_uid6_fxpToFPTest_merged_bit_select_b == GND_q ? 1'b1 : 1'b0;
// vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest(MUX,76)@4
assign vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest_s = vCount_uid73_lzcShifterZ1_uid6_fxpToFPTest_q;
always @(vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest_s or en or vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_q or cStage_uid76_lzcShifterZ1_uid6_fxpToFPTest_q)
begin
unique case (vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest_s)
1'b0 : vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest_q = vStagei_uid70_lzcShifterZ1_uid6_fxpToFPTest_q;
1'b1 : vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest_q = cStage_uid76_lzcShifterZ1_uid6_fxpToFPTest_q;
default : vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest_q = 32'b0;
endcase
end
// fracRnd_uid11_fxpToFPTest_merged_bit_select(BITSELECT,90)@4
assign fracRnd_uid11_fxpToFPTest_merged_bit_select_in = vStagei_uid77_lzcShifterZ1_uid6_fxpToFPTest_q[30:0];
assign fracRnd_uid11_fxpToFPTest_merged_bit_select_b = fracRnd_uid11_fxpToFPTest_merged_bit_select_in[30:7];
assign fracRnd_uid11_fxpToFPTest_merged_bit_select_c = fracRnd_uid11_fxpToFPTest_merged_bit_select_in[6:0];
// sticky_uid16_fxpToFPTest(LOGICAL,15)@4 + 1
assign sticky_uid16_fxpToFPTest_qi = fracRnd_uid11_fxpToFPTest_merged_bit_select_c != 7'b0000000 ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
sticky_uid16_fxpToFPTest_delay ( .xin(sticky_uid16_fxpToFPTest_qi), .xout(sticky_uid16_fxpToFPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// nr_uid17_fxpToFPTest(LOGICAL,16)@5
assign nr_uid17_fxpToFPTest_q = ~ (l_uid13_fxpToFPTest_merged_bit_select_c);
// maxCount_uid7_fxpToFPTest(CONSTANT,6)
assign maxCount_uid7_fxpToFPTest_q = 6'b100000;
// redist5_vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_q_4(DELAY,96)
dspba_delay_ver #( .width(1), .depth(3), .reset_kind("ASYNC") )
redist5_vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_q_4 ( .xin(vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_q), .xout(redist5_vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_q_4_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist4_vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest_q_3(DELAY,95)
dspba_delay_ver #( .width(1), .depth(3), .reset_kind("ASYNC") )
redist4_vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest_q_3 ( .xin(vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest_q), .xout(redist4_vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest_q_3_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist3_vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest_q_2(DELAY,94)
dspba_delay_ver #( .width(1), .depth(2), .reset_kind("ASYNC") )
redist3_vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest_q_2 ( .xin(vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest_q), .xout(redist3_vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest_q_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist2_vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest_q_1(DELAY,93)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist2_vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest_q_1 ( .xin(vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest_q), .xout(redist2_vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest_q_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist1_vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest_q_1(DELAY,92)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist1_vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest_q_1 ( .xin(vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest_q), .xout(redist1_vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest_q_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// vCount_uid78_lzcShifterZ1_uid6_fxpToFPTest(BITJOIN,77)@4
assign vCount_uid78_lzcShifterZ1_uid6_fxpToFPTest_q = {redist5_vCount_uid40_lzcShifterZ1_uid6_fxpToFPTest_q_4_q, redist4_vCount_uid45_lzcShifterZ1_uid6_fxpToFPTest_q_3_q, redist3_vCount_uid52_lzcShifterZ1_uid6_fxpToFPTest_q_2_q, redist2_vCount_uid59_lzcShifterZ1_uid6_fxpToFPTest_q_1_q, redist1_vCount_uid66_lzcShifterZ1_uid6_fxpToFPTest_q_1_q, vCount_uid73_lzcShifterZ1_uid6_fxpToFPTest_q};
// vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest(COMPARE,79)@4
assign vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_a = {2'b00, maxCount_uid7_fxpToFPTest_q};
assign vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_b = {2'b00, vCount_uid78_lzcShifterZ1_uid6_fxpToFPTest_q};
assign vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_o = $unsigned(vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_a) - $unsigned(vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_b);
assign vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_c[0] = vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_o[7];
// vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest(MUX,81)@4 + 1
assign vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_s = vCountBig_uid80_lzcShifterZ1_uid6_fxpToFPTest_c;
always @ (posedge clk or posedge areset)
begin
if (areset)
begin
vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_q <= 6'b0;
end
else if (en == 1'b1)
begin
unique case (vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_s)
1'b0 : vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_q <= vCount_uid78_lzcShifterZ1_uid6_fxpToFPTest_q;
1'b1 : vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_q <= maxCount_uid7_fxpToFPTest_q;
default : vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_q <= 6'b0;
endcase
end
end
// msbIn_uid9_fxpToFPTest(CONSTANT,8)
assign msbIn_uid9_fxpToFPTest_q = 8'b10011110;
// expPreRnd_uid10_fxpToFPTest(SUB,9)@5
assign expPreRnd_uid10_fxpToFPTest_a = {1'b0, msbIn_uid9_fxpToFPTest_q};
assign expPreRnd_uid10_fxpToFPTest_b = {3'b000, vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_q};
assign expPreRnd_uid10_fxpToFPTest_o = $unsigned(expPreRnd_uid10_fxpToFPTest_a) - $unsigned(expPreRnd_uid10_fxpToFPTest_b);
assign expPreRnd_uid10_fxpToFPTest_q = expPreRnd_uid10_fxpToFPTest_o[8:0];
// redist0_fracRnd_uid11_fxpToFPTest_merged_bit_select_b_1(DELAY,91)
dspba_delay_ver #( .width(24), .depth(1), .reset_kind("ASYNC") )
redist0_fracRnd_uid11_fxpToFPTest_merged_bit_select_b_1 ( .xin(fracRnd_uid11_fxpToFPTest_merged_bit_select_b), .xout(redist0_fracRnd_uid11_fxpToFPTest_merged_bit_select_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// expFracRnd_uid12_fxpToFPTest(BITJOIN,11)@5
assign expFracRnd_uid12_fxpToFPTest_q = {expPreRnd_uid10_fxpToFPTest_q, redist0_fracRnd_uid11_fxpToFPTest_merged_bit_select_b_1_q};
// l_uid13_fxpToFPTest_merged_bit_select(BITSELECT,84)@5
assign l_uid13_fxpToFPTest_merged_bit_select_in = expFracRnd_uid12_fxpToFPTest_q[1:0];
assign l_uid13_fxpToFPTest_merged_bit_select_b = l_uid13_fxpToFPTest_merged_bit_select_in[1:1];
assign l_uid13_fxpToFPTest_merged_bit_select_c = l_uid13_fxpToFPTest_merged_bit_select_in[0:0];
// rnd_uid18_fxpToFPTest(LOGICAL,17)@5 + 1
assign rnd_uid18_fxpToFPTest_qi = l_uid13_fxpToFPTest_merged_bit_select_b | nr_uid17_fxpToFPTest_q | sticky_uid16_fxpToFPTest_q;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
rnd_uid18_fxpToFPTest_delay ( .xin(rnd_uid18_fxpToFPTest_qi), .xout(rnd_uid18_fxpToFPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist8_expFracRnd_uid12_fxpToFPTest_q_1(DELAY,99)
dspba_delay_ver #( .width(33), .depth(1), .reset_kind("ASYNC") )
redist8_expFracRnd_uid12_fxpToFPTest_q_1 ( .xin(expFracRnd_uid12_fxpToFPTest_q), .xout(redist8_expFracRnd_uid12_fxpToFPTest_q_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// expFracR_uid20_fxpToFPTest(ADD,19)@6
assign expFracR_uid20_fxpToFPTest_a = {{2{redist8_expFracRnd_uid12_fxpToFPTest_q_1_q[32]}}, redist8_expFracRnd_uid12_fxpToFPTest_q_1_q};
assign expFracR_uid20_fxpToFPTest_b = {34'b0000000000000000000000000000000000, rnd_uid18_fxpToFPTest_q};
assign expFracR_uid20_fxpToFPTest_o = $signed(expFracR_uid20_fxpToFPTest_a) + $signed(expFracR_uid20_fxpToFPTest_b);
assign expFracR_uid20_fxpToFPTest_q = expFracR_uid20_fxpToFPTest_o[33:0];
// expR_uid22_fxpToFPTest(BITSELECT,21)@6
assign expR_uid22_fxpToFPTest_b = expFracR_uid20_fxpToFPTest_q[33:24];
// redist6_expR_uid22_fxpToFPTest_b_1(DELAY,97)
dspba_delay_ver #( .width(10), .depth(1), .reset_kind("ASYNC") )
redist6_expR_uid22_fxpToFPTest_b_1 ( .xin(expR_uid22_fxpToFPTest_b), .xout(redist6_expR_uid22_fxpToFPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// expR_uid34_fxpToFPTest(BITSELECT,33)@7
assign expR_uid34_fxpToFPTest_in = redist6_expR_uid22_fxpToFPTest_b_1_q[7:0];
assign expR_uid34_fxpToFPTest_b = expR_uid34_fxpToFPTest_in[7:0];
// ovf_uid25_fxpToFPTest(COMPARE,24)@7
assign ovf_uid25_fxpToFPTest_a = {{2{redist6_expR_uid22_fxpToFPTest_b_1_q[9]}}, redist6_expR_uid22_fxpToFPTest_b_1_q};
assign ovf_uid25_fxpToFPTest_b = {4'b0000, expInf_uid24_fxpToFPTest_q};
assign ovf_uid25_fxpToFPTest_o = $signed(ovf_uid25_fxpToFPTest_a) - $signed(ovf_uid25_fxpToFPTest_b);
assign ovf_uid25_fxpToFPTest_n[0] = ~ (ovf_uid25_fxpToFPTest_o[11]);
// inIsZero_uid8_fxpToFPTest(LOGICAL,7)@5 + 1
assign inIsZero_uid8_fxpToFPTest_qi = vCountFinal_uid82_lzcShifterZ1_uid6_fxpToFPTest_q == maxCount_uid7_fxpToFPTest_q ? 1'b1 : 1'b0;
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
inIsZero_uid8_fxpToFPTest_delay ( .xin(inIsZero_uid8_fxpToFPTest_qi), .xout(inIsZero_uid8_fxpToFPTest_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// redist9_inIsZero_uid8_fxpToFPTest_q_2(DELAY,100)
dspba_delay_ver #( .width(1), .depth(1), .reset_kind("ASYNC") )
redist9_inIsZero_uid8_fxpToFPTest_q_2 ( .xin(inIsZero_uid8_fxpToFPTest_q), .xout(redist9_inIsZero_uid8_fxpToFPTest_q_2_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// udf_uid23_fxpToFPTest(COMPARE,22)@7
assign udf_uid23_fxpToFPTest_a = {11'b00000000000, GND_q};
assign udf_uid23_fxpToFPTest_b = {{2{redist6_expR_uid22_fxpToFPTest_b_1_q[9]}}, redist6_expR_uid22_fxpToFPTest_b_1_q};
assign udf_uid23_fxpToFPTest_o = $signed(udf_uid23_fxpToFPTest_a) - $signed(udf_uid23_fxpToFPTest_b);
assign udf_uid23_fxpToFPTest_n[0] = ~ (udf_uid23_fxpToFPTest_o[11]);
// udfOrInZero_uid29_fxpToFPTest(LOGICAL,28)@7
assign udfOrInZero_uid29_fxpToFPTest_q = udf_uid23_fxpToFPTest_n | redist9_inIsZero_uid8_fxpToFPTest_q_2_q;
// excSelector_uid30_fxpToFPTest(BITJOIN,29)@7
assign excSelector_uid30_fxpToFPTest_q = {ovf_uid25_fxpToFPTest_n, udfOrInZero_uid29_fxpToFPTest_q};
// expRPostExc_uid35_fxpToFPTest(MUX,34)@7
assign expRPostExc_uid35_fxpToFPTest_s = excSelector_uid30_fxpToFPTest_q;
always @(expRPostExc_uid35_fxpToFPTest_s or en or expR_uid34_fxpToFPTest_b or expZ_uid33_fxpToFPTest_q or expInf_uid24_fxpToFPTest_q)
begin
unique case (expRPostExc_uid35_fxpToFPTest_s)
2'b00 : expRPostExc_uid35_fxpToFPTest_q = expR_uid34_fxpToFPTest_b;
2'b01 : expRPostExc_uid35_fxpToFPTest_q = expZ_uid33_fxpToFPTest_q;
2'b10 : expRPostExc_uid35_fxpToFPTest_q = expInf_uid24_fxpToFPTest_q;
2'b11 : expRPostExc_uid35_fxpToFPTest_q = expInf_uid24_fxpToFPTest_q;
default : expRPostExc_uid35_fxpToFPTest_q = 8'b0;
endcase
end
// fracZ_uid27_fxpToFPTest(CONSTANT,26)
assign fracZ_uid27_fxpToFPTest_q = 23'b00000000000000000000000;
// fracR_uid21_fxpToFPTest(BITSELECT,20)@6
assign fracR_uid21_fxpToFPTest_in = expFracR_uid20_fxpToFPTest_q[23:0];
assign fracR_uid21_fxpToFPTest_b = fracR_uid21_fxpToFPTest_in[23:1];
// redist7_fracR_uid21_fxpToFPTest_b_1(DELAY,98)
dspba_delay_ver #( .width(23), .depth(1), .reset_kind("ASYNC") )
redist7_fracR_uid21_fxpToFPTest_b_1 ( .xin(fracR_uid21_fxpToFPTest_b), .xout(redist7_fracR_uid21_fxpToFPTest_b_1_q), .ena(en[0]), .clk(clk), .aclr(areset) );
// excSelector_uid26_fxpToFPTest(LOGICAL,25)@7
assign excSelector_uid26_fxpToFPTest_q = redist9_inIsZero_uid8_fxpToFPTest_q_2_q | ovf_uid25_fxpToFPTest_n | udf_uid23_fxpToFPTest_n;
// fracRPostExc_uid28_fxpToFPTest(MUX,27)@7
assign fracRPostExc_uid28_fxpToFPTest_s = excSelector_uid26_fxpToFPTest_q;
always @(fracRPostExc_uid28_fxpToFPTest_s or en or redist7_fracR_uid21_fxpToFPTest_b_1_q or fracZ_uid27_fxpToFPTest_q)
begin
unique case (fracRPostExc_uid28_fxpToFPTest_s)
1'b0 : fracRPostExc_uid28_fxpToFPTest_q = redist7_fracR_uid21_fxpToFPTest_b_1_q;
1'b1 : fracRPostExc_uid28_fxpToFPTest_q = fracZ_uid27_fxpToFPTest_q;
default : fracRPostExc_uid28_fxpToFPTest_q = 23'b0;
endcase
end
// outRes_uid36_fxpToFPTest(BITJOIN,35)@7
assign outRes_uid36_fxpToFPTest_q = {GND_q, expRPostExc_uid35_fxpToFPTest_q, fracRPostExc_uid28_fxpToFPTest_q};
// xOut(GPOUT,4)@7
assign q = outRes_uid36_fxpToFPTest_q;
endmodule

View File

@@ -0,0 +1,95 @@
// Legal Notice: Copyright 2017 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing device programming or simulation files), and
// any associated documentation or information are expressly subject to the
// terms and conditions of the Intel FPGA Software License Agreement,
// Intel MegaCore Function License Agreement, or other applicable license
// agreement, including, without limitation, that your use is for the sole
// purpose of programming logic devices manufactured by Intel and sold by
// Intel or its authorized distributors. Please refer to the applicable
// agreement for further details.
// dspba_delay_ver: delay line that carries `xin` to `xout` through `depth`
// register stages of `width` bits each. When `depth` is not greater than 0
// the module degenerates to a combinational pass-through (xout = xin).
//
// Parameters:
//   width      - bit width of xin, xout and of every stage register.
//   depth      - number of register stages between xin and xout.
//   reset_high - level of `aclr` that clears the stages (see `reset` below).
//   reset_kind - "ASYNC", "SYNC" or "NONE"; selects which always block is
//                generated for each stage. Any other string generates no
//                always block at all, leaving `delays` undriven.
// Ports:
//   clk  - clock; every stage updates on its rising edge.
//   aclr - clear input, polarity selected by reset_high.
//   ena  - shift enable; stages hold their value while it is 0.
//   xin  - data entering stage 0.
//   xout - contents of the last stage (delays[depth-1]).
//
// NOTE: the loop increment `++i` is SystemVerilog syntax, so this file has to
// be compiled in a SystemVerilog-aware mode.
module dspba_delay_ver
#(
parameter width = 8,
parameter depth = 1,
parameter reset_high = 1'b1,
parameter reset_kind = "ASYNC"
) (
input clk,
input aclr,
input ena,
input [width-1:0] xin,
output [width-1:0] xout
);
// Internal clear, active low: `reset` is 0 exactly when aclr == reset_high.
wire reset;
// One register per pipeline stage; index 0 is the stage fed by xin.
reg [width-1:0] delays [depth-1:0];
assign reset = aclr ^ reset_high;
generate
if (depth > 0)
begin
genvar i;
// One always block per stage; `i` is an elaboration-time constant inside it.
for (i = 0; i < depth; ++i)
begin : delay_block
// Asynchronous clear: the falling edge of `reset` is in the sensitivity list.
// NOTE(review): this branch is labelled `sync_reset` although it implements
// the "ASYNC" kind (and the next one is labelled `async_reset` for "SYNC").
// The labels look swapped; they are left as-is because they are part of the
// generated hierarchical names.
if (reset_kind == "ASYNC")
begin : sync_reset
always @ (posedge clk or negedge reset)
begin: a
if (!reset) begin
delays[i] <= 0;
end else begin
// Clear has priority over ena; shifting only happens while ena is set.
if (ena) begin
if (i > 0) begin
delays[i] <= delays[i - 1];
end else begin
delays[i] <= xin;
end
end
end
end
end
// Synchronous clear: `reset` is only sampled on the rising clock edge.
if (reset_kind == "SYNC")
begin : async_reset
always @ (posedge clk)
begin: a
if (!reset) begin
delays[i] <= 0;
end else begin
if (ena) begin
if (i > 0) begin
delays[i] <= delays[i - 1];
end else begin
delays[i] <= xin;
end
end
end
end
end
// No clear at all: stages are never reset by aclr.
if (reset_kind == "NONE")
begin : no_reset
always @ (posedge clk)
begin: a
if (ena) begin
if (i > 0) begin
delays[i] <= delays[i - 1];
end else begin
delays[i] <= xin;
end
end
end
end
end
// Output is the last stage, i.e. xin as it was `depth` enabled clocks earlier.
assign xout = delays[depth - 1];
end else begin
// depth <= 0: no registers, pure wire.
assign xout = xin;
end
endgenerate
endmodule

1
hw/rtl/fp_cores/fpnew Submodule

Submodule hw/rtl/fp_cores/fpnew added at 1def7bb630

View File

@@ -0,0 +1,230 @@
#include <stdio.h>
#include <math.h>
#include <unordered_map>
#include <vector>
#include <mutex>
#include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h"
// C-linkage prototypes for the DPI entry points implemented later in this
// file, so that their symbol names are not C++-mangled.
// Common argument pattern:
//   inst    - integer key selecting the per-call-site ShiftRegister.
//   enable  - when false the call does not push a new value.
//   a, b, c - operands passed as 32-bit integers (the float functions
//             reinterpret them as float bit patterns via Float_t).
//   result  - out-parameter receiving the value at the head of the
//             ShiftRegister after the call.
extern "C" {
void dpi_fadd(int inst, bool enable, int a, int b, int* result);
void dpi_fsub(int inst, bool enable, int a, int b, int* result);
void dpi_fmul(int inst, bool enable, int a, int b, int* result);
void dpi_fmadd(int inst, bool enable, int a, int b, int c, int* result);
void dpi_fmsub(int inst, bool enable, int a, int b, int c, int* result);
void dpi_fdiv(int inst, bool enable, int a, int b, int* result);
void dpi_fsqrt(int inst, bool enable, int a, int* result);
void dpi_ftoi(int inst, bool enable, int a, int* result);
void dpi_ftou(int inst, bool enable, int a, int* result);
void dpi_itof(int inst, bool enable, int a, int* result);
void dpi_utof(int inst, bool enable, int a, int* result);
}
// Fixed-depth shift register of ints.
//
// Values enter at the back (index depth-1) and move one slot towards the
// front (index 0) on every enabled push(); top() reads the front slot.
// All slots start at 0. The depth is fixed by the first ensure_init() call.
//
// A register whose depth is zero (never initialised, or initialised with a
// non-positive depth) is valid but inert: push() does nothing and top()
// returns 0. Previously that case indexed the empty buffer out of bounds,
// because `depth_ - 1` wraps around on an unsigned value.
class ShiftRegister {
public:
  ShiftRegister() : init_(false), depth_(0) {}

  // Allocates `depth` zeroed slots on the first call; later calls are ignored.
  // A negative depth is treated as 0 instead of being converted to a huge
  // unsigned size.
  void ensure_init(int depth) {
    if (init_)
      return;
    depth_ = (depth > 0) ? static_cast<unsigned>(depth) : 0u;
    buffer_.assign(depth_, 0);
    init_ = true;
  }

  // Shifts every slot one position towards the front and stores `value` in
  // the last slot. Does nothing when `enable` is false or the depth is zero.
  void push(int value, bool enable) {
    if (!enable || depth_ == 0)
      return;
    // `i + 1 < depth_` avoids the unsigned underflow of `depth_ - 1`.
    for (unsigned i = 0; i + 1 < depth_; ++i) {
      buffer_[i] = buffer_[i + 1];
    }
    buffer_[depth_ - 1] = value;
  }

  // Returns the front (oldest) slot, or 0 when the register has no slots.
  int top() const {
    return buffer_.empty() ? 0 : buffer_[0];
  }

private:
  std::vector<int> buffer_;  // slot 0 is the front / oldest entry
  bool init_;                // true once ensure_init() has fixed the depth
  unsigned depth_;           // number of slots in buffer_
};
// Overlays three views on the same 32 bits of storage: a float (`f`), an
// int (`i`) and the bit-fields in `parts` (23-bit man, 8-bit exp, 1-bit
// sign). The DPI functions in this file write one of `f`/`i` and read the
// other to convert between a float and its bit pattern.
// NOTE(review): reading a union member other than the one last written is
// not guaranteed by the C++ standard (compilers such as GCC support it as an
// extension), and the bit-field order inside `parts` is implementation-
// defined. `parts` is not referenced by the functions in this file.
union Float_t {
float f;
int i;
struct {
uint32_t man : 23;
uint32_t exp : 8;
uint32_t sign : 1;
} parts;
};
// Registry mapping an instance id to its ShiftRegister.
//
// get() default-constructs the entry on first use. Only the map lookup /
// insertion is protected by the mutex; the returned reference is used by the
// caller without holding the lock. References into std::unordered_map stay
// valid when other keys are inserted, so returning one is safe with respect
// to later insertions.
class Instances {
public:
  // Returns the ShiftRegister registered under `inst`, creating it if needed.
  ShiftRegister& get(int inst) {
    // RAII guard: the mutex is released even if the insertion throws, which
    // the previous manual lock()/unlock() pair did not guarantee.
    std::lock_guard<std::mutex> guard(mutex_);
    return instances_[inst];
  }

private:
  std::unordered_map<int, ShiftRegister> instances_;
  std::mutex mutex_;
};

// Process-wide registry shared by all dpi_* entry points.
Instances instances;
void dpi_fadd(int inst, bool enable, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f + fb.f;
sr.ensure_init(LATENCY_FADDMUL);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fsub(int inst, bool enable, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f - fb.f;
sr.ensure_init(LATENCY_FADDMUL);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fmul(int inst, bool enable, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f * fb.f;
sr.ensure_init(LATENCY_FADDMUL);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fmadd(int inst, bool enable, int a, int b, int c, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fc, fr;
fa.i = a;
fb.i = b;
fc.i = c;
fr.f = fa.f * fb.f + fc.f;
sr.ensure_init(LATENCY_FMADD);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fmsub(int inst, bool enable, int a, int b, int c, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fc, fr;
fa.i = a;
fb.i = b;
fc.i = c;
fr.f = fa.f * fb.f - fc.f;
sr.ensure_init(LATENCY_FMADD);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fdiv(int inst, bool enable, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f / fb.f;
sr.ensure_init(LATENCY_FDIV);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fsqrt(int inst, bool enable, int a, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fr;
fa.i = a;
fr.f = sqrtf(fa.f);
sr.ensure_init(LATENCY_FSQRT);
sr.push(fr.i, enable);
*result = sr.top();
}
// DPI model of float -> signed 32-bit integer conversion.
//
// Interprets `a` as a float bit pattern, converts it with truncation toward
// zero, pushes the integer into the LATENCY_FTOI-deep register selected by
// `inst` (only when `enable` is set) and writes the register's front entry
// to `*result`.
//
// Casting a NaN or an out-of-range float to int is undefined behaviour in
// C++, so those inputs are handled explicitly instead of being cast.
// NOTE(review): the saturation values below follow the RISC-V FCVT.W.S
// convention (NaN and positive overflow -> INT_MAX, negative overflow ->
// INT_MIN) -- confirm this matches the hardware FPU being modelled.
void dpi_ftoi(int inst, bool enable, int a, int* result) {
  ShiftRegister& sr = instances.get(inst);

  Float_t fa, fr;
  fa.i = a;

  if (fa.f != fa.f) {
    // NaN compares unequal to itself.
    fr.i = 2147483647;
  } else if (fa.f >= 2147483648.0f) {
    // 2^31 and above (including +inf) do not fit in an int.
    fr.i = 2147483647;
  } else if (fa.f <= -2147483648.0f) {
    // -2^31 and below (including -inf) clamp to INT_MIN.
    fr.i = -2147483647 - 1;
  } else {
    fr.i = int(fa.f);
  }

  sr.ensure_init(LATENCY_FTOI);
  sr.push(fr.i, enable);
  *result = sr.top();
}
// DPI model of float -> unsigned 32-bit integer conversion.
//
// Interprets `a` as a float bit pattern, converts it with truncation toward
// zero, pushes the result (stored as an int bit pattern) into the
// LATENCY_FTOI-deep register selected by `inst` (only when `enable` is set)
// and writes the register's front entry to `*result`.
//
// Casting a NaN, a value <= -1 or a value >= 2^32 to unsigned is undefined
// behaviour in C++, so those inputs are handled explicitly instead of being
// cast.
// NOTE(review): the saturation values below follow the RISC-V FCVT.WU.S
// convention (NaN and positive overflow -> 0xFFFFFFFF, negative -> 0) --
// confirm this matches the hardware FPU being modelled.
void dpi_ftou(int inst, bool enable, int a, int* result) {
  ShiftRegister& sr = instances.get(inst);

  Float_t fa, fr;
  fa.i = a;

  unsigned converted;
  if (fa.f != fa.f || fa.f >= 4294967296.0f) {
    // NaN (unequal to itself) or 2^32 and above, including +inf.
    converted = 4294967295u;
  } else if (fa.f <= -1.0f) {
    // Values in (-1, 0) truncate to 0 and are handled by the cast below.
    converted = 0u;
  } else {
    converted = unsigned(fa.f);
  }
  // Same unsigned -> int bit-pattern reinterpretation as before.
  fr.i = int(converted);

  sr.ensure_init(LATENCY_FTOI);
  sr.push(fr.i, enable);
  *result = sr.top();
}
// DPI model of signed 32-bit integer -> float conversion: converts `a` to
// float, pushes the float's bit pattern into the LATENCY_ITOF-deep register
// selected by `inst` (only when `enable` is set) and writes the register's
// front entry to `*result`.
void dpi_itof(int inst, bool enable, int a, int* result) {
  ShiftRegister& sr = instances.get(inst);

  // Only the result needs the float/int overlay; the operand is already an
  // integer (the unused `fa` local was removed).
  Float_t fr;
  fr.f = (float)a;

  sr.ensure_init(LATENCY_ITOF);
  sr.push(fr.i, enable);
  *result = sr.top();
}
// DPI model of unsigned 32-bit integer -> float conversion: reinterprets `a`
// as unsigned, converts it to float, pushes the float's bit pattern into the
// LATENCY_ITOF-deep register selected by `inst` (only when `enable` is set)
// and writes the register's front entry to `*result`.
void dpi_utof(int inst, bool enable, int a, int* result) {
  ShiftRegister& sr = instances.get(inst);

  // Only the result needs the float/int overlay (the unused `fa` local was
  // removed).
  Float_t fr;
  unsigned ua = a;  // same 32 bits, read as an unsigned value
  fr.f = (float)ua;

  sr.ensure_init(LATENCY_ITOF);
  sr.push(fr.i, enable);
  *result = sr.top();
}

View File

@@ -0,0 +1,16 @@
// SystemVerilog-side DPI-C import declarations for the dpi_* floating-point
// model functions. Guarded by FLOAT_DPI so the file can be included more
// than once.
// Each function takes an instance id (`inst`, no explicit direction, so it
// defaults to input), an `enable` flag, one to three 32-bit operands and
// returns its value through the `result` output argument.
// NOTE(review): `enable` is declared as `logic` here; the DPI-C standard maps
// a scalar `logic` to svLogic on the C side -- confirm this agrees with the
// argument type used by the C/C++ implementation.
`ifndef FLOAT_DPI
`define FLOAT_DPI
import "DPI-C" context function void dpi_fadd(int inst, input logic enable, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsub(int inst, input logic enable, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fmul(int inst, input logic enable, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fmadd(int inst, input logic enable, input int a, input int b, input int c, output int result);
import "DPI-C" context function void dpi_fmsub(int inst, input logic enable, input int a, input int b, input int c, output int result);
import "DPI-C" context function void dpi_fdiv(int inst, input logic enable, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsqrt(int inst, input logic enable, input int a, output int result);
import "DPI-C" context function void dpi_ftoi(int inst, input logic enable, input int a, output int result);
import "DPI-C" context function void dpi_ftou(int inst, input logic enable, input int a, output int result);
import "DPI-C" context function void dpi_itof(int inst, input logic enable, input int a, output int result);
import "DPI-C" context function void dpi_utof(int inst, input logic enable, input int a, output int result);
`endif

View File

@@ -0,0 +1,29 @@
// VX_alu_req_if: bundle of wires grouped as one interface. No modports are
// declared, so signal directions are not enforced by the interface itself.
// Widths that are not literal come from macros (`NW_BITS, `NUM_THREADS,
// `ALU_BR_BITS, `NT_BITS, `NR_BITS) expected from VX_define.vh.
// NOTE(review): judging by the names this carries an ALU request with a
// valid/ready handshake -- confirm against the modules that connect to it.
`ifndef VX_ALU_REQ_IF
`define VX_ALU_REQ_IF
`include "VX_define.vh"
interface VX_alu_req_if ();
wire valid;
wire [`NW_BITS-1:0] wid;
// One bit per thread.
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [31:0] next_PC;
wire [`ALU_BR_BITS-1:0] op_type;
wire is_br_op;
wire rs1_is_PC;
wire rs2_is_imm;
wire [31:0] imm;
wire [`NT_BITS-1:0] tid;
// One 32-bit word per thread.
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NR_BITS-1:0] rd;
wire wb;
wire ready;
endinterface
`endif

View File

@@ -1,40 +0,0 @@
// VX_backend_req_if: bundle of wires grouped as one interface. No modports
// are declared, so signal directions are not enforced by the interface
// itself. Macro widths (`NUM_THREADS, `NW_BITS, `BYTE_EN_BITS) are expected
// from VX_define.vh.
// NOTE(review): the include-guard macro (VX_FrE_to_BCKBE_REQ_IF) does not
// match the interface name; it looks like a leftover from an earlier name.
`ifndef VX_FrE_to_BCKBE_REQ_IF
`define VX_FrE_to_BCKBE_REQ_IF
`include "VX_define.vh"
interface VX_backend_req_if ();
// One valid bit per thread.
wire [`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;
wire [31:0] curr_PC;
wire [11:0] csr_addr;
wire is_csr;
wire csr_immed;
wire [31:0] csr_mask;
wire [4:0] rd;
wire [4:0] rs1;
wire [4:0] rs2;
wire [4:0] alu_op;
wire [1:0] wb;
wire rs2_src;
wire [31:0] itype_immed;
wire [`BYTE_EN_BITS-1:0] mem_read;
wire [`BYTE_EN_BITS-1:0] mem_write;
wire [2:0] branch_type;
wire [19:0] upper_immed;
wire is_etype;
wire is_jal;
wire jal;
wire [31:0] jal_offset;
wire [31:0] next_PC;
// GPGPU-specific instruction flags
wire is_wspawn;
wire is_tmc;
wire is_split;
wire is_barrier;
endinterface
`endif

View File

@@ -0,0 +1,15 @@
// VX_branch_ctl_if: bundle of four wires (valid, wid, taken, dest) grouped
// as one interface. No modports are declared, so signal directions are not
// enforced by the interface itself. `NW_BITS is expected from VX_define.vh.
// NOTE(review): the include-guard macro (VX_BRANCH_RSP_IF) does not match the
// interface name (VX_branch_ctl_if) -- make sure no other header uses the
// same guard, otherwise one of the two would be silently skipped.
`ifndef VX_BRANCH_RSP_IF
`define VX_BRANCH_RSP_IF
`include "VX_define.vh"
interface VX_branch_ctl_if ();
wire valid;
wire [`NW_BITS-1:0] wid;
wire taken;
wire [31:0] dest;
endinterface
`endif

Some files were not shown because too many files have changed in this diff Show More