diff --git a/rtl/VX_alu.v b/rtl/VX_alu.v
index 3b308297..9688aad2 100644
--- a/rtl/VX_alu.v
+++ b/rtl/VX_alu.v
@@ -1,4 +1,3 @@
-
 `include "VX_define.v"
 
 module VX_alu(
@@ -13,6 +12,71 @@ module VX_alu(
 
 	);
 
+	`ifdef SYN_FUNC
+	wire which_in2; // 1 when the second operand is the I-type immediate
+
+	wire[31:0] ALU_in1;
+	wire[31:0] ALU_in2;
+	wire[63:0] ALU_in1_mult;
+	wire[63:0] ALU_in2_mult;
+	wire[31:0] upper_immed;
+	wire[31:0] div_result;
+	wire[31:0] rem_result;
+
+
+	assign which_in2 = in_rs2_src == `RS2_IMMED;
+
+	assign ALU_in1 = in_1;
+
+	assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
+
+
+	assign upper_immed = {in_upper_immed, {12{1'b0}}}; // upper immediate placed above 12 zero bits
+
+
+
+	//always @(posedge `MUL) begin
+
+
+	/* verilator lint_off UNUSED */
+
+
+	wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1}; // sign-extend to 64 bits
+	wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2}; // sign-extend to 64 bits
+	assign ALU_in1_mult = (in_alu_op == `MULHU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in1} : alu_in1_signed;
+	assign ALU_in2_mult = (in_alu_op == `MULHU || in_alu_op == `MULHSU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in2} : alu_in2_signed;
+	wire[63:0] mult_result = ALU_in1_mult * ALU_in2_mult; // shared 64-bit product for MUL/MULH/MULHSU/MULHU
+
+	/* verilator lint_on UNUSED */
+
+	always @(*) begin // implicit list: the case also reads mult_result, upper_immed and in_curr_PC
+		case(in_alu_op)
+			`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
+			`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
+			`SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0];
+			`SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
+			`SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
+			`XOR: out_alu_result = ALU_in1 ^ ALU_in2;
+			`SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0];
+			`SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
+			`OR: out_alu_result = ALU_in1 | ALU_in2;
+			`AND: out_alu_result = ALU_in2 & ALU_in1;
+			`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
+			`LUI_ALU: out_alu_result = upper_immed;
+			`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
+			`MUL: out_alu_result = mult_result[31:0];
+			`MULH: out_alu_result = mult_result[63:32];
+			`MULHSU: out_alu_result = mult_result[63:32];
+			`MULHU: out_alu_result = mult_result[63:32];
+			`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2)); // divide-by-zero yields all ones
+			`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2;
+			`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2)); // remainder by zero yields the dividend
+			`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
+			default: out_alu_result = 32'h0;
+		endcase // in_alu_op
+	end
+
+	`else
 	wire which_in2;
 
 	wire[31:0] ALU_in1;
@@ -69,7 +133,7 @@ module VX_alu(
 			`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
 			default: out_alu_result = 32'h0;
 		endcase // in_alu_op
-	end
-
+	end
+	`endif
 
 endmodule // VX_alu
\ No newline at end of file
diff --git a/rtl/VX_define.v b/rtl/VX_define.v
index a577bf38..b40567f8 100644
--- a/rtl/VX_define.v
+++ b/rtl/VX_define.v
@@ -1,11 +1,10 @@
+`include "./VX_define_synth.v"
+
+
 
-`define NT 4
 `define NT_M1 (`NT-1)
 
 // NW_M1 is actually log2(NW)
-//`define NW_M1 (4-1)
-
-`define NW 8
 `define NW_M1 (`CLOG2(`NW))
 
 // Uncomment the below line if NW=1
@@ -13,6 +12,7 @@
 
 // `define SYN 1
 // `define ASIC 1
+// `define SYN_FUNC 1
 
 `define NUM_BARRIERS 4
 
diff --git a/rtl/VX_define_synth.v b/rtl/VX_define_synth.v
new file mode 100644
index 00000000..0136b895
--- /dev/null
+++ b/rtl/VX_define_synth.v
@@ -0,0 +1,2 @@
+`define NT 32
+`define NW 8
diff --git a/rtl/VX_gpr.v b/rtl/VX_gpr.v
index 890e3041..6f239c51 100644
--- a/rtl/VX_gpr.v
+++ b/rtl/VX_gpr.v
@@ -85,83 +85,87 @@ module VX_gpr (
 
 		wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
 
-		/* verilator lint_off PINCONNECTEMPTY */
-		rf2_32x128_wm1 first_ram (
-			.CENYA(),
-			.AYA(),
-			.CENYB(),
-			.WENYB(),
-			.AYB(),
-			.QA(temp_a),
-			.SOA(),
-			.SOB(),
-			.CLKA(clk),
-			.CENA(cena_1),
-			.AA(VX_gpr_read.rs1),
-			.CLKB(clk),
-			.CENB(cenb),
-			.WENB(write_bit_mask),
-			.AB(VX_writeback_inter.rd),
-			.DB(to_write),
-			.EMAA(3'b011),
-			.EMASA(1'b0),
-			.EMAB(3'b011),
-			.TENA(1'b1),
-			.TCENA(1'b0),
-			.TAA(5'b0),
-			.TENB(1'b1),
-			.TCENB(1'b0),
-			.TWENB(128'b0),
-			.TAB(5'b0),
-			.TDB(128'b0),
-			.RET1N(1'b1),
-			.SIA(2'b0),
-			.SEA(1'b0),
-			.DFTRAMBYP(1'b0),
-			.SIB(2'b0),
-			.SEB(1'b0),
-			.COLLDISN(1'b1)
-		);
-		/* verilator lint_on PINCONNECTEMPTY */
+		genvar curr_base_thread;
+		for (curr_base_thread = 0; curr_base_thread < `NT; curr_base_thread=curr_base_thread+4) // one 128-bit RAM pair per 4 threads; NOTE(review): assumes `NT is a multiple of 4
+		begin
+			/* verilator lint_off PINCONNECTEMPTY */
+			rf2_32x128_wm1 first_ram (
+				.CENYA(),
+				.AYA(),
+				.CENYB(),
+				.WENYB(),
+				.AYB(),
+				.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
+				.SOA(),
+				.SOB(),
+				.CLKA(clk),
+				.CENA(cena_1),
+				.AA(VX_gpr_read.rs1), // whole read address, as before the banking change; NOTE(review): presumably one index shared by all threads - confirm
+				.CLKB(clk),
+				.CENB(cenb),
+				.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
+				.AB(VX_writeback_inter.rd), // whole write address; rd is compared against 0 as a single value above
+				.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
+				.EMAA(3'b011),
+				.EMASA(1'b0),
+				.EMAB(3'b011),
+				.TENA(1'b1),
+				.TCENA(1'b0),
+				.TAA(5'b0),
+				.TENB(1'b1),
+				.TCENB(1'b0),
+				.TWENB(128'b0),
+				.TAB(5'b0),
+				.TDB(128'b0),
+				.RET1N(1'b1),
+				.SIA(2'b0),
+				.SEA(1'b0),
+				.DFTRAMBYP(1'b0),
+				.SIB(2'b0),
+				.SEB(1'b0),
+				.COLLDISN(1'b1)
+			);
+			/* verilator lint_on PINCONNECTEMPTY */
 
-		/* verilator lint_off PINCONNECTEMPTY */
-		rf2_32x128_wm1 second_ram (
-			.CENYA(),
-			.AYA(),
-			.CENYB(),
-			.WENYB(),
-			.AYB(),
-			.QA(temp_b),
-			.SOA(),
-			.SOB(),
-			.CLKA(clk),
-			.CENA(cena_2),
-			.AA(VX_gpr_read.rs2),
-			.CLKB(clk),
-			.CENB(cenb),
-			.WENB(write_bit_mask),
-			.AB(VX_writeback_inter.rd),
-			.DB(to_write),
-			.EMAA(3'b011),
-			.EMASA(1'b0),
-			.EMAB(3'b011),
-			.TENA(1'b1),
-			.TCENA(1'b0),
-			.TAA(5'b0),
-			.TENB(1'b1),
-			.TCENB(1'b0),
-			.TWENB(128'b0),
-			.TAB(5'b0),
-			.TDB(128'b0),
-			.RET1N(1'b1),
-			.SIA(2'b0),
-			.SEA(1'b0),
-			.DFTRAMBYP(1'b0),
-			.SIB(2'b0),
-			.SEB(1'b0),
-			.COLLDISN(1'b1)
-		);
-		/* verilator lint_on PINCONNECTEMPTY */
+			/* verilator lint_off PINCONNECTEMPTY */
+			rf2_32x128_wm1 second_ram (
+				.CENYA(),
+				.AYA(),
+				.CENYB(),
+				.WENYB(),
+				.AYB(),
+				.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
+				.SOA(),
+				.SOB(),
+				.CLKA(clk),
+				.CENA(cena_2),
+				.AA(VX_gpr_read.rs2), // whole read address, as before the banking change; NOTE(review): presumably one index shared by all threads - confirm
+				.CLKB(clk),
+				.CENB(cenb),
+				.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
+				.AB(VX_writeback_inter.rd), // whole write address; rd is compared against 0 as a single value above
+				.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
+				.EMAA(3'b011),
+				.EMASA(1'b0),
+				.EMAB(3'b011),
+				.TENA(1'b1),
+				.TCENA(1'b0),
+				.TAA(5'b0),
+				.TENB(1'b1),
+				.TCENB(1'b0),
+				.TWENB(128'b0),
+				.TAB(5'b0),
+				.TDB(128'b0),
+				.RET1N(1'b1),
+				.SIA(2'b0),
+				.SEA(1'b0),
+				.DFTRAMBYP(1'b0),
+				.SIB(2'b0),
+				.SEB(1'b0),
+				.COLLDISN(1'b1)
+			);
+			/* verilator lint_on PINCONNECTEMPTY */
+		end
 
 	`endif
 
diff --git a/rtl/cache/VX_d_cache.v b/rtl/cache/VX_d_cache.v
index fd6c9641..78b407f7 100644
--- a/rtl/cache/VX_d_cache.v
+++ b/rtl/cache/VX_d_cache.v
@@ -304,9 +304,15 @@ module VX_d_cache
 			// 0;
 
 			wire[1:0] byte_select = bank_addr[1:0];
+			wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START];
+
+			`ifdef SYN_FUNC
+			wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = 0; // SYN_FUNC build ties the offset to 0
+			wire[IND_SIZE_END:IND_SIZE_START] cache_index = 0; // SYN_FUNC build ties the index to 0
+			`else
 			wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = bank_addr[ADDR_OFFSET_END:ADDR_OFFSET_START];
 			wire[IND_SIZE_END:IND_SIZE_START] cache_index = bank_addr[ADDR_IND_END:ADDR_IND_START];
-			wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START];
+			`endif
 
 			wire normal_valid_in = valid_per_bank[bank_id];
 
diff --git a/rtl/modelsim/Makefile b/rtl/modelsim/Makefile
index 099b2e69..7a3a4efd 100644
--- a/rtl/modelsim/Makefile
+++ b/rtl/modelsim/Makefile
@@ -7,6 +7,7 @@ SRC = \
 	vortex_dpi.cpp \
 	vortex_tb.v \
 	../VX_define.v \
+	../VX_define_synth.v \
 	../interfaces/VX_branch_response_inter.v \
 	../interfaces/VX_csr_req_inter.v \
 	../interfaces/VX_csr_wb_inter.v \
diff --git a/syn/Makefile b/syn/Makefile
index 1cfe585c..ca3094bc 100644
--- a/syn/Makefile
+++ b/syn/Makefile
@@ -1,7 +1,33 @@
-all: syn
+SCRIPT_DIR=./scripts
+
+all: dc
 
 
 
 
-syn:
-	dc_shell-t -f fsyn.tcl 2>&1 | tee vortex_syn.log
\ No newline at end of file
+#syn:
+	#dc_shell-t -f esyn.tcl 2>&1 | tee vortex_syn.log
+	#dc_shell -f esyn.tcl 2>&1 | tee vortex_syn.log
+	#dc_shell -f $(SCRIPT_DIR)/dc/dc_script.tcl
+
+dc:
+	rm -rf rpt
+	mkdir rpt
+	dc_shell -f esyn.tcl 2>&1 | tee vortex_syn.log
+
+clean:
+	rm -f simv
+	rm -f *.vcd
+	rm -f *.key
+	rm -rf csrc/
+	rm -rf *.rpt
+	rm -rf *.log
+	rm -rf *.svf
+	rm -rf *.ddc
+	rm -rf results_synthesized.v
+	rm -rf results_synthesized.sdc
+	rm -rf alib-52/
+	rm -rf rpt/
+	rm -rf simv.daidir/
+	rm -rf encounter*
+	rm -rf ./synth_out
\ No newline at end of file
diff --git a/syn/esyn.tcl b/syn/esyn.tcl
new file mode 100644
index 00000000..10fa09d9
--- /dev/null
+++ b/syn/esyn.tcl
@@ -0,0 +1,53 @@
+#set search_path [concat /nethome/dshim8/Desktop/GTCAD-3DPKG-v3/example/tech/cln28hpm/2d_db/ /nethome/dshim8/Desktop/GTCAD-3DPKG-v3/example/tech/cln28hpm/2d_hard_db/ ../rtl/ ../rtl/interfaces ../rtl/pipe_regs ../rtl/shared_memory ../rtl/cache ../models/memory/cln28hpm/2d_hardmacro_db]
+set search_path [concat ../rtl/ ../rtl/interfaces ../rtl/pipe_regs ../rtl/shared_memory ../rtl/cache ../models/memory/cln28hpm/2d_hardmacro_db]
+set link_library [concat ./NanGate_15nm_OCL.db]
+set symbol_library {}
+set target_library [concat ./NanGate_15nm_OCL.db]
+
+set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v VX_cache_data_per_index.v VX_Cache_Bank.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_define_synth.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v VX_cache_bank_valid.v \
+ ]
+# set verilog_files [ list Vortex.v VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pc.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \
+# ]
+
+set top_level Vortex
+analyze -format sverilog $verilog_files
+#analyze -format sverilog -error=LINT-66 $verilog_files
+elaborate $top_level
+link
+
+set clk_freq 0.4
+set clk_period [expr 1000.0 / $clk_freq / 1.0] ;# evaluates to 2500; NOTE(review): 400 MHz only if the library time unit is ps - confirm
+create_clock [get_ports clk] -period $clk_period
+set_max_fanout 20 [get_ports clk]
+set_ideal_network [get_ports clk]
+
+set_max_fanout 20 [get_ports reset]
+set_false_path -from [get_ports reset]
+all_high_fanout -net -threshold 20
+
+# set_register_merging Vortex FALSE
+# set compile_seqmap_propagate_constants false
+# set compile_seqmap_propagate_high_effort false
+
+check_design
+compile_ultra -no_autoungroup
+ungroup -all -flatten
+uniquify
+
+define_name_rules verilog -remove_internal_net_bus -remove_port_bus
+change_names -rule verilog -hierarchy
+
+# report_qor
+report_area
+report_hierarchy
+report_cell
+report_reference
+report_port
+report_power
+
+write -hierarchy -format verilog -output Vortex.netlist.v
+remove_ideal_network [get_ports clk]
+set_propagated_clock [get_ports clk]
+write_sdc -version 1.9 Vortex.sdc
+write_file -format ddc -output Vortex.ddc
+exit
\ No newline at end of file
diff --git a/syn/run_mult_synth.sh b/syn/run_mult_synth.sh
new file mode 100644
index 00000000..81509aa7
--- /dev/null
+++ b/syn/run_mult_synth.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+top_level=Vortex  # bash assignment; the csh form "set x = y" only overwrites the positional parameters here
+
+source /tools/synopsys/synthesis/j201409/cshrc.syn  # NOTE(review): file name suggests csh syntax - confirm it can be sourced from bash
+cur_dir=$(pwd)
+echo "$cur_dir"
+
+for number_of_warps in 2 4 8 16 32; do
+	for number_of_threads in 2 4 8 16 32; do
+
+		echo "Warp Count: $number_of_warps Thread Count: $number_of_threads Launched"
+		echo "\`define NT $number_of_threads" > ../rtl/VX_define_synth.v  # regenerate the NT/NW defines for this run
+		echo "\`define NW $number_of_warps" >> ../rtl/VX_define_synth.v
+		make dc | tee run.log
+		sleep 30
+		moved_filename="${number_of_warps}_Warps__${number_of_threads}_threads__400MHz.log"
+		mv ./vortex_syn.log "../../$moved_filename"  # keep one synthesis log per configuration
+		sleep 30
+
+
+
+
+		echo "Warp Count: $number_of_warps Thread Count: $number_of_threads Finished"
+	done
+done
+
+
+echo "Done!"