diff --git a/README.md b/README.md index b74c549b..3ee11d3b 100644 --- a/README.md +++ b/README.md @@ -67,19 +67,19 @@ Build LLVM for RiscV $ cd llvm $ mkdir build $ cd build - $ export LLVM_RISCV_PATH=$PWD/../drops_riscv - $ cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DBUILD_SHARED_LIBS=True -DLLVM_USE_SPLIT_DWARF=True -DCMAKE_INSTALL_PREFIX=$LLVM_RISCV_PATH -DLLVM_OPTIMIZED_TABLEGEN=True -DLLVM_BUILD_TESTS=True -DDEFAULT_SYSROOT=$RISC_GNU_TOOLS_PATH/riscv32-unknown-elf -DLLVM_DEFAULT_TARGET_TRIPLE="riscv32-unknown-elf" -DLLVM_TARGETS_TO_BUILD="RISCV" .. + $ cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DBUILD_SHARED_LIBS=True -DLLVM_USE_SPLIT_DWARF=True -DCMAKE_INSTALL_PREFIX=$RISC_GNU_TOOLS_PATH -DLLVM_OPTIMIZED_TABLEGEN=True -DLLVM_BUILD_TESTS=True -DDEFAULT_SYSROOT=$RISC_GNU_TOOLS_PATH/riscv32-unknown-elf -DLLVM_DEFAULT_TARGET_TRIPLE="riscv32-unknown-elf" -DLLVM_TARGETS_TO_BUILD="RISCV" .. $ cmake --build . --target install - $ cp -rf $LLVM_RISCV_PATH $RISC_GNU_TOOLS_PATH Build pocl for RISCV - $ git clone https://github.com/pocl/pocl.git + $ git clone https://github.gatech.edu/casl/pocl.git $ cd pocl $ mkdir build $ cd build $ export POCL_CC_PATH=$PWD/../drops_riscv_cc $ export POCL_RT_PATH=$PWD/../drops_riscv_rt $ cmake -G Ninja -DCMAKE_INSTALL_PREFIX=$POCL_CC_PATH -DCMAKE_BUILD_TYPE=Debug -DWITH_LLVM_CONFIG=$RISC_GNU_TOOLS_PATH/bin/llvm-config -DLLC_HOST_CPU= -DNEWLIB_BSP=ON -DNEWLIB_DEVICE_ADDRESS_BIT=32 -DBUILD_TESTS=OFF -DPOCL_DEBUG_MESSAGES=ON .. + $ cmake --build . --target install + $ rm -rf * $ cmake -G Ninja -DCMAKE_INSTALL_PREFIX=$POCL_RT_PATH -DCMAKE_BUILD_TYPE=Debug -DOCS_AVAILABLE=OFF -DBUILD_SHARED_LIBS=OFF -DNEWLIB_BSP=ON -DNEWLIB_DEVICE_ADDRESS_BIT=32 -DBUILD_TESTS=OFF -DHOST_DEVICE_BUILD_HASH=basic-riscv32-unknown-elf -DCMAKE_TOOLCHAIN_FILE=../RISCV_newlib.cmake -DENABLE_TRACING=OFF -DENABLE_ICD=OFF -DPOCL_DEBUG_MESSAGES=ON .. - $ cmake --build . --target install \ No newline at end of file + $ cmake --build . 
--target install diff --git a/emulator/enc.cpp b/emulator/enc.cpp index 11bea0fe..24a2b857 100644 --- a/emulator/enc.cpp +++ b/emulator/enc.cpp @@ -24,7 +24,7 @@ ByteDecoder::ByteDecoder(const ArchDef &ad) { static void decodeError(string msg) { cout << "Instruction decoder error: " << msg << '\n'; - exit(1); + std::abort(); } void Encoder::encodeChunk(DataChunk &dest, const TextChunk &src) { @@ -386,7 +386,7 @@ Instruction *WordDecoder::decode(const std::vector &v, Size &idx) { break; defualt: cout << "Unrecognized argument class in word decoder.\n"; - exit(1); + std::abort(); } if (haveRefs && usedImm && refMap.find(idx-n/8) != refMap.end()) { diff --git a/emulator/include/mem.h b/emulator/include/mem.h index f3b072f1..3d1776ac 100644 --- a/emulator/include/mem.h +++ b/emulator/include/mem.h @@ -359,7 +359,7 @@ namespace Harp { char* content = new char[size]; int x = fread(content, 1, size, fp); - if (!x) { std::cout << "COULD NOT READ FILE\n"; exit(1);} + if (!x) { std::cout << "COULD NOT READ FILE\n"; std::abort();} int offset = 0; char* line = content; diff --git a/emulator/include/obj.h b/emulator/include/obj.h index d0ee4357..d39a09bd 100644 --- a/emulator/include/obj.h +++ b/emulator/include/obj.h @@ -40,7 +40,7 @@ namespace Harp { Ref(name, rel), addr(addr) { } virtual void bind(Addr addr, Addr base = 0) { std::cout << "Attempted to bind a SimpleRef.\n"; - exit(1); + std::abort(); } virtual Addr getAddr() const { return this->addr; } Byte *getAddrPtr() { return (Byte*)&addr; } @@ -86,7 +86,7 @@ namespace Harp { // std::cout << "Attempt to bind a " << bits << "-bit " // << (relative?"":"non-") << "relative symbol to an address" // " it cannot reach.\n"; -// exit(1); +// std::abort(); // } // virtual Addr getAddr() const { diff --git a/emulator/instruction.cpp b/emulator/instruction.cpp index e3e0882d..f77c584f 100644 --- a/emulator/instruction.cpp +++ b/emulator/instruction.cpp @@ -284,7 +284,7 @@ void Instruction::executeOn(Warp &c) { break; default: cout 
<< "unsupported MUL/DIV instr\n"; - exit(1); + std::abort(); } } else @@ -351,7 +351,7 @@ void Instruction::executeOn(Warp &c) { break; default: cout << "ERROR: UNSUPPORTED R INST\n"; - exit(1); + std::abort(); } } break; @@ -388,7 +388,7 @@ void Instruction::executeOn(Warp &c) { break; default: cout << "ERROR: UNSUPPORTED L INST\n"; - exit(1); + std::abort(); c.memAccesses.push_back(Warp::MemAccess(false, memAddr)); } break; @@ -475,7 +475,7 @@ void Instruction::executeOn(Warp &c) { break; default: cout << "ERROR: UNSUPPORTED L INST\n"; - exit(1); + std::abort(); } break; case S_INST: @@ -507,7 +507,7 @@ void Instruction::executeOn(Warp &c) { break; default: cout << "ERROR: UNSUPPORTED S INST\n"; - exit(1); + std::abort(); } c.memAccesses.push_back(Warp::MemAccess(true, memAddr)); #ifdef EMU_INSTRUMENTATION @@ -855,7 +855,7 @@ void Instruction::executeOn(Warp &c) { default: cout << "pc: " << hex << (c.pc) << "\n"; cout << "aERROR: Unsupported instruction: " << *this << "\n" << flush; - exit(1); + std::abort(); } } diff --git a/emulator/mem.cpp b/emulator/mem.cpp index 35a7e4f5..ca6937d3 100644 --- a/emulator/mem.cpp +++ b/emulator/mem.cpp @@ -25,7 +25,7 @@ RamMemDevice::RamMemDevice(const char *filename, Size wordSize) : if (!input) { cout << "Error reading file \"" << filename << "\" into RamMemDevice.\n"; - exit(1); + std::abort(); } do { contents.push_back(input.get()); } while (input); @@ -38,7 +38,7 @@ RamMemDevice::RamMemDevice(Size size, Size wordSize) : void RomMemDevice::write(Addr, Word) { cout << "Attempt to write to ROM.\n"; - exit(1); + std::abort(); } Word RamMemDevice::read(Addr addr) { @@ -215,7 +215,7 @@ Word DiskControllerMemDevice::read(Addr a) { case 5: return status; default: cout << "Attempt to read invalid disk controller register.\n"; - exit(1); + std::abort(); } } diff --git a/emulator/qsim-harp.cpp b/emulator/qsim-harp.cpp index d25ad9ec..61714b6f 100644 --- a/emulator/qsim-harp.cpp +++ b/emulator/qsim-harp.cpp @@ -20,7 +20,7 @@ 
Harp::OSDomain::OSDomain(ArchDef &archref, string imgFile) : { if (osDomain != NULL) { cout << "Error: OSDomain is a singleton."; - exit(1); + std::abort(); } osDomain = this; diff --git a/opae/opae_setup.sh b/opae/opae_setup.sh new file mode 100644 index 00000000..7763e212 --- /dev/null +++ b/opae/opae_setup.sh @@ -0,0 +1,97 @@ + + +## Required tools +# gcc (>4.9) +# libjson +# python +# Quartus +# RTL Simulator (VCS or ModelSim or QuestaSim) + + + +## Download OPAE SDK from https://github.com/OPAE/opae-sdk/archive/1.4.0-1.tar.gz +cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/ + +## Update the following file based on /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/ase_setup_template.sh +# ./opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/ase_setup_template.sh + + + +################################################################################################### +################################### TO BE DONE EVERY TIME ######################################### +################################################################################################### +## Change the shell to bash before running +bash + +## Setup Environment +## Running the default script results in multiple versions of libcurl during cmake. 
+#source /nethome/achawda6/specialProblem/rg_intel_fpga_end_19.3.sh +source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh + +## Setup the variables for using the Quartus modelsim +source /nethome/achawda6/specialProblem/modelsim_env.sh + +## Run this to setup the environment variables +source /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/ase_setup_template.sh + +## gcc version should be greater than 4.9 to support c++14 +source /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/env_check.sh + +export PATH=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall/bin:${PATH} +export FPGA_BBB_CCI_SRC=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb +#################################################################################################### + + + + + + +## Setup OPAE +mkdir mybuild +cd mybuild + +## Update the directory path where you want to install OPAE +cmake .. -DBUILD_ASE=1 -DCMAKE_INSTALL_PREFIX=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall +make +make install + + + + +## Setup ASE +## Add the installed OPAE path in PATH +export PATH=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall/bin:${PATH} + +## Use this version of HDL files +/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/scripts/afu_sim_setup --sources=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/libopae/plugins/ase/rtl/sources_ase_server.txt run1Build +cd run1Build/ +python scripts/ipc_clean.py + + + + + +## Running Sample +## Download opae-bbb from https://github.com/OPAE/intel-fpga-bbb +cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1 +git clone https://github.com/OPAE/intel-fpga-bbb +cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb +mkdir mybuild +cd mybuild +cmake .. 
-DCMAKE_INSTALL_PREFIX=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/mybuild/opaeInstall +make +make install + +export FPGA_BBB_CCI_SRC=/nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb + + + + + +## Running hello world +cd /nethome/achawda6/specialProblem/opae-sdk-1.4.0-1/intel-fpga-bbb/samples/tutorial/01_hello_world +afu_sim_setup --source hw/rtl/sources.txt build_sim +cd build_sim +## Update libstdc++6 if it errors out +make +make sim diff --git a/rtl/Makefile b/rtl/Makefile index 4f511fdd..c2515b29 100644 --- a/rtl/Makefile +++ b/rtl/Makefile @@ -33,7 +33,7 @@ VERILATOR: VERILATORnoWarnings: echo "#define VCD_OFF" > simulate/tb_debug.h - verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(WNO) + verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(WNO) $(DEB) compdebug: echo "#define VCD_OUTPUT" > simulate/tb_debug.h diff --git a/rtl/VX_back_end.v b/rtl/VX_back_end.v index a58847f3..640def5f 100644 --- a/rtl/VX_back_end.v +++ b/rtl/VX_back_end.v @@ -32,7 +32,7 @@ assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num; VX_mw_wb_inter VX_mw_wb(); -wire no_slot_mem; +wire no_slot_mem; VX_mem_req_inter VX_exe_mem_req(); @@ -55,6 +55,8 @@ VX_gpu_inst_req_inter VX_gpu_inst_req(); // CSR unit inputs VX_csr_req_inter VX_csr_req(); VX_csr_wb_inter VX_csr_wb(); +wire no_slot_csr; +wire stall_gpr_csr; VX_gpr_stage VX_gpr_stage( .clk (clk), @@ -67,6 +69,7 @@ VX_gpr_stage VX_gpr_stage( .VX_lsu_req (VX_lsu_req), .VX_gpu_inst_req (VX_gpu_inst_req), .VX_csr_req (VX_csr_req), + .stall_gpr_csr (stall_gpr_csr), // End new .memory_delay (out_mem_delay), .gpr_stage_delay (gpr_stage_delay) @@ -100,9 +103,19 @@ VX_gpgpu_inst VX_gpgpu_inst( .VX_warp_ctl (VX_warp_ctl) ); -VX_csr_wrapper VX_csr_wrapper( - .VX_csr_req(VX_csr_req), - .VX_csr_wb (VX_csr_wb) +// VX_csr_wrapper VX_csr_wrapper( +// .VX_csr_req(VX_csr_req), +// .VX_csr_wb (VX_csr_wb) +// ); + +VX_csr_pipe VX_csr_pipe( + .clk (clk), + .reset (reset), + .no_slot_csr 
(no_slot_csr), + .VX_csr_req (VX_csr_req), + .VX_writeback(VX_writeback_temp), + .VX_csr_wb (VX_csr_wb), + .stall_gpr_csr(stall_gpr_csr) ); VX_writeback VX_wb( @@ -113,7 +126,8 @@ VX_writeback VX_wb( .VX_csr_wb (VX_csr_wb), .VX_writeback_inter(VX_writeback_temp), - .no_slot_mem (no_slot_mem) + .no_slot_mem (no_slot_mem), + .no_slot_csr (no_slot_csr) ); endmodule \ No newline at end of file diff --git a/rtl/VX_csr_data.v b/rtl/VX_csr_data.v new file mode 100644 index 00000000..ab62aa23 --- /dev/null +++ b/rtl/VX_csr_data.v @@ -0,0 +1,82 @@ +`include "../VX_define.v" + +module VX_csr_data ( + input wire clk, // Clock + input wire reset, + + input wire[11:0] in_read_csr_address, + + input wire in_write_valid, + input wire[31:0] in_write_csr_data, + input wire[11:0] in_write_csr_address, + + output wire[31:0] out_read_csr_data, + + // For instruction retire counting + input wire in_writeback_valid + +); + + + // wire[`NT_M1:0][31:0] thread_ids; + // wire[`NT_M1:0][31:0] warp_ids; + + // genvar cur_t; + // for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin + // assign thread_ids[cur_t] = cur_t; + // end + + // genvar cur_tw; + // for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin + // assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num}; + // end + + reg[11:0] csr[1023:0]; + reg[63:0] cycle; + reg[63:0] instret; + + + wire read_cycle; + wire read_cycleh; + wire read_instret; + wire read_instreth; + + assign read_cycle = in_read_csr_address == 12'hC00; + assign read_cycleh = in_read_csr_address == 12'hC80; + assign read_instret = in_read_csr_address == 12'hC02; + assign read_instreth = in_read_csr_address == 12'hC82; + + // wire thread_select = in_read_csr_address == 12'h20; + // wire warp_select = in_read_csr_address == 12'h21; + + // assign out_read_csr_data = thread_select ? thread_ids : + // warp_select ? 
warp_ids : + // 0; + + integer curr_e; + always @(posedge clk or posedge reset) begin + if (reset) begin + for (curr_e = 0; curr_e < 1024; curr_e=curr_e+1) begin + csr[curr_e] <= 0; + end + cycle <= 0; + instret <= 0; + end else begin + cycle <= cycle + 1; + if (in_write_valid) begin + csr[in_write_csr_address] <= in_write_csr_data[11:0]; + end + if (in_writeback_valid) begin + instret <= instret + 1; + end + end + end + + + assign out_read_csr_data = read_cycle ? cycle[31:0] : + read_cycleh ? cycle[63:32] : + read_instret ? instret[31:0] : + read_instreth ? instret[63:32] : + {{20{1'b0}}, csr[in_read_csr_address]}; + +endmodule \ No newline at end of file diff --git a/rtl/VX_csr_pipe.v b/rtl/VX_csr_pipe.v new file mode 100644 index 00000000..a5727c60 --- /dev/null +++ b/rtl/VX_csr_pipe.v @@ -0,0 +1,105 @@ + +module VX_csr_pipe ( + input wire clk, // Clock + input wire reset, + input wire no_slot_csr, + VX_csr_req_inter VX_csr_req, + VX_wb_inter VX_writeback, + VX_csr_wb_inter VX_csr_wb, + output wire stall_gpr_csr + +); + + wire[`NT_M1:0] valid_s2; + wire[`NW_M1:0] warp_num_s2; + wire[4:0] rd_s2; + wire[1:0] wb_s2; + wire[4:0] alu_op_s2; + wire is_csr_s2; + wire[11:0] csr_address_s2; + wire[31:0] csr_read_data_s2; + wire[31:0] csr_updated_data_s2; + + wire[31:0] csr_read_data_unqual; + wire[31:0] csr_read_data; + + assign stall_gpr_csr = no_slot_csr && VX_csr_req.is_csr && |(VX_csr_req.valid); + + assign csr_read_data = (csr_address_s2 == VX_csr_req.csr_address) ? 
csr_updated_data_s2 : csr_read_data_unqual; + + wire writeback = |VX_writeback.wb_valid; + VX_csr_data VX_csr_data( + .clk (clk), + .reset (reset), + .in_read_csr_address (VX_csr_req.csr_address), + + .in_write_valid (is_csr_s2), + .in_write_csr_data (csr_updated_data_s2), + .in_write_csr_address(csr_address_s2), + + .out_read_csr_data (csr_read_data_unqual), + + .in_writeback_valid (writeback) + ); + + + + reg[31:0] csr_updated_data; + always @(*) begin + case(VX_csr_req.alu_op) + `CSR_ALU_RW: csr_updated_data = VX_csr_req.csr_mask; + `CSR_ALU_RS: csr_updated_data = csr_read_data | VX_csr_req.csr_mask; + `CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - VX_csr_req.csr_mask); + default: csr_updated_data = 32'hdeadbeef; + endcase + end + + wire zero = 0; + + VX_generic_register #(.N(`NT + `NW_M1 + 1 + 5 + 2 + 1 + 12 + 64)) csr_reg_s2 ( + .clk (clk), + .reset(reset), + .stall(no_slot_csr), + .flush(zero), + .in ({VX_csr_req.valid, VX_csr_req.warp_num, VX_csr_req.rd, VX_csr_req.wb, VX_csr_req.is_csr, VX_csr_req.csr_address, csr_read_data , csr_updated_data }), + .out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2}) + ); + + + wire[`NT_M1:0][31:0] final_csr_data; + + wire[`NT_M1:0][31:0] thread_ids; + wire[`NT_M1:0][31:0] warp_ids; + wire[`NT_M1:0][31:0] csr_vec_read_data_s2; + + genvar cur_t; + for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin + assign thread_ids[cur_t] = cur_t; + end + + genvar cur_tw; + for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin + assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2}; + end + + genvar cur_v; + for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin + assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2; + end + + wire thread_select = csr_address_s2 == 12'h20; + wire warp_select = csr_address_s2 == 12'h21; + + assign final_csr_data = thread_select ? thread_ids : + warp_select ? 
warp_ids : + csr_vec_read_data_s2; + + + + assign VX_csr_wb.valid = valid_s2; + assign VX_csr_wb.warp_num = warp_num_s2; + assign VX_csr_wb.rd = rd_s2; + assign VX_csr_wb.wb = wb_s2; + assign VX_csr_wb.csr_result = final_csr_data; + +endmodule \ No newline at end of file diff --git a/rtl/VX_decode.v b/rtl/VX_decode.v index 7fb2f90a..4f33bbd1 100644 --- a/rtl/VX_decode.v +++ b/rtl/VX_decode.v @@ -119,7 +119,8 @@ module VX_decode( assign is_auipc = (curr_opcode == `AUIPC_INST); assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0); assign is_csr_immed = (is_csr) && (func3[2] == 1); - assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0); + // assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0); + assign is_e_inst = in_instruction == 32'h00000073; assign is_gpgpu = (curr_opcode == `GPGPU_INST); diff --git a/rtl/VX_define.v b/rtl/VX_define.v index 809ff759..f177fbfb 100644 --- a/rtl/VX_define.v +++ b/rtl/VX_define.v @@ -12,7 +12,7 @@ // `define SYN 1 // `define ASIC 1 -`define SYN_FUNC 1 +// `define SYN_FUNC 1 `define NUM_BARRIERS 4 @@ -128,14 +128,16 @@ // `define PARAM +// oooooo + //Cache configurations //Cache configurations //Bytes -`define ICACHE_SIZE 1024 +`define ICACHE_SIZE 4096 `define ICACHE_WAYS 2 //Bytes -`define ICACHE_BLOCK 16 -`define ICACHE_BANKS 1 +`define ICACHE_BLOCK 64 +`define ICACHE_BANKS 4 `define ICACHE_LOG_NUM_BANKS `CLOG2(`ICACHE_BANKS) `define ICACHE_NUM_WORDS_PER_BLOCK (`ICACHE_BLOCK / (`ICACHE_BANKS * 4)) diff --git a/rtl/VX_gpr_stage.v b/rtl/VX_gpr_stage.v index 3d556a83..22fea9d6 100644 --- a/rtl/VX_gpr_stage.v +++ b/rtl/VX_gpr_stage.v @@ -7,6 +7,7 @@ module VX_gpr_stage ( input wire schedule_delay, input wire memory_delay, + input wire stall_gpr_csr, output wire gpr_stage_delay, // inputs @@ -93,7 +94,7 @@ module VX_gpr_stage ( wire stall_lsu = memory_delay; wire flush_lsu = schedule_delay && !stall_lsu; - assign gpr_stage_delay = stall_lsu; + assign gpr_stage_delay = stall_lsu || (stall_gpr_csr && 
VX_bckE_req.is_csr && (|VX_bckE_req.valid)); `ifdef ASIC wire delayed_lsu_last_cycle; @@ -166,13 +167,13 @@ module VX_gpr_stage ( assign VX_gpu_inst_req.a_reg_data = real_base_address; assign VX_gpu_inst_req.rd2 = real_store_data; - VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg( + VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg( .clk (clk), .reset(reset), - .stall(stall_rest), + .stall(stall_gpr_csr), .flush(flush_rest), - .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}), - .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask }) + .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}), + .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask }) ); @@ -208,13 +209,13 @@ module VX_gpr_stage ( .out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 }) ); - VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg( + VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg( .clk (clk), .reset(reset), - .stall(stall_rest), + .stall(stall_gpr_csr), .flush(flush_rest), - .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}), - .out ({VX_csr_req.valid , 
VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask }) + .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}), + .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask }) ); `endif diff --git a/rtl/VX_inst_multiplex.v b/rtl/VX_inst_multiplex.v index 3aa17510..86da67de 100644 --- a/rtl/VX_inst_multiplex.v +++ b/rtl/VX_inst_multiplex.v @@ -82,6 +82,7 @@ module VX_inst_multiplex ( assign VX_csr_req.warp_num = VX_bckE_req.warp_num; assign VX_csr_req.rd = VX_bckE_req.rd; assign VX_csr_req.wb = VX_bckE_req.wb; + assign VX_csr_req.alu_op = VX_bckE_req.alu_op; assign VX_csr_req.is_csr = VX_bckE_req.is_csr; assign VX_csr_req.csr_address = VX_bckE_req.csr_address; assign VX_csr_req.csr_immed = VX_bckE_req.csr_immed; diff --git a/rtl/VX_writeback.v b/rtl/VX_writeback.v index c9616d43..2f684bae 100644 --- a/rtl/VX_writeback.v +++ b/rtl/VX_writeback.v @@ -14,7 +14,8 @@ module VX_writeback ( // Actual WB to GPR VX_wb_inter VX_writeback_inter, - output wire no_slot_mem + output wire no_slot_mem, + output wire no_slot_csr ); @@ -26,6 +27,7 @@ module VX_writeback ( assign no_slot_mem = mem_wb && (exec_wb || csr_wb); + assign no_slot_csr = csr_wb && (exec_wb); assign VX_writeback_tempp.write_data = exec_wb ? VX_inst_exec_wb.alu_result : csr_wb ? 
VX_csr_wb.csr_result : @@ -85,6 +87,13 @@ module VX_writeback ( .out ({use_wb_data , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc}) ); + reg[31:0] last_data_wb; + always @(posedge clk) begin + if ((|VX_writeback_inter.wb_valid) && (VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd == 28)) begin + last_data_wb <= use_wb_data[0]; + end + end + `ifdef SYN assign VX_writeback_inter.write_data = prev_is_mem ? VX_writeback_tempp.write_data : use_wb_data; `else diff --git a/rtl/Vortex.v b/rtl/Vortex.v index 74e79036..f4b13e7d 100644 --- a/rtl/Vortex.v +++ b/rtl/Vortex.v @@ -44,6 +44,26 @@ module Vortex ); +reg[31:0] icache_banks = `ICACHE_BANKS; +reg[31:0] icache_num_words_per_block = `ICACHE_NUM_WORDS_PER_BLOCK; + + +reg[31:0] dcache_banks = `DCACHE_BANKS; +reg[31:0] dcache_num_words_per_block = `DCACHE_NUM_WORDS_PER_BLOCK; + +reg[31:0] number_threads = `NT; +reg[31:0] number_warps = `NW; + +always @(posedge clk) begin + icache_banks <= icache_banks; + icache_num_words_per_block <= icache_num_words_per_block; + + dcache_banks <= dcache_banks; + dcache_num_words_per_block <= dcache_num_words_per_block; + + number_threads <= number_threads; + number_warps <= number_warps; +end wire memory_delay; wire gpr_stage_delay; diff --git a/rtl/interfaces/VX_csr_req_inter.v b/rtl/interfaces/VX_csr_req_inter.v index c8daf257..9080f0e1 100644 --- a/rtl/interfaces/VX_csr_req_inter.v +++ b/rtl/interfaces/VX_csr_req_inter.v @@ -11,7 +11,7 @@ interface VX_csr_req_inter (); wire[`NW_M1:0] warp_num; wire[4:0] rd; wire[1:0] wb; - + wire[4:0] alu_op; wire is_csr; wire[11:0] csr_address; wire csr_immed; diff --git a/rtl/simulate/VX_define.h b/rtl/simulate/VX_define.h index c7dd4e26..ed10c77f 100644 --- a/rtl/simulate/VX_define.h +++ b/rtl/simulate/VX_define.h @@ -3,8 +3,8 @@ #define NW 8 -// #define CACHE_NUM_BANKS 8 -// #define CACHE_WORDS_PER_BLOCK 4 +#define CACHE_NUM_BANKS 8 +#define 
CACHE_WORDS_PER_BLOCK 4 #define R_INST 51 #define L_INST 3 diff --git a/rtl/simulate/ram.h b/rtl/simulate/ram.h index b8fd0ec3..13f78e94 100644 --- a/rtl/simulate/ram.h +++ b/rtl/simulate/ram.h @@ -167,11 +167,12 @@ uint32_t hToI(char *c, uint32_t size) { -void loadHexImpl(char * path,RAM* mem) { +void loadHexImpl(const char *path, RAM* mem) { mem->clear(); - FILE *fp = fopen(&path[0], "r"); + FILE *fp = fopen(path, "r"); if(fp == 0){ printf("Path not found %s\n", path); + return; // std::cout << path << " not found" << std::endl; } //Preload 0x0 <-> 0x80000000 jumps diff --git a/rtl/simulate/test_bench.cpp b/rtl/simulate/test_bench.cpp index 9387ad89..2becfb89 100644 --- a/rtl/simulate/test_bench.cpp +++ b/rtl/simulate/test_bench.cpp @@ -12,83 +12,94 @@ int main(int argc, char **argv) Verilated::traceEverOn(true); +#define ALL_TESTS +#ifdef ALL_TESTS + bool passed = true; + std::string tests[NUM_TESTS] = { + "../../emulator/riscv_tests/rv32ui-p-add.hex", + "../../emulator/riscv_tests/rv32ui-p-addi.hex", + "../../emulator/riscv_tests/rv32ui-p-and.hex", + "../../emulator/riscv_tests/rv32ui-p-andi.hex", + "../../emulator/riscv_tests/rv32ui-p-auipc.hex", + "../../emulator/riscv_tests/rv32ui-p-beq.hex", + "../../emulator/riscv_tests/rv32ui-p-bge.hex", + "../../emulator/riscv_tests/rv32ui-p-bgeu.hex", + "../../emulator/riscv_tests/rv32ui-p-blt.hex", + "../../emulator/riscv_tests/rv32ui-p-bltu.hex", + "../../emulator/riscv_tests/rv32ui-p-bne.hex", + "../../emulator/riscv_tests/rv32ui-p-jal.hex", + "../../emulator/riscv_tests/rv32ui-p-jalr.hex", + "../../emulator/riscv_tests/rv32ui-p-lb.hex", + "../../emulator/riscv_tests/rv32ui-p-lbu.hex", + "../../emulator/riscv_tests/rv32ui-p-lh.hex", + "../../emulator/riscv_tests/rv32ui-p-lhu.hex", + "../../emulator/riscv_tests/rv32ui-p-lui.hex", + "../../emulator/riscv_tests/rv32ui-p-lw.hex", + "../../emulator/riscv_tests/rv32ui-p-or.hex", + "../../emulator/riscv_tests/rv32ui-p-ori.hex", + 
"../../emulator/riscv_tests/rv32ui-p-sb.hex", + "../../emulator/riscv_tests/rv32ui-p-sh.hex", + "../../emulator/riscv_tests/rv32ui-p-simple.hex", + "../../emulator/riscv_tests/rv32ui-p-sll.hex", + "../../emulator/riscv_tests/rv32ui-p-slli.hex", + "../../emulator/riscv_tests/rv32ui-p-slt.hex", + "../../emulator/riscv_tests/rv32ui-p-slti.hex", + "../../emulator/riscv_tests/rv32ui-p-sltiu.hex", + "../../emulator/riscv_tests/rv32ui-p-sltu.hex", + "../../emulator/riscv_tests/rv32ui-p-sra.hex", + "../../emulator/riscv_tests/rv32ui-p-srai.hex", + "../../emulator/riscv_tests/rv32ui-p-srl.hex", + "../../emulator/riscv_tests/rv32ui-p-srli.hex", + "../../emulator/riscv_tests/rv32ui-p-sub.hex", + "../../emulator/riscv_tests/rv32ui-p-sw.hex", + "../../emulator/riscv_tests/rv32ui-p-xor.hex", + "../../emulator/riscv_tests/rv32ui-p-xori.hex", + "../../emulator/riscv_tests/rv32um-p-div.hex", + "../../emulator/riscv_tests/rv32um-p-divu.hex", + "../../emulator/riscv_tests/rv32um-p-mul.hex", + "../../emulator/riscv_tests/rv32um-p-mulh.hex", + "../../emulator/riscv_tests/rv32um-p-mulhsu.hex", + "../../emulator/riscv_tests/rv32um-p-mulhu.hex", + "../../emulator/riscv_tests/rv32um-p-rem.hex", + "../../emulator/riscv_tests/rv32um-p-remu.hex" + }; - // bool passed = true; - // std::string tests[NUM_TESTS] = { - // "../../emulator/riscv_tests/rv32ui-p-add.hex", - // "../../emulator/riscv_tests/rv32ui-p-addi.hex", - // "../../emulator/riscv_tests/rv32ui-p-and.hex", - // "../../emulator/riscv_tests/rv32ui-p-andi.hex", - // "../../emulator/riscv_tests/rv32ui-p-auipc.hex", - // "../../emulator/riscv_tests/rv32ui-p-beq.hex", - // "../../emulator/riscv_tests/rv32ui-p-bge.hex", - // "../../emulator/riscv_tests/rv32ui-p-bgeu.hex", - // "../../emulator/riscv_tests/rv32ui-p-blt.hex", - // "../../emulator/riscv_tests/rv32ui-p-bltu.hex", - // "../../emulator/riscv_tests/rv32ui-p-bne.hex", - // "../../emulator/riscv_tests/rv32ui-p-jal.hex", - // "../../emulator/riscv_tests/rv32ui-p-jalr.hex", - // 
"../../emulator/riscv_tests/rv32ui-p-lb.hex", - // "../../emulator/riscv_tests/rv32ui-p-lbu.hex", - // "../../emulator/riscv_tests/rv32ui-p-lh.hex", - // "../../emulator/riscv_tests/rv32ui-p-lhu.hex", - // "../../emulator/riscv_tests/rv32ui-p-lui.hex", - // "../../emulator/riscv_tests/rv32ui-p-lw.hex", - // "../../emulator/riscv_tests/rv32ui-p-or.hex", - // "../../emulator/riscv_tests/rv32ui-p-ori.hex", - // "../../emulator/riscv_tests/rv32ui-p-sb.hex", - // "../../emulator/riscv_tests/rv32ui-p-sh.hex", - // "../../emulator/riscv_tests/rv32ui-p-simple.hex", - // "../../emulator/riscv_tests/rv32ui-p-sll.hex", - // "../../emulator/riscv_tests/rv32ui-p-slli.hex", - // "../../emulator/riscv_tests/rv32ui-p-slt.hex", - // "../../emulator/riscv_tests/rv32ui-p-slti.hex", - // "../../emulator/riscv_tests/rv32ui-p-sltiu.hex", - // "../../emulator/riscv_tests/rv32ui-p-sltu.hex", - // "../../emulator/riscv_tests/rv32ui-p-sra.hex", - // "../../emulator/riscv_tests/rv32ui-p-srai.hex", - // "../../emulator/riscv_tests/rv32ui-p-srl.hex", - // "../../emulator/riscv_tests/rv32ui-p-srli.hex", - // "../../emulator/riscv_tests/rv32ui-p-sub.hex", - // "../../emulator/riscv_tests/rv32ui-p-sw.hex", - // "../../emulator/riscv_tests/rv32ui-p-xor.hex", - // "../../emulator/riscv_tests/rv32ui-p-xori.hex", - // "../../emulator/riscv_tests/rv32um-p-div.hex", - // "../../emulator/riscv_tests/rv32um-p-divu.hex", - // "../../emulator/riscv_tests/rv32um-p-mul.hex", - // "../../emulator/riscv_tests/rv32um-p-mulh.hex", - // "../../emulator/riscv_tests/rv32um-p-mulhsu.hex", - // "../../emulator/riscv_tests/rv32um-p-mulhu.hex", - // "../../emulator/riscv_tests/rv32um-p-rem.hex", - // "../../emulator/riscv_tests/rv32um-p-remu.hex" - // }; + for (std::string s : tests) { + Vortex v; - // for (int ii = 0; ii < NUM_TESTS; ii++) - // // for (int ii = 5; ii < 6; ii++) - // { - // std::cout << "TESTING: " << tests[ii] << '\n'; - // Vortex v; - // bool curr = v.simulate(tests[ii]); + std::cerr << s << 
std::endl; - // if ( curr) std::cerr << GREEN << "Test Passed: " << tests[ii] << std::endl; - // if (!curr) std::cerr << RED << "Test Failed: " << tests[ii] << std::endl; - // passed = passed && curr; + bool curr = v.simulate(s); + if ( curr) std::cerr << GREEN << "Test Passed: " << s << std::endl; + if (!curr) std::cerr << RED << "Test Failed: " << s << std::endl; + passed = passed && curr; + } - // std::cerr << DEFAULT; - // } + if( passed) std::cerr << DEFAULT << "PASSED ALL TESTS\n"; + if(!passed) std::cerr << DEFAULT << "Failed one or more tests\n"; - // if( passed) std::cerr << DEFAULT << "PASSED ALL TESTS\n"; - // if(!passed) std::cerr << DEFAULT << "Failed one or more tests\n"; + return !passed; + #else - // char testing[] = "../../emulator/riscv_tests/rv32ui-p-sw.hex"; + char testing[] = "../../emulator/riscv_tests/rv32ui-p-sw.hex"; Vortex v; - char testing[] = "../../kernel/vortex_test.hex"; + const char *testing; + + if (argc >= 2) { + testing = argv[1]; + } else { + testing = "../../kernel/vortex_test.hex"; + } + + std::cerr << testing << std::endl; + bool curr = v.simulate(testing); if ( curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl; if (!curr) std::cerr << RED << "Test Failed: " << testing << std::endl; - return 0; + return !curr; +#endif } diff --git a/rtl/simulate/test_bench.h b/rtl/simulate/test_bench.h index 98785e36..3a001377 100644 --- a/rtl/simulate/test_bench.h +++ b/rtl/simulate/test_bench.h @@ -46,8 +46,10 @@ class Vortex VVortex * vortex; unsigned start_pc; - bool refill; - unsigned refill_addr; + bool refill_d; + unsigned refill_addr_d; + bool refill_i; + unsigned refill_addr_i; long int curr_cycle; bool stop; bool unit_test; @@ -100,7 +102,7 @@ Vortex::~Vortex() void Vortex::ProcessFile(void) { - loadHexImpl("../../kernel/vortex_test.hex", &this->ram); + loadHexImpl(this->instruction_file_name.c_str(), &this->ram); } void Vortex::print_stats(bool cycle_test) @@ -154,38 +156,66 @@ void Vortex::print_stats(bool 
cycle_test) bool Vortex::ibus_driver() { - ////////////////////// IBUS ////////////////////// - unsigned new_PC; - bool stop = false; - uint32_t curr_inst = 0; + vortex->i_m_ready_i = false; - curr_inst = 0xdeadbeef; - - new_PC = vortex->icache_request_pc_address; - ram.getWord(new_PC, &curr_inst); - vortex->icache_response_instruction = curr_inst; - - // std::cout << std::hex << "IReq: " << vortex->icache_request_pc_address << "\tResp: " << curr_inst << "\n"; - - // printf("\n\n---------------------------------------------\n(%x) Inst: %x\n", new_PC, curr_inst); - // printf("\n"); - ////////////////////// IBUS ////////////////////// - - - ////////////////////// STATS ////////////////////// - - - if (((((unsigned int)curr_inst) != 0) && (((unsigned int)curr_inst) != 0xffffffff))) { - ++stats_dynamic_inst; - stop = false; - } else - { - // printf("Ibus requesting stop: %x\n", curr_inst); - stop = true; + + // int dcache_num_words_per_block + + if (refill_i) + { + refill_i = false; + vortex->i_m_ready_i = true; + + for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__icache_banks; curr_bank++) + { + for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++) + { + unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank; + unsigned curr_addr = refill_addr_i + (4*curr_index); + + unsigned curr_value; + ram.getWord(curr_addr, &curr_value); + + vortex->i_m_readdata_i[curr_bank][curr_word] = curr_value; + + } + } + } + else + { + if (vortex->o_m_valid_i) + { + + if (vortex->o_m_read_or_write_i) + { + // fprintf(stderr, "++++++++++++++++++++++++++++++++\n"); + unsigned base_addr = vortex->o_m_evict_addr_i; + + for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__icache_banks; curr_bank++) + { + for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++) + { + unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank; + unsigned curr_addr = 
base_addr + (4*curr_index); + + unsigned curr_value = vortex->o_m_writedata_i[curr_bank][curr_word]; + + ram.writeWord( curr_addr, &curr_value); + } + } + } + + // Respond next cycle + refill_i = true; + refill_addr_i = vortex->o_m_read_addr_i; + } + } + } - return stop; + + return false; } @@ -197,6 +227,7 @@ void Vortex::io_handler() char c = (char) data_write; std::cerr << c; + // std::cout << c; } } @@ -204,75 +235,62 @@ void Vortex::io_handler() bool Vortex::dbus_driver() { - // printf("****************************\n"); + vortex->i_m_ready_d = false; - vortex->i_m_ready = 0; - for (int i = 0; i < CACHE_NUM_BANKS; i++) { - for (int j = 0; j < CACHE_WORDS_PER_BLOCK; j++) + + // int dcache_num_words_per_block + + if (refill_d) { - vortex->i_m_readdata[i][j] = 0; - } - } + refill_d = false; + vortex->i_m_ready_d = true; - - if (this->refill) - { - this->refill = false; - - vortex->i_m_ready = 1; - for (int curr_e = 0; curr_e < (CACHE_NUM_BANKS*CACHE_WORDS_PER_BLOCK); curr_e++) - { - unsigned new_addr = this->refill_addr + (4*curr_e); - - - unsigned addr_without_byte = new_addr >> 2; - unsigned bank_num = addr_without_byte & 0x7; - unsigned addr_wihtout_bank = addr_without_byte >> 3; - unsigned offset_num = addr_wihtout_bank & 0x3; - - unsigned value; - ram.getWord(new_addr, &value); - - // printf("-------- (%x) i_m_readdata[%d][%d] (%d) = %d\n", new_addr, bank_num, offset_num, curr_e, value); - vortex->i_m_readdata[bank_num][offset_num] = value; - - } - } - else - { - if (vortex->o_m_valid) - { - // printf("Valid o_m_valid\n"); - if (vortex->o_m_read_or_write) + for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__dcache_banks; curr_bank++) { - // printf("Valid write\n"); - - for (int curr_e = 0; curr_e < (CACHE_NUM_BANKS*CACHE_WORDS_PER_BLOCK); curr_e++) + for (int curr_word = 0; curr_word < vortex->Vortex__DOT__dcache_num_words_per_block; curr_word++) { - unsigned new_addr = vortex->o_m_evict_addr + (4*curr_e); + unsigned curr_index = (curr_word * 
vortex->Vortex__DOT__dcache_banks) + curr_bank; + unsigned curr_addr = refill_addr_d + (4*curr_index); + unsigned curr_value; + ram.getWord(curr_addr, &curr_value); - unsigned addr_without_byte = new_addr >> 2; - unsigned bank_num = addr_without_byte & 0x7; - unsigned addr_wihtout_bank = addr_without_byte >> 3; - unsigned offset_num = addr_wihtout_bank & 0x3; + vortex->i_m_readdata_d[curr_bank][curr_word] = curr_value; - - unsigned new_value = vortex->o_m_writedata[bank_num][offset_num]; - - ram.writeWord( new_addr, &new_value); - - // printf("+++++++ (%x) writeback[%d][%d] (%d) = %d\n", new_addr, bank_num, offset_num, curr_e, new_value); - // printf("+++++++ (%x) i_m_readdata[%d][%d] (%d) = %d\n", new_addr, bank_num, offset_num, curr_e, value); } - } - - // Respond next cycle - this->refill = true; - this->refill_addr = vortex->o_m_read_addr; } + else + { + if (vortex->o_m_valid_d) + { + + if (vortex->o_m_read_or_write_d) + { + // fprintf(stderr, "++++++++++++++++++++++++++++++++\n"); + unsigned base_addr = vortex->o_m_evict_addr_d; + + for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__dcache_banks; curr_bank++) + { + for (int curr_word = 0; curr_word < vortex->Vortex__DOT__dcache_num_words_per_block; curr_word++) + { + unsigned curr_index = (curr_word * vortex->Vortex__DOT__dcache_banks) + curr_bank; + unsigned curr_addr = base_addr + (4*curr_index); + + unsigned curr_value = vortex->o_m_writedata_d[curr_bank][curr_word]; + + ram.writeWord( curr_addr, &curr_value); + } + } + } + + // Respond next cycle + refill_d = true; + refill_addr_d = vortex->o_m_read_addr_d; + } + } + } @@ -397,7 +415,9 @@ bool Vortex::simulate(std::string file_to_simulate) std::cerr << "New Total Cycles: " << (this->stats_total_cycles) << "\n"; - // int status = (unsigned int) vortex->Vortex__DOT__vx_front_end__DOT__vx_decode__DOT__vx_grp_wrapper__DOT__genblk2__BRA__0__KET____DOT__vx_gpr__DOT__first_ram__DOT__GPR[28][0] & 0xf; + int status = (unsigned int) 
vortex->Vortex__DOT__vx_back_end__DOT__VX_wb__DOT__last_data_wb & 0xf; + + // std::cout << "Last wb: " << std::hex << ((unsigned int) vortex->Vortex__DOT__vx_back_end__DOT__VX_wb__DOT__last_data_wb) << "\n"; // std::cout << "Something: " << result << '\n'; @@ -408,6 +428,6 @@ bool Vortex::simulate(std::string file_to_simulate) - // return (status == 1); - return (1 == 1); + return (status == 1); + // return (1 == 1); } \ No newline at end of file diff --git a/runtime/mains/dev/Makefile b/runtime/mains/dev/Makefile index d7232978..f78a9d66 100644 --- a/runtime/mains/dev/Makefile +++ b/runtime/mains/dev/Makefile @@ -1,10 +1,14 @@ -COMP = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-gcc +# To set a custom TOOLPATH, call make like this: +# TOOLPATH=../../../../riscv-gnu-toolchain/drops/bin make ... +TOOLPATH ?= ~/dev/riscv-gnu-toolchain/drops/bin + +COMP = $(TOOLPATH)/riscv32-unknown-elf-gcc # CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,../vortex_link.ld -ffreestanding -nostartfiles -DMP = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump -CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy +DMP = $(TOOLPATH)/riscv32-unknown-elf-objdump +CPY = $(TOOLPATH)/riscv32-unknown-elf-objcopy VX_STR = ../../startup/vx_start.s diff --git a/simX/Makefile b/simX/Makefile index fb563f13..68fe7414 100644 --- a/simX/Makefile +++ b/simX/Makefile @@ -1,7 +1,9 @@ ################################################################################ # HARPtools by Chad D. 
Kersey, Summer 2011 # ################################################################################ -CXXFLAGS ?= -std=c++11 -fPIC -O3 -g # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS + +CXXFLAGS ?= -std=c++11 -fPIC -O3 -Wall -Wextra -pedantic -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS +# CXXFLAGS ?= -std=c++11 -fPIC -O0 -g -Wall -Wextra -pedantic # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS LIB_OBJS=simX.cpp args.cpp mem.cpp core.cpp instruction.cpp enc.cpp util.cpp @@ -10,7 +12,9 @@ INCLUDE=-I. -I../rtl/shared_memory -I../rtl/cache -I../rtl/interfaces -Isimulate FILE=cache_simX.v COMP=--compiler gcc LIB= -CF=-CFLAGS '-std=c++11 -fPIC -O3' + +CF=-CFLAGS '-std=c++11 -fPIC -O3 -Wall -Wextra -pedantic' +#CF=-CFLAGS '-std=c++11 -fPIC -O0 -g -Wall -Wextra -pedantic' LIGHTW=-Wno-UNOPTFLAT -Wno-WIDTH DEB=--trace -DVL_DEBUG=1 diff --git a/simX/core.cpp b/simX/core.cpp index f267cbe7..7ba684a4 100644 --- a/simX/core.cpp +++ b/simX/core.cpp @@ -111,6 +111,14 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id): release_warp = false; foundSchedule = true; schedule_w = 0; + + memset(&inst_in_fetch, 0, sizeof(inst_in_fetch)); + memset(&inst_in_decode, 0, sizeof(inst_in_decode)); + memset(&inst_in_scheduler, 0, sizeof(inst_in_scheduler)); + memset(&inst_in_exe, 0, sizeof(inst_in_exe)); + memset(&inst_in_lsu, 0, sizeof(inst_in_lsu)); + memset(&inst_in_wb, 0, sizeof(inst_in_wb)); + INIT_TRACE(inst_in_fetch); INIT_TRACE(inst_in_decode); INIT_TRACE(inst_in_scheduler); @@ -158,6 +166,7 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id): bool Core::interrupt(Word r0) { w[0].interrupt(r0); + return false; } void Core::step() @@ -214,8 +223,8 @@ void Core::getCacheDelays(trace_inst_t * trace_inst) if (trace_inst->valid_inst) { - bool in_dcache_in_valid[a.getNThds()]; - unsigned in_dcache_in_address[a.getNThds()]; + std::vector in_dcache_in_valid(a.getNThds()); + std::vector in_dcache_in_address(a.getNThds()); unsigned in_dcache_mem_read; unsigned 
in_dcache_mem_write; @@ -709,10 +718,26 @@ void Core::printStats() const { } Warp::Warp(Core *c, Word id) : - core(c), pc(0x80000000), interruptEnable(true), - supervisorMode(true), activeThreads(0), reg(0), pred(0), - shadowReg(core->a.getNRegs()), shadowPReg(core->a.getNPRegs()), id(id), - spawned(false), steps(0), insts(0), loads(0), stores(0), VLEN(1024) + core(c), + pc(0x80000000), + shadowPc(0), + id(id), + activeThreads(0), + shadowActiveThreads(0), + reg(0), + pred(0), + shadowReg(core->a.getNRegs()), + shadowPReg(core->a.getNPRegs()), + VLEN(1024), + interruptEnable(true), + shadowInterruptEnable(false), + supervisorMode(true), + shadowSupervisorMode(false), + spawned(false), + steps(0), + insts(0), + loads(0), + stores(0) { D(3, "Creating a new thread with PC: " << hex << this->pc << '\n'); /* Build the register file. */ diff --git a/simX/enc.cpp b/simX/enc.cpp index 074f391e..6a7d81a2 100644 --- a/simX/enc.cpp +++ b/simX/enc.cpp @@ -22,14 +22,12 @@ using namespace Harp; // wordSize = ad.getWordSize(); // } -static void decodeError(string msg) { +/*static void decodeError(string msg) { cout << "Instruction decoder error: " << msg << '\n'; - exit(1); -} + std::abort(); +}*/ - - -static unsigned ceilLog2(RegNum x) { +/*static unsigned ceilLog2(RegNum x) { unsigned z = 0; bool nonZeroInnerValues(false); @@ -44,8 +42,7 @@ static unsigned ceilLog2(RegNum x) { if (nonZeroInnerValues) z++; return z; -} - +}*/ WordDecoder::WordDecoder(const ArchDef &arch) { @@ -236,7 +233,7 @@ Instruction *WordDecoder::decode(const std::vector &v, Size &idx, trace_in case InstType::V_TYPE: D(3, "Entered here: instr type = vector" << op); - switch(op) { + switch (op) { case Opcode::VSET_ARITH: //TODO: arithmetic ops inst.setDestReg((code>>shift_rd) & reg_mask); inst.setSrcReg((code>>shift_rs1) & reg_mask); @@ -308,11 +305,14 @@ Instruction *WordDecoder::decode(const std::vector &v, Size &idx, trace_in //trace_inst->vd = ((code>>shift_rd) & reg_mask); trace_inst->vs1 = 
((code>>shift_rd) & reg_mask); //vs3 break; + default: + cout << "Inavlid opcode.\n"; + std::abort(); } break; - default: + default: cout << "Unrecognized argument class in word decoder.\n"; - exit(1); + std::abort(); } if (haveRefs && usedImm && refMap.find(idx-n/8) != refMap.end()) { diff --git a/simX/include/archdef.h b/simX/include/archdef.h index 4a071c0d..fa284aad 100644 --- a/simX/include/archdef.h +++ b/simX/include/archdef.h @@ -111,6 +111,6 @@ namespace Harp { RegNum nRegs, nPRegs; char encChar; }; -}; +} #endif diff --git a/simX/include/args.h b/simX/include/args.h index 77ef2c90..ec8d6880 100644 --- a/simX/include/args.h +++ b/simX/include/args.h @@ -56,6 +56,6 @@ namespace HarpTools { bool &x; }; -}; +} #endif diff --git a/simX/include/asm-tokens.h b/simX/include/asm-tokens.h index f28a598b..792c2f92 100644 --- a/simX/include/asm-tokens.h +++ b/simX/include/asm-tokens.h @@ -11,6 +11,6 @@ namespace HarpTools { ASM_T_PREG, ASM_T_REG, ASM_T_REG_RA, ASM_T_REG_SP, ASM_T_REG_FP, ASM_T_LIT, ASM_T_SYM, ASM_T_PEXP }; -}; +} #endif diff --git a/simX/include/core.h b/simX/include/core.h index b4cfd112..77e4bf41 100644 --- a/simX/include/core.h +++ b/simX/include/core.h @@ -35,9 +35,9 @@ namespace Harp { template class Reg { public: - Reg(): cpuId(0), regNum(0), val(0) {} - Reg(Word c, Word n): cpuId(c), regNum(n), val(0) {} - Reg(Word c, Word n, T v): cpuId(c), regNum(n), val(v) {} + Reg(): val(0), cpuId(0), regNum(0) {} + Reg(Word c, Word n): val(0), cpuId(c), regNum(n) {} + Reg(Word c, Word n, T v): val(v), cpuId(c), regNum(n) {} Reg &operator=(T r) { if (regNum) {val = r; doWrite();} return *this; } @@ -83,10 +83,10 @@ namespace Harp { DomStackEntry(const std::vector &tmask): tmask(tmask), fallThrough(true), uni(false) {} - bool fallThrough; - bool uni; - std::vector tmask; + std::vector tmask; Word pc; + bool fallThrough; + bool uni; }; struct vtype @@ -193,13 +193,14 @@ namespace Harp { std::vector>> vreg; // 32 vector registers - bool interruptEnable, 
shadowInterruptEnable, supervisorMode, - shadowSupervisorMode, spawned; + bool interruptEnable, shadowInterruptEnable; + bool supervisorMode, shadowSupervisorMode; + bool spawned; unsigned long steps, insts, loads, stores; friend class Instruction; }; -}; +} #endif diff --git a/simX/include/enc.h b/simX/include/enc.h index 0c8dc08e..37b054ee 100644 --- a/simX/include/enc.h +++ b/simX/include/enc.h @@ -50,7 +50,10 @@ namespace Harp { public: WordDecoder(const ArchDef &); virtual Instruction *decode(const std::vector &v, Size &n, trace_inst_t * trace_inst); - virtual Instruction *decode(const std::vector &v, Size &n) {printf("Not implemented\n");} + virtual Instruction *decode(const std::vector &v, Size &n) { + printf("Not implemented\n"); + return nullptr; + } private: Size n, o, r, p, i1, i2, i3; @@ -72,6 +75,6 @@ namespace Harp { }; -}; +} #endif diff --git a/simX/include/harpfloat.h b/simX/include/harpfloat.h index f2941c79..c8cdbcfc 100644 --- a/simX/include/harpfloat.h +++ b/simX/include/harpfloat.h @@ -64,7 +64,7 @@ namespace Harp { DEBUGMSG("Set to " << d); } - Float(double d, Size n): sz(n), d(d) { DEBUGMSG("Float(double, size)"); } + Float(double d, Size n): d(d), sz(n) { DEBUGMSG("Float(double, size)"); } operator Word_u() { DEBUGMSG("Float -> Word_u: " << d); @@ -120,4 +120,4 @@ namespace Harp { double d; Size sz; }; -}; +} diff --git a/simX/include/help.h b/simX/include/help.h index 9c495bbb..802bf501 100644 --- a/simX/include/help.h +++ b/simX/include/help.h @@ -32,6 +32,6 @@ namespace HarpTools { *disasmHelp = "HARP Disassembler command line arguments:\n" " -a, --arch Architecture string.\n" " -o, --output Output filename.\n"; - }; -}; + } +} #endif diff --git a/simX/include/instruction.h b/simX/include/instruction.h index e227cfd3..4df21236 100644 --- a/simX/include/instruction.h +++ b/simX/include/instruction.h @@ -164,7 +164,7 @@ namespace Harp { }; -}; +} #endif diff --git a/simX/include/mem.h b/simX/include/mem.h index 2b4b38d0..f0f340e7 100644 
--- a/simX/include/mem.h +++ b/simX/include/mem.h @@ -99,9 +99,10 @@ namespace Harp { Byte *file; Size blocks; }; - std::vector disks; + + Size wordSize, blockSize; Core &core; - Size wordSize, blockSize;; + std::vector disks; }; class MemoryUnit { @@ -136,7 +137,7 @@ namespace Harp { private: class ADecoder { public: - ADecoder() : zeroChild(NULL), oneChild(NULL), range(0) {} + ADecoder() : zeroChild(NULL), oneChild(NULL), range(0), md(nullptr) {} ADecoder(MemDevice &md, Size range) : zeroChild(NULL), oneChild(NULL), range(range), md(&md) {} Byte *getPtr(Addr a, Size sz, Size wordSize); @@ -145,24 +146,24 @@ namespace Harp { void map(Addr a, MemDevice &md, Size range, Size bit); private: MemDevice &doLookup(Addr a, Size &bit); - ADecoder *zeroChild, *oneChild; - MemDevice *md; + ADecoder *zeroChild, *oneChild; Size range; + MemDevice *md; }; - ADecoder ad; - struct TLBEntry { TLBEntry() {} TLBEntry(Word pfn, Word flags): pfn(pfn), flags(flags) {} - Word flags; Word pfn; + Word flags; }; - std::map tlb; - TLBEntry tlbLookup(Addr vAddr, Word flagMask); - Size pageSize, addrBytes; + + ADecoder ad; + + std::map tlb; + TLBEntry tlbLookup(Addr vAddr, Word flagMask); bool disableVm; }; @@ -402,7 +403,7 @@ namespace Harp { char* content = new char[size]; int x = fread(content, 1, size, fp); - if (!x) { std::cout << "COULD NOT READ FILE\n"; exit(1);} + if (!x) { std::cout << "COULD NOT READ FILE\n"; std::abort();} int offset = 0; char* line = content; @@ -455,7 +456,7 @@ namespace Harp { -}; +} #endif diff --git a/simX/include/obj.h b/simX/include/obj.h index d0ee4357..c64607bb 100644 --- a/simX/include/obj.h +++ b/simX/include/obj.h @@ -40,7 +40,7 @@ namespace Harp { Ref(name, rel), addr(addr) { } virtual void bind(Addr addr, Addr base = 0) { std::cout << "Attempted to bind a SimpleRef.\n"; - exit(1); + std::abort(); } virtual Addr getAddr() const { return this->addr; } Byte *getAddrPtr() { return (Byte*)&addr; } @@ -86,7 +86,7 @@ namespace Harp { // std::cout << "Attempt 
to bind a " << bits << "-bit " // << (relative?"":"non-") << "relative symbol to an address" // " it cannot reach.\n"; -// exit(1); +// std::abort(); // } // virtual Addr getAddr() const { @@ -205,6 +205,6 @@ namespace Harp { // private: // const ArchDef &arch; // }; -}; +} #endif diff --git a/simX/include/types.h b/simX/include/types.h index 1c5c86d9..43598a7d 100644 --- a/simX/include/types.h +++ b/simX/include/types.h @@ -20,6 +20,6 @@ namespace Harp { enum MemFlags {RD_USR = 1, WR_USR = 2, EX_USR = 4, RD_SUP = 8, WR_SUP = 16, EX_SUP = 32}; -}; +} #endif diff --git a/simX/include/util.h b/simX/include/util.h index 006612e3..a7935ca7 100644 --- a/simX/include/util.h +++ b/simX/include/util.h @@ -19,6 +19,6 @@ namespace Harp { Word_u readWord(const std::vector &b, Size &n, Size wordSize); void writeByte(std::vector &p, Size &n, Byte b); void writeWord(std::vector &p, Size &n, Size wordSize, Word w); -}; +} #endif diff --git a/simX/instruction.cpp b/simX/instruction.cpp index c7f45eb6..60d23682 100644 --- a/simX/instruction.cpp +++ b/simX/instruction.cpp @@ -264,16 +264,16 @@ void trap_to_simulator(Warp & c) fstat(file, &st); fprintf(stderr, "------------------------\n"); - fprintf(stderr, "Size of struct: %x\n", sizeof(struct stat)); + fprintf(stderr, "Size of struct: %ld\n", sizeof(struct stat)); fprintf(stderr, "st_mode: %x\n", st.st_mode); - fprintf(stderr, "st_dev: %x\n", st.st_dev); - fprintf(stderr, "st_ino: %x\n", st.st_ino); + fprintf(stderr, "st_dev: %ld\n", st.st_dev); + fprintf(stderr, "st_ino: %ld\n", st.st_ino); fprintf(stderr, "st_uid: %x\n", st.st_uid); fprintf(stderr, "st_gid: %x\n", st.st_gid); - fprintf(stderr, "st_rdev: %x\n", st.st_rdev); - fprintf(stderr, "st_size: %x\n", st.st_size); - fprintf(stderr, "st_blksize: %x\n", st.st_blksize); - fprintf(stderr, "st_blocks: %x\n", st.st_blocks); + fprintf(stderr, "st_rdev: %ld\n", st.st_rdev); + fprintf(stderr, "st_size: %ld\n", st.st_size); + fprintf(stderr, "st_blksize: %ld\n", st.st_blksize); + 
fprintf(stderr, "st_blocks: %ld\n", st.st_blocks); fprintf(stderr, "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"); upload(&write_buffer, (char *) &st.st_mode , sizeof(st.st_mode), c); @@ -517,7 +517,7 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; default: cout << "unsupported MUL/DIV instr\n"; - exit(1); + std::abort(); } } else @@ -584,7 +584,7 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; default: cout << "ERROR: UNSUPPORTED R INST\n"; - exit(1); + std::abort(); } } break; @@ -622,7 +622,7 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; default: cout << "ERROR: UNSUPPORTED L INST\n"; - exit(1); + std::abort(); c.memAccesses.push_back(Warp::MemAccess(false, memAddr)); } break; @@ -709,7 +709,7 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; default: cout << "ERROR: UNSUPPORTED L INST\n"; - exit(1); + std::abort(); } break; case S_INST: @@ -743,7 +743,7 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; default: cout << "ERROR: UNSUPPORTED S INST\n"; - exit(1); + std::abort(); } c.memAccesses.push_back(Warp::MemAccess(true, memAddr)); #ifdef EMU_INSTRUMENTATION @@ -2397,7 +2397,7 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { break; default: cout << "ERROR: UNSUPPORTED S INST\n" << flush; - exit(1); + std::abort(); } // cout << "Loop finished" << endl; // c.memAccesses.push_back(Warp::MemAccess(true, memAddr)); @@ -2408,7 +2408,7 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { default: D(3, "pc: " << hex << (c.pc-4)); D(3, "aERROR: Unsupported instruction: " << *this); - exit(1); + std::abort(); } // break; diff --git a/simX/mem.cpp b/simX/mem.cpp index e7d3bb24..b39a0ee4 100644 --- a/simX/mem.cpp +++ b/simX/mem.cpp @@ -25,7 +25,7 @@ RamMemDevice::RamMemDevice(const char *filename, Size wordSize) : if (!input) { cout << "Error reading file \"" << filename << "\" into RamMemDevice.\n"; - 
exit(1); + std::abort(); } do { contents.push_back(input.get()); } while (input); @@ -34,11 +34,11 @@ RamMemDevice::RamMemDevice(const char *filename, Size wordSize) : } RamMemDevice::RamMemDevice(Size size, Size wordSize) : - contents(size), wordSize(wordSize) {} + wordSize(wordSize), contents(size) {} void RomMemDevice::write(Addr, Word) { cout << "Attempt to write to ROM.\n"; - exit(1); + std::abort(); } Word RamMemDevice::read(Addr addr) { @@ -216,6 +216,7 @@ void *Harp::consoleInputThread(void* arg_vp) { // } // cout << "Console input ended. Exiting.\n"; // exit(4); + return nullptr; } // ConsoleMemDevice::ConsoleMemDevice(Size wS, std::ostream &o, Core &core, @@ -246,7 +247,7 @@ Word DiskControllerMemDevice::read(Addr a) { case 5: return status; default: cout << "Attempt to read invalid disk controller register.\n"; - exit(1); + std::abort(); } } diff --git a/simX/simX.cpp b/simX/simX.cpp index a2fe330f..1db16a5c 100644 --- a/simX/simX.cpp +++ b/simX/simX.cpp @@ -144,14 +144,22 @@ int main(int argc, char** argv) { try { switch (findMode(argc - 1, argv + 1)) { - case HARPTOOL_MODE_ASM: cout << "ASM not supported\n"; - case HARPTOOL_MODE_DISASM: cout << "DISASM not supported\n"; - case HARPTOOL_MODE_EMU: return emu_main (argc - 2, argv + 2); - case HARPTOOL_MODE_LD: cout << "LD not supported\n"; - case HARPTOOL_MODE_HELP: - default: - cout << "Usage:\n" << Help::mainHelp; - return 0; + case HARPTOOL_MODE_ASM: + cout << "ASM not supported\n"; + return -1; + case HARPTOOL_MODE_DISASM: + cout << "DISASM not supported\n"; + return -1; + case HARPTOOL_MODE_EMU: + return emu_main(argc - 2, argv + 2); + case HARPTOOL_MODE_LD: + cout << "LD not supported\n"; + return -1; + case HARPTOOL_MODE_HELP: + [[fallthrough]]; + default: + cout << "Usage:\n" << Help::mainHelp; + return 0; } } catch (BadArg ba) { cout << "Unrecognized argument \"" << ba.arg << "\".\n"; diff --git a/simX/util.cpp b/simX/util.cpp index 09b2ec0c..a1d6ebe6 100644 --- a/simX/util.cpp +++ 
b/simX/util.cpp @@ -13,7 +13,7 @@ using namespace std; // Make it easy for autotools-based build systems to detect this library. extern "C" { int harplib_present = 1; -}; +} void Harp::wordToBytes(Byte *b, Word_u w, Size wordSize) { while (wordSize--) {