diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 4d776267..06f5957f 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -6,7 +6,7 @@ set -e show_usage() { echo "Vortex BlackBox Test Driver v1.0" - echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=] [--help]]" + echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim|simx] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=] [--help]]" } SCRIPT_DIR=$(dirname "$0") @@ -104,6 +104,10 @@ case $DRIVER in fpga) DRIVER_PATH=$VORTEX_HOME/driver/opae DRIVER_EXTRA=fpga + ;; + simx) + DRIVER_PATH=$VORTEX_HOME/driver/simx + DRIVER_EXTRA= ;; *) echo "invalid driver: $DRIVER" diff --git a/driver/common/vx_utils.cpp b/driver/common/vx_utils.cpp index 8e1b18f9..cac3d5d3 100644 --- a/driver/common/vx_utils.cpp +++ b/driver/common/vx_utils.cpp @@ -25,28 +25,6 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_ // get buffer address auto buf_ptr = (uint8_t*)vx_host_ptr(buffer); -#if defined(USE_SIMX) - // default startup routine - ((uint32_t*)buf_ptr)[0] = 0xf1401073; - ((uint32_t*)buf_ptr)[1] = 0xf1401073; - ((uint32_t*)buf_ptr)[2] = 0x30101073; - ((uint32_t*)buf_ptr)[3] = 0x800000b7; - ((uint32_t*)buf_ptr)[4] = 0x000080e7; - err = vx_copy_to_dev(buffer, 0, 5 * 4, 0); - if (err != 0) { - vx_buf_release(buffer); - return err; - } - - // newlib io simulator trap - ((uint32_t*)buf_ptr)[0] = 0x00008067; - err = vx_copy_to_dev(buffer, 0x70000000, 4, 0); - if (err != 0) { - vx_buf_release(buffer); - return err; - } -#endif - // // upload content // diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 13dd9372..dd64569f 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -1,8 +1,7 @@ CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors #CFLAGS 
+= -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors -CFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized - +CFLAGS += -fPIC -Wno-aligned-new -Wno-maybe-uninitialized CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw # control RTL debug print states @@ -26,9 +25,7 @@ DBG_FLAGS += -DDBG_CACHE_REQ_INFO CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 #CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1 -CFLAGS += -fPIC - -CFLAGS += -DUSE_RTLSIM $(CONFIGS) +CFLAGS += $(CONFIGS) CFLAGS += -DDUMP_PERF_STATS diff --git a/driver/simx/Makefile b/driver/simx/Makefile index cf76b198..58db553c 100644 --- a/driver/simx/Makefile +++ b/driver/simx/Makefile @@ -1,53 +1,42 @@ -CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors -#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors +PROJECT = libvortex.so +#PROJECT = libvortex.dylib -CFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized +SIMX_DIR = ../../simX -CFLAGS += -I../../include -I../../../simX/include -I../../../hw +#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors -CFLAGS += -fPIC +CXXFLAGS += -fPIC -Wno-aligned-new -Wno-maybe-uninitialized +CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR) +CXXFLAGS += -DDUMP_PERF_STATS -CFLAGS += -DUSE_SIMX +#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 +#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 +CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1 + +CXXFLAGS += $(CONFIGS) LDFLAGS += -shared -pthread #LDFLAGS += -dynamiclib -pthread -TOP = cache_simX - -RTL_DIR = ../../hw/old_rtl - SRCS = vortex.cpp ../common/vx_utils.cpp -SRCS += ../../simX/args.cpp ../../simX/mem.cpp ../../simX/core.cpp ../../simX/instruction.cpp ../../simX/enc.cpp ../../simX/util.cpp - -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/shared_memory -RTL_INCLUDE += -I../../simX - -VL_FLAGS += -O2 --language 1800-2009 --assert -VL_FLAGS 
+= -Wno-DECLFILENAME -VL_FLAGS += --x-initial unique --x-assign unique -VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH - -# Enable Verilator multithreaded simulation -#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))') -#VL_FLAGS += --threads $(THREADS) +SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/instr.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp # Debugigng ifdef DEBUG - VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS) - CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) + CXXFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) else - VL_FLAGS += -DNDEBUG - CFLAGS += -DNDEBUG + CXXFLAGS += -DNDEBUG endif -PROJECT = libvortex.so -#PROJECT = libvortex.dylib - all: $(PROJECT) -$(PROJECT): $(SRCS) - verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) - make -j -C obj_dir -f V$(TOP).mk +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf $(PROJECT) obj_dir \ No newline at end of file + rm -rf $(PROJECT) *.o .depend \ No newline at end of file diff --git a/driver/simx/vortex.cpp b/driver/simx/vortex.cpp index e336aab7..5281119c 100644 --- a/driver/simx/vortex.cpp +++ b/driver/simx/vortex.cpp @@ -142,16 +142,27 @@ public: private: void run() { - Harp::ArchDef arch("rv32i", NUM_WARPS, NUM_THREADS); - Harp::WordDecoder dec(arch); - Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true); - Harp::Core core(arch, dec, mu); + vortex::ArchDef arch("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS); + vortex::Decoder decoder(arch); + vortex::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true); mu.attach(ram_, 0); - while (core.running()) { - core.step(); + std::vector<std::shared_ptr<vortex::Core>> cores(NUM_CORES); + for (size_t i = 0; i < NUM_CORES; ++i) { + cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu); } - core.printStats(); + + bool
running; + + do { + running = false; + for (size_t i = 0; i < NUM_CORES; ++i) { + if (!cores[i]->running()) + continue; + running = true; + cores[i]->step(); + } + } while (running); } void thread_proc() { @@ -190,7 +201,7 @@ private: bool is_running_; size_t mem_allocation_; std::thread thread_; - Harp::RAM ram_; + vortex::RAM ram_; std::mutex mutex_; }; diff --git a/hw/old_rtl/VX_alu.v b/hw/old_rtl/VX_alu.v deleted file mode 100644 index 9688aad2..00000000 --- a/hw/old_rtl/VX_alu.v +++ /dev/null @@ -1,139 +0,0 @@ -`include "VX_define.v" - -module VX_alu( - input wire[31:0] in_1, - input wire[31:0] in_2, - input wire in_rs2_src, - input wire[31:0] in_itype_immed, - input wire[19:0] in_upper_immed, - input wire[4:0] in_alu_op, - input wire[31:0] in_curr_PC, - output reg[31:0] out_alu_result - ); - - - `ifdef SYN_FUNC - wire which_in2; - - wire[31:0] ALU_in1; - wire[31:0] ALU_in2; - wire[63:0] ALU_in1_mult; - wire[63:0] ALU_in2_mult; - wire[31:0] upper_immed; - wire[31:0] div_result; - wire[31:0] rem_result; - - - assign which_in2 = in_rs2_src == `RS2_IMMED; - - assign ALU_in1 = in_1; - - assign ALU_in2 = which_in2 ? in_itype_immed : in_2; - - - assign upper_immed = {in_upper_immed, {12{1'b0}}}; - - - - //always @(posedge `MUL) begin - - - /* verilator lint_off UNUSED */ - - - wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1}; - wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2}; - assign ALU_in1_mult = (in_alu_op == `MULHU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in1} : alu_in1_signed; - assign ALU_in2_mult = (in_alu_op == `MULHU || in_alu_op == `MULHSU || in_alu_op == `DIVU || in_alu_op == `REMU) ? 
{32'b0, ALU_in2} : alu_in2_signed; - wire[63:0] mult_result = ALU_in1_mult * ALU_in2_mult; - - /* verilator lint_on UNUSED */ - - always @(in_alu_op or ALU_in1 or ALU_in2) begin - case(in_alu_op) - `ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2); - `SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2); - `SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0]; - `SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0; - `SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0; - `XOR: out_alu_result = ALU_in1 ^ ALU_in2; - `SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0]; - `SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0]; - `OR: out_alu_result = ALU_in1 | ALU_in2; - `AND: out_alu_result = ALU_in2 & ALU_in1; - `SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff; - `LUI_ALU: out_alu_result = upper_immed; - `AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed); - `MUL: out_alu_result = mult_result[31:0]; - `MULH: out_alu_result = mult_result[63:32]; - `MULHSU: out_alu_result = mult_result[63:32]; - `MULHU: out_alu_result = mult_result[63:32]; - `DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2)); - `DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2; - `REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2)); - `REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2; - default: out_alu_result = 32'h0; - endcase // in_alu_op - end - - `else - wire which_in2; - - wire[31:0] ALU_in1; - wire[31:0] ALU_in2; - wire[31:0] upper_immed; - - - assign which_in2 = in_rs2_src == `RS2_IMMED; - - assign ALU_in1 = in_1; - - assign ALU_in2 = which_in2 ? 
in_itype_immed : in_2; - - - assign upper_immed = {in_upper_immed, {12{1'b0}}}; - - - - // always @(*) begin - // $display("EXECUTE CURR_PC: %h",in_curr_PC); - // end - - /* verilator lint_off UNUSED */ - wire[63:0] mult_unsigned_result = ALU_in1 * ALU_in2; - wire[63:0] mult_signed_result = $signed(ALU_in1) * $signed(ALU_in2); - - wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1}; - - wire[63:0] mult_signed_un_result = alu_in1_signed * ALU_in2; - /* verilator lint_on UNUSED */ - - always @(in_alu_op or ALU_in1 or ALU_in2) begin - case(in_alu_op) - `ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2); - `SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2); - `SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0]; - `SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0; - `SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0; - `XOR: out_alu_result = ALU_in1 ^ ALU_in2; - `SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0]; - `SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0]; - `OR: out_alu_result = ALU_in1 | ALU_in2; - `AND: out_alu_result = ALU_in2 & ALU_in1; - `SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff; - `LUI_ALU: out_alu_result = upper_immed; - `AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed); - `MUL: begin out_alu_result = mult_signed_result[31:0]; end - `MULH: out_alu_result = mult_signed_result[63:32]; - `MULHSU: out_alu_result = mult_signed_un_result[63:32]; - `MULHU: out_alu_result = mult_unsigned_result[63:32]; - `DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2)); - `DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2; - `REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2)); - `REMU: out_alu_result = (ALU_in2 == 0) ? 
ALU_in1 : ALU_in1 % ALU_in2; - default: out_alu_result = 32'h0; - endcase // in_alu_op - end - `endif - -endmodule // VX_alu \ No newline at end of file diff --git a/hw/old_rtl/VX_back_end.v b/hw/old_rtl/VX_back_end.v deleted file mode 100644 index 640def5f..00000000 --- a/hw/old_rtl/VX_back_end.v +++ /dev/null @@ -1,133 +0,0 @@ -`include "VX_define.v" - -module VX_back_end ( - input wire clk, - input wire reset, - input wire schedule_delay, - - output wire out_mem_delay, - output wire gpr_stage_delay, - VX_jal_response_inter VX_jal_rsp, - VX_branch_response_inter VX_branch_rsp, - - VX_frE_to_bckE_req_inter VX_bckE_req, - VX_wb_inter VX_writeback_inter, - - VX_warp_ctl_inter VX_warp_ctl, - - VX_dcache_response_inter VX_dcache_rsp, - VX_dcache_request_inter VX_dcache_req - -); - - -VX_wb_inter VX_writeback_temp(); -assign VX_writeback_inter.wb = VX_writeback_temp.wb; -assign VX_writeback_inter.rd = VX_writeback_temp.rd; -assign VX_writeback_inter.write_data = VX_writeback_temp.write_data; -assign VX_writeback_inter.wb_valid = VX_writeback_temp.wb_valid; -assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num; - -// assign VX_writeback_inter(VX_writeback_temp); - - -VX_mw_wb_inter VX_mw_wb(); -wire no_slot_mem; - - -VX_mem_req_inter VX_exe_mem_req(); -VX_mem_req_inter VX_mem_req(); - - - -// LSU input + output -VX_lsu_req_inter VX_lsu_req(); -VX_inst_mem_wb_inter VX_mem_wb(); - -// Exec unit input + output -VX_exec_unit_req_inter VX_exec_unit_req(); -VX_inst_exec_wb_inter VX_inst_exec_wb(); - - -// GPU unit input -VX_gpu_inst_req_inter VX_gpu_inst_req(); - -// CSR unit inputs -VX_csr_req_inter VX_csr_req(); -VX_csr_wb_inter VX_csr_wb(); -wire no_slot_csr; -wire stall_gpr_csr; - -VX_gpr_stage VX_gpr_stage( - .clk (clk), - .reset (reset), - .schedule_delay (schedule_delay), - .VX_writeback_inter(VX_writeback_temp), - .VX_bckE_req (VX_bckE_req), - // New - .VX_exec_unit_req(VX_exec_unit_req), - .VX_lsu_req (VX_lsu_req), - .VX_gpu_inst_req 
(VX_gpu_inst_req), - .VX_csr_req (VX_csr_req), - .stall_gpr_csr (stall_gpr_csr), - // End new - .memory_delay (out_mem_delay), - .gpr_stage_delay (gpr_stage_delay) - ); - - -VX_lsu load_store_unit( - .clk (clk), - .reset (reset), - .VX_lsu_req (VX_lsu_req), - .VX_mem_wb (VX_mem_wb), - .VX_dcache_rsp(VX_dcache_rsp), - .VX_dcache_req(VX_dcache_req), - .out_delay (out_mem_delay), - .no_slot_mem (no_slot_mem) - ); - - -VX_execute_unit VX_execUnit( - .clk (clk), - .reset (reset), - .VX_exec_unit_req(VX_exec_unit_req), - .VX_inst_exec_wb (VX_inst_exec_wb), - .VX_jal_rsp (VX_jal_rsp), - .VX_branch_rsp (VX_branch_rsp) - ); - - -VX_gpgpu_inst VX_gpgpu_inst( - .VX_gpu_inst_req(VX_gpu_inst_req), - .VX_warp_ctl (VX_warp_ctl) - ); - -// VX_csr_wrapper VX_csr_wrapper( -// .VX_csr_req(VX_csr_req), -// .VX_csr_wb (VX_csr_wb) -// ); - -VX_csr_pipe VX_csr_pipe( - .clk (clk), - .reset (reset), - .no_slot_csr (no_slot_csr), - .VX_csr_req (VX_csr_req), - .VX_writeback(VX_writeback_temp), - .VX_csr_wb (VX_csr_wb), - .stall_gpr_csr(stall_gpr_csr) - ); - -VX_writeback VX_wb( - .clk (clk), - .reset (reset), - .VX_mem_wb (VX_mem_wb), - .VX_inst_exec_wb (VX_inst_exec_wb), - .VX_csr_wb (VX_csr_wb), - - .VX_writeback_inter(VX_writeback_temp), - .no_slot_mem (no_slot_mem), - .no_slot_csr (no_slot_csr) - ); - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_countones.v b/hw/old_rtl/VX_countones.v deleted file mode 100644 index 62f20e16..00000000 --- a/hw/old_rtl/VX_countones.v +++ /dev/null @@ -1,22 +0,0 @@ -module VX_countones - #( - parameter N = 10 - ) - ( - - input wire[N-1:0] valids, - output reg[$clog2(N):0] count - -); - - integer i; - always @(*) begin - count = 0; - for (i = N-1; i >= 0; i = i - 1) begin - if (valids[i]) begin - count = count + 1; - end - end - end - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_csr_data.v b/hw/old_rtl/VX_csr_data.v deleted file mode 100644 index ab62aa23..00000000 --- a/hw/old_rtl/VX_csr_data.v +++ /dev/null @@ 
-1,82 +0,0 @@ -`include "../VX_define.v" - -module VX_csr_data ( - input wire clk, // Clock - input wire reset, - - input wire[11:0] in_read_csr_address, - - input wire in_write_valid, - input wire[31:0] in_write_csr_data, - input wire[11:0] in_write_csr_address, - - output wire[31:0] out_read_csr_data, - - // For instruction retire counting - input wire in_writeback_valid - -); - - - // wire[`NT_M1:0][31:0] thread_ids; - // wire[`NT_M1:0][31:0] warp_ids; - - // genvar cur_t; - // for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin - // assign thread_ids[cur_t] = cur_t; - // end - - // genvar cur_tw; - // for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin - // assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num}; - // end - - reg[11:0] csr[1023:0]; - reg[63:0] cycle; - reg[63:0] instret; - - - wire read_cycle; - wire read_cycleh; - wire read_instret; - wire read_instreth; - - assign read_cycle = in_read_csr_address == 12'hC00; - assign read_cycleh = in_read_csr_address == 12'hC80; - assign read_instret = in_read_csr_address == 12'hC02; - assign read_instreth = in_read_csr_address == 12'hC82; - - // wire thread_select = in_read_csr_address == 12'h20; - // wire warp_select = in_read_csr_address == 12'h21; - - // assign out_read_csr_data = thread_select ? thread_ids : - // warp_select ? warp_ids : - // 0; - - integer curr_e; - always @(posedge clk or posedge reset) begin - if (reset) begin - for (curr_e = 0; curr_e < 1024; curr_e=curr_e+1) begin - assign csr[curr_e] = 0; - end - cycle <= 0; - instret <= 0; - end else begin - cycle <= cycle + 1; - if (in_write_valid) begin - csr[in_write_csr_address] <= in_write_csr_data[11:0]; - end - if (in_writeback_valid) begin - instret <= instret + 1; - end - end - end - - - assign out_read_csr_data = read_cycle ? cycle[31:0] : - read_cycleh ? cycle[63:32] : - read_instret ? instret[31:0] : - read_instreth ? 
instret[63:32] : - {{20{1'b0}}, csr[in_read_csr_address]}; - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_csr_handler.v b/hw/old_rtl/VX_csr_handler.v deleted file mode 100644 index b6b4e84a..00000000 --- a/hw/old_rtl/VX_csr_handler.v +++ /dev/null @@ -1,84 +0,0 @@ - - -module VX_csr_handler ( - input wire clk, - input wire[11:0] in_decode_csr_address, // done - VX_csr_write_request_inter VX_csr_w_req, - input wire in_wb_valid, - output wire[31:0] out_decode_csr_data // done - ); - - wire in_mem_is_csr; - wire[11:0] in_mem_csr_address; - /* verilator lint_off UNUSED */ - wire[31:0] in_mem_csr_result; - /* verilator lint_on UNUSED */ - - - assign in_mem_is_csr = VX_csr_w_req.is_csr; - assign in_mem_csr_address = VX_csr_w_req.csr_address; - assign in_mem_csr_result = VX_csr_w_req.csr_result; - - - reg[1024:0][11:0] csr; - reg[63:0] cycle; - reg[63:0] instret; - reg[11:0] decode_csr_address; - - - wire read_cycle; - wire read_cycleh; - wire read_instret; - wire read_instreth; - - initial begin - cycle = 0; - instret = 0; - decode_csr_address = 0; - end - - - always @(posedge clk) begin - cycle <= cycle + 1; - decode_csr_address <= in_decode_csr_address; - if (in_wb_valid) begin - instret <= instret + 1; - end - end - - reg[11:0] data_read; - always @(posedge clk) begin - if(in_mem_is_csr) begin - csr[in_mem_csr_address] <= in_mem_csr_result[11:0]; - end - end - - assign data_read = csr[decode_csr_address]; - - - assign read_cycle = decode_csr_address == 12'hC00; - assign read_cycleh = decode_csr_address == 12'hC80; - assign read_instret = decode_csr_address == 12'hC02; - assign read_instreth = decode_csr_address == 12'hC82; - - - /* verilator lint_off WIDTH */ - assign out_decode_csr_data = read_cycle ? cycle[31:0] : - read_cycleh ? cycle[63:32] : - read_instret ? instret[31:0] : - read_instreth ? 
instret[63:32] : - {{20{1'b0}}, data_read}; - /* verilator lint_on WIDTH */ - - - - - -endmodule // VX_csr_handler - - - - - - - diff --git a/hw/old_rtl/VX_csr_pipe.v b/hw/old_rtl/VX_csr_pipe.v deleted file mode 100644 index a5727c60..00000000 --- a/hw/old_rtl/VX_csr_pipe.v +++ /dev/null @@ -1,105 +0,0 @@ - -module VX_csr_pipe ( - input wire clk, // Clock - input wire reset, - input wire no_slot_csr, - VX_csr_req_inter VX_csr_req, - VX_wb_inter VX_writeback, - VX_csr_wb_inter VX_csr_wb, - output wire stall_gpr_csr - -); - - wire[`NT_M1:0] valid_s2; - wire[`NW_M1:0] warp_num_s2; - wire[4:0] rd_s2; - wire[1:0] wb_s2; - wire[4:0] alu_op_s2; - wire is_csr_s2; - wire[11:0] csr_address_s2; - wire[31:0] csr_read_data_s2; - wire[31:0] csr_updated_data_s2; - - wire[31:0] csr_read_data_unqual; - wire[31:0] csr_read_data; - - assign stall_gpr_csr = no_slot_csr && VX_csr_req.is_csr && |(VX_csr_req.valid); - - assign csr_read_data = (csr_address_s2 == VX_csr_req.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual; - - wire writeback = |VX_writeback.wb_valid; - VX_csr_data VX_csr_data( - .clk (clk), - .reset (reset), - .in_read_csr_address (VX_csr_req.csr_address), - - .in_write_valid (is_csr_s2), - .in_write_csr_data (csr_updated_data_s2), - .in_write_csr_address(csr_address_s2), - - .out_read_csr_data (csr_read_data_unqual), - - .in_writeback_valid (writeback) - ); - - - - reg[31:0] csr_updated_data; - always @(*) begin - case(VX_csr_req.alu_op) - `CSR_ALU_RW: csr_updated_data = VX_csr_req.csr_mask; - `CSR_ALU_RS: csr_updated_data = csr_read_data | VX_csr_req.csr_mask; - `CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - VX_csr_req.csr_mask); - default: csr_updated_data = 32'hdeadbeef; - endcase - end - - wire zero = 0; - - VX_generic_register #(.N(`NT + `NW_M1 + 1 + 5 + 2 + 5 + 12 + 64)) csr_reg_s2 ( - .clk (clk), - .reset(reset), - .stall(no_slot_csr), - .flush(zero), - .in ({VX_csr_req.valid, VX_csr_req.warp_num, VX_csr_req.rd, VX_csr_req.wb, 
VX_csr_req.is_csr, VX_csr_req.csr_address, csr_read_data , csr_updated_data }), - .out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2}) - ); - - - wire[`NT_M1:0][31:0] final_csr_data; - - wire[`NT_M1:0][31:0] thread_ids; - wire[`NT_M1:0][31:0] warp_ids; - wire[`NT_M1:0][31:0] csr_vec_read_data_s2; - - genvar cur_t; - for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin - assign thread_ids[cur_t] = cur_t; - end - - genvar cur_tw; - for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin - assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2}; - end - - genvar cur_v; - for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin - assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2; - end - - wire thread_select = csr_address_s2 == 12'h20; - wire warp_select = csr_address_s2 == 12'h21; - - assign final_csr_data = thread_select ? thread_ids : - warp_select ? warp_ids : - csr_vec_read_data_s2; - - - - assign VX_csr_wb.valid = valid_s2; - assign VX_csr_wb.warp_num = warp_num_s2; - assign VX_csr_wb.rd = rd_s2; - assign VX_csr_wb.wb = wb_s2; - assign VX_csr_wb.csr_result = final_csr_data; - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_csr_wrapper.v b/hw/old_rtl/VX_csr_wrapper.v deleted file mode 100644 index 0988ca67..00000000 --- a/hw/old_rtl/VX_csr_wrapper.v +++ /dev/null @@ -1,38 +0,0 @@ - -`include "VX_define.v" - -module VX_csr_wrapper ( - VX_csr_req_inter VX_csr_req, - - VX_csr_wb_inter VX_csr_wb -); - - - wire[`NT_M1:0][31:0] thread_ids; - wire[`NT_M1:0][31:0] warp_ids; - - genvar cur_t; - for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin - assign thread_ids[cur_t] = cur_t; - end - - genvar cur_tw; - for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin - assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, VX_csr_req.warp_num}; - end - - - assign VX_csr_wb.valid = VX_csr_req.valid; - assign VX_csr_wb.warp_num = VX_csr_req.warp_num; - assign VX_csr_wb.rd = VX_csr_req.rd; - 
assign VX_csr_wb.wb = VX_csr_req.wb; - - - wire thread_select = VX_csr_req.csr_address == 12'h20; - wire warp_select = VX_csr_req.csr_address == 12'h21; - - assign VX_csr_wb.csr_result = thread_select ? thread_ids : - warp_select ? warp_ids : - 0; - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_decode.v b/hw/old_rtl/VX_decode.v deleted file mode 100644 index 4f33bbd1..00000000 --- a/hw/old_rtl/VX_decode.v +++ /dev/null @@ -1,361 +0,0 @@ - -`include "VX_define.v" - -module VX_decode( - // Fetch Inputs - VX_inst_meta_inter fd_inst_meta_de, - - // Outputs - VX_frE_to_bckE_req_inter VX_frE_to_bckE_req, - VX_wstall_inter VX_wstall, - VX_join_inter VX_join, - - output wire terminate_sim - -); - - wire[31:0] in_instruction = fd_inst_meta_de.instruction; - wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc; - wire[`NW_M1:0] in_warp_num = fd_inst_meta_de.warp_num; - - assign VX_frE_to_bckE_req.curr_PC = in_curr_PC; - - wire[`NT_M1:0] in_valid = fd_inst_meta_de.valid; - - wire[6:0] curr_opcode; - - wire is_itype; - wire is_rtype; - wire is_stype; - wire is_btype; - wire is_linst; - wire is_jal; - wire is_jalr; - wire is_lui; - wire is_auipc; - wire is_csr; - wire is_csr_immed; - wire is_e_inst; - - wire is_gpgpu; - wire is_wspawn; - wire is_tmc; - wire is_split; - wire is_join; - wire is_barrier; - - wire[2:0] func3; - wire[6:0] func7; - wire[11:0] u_12; - - - wire[7:0] jal_b_19_to_12; - wire jal_b_11; - wire[9:0] jal_b_10_to_1; - wire jal_b_20; - wire jal_b_0; - wire[20:0] jal_unsigned_offset; - wire[31:0] jal_1_offset; - - wire[11:0] jalr_immed; - wire[31:0] jal_2_offset; - - wire jal_sys_cond1; - wire jal_sys_cond2; - wire jal_sys_jal; - wire[31:0] jal_sys_off; - - wire csr_cond1; - wire csr_cond2; - - wire[11:0] alu_tempp; - wire alu_shift_i; - wire[11:0] alu_shift_i_immed; - - wire[1:0] csr_type; - - reg[4:0] csr_alu; - reg[4:0] alu_op; - reg[4:0] mul_alu; - reg[19:0] temp_upper_immed; - reg temp_jal; - reg[31:0] temp_jal_offset; - reg[31:0] 
temp_itype_immed; - reg[2:0] temp_branch_type; - reg temp_branch_stall; - - // always @(posedge reset) begin - - // end - - assign VX_frE_to_bckE_req.valid = fd_inst_meta_de.valid; - - assign VX_frE_to_bckE_req.warp_num = in_warp_num; - - - assign curr_opcode = in_instruction[6:0]; - - - assign VX_frE_to_bckE_req.rd = in_instruction[11:7]; - assign VX_frE_to_bckE_req.rs1 = in_instruction[19:15]; - assign VX_frE_to_bckE_req.rs2 = in_instruction[24:20]; - assign func3 = in_instruction[14:12]; - assign func7 = in_instruction[31:25]; - assign u_12 = in_instruction[31:20]; - - - assign VX_frE_to_bckE_req.PC_next = in_curr_PC + 32'h4; - - - // Write Back sigal - assign is_rtype = (curr_opcode == `R_INST); - assign is_linst = (curr_opcode == `L_INST); - assign is_itype = (curr_opcode == `ALU_INST) || is_linst; - assign is_stype = (curr_opcode == `S_INST); - assign is_btype = (curr_opcode == `B_INST); - assign is_jal = (curr_opcode == `JAL_INST); - assign is_jalr = (curr_opcode == `JALR_INST); - assign is_lui = (curr_opcode == `LUI_INST); - assign is_auipc = (curr_opcode == `AUIPC_INST); - assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0); - assign is_csr_immed = (is_csr) && (func3[2] == 1); - // assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0); - assign is_e_inst = in_instruction == 32'h00000073; - - assign is_gpgpu = (curr_opcode == `GPGPU_INST); - - assign is_tmc = is_gpgpu && (func3 == 0); // Goes to BE - assign is_wspawn = is_gpgpu && (func3 == 1); // Goes to BE - assign is_barrier = is_gpgpu && (func3 == 4); // Goes to BE - assign is_split = is_gpgpu && (func3 == 2); // Goes to BE - assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE - - - assign VX_join.is_join = is_join; - assign VX_join.join_warp_num = in_warp_num; - - - assign VX_frE_to_bckE_req.is_wspawn = is_wspawn; - assign VX_frE_to_bckE_req.is_tmc = is_tmc; - assign VX_frE_to_bckE_req.is_split = is_split; - assign VX_frE_to_bckE_req.is_barrier = is_barrier; - - - - assign 
VX_frE_to_bckE_req.csr_immed = is_csr_immed; - assign VX_frE_to_bckE_req.is_csr = is_csr; - - - assign VX_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL : - is_linst ? `WB_MEM : - (is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU : - `NO_WB; - - - assign VX_frE_to_bckE_req.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG; - - // MEM signals - assign VX_frE_to_bckE_req.mem_read = (is_linst) ? func3 : `NO_MEM_READ; - assign VX_frE_to_bckE_req.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE; - - // UPPER IMMEDIATE - always @(*) begin - case(curr_opcode) - `LUI_INST: temp_upper_immed = {func7, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.rs1, func3}; - `AUIPC_INST: temp_upper_immed = {func7, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.rs1, func3}; - default: temp_upper_immed = 20'h0; - endcase // curr_opcode - end - - assign VX_frE_to_bckE_req.upper_immed = temp_upper_immed; - - - assign jal_b_19_to_12 = in_instruction[19:12]; - assign jal_b_11 = in_instruction[20]; - assign jal_b_10_to_1 = in_instruction[30:21]; - assign jal_b_20 = in_instruction[31]; - assign jal_b_0 = 1'b0; - assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0}; - assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset}; - - - assign jalr_immed = {func7, VX_frE_to_bckE_req.rs2}; - assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed}; - - - assign jal_sys_cond1 = func3 == 3'h0; - assign jal_sys_cond2 = u_12 < 12'h2; - - assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0; - assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 
32'hb0000000 : 32'hdeadbeef; - - // JAL - always @(*) begin - case(curr_opcode) - `JAL_INST: - begin - temp_jal = 1'b1 && (|in_valid); - temp_jal_offset = jal_1_offset; - end - `JALR_INST: - begin - temp_jal = 1'b1 && (|in_valid); - temp_jal_offset = jal_2_offset; - end - `SYS_INST: - begin - // $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) ); - temp_jal = jal_sys_jal && (|in_valid); - temp_jal_offset = jal_sys_off; - end - default: - begin - temp_jal = 1'b0 && (|in_valid); - temp_jal_offset = 32'hdeadbeef; - end - endcase - end - - assign VX_frE_to_bckE_req.jalQual = is_jal; - assign VX_frE_to_bckE_req.jal = temp_jal; - assign VX_frE_to_bckE_req.jal_offset = temp_jal_offset; - - // wire is_ebreak; - - - // assign is_ebreak = is_e_inst; - wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid)); - assign VX_frE_to_bckE_req.ebreak = ebreak; - wire out_ebreak = ebreak; - assign terminate_sim = is_e_inst; - - - // CSR - - assign csr_cond1 = func3 != 3'h0; - assign csr_cond2 = u_12 >= 12'h2; - - assign VX_frE_to_bckE_req.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55; - - - // ITYPE IMEED - assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5); - assign alu_shift_i_immed = {{7{1'b0}}, VX_frE_to_bckE_req.rs2}; - assign alu_tempp = alu_shift_i ? 
alu_shift_i_immed : u_12; - - - always @(*) begin - case(curr_opcode) - `ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp}; - `S_INST: temp_itype_immed = {{20{func7[6]}}, func7, VX_frE_to_bckE_req.rd}; - `L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12}; - `B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]}; - default: temp_itype_immed = 32'hdeadbeef; - endcase - end - - assign VX_frE_to_bckE_req.itype_immed = temp_itype_immed; - - - - always @(*) begin - case(curr_opcode) - `B_INST: - begin - // $display("BRANCH IN DECODE"); - temp_branch_stall = 1'b1 && (|in_valid); - case(func3) - 3'h0: temp_branch_type = `BEQ; - 3'h1: temp_branch_type = `BNE; - 3'h4: temp_branch_type = `BLT; - 3'h5: temp_branch_type = `BGT; - 3'h6: temp_branch_type = `BLTU; - 3'h7: temp_branch_type = `BGTU; - default: temp_branch_type = `NO_BRANCH; - endcase - end - - `JAL_INST: - begin - temp_branch_type = `NO_BRANCH; - temp_branch_stall = 1'b1 && (|in_valid); - end - `JALR_INST: - begin - temp_branch_type = `NO_BRANCH; - temp_branch_stall = 1'b1 && (|in_valid); - end - default: - begin - temp_branch_type = `NO_BRANCH; - temp_branch_stall = 1'b0 && (|in_valid); - end - endcase - end - - assign VX_frE_to_bckE_req.branch_type = temp_branch_type; - - assign VX_wstall.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid); - assign VX_wstall.warp_num = in_warp_num; - - always @(*) begin - // ALU OP - case(func3) - 3'h0: alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB); - 3'h1: alu_op = `SLLA; - 3'h2: alu_op = `SLT; - 3'h3: alu_op = `SLTU; - 3'h4: alu_op = `XOR; - 3'h5: alu_op = (func7 == 7'h0) ? 
`SRL : `SRA; - 3'h6: alu_op = `OR; - 3'h7: alu_op = `AND; - default: alu_op = `NO_ALU; - endcase - end - - always @(*) begin - // ALU OP - case(func3) - 3'h0: mul_alu = `MUL; - 3'h1: mul_alu = `MULH; - 3'h2: mul_alu = `MULHSU; - 3'h3: mul_alu = `MULHU; - 3'h4: mul_alu = `DIV; - 3'h5: mul_alu = `DIVU; - 3'h6: mul_alu = `REM; - 3'h7: mul_alu = `REMU; - default: mul_alu = `NO_ALU; - endcase - end - - assign csr_type = func3[1:0]; - - always @(*) begin - case(csr_type) - 2'h1: csr_alu = `CSR_ALU_RW; - 2'h2: csr_alu = `CSR_ALU_RS; - 2'h3: csr_alu = `CSR_ALU_RC; - default: csr_alu = `NO_ALU; - endcase - end - - wire[4:0] temp_final_alu; - - assign temp_final_alu = is_btype ? ((VX_frE_to_bckE_req.branch_type < `BLTU) ? `SUB : `SUBU) : - is_lui ? `LUI_ALU : - is_auipc ? `AUIPC_ALU : - is_csr ? csr_alu : - (is_stype || is_linst) ? `ADD : - alu_op; - - assign VX_frE_to_bckE_req.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu; - -endmodule - - - - - - - - diff --git a/hw/old_rtl/VX_define.v b/hw/old_rtl/VX_define.v deleted file mode 100644 index f177fbfb..00000000 --- a/hw/old_rtl/VX_define.v +++ /dev/null @@ -1,269 +0,0 @@ -`include "./VX_define_synth.v" - - - -`define NT_M1 (`NT-1) - -// NW_M1 is actually log2(NW) -`define NW_M1 (`CLOG2(`NW)) - -// Uncomment the below line if NW=1 -// `define ONLY - -// `define SYN 1 -// `define ASIC 1 -// `define SYN_FUNC 1 - -`define NUM_BARRIERS 4 - -`define R_INST 7'd51 -`define L_INST 7'd3 -`define ALU_INST 7'd19 -`define S_INST 7'd35 -`define B_INST 7'd99 -`define LUI_INST 7'd55 -`define AUIPC_INST 7'd23 -`define JAL_INST 7'd111 -`define JALR_INST 7'd103 -`define SYS_INST 7'd115 -`define GPGPU_INST 7'h6b - - -`define WB_ALU 2'h1 -`define WB_MEM 2'h2 -`define WB_JAL 2'h3 -`define NO_WB 2'h0 - - -`define RS2_IMMED 1 -`define RS2_REG 0 - - -`define NO_MEM_READ 3'h7 -`define LB_MEM_READ 3'h0 -`define LH_MEM_READ 3'h1 -`define LW_MEM_READ 3'h2 -`define LBU_MEM_READ 3'h4 -`define LHU_MEM_READ 3'h5 - - -`define 
NO_MEM_WRITE 3'h7 -`define SB_MEM_WRITE 3'h0 -`define SH_MEM_WRITE 3'h1 -`define SW_MEM_WRITE 3'h2 - - -`define NO_BRANCH 3'h0 -`define BEQ 3'h1 -`define BNE 3'h2 -`define BLT 3'h3 -`define BGT 3'h4 -`define BLTU 3'h5 -`define BGTU 3'h6 - - -`define NO_ALU 5'd15 -`define ADD 5'd0 -`define SUB 5'd1 -`define SLLA 5'd2 -`define SLT 5'd3 -`define SLTU 5'd4 -`define XOR 5'd5 -`define SRL 5'd6 -`define SRA 5'd7 -`define OR 5'd8 -`define AND 5'd9 -`define SUBU 5'd10 -`define LUI_ALU 5'd11 -`define AUIPC_ALU 5'd12 -`define CSR_ALU_RW 5'd13 -`define CSR_ALU_RS 5'd14 -`define CSR_ALU_RC 5'd15 -`define MUL 5'd16 -`define MULH 5'd17 -`define MULHSU 5'd18 -`define MULHU 5'd19 -`define DIV 5'd20 -`define DIVU 5'd21 -`define REM 5'd22 -`define REMU 5'd23 - - - -// WRITEBACK -`define WB_ALU 2'h1 -`define WB_MEM 2'h2 -`define WB_JAL 2'h3 -`define NO_WB 2'h0 - - -// JAL -`define JUMP 1'h1 -`define NO_JUMP 1'h0 - -// STALLS -`define STALL 1'h1 -`define NO_STALL 1'h0 - - -`define TAKEN 1'b1 -`define NOT_TAKEN 1'b0 - - -`define ZERO_REG 5'h0 - -`define CLOG2(x) \ - (x <= 2) ? 1 : \ - (x <= 4) ? 2 : \ - (x <= 8) ? 3 : \ - (x <= 16) ? 4 : \ - (x <= 32) ? 5 : \ - (x <= 64) ? 6 : \ - (x <= 128) ? 7 : \ - (x <= 256) ? 8 : \ - (x <= 512) ? 9 : \ - (x <= 1024) ? 
10 : \ - -199 - - -// `define PARAM - -// oooooo - -//Cache configurations -//Cache configurations - //Bytes -`define ICACHE_SIZE 4096 -`define ICACHE_WAYS 2 -//Bytes -`define ICACHE_BLOCK 64 -`define ICACHE_BANKS 4 -`define ICACHE_LOG_NUM_BANKS `CLOG2(`ICACHE_BANKS) - -`define ICACHE_NUM_WORDS_PER_BLOCK (`ICACHE_BLOCK / (`ICACHE_BANKS * 4)) -`define ICACHE_NUM_REQ 1 -`define ICACHE_LOG_NUM_REQ `CLOG2(`ICACHE_NUM_REQ) - - //set this to 1 if CACHE_WAYS is 1 -`define ICACHE_WAY_INDEX `CLOG2(`ICACHE_WAYS) -//`define ICACHE_WAY_INDEX 1 -`define ICACHE_BLOCK_PER_BANK (`ICACHE_BLOCK / `ICACHE_BANKS) - -// Offset -`define ICACHE_OFFSET_NB (`CLOG2(`ICACHE_NUM_WORDS_PER_BLOCK)) - -`define ICACHE_ADDR_OFFSET_ST (2+$clog2(`ICACHE_BANKS)) -`define ICACHE_ADDR_OFFSET_ED (`ICACHE_ADDR_OFFSET_ST+(`ICACHE_OFFSET_NB)-1) - - -`define ICACHE_ADDR_OFFSET_RNG `ICACHE_ADDR_OFFSET_ED:`ICACHE_ADDR_OFFSET_ST -`define ICACHE_OFFSET_SIZE_RNG (`CLOG2(`ICACHE_NUM_WORDS_PER_BLOCK)-1):0 -`define ICACHE_OFFSET_ST 0 -`define ICACHE_OFFSET_ED ($clog2(`ICACHE_NUM_WORDS_PER_BLOCK)-1) - -// Index -// `define ICACHE_NUM_IND (`ICACHE_SIZE / (`ICACHE_WAYS * `ICACHE_BLOCK_PER_BANK)) -`define ICACHE_NUM_IND (`ICACHE_SIZE / (`ICACHE_WAYS * `ICACHE_BLOCK)) -`define ICACHE_IND_NB ($clog2(`ICACHE_NUM_IND)) - -`define ICACHE_IND_ST (`ICACHE_ADDR_OFFSET_ED+1) -`define ICACHE_IND_ED (`ICACHE_IND_ST+`ICACHE_IND_NB-1) - -`define ICACHE_ADDR_IND_RNG `ICACHE_IND_ED:`ICACHE_IND_ST -`define ICACHE_IND_SIZE_RNG `ICACHE_IND_NB-1:0 - -`define ICACHE_IND_SIZE_START 0 -`define ICACHE_IND_SIZE_END `ICACHE_IND_NB-1 - - -// Tag -`define ICACHE_ADDR_TAG_RNG 31:(`ICACHE_IND_ED+1) -`define ICACHE_TAG_SIZE_RNG (32-(`ICACHE_IND_ED+1)-1):0 -`define ICACHE_TAG_SIZE_START 0 -`define ICACHE_TAG_SIZE_END (32-(`ICACHE_IND_ED+1)-1) -`define ICACHE_ADDR_TAG_START (`ICACHE_IND_ED+1) -`define ICACHE_ADDR_TAG_END 31 - -//Cache configurations -//Bytes -`define DCACHE_SIZE 4096 -`define DCACHE_WAYS 2 - -//Bytes -`define DCACHE_BLOCK 64 -`define 
DCACHE_BANKS 4 -`define DCACHE_LOG_NUM_BANKS $clog2(`DCACHE_BANKS) -`define DCACHE_NUM_WORDS_PER_BLOCK (`DCACHE_BLOCK / (`DCACHE_BANKS * 4)) -`define DCACHE_NUM_REQ `NT -`define DCACHE_LOG_NUM_REQ $clog2(`DCACHE_NUM_REQ) - -//set this to 1 if CACHE_WAYS is 1 -`define DCACHE_WAY_INDEX $clog2(`DCACHE_WAYS) -//`define DCACHE_WAY_INDEX 1 -`define DCACHE_BLOCK_PER_BANK (`DCACHE_BLOCK / `DCACHE_BANKS) - -// Offset -`define DCACHE_OFFSET_NB ($clog2(`DCACHE_NUM_WORDS_PER_BLOCK)) - -`define DCACHE_ADDR_OFFSET_ST (2+$clog2(`DCACHE_BANKS)) -`define DCACHE_ADDR_OFFSET_ED (`DCACHE_ADDR_OFFSET_ST+(`DCACHE_OFFSET_NB)-1) - - -`define DCACHE_ADDR_OFFSET_RNG `DCACHE_ADDR_OFFSET_ED:`DCACHE_ADDR_OFFSET_ST -`define DCACHE_OFFSET_SIZE_RNG ($clog2(`DCACHE_NUM_WORDS_PER_BLOCK)-1):0 -`define DCACHE_OFFSET_ST 0 -`define DCACHE_OFFSET_ED ($clog2(`DCACHE_NUM_WORDS_PER_BLOCK)-1) - -// Index -// `define DCACHE_NUM_IND (`DCACHE_SIZE / (`DCACHE_WAYS * `DCACHE_BLOCK_PER_BANK)) -`define DCACHE_NUM_IND (`DCACHE_SIZE / (`DCACHE_WAYS * `DCACHE_BLOCK)) -`define DCACHE_IND_NB ($clog2(`DCACHE_NUM_IND)) - -`define DCACHE_IND_ST (`DCACHE_ADDR_OFFSET_ED+1) -`define DCACHE_IND_ED (`DCACHE_IND_ST+`DCACHE_IND_NB-1) - -`define DCACHE_ADDR_IND_RNG `DCACHE_IND_ED:`DCACHE_IND_ST -`define DCACHE_IND_SIZE_RNG `DCACHE_IND_NB-1:0 - -`define DCACHE_IND_SIZE_START 0 -`define DCACHE_IND_SIZE_END `DCACHE_IND_NB-1 - - -// Tag -`define DCACHE_ADDR_TAG_RNG 31:(`DCACHE_IND_ED+1) -`define DCACHE_TAG_SIZE_RNG (32-(`DCACHE_IND_ED+1)-1):0 -`define DCACHE_TAG_SIZE_START 0 -`define DCACHE_TAG_SIZE_END (32-(`DCACHE_IND_ED+1)-1) -`define DCACHE_ADDR_TAG_START (`DCACHE_IND_ED+1) -`define DCACHE_ADDR_TAG_END 31 - -// Mask -`define DCACHE_MEM_REQ_ADDR_MASK (32'hffffffff - (`DCACHE_BLOCK-1)) -`define ICACHE_MEM_REQ_ADDR_MASK (32'hffffffff - (`ICACHE_BLOCK-1)) - - - -/////// - -//`define SHARED_MEMORY_SIZE 4096 -`define SHARED_MEMORY_SIZE 8192 -`define SHARED_MEMORY_BANKS 4 -//`define SHARED_MEMORY_BYTES_PER_READ 16 -//`define 
SHARED_MEMORY_HEIGHT ((`SHARED_MEMORY_SIZE) / (`SHARED_MEMORY_BANKS * `SHARED_MEMORY_BYTES_PER_READ)) - -//`define SHARED_MEMORY_SIZE 16384 -//`define SHARED_MEMORY_BANKS 8 -`define SHARED_MEMORY_BYTES_PER_READ 16 -//`define SHARED_MEMORY_BITS_PER_BANK 3 -`define SHARED_MEMORY_BITS_PER_BANK `CLOG2(`SHARED_MEMORY_BANKS) -`define SHARED_MEMORY_NUM_REQ `NT -`define SHARED_MEMORY_WORDS_PER_READ (`SHARED_MEMORY_BYTES_PER_READ / 4) -`define SHARED_MEMORY_LOG_WORDS_PER_READ $clog2(`SHARED_MEMORY_WORDS_PER_READ) -`define SHARED_MEMORY_HEIGHT ((`SHARED_MEMORY_SIZE) / (`SHARED_MEMORY_BANKS * `SHARED_MEMORY_BYTES_PER_READ)) - -`define SHARED_MEMORY_BANK_OFFSET_ST (2) -`define SHARED_MEMORY_BANK_OFFSET_ED (2+$clog2(`SHARED_MEMORY_BANKS)-1) -`define SHARED_MEMORY_BLOCK_OFFSET_ST (`SHARED_MEMORY_BANK_OFFSET_ED + 1) -`define SHARED_MEMORY_BLOCK_OFFSET_ED (`SHARED_MEMORY_BLOCK_OFFSET_ST +`SHARED_MEMORY_LOG_WORDS_PER_READ-1) -`define SHARED_MEMORY_INDEX_OFFSET_ST (`SHARED_MEMORY_BLOCK_OFFSET_ED + 1) -`define SHARED_MEMORY_INDEX_OFFSET_ED (`SHARED_MEMORY_INDEX_OFFSET_ST + $clog2(`SHARED_MEMORY_HEIGHT)-1) diff --git a/hw/old_rtl/VX_define_synth.v b/hw/old_rtl/VX_define_synth.v deleted file mode 100644 index 0444fe94..00000000 --- a/hw/old_rtl/VX_define_synth.v +++ /dev/null @@ -1,2 +0,0 @@ -`define NT 4 -`define NW 8 diff --git a/hw/old_rtl/VX_dmem_controller.v b/hw/old_rtl/VX_dmem_controller.v deleted file mode 100644 index 39d10b64..00000000 --- a/hw/old_rtl/VX_dmem_controller.v +++ /dev/null @@ -1,188 +0,0 @@ - -`include "VX_define.v" - -module VX_dmem_controller ( - input wire clk, - input wire reset, - // MEM-RAM - VX_dram_req_rsp_inter VX_dram_req_rsp, - VX_dram_req_rsp_inter VX_dram_req_rsp_icache, - // MEM-Processor - VX_icache_request_inter VX_icache_req, - VX_icache_response_inter VX_icache_rsp, - VX_dcache_request_inter VX_dcache_req, - VX_dcache_response_inter VX_dcache_rsp -); - - - wire to_shm = VX_dcache_req.out_cache_driver_in_address[0][31:24] == 8'hFF; - - 
wire[`NT_M1:0] sm_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{to_shm}}; - wire[`NT_M1:0] cache_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{~to_shm}}; - - wire read_or_write = (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|cache_driver_in_valid); - - - - wire[`NT_M1:0][31:0] cache_driver_in_address = VX_dcache_req.out_cache_driver_in_address; - wire[2:0] cache_driver_in_mem_read = !(|cache_driver_in_valid) ? `NO_MEM_READ : VX_dcache_req.out_cache_driver_in_mem_read; - wire[2:0] cache_driver_in_mem_write = !(|cache_driver_in_valid) ? `NO_MEM_WRITE : VX_dcache_req.out_cache_driver_in_mem_write; - wire[`NT_M1:0][31:0] cache_driver_in_data = VX_dcache_req.out_cache_driver_in_data; - - - wire[2:0] sm_driver_in_mem_read = !(|sm_driver_in_valid) ? `NO_MEM_READ : VX_dcache_req.out_cache_driver_in_mem_read; - wire[2:0] sm_driver_in_mem_write = !(|sm_driver_in_valid) ? `NO_MEM_WRITE : VX_dcache_req.out_cache_driver_in_mem_write; - - - wire[`NT_M1:0][31:0] cache_driver_out_data; - wire[`NT_M1:0][31:0] sm_driver_out_data; - wire[`NT_M1:0] cache_driver_out_valid; // Not used for now - wire sm_delay; - wire cache_delay; - - - // I_Cache Signals - - wire[31:0] icache_instruction_out; - wire icache_delay; - wire icache_driver_in_valid = VX_icache_req.out_cache_driver_in_valid; - wire[31:0] icache_driver_in_address = VX_icache_req.pc_address; - wire[2:0] icache_driver_in_mem_read = !(|icache_driver_in_valid) ? `NO_MEM_READ : VX_icache_req.out_cache_driver_in_mem_read; - wire[2:0] icache_driver_in_mem_write = !(|icache_driver_in_valid) ? 
`NO_MEM_WRITE : VX_icache_req.out_cache_driver_in_mem_write; - wire[31:0] icache_driver_in_data = VX_icache_req.out_cache_driver_in_data; - wire read_or_write_ic = (VX_icache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|icache_driver_in_valid); - - wire valid_read_cache = !cache_delay && cache_driver_in_valid[0]; - - - VX_shared_memory #( - .SM_SIZE (`SHARED_MEMORY_SIZE), - .SM_BANKS (`SHARED_MEMORY_BANKS), - .SM_BYTES_PER_READ (`SHARED_MEMORY_BYTES_PER_READ), - .SM_WORDS_PER_READ (`SHARED_MEMORY_WORDS_PER_READ), - .SM_LOG_WORDS_PER_READ (`SHARED_MEMORY_LOG_WORDS_PER_READ), - .SM_BANK_OFFSET_START (`SHARED_MEMORY_BANK_OFFSET_ST), - .SM_BANK_OFFSET_END (`SHARED_MEMORY_BANK_OFFSET_ED), - .SM_BLOCK_OFFSET_START (`SHARED_MEMORY_BLOCK_OFFSET_ST), - .SM_BLOCK_OFFSET_END (`SHARED_MEMORY_BLOCK_OFFSET_ED), - .SM_INDEX_START (`SHARED_MEMORY_INDEX_OFFSET_ST), - .SM_INDEX_END (`SHARED_MEMORY_INDEX_OFFSET_ED), - .SM_HEIGHT (`SHARED_MEMORY_HEIGHT), - .NUM_REQ (`SHARED_MEMORY_NUM_REQ), - .BITS_PER_BANK (`SHARED_MEMORY_BITS_PER_BANK) - ) - shared_memory - ( - .clk (clk), - .reset (reset), - .in_valid (sm_driver_in_valid), - .in_address(cache_driver_in_address), - .in_data (cache_driver_in_data), - .mem_read (sm_driver_in_mem_read), - .mem_write (sm_driver_in_mem_write), - .out_valid (cache_driver_out_valid), - .out_data (sm_driver_out_data), - .stall (sm_delay) - ); - - - VX_d_cache#( - .CACHE_SIZE (`DCACHE_SIZE), - .CACHE_WAYS (`DCACHE_WAYS), - .CACHE_BLOCK (`DCACHE_BLOCK), - .CACHE_BANKS (`DCACHE_BANKS), - .LOG_NUM_BANKS (`DCACHE_LOG_NUM_BANKS), - .NUM_REQ (`DCACHE_NUM_REQ), - .LOG_NUM_REQ (`DCACHE_LOG_NUM_REQ), - .NUM_IND (`DCACHE_NUM_IND), - .CACHE_WAY_INDEX (`DCACHE_WAY_INDEX), - .NUM_WORDS_PER_BLOCK (`DCACHE_NUM_WORDS_PER_BLOCK), - .OFFSET_SIZE_START (`DCACHE_OFFSET_ST), - .OFFSET_SIZE_END (`DCACHE_OFFSET_ED), - .TAG_SIZE_START (`DCACHE_TAG_SIZE_START), - .TAG_SIZE_END (`DCACHE_TAG_SIZE_END), - .IND_SIZE_START (`DCACHE_IND_SIZE_START), - .IND_SIZE_END 
(`DCACHE_IND_SIZE_END), - .ADDR_TAG_START (`DCACHE_ADDR_TAG_START), - .ADDR_TAG_END (`DCACHE_ADDR_TAG_END), - .ADDR_OFFSET_START (`DCACHE_ADDR_OFFSET_ST), - .ADDR_OFFSET_END (`DCACHE_ADDR_OFFSET_ED), - .ADDR_IND_START (`DCACHE_IND_ST), - .ADDR_IND_END (`DCACHE_IND_ED), - .MEM_ADDR_REQ_MASK (`DCACHE_MEM_REQ_ADDR_MASK) - ) - dcache - ( - .clk (clk), - .rst (reset), - .i_p_valid (cache_driver_in_valid), - .i_p_addr (cache_driver_in_address), - .i_p_writedata (cache_driver_in_data), - .i_p_read_or_write (read_or_write), - .i_p_mem_read (cache_driver_in_mem_read), - .i_p_mem_write (cache_driver_in_mem_write), - .o_p_readdata (cache_driver_out_data), - .o_p_delay (cache_delay), - .o_m_evict_addr (VX_dram_req_rsp.o_m_evict_addr), - .o_m_read_addr (VX_dram_req_rsp.o_m_read_addr), - .o_m_valid (VX_dram_req_rsp.o_m_valid), - .o_m_writedata (VX_dram_req_rsp.o_m_writedata), - .o_m_read_or_write (VX_dram_req_rsp.o_m_read_or_write), - .i_m_readdata (VX_dram_req_rsp.i_m_readdata), - .i_m_ready (VX_dram_req_rsp.i_m_ready) - ); - - -VX_d_cache#( - .CACHE_SIZE (`ICACHE_SIZE), - .CACHE_WAYS (`ICACHE_WAYS), - .CACHE_BLOCK (`ICACHE_BLOCK), - .CACHE_BANKS (`ICACHE_BANKS), - .LOG_NUM_BANKS (`ICACHE_LOG_NUM_BANKS), - .NUM_REQ (`ICACHE_NUM_REQ), - .LOG_NUM_REQ (`ICACHE_LOG_NUM_REQ), - .NUM_IND (`ICACHE_NUM_IND), - .CACHE_WAY_INDEX (`ICACHE_WAY_INDEX), - .NUM_WORDS_PER_BLOCK (`ICACHE_NUM_WORDS_PER_BLOCK), - .OFFSET_SIZE_START (`ICACHE_OFFSET_ST), - .OFFSET_SIZE_END (`ICACHE_OFFSET_ED), - .TAG_SIZE_START (`ICACHE_TAG_SIZE_START), - .TAG_SIZE_END (`ICACHE_TAG_SIZE_END), - .IND_SIZE_START (`ICACHE_IND_SIZE_START), - .IND_SIZE_END (`ICACHE_IND_SIZE_END), - .ADDR_TAG_START (`ICACHE_ADDR_TAG_START), - .ADDR_TAG_END (`ICACHE_ADDR_TAG_END), - .ADDR_OFFSET_START (`ICACHE_ADDR_OFFSET_ST), - .ADDR_OFFSET_END (`ICACHE_ADDR_OFFSET_ED), - .ADDR_IND_START (`ICACHE_IND_ST), - .ADDR_IND_END (`ICACHE_IND_ED), - .MEM_ADDR_REQ_MASK (`ICACHE_MEM_REQ_ADDR_MASK) - ) icache - ( - .clk (clk), - .rst (reset), - 
.i_p_valid (icache_driver_in_valid), - .i_p_addr (icache_driver_in_address), - .i_p_writedata (icache_driver_in_data), - .i_p_read_or_write (read_or_write_ic), - .i_p_mem_read (icache_driver_in_mem_read), - .i_p_mem_write (icache_driver_in_mem_write), - .o_p_readdata (icache_instruction_out), - .o_p_delay (icache_delay), - .o_m_evict_addr (VX_dram_req_rsp_icache.o_m_evict_addr), - .o_m_read_addr (VX_dram_req_rsp_icache.o_m_read_addr), - .o_m_valid (VX_dram_req_rsp_icache.o_m_valid), - .o_m_writedata (VX_dram_req_rsp_icache.o_m_writedata), - .o_m_read_or_write (VX_dram_req_rsp_icache.o_m_read_or_write), - .i_m_readdata (VX_dram_req_rsp_icache.i_m_readdata), - .i_m_ready (VX_dram_req_rsp_icache.i_m_ready) - ); - - assign VX_dcache_rsp.in_cache_driver_out_data = to_shm ? sm_driver_out_data : cache_driver_out_data; - assign VX_dcache_rsp.delay = sm_delay || cache_delay; - - assign VX_icache_rsp.instruction = icache_instruction_out; - assign VX_icache_rsp.delay = icache_delay; - - -endmodule diff --git a/hw/old_rtl/VX_execute_unit.v b/hw/old_rtl/VX_execute_unit.v deleted file mode 100644 index c64c1181..00000000 --- a/hw/old_rtl/VX_execute_unit.v +++ /dev/null @@ -1,168 +0,0 @@ -`include "VX_define.v" - -module VX_execute_unit ( - input wire clk, - input wire reset, - // Request - VX_exec_unit_req_inter VX_exec_unit_req, - - // Output - // Writeback - VX_inst_exec_wb_inter VX_inst_exec_wb, - // JAL Response - VX_jal_response_inter VX_jal_rsp, - // Branch Response - VX_branch_response_inter VX_branch_rsp -); - - - - wire[`NT_M1:0][31:0] in_a_reg_data; - wire[`NT_M1:0][31:0] in_b_reg_data; - wire[4:0] in_alu_op; - wire in_rs2_src; - wire[31:0] in_itype_immed; - wire[2:0] in_branch_type; - wire[19:0] in_upper_immed; - wire in_jal; - wire[31:0] in_jal_offset; - wire[31:0] in_curr_PC; - - assign in_a_reg_data = VX_exec_unit_req.a_reg_data; - assign in_b_reg_data = VX_exec_unit_req.b_reg_data; - assign in_alu_op = VX_exec_unit_req.alu_op; - assign in_rs2_src = 
VX_exec_unit_req.rs2_src; - assign in_itype_immed = VX_exec_unit_req.itype_immed; - assign in_branch_type = VX_exec_unit_req.branch_type; - assign in_upper_immed = VX_exec_unit_req.upper_immed; - assign in_jal = VX_exec_unit_req.jal; - assign in_jal_offset = VX_exec_unit_req.jal_offset; - assign in_curr_PC = VX_exec_unit_req.curr_PC; - - - wire[`NT_M1:0][31:0] alu_result; - genvar index_out_reg; - generate - for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) - begin - VX_alu vx_alu( - // .in_reg_data (in_reg_data[1:0]), - .in_1 (in_a_reg_data[index_out_reg]), - .in_2 (in_b_reg_data[index_out_reg]), - .in_rs2_src (in_rs2_src), - .in_itype_immed(in_itype_immed), - .in_upper_immed(in_upper_immed), - .in_alu_op (in_alu_op), - .in_curr_PC (in_curr_PC), - .out_alu_result(alu_result[index_out_reg]) - ); - end - endgenerate - - - wire [$clog2(`NT)-1:0] jal_branch_use_index; - wire jal_branch_found_valid; - VX_generic_priority_encoder #(.N(`NT)) choose_alu_result( - .valids(VX_exec_unit_req.valid), - .index (jal_branch_use_index), - .found (jal_branch_found_valid) - ); - - wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index]; - - reg temp_branch_dir; - always @(*) - begin - case(VX_exec_unit_req.branch_type) - `BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN; - `BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN; - `BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN; - `BGT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN; - `BLTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN; - `BGTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? 
`TAKEN : `NOT_TAKEN; - `NO_BRANCH: temp_branch_dir = `NOT_TAKEN; - default: temp_branch_dir = `NOT_TAKEN; - endcase // in_branch_type - end - - - wire[`NT_M1:0][31:0] duplicate_PC_data; - genvar i; - generate - for (i = 0; i < `NT; i=i+1) - begin - assign duplicate_PC_data[i] = VX_exec_unit_req.PC_next; - end - endgenerate - - - // VX_inst_exec_wb_inter VX_inst_exec_wb_temp(); - // JAL Response - VX_jal_response_inter VX_jal_rsp_temp(); - // Branch Response - VX_branch_response_inter VX_branch_rsp_temp(); - - // Actual Writeback - assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd; - assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb; - assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid; - assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num; - assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result; - - assign VX_inst_exec_wb.exec_wb_pc = in_curr_PC; - // Jal rsp - assign VX_jal_rsp_temp.jal = in_jal; - assign VX_jal_rsp_temp.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset); - assign VX_jal_rsp_temp.jal_warp_num = VX_exec_unit_req.warp_num; - - // Branch rsp - assign VX_branch_rsp_temp.valid_branch = (VX_exec_unit_req.branch_type != `NO_BRANCH) && (|VX_exec_unit_req.valid); - assign VX_branch_rsp_temp.branch_dir = temp_branch_dir; - assign VX_branch_rsp_temp.branch_warp_num = VX_exec_unit_req.warp_num; - assign VX_branch_rsp_temp.branch_dest = $signed(VX_exec_unit_req.curr_PC) + ($signed(VX_exec_unit_req.itype_immed) << 1); // itype_immed = branch_offset - - - wire zero = 0; - - // VX_generic_register #(.N(174)) exec_reg( - // .clk (clk), - // .reset(reset), - // .stall(zero), - // .flush(zero), - // .in ({VX_inst_exec_wb_temp.rd, VX_inst_exec_wb_temp.wb, VX_inst_exec_wb_temp.wb_valid, VX_inst_exec_wb_temp.wb_warp_num, VX_inst_exec_wb_temp.alu_result, VX_inst_exec_wb_temp.exec_wb_pc}), - // .out ({VX_inst_exec_wb.rd , VX_inst_exec_wb.wb , VX_inst_exec_wb.wb_valid , 
VX_inst_exec_wb.wb_warp_num , VX_inst_exec_wb.alu_result , VX_inst_exec_wb.exec_wb_pc }) - // ); - - VX_generic_register #(.N(33 + `NW_M1 + 1)) jal_reg( - .clk (clk), - .reset(reset), - .stall(zero), - .flush(zero), - .in ({VX_jal_rsp_temp.jal, VX_jal_rsp_temp.jal_dest, VX_jal_rsp_temp.jal_warp_num}), - .out ({VX_jal_rsp.jal , VX_jal_rsp.jal_dest , VX_jal_rsp.jal_warp_num}) - ); - - VX_generic_register #(.N(34 + `NW_M1 + 1)) branch_reg( - .clk (clk), - .reset(reset), - .stall(zero), - .flush(zero), - .in ({VX_branch_rsp_temp.valid_branch, VX_branch_rsp_temp.branch_dir, VX_branch_rsp_temp.branch_warp_num, VX_branch_rsp_temp.branch_dest}), - .out ({VX_branch_rsp.valid_branch , VX_branch_rsp.branch_dir , VX_branch_rsp.branch_warp_num , VX_branch_rsp.branch_dest }) - ); - - // always @(*) begin - // case(in_alu_op) - // `CSR_ALU_RW: out_csr_result = in_csr_mask; - // `CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask; - // `CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask); - // default: out_csr_result = 32'hdeadbeef; - // endcase - - // end - - - // assign out_is_csr = VX_exec_unit_req.is_csr; - // assign out_csr_address = VX_exec_unit_req.csr_address; - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_fetch.v b/hw/old_rtl/VX_fetch.v deleted file mode 100644 index d71df00f..00000000 --- a/hw/old_rtl/VX_fetch.v +++ /dev/null @@ -1,103 +0,0 @@ - -`include "VX_define.v" - -module VX_fetch ( - input wire clk, - input wire reset, - VX_wstall_inter VX_wstall, - VX_join_inter VX_join, - input wire schedule_delay, - VX_icache_response_inter icache_response, - VX_icache_request_inter icache_request, - - output wire out_ebreak, - VX_jal_response_inter VX_jal_rsp, - VX_branch_response_inter VX_branch_rsp, - VX_inst_meta_inter fe_inst_meta_fd, - VX_warp_ctl_inter VX_warp_ctl -); - - // Locals - wire pipe_stall; - - - assign pipe_stall = schedule_delay || icache_response.delay; - - wire[`NT_M1:0] thread_mask; - wire[`NW_M1:0] warp_num; - 
wire[31:0] warp_pc; - wire scheduled_warp; - VX_warp_scheduler warp_scheduler( - .clk (clk), - .reset (reset), - .stall (pipe_stall), - - .is_barrier (VX_warp_ctl.is_barrier), - .barrier_id (VX_warp_ctl.barrier_id), - .num_warps (VX_warp_ctl.num_warps), - .barrier_warp_num (VX_warp_ctl.warp_num), - - // Wspawn - .wspawn (VX_warp_ctl.wspawn), - .wsapwn_pc (VX_warp_ctl.wspawn_pc), - .wspawn_new_active(VX_warp_ctl.wspawn_new_active), - // CTM - .ctm (VX_warp_ctl.change_mask), - .ctm_mask (VX_warp_ctl.thread_mask), - .ctm_warp_num (VX_warp_ctl.warp_num), - // WHALT - .whalt (VX_warp_ctl.ebreak), - .whalt_warp_num (VX_warp_ctl.warp_num), - // Wstall - .wstall (VX_wstall.wstall), - .wstall_warp_num (VX_wstall.warp_num), - - // Join - .is_join (VX_join.is_join), - .join_warp_num (VX_join.join_warp_num), - - // Split - .is_split (VX_warp_ctl.is_split), - .dont_split (VX_warp_ctl.dont_split), - .split_new_mask (VX_warp_ctl.split_new_mask), - .split_later_mask (VX_warp_ctl.split_later_mask), - .split_save_pc (VX_warp_ctl.split_save_pc), - .split_warp_num (VX_warp_ctl.warp_num), - - // JAL - .jal (VX_jal_rsp.jal), - .jal_dest (VX_jal_rsp.jal_dest), - .jal_warp_num (VX_jal_rsp.jal_warp_num), - - // Branch - .branch_valid (VX_branch_rsp.valid_branch), - .branch_dir (VX_branch_rsp.branch_dir), - .branch_dest (VX_branch_rsp.branch_dest), - .branch_warp_num (VX_branch_rsp.branch_warp_num), - - // Outputs - .thread_mask (thread_mask), - .warp_num (warp_num), - .warp_pc (warp_pc), - .out_ebreak (out_ebreak), - .scheduled_warp (scheduled_warp) - ); - - // always @(*) begin - // $display("Inside verilog instr: %h, pc: %h", icache_response.instruction, warp_pc); - // end - - assign icache_request.pc_address = warp_pc; - assign icache_request.out_cache_driver_in_valid = !schedule_delay && scheduled_warp; - assign icache_request.out_cache_driver_in_mem_read = `LW_MEM_READ; - assign icache_request.out_cache_driver_in_mem_write = `NO_MEM_WRITE; - assign 
icache_request.out_cache_driver_in_data = 32'b0; - - assign fe_inst_meta_fd.warp_num = warp_num; - assign fe_inst_meta_fd.valid = thread_mask; - - assign fe_inst_meta_fd.instruction = (thread_mask == 0) ? 32'b0 : icache_response.instruction; - assign fe_inst_meta_fd.inst_pc = warp_pc; - - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_front_end.v b/hw/old_rtl/VX_front_end.v deleted file mode 100644 index eaf5e8c9..00000000 --- a/hw/old_rtl/VX_front_end.v +++ /dev/null @@ -1,89 +0,0 @@ -`include "VX_define.v" - -module VX_front_end ( - input wire clk, - input wire reset, - - input wire schedule_delay, - - VX_warp_ctl_inter VX_warp_ctl, - - VX_icache_response_inter icache_response_fe, - VX_icache_request_inter icache_request_fe, - - VX_jal_response_inter VX_jal_rsp, - VX_branch_response_inter VX_branch_rsp, - - VX_frE_to_bckE_req_inter VX_bckE_req, - - output wire fetch_ebreak -); - - -VX_inst_meta_inter fe_inst_meta_fd(); - -VX_frE_to_bckE_req_inter VX_frE_to_bckE_req(); -VX_inst_meta_inter fd_inst_meta_de(); - -wire total_freeze = schedule_delay; - -/* verilator lint_off UNUSED */ -// wire real_fetch_ebreak; -/* verilator lint_on UNUSED */ - -wire vortex_ebreak; -wire terminate_sim; - -assign fetch_ebreak = vortex_ebreak || terminate_sim; - - -VX_wstall_inter VX_wstall(); -VX_join_inter VX_join(); - -VX_fetch vx_fetch( - .clk (clk), - .reset (reset), - .VX_wstall (VX_wstall), - .VX_join (VX_join), - .schedule_delay (schedule_delay), - .VX_jal_rsp (VX_jal_rsp), - .icache_response (icache_response_fe), - .VX_warp_ctl (VX_warp_ctl), - - .icache_request (icache_request_fe), - .VX_branch_rsp (VX_branch_rsp), - .out_ebreak (vortex_ebreak), // fetch_ebreak - .fe_inst_meta_fd (fe_inst_meta_fd) - ); - -VX_f_d_reg vx_f_d_reg( - .clk (clk), - .reset (reset), - .in_freeze (total_freeze), - .fe_inst_meta_fd(fe_inst_meta_fd), - .fd_inst_meta_de(fd_inst_meta_de) - ); - - -VX_decode vx_decode( - .fd_inst_meta_de (fd_inst_meta_de), - 
.VX_frE_to_bckE_req(VX_frE_to_bckE_req), - .VX_wstall (VX_wstall), - .VX_join (VX_join), - .terminate_sim (terminate_sim) - ); - -wire no_br_stall = 0; - -VX_d_e_reg vx_d_e_reg( - .clk (clk), - .reset (reset), - .in_branch_stall(no_br_stall), - .in_freeze (total_freeze), - .VX_frE_to_bckE_req(VX_frE_to_bckE_req), - .VX_bckE_req (VX_bckE_req) - ); - -endmodule - - diff --git a/hw/old_rtl/VX_generic_priority_encoder.v b/hw/old_rtl/VX_generic_priority_encoder.v deleted file mode 100644 index 6bef1a4f..00000000 --- a/hw/old_rtl/VX_generic_priority_encoder.v +++ /dev/null @@ -1,27 +0,0 @@ -`include "../VX_define.v" - -module VX_generic_priority_encoder - #( - parameter N = 1 - ) - ( - input wire[N-1:0] valids, - //output reg[$clog2(N)-1:0] index, - output reg[(`CLOG2(N))-1:0] index, - //output reg[`CLOG2(N):0] index, // eh - output reg found - ); - - integer i; - always @(*) begin - index = 0; - found = 0; - for (i = N-1; i >= 0; i = i - 1) begin - if (valids[i]) begin - //index = i[$clog2(N)-1:0]; - index = i[(`CLOG2(N))-1:0]; - found = 1; - end - end - end -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_generic_register.v b/hw/old_rtl/VX_generic_register.v deleted file mode 100644 index 7a1a023d..00000000 --- a/hw/old_rtl/VX_generic_register.v +++ /dev/null @@ -1,34 +0,0 @@ - - -module VX_generic_register - #( - parameter N = 1 - ) - ( - input clk, - input reset, - input stall, - input flush, - input[N-1:0] in, - output [N-1:0] out - ); - - - reg[N-1:0] value; - - - - always @(posedge clk or posedge reset) begin - if (reset) begin - value <= 0; - end else if (flush) begin - value <= 0; - end else if (~stall) begin - value <= in; - end - end - - - assign out = value; - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_generic_stack.v b/hw/old_rtl/VX_generic_stack.v deleted file mode 100644 index cdac974f..00000000 --- a/hw/old_rtl/VX_generic_stack.v +++ /dev/null @@ -1,38 +0,0 @@ -module VX_generic_stack - #( - parameter WIDTH = 40, - 
parameter DEPTH = 2 - ) - ( - input wire clk, - input wire reset, - input wire push, - input wire pop, - input reg [WIDTH - 1:0] q1, - input reg [WIDTH - 1:0] q2, - output wire[WIDTH - 1:0] d - ); - - - reg [DEPTH - 1:0] ptr; - reg [WIDTH - 1:0] stack [0:(1 << DEPTH) - 1]; - - integer i; - always @(posedge clk) begin - if (reset) begin - ptr <= 0; - for (i = 0; i < (1 << DEPTH); i=i+1) stack[i] <= 0; - end else if (push) begin - stack[ptr] <= q1; - stack[ptr+1] <= q2; - ptr <= ptr + 2; - end else if (pop) begin - ptr <= ptr - 1; - end - - end - - - assign d = stack[ptr - 1]; - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_gpgpu_inst.v b/hw/old_rtl/VX_gpgpu_inst.v deleted file mode 100644 index 01a50515..00000000 --- a/hw/old_rtl/VX_gpgpu_inst.v +++ /dev/null @@ -1,85 +0,0 @@ -`include "VX_define.v" - -module VX_gpgpu_inst ( - // Input - VX_gpu_inst_req_inter VX_gpu_inst_req, - - // Output - VX_warp_ctl_inter VX_warp_ctl -); - - - wire[`NT_M1:0] curr_valids = VX_gpu_inst_req.valid; - wire is_split = (VX_gpu_inst_req.is_split); - - wire[`NT_M1:0] tmc_new_mask; - genvar curr_t; - for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) - begin - assign tmc_new_mask[curr_t] = curr_t < VX_gpu_inst_req.a_reg_data[0]; - end - - wire valid_inst = (|curr_valids); - - assign VX_warp_ctl.warp_num = VX_gpu_inst_req.warp_num; - assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc) && valid_inst; - assign VX_warp_ctl.thread_mask = VX_gpu_inst_req.is_tmc ? 
tmc_new_mask : 0; - - // assign VX_warp_ctl.ebreak = (VX_gpu_inst_req.a_reg_data[0] == 0) && valid_inst; - assign VX_warp_ctl.ebreak = VX_warp_ctl.change_mask && (VX_warp_ctl.thread_mask == 0); - - - wire wspawn = VX_gpu_inst_req.is_wspawn; - wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2; - wire[`NW-1:0] wspawn_new_active; - genvar curr_w; - for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) - begin - assign wspawn_new_active[curr_w] = curr_w < VX_gpu_inst_req.a_reg_data[0]; - end - - - assign VX_warp_ctl.is_barrier = VX_gpu_inst_req.is_barrier && valid_inst; - assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0]; - - wire[31:0] num_warps_m1 = VX_gpu_inst_req.rd2 - 1; - assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NW):0]; - - assign VX_warp_ctl.wspawn = wspawn; - assign VX_warp_ctl.wspawn_pc = wspawn_pc; - assign VX_warp_ctl.wspawn_new_active = wspawn_new_active; - - wire[`NT_M1:0] split_new_use_mask; - wire[`NT_M1:0] split_new_later_mask; - - // VX_gpu_inst_req.pc - genvar curr_s_t; - for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin - wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1); - - assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool); - assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool); - end - - wire[$clog2(`NT):0] num_valids; - - VX_countones #(.N(`NT)) valids_counter ( - .valids(curr_valids), - .count (num_valids) - ); - - // wire[`NW_M1:0] num_valids = $countones(curr_valids); - - - assign VX_warp_ctl.is_split = is_split && (num_valids > 1); - assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NT{1'b1}})); - assign VX_warp_ctl.split_new_mask = split_new_use_mask; - assign VX_warp_ctl.split_later_mask = split_new_later_mask; - assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next; - assign VX_warp_ctl.split_warp_num = VX_gpu_inst_req.warp_num; - - // VX_gpu_inst_req.is_wspawn - // VX_gpu_inst_req.is_split 
- // VX_gpu_inst_req.is_barrier - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_gpr.v b/hw/old_rtl/VX_gpr.v deleted file mode 100644 index 6f239c51..00000000 --- a/hw/old_rtl/VX_gpr.v +++ /dev/null @@ -1,172 +0,0 @@ - -`include "VX_define.v" - -module VX_gpr ( - input wire clk, - input wire reset, - input wire valid_write_request, - VX_gpr_read_inter VX_gpr_read, - VX_wb_inter VX_writeback_inter, - - output reg[`NT_M1:0][31:0] out_a_reg_data, - output reg[`NT_M1:0][31:0] out_b_reg_data -); - - - - wire write_enable; - - - `ifndef ASIC - assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0)) && (VX_writeback_inter.rd != 0); - - byte_enabled_simple_dual_port_ram first_ram( - .we (write_enable), - .clk (clk), - .reset (reset), - .waddr (VX_writeback_inter.rd), - .raddr1(VX_gpr_read.rs1), - .raddr2(VX_gpr_read.rs2), - .be (VX_writeback_inter.wb_valid), - .wdata (VX_writeback_inter.write_data), - .q1 (out_a_reg_data), - .q2 (out_b_reg_data) - ); - - `else - - assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0)); - - - wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid); - - - wire[`NT_M1:0][31:0] write_bit_mask; - - genvar curr_t; - for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin - wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t]; - assign write_bit_mask[curr_t] = {32{~local_write}}; - end - - - - // wire cenb = !going_to_write; - wire cenb = 0; - - // wire cena_1 = (VX_gpr_read.rs1 == 0); - // wire cena_2 = (VX_gpr_read.rs2 == 0); - wire cena_1 = 0; - wire cena_2 = 0; - - wire[`NT_M1:0][31:0] temp_a; - wire[`NT_M1:0][31:0] temp_b; - - - `ifndef SYN - genvar thread; - genvar curr_bit; - for (thread = 0; thread < `NT; thread = thread + 1) - begin - for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1) - begin - assign out_a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 
1'b0 : temp_a[thread][curr_bit]; - assign out_b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit]; - end - end - - `else - - assign out_a_reg_data = temp_a; - assign out_b_reg_data = temp_b; - - `endif - - - wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0; - - genvar curr_base_thread; - for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4) - begin - /* verilator lint_off PINCONNECTEMPTY */ - rf2_32x128_wm1 first_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(temp_a[(curr_base_thread+3):(curr_base_thread)]), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena_1), - .AA(VX_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]), - .CLKB(clk), - .CENB(cenb), - .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]), - .AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]), - .DB(to_write[(curr_base_thread+3):(curr_base_thread)]), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - /* verilator lint_on PINCONNECTEMPTY */ - - /* verilator lint_off PINCONNECTEMPTY */ - rf2_32x128_wm1 second_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(temp_b[(curr_base_thread+3):(curr_base_thread)]), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena_2), - .AA(VX_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]), - .CLKB(clk), - .CENB(cenb), - .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]), - .AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]), - .DB(to_write[(curr_base_thread+3):(curr_base_thread)]), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - 
.TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - /* verilator lint_on PINCONNECTEMPTY */ - end - - `endif - -endmodule diff --git a/hw/old_rtl/VX_gpr_stage.v b/hw/old_rtl/VX_gpr_stage.v deleted file mode 100644 index 22fea9d6..00000000 --- a/hw/old_rtl/VX_gpr_stage.v +++ /dev/null @@ -1,223 +0,0 @@ - -`include "VX_define.v" - -module VX_gpr_stage ( - input wire clk, - input wire reset, - input wire schedule_delay, - - input wire memory_delay, - input wire stall_gpr_csr, - output wire gpr_stage_delay, - - // inputs - // Instruction Information - VX_frE_to_bckE_req_inter VX_bckE_req, - - // WriteBack inputs - VX_wb_inter VX_writeback_inter, - - - - - // Outputs - VX_exec_unit_req_inter VX_exec_unit_req, - VX_lsu_req_inter VX_lsu_req, - VX_gpu_inst_req_inter VX_gpu_inst_req, - VX_csr_req_inter VX_csr_req -); - - - wire[31:0] curr_PC = VX_bckE_req.curr_PC; - wire[2:0] branchType = VX_bckE_req.branch_type; - - wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE); - wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ); - - - wire jalQual = VX_bckE_req.jalQual; - - VX_gpr_read_inter VX_gpr_read(); - assign VX_gpr_read.rs1 = VX_bckE_req.rs1; - assign VX_gpr_read.rs2 = VX_bckE_req.rs2; - assign VX_gpr_read.warp_num = VX_bckE_req.warp_num; - - `ifndef ASIC - VX_gpr_jal_inter VX_gpr_jal(); - assign VX_gpr_jal.is_jal = VX_bckE_req.jalQual; - assign VX_gpr_jal.curr_PC = VX_bckE_req.curr_PC; - `else - VX_gpr_jal_inter VX_gpr_jal(); - assign VX_gpr_jal.is_jal = VX_exec_unit_req.jalQual; - assign VX_gpr_jal.curr_PC = VX_exec_unit_req.curr_PC; - `endif - - - VX_gpr_data_inter VX_gpr_datf(); - - - VX_gpr_wrapper vx_grp_wrapper( - .clk (clk), - .reset (reset), - .VX_writeback_inter(VX_writeback_inter), - .VX_gpr_read (VX_gpr_read), - .VX_gpr_jal (VX_gpr_jal), - - .out_a_reg_data (VX_gpr_datf.a_reg_data), - .out_b_reg_data (VX_gpr_datf.b_reg_data) - ); 
- - // assign VX_bckE_req.is_csr = is_csr; - // assign VX_bckE_req_out.csr_mask = (VX_bckE_req.sr_immed == 1'b1) ? {27'h0, VX_bckE_req.rs1} : VX_gpr_data.a_reg_data[0]; - - // Outputs - VX_exec_unit_req_inter VX_exec_unit_req_temp(); - VX_lsu_req_inter VX_lsu_req_temp(); - VX_gpu_inst_req_inter VX_gpu_inst_req_temp(); - VX_csr_req_inter VX_csr_req_temp(); - - VX_inst_multiplex VX_inst_mult( - .VX_bckE_req (VX_bckE_req), - .VX_gpr_data (VX_gpr_datf), - .VX_exec_unit_req(VX_exec_unit_req_temp), - .VX_lsu_req (VX_lsu_req_temp), - .VX_gpu_inst_req (VX_gpu_inst_req_temp), - .VX_csr_req (VX_csr_req_temp) - ); - - wire is_lsu = (|VX_lsu_req_temp.valid); - - wire stall_rest = 0; - wire flush_rest = schedule_delay; - - - wire stall_lsu = memory_delay; - wire flush_lsu = schedule_delay && !stall_lsu; - - assign gpr_stage_delay = stall_lsu || (stall_gpr_csr && VX_bckE_req.is_csr && (|VX_bckE_req.valid)); - - `ifdef ASIC - wire delayed_lsu_last_cycle; - - VX_generic_register #(.N(1)) delayed_reg ( - .clk (clk), - .reset(reset), - .stall(stall_rest), - .flush(stall_rest), - .in (stall_lsu), - .out (delayed_lsu_last_cycle) - ); - - - wire[`NT_M1:0][31:0] temp_store_data; - wire[`NT_M1:0][31:0] temp_base_address; // A reg data - - wire[`NT_M1:0][31:0] real_store_data; - wire[`NT_M1:0][31:0] real_base_address; // A reg data - - wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu; - - VX_generic_register #(.N(`NT*32*2)) lsu_data( - .clk (clk), - .reset(reset), - .stall(!store_curr_real), - .flush(stall_rest), - .in ({real_store_data, real_base_address}), - .out ({temp_store_data, temp_base_address}) - ); - - assign real_store_data = VX_lsu_req_temp.store_data; - assign real_base_address = VX_lsu_req_temp.base_address; - - - assign VX_lsu_req.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data; - assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? 
temp_base_address : real_base_address; - - - VX_generic_register #(.N(77 + `NW_M1 + 1 + (`NT))) lsu_reg( - .clk (clk), - .reset(reset), - .stall(stall_lsu), - .flush(flush_lsu), - .in ({VX_lsu_req_temp.valid, VX_lsu_req_temp.lsu_pc, VX_lsu_req_temp.warp_num, VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write, VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}), - .out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc ,VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb }) - ); - - VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg( - .clk (clk), - .reset(reset), - .stall(stall_rest), - .flush(flush_rest), - .in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}), - .out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask }) - ); - - assign 
VX_exec_unit_req.a_reg_data = real_base_address; - assign VX_exec_unit_req.b_reg_data = real_store_data; - - VX_generic_register #(.N(36 + `NW_M1 + 1 + (`NT))) gpu_inst_reg( - .clk (clk), - .reset(reset), - .stall(stall_rest), - .flush(flush_rest), - .in ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num, VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc, VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier, VX_gpu_inst_req_temp.pc_next}), - .out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next }) - ); - - assign VX_gpu_inst_req.a_reg_data = real_base_address; - assign VX_gpu_inst_req.rd2 = real_store_data; - - VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg( - .clk (clk), - .reset(reset), - .stall(stall_gpr_csr), - .flush(flush_rest), - .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}), - .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask }) - ); - - - // assign - - `else - - // 341 - VX_generic_register #(.N(77 + `NW_M1 + 1 + 65*(`NT))) lsu_reg( - .clk (clk), - .reset(reset), - .stall(stall_lsu), - .flush(flush_lsu), - .in ({VX_lsu_req_temp.valid, VX_lsu_req_temp.lsu_pc, VX_lsu_req_temp.warp_num, VX_lsu_req_temp.store_data, VX_lsu_req_temp.base_address, VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write, VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}), - .out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc , VX_lsu_req.warp_num , VX_lsu_req.store_data , VX_lsu_req.base_address , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb }) 
- ); - - VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg( - .clk (clk), - .reset(reset), - .stall(stall_rest), - .flush(flush_rest), - .in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.a_reg_data, VX_exec_unit_req_temp.b_reg_data, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}), - .out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask }) - ); - - VX_generic_register #(.N(68 + `NW_M1 + 1 + 33*(`NT))) gpu_inst_reg( - .clk (clk), - .reset(reset), - .stall(stall_rest), - .flush(flush_rest), - .in ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num, VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc, VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier, VX_gpu_inst_req_temp.pc_next, VX_gpu_inst_req_temp.a_reg_data, 
VX_gpu_inst_req_temp.rd2}), - .out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 }) - ); - - VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg( - .clk (clk), - .reset(reset), - .stall(stall_gpr_csr), - .flush(flush_rest), - .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}), - .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask }) - ); - - `endif - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_gpr_wrapper.v b/hw/old_rtl/VX_gpr_wrapper.v deleted file mode 100644 index 2f2ec4e0..00000000 --- a/hw/old_rtl/VX_gpr_wrapper.v +++ /dev/null @@ -1,70 +0,0 @@ -`include "VX_define.v" - -module VX_gpr_wrapper ( - input wire clk, - input wire reset, - VX_gpr_read_inter VX_gpr_read, - VX_wb_inter VX_writeback_inter, - VX_gpr_jal_inter VX_gpr_jal, - - output wire[`NT_M1:0][31:0] out_a_reg_data, - output wire[`NT_M1:0][31:0] out_b_reg_data - -); - - wire[`NW-1:0][`NT_M1:0][31:0] temp_a_reg_data; - wire[`NW-1:0][`NT_M1:0][31:0] temp_b_reg_data; - - wire[`NT_M1:0][31:0] jal_data; - genvar index; - for (index = 0; index <= `NT_M1; index = index + 1) begin - assign jal_data[index] = VX_gpr_jal.curr_PC; - end - - - `ifndef ASIC - assign out_a_reg_data = (VX_gpr_jal.is_jal ? 
jal_data : (temp_a_reg_data[VX_gpr_read.warp_num])); - assign out_b_reg_data = (temp_b_reg_data[VX_gpr_read.warp_num]); - `else - - wire zer = 0; - - wire[`NW_M1:0] old_warp_num; - VX_generic_register #(`NW_M1+1) store_wn( - .clk (clk), - .reset(reset), - .stall(zer), - .flush(zer), - .in (VX_gpr_read.warp_num), - .out (old_warp_num) - ); - - assign out_a_reg_data = (VX_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[old_warp_num])); - assign out_b_reg_data = (temp_b_reg_data[old_warp_num]); - - `endif - - genvar warp_index; - generate - - for (warp_index = 0; warp_index < `NW; warp_index = warp_index + 1) begin - - wire valid_write_request = warp_index == VX_writeback_inter.wb_warp_num; - VX_gpr vx_gpr( - .clk (clk), - .reset (reset), - .valid_write_request(valid_write_request), - .VX_gpr_read (VX_gpr_read), - .VX_writeback_inter (VX_writeback_inter), - .out_a_reg_data (temp_a_reg_data[warp_index]), - .out_b_reg_data (temp_b_reg_data[warp_index]) - ); - - end - - endgenerate - - -endmodule - - diff --git a/hw/old_rtl/VX_inst_multiplex.v b/hw/old_rtl/VX_inst_multiplex.v deleted file mode 100644 index 86da67de..00000000 --- a/hw/old_rtl/VX_inst_multiplex.v +++ /dev/null @@ -1,95 +0,0 @@ -`include "VX_define.v" - -module VX_inst_multiplex ( - // Inputs - VX_frE_to_bckE_req_inter VX_bckE_req, - VX_gpr_data_inter VX_gpr_data, - - // Outputs - VX_exec_unit_req_inter VX_exec_unit_req, - VX_lsu_req_inter VX_lsu_req, - VX_gpu_inst_req_inter VX_gpu_inst_req, - VX_csr_req_inter VX_csr_req -); - - wire[`NT_M1:0] is_mem_mask; - wire[`NT_M1:0] is_gpu_mask; - wire[`NT_M1:0] is_csr_mask; - - wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ); - wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split); - wire is_csr = VX_bckE_req.is_csr; - // wire is_gpu = 0; - - genvar currT; - for (currT = 0; currT < `NT; currT = currT + 1) begin - assign is_mem_mask[currT] = is_mem; - assign 
is_gpu_mask[currT] = is_gpu; - assign is_csr_mask[currT] = is_csr; - end - - // LSU Unit - assign VX_lsu_req.valid = VX_bckE_req.valid & is_mem_mask; - assign VX_lsu_req.warp_num = VX_bckE_req.warp_num; - assign VX_lsu_req.base_address = VX_gpr_data.a_reg_data; - assign VX_lsu_req.store_data = VX_gpr_data.b_reg_data; - - assign VX_lsu_req.offset = VX_bckE_req.itype_immed; - - assign VX_lsu_req.mem_read = VX_bckE_req.mem_read; - assign VX_lsu_req.mem_write = VX_bckE_req.mem_write; - assign VX_lsu_req.rd = VX_bckE_req.rd; - assign VX_lsu_req.wb = VX_bckE_req.wb; - assign VX_lsu_req.lsu_pc = VX_bckE_req.curr_PC; - - - // Execute Unit - assign VX_exec_unit_req.valid = VX_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask); - assign VX_exec_unit_req.warp_num = VX_bckE_req.warp_num; - assign VX_exec_unit_req.curr_PC = VX_bckE_req.curr_PC; - assign VX_exec_unit_req.PC_next = VX_bckE_req.PC_next; - assign VX_exec_unit_req.rd = VX_bckE_req.rd; - assign VX_exec_unit_req.wb = VX_bckE_req.wb; - assign VX_exec_unit_req.a_reg_data = VX_gpr_data.a_reg_data; - assign VX_exec_unit_req.b_reg_data = VX_gpr_data.b_reg_data; - assign VX_exec_unit_req.alu_op = VX_bckE_req.alu_op; - assign VX_exec_unit_req.rs1 = VX_bckE_req.rs1; - assign VX_exec_unit_req.rs2 = VX_bckE_req.rs2; - assign VX_exec_unit_req.rs2_src = VX_bckE_req.rs2_src; - assign VX_exec_unit_req.itype_immed = VX_bckE_req.itype_immed; - assign VX_exec_unit_req.upper_immed = VX_bckE_req.upper_immed; - assign VX_exec_unit_req.branch_type = VX_bckE_req.branch_type; - assign VX_exec_unit_req.jalQual = VX_bckE_req.jalQual; - assign VX_exec_unit_req.jal = VX_bckE_req.jal; - assign VX_exec_unit_req.jal_offset = VX_bckE_req.jal_offset; - assign VX_exec_unit_req.ebreak = VX_bckE_req.ebreak; - - - // GPR Req - assign VX_gpu_inst_req.valid = VX_bckE_req.valid & is_gpu_mask; - assign VX_gpu_inst_req.warp_num = VX_bckE_req.warp_num; - assign VX_gpu_inst_req.is_wspawn = VX_bckE_req.is_wspawn; - assign VX_gpu_inst_req.is_tmc = 
VX_bckE_req.is_tmc; - assign VX_gpu_inst_req.is_split = VX_bckE_req.is_split; - assign VX_gpu_inst_req.is_barrier = VX_bckE_req.is_barrier; - assign VX_gpu_inst_req.a_reg_data = VX_gpr_data.a_reg_data; - assign VX_gpu_inst_req.rd2 = VX_gpr_data.b_reg_data[0]; - assign VX_gpu_inst_req.pc_next = VX_bckE_req.PC_next; - - - // CSR Req - assign VX_csr_req.valid = VX_bckE_req.valid & is_csr_mask; - assign VX_csr_req.warp_num = VX_bckE_req.warp_num; - assign VX_csr_req.rd = VX_bckE_req.rd; - assign VX_csr_req.wb = VX_bckE_req.wb; - assign VX_csr_req.alu_op = VX_bckE_req.alu_op; - assign VX_csr_req.is_csr = VX_bckE_req.is_csr; - assign VX_csr_req.csr_address = VX_bckE_req.csr_address; - assign VX_csr_req.csr_immed = VX_bckE_req.csr_immed; - assign VX_csr_req.csr_mask = VX_bckE_req.csr_mask; - -endmodule - - - - diff --git a/hw/old_rtl/VX_lsu.v b/hw/old_rtl/VX_lsu.v deleted file mode 100644 index 05def072..00000000 --- a/hw/old_rtl/VX_lsu.v +++ /dev/null @@ -1,106 +0,0 @@ - -`include "VX_define.v" - - -module VX_lsu ( - input wire clk, - input wire reset, - input wire no_slot_mem, - VX_lsu_req_inter VX_lsu_req, - - // Write back to GPR - VX_inst_mem_wb_inter VX_mem_wb, - - VX_dcache_response_inter VX_dcache_rsp, - VX_dcache_request_inter VX_dcache_req, - output wire out_delay - ); - - // VX_inst_mem_wb_inter VX_mem_wb_temp(); - - assign out_delay = VX_dcache_rsp.delay || no_slot_mem; - - - // Generate Addresses - wire[`NT_M1:0][31:0] address; - VX_lsu_addr_gen VX_lsu_addr_gen - ( - .base_address(VX_lsu_req.base_address), - .offset (VX_lsu_req.offset), - .address (address) - ); - - - wire[`NT_M1:0][31:0] use_address; - wire[`NT_M1:0][31:0] use_store_data; - wire[`NT_M1:0] use_valid; - wire[2:0] use_mem_read; - wire[2:0] use_mem_write; - wire[4:0] use_rd; - wire[`NW_M1:0] use_warp_num; - wire[1:0] use_wb; - wire[31:0] use_pc; - - - - wire zero = 0; - - VX_generic_register #(.N(45 + `NW_M1 + 1 + `NT*65)) lsu_buffer( - .clk (clk), - .reset(reset), - .stall(out_delay), - 
.flush(zero), - .in ({address , VX_lsu_req.store_data, VX_lsu_req.valid, VX_lsu_req.mem_read, VX_lsu_req.mem_write, VX_lsu_req.rd, VX_lsu_req.warp_num, VX_lsu_req.wb, VX_lsu_req.lsu_pc}), - .out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc }) - ); - - - genvar index; - for (index = 0; index <= `NT_M1; index = index + 1) begin - assign VX_dcache_req.out_cache_driver_in_address[index] = use_address[index]; - assign VX_dcache_req.out_cache_driver_in_data[index] = use_store_data[index]; - assign VX_dcache_req.out_cache_driver_in_valid[index] = (use_valid[index]); - - assign VX_mem_wb.loaded_data[index] = VX_dcache_rsp.in_cache_driver_out_data[index]; - end - - assign VX_dcache_req.out_cache_driver_in_mem_read = use_mem_read; - assign VX_dcache_req.out_cache_driver_in_mem_write = use_mem_write; - - - assign VX_mem_wb.rd = use_rd; - assign VX_mem_wb.wb = use_wb & {!VX_dcache_rsp.delay, !VX_dcache_rsp.delay}; - assign VX_mem_wb.wb_valid = use_valid; - assign VX_mem_wb.wb_warp_num = use_warp_num; - - assign VX_mem_wb.mem_wb_pc = use_pc; - - // integer curr_t; - // always @(negedge clk) begin - // for (int curr_t = 0; curr_t < `NT; curr_t=curr_t+1) - // if ((VX_dcache_req.out_cache_driver_in_valid[curr_t]) && !out_delay) begin - // if (VX_dcache_req.out_cache_driver_in_mem_read != `NO_MEM_READ) begin - // $display("Reading addr: %x val: %x", address[0], VX_mem_wb.loaded_data[0]); - // end - - // if (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) begin - // $display("Writing addr: %x val: %x", address[0], VX_dcache_req.out_cache_driver_in_data[0]); - // end - // end - // end - - // wire zero_temp = 0; - // VX_generic_register #(.N(142)) register_wb_data - // ( - // .clk (clk), - // .reset(reset), - // .stall(zero_temp), - // .flush(out_delay), - // .in ({VX_mem_wb_temp.loaded_data, VX_mem_wb_temp.rd, VX_mem_wb_temp.wb, VX_mem_wb_temp.wb_valid, VX_mem_wb_temp.wb_warp_num}), - // .out 
({VX_mem_wb.loaded_data , VX_mem_wb.rd , VX_mem_wb.wb , VX_mem_wb.wb_valid , VX_mem_wb.wb_warp_num }) - // ); - - -endmodule // Memory - - diff --git a/hw/old_rtl/VX_lsu_addr_gen.v b/hw/old_rtl/VX_lsu_addr_gen.v deleted file mode 100644 index 85811da5..00000000 --- a/hw/old_rtl/VX_lsu_addr_gen.v +++ /dev/null @@ -1,17 +0,0 @@ -`include "VX_define.v" - -module VX_lsu_addr_gen ( - input wire[`NT_M1:0][31:0] base_address, - input wire[31:0] offset, - output wire[`NT_M1:0][31:0] address - -); - - - genvar index; - for (index = 0; index < `NT; index = index + 1) - begin - assign address[index] = base_address[index] + offset; - end - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_priority_encoder.v b/hw/old_rtl/VX_priority_encoder.v deleted file mode 100644 index a0f7934f..00000000 --- a/hw/old_rtl/VX_priority_encoder.v +++ /dev/null @@ -1,20 +0,0 @@ -`include "VX_define.v" - -module VX_priority_encoder ( - input wire[`NW-1:0] valids, - output reg[`NW_M1:0] index, - output reg found - ); - - integer i; - always @(*) begin - index = 0; - found = 0; - for (i = `NW-1; i >= 0; i = i - 1) begin - if (valids[i]) begin - index = i[`NW_M1:0]; - found = 1; - end - end - end -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_priority_encoder_w_mask.v b/hw/old_rtl/VX_priority_encoder_w_mask.v deleted file mode 100644 index fcd9d865..00000000 --- a/hw/old_rtl/VX_priority_encoder_w_mask.v +++ /dev/null @@ -1,32 +0,0 @@ -`include "../VX_define.v" -module VX_priority_encoder_w_mask - #( - parameter N = 10 - ) - ( - input wire[N-1:0] valids, - output reg [N-1:0] mask, - //output reg[$clog2(N)-1:0] index, - output reg[(`CLOG2(N))-1:0] index, - //output reg[`CLOG2(N):0] index, // eh - output reg found - ); - - integer i; - always @(valids) begin - index = 0; - found = 0; - // mask = 0; - for (i = 0; i < N; i=i+1) begin - if (valids[i]) begin - //index = i[$clog2(N)-1:0]; - index = i[(`CLOG2(N))-1:0]; - found = 1; - // mask[index] = (1 << i); - // 
$display("%h",(1 << i)); - end - end - end - - assign mask = found ? (1 << index) : 0; -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_scheduler.v b/hw/old_rtl/VX_scheduler.v deleted file mode 100644 index ce54db63..00000000 --- a/hw/old_rtl/VX_scheduler.v +++ /dev/null @@ -1,69 +0,0 @@ - - -`include "VX_define.v" - -module VX_scheduler ( - input wire clk, - input wire reset, - input wire memory_delay, - input wire gpr_stage_delay, - VX_frE_to_bckE_req_inter VX_bckE_req, - VX_wb_inter VX_writeback_inter, - - output wire schedule_delay - -); - - - - reg[31:0] rename_table[`NW-1:0]; - - wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0); - wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0); - - wire rs1_rename = rename_table[VX_bckE_req.warp_num][VX_bckE_req.rs1]; - wire rs2_rename = rename_table[VX_bckE_req.warp_num][VX_bckE_req.rs2]; - - wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE); - wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ); - - wire is_mem = is_store || is_load; - - - wire rs1_pass = ((valid_wb && (VX_writeback_inter.rd == VX_bckE_req.rs1))); - wire rs2_pass = ((valid_wb && (VX_writeback_inter.rd == VX_bckE_req.rs2))); - - // wire rs1_pass = 0; - // wire rs2_pass = 0; - - wire using_rs2 = (VX_bckE_req.rs2_src == `RS2_REG) || is_store || VX_bckE_req.is_barrier || VX_bckE_req.is_wspawn; - - wire rs1_rename_qual = ((rs1_rename || (rs1_pass && 0)) && (VX_bckE_req.rs1 != 0)); - wire rs2_rename_qual = ((rs2_rename || (rs2_pass && 0)) && (VX_bckE_req.rs2 != 0 && using_rs2)); - - - wire rename_valid = rs1_rename_qual || rs2_rename_qual ; - - - assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid)) || (memory_delay && (is_mem)) || (gpr_stage_delay && is_mem); - - integer i; - integer w; - always @(posedge clk or posedge reset) begin - - if (reset) begin - for (w = 0; w < `NW; w=w+1) - begin - for (i = 0; i < 32; i = i + 1) - begin - rename_table[w][i] 
<= 0; - end - end - end else begin - if (valid_wb ) rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] <= 0; - if (!schedule_delay && wb_inc) rename_table[VX_bckE_req.warp_num ][VX_bckE_req.rd] <= 1; - end - end - - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_warp.v b/hw/old_rtl/VX_warp.v deleted file mode 100644 index 05712c8f..00000000 --- a/hw/old_rtl/VX_warp.v +++ /dev/null @@ -1,86 +0,0 @@ -`include "VX_define.v" - - -module VX_warp ( - input wire clk, - input wire reset, - input wire stall, - input wire remove, - input wire[`NT_M1:0] in_thread_mask, - input wire in_change_mask, - input wire in_jal, - input wire[31:0] in_jal_dest, - input wire in_branch_dir, - input wire[31:0] in_branch_dest, - input wire in_wspawn, - input wire[31:0] in_wspawn_pc, - - output wire[31:0] out_PC, - output wire[`NT_M1:0] out_valid -); - - reg[31:0] real_PC; - var[31:0] temp_PC; - var[31:0] use_PC; - reg[`NT_M1:0] valid; - - reg[`NT_M1:0] valid_zero; - - integer ini_cur_th = 0; - initial begin - real_PC = 0; - for (ini_cur_th = 1; ini_cur_th < `NT; ini_cur_th=ini_cur_th+1) begin - valid[ini_cur_th] = 0; // Thread 1 active - valid_zero[ini_cur_th] = 0; - end - valid[0] = 1; - valid_zero[0] = 0; - end - - - always @(posedge clk, posedge reset) begin - if (remove) begin - valid <= valid_zero; - end else if (in_change_mask) begin - valid <= in_thread_mask; - end - end - - - genvar out_cur_th; - generate - for (out_cur_th = 0; out_cur_th < `NT; out_cur_th = out_cur_th+1) - assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th] : stall ? 
1'b0 : valid[out_cur_th]; - endgenerate - - - always @(*) begin - if (in_jal == 1'b1) begin - temp_PC = in_jal_dest; - // $display("LINKING TO %h", temp_PC); - end else if (in_branch_dir == 1'b1) begin - temp_PC = in_branch_dest; - end else begin - temp_PC = real_PC; - end - end - - assign use_PC = temp_PC; - assign out_PC = temp_PC; - - always @(posedge clk or posedge reset) begin - if (reset) begin - real_PC <= 0; - end else if (in_wspawn == 1'b1) begin - // $display("Inside warp ***** Spawn @ %H",in_wspawn_pc); - real_PC <= in_wspawn_pc; - end else if (!stall) begin - real_PC <= use_PC + 32'h4; - end else begin - real_PC <= use_PC; - end - - end - - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_warp_scheduler.v b/hw/old_rtl/VX_warp_scheduler.v deleted file mode 100644 index 0ee34940..00000000 --- a/hw/old_rtl/VX_warp_scheduler.v +++ /dev/null @@ -1,321 +0,0 @@ -`include "VX_define.v" - -module VX_warp_scheduler ( - input wire clk, // Clock - input wire reset, - input wire stall, - // Wspawn - input wire wspawn, - input wire[31:0] wsapwn_pc, - input wire[`NW-1:0] wspawn_new_active, - - // CTM - input wire ctm, - input wire[`NT_M1:0] ctm_mask, - input wire[`NW_M1:0] ctm_warp_num, - - // WHALT - input wire whalt, - input wire[`NW_M1:0] whalt_warp_num, - - input wire is_barrier, - input wire[31:0] barrier_id, - input wire[$clog2(`NW):0] num_warps, - input wire[`NW_M1:0] barrier_warp_num, - - // WSTALL - input wire wstall, - input wire[`NW_M1:0] wstall_warp_num, - - // Split - input wire is_split, - input wire dont_split, - input wire[`NT_M1:0] split_new_mask, - input wire[`NT_M1:0] split_later_mask, - input wire[31:0] split_save_pc, - input wire[`NW_M1:0] split_warp_num, - - // Join - input wire is_join, - input wire[`NW_M1:0] join_warp_num, - - // JAL - input wire jal, - input wire[31:0] jal_dest, - input wire[`NW_M1:0] jal_warp_num, - - // Branch - input wire branch_valid, - input wire branch_dir, - input wire[31:0] branch_dest, - input 
wire[`NW_M1:0] branch_warp_num, - - output wire[`NT_M1:0] thread_mask, - output wire[`NW_M1:0] warp_num, - output wire[31:0] warp_pc, - output wire out_ebreak, - output wire scheduled_warp - -); - - wire update_use_wspawn; - - wire update_visible_active; - - wire[(1+32+`NT_M1):0] d[`NW-1:0]; - - wire join_fall; - wire[31:0] join_pc; - wire[`NT_M1:0] join_tm; - - wire in_wspawn = wspawn; - wire in_ctm = ctm; - wire in_whalt = whalt; - wire in_wstall = wstall; - - reg[`NW-1:0] warp_active; - reg[`NW-1:0] warp_stalled; - - reg[`NW-1:0] visible_active; - wire[`NW-1:0] use_active; - - wire wstall_this_cycle; - - reg[`NT_M1:0] thread_masks[`NW-1:0]; - reg[31:0] warp_pcs[`NW-1:0]; - - // barriers - reg[`NW-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0]; - wire reached_barrier_limit; - wire[`NW-1:0] curr_barrier_mask; - wire[$clog2(`NW):0] curr_barrier_count; - - // wsapwn - reg[31:0] use_wsapwn_pc; - reg[`NW-1:0] use_wsapwn; - - wire[`NW_M1:0] warp_to_schedule; - wire schedule; - - wire hazard; - wire global_stall; - - wire real_schedule; - - wire[31:0] new_pc; - - reg[`NW-1:0] total_barrier_stall; - - reg didnt_split; - - /* verilator lint_off UNUSED */ - // wire[$clog2(`NW):0] num_active; - /* verilator lint_on UNUSED */ - - integer curr_w_help; - integer curr_barrier; - always @(posedge clk or posedge reset) begin - if (reset) begin - for (curr_barrier = 0; curr_barrier < `NUM_BARRIERS; curr_barrier=curr_barrier+1) begin - barrier_stall_mask[curr_barrier] <= 0; - end - use_wsapwn_pc <= 0; - use_wsapwn <= 0; - warp_pcs[0] <= (32'h80000000 - 4); - warp_active[0] <= 1; // Activating first warp - visible_active[0] <= 1; // Activating first warp - thread_masks[0] <= 1; // Activating first thread in first warp - warp_stalled <= 0; - didnt_split <= 0; - // total_barrier_stall = 0; - for (curr_w_help = 1; curr_w_help < `NW; curr_w_help=curr_w_help+1) begin - warp_pcs[curr_w_help] <= 0; - warp_active[curr_w_help] <= 0; // Activating first warp - visible_active[curr_w_help] <= 0; 
// Activating first warp - thread_masks[curr_w_help] <= 1; // Activating first thread in first warp - end - - end else begin - // Wsapwning warps - if (wspawn) begin - warp_active <= wspawn_new_active; - use_wsapwn_pc <= wsapwn_pc; - use_wsapwn <= wspawn_new_active & (~`NW'b1); - end - - if (is_barrier) begin - warp_stalled[barrier_warp_num] <= 0; - if (reached_barrier_limit) begin - barrier_stall_mask[barrier_id] <= 0; - end else begin - barrier_stall_mask[barrier_id][barrier_warp_num] <= 1; - end - end else if (ctm) begin - thread_masks[ctm_warp_num] <= ctm_mask; - warp_stalled[ctm_warp_num] <= 0; - end else if (is_join && !didnt_split) begin - if (!join_fall) begin - warp_pcs[join_warp_num] <= join_pc; - end - thread_masks[join_warp_num] <= join_tm; - didnt_split <= 0; - end else if (is_split) begin - warp_stalled[split_warp_num] <= 0; - if (!dont_split) begin - thread_masks[split_warp_num] <= split_new_mask; - didnt_split <= 0; - end else begin - didnt_split <= 1; - end - end - - if (whalt) begin - warp_active[whalt_warp_num] <= 0; - visible_active[whalt_warp_num] <= 0; - end - - if (update_use_wspawn) begin - use_wsapwn[warp_to_schedule] <= 0; - thread_masks[warp_to_schedule] <= 1; - end - - - // Stalling the scheduling of warps - if (wstall) begin - warp_stalled[wstall_warp_num] <= 1; - visible_active[wstall_warp_num] <= 0; - end - - // Refilling active warps - if (update_visible_active) begin - visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall); - end - - // Don't change state if stall - if (!global_stall && real_schedule && (thread_mask != 0)) begin - visible_active[warp_to_schedule] <= 0; - warp_pcs[warp_to_schedule] <= new_pc; - end - - // Jal - if (jal) begin - warp_pcs[jal_warp_num] <= jal_dest; - warp_stalled[jal_warp_num] <= 0; - end - - // Branch - if (branch_valid) begin - if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest; - warp_stalled[branch_warp_num] <= 0; - end - end - end - - VX_countones #(.N(`NW)) barrier_count( 
- .valids(curr_barrier_mask), - .count (curr_barrier_count) - ); - - wire[$clog2(`NW):0] count_visible_active; - VX_countones #(.N(`NW)) num_visible( - .valids(visible_active), - .count (count_visible_active) - ); - - // assign curr_barrier_count = $countones(curr_barrier_mask); - - assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NW-1:0]; - assign reached_barrier_limit = curr_barrier_count == (num_warps); - - assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug - - assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3]; - // integer curr_b; - // always @(*) begin - // total_barrier_stall = 0; - // for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1) - // begin - // total_barrier_stall[`NW-1:0] = total_barrier_stall[`NW-1:0] | barrier_stall_mask[curr_b]; - // end - // end - - - assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join); - - wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]}; - wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask}; - - - assign {join_fall, join_pc, join_tm} = d[join_warp_num]; - - - - genvar curr_warp; - for (curr_warp = 0; curr_warp < `NW; curr_warp = curr_warp + 1) begin - wire correct_warp_s = (curr_warp == split_warp_num); - wire correct_warp_j = (curr_warp == join_warp_num); - - wire push = (is_split && !dont_split) && correct_warp_s; - wire pop = is_join && correct_warp_j; - VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT)+1)) ipdom_stack( - .clk (clk), - .reset(reset), - .push (push), - .pop (pop), - .d (d[curr_warp]), - .q1 (q1), - .q2 (q2) - ); - end - - // wire should_stall = stall || (jal && (warp_to_schedule == jal_warp_num)) || (branch_dir && (warp_to_schedule == branch_warp_num)); - - wire should_jal = (jal && (warp_to_schedule == jal_warp_num)); - wire should_bra = (branch_dir && (warp_to_schedule == 
branch_warp_num)); - - assign hazard = (should_jal || should_bra) && schedule; - - assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule]; - - assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join); - - assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || is_join); - - wire real_use_wspawn = use_wsapwn[warp_to_schedule]; - - assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule]; - assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NT'b1 : thread_masks[warp_to_schedule]); - assign warp_num = warp_to_schedule; - - assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall; - - assign new_pc = warp_pc + 4; - - - assign use_active = (count_visible_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall)) : visible_active; - - // Choosing a warp to schedule - VX_priority_encoder choose_schedule( - .valids(use_active), - .index (warp_to_schedule), - .found (schedule) - ); - - // always @(*) begin - // $display("WarpPC: %h",warp_pc); - // $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]); - // end - - - // Valid counter - // assign num_active = $countones(visible_active); - // VX_one_counter valid_counter( - // .valids(visible_active), - // .ones_found() - // ); - - - wire ebreak = (warp_active == 0); - assign out_ebreak = ebreak; - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/VX_writeback.v b/hw/old_rtl/VX_writeback.v deleted file mode 100644 index 2f684bae..00000000 --- a/hw/old_rtl/VX_writeback.v +++ /dev/null @@ -1,111 +0,0 @@ - -`include "VX_define.v" - - -module VX_writeback ( - input wire clk, - input wire reset, - // Mem WB info - VX_inst_mem_wb_inter VX_mem_wb, - // EXEC Unit WB info - VX_inst_exec_wb_inter 
VX_inst_exec_wb, - // CSR Unit WB info - VX_csr_wb_inter VX_csr_wb, - - // Actual WB to GPR - VX_wb_inter VX_writeback_inter, - output wire no_slot_mem, - output wire no_slot_csr - ); - - - VX_wb_inter VX_writeback_tempp(); - - wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid); - wire mem_wb = (VX_mem_wb.wb != 0) && (|VX_mem_wb.wb_valid); - wire csr_wb = (VX_csr_wb.wb != 0) && (|VX_csr_wb.valid); - - - assign no_slot_mem = mem_wb && (exec_wb || csr_wb); - assign no_slot_csr = csr_wb && (exec_wb); - - assign VX_writeback_tempp.write_data = exec_wb ? VX_inst_exec_wb.alu_result : - csr_wb ? VX_csr_wb.csr_result : - mem_wb ? VX_mem_wb.loaded_data : - 0; - - - assign VX_writeback_tempp.wb_valid = exec_wb ? VX_inst_exec_wb.wb_valid : - csr_wb ? VX_csr_wb.valid : - mem_wb ? VX_mem_wb.wb_valid : - 0; - - assign VX_writeback_tempp.rd = exec_wb ? VX_inst_exec_wb.rd : - csr_wb ? VX_csr_wb.rd : - mem_wb ? VX_mem_wb.rd : - 0; - - assign VX_writeback_tempp.wb = exec_wb ? VX_inst_exec_wb.wb : - csr_wb ? VX_csr_wb.wb : - mem_wb ? VX_mem_wb.wb : - 0; - - assign VX_writeback_tempp.wb_warp_num = exec_wb ? VX_inst_exec_wb.wb_warp_num : - csr_wb ? VX_csr_wb.warp_num : - mem_wb ? VX_mem_wb.wb_warp_num : - 0; - - - - assign VX_writeback_tempp.wb_pc = exec_wb ? VX_inst_exec_wb.exec_wb_pc : - csr_wb ? 32'hdeadbeef : - mem_wb ? 
VX_mem_wb.mem_wb_pc : - 32'hdeadbeef; - - - wire zero = 0; - - wire[`NT-1:0][31:0] use_wb_data; - - reg prev_is_mem; - - always @(posedge clk, posedge reset) begin - if (reset) - begin - prev_is_mem = 0; - end begin - prev_is_mem = mem_wb && !no_slot_mem; - end - end - - VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register( - .clk (clk), - .reset(reset), - .stall(zero), - .flush(zero), - .in ({VX_writeback_tempp.write_data, VX_writeback_tempp.wb_valid, VX_writeback_tempp.rd, VX_writeback_tempp.wb, VX_writeback_tempp.wb_warp_num, VX_writeback_tempp.wb_pc}), - .out ({use_wb_data , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc}) - ); - - reg[31:0] last_data_wb; - always @(posedge clk) begin - if ((|VX_writeback_inter.wb_valid) && (VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd == 28)) begin - last_data_wb <= use_wb_data[0]; - end - end - - `ifdef SYN - assign VX_writeback_inter.write_data = prev_is_mem ? 
VX_writeback_tempp.write_data : use_wb_data; - `else - assign VX_writeback_inter.write_data = use_wb_data; - `endif - - -endmodule // VX_writeback - - - - - - - diff --git a/hw/old_rtl/Vortex.v b/hw/old_rtl/Vortex.v deleted file mode 100644 index f4b13e7d..00000000 --- a/hw/old_rtl/Vortex.v +++ /dev/null @@ -1,249 +0,0 @@ - -`include "../VX_define.v" - - -module Vortex - /*#( - parameter CACHE_SIZE = 4096, // Bytes - parameter CACHE_WAYS = 2, - parameter CACHE_BLOCK = 128, // Bytes - parameter CACHE_BANKS = 8, - parameter NUM_WORDS_PER_BLOCK = 4 - )*/ - ( - input wire clk, - input wire reset, - input wire[31:0] icache_response_instruction, - output wire[31:0] icache_request_pc_address, - // IO - output wire io_valid, - output wire[31:0] io_data, - - // Req D Mem - output reg [31:0] o_m_read_addr_d, - output reg [31:0] o_m_evict_addr_d, - output reg o_m_valid_d, - output reg [31:0] o_m_writedata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0], - output reg o_m_read_or_write_d, - - // Rsp D Mem - input wire [31:0] i_m_readdata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0], - input wire i_m_ready_d, - - // Req I Mem - output reg [31:0] o_m_read_addr_i, - output reg [31:0] o_m_evict_addr_i, - output reg o_m_valid_i, - output reg [31:0] o_m_writedata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0], - output reg o_m_read_or_write_i, - - // Rsp I Mem - input wire [31:0] i_m_readdata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0], - input wire i_m_ready_i, - output wire out_ebreak - ); - - -reg[31:0] icache_banks = `ICACHE_BANKS; -reg[31:0] icache_num_words_per_block = `ICACHE_NUM_WORDS_PER_BLOCK; - - -reg[31:0] dcache_banks = `DCACHE_BANKS; -reg[31:0] dcache_num_words_per_block = `DCACHE_NUM_WORDS_PER_BLOCK; - -reg[31:0] number_threads = `NT; -reg[31:0] number_warps = `NW; - -always @(posedge clk) begin - icache_banks <= icache_banks; - icache_num_words_per_block <= icache_num_words_per_block; - - dcache_banks <= dcache_banks; - 
dcache_num_words_per_block <= dcache_num_words_per_block; - - number_threads <= number_threads; - number_warps <= number_warps; -end - -wire memory_delay; -wire gpr_stage_delay; -wire schedule_delay; - - -// Dcache Interface -VX_dcache_response_inter VX_dcache_rsp(); -VX_dcache_request_inter VX_dcache_req(); - -wire temp_io_valid = (!memory_delay) && (|VX_dcache_req.out_cache_driver_in_valid) && (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (VX_dcache_req.out_cache_driver_in_address[0] == 32'h00010000); -wire[31:0] temp_io_data = VX_dcache_req.out_cache_driver_in_data[0]; -assign io_valid = temp_io_valid; -assign io_data = temp_io_data; - - -VX_dram_req_rsp_inter #( - .NUMBER_BANKS(`DCACHE_BANKS), - .NUM_WORDS_PER_BLOCK(`DCACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp(); - - VX_icache_response_inter icache_response_fe(); - VX_icache_request_inter icache_request_fe(); - VX_dram_req_rsp_inter #( - .NUMBER_BANKS(`ICACHE_BANKS), - .NUM_WORDS_PER_BLOCK(`ICACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp_icache(); - - //assign icache_response_fe.instruction = icache_response_instruction; - assign icache_request_pc_address = icache_request_fe.pc_address; - - // Need to fix this so that it is only 1 set of outputs - // o_m Values - - // L2 Cache - /* - assign VX_L2cache_req.out_cache_driver_in_valid = VX_dram_req_rsp.o_m_valid || VX_dram_req_rsp_icache.o_m_valid; // Ask about this (width) - // Ask about the adress - assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? icache_request_fe.pc_address: VX_dcache_req.out_cache_driver_in_address; - //assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_read_addr: VX_dram_req_rsp.o_m_read_addr; - //assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? 
VX_dram_req_rsp_icache.o_m_evict_addr : VX_dram_req_rsp.o_m_evict_addr; - assign VX_L2cache_req.out_cache_driver_in_mem_read = (VX_dram_req_rsp_icache.o_m_valid) ? (VX_dram_req_rsp_icache.o_m_read_or_write ? icache_request_fe.out_cache_driver_in_mem_write : icache_request_fe.out_cache_driver_in_mem_read) - : (VX_dram_req_rsp.o_m_read_or_write ? VX_dcache_req.out_cache_driver_in_mem_write : VX_dcache_req.out_cache_driver_in_mem_read); - //assign VX_dram_req_rsp.i_m_ready = i_m_ready && !VX_dram_req_rsp_icache.o_m_valid && VX_dram_req_rsp.o_m_valid; - //assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready && VX_dram_req_rsp_icache.o_m_valid; - genvar cur_bank; - genvar cur_word; - for (cur_bank = 0; cur_bank < CACHE_BANKS; cur_bank = cur_bank + 1) begin - for (cur_word = 0; cur_word < NUM_WORDS_PER_BLOCK; cur_word = cur_word + 1) begin - assign VX_L2cache_req.out_cache_driver_in_data[cur_bank][cur_word] = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_writedata[cur_bank][cur_word] - : VX_dram_req_rsp.o_m_writedata[cur_bank][cur_word]; - assign VX_dram_req_rsp.i_m_readdata[cur_bank][cur_word] = VX_dram_req_rsp_L2.i_m_readdata[cur_bank][cur_word]; // fill in correct response data - assign VX_dram_req_rsp_icache.i_m_readdata[cur_bank][cur_word] = VX_dram_req_rsp_L2.i_m_readdata[cur_bank][cur_word]; // fill in correct response data - end - end - */ - - - assign o_m_valid_i = VX_dram_req_rsp_icache.o_m_valid; - assign o_m_valid_d = VX_dram_req_rsp.o_m_valid; - assign o_m_read_addr_i = VX_dram_req_rsp_icache.o_m_read_addr; - assign o_m_read_addr_d = VX_dram_req_rsp.o_m_read_addr; - assign o_m_evict_addr_i = VX_dram_req_rsp_icache.o_m_evict_addr; - assign o_m_evict_addr_d = VX_dram_req_rsp.o_m_evict_addr; - assign o_m_read_or_write_i = VX_dram_req_rsp_icache.o_m_read_or_write; - assign o_m_read_or_write_d = VX_dram_req_rsp.o_m_read_or_write; - assign VX_dram_req_rsp.i_m_ready = i_m_ready_d; - assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready_i; - genvar 
curr_bank; - genvar curr_word; - /* - for (curr_bank = 0; curr_bank < CACHE_BANKS; curr_bank = curr_bank + 1) begin - for (curr_word = 0; curr_word < NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin - assign o_m_writedata_i[curr_bank][curr_word] = VX_dram_req_rsp_icache.o_m_writedata[curr_bank][curr_word]; - assign o_m_writedata_d[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word]; - assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata_d[curr_bank][curr_word]; // fixed - assign VX_dram_req_rsp_icache.i_m_readdata[curr_bank][curr_word] = i_m_readdata_i[curr_bank][curr_word]; // fixed - end - end - */ - -for (curr_bank = 0; curr_bank < `DCACHE_BANKS; curr_bank = curr_bank + 1) begin - for (curr_word = 0; curr_word < `DCACHE_NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin - - assign o_m_writedata_d[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word]; - assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata_d[curr_bank][curr_word]; // fixed - - end -end - - -for (curr_bank = 0; curr_bank < `ICACHE_BANKS; curr_bank = curr_bank + 1) begin - for (curr_word = 0; curr_word < `ICACHE_NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin - assign o_m_writedata_i[curr_bank][curr_word] = VX_dram_req_rsp_icache.o_m_writedata[curr_bank][curr_word]; - assign VX_dram_req_rsp_icache.i_m_readdata[curr_bank][curr_word] = i_m_readdata_i[curr_bank][curr_word]; // fixed - end -end - - -///////////////////////////////////////////////////////////////////////// - - - -// Front-end to Back-end -VX_frE_to_bckE_req_inter VX_bckE_req(); // New instruction request to EXE/MEM - -// Back-end to Front-end -VX_wb_inter VX_writeback_inter(); // Writeback to GPRs -VX_branch_response_inter VX_branch_rsp(); // Branch Resolution to Fetch -VX_jal_response_inter VX_jal_rsp(); // Jump resolution to Fetch - -// CSR Buses -// VX_csr_write_request_inter VX_csr_w_req(); - - -VX_warp_ctl_inter VX_warp_ctl(); - - 
-VX_front_end vx_front_end( - .clk (clk), - .reset (reset), - .VX_warp_ctl (VX_warp_ctl), - .VX_bckE_req (VX_bckE_req), - .schedule_delay (schedule_delay), - .icache_response_fe (icache_response_fe), - .icache_request_fe (icache_request_fe), - .VX_jal_rsp (VX_jal_rsp), - .VX_branch_rsp (VX_branch_rsp), - .fetch_ebreak (out_ebreak) - ); - -VX_scheduler schedule( - .clk (clk), - .reset (reset), - .memory_delay (memory_delay), - .gpr_stage_delay (gpr_stage_delay), - .VX_bckE_req (VX_bckE_req), - .VX_writeback_inter(VX_writeback_inter), - .schedule_delay (schedule_delay) - ); - -VX_back_end vx_back_end( - .clk (clk), - .reset (reset), - .schedule_delay (schedule_delay), - .VX_warp_ctl (VX_warp_ctl), - .VX_bckE_req (VX_bckE_req), - .VX_jal_rsp (VX_jal_rsp), - .VX_branch_rsp (VX_branch_rsp), - .VX_dcache_rsp (VX_dcache_rsp), - .VX_dcache_req (VX_dcache_req), - .VX_writeback_inter (VX_writeback_inter), - .out_mem_delay (memory_delay), - .gpr_stage_delay (gpr_stage_delay) - ); - - -VX_dmem_controller VX_dmem_controller( - .clk (clk), - .reset (reset), - .VX_dram_req_rsp (VX_dram_req_rsp), - .VX_dram_req_rsp_icache (VX_dram_req_rsp_icache), - .VX_icache_req (icache_request_fe), - .VX_icache_rsp (icache_response_fe), - .VX_dcache_req (VX_dcache_req), - .VX_dcache_rsp (VX_dcache_rsp) - ); -// VX_csr_handler vx_csr_handler( -// .clk (clk), -// .in_decode_csr_address(decode_csr_address), -// .VX_csr_w_req (VX_csr_w_req), -// .in_wb_valid (VX_writeback_inter.wb_valid[0]), - -// .out_decode_csr_data (csr_decode_csr_data) -// ); - - - - -endmodule // Vortex - - - - - diff --git a/hw/old_rtl/byte_enabled_simple_dual_port_ram.v b/hw/old_rtl/byte_enabled_simple_dual_port_ram.v deleted file mode 100644 index 7a1173d5..00000000 --- a/hw/old_rtl/byte_enabled_simple_dual_port_ram.v +++ /dev/null @@ -1,53 +0,0 @@ - -`include "VX_define.v" - - -module byte_enabled_simple_dual_port_ram -( - input we, clk, - input wire reset, - input wire[4:0] waddr, raddr1, raddr2, - input wire[`NT_M1:0] 
be, - input wire[`NT_M1:0][31:0] wdata, - output reg[`NT_M1:0][31:0] q1, q2 -); - - // integer regi; - // integer threadi; - - // Thread Byte Bit - logic [`NT_M1:0][3:0][7:0] GPR[31:0]; - - // initial begin - // for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] = 0; - // end - - integer ini; - always@(posedge clk, posedge reset) begin - if (reset) begin - for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] <= 0; - end else if(we) begin - integer thread_ind; - for (thread_ind = 0; thread_ind <= `NT_M1; thread_ind = thread_ind + 1) begin - if(be[thread_ind]) GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0]; - if(be[thread_ind]) GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8]; - if(be[thread_ind]) GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16]; - if(be[thread_ind]) GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24]; - end - end - // $display("^^^^^^^^^^^^^^^^^^^^^^^"); - // for (regi = 0; regi <= 31; regi = regi + 1) begin - // for (threadi = 0; threadi <= `NT_M1; threadi = threadi + 1) begin - // if (GPR[regi][threadi] != 0) $display("$%d: %h",regi, GPR[regi][threadi]); - // end - // end - - end - - assign q1 = GPR[raddr1]; - assign q2 = GPR[raddr2]; - - // assign q1 = (raddr1 == waddr && (we)) ? wdata : GPR[raddr1]; - // assign q2 = (raddr2 == waddr && (we)) ? 
wdata : GPR[raddr2]; - -endmodule diff --git a/hw/old_rtl/cache/Makefile b/hw/old_rtl/cache/Makefile deleted file mode 100644 index 3e92307a..00000000 --- a/hw/old_rtl/cache/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -all: RUNFILE - - -VERILATOR: - verilator --compiler gcc --Wno-UNOPTFLAT -Wall --trace -cc VX_d_cache_encapsulate.v -Irtl --exe d_cache_test_bench.cpp -CFLAGS -std=c++11 - -RUNFILE: VERILATOR - (cd obj_dir && make -j -f VVX_d_cache_encapsulate.mk) - -clean: - rm ./obj_dir/* - diff --git a/hw/old_rtl/cache/Notes b/hw/old_rtl/cache/Notes deleted file mode 100644 index 0458c659..00000000 --- a/hw/old_rtl/cache/Notes +++ /dev/null @@ -1,46 +0,0 @@ -Notes - - -8 kB L1 Data Cache | 16 kB L1 I cache (maybe) -[tag index offset_remaining_block bank wordOffset], use a blocksize of 128 bytes between memory and cache. So each bank gets 16 bytes. - total offset is b its - 4 bits new offset, 2 bits block, 2 bits word offset - xxxxxxxIIIIIIIIoobbbyy - 9876543210 - bbbyyyyy - o = index into block offset - b = bank - y = word offset - I = index into cach - 6 bits indexes (64 indeces) No ways || 16 indexes with 4 ways - Rest of the bits are tag bits - -blocks / banks = 16 bytes, 8 banks. 128 bytes. 256 indexes (height). width is 16 bytes. 4 words per block (per bank). 17 bit tag - -gtkwave ___.vcd - - -// Splitting it up - -// word byte -wire[127:0][3:0] data_from_ram; - - -// word byte bank -wire[15:0][3:0] bank_data_n[3:0] - -integer i; -for (i = 0; i < something; i+=8) -{ - bank_data_n[0][i/8] = data_from_ram[i+0] - bank_data_n[1][i/8] = data_from_ram[i+1] - bank_data_n[2][i/8] = data_from_ram[i+2] - bank_data_n[3][i/8] = data_from_ram[i+3] - bank_data_n[4][i/8] = data_from_ram[i+4] - bank_data_n[5][i/8] = data_from_ram[i+5] - bank_data_n[6][i/8] = data_from_ram[i+6] - bank_data_n[7][i/8] = data_from_ram[i+7] -} - - -With Cache. If miss. Go to memory, grab all data, replace that data in the cache. 
Generate a new request, feed that into the cache (this one will hit), return that diff --git a/hw/old_rtl/cache/VX_Cache_Bank.v b/hw/old_rtl/cache/VX_Cache_Bank.v deleted file mode 100644 index e3251e72..00000000 --- a/hw/old_rtl/cache/VX_Cache_Bank.v +++ /dev/null @@ -1,253 +0,0 @@ -// To Do: Change way_id_out to an internal register which holds when in between access and finished. -// Also add a bit about wheter the "Way ID" is valid / being held or if it is just default -// Also make sure all possible output states are transmitted back to the bank correctly - -`include "../VX_define.v" -// `include "VX_cache_data.v" - - -module VX_Cache_Bank - #( - parameter CACHE_SIZE = 4096, // Bytes - parameter CACHE_WAYS = 1, - parameter CACHE_BLOCK = 128, // Bytes - parameter CACHE_BANKS = 8, - parameter LOG_NUM_BANKS = 3, - parameter NUM_REQ = 8, - parameter LOG_NUM_REQ = 3, - parameter NUM_IND = 8, - parameter CACHE_WAY_INDEX = 1, - parameter NUM_WORDS_PER_BLOCK = 4, - parameter OFFSET_SIZE_START = 0, - parameter OFFSET_SIZE_END = 1, - parameter TAG_SIZE_START = 0, - parameter TAG_SIZE_END = 16, - parameter IND_SIZE_START = 0, - parameter IND_SIZE_END = 7, - parameter ADDR_TAG_START = 15, - parameter ADDR_TAG_END = 31, - parameter ADDR_OFFSET_START = 5, - parameter ADDR_OFFSET_END = 6, - parameter ADDR_IND_START = 7, - parameter ADDR_IND_END = 14 - ) - ( - clk, - rst, - state, - read_or_write, // Read = 0 | Write = 1 - i_p_mem_read, - i_p_mem_write, - valid_in, - //write_from_mem, - actual_index, - o_tag, - block_offset, - writedata, - fetched_writedata, - - byte_select, - - readdata, - hit, - //miss, - - eviction_wb, // Need to evict - eviction_addr, // What's the eviction tag - - data_evicted, - evicted_way - ); - - // localparam NUMBER_BANKS = `CACHE_BANKS; - // localparam CACHE_BLOCK_PER_BANK = (`CACHE_BLOCK / `CACHE_BANKS); - // localparam NUM_WORDS_PER_BLOCK = `CACHE_BLOCK / (`CACHE_BANKS*4); - // localparam NUMBER_INDEXES = `NUM_IND; - - localparam CACHE_IDLE = 0; 
// Idle - localparam SEND_MEM_REQ = 1; // Write back this block into memory - localparam RECIV_MEM_RSP = 2; - - - localparam BLOCK_NUM_BITS = `CLOG2(CACHE_BLOCK); - // Inputs - input wire rst; - input wire clk; - input wire [3:0] state; -//input wire write_from_mem; - - // Reading Data - input wire[IND_SIZE_END:IND_SIZE_START] actual_index; - - - input wire[TAG_SIZE_END:TAG_SIZE_START] o_tag; // When write_from_mem = 1, o_tag is the new tag - input wire[OFFSET_SIZE_END:OFFSET_SIZE_START] block_offset; - - - input wire[31:0] writedata; - input wire valid_in; - input wire read_or_write; // Specifies if it is a read or write operation - - input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] fetched_writedata; - input wire[2:0] i_p_mem_read; - input wire[2:0] i_p_mem_write; - input wire[1:0] byte_select; - - - input wire[CACHE_WAY_INDEX-1:0] evicted_way; - - // Outputs - // Normal shit - output wire[31:0] readdata; - output wire hit; - //output wire miss; - - // Eviction Data (Notice) - output wire eviction_wb; // Need to evict - output wire[31:0] eviction_addr; // What's the eviction tag - - // Eviction Data (Extraction) - output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_evicted; - - - - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use; - wire[TAG_SIZE_END:TAG_SIZE_START] tag_use; - wire[TAG_SIZE_END:TAG_SIZE_START] eviction_tag; - wire valid_use; - wire dirty_use; - wire access; - wire write_from_mem; - wire miss; // -10/21 - - - - wire[CACHE_WAY_INDEX-1:0] way_to_update; - - assign miss = (tag_use != o_tag) && valid_use && valid_in; - - - assign data_evicted = data_use; - - // assign eviction_wb = miss && (dirty_use != 1'b0) && valid_use; - assign eviction_wb = (dirty_use != 1'b0); - assign eviction_tag = tag_use; - assign access = (state == CACHE_IDLE) && valid_in; - assign write_from_mem = (state == RECIV_MEM_RSP) && valid_in; // TODO - assign hit = (access && (tag_use == o_tag) && valid_use); - //assign eviction_addr = {eviction_tag, actual_index, block_offset, 5'b0}; // Fix 
with actual data - assign eviction_addr = {eviction_tag, actual_index, {(BLOCK_NUM_BITS){1'b0}}}; // Fix with actual data - - - - wire lw = (i_p_mem_read == `LW_MEM_READ); - wire lb = (i_p_mem_read == `LB_MEM_READ); - wire lh = (i_p_mem_read == `LH_MEM_READ); - wire lhu = (i_p_mem_read == `LHU_MEM_READ); - wire lbu = (i_p_mem_read == `LBU_MEM_READ); - - wire sw = (i_p_mem_write == `SW_MEM_WRITE); - wire sb = (i_p_mem_write == `SB_MEM_WRITE); - wire sh = (i_p_mem_write == `SH_MEM_WRITE); - - wire b0 = (byte_select == 0); - wire b1 = (byte_select == 1); - wire b2 = (byte_select == 2); - wire b3 = (byte_select == 3); - - wire[31:0] data_unQual = (b0 || lw) ? (data_use[block_offset] ) : - b1 ? (data_use[block_offset] >> 8) : - b2 ? (data_use[block_offset] >> 16) : - (data_use[block_offset] >> 24); - - - wire[31:0] lb_data = (data_unQual[7] ) ? (data_unQual | 32'hFFFFFF00) : (data_unQual & 32'hFF); - wire[31:0] lh_data = (data_unQual[15]) ? (data_unQual | 32'hFFFF0000) : (data_unQual & 32'hFFFF); - wire[31:0] lbu_data = (data_unQual & 32'hFF); - wire[31:0] lhu_data = (data_unQual & 32'hFFFF); - wire[31:0] lw_data = (data_unQual); - - - wire[31:0] sw_data = writedata; - - wire[31:0] sb_data = b1 ? {{16{1'b0}}, writedata[7:0], { 8{1'b0}}} : - b2 ? {{ 8{1'b0}}, writedata[7:0], {16{1'b0}}} : - b3 ? {{ 0{1'b0}}, writedata[7:0], {24{1'b0}}} : - writedata; - - wire[31:0] sh_data = b2 ? {writedata[15:0], {16{1'b0}}} : writedata; - - - - wire[31:0] use_write_data = sb ? sb_data : - sh ? sh_data : - sw_data; - - - wire[31:0] data_Qual = lb ? lb_data : - lh ? lh_data : - lhu ? lhu_data : - lbu ? lbu_data : - lw_data; - - - assign readdata = (access) ? data_Qual : 32'b0; // Fix with actual data - - - wire[3:0] sb_mask = (b0 ? 4'b0001 : (b1 ? 4'b0010 : (b2 ? 4'b0100 : 4'b1000))); - wire[3:0] sh_mask = (b0 ? 
4'b0011 : 4'b1100); - - - wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we; - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write; - genvar g; - for (g = 0; g < NUM_WORDS_PER_BLOCK; g = g + 1) begin - wire normal_write = (read_or_write && ((access && (block_offset == g))) && !miss); - - assign we[g] = (write_from_mem) ? 4'b1111 : - (normal_write && sw) ? 4'b1111 : - (normal_write && sb) ? sb_mask : - (normal_write && sh) ? sh_mask : - 4'b0000; - - - // assign we[g] = (normal_write || (write_from_mem)) ? 1'b1 : 1'b0; - assign data_write[g] = write_from_mem ? fetched_writedata[g] : use_write_data; - assign way_to_update = evicted_way; - end - - - VX_cache_data_per_index #( - .CACHE_WAYS (CACHE_WAYS), - .NUM_IND (NUM_IND), - .CACHE_WAY_INDEX (CACHE_WAY_INDEX), - .NUM_WORDS_PER_BLOCK(NUM_WORDS_PER_BLOCK), - .TAG_SIZE_START (TAG_SIZE_START), - .TAG_SIZE_END (TAG_SIZE_END), - .IND_SIZE_START (IND_SIZE_START), - .IND_SIZE_END (IND_SIZE_END)) data_structures( - .clk (clk), - .rst (rst), - .valid_in (valid_in), - .state (state), - // Inputs - .addr (actual_index), - .we (we), - .evict (write_from_mem), - .data_write (data_write), - .tag_write (o_tag), - .way_to_update(way_to_update), - // Outputs - .tag_use (tag_use), - .data_use (data_use), - .valid_use (valid_use), - .dirty_use (dirty_use) - ); - - - -endmodule - - - - diff --git a/hw/old_rtl/cache/VX_cache_bank_valid.v b/hw/old_rtl/cache/VX_cache_bank_valid.v deleted file mode 100644 index 48759b77..00000000 --- a/hw/old_rtl/cache/VX_cache_bank_valid.v +++ /dev/null @@ -1,30 +0,0 @@ -`include "../VX_define.v" - -module VX_cache_bank_valid -#( - parameter NUMBER_BANKS = 8, - parameter LOG_NUM_BANKS = 3, - parameter NUM_REQ = 1 -) -( - input wire [NUM_REQ-1:0] i_p_valid, - input wire [NUM_REQ-1:0][31:0] i_p_addr, - output reg [NUMBER_BANKS - 1 : 0][NUM_REQ-1:0] thread_track_banks -); - - generate - integer t_id; - always @(*) begin - thread_track_banks = 0; - for (t_id = 0; t_id < NUM_REQ; t_id = t_id + 1) - begin - if (NUMBER_BANKS != 1) 
begin - thread_track_banks[i_p_addr[t_id][2+LOG_NUM_BANKS-1:2]][t_id] = i_p_valid[t_id]; - end else begin - thread_track_banks[0][t_id] = i_p_valid[t_id]; - end - end - end - endgenerate - -endmodule diff --git a/hw/old_rtl/cache/VX_cache_data.v b/hw/old_rtl/cache/VX_cache_data.v deleted file mode 100644 index 6b6c91b1..00000000 --- a/hw/old_rtl/cache/VX_cache_data.v +++ /dev/null @@ -1,233 +0,0 @@ - - -`include "../VX_define.v" - -module VX_cache_data - #( - parameter NUM_IND = 8, - parameter NUM_WORDS_PER_BLOCK = 4, - parameter TAG_SIZE_START = 0, - parameter TAG_SIZE_END = 16, - parameter IND_SIZE_START = 0, - parameter IND_SIZE_END = 7 - ) - ( - input wire clk, rst, // Clock - - // `ifdef PARAM - // Addr - input wire[IND_SIZE_END:IND_SIZE_START] addr, - // WE - input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we, - input wire evict, - // Data - input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, - input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write, - - - output wire[TAG_SIZE_END:TAG_SIZE_START] tag_use, - output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use, - output wire valid_use, - output wire dirty_use - // `else - // // Addr - // input wire[7:0] addr, - // // WE - // input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we, - // input wire evict, - // // Data - // input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data - // input wire[16:0] tag_write, - - - // output wire[16:0] tag_use, - // output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use, - // output wire valid_use, - // output wire dirty_use - // `endif - -); - - //localparam NUMBER_BANKS = CACHE_BANKS; - //localparam CACHE_BLOCK_PER_BANK = (CACHE_BLOCK / CACHE_BANKS); - // localparam NUM_WORDS_PER_BLOCK = CACHE_BLOCK / (CACHE_BANKS*4); - //localparam NUMBER_INDEXES = NUM_IND; - - wire currently_writing = (|we); - wire update_dirty = ((!dirty_use) && currently_writing) || (evict); - - wire dirt_new = evict ? 
0 : (|we); - - - `ifndef SYN - - // (3:0) 4 bytes - reg[NUM_WORDS_PER_BLOCK-1:0][3:0][7:0] data[NUM_IND-1:0]; // Actual Data - reg[TAG_SIZE_END:TAG_SIZE_START] tag[NUM_IND-1:0]; - reg valid[NUM_IND-1:0]; - reg dirty[NUM_IND-1:0]; - - - // 16 bytes - assign data_use = data[addr]; // Read Port - assign tag_use = tag[addr]; - assign valid_use = valid[addr]; - assign dirty_use = dirty[addr]; - - integer f; - integer ini_ind; - always @(posedge clk, posedge rst) begin : update_all - if (rst) begin - for (ini_ind = 0; ini_ind < NUM_IND; ini_ind=ini_ind+1) begin - data[ini_ind] <= 0; - tag[ini_ind] <= 0; - valid[ini_ind] <= 0; - dirty[ini_ind] <= 0; - end - end else begin - if (update_dirty) dirty[addr] <= dirt_new; // WRite Port - if (evict) tag[addr] <= tag_write; - if (evict) valid[addr] <= 1; - - for (f = 0; f < NUM_WORDS_PER_BLOCK; f = f + 1) begin - if (we[f][0]) data[addr][f][0] <= data_write[f][7 :0 ]; - if (we[f][1]) data[addr][f][1] <= data_write[f][15:8 ]; - if (we[f][2]) data[addr][f][2] <= data_write[f][23:16]; - if (we[f][3]) data[addr][f][3] <= data_write[f][31:24]; - end - end - end - - `else - - wire[IND_SIZE_END:IND_SIZE_START] use_addr = addr; - - wire cena = 1; - - wire cenb_d = (|we); - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_d = data_write; - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] write_bit_mask_d; - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_d; - genvar cur_b; - for (cur_b = 0; cur_b < NUM_WORDS_PER_BLOCK; cur_b=cur_b+1) begin - assign write_bit_mask_d[cur_b] = {32{~we[cur_b]}}; - end - assign data_use = data_out_d; - - - // Using ASIC MEM - /* verilator lint_off PINCONNECTEMPTY */ - rf2_32x128_wm1 data ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(data_out_d), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena), - .AA(use_addr), - .CLKB(clk), - .CENB(cenb_d), - .WENB(write_bit_mask_d), - .AB(use_addr), - .DB(wdata_d), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - 
.TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - /* verilator lint_on PINCONNECTEMPTY */ - - - - - - wire[16:0] old_tag; - wire old_valid; - wire old_dirty; - - wire[16:0] new_tag = evict ? tag_write : old_tag; - wire new_valid = evict ? 1 : old_valid; - wire new_dirty = update_dirty ? dirt_new : old_dirty; - - - wire cenb_m = (evict || update_dirty); - wire[19-1:0][31:0] write_bit_mask_m = cenb_m ? 19'b0 : 19'b1; - - - - // Try to fix the error in memory conneciton, modified by Lingjun Zhu on Oct. 28 2019 - // wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_m = {new_tag, new_dirty, new_valid}; - // wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_m; - - wire[19-1:0] wdata_m = {new_tag, new_dirty, new_valid}; - - wire[19-1:0] data_out_m; - - assign {old_tag, old_dirty, old_valid} = data_out_m; - - - assign dirty_use = old_dirty; - assign valid_use = old_valid; - assign tag_use = old_tag; - - /* verilator lint_off PINCONNECTEMPTY */ - rf2_32x19_wm0 meta ( - .CENYA(), - .AYA(), - .CENYB(), - // .WENYB(), - .AYB(), - .QA(data_out_m), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena), - .AA(use_addr), - .CLKB(clk), - .CENB(cenb_m), - // .WENB(write_bit_mask_m), - .AB(use_addr), - .DB(wdata_m), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - // .TWENB(128'b0), - .TAB(5'b0), - .TDB(19'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - /* verilator lint_on PINCONNECTEMPTY */ - - - `endif - -endmodule diff --git a/hw/old_rtl/cache/VX_cache_data_per_index.v b/hw/old_rtl/cache/VX_cache_data_per_index.v deleted file mode 100644 index 4e95a42d..00000000 --- a/hw/old_rtl/cache/VX_cache_data_per_index.v +++ /dev/null @@ -1,163 +0,0 @@ - - -`include "../VX_define.v" - -module VX_cache_data_per_index - #( - parameter 
CACHE_WAYS = 1, - parameter NUM_IND = 8, - parameter CACHE_WAY_INDEX = 1, - parameter NUM_WORDS_PER_BLOCK = 4, - parameter TAG_SIZE_START = 0, - parameter TAG_SIZE_END = 16, - parameter IND_SIZE_START = 0, - parameter IND_SIZE_END = 7 - ) - ( - input wire clk, // Clock - input wire rst, - input wire valid_in, - input wire [3:0] state, - // Addr - input wire[IND_SIZE_END:IND_SIZE_START] addr, - // WE - input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we, - input wire evict, - input wire[CACHE_WAY_INDEX-1:0] way_to_update, - // Data - input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data - input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write, - - - output wire[TAG_SIZE_END:TAG_SIZE_START] tag_use, - output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use, - output wire valid_use, - output wire dirty_use - -); - //localparam NUMBER_BANKS = CACHE_BANKS; - //localparam CACHE_BLOCK_PER_BANK = (CACHE_BLOCK / CACHE_BANKS); - // localparam NUM_WORDS_PER_BLOCK = CACHE_BLOCK / (CACHE_BANKS*4); - //localparam NUMBER_INDEXES = `DCACHE_NUM_IND; - - wire [CACHE_WAYS-1:0][TAG_SIZE_END:TAG_SIZE_START] tag_use_per_way; - wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] data_use_per_way; - wire [CACHE_WAYS-1:0] valid_use_per_way; - wire [CACHE_WAYS-1:0] dirty_use_per_way; - wire [CACHE_WAYS-1:0] hit_per_way; - // reg [CACHE_WAY_INDEX-1:0] eviction_way_index; - wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][3:0] we_per_way; - wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] data_write_per_way; - wire [CACHE_WAYS-1:0] write_from_mem_per_way; - wire invalid_found; - - wire [CACHE_WAY_INDEX-1:0] way_index; - wire [CACHE_WAY_INDEX-1:0] invalid_index; - - - localparam CACHE_IDLE = 0; // Idle - localparam SEND_MEM_REQ = 1; // Write back this block into memory - localparam RECIV_MEM_RSP = 2; - - if(CACHE_WAYS != 1) begin - VX_generic_priority_encoder #(.N(CACHE_WAYS)) valid_index - ( - .valids(~valid_use_per_way), - .index (invalid_index), - .found (invalid_found) - ); - - 
VX_generic_priority_encoder #(.N(CACHE_WAYS)) way_indexing - ( - .valids(hit_per_way), - .index (way_index), - .found () - ); - end - else begin - assign way_index = 0; - assign invalid_found = (valid_use_per_way == 1'b0) ? 1 : 0; - assign invalid_index = 0; - end - - - - - // wire hit = |hit_per_way; - // wire miss = ~hit; - // wire update = |we && !miss; - // wire valid = &valid_use_per_way; - - wire[CACHE_WAY_INDEX-1:0] way_use_Qual; - - assign way_use_Qual = (state != CACHE_IDLE) ? way_to_update : way_index; - - assign tag_use = tag_use_per_way[way_use_Qual]; - assign data_use = data_use_per_way[way_use_Qual]; - assign valid_use = valid_use_per_way[way_use_Qual]; - assign dirty_use = dirty_use_per_way[way_use_Qual]; - - // assign tag_use = hit ? tag_use_per_way[way_index] : (valid ? tag_use_per_way[eviction_way_index] : (invalid_found ? tag_use_per_way[invalid_index] : 0)); - // assign data_use = hit ? data_use_per_way[way_index] : (valid ? data_use_per_way[eviction_way_index] : (invalid_found ? data_use_per_way[invalid_index] : 0)); - // assign valid_use = hit ? valid_use_per_way[way_index] : (valid ? valid_use_per_way[eviction_way_index] : (invalid_found ? valid_use_per_way[invalid_index] : 0)); - // assign dirty_use = hit ? dirty_use_per_way[way_index] : (valid ? dirty_use_per_way[eviction_way_index] : (invalid_found ? dirty_use_per_way[invalid_index] : 0)); - - - - genvar ways; - for(ways=0; ways < CACHE_WAYS; ways = ways + 1) begin : each_way - - - assign hit_per_way[ways] = ((valid_use_per_way[ways] == 1'b1) && (tag_use_per_way[ways] == tag_write)) ? 1'b1 : 0; - - - assign write_from_mem_per_way[ways] = evict && (ways == way_use_Qual); - assign we_per_way[ways] = (ways == way_use_Qual) ? (we) : 0; - assign data_write_per_way[ways] = data_write; - - - // assign hit_per_way[ways] = ((valid_use_per_way[ways] == 1'b1) && (tag_use_per_way[ways] == tag_write)) ? 1'b1 : 0; - - // assign we_per_way[ways] = (evict == 1'b1) || (update == 1'b1) ? 
((ways == way_use_Qual) ? (we) : 0) : 0; - // assign data_write_per_way[ways] = (evict == 1'b1) || (update == 1'b1) ? ((ways == way_use_Qual) ? data_write : 0) : 0; - // assign write_from_mem_per_way[ways] = (evict == 1'b1) ? ((ways == way_use_Qual) ? 1 : 0) : 0; - - VX_cache_data #( - .NUM_IND (NUM_IND), - .NUM_WORDS_PER_BLOCK (NUM_WORDS_PER_BLOCK), - .TAG_SIZE_START (TAG_SIZE_START), - .TAG_SIZE_END (TAG_SIZE_END), - .IND_SIZE_START (IND_SIZE_START), - .IND_SIZE_END (IND_SIZE_END)) data_structures( - .clk (clk), - .rst (rst), - // Inputs - .addr (addr), - .we (we_per_way[ways]), - .evict (write_from_mem_per_way[ways]), - .data_write(data_write_per_way[ways]), - .tag_write (tag_write), - // Outputs - .tag_use (tag_use_per_way[ways]), - .data_use (data_use_per_way[ways]), - .valid_use (valid_use_per_way[ways]), - .dirty_use (dirty_use_per_way[ways]) - ); - end - - // always @(posedge clk or posedge rst) begin - // if (rst) begin - // eviction_way_index <= 0; - // end else begin - // // if((miss && dirty_use && valid_use && !evict && valid_in)) begin // can be either evict or invalid cache entries - // if((state == SEND_MEM_REQ)) begin // can be either evict or invalid cache entries - // if((eviction_way_index+1) == CACHE_WAYS) begin - // eviction_way_index <= 0; - // end else begin - // eviction_way_index <= (eviction_way_index + 1); - // end - // end - // end - // end - -endmodule diff --git a/hw/old_rtl/cache/VX_d_cache.v b/hw/old_rtl/cache/VX_d_cache.v deleted file mode 100644 index 78b407f7..00000000 --- a/hw/old_rtl/cache/VX_d_cache.v +++ /dev/null @@ -1,387 +0,0 @@ -// Cache Memory (8way 4word) // -// i_ means input port // -// o_ means output port // -// _p_ means data exchange with processor // -// _m_ means data exchange with memory // - - -// TO DO: -// - Send in a response from memory of what the data is from the test bench - -`include "../VX_define.v" -//`include "VX_priority_encoder.v" -// `include "VX_Cache_Bank.v" -//`include "cache_set.v" - -module 
VX_d_cache - #( - parameter CACHE_SIZE = 4096, // Bytes - parameter CACHE_WAYS = 1, - parameter CACHE_BLOCK = 128, // Bytes - parameter CACHE_BANKS = 8, - parameter LOG_NUM_BANKS = 3, - parameter NUM_REQ = 8, - parameter LOG_NUM_REQ = 3, - parameter NUM_IND = 8, - parameter CACHE_WAY_INDEX = 1, - parameter NUM_WORDS_PER_BLOCK = 4, - parameter OFFSET_SIZE_START = 0, - parameter OFFSET_SIZE_END = 1, - parameter TAG_SIZE_START = 0, - parameter TAG_SIZE_END = 16, - parameter IND_SIZE_START = 0, - parameter IND_SIZE_END = 7, - parameter ADDR_TAG_START = 15, - parameter ADDR_TAG_END = 31, - parameter ADDR_OFFSET_START = 5, - parameter ADDR_OFFSET_END = 6, - parameter ADDR_IND_START = 7, - parameter ADDR_IND_END = 14, - parameter MEM_ADDR_REQ_MASK = 32'hffffffc0 - ) - ( - clk, - rst, - i_p_addr, - //i_p_byte_en, - i_p_writedata, - i_p_read_or_write, // 0 = Read | 1 = Write - i_p_mem_read, - i_p_mem_write, - i_p_valid, - //i_p_write, - o_p_readdata, - o_p_delay, // 0 = all threads done | 1 = Still threads that need to - - o_m_evict_addr, - o_m_read_addr, - - o_m_writedata, - - o_m_read_or_write, // 0 = Read | 1 = Write - o_m_valid, - i_m_readdata, - - i_m_ready - ); - - //parameter NUMBER_BANKS = `CACHE_BANKS; - //localparam NUM_WORDS_PER_BLOCK = `CACHE_BLOCK / (`CACHE_BANKS*4); - - //localparam CACHE_BLOCK_PER_BANK = (`CACHE_BLOCK / `CACHE_BANKS); - - localparam CACHE_IDLE = 0; // Idle - localparam SEND_MEM_REQ = 1; // Write back this block into memory - localparam RECIV_MEM_RSP = 2; - - - //parameter cache_entry = 9; - input wire clk, rst; - input wire [NUM_REQ-1:0] i_p_valid; - input wire [NUM_REQ-1:0][31:0] i_p_addr; // FIXME - input wire [NUM_REQ-1:0][31:0] i_p_writedata; - input wire i_p_read_or_write; //, i_p_write; - output reg [NUM_REQ-1:0][31:0] o_p_readdata; - output wire o_p_delay; - output reg [31:0] o_m_evict_addr; // Address is xxxxxxxxxxoooobbbyy - output reg [31:0] o_m_read_addr; - output reg o_m_valid; - output reg[CACHE_BANKS - 
1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata; - output reg o_m_read_or_write; //, o_m_write; - input wire[CACHE_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata; - input wire i_m_ready; - - input wire[2:0] i_p_mem_read; - input wire[2:0] i_p_mem_write; - - - // Buffer for final data - reg [NUM_REQ-1:0][31:0] final_data_read; - reg [NUM_REQ-1:0][31:0] new_final_data_read; - wire[NUM_REQ-1:0][31:0] new_final_data_read_Qual; - - assign o_p_readdata = new_final_data_read_Qual; - - - reg[CACHE_WAY_INDEX-1:0] global_way_to_evict; - - - wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] thread_track_banks; // Valid thread mask per bank - wire[CACHE_BANKS - 1 : 0][LOG_NUM_REQ-1:0] index_per_bank; // Index of thread each bank will try to service - wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] use_mask_per_bank; // A mask of index_per_bank - wire[CACHE_BANKS - 1 : 0] valid_per_bank; // Valid request going to each bank - wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] threads_serviced_per_bank; // Bank successfully serviced per bank - - wire[CACHE_BANKS-1:0][31:0] readdata_per_bank; // Data read from each bank - wire[CACHE_BANKS-1:0] hit_per_bank; // Whether each bank got a hit or a miss - wire[CACHE_BANKS-1:0] eviction_wb; - reg[CACHE_BANKS-1:0] eviction_wb_old; - - - // wire[CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] evicted_way_new; - // reg [CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] evicted_way_old; - // wire[CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] way_used; - - // Internal State - reg [3:0] state; - wire[3:0] new_state; - - wire[NUM_REQ-1:0] use_valid; // Valid used throught the code - reg[NUM_REQ-1:0] stored_valid; // Saving the threads still left (bank conflict or bank miss) - wire[NUM_REQ-1:0] new_stored_valid; // New stored valid - - - - reg[CACHE_BANKS - 1 : 0][31:0] eviction_addr_per_bank; - - reg[31:0] miss_addr; - // reg[31:0] evict_addr; - - wire curr_processor_request_valid = (|i_p_valid); - - - assign use_valid = (stored_valid == 0) ? 
i_p_valid : stored_valid; - - - - - - - VX_cache_bank_valid #(.NUMBER_BANKS (CACHE_BANKS), - .LOG_NUM_BANKS (LOG_NUM_BANKS), - .NUM_REQ (NUM_REQ)) multip_banks( - .i_p_valid (use_valid), - .i_p_addr (i_p_addr), - .thread_track_banks(thread_track_banks) - ); - - - reg[NUM_REQ-1:0] threads_serviced_Qual; - - reg[NUM_REQ-1:0] debug_hit_per_bank_mask[CACHE_BANKS-1:0]; - - genvar bid; - for (bid = 0; bid < CACHE_BANKS; bid=bid+1) - begin - wire[NUM_REQ-1:0] use_threads_track_banks = thread_track_banks[bid]; - wire[LOG_NUM_REQ-1:0] use_thread_index = index_per_bank[bid]; - wire use_write_final_data = hit_per_bank[bid]; - wire[31:0] use_data_final_data = readdata_per_bank[bid]; - VX_priority_encoder_w_mask #(.N(NUM_REQ)) choose_thread( - .valids(use_threads_track_banks), - .mask (use_mask_per_bank[bid]), - .index (index_per_bank[bid]), - .found (valid_per_bank[bid]) - ); - - assign debug_hit_per_bank_mask[bid] = {NUM_REQ{hit_per_bank[bid]}}; - assign threads_serviced_per_bank[bid] = use_mask_per_bank[bid] & debug_hit_per_bank_mask[bid]; - end - - integer test_bid; - always @(*) begin - new_final_data_read = 0; - for (test_bid=0; test_bid < CACHE_BANKS; test_bid=test_bid+1) - begin - if (hit_per_bank[test_bid]) begin - new_final_data_read[index_per_bank[test_bid]] = readdata_per_bank[test_bid]; - end - end - end - - - wire[CACHE_BANKS - 1 : 0] detect_bank_miss; - //assign threads_serviced_Qual = threads_serviced_per_bank[0] | threads_serviced_per_bank[1] | - // threads_serviced_per_bank[2] | threads_serviced_per_bank[3] | - // threads_serviced_per_bank[4] | threads_serviced_per_bank[5] | - // threads_serviced_per_bank[6] | threads_serviced_per_bank[7]; - integer bbid; - always @(*) begin - threads_serviced_Qual = 0; - for (bbid = 0; bbid < CACHE_BANKS; bbid=bbid+1) - begin - threads_serviced_Qual = threads_serviced_Qual | threads_serviced_per_bank[bbid]; - end - end - - - - genvar tid; - for (tid = 0; tid < NUM_REQ; tid =tid+1) - begin - assign 
new_final_data_read_Qual[tid] = threads_serviced_Qual[tid] ? new_final_data_read[tid] : final_data_read[tid]; - end - - - assign detect_bank_miss = (valid_per_bank & ~hit_per_bank); - - wire delay; - assign delay = (new_stored_valid != 0) || (state != CACHE_IDLE); // add other states - - assign o_p_delay = delay; - - wire[CACHE_BANKS - 1 : 0][LOG_NUM_REQ-1:0] send_index_to_bank = index_per_bank; - - - wire[LOG_NUM_BANKS-1:0] miss_bank_index; - wire miss_found; - VX_generic_priority_encoder #(.N(CACHE_BANKS)) get_miss_index - ( - .valids(detect_bank_miss), - .index (miss_bank_index), - .found (miss_found) - ); - - - - assign new_state = ((state == CACHE_IDLE) && (|detect_bank_miss)) ? SEND_MEM_REQ : - (state == SEND_MEM_REQ) ? RECIV_MEM_RSP : - ((state == RECIV_MEM_RSP) && !i_m_ready) ? RECIV_MEM_RSP : - CACHE_IDLE; - - // Handle if there is more than one miss - assign new_stored_valid = use_valid & (~threads_serviced_Qual); - - - wire update_global_way_to_evict = ((state == RECIV_MEM_RSP) && (new_state == CACHE_IDLE)) && (CACHE_WAYS > 1); - -/////////////////////////////////////////////////////////////////////// - genvar cur_t; - integer init_b; - always @(posedge clk, posedge rst) begin - if (rst) begin - final_data_read <= 0; - // new_final_data_read = 0; - state <= 0; - stored_valid <= 0; - // eviction_addr_per_bank <= 0; - miss_addr <= 0; - // evict_addr <= 0; - // threads_serviced_Qual = 0; - // for (init_b = 0; init_b < NUMBER_BANKS; init_b=init_b+1) - // begin - // debug_hit_per_bank_mask[init_b] <= 0; - // end - // evicted_way_old <= 0; - // eviction_wb_old <= 0; - global_way_to_evict <= 0; - - end else begin - - global_way_to_evict <= (update_global_way_to_evict) ? 
(global_way_to_evict+1) : global_way_to_evict; - - state <= new_state; - - stored_valid <= new_stored_valid; - - if (state == CACHE_IDLE) begin - if (miss_found) begin - miss_addr <= i_p_addr[send_index_to_bank[miss_bank_index]]; - // evict_addr <= eviction_addr_per_bank[miss_bank_index]; - end else begin - miss_addr <= 0; - // evict_addr <= 0; - end - end - - final_data_read <= new_final_data_read_Qual; - // evicted_way_old <= evicted_way_new; - // eviction_wb_old <= eviction_wb; - end - end - - - genvar bank_id; - generate - for (bank_id = 0; bank_id < CACHE_BANKS; bank_id = bank_id + 1) - begin - wire[31:0] bank_addr = (state == SEND_MEM_REQ) ? miss_addr : - (state == RECIV_MEM_RSP) ? miss_addr : - i_p_addr[send_index_to_bank[bank_id]]; - - // assign evicted_way_new[bank_id] = (state == SEND_MEM_REQ) ? way_used[bank_id] : - // (state == RECIV_MEM_RSP) ? evicted_way_old[bank_id] : - // 0; - - wire[1:0] byte_select = bank_addr[1:0]; - wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START]; - - `ifdef SYN_FUNC - wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = 0; - wire[IND_SIZE_END:IND_SIZE_START] cache_index = 0; - `else - wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = bank_addr[ADDR_OFFSET_END:ADDR_OFFSET_START]; - wire[IND_SIZE_END:IND_SIZE_START] cache_index = bank_addr[ADDR_IND_END:ADDR_IND_START]; - `endif - - - wire normal_valid_in = valid_per_bank[bank_id]; - wire use_valid_in = ((state == RECIV_MEM_RSP) && i_m_ready) ? 1'b1 : - ((state == RECIV_MEM_RSP) && !i_m_ready) ? 1'b0 : - ((state == SEND_MEM_REQ)) ? 
1'b0 : - normal_valid_in; - - - VX_Cache_Bank #( - .CACHE_SIZE (CACHE_SIZE), - .CACHE_WAYS (CACHE_WAYS), - .CACHE_BLOCK (CACHE_BLOCK), - .CACHE_BANKS (CACHE_BANKS), - .LOG_NUM_BANKS (LOG_NUM_BANKS), - .NUM_REQ (NUM_REQ), - .LOG_NUM_REQ (LOG_NUM_REQ), - .NUM_IND (NUM_IND), - .CACHE_WAY_INDEX (CACHE_WAY_INDEX), - .NUM_WORDS_PER_BLOCK (NUM_WORDS_PER_BLOCK), - .OFFSET_SIZE_START (OFFSET_SIZE_START), - .OFFSET_SIZE_END (OFFSET_SIZE_END), - .TAG_SIZE_START (TAG_SIZE_START), - .TAG_SIZE_END (TAG_SIZE_END), - .IND_SIZE_START (IND_SIZE_START), - .IND_SIZE_END (IND_SIZE_END), - .ADDR_TAG_START (ADDR_TAG_START), - .ADDR_TAG_END (ADDR_TAG_END), - .ADDR_OFFSET_START (ADDR_OFFSET_START), - .ADDR_OFFSET_END (ADDR_OFFSET_END), - .ADDR_IND_START (ADDR_IND_START), - .ADDR_IND_END (ADDR_IND_END) - ) bank_structure ( - .clk (clk), - .rst (rst), - .state (state), - .valid_in (use_valid_in), - .actual_index (cache_index), - .o_tag (cache_tag), - .block_offset (cache_offset), - .writedata (i_p_writedata[send_index_to_bank[bank_id]]), - .read_or_write (i_p_read_or_write), - .i_p_mem_read (i_p_mem_read), - .i_p_mem_write (i_p_mem_write), - .byte_select (byte_select), - .hit (hit_per_bank[bank_id]), - .readdata (readdata_per_bank[bank_id]), // Data read - .eviction_addr (eviction_addr_per_bank[bank_id]), - .data_evicted (o_m_writedata[bank_id]), - .eviction_wb (eviction_wb[bank_id]), // Something needs to be written back - .fetched_writedata(i_m_readdata[bank_id]), // Data From memory - .evicted_way (global_way_to_evict) - ); - - end - endgenerate - - // Mem Rsp - - // Req to mem: - assign o_m_evict_addr = (eviction_addr_per_bank[0]) & MEM_ADDR_REQ_MASK; // Could be anything because tag+index are same - assign o_m_read_addr = miss_addr & MEM_ADDR_REQ_MASK; - assign o_m_valid = (state == SEND_MEM_REQ); - assign o_m_read_or_write = (state == SEND_MEM_REQ) && (|eviction_wb); - //end - -endmodule - - - - - diff --git a/hw/old_rtl/cache/VX_d_cache_encapsulate.v 
b/hw/old_rtl/cache/VX_d_cache_encapsulate.v deleted file mode 100644 index a35c322a..00000000 --- a/hw/old_rtl/cache/VX_d_cache_encapsulate.v +++ /dev/null @@ -1,118 +0,0 @@ - -`include "../VX_define.v" - -// `define NUM_WORDS_PER_BLOCK 4 - -module VX_d_cache_encapsulate ( - clk, - rst, - - i_p_initial_request, - i_p_addr, - i_p_writedata, - i_p_read_or_write, - i_p_valid, - - o_p_readdata, - o_p_readdata_valid, - o_p_waitrequest, - - o_m_addr, - o_m_writedata, - o_m_read_or_write, - o_m_valid, - - i_m_readdata, - i_m_ready -); - - parameter NUMBER_BANKS = 8; - - - - - //parameter cache_entry = 9; - input wire clk, rst; - - input wire i_p_valid[`NT_M1:0]; - input wire [31:0] i_p_addr[`NT_M1:0]; - input wire i_p_initial_request; - input wire [31:0] i_p_writedata[`NT_M1:0]; - input wire i_p_read_or_write; - - input wire [31:0] i_m_readdata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0]; - input wire i_m_ready; - - output reg [31:0] o_p_readdata[`NT_M1:0]; - output reg o_p_readdata_valid[`NT_M1:0] ; - output reg o_p_waitrequest; - - output reg [31:0] o_m_addr; - output reg o_m_valid; - output reg [31:0] o_m_writedata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0]; - output reg o_m_read_or_write; - - - // Inter - wire [`NT_M1:0] i_p_valid_inter; - wire [`NT_M1:0][31:0] i_p_addr_inter; - wire [`NT_M1:0][31:0] i_p_writedata_inter; - - reg [`NT_M1:0][31:0] o_p_readdata_inter; - reg [`NT_M1:0] o_p_readdata_valid_inter; - - reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter; - wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter; - - - genvar curr_thraed; - for (curr_thraed = 0; curr_thraed < `NT; curr_thraed = curr_thraed + 1) begin - assign i_p_valid_inter[curr_thraed] = i_p_valid[curr_thraed]; - assign i_p_addr_inter[curr_thraed] = i_p_addr[curr_thraed]; - assign i_p_writedata_inter[curr_thraed] = i_p_writedata[curr_thraed]; - assign o_p_readdata[curr_thraed] = o_p_readdata_inter[curr_thraed]; - assign 
o_p_readdata_valid[curr_thraed] = o_p_readdata_valid_inter[curr_thraed]; - end - - - genvar curr_bank; - genvar curr_word; - for (curr_bank = 0; curr_bank < NUMBER_BANKS; curr_bank = curr_bank + 1) begin - - for (curr_word = 0; curr_word < `NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin - - assign o_m_writedata[curr_bank][curr_word] = o_m_writedata_inter[curr_bank][curr_word]; - assign i_m_readdata_inter[curr_bank][curr_word] = i_m_readdata[curr_bank][curr_word]; - - end - end - -VX_d_cache dcache( - .clk (clk), - .rst (rst), - .i_p_valid (i_p_valid_inter), - .i_p_addr (i_p_addr_inter), - .i_p_initial_request(i_p_initial_request), - .i_p_writedata (i_p_writedata_inter), - .i_p_read_or_write (i_p_read_or_write), - .o_p_readdata (o_p_readdata_inter), - .o_p_readdata_valid (o_p_readdata_valid_inter), - .o_p_waitrequest (o_p_waitrequest), - .o_m_addr (o_m_addr), - .o_m_valid (o_m_valid), - .o_m_writedata (o_m_writedata_inter), - .o_m_read_or_write (o_m_read_or_write), - .i_m_readdata (i_m_readdata_inter), - .i_m_ready (i_m_ready) - ); - - -endmodule - - - - - - - - diff --git a/hw/old_rtl/cache/VX_d_cache_tb.v b/hw/old_rtl/cache/VX_d_cache_tb.v deleted file mode 100644 index 4f5681c3..00000000 --- a/hw/old_rtl/cache/VX_d_cache_tb.v +++ /dev/null @@ -1,58 +0,0 @@ -`include "VX_define.v" -`include "VX_d_cache.v" - -module VX_d_cache_tb; - - parameter NUMBER_BANKS = 8; - - reg clk, reset, im_ready; - reg [`NT_M1:0] i_p_valid; - reg [`NT_M1:0][13:0] i_p_addr; // FIXME - reg i_p_initial_request; - reg [`NT_M1:0][31:0] i_p_writedata; - reg i_p_read_or_write; //, i_p_write; - reg [`NT_M1:0][31:0] o_p_readdata; - reg [`NT_M1:0] o_p_readdata_valid; - reg o_p_waitrequest; - reg [13:0] o_m_addr; // Only one address is sent out at a time to memory - reg o_m_valid; - reg [(NUMBER_BANKS * 32) - 1:0] o_m_writedata; - reg o_m_read_or_write; //, o_m_write; - reg [(NUMBER_BANKS * 32) - 1:0] i_m_readdata; // Read Data that is passed from the memory module back to the controller - 
- - VX_d_cache d_cache(.clk(clk), - .rst(reset), - .i_p_initial_request(i_p_initial_request), - .i_p_addr(i_p_addr), - .i_p_writedata(i_p_writedata), - .i_p_read_or_write(i_p_read_or_write), // 0 = Read | 1 = Write - .i_p_valid(i_p_valid), - .o_p_readdata(o_p_readdata), - .o_p_readdata_valid(o_p_readdata_valid), - .o_p_waitrequest(o_p_waitrequest), // 0 = all threads done | 1 = Still threads that need to - .o_m_addr(o_m_addr), - .o_m_writedata(o_m_writedata), - .o_m_read_or_write(o_m_read_or_write), // 0 = Read | 1 = Write - .o_m_valid(o_m_valid), - .i_m_readdata(i_m_readdata), - .i_m_ready(im_ready) - //cnt_r, - //cnt_w, - //cnt_hit_r, - //cnt_hit_w - ); - - - - initial - begin - clk = 0; - reset = 0; - - end - - always - #5 clk = ! clk; - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/cache/VX_generic_pe.v b/hw/old_rtl/cache/VX_generic_pe.v deleted file mode 100644 index 4ff3cc17..00000000 --- a/hw/old_rtl/cache/VX_generic_pe.v +++ /dev/null @@ -1,24 +0,0 @@ -module VX_generic_pe - #( - parameter N = 8 - ) - ( - input wire[N-1:0] valids, - output reg[$clog2(N)-1:0] index, - output reg found - ); - -parameter my_secret = 0; - - integer i; - always @(*) begin - index = 0; - found = 0; - for (i = N-1; i >= 0; i = i - 1) begin - if (valids[i]) begin - index = i[$clog2(N)-1:0]; - found = 1; - end - end - end -endmodule \ No newline at end of file diff --git a/hw/old_rtl/cache/cache_set.v b/hw/old_rtl/cache/cache_set.v deleted file mode 100644 index 4f2445ea..00000000 --- a/hw/old_rtl/cache/cache_set.v +++ /dev/null @@ -1,233 +0,0 @@ -// To Do: Change way_id_out to an internal register which holds when in between access and finished. 
-// Also add a bit about wheter the "Way ID" is valid / being held or if it is just default -// Also make sure all possible output states are transmitted back to the bank correctly - -// `include "VX_define.v" -module cache_set(clk, - rst, - // These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes - access, // First - find_evict, - write_from_mem, - idle, - // entry, - o_tag, - writedata, - //byte_en, - write, - //word_en, - //way_id_in, - //way_id_out, - readdata, - //wb_addr, - hit, - eviction_wb, - eviction_tag, - //eviction_data, - //modify, - miss - //valid_data - //read_miss - ); - - parameter cache_entry = 14; - parameter ways_per_set = 4; - - input wire clk, rst; - input wire access; - input wire find_evict; - input wire write_from_mem; - input wire idle; - //input wire [cache_entry-1:0] entry; - input wire [1:0] o_tag; - input wire [31:0] writedata; - //input wire [3:0] byte_en; - input wire write; // 0 == False - //input wire [3:0] word_en; - //input wire read_miss; - //input wire [1:0] way_id_in; - //output reg [1:0] way_id_out; - output reg [31:0] readdata; - //output reg [3:0] hit; - output reg hit; - output reg miss; - output wire eviction_wb; - output wire [1:0] eviction_tag; - reg [31:0] eviction_data; - //output wire [22:0] wb_addr; - //output wire modify, valid_data; - - - - //wire [2:0] i_tag; - //wire dirty; - //wire [24-cache_entry:0] write_tag_data; - - // Table for one set - reg [2:0] counter; // Determines which to evict - reg valid [ways_per_set-1:0]; - reg [1:0] tag [ways_per_set-1:0]; - reg clean [ways_per_set-1:0]; - reg [31:0] data [ways_per_set-1:0]; - - - assign eviction_wb = miss && clean[counter[1:0]] != 1'b1 && valid[counter[1:0]] == 1'b1; - assign eviction_tag = tag[counter[1:0]]; - //assign eviction_data = data[counter[1:0]]; - //assign hit = valid_data && (o_tag == i_tag); - //assign modify = valid_data && (o_tag != i_tag) && dirty; - //assign miss = !valid_data || 
((o_tag != i_tag) && !dirty); - - //assign wb_addr = {i_tag, entry}; - always @(posedge clk) begin - if (rst) begin - - end - if (find_evict) begin - if (tag[0] == o_tag && valid[0]) begin - readdata <= data[0]; - end else if (tag[1] == o_tag && valid[1]) begin - readdata <= data[1]; - end else if (tag[2] == o_tag && valid[2]) begin - readdata <= data[2]; - end else if (tag[3] == o_tag && valid[3]) begin - readdata <= data[3]; - end - end else if (access) begin - //tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12]; - counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC - // Hit in First Column - if (tag[0] == o_tag && valid[0]) begin - if (write == 1'b0) begin // if it is a read - if (clean[0] == 1'b1 ) begin - //hit <= 4'b0001; - hit <= 1'b1; - readdata <= data[0]; - miss <= 1'b0; - end else begin - //hit <= 4'b0000; // SHOULD PROBABLY TRACK WHERE THIS MISS IS IN A DIFFERENT VARIABLE - hit <= 1'b0; - readdata <= 32'b0; - miss <= 1'b1; - end - end else if (write == 1'b1) begin - data[0] <= writedata; - clean[0] <= 1'b0; - //hit <= 4'b0001; - hit <= 1'b1; - end - end - // Hit in Second Column - else if (tag[1] == o_tag && valid[1]) begin - if (write == 1'b0) begin // if it is a read - if (clean[1] == 1'b1 ) begin - //hit <= 4'b0010; - hit <= 1'b1; - readdata <= data[1]; - miss <= 1'b0; - end else begin - //hit <= 4'b0000; - hit <= 1'b0; - readdata <= 32'b0; - miss <= 1'b1; - end - end else if (write == 1'b1) begin - data[1] <= writedata; - clean[1] <= 1'b0; - //hit <= 4'b0010; - hit <= 1'b1; - end - end - // Hit in Third Column - else if (tag[2] == o_tag && valid[2]) begin - if (write == 1'b0) begin // if it is a read - if (clean[2] == 1'b1 ) begin - //hit <= 4'b0100; - hit <= 1'b1; - readdata <= data[2]; - miss <= 1'b0; - end else begin - //hit <= 4'b0000; - hit <= 1'b0; - readdata <= 32'b0; - miss <= 1'b1; - end - end else if (write == 1'b1) begin - data[2] <= writedata; - clean[2] 
<= 1'b0; - //hit <= 4'b0100; - hit <= 1'b1; - end - end - // Hit in Fourth Column - else if (tag[3] == o_tag && valid[3]) begin - if (write == 1'b0) begin // if it is a read - if (clean[3] == 1'b1 ) begin - //hit <= 4'b1000; - hit <= 1'b1; - readdata <= data[3]; - miss <= 1'b0; - end else begin - //hit <= 4'b0000; - hit <= 1'b0; - readdata <= 32'b0; - miss <= 1'b1; - end - end else if (write == 1'b1) begin - data[3] <= writedata; - clean[3] <= 1'b0; - //hit <= 4'b1000; - hit <= 1'b1; - end - end - // Miss - else begin - //way_id_out <= counter; - miss <= 1'b1; - if (write == 1'b0) begin // Read Miss - clean[counter[1:0]] <= 1'b1; - data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS - end else if (write == 1'b1) begin // Write Miss - clean[counter[1:0]] <= 1'b1; - data[counter[1:0]] <= writedata; - end - end - - end - if (write_from_mem) begin - tag[counter[1:0]] <= o_tag; - valid[counter[1:0]] <= 1'b1; - hit <= 1'b1; - if (write == 1'b0) begin // Read Miss - clean[counter[1:0]] <= 1'b1; - data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS - end else if (write == 1'b1) begin // Write Miss - clean[counter[1:0]] <= 1'b0; - data[counter[1:0]] <= writedata; - end - end - if (idle) begin // Set "way" register equal to invalid value - hit <= 1'b1; // set to know it is ready - miss <= 1'b0; - readdata <= 32'hFFFFFFFF; - end - if (find_evict) begin // Keep "way" value the same !!!! Fix. Need to send back data with matching tag. 
Also need to ensure evicted data doesnt get lost - if (tag[3] == o_tag && valid[3]) begin - readdata <= data[3]; - end else if (tag[1] == o_tag && valid[1]) begin - readdata <= data[1]; - end else if (tag[2] == o_tag && valid[2]) begin - readdata <= data[2]; - end else if (tag[0] == o_tag && valid[0]) begin - readdata <= data[0]; - end else begin - readdata <= eviction_data; - end - hit <= 1'b1; - miss <= 1'b0; - end - counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC - eviction_data <= data[counter[1:0]]; - end - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/cache/d_cache_test_bench.cpp b/hw/old_rtl/cache/d_cache_test_bench.cpp deleted file mode 100644 index e7fb3214..00000000 --- a/hw/old_rtl/cache/d_cache_test_bench.cpp +++ /dev/null @@ -1,29 +0,0 @@ - - -#include "d_cache_test_bench.h" - -//#define NUM_TESTS 46 - -int main(int argc, char **argv) -{ - - Verilated::commandArgs(argc, argv); - - Verilated::traceEverOn(true); - - - VX_d_cache v; - - - bool curr = v.simulate(); - //if ( curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl; - //if (!curr) std::cerr << RED << "Test Failed: " << testing << std::endl; - if ( curr) std::cerr << GREEN << "Test Passed: " << std::endl; - if (!curr) std::cerr << RED << "Test Failed: " << std::endl; - - return 0; - -} - - - diff --git a/hw/old_rtl/cache/d_cache_test_bench.h b/hw/old_rtl/cache/d_cache_test_bench.h deleted file mode 100644 index 112aeb9a..00000000 --- a/hw/old_rtl/cache/d_cache_test_bench.h +++ /dev/null @@ -1,355 +0,0 @@ -// C++ libraries -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "VX_define.h" -#include "VVX_d_cache_encapsulate.h" -#include "verilated.h" - -#include "d_cache_test_bench_debug.h" - - -#ifdef VCD_OUTPUT -#include -#endif - -// void set_Index (auto & var, int index, int size, auto val) -// { -// int 
real_shift -// } - -class VX_d_cache -{ - public: - VX_d_cache(); - ~VX_d_cache(); - bool simulate(); - bool operation(int, bool); - - VVX_d_cache_encapsulate * vx_d_cache_; - long int curr_cycle; - int stats_total_cycles = 0; - int stats_dram_accesses = 0; - #ifdef VCD_OUTPUT - VerilatedVcdC *m_trace; - #endif -}; - - - -VX_d_cache::VX_d_cache() : curr_cycle(0), stats_total_cycles(0), stats_dram_accesses(0) -{ - - this->vx_d_cache_ = new VVX_d_cache_encapsulate; - #ifdef VCD_OUTPUT - this->m_trace = new VerilatedVcdC; - this->vx_d_cache_->trace(m_trace, 99); - this->m_trace->open("trace.vcd"); - #endif - //this->results.open("../results.txt"); -} - -VX_d_cache::~VX_d_cache() -{ - - - delete this->vx_d_cache_; - #ifdef VCD_OUTPUT - m_trace->close(); - #endif -} - -bool VX_d_cache::operation(int counter_value, bool do_op) { - if (do_op) { - vx_d_cache_->i_p_initial_request = 1; - } else { - vx_d_cache_->i_p_initial_request = 0; - } - - if (counter_value == 0 && do_op) { // Write to bank 1-4 at index 64 - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 1; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_p_writedata[j] = 0x7f6f8f6f; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x30001004; // bank 1 - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001008; // bank 2 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x3000100c; // bank 3 - } else { - vx_d_cache_->i_p_addr[3] = 0x30010010; // bank 4 -- This is serviced 1st, then the other 3 banks are at once - } - } - - } else if (counter_value == 1 && do_op) { // Write to bank 4-7 at index 108 - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 1; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_p_writedata[j] = 0xd1d2d2d3; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - 
vx_d_cache_->i_p_addr[0] = 0x30001c14; // bank 5 - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001c18; // bank 6 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x30001c1c; // bank 7 - } else { - vx_d_cache_->i_p_addr[3] = 0x30001c10; // bank 4 - } - } - - } else if (counter_value == 2 && do_op) { // Read from bank 1-4 at those indexes - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 0; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_p_writedata[j] = 0x23232332; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x30001004; // bank 1 - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001c18; // bank 5 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x3000100c; // bank 3 - } else { - vx_d_cache_->i_p_addr[3] = 0x30001c1c;; // bank 7 - } - } - } - } else if (counter_value == 3 && do_op) { // Write to Bank 1-5 (evictions will need to take place) - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 1; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x20001004; // bank 1 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb0; - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x20001008; // bank 2 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb1; - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x2000100c; // bank 3 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb2; - } else { - vx_d_cache_->i_p_addr[3] = 0x20001c14; // bank 5 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb3; - } - } - } else if (counter_value == 4 && do_op) { // Read from addresses that were just overwritten above ^^^ - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 0; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - 
vx_d_cache_->i_p_writedata[j] = 0x23232332; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x20001004; // bank 1 - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x20001008; // bank 2 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x2000100c; // bank 3 - } else { - vx_d_cache_->i_p_addr[3] = 0x20001c14; // bank 5 - } - } - } - /* These will check writing multiple threads writing to the same block - } else if (counter_value == 3 && do_op) { // Write to Bank 0 - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 1; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x30001f00; // bank 0 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb0; - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001c00; // bank 0 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb1; - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x30001a00; // bank 0 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb2; - } else { - vx_d_cache_->i_p_addr[3] = 0x30001904; // bank 1 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb3; - } - } - } else if (counter_value == 4 && do_op) { // Read from Bank 0 - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 0; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_p_writedata[j] = 0x23232332; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x30001f00; // bank 0 - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001c00; // bank 0 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x30001a00; // bank 0 - } else { - vx_d_cache_->i_p_addr[3] = 0x30001904; // bank 1 - } - } - } - */ - // Handle Memory Accesses - unsigned int read_data_from_mem = 0x1111 + counter_value + this->stats_total_cycles; - - if (vx_d_cache_->o_m_valid) { - this->stats_dram_accesses = 
this->stats_dram_accesses + 1; // (assuming memory access takes 20 cycles) - - this->stats_total_cycles += 1; - vx_d_cache_->clk = 0; - vx_d_cache_->eval(); - #ifdef VCD_OUTPUT - m_trace->dump(2*this->stats_total_cycles); - #endif - vx_d_cache_->clk = 1; - vx_d_cache_->eval(); - #ifdef VCD_OUTPUT - m_trace->dump((2*this->stats_total_cycles)+1); - #endif - - vx_d_cache_->i_m_ready = 1; - for (int j1 = 0; j1 < 8; j1++) { - for (int j2 = 0; j2 < 4; j2++) { - vx_d_cache_->i_m_readdata[j1][j2] = read_data_from_mem; - } - } - } else { - vx_d_cache_->i_m_ready = 0; - } - - - if (vx_d_cache_->o_p_waitrequest == 0) { - return true; - } else { - return false; - } - - -} - - -bool VX_d_cache::simulate() -{ - -// this->instruction_file_name = file_to_simulate; - // this->results << "\n****************\t" << file_to_simulate << "\t****************\n"; - -// this->ProcessFile(); - - // auto start_time = std::chrono::high_resolution_clock::now(); - - - //static bool stop = false; - //static int counter = 0; - //counter = 0; - //stop = false; - - // auto start_time = clock(); - - - vx_d_cache_->clk = 0; - vx_d_cache_->rst = 1; - //vortex->eval(); - //counter = 0; - vx_d_cache_->rst = 0; - - bool cont = false; - bool out_operation = false; - bool do_operation = true; - int other_counter = 0; - //while (this->stop && ((other_counter < 5))) - while (other_counter < 5) - { - - // std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n"; - // istop = ibus_driver(); - // dstop = !dbus_driver(); - - vx_d_cache_->clk = 1; - vx_d_cache_->eval(); - #ifdef VCD_OUTPUT - m_trace->dump(2*this->stats_total_cycles); - #endif - - //vortex->eval(); - //dstop = !dbus_driver(); - - out_operation = operation(other_counter, do_operation); - vx_d_cache_->clk = 0; - vx_d_cache_->eval(); - #ifdef VCD_OUTPUT - m_trace->dump((2*this->stats_total_cycles)+1); - #endif - //vortex->eval(); - - /* - // stop = istop && dstop; - stop = vortex->out_ebreak; - if (stop || cont) - { - cont = true; - 
counter++; - } else - { - counter = 0; - } - */ - if (out_operation) { - other_counter++; - do_operation = true; - } else { - do_operation = false; - } - ++(this->stats_total_cycles); - - if (this->stats_total_cycles > 5000) { - break; - } - - } - - std::cerr << "New Total Cycles: " << (this->stats_total_cycles + (this->stats_dram_accesses * 20)) << "\n"; - - //uint32_t status; - //ram.getWord(0, &status); - - //this->print_stats(); - - - - return (true); -} - - - - - - - - - diff --git a/hw/old_rtl/cache/d_cache_test_bench_debug.h b/hw/old_rtl/cache/d_cache_test_bench_debug.h deleted file mode 100644 index 54afa11a..00000000 --- a/hw/old_rtl/cache/d_cache_test_bench_debug.h +++ /dev/null @@ -1 +0,0 @@ -#define VCD_OUTPUT \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_branch_response_inter.v b/hw/old_rtl/interfaces/VX_branch_response_inter.v deleted file mode 100644 index b25b47c9..00000000 --- a/hw/old_rtl/interfaces/VX_branch_response_inter.v +++ /dev/null @@ -1,18 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_BRANCH_RSP - -`define VX_BRANCH_RSP - -interface VX_branch_response_inter (); - wire valid_branch; - wire branch_dir; - wire[31:0] branch_dest; - wire[`NW_M1:0] branch_warp_num; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_csr_req_inter.v b/hw/old_rtl/interfaces/VX_csr_req_inter.v deleted file mode 100644 index 9080f0e1..00000000 --- a/hw/old_rtl/interfaces/VX_csr_req_inter.v +++ /dev/null @@ -1,24 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_CSR_REQ - -`define VX_CSR_REQ - -interface VX_csr_req_inter (); - - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; - wire[4:0] rd; - wire[1:0] wb; - wire[4:0] alu_op; - wire is_csr; - wire[11:0] csr_address; - wire csr_immed; - wire[31:0] csr_mask; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_csr_wb_inter.v b/hw/old_rtl/interfaces/VX_csr_wb_inter.v deleted file mode 100644 index 
d8389cdb..00000000 --- a/hw/old_rtl/interfaces/VX_csr_wb_inter.v +++ /dev/null @@ -1,21 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_CSR_WB_REQ - -`define VX_CSR_WB_REQ - -interface VX_csr_wb_inter (); - - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; - wire[4:0] rd; - wire[1:0] wb; - - wire[`NT_M1:0][31:0] csr_result; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_dcache_request_inter.v b/hw/old_rtl/interfaces/VX_dcache_request_inter.v deleted file mode 100644 index ac841a76..00000000 --- a/hw/old_rtl/interfaces/VX_dcache_request_inter.v +++ /dev/null @@ -1,19 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_DCACHE_REQ - -`define VX_DCACHE_REQ - -interface VX_dcache_request_inter (); - - wire[`NT_M1:0][31:0] out_cache_driver_in_address; - wire[2:0] out_cache_driver_in_mem_read; - wire[2:0] out_cache_driver_in_mem_write; - wire[`NT_M1:0] out_cache_driver_in_valid; - wire[`NT_M1:0][31:0] out_cache_driver_in_data; - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_dcache_response_inter.v b/hw/old_rtl/interfaces/VX_dcache_response_inter.v deleted file mode 100644 index 98ed58a3..00000000 --- a/hw/old_rtl/interfaces/VX_dcache_response_inter.v +++ /dev/null @@ -1,16 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_DCACHE_RSP - -`define VX_DCACHE_RSP - -interface VX_dcache_response_inter (); - - wire[`NT_M1:0][31:0] in_cache_driver_out_data; - wire delay; - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_dram_req_rsp_inter.v b/hw/old_rtl/interfaces/VX_dram_req_rsp_inter.v deleted file mode 100644 index f4d7012d..00000000 --- a/hw/old_rtl/interfaces/VX_dram_req_rsp_inter.v +++ /dev/null @@ -1,27 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_DRAM_REQ_RSP_INTER - -`define VX_DRAM_REQ_RSP_INTER - -interface VX_dram_req_rsp_inter #( - parameter NUMBER_BANKS = 8, - parameter NUM_WORDS_PER_BLOCK = 4) (); - - // Req - wire 
[31:0] o_m_evict_addr; - wire [31:0] o_m_read_addr; - wire o_m_valid; - wire[NUMBER_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata; - wire o_m_read_or_write; - - // Rsp - wire[NUMBER_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata; - wire i_m_ready; - - -endinterface - - -`endif diff --git a/hw/old_rtl/interfaces/VX_exec_unit_req_inter.v b/hw/old_rtl/interfaces/VX_exec_unit_req_inter.v deleted file mode 100644 index aab6c130..00000000 --- a/hw/old_rtl/interfaces/VX_exec_unit_req_inter.v +++ /dev/null @@ -1,51 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_EXE_UNIT_REQ_INTER - -`define VX_EXE_UNIT_REQ_INTER - -interface VX_exec_unit_req_inter (); - - // Meta - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; - wire[31:0] curr_PC; - wire[31:0] PC_next; - - // Write Back Info - wire[4:0] rd; - wire[1:0] wb; - - // Data and alu op - wire[`NT_M1:0][31:0] a_reg_data; - wire[`NT_M1:0][31:0] b_reg_data; - wire[4:0] alu_op; - wire[4:0] rs1; - wire[4:0] rs2; - wire rs2_src; - wire[31:0] itype_immed; - wire[19:0] upper_immed; - - // Branch type - wire[2:0] branch_type; - - // Jal info - wire jalQual; - wire jal; - wire[31:0] jal_offset; - - /* verilator lint_off UNUSED */ - wire ebreak; - wire wspawn; - /* verilator lint_on UNUSED */ - - // CSR info - wire is_csr; - wire[11:0] csr_address; - wire csr_immed; - wire[31:0] csr_mask; -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_frE_to_bckE_req_inter.v b/hw/old_rtl/interfaces/VX_frE_to_bckE_req_inter.v deleted file mode 100644 index 610d3525..00000000 --- a/hw/old_rtl/interfaces/VX_frE_to_bckE_req_inter.v +++ /dev/null @@ -1,46 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_FrE_to_BE_INTER - -`define VX_FrE_to_BE_INTER - -interface VX_frE_to_bckE_req_inter (); - - wire[11:0] csr_address; - wire is_csr; - wire csr_immed; - wire[31:0] csr_mask; - wire[4:0] rd; - wire[4:0] rs1; - wire[4:0] rs2; - wire[4:0] alu_op; - wire[1:0] wb; - wire rs2_src; - wire[31:0] 
itype_immed; - wire[2:0] mem_read; - wire[2:0] mem_write; - wire[2:0] branch_type; - wire[19:0] upper_immed; - wire[31:0] curr_PC; - /* verilator lint_off UNUSED */ - wire ebreak; - /* verilator lint_on UNUSED */ - wire jalQual; - wire jal; - wire[31:0] jal_offset; - wire[31:0] PC_next; - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; - - // GPGPU stuff - wire is_wspawn; - wire is_tmc; - wire is_split; - wire is_barrier; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_gpr_clone_inter.v b/hw/old_rtl/interfaces/VX_gpr_clone_inter.v deleted file mode 100644 index 26053ac9..00000000 --- a/hw/old_rtl/interfaces/VX_gpr_clone_inter.v +++ /dev/null @@ -1,18 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_GPR_CLONE_INTER - -`define VX_GPR_CLONE_INTER - - -interface VX_gpr_clone_inter (); -/* verilator lint_off UNUSED */ -wire is_clone; -wire[`NW_M1:0] warp_num; -/* verilator lint_on UNUSED */ -endinterface - - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_gpr_data_inter.v b/hw/old_rtl/interfaces/VX_gpr_data_inter.v deleted file mode 100644 index 912f04a1..00000000 --- a/hw/old_rtl/interfaces/VX_gpr_data_inter.v +++ /dev/null @@ -1,14 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_gpr_data_INTER - -`define VX_gpr_data_INTER - -interface VX_gpr_data_inter (); - wire[`NT_M1:0][31:0] a_reg_data; - wire[`NT_M1:0][31:0] b_reg_data; -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_gpr_jal_inter.v b/hw/old_rtl/interfaces/VX_gpr_jal_inter.v deleted file mode 100644 index 0c4b7afb..00000000 --- a/hw/old_rtl/interfaces/VX_gpr_jal_inter.v +++ /dev/null @@ -1,14 +0,0 @@ -`include "../VX_define.v" -`ifndef VX_GPR_JAL_INTER - -`define VX_GPR_JAL_INTER - - -interface VX_gpr_jal_inter (); - wire is_jal; - wire[31:0] curr_PC; -endinterface - - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_gpr_read_inter.v 
b/hw/old_rtl/interfaces/VX_gpr_read_inter.v deleted file mode 100644 index ccac96c0..00000000 --- a/hw/old_rtl/interfaces/VX_gpr_read_inter.v +++ /dev/null @@ -1,17 +0,0 @@ -`include "../VX_define.v" -`ifndef VX_GPR_READ - -`define VX_GPR_READ - - -interface VX_gpr_read_inter (); - - wire[4:0] rs1; - wire[4:0] rs2; - wire[`NW_M1:0] warp_num; - -endinterface - - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_gpr_wspawn_inter.v b/hw/old_rtl/interfaces/VX_gpr_wspawn_inter.v deleted file mode 100644 index dfa0fc4c..00000000 --- a/hw/old_rtl/interfaces/VX_gpr_wspawn_inter.v +++ /dev/null @@ -1,18 +0,0 @@ -`include "../VX_define.v" -`ifndef VX_GPR_WSPAWN_INTER - -`define VX_GPR_WSPAWN_INTER - - -interface VX_gpr_wspawn_inter (); - /* verilator lint_off UNUSED */ - wire is_wspawn; - wire[`NW_M1:0] which_wspawn; - // wire[`NW_M1:0] warp_num; - /* verilator lint_on UNUSED */ - -endinterface - - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_gpu_inst_req_inter.v b/hw/old_rtl/interfaces/VX_gpu_inst_req_inter.v deleted file mode 100644 index 1d24c960..00000000 --- a/hw/old_rtl/interfaces/VX_gpu_inst_req_inter.v +++ /dev/null @@ -1,27 +0,0 @@ -`include "../VX_define.v" - -`ifndef VX_GPU_INST_REQ_IN - -`define VX_GPU_INST_REQ_IN - -interface VX_gpu_inst_req_inter(); - - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; - wire is_wspawn; - wire is_tmc; - wire is_split; - - wire is_barrier; - - wire[31:0] pc_next; - - wire[`NT_M1:0][31:0] a_reg_data; - wire[31:0] rd2; - - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_icache_request_inter.v b/hw/old_rtl/interfaces/VX_icache_request_inter.v deleted file mode 100644 index 9de1312b..00000000 --- a/hw/old_rtl/interfaces/VX_icache_request_inter.v +++ /dev/null @@ -1,19 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_ICACHE_REQ - -`define VX_ICACHE_REQ - -interface VX_icache_request_inter (); - - wire[31:0] pc_address; - wire[2:0] 
out_cache_driver_in_mem_read; - wire[2:0] out_cache_driver_in_mem_write; - wire out_cache_driver_in_valid; - wire[31:0] out_cache_driver_in_data; - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_icache_response_inter.v b/hw/old_rtl/interfaces/VX_icache_response_inter.v deleted file mode 100644 index 2373046b..00000000 --- a/hw/old_rtl/interfaces/VX_icache_response_inter.v +++ /dev/null @@ -1,18 +0,0 @@ -`include "../VX_define.v" - -`ifndef VX_ICACHE_RSP - -`define VX_ICACHE_RSP - -interface VX_icache_response_inter (); - - // wire ready; - // wire stall; - wire[31:0] instruction; - wire delay; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_inst_exec_wb_inter.v b/hw/old_rtl/interfaces/VX_inst_exec_wb_inter.v deleted file mode 100644 index 929ba88d..00000000 --- a/hw/old_rtl/interfaces/VX_inst_exec_wb_inter.v +++ /dev/null @@ -1,21 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_EXEC_UNIT_WB_INST_INTER - -`define VX_EXEC_UNIT_WB_INST_INTER - -interface VX_inst_exec_wb_inter (); - - wire[`NT_M1:0][31:0] alu_result; - wire[31:0] exec_wb_pc; - wire[4:0] rd; - wire[1:0] wb; - wire[`NT_M1:0] wb_valid; - wire[`NW_M1:0] wb_warp_num; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_inst_mem_wb_inter.v b/hw/old_rtl/interfaces/VX_inst_mem_wb_inter.v deleted file mode 100644 index d752a3a6..00000000 --- a/hw/old_rtl/interfaces/VX_inst_mem_wb_inter.v +++ /dev/null @@ -1,21 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_MEM_WB_INST_INTER - -`define VX_MEM_WB_INST_INTER - -interface VX_inst_mem_wb_inter (); - - wire[`NT_M1:0][31:0] loaded_data; - wire[31:0] mem_wb_pc; - wire[4:0] rd; - wire[1:0] wb; - wire[`NT_M1:0] wb_valid; - wire[`NW_M1:0] wb_warp_num; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_inst_meta_inter.v b/hw/old_rtl/interfaces/VX_inst_meta_inter.v deleted file mode 100644 
index 2fd68625..00000000 --- a/hw/old_rtl/interfaces/VX_inst_meta_inter.v +++ /dev/null @@ -1,16 +0,0 @@ -`include "../VX_define.v" - -`ifndef VX_F_D_INTER - -`define VX_F_D_INTER - -interface VX_inst_meta_inter (); - wire[31:0] instruction; - wire[31:0] inst_pc; - wire[`NW_M1:0] warp_num; - wire[`NT_M1:0] valid; - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_jal_response_inter.v b/hw/old_rtl/interfaces/VX_jal_response_inter.v deleted file mode 100644 index e93a2d0a..00000000 --- a/hw/old_rtl/interfaces/VX_jal_response_inter.v +++ /dev/null @@ -1,17 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_JAL_RSP - -`define VX_JAL_RSP - -interface VX_jal_response_inter (); - - wire jal; - wire[31:0] jal_dest; - wire[`NW_M1:0] jal_warp_num; - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_join_inter.v b/hw/old_rtl/interfaces/VX_join_inter.v deleted file mode 100644 index a465bf65..00000000 --- a/hw/old_rtl/interfaces/VX_join_inter.v +++ /dev/null @@ -1,17 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_JOIN_INTER - -`define VX_JOIN_INTER - -interface VX_join_inter (); - - wire is_join; - wire[`NW_M1:0] join_warp_num; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_lsu_req_inter.v b/hw/old_rtl/interfaces/VX_lsu_req_inter.v deleted file mode 100644 index 408791f6..00000000 --- a/hw/old_rtl/interfaces/VX_lsu_req_inter.v +++ /dev/null @@ -1,24 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_LSU_REQ_INTER - -`define VX_LSU_REQ_INTER - -interface VX_lsu_req_inter (); - - wire[`NT_M1:0] valid; - wire[31:0] lsu_pc; - wire[`NW_M1:0] warp_num; - wire[`NT_M1:0][31:0] store_data; - wire[`NT_M1:0][31:0] base_address; // A reg data - wire[31:0] offset; // itype_immed - wire[2:0] mem_read; - wire[2:0] mem_write; - wire[4:0] rd; - wire[1:0] wb; - -endinterface - - -`endif \ No newline at end of file diff --git 
a/hw/old_rtl/interfaces/VX_mem_req_inter.v b/hw/old_rtl/interfaces/VX_mem_req_inter.v deleted file mode 100644 index ee2a975d..00000000 --- a/hw/old_rtl/interfaces/VX_mem_req_inter.v +++ /dev/null @@ -1,28 +0,0 @@ -`include "../VX_define.v" - -`ifndef VX_MEM_REQ_IN - -`define VX_MEM_REQ_IN - -interface VX_mem_req_inter (); - - wire[`NT_M1:0][31:0] alu_result; - wire[2:0] mem_read; - wire[2:0] mem_write; - wire[4:0] rd; - wire[1:0] wb; - wire[4:0] rs1; - wire[4:0] rs2; - wire[`NT_M1:0][31:0] rd2; - wire[31:0] PC_next; - wire[31:0] curr_PC; - wire[31:0] branch_offset; - wire[2:0] branch_type; - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_mw_wb_inter.v b/hw/old_rtl/interfaces/VX_mw_wb_inter.v deleted file mode 100644 index bbf4733e..00000000 --- a/hw/old_rtl/interfaces/VX_mw_wb_inter.v +++ /dev/null @@ -1,22 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_MW_WB_INTER - -`define VX_MW_WB_INTER - -interface VX_mw_wb_inter (); - - wire[`NT_M1:0][31:0] alu_result; - wire[`NT_M1:0][31:0] mem_result; - wire[4:0] rd; - wire[1:0] wb; - wire[31:0] PC_next; - wire[`NT_M1:0] valid; - wire [`NW_M1:0] warp_num; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_warp_ctl_inter.v b/hw/old_rtl/interfaces/VX_warp_ctl_inter.v deleted file mode 100644 index 53dec2a1..00000000 --- a/hw/old_rtl/interfaces/VX_warp_ctl_inter.v +++ /dev/null @@ -1,36 +0,0 @@ - -`include "../VX_define.v" - -`ifndef VX_WARP_CTL_INTER - -`define VX_WARP_CTL_INTER - -interface VX_warp_ctl_inter (); - - wire[`NW_M1:0] warp_num; - wire change_mask; - wire[`NT_M1:0] thread_mask; - - wire wspawn; - wire[31:0] wspawn_pc; - wire[`NW-1:0] wspawn_new_active; - - wire ebreak; - - // barrier - wire is_barrier; - wire[31:0] barrier_id; - wire[$clog2(`NW):0] num_warps; - - wire is_split; - wire dont_split; - wire[`NW_M1:0] split_warp_num; - wire[`NT_M1:0] split_new_mask; - 
wire[`NT_M1:0] split_later_mask; - wire[31:0] split_save_pc; - - -endinterface - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_wb_inter.v b/hw/old_rtl/interfaces/VX_wb_inter.v deleted file mode 100644 index c40cf4fe..00000000 --- a/hw/old_rtl/interfaces/VX_wb_inter.v +++ /dev/null @@ -1,21 +0,0 @@ -`include "../VX_define.v" - -`ifndef VX_WB_INTER - -`define VX_WB_INTER - - -interface VX_wb_inter (); - - wire[`NT_M1:0][31:0] write_data; - wire[31:0] wb_pc; - wire[4:0] rd; - wire[1:0] wb; - wire[`NT_M1:0] wb_valid; - wire[`NW_M1:0] wb_warp_num; - -endinterface - - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/interfaces/VX_wstall_inter.v b/hw/old_rtl/interfaces/VX_wstall_inter.v deleted file mode 100644 index 8699b022..00000000 --- a/hw/old_rtl/interfaces/VX_wstall_inter.v +++ /dev/null @@ -1,15 +0,0 @@ -`include "../VX_define.v" - -`ifndef VX_WSTALL_INTER - -`define VX_WSTALL_INTER - - -interface VX_wstall_inter(); - wire wstall; - wire[`NW_M1:0] warp_num; -endinterface - - - -`endif \ No newline at end of file diff --git a/hw/old_rtl/pipe_regs/VX_d_e_reg.v b/hw/old_rtl/pipe_regs/VX_d_e_reg.v deleted file mode 100644 index e25a0d88..00000000 --- a/hw/old_rtl/pipe_regs/VX_d_e_reg.v +++ /dev/null @@ -1,36 +0,0 @@ - - -`include "../VX_define.v" - -module VX_d_e_reg ( - input wire clk, - input wire reset, - input wire in_branch_stall, - input wire in_freeze, - VX_frE_to_bckE_req_inter VX_frE_to_bckE_req, - - - VX_frE_to_bckE_req_inter VX_bckE_req - ); - - - wire stall = in_freeze; - wire flush = (in_branch_stall == `STALL); - - - VX_generic_register #(.N(233 + `NW_M1 + 1 + `NT)) d_e_reg - ( - .clk (clk), - .reset(reset), - .stall(stall), - .flush(flush), - .in ({VX_frE_to_bckE_req.csr_address, VX_frE_to_bckE_req.jalQual, VX_frE_to_bckE_req.ebreak, VX_frE_to_bckE_req.is_csr, VX_frE_to_bckE_req.csr_immed, VX_frE_to_bckE_req.csr_mask, VX_frE_to_bckE_req.rd, VX_frE_to_bckE_req.rs1, VX_frE_to_bckE_req.rs2, 
VX_frE_to_bckE_req.alu_op, VX_frE_to_bckE_req.wb, VX_frE_to_bckE_req.rs2_src, VX_frE_to_bckE_req.itype_immed, VX_frE_to_bckE_req.mem_read, VX_frE_to_bckE_req.mem_write, VX_frE_to_bckE_req.branch_type, VX_frE_to_bckE_req.upper_immed, VX_frE_to_bckE_req.curr_PC, VX_frE_to_bckE_req.jal, VX_frE_to_bckE_req.jal_offset, VX_frE_to_bckE_req.PC_next, VX_frE_to_bckE_req.valid, VX_frE_to_bckE_req.warp_num, VX_frE_to_bckE_req.is_wspawn, VX_frE_to_bckE_req.is_tmc, VX_frE_to_bckE_req.is_split, VX_frE_to_bckE_req.is_barrier}), - .out ({VX_bckE_req.csr_address , VX_bckE_req.jalQual , VX_bckE_req.ebreak ,VX_bckE_req.is_csr , VX_bckE_req.csr_immed , VX_bckE_req.csr_mask , VX_bckE_req.rd , VX_bckE_req.rs1 , VX_bckE_req.rs2 , VX_bckE_req.alu_op , VX_bckE_req.wb , VX_bckE_req.rs2_src , VX_bckE_req.itype_immed , VX_bckE_req.mem_read , VX_bckE_req.mem_write , VX_bckE_req.branch_type , VX_bckE_req.upper_immed , VX_bckE_req.curr_PC , VX_bckE_req.jal , VX_bckE_req.jal_offset , VX_bckE_req.PC_next , VX_bckE_req.valid , VX_bckE_req.warp_num , VX_bckE_req.is_wspawn , VX_bckE_req.is_tmc , VX_bckE_req.is_split , VX_bckE_req.is_barrier }) - ); - - -endmodule - - - - diff --git a/hw/old_rtl/pipe_regs/VX_f_d_reg.v b/hw/old_rtl/pipe_regs/VX_f_d_reg.v deleted file mode 100644 index 0d5d99a8..00000000 --- a/hw/old_rtl/pipe_regs/VX_f_d_reg.v +++ /dev/null @@ -1,28 +0,0 @@ -`include "../VX_define.v" - -module VX_f_d_reg ( - input wire clk, - input wire reset, - input wire in_freeze, - - VX_inst_meta_inter fe_inst_meta_fd, - VX_inst_meta_inter fd_inst_meta_de - -); - - wire flush = 1'b0; - wire stall = in_freeze == 1'b1; - - - VX_generic_register #(.N(64 + `NW_M1 + 1 + `NT)) f_d_reg - ( - .clk (clk), - .reset(reset), - .stall(stall), - .flush(flush), - .in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}), - .out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid}) - ); - - -endmodule \ No newline 
at end of file diff --git a/hw/old_rtl/results.txt b/hw/old_rtl/results.txt deleted file mode 100644 index 083332ec..00000000 --- a/hw/old_rtl/results.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Dynamic Instructions: 51711 -# of total cycles: 51728 -# of forwarding stalls: 0 -# of branch stalls: 0 -# CPI: 1.00033 -# time to simulate: 0 milliseconds -# GRADE: Failed on test: 4294967295 diff --git a/hw/old_rtl/shared_memory/VX_bank_valids.v b/hw/old_rtl/shared_memory/VX_bank_valids.v deleted file mode 100644 index 3b1e63ab..00000000 --- a/hw/old_rtl/shared_memory/VX_bank_valids.v +++ /dev/null @@ -1,36 +0,0 @@ -`include "../VX_define.v" - -// Converts in_valids to bank_valids -module VX_bank_valids - #( - parameter NB = 4, - parameter BITS_PER_BANK = 3 - ) - ( - input wire[`NT_M1:0] in_valids, - input wire[`NT_M1:0][31:0] in_addr, - output reg[NB:0][`NT_M1:0] bank_valids - ); - - - integer i, j; - always@(*) begin - for(j = 0; j <= NB; j = j+1 ) begin - for(i = 0; i <= `NT_M1; i = i+1) begin - if(in_valids[i]) begin - if(in_addr[i][(2+BITS_PER_BANK-1):2] == j[BITS_PER_BANK-1:0]) begin - bank_valids[j][i] = 1'b1; - end - else begin - bank_valids[j][i] = 1'b0; - end - - end - else begin - bank_valids[j][i] = 1'b0; - end - end - end - end - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/shared_memory/VX_priority_encoder_sm.v b/hw/old_rtl/shared_memory/VX_priority_encoder_sm.v deleted file mode 100644 index ba571fd3..00000000 --- a/hw/old_rtl/shared_memory/VX_priority_encoder_sm.v +++ /dev/null @@ -1,115 +0,0 @@ -`include "../VX_define.v" - -module VX_priority_encoder_sm - #( - parameter NB = 4, - parameter BITS_PER_BANK = 3, - parameter NUM_REQ = 3 - ) - ( - //INPUTS - input wire clk, - input wire reset, - input wire[`NT_M1:0] in_valid, - input wire[`NT_M1:0][31:0] in_address, - input wire[`NT_M1:0][31:0] in_data, - // OUTPUTS - // To SM Module - output reg[NB:0] out_valid, - output reg[NB:0][31:0] out_address, - output reg[NB:0][31:0] out_data, - - // To 
Processor - output wire[NB:0][`CLOG2(NUM_REQ) - 1:0] req_num, - output reg stall, - output wire send_data // Finished all of the requests -); - - reg[`NT_M1:0] left_requests; - reg[`NT_M1:0] serviced; - - - wire[`NT_M1:0] use_valid; - - - wire requests_left = (|left_requests); - - assign use_valid = (requests_left) ? left_requests : in_valid; - - - wire[NB:0][`NT_M1:0] bank_valids; - VX_bank_valids #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_bank_valid( - .in_valids(use_valid), - .in_addr(in_address), - .bank_valids(bank_valids) - ); - - wire[NB:0] more_than_one_valid; - - genvar curr_bank; - generate - for (curr_bank = 0; curr_bank <= NB; curr_bank = curr_bank + 1) - begin - wire[`CLOG2(`NT):0] num_valids; - - VX_countones #(.N(`NT)) valids_counter ( - .valids(bank_valids[curr_bank]), - .count (num_valids) - ); - assign more_than_one_valid[curr_bank] = num_valids > 1; - // assign more_than_one_valid[curr_bank] = $countones(bank_valids[curr_bank]) > 1; - end - endgenerate - - - assign stall = (|more_than_one_valid); - assign send_data = (!stall) && (|in_valid); // change - - wire[NB:0][(`CLOG2(NUM_REQ)) - 1:0] internal_req_num; - wire[NB:0] internal_out_valid; - - - // There's one or less valid per bank - genvar curr_bank_o; - for (curr_bank_o = 0; curr_bank_o <= NB; curr_bank_o = curr_bank_o + 1) - begin - - VX_generic_priority_encoder #(.N(NUM_REQ)) vx_priority_encoder( - .valids(bank_valids[curr_bank_o]), - .index(internal_req_num[curr_bank_o]), - .found(internal_out_valid[curr_bank_o]) - ); - assign out_address[curr_bank_o] = internal_out_valid[curr_bank_o] ? in_address[internal_req_num[curr_bank_o]] : 0; - assign out_data[curr_bank_o] = internal_out_valid[curr_bank_o] ? 
in_data[internal_req_num[curr_bank_o]] : 0; - end - - integer curr_b; - always @(*) begin - serviced = 0; - for (curr_b = 0; curr_b <= NB; curr_b=curr_b+1) begin - serviced[internal_req_num[curr_b]] = 1; - end - end - - - assign req_num = internal_req_num; - assign out_valid = internal_out_valid; - - - wire[`NT_M1:0] serviced_qual = in_valid & (serviced); - - wire[`NT_M1:0] new_left_requests = (left_requests == 0) ? (in_valid & ~serviced_qual) : (left_requests & ~ serviced_qual); - - // wire[`NT_M1:0] new_left_requests = left_requests & ~(serviced_qual); - - always @(posedge clk, posedge reset) begin - if (reset) begin - left_requests <= 0; - // serviced = 0; - end else begin - if (!stall) left_requests <= 0; - else left_requests <= new_left_requests; - end - end - -endmodule \ No newline at end of file diff --git a/hw/old_rtl/shared_memory/VX_shared_memory.v b/hw/old_rtl/shared_memory/VX_shared_memory.v deleted file mode 100644 index bd9cce36..00000000 --- a/hw/old_rtl/shared_memory/VX_shared_memory.v +++ /dev/null @@ -1,178 +0,0 @@ -`include "../VX_define.v" - -module VX_shared_memory - #( - parameter SM_SIZE = 4096, // Bytes - parameter SM_BANKS = 4, - parameter SM_BYTES_PER_READ = 16, - parameter SM_WORDS_PER_READ = 4, - parameter SM_LOG_WORDS_PER_READ = 2, - parameter SM_HEIGHT = 128, // Bytes - parameter SM_BANK_OFFSET_START = 2, - parameter SM_BANK_OFFSET_END = 4, - parameter SM_BLOCK_OFFSET_START = 5, - parameter SM_BLOCK_OFFSET_END = 6, - parameter SM_INDEX_START = 7, - parameter SM_INDEX_END = 13, - parameter NUM_REQ = 4, - parameter BITS_PER_BANK = 3 - ) - ( - //INPUTS - input wire clk, - input wire reset, - input wire[`NT_M1:0] in_valid, - input wire[`NT_M1:0][31:0] in_address, - input wire[`NT_M1:0][31:0] in_data, - input wire[2:0] mem_read, - input wire[2:0] mem_write, - //OUTPUTS - output wire[`NT_M1:0] out_valid, - output wire[`NT_M1:0][31:0] out_data, - output wire stall - ); - -//reg[NB:0][31:0] temp_address; -//reg[NB:0][31:0] temp_in_data; 
-//reg[NB:0] temp_in_valid; -reg[SM_BANKS - 1:0][31:0] temp_address; -reg[SM_BANKS - 1:0][31:0] temp_in_data; -reg[SM_BANKS - 1:0] temp_in_valid; - -reg[`NT_M1:0] temp_out_valid; -reg[`NT_M1:0][31:0] temp_out_data; - -//reg [NB:0][6:0] block_addr; -//reg [NB:0][3:0][31:0] block_wdata; -//reg [NB:0][3:0][31:0] block_rdata; -//reg [NB:0][1:0] block_we; -reg [SM_BANKS - 1:0][$clog2(SM_HEIGHT) - 1:0] block_addr; -reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_wdata; -reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_rdata; -reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we; - -wire send_data; - -//reg[NB:0][1:0] req_num; -reg[SM_BANKS - 1:0][`CLOG2(NUM_REQ) - 1:0] req_num; // not positive about this - -wire [`NT_M1:0] orig_in_valid; - - -genvar f; - generate - for(f = 0; f < `NT; f = f+1) begin - assign orig_in_valid[f] = in_valid[f]; - end - - assign out_valid = send_data ? temp_out_valid : 0; - assign out_data = send_data ? temp_out_data : 0; - endgenerate - - -//VX_priority_encoder_sm #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_priority_encoder_sm( -VX_priority_encoder_sm #(.NB(SM_BANKS - 1), .BITS_PER_BANK(BITS_PER_BANK), .NUM_REQ(NUM_REQ)) vx_priority_encoder_sm( - .clk(clk), - .reset(reset), - .in_valid(orig_in_valid), - .in_address(in_address), - .in_data(in_data), - - .out_valid(temp_in_valid), - .out_address(temp_address), - .out_data(temp_in_data), - - .req_num(req_num), - .stall(stall), - .send_data(send_data) - ); - - -genvar j; -integer i; -generate -//for(j=0; j<= NB; j=j+1) begin : sm_mem_block -for(j=0; j<= SM_BANKS - 1; j=j+1) begin - - wire shm_write = (mem_write != `NO_MEM_WRITE) && temp_in_valid[j]; - - VX_shared_memory_block# - ( - .SMB_HEIGHT(SM_HEIGHT), - .SMB_WORDS_PER_READ(SM_WORDS_PER_READ), - .SMB_LOG_WORDS_PER_READ(SM_LOG_WORDS_PER_READ) - ) vx_shared_memory_block - ( - .clk (clk), - .reset (reset), - .addr (block_addr[j]), - .wdata (block_wdata[j]), - .we (block_we[j]), - .shm_write(shm_write), - .data_out 
(block_rdata[j]) - ); -end - - -always @(*) begin - block_addr = 0; - block_we = 0; - block_wdata = 0; - //for(i = 0; i <= NB; i = i+1) begin - for(i = 0; i <= SM_BANKS - 1; i = i+1) begin - if(temp_in_valid[i] == 1'b1) begin - //1. Check if the request is actually to the shared memory - if((temp_address[i][31:24]) == 8'hFF) begin - // STORES - if(mem_write != `NO_MEM_WRITE) begin - if(mem_write == `SB_MEM_WRITE) begin - //TODO - end - else if(mem_write == `SH_MEM_WRITE) begin - //TODO - end - else if(mem_write == `SW_MEM_WRITE) begin - //block_addr[i] = temp_address[i][13:7]; - //block_we[i] = temp_address[i][6:5]; - //block_wdata[i][temp_address[i][6:5]] = temp_in_data[i]; - block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START]; - block_we[i] = temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]; - block_wdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]] = temp_in_data[i]; - end - end - //LOADS - else if(mem_read != `NO_MEM_READ) begin - if(mem_read == `LB_MEM_READ) begin - //TODO - end - else if (mem_read == `LH_MEM_READ) - begin - //TODO - end - else if (mem_read == `LW_MEM_READ) - begin - //block_addr[i] = temp_address[i][13:7]; - //temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][6:5]]; - //temp_out_valid[req_num[i]] = 1'b1; - block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START]; - temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]]; - temp_out_valid[req_num[i]] = 1'b1; - end - else if (mem_read == `LBU_MEM_READ) - begin - //TODO - end - else if (mem_read == `LHU_MEM_READ) - begin - //TODO - end - end - end - end - end -end - -endgenerate - - -endmodule diff --git a/hw/old_rtl/shared_memory/VX_shared_memory_block.v b/hw/old_rtl/shared_memory/VX_shared_memory_block.v deleted file mode 100644 index 9a37b6fe..00000000 --- a/hw/old_rtl/shared_memory/VX_shared_memory_block.v +++ /dev/null @@ -1,115 +0,0 @@ -module VX_shared_memory_block -#( - parameter SMB_SIZE = 
4096, // Bytes - parameter SMB_BYTES_PER_READ = 16, - parameter SMB_WORDS_PER_READ = 4, - parameter SMB_LOG_WORDS_PER_READ = 2, - parameter SMB_HEIGHT = 128, // Bytes - parameter BITS_PER_BANK = 3 -) -( - input wire clk, // Clock - input wire reset, - //input wire[6:0] addr, - //input wire[3:0][31:0] wdata, - //input wire[1:0] we, - //input wire shm_write, - - //output wire[3:0][31:0] data_out - input wire[$clog2(SMB_HEIGHT) - 1:0] addr, - input wire[SMB_WORDS_PER_READ-1:0][31:0] wdata, - input wire[SMB_LOG_WORDS_PER_READ-1:0] we, - input wire shm_write, - - output wire[SMB_WORDS_PER_READ-1:0][31:0] data_out - -); - - - `ifndef SYN - - //reg[3:0][31:0] shared_memory[127:0]; - reg[SMB_WORDS_PER_READ-1:0][31:0] shared_memory[SMB_HEIGHT-1:0]; - - //wire need_to_write = (|we); - integer curr_ind; - always @(posedge clk, posedge reset) begin - if (reset) begin - //for (curr_ind = 0; curr_ind < 128; curr_ind = curr_ind + 1) - for (curr_ind = 0; curr_ind < SMB_HEIGHT; curr_ind = curr_ind + 1) - begin - shared_memory[curr_ind] = 0; - end - end else if(shm_write) begin - shared_memory[addr][we][31:0] = wdata[we][31:0]; // - Ethan's addition - //if (we == 2'b00) shared_memory[addr][0][31:0] <= wdata[0][31:0]; - //if (we == 2'b01) shared_memory[addr][1][31:0] <= wdata[1][31:0]; - //if (we == 2'b10) shared_memory[addr][2][31:0] <= wdata[2][31:0]; - //if (we == 2'b11) shared_memory[addr][3][31:0] <= wdata[3][31:0]; - end - end - - - assign data_out = shm_write ? 0 : shared_memory[addr]; - - `else - - wire cena = 0; - wire cenb = !shm_write; - - wire[3:0][31:0] write_bit_mask; - - //assign write_bit_mask[0] = (we == 2'b00) ? {32{1'b1}} : {32{1'b0}}; - //assign write_bit_mask[1] = (we == 2'b01) ? {32{1'b1}} : {32{1'b0}}; - //assign write_bit_mask[2] = (we == 2'b10) ? {32{1'b1}} : {32{1'b0}}; - //assign write_bit_mask[3] = (we == 2'b11) ? 
{32{1'b1}} : {32{1'b0}}; - genvar curr_word; - for (curr_word = 0; curr_word < SMB_WORDS_PER_READ; curr_word = curr_word + 1) - begin - assign write_bit_mask[curr_word] = (we == curr_word) ? 1 : {32{1'b0}}; - end - - // Using ASIC MEM - /* verilator lint_off PINCONNECTEMPTY */ - rf2_128x128_wm1 first_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(data_out), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena), - .AA(addr), - .CLKB(clk), - .CENB(cenb), - .WENB(write_bit_mask), - .AB(addr), - .DB(wdata), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(7'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(7'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - /* verilator lint_on PINCONNECTEMPTY */ - - - `endif - -endmodule diff --git a/hw/unit_tests/cache/cachesim.cpp b/hw/unit_tests/cache/cachesim.cpp index 84e27a8c..93bef54b 100644 --- a/hw/unit_tests/cache/cachesim.cpp +++ b/hw/unit_tests/cache/cachesim.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include uint64_t timestamp = 0; diff --git a/simX/.gitignore b/simX/.gitignore deleted file mode 100644 index e204dd36..00000000 --- a/simX/.gitignore +++ /dev/null @@ -1 +0,0 @@ -obj_dir diff --git a/simX/BUGS b/simX/BUGS deleted file mode 100644 index e69de29b..00000000 diff --git a/simX/LICENSE b/simX/LICENSE deleted file mode 100644 index 9f83e434..00000000 --- a/simX/LICENSE +++ /dev/null @@ -1,12 +0,0 @@ -Copyright (c) 2011, Georgia Institute of Technology -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/simX/Makefile b/simX/Makefile index 3388fc07..e454b30a 100644 --- a/simX/Makefile +++ b/simX/Makefile @@ -1,37 +1,34 @@ -CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors -#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors +#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors -CFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized +CXXFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized +CXXFLAGS += -I. 
-I../hw +CXXFLAGS += -DDUMP_PERF_STATS -CFLAGS += -I../../hw +LDFLAGS += -TOP = cache_simX +TOP = vx_cache_sim -RTL_DIR = ../hw/old_rtl +RTL_DIR = ../hw/rtl -SRCS = simX.cpp args.cpp mem.cpp core.cpp instruction.cpp enc.cpp util.cpp +PROJECT = simX -RTL_INCLUDE=-I$(RTL_DIR) -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/shared_memory - -VL_FLAGS += -O2 --language 1800-2009 --assert -VL_FLAGS += -Wno-DECLFILENAME -VL_FLAGS += --x-initial unique --x-assign unique -VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH +SRCS = util.cpp args.cpp mem.cpp core.cpp warp.cpp instr.cpp decode.cpp execute.cpp simX.cpp # Debugigng ifdef DEBUG - VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS) - CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) + CXXFLAGS += $(DBG_FLAGS) else - VL_FLAGS += -DNDEBUG - CFLAGS += -DNDEBUG + CXXFLAGS += -DNDEBUG endif -all: simX +all: $(PROJECT) -simX: - verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' - make -j -C obj_dir -f V$(TOP).mk +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf obj_dir + rm -rf $(PROJECT) *.o .depend diff --git a/simX/WISHLIST b/simX/WISHLIST deleted file mode 100644 index 69800934..00000000 --- a/simX/WISHLIST +++ /dev/null @@ -1,14 +0,0 @@ -- Anonymous chunks whose names are not saved by the object writer. -- 32-bit instruction encoding for larger-pointered architecture versions. -- HOFDump mode for HARPTool/HOFTool -- Make operation information tables into member functions of Instruction, if - possible. -- Anonymous assigned values in the assembler. -- References (pointers) as .word directive contents in the assembler. -- Instruction validation before encoding. -- Make readError in obj.cpp throw something instead of printing a message and - exiting. -- Limit checking for byte/word encoders (e.g. 255 pRegs, 256 regs for byte) -- Eliminate the tmp_buf nonsense from the chunk encoder. 
-- Loosen arch restrictions imposed for interoperability (the number of lanes is - typically unimportant) diff --git a/simX/archdef.h b/simX/archdef.h new file mode 100644 index 00000000..78d12ad1 --- /dev/null +++ b/simX/archdef.h @@ -0,0 +1,166 @@ +#pragma once + +#include +#include + +#include +#include +#include "types.h" + +namespace vortex { + +class ArchDef { +public: + struct Undefined {}; + + ArchDef(const std::string &s, + int num_cores, + int num_warps, + int num_threads) { + std::istringstream iss(s.c_str()); + wordSize_ = 4; + encChar_ = 'w'; + numRegs_ = 32; + numPRegs_ = 0; + numCores_ = num_cores; + numWarps_ = num_warps; + numThreads_ = num_threads; + extent_ = EXT_END; + } + + operator std::string () const { + if (extent_ == EXT_NULL) + return ""; + + std::ostringstream oss; + if (extent_ >= EXT_WORDSIZE) oss << wordSize_; + if (extent_ >= EXT_ENC ) oss << encChar_; + if (extent_ >= EXT_REGS ) oss << numRegs_; + if (extent_ >= EXT_PREGS ) oss << '/' << numPRegs_; + if (extent_ >= EXT_THREADS ) oss << '/' << numThreads_; + if (extent_ >= EXT_WARPS ) oss << '/' << numWarps_; + if (extent_ >= EXT_CORES ) oss << '/' << numCores_; + + return oss.str(); + } + + bool operator==(const ArchDef &r) const { + Extent minExtent(r.extent_ > extent_ ? extent_ : r.extent_); + + // Can't be equal if we can't specify a binary encoding at all. 
+ if (minExtent < EXT_PREGS) + return false; + + if (minExtent >= EXT_WORDSIZE) { + if (wordSize_!=r.wordSize_) + return false; + } + + if (minExtent >= EXT_ENC) { + if (encChar_ != r.encChar_) + return false; + } + + if (minExtent >= EXT_REGS) { + if (numRegs_ != r.numRegs_) + return false; + } + + if (minExtent >= EXT_PREGS) { + if (numPRegs_ != r.numPRegs_) + return false; + } + + if (minExtent >= EXT_THREADS) { + if (numThreads_ != r.numThreads_) + return false; + } + + if (minExtent >= EXT_WARPS) { + if (numWarps_ != r.numWarps_) + return false; + } + + if (minExtent >= EXT_CORES) { + if (numCores_ != r.numCores_) + return false; + } + + return true; + } + + bool operator!=(const ArchDef &r) const { + return !(*this == r); + } + + Size getWordSize() const { + if (extent_ < EXT_WORDSIZE) + throw Undefined(); + return wordSize_; + } + + char getEncChar() const { + if ((extent_ < EXT_ENC) || (encChar_ == 'x')) + throw Undefined(); + return encChar_; + } + + RegNum getNumRegs() const { + if (extent_ < EXT_REGS) + throw Undefined(); + return numRegs_; + } + + RegNum getNumPRegs() const { + if (extent_ < EXT_PREGS) + throw Undefined(); + return numPRegs_; + } + + ThdNum getNumThreads() const { + if (extent_ < EXT_THREADS) + throw Undefined(); + return numThreads_; + } + + ThdNum getNumWarps() const { + if (extent_ < EXT_WARPS) + throw Undefined(); + return numWarps_; + } + + ThdNum getNumCores() const { + if (extent_ < EXT_CORES) + throw Undefined(); + return numCores_; + } + + bool is_cpu_mode() const { + return cpu_mode_; + } + +private: + enum Extent { + EXT_NULL, + EXT_WORDSIZE, + EXT_ENC, + EXT_REGS, + EXT_PREGS, + EXT_THREADS, + EXT_WARPS, + EXT_CORES, + EXT_END + }; + + Extent extent_; + Size wordSize_; + ThdNum numThreads_; + ThdNum numWarps_; + ThdNum numCores_; + RegNum numRegs_; + ThdNum numPRegs_; + char encChar_; + bool cpu_mode_; +}; + +} \ No newline at end of file diff --git a/simX/args.cpp b/simX/args.cpp index e0e2fc25..2cd847b2 100644 --- 
a/simX/args.cpp +++ b/simX/args.cpp @@ -1,39 +1,34 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#include "include/args.h" - #include #include +#include "args.h" -using namespace HarpTools; +using namespace vortex; using std::string; -std::string CommandLineArg::helpString; -std::map CommandLineArg::longArgs; -std::map CommandLineArg::shortArgs; +std::string CommandLineArg::helpString_; +std::unordered_map CommandLineArg::longArgs_; +std::unordered_map CommandLineArg::shortArgs_; -CommandLineArg::CommandLineArg(string s, string l, const char *helpText) -{ - helpString += helpText; - longArgs[l] = this; - shortArgs[s] = this; +CommandLineArg::CommandLineArg(string s, string l, const char *helpText) { + helpString_ += helpText; + longArgs_[l] = this; + shortArgs_[s] = this; } CommandLineArg::CommandLineArg(string l, const char *helpText) { - helpString += helpText; - longArgs[l] = this; + helpString_ += helpText; + longArgs_[l] = this; } void CommandLineArg::readArgs(int argc, char **argv) { for (int i = 0; i < argc; i++) { - std::map::iterator - s = shortArgs.find(std::string(argv[i])), - l = longArgs.find(std::string(argv[i])); + std::unordered_map::iterator + s = shortArgs_.find(std::string(argv[i])), + l = longArgs_.find(std::string(argv[i])); - if (s != shortArgs.end()) { + if (s != shortArgs_.end()) { i += s->second->read(argc - i, &argv[i]); - } else if (l != longArgs.end()) { + } else if (l != longArgs_.end()) { i += l->second->read(argc - i, &argv[i]); } else { throw BadArg(string(argv[i])); @@ -42,11 +37,11 @@ void CommandLineArg::readArgs(int argc, char **argv) { } void CommandLineArg::clearArgs() { - shortArgs.clear(); - longArgs.clear(); - helpString = ""; + shortArgs_.clear(); + longArgs_.clear(); + helpString_ = ""; } void CommandLineArg::showHelp(std::ostream &os) { - os << helpString; + 
os << helpString_; } diff --git a/simX/args.h b/simX/args.h new file mode 100644 index 00000000..7fbf236c --- /dev/null +++ b/simX/args.h @@ -0,0 +1,64 @@ +#pragma once + +#include +#include +#include +#include +#include "util.h" + +namespace vortex { + +struct BadArg { BadArg(std::string s) : arg(s) {} std::string arg; }; + +class CommandLineArg { +public: + CommandLineArg(std::string s, std::string l, const char *helpText); + CommandLineArg(std::string l, const char *helpText); + virtual int read(int argc, char** argv) = 0; + + static void readArgs(int argc, char **argv); + static void clearArgs(); + static void showHelp(std::ostream &os); + +private: + static std::string helpString_; + static std::unordered_map longArgs_; + static std::unordered_map shortArgs_; +}; + +template class CommandLineArgSetter : public CommandLineArg { +public: + CommandLineArgSetter(std::string s, std::string l, const char *ht, T &x) : + CommandLineArg(s, l, ht), arg_(x) {} + + CommandLineArgSetter(std::string l, const char *ht, T &x) : + CommandLineArg(l, ht), arg_(x) {} + + int read(int argc, char **argv) { + __unused(argc); + std::istringstream iss(argv[1]); + iss >> arg_; + return 1; + } +private: + T &arg_; +}; + +class CommandLineArgFlag : public CommandLineArg { +public: + CommandLineArgFlag(std::string s, std::string l, const char *ht, bool &x) : + CommandLineArg(s, l, ht), arg_(x) { arg_ = false; } + + CommandLineArgFlag(std::string l, const char *ht, bool &x) : + CommandLineArg(l, ht), arg_(x) { arg_ = false; } + + int read(int argc, char **argv) { + __unused(argc, argv); + arg_ = true; + return 0; + } +private: + bool &arg_; +}; + +} \ No newline at end of file diff --git a/simX/cache_simX.v b/simX/cache_simX.v deleted file mode 100644 index 34174be3..00000000 --- a/simX/cache_simX.v +++ /dev/null @@ -1,113 +0,0 @@ -`include "VX_define.v" - -module cache_simX ( - input wire clk, // Clock - input wire reset, - - // Icache - input wire[31:0] icache_pc_addr, - input wire 
icache_valid_pc_addr, - output wire icache_stall, - - // Dcache - input wire[2:0] dcache_mem_read, - input wire[2:0] dcache_mem_write, - input wire dcache_in_valid[`NT_M1:0], - input wire[31:0] dcache_in_addr[`NT_M1:0], - output wire dcache_stall -); - //////////////////// ICACHE /////////////////// - - VX_icache_request_inter VX_icache_req; - assign VX_icache_req.pc_address = icache_pc_addr; - assign VX_icache_req.out_cache_driver_in_mem_read = (icache_valid_pc_addr) ? `LW_MEM_READ : `NO_MEM_READ; - assign VX_icache_req.out_cache_driver_in_mem_write = `NO_MEM_WRITE; - assign VX_icache_req.out_cache_driver_in_valid = icache_valid_pc_addr; - assign VX_icache_req.out_cache_driver_in_data = 0; - - VX_icache_response_inter VX_icache_rsp; - assign icache_stall = VX_icache_rsp.delay; - - VX_dram_req_rsp_inter #( - - .NUMBER_BANKS(`ICACHE_BANKS), - .NUM_WORDS_PER_BLOCK(`ICACHE_NUM_WORDS_PER_BLOCK) - - ) VX_dram_req_rsp_icache(); - - reg icache_i_m_ready; - - assign VX_dram_req_rsp_icache.i_m_ready = icache_i_m_ready; - - //////////////////// DCACHE /////////////////// - - VX_dcache_request_inter VX_dcache_req; - assign VX_dcache_req.out_cache_driver_in_mem_read = dcache_mem_read; - assign VX_dcache_req.out_cache_driver_in_mem_write = dcache_mem_write; - assign VX_dcache_req.out_cache_driver_in_data = 0; - - genvar curr_t; - for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) - begin - assign VX_dcache_req.out_cache_driver_in_address[curr_t] = dcache_in_addr[curr_t]; - assign VX_dcache_req.out_cache_driver_in_valid[curr_t] = dcache_in_valid[curr_t]; - end - - VX_dcache_response_inter VX_dcache_rsp; - assign dcache_stall = VX_dcache_rsp.delay; - - VX_dram_req_rsp_inter #( - - .NUMBER_BANKS(`DCACHE_BANKS), - .NUM_WORDS_PER_BLOCK(`DCACHE_NUM_WORDS_PER_BLOCK) - - ) VX_dram_req_rsp(); - - reg dcache_i_m_ready; - assign VX_dram_req_rsp.i_m_ready = dcache_i_m_ready; - - VX_dmem_controller dmem_ctrl ( - .clk (clk), - .reset (reset), - .VX_dram_req_rsp (VX_dram_req_rsp), - 
.VX_dram_req_rsp_icache(VX_dram_req_rsp_icache), - .VX_icache_req (VX_icache_req), - .VX_icache_rsp (VX_icache_rsp), - .VX_dcache_req (VX_dcache_req), - .VX_dcache_rsp (VX_dcache_rsp) - ); - - always @(posedge clk, posedge reset) begin - if (reset) - begin - icache_i_m_ready = 0; - dcache_i_m_ready = 0; - end else begin - - if (VX_dram_req_rsp_icache.o_m_valid) begin - icache_i_m_ready = 1; - // $display("cache_simX.v: setting icache_i_m_ready = %d", icache_i_m_ready); - end else if (icache_i_m_ready) begin - icache_i_m_ready = 0; - end else begin - icache_i_m_ready = 0; - end - - - if (VX_dram_req_rsp.o_m_valid) begin - dcache_i_m_ready = 1; - end else if (dcache_i_m_ready) begin - dcache_i_m_ready = 0; - end else begin - dcache_i_m_ready = 0; - end - - end - end - -endmodule - - - - - diff --git a/simX/core.cpp b/simX/core.cpp index efd7cdb3..06b8e9af 100644 --- a/simX/core.cpp +++ b/simX/core.cpp @@ -1,837 +1,404 @@ -/******************************************************************************* - HARPtools by Chad D. 
Kersey, Summer 2011 -*******************************************************************************/ - #include -#include +#include +#include // #define USE_DEBUG 7 // #define PRINT_ACTIVE_THREADS -#include "include/types.h" -#include "include/util.h" -#include "include/archdef.h" -#include "include/mem.h" -#include "include/enc.h" -#include "include/core.h" -#include "include/debug.h" +#include "types.h" +#include "util.h" +#include "archdef.h" +#include "mem.h" +#include "decode.h" +#include "core.h" +#include "debug.h" -#ifdef EMU_INSTRUMENTATION -#include "include/qsim-harp.h" -#endif +#define INIT_TRACE(trace_inst) \ + trace_inst.valid_inst = false; \ + trace_inst.pc = 0; \ + trace_inst.wid = schedule_w_; \ + trace_inst.rs1 = -1; \ + trace_inst.rs2 = -1; \ + trace_inst.rd = -1; \ + trace_inst.vs1 = -1; \ + trace_inst.vs2 = -1; \ + trace_inst.vd = -1; \ + trace_inst.is_lw = false; \ + trace_inst.is_sw = false; \ + if (trace_inst.mem_addresses != NULL) \ + free(trace_inst.mem_addresses); \ + trace_inst.mem_addresses = (unsigned *)malloc(32 * sizeof(unsigned)); \ + for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++) \ + trace_inst.mem_addresses[tid] = 0xdeadbeef; \ + trace_inst.mem_stall_cycles = 0; \ + trace_inst.fetch_stall_cycles = 0; \ + trace_inst.stall_warp = false; \ + trace_inst.wspawn = false; \ + trace_inst.stalled = false; +#define CPY_TRACE(drain, source) \ + drain.valid_inst = source.valid_inst; \ + drain.pc = source.pc; \ + drain.wid = source.wid; \ + drain.rs1 = source.rs1; \ + drain.rs2 = source.rs2; \ + drain.rd = source.rd; \ + drain.vs1 = source.vs1; \ + drain.vs2 = source.vs2; \ + drain.vd = source.vd; \ + drain.is_lw = source.is_lw; \ + drain.is_sw = source.is_sw; \ + for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++)\ + drain.mem_addresses[tid] = source.mem_addresses[tid]; \ + drain.mem_stall_cycles = source.mem_stall_cycles; \ + drain.fetch_stall_cycles = source.fetch_stall_cycles; \ + drain.stall_warp = source.stall_warp; \ + 
drain.wspawn = source.wspawn; \ + drain.stalled = false; -#define NO_MEM_READ 7 -#define LB_MEM_READ 0 -#define LH_MEM_READ 1 -#define LW_MEM_READ 2 -#define LBU_MEM_READ 4 -#define LHU_MEM_READ 5 +using namespace vortex; - -#define NO_MEM_WRITE 7 -#define SB_MEM_WRITE 0 -#define SH_MEM_WRITE 1 -#define SW_MEM_WRITE 2 - -#define INIT_TRACE(trace_inst) \ - trace_inst.valid_inst = false; \ - trace_inst.pc = 0; \ - trace_inst.wid = schedule_w; \ - trace_inst.rs1 = -1; \ - trace_inst.rs2 = -1; \ - trace_inst.rd = -1; \ - trace_inst.vs1 = -1; \ - trace_inst.vs2 = -1; \ - trace_inst.vd = -1; \ - trace_inst.is_lw = false; \ - trace_inst.is_sw = false; \ - if (trace_inst.mem_addresses != NULL) free(trace_inst.mem_addresses); \ - trace_inst.mem_addresses = (unsigned *) malloc(32 * sizeof(unsigned)); \ - for (int tid = 0; tid < a.getNThds(); tid++) trace_inst.mem_addresses[tid] = 0xdeadbeef; \ - trace_inst.mem_stall_cycles = 0; \ - trace_inst.fetch_stall_cycles = 0; \ - trace_inst.stall_warp = false; \ - trace_inst.wspawn = false; \ - trace_inst.stalled = false; - -#define CPY_TRACE(drain, source) \ - drain.valid_inst = source.valid_inst; \ - drain.pc = source.pc; \ - drain.wid = source.wid; \ - drain.rs1 = source.rs1; \ - drain.rs2 = source.rs2; \ - drain.rd = source.rd; \ - drain.vs1 = source.vs1; \ - drain.vs2 = source.vs2; \ - drain.vd = source.vd; \ - drain.is_lw = source.is_lw; \ - drain.is_sw = source.is_sw; \ - for (int tid = 0; tid < a.getNThds(); tid++) drain.mem_addresses[tid] = source.mem_addresses[tid]; \ - drain.mem_stall_cycles = source.mem_stall_cycles; \ - drain.fetch_stall_cycles = source.fetch_stall_cycles; \ - drain.stall_warp = source.stall_warp; \ - drain.wspawn = source.wspawn; \ - drain.stalled = false; - -using namespace Harp; -using namespace std; - - -void printTrace(trace_inst_t * trace, const char * stage_name) -{ - D(3, stage_name << ": valid=" << trace->valid_inst); - D(3, stage_name << ": PC=" << hex << trace->pc << dec); - D(3, stage_name << 
": wid=" << trace->wid); - D(3, stage_name << ": rd=" << trace->rd << ", rs1=" << trace->rs1 << ", trs2=" << trace->rs2); - D(3, stage_name << ": is_lw=" << trace->is_lw); - D(3, stage_name << ": is_sw=" << trace->is_sw); - D(3, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles); - D(3, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles); - D(3, stage_name << ": stall_warp=" << trace->stall_warp); - D(3, stage_name << ": wspawn=" << trace->wspawn); - D(3, stage_name << ": stalled=" << trace->stalled); +void printTrace(trace_inst_t *trace, const char *stage_name) { + __unused(trace, stage_name); + D(3, stage_name << ": valid=" << trace->valid_inst); + D(3, stage_name << ": PC=" << std::hex << trace->pc << std::dec); + D(3, stage_name << ": wid=" << trace->wid); + D(3, stage_name << ": rd=" << trace->rd << ", rs1=" << trace->rs1 << ", trs2=" << trace->rs2); + D(3, stage_name << ": is_lw=" << trace->is_lw); + D(3, stage_name << ": is_sw=" << trace->is_sw); + D(3, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles); + D(3, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles); + D(3, stage_name << ": stall_warp=" << trace->stall_warp); + D(3, stage_name << ": wspawn=" << trace->wspawn); + D(3, stage_name << ": stalled=" << trace->stalled); } -#ifdef EMU_INSTRUMENTATION -void Harp::reg_doRead(Word cpuId, Word regNum) { - Harp::OSDomain::osDomain->do_reg(cpuId, regNum, 8, true); -} +Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id) + : id_(id) + , arch_(arch) + , decoder_(decoder) + , mem_(mem) + , steps_(0) + , num_instructions_(0) { + release_warp_ = false; + foundSchedule_ = true; + schedule_w_ = 0; -void Harp::reg_doWrite(Word cpuId, Word regNum) { - Harp::OSDomain::osDomain->do_reg(cpuId, regNum, 8, false); -} -#endif + memset(&inst_in_fetch_, 0, sizeof(inst_in_fetch_)); + memset(&inst_in_decode_, 0, sizeof(inst_in_decode_)); + memset(&inst_in_scheduler_, 0, sizeof(inst_in_scheduler_)); 
+ memset(&inst_in_exe_, 0, sizeof(inst_in_exe_)); + memset(&inst_in_lsu_, 0, sizeof(inst_in_lsu_)); + memset(&inst_in_wb_, 0, sizeof(inst_in_wb_)); -Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id): - a(a), iDec(d), mem(mem), steps(4), num_cycles(0), num_instructions(0) -{ - release_warp = false; - foundSchedule = true; - schedule_w = 0; - - memset(&inst_in_fetch, 0, sizeof(inst_in_fetch)); - memset(&inst_in_decode, 0, sizeof(inst_in_decode)); - memset(&inst_in_scheduler, 0, sizeof(inst_in_scheduler)); - memset(&inst_in_exe, 0, sizeof(inst_in_exe)); - memset(&inst_in_lsu, 0, sizeof(inst_in_lsu)); - memset(&inst_in_wb, 0, sizeof(inst_in_wb)); - - INIT_TRACE(inst_in_fetch); - INIT_TRACE(inst_in_decode); - INIT_TRACE(inst_in_scheduler); - INIT_TRACE(inst_in_exe); - INIT_TRACE(inst_in_lsu); - INIT_TRACE(inst_in_wb); + INIT_TRACE(inst_in_fetch_); + INIT_TRACE(inst_in_decode_); + INIT_TRACE(inst_in_scheduler_); + INIT_TRACE(inst_in_exe_); + INIT_TRACE(inst_in_lsu_); + INIT_TRACE(inst_in_wb_); for (int i = 0; i < 32; i++) { - stallWarp[i] = false; + stalled_warps_[i] = false; for (int j = 0; j < 32; j++) { - renameTable[i][j] = true; + renameTable_[i][j] = true; } } - for(int i = 0; i < 32; i++) { - vecRenameTable[i] = true; + for (int i = 0; i < 32; i++) { + vecRenameTable_[i] = true; } - cache_simulator = new Vcache_simX; - - // m_trace = new VerilatedVcdC; - // cache_simulator->trace(m_trace, 99); - // m_trace->open("simXtrace.vcd"); - - cache_simulator->reset = 1; - cache_simulator->clk = 0; - cache_simulator->eval(); - // m_trace->dump(10); - cache_simulator->reset = 1; - cache_simulator->clk = 1; - cache_simulator->eval(); - // m_trace->dump(11); - cache_simulator->reset = 0; - cache_simulator->clk = 0; - - for (unsigned i = 0; i < a.getNWarps(); ++i) { - w.push_back(Warp(this, i)); + for (unsigned i = 0; i < arch_.getNumWarps(); ++i) { + warps_.push_back(Warp(this, i)); } - w[0].activeThreads = 1; - w[0].spawned = true; + 
warps_[0].setActiveThreads(1); + warps_[0].setSpawned(true); } bool Core::interrupt(Word r0) { - w[0].interrupt(r0); + warps_[0].interrupt(r0); return false; } -void Core::step() -{ - D(3, "###########################################################"); - - steps++; - this->num_cycles++; - D(3, "cycle: " << this->num_cycles); - - DPH(3, "stalled warps:"); - for (int widd = 0; widd < a.getNWarps(); widd++) { - DPN(3, " " << stallWarp[widd]); - } - DPN(3, "\n"); - - // cout << "Rename table\n"; - // for (int regii = 0; regii < 32; regii++) - // { - // cout << regii << ": " << renameTable[0][regii] << '\n'; - // } - - // cout << '\n' << flush; - - // cout << "About to call writeback" << endl; - this->writeback(); - // cout << "About to call load_store" << endl; - this->load_store(); - // cout << "About to call execute_unit" << endl; - this->execute_unit(); - // cout << "About to call scheduler" << endl; - this->scheduler(); - // cout << "About to call decode" << endl; - this->decode(); - // D(3, "About to call fetch" << flush); - this->fetch(); - // D(3, "Finished fetch" << flush); - - if (release_warp) - { - release_warp = false; - stallWarp[release_warp_num] = false; - } - - DPN(3, flush); +Core::~Core() { + //-- } -void Core::getCacheDelays(trace_inst_t * trace_inst) -{ - static int curr_cycle = 0; - if (trace_inst->valid_inst) - { +void Core::step() { + D(3, "###########################################################"); - std::vector in_dcache_in_valid(a.getNThds()); - std::vector in_dcache_in_address(a.getNThds()); + steps_++; + D(3, "cycle: " << steps_); - unsigned in_dcache_mem_read; - unsigned in_dcache_mem_write; - if (trace_inst->is_lw) - { - in_dcache_mem_read = LW_MEM_READ; - in_dcache_mem_write = NO_MEM_WRITE; - } - else if (trace_inst->is_sw) - { - in_dcache_mem_read = NO_MEM_READ; - in_dcache_mem_write = SW_MEM_WRITE; - } - else - { - in_dcache_mem_read = NO_MEM_READ; - in_dcache_mem_write = NO_MEM_WRITE; - } + DPH(3, "stalled warps:"); + for (ThdNum 
widd = 0; widd < arch_.getNumWarps(); widd++) { + DPN(3, " " << stalled_warps_[widd]); + } + DPN(3, "\n"); - for (int j = 0; j < a.getNThds(); j++) - { - if ((w[trace_inst->wid].tmask[j]) && (trace_inst->is_sw || trace_inst->is_lw)) - { - in_dcache_in_valid[j] = true; - in_dcache_in_address[j] = trace_inst->mem_addresses[j]; - } - else - { - in_dcache_in_valid[j] = false; - in_dcache_in_address[j] = 0xdeadbeef; - } - } + // cout << "About to call writeback" << std::endl; + this->writeback(); + // cout << "About to call load_store" << std::endl; + this->load_store(); + // cout << "About to call execute_unit" << std::endl; + this->execute_unit(); + // cout << "About to call scheduler" << std::endl; + this->scheduler(); + // cout << "About to call decode" << std::endl; + this->decode(); + // D(3, "About to call fetch" << std::flush); + this->fetch(); + // D(3, "Finished fetch" << std::flush); - cache_simulator->clk = 1; - cache_simulator->eval(); - // m_trace->dump(2*curr_cycle); + if (release_warp_) { + release_warp_ = false; + stalled_warps_[release_warp_num_] = false; + } - cache_simulator->icache_pc_addr = trace_inst->pc; - cache_simulator->icache_valid_pc_addr = 1; - - // DCache start - cache_simulator->dcache_mem_read = in_dcache_mem_read; - cache_simulator->dcache_mem_write = in_dcache_mem_write; - for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) - { - cache_simulator->dcache_in_valid[cur_t] = in_dcache_in_valid[cur_t]; - cache_simulator->dcache_in_addr[cur_t] = in_dcache_in_address[cur_t]; - } - // DCache end - cache_simulator->clk = 0; - cache_simulator->eval(); - // m_trace->dump(2*curr_cycle+1); - - curr_cycle++; - - while((cache_simulator->icache_stall || cache_simulator->dcache_stall)) - { - - ////////// Feed input - if (cache_simulator->icache_stall) - { - cache_simulator->icache_pc_addr = trace_inst->pc; - cache_simulator->icache_valid_pc_addr = 1; - trace_inst->fetch_stall_cycles++; - } - else - { - cache_simulator->icache_valid_pc_addr = 0; - } - - 
if (cache_simulator->dcache_stall) - { - cache_simulator->dcache_mem_read = in_dcache_mem_read; - cache_simulator->dcache_mem_write = in_dcache_mem_write; - for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) - { - cache_simulator->dcache_in_valid[cur_t] = in_dcache_in_valid[cur_t]; - cache_simulator->dcache_in_addr[cur_t] = in_dcache_in_address[cur_t]; - } - trace_inst->mem_stall_cycles++; - } - else - { - cache_simulator->dcache_mem_read = NO_MEM_READ; - cache_simulator->dcache_mem_write = NO_MEM_WRITE; - for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) - { - cache_simulator->dcache_in_valid[cur_t] = 0; - } - } - - cache_simulator->clk = 1; - cache_simulator->eval(); - // m_trace->dump(2*curr_cycle); - - //////// Feed input - if (cache_simulator->icache_stall) - { - cache_simulator->icache_pc_addr = trace_inst->pc; - cache_simulator->icache_valid_pc_addr = 1; - } - else - { - cache_simulator->icache_valid_pc_addr = 0; - } - - if (cache_simulator->dcache_stall) - { - cache_simulator->dcache_mem_read = in_dcache_mem_read; - cache_simulator->dcache_mem_write = in_dcache_mem_write; - for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) - { - cache_simulator->dcache_in_valid[cur_t] = in_dcache_in_valid[cur_t]; - cache_simulator->dcache_in_addr[cur_t] = in_dcache_in_address[cur_t]; - } - } - else - { - cache_simulator->dcache_mem_read = NO_MEM_READ; - cache_simulator->dcache_mem_write = NO_MEM_WRITE; - for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) - { - cache_simulator->dcache_in_valid[cur_t] = 0; - } - } - - cache_simulator->clk = 0; - cache_simulator->eval(); - // m_trace->dump(2*curr_cycle+1); - - - curr_cycle++; - - } - - } + DPN(3, std::flush); } -void Core::warpScheduler() -{ - int numSteps = 0; - bool cont; +void Core::warpScheduler() { + foundSchedule_ = false; + int next_warp = schedule_w_; + for (size_t wid = 0; wid < warps_.size(); ++wid) { + // round robin scheduling + next_warp = (next_warp + 1) % warps_.size(); - do - { - numSteps++; - schedule_w = 
(schedule_w+1) % w.size(); + bool has_active_threads = (warps_[next_warp].getActiveThreads() > 0); + bool stalled = stalled_warps_[next_warp]; - bool has_active_threads = (w[schedule_w].activeThreads > 0); - bool stalled = stallWarp[schedule_w]; - - cont = ((!has_active_threads) || (stalled)) && (numSteps <= w.size()); - - // cout << "&&&&&&&WID: " << schedule_w << '\n'; - // cout << "activeThreads: " << w[schedule_w].activeThreads << "\t!has_active_threads: " << (!has_active_threads) << '\n'; - - // cout << "stalled: " << stalled << '\n'; - // cout << "numSteps: " << numSteps << " CONT: " << cont << '\n'; - - } while (cont); - - if (numSteps > w.size()) - { - this->foundSchedule = false; + if (has_active_threads && !stalled) { + foundSchedule_ = true; + break; } - else - { - this->foundSchedule = true; - } - + } + schedule_w_ = next_warp; } -void Core::fetch() -{ +void Core::fetch() { - // D(-1, "Found schedule: " << foundSchedule); + // D(-1, "Found schedule: " << foundSchedule_); - if ((!inst_in_scheduler.stalled) && (inst_in_fetch.fetch_stall_cycles == 0)) + if ((!inst_in_scheduler_.stalled) + && (inst_in_fetch_.fetch_stall_cycles == 0)) { + // CPY_TRACE(inst_in_decode_, inst_in_fetch_); + // if (warps_[schedule_w_].activeThreads) { - // CPY_TRACE(inst_in_decode, inst_in_fetch); - // if (w[schedule_w].activeThreads) - { + INIT_TRACE(inst_in_fetch_); - INIT_TRACE(inst_in_fetch); + if (foundSchedule_) { + auto active_threads_b = warps_[schedule_w_].getActiveThreads(); - if (foundSchedule) - { - auto active_threads_b = w[schedule_w].activeThreads; + num_instructions_ = num_instructions_ + warps_[schedule_w_].getActiveThreads(); + warps_[schedule_w_].step(&inst_in_fetch_); - this->num_instructions = this->num_instructions + w[schedule_w].activeThreads; - // this->num_instructions++; - w[schedule_w].step(&inst_in_fetch); - - auto active_threads_a = w[schedule_w].activeThreads; - if (active_threads_b != active_threads_a) { - D(3, "** warp #" << schedule_w << " active 
threads changed from " << active_threads_b << " to " << active_threads_a); - } - - this->getCacheDelays(&inst_in_fetch); - - if (inst_in_fetch.stall_warp) { - stallWarp[inst_in_fetch.wid] = true; - } - } - warpScheduler(); + auto active_threads_a = warps_[schedule_w_].getActiveThreads(); + if (active_threads_b != active_threads_a) { + D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a); } - } - else - { - inst_in_fetch.stalled = false; - if (inst_in_fetch.fetch_stall_cycles > 0) inst_in_fetch.fetch_stall_cycles--; - } - printTrace(&inst_in_fetch, "Fetch"); - - // #ifdef PRINT_ACTIVE_THREADS - DPH(3, "active threads:"); - for (unsigned j = 0; j < w[schedule_w].tmask.size(); ++j) { - if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j]) { - DPN(3, " 1"); - } else { - DPN(3, " 0"); + this->getCacheDelays(&inst_in_fetch_); + + if (inst_in_fetch_.stall_warp) { + stalled_warps_[inst_in_fetch_.wid] = true; + } } - } - DPN(3, "\n"); - // #endif + this->warpScheduler(); + } + } else { + inst_in_fetch_.stalled = false; + if (inst_in_fetch_.fetch_stall_cycles > 0) + --inst_in_fetch_.fetch_stall_cycles; + } - // #ifdef PRINT_ACTIVE_THREADS - // #endif + printTrace(&inst_in_fetch_, "Fetch"); } -void Core::decode() -{ - if ((inst_in_fetch.fetch_stall_cycles == 0) && !inst_in_scheduler.stalled) - { - CPY_TRACE(inst_in_decode, inst_in_fetch); - INIT_TRACE(inst_in_fetch); - } - - //printTrace(&inst_in_decode, "Decode"); +void Core::decode() { + if ((inst_in_fetch_.fetch_stall_cycles == 0) + && !inst_in_scheduler_.stalled) { + CPY_TRACE(inst_in_decode_, inst_in_fetch_); + INIT_TRACE(inst_in_fetch_); + } + //printTrace(&inst_in_decode_, "Decode"); } -void Core::scheduler() -{ - - if (!inst_in_scheduler.stalled) - { - CPY_TRACE(inst_in_scheduler, inst_in_decode); - INIT_TRACE(inst_in_decode); - } - - //printTrace(&inst_in_scheduler, "Scheduler"); +void Core::scheduler() { + if (!inst_in_scheduler_.stalled) { + 
CPY_TRACE(inst_in_scheduler_, inst_in_decode_); + INIT_TRACE(inst_in_decode_); + } + //printTrace(&inst_in_scheduler_, "Scheduler"); } -void Core::load_store() -{ - bool do_nothing = false; - if ((inst_in_lsu.mem_stall_cycles > 0) || (inst_in_lsu.stalled)) - { - // LSU currently busy - if ((inst_in_scheduler.is_lw || inst_in_scheduler.is_sw)) - { - inst_in_scheduler.stalled = true; - } - do_nothing = true; +void Core::load_store() { + if ((inst_in_lsu_.mem_stall_cycles > 0) || (inst_in_lsu_.stalled)) { + // LSU currently busy + if ((inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)) { + inst_in_scheduler_.stalled = true; } - else - { - // LSU not busy - if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw) - { - // Scheduler has LSU inst - bool scheduler_srcs_ready = true; - if (inst_in_scheduler.rs1 > 0) - { - scheduler_srcs_ready = scheduler_srcs_ready && renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs1]; - } + } else { + // LSU not busy + if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) { + // Scheduler has LSU inst + bool scheduler_srcs_ready = true; + if (inst_in_scheduler_.rs1 > 0) { + scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1]; + } - if (inst_in_scheduler.rs2 > 0) - { - scheduler_srcs_ready = scheduler_srcs_ready && renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2]; - } + if (inst_in_scheduler_.rs2 > 0) { + scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2]; + } - if(inst_in_scheduler.vs1 > 0) - { - scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs1]; - } - if(inst_in_scheduler.vs2 > 0) - { - scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs2]; - } + if (inst_in_scheduler_.vs1 > 0) { + scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1]; + } + if (inst_in_scheduler_.vs2 > 0) { + scheduler_srcs_ready = 
scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2]; + } - if (scheduler_srcs_ready) - { - if (inst_in_scheduler.rd != -1) renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false; - if (inst_in_scheduler.rd != -1) vecRenameTable[inst_in_scheduler.vd] = false; - CPY_TRACE(inst_in_lsu, inst_in_scheduler); - INIT_TRACE(inst_in_scheduler); - } - else - { - inst_in_scheduler.stalled = true; - // INIT_TRACE(inst_in_lsu); - do_nothing = true; - } - } - else - { - // INIT_TRACE(inst_in_lsu); - do_nothing = true; - } + if (scheduler_srcs_ready) { + if (inst_in_scheduler_.rd != -1) + renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false; + if (inst_in_scheduler_.rd != -1) + vecRenameTable_[inst_in_scheduler_.vd] = false; + CPY_TRACE(inst_in_lsu_, inst_in_scheduler_); + INIT_TRACE(inst_in_scheduler_); + } else { + inst_in_scheduler_.stalled = true; + // INIT_TRACE(inst_in_lsu_); + } + } else { + // INIT_TRACE(inst_in_lsu_); } + } - if (inst_in_lsu.mem_stall_cycles > 0) inst_in_lsu.mem_stall_cycles--; + if (inst_in_lsu_.mem_stall_cycles > 0) + inst_in_lsu_.mem_stall_cycles--; - //printTrace(&inst_in_lsu, "LSU"); + //printTrace(&inst_in_lsu_, "LSU"); } -void Core::execute_unit() -{ - bool do_nothing = false; - // EXEC is always not busy - if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw) - { - // Not an execute instruction - // INIT_TRACE(inst_in_exe); - do_nothing = true; - } - else - { - bool scheduler_srcs_ready = true; - if (inst_in_scheduler.rs1 > 0) - { - scheduler_srcs_ready = scheduler_srcs_ready && renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs1]; - // cout << "Rename RS1: " << inst_in_scheduler.rs1 << " is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs1] << " wid: " << inst_in_scheduler.wid << '\n'; - } - - if (inst_in_scheduler.rs2 > 0) - { - scheduler_srcs_ready = scheduler_srcs_ready && renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2]; - // cout << "Rename RS2: " << inst_in_scheduler.rs1 << " 
is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2] << " wid: " << inst_in_scheduler.wid << '\n'; - } - - // cout << "About to check vs*\n" << flush; - if(inst_in_scheduler.vs1 > 0) - { - scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs1]; - } - if(inst_in_scheduler.vs2 > 0) - { - scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs2]; - } - // cout << "Finished sources\n" << flush; - - if (scheduler_srcs_ready) - { - if (inst_in_scheduler.rd != -1) { - // cout << "rename setting rd: " << inst_in_scheduler.rd << " to not useabel wid: " << inst_in_scheduler.wid << '\n'; - renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false; - } - - // cout << "About to check vector wb: " << inst_in_scheduler.vd << "\n" << flush; - if(inst_in_scheduler.vd != -1) { - vecRenameTable[inst_in_scheduler.vd] = false; - } - // cout << "Finished wb checking" << "\n" << flush; - CPY_TRACE(inst_in_exe, inst_in_scheduler); - INIT_TRACE(inst_in_scheduler); - // cout << "Finished trace copying and clearning" << "\n" << flush; - } - else - { - D(3, "Execute: srcs not ready!"); - inst_in_scheduler.stalled = true; - // INIT_TRACE(inst_in_exe); - do_nothing = true; - } +void Core::execute_unit() { + // EXEC is always not busy + if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) { + // Not an execute instruction + // INIT_TRACE(inst_in_exe_); + } else { + bool scheduler_srcs_ready = true; + if (inst_in_scheduler_.rs1 > 0) { + scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1]; + // cout << "Rename RS1: " << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1] << " wid: " << inst_in_scheduler_.wid << '\n'; } - // if (!do_nothing) - // { + if (inst_in_scheduler_.rs2 > 0) { + scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2]; + // cout << "Rename RS2: 
" << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2] << " wid: " << inst_in_scheduler_.wid << '\n'; + } - // } + // cout << "About to check vs*\n" << std::flush; + if (inst_in_scheduler_.vs1 > 0) { + scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1]; + } + if (inst_in_scheduler_.vs2 > 0) { + scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2]; + } + // cout << "Finished sources\n" << std::flush; - //printTrace(&inst_in_exe, "EXE"); - // INIT_TRACE(inst_in_exe); + if (scheduler_srcs_ready) { + if (inst_in_scheduler_.rd != -1) { + // cout << "rename setting rd: " << inst_in_scheduler_.rd << " to not useabel wid: " << inst_in_scheduler_.wid << '\n'; + renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false; + } + + // cout << "About to check vector wb: " << inst_in_scheduler_.vd << "\n" << std::flush; + if (inst_in_scheduler_.vd != -1) { + vecRenameTable_[inst_in_scheduler_.vd] = false; + } + // cout << "Finished wb checking" << "\n" << std::flush; + CPY_TRACE(inst_in_exe_, inst_in_scheduler_); + INIT_TRACE(inst_in_scheduler_); + // cout << "Finished trace copying and clearning" << "\n" << std::flush; + } else { + D(3, "Execute: srcs not ready!"); + inst_in_scheduler_.stalled = true; + // INIT_TRACE(inst_in_exe_); + } + } + + //printTrace(&inst_in_exe_, "EXE"); + // INIT_TRACE(inst_in_exe_); } -void Core::writeback() -{ - if (inst_in_wb.rd > 0) renameTable[inst_in_wb.wid][inst_in_wb.rd] = true; - if (inst_in_wb.vd > 0) vecRenameTable[inst_in_wb.vd] = true; +void Core::writeback() { + if (inst_in_wb_.rd > 0) + renameTable_[inst_in_wb_.wid][inst_in_wb_.rd] = true; + if (inst_in_wb_.vd > 0) + vecRenameTable_[inst_in_wb_.vd] = true; - if (inst_in_wb.stall_warp) - { - stallWarp[inst_in_wb.wid] = false; - // release_warp = true; - // release_warp_num = inst_in_wb.wid; - } + if (inst_in_wb_.stall_warp) { + stalled_warps_[inst_in_wb_.wid] = 
false; + // release_warp_ = true; + // release_warp_num_ = inst_in_wb_.wid; + } + INIT_TRACE(inst_in_wb_); - INIT_TRACE(inst_in_wb); + bool serviced_exe = false; + if ((inst_in_exe_.rd > 0) || (inst_in_exe_.stall_warp)) { + CPY_TRACE(inst_in_wb_, inst_in_exe_); + INIT_TRACE(inst_in_exe_); + serviced_exe = true; + // cout << "WRITEBACK SERVICED EXE\n"; + } - bool serviced_exe = false; - bool serviced_mem = false; - if ((inst_in_exe.rd > 0) || (inst_in_exe.stall_warp)) - { - CPY_TRACE(inst_in_wb, inst_in_exe); - INIT_TRACE(inst_in_exe); - - serviced_exe = true; - // cout << "WRITEBACK SERVICED EXE\n"; - } - - if (inst_in_lsu.is_sw) - { - INIT_TRACE(inst_in_lsu); - } - else - { - if (((inst_in_lsu.rd > 0) || (inst_in_lsu.vd > 0)) && (inst_in_lsu.mem_stall_cycles == 0)) - { - if (serviced_exe) - { - D(3, "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used"); - inst_in_lsu.stalled = true; - } - else - { - serviced_mem = true; - CPY_TRACE(inst_in_wb, inst_in_lsu); - INIT_TRACE(inst_in_lsu); - - } + if (inst_in_lsu_.is_sw) { + INIT_TRACE(inst_in_lsu_); + } else { + if (((inst_in_lsu_.rd > 0) || (inst_in_lsu_.vd > 0)) && (inst_in_lsu_.mem_stall_cycles == 0)) { + if (serviced_exe) { + D(3, "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used"); + inst_in_lsu_.stalled = true; + } else { + CPY_TRACE(inst_in_wb_, inst_in_lsu_); + INIT_TRACE(inst_in_lsu_); } } - - // if (!serviced_exe && !serviced_mem) INIT_TRACE(inst_in_wb); - - //printTrace(&inst_in_wb, "Writeback"); - + } } +void Core::getCacheDelays(trace_inst_t *trace_inst) { + trace_inst->fetch_stall_cycles += 3; + if (trace_inst->is_sw || trace_inst->is_lw) { + trace_inst->mem_stall_cycles += 5; + } +} bool Core::running() const { - bool stages_have_valid = inst_in_fetch.valid_inst || inst_in_decode.valid_inst || inst_in_scheduler.valid_inst || - inst_in_lsu.valid_inst || inst_in_exe.valid_inst || inst_in_wb.valid_inst; + bool stages_have_valid = inst_in_fetch_.valid_inst + || inst_in_decode_.valid_inst 
+ || inst_in_scheduler_.valid_inst + || inst_in_lsu_.valid_inst + || inst_in_exe_.valid_inst + || inst_in_wb_.valid_inst; - if (stages_have_valid) return true; + if (stages_have_valid) + return true; - for (unsigned i = 0; i < w.size(); ++i) - if (w[i].running()) { - return true; + for (unsigned i = 0; i < warps_.size(); ++i) + if (warps_[i].running()) { + return true; } return false; } void Core::printStats() const { - // unsigned long insts = 0; - // for (unsigned i = 0; i < w.size(); ++i) - // insts += w[i].insts; - - // cerr << "Total steps: " << steps << endl; - // for (unsigned i = 0; i < w.size(); ++i) { - // // cout << "=== Warp " << i << " ===" << endl; - // w[i].printStats(); - // } -} - -Warp::Warp(Core *c, Word id) : - core(c), - pc(0x80000000), - shadowPc(0), - id(id), - activeThreads(0), - shadowActiveThreads(0), - reg(0), - pred(0), - shadowReg(core->a.getNRegs()), - shadowPReg(core->a.getNPRegs()), - VLEN(1024), - interruptEnable(true), - shadowInterruptEnable(false), - supervisorMode(true), - shadowSupervisorMode(false), - spawned(false), - steps(0), - insts(0), - loads(0), - stores(0) -{ - D(3, "Creating a new thread with PC: " << hex << this->pc << '\n'); - /* Build the register file. */ - Word regNum(0); - for (Word j = 0; j < core->a.getNThds(); ++j) { - reg.push_back(vector >(0)); - for (Word i = 0; i < core->a.getNRegs(); ++i) { - reg[j].push_back(Reg(id, regNum++)); - } - - pred.push_back(vector >(0)); - for (Word i = 0; i < core->a.getNPRegs(); ++i) { - pred[j].push_back(Reg(id, regNum++)); - } - - bool act = false; - if (j == 0) act = true; - tmask.push_back(act); - shadowTmask.push_back(act); + std::cout << "Total steps: " << steps_ << std::endl; + for (unsigned i = 0; i < warps_.size(); ++i) { + std::cout << "=== Warp " << i << " ===" << std::endl; + warps_[i].printStats(); } - - Word csrNum(0); - for (Word i = 0; i < (1<<12); i++) - { - csr.push_back(Reg(id, regNum++)); - } - - /* Set initial register contents. 
*/ - reg[0][0] = (core->a.getNThds()<<(core->a.getWordSize()*8 / 2)) | id; -} - -void Warp::step(trace_inst_t * trace_inst) { - Size fetchPos(0), decPos, wordSize(core->a.getWordSize()); - vector fetchBuffer(wordSize); - - if (activeThreads == 0) return; - - // ++steps; - - D(3, "current PC=0x" << hex << pc); - - // std::cout << "pc: " << hex << pc << "\n"; - - trace_inst->pc = pc; - - /* Fetch and decode. */ - if (wordSize < sizeof(pc)) pc &= ((1ll<<(wordSize*8))-1); - Instruction *inst; - bool fetchMore; - - fetchMore = false; - // unsigned fetchSize(wordSize - (pc+fetchPos)%wordSize); - unsigned fetchSize = 4; - fetchBuffer.resize(fetchSize); - Word fetched = core->mem.fetch(pc + fetchPos, supervisorMode); - writeWord(fetchBuffer, fetchPos, fetchSize, fetched); - decPos = 0; - inst = core->iDec.decode(fetchBuffer, decPos, trace_inst); - - // Update pc - pc += decPos; - - // Execute - - inst->executeOn(*this, trace_inst); - - - // At Debug Level 3, print debug info after each instruction. - // #ifdef USE_DEBUG - // if (USE_DEBUG >= 3) { - D(3, "Register state:"); - for (unsigned i = 0; i < reg[0].size(); ++i) { - D_RAW(" %r" << setfill(' ') << setw(2) << dec << i << ':'); - for (unsigned j = 0; j < (this->activeThreads); ++j) - D_RAW(' ' << setfill('0') << setw(8) << hex << reg[j][i] << setfill(' ') << ' '); - D_RAW('(' << shadowReg[i] << ')' << endl); - } - - - DPH(3, "Thread mask:"); - for (unsigned i = 0; i < tmask.size(); ++i) DPN(3, " " << tmask[i]); - DPN(3, "\n"); - - // } - // #endif - - // Clean up. - delete inst; -} - -bool Warp::interrupt(Word r0) { - if (!interruptEnable) return false; - -#ifdef EMU_INSTRUMENTATION - Harp::OSDomain::osDomain->do_int(0, r0); -#endif - - shadowActiveThreads = activeThreads; - shadowTmask = tmask; - shadowInterruptEnable = interruptEnable; /* For traps. 
*/ - shadowSupervisorMode = supervisorMode; - - for (Word i = 0; i < reg[0].size(); ++i) shadowReg[i] = reg[0][i]; - for (Word i = 0; i < pred[0].size(); ++i) shadowPReg[i] = pred[0][i]; - for (Word i = 0; i < reg.size(); ++i) tmask[i] = 1; - - shadowPc = pc; - activeThreads = 1; - interruptEnable = false; - supervisorMode = true; - reg[0][0] = r0; - pc = core->interruptEntry; - - return true; -} - -void Warp::printStats() const { - // cout << "Steps : " << steps << endl - // << "Insts : " << insts << endl - // << "Loads : " << loads << endl - // << "Stores: " << stores << endl; - - unsigned const grade = reg[0][28]; - - // if (grade == 1) cout << "GRADE: PASSED\n"; - // else cout << "GRADE: FAILED " << (grade >> 1) << "\n"; -} +} \ No newline at end of file diff --git a/simX/core.h b/simX/core.h new file mode 100644 index 00000000..5f225727 --- /dev/null +++ b/simX/core.h @@ -0,0 +1,100 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "debug.h" +#include "types.h" +#include "archdef.h" +#include "decode.h" +#include "mem.h" +#include "warp.h" +#include "trace.h" + +namespace vortex { + +class Core { +public: + Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id = 0); + ~Core(); + + bool interrupt(Word r0); + bool running() const; + + void getCacheDelays(trace_inst_t *); + void warpScheduler(); + void fetch(); + void decode(); + void scheduler(); + void execute_unit(); + void load_store(); + void writeback(); + + void step(); + + void printStats() const; + + Word id() const { + return id_; + } + + Warp& warp(int i) { + return warps_[i]; + } + + Decoder& decoder() { + return decoder_; + } + + MemoryUnit& mem() { + return mem_; + } + + const ArchDef& arch() const { + return arch_; + } + + Word interruptEntry() const { + return interruptEntry_; + } + + unsigned long num_instructions() const { + return num_instructions_; + } + + unsigned long num_steps() const { + return steps_; + } + +private: + + bool 
renameTable_[32][32]; + bool vecRenameTable_[32]; + bool stalled_warps_[32]; + bool foundSchedule_; + + Word id_; + const ArchDef &arch_; + Decoder &decoder_; + MemoryUnit &mem_; + std::vector warps_; + std::unordered_map> barriers_; + int schedule_w_; + uint64_t steps_; + uint64_t num_instructions_; + Word interruptEntry_; + bool release_warp_; + int release_warp_num_; + + trace_inst_t inst_in_fetch_; + trace_inst_t inst_in_decode_; + trace_inst_t inst_in_scheduler_; + trace_inst_t inst_in_exe_; + trace_inst_t inst_in_lsu_; + trace_inst_t inst_in_wb_; +}; + +} // namespace vortex \ No newline at end of file diff --git a/simX/debug.h b/simX/debug.h new file mode 100644 index 00000000..54277c74 --- /dev/null +++ b/simX/debug.h @@ -0,0 +1,42 @@ +#pragma once + +//#define USE_DEBUG 9 + +#ifdef USE_DEBUG + +#include +#include + +#define DX(x) x + +#define D(lvl, x) do { \ + if ((lvl) <= USE_DEBUG) { \ + std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x << std::endl; \ + } \ +} while(0) + +#define DPH(lvl, x) do { \ + if ((lvl) <= USE_DEBUG) { \ + std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x; \ + } \ +} while(0) + +#define DPN(lvl, x) do { \ + if ((lvl) <= USE_DEBUG) { \ + std::cout << x; \ + } \ +} while(0) + +#define D_RAW(x) do { \ + std::cout << x; \ +} while (0) + +#else + +#define DX(x) +#define D(lvl, x) do {} while(0) +#define DPH(lvl, x) do {} while(0) +#define DPN(lvl, x) do {} while(0) +#define D_RAW(x) do {} while(0) + +#endif \ No newline at end of file diff --git a/simX/decode.cpp b/simX/decode.cpp new file mode 100644 index 00000000..5e582ccc --- /dev/null +++ b/simX/decode.cpp @@ -0,0 +1,293 @@ +#include +#include +#include +#include +#include +#include +#include +#include "debug.h" +#include "types.h" +#include "util.h" +#include "decode.h" +#include "archdef.h" +#include "instr.h" +#include "trace.h" + +using namespace vortex; + +struct InstTableEntry_t { + const char *opString; + bool 
controlFlow; + InstType iType; +}; + +static const std::unordered_map sc_instTable = { + {Opcode::NOP, {"nop" , false, InstType::N_TYPE}}, + {Opcode::R_INST, {"r_type", false, InstType::R_TYPE}}, + {Opcode::L_INST, {"load" , false, InstType::I_TYPE}}, + {Opcode::I_INST, {"i_type", false, InstType::I_TYPE}}, + {Opcode::S_INST, {"store" , false, InstType::S_TYPE}}, + {Opcode::B_INST, {"branch", true , InstType::B_TYPE}}, + {Opcode::LUI_INST, {"lui" , false, InstType::U_TYPE}}, + {Opcode::AUIPC_INST, {"auipc" , false, InstType::U_TYPE}}, + {Opcode::JAL_INST, {"jal" , true , InstType::J_TYPE}}, + {Opcode::JALR_INST, {"jalr" , true , InstType::I_TYPE}}, + {Opcode::SYS_INST, {"SYS" , true , InstType::I_TYPE}}, + {Opcode::FENCE, {"fence" , true , InstType::I_TYPE}}, + {Opcode::PJ_INST, {"pred j", true , InstType::R_TYPE}}, + {Opcode::GPGPU, {"gpgpu" , false, InstType::R_TYPE}}, + {Opcode::VSET_ARITH, {"vsetvl", false, InstType::V_TYPE}}, + {Opcode::VL, {"vl" , false, InstType::V_TYPE}}, + {Opcode::VS, {"vs" , false, InstType::V_TYPE}} +}; + +std::ostream &vortex::operator<<(std::ostream &os, Instr &instr) { + os << std::dec << sc_instTable.at(instr.opcode_).opString; + return os; +} + +Decoder::Decoder(const ArchDef &arch) { + inst_s_ = arch.getWordSize() * 8; + opcode_s_ = 7; + reg_s_ = 5; + func3_s_ = 3; + mop_s_ = 3; + vmask_s_ = 1; + + shift_opcode_ = 0; + shift_rd_ = opcode_s_; + shift_func3_ = opcode_s_ + reg_s_; + shift_rs1_ = opcode_s_ + reg_s_ + func3_s_; + shift_rs2_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_; + shift_func7_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_; + shift_j_u_immed_ = opcode_s_ + reg_s_; + shift_s_b_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_; + shift_i_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_; + shift_vset_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_; + shift_vmask_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_; + shift_vmop_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + vmask_s_; + shift_vnf_ = 
opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + vmask_s_ + mop_s_; + shift_func6_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + 1; + shift_vset_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + 6; + + reg_mask_ = 0x1f; + func3_mask_ = 0x7; + func6_mask_ = 0x3f; + func7_mask_ = 0x7f; + opcode_mask_ = 0x7f; + i_imm_mask_ = 0xfff; + s_imm_mask_ = 0xfff; + b_imm_mask_ = 0x1fff; + u_imm_mask_ = 0xfffff; + j_imm_mask_ = 0xfffff; + v_imm_mask_ = 0x7ff; +} + +std::shared_ptr Decoder::decode(const std::vector &v, Size &idx, trace_inst_t *trace_inst) { + Word code(readWord(v, idx, inst_s_ / 8)); + + // std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n"; + auto instr = std::make_shared(); + + Opcode op = (Opcode)((code >> shift_opcode_) & opcode_mask_); + // std::cout << "opcode: " << op << "\n"; + instr->setOpcode(op); + + Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5, + bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3; + + // std::cout << "op: " << std::hex << op << " what " << sc_instTable[op].iType << "\n"; + switch (sc_instTable.at(op).iType) { + case InstType::N_TYPE: + break; + + case InstType::R_TYPE: + instr->setPred((code >> shift_rs1_) & reg_mask_); + instr->setDestReg((code >> shift_rd_) & reg_mask_); + instr->setSrcReg((code >> shift_rs1_) & reg_mask_); + instr->setSrcReg((code >> shift_rs2_) & reg_mask_); + instr->setFunc3((code >> shift_func3_) & func3_mask_); + instr->setFunc7((code >> shift_func7_) & func7_mask_); + + trace_inst->valid_inst = true; + trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_); + trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_); + trace_inst->rd = ((code >> shift_rd_) & reg_mask_); + break; + + case InstType::I_TYPE: + instr->setDestReg((code >> shift_rd_) & reg_mask_); + instr->setSrcReg((code >> shift_rs1_) & reg_mask_); + instr->setFunc7((code >> shift_func7_) & func7_mask_); + func3 = (code >> shift_func3_) & func3_mask_; + instr->setFunc3(func3); + + if ((func3 == 
5) && (op != L_INST)) { + // std::cout << "func7: " << func7 << "\n"; + instr->setSrcImm(signExt(((code >> shift_rs2_) & reg_mask_), 5, reg_mask_)); + } else { + instr->setSrcImm(signExt(code >> shift_i_immed_, 12, i_imm_mask_)); + } + + trace_inst->valid_inst = true; + trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_); + trace_inst->rd = ((code >> shift_rd_) & reg_mask_); + break; + + case InstType::S_TYPE: + // std::cout << "************STORE\n"; + instr->setSrcReg((code >> shift_rs1_) & reg_mask_); + instr->setSrcReg((code >> shift_rs2_) & reg_mask_); + instr->setFunc3((code >> shift_func3_) & func3_mask_); + + dest_bits = (code >> shift_rd_) & reg_mask_; + imm_bits = (code >> shift_s_b_immed_ & func7_mask_); + imeed = (imm_bits << reg_s_) | dest_bits; + // std::cout << "ENC: store imeed: " << imeed << "\n"; + instr->setSrcImm(signExt(imeed, 12, s_imm_mask_)); + + trace_inst->valid_inst = true; + trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_); + trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_); + break; + + case InstType::B_TYPE: + instr->setSrcReg((code >> shift_rs1_) & reg_mask_); + instr->setSrcReg((code >> shift_rs2_) & reg_mask_); + instr->setFunc3((code >> shift_func3_) & func3_mask_); + + dest_bits = (code >> shift_rd_) & reg_mask_; + imm_bits = (code >> shift_s_b_immed_ & func7_mask_); + + bit_11 = dest_bits & 0x1; + bits_4_1 = dest_bits >> 1; + bit_10_5 = imm_bits & 0x3f; + bit_12 = imm_bits >> 6; + + imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12); + + instr->setSrcImm(signExt(imeed, 13, b_imm_mask_)); + + trace_inst->valid_inst = true; + trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_); + trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_); + break; + + case InstType::U_TYPE: + instr->setDestReg((code >> shift_rd_) & reg_mask_); + instr->setSrcImm(signExt(code >> shift_j_u_immed_, 20, u_imm_mask_)); + trace_inst->valid_inst = true; + trace_inst->rd = ((code >> shift_rd_) & reg_mask_); + break; + + case 
InstType::J_TYPE: + instr->setDestReg((code >> shift_rd_) & reg_mask_); + + // [20 | 10:1 | 11 | 19:12] + + unordered = code >> shift_j_u_immed_; + + bits_19_12 = unordered & 0xff; + bit_11 = (unordered >> 8) & 0x1; + bits_10_1 = (unordered >> 9) & 0x3ff; + bit_20 = (unordered >> 19) & 0x1; + + imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20); + + if (bit_20) { + imeed |= ~j_imm_mask_; + } + + instr->setSrcImm(imeed); + + trace_inst->valid_inst = true; + trace_inst->rd = ((code >> shift_rd_) & reg_mask_); + break; + + case InstType::V_TYPE: + D(3, "Entered here: instr type = vector" << op); + switch (op) { + case Opcode::VSET_ARITH: //TODO: arithmetic ops + instr->setDestReg((code >> shift_rd_) & reg_mask_); + instr->setSrcReg((code >> shift_rs1_) & reg_mask_); + func3 = (code >> shift_func3_) & func3_mask_; + instr->setFunc3(func3); + D(3, "Entered here: instr type = vector"); + + if (func3 == 7) { + D(3, "Entered here: imm instr"); + instr->setVsetImm(!(code >> shift_vset_)); + if (instr->getVsetImm()) { + Word immed = (code >> shift_rs2_) & v_imm_mask_; + D(3, "immed" << immed); + instr->setSrcImm(immed); //TODO + instr->setVlmul(immed & 0x3); + D(3, "lmul " << (immed & 0x3)); + instr->setVediv((immed >> 4) & 0x3); + D(3, "ediv " << ((immed >> 4) & 0x3)); + instr->setVsew((immed >> 2) & 0x3); + D(3, "sew " << ((immed >> 2) & 0x3)); + } else { + instr->setSrcReg((code >> shift_rs2_) & reg_mask_); + trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_); + } + trace_inst->valid_inst = true; + trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_); + trace_inst->rd = ((code >> shift_rd_) & reg_mask_); + } else { + instr->setSrcReg((code >> shift_rs2_) & reg_mask_); + instr->setVmask((code >> shift_vmask_) & 0x1); + instr->setFunc6((code >> shift_func6_) & func6_mask_); + + trace_inst->valid_inst = true; + trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_); + trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_); + trace_inst->rd = 
((code >> shift_rd_) & reg_mask_); + } + break; + + case Opcode::VL: + D(3, "vector load instr"); + instr->setDestReg((code >> shift_rd_) & reg_mask_); + instr->setSrcReg((code >> shift_rs1_) & reg_mask_); + instr->setVlsWidth((code >> shift_func3_) & func3_mask_); + instr->setSrcReg((code >> shift_rs2_) & reg_mask_); + instr->setVmask((code >> shift_vmask_)); + instr->setVmop((code >> shift_vmop_) & func3_mask_); + instr->setVnf((code >> shift_vnf_) & func3_mask_); + + trace_inst->valid_inst = true; + trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_); + trace_inst->vd = ((code >> shift_rd_) & reg_mask_); + //trace_inst->vs2 = ((code>>shift_rs2_) & reg_mask_); + break; + + case Opcode::VS: + instr->setVs3((code >> shift_rd_) & reg_mask_); + instr->setSrcReg((code >> shift_rs1_) & reg_mask_); + instr->setVlsWidth((code >> shift_func3_) & func3_mask_); + instr->setSrcReg((code >> shift_rs2_) & reg_mask_); + instr->setVmask((code >> shift_vmask_)); + instr->setVmop((code >> shift_vmop_) & func3_mask_); + instr->setVnf((code >> shift_vnf_) & func3_mask_); + + trace_inst->valid_inst = true; + trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_); + //trace_inst->vd = ((code>>shift_rd_) & reg_mask_); + trace_inst->vs1 = ((code >> shift_rd_) & reg_mask_); //vs3 + break; + + default: + std::cout << "Inavlid opcode.\n"; + std::abort(); + } + break; + default: + std::cout << "Unrecognized argument class in word decoder.\n"; + std::abort(); + } + + D(2, "Decoded instr 0x" << std::hex << code << " into: " << instr << std::flush); + + return instr; +} diff --git a/simX/decode.h b/simX/decode.h new file mode 100644 index 00000000..9aa5d6e4 --- /dev/null +++ b/simX/decode.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include "util.h" + +namespace vortex { + +class ArchDef; +class Instr; +class trace_inst_t; + +class Decoder { +public: + Decoder(const ArchDef &); + + virtual std::shared_ptr decode(const std::vector &v, Size &n, trace_inst_t * trace_inst); + +private: 
+ + Word inst_s_; + Word opcode_s_; + Word reg_s_; + Word func3_s_; + Word shift_opcode_; + Word shift_rd_; + Word shift_rs1_; + Word shift_rs2_; + Word shift_func3_; + Word shift_func7_; + Word shift_j_u_immed_; + Word shift_s_b_immed_; + Word shift_i_immed_; + + Word reg_mask_; + Word func3_mask_; + Word func6_mask_; + Word func7_mask_; + Word opcode_mask_; + Word i_imm_mask_; + Word s_imm_mask_; + Word b_imm_mask_; + Word u_imm_mask_; + Word j_imm_mask_; + Word v_imm_mask_; + + //Vector + Word shift_vset_; + Word shift_vset_immed_; + Word shift_vmask_; + Word shift_vmop_; + Word shift_vnf_; + Word shift_func6_; + Word vmask_s_; + Word mop_s_; +}; + +} \ No newline at end of file diff --git a/simX/enc.cpp b/simX/enc.cpp deleted file mode 100644 index a19a99e8..00000000 --- a/simX/enc.cpp +++ /dev/null @@ -1,328 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#include -#include -#include -#include -#include -#include - -#include "include/debug.h" -#include "include/types.h" -#include "include/util.h" -#include "include/enc.h" -#include "include/archdef.h" -#include "include/instruction.h" - -using namespace std; -using namespace Harp; - -// ByteDecoder::ByteDecoder(const ArchDef &ad) { -// wordSize = ad.getWordSize(); -// } - -/*static void decodeError(string msg) { - cout << "Instruction decoder error: " << msg << '\n'; - std::abort(); -}*/ - -/*static unsigned ceilLog2(RegNum x) { - unsigned z = 0; - bool nonZeroInnerValues(false); - - if (x == 0) return 0; - - while (x != 1) { - z++; - if (x&1) nonZeroInnerValues = true; - x >>= 1; - } - - if (nonZeroInnerValues) z++; - - return z; -}*/ - -WordDecoder::WordDecoder(const ArchDef &arch) { - - inst_s = arch.getWordSize() * 8; - opcode_s = 7; - reg_s = 5; - func3_s = 3; - mop_s = 3; - vmask_s = 1; - - shift_opcode = 0; - shift_rd = opcode_s; - 
shift_func3 = opcode_s + reg_s; - shift_rs1 = opcode_s + reg_s + func3_s; - shift_rs2 = opcode_s + reg_s + func3_s + reg_s; - shift_func7 = opcode_s + reg_s + func3_s + reg_s + reg_s; - shift_j_u_immed = opcode_s + reg_s; - shift_s_b_immed = opcode_s + reg_s + func3_s + reg_s + reg_s; - shift_i_immed = opcode_s + reg_s + func3_s + reg_s; - shift_vset_immed = opcode_s + reg_s + func3_s + reg_s; - shift_vmask = opcode_s + reg_s + func3_s + reg_s + reg_s; - shift_vmop = opcode_s + reg_s + func3_s + reg_s + reg_s + vmask_s; - shift_vnf = opcode_s + reg_s + func3_s + reg_s + reg_s + vmask_s + mop_s; - shift_func6 = opcode_s + reg_s + func3_s + reg_s + reg_s + 1; - shift_vset = opcode_s + reg_s + func3_s + reg_s + reg_s + 6; - - - reg_mask = 0x1f; - func3_mask = 0x7; - func7_mask = 0x7f; - opcode_mask = 0x7f; - i_immed_mask = 0xfff; - s_immed_mask = 0xfff; - b_immed_mask = 0x1fff; - u_immed_mask = 0xfffff; - j_immed_mask = 0xfffff; - v_immed_mask = 0x7ff; - func6_mask = 0x3f; - -} - -static Word signExt(Word w, Size bit, Word mask) { - if (w>>(bit-1)) w |= ~mask; - return w; -} - -Instruction *WordDecoder::decode(const std::vector &v, Size &idx, trace_inst_t * trace_inst) { - Word code(readWord(v, idx, inst_s/8)); - - // std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n"; - - - Instruction &inst = * new Instruction(); - - // bool predicated = (code>>(n-1)); - bool predicated = false; - if (predicated) { inst.setPred((code>>(inst_s-p-1))&pMask); } - - Opcode op = (Opcode)((code>>shift_opcode)&opcode_mask); - // std::cout << "opcode: " << op << "\n"; - inst.setOpcode(op); - - bool usedImm(false); - Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5, - bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3; - - // std::cout << "op: " << std::hex << op << " what " << instTable[op].iType << "\n"; - switch(instTable[op].iType) - { - case InstType::N_TYPE: - break; - case InstType::R_TYPE: - inst.setPred((code>>shift_rs1) & reg_mask); - 
inst.setDestReg((code>>shift_rd) & reg_mask); - inst.setSrcReg((code>>shift_rs1) & reg_mask); - inst.setSrcReg((code>>shift_rs2) & reg_mask); - inst.setFunc3 ((code>>shift_func3) & func3_mask); - inst.setFunc7 ((code>>shift_func7) & func7_mask); - - trace_inst->valid_inst = true; - trace_inst->rs1 = ((code>>shift_rs1) & reg_mask); - trace_inst->rs2 = ((code>>shift_rs2) & reg_mask); - trace_inst->rd = ((code>>shift_rd) & reg_mask); - - break; - case InstType::I_TYPE: - inst.setDestReg((code>>shift_rd) & reg_mask); - inst.setSrcReg((code>>shift_rs1) & reg_mask); - inst.setFunc7 ((code>>shift_func7) & func7_mask); - func3 = (code>>shift_func3) & func3_mask; - inst.setFunc3 (func3); - - if ((func3 == 5) && (op != L_INST)) - { - // std::cout << "func7: " << func7 << "\n"; - inst.setSrcImm(signExt(((code>>shift_rs2)®_mask), 5, reg_mask)); - } - else - { - inst.setSrcImm(signExt(code>>shift_i_immed, 12, i_immed_mask)); - } - usedImm = true; - - trace_inst->valid_inst = true; - trace_inst->rs1 = ((code>>shift_rs1) & reg_mask); - trace_inst->rd = ((code>>shift_rd) & reg_mask); - - break; - case InstType::S_TYPE: - // std::cout << "************STORE\n"; - inst.setSrcReg((code>>shift_rs1) & reg_mask); - inst.setSrcReg((code>>shift_rs2) & reg_mask); - inst.setFunc3 ((code>>shift_func3) & func3_mask); - - dest_bits = (code>>shift_rd) & reg_mask; - imm_bits = (code>>shift_s_b_immed & func7_mask); - imeed = (imm_bits << reg_s) | dest_bits; - // std::cout << "ENC: store imeed: " << imeed << "\n"; - inst.setSrcImm(signExt(imeed, 12, s_immed_mask)); - usedImm = true; - - trace_inst->valid_inst = true; - trace_inst->rs1 = ((code>>shift_rs1) & reg_mask); - trace_inst->rs2 = ((code>>shift_rs2) & reg_mask); - - break; - case InstType::B_TYPE: - - inst.setSrcReg((code>>shift_rs1) & reg_mask); - inst.setSrcReg((code>>shift_rs2) & reg_mask); - inst.setFunc3 ((code>>shift_func3) & func3_mask); - - dest_bits = (code>>shift_rd) & reg_mask; - imm_bits = (code>>shift_s_b_immed & func7_mask); - 
- bit_11 = dest_bits & 0x1; - bits_4_1 = dest_bits >> 1; - bit_10_5 = imm_bits & 0x3f; - bit_12 = imm_bits >> 6; - - imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12); - - inst.setSrcImm(signExt(imeed, 13, b_immed_mask)); - usedImm = true; - - trace_inst->valid_inst = true; - trace_inst->rs1 = ((code>>shift_rs1) & reg_mask); - trace_inst->rs2 = ((code>>shift_rs2) & reg_mask); - - break; - case InstType::U_TYPE: - inst.setDestReg((code>>shift_rd) & reg_mask); - inst.setSrcImm(signExt(code>>shift_j_u_immed, 20, u_immed_mask)); - usedImm = true; - trace_inst->valid_inst = true; - trace_inst->rd = ((code>>shift_rd) & reg_mask); - break; - case InstType::J_TYPE: - inst.setDestReg((code>>shift_rd) & reg_mask); - - // [20 | 10:1 | 11 | 19:12] - - unordered = code>>shift_j_u_immed; - - bits_19_12 = unordered & 0xff; - bit_11 = (unordered>>8) & 0x1; - bits_10_1 = (unordered >> 9) & 0x3ff; - bit_20 = (unordered>>19) & 0x1; - - imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20); - - if (bit_20) - { - imeed |= ~j_immed_mask; - } - - // inst.setSrcImm(signExt(imeed, 20, j_immed_mask)); - inst.setSrcImm(imeed); - usedImm = true; - - trace_inst->valid_inst = true; - trace_inst->rd = ((code>>shift_rd) & reg_mask); - - break; - - case InstType::V_TYPE: - D(3, "Entered here: instr type = vector" << op); - switch (op) { - case Opcode::VSET_ARITH: //TODO: arithmetic ops - inst.setDestReg((code>>shift_rd) & reg_mask); - inst.setSrcReg((code>>shift_rs1) & reg_mask); - func3 = (code>>shift_func3) & func3_mask; - inst.setFunc3 (func3); - D(3, "Entered here: instr type = vector"); - - if(func3 == 7) { - D(3, "Entered here: imm instr"); - - inst.setVsetImm(!(code>>shift_vset)); - - if(inst.getVsetImm()) { - Word immed = (code>>shift_rs2) & v_immed_mask; - D(3, "immed" << immed); - inst.setSrcImm(immed); //TODO - inst.setvlmul(immed & 0x3); - D(3, "lmul " << (immed & 0x3)); - inst.setvediv((immed>>4) & 0x3); - D(3, "ediv " << 
((immed>>4) & 0x3)); - inst.setvsew((immed>>2) & 0x3); - D(3, "sew " << ((immed>>2) & 0x3)); - } - else { - inst.setSrcReg((code>>shift_rs2) & reg_mask); - trace_inst->rs2 = ((code>>shift_rs2) & reg_mask); - } - trace_inst->valid_inst = true; - trace_inst->rs1 = ((code>>shift_rs1) & reg_mask); - trace_inst->rd = ((code>>shift_rd) & reg_mask); - } else { - inst.setSrcReg((code>>shift_rs2) & reg_mask); - inst.setVmask((code>>shift_vmask) & 0x1); - inst.setFunc6((code>>shift_func6) & func6_mask); - - trace_inst->valid_inst = true; - trace_inst->rs1 = ((code>>shift_rs1) & reg_mask); - trace_inst->rs2 = ((code>>shift_rs2) & reg_mask); - trace_inst->rd = ((code>>shift_rd) & reg_mask); - } - break; - case Opcode::VL: - D(3, "vector load instr"); - inst.setDestReg((code>>shift_rd) & reg_mask); - inst.setSrcReg((code>>shift_rs1) & reg_mask); - inst.setVlsWidth((code>>shift_func3) & func3_mask); - inst.setSrcReg((code>>shift_rs2) & reg_mask); - inst.setVmask((code>>shift_vmask)); - inst.setVmop((code>>shift_vmop) & func3_mask); - inst.setVnf((code>>shift_vnf) & func3_mask); - - trace_inst->valid_inst = true; - trace_inst->rs1 = ((code>>shift_rs1) & reg_mask); - trace_inst->vd = ((code>>shift_rd) & reg_mask); - //trace_inst->vs2 = ((code>>shift_rs2) & reg_mask); - - break; - case Opcode::VS: - inst.setVs3((code>>shift_rd) & reg_mask); - inst.setSrcReg((code>>shift_rs1) & reg_mask); - inst.setVlsWidth((code>>shift_func3) & func3_mask); - inst.setSrcReg((code>>shift_rs2) & reg_mask); - inst.setVmask((code>>shift_vmask)); - inst.setVmop((code>>shift_vmop) & func3_mask); - inst.setVnf((code>>shift_vnf) & func3_mask); - - trace_inst->valid_inst = true; - trace_inst->rs1 = ((code>>shift_rs1) & reg_mask); - //trace_inst->vd = ((code>>shift_rd) & reg_mask); - trace_inst->vs1 = ((code>>shift_rd) & reg_mask); //vs3 - break; - default: - cout << "Inavlid opcode.\n"; - std::abort(); - } - break; - default: - cout << "Unrecognized argument class in word decoder.\n"; - std::abort(); - } - 
- if (haveRefs && usedImm && refMap.find(idx-n/8) != refMap.end()) { - Ref *srcRef = refMap[idx-n/8]; - - /* Create a new ref tied to this instruction. */ - // Ref *r = new SimpleRef(srcRef->name, *(Addr*)inst.setSrcImm(), - // inst.hasRelImm()); - // inst.setImmRef(*r); - } - - D(2, "Decoded instr 0x" << hex << code << " into: " << inst); - - return &inst; -} - diff --git a/simX/execute.cpp b/simX/execute.cpp new file mode 100644 index 00000000..b0a531c8 --- /dev/null +++ b/simX/execute.cpp @@ -0,0 +1,1950 @@ +#include +#include +#include +#include +#include +#include +#include +#include "util.h" +#include "warp.h" +#include "instr.h" +#include "core.h" + +using namespace vortex; + +struct DivergentBranchException {}; + +static bool checkUnanimous(unsigned p, + const std::vector>> &m, + const std::vector &tm) { + bool same; + unsigned i; + for (i = 0; i < m.size(); ++i) { + if (tm[i]) { + same = m[i][p]; + break; + } + } + if (i == m.size()) + throw DivergentBranchException(); + + //std::cout << "same: " << same << " with -> "; + for (; i < m.size(); ++i) { + if (tm[i]) { + //std::cout << " " << (bool(m[i][p])); + if (same != (bool(m[i][p]))) { + //std::cout << " FALSE\n"; + return false; + } + } + } + //std::cout << " TRUE\n"; + return true; +} + +void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { + /* If I try to execute a privileged instruction in user mode, throw an + exception 3. */ + if (instr.getPrivileged() && !supervisorMode_) { + D(3, "INTERRUPT SUPERVISOR\n"); + this->interrupt(3); + return; + } + + Size nextActiveThreads = activeThreads_; + Size wordSz = core_->arch().getWordSize(); + Word nextPc = pc_; + + memAccesses_.clear(); + + bool sjOnce(true); // Has not yet split or joined once. 
+ bool pcSet(false); // PC has already been set + + Word func3 = instr.getFunc3(); + Word func6 = instr.getFunc6(); + Word func7 = instr.getFunc7(); + + Opcode opcode = instr.getOpcode(); + RegNum rdest = instr.getRDest(); + RegNum rsrc0 = instr.getRSrc(0); + RegNum rsrc1 = instr.getRSrc(1); + RegNum pred = instr.getPred(); + Word immsrc = instr.getImm(); + bool vmask = instr.getVmask(); + + for (Size t = 0; t < activeThreads_; t++) { + std::vector> ® = regFile_[t]; + + bool is_gpgpu = (opcode == GPGPU); + bool is_tmc = is_gpgpu && (func3 == 0); + bool is_wspawn = is_gpgpu && (func3 == 1); + bool is_barrier = is_gpgpu && (func3 == 4); + + bool not_active = !tmask_[t]; + bool gpgpu_zero = (is_tmc || is_barrier || is_wspawn) && (t != 0); + + if (not_active || gpgpu_zero) + continue; + + ++insts_; + + switch (opcode) { + case NOP: + //std::cout << "NOP_INST\n"; + break; + case R_INST: { + // std::cout << "R_INST\n"; + Word m_exten = func7 & 0x1; + if (m_exten) { + // std::cout << "FOUND A MUL/DIV\n"; + + switch (func3) { + case 0: + // MUL + D(3, "MUL: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + reg[rdest] = ((int)reg[rsrc0]) * ((int)reg[rsrc1]); + break; + case 1: + // MULH + D(3, "MULH: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + { + int64_t first = (int64_t)reg[rsrc0]; + if (reg[rsrc0] & 0x80000000) { + first = first | 0xFFFFFFFF00000000; + } + int64_t second = (int64_t)reg[rsrc1]; + if (reg[rsrc1] & 0x80000000) { + second = second | 0xFFFFFFFF00000000; + } + // cout << "mulh: " << std::dec << first << " * " << second; + uint64_t result = first * second; + reg[rdest] = (result >> 32) & 0xFFFFFFFF; + // cout << " = " << result << " or " << reg[rdest] << "\n"; + } + break; + case 2: + // MULHSU + D(3, "MULHSU: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + { + int64_t first = (int64_t)reg[rsrc0]; + if (reg[rsrc0] & 0x80000000) { + first = first | 0xFFFFFFFF00000000; + } + int64_t second = (int64_t)reg[rsrc1]; + reg[rdest] = ((first * 
second) >> 32) & 0xFFFFFFFF; + } + break; + case 3: + // MULHU + D(3, "MULHU: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + { + uint64_t first = (uint64_t)reg[rsrc0]; + uint64_t second = (uint64_t)reg[rsrc1]; + // cout << "MULHU\n"; + reg[rdest] = ((first * second) >> 32) & 0xFFFFFFFF; + } + break; + case 4: + // DIV + D(3, "DIV: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + if (reg[rsrc1] == 0) { + reg[rdest] = -1; + break; + } + // cout << "dividing: " << std::dec << ((int) reg[rsrc0]) << " / " << ((int) reg[rsrc1]); + reg[rdest] = ((int)reg[rsrc0]) / ((int)reg[rsrc1]); + // cout << " = " << ((int) reg[rdest]) << "\n"; + break; + case 5: + // DIVU + D(3, "DIVU: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + if (reg[rsrc1] == 0) { + reg[rdest] = -1; + break; + } + reg[rdest] = ((uint32_t)reg[rsrc0]) / ((uint32_t)reg[rsrc1]); + break; + case 6: + // REM + D(3, "REM: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + if (reg[rsrc1] == 0) { + reg[rdest] = reg[rsrc0]; + break; + } + reg[rdest] = ((int)reg[rsrc0]) % ((int)reg[rsrc1]); + break; + case 7: + // REMU + D(3, "REMU: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + if (reg[rsrc1] == 0) { + reg[rdest] = reg[rsrc0]; + break; + } + reg[rdest] = ((uint32_t)reg[rsrc0]) % ((uint32_t)reg[rsrc1]); + break; + default: + std::cout << "unsupported MUL/DIV instr\n"; + std::abort(); + } + } else { + // std::cout << "NORMAL R-TYPE\n"; + switch (func3) { + case 0: + if (func7) { + D(3, "SUBI: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + reg[rdest] = reg[rsrc0] - reg[rsrc1]; + reg[rdest].trunc(wordSz); + } else { + D(3, "ADDI: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + reg[rdest] = reg[rsrc0] + reg[rsrc1]; + reg[rdest].trunc(wordSz); + } + break; + case 1: + D(3, "SLLI: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + reg[rdest] = reg[rsrc0] << reg[rsrc1]; + reg[rdest].trunc(wordSz); + break; + case 2: + D(3, "SLTI: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + 
if (int(reg[rsrc0]) < int(reg[rsrc1])) { + reg[rdest] = 1; + } else { + reg[rdest] = 0; + } + break; + case 3: + D(3, "SLTU: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + if (Word_u(reg[rsrc0]) < Word_u(reg[rsrc1])) { + reg[rdest] = 1; + } else { + reg[rdest] = 0; + } + break; + case 4: + D(3, "XORI: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + reg[rdest] = reg[rsrc0] ^ reg[rsrc1]; + break; + case 5: + if (func7) { + D(3, "SRLI: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + reg[rdest] = int(reg[rsrc0]) >> int(reg[rsrc1]); + reg[rdest].trunc(wordSz); + } else { + D(3, "SRLU: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + reg[rdest] = Word_u(reg[rsrc0]) >> Word_u(reg[rsrc1]); + reg[rdest].trunc(wordSz); + } + break; + case 6: + D(3, "ORI: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + reg[rdest] = reg[rsrc0] | reg[rsrc1]; + break; + case 7: + D(3, "ANDI: r" << rdest << " <- r" << rsrc0 << ", r" << rsrc1); + reg[rdest] = reg[rsrc0] & reg[rsrc1]; + break; + default: + std::cout << "ERROR: UNSUPPORTED R INST\n"; + std::abort(); + } + } + } break; + case L_INST: { + Word memAddr = ((reg[rsrc0] + immsrc) & 0xFFFFFFFC); + Word shift_by = ((reg[rsrc0] + immsrc) & 0x00000003) * 8; + Word data_read = core_->mem().read(memAddr, supervisorMode_); + trace_inst->is_lw = true; + trace_inst->mem_addresses[t] = memAddr; + switch (func3) { + case 0: + // LBI + D(3, "LBI: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + reg[rdest] = signExt((data_read >> shift_by) & 0xFF, 8, 0xFF); + break; + case 1: + // LWI + D(3, "LWI: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + reg[rdest] = signExt((data_read >> shift_by) & 0xFFFF, 16, 0xFFFF); + break; + case 2: + // LDI + D(3, "LDI: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + reg[rdest] = int(data_read & 0xFFFFFFFF); + break; + case 4: + // LBU + D(3, "LBU: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + 
reg[rdest] = unsigned((data_read >> shift_by) & 0xFF); + break; + case 5: + // LWU + D(3, "LWU: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + reg[rdest] = unsigned((data_read >> shift_by) & 0xFFFF); + break; + default: + std::cout << "ERROR: UNSUPPORTED L INST\n"; + std::abort(); + memAccesses_.push_back(Warp::MemAccess(false, memAddr)); + } + D(3, "LOAD MEM ADDRESS: " << std::hex << memAddr); + D(3, "LOAD MEM DATA: " << std::hex << data_read); + } break; + case I_INST: + //std::cout << "I_INST\n"; + switch (func3) { + case 0: + // ADDI + D(3, "ADDI: r" << rdest << " <- r" << rsrc0 << ", imm=" << immsrc); + reg[rdest] = reg[rsrc0] + immsrc; + reg[rdest].trunc(wordSz); + break; + case 2: + // SLTI + D(3, "SLTI: r" << rdest << " <- r" << rsrc0 << ", imm=" << immsrc); + if (int(reg[rsrc0]) < int(immsrc)) { + reg[rdest] = 1; + } else { + reg[rdest] = 0; + } + break; + case 3: { + // SLTIU + D(3, "SLTIU: r" << rdest << " <- r" << rsrc0 << ", imm=" << immsrc); + if (unsigned(reg[rsrc0]) < unsigned(immsrc)) { + reg[rdest] = 1; + } else { + reg[rdest] = 0; + } + } break; + case 4: + // XORI + D(3, "XORI: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + reg[rdest] = reg[rsrc0] ^ immsrc; + break; + case 6: + // ORI + D(3, "ORI: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + reg[rdest] = reg[rsrc0] | immsrc; + break; + case 7: + // ANDI + D(3, "ANDI: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + reg[rdest] = reg[rsrc0] & immsrc; + break; + case 1: + // SLLI + D(3, "SLLI: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + reg[rdest] = reg[rsrc0] << immsrc; + reg[rdest].trunc(wordSz); + break; + case 5: + if ((func7 == 0)) { + // SRLI + D(3, "SRLI: r" << rdest << " <- r" << rsrc0 << ", imm=" << immsrc); + Word result = Word_u(reg[rsrc0]) >> Word_u(immsrc); + reg[rdest] = result; + reg[rdest].trunc(wordSz); + } else { + // SRAI + D(3, "SRAI: r" << rdest << " 
<- r" << rsrc0 << ", imm=" << immsrc); + Word op1 = reg[rsrc0]; + Word op2 = immsrc; + reg[rdest] = op1 >> op2; + reg[rdest].trunc(wordSz); + } + break; + default: + std::cout << "ERROR: UNSUPPORTED L INST\n"; + std::abort(); + } + break; + case S_INST: { + ++stores_; + Word memAddr = reg[rsrc0] + immsrc; + trace_inst->is_sw = true; + trace_inst->mem_addresses[t] = memAddr; + // //std::cout << "FUNC3: " << func3 << "\n"; + if ((memAddr == 0x00010000) && (t == 0)) { + Word num = reg[rsrc1]; + fprintf(stderr, "%c", (char)num); + break; + } + switch (func3) { + case 0: + // SB + D(3, "SB: r" << rsrc1 << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + core_->mem().write(memAddr, reg[rsrc1] & 0x000000FF, supervisorMode_, 1); + break; + case 1: + // SH + D(3, "SH: r" << rsrc1 << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + core_->mem().write(memAddr, reg[rsrc1], supervisorMode_, 2); + break; + case 2: + // SD + D(3, "SD: r" << rsrc1 << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + core_->mem().write(memAddr, reg[rsrc1], supervisorMode_, 4); + break; + default: + std::cout << "ERROR: UNSUPPORTED S INST\n"; + std::abort(); + } + D(3, "STORE MEM ADDRESS: " << std::hex << memAddr); + memAccesses_.push_back(Warp::MemAccess(true, memAddr)); + } break; + case B_INST: + trace_inst->stall_warp = true; + switch (func3) { + case 0: + // BEQ + D(3, "BEQ: r" << rsrc0 << ", r" << rsrc1 << ", imm=0x" << std::hex << immsrc); + if (int(reg[rsrc0]) == int(reg[rsrc1])) { + if (!pcSet) + nextPc = (pc_ - 4) + immsrc; + pcSet = true; + } + break; + case 1: + // BNE + D(3, "BNE: r" << rsrc0 << ", r" << rsrc1 << ", imm=0x" << std::hex << immsrc); + if (int(reg[rsrc0]) != int(reg[rsrc1])) { + if (!pcSet) + nextPc = (pc_ - 4) + immsrc; + pcSet = true; + } + break; + case 4: + // BLT + D(3, "BLT: r" << rsrc0 << ", r" << rsrc1 << ", imm=0x" << std::hex << immsrc); + if (int(reg[rsrc0]) < int(reg[rsrc1])) { + if (!pcSet) + nextPc = (pc_ - 4) + immsrc; + pcSet = true; 
+ } + break; + case 5: + // BGE + D(3, "BGE: r" << rsrc0 << ", r" << rsrc1 << ", imm=0x" << std::hex << immsrc); + if (int(reg[rsrc0]) >= int(reg[rsrc1])) { + if (!pcSet) + nextPc = (pc_ - 4) + immsrc; + pcSet = true; + } + break; + case 6: + // BLTU + D(3, "BLTU: r" << rsrc0 << ", r" << rsrc1 << ", imm=0x" << std::hex << immsrc); + if (Word_u(reg[rsrc0]) < Word_u(reg[rsrc1])) { + if (!pcSet) + nextPc = (pc_ - 4) + immsrc; + pcSet = true; + } + break; + case 7: + // BGEU + D(3, "BGEU: r" << rsrc0 << ", r" << rsrc1 << ", imm=0x" << std::hex << immsrc); + if (Word_u(reg[rsrc0]) >= Word_u(reg[rsrc1])) { + if (!pcSet) + nextPc = (pc_ - 4) + immsrc; + pcSet = true; + } + break; + } + break; + case LUI_INST: + D(3, "LUI: r" << rdest << " <- imm=0x" << std::hex << immsrc); + reg[rdest] = (immsrc << 12) & 0xfffff000; + break; + case AUIPC_INST: + D(3, "AUIPC: r" << rdest << " <- imm=0x" << std::hex << immsrc); + reg[rdest] = ((immsrc << 12) & 0xfffff000) + (pc_ - 4); + break; + case JAL_INST: + D(3, "JAL: r" << rdest << " <- imm=0x" << std::hex << immsrc); + trace_inst->stall_warp = true; + if (!pcSet) { + nextPc = (pc_ - 4) + immsrc; + //std::cout << "JAL... SETTING PC: " << nextPc << "\n"; + } + if (rdest != 0) { + reg[rdest] = pc_; + } + pcSet = true; + break; + case JALR_INST: + D(3, "JALR: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + trace_inst->stall_warp = true; + if (!pcSet) { + nextPc = reg[rsrc0] + immsrc; + //std::cout << "JALR... 
SETTING PC: " << nextPc << "\n"; + } + if (rdest != 0) { + reg[rdest] = pc_; + } + pcSet = true; + break; + case SYS_INST: { + D(3, "SYS_INST: r" << rdest << " <- r" << rsrc0 << ", imm=0x" << std::hex << immsrc); + Word rs1 = reg[rsrc0]; + Word csr_addr = immsrc & 0x00000FFF; + // GPGPU CSR extension + if (csr_addr == CSR_WTID) { + // Warp threadID + reg[rdest] = t; + } else if (csr_addr == CSR_LTID) { + // Core threadID + reg[rdest] = t + + id_ * core_->arch().getNumThreads(); + } else if (csr_addr == CSR_GTID) { + // Processor threadID + reg[rdest] = t + + id_ * core_->arch().getNumThreads() + + core_->arch().getNumThreads() * core_->arch().getNumWarps() * core_->id(); + } else if (csr_addr == CSR_LWID) { + // Core warpID + reg[rdest] = id_; + } else if (csr_addr == CSR_GWID) { + // Processor warpID + reg[rdest] = id_ + core_->arch().getNumWarps() * core_->id(); + } else if (csr_addr == CSR_GCID) { + // Processor coreID + reg[rdest] = core_->id(); + } else if (csr_addr == CSR_NT) { + // Number of threads per warp + reg[rdest] = core_->arch().getNumThreads(); + } else if (csr_addr == CSR_NW) { + // Number of warps per core + reg[rdest] = core_->arch().getNumWarps(); + } else if (csr_addr == CSR_NC) { + // Number of cores + reg[rdest] = core_->arch().getNumCores(); + } else if (csr_addr == CSR_INSTRET) { + // NumInsts + reg[rdest] = (Word)core_->num_instructions(); + } else if (csr_addr == CSR_INSTRET_H) { + // NumInsts + reg[rdest] = (Word)(core_->num_instructions() >> 32); + } else if (csr_addr == CSR_CYCLE) { + // NumCycles + reg[rdest] = (Word)core_->num_steps(); + } else if (csr_addr == CSR_CYCLE_H) { + // NumCycles + reg[rdest] = (Word)(core_->num_steps() >> 32); + } else { + switch (func3) { + case 0: + if (csr_addr < 2) { + // ECALL/EBREAK + nextActiveThreads = 0; + spawned_ = false; + } + break; + case 1: + // CSRRW + if (rdest != 0) { + reg[rdest] = csrs_[csr_addr]; + } + csrs_[csr_addr] = rs1; + break; + case 2: + // CSRRS + if (rdest != 0) { + 
reg[rdest] = csrs_[csr_addr]; + } + csrs_[csr_addr] = rs1 | csrs_[csr_addr]; + break; + case 3: + // CSRRC + if (rdest != 0) { + reg[rdest] = csrs_[csr_addr]; + } + csrs_[csr_addr] = rs1 & (~csrs_[csr_addr]); + break; + case 5: + // CSRRWI + if (rdest != 0) { + reg[rdest] = csrs_[csr_addr]; + } + csrs_[csr_addr] = rsrc0; + break; + case 6: + // CSRRSI + if (rdest != 0) { + reg[rdest] = csrs_[csr_addr]; + } + csrs_[csr_addr] = rsrc0 | csrs_[csr_addr]; + break; + case 7: + // CSRRCI + if (rdest != 0) { + reg[rdest] = csrs_[csr_addr]; + } + csrs_[csr_addr] = rsrc0 & (~csrs_[csr_addr]); + break; + default: + break; + } + } + } break; + case FENCE: + D(3, "FENCE"); + break; + case PJ_INST: + D(3, "PJ_INST: r" << rsrc0 << ", r" << rsrc1); + if (reg[rsrc0]) { + if (!pcSet) + nextPc = reg[rsrc1]; + pcSet = true; + } + break; + case GPGPU: + switch (func3) { + case 1: + // WSPAWN + D(3, "WSPAWN: r" << rsrc0 << ", r" << rsrc1); + trace_inst->wspawn = true; + if (sjOnce) { + sjOnce = false; + unsigned num_to_wspawn = std::min(reg[rsrc0], core_->arch().getNumWarps()); + D(0, "Spawning " << num_to_wspawn << " new warps at PC: " << std::hex << reg[rsrc1]); + for (unsigned i = 1; i < num_to_wspawn; ++i) { + Warp &newWarp(core_->warp(i)); + { + newWarp.set_pc(reg[rsrc1]); + for (size_t kk = 0; kk < tmask_.size(); kk++) { + if (kk == 0) { + newWarp.setTmask(kk, true); + } else { + newWarp.setTmask(kk, false); + } + } + newWarp.setActiveThreads(1); + newWarp.setSupervisorMode(false); + newWarp.setSpawned(true); + } + } + break; + } + break; + case 2: { + // SPLIT + D(3, "SPLIT: r" << pred); + trace_inst->stall_warp = true; + if (sjOnce) { + sjOnce = false; + if (checkUnanimous(pred, regFile_, tmask_)) { + D(3, "Unanimous pred: " << pred << " val: " << reg[pred] << "\n"); + DomStackEntry e(tmask_); + e.uni = true; + domStack_.push(e); + break; + } + D(3, "Split: Original TM: "); + DX( for (auto y : tmask_) D(3, y << " "); ) + + DomStackEntry e(pred, regFile_, tmask_, pc_); + 
domStack_.push(tmask_); + domStack_.push(e); + for (unsigned i = 0; i < e.tmask.size(); ++i) { + tmask_[i] = !e.tmask[i] && tmask_[i]; + } + + D(3, "Split: New TM"); + DX( for (auto y : tmask_) D(3, y << " "); ) + D(3, "Split: Pushed TM PC: " << std::hex << e.pc << std::dec << "\n"); + DX( for (auto y : e.tmask) D(3, y << " "); ) + } + break; + } + case 3: + // JOIN + D(3, "JOIN"); + if (sjOnce) { + sjOnce = false; + if (!domStack_.empty() && domStack_.top().uni) { + D(2, "Uni branch at join"); + printf("NEW DOMESTACK: \n"); + tmask_ = domStack_.top().tmask; + domStack_.pop(); + break; + } + if (!domStack_.top().fallThrough) { + if (!pcSet) { + nextPc = domStack_.top().pc; + D(3, "join: NOT FALLTHROUGH PC: " << std::hex << nextPc << std::dec); + } + pcSet = true; + } + + D(3, "Join: Old TM: "); + DX( for (auto y : tmask_) D(3, y << " "); ) + std::cout << "\n"; + tmask_ = domStack_.top().tmask; + + D(3, "Join: New TM: "); + DX( for (auto y : tmask_) D(3, y << " "); ) + + domStack_.pop(); + } + break; + case 4: + trace_inst->stall_warp = true; + // is_barrier + break; + case 0: + // TMC + D(3, "TMC: r" << rsrc0); + trace_inst->stall_warp = true; + nextActiveThreads = std::min(reg[rsrc0], core_->arch().getNumThreads()); + { + for (size_t ff = 0; ff < tmask_.size(); ff++) { + if (ff < nextActiveThreads) { + tmask_[ff] = true; + } else { + tmask_[ff] = false; + } + } + } + if (nextActiveThreads == 0) { + spawned_ = false; + } + break; + default: + std::cout << "ERROR: UNSUPPORTED GPGPU INSTRUCTION " << instr << "\n"; + } + break; + case VSET_ARITH: { + D(3, "VSET_ARITH"); + int VLMAX = (instr.getVlmul() * VLEN_) / instr.getVsew(); + switch (func3) { + case 0: // vector-vector + trace_inst->vs1 = rsrc0; + trace_inst->vs2 = rsrc1; + trace_inst->vd = rdest; + switch (func6) { + case 0: { + D(3, "Addition " << rsrc0 << " " << rsrc1 << " Dest:" << rdest); + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + 
std::vector> &mask = vregFile_[0]; + + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *mask_ptr = (uint8_t *)mask[i].value(); + uint8_t value = (*mask_ptr & 0x1); + if (vmask || (!vmask && value)) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = *first_ptr + *second_ptr; + D(3, "Adding " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *mask_ptr = (uint16_t *)mask[i].value(); + uint16_t value = (*mask_ptr & 0x1); + if (vmask || (!vmask && value)) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = *first_ptr + *second_ptr; + D(3, "Adding " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + } + } else if (vtype_.vsew == 32) { + D(3, "Doing 32 bit vector addition"); + for (int i = 0; i < vl_; i++) { + int *mask_ptr = (int *)mask[i].value(); + int value = (*mask_ptr & 0x1); + if (vmask || (!vmask && value)) { + int *first_ptr = (int *)vr1[i].value(); + int *second_ptr = (int *)vr2[i].value(); + int result = *first_ptr + *second_ptr; + D(3, "Adding " << *first_ptr << " + " << *second_ptr << " = " << result); + + int *result_ptr = (int *)vd[i].value(); + *result_ptr = result; + } + } + } + + DX( + D(3, "Vector Register state after addition:"); + for (size_t i = 0; i < vregFile_.size(); i++) { + for (size_t j = 0; j < vregFile_[0].size(); j++) { + if (vtype_.vsew == 8) { + uint8_t *ptr_val = (uint8_t *)vregFile_[i][j].value(); + D(3, "reg[" << i << "][" << j << "] = " << *ptr_val); + } else if (vtype_.vsew == 16) { + uint16_t *ptr_val = (uint16_t *)vregFile_[i][j].value(); + D(3, "reg[" << i << "][" << j << "] = " << *ptr_val); + } else if 
(vtype_.vsew == 32) { + uint32_t *ptr_val = (uint32_t *)vregFile_[i][j].value(); + D(3, "reg[" << i << "][" << j << "] = " << *ptr_val); + } + } + } + D(3, "After vector register state after addition"); + ) + + } break; + case 24: //vmseq + { + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = (*first_ptr == *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = (*first_ptr == *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t result = (*first_ptr == *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + } + + } break; + case 25: //vmsne + { + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = (*first_ptr != *second_ptr) ? 
1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = (*first_ptr != *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t result = (*first_ptr != *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + } + + } break; + case 26: //vmsltu + { + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = (*first_ptr < *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = (*first_ptr < *second_ptr) ? 
1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t result = (*first_ptr < *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + } + + } break; + case 27: //vmslt + { + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + int8_t *first_ptr = (int8_t *)vr1[i].value(); + int8_t *second_ptr = (int8_t *)vr2[i].value(); + int8_t result = (*first_ptr < *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + int8_t *result_ptr = (int8_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + int16_t *first_ptr = (int16_t *)vr1[i].value(); + int16_t *second_ptr = (int16_t *)vr2[i].value(); + int16_t result = (*first_ptr < *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + int16_t *result_ptr = (int16_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + int32_t *first_ptr = (int32_t *)vr1[i].value(); + int32_t *second_ptr = (int32_t *)vr2[i].value(); + int32_t result = (*first_ptr < *second_ptr) ? 
1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + int32_t *result_ptr = (int32_t *)vd[i].value(); + *result_ptr = result; + } + } + } break; + case 28: //vmsleu + { + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = (*first_ptr <= *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = (*first_ptr <= *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t result = (*first_ptr <= *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + } + } break; + case 29: //vmsle + { + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + int8_t *first_ptr = (int8_t *)vr1[i].value(); + int8_t *second_ptr = (int8_t *)vr2[i].value(); + int8_t result = (*first_ptr <= *second_ptr) ? 
1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + int8_t *result_ptr = (int8_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + int16_t *first_ptr = (int16_t *)vr1[i].value(); + int16_t *second_ptr = (int16_t *)vr2[i].value(); + int16_t result = (*first_ptr <= *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + int16_t *result_ptr = (int16_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + int32_t *first_ptr = (int32_t *)vr1[i].value(); + int32_t *second_ptr = (int32_t *)vr2[i].value(); + int32_t result = (*first_ptr <= *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + int32_t *result_ptr = (int32_t *)vd[i].value(); + *result_ptr = result; + } + } + } break; + case 30: //vmsgtu + { + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = (*first_ptr > *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = (*first_ptr > *second_ptr) ? 
1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t result = (*first_ptr > *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + } + } break; + case 31: //vmsgt + { + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + int8_t *first_ptr = (int8_t *)vr1[i].value(); + int8_t *second_ptr = (int8_t *)vr2[i].value(); + int8_t result = (*first_ptr > *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + int8_t *result_ptr = (int8_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + int16_t *first_ptr = (int16_t *)vr1[i].value(); + int16_t *second_ptr = (int16_t *)vr2[i].value(); + int16_t result = (*first_ptr > *second_ptr) ? 1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + int16_t *result_ptr = (int16_t *)vd[i].value(); + *result_ptr = result; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + int32_t *first_ptr = (int32_t *)vr1[i].value(); + int32_t *second_ptr = (int32_t *)vr2[i].value(); + int32_t result = (*first_ptr > *second_ptr) ? 
1 : 0; + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + int32_t *result_ptr = (int32_t *)vd[i].value(); + *result_ptr = result; + } + } + } break; + } + break; + case 2: { + trace_inst->vs1 = rsrc0; + trace_inst->vs2 = rsrc1; + trace_inst->vd = rdest; + + switch (func6) { + case 24: //vmandnot + { + D(3, "vmandnot"); + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t first_value = (*first_ptr & 0x1); + uint8_t second_value = (*second_ptr & 0x1); + uint8_t result = (first_value & !second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t first_value = (*first_ptr & 0x1); + uint16_t second_value = (*second_ptr & 0x1); + uint16_t result = (first_value & !second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t first_value = (*first_ptr & 0x1); + uint32_t second_value = (*second_ptr & 0x1); + uint32_t result = (first_value & !second_value); + D(3, "Comparing " << *first_ptr << " + " << 
*second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + case 25: //vmand + { + D(3, "vmand"); + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t first_value = (*first_ptr & 0x1); + uint8_t second_value = (*second_ptr & 0x1); + uint8_t result = (first_value & second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t first_value = (*first_ptr & 0x1); + uint16_t second_value = (*second_ptr & 0x1); + uint16_t result = (first_value & second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + + for (int i = vl_; i < VLMAX; i++) { + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t first_value = (*first_ptr & 0x1); + uint32_t second_value = (*second_ptr & 0x1); + uint32_t result = (first_value & second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t 
*)vd[i].value(); + *result_ptr = result; + } + + for (int i = vl_; i < VLMAX; i++) { + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + case 26: //vmor + { + D(3, "vmor"); + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t first_value = (*first_ptr & 0x1); + uint8_t second_value = (*second_ptr & 0x1); + uint8_t result = (first_value | second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 16) { + uint16_t *result_ptr; + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t first_value = (*first_ptr & 0x1); + uint16_t second_value = (*second_ptr & 0x1); + uint16_t result = (first_value | second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + } else if (vtype_.vsew == 32) { + uint32_t *result_ptr; + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t first_value = (*first_ptr & 0x1); + uint32_t second_value = (*second_ptr & 0x1); + uint32_t result = (first_value | second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + D(3, 
"VLMAX: " << VLMAX); + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + case 27: //vmxor + { + D(3, "vmxor"); + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + uint8_t *result_ptr; + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t first_value = (*first_ptr & 0x1); + uint8_t second_value = (*second_ptr & 0x1); + uint8_t result = (first_value ^ second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + } else if (vtype_.vsew == 16) { + uint16_t *result_ptr; + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t first_value = (*first_ptr & 0x1); + uint16_t second_value = (*second_ptr & 0x1); + uint16_t result = (first_value ^ second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + uint32_t *result_ptr; + + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t first_value = (*first_ptr & 0x1); + uint32_t second_value = (*second_ptr & 0x1); + uint32_t result = (first_value ^ second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; 
i++) { + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + case 28: //vmornot + { + D(3, "vmornot"); + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t first_value = (*first_ptr & 0x1); + uint8_t second_value = (*second_ptr & 0x1); + uint8_t result = (first_value | !second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t first_value = (*first_ptr & 0x1); + uint16_t second_value = (*second_ptr & 0x1); + uint16_t result = (first_value | !second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t first_value = (*first_ptr & 0x1); + uint32_t second_value = (*second_ptr & 0x1); + uint32_t result = (first_value | !second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + 
*result_ptr = 0; + } + } + } break; + case 29: //vmnand + { + D(3, "vmnand"); + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t first_value = (*first_ptr & 0x1); + uint8_t second_value = (*second_ptr & 0x1); + uint8_t result = !(first_value & second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint8_t *result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t first_value = (*first_ptr & 0x1); + uint16_t second_value = (*second_ptr & 0x1); + uint16_t result = !(first_value & second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + + for (int i = vl_; i < VLMAX; i++) { + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t first_value = (*first_ptr & 0x1); + uint32_t second_value = (*second_ptr & 0x1); + uint32_t result = !(first_value & second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + + for (int i = vl_; i < VLMAX; i++) { + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + case 30: //vmnor + { + 
D(3, "vmnor"); + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + uint8_t *result_ptr; + + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t first_value = (*first_ptr & 0x1); + uint8_t second_value = (*second_ptr & 0x1); + uint8_t result = !(first_value | second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t first_value = (*first_ptr & 0x1); + uint16_t second_value = (*second_ptr & 0x1); + uint16_t result = !(first_value | second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint16_t *result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t first_value = (*first_ptr & 0x1); + uint32_t second_value = (*second_ptr & 0x1); + uint32_t result = !(first_value | second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + uint32_t *result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + case 31: //vmxnor + { + D(3, "vmxnor"); + uint8_t *result_ptr; + + std::vector> &vr1 = 
vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t first_value = (*first_ptr & 0x1); + uint8_t second_value = (*second_ptr & 0x1); + uint8_t result = !(first_value ^ second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + } else if (vtype_.vsew == 16) { + uint16_t *result_ptr; + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t first_value = (*first_ptr & 0x1); + uint16_t second_value = (*second_ptr & 0x1); + uint16_t result = !(first_value ^ second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + uint32_t *result_ptr; + + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t first_value = (*first_ptr & 0x1); + uint32_t second_value = (*second_ptr & 0x1); + uint32_t result = !(first_value ^ second_value); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + case 37: //vmul + { + D(3, "vmul"); + uint8_t *result_ptr; + + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; 
+ std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = (*first_ptr * *second_ptr); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + } else if (vtype_.vsew == 16) { + uint16_t *result_ptr; + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = (*first_ptr * *second_ptr); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + uint32_t *result_ptr; + + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t result = (*first_ptr * *second_ptr); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + case 45: //vmacc + { + D(3, "vmacc"); + uint8_t *result_ptr; + + std::vector> &vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = (*first_ptr * *second_ptr); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + 
result_ptr = (uint8_t *)vd[i].value(); + *result_ptr += result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + } else if (vtype_.vsew == 16) { + uint16_t *result_ptr; + for (int i = 0; i < vl_; i++) { + uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = (*first_ptr * *second_ptr); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr += result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + uint32_t *result_ptr; + + for (int i = 0; i < vl_; i++) { + uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t result = (*first_ptr * *second_ptr); + D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); + + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr += result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + } + } break; + case 6: { + switch (func6) { + case 0: { + D(3, "vmadd.vx"); + uint8_t *result_ptr; + + //vector> & vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + //uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = (reg[rsrc0] + *second_ptr); + D(3, "Comparing " << reg[rsrc0] << " + " << *second_ptr << " = " << result); + + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + } else if (vtype_.vsew == 16) { + uint16_t *result_ptr; + for (int i = 0; i < vl_; i++) { + //uint16_t *first_ptr = (uint16_t 
*)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = (reg[rsrc0] + *second_ptr); + D(3, "Comparing " << reg[rsrc0] << " + " << *second_ptr << " = " << result); + + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + uint32_t *result_ptr; + + for (int i = 0; i < vl_; i++) { + //uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t result = (reg[rsrc0] + *second_ptr); + D(3, "Comparing " << reg[rsrc0] << " + " << *second_ptr << " = " << result); + + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + case 37: //vmul.vx + { + D(3, "vmul.vx"); + uint8_t *result_ptr; + + //vector> & vr1 = vregFile_[rsrc0]; + std::vector> &vr2 = vregFile_[rsrc1]; + std::vector> &vd = vregFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + //uint8_t *first_ptr = (uint8_t *)vr1[i].value(); + uint8_t *second_ptr = (uint8_t *)vr2[i].value(); + uint8_t result = (reg[rsrc0] * *second_ptr); + D(3, "Comparing " << reg[rsrc0] << " + " << *second_ptr << " = " << result); + + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint8_t *)vd[i].value(); + *result_ptr = 0; + } + } else if (vtype_.vsew == 16) { + uint16_t *result_ptr; + for (int i = 0; i < vl_; i++) { + //uint16_t *first_ptr = (uint16_t *)vr1[i].value(); + uint16_t *second_ptr = (uint16_t *)vr2[i].value(); + uint16_t result = (reg[rsrc0] * *second_ptr); + D(3, "Comparing " << reg[rsrc0] << " + " << *second_ptr << " = " << result); + + result_ptr = (uint16_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = 
(uint16_t *)vd[i].value(); + *result_ptr = 0; + } + + } else if (vtype_.vsew == 32) { + uint32_t *result_ptr; + + for (int i = 0; i < vl_; i++) { + //uint32_t *first_ptr = (uint32_t *)vr1[i].value(); + uint32_t *second_ptr = (uint32_t *)vr2[i].value(); + uint32_t result = (reg[rsrc0] * *second_ptr); + D(3, "Comparing " << reg[rsrc0] << " + " << *second_ptr << " = " << result); + + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = result; + } + for (int i = vl_; i < VLMAX; i++) { + result_ptr = (uint32_t *)vd[i].value(); + *result_ptr = 0; + } + } + } break; + } + } break; + case 7: { + vtype_.vill = 0; //TODO + vtype_.vediv = instr.getVediv(); + vtype_.vsew = instr.getVsew(); + vtype_.vlmul = instr.getVlmul(); + + D(3, "lmul:" << vtype_.vlmul << " sew:" << vtype_.vsew << " ediv: " << vtype_.vediv << "rsrc_" << reg[rsrc0] << "VLMAX" << VLMAX); + + int s0 = reg[rsrc0]; + + if (s0 <= VLMAX) { + vl_ = s0; + } else if (s0 < (2 * VLMAX)) { + vl_ = (int)ceil((s0 * 1.0) / 2.0); + D(3, "Length:" << vl_ << ceil(s0 / 2)); + } else if (s0 >= (2 * VLMAX)) { + vl_ = VLMAX; + } + + reg[rdest] = vl_; + D(3, "VL:" << reg[rdest]); + + Word regNum(0); + + vregFile_.clear(); + for (int j = 0; j < 32; j++) { + vregFile_.push_back(std::vector>()); + for (Word i = 0; i < (VLEN_ / instr.getVsew()); ++i) { + int *elem_ptr = (int *)malloc(instr.getVsew() / 8); + for (Word f = 0; f < (instr.getVsew() / 32); f++) + elem_ptr[f] = 0; + vregFile_[j].push_back(Reg(id_, regNum++, (char *)elem_ptr)); + } + } + } break; + default: { + std::cout << "default???\n" << std::flush; + } + } + } break; + case VL: { + D(3, "Executing vector load"); + + D(3, "lmul: " << vtype_.vlmul << " VLEN:" << VLEN_ << "sew: " << vtype_.vsew); + D(3, "src: " << rsrc0 << " " << reg[rsrc0]); + D(3, "dest" << rdest); + D(3, "width" << instr.getVlsWidth()); + + std::vector> &vd = vregFile_[rdest]; + + switch (instr.getVlsWidth()) { + case 6: //load word and unit strided (not checking for unit stride) + { + for (int i = 
0; i < vl_; i++) { + Word memAddr = ((reg[rsrc0]) & 0xFFFFFFFC) + (i * vtype_.vsew / 8); + Word data_read = core_->mem().read(memAddr, supervisorMode_); + D(3, "Mem addr: " << std::hex << memAddr << " Data read " << data_read); + int *result_ptr = (int *)vd[i].value(); + *result_ptr = data_read; + + trace_inst->is_lw = true; + trace_inst->mem_addresses[i] = memAddr; + } + D(3, "Vector Register state ----:"); + // cout << "Finished loop" << std::endl; + } + // cout << "aaaaaaaaaaaaaaaaaaaaaa" << std::endl; + break; + default: { + std::cout << "Serious default??\n" << std::flush; + } break; + } + break; + } break; + case VS: + for (int i = 0; i < vl_; i++) { + // cout << "iter" << std::endl; + ++stores_; + Word memAddr = reg[rsrc0] + (i * vtype_.vsew / 8); + // std::cout << "STORE MEM ADDRESS *** : " << std::hex << memAddr << "\n"; + + trace_inst->is_sw = true; + trace_inst->mem_addresses[i] = memAddr; + + switch (instr.getVlsWidth()) { + case 6: //store word and unit strided (not checking for unit stride) + { + uint32_t *ptr_val = (uint32_t *)vregFile_[instr.getVs3()][i].value(); + D(3, "value: " << std::flush << (*ptr_val) << std::flush); + core_->mem().write(memAddr, *ptr_val, supervisorMode_, 4); + D(3, "store: " << memAddr << " value:" << *ptr_val << std::flush); + } break; + default: + std::cout << "ERROR: UNSUPPORTED S INST\n" << std::flush; + std::abort(); + } + // cout << "Loop finished" << std::endl; + } + + // cout << "After for loop" << std::endl; + break; + default: + D(3, "pc: " << std::hex << (pc_ - 4)); + D(3, "ERROR: Unsupported instruction: " << instr); + std::abort(); + } + } + + activeThreads_ = nextActiveThreads; + + // This way, if pc was set by a side effect (such as interrupt), it will + // retain its new value. + if (pcSet) { + pc_ = nextPc; + D(3, "Next PC: " << std::hex << nextPc << std::dec); + } + + if (nextActiveThreads > regFile_.size()) { + std::cerr << "Error: attempt to spawn " << nextActiveThreads << " threads. 
" + << regFile_.size() << " available.\n"; + abort(); + } +} diff --git a/simX/include/archdef.h b/simX/include/archdef.h deleted file mode 100644 index 1b4b2f3e..00000000 --- a/simX/include/archdef.h +++ /dev/null @@ -1,121 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#ifndef __ARCHDEF_H -#define __ARCHDEF_H - -#include -#include - -#include -#include -#include "types.h" - -namespace Harp { - class ArchDef { - public: - struct Undefined {}; - - ArchDef(const std::string &s, int num_warps = 32, int num_threads = 32) { - std::istringstream iss(s.c_str()); - - wordSize = 4; - encChar = 'w'; - nRegs = 32; - nPRegs = 0; - nWarps = num_warps; - nThds = num_threads; - - extent = EXT_WARPS; - - // if (!iss) { extent = EXT_NULL; return; } - // iss >> encChar; - // if (!iss) { extent = EXT_WORDSIZE; return; } - // iss >> nRegs; - // if (!iss) { extent = EXT_ENC; return; } - // char sep; - // iss >> sep >> nPRegs; - // if (!iss || sep != '/') { extent = EXT_REGS; return; } - // iss >> sep >> nThds; - // if (!iss || sep != '/') { extent = EXT_PREGS; return; } - // iss >> sep >> nWarps; - // if (!iss || sep != '/') { extent = EXT_THDS; return; } - // extent = EXT_WARPS; - } - - operator std::string () const { - if (extent == EXT_NULL) return ""; - - std::ostringstream oss; - if (extent >= EXT_WORDSIZE) oss << wordSize; - if (extent >= EXT_ENC ) oss << encChar; - if (extent >= EXT_REGS ) oss << nRegs; - if (extent >= EXT_PREGS ) oss << '/' << nPRegs; - if (extent >= EXT_THDS ) oss << '/' << nThds; - if (extent >= EXT_WARPS ) oss << '/' << nWarps; - - return oss.str(); - } - - bool operator==(const ArchDef &r) const { - Extent minExtent(r.extent > extent ? extent : r.extent); - - // Can't be equal if we can't specify a binary encoding at all. 
- if (minExtent < EXT_PREGS) return false; - - if (minExtent >= EXT_WORDSIZE) { if (wordSize!=r.wordSize) return false; } - if (minExtent >= EXT_ENC ) { if (encChar != r.encChar) return false; } - if (minExtent >= EXT_REGS ) { if (nRegs != r.nRegs) return false; } - if (minExtent >= EXT_PREGS ) { if (nPRegs != r.nPRegs) return false; } - if (minExtent >= EXT_THDS ) { if (nThds != r.nThds) return false; } - if (minExtent >= EXT_WARPS ) { if (nWarps != r.nWarps) return false; } - - return true; - } - - bool operator!=(const ArchDef &r) const { return !(*this == r); } - - Size getWordSize() const { - if (extent < EXT_WORDSIZE) throw Undefined(); else return wordSize; - } - - char getEncChar() const { - if (extent -#include -#include -#include - -namespace HarpTools { - struct BadArg { BadArg(std::string s) : arg(s) {} std::string arg; }; - - class CommandLineArg { - public: - CommandLineArg(std::string s, std::string l, const char *helpText); - CommandLineArg(std::string l, const char *helpText); - virtual int read(int argc, char** argv) = 0; - - static void readArgs(int argc, char **argv); - static void clearArgs(); - static void showHelp(std::ostream &os); - - private: - static std::string helpString; - static std::map longArgs; - static std::map shortArgs; - }; - - template class CommandLineArgSetter : public CommandLineArg { - public: - CommandLineArgSetter(std::string s, std::string l, const char *ht, T &x) : - CommandLineArg(s, l, ht), x(x) {} - CommandLineArgSetter(std::string l, const char *ht, T &x) : - CommandLineArg(l, ht), x(x) {} - - int read(int argc, char **argv) { - std::istringstream iss(argv[1]); - iss >> x; - return 1; - } - private: - T &x; - }; - - class CommandLineArgFlag : public CommandLineArg { - public: - CommandLineArgFlag(std::string s, std::string l, const char *ht, bool &x) : - CommandLineArg(s, l, ht), x(x) { x = false; } - CommandLineArgFlag(std::string l, const char *ht, bool &x) : - CommandLineArg(l, ht), x(x) { x = false; } - - int 
read(int argc, char **argv) { x = true; return 0; } - private: - bool &x; - }; - -} - -#endif diff --git a/simX/include/asm-tokens.h b/simX/include/asm-tokens.h deleted file mode 100644 index 792c2f92..00000000 --- a/simX/include/asm-tokens.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef HARPTOOL_ASM_TOKENS -#define HARPTOOL_ASM_TOKENS - -namespace HarpTools { - enum AsmTokens { - ASM_T_DIR_DEF = 1, ASM_T_DIR_PERM, ASM_T_DIR_BYTE, ASM_T_DIR_WORD, - ASM_T_DIR_SPACE, ASM_T_DIR_STRING, ASM_T_DIR_ALIGN, ASM_T_DIR_ENTRY, - ASM_T_DIR_GLOBAL, ASM_T_DIR_ARG_NUM, ASM_T_DIR_ARG_STRING, - ASM_T_DIR_ARG_SYM, ASM_T_DIR_ARG_R, ASM_T_DIR_ARG_W, ASM_T_DIR_ARG_X, - ASM_T_DIR_END, ASM_T_LABEL, ASM_T_PRED, ASM_T_INST, - ASM_T_PREG, ASM_T_REG, ASM_T_REG_RA, ASM_T_REG_SP, - ASM_T_REG_FP, ASM_T_LIT, ASM_T_SYM, ASM_T_PEXP - }; -} - -#endif diff --git a/simX/include/core.h b/simX/include/core.h deleted file mode 100644 index 19087a18..00000000 --- a/simX/include/core.h +++ /dev/null @@ -1,206 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. 
Kersey, Summer 2011 -*******************************************************************************/ -#ifndef __CORE_H -#define __CORE_H - -#include -#include -#include -#include -#include - -#include "types.h" -#include "archdef.h" -#include "enc.h" -#include "mem.h" -#include "debug.h" - -#include "Vcache_simX.h" -#include "verilated.h" - -#ifdef VCD_OUTPUT -#include -#endif - - -#include "trace.h" - -namespace Harp { - -#ifdef EMU_INSTRUMENTATION - void reg_doWrite(Word cpuId, Word regNum); - void reg_doRead(Word cpuId, Word regNum); -#endif - - template class Reg { - public: - Reg(): val(0), cpuId(0), regNum(0) {} - Reg(Word c, Word n): val(0), cpuId(c), regNum(n) {} - Reg(Word c, Word n, T v): val(v), cpuId(c), regNum(n) {} - - - Reg &operator=(T r) { if (regNum) {val = r; doWrite();} return *this; } - - operator T() const { doRead(); return val; } - - void trunc(Size s) { - Word mask((~0ull >> (sizeof(Word)-s)*8)); - val &= mask; - } - - T val; - private: - Word cpuId, regNum; -// T val; - -#ifdef EMU_INSTRUMENTATION - /* Access size here is 8, representing the register size of 64-bit cores. 
*/ - void doWrite() const { reg_doWrite(cpuId, regNum); } - void doRead() const { reg_doRead(cpuId, regNum); } -#else - void doWrite() const {} - void doRead() const {} -#endif - }; - - // Entry in the IPDOM Stack - struct DomStackEntry { - DomStackEntry( - unsigned p, const std::vector > >& m, - std::vector &tm, Word pc - ): pc(pc), fallThrough(false), uni(false) - { - std::cout << "DomStackEntry TMASK: "; - for (unsigned i = 0; i < m.size(); ++i) - { - std::cout << " " << (!bool(m[i][p]) && tm[i]); - tmask.push_back(!bool(m[i][p]) && tm[i]); - } - std::cout << "\n"; - } - - DomStackEntry(const std::vector &tmask): - tmask(tmask), fallThrough(true), uni(false) {} - - std::vector tmask; - Word pc; - bool fallThrough; - bool uni; - }; - - struct vtype - { - int vill; - int vediv; - int vsew; - int vlmul; - }; - - class Warp; - - class Core { - public: - Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id=0); - - Vcache_simX * cache_simulator; - VerilatedVcdC * m_trace; - - bool renameTable[32][32]; - bool vecRenameTable[32]; - bool stallWarp[32]; - bool foundSchedule; - - trace_inst_t inst_in_fetch; - trace_inst_t inst_in_decode; - trace_inst_t inst_in_scheduler; - trace_inst_t inst_in_exe; - trace_inst_t inst_in_lsu; - trace_inst_t inst_in_wb; - - bool release_warp; - int release_warp_num; - - bool interrupt(Word r0); - bool running() const; - - void getCacheDelays(trace_inst_t *); - void warpScheduler(); - void fetch(); - void decode(); - void scheduler(); - void execute_unit(); - void load_store(); - void writeback(); - - void step(); - - void printStats() const; - - const ArchDef &a; - Decoder &iDec; - MemoryUnit &mem; - - Word interruptEntry; - - unsigned long steps; - unsigned long num_cycles; - unsigned long num_instructions; - std::vector w; - std::map > b; // Barriers - int schedule_w; - }; - - class Warp { - public: - Warp(Core *c, Word id=0); - - void step(trace_inst_t *); - bool interrupt(Word r0); - bool running() const { return activeThreads; } 
-#ifdef EMU_INSTRUMENTATION - bool getSupervisorMode() const { return supervisorMode; } -#endif - - void printStats() const; - - struct MemAccess { - MemAccess(bool w, Word a): wr(w), addr(a) {} - bool wr; - Word addr; - }; - std::vector memAccesses; - -// private: - Core *core; - - Word pc, shadowPc, id; - Size activeThreads, shadowActiveThreads; - std::vector > > reg; - std::vector > > pred; - std::vector > csr; - - std::vector tmask, shadowTmask; - std::stack domStack; - - std::vector shadowReg; - std::vector shadowPReg; - - //Vector CSR - struct vtype vtype; //both of them are XLEN WIDE - int vl; //both of them are XLEN WIDE - Word VLEN; //Total vector length - - std::vector>> vreg; // 32 vector registers - - bool interruptEnable, shadowInterruptEnable; - bool supervisorMode, shadowSupervisorMode; - bool spawned; - - unsigned long steps, insts, loads, stores; - - friend class Instruction; - }; -} - -#endif diff --git a/simX/include/debug.h b/simX/include/debug.h deleted file mode 100644 index 81b428e7..00000000 --- a/simX/include/debug.h +++ /dev/null @@ -1,47 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. 
Kersey, Spring 2013 -*******************************************************************************/ -#ifndef __DEBUG_H -#define __DEBUG_H - -//#define USE_DEBUG 9 - -#ifdef USE_DEBUG -#include - -#define D(lvl, x) do { \ - using namespace std; \ - if ((lvl) <= USE_DEBUG) { \ - cout << "DEBUG " << __FILE__ << ':' << dec << __LINE__ << ": " \ - << x << endl; \ - } \ -} while(0) - -#define DPH(lvl, x) do { \ - using namespace std; \ - if ((lvl) <= USE_DEBUG) { \ - cout << "DEBUG " << __FILE__ << ':' << dec << __LINE__ << ": " \ - << x; \ - } \ -} while(0) - -#define DPN(lvl, x) do { \ - using namespace std; \ - if ((lvl) <= USE_DEBUG) { \ - cout << x; \ - } \ -} while(0) - -#define D_RAW(x) do { \ - std::cout << x; \ -} while (0) - -#else - -#define D(lvl, x) do {} while(0) -#define DPH(lvl, x) do {} while(0) -#define DPN(lvl, x) do {} while(0) -#define D_RAW(x) do {} while(0) -#endif - -#endif diff --git a/simX/include/enc.h b/simX/include/enc.h deleted file mode 100644 index 37b054ee..00000000 --- a/simX/include/enc.h +++ /dev/null @@ -1,80 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. 
Kersey, Summer 2011 -*******************************************************************************/ -#ifndef __ENC_H -#define __ENC_H - -#include - -#include "types.h" -#include "instruction.h" -#include "obj.h" -#include "trace.h" - // } trace_inst_t; - -namespace Harp { - class DataChunk; - class TextChunk; - class Ref; - - class Encoder { - public: - Encoder() {} - virtual ~Encoder() {} - - virtual Size encode(Ref *&ref, std::vector &v, Size n, - Instruction &i) = 0; - void encodeChunk(DataChunk &dest, const TextChunk &src); - }; - - class Decoder { - public: - Decoder() : haveRefs(false) {} - Decoder(const std::vector &refVec) : haveRefs(true) { - setRefs(refVec); - } - - virtual ~Decoder() {} - - void setRefs(const std::vector &); - void clearRefs() { refMap.clear(); } - virtual Instruction *decode(const std::vector &v, Size &n, trace_inst_t * trace_inst) = 0; - virtual Instruction *decode(const std::vector &v, Size &n) = 0; - void decodeChunk(TextChunk &dest, const DataChunk &src); - protected: - bool haveRefs; - std::map refMap; - }; - - class WordDecoder : public Decoder { - public: - WordDecoder(const ArchDef &); - virtual Instruction *decode(const std::vector &v, Size &n, trace_inst_t * trace_inst); - virtual Instruction *decode(const std::vector &v, Size &n) { - printf("Not implemented\n"); - return nullptr; - } - - private: - Size n, o, r, p, i1, i2, i3; - Word oMask, rMask, pMask, i1Mask, i2Mask, i3Mask; - - // FARES - Size inst_s, opcode_s, reg_s, func3_s; - Size shift_opcode, shift_rd, shift_rs1, shift_rs2, shift_func3, shift_func7; - Size shift_j_u_immed, shift_s_b_immed, shift_i_immed; - //Vector - Size shift_vset, shift_vset_immed, shift_vmask, shift_vmop, shift_vnf, shift_func6; - Size vmask_s, mop_s; - - - - - Word reg_mask, func3_mask, func7_mask, opcode_mask, i_immed_mask, - s_immed_mask, b_immed_mask, u_immed_mask, j_immed_mask, v_immed_mask, func6_mask; - - }; - -} - -#endif diff --git a/simX/include/harpfloat.h b/simX/include/harpfloat.h 
deleted file mode 100644 index c8cdbcfc..00000000 --- a/simX/include/harpfloat.h +++ /dev/null @@ -1,123 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#include -#include - -#include "types.h" - -#ifdef DEBUG -#include - -#define DEBUGMSG(x) do { \ - std::cout << __FILE__ << ':' <<__LINE__ << ": " << x << '\n'; \ -} while(0) -#else -#define DEBUGMSG(x) do { } while(0) -#endif - -namespace Harp { - // This class serves to handle the strange-precision floating point that can - // crop up in HARP. - class Float { - public: - Float(Word_u bin, Size n): sz(n) { - DEBUGMSG("Float(0x" << std::hex << bin << ", " << std::dec << n << ')'); - - bool sign(bin >> (n*8 - 1)); - - Size expSz; - if (n < 4) { - expSz = 5; - } else if (n < 8) { - expSz = 8; - } else { - expSz = 11; - } - - Size sigSz = n*8 - expSz - 1; - - DEBUGMSG(" exp: " << std::dec << expSz << - " bits, sig: " << std::dec << sigSz << " bits."); - - int exp = (bin >> sigSz) & ((1< Word_u: " << d); - Size expSz; - if (sz < 4) { - expSz = 5; - } else if (sz < 8) { - expSz = 8; - } else { - expSz = 11; - } - - Size sigSz = 8*sz - expSz - 1; - - bool sign(d < 0); - - bool inf(std::isinf(d)), zero(d == 0.0); - int exp; - - if (!inf && !zero) exp = floor(log2(fabs(d))); - - Word_u rval; - if (inf) { - // Infinity - DEBUGMSG(" Inf."); - rval = ((1llu<double " << d); return d; } - - private: - double d; - Size sz; - }; -} diff --git a/simX/include/help.h b/simX/include/help.h deleted file mode 100644 index 802bf501..00000000 --- a/simX/include/help.h +++ /dev/null @@ -1,37 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#ifndef __HELP_H -#define __HELP_H - -/* Help messages. 
*/ -namespace HarpTools { - namespace Help { - const char *mainHelp = - "--help, -h, no arguments\n" - " Print this message.\n" - "-E, --emu; -A, --asm; -L, --ld; -D, --disasm\n" - " Invoke the emulator, assembler, linker, and disassembler, " - "respectively.\n" - " --help\n" - " Display contextual help.\n", - *emuHelp = "HARP Emulator command line arguments:\n" - " -c, --core RAM image\n" - " -a, --arch Architecture string\n" - " -s, --stats Print stats on exit.\n" - " -b, --basic Disable virtual memory.\n" - " -i, --batch Disable console input.\n", - *asmHelp = "HARP Assembler command line arguments:\n" - " -a, --arch \n" - " -o, --output \n", - *ldHelp = "HARP Linker command line arguments:\n" - " -o, --output \n" - " -a, --arch \n" - " -f, --format \n" - " --offset \n", - *disasmHelp = "HARP Disassembler command line arguments:\n" - " -a, --arch Architecture string.\n" - " -o, --output Output filename.\n"; - } -} -#endif diff --git a/simX/include/instruction.h b/simX/include/instruction.h deleted file mode 100644 index 4df21236..00000000 --- a/simX/include/instruction.h +++ /dev/null @@ -1,175 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#ifndef __INSTRUCTION_H -#define __INSTRUCTION_H - -#include -#include -#include - -#include "types.h" -#include "trace.h" -namespace Harp { - class Warp; - class Ref; - - enum Opcode - { - NOP = 0, - R_INST = 51, - L_INST = 3, - I_INST = 19, - S_INST = 35, - B_INST = 99, - LUI_INST = 55, - AUIPC_INST = 23, - JAL_INST = 111, - JALR_INST = 103, - SYS_INST = 115, - TRAP = 0x7f, - FENCE = 0x0f, - PJ_INST = 0x7b, - GPGPU = 0x6b, - VSET_ARITH = 0x57, - VL = 0x7, - VS = 0x27, - }; - - enum InstType { N_TYPE, R_TYPE, I_TYPE, S_TYPE, B_TYPE, U_TYPE, J_TYPE, V_TYPE}; - - // We build a table of instruction information out of this. 
- struct InstTableEntry_t { - const char *opString; - bool controlFlow, relAddress, allSrcArgs, privileged; - InstType iType; - - }; - - static std::map instTable = - { - {Opcode::NOP, {"nop" , false, false, false, false, InstType::N_TYPE }}, - {Opcode::R_INST, {"r_type", false, false, false, false, InstType::R_TYPE }}, - {Opcode::L_INST, {"load" , false, false, false, false, InstType::I_TYPE }}, - {Opcode::I_INST, {"i_type", false, false, false, false, InstType::I_TYPE }}, - {Opcode::S_INST, {"store" , false, false, false, false, InstType::S_TYPE }}, - {Opcode::B_INST, {"branch", true , false, false, false, InstType::B_TYPE }}, - {Opcode::LUI_INST, {"lui" , false, false, false, false, InstType::U_TYPE }}, - {Opcode::AUIPC_INST, {"auipc" , false, false, false, false, InstType::U_TYPE }}, - {Opcode::JAL_INST, {"jal" , true , false, false, false, InstType::J_TYPE }}, - {Opcode::JALR_INST, {"jalr" , true , false, false, false, InstType::I_TYPE }}, - {Opcode::SYS_INST, {"SYS" , true , false, false, false, InstType::I_TYPE }}, - {Opcode::TRAP, {"TRAP" , true , false, false, false, InstType::I_TYPE }}, - {Opcode::FENCE, {"fence" , true , false, false, false, InstType::I_TYPE }}, - {Opcode::PJ_INST, {"pred j", true , false, false, false, InstType::R_TYPE }}, - {Opcode::GPGPU, {"gpgpu" , false, false, false, false, InstType::R_TYPE }}, - {Opcode::VSET_ARITH, {"vsetvl" , false, false, false, false, InstType::V_TYPE }}, - {Opcode::VL, {"vl" , false, false, false, false, InstType::V_TYPE }}, - {Opcode::VS, {"vs" , false, false, false, false, InstType::V_TYPE }} - }; - - static const Size MAX_REG_SOURCES(3); - static const Size MAX_PRED_SOURCES(2); - - class Instruction; - - struct DivergentBranchException {}; - struct DomainException {}; - - std::ostream &operator<<(std::ostream &, Instruction &); - - class Instruction { - public: - Instruction() : - predicated(false), nRsrc(0), nPsrc(0), immsrcPresent(false), - rdestPresent(false), pdestPresent(false), refLiteral(NULL) - { - 
} - - void executeOn(Warp &warp, trace_inst_t *); - friend std::ostream &operator<<(std::ostream &, Instruction &); - - /* Setters used to "craft" the instruction. */ - void setOpcode (Opcode opc) { op = opc; } - void setPred (RegNum pReg) { predicated = true; pred = pReg; } - void setDestReg (RegNum destReg) { rdestPresent = true; rdest = destReg; } - void setSrcReg (RegNum srcReg) { rsrc[nRsrc++] = srcReg; } - void setFunc3 (Word func3) { this->func3 = func3; } - void setFunc7 (Word func7) { this->func7 = func7; } - void setDestPReg(RegNum dPReg) { pdestPresent = true; pdest = dPReg; } - void setSrcPReg (RegNum srcPReg) { psrc[nPsrc++] = srcPReg; } - Word *setSrcImm () { immsrcPresent = true; immsrc = 0xa5; return &immsrc;} - void setSrcImm (Word srcImm) { immsrcPresent = true; immsrc = srcImm; } - void setImmRef (Ref &r) { refLiteral = &r; } - void setVsetImm (Word vset_imm) { if(vset_imm) vsetImm = true; else vsetImm = false; } - void setVlsWidth (Word width) { vlsWidth = width; } - void setVmop( Word mop) { vMop = mop; } - void setVnf(Word nf) { vNf = nf; } - void setVmask(Word mask) { vmask = mask; } - void setVs3(Word vs) { vs3 = vs; } - void setvlmul(Word lmul) { vlmul = pow(2, lmul); } - void setvsew(Word sew) { vsew = pow(2, 3+sew); } - void setvediv(Word ediv) { vediv = pow(2,ediv); } - void setFunc6(Word func6) { this->func6 = func6; } - - /* Getters used by encoders. 
*/ - Opcode getOpcode() const { return op; } - bool hasPred() const { return predicated; } - RegNum getPred() const { return pred; } - RegNum getNRSrc() const { return nRsrc; } - RegNum getRSrc(RegNum i) const { return rsrc[i]; } - RegNum getNPSrc() const { return nPsrc; } - RegNum getPSrc(RegNum i) const { return psrc[i]; } - bool hasRDest() const { return rdestPresent; } - RegNum getRDest() const { return rdest; } - bool hasPDest() const { return pdestPresent; } - RegNum getPDest() const { return pdest; } - bool hasImm() const { return immsrcPresent; } - Word getImm() const { return immsrc; } - bool hasRefLiteral() const { return refLiteral != NULL; } - Ref *getRefLiteral() const { return refLiteral; } - bool getVsetImm() const { return vsetImm; } - Word getVlsWidth() const { return vlsWidth; } - Word getVmop() const { return vMop; } - Word getvNf() const { return vNf; } - bool getVmask() const { return vmask; } - Word getVs3() const { return vs3; } - Word getvlmul() const { return vlmul; } - Word getvsew() const { return vsew; } - Word getvediv() const { return vediv; } - - - /* Getters used as table lookup. 
*/ - bool hasRelImm() const { return (*(instTable.find(op))).second.relAddress; } - - private: - bool predicated; - RegNum pred; - Opcode op; - int nRsrc, nPsrc; - RegNum rsrc[MAX_REG_SOURCES], psrc[MAX_PRED_SOURCES]; - bool immsrcPresent; - Word immsrc; - Word func3; - Word func7; - bool rdestPresent, pdestPresent; - RegNum rdest, pdest; - Ref *refLiteral; - - //Vector - bool vsetImm, vmask; - Word vlsWidth, vMop, vNf, vs3, vlmul, vsew, vediv, func6; - - public: - - - }; -} - -#endif - - // static struct InstTableEntry { - // const char *opString; - // bool controlFlow, relAddress, allSrcArgs, privileged; - // InstType iType; - // }; \ No newline at end of file diff --git a/simX/include/obj.h b/simX/include/obj.h deleted file mode 100644 index c64607bb..00000000 --- a/simX/include/obj.h +++ /dev/null @@ -1,210 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#ifndef __OBJ_H -#define __OBJ_H - -#include -#include -#include -#include -#include - -#include "types.h" -#include "archdef.h" -#include "instruction.h" -#include "enc.h" -#include "asm-tokens.h" - -namespace Harp { - class Decoder; - class Encoder; - - class Ref { - public: - std::string name; - Ref(const std::string &n, bool r, Size ib = 0): - name(n), bound(false), relative(r), ibase(ib) { } - virtual ~Ref() { } - virtual void bind(Addr addr, Addr base = 0) = 0; - virtual Addr getAddr() const = 0; - - bool bound, relative; - Size ibase; - }; - - /* Used in not-yet-encoded code objects, plain old data. 
*/ - class SimpleRef : public Ref { - public: - SimpleRef(const std::string &name, Addr &addr, bool rel = false) : - Ref(name, rel), addr(addr) { } - virtual void bind(Addr addr, Addr base = 0) { - std::cout << "Attempted to bind a SimpleRef.\n"; - std::abort(); - } - virtual Addr getAddr() const { return this->addr; } - Byte *getAddrPtr() { return (Byte*)&addr; } - - private: - Addr &addr; - }; - -// /* Used in already-encoded code objects. */ -// class OffsetRef : public Ref { -// public: -// OffsetRef( -// const std::string &name, std::vector &v, Size offset, Size bits, -// Size ws, bool rel = false, Size ibase = 0 -// ) : Ref(name, rel, ibase), data(v), offset(offset), bits(bits), wordSize(ws) -// {} - -// virtual void bind(Addr addr, Addr base = 0) { -// Size bytes(bits/8), remainder(bits%8); - -// if (relative) { -// addr = addr - base; -// Word_s addr_s(addr); -// if ((addr_s >> bits) != ~0ull && (addr_s >> bits) != 0) goto noFit; -// } else { -// Addr mask = (1ull< mask) goto noFit; -// } - -// { Byte mask((1ull<>= 8; -// } -// data[offset+i] &= ~mask; -// data[offset+i] |= (addr&mask); -// bound = true; -// } - -// return; -// noFit: -// std::cout << "Attempt to bind a " << bits << "-bit " -// << (relative?"":"non-") << "relative symbol to an address" -// " it cannot reach.\n"; -// std::abort(); -// } - -// virtual Addr getAddr() const { -// Size bytes = bits/8, remainder = bits%8; -// Byte mask((1< &data; -// Size offset, bits, wordSize; -// }; - -// class Chunk { -// public: -// Chunk(std::string n, Size a = 0, Word f = 0) : -// name(n), alignment(a), bound(false), flags(f), global(false) {} -// virtual ~Chunk() { for (Size i = 0; i < refs.size(); i++) delete refs[i]; } -// void bind(Addr a) { address = a; bound = true; } -// void setGlobal() { global = true; } -// bool isGlobal() const { return global; } -// std::string name; -// Size alignment; -// bool bound, global; -// Addr address; -// Word flags; -// std::vector refs; -// }; - -// class TextChunk 
: public Chunk { -// public: -// TextChunk(std::string n, Size a = 0, Word f = 0) -// : Chunk(n, a, f), instructions() {} - -// ~TextChunk() { -// for (Size i = 0; i < instructions.size(); i++) delete instructions[i]; -// } - -// std::vector instructions; -// }; - -// class DataChunk : public Chunk { -// public: -// DataChunk(std::string n, Size a = 0, Word f = 0) -// : Chunk(n, a, f), size(0), contents() {} -// Size size; -// std::vector contents; /* 0 to size bytes in length. */ -// }; - -// class Obj { -// public: -// ~Obj() { for (Size i = 0; i < chunks.size(); i++) delete chunks[i]; } -// std::vector chunks; -// Size entry; -// }; - -// class DynObj : public Obj { -// public: -// std::vector deps; -// }; - -// class ObjReader { -// public: -// virtual Obj *read(std::istream &input) = 0; -// private: -// }; - -// class ObjWriter { -// public: -// virtual void write(std::ostream &output, const Obj &o) = 0; -// private: -// }; - -// class AsmReader : public ObjReader { -// public: -// AsmReader(ArchDef arch) : -// wordSize(arch.getWordSize()), nRegs(arch.getNRegs()) {} -// virtual Obj *read(std::istream &input); -// private: -// Size wordSize, nRegs; - -// // Operand type sequences indexed by argument class -// enum ArgType {AT_END, AT_REG, AT_PREG, AT_LIT}; -// static ArgType operandtype_table[][4]; // ArgClass -> ArgType[arg_idx] -// }; - -// class HOFReader : public ObjReader { -// public: -// HOFReader(ArchDef &arch) : arch(arch) {} -// Obj *read(std::istream &input); -// private: -// const ArchDef &arch; -// }; - -// class AsmWriter : public ObjWriter { -// public: -// AsmWriter(ArchDef arch): wordSize(arch.getWordSize()) {} -// virtual void write(std::ostream &output, const Obj &obj); -// private: -// Size wordSize; -// }; - -// class HOFWriter : public ObjWriter { -// public: -// HOFWriter(ArchDef &arch) : arch(arch) {} -// virtual void write(std::ostream &output, const Obj &obj); -// private: -// const ArchDef &arch; -// }; -} - -#endif diff --git 
a/simX/include/qsim-harp.h b/simX/include/qsim-harp.h deleted file mode 100644 index 589a42e0..00000000 --- a/simX/include/qsim-harp.h +++ /dev/null @@ -1,169 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#ifndef EMU_INSTRUMENTATION -#define EMU_INSTRUMENTATION -#endif - -#ifndef __QSIM_HARP_H -#define __QSIM_HARP_H - -#include "types.h" -#include "core.h" -#include "enc.h" -#include "instruction.h" -#include "mem.h" -#include "obj.h" -#include "archdef.h" - -#include -#include -#include -#include - -#include - -namespace Harp { - class OSDomain { - public: - OSDomain(Harp::ArchDef &arch, std::string imgFile); - - bool idle(unsigned i) const { return cpus[i].idle(); } - int get_tid(unsigned i) const { return cpus[i].get_tid(); } - bool get_prot(unsigned i) const { return cpus[i].get_prot(); } - - int get_n() const { return cpus.size(); } - - uint64_t run(unsigned i, uint64_t n) { return cpus[i].run(n); } - void connect_console(std::ostream &s); - void timer_interrupt() { /* TODO: timer convention */ } - void interrupt(unsigned i, int vec) { cpus[i].interrupt(vec); } - bool booted(unsigned i) const { return cpus[i].booted(); } - void save_state(const char* state_file); - - template - void set_atomic_cb - (T *p, typename Qsim::OSDomain::atomic_cb_obj::atomic_cb_t f) - { - atomic_cbs.push_back(new Qsim::OSDomain::atomic_cb_obj(p, f)); - } - - template - void set_inst_cb - (T* p, typename Qsim::OSDomain::inst_cb_obj::inst_cb_t f) - { - inst_cbs.push_back(new Qsim::OSDomain::inst_cb_obj(p, f)); - } - - template - void set_int_cb - (T *p, typename Qsim::OSDomain::int_cb_obj::int_cb_t f) - { - int_cbs.push_back(new Qsim::OSDomain::int_cb_obj(p, f)); - } - - template - void set_mem_cb - (T *p, typename Qsim::OSDomain::mem_cb_obj::mem_cb_t f) - { - mem_cbs.push_back(new Qsim::OSDomain::mem_cb_obj(p, f)); - 
} - - template - void set_magic_cb - (T *p, typename Qsim::OSDomain::magic_cb_obj::magic_cb_t f) - { - magic_cbs.push_back(new Qsim::OSDomain::magic_cb_obj(p, f)); - } - - template - void set_io_cb - (T *p, typename Qsim::OSDomain::io_cb_obj::io_cb_t f) - { /* Do nothing. We have no separate IO address space. */ } - - template - void set_reg_cb - (T *p, typename Qsim::OSDomain::reg_cb_obj::reg_cb_t f) - { - reg_cbs.push_back(new Qsim::OSDomain::reg_cb_obj(p, f)); - } - - template void mem_rd(T& d, uint64_t paddr); - template void mem_rd_virt(unsigned i, T& d, uint64_t vaddr); - template void mem_wr(T& d, uint64_t paddr); - template void mem_wr_virt(unsigned i, T& d, uint64_t vaddr); - - static OSDomain *osDomain; - - bool do_atomic(unsigned c) { - bool rval(false); - for (unsigned i = 0; i < atomic_cbs.size(); ++i) - if ((*atomic_cbs[i])(c)) rval = true; - return rval; - } - - void do_inst(unsigned c, uint64_t va, uint64_t pa, uint8_t l, - const uint8_t *b, enum inst_type t) - { - for (unsigned i = 0; i < inst_cbs.size(); ++i) - (*inst_cbs[i])(c, va, pa, l, b, t); - } - - void do_int(unsigned c, int v) { - for (unsigned i = 0; i < int_cbs.size(); ++i) - (*int_cbs[i])(c, v); - } - - void do_mem(unsigned c, uint64_t va, uint64_t pa, uint8_t s, bool w) { - for (unsigned i = 0; i < mem_cbs.size(); ++i) - (*mem_cbs[i])(c, va, pa, s, w); - } - - bool do_magic(unsigned c, uint64_t r0) { - bool rval(false); - for (unsigned i = 0; i < magic_cbs.size(); ++i) - if ((*magic_cbs[i])(c, r0)) rval = true; - return rval; - } - - void do_reg(unsigned c, int r, uint8_t s, bool w) { - for (unsigned i = 0; i < reg_cbs.size(); ++i) - (*reg_cbs[i])(c, r, s, w); - } - - private: - class Cpu { - public: - Cpu(Harp::OSDomain &osd); - Cpu(): dec(NULL), core(NULL) {} - ~Cpu() { if (dec) delete dec; if (core) delete core; } - - bool idle() const { return false; } - int get_tid() const { return 0; } - bool get_prot() const { return core->getSupervisorMode(); } - uint64_t run(uint64_t n); - 
void interrupt(int vec) { core->interrupt(vec); } - bool booted() const { return core->running(); } - - Harp::OSDomain *osd; - Harp::Decoder *dec; - Harp::Core *core; - }; - - Harp::ArchDef arch; - - Harp::MemoryUnit mu; - Harp::RamMemDevice ram; - Harp::ConsoleMemDevice *console; - - std::vector cpus; - - std::vector atomic_cbs; - std::vector inst_cbs; - std::vector int_cbs; - std::vector mem_cbs; - std::vector magic_cbs; - std::vector reg_cbs; - }; -}; -#endif diff --git a/simX/include/types.h b/simX/include/types.h deleted file mode 100644 index 43598a7d..00000000 --- a/simX/include/types.h +++ /dev/null @@ -1,25 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#ifndef __TYPES_H -#define __TYPES_H - -#include - -namespace Harp { - typedef uint8_t Byte; - typedef uint32_t Word; - typedef uint32_t Word_u; - typedef int32_t Word_s; - - typedef Word_u Addr; - typedef Word_u Size; - - typedef unsigned RegNum; - typedef unsigned ThdNum; - - enum MemFlags {RD_USR = 1, WR_USR = 2, EX_USR = 4, - RD_SUP = 8, WR_SUP = 16, EX_SUP = 32}; -} - -#endif diff --git a/simX/include/util.h b/simX/include/util.h deleted file mode 100644 index a7935ca7..00000000 --- a/simX/include/util.h +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. 
Kersey, Summer 2011 -*******************************************************************************/ -#ifndef __UTIL_H -#define __UTIL_H - -#include -#include "types.h" - -namespace Harp { - Word_u bytesToWord(const Byte *b, Size wordSize); - void wordToBytes(Byte *b, Word_u w, Size wordSize); - Word_u flagsToWord(bool r, bool w, bool x); - void wordToFlags(bool &r, bool &w, bool &x, Word_u f); - - class OutOfBytes {}; - - Byte readByte(const std::vector &b, Size &n); - Word_u readWord(const std::vector &b, Size &n, Size wordSize); - void writeByte(std::vector &p, Size &n, Byte b); - void writeWord(std::vector &p, Size &n, Size wordSize, Word w); -} - -#endif diff --git a/simX/instr.cpp b/simX/instr.cpp new file mode 100644 index 00000000..9f439a31 --- /dev/null +++ b/simX/instr.cpp @@ -0,0 +1,23 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "instr.h" + +using namespace vortex; + +void Instr::setVlmul(Word lmul) { + vlmul_ = std::pow(2, lmul); +} + +void Instr::setVsew(Word sew) { + vsew_ = std::pow(2, 3+sew); +} + +void Instr::setVediv(Word ediv) { + vediv_ = std::pow(2,ediv); +} \ No newline at end of file diff --git a/simX/instr.h b/simX/instr.h new file mode 100644 index 00000000..19412a20 --- /dev/null +++ b/simX/instr.h @@ -0,0 +1,137 @@ +#pragma once + +#include "types.h" +#include "trace.h" + +namespace vortex { + +class Warp; + +enum Opcode { + NOP = 0, + R_INST = 0x33, + L_INST = 0x3, + I_INST = 0x13, + S_INST = 0x23, + B_INST = 0x63, + LUI_INST = 0x37, + AUIPC_INST= 0x17, + JAL_INST = 0x6f, + JALR_INST = 0x67, + SYS_INST = 0x73, + FENCE = 0x0f, + PJ_INST = 0x7b, + GPGPU = 0x6b, + VSET_ARITH= 0x57, + VL = 0x7, + VS = 0x27, +}; + +enum InstType { + N_TYPE, + R_TYPE, + I_TYPE, + S_TYPE, + B_TYPE, + U_TYPE, + J_TYPE, + V_TYPE +}; + +class Instr { +public: + Instr() + : predicated_(false) + , nRsrc_(0) + , nPsrc_(0) + , hasImmSrc_(false) + , hasRDest_(false) + , hasPDest_(false) + {} + + friend std::ostream 
&operator<<(std::ostream &, Instr &); + + /* Setters used to "craft" the instruction. */ + void setOpcode(Opcode opcode) { opcode_ = opcode; } + void setPred(RegNum pReg) { predicated_ = true; pred_ = pReg; } + void setDestReg(RegNum destReg) { hasRDest_ = true; rdest_ = destReg; } + void setSrcReg(RegNum srcReg) { rsrc_[nRsrc_++] = srcReg; } + void setFunc3(Word func3) { func3_ = func3; } + void setFunc7(Word func7) { func7_ = func7; } + void setSrcImm(Word srcImm) { hasImmSrc_ = true; immsrc_ = srcImm; } + void setVsetImm(Word vset_imm) { if(vset_imm) vsetImm_ = true; else vsetImm_ = false; } + void setVlsWidth(Word width) { vlsWidth_ = width; } + void setVmop(Word mop) { vMop_ = mop; } + void setVnf(Word nf) { vNf_ = nf; } + void setVmask(Word mask) { vmask_ = mask; } + void setVs3(Word vs) { vs3_ = vs; } + void setVlmul(Word lmul); + void setVsew(Word sew); + void setVediv(Word ediv); + void setFunc6(Word func6) { func6_ = func6; } + void setPrivileged(bool privileged) { privileged_ = privileged; } + + /* Getters used by encoders. 
*/ + Opcode getOpcode() const { return opcode_; } + Word getFunc3() const { return func3_; } + Word getFunc6() const { return func6_; } + Word getFunc7() const { return func7_; } + RegNum getNRSrc() const { return nRsrc_; } + RegNum getRSrc(RegNum i) const { return rsrc_[i]; } + bool hasRDest() const { return hasRDest_; } + RegNum getRDest() const { return rdest_; } + bool hasPDest() const { return hasPDest_; } + RegNum getPDest() const { return pdest_; } + bool hasPred() const { return predicated_; } + RegNum getPred() const { return pred_; } + bool hasImm() const { return hasImmSrc_; } + Word getImm() const { return immsrc_; } + bool getVsetImm() const { return vsetImm_; } + Word getVlsWidth() const { return vlsWidth_; } + Word getVmop() const { return vMop_; } + Word getvNf() const { return vNf_; } + bool getVmask() const { return vmask_; } + Word getVs3() const { return vs3_; } + Word getVlmul() const { return vlmul_; } + Word getVsew() const { return vsew_; } + Word getVediv() const { return vediv_; } + bool getPrivileged() const { return privileged_; } + +private: + + enum { + MAX_REG_SOURCES = 3 + }; + + Opcode opcode_; + bool predicated_; + RegNum pred_; + int nRsrc_; + int nPsrc_; + RegNum rsrc_[MAX_REG_SOURCES]; + bool hasImmSrc_; + Word immsrc_; + Word func3_; + Word func7_; + bool hasRDest_; + bool hasPDest_; + RegNum rdest_; + RegNum pdest_; + bool privileged_; + + //Vector + bool vsetImm_; + bool vmask_; + Word vlsWidth_; + Word vMop_; + Word vNf_; + Word vs3_; + Word vlmul_; + Word vsew_; + Word vediv_; + Word func6_; +}; + +std::ostream &operator<<(std::ostream &, Instr &); + +} \ No newline at end of file diff --git a/simX/instruction.cpp b/simX/instruction.cpp deleted file mode 100644 index 0c04eb6a..00000000 --- a/simX/instruction.cpp +++ /dev/null @@ -1,2284 +0,0 @@ -/******************************************************************************* - HARPtools by Chad D. 
Kersey, Summer 2011 -*******************************************************************************/ -#include -#include -#include - -#include "include/instruction.h" -#include "include/obj.h" -#include "include/core.h" -#include "include/harpfloat.h" -#include "include/debug.h" - -#ifdef EMU_INSTRUMENTATION -#include "include/qsim-harp.h" -#endif -#include -#include -#include - -using namespace Harp; -using namespace std; - -/* It is important that this stays consistent with the Harp::Instruction::Opcode - enum. */ - -ostream &Harp::operator<<(ostream &os, Instruction &inst) { - os << dec; - - // if (inst.predicated) { - // os << "@p" << dec << inst.pred << " ? "; - // } - - // os << inst.instTable[inst.op].opString << ' '; - // if (inst.rdestPresent) os << "%r" << dec << inst.rdest << ' '; - // if (inst.pdestPresent) os << "@p" << inst.pdest << ' '; - // for (int i = 0; i < inst.nRsrc; i++) { - // os << "%r" << dec << inst.rsrc[i] << ' '; - // } - // for (int i = 0; i < inst.nPsrc; i++) { - // os << "@p" << dec << inst.psrc[i] << ' '; - // } - // if (inst.immsrcPresent) { - // if (inst.refLiteral) os << inst.refLiteral->name; - // else os << "#0x" << hex << inst.immsrc; - // } - - os << instTable[inst.op].opString; - - return os; -} - -bool checkUnanimous(unsigned p, const std::vector>> &m, - const std::vector &tm) { - bool same; - unsigned i; - for (i = 0; i < m.size(); ++i) { - if (tm[i]) { - same = m[i][p]; - break; - } - } - if (i == m.size()) - throw DivergentBranchException(); - - //std::cout << "same: " << same << " with -> "; - for (; i < m.size(); ++i) { - if (tm[i]) { - //std::cout << " " << (bool(m[i][p])); - if (same != (bool(m[i][p]))) { - //std::cout << " FALSE\n"; - return false; - } - } - } - //std::cout << " TRUE\n"; - return true; -} - -Word signExt(Word w, Size bit, Word mask) { - if (w >> (bit - 1)) - w |= ~mask; - return w; -} - -void upload(unsigned *addr, char *src, int size, Warp &c) { - - // cerr << "WRITING FINAL: " << *src << " size: " 
<< size << "\n"; - - unsigned current_addr = *addr; - - c.core->mem.write(current_addr, size, c.supervisorMode, 4); - current_addr += 4; - - for (int i = 0; i < size; i++) { - unsigned value = src[i] & 0x000000FF; - // cerr << "UPLOAD: (" << hex << current_addr << dec << ") = " << hex << ( value) << dec << "\n"; - c.core->mem.write(current_addr, value, c.supervisorMode, 1); - current_addr += 1; - } - - current_addr += (current_addr % 4); - - *addr = current_addr; -} - -void download(unsigned *addr, char *drain, Warp &c) { - unsigned current_addr = *addr; - - int size; - - size = c.core->mem.read(current_addr, c.supervisorMode); - current_addr += 4; - - for (int i = 0; i < size; i++) { - unsigned read_word = c.core->mem.read(current_addr, c.supervisorMode); - char read_byte = (char)(read_word & 0x000000FF); - drain[i] = read_byte; - current_addr += 1; - } - - current_addr += (current_addr % 4); - - *addr = current_addr; -} - -void downloadAlloc(unsigned *addr, char **drain_ptr, int &size, Warp &c) { - unsigned current_addr = *addr; - - size = c.core->mem.read(current_addr, c.supervisorMode); - current_addr += 4; - - (*drain_ptr) = (char *)malloc(size); - - char *drain = *drain_ptr; - - for (int i = 0; i < size; i++) { - unsigned read_word = c.core->mem.read(current_addr, c.supervisorMode); - char read_byte = (char)(read_word & 0x000000FF); - drain[i] = read_byte; - current_addr += 1; - } - - *addr = current_addr; -} - -#define CLOSE 1 -#define ISATTY 2 -#define LSEEK 3 -#define READ 4 -#define WRITE 5 -#define FSTAT 6 -#define OPEN 7 - -void trap_to_simulator(Warp &c) { - unsigned read_buffer = 0x71000000; - unsigned write_buffer = 0x72000000; - - // cerr << "RAW READ BUFFER:\n"; - // for (int i = 0; i < 10; i++) - // { - // unsigned new_addr = read_buffer + (4*i); - // unsigned data_read = c.core->mem.read(new_addr, c.supervisorMode); - // cerr << hex << new_addr << ": " << data_read << "\n"; - // } - - for (int j = 0; j < 1024; j += 1) { - 
c.core->mem.write((write_buffer + j), 0, c.supervisorMode, 1); - } - - int command; - download(&read_buffer, (char *)&command, c); - - // cerr << "Command: " << hex << command << dec << '\n'; - - switch (command) { - case (CLOSE): { - cerr << "trap_to_simulator: CLOSE not supported yet\n"; - } break; - case (ISATTY): { - - cerr << "trap_to_simulator: ISATTY not supported yet\n"; - } break; - case (LSEEK): { - - // cerr << "trap_to_simulator: LSEEK not supported yet\n"; - int fd; - int offset; - int whence; - - download(&read_buffer, (char *)&fd, c); - download(&read_buffer, (char *)&offset, c); - download(&read_buffer, (char *)&whence, c); - - int retval = lseek(fd, offset, whence); - - upload(&write_buffer, (char *)&retval, sizeof(int), c); - - } break; - case (READ): { - - // cerr << "trap_to_simulator: READ not supported yet\n"; - int file; - unsigned ptr; - int len; - - download(&read_buffer, (char *)&file, c); - download(&read_buffer, (char *)&ptr, c); - download(&read_buffer, (char *)&len, c); - - char *buff = (char *)malloc(len); - - int ret = read(file, buff, len); - - for (int i = 0; i < len; i++) { - c.core->mem.write(ptr, buff[i], c.supervisorMode, 1); - ptr++; - } - // c.core->mem.write(ptr, 0, c.supervisorMode, 1); - free(buff); - - } break; - case (WRITE): { - int file; - download(&read_buffer, (char *)&file, c); - - file = (file == 1) ? 
2 : file; - - int size; - char *buf; - downloadAlloc(&read_buffer, &buf, size, c); - - int e = write(file, buf, size); - free(buf); - } break; - case (FSTAT): { - cerr << "trap_to_simulator: FSTAT not supported yet\n"; - int file; - download(&read_buffer, (char *)&file, c); - - struct stat st; - fstat(file, &st); - - fprintf(stderr, "------------------------\n"); - fprintf(stderr, "Size of struct: %ld\n", sizeof(struct stat)); - fprintf(stderr, "st_mode: %x\n", st.st_mode); - fprintf(stderr, "st_dev: %ld\n", st.st_dev); - fprintf(stderr, "st_ino: %ld\n", st.st_ino); - fprintf(stderr, "st_uid: %x\n", st.st_uid); - fprintf(stderr, "st_gid: %x\n", st.st_gid); - fprintf(stderr, "st_rdev: %ld\n", st.st_rdev); - fprintf(stderr, "st_size: %ld\n", st.st_size); - fprintf(stderr, "st_blksize: %ld\n", st.st_blksize); - fprintf(stderr, "st_blocks: %ld\n", st.st_blocks); - fprintf(stderr, "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"); - - upload(&write_buffer, (char *)&st.st_mode, sizeof(st.st_mode), c); - upload(&write_buffer, (char *)&st.st_dev, sizeof(st.st_dev), c); - // upload(&write_buffer, (char *) &st.st_uid , sizeof(st.st_uid), c); - // upload(&write_buffer, (char *) &st.st_gid , sizeof(st.st_gid), c); - // upload(&write_buffer, (char *) &st.st_size , sizeof(st.st_size), c); - // upload(&write_buffer, (char *) &st.st_blksize , sizeof(st.st_blksize), c); - // upload(&write_buffer, (char *) &st.st_blocks , sizeof(st.st_blocks), c); - - // upload(&write_buffer, (char *) &st, sizeof(struct stat), c); - - cerr << "RAW Write BUFFER:\n"; - unsigned original_write_buffer = 0x72000000; - for (int i = 0; i < 10; i++) { - unsigned new_addr = original_write_buffer + (4 * i); - unsigned data_read = c.core->mem.read(new_addr, c.supervisorMode); - cerr << hex << new_addr << ": " << data_read << "\n"; - } - } break; - case (OPEN): { - // cerr << "$$$$$$$$$$$$$$$$$$$$$$$$$ OPEN FROM simX\n"; - unsigned name_ptr; - unsigned flags; - unsigned mode; - - download(&read_buffer, (char *)&name_ptr, 
c); - download(&read_buffer, (char *)&flags, c); - download(&read_buffer, (char *)&mode, c); - - char buffer[255]; - unsigned read_word; - char read_byte; - - int curr_ind = 0; - - read_word = c.core->mem.read(name_ptr, c.supervisorMode); - read_byte = (char)(read_word & 0x000000FF); - while (read_byte != 0) { - buffer[curr_ind] = read_byte; - - name_ptr++; - curr_ind++; - read_word = c.core->mem.read(name_ptr, c.supervisorMode); - read_byte = (char)(read_word & 0x000000FF); - } - buffer[curr_ind] = 0; - - int fd = open(buffer, flags, mode); - - // fprintf(stderr, "Name: --%s-- and fd: %d\n", buffer, fd); - - upload(&write_buffer, (char *)&fd, sizeof(int), c); - - } break; - default: { - - cerr << "trap_to_simulator: DEFAULT not supported yet\n"; - } break; - } -} - -void Instruction::executeOn(Warp &c, trace_inst_t *trace_inst) { - /* If I try to execute a privileged instruction in user mode, throw an - exception 3. */ - if (instTable[op].privileged && !c.supervisorMode) { - D(3, "INTERRUPT SUPERVISOR\n"); - c.interrupt(3); - return; - } - - bool is_vec = false; - - Size nextActiveThreads = c.activeThreads; - Size wordSz = c.core->a.getWordSize(); - Word nextPc = c.pc; - Word VLMAX; - - c.memAccesses.clear(); - - unsigned real_pc = c.pc - 4; - if ((real_pc) == (0x70000000)) { - trap_to_simulator(c); - } - - bool sjOnce(true), // Has not yet split or joined once. 
- pcSet(false); // PC has already been set - for (Size t = 0; t < c.activeThreads; t++) { - vector> ®(c.reg[t]); - vector> &pReg(c.pred[t]); - stack &domStack(c.domStack); - - bool split = (op == GPGPU) && (func3 == 2); - bool join = (op == GPGPU) && (func3 == 3); - - bool is_gpgpu = (op == GPGPU); - - bool is_tmc = is_gpgpu && (func3 == 0); - bool is_wspawn = is_gpgpu && (func3 == 1); - bool is_barrier = is_gpgpu && (func3 == 4); - bool is_split = is_gpgpu && (func3 == 2); - bool is_join = is_gpgpu && (func3 == 3); - - bool gpgpu_zero = (is_tmc || is_barrier || is_wspawn) && (t != 0); - - bool not_active = !c.tmask[t]; - - if (not_active || gpgpu_zero) { - continue; - } - - ++c.insts; - - Word memAddr; - Word shift_by; - Word shamt; - Word temp; - Word data_read; - int op1, op2; - bool m_exten; - // std::cout << "op = " << op << "\n"; - // std::cout << "R_INST: " << R_INST << "\n"; - int num_to_wspawn; - switch (op) { - - case NOP: - //std::cout << "NOP_INST\n"; - break; - case R_INST: - // std::cout << "R_INST\n"; - m_exten = func7 & 0x1; - if (m_exten) { - // std::cout << "FOUND A MUL/DIV\n"; - - switch (func3) { - case 0: - // MUL - D(3, "MUL: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - reg[rdest] = ((int)reg[rsrc[0]]) * ((int)reg[rsrc[1]]); - break; - case 1: - // MULH - D(3, "MULH: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - { - int64_t first = (int64_t)reg[rsrc[0]]; - if (reg[rsrc[0]] & 0x80000000) { - first = first | 0xFFFFFFFF00000000; - } - int64_t second = (int64_t)reg[rsrc[1]]; - if (reg[rsrc[1]] & 0x80000000) { - second = second | 0xFFFFFFFF00000000; - } - // cout << "mulh: " << std::dec << first << " * " << second; - uint64_t result = first * second; - reg[rdest] = (result >> 32) & 0xFFFFFFFF; - // cout << " = " << result << " or " << reg[rdest] << "\n"; - } - break; - case 2: - // MULHSU - D(3, "MULHSU: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - { - int64_t first = (int64_t)reg[rsrc[0]]; - if (reg[rsrc[0]] & 
0x80000000) { - first = first | 0xFFFFFFFF00000000; - } - int64_t second = (int64_t)reg[rsrc[1]]; - reg[rdest] = ((first * second) >> 32) & 0xFFFFFFFF; - } - break; - case 3: - // MULHU - D(3, "MULHU: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - { - uint64_t first = (uint64_t)reg[rsrc[0]]; - uint64_t second = (uint64_t)reg[rsrc[1]]; - // cout << "MULHU\n"; - reg[rdest] = ((first * second) >> 32) & 0xFFFFFFFF; - } - break; - case 4: - // DIV - D(3, "DIV: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - if (reg[rsrc[1]] == 0) { - reg[rdest] = -1; - break; - } - // cout << "dividing: " << dec << ((int) reg[rsrc[0]]) << " / " << ((int) reg[rsrc[1]]); - reg[rdest] = ((int)reg[rsrc[0]]) / ((int)reg[rsrc[1]]); - // cout << " = " << ((int) reg[rdest]) << "\n"; - break; - case 5: - // DIVU - D(3, "DIVU: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - if (reg[rsrc[1]] == 0) { - reg[rdest] = -1; - break; - } - reg[rdest] = ((uint32_t)reg[rsrc[0]]) / ((uint32_t)reg[rsrc[1]]); - break; - case 6: - // REM - D(3, "REM: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - if (reg[rsrc[1]] == 0) { - reg[rdest] = reg[rsrc[0]]; - break; - } - reg[rdest] = ((int)reg[rsrc[0]]) % ((int)reg[rsrc[1]]); - break; - case 7: - // REMU - D(3, "REMU: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - if (reg[rsrc[1]] == 0) { - reg[rdest] = reg[rsrc[0]]; - break; - } - reg[rdest] = ((uint32_t)reg[rsrc[0]]) % ((uint32_t)reg[rsrc[1]]); - break; - default: - cout << "unsupported MUL/DIV instr\n"; - std::abort(); - } - } else { - // std::cout << "NORMAL R-TYPE\n"; - switch (func3) { - case 0: - if (func7) { - D(3, "SUBI: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - reg[rdest] = reg[rsrc[0]] - reg[rsrc[1]]; - reg[rdest].trunc(wordSz); - } else { - D(3, "ADDI: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - reg[rdest] = reg[rsrc[0]] + reg[rsrc[1]]; - reg[rdest].trunc(wordSz); - } - break; - case 1: - D(3, "SLLI: r" << rdest << " <- r" 
<< rsrc[0] << ", r" << rsrc[1]); - reg[rdest] = reg[rsrc[0]] << reg[rsrc[1]]; - reg[rdest].trunc(wordSz); - break; - case 2: - D(3, "SLTI: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - if (int(reg[rsrc[0]]) < int(reg[rsrc[1]])) { - reg[rdest] = 1; - } else { - reg[rdest] = 0; - } - break; - case 3: - D(3, "SLTU: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - if (Word_u(reg[rsrc[0]]) < Word_u(reg[rsrc[1]])) { - reg[rdest] = 1; - } else { - reg[rdest] = 0; - } - break; - case 4: - D(3, "XORI: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - reg[rdest] = reg[rsrc[0]] ^ reg[rsrc[1]]; - break; - case 5: - if (func7) { - D(3, "SRLI: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - reg[rdest] = int(reg[rsrc[0]]) >> int(reg[rsrc[1]]); - reg[rdest].trunc(wordSz); - } else { - D(3, "SRLU: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - reg[rdest] = Word_u(reg[rsrc[0]]) >> Word_u(reg[rsrc[1]]); - reg[rdest].trunc(wordSz); - } - break; - case 6: - D(3, "ORI: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - reg[rdest] = reg[rsrc[0]] | reg[rsrc[1]]; - break; - case 7: - D(3, "ANDI: r" << rdest << " <- r" << rsrc[0] << ", r" << rsrc[1]); - reg[rdest] = reg[rsrc[0]] & reg[rsrc[1]]; - break; - default: - cout << "ERROR: UNSUPPORTED R INST\n"; - std::abort(); - } - } - break; - case L_INST: - memAddr = ((reg[rsrc[0]] + immsrc) & 0xFFFFFFFC); - shift_by = ((reg[rsrc[0]] + immsrc) & 0x00000003) * 8; - data_read = c.core->mem.read(memAddr, c.supervisorMode); - trace_inst->is_lw = true; - trace_inst->mem_addresses[t] = memAddr; - switch (func3) { - case 0: - // LBI - D(3, "LBI: r" << rdest << " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - reg[rdest] = signExt((data_read >> shift_by) & 0xFF, 8, 0xFF); - break; - case 1: - // LWI - D(3, "LWI: r" << rdest << " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - reg[rdest] = signExt((data_read >> shift_by) & 0xFFFF, 16, 0xFFFF); - break; - case 2: - // LDI - D(3, "LDI: r" << rdest 
<< " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - reg[rdest] = int(data_read & 0xFFFFFFFF); - break; - case 4: - // LBU - D(3, "LBU: r" << rdest << " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - reg[rdest] = unsigned((data_read >> shift_by) & 0xFF); - break; - case 5: - // LWU - D(3, "LWU: r" << rdest << " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - reg[rdest] = unsigned((data_read >> shift_by) & 0xFFFF); - break; - default: - cout << "ERROR: UNSUPPORTED L INST\n"; - std::abort(); - c.memAccesses.push_back(Warp::MemAccess(false, memAddr)); - } - D(3, "LOAD MEM ADDRESS: " << std::hex << memAddr); - D(3, "LOAD MEM DATA: " << std::hex << data_read); - break; - case I_INST: - //std::cout << "I_INST\n"; - switch (func3) { - case 0: - // ADDI - D(3, "ADDI: r" << rdest << " <- r" << rsrc[0] << ", imm=" << immsrc); - reg[rdest] = reg[rsrc[0]] + immsrc; - reg[rdest].trunc(wordSz); - break; - case 2: - // SLTI - D(3, "SLTI: r" << rdest << " <- r" << rsrc[0] << ", imm=" << immsrc); - if (int(reg[rsrc[0]]) < int(immsrc)) { - reg[rdest] = 1; - } else { - reg[rdest] = 0; - } - break; - case 3: - // SLTIU - D(3, "SLTIU: r" << rdest << " <- r" << rsrc[0] << ", imm=" << immsrc); - op1 = (unsigned)reg[rsrc[0]]; - if (unsigned(reg[rsrc[0]]) < unsigned(immsrc)) { - reg[rdest] = 1; - } else { - reg[rdest] = 0; - } - break; - case 4: - // XORI - D(3, "XORI: r" << rdest << " <- r" << rsrc[0] << ", imm=0x" << hex << immsrc); - reg[rdest] = reg[rsrc[0]] ^ immsrc; - break; - case 6: - // ORI - D(3, "ORI: r" << rdest << " <- r" << rsrc[0] << ", imm=0x" << hex << immsrc); - reg[rdest] = reg[rsrc[0]] | immsrc; - break; - case 7: - // ANDI - D(3, "ANDI: r" << rdest << " <- r" << rsrc[0] << ", imm=0x" << hex << immsrc); - reg[rdest] = reg[rsrc[0]] & immsrc; - break; - case 1: - // SLLI - D(3, "SLLI: r" << rdest << " <- r" << rsrc[0] << ", imm=0x" << hex << immsrc); - reg[rdest] = reg[rsrc[0]] << immsrc; - reg[rdest].trunc(wordSz); - break; - case 5: - if ((func7 == 0)) { - // SRLI - 
D(3, "SRLI: r" << rdest << " <- r" << rsrc[0] << ", imm=" << immsrc); - bool isNeg = ((0x80000000 & reg[rsrc[0]])) > 0; - Word result = Word_u(reg[rsrc[0]]) >> Word_u(immsrc); - reg[rdest] = result; - reg[rdest].trunc(wordSz); - } else { - // SRAI - D(3, "SRAI: r" << rdest << " <- r" << rsrc[0] << ", imm=" << immsrc); - op1 = reg[rsrc[0]]; - op2 = immsrc; - reg[rdest] = op1 >> op2; - reg[rdest].trunc(wordSz); - } - break; - default: - cout << "ERROR: UNSUPPORTED L INST\n"; - std::abort(); - } - break; - case S_INST: - ++c.stores; - memAddr = reg[rsrc[0]] + immsrc; - trace_inst->is_sw = true; - trace_inst->mem_addresses[t] = memAddr; - // //std::cout << "FUNC3: " << func3 << "\n"; - if ((memAddr == 0x00010000) && (t == 0)) { - unsigned num = reg[rsrc[1]]; - fprintf(stderr, "%c", (char)reg[rsrc[1]]); - break; - } - switch (func3) { - case 0: - // SB - D(3, "SB: r" << rsrc[1] << " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - c.core->mem.write(memAddr, reg[rsrc[1]] & 0x000000FF, c.supervisorMode, 1); - break; - case 1: - // SH - D(3, "SH: r" << rsrc[1] << " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - c.core->mem.write(memAddr, reg[rsrc[1]], c.supervisorMode, 2); - break; - case 2: - // SD - D(3, "SD: r" << rsrc[1] << " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - c.core->mem.write(memAddr, reg[rsrc[1]], c.supervisorMode, 4); - break; - default: - cout << "ERROR: UNSUPPORTED S INST\n"; - std::abort(); - } - D(3, "STORE MEM ADDRESS: " << std::hex << memAddr); - c.memAccesses.push_back(Warp::MemAccess(true, memAddr)); -#ifdef EMU_INSTRUMENTATION - Harp::OSDomain::osDomain->do_mem(0, memAddr, c.core->mem.virtToPhys(memAddr), 8, true); -#endif - break; - case B_INST: - trace_inst->stall_warp = true; - switch (func3) { - case 0: - // BEQ - D(3, "BEQ: r" << rsrc[0] << ", r" << rsrc[1] << ", imm=" << (int)immsrc); - if (int(reg[rsrc[0]]) == int(reg[rsrc[1]])) { - if (!pcSet) - nextPc = (c.pc - 4) + immsrc; - pcSet = true; - } - break; - case 1: - // BNE - D(3, "BNE: 
r" << rsrc[0] << ", r" << rsrc[1] << ", imm=" << (int)immsrc); - if (int(reg[rsrc[0]]) != int(reg[rsrc[1]])) { - if (!pcSet) - nextPc = (c.pc - 4) + immsrc; - pcSet = true; - } - break; - case 4: - // BLT - D(3, "BLT: r" << rsrc[0] << ", r" << rsrc[1] << ", imm=" << (int)immsrc); - if (int(reg[rsrc[0]]) < int(reg[rsrc[1]])) { - if (!pcSet) - nextPc = (c.pc - 4) + immsrc; - pcSet = true; - } - break; - case 5: - // BGE - D(3, "BGE: r" << rsrc[0] << ", r" << rsrc[1] << ", imm=" << (int)immsrc); - if (int(reg[rsrc[0]]) >= int(reg[rsrc[1]])) { - if (!pcSet) - nextPc = (c.pc - 4) + immsrc; - pcSet = true; - } - break; - case 6: - // BLTU - D(3, "BLTU: r" << rsrc[0] << ", r" << rsrc[1] << ", imm=" << (int)immsrc); - if (Word_u(reg[rsrc[0]]) < Word_u(reg[rsrc[1]])) { - if (!pcSet) - nextPc = (c.pc - 4) + immsrc; - pcSet = true; - } - break; - case 7: - // BGEU - D(3, "BGEU: r" << rsrc[0] << ", r" << rsrc[1] << ", imm=" << (int)immsrc); - if (Word_u(reg[rsrc[0]]) >= Word_u(reg[rsrc[1]])) { - if (!pcSet) - nextPc = (c.pc - 4) + immsrc; - pcSet = true; - } - break; - } - break; - case LUI_INST: - D(3, "LUI: r" << rdest << " <- imm=0x" << hex << immsrc); - reg[rdest] = (immsrc << 12) & 0xfffff000; - break; - case AUIPC_INST: - D(3, "AUIPC: r" << rdest << " <- imm=0x" << hex << immsrc); - reg[rdest] = ((immsrc << 12) & 0xfffff000) + (c.pc - 4); - break; - case JAL_INST: - D(3, "JAL: r" << rdest << " <- imm=" << (int)immsrc); - trace_inst->stall_warp = true; - if (!pcSet) - nextPc = (c.pc - 4) + immsrc; - if (!pcSet) { /*std::cout << "JAL... SETTING PC: " << nextPc << "\n"; */ - } - if (rdest != 0) { - reg[rdest] = c.pc; - } - pcSet = true; - break; - case JALR_INST: - D(3, "JALR: r" << rdest << " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - trace_inst->stall_warp = true; - if (!pcSet) - nextPc = reg[rsrc[0]] + immsrc; - if (!pcSet) { /*std::cout << "JALR... 
SETTING PC: " << nextPc << "\n";*/ - } - if (rdest != 0) { - reg[rdest] = c.pc; - } - pcSet = true; - break; - case SYS_INST: - D(3, "SYS_INST: r" << rdest << " <- r" << rsrc[0] << ", imm=" << (int)immsrc); - temp = reg[rsrc[0]]; - // GPGPU CSR extension - if (immsrc == 0x20) { - // ThreadID - reg[rdest] = t; - D(3, "vx_threadID: r" << rdest << "=" << reg[rdest]); - } else if (immsrc == 0x21) { - // WarpID - reg[rdest] = c.id; - D(3, "vx_warpID: r" << rdest << "=" << reg[rdest]); - } else if (immsrc == 0x22) { - // WarpNum - reg[rdest] = c.id; - D(3, "vx_warpNum: r" << rdest << "=" << reg[rdest]); - } else if (immsrc == 0x25) { - // NumInsts - reg[rdest] = c.core->num_instructions; - D(3, "vx_getInst: r" << rdest << "=" << reg[rdest]); - } else if (immsrc == 0x26) { - // NumCycles - reg[rdest] = c.core->num_cycles; - D(3, "vx_getCycle: r" << rdest << "=" << reg[rdest]); - } else { - switch (func3) { - case 0: - if (immsrc < 2) { - //std::cout << "INTERRUPT ECALL/EBREAK\n"; - nextActiveThreads = 0; - c.spawned = false; - // c.interrupt(0); - } - break; - case 1: - // printf("Case 1\n"); - if (rdest != 0) { - reg[rdest] = c.csr[immsrc & 0x00000FFF]; - } - c.csr[immsrc & 0x00000FFF] = temp; - break; - case 2: - // printf("Case 2\n"); - if (rdest != 0) { - // printf("Reading from CSR: %d = %d\n", (immsrc & 0x00000FFF), c.csr[immsrc & 0x00000FFF]); - reg[rdest] = c.csr[immsrc & 0x00000FFF]; - } - // printf("Writing to CSR --> %d = %d\n", immsrc, (temp | c.csr[immsrc & 0x00000FFF])); - c.csr[immsrc & 0x00000FFF] = temp | c.csr[immsrc & 0x00000FFF]; - break; - case 3: - // printf("Case 3\n"); - if (rdest != 0) { - reg[rdest] = c.csr[immsrc & 0x00000FFF]; - } - c.csr[immsrc & 0x00000FFF] = temp & (~c.csr[immsrc & 0x00000FFF]); - break; - case 5: - // printf("Case 5\n"); - if (rdest != 0) { - reg[rdest] = c.csr[immsrc & 0x00000FFF]; - } - c.csr[immsrc & 0x00000FFF] = rsrc[0]; - break; - case 6: - // printf("Case 6\n"); - if (rdest != 0) { - reg[rdest] = c.csr[immsrc & 
0x00000FFF]; - } - c.csr[immsrc & 0x00000FFF] = rsrc[0] | c.csr[immsrc & 0x00000FFF]; - break; - case 7: - // printf("Case 7\n"); - if (rdest != 0) { - reg[rdest] = c.csr[immsrc & 0x00000FFF]; - } - c.csr[immsrc & 0x00000FFF] = rsrc[0] & (~c.csr[immsrc & 0x00000FFF]); - break; - default: - break; - } - } - break; - case TRAP: - D(3, "TRAP"); - nextActiveThreads = 0; - c.interrupt(0); - break; - case FENCE: - D(3, "FENCE"); - break; - case PJ_INST: - D(3, "PJ_INST: r" << rsrc[0] << ", r" << rsrc[1]); - if (reg[rsrc[0]]) { - if (!pcSet) - nextPc = reg[rsrc[1]]; - pcSet = true; - } - break; - case GPGPU: - switch (func3) { - case 1: - // WSPAWN - D(3, "WSPAWN: r" << rsrc[0] << ", r" << rsrc[1]); - trace_inst->wspawn = true; - if (sjOnce) { - sjOnce = false; - num_to_wspawn = std::min(reg[rsrc[0]], c.core->a.getNWarps()); - D(0, "Spawning " << num_to_wspawn << " new warps at PC: " << hex << reg[rsrc[1]]); - for (unsigned i = 1; i < num_to_wspawn; ++i) { - Warp &newWarp(c.core->w[i]); - { - newWarp.pc = reg[rsrc[1]]; - for (int kk = 0; kk < newWarp.tmask.size(); kk++) { - if (kk == 0) { - newWarp.tmask[kk] = true; - } else { - newWarp.tmask[kk] = false; - } - } - newWarp.activeThreads = 1; - newWarp.supervisorMode = false; - newWarp.spawned = true; - } - } - break; - } - break; - case 2: { - // SPLIT - D(3, "SPLIT: r" << pred); - trace_inst->stall_warp = true; - if (sjOnce) { - sjOnce = false; - if (checkUnanimous(pred, c.reg, c.tmask)) { - D(3, "Unanimous pred: " << pred << " val: " << reg[pred] << "\n"); - DomStackEntry e(c.tmask); - e.uni = true; - c.domStack.push(e); - break; - } - D(3, "Split: Original TM: "); - for (auto y : c.tmask) - D(3, y << " "); - - DomStackEntry e(pred, c.reg, c.tmask, c.pc); - c.domStack.push(c.tmask); - c.domStack.push(e); - for (unsigned i = 0; i < e.tmask.size(); ++i) { - c.tmask[i] = !e.tmask[i] && c.tmask[i]; - } - - D(3, "Split: New TM"); - for (auto y : c.tmask) - D(3, y << " "); - D(3, "Split: Pushed TM PC: " << hex << e.pc << dec 
<< "\n"); - for (auto y : e.tmask) - D(3, y << " "); - } - break; - } - case 3: - // JOIN - D(3, "JOIN"); - if (sjOnce) { - sjOnce = false; - if (!c.domStack.empty() && c.domStack.top().uni) { - D(2, "Uni branch at join"); - printf("NEW DOMESTACK: \n"); - c.tmask = c.domStack.top().tmask; - c.domStack.pop(); - break; - } - if (!c.domStack.top().fallThrough) { - if (!pcSet) { - nextPc = c.domStack.top().pc; - D(3, "join: NOT FALLTHROUGH PC: " << hex << nextPc << dec); - } - pcSet = true; - } - - D(3, "Join: Old TM: "); - for (auto y : c.tmask) - D(3, y << " "); - cout << "\n"; - c.tmask = c.domStack.top().tmask; - - D(3, "Join: New TM: "); - for (auto y : c.tmask) - D(3, y << " "); - - c.domStack.pop(); - } - break; - case 4: - trace_inst->stall_warp = true; - // is_barrier - break; - case 0: - // TMC - D(3, "TMC: r" << rsrc[0]); - trace_inst->stall_warp = true; - nextActiveThreads = std::min(reg[rsrc[0]], c.core->a.getNThds()); - { - for (int ff = 0; ff < c.tmask.size(); ff++) { - if (ff < nextActiveThreads) { - c.tmask[ff] = true; - } else { - c.tmask[ff] = false; - } - } - } - if (nextActiveThreads == 0) { - c.spawned = false; - } - break; - default: - cout << "ERROR: UNSUPPORTED GPGPU INSTRUCTION " << *this << "\n"; - } - break; - case VSET_ARITH: - D(3, "VSET_ARITH"); - is_vec = true; - switch (func3) { - case 0: // vector-vector - trace_inst->vs1 = rsrc[0]; - trace_inst->vs2 = rsrc[1]; - trace_inst->vd = rdest; - switch (func6) { - case 0: { - is_vec = true; - D(3, "Addition " << rsrc[0] << " " << rsrc[1] << " Dest:" << rdest); - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - vector> &mask = c.vreg[0]; - - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *mask_ptr = (uint8_t *)mask[i].val; - uint8_t value = (*mask_ptr & 0x1); - if (vmask || (!vmask && value)) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = *first_ptr + 
*second_ptr; - D(3, "Adding " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - } - - } else if (c.vtype.vsew == 16) { - - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *mask_ptr = (uint16_t *)mask[i].val; - uint16_t value = (*mask_ptr & 0x1); - if (vmask || (!vmask && value)) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = *first_ptr + *second_ptr; - D(3, "Adding " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - } - } else if (c.vtype.vsew == 32) { - D(3, "Doing 32 bit vector addition"); - for (Word i = 0; i < c.vl; i++) { - int *mask_ptr = (int *)mask[i].val; - int value = (*mask_ptr & 0x1); - if (vmask || (!vmask && value)) { - int *first_ptr = (int *)vr1[i].val; - int *second_ptr = (int *)vr2[i].val; - int result = *first_ptr + *second_ptr; - D(3, "Adding " << *first_ptr << " + " << *second_ptr << " = " << result); - - int *result_ptr = (int *)vd[i].val; - *result_ptr = result; - } - } - } - - D(3, "Vector Register state after addition:" << flush); - for (int i = 0; i < c.vreg.size(); i++) { - for (int j = 0; j < c.vreg[0].size(); j++) { - if (c.vtype.vsew == 8) { - uint8_t *ptr_val = (uint8_t *)c.vreg[i][j].val; - D(3, "reg[" << i << "][" << j << "] = " << *ptr_val); - } else if (c.vtype.vsew == 16) { - uint16_t *ptr_val = (uint16_t *)c.vreg[i][j].val; - D(3, "reg[" << i << "][" << j << "] = " << *ptr_val); - } else if (c.vtype.vsew == 32) { - uint32_t *ptr_val = (uint32_t *)c.vreg[i][j].val; - D(3, "reg[" << i << "][" << j << "] = " << *ptr_val); - } - } - } - - D(3, "After vector register state after addition" << flush); - } break; - case 24: //vmseq - { - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; 
i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = (*first_ptr == *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = (*first_ptr == *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 32) { - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t result = (*first_ptr == *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - } - - } break; - case 25: //vmsne - { - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = (*first_ptr != *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = (*first_ptr != *second_ptr) ? 
1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 32) { - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t result = (*first_ptr != *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - } - - } break; - case 26: //vmsltu - { - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = (*first_ptr < *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = (*first_ptr < *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 32) { - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t result = (*first_ptr < *second_ptr) ? 
1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - } - - } break; - case 27: //vmslt - { - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (int8_t i = 0; i < c.vl; i++) { - int8_t *first_ptr = (int8_t *)vr1[i].val; - int8_t *second_ptr = (int8_t *)vr2[i].val; - int8_t result = (*first_ptr < *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - int8_t *result_ptr = (int8_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 16) { - for (int16_t i = 0; i < c.vl; i++) { - int16_t *first_ptr = (int16_t *)vr1[i].val; - int16_t *second_ptr = (int16_t *)vr2[i].val; - int16_t result = (*first_ptr < *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - int16_t *result_ptr = (int16_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 32) { - for (int32_t i = 0; i < c.vl; i++) { - int32_t *first_ptr = (int32_t *)vr1[i].val; - int32_t *second_ptr = (int32_t *)vr2[i].val; - int32_t result = (*first_ptr < *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - int32_t *result_ptr = (int32_t *)vd[i].val; - *result_ptr = result; - } - } - } break; - case 28: //vmsleu - { - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = (*first_ptr <= *second_ptr) ? 
1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = (*first_ptr <= *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 32) { - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t result = (*first_ptr <= *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - } - } break; - case 29: //vmsle - { - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (int8_t i = 0; i < c.vl; i++) { - int8_t *first_ptr = (int8_t *)vr1[i].val; - int8_t *second_ptr = (int8_t *)vr2[i].val; - int8_t result = (*first_ptr <= *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - int8_t *result_ptr = (int8_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 16) { - for (int16_t i = 0; i < c.vl; i++) { - int16_t *first_ptr = (int16_t *)vr1[i].val; - int16_t *second_ptr = (int16_t *)vr2[i].val; - int16_t result = (*first_ptr <= *second_ptr) ? 
1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - int16_t *result_ptr = (int16_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 32) { - for (int32_t i = 0; i < c.vl; i++) { - int32_t *first_ptr = (int32_t *)vr1[i].val; - int32_t *second_ptr = (int32_t *)vr2[i].val; - int32_t result = (*first_ptr <= *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - int32_t *result_ptr = (int32_t *)vd[i].val; - *result_ptr = result; - } - } - } break; - case 30: //vmsgtu - { - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = (*first_ptr > *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = (*first_ptr > *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 32) { - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t result = (*first_ptr > *second_ptr) ? 
1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - } - } break; - case 31: //vmsgt - { - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (int8_t i = 0; i < c.vl; i++) { - int8_t *first_ptr = (int8_t *)vr1[i].val; - int8_t *second_ptr = (int8_t *)vr2[i].val; - int8_t result = (*first_ptr > *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - int8_t *result_ptr = (int8_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 16) { - for (int16_t i = 0; i < c.vl; i++) { - int16_t *first_ptr = (int16_t *)vr1[i].val; - int16_t *second_ptr = (int16_t *)vr2[i].val; - int16_t result = (*first_ptr > *second_ptr) ? 1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - int16_t *result_ptr = (int16_t *)vd[i].val; - *result_ptr = result; - } - - } else if (c.vtype.vsew == 32) { - for (int32_t i = 0; i < c.vl; i++) { - int32_t *first_ptr = (int32_t *)vr1[i].val; - int32_t *second_ptr = (int32_t *)vr2[i].val; - int32_t result = (*first_ptr > *second_ptr) ? 
1 : 0; - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - int32_t *result_ptr = (int32_t *)vd[i].val; - *result_ptr = result; - } - } - } break; - } - break; - case 2: { - trace_inst->vs1 = rsrc[0]; - trace_inst->vs2 = rsrc[1]; - trace_inst->vd = rdest; - Word VLMAX = (c.vtype.vlmul * c.VLEN) / c.vtype.vsew; - - switch (func6) { - case 24: //vmandnot - { - D(3, "vmandnot"); - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t first_value = (*first_ptr & 0x1); - uint8_t second_value = (*second_ptr & 0x1); - uint8_t result = (first_value & !second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t first_value = (*first_ptr & 0x1); - uint16_t second_value = (*second_ptr & 0x1); - uint16_t result = (first_value & !second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - for (uint16_t i = c.vl; i < VLMAX; i++) { - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t first_value = (*first_ptr & 0x1); - uint32_t second_value = (*second_ptr & 0x1); - uint32_t result = (first_value & !second_value); - D(3, "Comparing " 
<< *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - for (Word i = c.vl; i < VLMAX; i++) { - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - case 25: //vmand - { - D(3, "vmand"); - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t first_value = (*first_ptr & 0x1); - uint8_t second_value = (*second_ptr & 0x1); - uint8_t result = (first_value & second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t first_value = (*first_ptr & 0x1); - uint16_t second_value = (*second_ptr & 0x1); - uint16_t result = (first_value & second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - - for (uint16_t i = c.vl; i < VLMAX; i++) { - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t first_value = (*first_ptr & 0x1); - uint32_t second_value = (*second_ptr & 0x1); - uint32_t result = (first_value & second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t 
*)vd[i].val; - *result_ptr = result; - } - - for (Word i = c.vl; i < VLMAX; i++) { - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - case 26: //vmor - { - D(3, "vmor"); - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t first_value = (*first_ptr & 0x1); - uint8_t second_value = (*second_ptr & 0x1); - uint8_t result = (first_value | second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 16) { - uint16_t *result_ptr; - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t first_value = (*first_ptr & 0x1); - uint16_t second_value = (*second_ptr & 0x1); - uint16_t result = (first_value | second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - for (uint16_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - } else if (c.vtype.vsew == 32) { - uint32_t *result_ptr; - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t first_value = (*first_ptr & 0x1); - uint32_t second_value = (*second_ptr & 0x1); - uint32_t result = (first_value | second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - D(3, "VLMAX: " << VLMAX); - for (Word i = c.vl; i 
< VLMAX; i++) { - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - case 27: //vmxor - { - D(3, "vmxor"); - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - uint8_t *result_ptr; - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t first_value = (*first_ptr & 0x1); - uint8_t second_value = (*second_ptr & 0x1); - uint8_t result = (first_value ^ second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - } else if (c.vtype.vsew == 16) { - uint16_t *result_ptr; - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t first_value = (*first_ptr & 0x1); - uint16_t second_value = (*second_ptr & 0x1); - uint16_t result = (first_value ^ second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - for (uint16_t i = c.vl; i < VLMAX; i++) { - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - uint32_t *result_ptr; - - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t first_value = (*first_ptr & 0x1); - uint32_t second_value = (*second_ptr & 0x1); - uint32_t result = (first_value ^ second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - for (Word i = c.vl; i < VLMAX; i++) { - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } 
- } - } break; - case 28: //vmornot - { - D(3, "vmornot"); - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t first_value = (*first_ptr & 0x1); - uint8_t second_value = (*second_ptr & 0x1); - uint8_t result = (first_value | !second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t first_value = (*first_ptr & 0x1); - uint16_t second_value = (*second_ptr & 0x1); - uint16_t result = (first_value | !second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - for (uint16_t i = c.vl; i < VLMAX; i++) { - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t first_value = (*first_ptr & 0x1); - uint32_t second_value = (*second_ptr & 0x1); - uint32_t result = (first_value | !second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - for (Word i = c.vl; i < VLMAX; i++) { - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - case 29: //vmnand - { - D(3, "vmnand"); - vector> &vr1 = c.vreg[rsrc[0]]; - vector> 
&vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t first_value = (*first_ptr & 0x1); - uint8_t second_value = (*second_ptr & 0x1); - uint8_t result = !(first_value & second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - uint8_t *result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t first_value = (*first_ptr & 0x1); - uint16_t second_value = (*second_ptr & 0x1); - uint16_t result = !(first_value & second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - - for (uint16_t i = c.vl; i < VLMAX; i++) { - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t first_value = (*first_ptr & 0x1); - uint32_t second_value = (*second_ptr & 0x1); - uint32_t result = !(first_value & second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - - for (Word i = c.vl; i < VLMAX; i++) { - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - case 30: //vmnor - { - D(3, "vmnor"); - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - uint8_t 
*result_ptr; - - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t first_value = (*first_ptr & 0x1); - uint8_t second_value = (*second_ptr & 0x1); - uint8_t result = !(first_value | second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - } else if (c.vtype.vsew == 16) { - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t first_value = (*first_ptr & 0x1); - uint16_t second_value = (*second_ptr & 0x1); - uint16_t result = !(first_value | second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - for (uint16_t i = c.vl; i < VLMAX; i++) { - uint16_t *result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t first_value = (*first_ptr & 0x1); - uint32_t second_value = (*second_ptr & 0x1); - uint32_t result = !(first_value | second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - for (Word i = c.vl; i < VLMAX; i++) { - uint32_t *result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - case 31: //vmxnor - { - D(3, "vmxnor"); - uint8_t *result_ptr; - - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t 
*)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t first_value = (*first_ptr & 0x1); - uint8_t second_value = (*second_ptr & 0x1); - uint8_t result = !(first_value ^ second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - } else if (c.vtype.vsew == 16) { - uint16_t *result_ptr; - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t first_value = (*first_ptr & 0x1); - uint16_t second_value = (*second_ptr & 0x1); - uint16_t result = !(first_value ^ second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - for (uint16_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - uint32_t *result_ptr; - - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t first_value = (*first_ptr & 0x1); - uint32_t second_value = (*second_ptr & 0x1); - uint32_t result = !(first_value ^ second_value); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - for (Word i = c.vl; i < VLMAX; i++) { - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - case 37: //vmul - { - D(3, "vmul"); - uint8_t *result_ptr; - - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = (*first_ptr 
* *second_ptr); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - } else if (c.vtype.vsew == 16) { - uint16_t *result_ptr; - for (uint16_t i = 0; i < c.vl; i++) { - uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = (*first_ptr * *second_ptr); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - for (uint16_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - uint32_t *result_ptr; - - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t result = (*first_ptr * *second_ptr); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - for (Word i = c.vl; i < VLMAX; i++) { - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - case 45: //vmacc - { - D(3, "vmacc"); - uint8_t *result_ptr; - - vector> &vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = (*first_ptr * *second_ptr); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint8_t *)vd[i].val; - *result_ptr += result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - } else if (c.vtype.vsew == 16) { - uint16_t *result_ptr; - for (uint16_t i = 0; i < c.vl; i++) { - 
uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = (*first_ptr * *second_ptr); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint16_t *)vd[i].val; - *result_ptr += result; - } - for (uint16_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - uint32_t *result_ptr; - - for (uint32_t i = 0; i < c.vl; i++) { - uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t result = (*first_ptr * *second_ptr); - D(3, "Comparing " << *first_ptr << " + " << *second_ptr << " = " << result); - - result_ptr = (uint32_t *)vd[i].val; - *result_ptr += result; - } - for (Word i = c.vl; i < VLMAX; i++) { - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - } - } break; - case 6: { - switch (func6) { - case 0: { - D(3, "vmadd.vx"); - uint8_t *result_ptr; - - //vector> & vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - //uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = (reg[rsrc[0]] + *second_ptr); - D(3, "Comparing " << reg[rsrc[0]] << " + " << *second_ptr << " = " << result); - - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - } else if (c.vtype.vsew == 16) { - uint16_t *result_ptr; - for (uint16_t i = 0; i < c.vl; i++) { - //uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = (reg[rsrc[0]] + *second_ptr); - D(3, "Comparing " << reg[rsrc[0]] << " + " << *second_ptr << " = " << result); - - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - for (uint16_t i = c.vl; i 
< VLMAX; i++) { - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - uint32_t *result_ptr; - - for (uint32_t i = 0; i < c.vl; i++) { - //uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t result = (reg[rsrc[0]] + *second_ptr); - D(3, "Comparing " << reg[rsrc[0]] << " + " << *second_ptr << " = " << result); - - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - for (Word i = c.vl; i < VLMAX; i++) { - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - case 37: //vmul.vx - { - D(3, "vmul.vx"); - uint8_t *result_ptr; - - //vector> & vr1 = c.vreg[rsrc[0]]; - vector> &vr2 = c.vreg[rsrc[1]]; - vector> &vd = c.vreg[rdest]; - if (c.vtype.vsew == 8) { - for (uint8_t i = 0; i < c.vl; i++) { - //uint8_t *first_ptr = (uint8_t *)vr1[i].val; - uint8_t *second_ptr = (uint8_t *)vr2[i].val; - uint8_t result = (reg[rsrc[0]] * *second_ptr); - D(3, "Comparing " << reg[rsrc[0]] << " + " << *second_ptr << " = " << result); - - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = result; - } - for (uint8_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint8_t *)vd[i].val; - *result_ptr = 0; - } - } else if (c.vtype.vsew == 16) { - uint16_t *result_ptr; - for (uint16_t i = 0; i < c.vl; i++) { - //uint16_t *first_ptr = (uint16_t *)vr1[i].val; - uint16_t *second_ptr = (uint16_t *)vr2[i].val; - uint16_t result = (reg[rsrc[0]] * *second_ptr); - D(3, "Comparing " << reg[rsrc[0]] << " + " << *second_ptr << " = " << result); - - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = result; - } - for (uint16_t i = c.vl; i < VLMAX; i++) { - result_ptr = (uint16_t *)vd[i].val; - *result_ptr = 0; - } - - } else if (c.vtype.vsew == 32) { - uint32_t *result_ptr; - - for (uint32_t i = 0; i < c.vl; i++) { - //uint32_t *first_ptr = (uint32_t *)vr1[i].val; - uint32_t *second_ptr = (uint32_t *)vr2[i].val; - uint32_t result = (reg[rsrc[0]] * *second_ptr); - D(3, "Comparing " 
<< reg[rsrc[0]] << " + " << *second_ptr << " = " << result); - - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = result; - } - for (Word i = c.vl; i < VLMAX; i++) { - result_ptr = (uint32_t *)vd[i].val; - *result_ptr = 0; - } - } - } break; - } - } break; - case 7: { - is_vec = true; - c.vtype.vill = 0; //TODO - c.vtype.vediv = vediv; - c.vtype.vsew = vsew; - c.vtype.vlmul = vlmul; - - Word VLMAX = (vlmul * c.VLEN) / vsew; - D(3, "lmul:" << vlmul << " sew:" << vsew << " ediv: " << vediv << "rsrc" << reg[rsrc[0]] << "VLMAX" << VLMAX); - - if (reg[rsrc[0]] <= VLMAX) { - c.vl = reg[rsrc[0]]; - } else if (reg[rsrc[0]] < 2 * VLMAX) { - c.vl = (int)ceil((reg[rsrc[0]] * 1.0) / 2.0); - D(3, "Length:" << c.vl << ceil(reg[rsrc[0]] / 2)); - } else if (reg[rsrc[0]] >= (2 * VLMAX)) { - c.vl = VLMAX; - } - reg[rdest] = c.vl; - D(3, "VL:" << reg[rdest]); - - Word regNum(0); - - c.vreg.clear(); - for (int j = 0; j < 32; j++) { - c.vreg.push_back(vector>()); - for (int i = 0; i < (c.VLEN / vsew); ++i) { - int *elem_ptr = (int *)malloc(vsew / 8); - for (int f = 0; f < (vsew / 32); f++) - elem_ptr[f] = 0; - c.vreg[j].push_back(Reg(c.id, regNum++, (char *)elem_ptr)); - } - } - } break; - default: { - cout << "default???\n" - << flush; - } - } - break; - case VL: { - is_vec = true; - D(3, "Executing vector load"); - VLMAX = (c.vtype.vlmul * c.VLEN) / c.vtype.vsew; - D(3, "lmul: " << c.vtype.vlmul << " VLEN:" << c.VLEN << "sew: " << c.vtype.vsew); - D(3, "src: " << rsrc[0] << " " << reg[rsrc[0]]); - D(3, "dest" << rdest); - D(3, "width" << vlsWidth); - vector> &vd = c.vreg[rdest]; - - switch (vlsWidth) { - case 6: //load word and unit strided (not checking for unit stride) - { - for (Word i = 0; i < c.vl; i++) { - memAddr = ((reg[rsrc[0]]) & 0xFFFFFFFC) + (i * c.vtype.vsew / 8); - data_read = c.core->mem.read(memAddr, c.supervisorMode); - D(3, "Mem addr: " << std::hex << memAddr << " Data read " << data_read); - int *result_ptr = (int *)vd[i].val; - *result_ptr = data_read; - - 
trace_inst->is_lw = true; - trace_inst->mem_addresses[i] = memAddr; - } - /*for(Word i = c.vl; i < VLMAX; i++){ - int * result_ptr = (int *) vd[i].val; - *result_ptr = 0; - }*/ - - D(3, "Vector Register state ----:"); - // for(int i=0; i < 32; i++) - // { - // for(int j=0; j< c.vl; j++) - // { - // cout << "starting iter" << endl; - // if (c.vtype.vsew == 8) - // { - // uint8_t * ptr_val = (uint8_t *) c.vreg[i][j].val; - // std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; - // } else if (c.vtype.vsew == 16) - // { - // uint16_t * ptr_val = (uint16_t *) c.vreg[i][j].val; - // std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; - // } else if (c.vtype.vsew == 32) - // { - // uint32_t * ptr_val = (uint32_t *) c.vreg[i][j].val; - // std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl; - // } - - // cout << "Finished iter" << endl; - // } - // } - - // cout << "Finished loop" << endl; - } - // cout << "aaaaaaaaaaaaaaaaaaaaaa" << endl; - break; - default: { - cout << "Serious default??\n" - << flush; - } break; - } - break; - } break; - case VS: - is_vec = true; - VLMAX = (c.vtype.vlmul * c.VLEN) / c.vtype.vsew; - for (Word i = 0; i < c.vl; i++) { - // cout << "iter" << endl; - ++c.stores; - memAddr = reg[rsrc[0]] + (i * c.vtype.vsew / 8); - // std::cout << "STORE MEM ADDRESS *** : " << std::hex << memAddr << "\n"; - - trace_inst->is_sw = true; - trace_inst->mem_addresses[i] = memAddr; - - switch (vlsWidth) { - case 6: //store word and unit strided (not checking for unit stride) - { - uint32_t *ptr_val = (uint32_t *)c.vreg[vs3][i].val; - D(3, "value: " << flush << (*ptr_val) << flush); - c.core->mem.write(memAddr, *ptr_val, c.supervisorMode, 4); - D(3, "store: " << memAddr << " value:" << *ptr_val << flush); - } break; - default: - cout << "ERROR: UNSUPPORTED S INST\n" - << flush; - std::abort(); - } - // cout << "Loop finished" << endl; - // c.memAccesses.push_back(Warp::MemAccess(true, memAddr)); - } - 
- // cout << "After for loop" << endl; - break; - default: - D(3, "pc: " << hex << (c.pc - 4)); - D(3, "aERROR: Unsupported instruction: " << *this); - std::abort(); - } - - // break; - // cout << "outside case" << endl << flush; - } - - // std::cout << "finished instruction" << endl << flush; - - c.activeThreads = nextActiveThreads; - - // if (nextActiveThreads != 0) - // { - // for (int i = 7; i >= c.activeThreads; i--) - // { - // c.tmask[i] = c.tmask[i] && false; - // } - // } - - // //std::cout << "new thread mask: "; - // for (int i = 0; i < c.tmask.size(); ++i) //std::cout << " " << c.tmask[i]; - // //std::cout << "\n"; - - // This way, if pc was set by a side effect (such as interrupt), it will - // retain its new value. - if (pcSet) { - c.pc = nextPc; - D(3, "Next PC: " << hex << nextPc << dec); - } - - if (nextActiveThreads > c.reg.size()) { - cerr << "Error: attempt to spawn " << nextActiveThreads << " threads. " - << c.reg.size() << " available.\n"; - abort(); - } -} diff --git a/simX/mem.cpp b/simX/mem.cpp index 870f9946..f2cb2bab 100644 --- a/simX/mem.cpp +++ b/simX/mem.cpp @@ -1,168 +1,147 @@ -/******************************************************************************* - HARPtools by Chad D. 
Kersey, Summer 2011 -*******************************************************************************/ #include #include #include #include #include #include -// #include -#include "include/debug.h" -#include "include/types.h" -#include "include/util.h" -#include "include/mem.h" -#include "include/core.h" +#include "debug.h" +#include "types.h" +#include "util.h" +#include "mem.h" +#include "core.h" -using namespace std; -using namespace Harp; +using namespace vortex; -RamMemDevice::RamMemDevice(const char *filename, Size wordSize) : - wordSize(wordSize), contents() -{ - ifstream input(filename); +RamMemDevice::RamMemDevice(const char *filename, Size wordSize) + : wordSize(wordSize), contents() { + std::ifstream input(filename); if (!input) { - cout << "Error reading file \"" << filename << "\" into RamMemDevice.\n"; + std::cout << "Error reading file \"" << filename << "\" into RamMemDevice.\n"; std::abort(); } - do { contents.push_back(input.get()); } while (input); + do { + contents.push_back(input.get()); + } while (input); - while (contents.size() % wordSize) contents.push_back(0x00); + while (contents.size() % wordSize) + contents.push_back(0x00); } -RamMemDevice::RamMemDevice(Size size, Size wordSize) : - wordSize(wordSize), contents(size) {} +RamMemDevice::RamMemDevice(Size size, Size wordSize) + : wordSize(wordSize), contents(size) {} void RomMemDevice::write(Addr, Word) { - cout << "Attempt to write to ROM.\n"; + std::cout << "Attempt to write to ROM.\n"; std::abort(); } Word RamMemDevice::read(Addr addr) { - D(2, "RAM read, addr=0x" << hex << addr); - Word w = readWord(contents, addr, wordSize - addr%wordSize); + D(2, "RAM read, addr=0x" << std::hex << addr); + Word w = readWord(contents, addr, wordSize - addr % wordSize); return w; } void RamMemDevice::write(Addr addr, Word w) { - D(2, "RAM write, addr=0x" << hex << addr); - writeWord(contents, addr, wordSize - addr%wordSize, w); + D(2, "RAM write, addr=0x" << std::hex << addr); + writeWord(contents, 
addr, wordSize - addr % wordSize, w); } MemDevice &MemoryUnit::ADecoder::doLookup(Addr a, Size &bit) { - if (range == 0 || (a&((1ll<= range) { - ADecoder *p(((a>>bit)&1)?oneChild:zeroChild); - if (p) { bit--; return p->doLookup(a, bit); } - else {cout << "lookup of 0x" << hex << a << " failed.\n"; - throw BadAddress();} + if (range == 0 || (a & ((1ll << bit) - 1)) >= range) { + ADecoder *p(((a >> bit) & 1) ? oneChild : zeroChild); + if (p) { + bit--; + return p->doLookup(a, bit); + } else { + std::cout << "lookup of 0x" << std::hex << a << " failed.\n"; + throw BadAddress(); + } } else { return *md; } } -void MemoryUnit::ADecoder::map(Addr a, MemDevice &m, Size r, Size bit) -{ +void MemoryUnit::ADecoder::map(Addr a, MemDevice &m, Size r, Size bit) { if ((1llu << bit) <= r) { md = &m; range = m.size(); } else { - ADecoder *&child(((a>>bit)&1)?oneChild:zeroChild); - if (!child) child = new ADecoder(); - child->map(a, m, r, bit-1); + ADecoder *&child(((a >> bit) & 1) ? oneChild : zeroChild); + if (!child) + child = new ADecoder(); + child->map(a, m, r, bit - 1); } } Byte *MemoryUnit::ADecoder::getPtr(Addr a, Size sz, Size wordSize) { Size bit = wordSize - 1; MemDevice &m(doLookup(a, bit)); - a &= (2< "; - // std::cout << "Data: " << m.read(a) << "\n"; + a &= (2 << bit) - 1; return m.read(a); } -void MemoryUnit::ADecoder::write(Addr a, Word w, bool sup, Size wordSize) { +void MemoryUnit::ADecoder::write(Addr a, Word w, bool /*sup*/, Size wordSize) { Size bit = wordSize - 1; MemDevice &m(doLookup(a, bit)); - RAM & r = (RAM &) m; - // a &= (2<::iterator i; - if ((i = tlb.find(vAddr/pageSize)) != tlb.end()) { + std::unordered_map::iterator i; + if ((i = tlb.find(vAddr / pageSize)) != tlb.end()) { TLBEntry &t = i->second; - if (t.flags & flagMask) return t; + if (t.flags & flagMask) + return t; else { - D(2, "Page fault on addr 0x" << hex << vAddr << "(bad flags)"); + D(2, "Page fault on addr 0x" << std::hex << vAddr << "(bad flags)"); throw PageFault(vAddr, false); } } 
else { - D(2, "Page fault on addr 0x" << hex << vAddr << "(not in TLB)"); + D(2, "Page fault on addr 0x" << std::hex << vAddr << "(not in TLB)"); throw PageFault(vAddr, true); } } -#ifdef EMU_INSTRUMENTATION -Addr MemoryUnit::virtToPhys(Addr vAddr) { - TLBEntry t = tlbLookup(vAddr, 077); - return t.pfn*pageSize + vAddr%pageSize; -} -#endif - Word MemoryUnit::read(Addr vAddr, bool sup) { Addr pAddr; if (disableVm) { pAddr = vAddr; } else { - Word flagMask = sup?8:1; + Word flagMask = sup ? 8 : 1; TLBEntry t = tlbLookup(vAddr, flagMask); - pAddr = t.pfn*pageSize + vAddr%pageSize; + pAddr = t.pfn * pageSize + vAddr % pageSize; } // std::cout << "MU::write: About to read: " << std::hex << pAddr << " = " << (ad.read(pAddr, sup, 8*addrBytes)) << " with " << std::dec << (8*addrBytes) << "\n"; - return ad.read(pAddr, sup, 8*addrBytes); + return ad.read(pAddr, sup, 8 * addrBytes); } Word MemoryUnit::fetch(Addr vAddr, bool sup) { @@ -171,12 +150,12 @@ Word MemoryUnit::fetch(Addr vAddr, bool sup) { if (disableVm) { pAddr = vAddr; } else { - Word flagMask = sup?32:4; + Word flagMask = sup ? 32 : 4; TLBEntry t = tlbLookup(vAddr, flagMask); - pAddr = t.pfn*pageSize + vAddr%pageSize; + pAddr = t.pfn * pageSize + vAddr % pageSize; } - Word instruction = ad.read(pAddr, sup, 8*addrBytes); + Word instruction = ad.read(pAddr, sup, 8 * addrBytes); return instruction; } @@ -187,25 +166,26 @@ void MemoryUnit::write(Addr vAddr, Word w, bool sup, Size bytes) { if (disableVm) { pAddr = vAddr; } else { - Word flagMask = sup?16:2; + Word flagMask = sup ? 
16 : 2; TLBEntry t = tlbLookup(vAddr, flagMask); - pAddr = t.pfn*pageSize + vAddr%pageSize; + pAddr = t.pfn * pageSize + vAddr % pageSize; } // std::cout << "MU::write: About to write: " << std::hex << pAddr << " = " << w << " with " << std::dec << 8*bytes << "\n"; - ad.write(pAddr, w, sup, 8*bytes); + ad.write(pAddr, w, sup, 8 * bytes); // std::cout << std::hex << "reading same address: " << (this->read(vAddr, sup)) << "\n"; } void MemoryUnit::tlbAdd(Addr virt, Addr phys, Word flags) { - D(1, "tlbAdd(0x" << hex << virt << ", 0x" << phys << ", 0x" << flags << ')'); - tlb[virt/pageSize] = TLBEntry(phys/pageSize, flags); + D(1, "tlbAdd(0x" << std::hex << virt << ", 0x" << phys << ", 0x" << flags << ')'); + tlb[virt / pageSize] = TLBEntry(phys / pageSize, flags); } void MemoryUnit::tlbRm(Addr va) { - if (tlb.find(va/pageSize) != tlb.end()) tlb.erase(tlb.find(va/pageSize)); + if (tlb.find(va / pageSize) != tlb.end()) + tlb.erase(tlb.find(va / pageSize)); } -void *Harp::consoleInputThread(void* arg_vp) { +void *vortex::consoleInputThread(void */*arg_vp*/) { // ConsoleMemDevice *arg = (ConsoleMemDevice *)arg_vp; // char c; // while (cin) { @@ -219,181 +199,175 @@ void *Harp::consoleInputThread(void* arg_vp) { return nullptr; } -// ConsoleMemDevice::ConsoleMemDevice(Size wS, std::ostream &o, Core &core, -// bool batch) : -// wordSize(wS), output(o), core(core), cBuf() -// { -// // Create a console input thread if we are running in interactive mode. 
-// if (!batch) { -// pthread_t *thread = new pthread_t; -// pthread_create(thread, NULL, consoleInputThread, (void*)this); -// } -// pthread_mutex_init(&cBufLock, NULL); -// } - -// void ConsoleMemDevice::poll() { -// pthread_mutex_lock(&cBufLock); -// if (!cBuf.empty()) core.interrupt(8); -// pthread_mutex_unlock(&cBufLock); -// } - Word DiskControllerMemDevice::read(Addr a) { - switch (a/8) { - case 0: return curDisk; - case 1: return curBlock; - case 2: return disks[curDisk].blocks * blockSize; - case 3: return physAddr; - case 4: return command; - case 5: return status; - default: - cout << "Attempt to read invalid disk controller register.\n"; - std::abort(); + switch (a / 8) { + case 0: + return curDisk; + case 1: + return curBlock; + case 2: + return disks[curDisk].blocks * blockSize; + case 3: + return physAddr; + case 4: + return command; + case 5: + return status; + default: + std::cout << "Attempt to read invalid disk controller register.\n"; + std::abort(); } } void DiskControllerMemDevice::write(Addr a, Word w) { - switch (a/8) { - case 0: if (w <= disks.size()) { - curDisk = w; - status = OK; - } else { - status = INVALID_DISK; - } - break; - case 1: if (w < disks[curDisk].blocks) { - curBlock = w; - } else { - status = INVALID_BLOCK; - } - break; - case 2: nBlocks = w >= disks[curDisk].blocks?disks[curDisk].blocks - 1 : w; - status = OK; - break; - case 3: physAddr = w; - status = OK; - break; - case 4: if (w == 0) { - } else { - } - cout << "TODO: Implement disk read and write!\n"; - break; + switch (a / 8) { + case 0: + if (w <= disks.size()) { + curDisk = w; + status = OK; + } else { + status = INVALID_DISK; + } + break; + case 1: + if (w < disks[curDisk].blocks) { + curBlock = w; + } else { + status = INVALID_BLOCK; + } + break; + case 2: + nBlocks = w >= disks[curDisk].blocks ? 
disks[curDisk].blocks - 1 : w; + status = OK; + break; + case 3: + physAddr = w; + status = OK; + break; + case 4: + if (w == 0) { + } else { + } + std::cout << "TODO: Implement disk read and write!\n"; + break; } } static uint32_t hti_old(char c) { - if (c >= 'A' && c <= 'F') - return c - 'A' + 10; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - return c - '0'; - } + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + return c - '0'; +} static uint32_t hToI_old(char *c, uint32_t size) { - uint32_t value = 0; - for (uint32_t i = 0; i < size; i++) { - value += hti_old(c[i]) << ((size - i - 1) * 4); - } - return value; + uint32_t value = 0; + for (uint32_t i = 0; i < size; i++) { + value += hti_old(c[i]) << ((size - i - 1) * 4); + } + return value; } void RAM::loadHexImpl(std::string path) { - this->clear(); - FILE *fp = fopen(&path[0], "r"); - if(fp == 0){ - std::cout << path << " not found" << std::endl; - } + this->clear(); + FILE *fp = fopen(&path[0], "r"); + if (fp == 0) { + std::cout << path << " not found" << std::endl; + } - //Preload 0x0 <-> 0x80000000 jumps - ((uint32_t*)this->get(0))[0] = 0xf1401073; - ((uint32_t*)this->get(0))[1] = 0xf1401073; - ((uint32_t*)this->get(0))[2] = 0x30101073; - ((uint32_t*)this->get(0))[3] = 0x800000b7; - ((uint32_t*)this->get(0))[4] = 0x000080e7; - - ((uint32_t*)this->get(0x80000000))[0] = 0x00000097; + //Preload 0x0 <-> 0x80000000 jumps + ((uint32_t *)this->get(0))[0] = 0xf1401073; + ((uint32_t *)this->get(0))[1] = 0xf1401073; + ((uint32_t *)this->get(0))[2] = 0x30101073; + ((uint32_t *)this->get(0))[3] = 0x800000b7; + ((uint32_t *)this->get(0))[4] = 0x000080e7; - ((uint32_t*)this->get(0xb0000000))[0] = 0x01C02023; - - ((uint32_t*)this->get(0xf00fff10))[0] = 0x12345678; + ((uint32_t *)this->get(0x80000000))[0] = 0x00000097; - ((uint32_t*)this->get(0x70000000))[0] = 0x00008067; + ((uint32_t *)this->get(0xb0000000))[0] = 0x01C02023; - { - uint32_t init_addr = 0x70000004; - 
for (int off = 0; off < 1024; off+=4) { - uint32_t new_addr = init_addr+off; - ((uint32_t*)this->get(new_addr))[0] = 0x00000000; + ((uint32_t *)this->get(0xf00fff10))[0] = 0x12345678; + + ((uint32_t *)this->get(0x70000000))[0] = 0x00008067; + + { + uint32_t init_addr = 0x70000004; + for (int off = 0; off < 1024; off += 4) { + uint32_t new_addr = init_addr + off; + ((uint32_t *)this->get(new_addr))[0] = 0x00000000; + } + } + + { + uint32_t init_addr = 0x71000000; + for (int off = 0; off < 1024; off += 4) { + uint32_t new_addr = init_addr + off; + ((uint32_t *)this->get(new_addr))[0] = 0x00000000; + } + } + + { + uint32_t init_addr = 0x72000000; + for (int off = 0; off < 1024; off += 4) { + uint32_t new_addr = init_addr + off; + ((uint32_t *)this->get(new_addr))[0] = 0x00000000; + } + } + + fseek(fp, 0, SEEK_END); + uint32_t size = ftell(fp); + fseek(fp, 0, SEEK_SET); + char *content = new char[size]; + int x = fread(content, 1, size, fp); + + if (!x) { + std::cout << "COULD NOT READ FILE\n"; + std::abort(); + } + + int offset = 0; + char *line = content; + // std::cout << "WHTA\n"; + while (1) { + if (line[0] == ':') { + uint32_t byteCount = hToI_old(line + 1, 2); + uint32_t nextAddr = hToI_old(line + 3, 4) + offset; + uint32_t key = hToI_old(line + 7, 2); + switch (key) { + case 0: + for (uint32_t i = 0; i < byteCount; i++) { + unsigned add = nextAddr + i; + *(this->get(add)) = hToI_old(line + 9 + i * 2, 2); + // std::cout << "lhi: Address: " << std::hex <<(add) << "\tValue: " << std::hex << hToI_old(line + 9 + i * 2, 2) << std::endl; } + break; + case 2: + // cout << offset << std::endl; + offset = hToI_old(line + 9, 4) << 4; + break; + case 4: + // cout << offset << std::endl; + offset = hToI_old(line + 9, 4) << 16; + break; + default: + // cout << "??? 
" << key << std::endl; + break; } + } - { - uint32_t init_addr = 0x71000000; - for (int off = 0; off < 1024; off+=4) { - uint32_t new_addr = init_addr+off; - ((uint32_t*)this->get(new_addr))[0] = 0x00000000; - } - } + while (*line != '\n' && size != 0) { + line++; + size--; + } - { - uint32_t init_addr = 0x72000000; - for (int off = 0; off < 1024; off+=4) { - uint32_t new_addr = init_addr+off; - ((uint32_t*)this->get(new_addr))[0] = 0x00000000; - } - } + if (size <= 1) + break; - fseek(fp, 0, SEEK_END); - uint32_t size = ftell(fp); - fseek(fp, 0, SEEK_SET); - char* content = new char[size]; - int x = fread(content, 1, size, fp); + line++; + size--; + } - if (!x) { - std::cout << "COULD NOT READ FILE\n"; std::abort(); - } - - int offset = 0; - char* line = content; - // std::cout << "WHTA\n"; - while (1) { - if (line[0] == ':') { - uint32_t byteCount = hToI_old(line + 1, 2); - uint32_t nextAddr = hToI_old(line + 3, 4) + offset; - uint32_t key = hToI_old(line + 7, 2); - switch (key) { - case 0: - for (uint32_t i = 0; i < byteCount; i++) { - unsigned add = nextAddr + i; - *(this->get(add)) = hToI_old(line + 9 + i * 2, 2); - // std::cout << "lhi: Address: " << std::hex <<(add) << "\tValue: " << std::hex << hToI_old(line + 9 + i * 2, 2) << std::endl; - } - break; - case 2: - // cout << offset << endl; - offset = hToI_old(line + 9, 4) << 4; - break; - case 4: - // cout << offset << endl; - offset = hToI_old(line + 9, 4) << 16; - break; - default: - // cout << "??? 
" << key << endl; - break; - } - } - - while (*line != '\n' && size != 0) { - line++; - size--; - } - - if (size <= 1) - break; - - line++; - size--; - } - - if (content) - delete[] content; - } \ No newline at end of file + if (content) + delete[] content; +} \ No newline at end of file diff --git a/simX/include/mem.h b/simX/mem.h similarity index 87% rename from simX/include/mem.h rename to simX/mem.h index 96d1d38c..45fe16bc 100644 --- a/simX/include/mem.h +++ b/simX/mem.h @@ -1,18 +1,14 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ -#ifndef __MEM_H -#define __MEM_H +#pragma once #include #include #include -#include +#include // #include #include "types.h" -namespace Harp { +namespace vortex { void *consoleInputThread(void *); struct BadAddress {}; @@ -53,32 +49,6 @@ namespace Harp { }; class Core; - // class ConsoleMemDevice : public MemDevice { - // public: - // ConsoleMemDevice(Size wS, std::ostream &o, Core &core, bool batch = false); - // ~ConsoleMemDevice() {} - - // //virtual Size wordSize() const { return wordSize; } - // virtual Size size() const { return wordSize; } - // virtual Word read(Addr) { pthread_mutex_lock(&cBufLock); - // char c = cBuf.front(); - // cBuf.pop(); - // pthread_mutex_unlock(&cBufLock); - // return Word(c); } - // virtual void write(Addr a, Word w) { output << char(w); } - - // void poll(); - - // friend void *Harp::consoleInputThread(void *); - - // private: - // std::ostream &output; - // Size wordSize; - // Core &core; - - // std::queue cBuf; - // pthread_mutex_t cBufLock; - // }; class DiskControllerMemDevice : public MemDevice { public: @@ -130,10 +100,6 @@ namespace Harp { void tlbRm(Addr va); void tlbFlush() { tlb.clear(); } -#ifdef EMU_INSTRUMENTATION - Addr virtToPhys(Addr va); -#endif - private: class ADecoder { public: @@ -162,7 +128,7 @@ namespace Harp { 
ADecoder ad; - std::map tlb; + std::unordered_map tlb; TLBEntry tlbLookup(Addr vAddr, Word flagMask); bool disableVm; @@ -330,7 +296,4 @@ namespace Harp { void loadHexImpl(std::string path); }; -} - - -#endif +} \ No newline at end of file diff --git a/simX/simX.cpp b/simX/simX.cpp index 799d053f..7c2dda86 100644 --- a/simX/simX.cpp +++ b/simX/simX.cpp @@ -1,176 +1,78 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ #include #include #include #include #include #include - - -#include "include/debug.h" -#include "include/types.h" -#include "include/core.h" -#include "include/enc.h" -#include "include/instruction.h" -#include "include/mem.h" -#include "include/obj.h" -#include "include/archdef.h" - -#include "include/args.h" -#include "include/help.h" - -#include - #include -////////////// -///////////// +#include "debug.h" +#include "types.h" +#include "core.h" +#include "args.h" -using namespace Harp; -using namespace HarpTools; -using namespace std; +using namespace vortex; -enum HarpToolMode { HARPTOOL_MODE_ASM, HARPTOOL_MODE_DISASM, HARPTOOL_MODE_EMU, - HARPTOOL_MODE_LD, HARPTOOL_MODE_HELP }; +int main(int argc, char **argv) { -HarpToolMode findMode(int argc, char** argv) { - bool mode_asm, mode_disasm, mode_emu, mode_ld, mode_help; + std::string archString("rv32i"); + int num_cores(1); + int num_warps(NUM_WARPS); + int num_threads(NUM_THREADS); + std::string imgFileName; + bool showHelp(false); + bool showStats(false); - if (argc == 0) return HARPTOOL_MODE_HELP; + /* Read the command line arguments. 
*/ + CommandLineArgFlag fh("-h", "--help", "", showHelp); + CommandLineArgSetter fa("-a", "--arch", "", archString); + CommandLineArgSetter fi("-i", "--image", "", imgFileName); + CommandLineArgSetter fc("-c", "--cores", "", num_cores); + CommandLineArgSetter fw("-w", "--warps", "", num_warps); + CommandLineArgSetter ft("-t", "--threads", "", num_threads); + CommandLineArgFlag fs("-s", "--stats", "", showStats); - CommandLineArgFlag fh("--help", "-h", "", mode_help); - CommandLineArgFlag fa("-A", "--asm", "", mode_asm); - CommandLineArgFlag fd("-D", "--disasm", "", mode_disasm); - CommandLineArgFlag fe("-E", "--emu", "", mode_emu); - CommandLineArgFlag fl("-L", "--ld", "", mode_ld); + CommandLineArg::readArgs(argc - 1, argv + 1); - CommandLineArg::readArgs((argc == 0?0:1), argv); - CommandLineArg::clearArgs(); - - if (mode_asm) return HARPTOOL_MODE_ASM; - if (mode_disasm) return HARPTOOL_MODE_DISASM; - if (mode_emu) return HARPTOOL_MODE_EMU; - if (mode_ld) return HARPTOOL_MODE_LD; - return HARPTOOL_MODE_HELP; -} - -int emu_main(int argc, char **argv) { - string archString("rv32i"); - string imgFileName("a.dsfsdout.bin"); - bool showHelp(false), showStats(false), basicMachine(true); - int max_warps(NUM_WARPS); - int max_threads(NUM_THREADS); - - /* Read the command line arguments. */ - CommandLineArgFlag fh("-h", "--help", "", showHelp); - CommandLineArgSetterfc("-c", "--core", "", imgFileName); - CommandLineArgSetterfa("-a", "--arch", "", archString); - CommandLineArgFlag fs("-s", "--stats", "", showStats); - CommandLineArgFlag fb("-b", "--basic", "", basicMachine); - CommandLineArgSetter fw("-w", "--warps", "", max_warps); - CommandLineArgSetter ft("-t", "--threads", "", max_threads); - - CommandLineArg::readArgs(argc, argv); - - if (showHelp) { - cout << Help::emuHelp; - return 0; - } - - /* Instantiate a Core, RAM, and console output. 
*/ - ArchDef arch(archString, max_warps, max_threads); - - Decoder *dec; - - switch (arch.getEncChar()) { - case 'b': dec = new WordDecoder(arch); break; - case 'w': dec = new WordDecoder(arch); break; - case 'r': dec = new WordDecoder(arch); break; - default: - cout << "Unrecognized decoder type: '" << arch.getEncChar() << "'.\n"; - return 1; - } - - // std::cout << "TESTING: " << tests[t] << "\n"; - - MemoryUnit mu(4096, arch.getWordSize(), basicMachine); - Core core(arch, *dec, mu/*, ID in multicore implementations*/); - - // RamMemDevice mem(imgFileName.c_str(), arch.getWordSize()); - RAM old_ram; - old_ram.loadHexImpl(imgFileName.c_str()); - // old_ram.loadHexImpl(tests[t]); - // MemDevice * memory = &old_ram; - - // ConsoleMemDevice console(arch.getWordSize(), cout, core, batch); - mu.attach(old_ram, 0); - // mu.attach(console, 1ll<<(arch.getWordSize()*8 - 1)); - // mu.attach(console, 0xf0000000); - - // core.w[0].pc = 0x8000007c; // If I want to start at a specific location - // std::cout << "ABOUT TO START\n"; - // bool count_down = false; - // int cycles_left; - // while (!count_down || (count_down && (cycles_left == 0))) - // { - - // if (count_down) - // { - // cycles_left--; - // } - - // console.poll(); - // core.step(); - // bool run = core.running(); - // if (!run) - // { - // count_down = true; - // } - // } - - struct stat hello; - fstat(0, &hello); - - while (core.running()) {core.step(); } - - if (showStats) core.printStats(); - - - std::cout << "\n"; - return 0; -} - - -int main(int argc, char** argv) { - - Verilated::commandArgs(argc, argv); - Verilated::traceEverOn(true); - - try { - switch (findMode(argc - 1, argv + 1)) { - case HARPTOOL_MODE_ASM: - cout << "ASM not supported\n"; - return -1; - case HARPTOOL_MODE_DISASM: - cout << "DISASM not supported\n"; - return -1; - case HARPTOOL_MODE_EMU: - return emu_main(argc - 2, argv + 2); - case HARPTOOL_MODE_LD: - cout << "LD not supported\n"; - return -1; - case HARPTOOL_MODE_HELP: - 
[[fallthrough]]; - default: - cout << "Usage:\n" << Help::mainHelp; - return 0; - } - } catch (BadArg ba) { - cout << "Unrecognized argument \"" << ba.arg << "\".\n"; - return 1; + if (showHelp || imgFileName.empty()) { + std::cout << "Vortex emulator command line arguments:\n" + " -i, --image Program RAM image\n" + " -c, --cores Number of cores\n" + " -w, --warps Number of warps\n" + " -t, --threads Number of threads\n" + " -a, --arch Architecture string\n" + " -s, --stats Print stats on exit.\n"; + return 0; } + ArchDef arch(archString, num_cores, num_warps, num_threads); + + Decoder decoder(arch); + MemoryUnit mu(4096, arch.getWordSize(), true); + + RAM old_ram; + old_ram.loadHexImpl(imgFileName.c_str()); + mu.attach(old_ram, 0); + + struct stat hello; + fstat(0, &hello); + + std::vector> cores(num_cores); + for (int i = 0; i < num_cores; ++i) { + cores[i] = std::make_shared(arch, decoder, mu); + } + + bool running; + + do { + running = false; + for (int i = 0; i < num_cores; ++i) { + if (!cores[i]->running()) + continue; + running = true; + cores[i]->step(); + } + } while (running); + return 0; } diff --git a/simX/test_benchmark.sh b/simX/test_benchmark.sh index 95315e98..72f3b1ba 100755 --- a/simX/test_benchmark.sh +++ b/simX/test_benchmark.sh @@ -4,6 +4,6 @@ echo start > results.txt make printf "Fasten your seatbelts ladies and gentelmen!!\n\n\n\n" -#cd obj_dir && ./Vcache_simX -E -a rv32i --core ../benchmarks/vector/vecadd/vx_vec_vecadd.hex -s -b 1> emulator.debug -#cd obj_dir && ./Vcache_simX -E -a rv32i --core ../benchmarks/vector/saxpy/vx_vec_saxpy.hex -s -b 1> emulator.debug -cd obj_dir && ./Vcache_simX -E -a rv32i --core ../benchmarks/vector/sgemm_nn/vx_vec_sgemm_nn.hex -s -b 1> emulator.debug +#./simX -a rv32i -i ../benchmarks/vector/vecadd/vx_vec_vecadd.hex -s 1> emulator.debug +#./simX -a rv32i -i ../benchmarks/vector/saxpy/vx_vec_saxpy.hex -s 1> emulator.debug +./simX -a rv32i -i ../benchmarks/vector/sgemm_nn/vx_vec_sgemm_nn.hex -s 1> emulator.debug 
diff --git a/simX/test_riscv.sh b/simX/test_riscv.sh index 39d00b1d..58fe3949 100755 --- a/simX/test_riscv.sh +++ b/simX/test_riscv.sh @@ -1,143 +1,142 @@ #!/bin/bash make -cd obj_dir echo start > results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-add.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-add.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-add.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-add.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-addi.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-addi.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-addi.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-addi.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-and.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-and.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-and.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-and.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-andi.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-andi.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-andi.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-andi.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-auipc.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-auipc.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-auipc.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-auipc.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-beq.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-beq.hex -s -b >> results.txt +echo 
./../benchmarks/isa/riscv_tests/rv32ui-p-beq.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-beq.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-bge.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-bge.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-bge.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-bge.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-bgeu.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-bgeu.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-bgeu.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-bgeu.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-blt.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-blt.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-blt.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-blt.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-bltu.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-bltu.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-bltu.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-bltu.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-bne.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-bne.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-bne.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-bne.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-jal.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-jal.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-jal.hex >> results.txt +./simX -a 
rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-jal.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-jalr.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-jalr.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-jalr.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-jalr.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-lb.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-lb.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-lb.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-lb.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-lbu.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-lbu.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-lbu.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-lbu.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-lh.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-lh.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-lh.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-lh.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-lhu.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-lhu.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-lhu.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-lhu.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-lui.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-lui.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-lui.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-lui.hex -s >> results.txt -echo 
./../benchmarks/riscv_tests/rv32ui-p-lw.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-lw.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-lw.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-lw.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-or.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-or.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-or.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-or.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-ori.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-ori.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-ori.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-ori.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-sb.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-sb.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-sb.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-sb.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-sh.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-sh.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-sh.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-sh.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-simple.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-simple.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-simple.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-simple.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-sll.hex >> results.txt -./Vcache_simX -E -a rv32i 
--core ../benchmarks/riscv_tests/rv32ui-p-sll.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-sll.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-sll.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-slli.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-slli.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-slli.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-slli.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-slt.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-slt.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-slt.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-slt.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-slti.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-slti.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-slti.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-slti.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-sltiu.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-sltiu.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-sltiu.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-sltiu.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-sltu.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-sltu.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-sltu.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-sltu.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-sra.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-sra.hex -s -b >> 
results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-sra.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-sra.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-srai.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-srai.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-srai.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-srai.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-srl.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-srl.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-srl.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-srl.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-srli.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-srli.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-srli.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-srli.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-sub.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-sub.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-sub.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-sub.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-sw.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-sw.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-sw.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-sw.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-xor.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-xor.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-xor.hex >> results.txt 
+./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-xor.hex -s >> results.txt -echo ./../benchmarks/riscv_tests/rv32ui-p-xori.hex >> results.txt -./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32ui-p-xori.hex -s -b >> results.txt +echo ./../benchmarks/isa/riscv_tests/rv32ui-p-xori.hex >> results.txt +./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32ui-p-xori.hex -s >> results.txt -# echo ./../benchmarks/riscv_tests/rv32um-p-div.hex >> results.txt -# ./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32um-p-div.hex -s -b >> results.txt +# echo ./../benchmarks/isa/riscv_tests/rv32um-p-div.hex >> results.txt +# ./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32um-p-div.hex -s >> results.txt -# echo ./../benchmarks/riscv_tests/rv32um-p-divu.hex >> results.txt -# ./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32um-p-divu.hex -s -b >> results.txt +# echo ./../benchmarks/isa/riscv_tests/rv32um-p-divu.hex >> results.txt +# ./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32um-p-divu.hex -s >> results.txt -# echo ./../benchmarks/riscv_tests/rv32um-p-mul.hex >> results.txt -# ./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32um-p-mul.hex -s -b >> results.txt +# echo ./../benchmarks/isa/riscv_tests/rv32um-p-mul.hex >> results.txt +# ./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32um-p-mul.hex -s >> results.txt -# echo ./../benchmarks/riscv_tests/rv32um-p-mulh.hex >> results.txt -# ./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32um-p-mulh.hex -s -b >> results.txt +# echo ./../benchmarks/isa/riscv_tests/rv32um-p-mulh.hex >> results.txt +# ./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32um-p-mulh.hex -s >> results.txt -# echo ./../benchmarks/riscv_tests/rv32um-p-mulhsu.hex >> results.txt -# ./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32um-p-mulhsu.hex -s -b >> results.txt +# echo ./../benchmarks/isa/riscv_tests/rv32um-p-mulhsu.hex >> results.txt +# ./simX -a rv32i -i 
../benchmarks/isa/riscv_tests/rv32um-p-mulhsu.hex -s >> results.txt -# echo ./../benchmarks/riscv_tests/rv32um-p-mulhu.hex >> results.txt -# ./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32um-p-mulhu.hex -s -b >> results.txt +# echo ./../benchmarks/isa/riscv_tests/rv32um-p-mulhu.hex >> results.txt +# ./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32um-p-mulhu.hex -s >> results.txt -# echo ./../benchmarks/riscv_tests/rv32um-p-rem.hex >> results.txt -# ./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32um-p-rem.hex -s -b >> results.txt +# echo ./../benchmarks/isa/riscv_tests/rv32um-p-rem.hex >> results.txt +# ./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32um-p-rem.hex -s >> results.txt -# echo ./../benchmarks/riscv_tests/rv32um-p-remu.hex >> results.txt -# ./Vcache_simX -E -a rv32i --core ../benchmarks/riscv_tests/rv32um-p-remu.hex -s -b >> results.txt +# echo ./../benchmarks/isa/riscv_tests/rv32um-p-remu.hex >> results.txt +# ./simX -a rv32i -i ../benchmarks/isa/riscv_tests/rv32um-p-remu.hex -s >> results.txt diff --git a/simX/test_runtime.sh b/simX/test_runtime.sh index 725e9600..16fe2f28 100755 --- a/simX/test_runtime.sh +++ b/simX/test_runtime.sh @@ -3,17 +3,14 @@ make make -C ../runtime/tests/dev make -C ../runtime/tests/hello -make -C ../runtime/tests/nativevecadd +make -C ../runtime/tests/nlTest make -C ../runtime/tests/simple -make -C ../runtime/tests/vecadd -cd obj_dir echo start > results.txt printf "Fasten your seatbelts ladies and gentelmen!!\n\n\n\n" -#./Vcache_simX -E -a rv32i --core ../runtime/tests/dev/vx_dev_main.hex -s -b 1> emulator.debug -#./Vcache_simX -E -a rv32i --core ../runtime/tests/hello/hello.hex -s -b 1> emulator.debug -./Vcache_simX -E -a rv32i --core ../runtime/tests/nativevecadd/vx_pocl_main.hex -s -b 1> emulator.debug -./Vcache_simX -E -a rv32i --core ../runtime/tests/simple/vx_simple_main.hex -s -b 1> emulator.debug -./Vcache_simX -E -a rv32i --core ../runtime/tests/vecadd/vx_pocl_main.hex -s -b 
1> emulator.debug +#./simX -a rv32i -i ../runtime/tests/dev/vx_dev_main.hex -s 1> emulator.debug +#./simX -a rv32i -i ../runtime/tests/hello/hello.hex -s 1> emulator.debug +./simX -a rv32i -i ../runtime/tests/nlTest/vx_nl_main.hex -s 1> emulator.debug +./simX -a rv32i -i ../runtime/tests/simple/vx_simple_main.hex -s 1> emulator.debug diff --git a/simX/test_vec.sh b/simX/test_vec.sh index a1f34a55..1e3ee457 100755 --- a/simX/test_vec.sh +++ b/simX/test_vec.sh @@ -5,4 +5,4 @@ echo start > results.txt # echo ../kernel/vortex_test.hex make printf "Fasten your seatbelts ladies and gentelmen!!\n\n\n\n" -cd obj_dir && ./Vcache_simX -E -a rv32i --core ../rvvector/basic/vx_vector_main.hex -s -b 1> emulator.debug +./Vcache_simX -a rv32i -i ../rvvector/basic/vx_vector_main.hex -s 1> emulator.debug diff --git a/simX/include/trace.h b/simX/trace.h similarity index 79% rename from simX/include/trace.h rename to simX/trace.h index cdb93377..610276e3 100644 --- a/simX/include/trace.h +++ b/simX/trace.h @@ -1,10 +1,9 @@ #pragma once -namespace Harp { +namespace vortex { - typedef struct - { + struct trace_inst_t { // Warp step bool valid_inst; unsigned pc; @@ -28,14 +27,13 @@ namespace Harp { unsigned * mem_addresses; // dmem interface - int mem_stall_cycles; - int fetch_stall_cycles; + unsigned long mem_stall_cycles; + unsigned long fetch_stall_cycles; // Instruction execute bool stall_warp; bool wspawn; bool stalled; - } trace_inst_t; - + }; } \ No newline at end of file diff --git a/simX/types.h b/simX/types.h new file mode 100644 index 00000000..ac855983 --- /dev/null +++ b/simX/types.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +namespace vortex { + +typedef uint8_t Byte; +typedef uint32_t Word; +typedef uint32_t Word_u; +typedef int32_t Word_s; + +typedef Word_u Addr; +typedef Word_u Size; + +typedef unsigned RegNum; +typedef unsigned ThdNum; + +enum MemFlags { + RD_USR = 1, + WR_USR = 2, + EX_USR = 4, + RD_SUP = 8, + WR_SUP = 16, + EX_SUP = 32 +}; + +} \ No 
newline at end of file diff --git a/simX/util.cpp b/simX/util.cpp index a1d6ebe6..e7afaeba 100644 --- a/simX/util.cpp +++ b/simX/util.cpp @@ -1,28 +1,25 @@ -/******************************************************************************* - HARPtools by Chad D. Kersey, Summer 2011 -*******************************************************************************/ #include - -#include "include/types.h" -#include "include/util.h" #include +#include +#include "types.h" +#include "util.h" -using namespace Harp; -using namespace std; +using namespace vortex; -// Make it easy for autotools-based build systems to detect this library. -extern "C" { - int harplib_present = 1; +Word vortex::signExt(Word w, Size bit, Word mask) { + if (w >> (bit - 1)) + w |= ~mask; + return w; } -void Harp::wordToBytes(Byte *b, Word_u w, Size wordSize) { +void vortex::wordToBytes(Byte *b, Word_u w, Size wordSize) { while (wordSize--) { *(b++) = w & 0xff; w >>= 8; } } -Word_u Harp::bytesToWord(const Byte *b, Size wordSize) { +Word_u vortex::bytesToWord(const Byte *b, Size wordSize) { Word_u w = 0; b += wordSize-1; while (wordSize--) { @@ -32,7 +29,7 @@ Word_u Harp::bytesToWord(const Byte *b, Size wordSize) { return w; } -Word_u Harp::flagsToWord(bool r, bool w, bool x) { +Word_u vortex::flagsToWord(bool r, bool w, bool x) { Word_u word = 0; if (r) word |= RD_USR; if (w) word |= WR_USR; @@ -40,19 +37,21 @@ Word_u Harp::flagsToWord(bool r, bool w, bool x) { return word; } -void Harp::wordToFlags(bool &r, bool &w, bool &x, Word_u f) { +void vortex::wordToFlags(bool &r, bool &w, bool &x, Word_u f) { r = f & RD_USR; w = f & WR_USR; x = f & EX_USR; } -Byte Harp::readByte(const vector &b, Size &n) { - if (b.size() <= n) throw OutOfBytes(); +Byte vortex::readByte(const std::vector &b, Size &n) { + if (b.size() <= n) + throw std::out_of_range("out of range"); return b[n++]; } -Word_u Harp::readWord(const vector &b, Size &n, Size wordSize) { - // if (b.size() - n < wordSize) throw OutOfBytes(); +Word_u 
vortex::readWord(const std::vector &b, Size &n, Size wordSize) { + if (b.size() - n < wordSize) + throw std::out_of_range("out of range"); Word_u w(0); n += wordSize; // std::cout << "wordSize: " << wordSize << "\n"; @@ -62,16 +61,15 @@ Word_u Harp::readWord(const vector &b, Size &n, Size wordSize) { w |= b[n - i - 1]; } // cout << "b[0]" << std::hex << w << "\n"; - // throw OutOfBytes(); return w; } -void Harp::writeByte(vector &p, Size &n, Byte b) { +void vortex::writeByte(std::vector &p, Size &n, Byte b) { if (p.size() <= n) p.resize(n+1); p[n++] = b; } -void Harp::writeWord(vector &p, Size &n, Size wordSize, Word w) { +void vortex::writeWord(std::vector &p, Size &n, Size wordSize, Word w) { if (p.size() < (n+wordSize)) p.resize(n+wordSize); while (wordSize--) { p[n++] = w & 0xff; diff --git a/simX/util.h b/simX/util.h new file mode 100644 index 00000000..b9bef8f3 --- /dev/null +++ b/simX/util.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include "types.h" + +namespace vortex { + +template +void unused(Args&&...) {} + +#define __unused(...) 
unused(__VA_ARGS__) + +Word signExt(Word w, Size bit, Word mask); + +Word_u bytesToWord(const Byte *b, Size wordSize); +void wordToBytes(Byte *b, Word_u w, Size wordSize); +Word_u flagsToWord(bool r, bool w, bool x); +void wordToFlags(bool &r, bool &w, bool &x, Word_u f); + +Byte readByte(const std::vector &b, Size &n); +Word_u readWord(const std::vector &b, Size &n, Size wordSize); +void writeByte(std::vector &p, Size &n, Byte b); +void writeWord(std::vector &p, Size &n, Size wordSize, Word w); + +} \ No newline at end of file diff --git a/simX/warp.cpp b/simX/warp.cpp new file mode 100644 index 00000000..0e39d41f --- /dev/null +++ b/simX/warp.cpp @@ -0,0 +1,133 @@ +#include +#include +#include +#include + +#include "util.h" +#include "instr.h" +#include "core.h" + +using namespace vortex; + +Warp::Warp(Core *core, Word id) + : id_(id) + , core_(core) + , pc_(0x80000000) + , shadowPc_(0) + , activeThreads_(0) + , shadowActiveThreads_(0) + , shadowReg_(core_->arch().getNumRegs()) + , VLEN_(1024) + , interruptEnable_(true) + , shadowInterruptEnable_(false) + , supervisorMode_(true) + , shadowSupervisorMode_(false) + , spawned_(false) + , steps_(0) + , insts_(0) + , loads_(0) + , stores_(0) { + D(3, "Creating a new thread with PC: " << std::hex << pc_); + /* Build the register file. */ + Word regNum(0); + for (Word j = 0; j < core_->arch().getNumThreads(); ++j) { + regFile_.push_back(std::vector>(0)); + for (Word i = 0; i < core_->arch().getNumRegs(); ++i) { + regFile_[j].push_back(Reg(id, regNum++)); + } + + bool act = false; + if (j == 0) + act = true; + tmask_.push_back(act); + shadowTmask_.push_back(act); + } + + for (Word i = 0; i < (1 << 12); i++) { + csrs_.push_back(Reg(id, regNum++)); + } + + /* Set initial register contents. 
*/ + regFile_[0][0] = (core_->arch().getNumThreads() << (core_->arch().getWordSize() * 8 / 2)) | id; +} + +void Warp::step(trace_inst_t *trace_inst) { + Size fetchPos(0); + Size decPos; + Size wordSize(core_->arch().getWordSize()); + std::vector fetchBuffer(wordSize); + + if (activeThreads_ == 0) + return; + + ++steps_; + + D(3, "current PC=0x" << std::hex << pc_); + + // std::cout << "pc: " << std::hex << pc << "\n"; + trace_inst->pc = pc_; + + /* Fetch and decode. */ + if (wordSize < sizeof(pc_)) + pc_ &= ((1ll << (wordSize * 8)) - 1); + + unsigned fetchSize = 4; + fetchBuffer.resize(fetchSize); + Word fetched = core_->mem().fetch(pc_ + fetchPos, supervisorMode_); + writeWord(fetchBuffer, fetchPos, fetchSize, fetched); + + decPos = 0; + std::shared_ptr instr = core_->decoder().decode(fetchBuffer, decPos, trace_inst); + + // Update pc + pc_ += decPos; + + // Execute + this->execute(*instr, trace_inst); + + // At Debug Level 3, print debug info after each instruction. + D(3, "Register state:"); + for (unsigned i = 0; i < regFile_[0].size(); ++i) { + D_RAW(" %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':'); + for (unsigned j = 0; j < (activeThreads_); ++j) + D_RAW(' ' << std::setfill('0') << std::setw(8) << std::hex << regFile_[j][i] << std::setfill(' ') << ' '); + D_RAW('(' << shadowReg_[i] << ')' << std::endl); + } + + DPH(3, "Thread mask:"); + for (unsigned i = 0; i < tmask_.size(); ++i) + DPN(3, " " << tmask_[i]); + DPN(3, "\n"); +} + +bool Warp::interrupt(Word r0) { + if (!interruptEnable_) + return false; + + shadowActiveThreads_ = activeThreads_; + shadowTmask_ = tmask_; + shadowInterruptEnable_ = interruptEnable_; /* For traps. 
*/ + shadowSupervisorMode_ = supervisorMode_; + + for (Word i = 0; i < regFile_[0].size(); ++i) + shadowReg_[i] = regFile_[0][i]; + + for (Word i = 0; i < regFile_.size(); ++i) + tmask_[i] = 1; + + shadowPc_ = pc_; + activeThreads_ = 1; + interruptEnable_ = false; + supervisorMode_ = true; + regFile_[0][0] = r0; + pc_ = core_->interruptEntry(); + + return true; +} + +void Warp::printStats() const { + std::cout << "Steps : " << steps_ << std::endl + << "Insts : " << insts_ << std::endl + << "Loads : " << loads_ << std::endl + << "Stores: " << stores_ << std::endl; +} \ No newline at end of file diff --git a/simX/warp.h b/simX/warp.h new file mode 100644 index 00000000..ebf7e836 --- /dev/null +++ b/simX/warp.h @@ -0,0 +1,188 @@ +#ifndef __WARP_H +#define __WARP_H + +#include +#include +#include "types.h" + +namespace vortex { + +template +class Reg { +public: + Reg() + : value_(0), cpuId_(0), regNum_(0) {} + Reg(Word c, Word n) + : value_(0), cpuId_(c), regNum_(n) {} + Reg(Word c, Word n, T v) + : value_(v), cpuId_(c), regNum_(n) {} + + const T &value() const { + return value_; + } + + Reg &operator=(T r) { + if (regNum_) { + value_ = r; + doWrite(); + } + return *this; + } + + operator T() const { + doRead(); + return value_; + } + + void trunc(Size s) { + Word mask((~0ull >> (sizeof(Word) - s) * 8)); + value_ &= mask; + } + +private: + T value_; + Word cpuId_, regNum_; + + void doWrite() const {} + void doRead() const {} +}; + +/////////////////////////////////////////////////////////////////////////////// + +struct DomStackEntry { + DomStackEntry( + unsigned p, + const std::vector>> &m, + std::vector &tm, + Word pc + ) : pc(pc) + , fallThrough(false) + , uni(false) { + for (unsigned i = 0; i < m.size(); ++i) { + tmask.push_back(!bool(m[i][p]) && tm[i]); + } + } + + DomStackEntry(const std::vector &tmask) + : tmask(tmask), fallThrough(true), uni(false) {} + + std::vector tmask; + Word pc; + bool fallThrough; + bool uni; +}; + +struct vtype { + int vill; + int 
vediv; + int vsew; + int vlmul; +}; + +class Core; +class Instr; +class trace_inst_t; + +class Warp { +public: + Warp(Core *core, Word id = 0); + + void step(trace_inst_t *); + + bool interrupt(Word r0); + + bool running() const { + return (activeThreads_ != 0); + } + + void printStats() const; + + Core *core() { + return core_; + } + + Word id() const { + return id_; + } + + Word get_pc() const { + return pc_; + } + + void set_pc(Word pc) { + pc_ = pc; + } + + void setActiveThreads(Size activeThreads) { + activeThreads_ = activeThreads; + } + + Size getActiveThreads() const { + return activeThreads_; + } + + void setSpawned(bool spawned) { + spawned_ = spawned; + } + + void setSupervisorMode(bool supervisorMode) { + supervisorMode_ = supervisorMode; + } + + bool getSupervisorMode() const { + return supervisorMode_; + } + + void setTmask(size_t index, bool value) { + tmask_[index] = value; + } + +private: + + void execute(Instr &instr, trace_inst_t *); + + struct MemAccess { + MemAccess(bool w, Word a) + : wr(w), addr(a) {} + bool wr; + Word addr; + }; + + std::vector memAccesses_; + + Word id_; + Core *core_; + Word pc_; + Word shadowPc_; + Size activeThreads_; + Size shadowActiveThreads_; + std::vector>> regFile_; + std::vector> csrs_; + + std::vector tmask_; + std::vector shadowTmask_; + std::stack domStack_; + + std::vector shadowReg_; + + struct vtype vtype_; // both of them are XLEN WIDE + int vl_; // both of them are XLEN WIDE + Word VLEN_; // total vector length + + std::vector>> vregFile_; // 32 vector registers + + bool interruptEnable_; + bool shadowInterruptEnable_; + bool supervisorMode_; + bool shadowSupervisorMode_; + bool spawned_; + + unsigned long steps_; + unsigned long insts_; + unsigned long loads_; + unsigned long stores_; +}; + +} + +#endif \ No newline at end of file