simX refactoring + removed oldRTL + CSR updates
This commit is contained in:
@@ -6,7 +6,7 @@ set -e
|
||||
show_usage()
|
||||
{
|
||||
echo "Vortex BlackBox Test Driver v1.0"
|
||||
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
|
||||
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim|simx] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
|
||||
}
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
@@ -104,6 +104,10 @@ case $DRIVER in
|
||||
fpga)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=fpga
|
||||
;;
|
||||
simx)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/simx
|
||||
DRIVER_EXTRA=
|
||||
;;
|
||||
*)
|
||||
echo "invalid driver: $DRIVER"
|
||||
|
||||
@@ -25,28 +25,6 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_
|
||||
// get buffer address
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
|
||||
|
||||
#if defined(USE_SIMX)
|
||||
// default startup routine
|
||||
((uint32_t*)buf_ptr)[0] = 0xf1401073;
|
||||
((uint32_t*)buf_ptr)[1] = 0xf1401073;
|
||||
((uint32_t*)buf_ptr)[2] = 0x30101073;
|
||||
((uint32_t*)buf_ptr)[3] = 0x800000b7;
|
||||
((uint32_t*)buf_ptr)[4] = 0x000080e7;
|
||||
err = vx_copy_to_dev(buffer, 0, 5 * 4, 0);
|
||||
if (err != 0) {
|
||||
vx_buf_release(buffer);
|
||||
return err;
|
||||
}
|
||||
|
||||
// newlib io simulator trap
|
||||
((uint32_t*)buf_ptr)[0] = 0x00008067;
|
||||
err = vx_copy_to_dev(buffer, 0x70000000, 4, 0);
|
||||
if (err != 0) {
|
||||
vx_buf_release(buffer);
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
//
|
||||
// upload content
|
||||
//
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized
|
||||
|
||||
CFLAGS += -fPIC -Wno-aligned-new -Wno-maybe-uninitialized
|
||||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
@@ -26,9 +25,7 @@ DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
|
||||
CFLAGS += $(CONFIGS)
|
||||
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
|
||||
@@ -1,53 +1,42 @@
|
||||
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
PROJECT = libvortex.so
|
||||
#PROJECT = libvortex.dylib
|
||||
|
||||
CFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized
|
||||
SIMX_DIR = ../../simX
|
||||
|
||||
CFLAGS += -I../../include -I../../../simX/include -I../../../hw
|
||||
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -fPIC
|
||||
CXXFLAGS += -fPIC -Wno-aligned-new -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR)
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
CFLAGS += -DUSE_SIMX
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
#LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
TOP = cache_simX
|
||||
|
||||
RTL_DIR = ../../hw/old_rtl
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||
SRCS += ../../simX/args.cpp ../../simX/mem.cpp ../../simX/core.cpp ../../simX/instruction.cpp ../../simX/enc.cpp ../../simX/util.cpp
|
||||
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/shared_memory
|
||||
RTL_INCLUDE += -I../../simX
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert
|
||||
VL_FLAGS += -Wno-DECLFILENAME
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/instr.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
CXXFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CFLAGS += -DNDEBUG
|
||||
CXXFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
PROJECT = libvortex.so
|
||||
#PROJECT = libvortex.dylib
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir
|
||||
rm -rf $(PROJECT) *.o .depend
|
||||
@@ -142,16 +142,27 @@ public:
|
||||
private:
|
||||
|
||||
void run() {
|
||||
Harp::ArchDef arch("rv32i", NUM_WARPS, NUM_THREADS);
|
||||
Harp::WordDecoder dec(arch);
|
||||
Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
|
||||
Harp::Core core(arch, dec, mu);
|
||||
vortex::ArchDef arch("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS);
|
||||
vortex::Decoder decoder(arch);
|
||||
vortex::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
|
||||
mu.attach(ram_, 0);
|
||||
|
||||
while (core.running()) {
|
||||
core.step();
|
||||
std::vector<std::shared_ptr<vortex::Core>> cores(NUM_CORES);
|
||||
for (size_t i = 0; i < NUM_CORES; ++i) {
|
||||
cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu);
|
||||
}
|
||||
core.printStats();
|
||||
|
||||
bool running;
|
||||
|
||||
do {
|
||||
running = false;
|
||||
for (size_t i = 0; i < NUM_CORES; ++i) {
|
||||
if (!cores[i]->running())
|
||||
continue;
|
||||
running = true;
|
||||
cores[i]->step();
|
||||
}
|
||||
} while (running);
|
||||
}
|
||||
|
||||
void thread_proc() {
|
||||
@@ -190,7 +201,7 @@ private:
|
||||
bool is_running_;
|
||||
size_t mem_allocation_;
|
||||
std::thread thread_;
|
||||
Harp::RAM ram_;
|
||||
vortex::RAM ram_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,139 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
module VX_alu(
|
||||
input wire[31:0] in_1,
|
||||
input wire[31:0] in_2,
|
||||
input wire in_rs2_src,
|
||||
input wire[31:0] in_itype_immed,
|
||||
input wire[19:0] in_upper_immed,
|
||||
input wire[4:0] in_alu_op,
|
||||
input wire[31:0] in_curr_PC,
|
||||
output reg[31:0] out_alu_result
|
||||
);
|
||||
|
||||
|
||||
`ifdef SYN_FUNC
|
||||
wire which_in2;
|
||||
|
||||
wire[31:0] ALU_in1;
|
||||
wire[31:0] ALU_in2;
|
||||
wire[63:0] ALU_in1_mult;
|
||||
wire[63:0] ALU_in2_mult;
|
||||
wire[31:0] upper_immed;
|
||||
wire[31:0] div_result;
|
||||
wire[31:0] rem_result;
|
||||
|
||||
|
||||
assign which_in2 = in_rs2_src == `RS2_IMMED;
|
||||
|
||||
assign ALU_in1 = in_1;
|
||||
|
||||
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
|
||||
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
|
||||
|
||||
//always @(posedge `MUL) begin
|
||||
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
|
||||
|
||||
wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
|
||||
wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};
|
||||
assign ALU_in1_mult = (in_alu_op == `MULHU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in1} : alu_in1_signed;
|
||||
assign ALU_in2_mult = (in_alu_op == `MULHU || in_alu_op == `MULHSU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in2} : alu_in2_signed;
|
||||
wire[63:0] mult_result = ALU_in1_mult * ALU_in2_mult;
|
||||
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
always @(in_alu_op or ALU_in1 or ALU_in2) begin
|
||||
case(in_alu_op)
|
||||
`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
`SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0];
|
||||
`SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
|
||||
`SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
|
||||
`XOR: out_alu_result = ALU_in1 ^ ALU_in2;
|
||||
`SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0];
|
||||
`SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
|
||||
`OR: out_alu_result = ALU_in1 | ALU_in2;
|
||||
`AND: out_alu_result = ALU_in2 & ALU_in1;
|
||||
`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
|
||||
`LUI_ALU: out_alu_result = upper_immed;
|
||||
`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
|
||||
`MUL: out_alu_result = mult_result[31:0];
|
||||
`MULH: out_alu_result = mult_result[63:32];
|
||||
`MULHSU: out_alu_result = mult_result[63:32];
|
||||
`MULHU: out_alu_result = mult_result[63:32];
|
||||
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2));
|
||||
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2;
|
||||
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2));
|
||||
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
|
||||
default: out_alu_result = 32'h0;
|
||||
endcase // in_alu_op
|
||||
end
|
||||
|
||||
`else
|
||||
wire which_in2;
|
||||
|
||||
wire[31:0] ALU_in1;
|
||||
wire[31:0] ALU_in2;
|
||||
wire[31:0] upper_immed;
|
||||
|
||||
|
||||
assign which_in2 = in_rs2_src == `RS2_IMMED;
|
||||
|
||||
assign ALU_in1 = in_1;
|
||||
|
||||
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
|
||||
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
|
||||
|
||||
// always @(*) begin
|
||||
// $display("EXECUTE CURR_PC: %h",in_curr_PC);
|
||||
// end
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[63:0] mult_unsigned_result = ALU_in1 * ALU_in2;
|
||||
wire[63:0] mult_signed_result = $signed(ALU_in1) * $signed(ALU_in2);
|
||||
|
||||
wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
|
||||
|
||||
wire[63:0] mult_signed_un_result = alu_in1_signed * ALU_in2;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
always @(in_alu_op or ALU_in1 or ALU_in2) begin
|
||||
case(in_alu_op)
|
||||
`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
`SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0];
|
||||
`SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
|
||||
`SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
|
||||
`XOR: out_alu_result = ALU_in1 ^ ALU_in2;
|
||||
`SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0];
|
||||
`SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
|
||||
`OR: out_alu_result = ALU_in1 | ALU_in2;
|
||||
`AND: out_alu_result = ALU_in2 & ALU_in1;
|
||||
`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
|
||||
`LUI_ALU: out_alu_result = upper_immed;
|
||||
`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
|
||||
`MUL: begin out_alu_result = mult_signed_result[31:0]; end
|
||||
`MULH: out_alu_result = mult_signed_result[63:32];
|
||||
`MULHSU: out_alu_result = mult_signed_un_result[63:32];
|
||||
`MULHU: out_alu_result = mult_unsigned_result[63:32];
|
||||
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2));
|
||||
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2;
|
||||
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2));
|
||||
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
|
||||
default: out_alu_result = 32'h0;
|
||||
endcase // in_alu_op
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule // VX_alu
|
||||
@@ -1,133 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
module VX_back_end (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire schedule_delay,
|
||||
|
||||
output wire out_mem_delay,
|
||||
output wire gpr_stage_delay,
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
|
||||
VX_warp_ctl_inter VX_warp_ctl,
|
||||
|
||||
VX_dcache_response_inter VX_dcache_rsp,
|
||||
VX_dcache_request_inter VX_dcache_req
|
||||
|
||||
);
|
||||
|
||||
|
||||
VX_wb_inter VX_writeback_temp();
|
||||
assign VX_writeback_inter.wb = VX_writeback_temp.wb;
|
||||
assign VX_writeback_inter.rd = VX_writeback_temp.rd;
|
||||
assign VX_writeback_inter.write_data = VX_writeback_temp.write_data;
|
||||
assign VX_writeback_inter.wb_valid = VX_writeback_temp.wb_valid;
|
||||
assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num;
|
||||
|
||||
// assign VX_writeback_inter(VX_writeback_temp);
|
||||
|
||||
|
||||
VX_mw_wb_inter VX_mw_wb();
|
||||
wire no_slot_mem;
|
||||
|
||||
|
||||
VX_mem_req_inter VX_exe_mem_req();
|
||||
VX_mem_req_inter VX_mem_req();
|
||||
|
||||
|
||||
|
||||
// LSU input + output
|
||||
VX_lsu_req_inter VX_lsu_req();
|
||||
VX_inst_mem_wb_inter VX_mem_wb();
|
||||
|
||||
// Exec unit input + output
|
||||
VX_exec_unit_req_inter VX_exec_unit_req();
|
||||
VX_inst_exec_wb_inter VX_inst_exec_wb();
|
||||
|
||||
|
||||
// GPU unit input
|
||||
VX_gpu_inst_req_inter VX_gpu_inst_req();
|
||||
|
||||
// CSR unit inputs
|
||||
VX_csr_req_inter VX_csr_req();
|
||||
VX_csr_wb_inter VX_csr_wb();
|
||||
wire no_slot_csr;
|
||||
wire stall_gpr_csr;
|
||||
|
||||
VX_gpr_stage VX_gpr_stage(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.VX_writeback_inter(VX_writeback_temp),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
// New
|
||||
.VX_exec_unit_req(VX_exec_unit_req),
|
||||
.VX_lsu_req (VX_lsu_req),
|
||||
.VX_gpu_inst_req (VX_gpu_inst_req),
|
||||
.VX_csr_req (VX_csr_req),
|
||||
.stall_gpr_csr (stall_gpr_csr),
|
||||
// End new
|
||||
.memory_delay (out_mem_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
|
||||
VX_lsu load_store_unit(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.VX_lsu_req (VX_lsu_req),
|
||||
.VX_mem_wb (VX_mem_wb),
|
||||
.VX_dcache_rsp(VX_dcache_rsp),
|
||||
.VX_dcache_req(VX_dcache_req),
|
||||
.out_delay (out_mem_delay),
|
||||
.no_slot_mem (no_slot_mem)
|
||||
);
|
||||
|
||||
|
||||
VX_execute_unit VX_execUnit(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.VX_exec_unit_req(VX_exec_unit_req),
|
||||
.VX_inst_exec_wb (VX_inst_exec_wb),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp)
|
||||
);
|
||||
|
||||
|
||||
VX_gpgpu_inst VX_gpgpu_inst(
|
||||
.VX_gpu_inst_req(VX_gpu_inst_req),
|
||||
.VX_warp_ctl (VX_warp_ctl)
|
||||
);
|
||||
|
||||
// VX_csr_wrapper VX_csr_wrapper(
|
||||
// .VX_csr_req(VX_csr_req),
|
||||
// .VX_csr_wb (VX_csr_wb)
|
||||
// );
|
||||
|
||||
VX_csr_pipe VX_csr_pipe(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.no_slot_csr (no_slot_csr),
|
||||
.VX_csr_req (VX_csr_req),
|
||||
.VX_writeback(VX_writeback_temp),
|
||||
.VX_csr_wb (VX_csr_wb),
|
||||
.stall_gpr_csr(stall_gpr_csr)
|
||||
);
|
||||
|
||||
VX_writeback VX_wb(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.VX_mem_wb (VX_mem_wb),
|
||||
.VX_inst_exec_wb (VX_inst_exec_wb),
|
||||
.VX_csr_wb (VX_csr_wb),
|
||||
|
||||
.VX_writeback_inter(VX_writeback_temp),
|
||||
.no_slot_mem (no_slot_mem),
|
||||
.no_slot_csr (no_slot_csr)
|
||||
);
|
||||
|
||||
endmodule
|
||||
@@ -1,22 +0,0 @@
|
||||
module VX_countones
|
||||
#(
|
||||
parameter N = 10
|
||||
)
|
||||
(
|
||||
|
||||
input wire[N-1:0] valids,
|
||||
output reg[$clog2(N):0] count
|
||||
|
||||
);
|
||||
|
||||
integer i;
|
||||
always @(*) begin
|
||||
count = 0;
|
||||
for (i = N-1; i >= 0; i = i - 1) begin
|
||||
if (valids[i]) begin
|
||||
count = count + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -1,82 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
module VX_csr_data (
|
||||
input wire clk, // Clock
|
||||
input wire reset,
|
||||
|
||||
input wire[11:0] in_read_csr_address,
|
||||
|
||||
input wire in_write_valid,
|
||||
input wire[31:0] in_write_csr_data,
|
||||
input wire[11:0] in_write_csr_address,
|
||||
|
||||
output wire[31:0] out_read_csr_data,
|
||||
|
||||
// For instruction retire counting
|
||||
input wire in_writeback_valid
|
||||
|
||||
);
|
||||
|
||||
|
||||
// wire[`NT_M1:0][31:0] thread_ids;
|
||||
// wire[`NT_M1:0][31:0] warp_ids;
|
||||
|
||||
// genvar cur_t;
|
||||
// for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
|
||||
// assign thread_ids[cur_t] = cur_t;
|
||||
// end
|
||||
|
||||
// genvar cur_tw;
|
||||
// for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
|
||||
// assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num};
|
||||
// end
|
||||
|
||||
reg[11:0] csr[1023:0];
|
||||
reg[63:0] cycle;
|
||||
reg[63:0] instret;
|
||||
|
||||
|
||||
wire read_cycle;
|
||||
wire read_cycleh;
|
||||
wire read_instret;
|
||||
wire read_instreth;
|
||||
|
||||
assign read_cycle = in_read_csr_address == 12'hC00;
|
||||
assign read_cycleh = in_read_csr_address == 12'hC80;
|
||||
assign read_instret = in_read_csr_address == 12'hC02;
|
||||
assign read_instreth = in_read_csr_address == 12'hC82;
|
||||
|
||||
// wire thread_select = in_read_csr_address == 12'h20;
|
||||
// wire warp_select = in_read_csr_address == 12'h21;
|
||||
|
||||
// assign out_read_csr_data = thread_select ? thread_ids :
|
||||
// warp_select ? warp_ids :
|
||||
// 0;
|
||||
|
||||
integer curr_e;
|
||||
always @(posedge clk or posedge reset) begin
|
||||
if (reset) begin
|
||||
for (curr_e = 0; curr_e < 1024; curr_e=curr_e+1) begin
|
||||
assign csr[curr_e] = 0;
|
||||
end
|
||||
cycle <= 0;
|
||||
instret <= 0;
|
||||
end else begin
|
||||
cycle <= cycle + 1;
|
||||
if (in_write_valid) begin
|
||||
csr[in_write_csr_address] <= in_write_csr_data[11:0];
|
||||
end
|
||||
if (in_writeback_valid) begin
|
||||
instret <= instret + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign out_read_csr_data = read_cycle ? cycle[31:0] :
|
||||
read_cycleh ? cycle[63:32] :
|
||||
read_instret ? instret[31:0] :
|
||||
read_instreth ? instret[63:32] :
|
||||
{{20{1'b0}}, csr[in_read_csr_address]};
|
||||
|
||||
endmodule
|
||||
@@ -1,84 +0,0 @@
|
||||
|
||||
|
||||
module VX_csr_handler (
|
||||
input wire clk,
|
||||
input wire[11:0] in_decode_csr_address, // done
|
||||
VX_csr_write_request_inter VX_csr_w_req,
|
||||
input wire in_wb_valid,
|
||||
output wire[31:0] out_decode_csr_data // done
|
||||
);
|
||||
|
||||
wire in_mem_is_csr;
|
||||
wire[11:0] in_mem_csr_address;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] in_mem_csr_result;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
|
||||
assign in_mem_is_csr = VX_csr_w_req.is_csr;
|
||||
assign in_mem_csr_address = VX_csr_w_req.csr_address;
|
||||
assign in_mem_csr_result = VX_csr_w_req.csr_result;
|
||||
|
||||
|
||||
reg[1024:0][11:0] csr;
|
||||
reg[63:0] cycle;
|
||||
reg[63:0] instret;
|
||||
reg[11:0] decode_csr_address;
|
||||
|
||||
|
||||
wire read_cycle;
|
||||
wire read_cycleh;
|
||||
wire read_instret;
|
||||
wire read_instreth;
|
||||
|
||||
initial begin
|
||||
cycle = 0;
|
||||
instret = 0;
|
||||
decode_csr_address = 0;
|
||||
end
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
cycle <= cycle + 1;
|
||||
decode_csr_address <= in_decode_csr_address;
|
||||
if (in_wb_valid) begin
|
||||
instret <= instret + 1;
|
||||
end
|
||||
end
|
||||
|
||||
reg[11:0] data_read;
|
||||
always @(posedge clk) begin
|
||||
if(in_mem_is_csr) begin
|
||||
csr[in_mem_csr_address] <= in_mem_csr_result[11:0];
|
||||
end
|
||||
end
|
||||
|
||||
assign data_read = csr[decode_csr_address];
|
||||
|
||||
|
||||
assign read_cycle = decode_csr_address == 12'hC00;
|
||||
assign read_cycleh = decode_csr_address == 12'hC80;
|
||||
assign read_instret = decode_csr_address == 12'hC02;
|
||||
assign read_instreth = decode_csr_address == 12'hC82;
|
||||
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
assign out_decode_csr_data = read_cycle ? cycle[31:0] :
|
||||
read_cycleh ? cycle[63:32] :
|
||||
read_instret ? instret[31:0] :
|
||||
read_instreth ? instret[63:32] :
|
||||
{{20{1'b0}}, data_read};
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
endmodule // VX_csr_handler
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,105 +0,0 @@
|
||||
|
||||
module VX_csr_pipe (
|
||||
input wire clk, // Clock
|
||||
input wire reset,
|
||||
input wire no_slot_csr,
|
||||
VX_csr_req_inter VX_csr_req,
|
||||
VX_wb_inter VX_writeback,
|
||||
VX_csr_wb_inter VX_csr_wb,
|
||||
output wire stall_gpr_csr
|
||||
|
||||
);
|
||||
|
||||
wire[`NT_M1:0] valid_s2;
|
||||
wire[`NW_M1:0] warp_num_s2;
|
||||
wire[4:0] rd_s2;
|
||||
wire[1:0] wb_s2;
|
||||
wire[4:0] alu_op_s2;
|
||||
wire is_csr_s2;
|
||||
wire[11:0] csr_address_s2;
|
||||
wire[31:0] csr_read_data_s2;
|
||||
wire[31:0] csr_updated_data_s2;
|
||||
|
||||
wire[31:0] csr_read_data_unqual;
|
||||
wire[31:0] csr_read_data;
|
||||
|
||||
assign stall_gpr_csr = no_slot_csr && VX_csr_req.is_csr && |(VX_csr_req.valid);
|
||||
|
||||
assign csr_read_data = (csr_address_s2 == VX_csr_req.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
|
||||
wire writeback = |VX_writeback.wb_valid;
|
||||
VX_csr_data VX_csr_data(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_read_csr_address (VX_csr_req.csr_address),
|
||||
|
||||
.in_write_valid (is_csr_s2),
|
||||
.in_write_csr_data (csr_updated_data_s2),
|
||||
.in_write_csr_address(csr_address_s2),
|
||||
|
||||
.out_read_csr_data (csr_read_data_unqual),
|
||||
|
||||
.in_writeback_valid (writeback)
|
||||
);
|
||||
|
||||
|
||||
|
||||
reg[31:0] csr_updated_data;
|
||||
always @(*) begin
|
||||
case(VX_csr_req.alu_op)
|
||||
`CSR_ALU_RW: csr_updated_data = VX_csr_req.csr_mask;
|
||||
`CSR_ALU_RS: csr_updated_data = csr_read_data | VX_csr_req.csr_mask;
|
||||
`CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - VX_csr_req.csr_mask);
|
||||
default: csr_updated_data = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
|
||||
wire zero = 0;
|
||||
|
||||
VX_generic_register #(.N(`NT + `NW_M1 + 1 + 5 + 2 + 5 + 12 + 64)) csr_reg_s2 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(no_slot_csr),
|
||||
.flush(zero),
|
||||
.in ({VX_csr_req.valid, VX_csr_req.warp_num, VX_csr_req.rd, VX_csr_req.wb, VX_csr_req.is_csr, VX_csr_req.csr_address, csr_read_data , csr_updated_data }),
|
||||
.out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2})
|
||||
);
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] final_csr_data;
|
||||
|
||||
wire[`NT_M1:0][31:0] thread_ids;
|
||||
wire[`NT_M1:0][31:0] warp_ids;
|
||||
wire[`NT_M1:0][31:0] csr_vec_read_data_s2;
|
||||
|
||||
genvar cur_t;
|
||||
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
|
||||
assign thread_ids[cur_t] = cur_t;
|
||||
end
|
||||
|
||||
genvar cur_tw;
|
||||
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
|
||||
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2};
|
||||
end
|
||||
|
||||
genvar cur_v;
|
||||
for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin
|
||||
assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
|
||||
end
|
||||
|
||||
wire thread_select = csr_address_s2 == 12'h20;
|
||||
wire warp_select = csr_address_s2 == 12'h21;
|
||||
|
||||
assign final_csr_data = thread_select ? thread_ids :
|
||||
warp_select ? warp_ids :
|
||||
csr_vec_read_data_s2;
|
||||
|
||||
|
||||
|
||||
assign VX_csr_wb.valid = valid_s2;
|
||||
assign VX_csr_wb.warp_num = warp_num_s2;
|
||||
assign VX_csr_wb.rd = rd_s2;
|
||||
assign VX_csr_wb.wb = wb_s2;
|
||||
assign VX_csr_wb.csr_result = final_csr_data;
|
||||
|
||||
endmodule
|
||||
@@ -1,38 +0,0 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
module VX_csr_wrapper (
|
||||
VX_csr_req_inter VX_csr_req,
|
||||
|
||||
VX_csr_wb_inter VX_csr_wb
|
||||
);
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] thread_ids;
|
||||
wire[`NT_M1:0][31:0] warp_ids;
|
||||
|
||||
genvar cur_t;
|
||||
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
|
||||
assign thread_ids[cur_t] = cur_t;
|
||||
end
|
||||
|
||||
genvar cur_tw;
|
||||
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
|
||||
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, VX_csr_req.warp_num};
|
||||
end
|
||||
|
||||
|
||||
assign VX_csr_wb.valid = VX_csr_req.valid;
|
||||
assign VX_csr_wb.warp_num = VX_csr_req.warp_num;
|
||||
assign VX_csr_wb.rd = VX_csr_req.rd;
|
||||
assign VX_csr_wb.wb = VX_csr_req.wb;
|
||||
|
||||
|
||||
wire thread_select = VX_csr_req.csr_address == 12'h20;
|
||||
wire warp_select = VX_csr_req.csr_address == 12'h21;
|
||||
|
||||
assign VX_csr_wb.csr_result = thread_select ? thread_ids :
|
||||
warp_select ? warp_ids :
|
||||
0;
|
||||
|
||||
endmodule
|
||||
@@ -1,361 +0,0 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
module VX_decode(
|
||||
// Fetch Inputs
|
||||
VX_inst_meta_inter fd_inst_meta_de,
|
||||
|
||||
// Outputs
|
||||
VX_frE_to_bckE_req_inter VX_frE_to_bckE_req,
|
||||
VX_wstall_inter VX_wstall,
|
||||
VX_join_inter VX_join,
|
||||
|
||||
output wire terminate_sim
|
||||
|
||||
);
|
||||
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_M1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
|
||||
assign VX_frE_to_bckE_req.curr_PC = in_curr_PC;
|
||||
|
||||
wire[`NT_M1:0] in_valid = fd_inst_meta_de.valid;
|
||||
|
||||
wire[6:0] curr_opcode;
|
||||
|
||||
wire is_itype;
|
||||
wire is_rtype;
|
||||
wire is_stype;
|
||||
wire is_btype;
|
||||
wire is_linst;
|
||||
wire is_jal;
|
||||
wire is_jalr;
|
||||
wire is_lui;
|
||||
wire is_auipc;
|
||||
wire is_csr;
|
||||
wire is_csr_immed;
|
||||
wire is_e_inst;
|
||||
|
||||
wire is_gpgpu;
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
wire is_join;
|
||||
wire is_barrier;
|
||||
|
||||
wire[2:0] func3;
|
||||
wire[6:0] func7;
|
||||
wire[11:0] u_12;
|
||||
|
||||
|
||||
wire[7:0] jal_b_19_to_12;
|
||||
wire jal_b_11;
|
||||
wire[9:0] jal_b_10_to_1;
|
||||
wire jal_b_20;
|
||||
wire jal_b_0;
|
||||
wire[20:0] jal_unsigned_offset;
|
||||
wire[31:0] jal_1_offset;
|
||||
|
||||
wire[11:0] jalr_immed;
|
||||
wire[31:0] jal_2_offset;
|
||||
|
||||
wire jal_sys_cond1;
|
||||
wire jal_sys_cond2;
|
||||
wire jal_sys_jal;
|
||||
wire[31:0] jal_sys_off;
|
||||
|
||||
wire csr_cond1;
|
||||
wire csr_cond2;
|
||||
|
||||
wire[11:0] alu_tempp;
|
||||
wire alu_shift_i;
|
||||
wire[11:0] alu_shift_i_immed;
|
||||
|
||||
wire[1:0] csr_type;
|
||||
|
||||
reg[4:0] csr_alu;
|
||||
reg[4:0] alu_op;
|
||||
reg[4:0] mul_alu;
|
||||
reg[19:0] temp_upper_immed;
|
||||
reg temp_jal;
|
||||
reg[31:0] temp_jal_offset;
|
||||
reg[31:0] temp_itype_immed;
|
||||
reg[2:0] temp_branch_type;
|
||||
reg temp_branch_stall;
|
||||
|
||||
// always @(posedge reset) begin
|
||||
|
||||
// end
|
||||
|
||||
assign VX_frE_to_bckE_req.valid = fd_inst_meta_de.valid;
|
||||
|
||||
assign VX_frE_to_bckE_req.warp_num = in_warp_num;
|
||||
|
||||
|
||||
assign curr_opcode = in_instruction[6:0];
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.rd = in_instruction[11:7];
|
||||
assign VX_frE_to_bckE_req.rs1 = in_instruction[19:15];
|
||||
assign VX_frE_to_bckE_req.rs2 = in_instruction[24:20];
|
||||
assign func3 = in_instruction[14:12];
|
||||
assign func7 = in_instruction[31:25];
|
||||
assign u_12 = in_instruction[31:20];
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.PC_next = in_curr_PC + 32'h4;
|
||||
|
||||
|
||||
// Write Back signal
|
||||
assign is_rtype = (curr_opcode == `R_INST);
|
||||
assign is_linst = (curr_opcode == `L_INST);
|
||||
assign is_itype = (curr_opcode == `ALU_INST) || is_linst;
|
||||
assign is_stype = (curr_opcode == `S_INST);
|
||||
assign is_btype = (curr_opcode == `B_INST);
|
||||
assign is_jal = (curr_opcode == `JAL_INST);
|
||||
assign is_jalr = (curr_opcode == `JALR_INST);
|
||||
assign is_lui = (curr_opcode == `LUI_INST);
|
||||
assign is_auipc = (curr_opcode == `AUIPC_INST);
|
||||
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
|
||||
assign is_csr_immed = (is_csr) && (func3[2] == 1);
|
||||
// assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
|
||||
assign is_e_inst = in_instruction == 32'h00000073;
|
||||
|
||||
assign is_gpgpu = (curr_opcode == `GPGPU_INST);
|
||||
|
||||
assign is_tmc = is_gpgpu && (func3 == 0); // Goes to BE
|
||||
assign is_wspawn = is_gpgpu && (func3 == 1); // Goes to BE
|
||||
assign is_barrier = is_gpgpu && (func3 == 4); // Goes to BE
|
||||
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
|
||||
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
|
||||
|
||||
|
||||
assign VX_join.is_join = is_join;
|
||||
assign VX_join.join_warp_num = in_warp_num;
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.is_wspawn = is_wspawn;
|
||||
assign VX_frE_to_bckE_req.is_tmc = is_tmc;
|
||||
assign VX_frE_to_bckE_req.is_split = is_split;
|
||||
assign VX_frE_to_bckE_req.is_barrier = is_barrier;
|
||||
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.csr_immed = is_csr_immed;
|
||||
assign VX_frE_to_bckE_req.is_csr = is_csr;
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
|
||||
is_linst ? `WB_MEM :
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
`NO_WB;
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
|
||||
|
||||
// MEM signals
|
||||
assign VX_frE_to_bckE_req.mem_read = (is_linst) ? func3 : `NO_MEM_READ;
|
||||
assign VX_frE_to_bckE_req.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;
|
||||
|
||||
// UPPER IMMEDIATE
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`LUI_INST: temp_upper_immed = {func7, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.rs1, func3};
|
||||
`AUIPC_INST: temp_upper_immed = {func7, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.rs1, func3};
|
||||
default: temp_upper_immed = 20'h0;
|
||||
endcase // curr_opcode
|
||||
end
|
||||
|
||||
assign VX_frE_to_bckE_req.upper_immed = temp_upper_immed;
|
||||
|
||||
|
||||
assign jal_b_19_to_12 = in_instruction[19:12];
|
||||
assign jal_b_11 = in_instruction[20];
|
||||
assign jal_b_10_to_1 = in_instruction[30:21];
|
||||
assign jal_b_20 = in_instruction[31];
|
||||
assign jal_b_0 = 1'b0;
|
||||
assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0};
|
||||
assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset};
|
||||
|
||||
|
||||
assign jalr_immed = {func7, VX_frE_to_bckE_req.rs2};
|
||||
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
|
||||
|
||||
|
||||
assign jal_sys_cond1 = func3 == 3'h0;
|
||||
assign jal_sys_cond2 = u_12 < 12'h2;
|
||||
|
||||
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
|
||||
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
|
||||
|
||||
// JAL
|
||||
// Jump decode: produces the jump-request flag and its offset.
// A jump is only raised when at least one thread of the instruction is valid.
always @(*) begin
    // Values used by every opcode that is not listed below.
    temp_jal        = 1'b0;
    temp_jal_offset = 32'hdeadbeef;

    case (curr_opcode)
        `JAL_INST: begin
            temp_jal        = (|in_valid);
            temp_jal_offset = jal_1_offset;
        end
        `JALR_INST: begin
            temp_jal        = (|in_valid);
            temp_jal_offset = jal_2_offset;
        end
        `SYS_INST: begin
            // System instructions jump only when jal_sys_jal qualifies them.
            temp_jal        = jal_sys_jal && (|in_valid);
            temp_jal_offset = jal_sys_off;
        end
        default: begin
            // keep the values assigned above
        end
    endcase
end
|
||||
|
||||
assign VX_frE_to_bckE_req.jalQual = is_jal;
|
||||
assign VX_frE_to_bckE_req.jal = temp_jal;
|
||||
assign VX_frE_to_bckE_req.jal_offset = temp_jal_offset;
|
||||
|
||||
// wire is_ebreak;
|
||||
|
||||
|
||||
// assign is_ebreak = is_e_inst;
|
||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
|
||||
assign VX_frE_to_bckE_req.ebreak = ebreak;
|
||||
wire out_ebreak = ebreak;
|
||||
assign terminate_sim = is_e_inst;
|
||||
|
||||
|
||||
// CSR
|
||||
|
||||
assign csr_cond1 = func3 != 3'h0;
|
||||
assign csr_cond2 = u_12 >= 12'h2;
|
||||
|
||||
assign VX_frE_to_bckE_req.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
|
||||
|
||||
|
||||
// ITYPE IMEED
|
||||
assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
assign alu_shift_i_immed = {{7{1'b0}}, VX_frE_to_bckE_req.rs2};
|
||||
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
|
||||
|
||||
|
||||
// Sign-extended 32-bit immediate, assembled per opcode.
always @(*) begin
    // Marker value for opcodes that carry no such immediate.
    temp_itype_immed = 32'hdeadbeef;

    case (curr_opcode)
        // ALU immediate: alu_tempp already selects between shift amount and u_12.
        `ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
        // Store: immediate is split across func7 and the rd field.
        `S_INST:   temp_itype_immed = {{20{func7[6]}}, func7, VX_frE_to_bckE_req.rd};
        // Load: plain 12-bit immediate.
        `L_INST:   temp_itype_immed = {{20{u_12[11]}}, u_12};
        // Branch: offset bits gathered from instruction bits 31, 7, 30:25 and 11:8.
        `B_INST:   temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
        default: begin
            // keep the marker value assigned above
        end
    endcase
end
|
||||
|
||||
assign VX_frE_to_bckE_req.itype_immed = temp_itype_immed;
|
||||
|
||||
|
||||
|
||||
// Branch decode: selects the branch comparison type and whether the
// instruction requests a branch stall (only when some thread is valid).
always @(*) begin
    // Values for opcodes that neither branch nor jump.
    temp_branch_type  = `NO_BRANCH;
    temp_branch_stall = 1'b0;

    case (curr_opcode)
        `B_INST: begin
            temp_branch_stall = (|in_valid);
            case (func3)
                3'h0:    temp_branch_type = `BEQ;
                3'h1:    temp_branch_type = `BNE;
                3'h4:    temp_branch_type = `BLT;
                3'h5:    temp_branch_type = `BGT;
                3'h6:    temp_branch_type = `BLTU;
                3'h7:    temp_branch_type = `BGTU;
                default: temp_branch_type = `NO_BRANCH;
            endcase
        end
        // Jumps stall like branches but carry no comparison type.
        `JAL_INST,
        `JALR_INST: begin
            temp_branch_type  = `NO_BRANCH;
            temp_branch_stall = (|in_valid);
        end
        default: begin
            // keep the values assigned above
        end
    endcase
end
|
||||
|
||||
assign VX_frE_to_bckE_req.branch_type = temp_branch_type;
|
||||
|
||||
assign VX_wstall.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
|
||||
assign VX_wstall.warp_num = in_warp_num;
|
||||
|
||||
// Integer ALU operation select, keyed on func3.
always @(*) begin
    case (func3)
        // For ALU_INST the result is always ADD; otherwise func7 picks ADD or SUB.
        3'h0:    alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB);
        3'h1:    alu_op = `SLLA;
        3'h2:    alu_op = `SLT;
        3'h3:    alu_op = `SLTU;
        3'h4:    alu_op = `XOR;
        // Right shifts: func7 == 0 selects SRL, anything else SRA.
        3'h5:    alu_op = (func7 == 7'h0) ? `SRL : `SRA;
        3'h6:    alu_op = `OR;
        3'h7:    alu_op = `AND;
        default: alu_op = `NO_ALU;
    endcase
end
|
||||
|
||||
// Multiply/divide operation select, keyed on func3.
// The result is only used when the final alu_op mux picks mul_alu.
always @(*) begin
    case (func3)
        3'h0:    mul_alu = `MUL;
        3'h1:    mul_alu = `MULH;
        3'h2:    mul_alu = `MULHSU;
        3'h3:    mul_alu = `MULHU;
        3'h4:    mul_alu = `DIV;
        3'h5:    mul_alu = `DIVU;
        3'h6:    mul_alu = `REM;
        3'h7:    mul_alu = `REMU;
        default: mul_alu = `NO_ALU;
    endcase
end
|
||||
|
||||
assign csr_type = func3[1:0];
|
||||
|
||||
// CSR operation select from csr_type (the low two bits of func3).
always @(*) begin
    case (csr_type)
        2'h1:    csr_alu = `CSR_ALU_RW;
        2'h2:    csr_alu = `CSR_ALU_RS;
        2'h3:    csr_alu = `CSR_ALU_RC;
        // csr_type == 0 has no CSR operation.
        default: csr_alu = `NO_ALU;
    endcase
end
|
||||
|
||||
wire[4:0] temp_final_alu;
|
||||
|
||||
assign temp_final_alu = is_btype ? ((VX_frE_to_bckE_req.branch_type < `BLTU) ? `SUB : `SUBU) :
|
||||
is_lui ? `LUI_ALU :
|
||||
is_auipc ? `AUIPC_ALU :
|
||||
is_csr ? csr_alu :
|
||||
(is_stype || is_linst) ? `ADD :
|
||||
alu_op;
|
||||
|
||||
assign VX_frE_to_bckE_req.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,269 +0,0 @@
|
||||
`include "./VX_define_synth.v"

// Upper index of per-thread vectors declared as [`NT_M1:0].
`define NT_M1 (`NT-1)

// NOTE: despite its name, NW_M1 is log2(NW), not NW-1.
`define NW_M1 (`CLOG2(`NW))

// Uncomment the below line if NW=1
// `define ONLY

// `define SYN 1
// `define ASIC 1
// `define SYN_FUNC 1

`define NUM_BARRIERS 4

// Opcode values compared against curr_opcode in the decoder
`define R_INST     7'd51
`define L_INST     7'd3
`define ALU_INST   7'd19
`define S_INST     7'd35
`define B_INST     7'd99
`define LUI_INST   7'd55
`define AUIPC_INST 7'd23
`define JAL_INST   7'd111
`define JALR_INST  7'd103
`define SYS_INST   7'd115
`define GPGPU_INST 7'h6b

// Writeback source select
`define WB_ALU 2'h1
`define WB_MEM 2'h2
`define WB_JAL 2'h3
`define NO_WB  2'h0

// Second ALU operand select
`define RS2_IMMED 1
`define RS2_REG   0

// Memory read type (3'h7 means "no read")
`define NO_MEM_READ  3'h7
`define LB_MEM_READ  3'h0
`define LH_MEM_READ  3'h1
`define LW_MEM_READ  3'h2
`define LBU_MEM_READ 3'h4
`define LHU_MEM_READ 3'h5

// Memory write type (3'h7 means "no write")
`define NO_MEM_WRITE 3'h7
`define SB_MEM_WRITE 3'h0
`define SH_MEM_WRITE 3'h1
`define SW_MEM_WRITE 3'h2

// Branch type
`define NO_BRANCH 3'h0
`define BEQ       3'h1
`define BNE       3'h2
`define BLT       3'h3
`define BGT       3'h4
`define BLTU      3'h5
`define BGTU      3'h6

// ALU operation encodings
// NOTE(review): NO_ALU and CSR_ALU_RC share the encoding 5'd15; confirm that
// every consumer of alu_op is happy with that before relying on NO_ALU.
`define NO_ALU     5'd15
`define ADD        5'd0
`define SUB        5'd1
`define SLLA       5'd2
`define SLT        5'd3
`define SLTU       5'd4
`define XOR        5'd5
`define SRL        5'd6
`define SRA        5'd7
`define OR         5'd8
`define AND        5'd9
`define SUBU       5'd10
`define LUI_ALU    5'd11
`define AUIPC_ALU  5'd12
`define CSR_ALU_RW 5'd13
`define CSR_ALU_RS 5'd14
`define CSR_ALU_RC 5'd15
`define MUL        5'd16
`define MULH       5'd17
`define MULHSU     5'd18
`define MULHU      5'd19
`define DIV        5'd20
`define DIVU       5'd21
`define REM        5'd22
`define REMU       5'd23

// JAL
`define JUMP    1'h1
`define NO_JUMP 1'h0

// STALLS
`define STALL    1'h1
`define NO_STALL 1'h0

`define TAKEN     1'b1
`define NOT_TAKEN 1'b0

`define ZERO_REG 5'h0

// Constant ceil(log2(x)) usable where $clog2 is not, for x up to 1024.
// Returns 1 (not 0) for x <= 2 so derived widths are never empty, and -199
// as an out-of-range marker. The argument and the whole expression are
// parenthesized so the macro can be embedded in larger expressions such as
// `CLOG2(n)-1 without the trailing operator binding to the last alternative.
`define CLOG2(x) \
    (((x) <= 2)    ? 1  : \
     ((x) <= 4)    ? 2  : \
     ((x) <= 8)    ? 3  : \
     ((x) <= 16)   ? 4  : \
     ((x) <= 32)   ? 5  : \
     ((x) <= 64)   ? 6  : \
     ((x) <= 128)  ? 7  : \
     ((x) <= 256)  ? 8  : \
     ((x) <= 512)  ? 9  : \
     ((x) <= 1024) ? 10 : \
     -199)
|
||||
|
||||
|
||||
// `define PARAM
|
||||
|
||||
// oooooo
|
||||
|
||||
// Instruction cache configuration

// Bytes
`define ICACHE_SIZE  4096
`define ICACHE_WAYS  2
// Bytes
`define ICACHE_BLOCK 64
`define ICACHE_BANKS 4
`define ICACHE_LOG_NUM_BANKS (`CLOG2(`ICACHE_BANKS))

`define ICACHE_NUM_WORDS_PER_BLOCK (`ICACHE_BLOCK / (`ICACHE_BANKS * 4))
`define ICACHE_NUM_REQ 1
`define ICACHE_LOG_NUM_REQ (`CLOG2(`ICACHE_NUM_REQ))

//set this to 1 if CACHE_WAYS is 1
`define ICACHE_WAY_INDEX (`CLOG2(`ICACHE_WAYS))
//`define ICACHE_WAY_INDEX 1
`define ICACHE_BLOCK_PER_BANK (`ICACHE_BLOCK / `ICACHE_BANKS)

// Offset
`define ICACHE_OFFSET_NB (`CLOG2(`ICACHE_NUM_WORDS_PER_BLOCK))

`define ICACHE_ADDR_OFFSET_ST (2+$clog2(`ICACHE_BANKS))
`define ICACHE_ADDR_OFFSET_ED (`ICACHE_ADDR_OFFSET_ST+(`ICACHE_OFFSET_NB)-1)

`define ICACHE_ADDR_OFFSET_RNG `ICACHE_ADDR_OFFSET_ED:`ICACHE_ADDR_OFFSET_ST
// The CLOG2 term is wrapped on its own so the "-1" applies to the log value
// rather than to the last alternative of the macro's ternary chain.
`define ICACHE_OFFSET_SIZE_RNG ((`CLOG2(`ICACHE_NUM_WORDS_PER_BLOCK))-1):0
`define ICACHE_OFFSET_ST 0
`define ICACHE_OFFSET_ED ($clog2(`ICACHE_NUM_WORDS_PER_BLOCK)-1)

// Index
// `define ICACHE_NUM_IND (`ICACHE_SIZE / (`ICACHE_WAYS * `ICACHE_BLOCK_PER_BANK))
`define ICACHE_NUM_IND (`ICACHE_SIZE / (`ICACHE_WAYS * `ICACHE_BLOCK))
`define ICACHE_IND_NB ($clog2(`ICACHE_NUM_IND))

`define ICACHE_IND_ST (`ICACHE_ADDR_OFFSET_ED+1)
`define ICACHE_IND_ED (`ICACHE_IND_ST+`ICACHE_IND_NB-1)

`define ICACHE_ADDR_IND_RNG `ICACHE_IND_ED:`ICACHE_IND_ST
`define ICACHE_IND_SIZE_RNG (`ICACHE_IND_NB-1):0

`define ICACHE_IND_SIZE_START 0
`define ICACHE_IND_SIZE_END (`ICACHE_IND_NB-1)

// Tag
`define ICACHE_ADDR_TAG_RNG 31:(`ICACHE_IND_ED+1)
`define ICACHE_TAG_SIZE_RNG (32-(`ICACHE_IND_ED+1)-1):0
`define ICACHE_TAG_SIZE_START 0
`define ICACHE_TAG_SIZE_END (32-(`ICACHE_IND_ED+1)-1)
`define ICACHE_ADDR_TAG_START (`ICACHE_IND_ED+1)
`define ICACHE_ADDR_TAG_END 31
|
||||
|
||||
// Data cache configuration

// Bytes
`define DCACHE_SIZE  4096
`define DCACHE_WAYS  2

// Bytes
`define DCACHE_BLOCK 64
`define DCACHE_BANKS 4
`define DCACHE_LOG_NUM_BANKS $clog2(`DCACHE_BANKS)
`define DCACHE_NUM_WORDS_PER_BLOCK (`DCACHE_BLOCK / (`DCACHE_BANKS * 4))
// One request per thread
`define DCACHE_NUM_REQ `NT
`define DCACHE_LOG_NUM_REQ $clog2(`DCACHE_NUM_REQ)

//set this to 1 if CACHE_WAYS is 1
`define DCACHE_WAY_INDEX $clog2(`DCACHE_WAYS)
//`define DCACHE_WAY_INDEX 1
`define DCACHE_BLOCK_PER_BANK (`DCACHE_BLOCK / `DCACHE_BANKS)

// Offset
`define DCACHE_OFFSET_NB ($clog2(`DCACHE_NUM_WORDS_PER_BLOCK))

`define DCACHE_ADDR_OFFSET_ST (2+$clog2(`DCACHE_BANKS))
`define DCACHE_ADDR_OFFSET_ED (`DCACHE_ADDR_OFFSET_ST+(`DCACHE_OFFSET_NB)-1)

`define DCACHE_ADDR_OFFSET_RNG `DCACHE_ADDR_OFFSET_ED:`DCACHE_ADDR_OFFSET_ST
`define DCACHE_OFFSET_SIZE_RNG ($clog2(`DCACHE_NUM_WORDS_PER_BLOCK)-1):0
`define DCACHE_OFFSET_ST 0
`define DCACHE_OFFSET_ED ($clog2(`DCACHE_NUM_WORDS_PER_BLOCK)-1)

// Index
// `define DCACHE_NUM_IND (`DCACHE_SIZE / (`DCACHE_WAYS * `DCACHE_BLOCK_PER_BANK))
`define DCACHE_NUM_IND (`DCACHE_SIZE / (`DCACHE_WAYS * `DCACHE_BLOCK))
`define DCACHE_IND_NB ($clog2(`DCACHE_NUM_IND))

`define DCACHE_IND_ST (`DCACHE_ADDR_OFFSET_ED+1)
`define DCACHE_IND_ED (`DCACHE_IND_ST+`DCACHE_IND_NB-1)

`define DCACHE_ADDR_IND_RNG `DCACHE_IND_ED:`DCACHE_IND_ST
`define DCACHE_IND_SIZE_RNG (`DCACHE_IND_NB-1):0

`define DCACHE_IND_SIZE_START 0
// Parenthesized so the macro stays a single term inside larger expressions.
`define DCACHE_IND_SIZE_END (`DCACHE_IND_NB-1)

// Tag
`define DCACHE_ADDR_TAG_RNG 31:(`DCACHE_IND_ED+1)
`define DCACHE_TAG_SIZE_RNG (32-(`DCACHE_IND_ED+1)-1):0
`define DCACHE_TAG_SIZE_START 0
`define DCACHE_TAG_SIZE_END (32-(`DCACHE_IND_ED+1)-1)
`define DCACHE_ADDR_TAG_START (`DCACHE_IND_ED+1)
`define DCACHE_ADDR_TAG_END 31

// Mask: clears the address bits below the block size
`define DCACHE_MEM_REQ_ADDR_MASK (32'hffffffff - (`DCACHE_BLOCK-1))
`define ICACHE_MEM_REQ_ADDR_MASK (32'hffffffff - (`ICACHE_BLOCK-1))
|
||||
|
||||
|
||||
|
||||
///////
|
||||
|
||||
// Shared memory configuration

//`define SHARED_MEMORY_SIZE 4096
`define SHARED_MEMORY_SIZE 8192
`define SHARED_MEMORY_BANKS 4
//`define SHARED_MEMORY_BYTES_PER_READ 16
//`define SHARED_MEMORY_HEIGHT ((`SHARED_MEMORY_SIZE) / (`SHARED_MEMORY_BANKS * `SHARED_MEMORY_BYTES_PER_READ))

//`define SHARED_MEMORY_SIZE 16384
//`define SHARED_MEMORY_BANKS 8
`define SHARED_MEMORY_BYTES_PER_READ 16
//`define SHARED_MEMORY_BITS_PER_BANK 3
// Wrapped in parentheses so the CLOG2 ternary chain stays one term at use sites.
`define SHARED_MEMORY_BITS_PER_BANK (`CLOG2(`SHARED_MEMORY_BANKS))
// One request per thread
`define SHARED_MEMORY_NUM_REQ `NT
`define SHARED_MEMORY_WORDS_PER_READ (`SHARED_MEMORY_BYTES_PER_READ / 4)
`define SHARED_MEMORY_LOG_WORDS_PER_READ $clog2(`SHARED_MEMORY_WORDS_PER_READ)
`define SHARED_MEMORY_HEIGHT ((`SHARED_MEMORY_SIZE) / (`SHARED_MEMORY_BANKS * `SHARED_MEMORY_BYTES_PER_READ))

// Address field boundaries, lowest field first: bank, block offset, index
`define SHARED_MEMORY_BANK_OFFSET_ST (2)
`define SHARED_MEMORY_BANK_OFFSET_ED (2+$clog2(`SHARED_MEMORY_BANKS)-1)
`define SHARED_MEMORY_BLOCK_OFFSET_ST (`SHARED_MEMORY_BANK_OFFSET_ED + 1)
`define SHARED_MEMORY_BLOCK_OFFSET_ED (`SHARED_MEMORY_BLOCK_OFFSET_ST +`SHARED_MEMORY_LOG_WORDS_PER_READ-1)
`define SHARED_MEMORY_INDEX_OFFSET_ST (`SHARED_MEMORY_BLOCK_OFFSET_ED + 1)
`define SHARED_MEMORY_INDEX_OFFSET_ED (`SHARED_MEMORY_INDEX_OFFSET_ST + $clog2(`SHARED_MEMORY_HEIGHT)-1)
|
||||
@@ -1,2 +0,0 @@
|
||||
// NT sizes the per-thread vectors: NT_M1 is defined as (NT-1) and is used as
// the upper index of signals such as the thread mask.
`define NT 4
|
||||
// NW determines the warp-number width: NW_M1 is defined as CLOG2(NW).
`define NW 8
|
||||
@@ -1,188 +0,0 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
// Memory controller sitting between the core and DRAM.
// Data requests are steered either to the shared memory or to the data cache,
// and instruction fetches are served by a second VX_d_cache instance.
module VX_dmem_controller (
    input wire clk,
    input wire reset,

    // MEM-RAM: one channel for the data cache, one for the instruction cache
    VX_dram_req_rsp_inter    VX_dram_req_rsp,
    VX_dram_req_rsp_inter    VX_dram_req_rsp_icache,

    // MEM-Processor
    VX_icache_request_inter  VX_icache_req,
    VX_icache_response_inter VX_icache_rsp,
    VX_dcache_request_inter  VX_dcache_req,
    VX_dcache_response_inter VX_dcache_rsp
);

    // ------------------------------------------------------------------
    // Data request steering
    // ------------------------------------------------------------------

    // The whole request goes to shared memory when the top byte of the
    // address in lane 0 is 0xFF; only lane 0 is inspected.
    wire to_shm;
    assign to_shm = (VX_dcache_req.out_cache_driver_in_address[0][31:24] == 8'hFF);

    // Per-thread valid bits, gated so exactly one of the two targets sees them.
    wire [`NT_M1:0] sm_driver_in_valid;
    wire [`NT_M1:0] cache_driver_in_valid;
    assign sm_driver_in_valid    = VX_dcache_req.out_cache_driver_in_valid & {`NT{to_shm}};
    assign cache_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{~to_shm}};

    // High when the data cache request is a write with at least one valid thread.
    wire read_or_write;
    assign read_or_write = (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|cache_driver_in_valid);

    wire [`NT_M1:0][31:0] cache_driver_in_address;
    wire [`NT_M1:0][31:0] cache_driver_in_data;
    assign cache_driver_in_address = VX_dcache_req.out_cache_driver_in_address;
    assign cache_driver_in_data    = VX_dcache_req.out_cache_driver_in_data;

    // Read/write types are forced to the "none" encodings when the target
    // has no valid thread.
    wire [2:0] cache_driver_in_mem_read;
    wire [2:0] cache_driver_in_mem_write;
    assign cache_driver_in_mem_read  = (|cache_driver_in_valid) ? VX_dcache_req.out_cache_driver_in_mem_read  : `NO_MEM_READ;
    assign cache_driver_in_mem_write = (|cache_driver_in_valid) ? VX_dcache_req.out_cache_driver_in_mem_write : `NO_MEM_WRITE;

    wire [2:0] sm_driver_in_mem_read;
    wire [2:0] sm_driver_in_mem_write;
    assign sm_driver_in_mem_read  = (|sm_driver_in_valid) ? VX_dcache_req.out_cache_driver_in_mem_read  : `NO_MEM_READ;
    assign sm_driver_in_mem_write = (|sm_driver_in_valid) ? VX_dcache_req.out_cache_driver_in_mem_write : `NO_MEM_WRITE;

    wire [`NT_M1:0][31:0] cache_driver_out_data;
    wire [`NT_M1:0][31:0] sm_driver_out_data;
    wire [`NT_M1:0]       cache_driver_out_valid; // Not used for now
    wire                  sm_delay;
    wire                  cache_delay;

    // ------------------------------------------------------------------
    // Instruction cache signals
    // ------------------------------------------------------------------

    wire [31:0] icache_instruction_out;
    wire        icache_delay;

    wire        icache_driver_in_valid;
    wire [31:0] icache_driver_in_address;
    wire [31:0] icache_driver_in_data;
    assign icache_driver_in_valid   = VX_icache_req.out_cache_driver_in_valid;
    assign icache_driver_in_address = VX_icache_req.pc_address;
    assign icache_driver_in_data    = VX_icache_req.out_cache_driver_in_data;

    wire [2:0] icache_driver_in_mem_read;
    wire [2:0] icache_driver_in_mem_write;
    assign icache_driver_in_mem_read  = (|icache_driver_in_valid) ? VX_icache_req.out_cache_driver_in_mem_read  : `NO_MEM_READ;
    assign icache_driver_in_mem_write = (|icache_driver_in_valid) ? VX_icache_req.out_cache_driver_in_mem_write : `NO_MEM_WRITE;

    wire read_or_write_ic;
    assign read_or_write_ic = (VX_icache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|icache_driver_in_valid);

    wire valid_read_cache;
    assign valid_read_cache = !cache_delay && cache_driver_in_valid[0];

    // ------------------------------------------------------------------
    // Shared memory
    // ------------------------------------------------------------------

    VX_shared_memory #(
        .SM_SIZE               (`SHARED_MEMORY_SIZE),
        .SM_BANKS              (`SHARED_MEMORY_BANKS),
        .SM_BYTES_PER_READ     (`SHARED_MEMORY_BYTES_PER_READ),
        .SM_WORDS_PER_READ     (`SHARED_MEMORY_WORDS_PER_READ),
        .SM_LOG_WORDS_PER_READ (`SHARED_MEMORY_LOG_WORDS_PER_READ),
        .SM_BANK_OFFSET_START  (`SHARED_MEMORY_BANK_OFFSET_ST),
        .SM_BANK_OFFSET_END    (`SHARED_MEMORY_BANK_OFFSET_ED),
        .SM_BLOCK_OFFSET_START (`SHARED_MEMORY_BLOCK_OFFSET_ST),
        .SM_BLOCK_OFFSET_END   (`SHARED_MEMORY_BLOCK_OFFSET_ED),
        .SM_INDEX_START        (`SHARED_MEMORY_INDEX_OFFSET_ST),
        .SM_INDEX_END          (`SHARED_MEMORY_INDEX_OFFSET_ED),
        .SM_HEIGHT             (`SHARED_MEMORY_HEIGHT),
        .NUM_REQ               (`SHARED_MEMORY_NUM_REQ),
        .BITS_PER_BANK         (`SHARED_MEMORY_BITS_PER_BANK)
    ) shared_memory (
        .clk        (clk),
        .reset      (reset),
        .in_valid   (sm_driver_in_valid),
        .in_address (cache_driver_in_address),
        .in_data    (cache_driver_in_data),
        .mem_read   (sm_driver_in_mem_read),
        .mem_write  (sm_driver_in_mem_write),
        .out_valid  (cache_driver_out_valid),
        .out_data   (sm_driver_out_data),
        .stall      (sm_delay)
    );

    // ------------------------------------------------------------------
    // Data cache
    // ------------------------------------------------------------------

    VX_d_cache #(
        .CACHE_SIZE          (`DCACHE_SIZE),
        .CACHE_WAYS          (`DCACHE_WAYS),
        .CACHE_BLOCK         (`DCACHE_BLOCK),
        .CACHE_BANKS         (`DCACHE_BANKS),
        .LOG_NUM_BANKS       (`DCACHE_LOG_NUM_BANKS),
        .NUM_REQ             (`DCACHE_NUM_REQ),
        .LOG_NUM_REQ         (`DCACHE_LOG_NUM_REQ),
        .NUM_IND             (`DCACHE_NUM_IND),
        .CACHE_WAY_INDEX     (`DCACHE_WAY_INDEX),
        .NUM_WORDS_PER_BLOCK (`DCACHE_NUM_WORDS_PER_BLOCK),
        .OFFSET_SIZE_START   (`DCACHE_OFFSET_ST),
        .OFFSET_SIZE_END     (`DCACHE_OFFSET_ED),
        .TAG_SIZE_START      (`DCACHE_TAG_SIZE_START),
        .TAG_SIZE_END        (`DCACHE_TAG_SIZE_END),
        .IND_SIZE_START      (`DCACHE_IND_SIZE_START),
        .IND_SIZE_END        (`DCACHE_IND_SIZE_END),
        .ADDR_TAG_START      (`DCACHE_ADDR_TAG_START),
        .ADDR_TAG_END        (`DCACHE_ADDR_TAG_END),
        .ADDR_OFFSET_START   (`DCACHE_ADDR_OFFSET_ST),
        .ADDR_OFFSET_END     (`DCACHE_ADDR_OFFSET_ED),
        .ADDR_IND_START      (`DCACHE_IND_ST),
        .ADDR_IND_END        (`DCACHE_IND_ED),
        .MEM_ADDR_REQ_MASK   (`DCACHE_MEM_REQ_ADDR_MASK)
    ) dcache (
        .clk               (clk),
        .rst               (reset),
        .i_p_valid         (cache_driver_in_valid),
        .i_p_addr          (cache_driver_in_address),
        .i_p_writedata     (cache_driver_in_data),
        .i_p_read_or_write (read_or_write),
        .i_p_mem_read      (cache_driver_in_mem_read),
        .i_p_mem_write     (cache_driver_in_mem_write),
        .o_p_readdata      (cache_driver_out_data),
        .o_p_delay         (cache_delay),
        .o_m_evict_addr    (VX_dram_req_rsp.o_m_evict_addr),
        .o_m_read_addr     (VX_dram_req_rsp.o_m_read_addr),
        .o_m_valid         (VX_dram_req_rsp.o_m_valid),
        .o_m_writedata     (VX_dram_req_rsp.o_m_writedata),
        .o_m_read_or_write (VX_dram_req_rsp.o_m_read_or_write),
        .i_m_readdata      (VX_dram_req_rsp.i_m_readdata),
        .i_m_ready         (VX_dram_req_rsp.i_m_ready)
    );

    // ------------------------------------------------------------------
    // Instruction cache (same cache module, ICACHE parameters)
    // ------------------------------------------------------------------

    VX_d_cache #(
        .CACHE_SIZE          (`ICACHE_SIZE),
        .CACHE_WAYS          (`ICACHE_WAYS),
        .CACHE_BLOCK         (`ICACHE_BLOCK),
        .CACHE_BANKS         (`ICACHE_BANKS),
        .LOG_NUM_BANKS       (`ICACHE_LOG_NUM_BANKS),
        .NUM_REQ             (`ICACHE_NUM_REQ),
        .LOG_NUM_REQ         (`ICACHE_LOG_NUM_REQ),
        .NUM_IND             (`ICACHE_NUM_IND),
        .CACHE_WAY_INDEX     (`ICACHE_WAY_INDEX),
        .NUM_WORDS_PER_BLOCK (`ICACHE_NUM_WORDS_PER_BLOCK),
        .OFFSET_SIZE_START   (`ICACHE_OFFSET_ST),
        .OFFSET_SIZE_END     (`ICACHE_OFFSET_ED),
        .TAG_SIZE_START      (`ICACHE_TAG_SIZE_START),
        .TAG_SIZE_END        (`ICACHE_TAG_SIZE_END),
        .IND_SIZE_START      (`ICACHE_IND_SIZE_START),
        .IND_SIZE_END        (`ICACHE_IND_SIZE_END),
        .ADDR_TAG_START      (`ICACHE_ADDR_TAG_START),
        .ADDR_TAG_END        (`ICACHE_ADDR_TAG_END),
        .ADDR_OFFSET_START   (`ICACHE_ADDR_OFFSET_ST),
        .ADDR_OFFSET_END     (`ICACHE_ADDR_OFFSET_ED),
        .ADDR_IND_START      (`ICACHE_IND_ST),
        .ADDR_IND_END        (`ICACHE_IND_ED),
        .MEM_ADDR_REQ_MASK   (`ICACHE_MEM_REQ_ADDR_MASK)
    ) icache (
        .clk               (clk),
        .rst               (reset),
        .i_p_valid         (icache_driver_in_valid),
        .i_p_addr          (icache_driver_in_address),
        .i_p_writedata     (icache_driver_in_data),
        .i_p_read_or_write (read_or_write_ic),
        .i_p_mem_read      (icache_driver_in_mem_read),
        .i_p_mem_write     (icache_driver_in_mem_write),
        .o_p_readdata      (icache_instruction_out),
        .o_p_delay         (icache_delay),
        .o_m_evict_addr    (VX_dram_req_rsp_icache.o_m_evict_addr),
        .o_m_read_addr     (VX_dram_req_rsp_icache.o_m_read_addr),
        .o_m_valid         (VX_dram_req_rsp_icache.o_m_valid),
        .o_m_writedata     (VX_dram_req_rsp_icache.o_m_writedata),
        .o_m_read_or_write (VX_dram_req_rsp_icache.o_m_read_or_write),
        .i_m_readdata      (VX_dram_req_rsp_icache.i_m_readdata),
        .i_m_ready         (VX_dram_req_rsp_icache.i_m_ready)
    );

    // ------------------------------------------------------------------
    // Responses back to the core
    // ------------------------------------------------------------------

    // Read data comes from whichever target the request was steered to;
    // the core is delayed while either target is busy.
    assign VX_dcache_rsp.in_cache_driver_out_data = to_shm ? sm_driver_out_data : cache_driver_out_data;
    assign VX_dcache_rsp.delay                    = sm_delay || cache_delay;

    assign VX_icache_rsp.instruction = icache_instruction_out;
    assign VX_icache_rsp.delay       = icache_delay;

endmodule
|
||||
@@ -1,168 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// Execute stage: runs one VX_alu per thread, forwards the writeback fields
// unregistered, and registers the jump and branch responses.
module VX_execute_unit (
    input wire clk,
    input wire reset,

    // Request
    VX_exec_unit_req_inter   VX_exec_unit_req,

    // Writeback
    VX_inst_exec_wb_inter    VX_inst_exec_wb,
    // JAL Response
    VX_jal_response_inter    VX_jal_rsp,
    // Branch Response
    VX_branch_response_inter VX_branch_rsp
);

    // Local aliases of the request fields
    wire [`NT_M1:0][31:0] in_a_reg_data  = VX_exec_unit_req.a_reg_data;
    wire [`NT_M1:0][31:0] in_b_reg_data  = VX_exec_unit_req.b_reg_data;
    wire [4:0]            in_alu_op      = VX_exec_unit_req.alu_op;
    wire                  in_rs2_src     = VX_exec_unit_req.rs2_src;
    wire [31:0]           in_itype_immed = VX_exec_unit_req.itype_immed;
    wire [2:0]            in_branch_type = VX_exec_unit_req.branch_type;
    wire [19:0]           in_upper_immed = VX_exec_unit_req.upper_immed;
    wire                  in_jal         = VX_exec_unit_req.jal;
    wire [31:0]           in_jal_offset  = VX_exec_unit_req.jal_offset;
    wire [31:0]           in_curr_PC     = VX_exec_unit_req.curr_PC;

    // One ALU per thread; all lanes share the decoded control signals.
    wire [`NT_M1:0][31:0] alu_result;

    genvar index_out_reg;
    generate
        for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1)
        begin
            VX_alu vx_alu (
                .in_1           (in_a_reg_data[index_out_reg]),
                .in_2           (in_b_reg_data[index_out_reg]),
                .in_rs2_src     (in_rs2_src),
                .in_itype_immed (in_itype_immed),
                .in_upper_immed (in_upper_immed),
                .in_alu_op      (in_alu_op),
                .in_curr_PC     (in_curr_PC),
                .out_alu_result (alu_result[index_out_reg])
            );
        end
    endgenerate

    // The priority encoder picks the thread whose ALU result and rs1 value
    // are used for the branch decision and the jump target.
    wire [$clog2(`NT)-1:0] jal_branch_use_index;
    wire                   jal_branch_found_valid;

    VX_generic_priority_encoder #(.N(`NT)) choose_alu_result (
        .valids (VX_exec_unit_req.valid),
        .index  (jal_branch_use_index),
        .found  (jal_branch_found_valid)
    );

    wire [31:0] branch_use_alu_result = alu_result[jal_branch_use_index];

    // Branch direction from the selected ALU result: equality tests look at
    // the whole word, ordering tests look at bit 31 only.
    reg temp_branch_dir;
    always @(*) begin
        case (VX_exec_unit_req.branch_type)
            `BEQ:    temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN;
            `BNE:    temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN;
            `BLT,
            `BLTU:   temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
            `BGT,
            `BGTU:   temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN;
            // `NO_BRANCH and any other encoding
            default: temp_branch_dir = `NOT_TAKEN;
        endcase
    end

    // PC_next replicated into every thread lane; written back in place of
    // the ALU result when the request is a jump.
    wire [`NT_M1:0][31:0] duplicate_PC_data;
    genvar i;
    generate
        for (i = 0; i < `NT; i = i + 1)
        begin
            assign duplicate_PC_data[i] = VX_exec_unit_req.PC_next;
        end
    endgenerate

    // Unregistered response values; registered copies drive the module ports.
    VX_jal_response_inter    VX_jal_rsp_temp();
    VX_branch_response_inter VX_branch_rsp_temp();

    // Writeback (combinational, same cycle as the request)
    assign VX_inst_exec_wb.rd          = VX_exec_unit_req.rd;
    assign VX_inst_exec_wb.wb          = VX_exec_unit_req.wb;
    assign VX_inst_exec_wb.wb_valid    = VX_exec_unit_req.valid;
    assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
    assign VX_inst_exec_wb.alu_result  = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;
    assign VX_inst_exec_wb.exec_wb_pc  = in_curr_PC;

    // Jump response: target is the selected thread's rs1 value plus the offset.
    assign VX_jal_rsp_temp.jal          = in_jal;
    assign VX_jal_rsp_temp.jal_dest     = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
    assign VX_jal_rsp_temp.jal_warp_num = VX_exec_unit_req.warp_num;

    // Branch response: itype_immed carries the branch offset, shifted left by one.
    assign VX_branch_rsp_temp.valid_branch    = (VX_exec_unit_req.branch_type != `NO_BRANCH) && (|VX_exec_unit_req.valid);
    assign VX_branch_rsp_temp.branch_dir      = temp_branch_dir;
    assign VX_branch_rsp_temp.branch_warp_num = VX_exec_unit_req.warp_num;
    assign VX_branch_rsp_temp.branch_dest     = $signed(VX_exec_unit_req.curr_PC) + ($signed(VX_exec_unit_req.itype_immed) << 1);

    // The response registers are never stalled or flushed.
    wire zero;
    assign zero = 0;

    VX_generic_register #(.N(33 + `NW_M1 + 1)) jal_reg (
        .clk   (clk),
        .reset (reset),
        .stall (zero),
        .flush (zero),
        .in    ({VX_jal_rsp_temp.jal, VX_jal_rsp_temp.jal_dest, VX_jal_rsp_temp.jal_warp_num}),
        .out   ({VX_jal_rsp.jal     , VX_jal_rsp.jal_dest     , VX_jal_rsp.jal_warp_num})
    );

    VX_generic_register #(.N(34 + `NW_M1 + 1)) branch_reg (
        .clk   (clk),
        .reset (reset),
        .stall (zero),
        .flush (zero),
        .in    ({VX_branch_rsp_temp.valid_branch, VX_branch_rsp_temp.branch_dir, VX_branch_rsp_temp.branch_warp_num, VX_branch_rsp_temp.branch_dest}),
        .out   ({VX_branch_rsp.valid_branch     , VX_branch_rsp.branch_dir     , VX_branch_rsp.branch_warp_num     , VX_branch_rsp.branch_dest     })
    );

endmodule
|
||||
@@ -1,103 +0,0 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
// Fetch stage: wraps the warp scheduler, issues the instruction cache
// request for the scheduled warp, and forwards the fetched instruction.
module VX_fetch (
    input wire clk,
    input wire reset,

    VX_wstall_inter          VX_wstall,
    VX_join_inter            VX_join,
    input wire               schedule_delay,
    VX_icache_response_inter icache_response,
    VX_icache_request_inter  icache_request,

    output wire              out_ebreak,
    VX_jal_response_inter    VX_jal_rsp,
    VX_branch_response_inter VX_branch_rsp,
    VX_inst_meta_inter       fe_inst_meta_fd,
    VX_warp_ctl_inter        VX_warp_ctl
);

    // The scheduler is held while the back end asks for a delay or the
    // instruction cache has not answered yet.
    wire pipe_stall = schedule_delay || icache_response.delay;

    // Scheduler outputs
    wire [`NT_M1:0] thread_mask;
    wire [`NW_M1:0] warp_num;
    wire [31:0]     warp_pc;
    wire            scheduled_warp;

    VX_warp_scheduler warp_scheduler (
        .clk               (clk),
        .reset             (reset),
        .stall             (pipe_stall),

        // Barrier
        .is_barrier        (VX_warp_ctl.is_barrier),
        .barrier_id        (VX_warp_ctl.barrier_id),
        .num_warps         (VX_warp_ctl.num_warps),
        .barrier_warp_num  (VX_warp_ctl.warp_num),

        // Wspawn
        .wspawn            (VX_warp_ctl.wspawn),
        .wsapwn_pc         (VX_warp_ctl.wspawn_pc),
        .wspawn_new_active (VX_warp_ctl.wspawn_new_active),

        // CTM
        .ctm               (VX_warp_ctl.change_mask),
        .ctm_mask          (VX_warp_ctl.thread_mask),
        .ctm_warp_num      (VX_warp_ctl.warp_num),

        // WHALT
        .whalt             (VX_warp_ctl.ebreak),
        .whalt_warp_num    (VX_warp_ctl.warp_num),

        // Wstall
        .wstall            (VX_wstall.wstall),
        .wstall_warp_num   (VX_wstall.warp_num),

        // Join
        .is_join           (VX_join.is_join),
        .join_warp_num     (VX_join.join_warp_num),

        // Split
        .is_split          (VX_warp_ctl.is_split),
        .dont_split        (VX_warp_ctl.dont_split),
        .split_new_mask    (VX_warp_ctl.split_new_mask),
        .split_later_mask  (VX_warp_ctl.split_later_mask),
        .split_save_pc     (VX_warp_ctl.split_save_pc),
        .split_warp_num    (VX_warp_ctl.warp_num),

        // JAL
        .jal               (VX_jal_rsp.jal),
        .jal_dest          (VX_jal_rsp.jal_dest),
        .jal_warp_num      (VX_jal_rsp.jal_warp_num),

        // Branch
        .branch_valid      (VX_branch_rsp.valid_branch),
        .branch_dir        (VX_branch_rsp.branch_dir),
        .branch_dest       (VX_branch_rsp.branch_dest),
        .branch_warp_num   (VX_branch_rsp.branch_warp_num),

        // Outputs
        .thread_mask       (thread_mask),
        .warp_num          (warp_num),
        .warp_pc           (warp_pc),
        .out_ebreak        (out_ebreak),
        .scheduled_warp    (scheduled_warp)
    );

    // Instruction cache request: a word read at the scheduled warp's PC,
    // valid only when a warp was scheduled and no delay is requested.
    assign icache_request.pc_address                    = warp_pc;
    assign icache_request.out_cache_driver_in_valid     = !schedule_delay && scheduled_warp;
    assign icache_request.out_cache_driver_in_mem_read  = `LW_MEM_READ;
    assign icache_request.out_cache_driver_in_mem_write = `NO_MEM_WRITE;
    assign icache_request.out_cache_driver_in_data      = 32'b0;

    // Metadata handed to the decode stage; an all-zero thread mask turns the
    // instruction word into zero.
    assign fe_inst_meta_fd.warp_num    = warp_num;
    assign fe_inst_meta_fd.valid       = thread_mask;
    assign fe_inst_meta_fd.instruction = (thread_mask == 0) ? 32'b0 : icache_response.instruction;
    assign fe_inst_meta_fd.inst_pc     = warp_pc;

endmodule
|
||||
@@ -1,89 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// Front end of the pipeline: fetch -> F/D register -> decode -> D/E register.
module VX_front_end (
    input wire clk,
    input wire reset,

    input wire               schedule_delay,

    VX_warp_ctl_inter        VX_warp_ctl,

    VX_icache_response_inter icache_response_fe,
    VX_icache_request_inter  icache_request_fe,

    VX_jal_response_inter    VX_jal_rsp,
    VX_branch_response_inter VX_branch_rsp,

    VX_frE_to_bckE_req_inter VX_bckE_req,

    output wire              fetch_ebreak
);

    // Inter-stage bundles
    VX_inst_meta_inter       fe_inst_meta_fd();    // fetch  -> F/D register
    VX_inst_meta_inter       fd_inst_meta_de();    // F/D register -> decode
    VX_frE_to_bckE_req_inter VX_frE_to_bckE_req(); // decode -> D/E register

    // Decode -> fetch feedback
    VX_wstall_inter VX_wstall();
    VX_join_inter   VX_join();

    // Both pipeline registers freeze on a schedule delay.
    wire total_freeze;
    assign total_freeze = schedule_delay;

    // ebreak is reported when either the fetch stage or the decoder raises it.
    wire vortex_ebreak;
    wire terminate_sim;
    assign fetch_ebreak = vortex_ebreak || terminate_sim;

    VX_fetch vx_fetch (
        .clk             (clk),
        .reset           (reset),
        .VX_wstall       (VX_wstall),
        .VX_join         (VX_join),
        .schedule_delay  (schedule_delay),
        .icache_response (icache_response_fe),
        .icache_request  (icache_request_fe),
        .out_ebreak      (vortex_ebreak),
        .VX_jal_rsp      (VX_jal_rsp),
        .VX_branch_rsp   (VX_branch_rsp),
        .fe_inst_meta_fd (fe_inst_meta_fd),
        .VX_warp_ctl     (VX_warp_ctl)
    );

    VX_f_d_reg vx_f_d_reg (
        .clk             (clk),
        .reset           (reset),
        .in_freeze       (total_freeze),
        .fe_inst_meta_fd (fe_inst_meta_fd),
        .fd_inst_meta_de (fd_inst_meta_de)
    );

    VX_decode vx_decode (
        .fd_inst_meta_de    (fd_inst_meta_de),
        .VX_frE_to_bckE_req (VX_frE_to_bckE_req),
        .VX_wstall          (VX_wstall),
        .VX_join            (VX_join),
        .terminate_sim      (terminate_sim)
    );

    // The D/E register's branch-stall input is tied off.
    wire no_br_stall;
    assign no_br_stall = 0;

    VX_d_e_reg vx_d_e_reg (
        .clk                (clk),
        .reset              (reset),
        .in_branch_stall    (no_br_stall),
        .in_freeze          (total_freeze),
        .VX_frE_to_bckE_req (VX_frE_to_bckE_req),
        .VX_bckE_req        (VX_bckE_req)
    );

endmodule
|
||||
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
// Combinational priority encoder over an N-bit request vector.
//   valids : request bits
//   index  : position of the lowest-numbered set bit (0 when none is set)
//   found  : 1 when at least one bit of valids is set
module VX_generic_priority_encoder
#(
    parameter N = 1
)
(
    input  wire [N-1:0]           valids,
    output reg  [(`CLOG2(N))-1:0] index,
    output reg                    found
);

    integer bit_pos;

    always @(*) begin
        // Defaults cover the "no request" case.
        index = 0;
        found = 1'b0;
        // Scan upwards and latch only the first hit, so the lowest set
        // bit wins.
        for (bit_pos = 0; bit_pos < N; bit_pos = bit_pos + 1) begin
            if (!found && valids[bit_pos]) begin
                index = bit_pos[(`CLOG2(N))-1:0];
                found = 1'b1;
            end
        end
    end

endmodule
|
||||
@@ -1,34 +0,0 @@
|
||||
|
||||
|
||||
// N-bit pipeline register with asynchronous reset, synchronous flush and
// stall.
//   reset : asynchronous, clears the register
//   flush : synchronous clear, takes precedence over stall
//   stall : when high (and not flushing) the register holds its value
//   in    : value captured on a clock edge when neither flush nor stall is set
//   out   : current register contents
module VX_generic_register
#(
    parameter N = 1
)
(
    input  wire         clk,
    input  wire         reset,
    input  wire         stall,
    input  wire         flush,
    input  wire [N-1:0] in,
    output wire [N-1:0] out
);

    reg [N-1:0] state_q;

    assign out = state_q;

    always @(posedge clk or posedge reset) begin
        if (reset) begin
            state_q <= {N{1'b0}};
        end else if (flush) begin
            state_q <= {N{1'b0}};
        end else if (!stall) begin
            state_q <= in;
        end
    end

endmodule
|
||||
@@ -1,38 +0,0 @@
|
||||
// Small hardware stack with 2**DEPTH entries of WIDTH bits.
//   push : writes q1 then q2 on top of the stack (two entries per push)
//   pop  : removes one entry; ignored when push is asserted in the same cycle
//   d    : entry currently on top of the stack (the one below ptr)
// reset is synchronous and clears both the pointer and the storage.
// There is no overflow/underflow detection; the pointer wraps modulo
// 2**DEPTH.
module VX_generic_stack
#(
    parameter WIDTH = 40,
    parameter DEPTH = 2
)
(
    input  wire               clk,
    input  wire               reset,
    input  wire               push,
    input  wire               pop,
    // Declared as nets: `input reg` is not legal Verilog-2001 and nothing
    // in this module assigns to these ports.
    input  wire [WIDTH - 1:0] q1,
    input  wire [WIDTH - 1:0] q2,
    output wire [WIDTH - 1:0] d
);

    reg [DEPTH - 1:0] ptr;
    reg [WIDTH - 1:0] stack [0:(1 << DEPTH) - 1];

    // Neighbouring slots, computed at DEPTH bits so they wrap exactly like
    // ptr does. Indexing with `ptr + 1` / `ptr - 1` directly is evaluated
    // at 32 bits (the literal's width), which falls outside the array at
    // the pointer boundaries: the write is dropped and the read returns X
    // in simulation, while synthesis silently wraps.
    wire [DEPTH - 1:0] slot_above = ptr + 1'b1;
    wire [DEPTH - 1:0] slot_top   = ptr - 1'b1;

    integer i;
    always @(posedge clk) begin
        if (reset) begin
            ptr <= 0;
            for (i = 0; i < (1 << DEPTH); i = i + 1) stack[i] <= 0;
        end else if (push) begin
            stack[ptr]        <= q1;
            stack[slot_above] <= q2;
            ptr               <= ptr + 2;
        end else if (pop) begin
            ptr <= ptr - 1;
        end
    end

    assign d = stack[slot_top];

endmodule
|
||||
@@ -1,85 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// Decodes a GPU-control request (tmc / wspawn / barrier / split) into the
// warp-control signals carried by VX_warp_ctl. Purely combinational.
module VX_gpgpu_inst (
    // Input
    VX_gpu_inst_req_inter VX_gpu_inst_req,

    // Output
    VX_warp_ctl_inter     VX_warp_ctl
);

    wire [`NT_M1:0] curr_valids = VX_gpu_inst_req.valid;
    wire            valid_inst  = (|curr_valids);   // at least one thread carries the request

    assign VX_warp_ctl.warp_num = VX_gpu_inst_req.warp_num;

    // ---------------------------------------------------------------- tmc
    // New thread mask: thread t is enabled when t < a_reg_data[0].
    wire [`NT_M1:0] tmc_new_mask;
    genvar tmc_t;
    generate
        for (tmc_t = 0; tmc_t < `NT; tmc_t = tmc_t + 1) begin
            assign tmc_new_mask[tmc_t] = tmc_t < VX_gpu_inst_req.a_reg_data[0];
        end
    endgenerate

    assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc) && valid_inst;
    assign VX_warp_ctl.thread_mask = VX_gpu_inst_req.is_tmc ? tmc_new_mask : 0;

    // A mask change that leaves no thread enabled is reported as ebreak.
    assign VX_warp_ctl.ebreak = VX_warp_ctl.change_mask && (VX_warp_ctl.thread_mask == 0);

    // ------------------------------------------------------------- wspawn
    // Warp w becomes active when w < a_reg_data[0]; rd2 supplies the PC.
    wire [`NW-1:0] wspawn_new_active;
    genvar spawn_w;
    generate
        for (spawn_w = 0; spawn_w < `NW; spawn_w = spawn_w + 1) begin
            assign wspawn_new_active[spawn_w] = spawn_w < VX_gpu_inst_req.a_reg_data[0];
        end
    endgenerate

    wire        wspawn    = VX_gpu_inst_req.is_wspawn;
    wire [31:0] wspawn_pc = VX_gpu_inst_req.rd2;

    assign VX_warp_ctl.wspawn            = wspawn;
    assign VX_warp_ctl.wspawn_pc         = wspawn_pc;
    assign VX_warp_ctl.wspawn_new_active = wspawn_new_active;

    // ------------------------------------------------------------ barrier
    // Barrier id comes from a_reg_data[0]; the warp count is rd2 - 1.
    wire [31:0] num_warps_m1 = VX_gpu_inst_req.rd2 - 1;

    assign VX_warp_ctl.is_barrier = VX_gpu_inst_req.is_barrier && valid_inst;
    assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0];
    assign VX_warp_ctl.num_warps  = num_warps_m1[$clog2(`NW):0];

    // -------------------------------------------------------------- split
    // Active threads whose predicate register equals 1 continue now
    // ("use" mask); the remaining active threads form the "later" mask.
    wire            is_split = (VX_gpu_inst_req.is_split);
    wire [`NT_M1:0] split_new_use_mask;
    wire [`NT_M1:0] split_new_later_mask;

    genvar split_t;
    generate
        for (split_t = 0; split_t < `NT; split_t = split_t + 1) begin
            wire curr_bool = (VX_gpu_inst_req.a_reg_data[split_t] == 32'b1);

            assign split_new_use_mask[split_t]   = curr_valids[split_t] & (curr_bool);
            assign split_new_later_mask[split_t] = curr_valids[split_t] & (!curr_bool);
        end
    endgenerate

    wire [$clog2(`NT):0] num_valids;

    VX_countones #(.N(`NT)) valids_counter (
        .valids(curr_valids),
        .count (num_valids)
    );

    // A split needs more than one active thread; it is flagged as
    // "dont_split" when the use mask is all zeros or all ones.
    assign VX_warp_ctl.is_split         = is_split && (num_valids > 1);
    assign VX_warp_ctl.dont_split       = VX_warp_ctl.is_split && ((~|split_new_use_mask) || (&split_new_use_mask));
    assign VX_warp_ctl.split_new_mask   = split_new_use_mask;
    assign VX_warp_ctl.split_later_mask = split_new_later_mask;
    assign VX_warp_ctl.split_save_pc    = VX_gpu_inst_req.pc_next;
    assign VX_warp_ctl.split_warp_num   = VX_gpu_inst_req.warp_num;

endmodule
|
||||
@@ -1,172 +0,0 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
// Register file of one warp: two read ports (rs1 -> out_a_reg_data,
// rs2 -> out_b_reg_data) and one write port fed by the write-back
// interface, with one 32-bit lane per thread.
//   valid_write_request : qualifies the write-back for this instance
//   VX_gpr_read         : supplies the rs1 / rs2 read addresses
//   VX_writeback_inter  : supplies rd, wb, wb_valid and write_data
// Without ASIC defined a behavioural dual-port RAM is used; with ASIC
// defined the storage is built from rf2_32x128_wm1 macros, one pair per
// group of four threads.
module VX_gpr (
    input wire                  clk,
    input wire                  reset,
    input wire                  valid_write_request,
    VX_gpr_read_inter           VX_gpr_read,
    VX_wb_inter                 VX_writeback_inter,

    // Nets rather than `reg`: both outputs are driven only by continuous
    // assignments or instance output ports.
    output wire[`NT_M1:0][31:0] out_a_reg_data,
    output wire[`NT_M1:0][31:0] out_b_reg_data
);

    wire write_enable;

`ifndef ASIC

    // Write only for a real write-back (wb != 0) and never to register 0.
    assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0)) && (VX_writeback_inter.rd != 0);

    byte_enabled_simple_dual_port_ram first_ram(
        .we    (write_enable),
        .clk   (clk),
        .reset (reset),
        .waddr (VX_writeback_inter.rd),
        .raddr1(VX_gpr_read.rs1),
        .raddr2(VX_gpr_read.rs2),
        .be    (VX_writeback_inter.wb_valid),
        .wdata (VX_writeback_inter.write_data),
        .q1    (out_a_reg_data),
        .q2    (out_b_reg_data)
    );

`else

    // Here rd == 0 is not filtered out of the enable; instead the write
    // data is forced to zero for rd == 0 (see to_write below).
    assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0));

    // Per-lane bit mask for the macro's WENB pins: all zeros for a lane
    // that is being written, all ones otherwise.
    // NOTE(review): presumably WENB is active low - confirm against the
    // macro documentation.
    wire[`NT_M1:0][31:0] write_bit_mask;

    genvar curr_t;
    for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
        wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
        assign write_bit_mask[curr_t] = {32{~local_write}};
    end

    // Chip-enable pins are tied to 0 on both ports.
    wire cenb   = 0;
    wire cena_1 = 0;
    wire cena_2 = 0;

    wire[`NT_M1:0][31:0] temp_a;
    wire[`NT_M1:0][31:0] temp_b;

`ifndef SYN
    // Simulation only: replace X bits coming out of the macro by 0.
    genvar thread;
    genvar curr_bit;
    for (thread = 0; thread < `NT; thread = thread + 1)
    begin
        for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1)
        begin
            assign out_a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
            assign out_b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
        end
    end
`else
    assign out_a_reg_data = temp_a;
    assign out_b_reg_data = temp_b;
`endif

    wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;

    // One pair of macros (A-operand copy, B-operand copy) per four threads.
    // The loop bound uses the `NT macro; the previous text had 'NT (an
    // apostrophe instead of a backtick), which does not compile.
    // NOTE(review): the AA / AB address pins below are fed slices of
    // rs1 / rs2 / rd indexed by thread number, which looks unintended for
    // a register address - confirm against the macro's address width.
    genvar curr_base_thread;
    for (curr_base_thread = 0; curr_base_thread < `NT; curr_base_thread=curr_base_thread+4)
    begin
        /* verilator lint_off PINCONNECTEMPTY */
        rf2_32x128_wm1 first_ram (
            .CENYA(),
            .AYA(),
            .CENYB(),
            .WENYB(),
            .AYB(),
            .QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
            .SOA(),
            .SOB(),
            .CLKA(clk),
            .CENA(cena_1),
            .AA(VX_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]),
            .CLKB(clk),
            .CENB(cenb),
            .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
            .AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
            .DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
            .EMAA(3'b011),
            .EMASA(1'b0),
            .EMAB(3'b011),
            .TENA(1'b1),
            .TCENA(1'b0),
            .TAA(5'b0),
            .TENB(1'b1),
            .TCENB(1'b0),
            .TWENB(128'b0),
            .TAB(5'b0),
            .TDB(128'b0),
            .RET1N(1'b1),
            .SIA(2'b0),
            .SEA(1'b0),
            .DFTRAMBYP(1'b0),
            .SIB(2'b0),
            .SEB(1'b0),
            .COLLDISN(1'b1)
        );
        /* verilator lint_on PINCONNECTEMPTY */

        /* verilator lint_off PINCONNECTEMPTY */
        rf2_32x128_wm1 second_ram (
            .CENYA(),
            .AYA(),
            .CENYB(),
            .WENYB(),
            .AYB(),
            .QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
            .SOA(),
            .SOB(),
            .CLKA(clk),
            .CENA(cena_2),
            .AA(VX_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]),
            .CLKB(clk),
            .CENB(cenb),
            .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
            .AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
            .DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
            .EMAA(3'b011),
            .EMASA(1'b0),
            .EMAB(3'b011),
            .TENA(1'b1),
            .TCENA(1'b0),
            .TAA(5'b0),
            .TENB(1'b1),
            .TCENB(1'b0),
            .TWENB(128'b0),
            .TAB(5'b0),
            .TDB(128'b0),
            .RET1N(1'b1),
            .SIA(2'b0),
            .SEA(1'b0),
            .DFTRAMBYP(1'b0),
            .SIB(2'b0),
            .SEB(1'b0),
            .COLLDISN(1'b1)
        );
        /* verilator lint_on PINCONNECTEMPTY */
    end

`endif

endmodule
|
||||
@@ -1,223 +0,0 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
// Register-read stage. Reads the operands of the incoming back-end request
// from the register file, splits the request by functional unit
// (execute / LSU / GPU control / CSR) and registers each per-unit request.
//   schedule_delay  : flushes the per-unit registers (the LSU one only
//                     while it is not stalled)
//   memory_delay    : stalls the LSU register
//   stall_gpr_csr   : stalls the CSR register
//   gpr_stage_delay : high while the LSU path is stalled, or while a valid
//                     CSR request is held back by stall_gpr_csr
// With ASIC defined the operand data bypasses the per-unit registers, and
// the LSU operands are captured separately on the first stall cycle.
module VX_gpr_stage (
    input wire                 clk,
    input wire                 reset,
    input wire                 schedule_delay,

    input  wire                memory_delay,
    input  wire                stall_gpr_csr,
    output wire                gpr_stage_delay,

    // inputs
        // Instruction Information
    VX_frE_to_bckE_req_inter   VX_bckE_req,
        // WriteBack inputs
    VX_wb_inter                VX_writeback_inter,

    // Outputs
    VX_exec_unit_req_inter     VX_exec_unit_req,
    VX_lsu_req_inter           VX_lsu_req,
    VX_gpu_inst_req_inter      VX_gpu_inst_req,
    VX_csr_req_inter           VX_csr_req
);

    // ------------------------------------------------------ register read
    VX_gpr_read_inter VX_gpr_read();
    assign VX_gpr_read.rs1      = VX_bckE_req.rs1;
    assign VX_gpr_read.rs2      = VX_bckE_req.rs2;
    assign VX_gpr_read.warp_num = VX_bckE_req.warp_num;

    // JAL operand override. The ASIC build takes it from the registered
    // execute request instead of the incoming one.
    VX_gpr_jal_inter VX_gpr_jal();
`ifndef ASIC
    assign VX_gpr_jal.is_jal  = VX_bckE_req.jalQual;
    assign VX_gpr_jal.curr_PC = VX_bckE_req.curr_PC;
`else
    assign VX_gpr_jal.is_jal  = VX_exec_unit_req.jalQual;
    assign VX_gpr_jal.curr_PC = VX_exec_unit_req.curr_PC;
`endif

    VX_gpr_data_inter VX_gpr_datf();

    VX_gpr_wrapper vx_grp_wrapper(
        .clk               (clk),
        .reset             (reset),
        .VX_writeback_inter(VX_writeback_inter),
        .VX_gpr_read       (VX_gpr_read),
        .VX_gpr_jal        (VX_gpr_jal),

        .out_a_reg_data    (VX_gpr_datf.a_reg_data),
        .out_b_reg_data    (VX_gpr_datf.b_reg_data)
    );

    // -------------------------------------------- per-unit request split
    VX_exec_unit_req_inter VX_exec_unit_req_temp();
    VX_lsu_req_inter       VX_lsu_req_temp();
    VX_gpu_inst_req_inter  VX_gpu_inst_req_temp();
    VX_csr_req_inter       VX_csr_req_temp();

    VX_inst_multiplex VX_inst_mult(
        .VX_bckE_req     (VX_bckE_req),
        .VX_gpr_data     (VX_gpr_datf),
        .VX_exec_unit_req(VX_exec_unit_req_temp),
        .VX_lsu_req      (VX_lsu_req_temp),
        .VX_gpu_inst_req (VX_gpu_inst_req_temp),
        .VX_csr_req      (VX_csr_req_temp)
    );

    // ------------------------------------------------ stall / flush terms
    wire stall_rest = 0;
    wire flush_rest = schedule_delay;

    wire stall_lsu  = memory_delay;
    wire flush_lsu  = schedule_delay && !stall_lsu;

    assign gpr_stage_delay = stall_lsu || (stall_gpr_csr && VX_bckE_req.is_csr && (|VX_bckE_req.valid));

`ifdef ASIC

    // stall_lsu delayed by one cycle.
    wire delayed_lsu_last_cycle;

    VX_generic_register #(.N(1)) delayed_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_rest),
        .flush(stall_rest),
        .in   (stall_lsu),
        .out  (delayed_lsu_last_cycle)
    );

    wire[`NT_M1:0][31:0] temp_store_data;
    wire[`NT_M1:0][31:0] temp_base_address; // A reg data

    wire[`NT_M1:0][31:0] real_store_data;
    wire[`NT_M1:0][31:0] real_base_address; // A reg data

    // Capture the live operands on the first cycle of an LSU stall; the
    // captured copy is used from the following cycle on.
    wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;

    VX_generic_register #(.N(`NT*32*2)) lsu_data(
        .clk  (clk),
        .reset(reset),
        .stall(!store_curr_real),
        .flush(stall_rest),
        .in   ({real_store_data, real_base_address}),
        .out  ({temp_store_data, temp_base_address})
    );

    assign real_store_data   = VX_lsu_req_temp.store_data;
    assign real_base_address = VX_lsu_req_temp.base_address;

    assign VX_lsu_req.store_data   = (delayed_lsu_last_cycle) ? temp_store_data   : real_store_data;
    assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;

    VX_generic_register #(.N(77 + `NW_M1 + 1 + (`NT))) lsu_reg(
        .clk  (clk),
        .reset(reset),
        .stall(stall_lsu),
        .flush(flush_lsu),
        .in   ({VX_lsu_req_temp.valid, VX_lsu_req_temp.lsu_pc, VX_lsu_req_temp.warp_num,
                VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write,
                VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}),
        .out  ({VX_lsu_req.valid, VX_lsu_req.lsu_pc, VX_lsu_req.warp_num,
                VX_lsu_req.offset, VX_lsu_req.mem_read, VX_lsu_req.mem_write,
                VX_lsu_req.rd, VX_lsu_req.wb})
    );

    VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg(
        .clk  (clk),
        .reset(reset),
        .stall(stall_rest),
        .flush(flush_rest),
        .in   ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num,
                VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next,
                VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb,
                VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1,
                VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src,
                VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed,
                VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual,
                VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset,
                VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn,
                VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address,
                VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
        .out  ({VX_exec_unit_req.valid, VX_exec_unit_req.warp_num,
                VX_exec_unit_req.curr_PC, VX_exec_unit_req.PC_next,
                VX_exec_unit_req.rd, VX_exec_unit_req.wb,
                VX_exec_unit_req.alu_op, VX_exec_unit_req.rs1,
                VX_exec_unit_req.rs2, VX_exec_unit_req.rs2_src,
                VX_exec_unit_req.itype_immed, VX_exec_unit_req.upper_immed,
                VX_exec_unit_req.branch_type, VX_exec_unit_req.jalQual,
                VX_exec_unit_req.jal, VX_exec_unit_req.jal_offset,
                VX_exec_unit_req.ebreak, VX_exec_unit_req.wspawn,
                VX_exec_unit_req.is_csr, VX_exec_unit_req.csr_address,
                VX_exec_unit_req.csr_immed, VX_exec_unit_req.csr_mask})
    );

    // Operand data is not registered in this build.
    assign VX_exec_unit_req.a_reg_data = real_base_address;
    assign VX_exec_unit_req.b_reg_data = real_store_data;

    VX_generic_register #(.N(36 + `NW_M1 + 1 + (`NT))) gpu_inst_reg(
        .clk  (clk),
        .reset(reset),
        .stall(stall_rest),
        .flush(flush_rest),
        .in   ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num,
                VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc,
                VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier,
                VX_gpu_inst_req_temp.pc_next}),
        .out  ({VX_gpu_inst_req.valid, VX_gpu_inst_req.warp_num,
                VX_gpu_inst_req.is_wspawn, VX_gpu_inst_req.is_tmc,
                VX_gpu_inst_req.is_split, VX_gpu_inst_req.is_barrier,
                VX_gpu_inst_req.pc_next})
    );

    assign VX_gpu_inst_req.a_reg_data = real_base_address;
    // rd2 is a single 32-bit word: take thread 0's B operand explicitly
    // rather than relying on truncation of the whole per-thread array.
    assign VX_gpu_inst_req.rd2        = real_store_data[0];

`else

    VX_generic_register #(.N(77 + `NW_M1 + 1 + 65*(`NT))) lsu_reg(
        .clk  (clk),
        .reset(reset),
        .stall(stall_lsu),
        .flush(flush_lsu),
        .in   ({VX_lsu_req_temp.valid, VX_lsu_req_temp.lsu_pc, VX_lsu_req_temp.warp_num,
                VX_lsu_req_temp.store_data, VX_lsu_req_temp.base_address,
                VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write,
                VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}),
        .out  ({VX_lsu_req.valid, VX_lsu_req.lsu_pc, VX_lsu_req.warp_num,
                VX_lsu_req.store_data, VX_lsu_req.base_address,
                VX_lsu_req.offset, VX_lsu_req.mem_read, VX_lsu_req.mem_write,
                VX_lsu_req.rd, VX_lsu_req.wb})
    );

    VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg(
        .clk  (clk),
        .reset(reset),
        .stall(stall_rest),
        .flush(flush_rest),
        .in   ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num,
                VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next,
                VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb,
                VX_exec_unit_req_temp.a_reg_data, VX_exec_unit_req_temp.b_reg_data,
                VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1,
                VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src,
                VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed,
                VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual,
                VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset,
                VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn,
                VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address,
                VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
        .out  ({VX_exec_unit_req.valid, VX_exec_unit_req.warp_num,
                VX_exec_unit_req.curr_PC, VX_exec_unit_req.PC_next,
                VX_exec_unit_req.rd, VX_exec_unit_req.wb,
                VX_exec_unit_req.a_reg_data, VX_exec_unit_req.b_reg_data,
                VX_exec_unit_req.alu_op, VX_exec_unit_req.rs1,
                VX_exec_unit_req.rs2, VX_exec_unit_req.rs2_src,
                VX_exec_unit_req.itype_immed, VX_exec_unit_req.upper_immed,
                VX_exec_unit_req.branch_type, VX_exec_unit_req.jalQual,
                VX_exec_unit_req.jal, VX_exec_unit_req.jal_offset,
                VX_exec_unit_req.ebreak, VX_exec_unit_req.wspawn,
                VX_exec_unit_req.is_csr, VX_exec_unit_req.csr_address,
                VX_exec_unit_req.csr_immed, VX_exec_unit_req.csr_mask})
    );

    VX_generic_register #(.N(68 + `NW_M1 + 1 + 33*(`NT))) gpu_inst_reg(
        .clk  (clk),
        .reset(reset),
        .stall(stall_rest),
        .flush(flush_rest),
        .in   ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num,
                VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc,
                VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier,
                VX_gpu_inst_req_temp.pc_next, VX_gpu_inst_req_temp.a_reg_data,
                VX_gpu_inst_req_temp.rd2}),
        .out  ({VX_gpu_inst_req.valid, VX_gpu_inst_req.warp_num,
                VX_gpu_inst_req.is_wspawn, VX_gpu_inst_req.is_tmc,
                VX_gpu_inst_req.is_split, VX_gpu_inst_req.is_barrier,
                VX_gpu_inst_req.pc_next, VX_gpu_inst_req.a_reg_data,
                VX_gpu_inst_req.rd2})
    );

`endif

    // The CSR request register is the same in both builds.
    VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
        .clk  (clk),
        .reset(reset),
        .stall(stall_gpr_csr),
        .flush(flush_rest),
        .in   ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd,
                VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr,
                VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed,
                VX_csr_req_temp.csr_mask}),
        .out  ({VX_csr_req.valid, VX_csr_req.warp_num, VX_csr_req.rd,
                VX_csr_req.wb, VX_csr_req.alu_op, VX_csr_req.is_csr,
                VX_csr_req.csr_address, VX_csr_req.csr_immed,
                VX_csr_req.csr_mask})
    );

endmodule
|
||||
@@ -1,70 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// Bank of per-warp register files. Instantiates one VX_gpr per warp,
// steers the write-back to the warp named by wb_warp_num and selects the
// read data of one warp for the outputs. When VX_gpr_jal.is_jal is set the
// A operand of every thread is replaced by VX_gpr_jal.curr_PC.
module VX_gpr_wrapper (
    input wire                   clk,
    input wire                   reset,
    VX_gpr_read_inter            VX_gpr_read,
    VX_wb_inter                  VX_writeback_inter,
    VX_gpr_jal_inter             VX_gpr_jal,

    output wire[`NT_M1:0][31:0]  out_a_reg_data,
    output wire[`NT_M1:0][31:0]  out_b_reg_data
);

    // Read data of every warp's register file.
    wire[`NW-1:0][`NT_M1:0][31:0] temp_a_reg_data;
    wire[`NW-1:0][`NT_M1:0][31:0] temp_b_reg_data;

    // curr_PC broadcast to every thread lane.
    wire[`NT_M1:0][31:0] jal_data;
    genvar lane;
    generate
        for (lane = 0; lane <= `NT_M1; lane = lane + 1) begin
            assign jal_data[lane] = VX_gpr_jal.curr_PC;
        end
    endgenerate

    // Warp whose read data is forwarded to the outputs.
    wire[`NW_M1:0] sel_warp_num;

`ifndef ASIC
    assign sel_warp_num = VX_gpr_read.warp_num;
`else
    // ASIC build: select with the warp number registered one cycle
    // earlier.
    wire zer = 0;

    VX_generic_register #(`NW_M1+1) store_wn(
        .clk  (clk),
        .reset(reset),
        .stall(zer),
        .flush(zer),
        .in   (VX_gpr_read.warp_num),
        .out  (sel_warp_num)
    );
`endif

    assign out_a_reg_data = (VX_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[sel_warp_num]));
    assign out_b_reg_data = (temp_b_reg_data[sel_warp_num]);

    genvar warp_index;
    generate
        for (warp_index = 0; warp_index < `NW; warp_index = warp_index + 1) begin

            // Only the register file of the written-back warp may write.
            wire valid_write_request = warp_index == VX_writeback_inter.wb_warp_num;

            VX_gpr vx_gpr(
                .clk                (clk),
                .reset              (reset),
                .valid_write_request(valid_write_request),
                .VX_gpr_read        (VX_gpr_read),
                .VX_writeback_inter (VX_writeback_inter),
                .out_a_reg_data     (temp_a_reg_data[warp_index]),
                .out_b_reg_data     (temp_b_reg_data[warp_index])
            );

        end
    endgenerate

endmodule
|
||||
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// Fans the decoded back-end request out to the four functional-unit
// request interfaces. Every interface receives its fields unconditionally;
// only the per-thread valid mask is gated by the instruction class, so at
// most one unit sees a valid request.
// NOTE(review): wspawn / is_csr / csr_* of VX_exec_unit_req are not driven
// in this module - confirm whether another driver exists.
module VX_inst_multiplex (
    // Inputs
    VX_frE_to_bckE_req_inter VX_bckE_req,
    VX_gpr_data_inter        VX_gpr_data,

    // Outputs
    VX_exec_unit_req_inter   VX_exec_unit_req,
    VX_lsu_req_inter         VX_lsu_req,
    VX_gpu_inst_req_inter    VX_gpu_inst_req,
    VX_csr_req_inter         VX_csr_req
);

    // Instruction class.
    wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
    wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
    wire is_csr = VX_bckE_req.is_csr;

    // Class flags replicated across the thread lanes.
    wire[`NT_M1:0] is_mem_mask = {`NT{is_mem}};
    wire[`NT_M1:0] is_gpu_mask = {`NT{is_gpu}};
    wire[`NT_M1:0] is_csr_mask = {`NT{is_csr}};

    // LSU request
    assign VX_lsu_req.valid        = VX_bckE_req.valid & is_mem_mask;
    assign VX_lsu_req.warp_num     = VX_bckE_req.warp_num;
    assign VX_lsu_req.lsu_pc       = VX_bckE_req.curr_PC;
    assign VX_lsu_req.base_address = VX_gpr_data.a_reg_data;
    assign VX_lsu_req.store_data   = VX_gpr_data.b_reg_data;
    assign VX_lsu_req.offset       = VX_bckE_req.itype_immed;
    assign VX_lsu_req.mem_read     = VX_bckE_req.mem_read;
    assign VX_lsu_req.mem_write    = VX_bckE_req.mem_write;
    assign VX_lsu_req.rd           = VX_bckE_req.rd;
    assign VX_lsu_req.wb           = VX_bckE_req.wb;

    // Execute-unit request: valid for everything that is not memory, GPU
    // control or CSR.
    assign VX_exec_unit_req.valid       = VX_bckE_req.valid & ~(is_mem_mask | is_gpu_mask | is_csr_mask);
    assign VX_exec_unit_req.warp_num    = VX_bckE_req.warp_num;
    assign VX_exec_unit_req.curr_PC     = VX_bckE_req.curr_PC;
    assign VX_exec_unit_req.PC_next     = VX_bckE_req.PC_next;
    assign VX_exec_unit_req.rd          = VX_bckE_req.rd;
    assign VX_exec_unit_req.wb          = VX_bckE_req.wb;
    assign VX_exec_unit_req.a_reg_data  = VX_gpr_data.a_reg_data;
    assign VX_exec_unit_req.b_reg_data  = VX_gpr_data.b_reg_data;
    assign VX_exec_unit_req.alu_op      = VX_bckE_req.alu_op;
    assign VX_exec_unit_req.rs1         = VX_bckE_req.rs1;
    assign VX_exec_unit_req.rs2         = VX_bckE_req.rs2;
    assign VX_exec_unit_req.rs2_src     = VX_bckE_req.rs2_src;
    assign VX_exec_unit_req.itype_immed = VX_bckE_req.itype_immed;
    assign VX_exec_unit_req.upper_immed = VX_bckE_req.upper_immed;
    assign VX_exec_unit_req.branch_type = VX_bckE_req.branch_type;
    assign VX_exec_unit_req.jalQual     = VX_bckE_req.jalQual;
    assign VX_exec_unit_req.jal         = VX_bckE_req.jal;
    assign VX_exec_unit_req.jal_offset  = VX_bckE_req.jal_offset;
    assign VX_exec_unit_req.ebreak      = VX_bckE_req.ebreak;

    // GPU-control request; rd2 carries thread 0's B operand only.
    assign VX_gpu_inst_req.valid      = VX_bckE_req.valid & is_gpu_mask;
    assign VX_gpu_inst_req.warp_num   = VX_bckE_req.warp_num;
    assign VX_gpu_inst_req.is_wspawn  = VX_bckE_req.is_wspawn;
    assign VX_gpu_inst_req.is_tmc     = VX_bckE_req.is_tmc;
    assign VX_gpu_inst_req.is_split   = VX_bckE_req.is_split;
    assign VX_gpu_inst_req.is_barrier = VX_bckE_req.is_barrier;
    assign VX_gpu_inst_req.pc_next    = VX_bckE_req.PC_next;
    assign VX_gpu_inst_req.a_reg_data = VX_gpr_data.a_reg_data;
    assign VX_gpu_inst_req.rd2        = VX_gpr_data.b_reg_data[0];

    // CSR request
    assign VX_csr_req.valid       = VX_bckE_req.valid & is_csr_mask;
    assign VX_csr_req.warp_num    = VX_bckE_req.warp_num;
    assign VX_csr_req.rd          = VX_bckE_req.rd;
    assign VX_csr_req.wb          = VX_bckE_req.wb;
    assign VX_csr_req.alu_op      = VX_bckE_req.alu_op;
    assign VX_csr_req.is_csr      = VX_bckE_req.is_csr;
    assign VX_csr_req.csr_address = VX_bckE_req.csr_address;
    assign VX_csr_req.csr_immed   = VX_bckE_req.csr_immed;
    assign VX_csr_req.csr_mask    = VX_bckE_req.csr_mask;

endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
|
||||
// Load/store unit. Computes the per-thread addresses of the incoming
// request, buffers the request for one stage and drives the data-cache
// request interface from the buffered copy. Loaded data from the cache
// response is passed straight through to the write-back interface.
//   no_slot_mem : external back-pressure; stalls the request buffer
//   out_delay   : high while the cache reports a delay or no_slot_mem is set
module VX_lsu (
    input wire               clk,
    input wire               reset,
    input wire               no_slot_mem,
    VX_lsu_req_inter         VX_lsu_req,

    // Write back to GPR
    VX_inst_mem_wb_inter     VX_mem_wb,

    VX_dcache_response_inter VX_dcache_rsp,
    VX_dcache_request_inter  VX_dcache_req,
    output wire              out_delay
);

    assign out_delay = no_slot_mem || VX_dcache_rsp.delay;

    // Address generation: base + offset for every thread.
    wire[`NT_M1:0][31:0] address;

    VX_lsu_addr_gen VX_lsu_addr_gen (
        .base_address(VX_lsu_req.base_address),
        .offset      (VX_lsu_req.offset),
        .address     (address)
    );

    // Buffered copy of the request.
    wire[`NT_M1:0][31:0] use_address;
    wire[`NT_M1:0][31:0] use_store_data;
    wire[`NT_M1:0]       use_valid;
    wire[2:0]            use_mem_read;
    wire[2:0]            use_mem_write;
    wire[4:0]            use_rd;
    wire[`NW_M1:0]       use_warp_num;
    wire[1:0]            use_wb;
    wire[31:0]           use_pc;

    wire zero = 1'b0;   // the buffer is never flushed

    VX_generic_register #(.N(45 + `NW_M1 + 1 + `NT*65)) lsu_buffer(
        .clk  (clk),
        .reset(reset),
        .stall(out_delay),
        .flush(zero),
        .in   ({address,     VX_lsu_req.store_data, VX_lsu_req.valid, VX_lsu_req.mem_read,
                VX_lsu_req.mem_write, VX_lsu_req.rd, VX_lsu_req.warp_num, VX_lsu_req.wb,
                VX_lsu_req.lsu_pc}),
        .out  ({use_address, use_store_data,        use_valid,        use_mem_read,
                use_mem_write,        use_rd,        use_warp_num,        use_wb,
                use_pc})
    );

    // Data-cache request, one lane per thread.
    genvar lane;
    generate
        for (lane = 0; lane <= `NT_M1; lane = lane + 1) begin
            assign VX_dcache_req.out_cache_driver_in_address[lane] = use_address[lane];
            assign VX_dcache_req.out_cache_driver_in_data[lane]    = use_store_data[lane];
            assign VX_dcache_req.out_cache_driver_in_valid[lane]   = (use_valid[lane]);

            assign VX_mem_wb.loaded_data[lane] = VX_dcache_rsp.in_cache_driver_out_data[lane];
        end
    endgenerate

    assign VX_dcache_req.out_cache_driver_in_mem_read  = use_mem_read;
    assign VX_dcache_req.out_cache_driver_in_mem_write = use_mem_write;

    // Write-back side: wb is forced to 0 while the cache reports a delay.
    assign VX_mem_wb.rd          = use_rd;
    assign VX_mem_wb.wb          = use_wb & {2{!VX_dcache_rsp.delay}};
    assign VX_mem_wb.wb_valid    = use_valid;
    assign VX_mem_wb.wb_warp_num = use_warp_num;
    assign VX_mem_wb.mem_wb_pc   = use_pc;

endmodule // VX_lsu
|
||||
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// Per-thread load/store address generation.
//
// Every thread lane gets its own 32-bit base address; the same 32-bit
// `offset` is added to each of them. Purely combinational.
module VX_lsu_addr_gen (
	input  wire [`NT_M1:0][31:0] base_address, // one base address per thread lane
	input  wire [31:0]           offset,       // offset shared by all lanes
	output wire [`NT_M1:0][31:0] address       // address[t] = base_address[t] + offset
);

	genvar lane;
	generate
		for (lane = 0; lane < `NT; lane = lane + 1) begin : gen_lane_addr
			// 32-bit add; any carry out of bit 31 is discarded by the port width.
			assign address[lane] = offset + base_address[lane];
		end
	endgenerate

endmodule
|
||||
@@ -1,20 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// Priority encoder over the `NW-bit `valids` vector.
//
//   index - position of the lowest-numbered set bit of `valids`
//           (0 when no bit is set)
//   found - 1 when at least one bit of `valids` is set
//
// Purely combinational.
module VX_priority_encoder (
	input  wire [`NW-1:0]  valids,
	output reg  [`NW_M1:0] index,
	output reg             found
);

	integer bit_pos;

	always @(*) begin
		found = 1'b0;
		index = 0;
		// Scan upward and latch only the first hit, so the lowest set bit wins.
		for (bit_pos = 0; bit_pos < `NW; bit_pos = bit_pos + 1) begin
			if (!found && valids[bit_pos]) begin
				index = bit_pos[`NW_M1:0];
				found = 1'b1;
			end
		end
	end

endmodule
|
||||
@@ -1,32 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
// Parameterised priority encoder that also produces a one-hot mask.
//
// Parameters:
//   N      - width of the `valids` request vector.
//
// Outputs (all combinational):
//   index  - position of the highest-numbered set bit of `valids`
//            (the upward scan lets the last hit win); 0 when nothing is set.
//   found  - 1 when at least one bit of `valids` is set.
//   mask   - one-hot vector with only bit `index` set when `found`,
//            all zeros otherwise.
//
// All three outputs are declared `reg`, so they are all driven from the same
// procedural block. The earlier version drove `mask` with a continuous
// `assign`, which is not legal for a `reg` in plain Verilog, and used an
// explicit `@(valids)` sensitivity list instead of `@(*)`.
module VX_priority_encoder_w_mask
#(
	parameter N = 10
)
(
	input  wire [N-1:0]            valids,
	output reg  [N-1:0]            mask,
	output reg  [(`CLOG2(N))-1:0]  index,
	output reg                     found
);

	integer i;

	always @(*) begin
		index = 0;
		found = 1'b0;
		mask  = {N{1'b0}};

		for (i = 0; i < N; i = i + 1) begin
			if (valids[i]) begin
				index = i[(`CLOG2(N))-1:0];
				found = 1'b1;
			end
		end

		// Same value as `found ? (1 << index) : 0`.
		if (found) begin
			mask[index] = 1'b1;
		end
	end

endmodule
|
||||
@@ -1,69 +0,0 @@
|
||||
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
// Issue-stage scoreboard.
//
// Keeps one "busy" bit per (warp, architectural register) in `rename_table`.
// A bit is set when an instruction that writes a non-zero destination register
// is issued, and cleared when the writeback interface reports a write to that
// register. `schedule_delay` is raised when the incoming request must wait:
//   * one of its source registers is still busy and the request has at least
//     one valid thread, or
//   * it is a load/store and either `memory_delay` or `gpr_stage_delay` is high.
//
// There is no same-cycle writeback bypass: a source register is considered
// busy until the table itself has been updated by the writeback.
module VX_scheduler (
	input  wire clk,
	input  wire reset,
	input  wire memory_delay,
	input  wire gpr_stage_delay,
	VX_frE_to_bckE_req_inter VX_bckE_req,
	VX_wb_inter              VX_writeback_inter,

	output wire schedule_delay
);

	// Busy bits: rename_table[warp][reg] == 1 means a write is still outstanding.
	reg [31:0] rename_table [`NW-1:0];

	// A writeback that actually targets a register other than x0.
	wire valid_wb = (|VX_writeback_inter.wb_valid)
	             && (VX_writeback_inter.wb != 0)
	             && (VX_writeback_inter.rd != 0);

	// The incoming request will write a register other than x0.
	wire wb_inc = (VX_bckE_req.rd != 0) && (VX_bckE_req.wb != 0);

	wire is_load  = (VX_bckE_req.mem_read  != `NO_MEM_READ);
	wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
	wire is_mem   = is_load || is_store;

	// rs2 is only a real dependency for these instruction kinds.
	wire using_rs2 = VX_bckE_req.is_wspawn
	              || VX_bckE_req.is_barrier
	              || is_store
	              || (VX_bckE_req.rs2_src == `RS2_REG);

	// Busy bits of the two source registers of the incoming request.
	wire rs1_rename = rename_table[VX_bckE_req.warp_num][VX_bckE_req.rs1];
	wire rs2_rename = rename_table[VX_bckE_req.warp_num][VX_bckE_req.rs2];

	// x0 never creates a dependency.
	wire rs1_rename_qual = rs1_rename && (VX_bckE_req.rs1 != 0);
	wire rs2_rename_qual = rs2_rename && (VX_bckE_req.rs2 != 0) && using_rs2;

	wire rename_valid = rs1_rename_qual || rs2_rename_qual;

	assign schedule_delay = (rename_valid && (|VX_bckE_req.valid))
	                     || (is_mem && (memory_delay || gpr_stage_delay));

	integer w;
	always @(posedge clk or posedge reset) begin
		if (reset) begin
			for (w = 0; w < `NW; w = w + 1) begin
				rename_table[w] <= 32'b0;
			end
		end else begin
			// Order matters: if a writeback and an issue hit the same entry in
			// the same cycle, the issue (set) is the later assignment and wins.
			if (valid_wb) begin
				rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] <= 0;
			end
			if (wb_inc && !schedule_delay) begin
				rename_table[VX_bckE_req.warp_num][VX_bckE_req.rd] <= 1;
			end
		end
	end

endmodule
|
||||
@@ -1,86 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
|
||||
// Per-warp program counter and thread-mask state.
//
// Inputs:
//   stall            - hold the PC (no +4 advance) and force out_valid to 0
//                      unless a mask change is being applied this cycle
//   remove           - clear the warp's thread mask
//   in_change_mask   - load `in_thread_mask` as the new thread mask
//   in_jal / in_jal_dest         - redirect the PC to a jump target
//   in_branch_dir / in_branch_dest - redirect the PC to a taken-branch target
//   in_wspawn / in_wspawn_pc     - load the stored PC with a spawn address
// Outputs:
//   out_PC           - PC presented this cycle (jump > branch > stored PC)
//   out_valid        - thread mask presented this cycle
//
// Fix relative to the earlier version: the `valid` register listed
// `posedge reset` in its sensitivity list but had no reset branch, so a reset
// edge was treated like a clock edge and the mask was only ever initialised
// by the `initial` block. Reset now restores the same value the `initial`
// block establishes (only thread 0 active).
module VX_warp (
	input  wire            clk,
	input  wire            reset,
	input  wire            stall,
	input  wire            remove,
	input  wire [`NT_M1:0] in_thread_mask,
	input  wire            in_change_mask,
	input  wire            in_jal,
	input  wire [31:0]     in_jal_dest,
	input  wire            in_branch_dir,
	input  wire [31:0]     in_branch_dest,
	input  wire            in_wspawn,
	input  wire [31:0]     in_wspawn_pc,

	output wire [31:0]     out_PC,
	output wire [`NT_M1:0] out_valid
);

	reg [31:0]     real_PC; // stored PC of the next sequential instruction
	reg [31:0]     temp_PC; // PC selected for this cycle (combinational)
	reg [`NT_M1:0] valid;   // stored thread mask

	// Power-up values for simulation; reset establishes the same state.
	initial begin
		real_PC = 0;
		valid   = 1; // only thread 0 active
	end

	// Thread mask register.
	always @(posedge clk or posedge reset) begin
		if (reset) begin
			valid <= 1; // only thread 0 active
		end else if (remove) begin
			valid <= 0;
		end else if (in_change_mask) begin
			valid <= in_thread_mask;
		end
	end

	// A mask change is visible in the same cycle; otherwise a stall hides
	// all threads.
	genvar out_cur_th;
	generate
		for (out_cur_th = 0; out_cur_th < `NT; out_cur_th = out_cur_th + 1) begin : gen_out_valid
			assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th]
			                             : stall          ? 1'b0
			                             :                  valid[out_cur_th];
		end
	endgenerate

	// PC selection: jump has priority over a taken branch, which has priority
	// over the stored sequential PC.
	always @(*) begin
		if (in_jal == 1'b1) begin
			temp_PC = in_jal_dest;
		end else if (in_branch_dir == 1'b1) begin
			temp_PC = in_branch_dest;
		end else begin
			temp_PC = real_PC;
		end
	end

	assign out_PC = temp_PC;

	// Stored PC update.
	always @(posedge clk or posedge reset) begin
		if (reset) begin
			real_PC <= 0;
		end else if (in_wspawn == 1'b1) begin
			real_PC <= in_wspawn_pc;
		end else if (!stall) begin
			real_PC <= temp_PC + 32'h4;
		end else begin
			// Stalled: remember the selected PC so a redirect is not lost.
			real_PC <= temp_PC;
		end
	end

endmodule
|
||||
@@ -1,321 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// Warp scheduler.
//
// Tracks, per warp: active/stalled state, thread mask, PC, and an IPDOM stack
// for split/join. Each cycle it picks one warp from the "visible" set with a
// priority encoder and presents its PC and thread mask.
//
// Outputs:
//   thread_mask    - mask of the selected warp, or 0 when nothing is issued
//   warp_num       - index of the selected warp
//   warp_pc        - PC of the selected warp (spawn PC if the warp was just spawned)
//   out_ebreak     - high when no warp is active
//   scheduled_warp - high when a warp is genuinely selected this cycle
//                    (ignores the external `stall` input)
//
// Fix relative to the earlier version: `total_barrier_stall` was declared `reg`
// but driven by a continuous `assign` that OR-ed exactly barrier masks 0..3.
// It is now computed in a combinational block over all `NUM_BARRIERS entries,
// which gives the same result when `NUM_BARRIERS is 4.
module VX_warp_scheduler (
	input  wire                 clk,    // Clock
	input  wire                 reset,
	input  wire                 stall,

	// Wspawn
	input  wire                 wspawn,
	input  wire [31:0]          wsapwn_pc,
	input  wire [`NW-1:0]       wspawn_new_active,

	// CTM (change thread mask)
	input  wire                 ctm,
	input  wire [`NT_M1:0]      ctm_mask,
	input  wire [`NW_M1:0]      ctm_warp_num,

	// WHALT
	input  wire                 whalt,
	input  wire [`NW_M1:0]      whalt_warp_num,

	// Barrier
	input  wire                 is_barrier,
	input  wire [31:0]          barrier_id,
	input  wire [$clog2(`NW):0] num_warps,
	input  wire [`NW_M1:0]      barrier_warp_num,

	// WSTALL
	input  wire                 wstall,
	input  wire [`NW_M1:0]      wstall_warp_num,

	// Split
	input  wire                 is_split,
	input  wire                 dont_split,
	input  wire [`NT_M1:0]      split_new_mask,
	input  wire [`NT_M1:0]      split_later_mask,
	input  wire [31:0]          split_save_pc,
	input  wire [`NW_M1:0]      split_warp_num,

	// Join
	input  wire                 is_join,
	input  wire [`NW_M1:0]      join_warp_num,

	// JAL
	input  wire                 jal,
	input  wire [31:0]          jal_dest,
	input  wire [`NW_M1:0]      jal_warp_num,

	// Branch
	input  wire                 branch_valid,
	input  wire                 branch_dir,
	input  wire [31:0]          branch_dest,
	input  wire [`NW_M1:0]      branch_warp_num,

	output wire [`NT_M1:0]      thread_mask,
	output wire [`NW_M1:0]      warp_num,
	output wire [31:0]          warp_pc,
	output wire                 out_ebreak,
	output wire                 scheduled_warp
);

	wire update_use_wspawn;
	wire update_visible_active;

	// Top-of-stack entry of each warp's IPDOM stack: {fall-through flag, PC, mask}.
	wire [(1+32+`NT_M1):0] d [`NW-1:0];

	wire            join_fall;
	wire [31:0]     join_pc;
	wire [`NT_M1:0] join_tm;

	reg  [`NW-1:0] warp_active;    // warps that exist and have not halted
	reg  [`NW-1:0] warp_stalled;   // warps waiting on a control instruction
	reg  [`NW-1:0] visible_active; // warps still eligible in the current round
	wire [`NW-1:0] use_active;

	wire wstall_this_cycle;

	reg [`NT_M1:0] thread_masks [`NW-1:0];
	reg [31:0]     warp_pcs     [`NW-1:0];

	// Barriers: one mask of waiting warps per barrier id.
	reg  [`NW-1:0]       barrier_stall_mask [(`NUM_BARRIERS-1):0];
	wire                 reached_barrier_limit;
	wire [`NW-1:0]       curr_barrier_mask;
	wire [$clog2(`NW):0] curr_barrier_count;

	// Wspawn: PC to use for freshly spawned warps, and which warps still need it.
	reg [31:0]    use_wsapwn_pc;
	reg [`NW-1:0] use_wsapwn;

	wire [`NW_M1:0] warp_to_schedule;
	wire            schedule;

	wire hazard;
	wire global_stall;
	wire real_schedule;

	wire [31:0] new_pc;

	// OR of all barrier masks: warps currently held at any barrier.
	reg [`NW-1:0] total_barrier_stall;

	// Remembers that the most recent split was a no-op (dont_split).
	reg didnt_split;

	integer curr_w_help;
	integer curr_barrier;

	always @(posedge clk or posedge reset) begin
		if (reset) begin
			for (curr_barrier = 0; curr_barrier < `NUM_BARRIERS; curr_barrier = curr_barrier + 1) begin
				barrier_stall_mask[curr_barrier] <= 0;
			end
			use_wsapwn_pc     <= 0;
			use_wsapwn        <= 0;
			// The first scheduled PC is warp_pc + 4, hence the -4 bias.
			warp_pcs[0]       <= (32'h80000000 - 4);
			warp_active[0]    <= 1; // Activating first warp
			visible_active[0] <= 1; // Activating first warp
			thread_masks[0]   <= 1; // Activating first thread in first warp
			warp_stalled      <= 0;
			didnt_split       <= 0;
			for (curr_w_help = 1; curr_w_help < `NW; curr_w_help = curr_w_help + 1) begin
				warp_pcs[curr_w_help]       <= 0;
				warp_active[curr_w_help]    <= 0;
				visible_active[curr_w_help] <= 0;
				thread_masks[curr_w_help]   <= 1; // thread 0 only
			end
		end else begin
			// NOTE: the statements below are order-sensitive. Several of them
			// may write the same register in one cycle and the last
			// nonblocking assignment wins.

			// Spawning warps
			if (wspawn) begin
				warp_active   <= wspawn_new_active;
				use_wsapwn_pc <= wsapwn_pc;
				// Warp 0 issued the wspawn and keeps its own PC.
				use_wsapwn    <= wspawn_new_active & (~`NW'b1);
			end

			if (is_barrier) begin
				warp_stalled[barrier_warp_num] <= 0;
				if (reached_barrier_limit) begin
					// Enough warps have arrived: release everyone at this barrier.
					barrier_stall_mask[barrier_id] <= 0;
				end else begin
					barrier_stall_mask[barrier_id][barrier_warp_num] <= 1;
				end
			end else if (ctm) begin
				thread_masks[ctm_warp_num] <= ctm_mask;
				warp_stalled[ctm_warp_num] <= 0;
			end else if (is_join && !didnt_split) begin
				if (!join_fall) begin
					warp_pcs[join_warp_num] <= join_pc;
				end
				thread_masks[join_warp_num] <= join_tm;
				didnt_split <= 0;
			end else if (is_split) begin
				warp_stalled[split_warp_num] <= 0;
				if (!dont_split) begin
					thread_masks[split_warp_num] <= split_new_mask;
					didnt_split <= 0;
				end else begin
					didnt_split <= 1;
				end
			end

			if (whalt) begin
				warp_active[whalt_warp_num]    <= 0;
				visible_active[whalt_warp_num] <= 0;
			end

			if (update_use_wspawn) begin
				use_wsapwn[warp_to_schedule]   <= 0;
				thread_masks[warp_to_schedule] <= 1;
			end

			// Stalling the scheduling of warps
			if (wstall) begin
				warp_stalled[wstall_warp_num]   <= 1;
				visible_active[wstall_warp_num] <= 0;
			end

			// Refilling active warps
			if (update_visible_active) begin
				visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall);
			end

			// Don't change state if stall
			if (!global_stall && real_schedule && (thread_mask != 0)) begin
				visible_active[warp_to_schedule] <= 0;
				warp_pcs[warp_to_schedule]       <= new_pc;
			end

			// Jal
			if (jal) begin
				warp_pcs[jal_warp_num]     <= jal_dest;
				warp_stalled[jal_warp_num] <= 0;
			end

			// Branch
			if (branch_valid) begin
				if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest;
				warp_stalled[branch_warp_num] <= 0;
			end
		end
	end

	VX_countones #(.N(`NW)) barrier_count(
		.valids(curr_barrier_mask),
		.count (curr_barrier_count)
	);

	wire [$clog2(`NW):0] count_visible_active;
	VX_countones #(.N(`NW)) num_visible(
		.valids(visible_active),
		.count (count_visible_active)
	);

	assign curr_barrier_mask     = barrier_stall_mask[barrier_id][`NW-1:0];
	assign reached_barrier_limit = curr_barrier_count == (num_warps);

	// NOTE(review): the original author marked this expression "Maybe bug".
	assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule);

	// Combine every barrier's waiting-warp mask.
	integer curr_b;
	always @(*) begin
		total_barrier_stall = 0;
		for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b = curr_b + 1) begin
			total_barrier_stall = total_barrier_stall | barrier_stall_mask[curr_b];
		end
	end

	assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);

	// Entries offered to the IPDOM stack on a split:
	//   q1 - fall-through entry holding the warp's current mask
	//   q2 - entry holding the saved PC and the deferred mask
	wire [(1+32+`NT_M1):0] q1 = {1'b1, 32'b0        , thread_masks[split_warp_num]};
	wire [(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc, split_later_mask};

	assign {join_fall, join_pc, join_tm} = d[join_warp_num];

	// One IPDOM stack per warp.
	genvar curr_warp;
	for (curr_warp = 0; curr_warp < `NW; curr_warp = curr_warp + 1) begin
		wire correct_warp_s = (curr_warp == split_warp_num);
		wire correct_warp_j = (curr_warp == join_warp_num);

		wire push = (is_split && !dont_split) && correct_warp_s;
		wire pop  = is_join && correct_warp_j;

		VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT)+1)) ipdom_stack(
			.clk  (clk),
			.reset(reset),
			.push (push),
			.pop  (pop),
			.d    (d[curr_warp]),
			.q1   (q1),
			.q2   (q2)
		);
	end

	// The selected warp is being redirected this very cycle.
	wire should_jal = (jal && (warp_to_schedule == jal_warp_num));
	wire should_bra = (branch_dir && (warp_to_schedule == branch_warp_num));

	assign hazard = (should_jal || should_bra) && schedule;

	assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule];

	assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);

	assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || is_join);

	wire real_use_wspawn = use_wsapwn[warp_to_schedule];

	assign warp_pc     = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
	assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NT'b1 : thread_masks[warp_to_schedule]);
	assign warp_num    = warp_to_schedule;

	assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;

	assign new_pc = warp_pc + 4;

	// When the visible set is empty, look directly at the refill value so a
	// cycle is not lost while `visible_active` is being reloaded.
	assign use_active = (count_visible_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall)) : visible_active;

	// Choosing a warp to schedule
	VX_priority_encoder choose_schedule(
		.valids(use_active),
		.index (warp_to_schedule),
		.found (schedule)
	);

	wire ebreak = (warp_active == 0);
	assign out_ebreak = ebreak;

endmodule
|
||||
@@ -1,111 +0,0 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
|
||||
// Writeback arbiter.
//
// Three units can request a register-file write in the same cycle: the
// execute unit, the CSR unit and the memory unit. Exactly one is forwarded,
// with fixed priority exec > csr > mem. The winner is registered for one
// cycle and then driven onto `VX_writeback_inter`.
//
// Outputs:
//   no_slot_mem - memory unit wanted to write but lost arbitration
//   no_slot_csr - CSR unit wanted to write but lost arbitration
//
// Fix relative to the earlier version: the `prev_is_mem` register was written
// as `if (reset) begin ... end begin ... end` (missing `else`), so the reset
// value was overwritten immediately by the second block, and it used blocking
// assignments in a clocked process. It now has a proper reset/else structure
// with nonblocking assignments.
module VX_writeback (
	input wire clk,
	input wire reset,
	// Mem WB info
	VX_inst_mem_wb_inter     VX_mem_wb,
	// EXEC Unit WB info
	VX_inst_exec_wb_inter    VX_inst_exec_wb,
	// CSR Unit WB info
	VX_csr_wb_inter          VX_csr_wb,

	// Actual WB to GPR
	VX_wb_inter              VX_writeback_inter,
	output wire              no_slot_mem,
	output wire              no_slot_csr
);

	// Arbitration result before the output register.
	VX_wb_inter VX_writeback_tempp();

	// A unit requests a write when its wb code is non-zero and at least one
	// thread is valid.
	wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid);
	wire mem_wb  = (VX_mem_wb.wb       != 0) && (|VX_mem_wb.wb_valid);
	wire csr_wb  = (VX_csr_wb.wb       != 0) && (|VX_csr_wb.valid);

	assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
	assign no_slot_csr = csr_wb && (exec_wb);

	assign VX_writeback_tempp.write_data  = exec_wb ? VX_inst_exec_wb.alu_result :
	                                        csr_wb  ? VX_csr_wb.csr_result :
	                                        mem_wb  ? VX_mem_wb.loaded_data :
	                                        0;

	assign VX_writeback_tempp.wb_valid    = exec_wb ? VX_inst_exec_wb.wb_valid :
	                                        csr_wb  ? VX_csr_wb.valid :
	                                        mem_wb  ? VX_mem_wb.wb_valid :
	                                        0;

	assign VX_writeback_tempp.rd          = exec_wb ? VX_inst_exec_wb.rd :
	                                        csr_wb  ? VX_csr_wb.rd :
	                                        mem_wb  ? VX_mem_wb.rd :
	                                        0;

	assign VX_writeback_tempp.wb          = exec_wb ? VX_inst_exec_wb.wb :
	                                        csr_wb  ? VX_csr_wb.wb :
	                                        mem_wb  ? VX_mem_wb.wb :
	                                        0;

	assign VX_writeback_tempp.wb_warp_num = exec_wb ? VX_inst_exec_wb.wb_warp_num :
	                                        csr_wb  ? VX_csr_wb.warp_num :
	                                        mem_wb  ? VX_mem_wb.wb_warp_num :
	                                        0;

	// The CSR path carries no PC, so a recognisable placeholder is used.
	assign VX_writeback_tempp.wb_pc       = exec_wb ? VX_inst_exec_wb.exec_wb_pc :
	                                        csr_wb  ? 32'hdeadbeef :
	                                        mem_wb  ? VX_mem_wb.mem_wb_pc :
	                                        32'hdeadbeef;

	wire zero = 0;

	wire [`NT-1:0][31:0] use_wb_data;

	// High in the cycle after the memory unit won arbitration.
	reg prev_is_mem;

	always @(posedge clk or posedge reset) begin
		if (reset) begin
			prev_is_mem <= 1'b0;
		end else begin
			prev_is_mem <= mem_wb && !no_slot_mem;
		end
	end

	// Output register: never stalled, never flushed.
	VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register(
		.clk  (clk),
		.reset(reset),
		.stall(zero),
		.flush(zero),
		.in   ({VX_writeback_tempp.write_data, VX_writeback_tempp.wb_valid, VX_writeback_tempp.rd, VX_writeback_tempp.wb, VX_writeback_tempp.wb_warp_num, VX_writeback_tempp.wb_pc}),
		.out  ({use_wb_data                  , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc})
	);

	// Captures thread 0's data of the last write to register 28; it drives
	// nothing else in this module.
	reg [31:0] last_data_wb;
	always @(posedge clk) begin
		if ((|VX_writeback_inter.wb_valid) && (VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd == 28)) begin
			last_data_wb <= use_wb_data[0];
		end
	end

`ifdef SYN
	// Under SYN, data from a memory writeback bypasses the output register.
	assign VX_writeback_inter.write_data = prev_is_mem ? VX_writeback_tempp.write_data : use_wb_data;
`else
	assign VX_writeback_inter.write_data = use_wb_data;
`endif

endmodule // VX_writeback
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,249 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
|
||||
// Top level of the core.
//
// Instantiates the front end, the scoreboard (`VX_scheduler`), the back end and
// the data/instruction memory controller, and exposes two independent DRAM
// request/response port groups: `*_d` for the data cache and `*_i` for the
// instruction cache.
//
// `io_valid`/`io_data` report stores whose lane-0 address is 32'h00010000.
// `icache_response_instruction` is not used inside this module.
module Vortex
(
	input  wire        clk,
	input  wire        reset,
	input  wire [31:0] icache_response_instruction,
	output wire [31:0] icache_request_pc_address,
	// IO
	output wire        io_valid,
	output wire [31:0] io_data,

	// Req D Mem
	output reg  [31:0] o_m_read_addr_d,
	output reg  [31:0] o_m_evict_addr_d,
	output reg         o_m_valid_d,
	output reg  [31:0] o_m_writedata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
	output reg         o_m_read_or_write_d,

	// Rsp D Mem
	input  wire [31:0] i_m_readdata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
	input  wire        i_m_ready_d,

	// Req I Mem
	output reg  [31:0] o_m_read_addr_i,
	output reg  [31:0] o_m_evict_addr_i,
	output reg         o_m_valid_i,
	output reg  [31:0] o_m_writedata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
	output reg         o_m_read_or_write_i,

	// Rsp I Mem
	input  wire [31:0] i_m_readdata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
	input  wire        i_m_ready_i,
	output wire        out_ebreak
);

	// ------------------------------------------------------------------
	// Configuration mirrors: registers that hold the build-time
	// configuration values and simply keep them every clock.
	// ------------------------------------------------------------------
	reg [31:0] icache_banks               = `ICACHE_BANKS;
	reg [31:0] icache_num_words_per_block = `ICACHE_NUM_WORDS_PER_BLOCK;

	reg [31:0] dcache_banks               = `DCACHE_BANKS;
	reg [31:0] dcache_num_words_per_block = `DCACHE_NUM_WORDS_PER_BLOCK;

	reg [31:0] number_threads             = `NT;
	reg [31:0] number_warps               = `NW;

	always @(posedge clk) begin
		icache_banks               <= icache_banks;
		icache_num_words_per_block <= icache_num_words_per_block;

		dcache_banks               <= dcache_banks;
		dcache_num_words_per_block <= dcache_num_words_per_block;

		number_threads             <= number_threads;
		number_warps               <= number_warps;
	end

	// ------------------------------------------------------------------
	// Pipeline stall signals
	// ------------------------------------------------------------------
	wire memory_delay;
	wire gpr_stage_delay;
	wire schedule_delay;

	// ------------------------------------------------------------------
	// Cache-side interfaces
	// ------------------------------------------------------------------
	VX_dcache_response_inter VX_dcache_rsp();
	VX_dcache_request_inter  VX_dcache_req();

	VX_icache_response_inter icache_response_fe();
	VX_icache_request_inter  icache_request_fe();

	// ------------------------------------------------------------------
	// DRAM-side interfaces (one per cache)
	// ------------------------------------------------------------------
	VX_dram_req_rsp_inter #(
		.NUMBER_BANKS(`DCACHE_BANKS),
		.NUM_WORDS_PER_BLOCK(`DCACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp();

	VX_dram_req_rsp_inter #(
		.NUMBER_BANKS(`ICACHE_BANKS),
		.NUM_WORDS_PER_BLOCK(`ICACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp_icache();

	// ------------------------------------------------------------------
	// IO port: a non-stalled store with at least one valid thread whose
	// lane-0 address is 32'h00010000; the data is lane 0's store data.
	// ------------------------------------------------------------------
	assign io_valid = (!memory_delay)
	               && (|VX_dcache_req.out_cache_driver_in_valid)
	               && (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE)
	               && (VX_dcache_req.out_cache_driver_in_address[0] == 32'h00010000);
	assign io_data  = VX_dcache_req.out_cache_driver_in_data[0];

	assign icache_request_pc_address = icache_request_fe.pc_address;

	// ------------------------------------------------------------------
	// Data-cache DRAM port
	// ------------------------------------------------------------------
	assign o_m_valid_d               = VX_dram_req_rsp.o_m_valid;
	assign o_m_read_addr_d           = VX_dram_req_rsp.o_m_read_addr;
	assign o_m_evict_addr_d          = VX_dram_req_rsp.o_m_evict_addr;
	assign o_m_read_or_write_d       = VX_dram_req_rsp.o_m_read_or_write;
	assign VX_dram_req_rsp.i_m_ready = i_m_ready_d;

	// ------------------------------------------------------------------
	// Instruction-cache DRAM port
	// ------------------------------------------------------------------
	assign o_m_valid_i                      = VX_dram_req_rsp_icache.o_m_valid;
	assign o_m_read_addr_i                  = VX_dram_req_rsp_icache.o_m_read_addr;
	assign o_m_evict_addr_i                 = VX_dram_req_rsp_icache.o_m_evict_addr;
	assign o_m_read_or_write_i              = VX_dram_req_rsp_icache.o_m_read_or_write;
	assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready_i;

	// ------------------------------------------------------------------
	// Block data: copied word by word between ports and interfaces.
	// ------------------------------------------------------------------
	genvar bank;
	genvar word;

	for (bank = 0; bank < `DCACHE_BANKS; bank = bank + 1) begin
		for (word = 0; word < `DCACHE_NUM_WORDS_PER_BLOCK; word = word + 1) begin
			assign o_m_writedata_d[bank][word]              = VX_dram_req_rsp.o_m_writedata[bank][word];
			assign VX_dram_req_rsp.i_m_readdata[bank][word] = i_m_readdata_d[bank][word];
		end
	end

	for (bank = 0; bank < `ICACHE_BANKS; bank = bank + 1) begin
		for (word = 0; word < `ICACHE_NUM_WORDS_PER_BLOCK; word = word + 1) begin
			assign o_m_writedata_i[bank][word]                     = VX_dram_req_rsp_icache.o_m_writedata[bank][word];
			assign VX_dram_req_rsp_icache.i_m_readdata[bank][word] = i_m_readdata_i[bank][word];
		end
	end

	// ------------------------------------------------------------------
	// Pipeline interfaces
	// ------------------------------------------------------------------
	// Front-end to Back-end
	VX_frE_to_bckE_req_inter VX_bckE_req();        // New instruction request to EXE/MEM

	// Back-end to Front-end
	VX_wb_inter              VX_writeback_inter(); // Writeback to GPRs
	VX_branch_response_inter VX_branch_rsp();      // Branch Resolution to Fetch
	VX_jal_response_inter    VX_jal_rsp();         // Jump resolution to Fetch

	// Warp control
	VX_warp_ctl_inter        VX_warp_ctl();

	// ------------------------------------------------------------------
	// Pipeline stages
	// ------------------------------------------------------------------
	VX_front_end vx_front_end(
		.clk                (clk),
		.reset              (reset),
		.VX_warp_ctl        (VX_warp_ctl),
		.VX_bckE_req        (VX_bckE_req),
		.schedule_delay     (schedule_delay),
		.icache_response_fe (icache_response_fe),
		.icache_request_fe  (icache_request_fe),
		.VX_jal_rsp         (VX_jal_rsp),
		.VX_branch_rsp      (VX_branch_rsp),
		.fetch_ebreak       (out_ebreak)
	);

	VX_scheduler schedule(
		.clk               (clk),
		.reset             (reset),
		.memory_delay      (memory_delay),
		.gpr_stage_delay   (gpr_stage_delay),
		.VX_bckE_req       (VX_bckE_req),
		.VX_writeback_inter(VX_writeback_inter),
		.schedule_delay    (schedule_delay)
	);

	VX_back_end vx_back_end(
		.clk                (clk),
		.reset              (reset),
		.schedule_delay     (schedule_delay),
		.VX_warp_ctl        (VX_warp_ctl),
		.VX_bckE_req        (VX_bckE_req),
		.VX_jal_rsp         (VX_jal_rsp),
		.VX_branch_rsp      (VX_branch_rsp),
		.VX_dcache_rsp      (VX_dcache_rsp),
		.VX_dcache_req      (VX_dcache_req),
		.VX_writeback_inter (VX_writeback_inter),
		.out_mem_delay      (memory_delay),
		.gpr_stage_delay    (gpr_stage_delay)
	);

	// ------------------------------------------------------------------
	// Memory controller: connects both cache request/response pairs to
	// their DRAM-side interfaces.
	// ------------------------------------------------------------------
	VX_dmem_controller VX_dmem_controller(
		.clk                    (clk),
		.reset                  (reset),
		.VX_dram_req_rsp        (VX_dram_req_rsp),
		.VX_dram_req_rsp_icache (VX_dram_req_rsp_icache),
		.VX_icache_req          (icache_request_fe),
		.VX_icache_rsp          (icache_response_fe),
		.VX_dcache_req          (VX_dcache_req),
		.VX_dcache_rsp          (VX_dcache_rsp)
	);

endmodule // Vortex
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
|
||||
// 32-entry register file storage: one write port and two read ports.
//
// Each entry holds one 32-bit word per thread, stored as four bytes.
//   we            - write enable for the whole entry at `waddr`
//   be[t]         - per-thread enable: when set, all four bytes of thread
//                   t's word are written from wdata[t]
//   raddr1/raddr2 - read addresses; q1/q2 follow them combinationally
//
// Reset clears every entry.
module byte_enabled_simple_dual_port_ram
(
	input we, clk,
	input wire reset,
	input wire[4:0] waddr, raddr1, raddr2,
	input wire[`NT_M1:0] be,
	input wire[`NT_M1:0][31:0] wdata,
	output reg[`NT_M1:0][31:0] q1, q2
);

	// Layout: GPR[entry][thread][byte][bit]
	logic [`NT_M1:0][3:0][7:0] GPR [31:0];

	integer entry;
	integer lane;

	always @(posedge clk or posedge reset) begin
		if (reset) begin
			for (entry = 0; entry < 32; entry = entry + 1) begin
				GPR[entry] <= 0;
			end
		end else if (we) begin
			for (lane = 0; lane <= `NT_M1; lane = lane + 1) begin
				if (be[lane]) begin
					// Written byte by byte; byte 0 is the least significant.
					GPR[waddr][lane][0] <= wdata[lane][7:0];
					GPR[waddr][lane][1] <= wdata[lane][15:8];
					GPR[waddr][lane][2] <= wdata[lane][23:16];
					GPR[waddr][lane][3] <= wdata[lane][31:24];
				end
			end
		end
	end

	// Asynchronous reads with no write-to-read forwarding.
	assign q1 = GPR[raddr1];
	assign q2 = GPR[raddr2];

endmodule
|
||||
12
hw/old_rtl/cache/Makefile
vendored
12
hw/old_rtl/cache/Makefile
vendored
@@ -1,12 +0,0 @@
|
||||
# Build flow for the d-cache Verilator test bench:
#   VERILATOR - translate the RTL into C++ under ./obj_dir
#   RUNFILE   - compile the generated C++ into the simulation executable
#   clean     - remove all generated output
# None of these targets name real files, so they are declared phony to keep a
# stray file called e.g. "clean" from suppressing the rule.
.PHONY: all VERILATOR RUNFILE clean

all: RUNFILE

VERILATOR:
	verilator --compiler gcc --Wno-UNOPTFLAT -Wall --trace -cc VX_d_cache_encapsulate.v -Irtl --exe d_cache_test_bench.cpp -CFLAGS -std=c++11

# Use $(MAKE) so flags and the jobserver propagate to the sub-make.
RUNFILE: VERILATOR
	$(MAKE) -C obj_dir -j -f VVX_d_cache_encapsulate.mk

# -rf keeps clean from failing when obj_dir is missing or already empty.
clean:
	rm -rf ./obj_dir
|
||||
|
||||
46
hw/old_rtl/cache/Notes
vendored
46
hw/old_rtl/cache/Notes
vendored
@@ -1,46 +0,0 @@
|
||||
Notes
|
||||
|
||||
|
||||
8 kB L1 Data Cache | 16 kB L1 I cache (maybe)
|
||||
[tag index offset_remaining_block bank wordOffset], use a blocksize of 128 bytes between memory and cache. So each bank gets 16 bytes.
|
||||
total offset is b its
|
||||
4 bits new offset, 2 bits block, 2 bits word offset
|
||||
xxxxxxxIIIIIIIIoobbbyy
|
||||
9876543210
|
||||
bbbyyyyy
|
||||
o = index into block offset
|
||||
b = bank
|
||||
y = word offset
|
||||
I = index into cach
|
||||
6 bits indexes (64 indeces) No ways || 16 indexes with 4 ways
|
||||
Rest of the bits are tag bits
|
||||
|
||||
blocks / banks = 16 bytes, 8 banks. 128 bytes. 256 indexes (height). width is 16 bytes. 4 words per block (per bank). 17 bit tag
|
||||
|
||||
gtkwave ___.vcd
|
||||
|
||||
|
||||
// Splitting it up
|
||||
|
||||
// word byte
|
||||
wire[127:0][3:0] data_from_ram;
|
||||
|
||||
|
||||
// word byte bank
|
||||
wire[15:0][3:0] bank_data_n[3:0]
|
||||
|
||||
integer i;
|
||||
for (i = 0; i < something; i+=8)
|
||||
{
|
||||
bank_data_n[0][i/8] = data_from_ram[i+0]
|
||||
bank_data_n[1][i/8] = data_from_ram[i+1]
|
||||
bank_data_n[2][i/8] = data_from_ram[i+2]
|
||||
bank_data_n[3][i/8] = data_from_ram[i+3]
|
||||
bank_data_n[4][i/8] = data_from_ram[i+4]
|
||||
bank_data_n[5][i/8] = data_from_ram[i+5]
|
||||
bank_data_n[6][i/8] = data_from_ram[i+6]
|
||||
bank_data_n[7][i/8] = data_from_ram[i+7]
|
||||
}
|
||||
|
||||
|
||||
With Cache. If miss. Go to memory, grab all data, replace that data in the cache. Generate a new request, feed that into the cache (this one will hit), return that
|
||||
253
hw/old_rtl/cache/VX_Cache_Bank.v
vendored
253
hw/old_rtl/cache/VX_Cache_Bank.v
vendored
@@ -1,253 +0,0 @@
|
||||
// To Do: Change way_id_out to an internal register which holds when in between access and finished.
|
||||
// Also add a bit about wheter the "Way ID" is valid / being held or if it is just default
|
||||
// Also make sure all possible output states are transmitted back to the bank correctly
|
||||
|
||||
`include "../VX_define.v"
|
||||
// `include "VX_cache_data.v"
|
||||
|
||||
|
||||
// One bank of the data cache.
//
// Looks up `actual_index`/`o_tag` in the per-way storage (VX_cache_data_per_index),
// formats load data according to `i_p_mem_read`/`byte_select`, generates the
// per-byte write enables for stores, and exposes the currently selected line
// (data, tag-derived address, dirty flag) so the parent can write it back.
//
// `state` is compared against the local CACHE_IDLE / RECIV_MEM_RSP encodings:
//   - CACHE_IDLE    && valid_in : a processor access (may hit and/or write)
//   - RECIV_MEM_RSP && valid_in : the line is refilled from `fetched_writedata`
module VX_Cache_Bank
	#(
	parameter CACHE_SIZE          = 4096, // Bytes
	parameter CACHE_WAYS          = 1,
	parameter CACHE_BLOCK         = 128, // Bytes
	parameter CACHE_BANKS         = 8,
	parameter LOG_NUM_BANKS       = 3,
	parameter NUM_REQ             = 8,
	parameter LOG_NUM_REQ         = 3,
	parameter NUM_IND             = 8,
	parameter CACHE_WAY_INDEX     = 1,
	parameter NUM_WORDS_PER_BLOCK = 4,
	parameter OFFSET_SIZE_START   = 0,
	parameter OFFSET_SIZE_END     = 1,
	parameter TAG_SIZE_START      = 0,
	parameter TAG_SIZE_END        = 16,
	parameter IND_SIZE_START      = 0,
	parameter IND_SIZE_END        = 7,
	parameter ADDR_TAG_START      = 15,
	parameter ADDR_TAG_END        = 31,
	parameter ADDR_OFFSET_START   = 5,
	parameter ADDR_OFFSET_END     = 6,
	parameter ADDR_IND_START      = 7,
	parameter ADDR_IND_END        = 14
	)
	(
		clk,
		rst,
		state,
		read_or_write, // Read = 0 | Write = 1
		i_p_mem_read,
		i_p_mem_write,
		valid_in,
		//write_from_mem,
		actual_index,
		o_tag,
		block_offset,
		writedata,
		fetched_writedata,

		byte_select,

		readdata,
		hit,
		//miss,

		eviction_wb,   // Need to evict
		eviction_addr, // What's the eviction tag

		data_evicted,
		evicted_way
	);

	// Cache controller state encodings (compared against `state`).
	localparam CACHE_IDLE    = 0; // Idle
	localparam SEND_MEM_REQ  = 1; // Write back this block into memory
	localparam RECIV_MEM_RSP = 2;

	// Number of low address bits cleared when rebuilding the eviction address.
	localparam BLOCK_NUM_BITS = `CLOG2(CACHE_BLOCK);

	// Inputs
	input wire rst;
	input wire clk;
	input wire [3:0] state;

	// Lookup address fields
	input wire[IND_SIZE_END:IND_SIZE_START]       actual_index;
	input wire[TAG_SIZE_END:TAG_SIZE_START]       o_tag; // When write_from_mem = 1, o_tag is the new tag
	input wire[OFFSET_SIZE_END:OFFSET_SIZE_START] block_offset;

	input wire[31:0] writedata;
	input wire       valid_in;
	input wire       read_or_write; // Specifies if it is a read or write operation

	input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] fetched_writedata;
	input wire[2:0] i_p_mem_read;
	input wire[2:0] i_p_mem_write;
	input wire[1:0] byte_select;

	input wire[CACHE_WAY_INDEX-1:0] evicted_way;

	// Outputs
	output wire[31:0] readdata;
	output wire       hit;

	// Eviction notice
	output wire       eviction_wb;   // Need to evict
	output wire[31:0] eviction_addr; // What's the eviction tag

	// Eviction data
	output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_evicted;

	wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use;
	wire[TAG_SIZE_END:TAG_SIZE_START]   tag_use;
	wire[TAG_SIZE_END:TAG_SIZE_START]   eviction_tag;
	wire valid_use;
	wire dirty_use;
	wire access;
	wire write_from_mem;
	wire miss;

	wire[CACHE_WAY_INDEX-1:0] way_to_update;

	// A miss is only flagged for a *valid* line whose tag differs.
	// NOTE(review): an invalid line is therefore neither `hit` nor `miss`, so
	// a store to it still asserts the write enables below — confirm that this
	// is intended before reusing this module.
	assign miss = (tag_use != o_tag) && valid_use && valid_in;

	assign data_evicted = data_use;

	// assign eviction_wb = miss && (dirty_use != 1'b0) && valid_use;
	assign eviction_wb    = (dirty_use != 1'b0);
	assign eviction_tag   = tag_use;
	assign access         = (state == CACHE_IDLE) && valid_in;
	assign write_from_mem = (state == RECIV_MEM_RSP) && valid_in; // TODO
	assign hit            = (access && (tag_use == o_tag) && valid_use);
	assign eviction_addr  = {eviction_tag, actual_index, {(BLOCK_NUM_BITS){1'b0}}}; // Fix with actual data

	// The way being refilled is chosen by the parent. This is driven exactly
	// once here; the original assigned it inside the per-word generate loop,
	// creating NUM_WORDS_PER_BLOCK identical drivers on the same net.
	assign way_to_update = evicted_way;

	// Decode of the load / store flavour.
	wire lw  = (i_p_mem_read == `LW_MEM_READ);
	wire lb  = (i_p_mem_read == `LB_MEM_READ);
	wire lh  = (i_p_mem_read == `LH_MEM_READ);
	wire lhu = (i_p_mem_read == `LHU_MEM_READ);
	wire lbu = (i_p_mem_read == `LBU_MEM_READ);

	wire sw = (i_p_mem_write == `SW_MEM_WRITE);
	wire sb = (i_p_mem_write == `SB_MEM_WRITE);
	wire sh = (i_p_mem_write == `SH_MEM_WRITE);

	// One-hot decode of the byte position inside the word.
	wire b0 = (byte_select == 0);
	wire b1 = (byte_select == 1);
	wire b2 = (byte_select == 2);
	wire b3 = (byte_select == 3);

	// Selected word, shifted so the addressed byte/halfword sits at bit 0.
	wire[31:0] data_unQual = (b0 || lw) ? (data_use[block_offset]     )  :
	                         b1         ? (data_use[block_offset] >> 8)  :
	                         b2         ? (data_use[block_offset] >> 16) :
	                                      (data_use[block_offset] >> 24);

	// Sign- / zero-extended variants of the load data.
	wire[31:0] lb_data  = (data_unQual[7] ) ? (data_unQual | 32'hFFFFFF00) : (data_unQual & 32'hFF);
	wire[31:0] lh_data  = (data_unQual[15]) ? (data_unQual | 32'hFFFF0000) : (data_unQual & 32'hFFFF);
	wire[31:0] lbu_data = (data_unQual & 32'hFF);
	wire[31:0] lhu_data = (data_unQual & 32'hFFFF);
	wire[31:0] lw_data  = (data_unQual);

	// Store data moved into the addressed byte lane(s); the byte masks below
	// decide which lanes are actually written.
	wire[31:0] sw_data = writedata;

	wire[31:0] sb_data = b1 ? {{16{1'b0}}, writedata[7:0], { 8{1'b0}}} :
	                     b2 ? {{ 8{1'b0}}, writedata[7:0], {16{1'b0}}} :
	                     b3 ? {writedata[7:0], {24{1'b0}}}             : // no zero-count replication
	                          writedata;

	wire[31:0] sh_data = b2 ? {writedata[15:0], {16{1'b0}}} : writedata;

	wire[31:0] use_write_data = sb ? sb_data :
	                            sh ? sh_data :
	                                 sw_data;

	wire[31:0] data_Qual = lb  ? lb_data  :
	                       lh  ? lh_data  :
	                       lhu ? lhu_data :
	                       lbu ? lbu_data :
	                             lw_data;

	assign readdata = (access) ? data_Qual : 32'b0; // Fix with actual data

	// Byte write masks for sub-word stores.
	wire[3:0] sb_mask = (b0 ? 4'b0001 : (b1 ? 4'b0010 : (b2 ? 4'b0100 : 4'b1000)));
	wire[3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);

	wire[NUM_WORDS_PER_BLOCK-1:0][3:0]  we;
	wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write;

	genvar g;
	for (g = 0; g < NUM_WORDS_PER_BLOCK; g = g + 1) begin
		// Processor store that targets word `g` of the line and is not a miss.
		wire normal_write = (read_or_write && ((access && (block_offset == g))) && !miss);

		// A refill writes every byte of every word; a store writes only the
		// bytes selected by its size and position.
		assign we[g] = (write_from_mem)     ? 4'b1111 :
		               (normal_write && sw) ? 4'b1111 :
		               (normal_write && sb) ? sb_mask :
		               (normal_write && sh) ? sh_mask :
		                                      4'b0000;

		assign data_write[g] = write_from_mem ? fetched_writedata[g] : use_write_data;
	end

	VX_cache_data_per_index #(
		.CACHE_WAYS         (CACHE_WAYS),
		.NUM_IND            (NUM_IND),
		.CACHE_WAY_INDEX    (CACHE_WAY_INDEX),
		.NUM_WORDS_PER_BLOCK(NUM_WORDS_PER_BLOCK),
		.TAG_SIZE_START     (TAG_SIZE_START),
		.TAG_SIZE_END       (TAG_SIZE_END),
		.IND_SIZE_START     (IND_SIZE_START),
		.IND_SIZE_END       (IND_SIZE_END)) data_structures(
		.clk          (clk),
		.rst          (rst),
		.valid_in     (valid_in),
		.state        (state),
		// Inputs
		.addr         (actual_index),
		.we           (we),
		.evict        (write_from_mem),
		.data_write   (data_write),
		.tag_write    (o_tag),
		.way_to_update(way_to_update),
		// Outputs
		.tag_use      (tag_use),
		.data_use     (data_use),
		.valid_use    (valid_use),
		.dirty_use    (dirty_use)
	);

endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
30
hw/old_rtl/cache/VX_cache_bank_valid.v
vendored
30
hw/old_rtl/cache/VX_cache_bank_valid.v
vendored
@@ -1,30 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
// Builds, for every bank, the mask of requests that target it.
//
// thread_track_banks[b][t] is set when request `t` is valid and address bits
// [2+LOG_NUM_BANKS-1:2] of i_p_addr[t] equal `b`. With a single bank every
// valid request maps to bank 0.
//
// Parameters:
//   NUMBER_BANKS  - number of cache banks
//   LOG_NUM_BANKS - number of address bits used to select a bank
//   NUM_REQ       - number of simultaneous requests
module VX_cache_bank_valid
	#(
	parameter NUMBER_BANKS  = 8,
	parameter LOG_NUM_BANKS = 3,
	parameter NUM_REQ       = 1
	)
	(
	input  wire [NUM_REQ-1:0]                      i_p_valid,
	input  wire [NUM_REQ-1:0][31:0]                i_p_addr,
	output reg  [NUMBER_BANKS - 1 : 0][NUM_REQ-1:0] thread_track_banks
	);

	integer t_id;

	// The single-bank case is chosen at elaboration time. With a procedural
	// `if` the bank part-select is still elaborated for NUMBER_BANKS == 1, and
	// with LOG_NUM_BANKS = 0 it becomes the reversed range [1:2].
	generate
		if (NUMBER_BANKS != 1) begin : multi_bank
			always @(*) begin
				thread_track_banks = 0;
				for (t_id = 0; t_id < NUM_REQ; t_id = t_id + 1)
				begin
					thread_track_banks[i_p_addr[t_id][2+LOG_NUM_BANKS-1:2]][t_id] = i_p_valid[t_id];
				end
			end
		end else begin : single_bank
			always @(*) begin
				thread_track_banks = 0;
				for (t_id = 0; t_id < NUM_REQ; t_id = t_id + 1)
				begin
					thread_track_banks[0][t_id] = i_p_valid[t_id];
				end
			end
		end
	endgenerate

endmodule
|
||||
233
hw/old_rtl/cache/VX_cache_data.v
vendored
233
hw/old_rtl/cache/VX_cache_data.v
vendored
@@ -1,233 +0,0 @@
|
||||
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
// Storage for one cache way: data, tag, valid and dirty bits per index.
//
// Reads are combinational on `addr` in the behavioural model. Writes happen on
// the rising clock edge:
//   - `we[w][b]` writes byte `b` of word `w` from `data_write`
//   - `evict` installs `tag_write`, marks the line valid and clears dirty
//   - any other write to a clean line sets dirty
//
// Without `SYN the arrays are modelled as registers with asynchronous reset;
// with `SYN they map onto the rf2_32x128_wm1 / rf2_32x19_wm0 memory macros.
module VX_cache_data
	#(
	parameter NUM_IND             = 8,
	parameter NUM_WORDS_PER_BLOCK = 4,
	parameter TAG_SIZE_START      = 0,
	parameter TAG_SIZE_END        = 16,
	parameter IND_SIZE_START      = 0,
	parameter IND_SIZE_END        = 7
	)
	(
	input wire clk, rst, // Clock

	// Addr
	input wire[IND_SIZE_END:IND_SIZE_START] addr,
	// WE
	input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we,
	input wire evict,
	// Data
	input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write,
	input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write,

	output wire[TAG_SIZE_END:TAG_SIZE_START] tag_use,
	output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use,
	output wire valid_use,
	output wire dirty_use
	);

	wire currently_writing = (|we);
	// Dirty changes when a clean line is written, or when the line is replaced.
	wire update_dirty      = ((!dirty_use) && currently_writing) || (evict);

	// A freshly installed line is clean; any other write makes it dirty.
	wire dirt_new = evict ? 0 : (|we);

	`ifndef SYN

		// (3:0) 4 bytes
		reg[NUM_WORDS_PER_BLOCK-1:0][3:0][7:0] data[NUM_IND-1:0]; // Actual Data
		reg[TAG_SIZE_END:TAG_SIZE_START]       tag[NUM_IND-1:0];
		reg                                    valid[NUM_IND-1:0];
		reg                                    dirty[NUM_IND-1:0];

		// Read port
		assign data_use  = data[addr];
		assign tag_use   = tag[addr];
		assign valid_use = valid[addr];
		assign dirty_use = dirty[addr];

		integer f;
		integer ini_ind;
		always @(posedge clk, posedge rst) begin : update_all
			if (rst) begin
				for (ini_ind = 0; ini_ind < NUM_IND; ini_ind=ini_ind+1) begin
					data[ini_ind]  <= 0;
					tag[ini_ind]   <= 0;
					valid[ini_ind] <= 0;
					dirty[ini_ind] <= 0;
				end
			end else begin
				// Write port
				if (update_dirty) dirty[addr] <= dirt_new;
				if (evict) tag[addr]   <= tag_write;
				if (evict) valid[addr] <= 1;

				for (f = 0; f < NUM_WORDS_PER_BLOCK; f = f + 1) begin
					if (we[f][0]) data[addr][f][0] <= data_write[f][7 :0 ];
					if (we[f][1]) data[addr][f][1] <= data_write[f][15:8 ];
					if (we[f][2]) data[addr][f][2] <= data_write[f][23:16];
					if (we[f][3]) data[addr][f][3] <= data_write[f][31:24];
				end
			end
		end

	`else

		wire[IND_SIZE_END:IND_SIZE_START] use_addr = addr;

		// NOTE(review): the polarity of the macro's CENA/CENB pins is not
		// visible in this file — confirm against the memory macro datasheet.
		wire cena = 1;

		wire cenb_d = (|we);
		wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_d = data_write;
		wire[NUM_WORDS_PER_BLOCK-1:0][31:0] write_bit_mask_d;
		wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_d;
		genvar cur_b;
		for (cur_b = 0; cur_b < NUM_WORDS_PER_BLOCK; cur_b=cur_b+1) begin
			// Expand each inverted byte enable to the 8 mask bits of its byte.
			// The original {32{~we[cur_b]}} replicated the whole 4-bit enable
			// and truncated, so the mask bits did not line up with the bytes.
			assign write_bit_mask_d[cur_b] = {{8{~we[cur_b][3]}},
			                                  {8{~we[cur_b][2]}},
			                                  {8{~we[cur_b][1]}},
			                                  {8{~we[cur_b][0]}}};
		end
		assign data_use = data_out_d;

		// Using ASIC MEM
		/* verilator lint_off PINCONNECTEMPTY */
		rf2_32x128_wm1 data (
			.CENYA(),
			.AYA(),
			.CENYB(),
			.WENYB(),
			.AYB(),
			.QA(data_out_d),
			.SOA(),
			.SOB(),
			.CLKA(clk),
			.CENA(cena),
			.AA(use_addr),
			.CLKB(clk),
			.CENB(cenb_d),
			.WENB(write_bit_mask_d),
			.AB(use_addr),
			.DB(wdata_d),
			.EMAA(3'b011),
			.EMASA(1'b0),
			.EMAB(3'b011),
			.TENA(1'b1),
			.TCENA(1'b0),
			.TAA(5'b0),
			.TENB(1'b1),
			.TCENB(1'b0),
			.TWENB(128'b0),
			.TAB(5'b0),
			.TDB(128'b0),
			.RET1N(1'b1),
			.SIA(2'b0),
			.SEA(1'b0),
			.DFTRAMBYP(1'b0),
			.SIB(2'b0),
			.SEB(1'b0),
			.COLLDISN(1'b1)
		);
		/* verilator lint_on PINCONNECTEMPTY */

		// Metadata word layout: {tag[16:0], dirty, valid} = 19 bits, fixed by
		// the width of the rf2_32x19_wm0 macro instantiated below.
		wire[16:0] old_tag;
		wire       old_valid;
		wire       old_dirty;

		wire[16:0] new_tag   = evict        ? tag_write : old_tag;
		wire       new_valid = evict        ? 1         : old_valid;
		wire       new_dirty = update_dirty ? dirt_new  : old_dirty;

		wire cenb_m = (evict || update_dirty);

		// Try to fix the error in memory connection, modified by Lingjun Zhu on Oct. 28 2019
		// The metadata macro has no write-mask port, so the whole 19-bit word
		// is rewritten (read-modify-write through new_tag/new_valid/new_dirty).
		wire[19-1:0] wdata_m = {new_tag, new_dirty, new_valid};

		wire[19-1:0] data_out_m;

		assign {old_tag, old_dirty, old_valid} = data_out_m;

		assign dirty_use = old_dirty;
		assign valid_use = old_valid;
		assign tag_use   = old_tag;

		/* verilator lint_off PINCONNECTEMPTY */
		rf2_32x19_wm0 meta (
			.CENYA(),
			.AYA(),
			.CENYB(),
			// .WENYB(),
			.AYB(),
			.QA(data_out_m),
			.SOA(),
			.SOB(),
			.CLKA(clk),
			.CENA(cena),
			.AA(use_addr),
			.CLKB(clk),
			.CENB(cenb_m),
			// .WENB(write_bit_mask_m),
			.AB(use_addr),
			.DB(wdata_m),
			.EMAA(3'b011),
			.EMASA(1'b0),
			.EMAB(3'b011),
			.TENA(1'b1),
			.TCENA(1'b0),
			.TAA(5'b0),
			.TENB(1'b1),
			.TCENB(1'b0),
			// .TWENB(128'b0),
			.TAB(5'b0),
			.TDB(19'b0),
			.RET1N(1'b1),
			.SIA(2'b0),
			.SEA(1'b0),
			.DFTRAMBYP(1'b0),
			.SIB(2'b0),
			.SEB(1'b0),
			.COLLDISN(1'b1)
		);
		/* verilator lint_on PINCONNECTEMPTY */

	`endif

endmodule
|
||||
163
hw/old_rtl/cache/VX_cache_data_per_index.v
vendored
163
hw/old_rtl/cache/VX_cache_data_per_index.v
vendored
@@ -1,163 +0,0 @@
|
||||
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
// Set-associative wrapper around VX_cache_data: instantiates one storage array
// per way and selects which way is read and written.
//
// Way selection:
//   - state == CACHE_IDLE : the way whose valid tag matches `tag_write`
//   - any other state     : `way_to_update`, supplied by the parent
// The selected way drives tag_use / data_use / valid_use / dirty_use and is the
// only way that receives `we` and `evict`.
module VX_cache_data_per_index
	#(
	parameter CACHE_WAYS          = 1,
	parameter NUM_IND             = 8,
	parameter CACHE_WAY_INDEX     = 1,
	parameter NUM_WORDS_PER_BLOCK = 4,
	parameter TAG_SIZE_START      = 0,
	parameter TAG_SIZE_END        = 16,
	parameter IND_SIZE_START      = 0,
	parameter IND_SIZE_END        = 7
	)
	(
	input wire clk, // Clock
	input wire rst,
	input wire valid_in,
	input wire [3:0] state,
	// Addr
	input wire[IND_SIZE_END:IND_SIZE_START] addr,
	// WE
	input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we,
	input wire evict,
	input wire[CACHE_WAY_INDEX-1:0] way_to_update,
	// Data
	input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data
	input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write,

	output wire[TAG_SIZE_END:TAG_SIZE_START] tag_use,
	output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use,
	output wire valid_use,
	output wire dirty_use

	);

	// Cache controller state encodings (only CACHE_IDLE is used here).
	localparam CACHE_IDLE    = 0; // Idle
	localparam SEND_MEM_REQ  = 1; // Write back this block into memory
	localparam RECIV_MEM_RSP = 2;

	// Read results of every way at the current index.
	wire [CACHE_WAYS-1:0][TAG_SIZE_END:TAG_SIZE_START]     tag_use_per_way;
	wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][31:0]   data_use_per_way;
	wire [CACHE_WAYS-1:0]                                  valid_use_per_way;
	wire [CACHE_WAYS-1:0]                                  dirty_use_per_way;
	wire [CACHE_WAYS-1:0]                                  hit_per_way;

	// Write controls fanned out to every way.
	wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][3:0]    we_per_way;
	wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][31:0]   data_write_per_way;
	wire [CACHE_WAYS-1:0]                                  write_from_mem_per_way;

	wire                       invalid_found;
	wire [CACHE_WAY_INDEX-1:0] way_index;
	wire [CACHE_WAY_INDEX-1:0] invalid_index;

	// Locate the hitting way and the first invalid way. With a single way
	// both indices are trivially zero and no encoder is needed.
	if (CACHE_WAYS != 1) begin
		VX_generic_priority_encoder #(.N(CACHE_WAYS)) valid_index
		(
			.valids(~valid_use_per_way),
			.index (invalid_index),
			.found (invalid_found)
		);

		VX_generic_priority_encoder #(.N(CACHE_WAYS)) way_indexing
		(
			.valids(hit_per_way),
			.index (way_index),
			.found ()
		);
	end
	else begin
		assign way_index     = 0;
		assign invalid_found = (valid_use_per_way == 1'b0) ? 1 : 0;
		assign invalid_index = 0;
	end

	// Way that is read from and written to this cycle.
	wire[CACHE_WAY_INDEX-1:0] way_use_Qual;
	assign way_use_Qual = (state != CACHE_IDLE) ? way_to_update : way_index;

	assign tag_use   = tag_use_per_way[way_use_Qual];
	assign data_use  = data_use_per_way[way_use_Qual];
	assign valid_use = valid_use_per_way[way_use_Qual];
	assign dirty_use = dirty_use_per_way[way_use_Qual];

	genvar ways;
	for (ways = 0; ways < CACHE_WAYS; ways = ways + 1) begin : each_way

		// A way hits when it holds a valid line with the requested tag.
		assign hit_per_way[ways] = ((valid_use_per_way[ways] == 1'b1) && (tag_use_per_way[ways] == tag_write)) ? 1'b1 : 0;

		// Only the selected way sees the refill and the byte write enables;
		// the write data itself is broadcast unchanged.
		assign write_from_mem_per_way[ways] = evict && (ways == way_use_Qual);
		assign we_per_way[ways]             = (ways == way_use_Qual) ? (we) : 0;
		assign data_write_per_way[ways]     = data_write;

		VX_cache_data #(
			.NUM_IND            (NUM_IND),
			.NUM_WORDS_PER_BLOCK(NUM_WORDS_PER_BLOCK),
			.TAG_SIZE_START     (TAG_SIZE_START),
			.TAG_SIZE_END       (TAG_SIZE_END),
			.IND_SIZE_START     (IND_SIZE_START),
			.IND_SIZE_END       (IND_SIZE_END)) data_structures(
			.clk       (clk),
			.rst       (rst),
			// Inputs
			.addr      (addr),
			.we        (we_per_way[ways]),
			.evict     (write_from_mem_per_way[ways]),
			.data_write(data_write_per_way[ways]),
			.tag_write (tag_write),
			// Outputs
			.tag_use   (tag_use_per_way[ways]),
			.data_use  (data_use_per_way[ways]),
			.valid_use (valid_use_per_way[ways]),
			.dirty_use (dirty_use_per_way[ways])
		);
	end

endmodule
|
||||
387
hw/old_rtl/cache/VX_d_cache.v
vendored
387
hw/old_rtl/cache/VX_d_cache.v
vendored
@@ -1,387 +0,0 @@
|
||||
// Cache Memory (8way 4word) //
|
||||
// i_ means input port //
|
||||
// o_ means output port //
|
||||
// _p_ means data exchange with processor //
|
||||
// _m_ means data exchange with memory //
|
||||
|
||||
|
||||
// TO DO:
|
||||
// - Send in a response from memory of what the data is from the test bench
|
||||
|
||||
`include "../VX_define.v"
|
||||
//`include "VX_priority_encoder.v"
|
||||
// `include "VX_Cache_Bank.v"
|
||||
//`include "cache_set.v"
|
||||
|
||||
module VX_d_cache
|
||||
#(
|
||||
parameter CACHE_SIZE = 4096, // Bytes
|
||||
parameter CACHE_WAYS = 1,
|
||||
parameter CACHE_BLOCK = 128, // Bytes
|
||||
parameter CACHE_BANKS = 8,
|
||||
parameter LOG_NUM_BANKS = 3,
|
||||
parameter NUM_REQ = 8,
|
||||
parameter LOG_NUM_REQ = 3,
|
||||
parameter NUM_IND = 8,
|
||||
parameter CACHE_WAY_INDEX = 1,
|
||||
parameter NUM_WORDS_PER_BLOCK = 4,
|
||||
parameter OFFSET_SIZE_START = 0,
|
||||
parameter OFFSET_SIZE_END = 1,
|
||||
parameter TAG_SIZE_START = 0,
|
||||
parameter TAG_SIZE_END = 16,
|
||||
parameter IND_SIZE_START = 0,
|
||||
parameter IND_SIZE_END = 7,
|
||||
parameter ADDR_TAG_START = 15,
|
||||
parameter ADDR_TAG_END = 31,
|
||||
parameter ADDR_OFFSET_START = 5,
|
||||
parameter ADDR_OFFSET_END = 6,
|
||||
parameter ADDR_IND_START = 7,
|
||||
parameter ADDR_IND_END = 14,
|
||||
parameter MEM_ADDR_REQ_MASK = 32'hffffffc0
|
||||
)
|
||||
(
|
||||
clk,
|
||||
rst,
|
||||
i_p_addr,
|
||||
//i_p_byte_en,
|
||||
i_p_writedata,
|
||||
i_p_read_or_write, // 0 = Read | 1 = Write
|
||||
i_p_mem_read,
|
||||
i_p_mem_write,
|
||||
i_p_valid,
|
||||
//i_p_write,
|
||||
o_p_readdata,
|
||||
o_p_delay, // 0 = all threads done | 1 = Still threads that need to
|
||||
|
||||
o_m_evict_addr,
|
||||
o_m_read_addr,
|
||||
|
||||
o_m_writedata,
|
||||
|
||||
o_m_read_or_write, // 0 = Read | 1 = Write
|
||||
o_m_valid,
|
||||
i_m_readdata,
|
||||
|
||||
i_m_ready
|
||||
);
|
||||
|
||||
//parameter NUMBER_BANKS = `CACHE_BANKS;
|
||||
//localparam NUM_WORDS_PER_BLOCK = `CACHE_BLOCK / (`CACHE_BANKS*4);
|
||||
|
||||
//localparam CACHE_BLOCK_PER_BANK = (`CACHE_BLOCK / `CACHE_BANKS);
|
||||
|
||||
localparam CACHE_IDLE = 0; // Idle
|
||||
localparam SEND_MEM_REQ = 1; // Write back this block into memory
|
||||
localparam RECIV_MEM_RSP = 2;
|
||||
|
||||
|
||||
//parameter cache_entry = 9;
|
||||
input wire clk, rst;
|
||||
input wire [NUM_REQ-1:0] i_p_valid;
|
||||
input wire [NUM_REQ-1:0][31:0] i_p_addr; // FIXME
|
||||
input wire [NUM_REQ-1:0][31:0] i_p_writedata;
|
||||
input wire i_p_read_or_write; //, i_p_write;
|
||||
output reg [NUM_REQ-1:0][31:0] o_p_readdata;
|
||||
output wire o_p_delay;
|
||||
output reg [31:0] o_m_evict_addr; // Address is xxxxxxxxxxoooobbbyy
|
||||
output reg [31:0] o_m_read_addr;
|
||||
output reg o_m_valid;
|
||||
output reg[CACHE_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata;
|
||||
output reg o_m_read_or_write; //, o_m_write;
|
||||
input wire[CACHE_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata;
|
||||
input wire i_m_ready;
|
||||
|
||||
input wire[2:0] i_p_mem_read;
|
||||
input wire[2:0] i_p_mem_write;
|
||||
|
||||
|
||||
// Buffer for final data
|
||||
reg [NUM_REQ-1:0][31:0] final_data_read;
|
||||
reg [NUM_REQ-1:0][31:0] new_final_data_read;
|
||||
wire[NUM_REQ-1:0][31:0] new_final_data_read_Qual;
|
||||
|
||||
assign o_p_readdata = new_final_data_read_Qual;
|
||||
|
||||
|
||||
reg[CACHE_WAY_INDEX-1:0] global_way_to_evict;
|
||||
|
||||
|
||||
wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] thread_track_banks; // Valid thread mask per bank
|
||||
wire[CACHE_BANKS - 1 : 0][LOG_NUM_REQ-1:0] index_per_bank; // Index of thread each bank will try to service
|
||||
wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] use_mask_per_bank; // A mask of index_per_bank
|
||||
wire[CACHE_BANKS - 1 : 0] valid_per_bank; // Valid request going to each bank
|
||||
wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] threads_serviced_per_bank; // Bank successfully serviced per bank
|
||||
|
||||
wire[CACHE_BANKS-1:0][31:0] readdata_per_bank; // Data read from each bank
|
||||
wire[CACHE_BANKS-1:0] hit_per_bank; // Whether each bank got a hit or a miss
|
||||
wire[CACHE_BANKS-1:0] eviction_wb;
|
||||
reg[CACHE_BANKS-1:0] eviction_wb_old;
|
||||
|
||||
|
||||
// wire[CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] evicted_way_new;
|
||||
// reg [CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] evicted_way_old;
|
||||
// wire[CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] way_used;
|
||||
|
||||
// Internal State
|
||||
reg [3:0] state;
|
||||
wire[3:0] new_state;
|
||||
|
||||
wire[NUM_REQ-1:0] use_valid; // Valid used throught the code
|
||||
reg[NUM_REQ-1:0] stored_valid; // Saving the threads still left (bank conflict or bank miss)
|
||||
wire[NUM_REQ-1:0] new_stored_valid; // New stored valid
|
||||
|
||||
|
||||
|
||||
reg[CACHE_BANKS - 1 : 0][31:0] eviction_addr_per_bank;
|
||||
|
||||
reg[31:0] miss_addr;
|
||||
// reg[31:0] evict_addr;
|
||||
|
||||
wire curr_processor_request_valid = (|i_p_valid);
|
||||
|
||||
|
||||
assign use_valid = (stored_valid == 0) ? i_p_valid : stored_valid;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VX_cache_bank_valid #(.NUMBER_BANKS (CACHE_BANKS),
|
||||
.LOG_NUM_BANKS (LOG_NUM_BANKS),
|
||||
.NUM_REQ (NUM_REQ)) multip_banks(
|
||||
.i_p_valid (use_valid),
|
||||
.i_p_addr (i_p_addr),
|
||||
.thread_track_banks(thread_track_banks)
|
||||
);
|
||||
|
||||
|
||||
reg[NUM_REQ-1:0] threads_serviced_Qual;
|
||||
|
||||
reg[NUM_REQ-1:0] debug_hit_per_bank_mask[CACHE_BANKS-1:0];
|
||||
|
||||
genvar bid;
|
||||
for (bid = 0; bid < CACHE_BANKS; bid=bid+1)
|
||||
begin
|
||||
wire[NUM_REQ-1:0] use_threads_track_banks = thread_track_banks[bid];
|
||||
wire[LOG_NUM_REQ-1:0] use_thread_index = index_per_bank[bid];
|
||||
wire use_write_final_data = hit_per_bank[bid];
|
||||
wire[31:0] use_data_final_data = readdata_per_bank[bid];
|
||||
VX_priority_encoder_w_mask #(.N(NUM_REQ)) choose_thread(
|
||||
.valids(use_threads_track_banks),
|
||||
.mask (use_mask_per_bank[bid]),
|
||||
.index (index_per_bank[bid]),
|
||||
.found (valid_per_bank[bid])
|
||||
);
|
||||
|
||||
assign debug_hit_per_bank_mask[bid] = {NUM_REQ{hit_per_bank[bid]}};
|
||||
assign threads_serviced_per_bank[bid] = use_mask_per_bank[bid] & debug_hit_per_bank_mask[bid];
|
||||
end
|
||||
|
||||
integer test_bid;
|
||||
always @(*) begin
|
||||
new_final_data_read = 0;
|
||||
for (test_bid=0; test_bid < CACHE_BANKS; test_bid=test_bid+1)
|
||||
begin
|
||||
if (hit_per_bank[test_bid]) begin
|
||||
new_final_data_read[index_per_bank[test_bid]] = readdata_per_bank[test_bid];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
wire[CACHE_BANKS - 1 : 0] detect_bank_miss;
|
||||
//assign threads_serviced_Qual = threads_serviced_per_bank[0] | threads_serviced_per_bank[1] |
|
||||
// threads_serviced_per_bank[2] | threads_serviced_per_bank[3] |
|
||||
// threads_serviced_per_bank[4] | threads_serviced_per_bank[5] |
|
||||
// threads_serviced_per_bank[6] | threads_serviced_per_bank[7];
|
||||
integer bbid;
|
||||
always @(*) begin
|
||||
threads_serviced_Qual = 0;
|
||||
for (bbid = 0; bbid < CACHE_BANKS; bbid=bbid+1)
|
||||
begin
|
||||
threads_serviced_Qual = threads_serviced_Qual | threads_serviced_per_bank[bbid];
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
|
||||
genvar tid;
|
||||
for (tid = 0; tid < NUM_REQ; tid =tid+1)
|
||||
begin
|
||||
assign new_final_data_read_Qual[tid] = threads_serviced_Qual[tid] ? new_final_data_read[tid] : final_data_read[tid];
|
||||
end
|
||||
|
||||
|
||||
assign detect_bank_miss = (valid_per_bank & ~hit_per_bank);
|
||||
|
||||
wire delay;
|
||||
assign delay = (new_stored_valid != 0) || (state != CACHE_IDLE); // add other states
|
||||
|
||||
assign o_p_delay = delay;
|
||||
|
||||
wire[CACHE_BANKS - 1 : 0][LOG_NUM_REQ-1:0] send_index_to_bank = index_per_bank;
|
||||
|
||||
|
||||
wire[LOG_NUM_BANKS-1:0] miss_bank_index;
|
||||
wire miss_found;
|
||||
VX_generic_priority_encoder #(.N(CACHE_BANKS)) get_miss_index
|
||||
(
|
||||
.valids(detect_bank_miss),
|
||||
.index (miss_bank_index),
|
||||
.found (miss_found)
|
||||
);
|
||||
|
||||
|
||||
|
||||
assign new_state = ((state == CACHE_IDLE) && (|detect_bank_miss)) ? SEND_MEM_REQ :
|
||||
(state == SEND_MEM_REQ) ? RECIV_MEM_RSP :
|
||||
((state == RECIV_MEM_RSP) && !i_m_ready) ? RECIV_MEM_RSP :
|
||||
CACHE_IDLE;
|
||||
|
||||
// Handle if there is more than one miss
|
||||
assign new_stored_valid = use_valid & (~threads_serviced_Qual);
|
||||
|
||||
|
||||
wire update_global_way_to_evict = ((state == RECIV_MEM_RSP) && (new_state == CACHE_IDLE)) && (CACHE_WAYS > 1);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
genvar cur_t;
|
||||
integer init_b;
|
||||
always @(posedge clk, posedge rst) begin
|
||||
if (rst) begin
|
||||
final_data_read <= 0;
|
||||
// new_final_data_read = 0;
|
||||
state <= 0;
|
||||
stored_valid <= 0;
|
||||
// eviction_addr_per_bank <= 0;
|
||||
miss_addr <= 0;
|
||||
// evict_addr <= 0;
|
||||
// threads_serviced_Qual = 0;
|
||||
// for (init_b = 0; init_b < NUMBER_BANKS; init_b=init_b+1)
|
||||
// begin
|
||||
// debug_hit_per_bank_mask[init_b] <= 0;
|
||||
// end
|
||||
// evicted_way_old <= 0;
|
||||
// eviction_wb_old <= 0;
|
||||
global_way_to_evict <= 0;
|
||||
|
||||
end else begin
|
||||
|
||||
global_way_to_evict <= (update_global_way_to_evict) ? (global_way_to_evict+1) : global_way_to_evict;
|
||||
|
||||
state <= new_state;
|
||||
|
||||
stored_valid <= new_stored_valid;
|
||||
|
||||
if (state == CACHE_IDLE) begin
|
||||
if (miss_found) begin
|
||||
miss_addr <= i_p_addr[send_index_to_bank[miss_bank_index]];
|
||||
// evict_addr <= eviction_addr_per_bank[miss_bank_index];
|
||||
end else begin
|
||||
miss_addr <= 0;
|
||||
// evict_addr <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
final_data_read <= new_final_data_read_Qual;
|
||||
// evicted_way_old <= evicted_way_new;
|
||||
// eviction_wb_old <= eviction_wb;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
genvar bank_id;
|
||||
generate
|
||||
for (bank_id = 0; bank_id < CACHE_BANKS; bank_id = bank_id + 1)
|
||||
begin
|
||||
wire[31:0] bank_addr = (state == SEND_MEM_REQ) ? miss_addr :
|
||||
(state == RECIV_MEM_RSP) ? miss_addr :
|
||||
i_p_addr[send_index_to_bank[bank_id]];
|
||||
|
||||
// assign evicted_way_new[bank_id] = (state == SEND_MEM_REQ) ? way_used[bank_id] :
|
||||
// (state == RECIV_MEM_RSP) ? evicted_way_old[bank_id] :
|
||||
// 0;
|
||||
|
||||
wire[1:0] byte_select = bank_addr[1:0];
|
||||
wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START];
|
||||
|
||||
`ifdef SYN_FUNC
|
||||
wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = 0;
|
||||
wire[IND_SIZE_END:IND_SIZE_START] cache_index = 0;
|
||||
`else
|
||||
wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = bank_addr[ADDR_OFFSET_END:ADDR_OFFSET_START];
|
||||
wire[IND_SIZE_END:IND_SIZE_START] cache_index = bank_addr[ADDR_IND_END:ADDR_IND_START];
|
||||
`endif
|
||||
|
||||
|
||||
wire normal_valid_in = valid_per_bank[bank_id];
|
||||
wire use_valid_in = ((state == RECIV_MEM_RSP) && i_m_ready) ? 1'b1 :
|
||||
((state == RECIV_MEM_RSP) && !i_m_ready) ? 1'b0 :
|
||||
((state == SEND_MEM_REQ)) ? 1'b0 :
|
||||
normal_valid_in;
|
||||
|
||||
|
||||
VX_Cache_Bank #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_WAYS (CACHE_WAYS),
|
||||
.CACHE_BLOCK (CACHE_BLOCK),
|
||||
.CACHE_BANKS (CACHE_BANKS),
|
||||
.LOG_NUM_BANKS (LOG_NUM_BANKS),
|
||||
.NUM_REQ (NUM_REQ),
|
||||
.LOG_NUM_REQ (LOG_NUM_REQ),
|
||||
.NUM_IND (NUM_IND),
|
||||
.CACHE_WAY_INDEX (CACHE_WAY_INDEX),
|
||||
.NUM_WORDS_PER_BLOCK (NUM_WORDS_PER_BLOCK),
|
||||
.OFFSET_SIZE_START (OFFSET_SIZE_START),
|
||||
.OFFSET_SIZE_END (OFFSET_SIZE_END),
|
||||
.TAG_SIZE_START (TAG_SIZE_START),
|
||||
.TAG_SIZE_END (TAG_SIZE_END),
|
||||
.IND_SIZE_START (IND_SIZE_START),
|
||||
.IND_SIZE_END (IND_SIZE_END),
|
||||
.ADDR_TAG_START (ADDR_TAG_START),
|
||||
.ADDR_TAG_END (ADDR_TAG_END),
|
||||
.ADDR_OFFSET_START (ADDR_OFFSET_START),
|
||||
.ADDR_OFFSET_END (ADDR_OFFSET_END),
|
||||
.ADDR_IND_START (ADDR_IND_START),
|
||||
.ADDR_IND_END (ADDR_IND_END)
|
||||
) bank_structure (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.state (state),
|
||||
.valid_in (use_valid_in),
|
||||
.actual_index (cache_index),
|
||||
.o_tag (cache_tag),
|
||||
.block_offset (cache_offset),
|
||||
.writedata (i_p_writedata[send_index_to_bank[bank_id]]),
|
||||
.read_or_write (i_p_read_or_write),
|
||||
.i_p_mem_read (i_p_mem_read),
|
||||
.i_p_mem_write (i_p_mem_write),
|
||||
.byte_select (byte_select),
|
||||
.hit (hit_per_bank[bank_id]),
|
||||
.readdata (readdata_per_bank[bank_id]), // Data read
|
||||
.eviction_addr (eviction_addr_per_bank[bank_id]),
|
||||
.data_evicted (o_m_writedata[bank_id]),
|
||||
.eviction_wb (eviction_wb[bank_id]), // Something needs to be written back
|
||||
.fetched_writedata(i_m_readdata[bank_id]), // Data From memory
|
||||
.evicted_way (global_way_to_evict)
|
||||
);
|
||||
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// Mem Rsp
|
||||
|
||||
// Req to mem:
|
||||
assign o_m_evict_addr = (eviction_addr_per_bank[0]) & MEM_ADDR_REQ_MASK; // Could be anything because tag+index are same
|
||||
assign o_m_read_addr = miss_addr & MEM_ADDR_REQ_MASK;
|
||||
assign o_m_valid = (state == SEND_MEM_REQ);
|
||||
assign o_m_read_or_write = (state == SEND_MEM_REQ) && (|eviction_wb);
|
||||
//end
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
118
hw/old_rtl/cache/VX_d_cache_encapsulate.v
vendored
118
hw/old_rtl/cache/VX_d_cache_encapsulate.v
vendored
@@ -1,118 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
// `define NUM_WORDS_PER_BLOCK 4
|
||||
|
||||
// Wrapper around VX_d_cache that exposes the per-thread and per-bank buses as
// unpacked arrays and converts them to/from the packed vectors that the
// VX_d_cache instance is connected to. No logic is added: every signal is
// passed straight through, only the array packing differs.
//
// Parameters:
//   NUMBER_BANKS - first dimension of the memory-side data buses.
//
// Ports:
//   i_p_* / o_p_* - requester-side signals, one array entry per thread
//                   (`NT_M1:0).
//   i_m_* / o_m_* - memory-side signals; the data buses are indexed
//                   [bank][word].
//
// All outputs are driven by continuous assignments or by the VX_d_cache
// instance (never procedurally), so they are declared as wires rather than
// regs.
module VX_d_cache_encapsulate #(
    parameter NUMBER_BANKS = 8
) (
    input  wire        clk,
    input  wire        rst,

    input  wire        i_p_initial_request,
    input  wire [31:0] i_p_addr      [`NT_M1:0],
    input  wire [31:0] i_p_writedata [`NT_M1:0],
    input  wire        i_p_read_or_write,
    input  wire        i_p_valid     [`NT_M1:0],

    output wire [31:0] o_p_readdata       [`NT_M1:0],
    output wire        o_p_readdata_valid [`NT_M1:0],
    output wire        o_p_waitrequest,

    output wire [31:0] o_m_addr,
    output wire [31:0] o_m_writedata [NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0],
    output wire        o_m_read_or_write,
    output wire        o_m_valid,

    input  wire [31:0] i_m_readdata [NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0],
    input  wire        i_m_ready
);

    // Packed views of the requester-side inputs (fed to VX_d_cache).
    wire [`NT_M1:0]       i_p_valid_inter;
    wire [`NT_M1:0][31:0] i_p_addr_inter;
    wire [`NT_M1:0][31:0] i_p_writedata_inter;

    // Packed requester-side outputs (driven by VX_d_cache).
    wire [`NT_M1:0][31:0] o_p_readdata_inter;
    wire [`NT_M1:0]       o_p_readdata_valid_inter;

    // Packed memory-side data buses.
    wire [NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter;
    wire [NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter;

    // Per-thread packing/unpacking.
    genvar curr_thread;
    generate
        for (curr_thread = 0; curr_thread < `NT; curr_thread = curr_thread + 1) begin : per_thread
            assign i_p_valid_inter[curr_thread]     = i_p_valid[curr_thread];
            assign i_p_addr_inter[curr_thread]      = i_p_addr[curr_thread];
            assign i_p_writedata_inter[curr_thread] = i_p_writedata[curr_thread];
            assign o_p_readdata[curr_thread]        = o_p_readdata_inter[curr_thread];
            assign o_p_readdata_valid[curr_thread]  = o_p_readdata_valid_inter[curr_thread];
        end
    endgenerate

    // Per-bank, per-word packing/unpacking of the memory data buses.
    genvar curr_bank;
    genvar curr_word;
    generate
        for (curr_bank = 0; curr_bank < NUMBER_BANKS; curr_bank = curr_bank + 1) begin : per_bank
            for (curr_word = 0; curr_word < `NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin : per_word
                assign o_m_writedata[curr_bank][curr_word]      = o_m_writedata_inter[curr_bank][curr_word];
                assign i_m_readdata_inter[curr_bank][curr_word] = i_m_readdata[curr_bank][curr_word];
            end
        end
    endgenerate

    VX_d_cache dcache(
        .clk                (clk),
        .rst                (rst),
        .i_p_valid          (i_p_valid_inter),
        .i_p_addr           (i_p_addr_inter),
        .i_p_initial_request(i_p_initial_request),
        .i_p_writedata      (i_p_writedata_inter),
        .i_p_read_or_write  (i_p_read_or_write),
        .o_p_readdata       (o_p_readdata_inter),
        .o_p_readdata_valid (o_p_readdata_valid_inter),
        .o_p_waitrequest    (o_p_waitrequest),
        .o_m_addr           (o_m_addr),
        .o_m_valid          (o_m_valid),
        .o_m_writedata      (o_m_writedata_inter),
        .o_m_read_or_write  (o_m_read_or_write),
        .i_m_readdata       (i_m_readdata_inter),
        .i_m_ready          (i_m_ready)
    );

endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
58
hw/old_rtl/cache/VX_d_cache_tb.v
vendored
58
hw/old_rtl/cache/VX_d_cache_tb.v
vendored
@@ -1,58 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
`include "VX_d_cache.v"
|
||||
|
||||
// Smoke testbench for VX_d_cache: instantiates the cache, applies a reset
// pulse with all request inputs idle, lets the clock run for a bounded time
// and then ends the simulation. No requests are issued yet.
//
// NOTE(review): reset is assumed to be active-high - confirm against
// VX_d_cache. The 14-bit address width carries the original FIXME.
module VX_d_cache_tb;

    parameter NUMBER_BANKS = 8;

    // Stimulus driven by this testbench.
    reg                             clk, reset, im_ready;
    reg [`NT_M1:0]                  i_p_valid;
    reg [`NT_M1:0][13:0]            i_p_addr; // FIXME
    reg                             i_p_initial_request;
    reg [`NT_M1:0][31:0]            i_p_writedata;
    reg                             i_p_read_or_write; //, i_p_write;
    reg [(NUMBER_BANKS * 32) - 1:0] i_m_readdata; // Read data passed from the memory module back to the controller

    // Signals driven by the DUT (nets, since the testbench never assigns them).
    wire [`NT_M1:0][31:0]            o_p_readdata;
    wire [`NT_M1:0]                  o_p_readdata_valid;
    wire                             o_p_waitrequest;
    wire [13:0]                      o_m_addr; // Only one address is sent out at a time to memory
    wire                             o_m_valid;
    wire [(NUMBER_BANKS * 32) - 1:0] o_m_writedata;
    wire                             o_m_read_or_write; //, o_m_write;

    VX_d_cache d_cache(
        .clk                (clk),
        .rst                (reset),
        .i_p_initial_request(i_p_initial_request),
        .i_p_addr           (i_p_addr),
        .i_p_writedata      (i_p_writedata),
        .i_p_read_or_write  (i_p_read_or_write), // 0 = Read | 1 = Write
        .i_p_valid          (i_p_valid),
        .o_p_readdata       (o_p_readdata),
        .o_p_readdata_valid (o_p_readdata_valid),
        .o_p_waitrequest    (o_p_waitrequest),   // 0 = all threads done | 1 = still threads pending
        .o_m_addr           (o_m_addr),
        .o_m_writedata      (o_m_writedata),
        .o_m_read_or_write  (o_m_read_or_write), // 0 = Read | 1 = Write
        .o_m_valid          (o_m_valid),
        .i_m_readdata       (i_m_readdata),
        .i_m_ready          (im_ready)
    );

    initial begin
        clk   = 0;
        reset = 1;

        // Hold every DUT input at a defined idle value instead of X.
        im_ready            = 0;
        i_p_valid           = 0;
        i_p_addr            = 0;
        i_p_initial_request = 0;
        i_p_writedata       = 0;
        i_p_read_or_write   = 0;
        i_m_readdata        = 0;

        // Keep reset asserted across two rising clock edges, then release it.
        #20 reset = 0;

        // Bound the run so the simulation terminates on its own.
        #1000 $finish;
    end

    // Free-running clock, 10 time-unit period.
    always
        #5 clk = ! clk;

endmodule
|
||||
24
hw/old_rtl/cache/VX_generic_pe.v
vendored
24
hw/old_rtl/cache/VX_generic_pe.v
vendored
@@ -1,24 +0,0 @@
|
||||
// Combinational priority encoder over an N-bit request vector.
//
//   valids - request bits.
//   index  - position of the lowest-numbered bit of `valids` that is set
//            (0 when none is set).
//   found  - 1 when at least one bit of `valids` is set.
module VX_generic_pe
#(
    parameter N = 8
)
(
    input  wire[N-1:0]         valids,
    output reg[$clog2(N)-1:0]  index,
    output reg                 found
);

    // Not referenced by the logic below; kept so existing overrides still elaborate.
    parameter my_secret = 0;

    integer bit_pos;

    always @(*) begin
        found = 0;
        index = 0;
        // Scan upwards and latch only the first set bit encountered.
        for (bit_pos = 0; bit_pos < N; bit_pos = bit_pos + 1) begin
            if (!found && valids[bit_pos]) begin
                found = 1;
                index = bit_pos[$clog2(N)-1:0];
            end
        end
    end

endmodule
|
||||
233
hw/old_rtl/cache/cache_set.v
vendored
233
hw/old_rtl/cache/cache_set.v
vendored
@@ -1,233 +0,0 @@
|
||||
// To Do: Change way_id_out to an internal register which holds when in between access and finished.
|
||||
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default
|
||||
// Also make sure all possible output states are transmitted back to the bank correctly
|
||||
|
||||
// `include "VX_define.v"
|
||||
// cache_set: one cache set that stores a single 32-bit word per way, with a
// 2-bit tag and valid/clean flags per way.
//
// Inputs : clk, rst; four mode strobes (access, find_evict, write_from_mem,
//          idle); o_tag (tag to look up); writedata; write (1 = write).
// Outputs: readdata, hit, miss (registered); eviction_wb and eviction_tag
//          (combinational, describe the way selected by counter[1:0]).
//
// Everything in the clocked block uses non-blocking assignments, so when two
// statements assign the same register in one cycle the later one in source
// order wins. Several of the notes below rely on that.
module cache_set(clk,
|
||||
rst,
|
||||
// These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes
|
||||
access, // First
|
||||
find_evict,
|
||||
write_from_mem,
|
||||
idle,
|
||||
// entry,
|
||||
o_tag,
|
||||
writedata,
|
||||
//byte_en,
|
||||
write,
|
||||
//word_en,
|
||||
//way_id_in,
|
||||
//way_id_out,
|
||||
readdata,
|
||||
//wb_addr,
|
||||
hit,
|
||||
eviction_wb,
|
||||
eviction_tag,
|
||||
//eviction_data,
|
||||
//modify,
|
||||
miss
|
||||
//valid_data
|
||||
//read_miss
|
||||
);
|
||||
|
|
||||
// cache_entry is only referenced from commented-out code in this module.
parameter cache_entry = 14;
|
||||
// ways_per_set sizes the storage arrays, but the lookup chains below are
// written out for exactly ways 0..3 and the victim index is counter[1:0].
parameter ways_per_set = 4;
|
||||
|
|
||||
input wire clk, rst;
|
||||
input wire access;
|
||||
input wire find_evict;
|
||||
input wire write_from_mem;
|
||||
input wire idle;
|
||||
//input wire [cache_entry-1:0] entry;
|
||||
input wire [1:0] o_tag;
|
||||
input wire [31:0] writedata;
|
||||
//input wire [3:0] byte_en;
|
||||
input wire write; // 0 == False
|
||||
//input wire [3:0] word_en;
|
||||
//input wire read_miss;
|
||||
//input wire [1:0] way_id_in;
|
||||
//output reg [1:0] way_id_out;
|
||||
output reg [31:0] readdata;
|
||||
//output reg [3:0] hit;
|
||||
output reg hit;
|
||||
output reg miss;
|
||||
output wire eviction_wb;
|
||||
output wire [1:0] eviction_tag;
|
||||
// Registered copy of data[counter[1:0]], refreshed every clock (see end of the always block).
reg [31:0] eviction_data;
|
||||
//output wire [22:0] wb_addr;
|
||||
//output wire modify, valid_data;
|
||||
|
|
||||
|
|
||||
|
|
||||
//wire [2:0] i_tag;
|
||||
//wire dirty;
|
||||
//wire [24-cache_entry:0] write_tag_data;
|
||||
|
|
||||
// Table for one set
|
||||
reg [2:0] counter; // Determines which to evict
|
||||
reg valid [ways_per_set-1:0];
|
||||
reg [1:0] tag [ways_per_set-1:0];
|
||||
reg clean [ways_per_set-1:0];
|
||||
reg [31:0] data [ways_per_set-1:0];
|
||||
|
|
||||
|
|
||||
// A write-back is flagged when miss is set and the victim way (counter[1:0])
// is valid and not clean; eviction_tag is that way's stored tag.
assign eviction_wb = miss && clean[counter[1:0]] != 1'b1 && valid[counter[1:0]] == 1'b1;
|
||||
assign eviction_tag = tag[counter[1:0]];
|
||||
//assign eviction_data = data[counter[1:0]];
|
||||
//assign hit = valid_data && (o_tag == i_tag);
|
||||
//assign modify = valid_data && (o_tag != i_tag) && dirty;
|
||||
//assign miss = !valid_data || ((o_tag != i_tag) && !dirty);
|
||||
|
|
||||
//assign wb_addr = {i_tag, entry};
|
||||
always @(posedge clk) begin
|
||||
// NOTE(review): the reset branch is empty, so counter/valid/tag/clean/data
// are never initialised by rst, and the code below still runs while rst is high.
if (rst) begin
|
||||
|
|
||||
end
|
||||
// First find_evict chain (way priority 0,1,2,3). Its readdata assignments are
// overridden by the second find_evict block further down, which always
// assigns readdata; its effect here is that `access` is ignored while
// find_evict is high.
if (find_evict) begin
|
||||
if (tag[0] == o_tag && valid[0]) begin
|
||||
readdata <= data[0];
|
||||
end else if (tag[1] == o_tag && valid[1]) begin
|
||||
readdata <= data[1];
|
||||
end else if (tag[2] == o_tag && valid[2]) begin
|
||||
readdata <= data[2];
|
||||
end else if (tag[3] == o_tag && valid[3]) begin
|
||||
readdata <= data[3];
|
||||
end
|
||||
// Access mode: look o_tag up in ways 0..3 (lowest matching way wins).
//  - read of a matching clean way   : hit=1, miss=0, readdata=data[way]
//  - read of a matching unclean way : hit=0, miss=1, readdata=0
//  - write to a matching way        : data/clean updated, hit=1 (miss is not assigned)
end else if (access) begin
|
||||
//tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12];
|
||||
counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC
|
||||
// Hit in First Column
|
||||
if (tag[0] == o_tag && valid[0]) begin
|
||||
if (write == 1'b0) begin // if it is a read
|
||||
if (clean[0] == 1'b1 ) begin
|
||||
//hit <= 4'b0001;
|
||||
hit <= 1'b1;
|
||||
readdata <= data[0];
|
||||
miss <= 1'b0;
|
||||
end else begin
|
||||
//hit <= 4'b0000; // SHOULD PROBABLY TRACK WHERE THIS MISS IS IN A DIFFERENT VARIABLE
|
||||
hit <= 1'b0;
|
||||
readdata <= 32'b0;
|
||||
miss <= 1'b1;
|
||||
end
|
||||
end else if (write == 1'b1) begin
|
||||
data[0] <= writedata;
|
||||
clean[0] <= 1'b0;
|
||||
//hit <= 4'b0001;
|
||||
hit <= 1'b1;
|
||||
end
|
||||
end
|
||||
// Hit in Second Column
|
||||
else if (tag[1] == o_tag && valid[1]) begin
|
||||
if (write == 1'b0) begin // if it is a read
|
||||
if (clean[1] == 1'b1 ) begin
|
||||
//hit <= 4'b0010;
|
||||
hit <= 1'b1;
|
||||
readdata <= data[1];
|
||||
miss <= 1'b0;
|
||||
end else begin
|
||||
//hit <= 4'b0000;
|
||||
hit <= 1'b0;
|
||||
readdata <= 32'b0;
|
||||
miss <= 1'b1;
|
||||
end
|
||||
end else if (write == 1'b1) begin
|
||||
data[1] <= writedata;
|
||||
clean[1] <= 1'b0;
|
||||
//hit <= 4'b0010;
|
||||
hit <= 1'b1;
|
||||
end
|
||||
end
|
||||
// Hit in Third Column
|
||||
else if (tag[2] == o_tag && valid[2]) begin
|
||||
if (write == 1'b0) begin // if it is a read
|
||||
if (clean[2] == 1'b1 ) begin
|
||||
//hit <= 4'b0100;
|
||||
hit <= 1'b1;
|
||||
readdata <= data[2];
|
||||
miss <= 1'b0;
|
||||
end else begin
|
||||
//hit <= 4'b0000;
|
||||
hit <= 1'b0;
|
||||
readdata <= 32'b0;
|
||||
miss <= 1'b1;
|
||||
end
|
||||
end else if (write == 1'b1) begin
|
||||
data[2] <= writedata;
|
||||
clean[2] <= 1'b0;
|
||||
//hit <= 4'b0100;
|
||||
hit <= 1'b1;
|
||||
end
|
||||
end
|
||||
// Hit in Fourth Column
|
||||
else if (tag[3] == o_tag && valid[3]) begin
|
||||
if (write == 1'b0) begin // if it is a read
|
||||
if (clean[3] == 1'b1 ) begin
|
||||
//hit <= 4'b1000;
|
||||
hit <= 1'b1;
|
||||
readdata <= data[3];
|
||||
miss <= 1'b0;
|
||||
end else begin
|
||||
//hit <= 4'b0000;
|
||||
hit <= 1'b0;
|
||||
readdata <= 32'b0;
|
||||
miss <= 1'b1;
|
||||
end
|
||||
end else if (write == 1'b1) begin
|
||||
data[3] <= writedata;
|
||||
clean[3] <= 1'b0;
|
||||
//hit <= 4'b1000;
|
||||
hit <= 1'b1;
|
||||
end
|
||||
end
|
||||
// Miss
|
||||
// No way matched: miss is raised and the victim way's data/clean are
// overwritten immediately; its tag and valid bit are not touched here.
// NOTE(review): the write-miss path marks the way clean (1'b1) while the
// write_from_mem write path below marks it not clean (1'b0) - confirm intent.
else begin
|
||||
//way_id_out <= counter;
|
||||
miss <= 1'b1;
|
||||
if (write == 1'b0) begin // Read Miss
|
||||
clean[counter[1:0]] <= 1'b1;
|
||||
data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS
|
||||
end else if (write == 1'b1) begin // Write Miss
|
||||
clean[counter[1:0]] <= 1'b1;
|
||||
data[counter[1:0]] <= writedata;
|
||||
end
|
||||
end
|
||||
|
|
||||
end
|
||||
// Fill mode: install o_tag in the victim way, mark it valid and report hit.
// A read fill stores the placeholder 32'h7FF and marks the way clean; a write
// fill stores writedata and marks it not clean.
if (write_from_mem) begin
|
||||
tag[counter[1:0]] <= o_tag;
|
||||
valid[counter[1:0]] <= 1'b1;
|
||||
hit <= 1'b1;
|
||||
if (write == 1'b0) begin // Read Miss
|
||||
clean[counter[1:0]] <= 1'b1;
|
||||
data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS
|
||||
end else if (write == 1'b1) begin // Write Miss
|
||||
clean[counter[1:0]] <= 1'b0;
|
||||
data[counter[1:0]] <= writedata;
|
||||
end
|
||||
end
|
||||
// Idle mode: report hit=1/miss=0 and drive readdata to all ones. Being later
// in source order, these override hit/miss/readdata assigned above in the
// same cycle.
if (idle) begin // Set "way" register equal to invalid value
|
||||
hit <= 1'b1; // set to know it is ready
|
||||
miss <= 1'b0;
|
||||
readdata <= 32'hFFFFFFFF;
|
||||
end
|
||||
// Second find_evict block: way priority here is 3,1,2,0 (unlike the first
// chain) and it falls back to eviction_data when no way matches, so readdata
// is always assigned; it also forces hit=1 and miss=0.
if (find_evict) begin // Keep "way" value the same !!!! Fix. Need to send back data with matching tag. Also need to ensure evicted data doesnt get lost
|
||||
if (tag[3] == o_tag && valid[3]) begin
|
||||
readdata <= data[3];
|
||||
end else if (tag[1] == o_tag && valid[1]) begin
|
||||
readdata <= data[1];
|
||||
end else if (tag[2] == o_tag && valid[2]) begin
|
||||
readdata <= data[2];
|
||||
end else if (tag[0] == o_tag && valid[0]) begin
|
||||
readdata <= data[0];
|
||||
end else begin
|
||||
readdata <= eviction_data;
|
||||
end
|
||||
hit <= 1'b1;
|
||||
miss <= 1'b0;
|
||||
end
|
||||
// Unconditional update: the victim counter changes on every clock in every
// mode (same expression as the assignment inside the access branch), and
// eviction_data captures the data of the way the counter pointed at.
counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC
|
||||
eviction_data <= data[counter[1:0]];
|
||||
end
|
||||
|
|
||||
endmodule
|
||||
29
hw/old_rtl/cache/d_cache_test_bench.cpp
vendored
29
hw/old_rtl/cache/d_cache_test_bench.cpp
vendored
@@ -1,29 +0,0 @@
|
||||
|
||||
|
||||
#include "d_cache_test_bench.h"
|
||||
|
||||
//#define NUM_TESTS 46
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
|
||||
Verilated::commandArgs(argc, argv);
|
||||
|
||||
Verilated::traceEverOn(true);
|
||||
|
||||
|
||||
VX_d_cache v;
|
||||
|
||||
|
||||
bool curr = v.simulate();
|
||||
//if ( curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl;
|
||||
//if (!curr) std::cerr << RED << "Test Failed: " << testing << std::endl;
|
||||
if ( curr) std::cerr << GREEN << "Test Passed: " << std::endl;
|
||||
if (!curr) std::cerr << RED << "Test Failed: " << std::endl;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
355
hw/old_rtl/cache/d_cache_test_bench.h
vendored
355
hw/old_rtl/cache/d_cache_test_bench.h
vendored
@@ -1,355 +0,0 @@
|
||||
// C++ libraries
|
||||
#include <utility>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <iterator>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "VX_define.h"
|
||||
#include "VVX_d_cache_encapsulate.h"
|
||||
#include "verilated.h"
|
||||
|
||||
#include "d_cache_test_bench_debug.h"
|
||||
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
// void set_Index (auto & var, int index, int size, auto val)
|
||||
// {
|
||||
// int real_shift
|
||||
// }
|
||||
|
||||
class VX_d_cache
|
||||
{
|
||||
public:
|
||||
VX_d_cache();
|
||||
~VX_d_cache();
|
||||
bool simulate();
|
||||
bool operation(int, bool);
|
||||
|
||||
VVX_d_cache_encapsulate * vx_d_cache_;
|
||||
long int curr_cycle;
|
||||
int stats_total_cycles = 0;
|
||||
int stats_dram_accesses = 0;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *m_trace;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Creates the Verilated model and, when VCD_OUTPUT is defined, a VCD trace
// writer attached to it that writes to "trace.vcd".
//
// Declared inline because this definition lives in a header; without it,
// including the header from a second translation unit would produce
// multiple-definition link errors.
inline VX_d_cache::VX_d_cache()
	: vx_d_cache_(new VVX_d_cache_encapsulate),
	  curr_cycle(0),
	  stats_total_cycles(0),
	  stats_dram_accesses(0)
{
	#ifdef VCD_OUTPUT
	this->m_trace = new VerilatedVcdC;
	this->vx_d_cache_->trace(m_trace, 99); // 99 = trace depth passed to the model
	this->m_trace->open("trace.vcd");
	#endif
	//this->results.open("../results.txt");
}
|
||||
|
||||
// Releases everything the constructor allocated.
//
// The trace file is closed while the model still exists, then the model is
// destroyed, and finally the trace writer itself is freed (it was previously
// leaked). Declared inline because the definition lives in a header.
inline VX_d_cache::~VX_d_cache()
{
	#ifdef VCD_OUTPUT
	if (m_trace != nullptr) {
		m_trace->close();
	}
	#endif

	delete this->vx_d_cache_;
	this->vx_d_cache_ = nullptr;

	#ifdef VCD_OUTPUT
	delete m_trace;
	m_trace = nullptr;
	#endif
}
|
||||
|
||||
bool VX_d_cache::operation(int counter_value, bool do_op) {
|
||||
if (do_op) {
|
||||
vx_d_cache_->i_p_initial_request = 1;
|
||||
} else {
|
||||
vx_d_cache_->i_p_initial_request = 0;
|
||||
}
|
||||
|
||||
if (counter_value == 0 && do_op) { // Write to bank 1-4 at index 64
|
||||
vx_d_cache_->i_p_initial_request = 1;
|
||||
vx_d_cache_->i_p_read_or_write = 1;
|
||||
vx_d_cache_->i_m_ready = 0;
|
||||
for (int j = 0; j < NT; j++) {
|
||||
vx_d_cache_->i_p_valid[j] = 1;
|
||||
vx_d_cache_->i_p_writedata[j] = 0x7f6f8f6f;
|
||||
vx_d_cache_->i_m_readdata[j][0] = 1;
|
||||
if (j == 0) {
|
||||
vx_d_cache_->i_p_addr[0] = 0x30001004; // bank 1
|
||||
} else if (j == 1) {
|
||||
vx_d_cache_->i_p_addr[1] = 0x30001008; // bank 2
|
||||
} else if (j == 2) {
|
||||
vx_d_cache_->i_p_addr[2] = 0x3000100c; // bank 3
|
||||
} else {
|
||||
vx_d_cache_->i_p_addr[3] = 0x30010010; // bank 4 -- This is serviced 1st, then the other 3 banks are at once
|
||||
}
|
||||
}
|
||||
|
||||
} else if (counter_value == 1 && do_op) { // Write to bank 4-7 at index 108
|
||||
vx_d_cache_->i_p_initial_request = 1;
|
||||
vx_d_cache_->i_p_read_or_write = 1;
|
||||
vx_d_cache_->i_m_ready = 0;
|
||||
for (int j = 0; j < NT; j++) {
|
||||
vx_d_cache_->i_p_valid[j] = 1;
|
||||
vx_d_cache_->i_p_writedata[j] = 0xd1d2d2d3;
|
||||
vx_d_cache_->i_m_readdata[j][0] = 1;
|
||||
if (j == 0) {
|
||||
vx_d_cache_->i_p_addr[0] = 0x30001c14; // bank 5
|
||||
} else if (j == 1) {
|
||||
vx_d_cache_->i_p_addr[1] = 0x30001c18; // bank 6
|
||||
} else if (j == 2) {
|
||||
vx_d_cache_->i_p_addr[2] = 0x30001c1c; // bank 7
|
||||
} else {
|
||||
vx_d_cache_->i_p_addr[3] = 0x30001c10; // bank 4
|
||||
}
|
||||
}
|
||||
|
||||
} else if (counter_value == 2 && do_op) { // Read from bank 1-4 at those indexes
|
||||
for (int j = 0; j < NT; j++) {
|
||||
vx_d_cache_->i_p_initial_request = 1;
|
||||
vx_d_cache_->i_p_read_or_write = 0;
|
||||
vx_d_cache_->i_m_ready = 0;
|
||||
for (int j = 0; j < NT; j++) {
|
||||
vx_d_cache_->i_p_valid[j] = 1;
|
||||
vx_d_cache_->i_p_writedata[j] = 0x23232332;
|
||||
vx_d_cache_->i_m_readdata[j][0] = 1;
|
||||
if (j == 0) {
|
||||
vx_d_cache_->i_p_addr[0] = 0x30001004; // bank 1
|
||||
} else if (j == 1) {
|
||||
vx_d_cache_->i_p_addr[1] = 0x30001c18; // bank 5
|
||||
} else if (j == 2) {
|
||||
vx_d_cache_->i_p_addr[2] = 0x3000100c; // bank 3
|
||||
} else {
|
||||
vx_d_cache_->i_p_addr[3] = 0x30001c1c;; // bank 7
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (counter_value == 3 && do_op) { // Write to Bank 1-5 (evictions will need to take place)
|
||||
vx_d_cache_->i_p_initial_request = 1;
|
||||
vx_d_cache_->i_p_read_or_write = 1;
|
||||
vx_d_cache_->i_m_ready = 0;
|
||||
for (int j = 0; j < NT; j++) {
|
||||
vx_d_cache_->i_p_valid[j] = 1;
|
||||
vx_d_cache_->i_m_readdata[j][0] = 1;
|
||||
if (j == 0) {
|
||||
vx_d_cache_->i_p_addr[0] = 0x20001004; // bank 1
|
||||
vx_d_cache_->i_p_writedata[j] = 0xaaaabbb0;
|
||||
} else if (j == 1) {
|
||||
vx_d_cache_->i_p_addr[1] = 0x20001008; // bank 2
|
||||
vx_d_cache_->i_p_writedata[j] = 0xaaaabbb1;
|
||||
} else if (j == 2) {
|
||||
vx_d_cache_->i_p_addr[2] = 0x2000100c; // bank 3
|
||||
vx_d_cache_->i_p_writedata[j] = 0xaaaabbb2;
|
||||
} else {
|
||||
vx_d_cache_->i_p_addr[3] = 0x20001c14; // bank 5
|
||||
vx_d_cache_->i_p_writedata[j] = 0xaaaabbb3;
|
||||
}
|
||||
}
|
||||
} else if (counter_value == 4 && do_op) { // Read from addresses that were just overwritten above ^^^
|
||||
vx_d_cache_->i_p_initial_request = 1;
|
||||
vx_d_cache_->i_p_read_or_write = 0;
|
||||
vx_d_cache_->i_m_ready = 0;
|
||||
for (int j = 0; j < NT; j++) {
|
||||
vx_d_cache_->i_p_valid[j] = 1;
|
||||
vx_d_cache_->i_p_writedata[j] = 0x23232332;
|
||||
vx_d_cache_->i_m_readdata[j][0] = 1;
|
||||
if (j == 0) {
|
||||
vx_d_cache_->i_p_addr[0] = 0x20001004; // bank 1
|
||||
} else if (j == 1) {
|
||||
vx_d_cache_->i_p_addr[1] = 0x20001008; // bank 2
|
||||
} else if (j == 2) {
|
||||
vx_d_cache_->i_p_addr[2] = 0x2000100c; // bank 3
|
||||
} else {
|
||||
vx_d_cache_->i_p_addr[3] = 0x20001c14; // bank 5
|
||||
}
|
||||
}
|
||||
}
|
||||
/* These will check writing multiple threads writing to the same block
|
||||
} else if (counter_value == 3 && do_op) { // Write to Bank 0
|
||||
vx_d_cache_->i_p_initial_request = 1;
|
||||
vx_d_cache_->i_p_read_or_write = 1;
|
||||
vx_d_cache_->i_m_ready = 0;
|
||||
for (int j = 0; j < NT; j++) {
|
||||
vx_d_cache_->i_p_valid[j] = 1;
|
||||
vx_d_cache_->i_m_readdata[j][0] = 1;
|
||||
if (j == 0) {
|
||||
vx_d_cache_->i_p_addr[0] = 0x30001f00; // bank 0
|
||||
vx_d_cache_->i_p_writedata[j] = 0xaaaabbb0;
|
||||
} else if (j == 1) {
|
||||
vx_d_cache_->i_p_addr[1] = 0x30001c00; // bank 0
|
||||
vx_d_cache_->i_p_writedata[j] = 0xaaaabbb1;
|
||||
} else if (j == 2) {
|
||||
vx_d_cache_->i_p_addr[2] = 0x30001a00; // bank 0
|
||||
vx_d_cache_->i_p_writedata[j] = 0xaaaabbb2;
|
||||
} else {
|
||||
vx_d_cache_->i_p_addr[3] = 0x30001904; // bank 1
|
||||
vx_d_cache_->i_p_writedata[j] = 0xaaaabbb3;
|
||||
}
|
||||
}
|
||||
} else if (counter_value == 4 && do_op) { // Read from Bank 0
|
||||
vx_d_cache_->i_p_initial_request = 1;
|
||||
vx_d_cache_->i_p_read_or_write = 0;
|
||||
vx_d_cache_->i_m_ready = 0;
|
||||
for (int j = 0; j < NT; j++) {
|
||||
vx_d_cache_->i_p_valid[j] = 1;
|
||||
vx_d_cache_->i_p_writedata[j] = 0x23232332;
|
||||
vx_d_cache_->i_m_readdata[j][0] = 1;
|
||||
if (j == 0) {
|
||||
vx_d_cache_->i_p_addr[0] = 0x30001f00; // bank 0
|
||||
} else if (j == 1) {
|
||||
vx_d_cache_->i_p_addr[1] = 0x30001c00; // bank 0
|
||||
} else if (j == 2) {
|
||||
vx_d_cache_->i_p_addr[2] = 0x30001a00; // bank 0
|
||||
} else {
|
||||
vx_d_cache_->i_p_addr[3] = 0x30001904; // bank 1
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
// Handle Memory Accesses
|
||||
unsigned int read_data_from_mem = 0x1111 + counter_value + this->stats_total_cycles;
|
||||
|
||||
if (vx_d_cache_->o_m_valid) {
|
||||
this->stats_dram_accesses = this->stats_dram_accesses + 1; // (assuming memory access takes 20 cycles)
|
||||
|
||||
this->stats_total_cycles += 1;
|
||||
vx_d_cache_->clk = 0;
|
||||
vx_d_cache_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
m_trace->dump(2*this->stats_total_cycles);
|
||||
#endif
|
||||
vx_d_cache_->clk = 1;
|
||||
vx_d_cache_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
m_trace->dump((2*this->stats_total_cycles)+1);
|
||||
#endif
|
||||
|
||||
vx_d_cache_->i_m_ready = 1;
|
||||
for (int j1 = 0; j1 < 8; j1++) {
|
||||
for (int j2 = 0; j2 < 4; j2++) {
|
||||
vx_d_cache_->i_m_readdata[j1][j2] = read_data_from_mem;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vx_d_cache_->i_m_ready = 0;
|
||||
}
|
||||
|
||||
|
||||
if (vx_d_cache_->o_p_waitrequest == 0) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
bool VX_d_cache::simulate()
|
||||
{
|
||||
|
||||
// this->instruction_file_name = file_to_simulate;
|
||||
// this->results << "\n****************\t" << file_to_simulate << "\t****************\n";
|
||||
|
||||
// this->ProcessFile();
|
||||
|
||||
// auto start_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
|
||||
//static bool stop = false;
|
||||
//static int counter = 0;
|
||||
//counter = 0;
|
||||
//stop = false;
|
||||
|
||||
// auto start_time = clock();
|
||||
|
||||
|
||||
vx_d_cache_->clk = 0;
|
||||
vx_d_cache_->rst = 1;
|
||||
//vortex->eval();
|
||||
//counter = 0;
|
||||
vx_d_cache_->rst = 0;
|
||||
|
||||
bool cont = false;
|
||||
bool out_operation = false;
|
||||
bool do_operation = true;
|
||||
int other_counter = 0;
|
||||
//while (this->stop && ((other_counter < 5)))
|
||||
while (other_counter < 5)
|
||||
{
|
||||
|
||||
// std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n";
|
||||
// istop = ibus_driver();
|
||||
// dstop = !dbus_driver();
|
||||
|
||||
vx_d_cache_->clk = 1;
|
||||
vx_d_cache_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
m_trace->dump(2*this->stats_total_cycles);
|
||||
#endif
|
||||
|
||||
//vortex->eval();
|
||||
//dstop = !dbus_driver();
|
||||
|
||||
out_operation = operation(other_counter, do_operation);
|
||||
vx_d_cache_->clk = 0;
|
||||
vx_d_cache_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
m_trace->dump((2*this->stats_total_cycles)+1);
|
||||
#endif
|
||||
//vortex->eval();
|
||||
|
||||
/*
|
||||
// stop = istop && dstop;
|
||||
stop = vortex->out_ebreak;
|
||||
if (stop || cont)
|
||||
{
|
||||
cont = true;
|
||||
counter++;
|
||||
} else
|
||||
{
|
||||
counter = 0;
|
||||
}
|
||||
*/
|
||||
if (out_operation) {
|
||||
other_counter++;
|
||||
do_operation = true;
|
||||
} else {
|
||||
do_operation = false;
|
||||
}
|
||||
++(this->stats_total_cycles);
|
||||
|
||||
if (this->stats_total_cycles > 5000) {
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::cerr << "New Total Cycles: " << (this->stats_total_cycles + (this->stats_dram_accesses * 20)) << "\n";
|
||||
|
||||
//uint32_t status;
|
||||
//ram.getWord(0, &status);
|
||||
|
||||
//this->print_stats();
|
||||
|
||||
|
||||
|
||||
return (true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
1
hw/old_rtl/cache/d_cache_test_bench_debug.h
vendored
1
hw/old_rtl/cache/d_cache_test_bench_debug.h
vendored
@@ -1 +0,0 @@
|
||||
#define VCD_OUTPUT
|
||||
@@ -1,18 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_BRANCH_RSP
|
||||
|
||||
`define VX_BRANCH_RSP
|
||||
|
||||
interface VX_branch_response_inter ();
|
||||
wire valid_branch;
|
||||
wire branch_dir;
|
||||
wire[31:0] branch_dest;
|
||||
wire[`NW_M1:0] branch_warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,24 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_CSR_REQ
|
||||
|
||||
`define VX_CSR_REQ
|
||||
|
||||
interface VX_csr_req_inter ();
|
||||
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[4:0] alu_op;
|
||||
wire is_csr;
|
||||
wire[11:0] csr_address;
|
||||
wire csr_immed;
|
||||
wire[31:0] csr_mask;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,21 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_CSR_WB_REQ
|
||||
|
||||
`define VX_CSR_WB_REQ
|
||||
|
||||
interface VX_csr_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
|
||||
wire[`NT_M1:0][31:0] csr_result;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,19 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_DCACHE_REQ
|
||||
|
||||
`define VX_DCACHE_REQ
|
||||
|
||||
interface VX_dcache_request_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] out_cache_driver_in_address;
|
||||
wire[2:0] out_cache_driver_in_mem_read;
|
||||
wire[2:0] out_cache_driver_in_mem_write;
|
||||
wire[`NT_M1:0] out_cache_driver_in_valid;
|
||||
wire[`NT_M1:0][31:0] out_cache_driver_in_data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,16 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_DCACHE_RSP
|
||||
|
||||
`define VX_DCACHE_RSP
|
||||
|
||||
interface VX_dcache_response_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] in_cache_driver_out_data;
|
||||
wire delay;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,27 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_DRAM_REQ_RSP_INTER
|
||||
|
||||
`define VX_DRAM_REQ_RSP_INTER
|
||||
|
||||
interface VX_dram_req_rsp_inter #(
|
||||
parameter NUMBER_BANKS = 8,
|
||||
parameter NUM_WORDS_PER_BLOCK = 4) ();
|
||||
|
||||
// Req
|
||||
wire [31:0] o_m_evict_addr;
|
||||
wire [31:0] o_m_read_addr;
|
||||
wire o_m_valid;
|
||||
wire[NUMBER_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata;
|
||||
wire o_m_read_or_write;
|
||||
|
||||
// Rsp
|
||||
wire[NUMBER_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata;
|
||||
wire i_m_ready;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,51 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_EXE_UNIT_REQ_INTER
|
||||
|
||||
`define VX_EXE_UNIT_REQ_INTER
|
||||
|
||||
interface VX_exec_unit_req_inter ();
|
||||
|
||||
// Meta
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[31:0] curr_PC;
|
||||
wire[31:0] PC_next;
|
||||
|
||||
// Write Back Info
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
|
||||
// Data and alu op
|
||||
wire[`NT_M1:0][31:0] a_reg_data;
|
||||
wire[`NT_M1:0][31:0] b_reg_data;
|
||||
wire[4:0] alu_op;
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire rs2_src;
|
||||
wire[31:0] itype_immed;
|
||||
wire[19:0] upper_immed;
|
||||
|
||||
// Branch type
|
||||
wire[2:0] branch_type;
|
||||
|
||||
// Jal info
|
||||
wire jalQual;
|
||||
wire jal;
|
||||
wire[31:0] jal_offset;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire ebreak;
|
||||
wire wspawn;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
// CSR info
|
||||
wire is_csr;
|
||||
wire[11:0] csr_address;
|
||||
wire csr_immed;
|
||||
wire[31:0] csr_mask;
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,46 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_FrE_to_BE_INTER
|
||||
|
||||
`define VX_FrE_to_BE_INTER
|
||||
|
||||
interface VX_frE_to_bckE_req_inter ();
|
||||
|
||||
wire[11:0] csr_address;
|
||||
wire is_csr;
|
||||
wire csr_immed;
|
||||
wire[31:0] csr_mask;
|
||||
wire[4:0] rd;
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire[4:0] alu_op;
|
||||
wire[1:0] wb;
|
||||
wire rs2_src;
|
||||
wire[31:0] itype_immed;
|
||||
wire[2:0] mem_read;
|
||||
wire[2:0] mem_write;
|
||||
wire[2:0] branch_type;
|
||||
wire[19:0] upper_immed;
|
||||
wire[31:0] curr_PC;
|
||||
/* verilator lint_off UNUSED */
|
||||
wire ebreak;
|
||||
/* verilator lint_on UNUSED */
|
||||
wire jalQual;
|
||||
wire jal;
|
||||
wire[31:0] jal_offset;
|
||||
wire[31:0] PC_next;
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
|
||||
// GPGPU stuff
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
wire is_barrier;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,18 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_GPR_CLONE_INTER
|
||||
|
||||
`define VX_GPR_CLONE_INTER
|
||||
|
||||
|
||||
interface VX_gpr_clone_inter ();
|
||||
/* verilator lint_off UNUSED */
|
||||
wire is_clone;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
/* verilator lint_on UNUSED */
|
||||
endinterface
|
||||
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,14 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_gpr_data_INTER
|
||||
|
||||
`define VX_gpr_data_INTER
|
||||
|
||||
interface VX_gpr_data_inter ();
|
||||
wire[`NT_M1:0][31:0] a_reg_data;
|
||||
wire[`NT_M1:0][31:0] b_reg_data;
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,14 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
`ifndef VX_GPR_JAL_INTER
|
||||
|
||||
`define VX_GPR_JAL_INTER
|
||||
|
||||
|
||||
interface VX_gpr_jal_inter ();
|
||||
wire is_jal;
|
||||
wire[31:0] curr_PC;
|
||||
endinterface
|
||||
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,17 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
`ifndef VX_GPR_READ
|
||||
|
||||
`define VX_GPR_READ
|
||||
|
||||
|
||||
interface VX_gpr_read_inter ();
|
||||
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,18 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
`ifndef VX_GPR_WSPAWN_INTER
|
||||
|
||||
`define VX_GPR_WSPAWN_INTER
|
||||
|
||||
|
||||
interface VX_gpr_wspawn_inter ();
|
||||
/* verilator lint_off UNUSED */
|
||||
wire is_wspawn;
|
||||
wire[`NW_M1:0] which_wspawn;
|
||||
// wire[`NW_M1:0] warp_num;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,27 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_GPU_INST_REQ_IN
|
||||
|
||||
`define VX_GPU_INST_REQ_IN
|
||||
|
||||
interface VX_gpu_inst_req_inter();
|
||||
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
|
||||
wire is_barrier;
|
||||
|
||||
wire[31:0] pc_next;
|
||||
|
||||
wire[`NT_M1:0][31:0] a_reg_data;
|
||||
wire[31:0] rd2;
|
||||
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,19 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_ICACHE_REQ
|
||||
|
||||
`define VX_ICACHE_REQ
|
||||
|
||||
interface VX_icache_request_inter ();
|
||||
|
||||
wire[31:0] pc_address;
|
||||
wire[2:0] out_cache_driver_in_mem_read;
|
||||
wire[2:0] out_cache_driver_in_mem_write;
|
||||
wire out_cache_driver_in_valid;
|
||||
wire[31:0] out_cache_driver_in_data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,18 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_ICACHE_RSP
|
||||
|
||||
`define VX_ICACHE_RSP
|
||||
|
||||
interface VX_icache_response_inter ();
|
||||
|
||||
// wire ready;
|
||||
// wire stall;
|
||||
wire[31:0] instruction;
|
||||
wire delay;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,21 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_EXEC_UNIT_WB_INST_INTER
|
||||
|
||||
`define VX_EXEC_UNIT_WB_INST_INTER
|
||||
|
||||
interface VX_inst_exec_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result;
|
||||
wire[31:0] exec_wb_pc;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NT_M1:0] wb_valid;
|
||||
wire[`NW_M1:0] wb_warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,21 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_MEM_WB_INST_INTER
|
||||
|
||||
`define VX_MEM_WB_INST_INTER
|
||||
|
||||
interface VX_inst_mem_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] loaded_data;
|
||||
wire[31:0] mem_wb_pc;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NT_M1:0] wb_valid;
|
||||
wire[`NW_M1:0] wb_warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,16 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_F_D_INTER
|
||||
|
||||
`define VX_F_D_INTER
|
||||
|
||||
interface VX_inst_meta_inter ();
|
||||
wire[31:0] instruction;
|
||||
wire[31:0] inst_pc;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NT_M1:0] valid;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,17 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_JAL_RSP
|
||||
|
||||
`define VX_JAL_RSP
|
||||
|
||||
interface VX_jal_response_inter ();
|
||||
|
||||
wire jal;
|
||||
wire[31:0] jal_dest;
|
||||
wire[`NW_M1:0] jal_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,17 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_JOIN_INTER
|
||||
|
||||
`define VX_JOIN_INTER
|
||||
|
||||
interface VX_join_inter ();
|
||||
|
||||
wire is_join;
|
||||
wire[`NW_M1:0] join_warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,24 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_LSU_REQ_INTER
|
||||
|
||||
`define VX_LSU_REQ_INTER
|
||||
|
||||
interface VX_lsu_req_inter ();
|
||||
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[31:0] lsu_pc;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NT_M1:0][31:0] store_data;
|
||||
wire[`NT_M1:0][31:0] base_address; // A reg data
|
||||
wire[31:0] offset; // itype_immed
|
||||
wire[2:0] mem_read;
|
||||
wire[2:0] mem_write;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,28 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_MEM_REQ_IN
|
||||
|
||||
`define VX_MEM_REQ_IN
|
||||
|
||||
interface VX_mem_req_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result;
|
||||
wire[2:0] mem_read;
|
||||
wire[2:0] mem_write;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire[`NT_M1:0][31:0] rd2;
|
||||
wire[31:0] PC_next;
|
||||
wire[31:0] curr_PC;
|
||||
wire[31:0] branch_offset;
|
||||
wire[2:0] branch_type;
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,22 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_MW_WB_INTER
|
||||
|
||||
`define VX_MW_WB_INTER
|
||||
|
||||
interface VX_mw_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result;
|
||||
wire[`NT_M1:0][31:0] mem_result;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[31:0] PC_next;
|
||||
wire[`NT_M1:0] valid;
|
||||
wire [`NW_M1:0] warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,36 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_WARP_CTL_INTER
|
||||
|
||||
`define VX_WARP_CTL_INTER
|
||||
|
||||
interface VX_warp_ctl_inter ();
|
||||
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire change_mask;
|
||||
wire[`NT_M1:0] thread_mask;
|
||||
|
||||
wire wspawn;
|
||||
wire[31:0] wspawn_pc;
|
||||
wire[`NW-1:0] wspawn_new_active;
|
||||
|
||||
wire ebreak;
|
||||
|
||||
// barrier
|
||||
wire is_barrier;
|
||||
wire[31:0] barrier_id;
|
||||
wire[$clog2(`NW):0] num_warps;
|
||||
|
||||
wire is_split;
|
||||
wire dont_split;
|
||||
wire[`NW_M1:0] split_warp_num;
|
||||
wire[`NT_M1:0] split_new_mask;
|
||||
wire[`NT_M1:0] split_later_mask;
|
||||
wire[31:0] split_save_pc;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,21 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_WB_INTER
|
||||
|
||||
`define VX_WB_INTER
|
||||
|
||||
|
||||
interface VX_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] write_data;
|
||||
wire[31:0] wb_pc;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NT_M1:0] wb_valid;
|
||||
wire[`NW_M1:0] wb_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,15 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_WSTALL_INTER
|
||||
|
||||
`define VX_WSTALL_INTER
|
||||
|
||||
|
||||
interface VX_wstall_inter();
|
||||
wire wstall;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
endinterface
|
||||
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,36 +0,0 @@
|
||||
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
module VX_d_e_reg (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire in_branch_stall,
|
||||
input wire in_freeze,
|
||||
VX_frE_to_bckE_req_inter VX_frE_to_bckE_req,
|
||||
|
||||
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req
|
||||
);
|
||||
|
||||
|
||||
wire stall = in_freeze;
|
||||
wire flush = (in_branch_stall == `STALL);
|
||||
|
||||
|
||||
VX_generic_register #(.N(233 + `NW_M1 + 1 + `NT)) d_e_reg
|
||||
(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
.flush(flush),
|
||||
.in ({VX_frE_to_bckE_req.csr_address, VX_frE_to_bckE_req.jalQual, VX_frE_to_bckE_req.ebreak, VX_frE_to_bckE_req.is_csr, VX_frE_to_bckE_req.csr_immed, VX_frE_to_bckE_req.csr_mask, VX_frE_to_bckE_req.rd, VX_frE_to_bckE_req.rs1, VX_frE_to_bckE_req.rs2, VX_frE_to_bckE_req.alu_op, VX_frE_to_bckE_req.wb, VX_frE_to_bckE_req.rs2_src, VX_frE_to_bckE_req.itype_immed, VX_frE_to_bckE_req.mem_read, VX_frE_to_bckE_req.mem_write, VX_frE_to_bckE_req.branch_type, VX_frE_to_bckE_req.upper_immed, VX_frE_to_bckE_req.curr_PC, VX_frE_to_bckE_req.jal, VX_frE_to_bckE_req.jal_offset, VX_frE_to_bckE_req.PC_next, VX_frE_to_bckE_req.valid, VX_frE_to_bckE_req.warp_num, VX_frE_to_bckE_req.is_wspawn, VX_frE_to_bckE_req.is_tmc, VX_frE_to_bckE_req.is_split, VX_frE_to_bckE_req.is_barrier}),
|
||||
.out ({VX_bckE_req.csr_address , VX_bckE_req.jalQual , VX_bckE_req.ebreak ,VX_bckE_req.is_csr , VX_bckE_req.csr_immed , VX_bckE_req.csr_mask , VX_bckE_req.rd , VX_bckE_req.rs1 , VX_bckE_req.rs2 , VX_bckE_req.alu_op , VX_bckE_req.wb , VX_bckE_req.rs2_src , VX_bckE_req.itype_immed , VX_bckE_req.mem_read , VX_bckE_req.mem_write , VX_bckE_req.branch_type , VX_bckE_req.upper_immed , VX_bckE_req.curr_PC , VX_bckE_req.jal , VX_bckE_req.jal_offset , VX_bckE_req.PC_next , VX_bckE_req.valid , VX_bckE_req.warp_num , VX_bckE_req.is_wspawn , VX_bckE_req.is_tmc , VX_bckE_req.is_split , VX_bckE_req.is_barrier })
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
module VX_f_d_reg (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire in_freeze,
|
||||
|
||||
VX_inst_meta_inter fe_inst_meta_fd,
|
||||
VX_inst_meta_inter fd_inst_meta_de
|
||||
|
||||
);
|
||||
|
||||
wire flush = 1'b0;
|
||||
wire stall = in_freeze == 1'b1;
|
||||
|
||||
|
||||
VX_generic_register #(.N(64 + `NW_M1 + 1 + `NT)) f_d_reg
|
||||
(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
.flush(flush),
|
||||
.in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}),
|
||||
.out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid})
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
@@ -1,7 +0,0 @@
|
||||
# Dynamic Instructions: 51711
|
||||
# of total cycles: 51728
|
||||
# of forwarding stalls: 0
|
||||
# of branch stalls: 0
|
||||
# CPI: 1.00033
|
||||
# time to simulate: 0 milliseconds
|
||||
# GRADE: Failed on test: 4294967295
|
||||
@@ -1,36 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
// Converts in_valids to bank_valids
|
||||
module VX_bank_valids
|
||||
#(
|
||||
parameter NB = 4,
|
||||
parameter BITS_PER_BANK = 3
|
||||
)
|
||||
(
|
||||
input wire[`NT_M1:0] in_valids,
|
||||
input wire[`NT_M1:0][31:0] in_addr,
|
||||
output reg[NB:0][`NT_M1:0] bank_valids
|
||||
);
|
||||
|
||||
|
||||
integer i, j;
|
||||
always@(*) begin
|
||||
for(j = 0; j <= NB; j = j+1 ) begin
|
||||
for(i = 0; i <= `NT_M1; i = i+1) begin
|
||||
if(in_valids[i]) begin
|
||||
if(in_addr[i][(2+BITS_PER_BANK-1):2] == j[BITS_PER_BANK-1:0]) begin
|
||||
bank_valids[j][i] = 1'b1;
|
||||
end
|
||||
else begin
|
||||
bank_valids[j][i] = 1'b0;
|
||||
end
|
||||
|
||||
end
|
||||
else begin
|
||||
bank_valids[j][i] = 1'b0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -1,115 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
module VX_priority_encoder_sm
|
||||
#(
|
||||
parameter NB = 4,
|
||||
parameter BITS_PER_BANK = 3,
|
||||
parameter NUM_REQ = 3
|
||||
)
|
||||
(
|
||||
//INPUTS
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire[`NT_M1:0] in_valid,
|
||||
input wire[`NT_M1:0][31:0] in_address,
|
||||
input wire[`NT_M1:0][31:0] in_data,
|
||||
// OUTPUTS
|
||||
// To SM Module
|
||||
output reg[NB:0] out_valid,
|
||||
output reg[NB:0][31:0] out_address,
|
||||
output reg[NB:0][31:0] out_data,
|
||||
|
||||
// To Processor
|
||||
output wire[NB:0][`CLOG2(NUM_REQ) - 1:0] req_num,
|
||||
output reg stall,
|
||||
output wire send_data // Finished all of the requests
|
||||
);
|
||||
|
||||
reg[`NT_M1:0] left_requests;
|
||||
reg[`NT_M1:0] serviced;
|
||||
|
||||
|
||||
wire[`NT_M1:0] use_valid;
|
||||
|
||||
|
||||
wire requests_left = (|left_requests);
|
||||
|
||||
assign use_valid = (requests_left) ? left_requests : in_valid;
|
||||
|
||||
|
||||
wire[NB:0][`NT_M1:0] bank_valids;
|
||||
VX_bank_valids #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_bank_valid(
|
||||
.in_valids(use_valid),
|
||||
.in_addr(in_address),
|
||||
.bank_valids(bank_valids)
|
||||
);
|
||||
|
||||
wire[NB:0] more_than_one_valid;
|
||||
|
||||
genvar curr_bank;
|
||||
generate
|
||||
for (curr_bank = 0; curr_bank <= NB; curr_bank = curr_bank + 1)
|
||||
begin
|
||||
wire[`CLOG2(`NT):0] num_valids;
|
||||
|
||||
VX_countones #(.N(`NT)) valids_counter (
|
||||
.valids(bank_valids[curr_bank]),
|
||||
.count (num_valids)
|
||||
);
|
||||
assign more_than_one_valid[curr_bank] = num_valids > 1;
|
||||
// assign more_than_one_valid[curr_bank] = $countones(bank_valids[curr_bank]) > 1;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
assign stall = (|more_than_one_valid);
|
||||
assign send_data = (!stall) && (|in_valid); // change
|
||||
|
||||
wire[NB:0][(`CLOG2(NUM_REQ)) - 1:0] internal_req_num;
|
||||
wire[NB:0] internal_out_valid;
|
||||
|
||||
|
||||
// There's one or less valid per bank
|
||||
genvar curr_bank_o;
|
||||
for (curr_bank_o = 0; curr_bank_o <= NB; curr_bank_o = curr_bank_o + 1)
|
||||
begin
|
||||
|
||||
VX_generic_priority_encoder #(.N(NUM_REQ)) vx_priority_encoder(
|
||||
.valids(bank_valids[curr_bank_o]),
|
||||
.index(internal_req_num[curr_bank_o]),
|
||||
.found(internal_out_valid[curr_bank_o])
|
||||
);
|
||||
assign out_address[curr_bank_o] = internal_out_valid[curr_bank_o] ? in_address[internal_req_num[curr_bank_o]] : 0;
|
||||
assign out_data[curr_bank_o] = internal_out_valid[curr_bank_o] ? in_data[internal_req_num[curr_bank_o]] : 0;
|
||||
end
|
||||
|
||||
integer curr_b;
|
||||
always @(*) begin
|
||||
serviced = 0;
|
||||
for (curr_b = 0; curr_b <= NB; curr_b=curr_b+1) begin
|
||||
serviced[internal_req_num[curr_b]] = 1;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign req_num = internal_req_num;
|
||||
assign out_valid = internal_out_valid;
|
||||
|
||||
|
||||
wire[`NT_M1:0] serviced_qual = in_valid & (serviced);
|
||||
|
||||
wire[`NT_M1:0] new_left_requests = (left_requests == 0) ? (in_valid & ~serviced_qual) : (left_requests & ~ serviced_qual);
|
||||
|
||||
// wire[`NT_M1:0] new_left_requests = left_requests & ~(serviced_qual);
|
||||
|
||||
always @(posedge clk, posedge reset) begin
|
||||
if (reset) begin
|
||||
left_requests <= 0;
|
||||
// serviced = 0;
|
||||
end else begin
|
||||
if (!stall) left_requests <= 0;
|
||||
else left_requests <= new_left_requests;
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -1,178 +0,0 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
module VX_shared_memory
|
||||
#(
|
||||
parameter SM_SIZE = 4096, // Bytes
|
||||
parameter SM_BANKS = 4,
|
||||
parameter SM_BYTES_PER_READ = 16,
|
||||
parameter SM_WORDS_PER_READ = 4,
|
||||
parameter SM_LOG_WORDS_PER_READ = 2,
|
||||
parameter SM_HEIGHT = 128, // Bytes
|
||||
parameter SM_BANK_OFFSET_START = 2,
|
||||
parameter SM_BANK_OFFSET_END = 4,
|
||||
parameter SM_BLOCK_OFFSET_START = 5,
|
||||
parameter SM_BLOCK_OFFSET_END = 6,
|
||||
parameter SM_INDEX_START = 7,
|
||||
parameter SM_INDEX_END = 13,
|
||||
parameter NUM_REQ = 4,
|
||||
parameter BITS_PER_BANK = 3
|
||||
)
|
||||
(
|
||||
//INPUTS
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire[`NT_M1:0] in_valid,
|
||||
input wire[`NT_M1:0][31:0] in_address,
|
||||
input wire[`NT_M1:0][31:0] in_data,
|
||||
input wire[2:0] mem_read,
|
||||
input wire[2:0] mem_write,
|
||||
//OUTPUTS
|
||||
output wire[`NT_M1:0] out_valid,
|
||||
output wire[`NT_M1:0][31:0] out_data,
|
||||
output wire stall
|
||||
);
|
||||
|
||||
//reg[NB:0][31:0] temp_address;
|
||||
//reg[NB:0][31:0] temp_in_data;
|
||||
//reg[NB:0] temp_in_valid;
|
||||
reg[SM_BANKS - 1:0][31:0] temp_address;
|
||||
reg[SM_BANKS - 1:0][31:0] temp_in_data;
|
||||
reg[SM_BANKS - 1:0] temp_in_valid;
|
||||
|
||||
reg[`NT_M1:0] temp_out_valid;
|
||||
reg[`NT_M1:0][31:0] temp_out_data;
|
||||
|
||||
//reg [NB:0][6:0] block_addr;
|
||||
//reg [NB:0][3:0][31:0] block_wdata;
|
||||
//reg [NB:0][3:0][31:0] block_rdata;
|
||||
//reg [NB:0][1:0] block_we;
|
||||
reg [SM_BANKS - 1:0][$clog2(SM_HEIGHT) - 1:0] block_addr;
|
||||
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_wdata;
|
||||
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_rdata;
|
||||
reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we;
|
||||
|
||||
wire send_data;
|
||||
|
||||
//reg[NB:0][1:0] req_num;
|
||||
reg[SM_BANKS - 1:0][`CLOG2(NUM_REQ) - 1:0] req_num; // not positive about this
|
||||
|
||||
wire [`NT_M1:0] orig_in_valid;
|
||||
|
||||
|
||||
genvar f;
|
||||
generate
|
||||
for(f = 0; f < `NT; f = f+1) begin
|
||||
assign orig_in_valid[f] = in_valid[f];
|
||||
end
|
||||
|
||||
assign out_valid = send_data ? temp_out_valid : 0;
|
||||
assign out_data = send_data ? temp_out_data : 0;
|
||||
endgenerate
|
||||
|
||||
|
||||
//VX_priority_encoder_sm #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_priority_encoder_sm(
|
||||
VX_priority_encoder_sm #(.NB(SM_BANKS - 1), .BITS_PER_BANK(BITS_PER_BANK), .NUM_REQ(NUM_REQ)) vx_priority_encoder_sm(
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.in_valid(orig_in_valid),
|
||||
.in_address(in_address),
|
||||
.in_data(in_data),
|
||||
|
||||
.out_valid(temp_in_valid),
|
||||
.out_address(temp_address),
|
||||
.out_data(temp_in_data),
|
||||
|
||||
.req_num(req_num),
|
||||
.stall(stall),
|
||||
.send_data(send_data)
|
||||
);
|
||||
|
||||
|
||||
genvar j;
|
||||
integer i;
|
||||
generate
|
||||
//for(j=0; j<= NB; j=j+1) begin : sm_mem_block
|
||||
for(j=0; j<= SM_BANKS - 1; j=j+1) begin
|
||||
|
||||
wire shm_write = (mem_write != `NO_MEM_WRITE) && temp_in_valid[j];
|
||||
|
||||
VX_shared_memory_block#
|
||||
(
|
||||
.SMB_HEIGHT(SM_HEIGHT),
|
||||
.SMB_WORDS_PER_READ(SM_WORDS_PER_READ),
|
||||
.SMB_LOG_WORDS_PER_READ(SM_LOG_WORDS_PER_READ)
|
||||
) vx_shared_memory_block
|
||||
(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.addr (block_addr[j]),
|
||||
.wdata (block_wdata[j]),
|
||||
.we (block_we[j]),
|
||||
.shm_write(shm_write),
|
||||
.data_out (block_rdata[j])
|
||||
);
|
||||
end
|
||||
|
||||
|
||||
always @(*) begin
|
||||
block_addr = 0;
|
||||
block_we = 0;
|
||||
block_wdata = 0;
|
||||
//for(i = 0; i <= NB; i = i+1) begin
|
||||
for(i = 0; i <= SM_BANKS - 1; i = i+1) begin
|
||||
if(temp_in_valid[i] == 1'b1) begin
|
||||
//1. Check if the request is actually to the shared memory
|
||||
if((temp_address[i][31:24]) == 8'hFF) begin
|
||||
// STORES
|
||||
if(mem_write != `NO_MEM_WRITE) begin
|
||||
if(mem_write == `SB_MEM_WRITE) begin
|
||||
//TODO
|
||||
end
|
||||
else if(mem_write == `SH_MEM_WRITE) begin
|
||||
//TODO
|
||||
end
|
||||
else if(mem_write == `SW_MEM_WRITE) begin
|
||||
//block_addr[i] = temp_address[i][13:7];
|
||||
//block_we[i] = temp_address[i][6:5];
|
||||
//block_wdata[i][temp_address[i][6:5]] = temp_in_data[i];
|
||||
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
|
||||
block_we[i] = temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START];
|
||||
block_wdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]] = temp_in_data[i];
|
||||
end
|
||||
end
|
||||
//LOADS
|
||||
else if(mem_read != `NO_MEM_READ) begin
|
||||
if(mem_read == `LB_MEM_READ) begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_read == `LH_MEM_READ)
|
||||
begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_read == `LW_MEM_READ)
|
||||
begin
|
||||
//block_addr[i] = temp_address[i][13:7];
|
||||
//temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][6:5]];
|
||||
//temp_out_valid[req_num[i]] = 1'b1;
|
||||
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
|
||||
temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]];
|
||||
temp_out_valid[req_num[i]] = 1'b1;
|
||||
end
|
||||
else if (mem_read == `LBU_MEM_READ)
|
||||
begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_read == `LHU_MEM_READ)
|
||||
begin
|
||||
//TODO
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endgenerate
|
||||
|
||||
|
||||
endmodule
|
||||
@@ -1,115 +0,0 @@
|
||||
module VX_shared_memory_block
|
||||
#(
|
||||
parameter SMB_SIZE = 4096, // Bytes
|
||||
parameter SMB_BYTES_PER_READ = 16,
|
||||
parameter SMB_WORDS_PER_READ = 4,
|
||||
parameter SMB_LOG_WORDS_PER_READ = 2,
|
||||
parameter SMB_HEIGHT = 128, // Bytes
|
||||
parameter BITS_PER_BANK = 3
|
||||
)
|
||||
(
|
||||
input wire clk, // Clock
|
||||
input wire reset,
|
||||
//input wire[6:0] addr,
|
||||
//input wire[3:0][31:0] wdata,
|
||||
//input wire[1:0] we,
|
||||
//input wire shm_write,
|
||||
|
||||
//output wire[3:0][31:0] data_out
|
||||
input wire[$clog2(SMB_HEIGHT) - 1:0] addr,
|
||||
input wire[SMB_WORDS_PER_READ-1:0][31:0] wdata,
|
||||
input wire[SMB_LOG_WORDS_PER_READ-1:0] we,
|
||||
input wire shm_write,
|
||||
|
||||
output wire[SMB_WORDS_PER_READ-1:0][31:0] data_out
|
||||
|
||||
);
|
||||
|
||||
|
||||
`ifndef SYN
|
||||
|
||||
//reg[3:0][31:0] shared_memory[127:0];
|
||||
reg[SMB_WORDS_PER_READ-1:0][31:0] shared_memory[SMB_HEIGHT-1:0];
|
||||
|
||||
//wire need_to_write = (|we);
|
||||
integer curr_ind;
|
||||
always @(posedge clk, posedge reset) begin
|
||||
if (reset) begin
|
||||
//for (curr_ind = 0; curr_ind < 128; curr_ind = curr_ind + 1)
|
||||
for (curr_ind = 0; curr_ind < SMB_HEIGHT; curr_ind = curr_ind + 1)
|
||||
begin
|
||||
shared_memory[curr_ind] = 0;
|
||||
end
|
||||
end else if(shm_write) begin
|
||||
shared_memory[addr][we][31:0] = wdata[we][31:0]; // - Ethan's addition
|
||||
//if (we == 2'b00) shared_memory[addr][0][31:0] <= wdata[0][31:0];
|
||||
//if (we == 2'b01) shared_memory[addr][1][31:0] <= wdata[1][31:0];
|
||||
//if (we == 2'b10) shared_memory[addr][2][31:0] <= wdata[2][31:0];
|
||||
//if (we == 2'b11) shared_memory[addr][3][31:0] <= wdata[3][31:0];
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign data_out = shm_write ? 0 : shared_memory[addr];
|
||||
|
||||
`else
|
||||
|
||||
wire cena = 0;
|
||||
wire cenb = !shm_write;
|
||||
|
||||
wire[3:0][31:0] write_bit_mask;
|
||||
|
||||
//assign write_bit_mask[0] = (we == 2'b00) ? {32{1'b1}} : {32{1'b0}};
|
||||
//assign write_bit_mask[1] = (we == 2'b01) ? {32{1'b1}} : {32{1'b0}};
|
||||
//assign write_bit_mask[2] = (we == 2'b10) ? {32{1'b1}} : {32{1'b0}};
|
||||
//assign write_bit_mask[3] = (we == 2'b11) ? {32{1'b1}} : {32{1'b0}};
|
||||
genvar curr_word;
|
||||
for (curr_word = 0; curr_word < SMB_WORDS_PER_READ; curr_word = curr_word + 1)
|
||||
begin
|
||||
assign write_bit_mask[curr_word] = (we == curr_word) ? {32{1'b1}} : {32{1'b0}}; // mask all 32 bits of the selected word; a bare 1 zero-extends to 32'h1 and would cover bit 0 only
|
||||
end
|
||||
|
||||
// Using ASIC MEM
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_128x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(data_out),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena),
|
||||
.AA(addr),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask),
|
||||
.AB(addr),
|
||||
.DB(wdata),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(7'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(7'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
1
hw/unit_tests/cache/cachesim.cpp
vendored
1
hw/unit_tests/cache/cachesim.cpp
vendored
@@ -3,7 +3,6 @@
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <bitset>
|
||||
|
||||
uint64_t timestamp = 0;
|
||||
|
||||
1
simX/.gitignore
vendored
1
simX/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
obj_dir
|
||||
12
simX/LICENSE
12
simX/LICENSE
@@ -1,12 +0,0 @@
|
||||
Copyright (c) 2011, Georgia Institute of Technology
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -1,37 +1,34 @@
|
||||
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I. -I../hw
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
CFLAGS += -I../../hw
|
||||
LDFLAGS +=
|
||||
|
||||
TOP = cache_simX
|
||||
TOP = vx_cache_sim
|
||||
|
||||
RTL_DIR = ../hw/old_rtl
|
||||
RTL_DIR = ../hw/rtl
|
||||
|
||||
SRCS = simX.cpp args.cpp mem.cpp core.cpp instruction.cpp enc.cpp util.cpp
|
||||
PROJECT = simX
|
||||
|
||||
RTL_INCLUDE=-I$(RTL_DIR) -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/shared_memory
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert
|
||||
VL_FLAGS += -Wno-DECLFILENAME
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH
|
||||
SRCS = util.cpp args.cpp mem.cpp core.cpp warp.cpp instr.cpp decode.cpp execute.cpp simX.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
CXXFLAGS += $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CFLAGS += -DNDEBUG
|
||||
CXXFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
all: simX
|
||||
all: $(PROJECT)
|
||||
|
||||
simX:
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)'
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir
|
||||
rm -rf $(PROJECT) *.o .depend
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
- Anonymous chunks whose names are not saved by the object writer.
|
||||
- 32-bit instruction encoding for larger-pointered architecture versions.
|
||||
- HOFDump mode for HARPTool/HOFTool
|
||||
- Make operation information tables into member functions of Instruction, if
|
||||
possible.
|
||||
- Anonymous assigned values in the assembler.
|
||||
- References (pointers) as .word directive contents in the assembler.
|
||||
- Instruction validation before encoding.
|
||||
- Make readError in obj.cpp throw something instead of printing a message and
|
||||
exiting.
|
||||
- Limit checking for byte/word encoders (e.g. 255 pRegs, 256 regs for byte)
|
||||
- Eliminate the tmp_buf nonsense from the chunk encoder.
|
||||
- Loosen arch restrictions imposed for interoperability (the number of lanes is
|
||||
typically unimportant)
|
||||
166
simX/archdef.h
Normal file
166
simX/archdef.h
Normal file
@@ -0,0 +1,166 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <stdio.h>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef {
|
||||
public:
|
||||
struct Undefined {};
|
||||
|
||||
ArchDef(const std::string &s,
|
||||
int num_cores,
|
||||
int num_warps,
|
||||
int num_threads) {
|
||||
std::istringstream iss(s.c_str());
|
||||
wordSize_ = 4;
|
||||
encChar_ = 'w';
|
||||
numRegs_ = 32;
|
||||
numPRegs_ = 0;
|
||||
numCores_ = num_cores;
|
||||
numWarps_ = num_warps;
|
||||
numThreads_ = num_threads;
|
||||
extent_ = EXT_END;
|
||||
}
|
||||
|
||||
operator std::string () const {
|
||||
if (extent_ == EXT_NULL)
|
||||
return "";
|
||||
|
||||
std::ostringstream oss;
|
||||
if (extent_ >= EXT_WORDSIZE) oss << wordSize_;
|
||||
if (extent_ >= EXT_ENC ) oss << encChar_;
|
||||
if (extent_ >= EXT_REGS ) oss << numRegs_;
|
||||
if (extent_ >= EXT_PREGS ) oss << '/' << numPRegs_;
|
||||
if (extent_ >= EXT_THREADS ) oss << '/' << numThreads_;
|
||||
if (extent_ >= EXT_WARPS ) oss << '/' << numWarps_;
|
||||
if (extent_ >= EXT_CORES ) oss << '/' << numCores_;
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
bool operator==(const ArchDef &r) const {
|
||||
Extent minExtent(r.extent_ > extent_ ? extent_ : r.extent_);
|
||||
|
||||
// Can't be equal if we can't specify a binary encoding at all.
|
||||
if (minExtent < EXT_PREGS)
|
||||
return false;
|
||||
|
||||
if (minExtent >= EXT_WORDSIZE) {
|
||||
if (wordSize_!=r.wordSize_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_ENC) {
|
||||
if (encChar_ != r.encChar_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_REGS) {
|
||||
if (numRegs_ != r.numRegs_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_PREGS) {
|
||||
if (numPRegs_ != r.numPRegs_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_THREADS) {
|
||||
if (numThreads_ != r.numThreads_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_WARPS) {
|
||||
if (numWarps_ != r.numWarps_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_CORES) {
|
||||
if (numCores_ != r.numCores_)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool operator!=(const ArchDef &r) const {
|
||||
return !(*this == r);
|
||||
}
|
||||
|
||||
Size getWordSize() const {
|
||||
if (extent_ < EXT_WORDSIZE)
|
||||
throw Undefined();
|
||||
return wordSize_;
|
||||
}
|
||||
|
||||
char getEncChar() const {
|
||||
if ((extent_ < EXT_ENC) || (encChar_ == 'x'))
|
||||
throw Undefined();
|
||||
return encChar_;
|
||||
}
|
||||
|
||||
RegNum getNumRegs() const {
|
||||
if (extent_ < EXT_REGS)
|
||||
throw Undefined();
|
||||
return numRegs_;
|
||||
}
|
||||
|
||||
RegNum getNumPRegs() const {
|
||||
if (extent_ < EXT_PREGS)
|
||||
throw Undefined();
|
||||
return numPRegs_;
|
||||
}
|
||||
|
||||
ThdNum getNumThreads() const {
|
||||
if (extent_ < EXT_THREADS)
|
||||
throw Undefined();
|
||||
return numThreads_;
|
||||
}
|
||||
|
||||
ThdNum getNumWarps() const {
|
||||
if (extent_ < EXT_WARPS)
|
||||
throw Undefined();
|
||||
return numWarps_;
|
||||
}
|
||||
|
||||
ThdNum getNumCores() const {
|
||||
if (extent_ < EXT_CORES)
|
||||
throw Undefined();
|
||||
return numCores_;
|
||||
}
|
||||
|
||||
bool is_cpu_mode() const {
|
||||
return cpu_mode_;
|
||||
}
|
||||
|
||||
private:
|
||||
enum Extent {
|
||||
EXT_NULL,
|
||||
EXT_WORDSIZE,
|
||||
EXT_ENC,
|
||||
EXT_REGS,
|
||||
EXT_PREGS,
|
||||
EXT_THREADS,
|
||||
EXT_WARPS,
|
||||
EXT_CORES,
|
||||
EXT_END
|
||||
};
|
||||
|
||||
Extent extent_;
|
||||
Size wordSize_;
|
||||
ThdNum numThreads_;
|
||||
ThdNum numWarps_;
|
||||
ThdNum numCores_;
|
||||
RegNum numRegs_;
|
||||
ThdNum numPRegs_;
|
||||
char encChar_;
|
||||
bool cpu_mode_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,39 +1,34 @@
|
||||
/*******************************************************************************
|
||||
HARPtools by Chad D. Kersey, Summer 2011
|
||||
*******************************************************************************/
|
||||
#include "include/args.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "args.h"
|
||||
|
||||
using namespace HarpTools;
|
||||
using namespace vortex;
|
||||
using std::string;
|
||||
|
||||
std::string CommandLineArg::helpString;
|
||||
std::map<string, CommandLineArg *> CommandLineArg::longArgs;
|
||||
std::map<string, CommandLineArg *> CommandLineArg::shortArgs;
|
||||
std::string CommandLineArg::helpString_;
|
||||
std::unordered_map<string, CommandLineArg *> CommandLineArg::longArgs_;
|
||||
std::unordered_map<string, CommandLineArg *> CommandLineArg::shortArgs_;
|
||||
|
||||
CommandLineArg::CommandLineArg(string s, string l, const char *helpText)
|
||||
{
|
||||
helpString += helpText;
|
||||
longArgs[l] = this;
|
||||
shortArgs[s] = this;
|
||||
CommandLineArg::CommandLineArg(string s, string l, const char *helpText) {
|
||||
helpString_ += helpText;
|
||||
longArgs_[l] = this;
|
||||
shortArgs_[s] = this;
|
||||
}
|
||||
|
||||
CommandLineArg::CommandLineArg(string l, const char *helpText) {
|
||||
helpString += helpText;
|
||||
longArgs[l] = this;
|
||||
helpString_ += helpText;
|
||||
longArgs_[l] = this;
|
||||
}
|
||||
|
||||
void CommandLineArg::readArgs(int argc, char **argv) {
|
||||
for (int i = 0; i < argc; i++) {
|
||||
std::map<string, CommandLineArg *>::iterator
|
||||
s = shortArgs.find(std::string(argv[i])),
|
||||
l = longArgs.find(std::string(argv[i]));
|
||||
std::unordered_map<string, CommandLineArg *>::iterator
|
||||
s = shortArgs_.find(std::string(argv[i])),
|
||||
l = longArgs_.find(std::string(argv[i]));
|
||||
|
||||
if (s != shortArgs.end()) {
|
||||
if (s != shortArgs_.end()) {
|
||||
i += s->second->read(argc - i, &argv[i]);
|
||||
} else if (l != longArgs.end()) {
|
||||
} else if (l != longArgs_.end()) {
|
||||
i += l->second->read(argc - i, &argv[i]);
|
||||
} else {
|
||||
throw BadArg(string(argv[i]));
|
||||
@@ -42,11 +37,11 @@ void CommandLineArg::readArgs(int argc, char **argv) {
|
||||
}
|
||||
|
||||
void CommandLineArg::clearArgs() {
|
||||
shortArgs.clear();
|
||||
longArgs.clear();
|
||||
helpString = "";
|
||||
shortArgs_.clear();
|
||||
longArgs_.clear();
|
||||
helpString_ = "";
|
||||
}
|
||||
|
||||
void CommandLineArg::showHelp(std::ostream &os) {
|
||||
os << helpString;
|
||||
os << helpString_;
|
||||
}
|
||||
|
||||
64
simX/args.h
Normal file
64
simX/args.h
Normal file
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include "util.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
/// Exception payload naming the offending command-line argument.
struct BadArg {
  BadArg(std::string s)
    : arg(s)
  {}

  std::string arg;  ///< the argument text that could not be handled
};
|
||||
|
||||
/**
 * Base class for a command-line option.
 *
 * The constructors and the static helpers are defined out of line; this
 * header only declares them. Derived classes implement read() to consume
 * an option's arguments.
 */
class CommandLineArg {
public:
  /// Declares an option with a short name @p s and a long name @p l.
  CommandLineArg(std::string s, std::string l, const char *helpText);

  /// Declares an option with only a long name @p l.
  CommandLineArg(std::string l, const char *helpText);

  // Polymorphic base: a virtual destructor keeps destruction through a
  // CommandLineArg pointer well defined.
  virtual ~CommandLineArg() {}

  /**
   * Consumes this option's arguments.
   * @param argc number of entries available in @p argv
   * @param argv argument vector positioned at this option
   * @return an integer the caller adds to its argument index
   */
  virtual int read(int argc, char** argv) = 0;

  static void readArgs(int argc, char **argv);
  static void clearArgs();
  static void showHelp(std::ostream &os);

private:
  static std::string helpString_;
  static std::unordered_map<std::string, CommandLineArg *> longArgs_;
  static std::unordered_map<std::string, CommandLineArg *> shortArgs_;
};
|
||||
|
||||
template <typename T> class CommandLineArgSetter : public CommandLineArg {
|
||||
public:
|
||||
CommandLineArgSetter(std::string s, std::string l, const char *ht, T &x) :
|
||||
CommandLineArg(s, l, ht), arg_(x) {}
|
||||
|
||||
CommandLineArgSetter(std::string l, const char *ht, T &x) :
|
||||
CommandLineArg(l, ht), arg_(x) {}
|
||||
|
||||
int read(int argc, char **argv) {
|
||||
__unused(argc);
|
||||
std::istringstream iss(argv[1]);
|
||||
iss >> arg_;
|
||||
return 1;
|
||||
}
|
||||
private:
|
||||
T &arg_;
|
||||
};
|
||||
|
||||
class CommandLineArgFlag : public CommandLineArg {
|
||||
public:
|
||||
CommandLineArgFlag(std::string s, std::string l, const char *ht, bool &x) :
|
||||
CommandLineArg(s, l, ht), arg_(x) { arg_ = false; }
|
||||
|
||||
CommandLineArgFlag(std::string l, const char *ht, bool &x) :
|
||||
CommandLineArg(l, ht), arg_(x) { arg_ = false; }
|
||||
|
||||
int read(int argc, char **argv) {
|
||||
__unused(argc, argv);
|
||||
arg_ = true;
|
||||
return 0;
|
||||
}
|
||||
private:
|
||||
bool &arg_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,113 +0,0 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// Test harness that drives VX_dmem_controller with instruction- and
// data-cache requests and acknowledges the DRAM requests it raises.
module cache_simX (
    input  wire       clk,    // Clock
    input  wire       reset,

    // Icache
    input  wire[31:0] icache_pc_addr,
    input  wire       icache_valid_pc_addr,
    output wire       icache_stall,

    // Dcache
    input  wire[2:0]  dcache_mem_read,
    input  wire[2:0]  dcache_mem_write,
    input  wire       dcache_in_valid[`NT_M1:0],
    input  wire[31:0] dcache_in_addr[`NT_M1:0],
    output wire       dcache_stall
);
    //////////////////// ICACHE ///////////////////

    VX_icache_request_inter VX_icache_req;
    assign VX_icache_req.pc_address                    = icache_pc_addr;
    assign VX_icache_req.out_cache_driver_in_mem_read  = (icache_valid_pc_addr) ? `LW_MEM_READ : `NO_MEM_READ;
    assign VX_icache_req.out_cache_driver_in_mem_write = `NO_MEM_WRITE;
    assign VX_icache_req.out_cache_driver_in_valid     = icache_valid_pc_addr;
    assign VX_icache_req.out_cache_driver_in_data      = 0;

    VX_icache_response_inter VX_icache_rsp;
    assign icache_stall = VX_icache_rsp.delay;

    VX_dram_req_rsp_inter #(
        .NUMBER_BANKS(`ICACHE_BANKS),
        .NUM_WORDS_PER_BLOCK(`ICACHE_NUM_WORDS_PER_BLOCK)
    ) VX_dram_req_rsp_icache();

    reg icache_i_m_ready;

    assign VX_dram_req_rsp_icache.i_m_ready = icache_i_m_ready;

    //////////////////// DCACHE ///////////////////

    VX_dcache_request_inter VX_dcache_req;
    assign VX_dcache_req.out_cache_driver_in_mem_read  = dcache_mem_read;
    assign VX_dcache_req.out_cache_driver_in_mem_write = dcache_mem_write;
    assign VX_dcache_req.out_cache_driver_in_data      = 0;

    // Per-thread address and valid fan-out.
    genvar curr_t;
    for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1)
    begin
        assign VX_dcache_req.out_cache_driver_in_address[curr_t] = dcache_in_addr[curr_t];
        assign VX_dcache_req.out_cache_driver_in_valid[curr_t]   = dcache_in_valid[curr_t];
    end

    VX_dcache_response_inter VX_dcache_rsp;
    assign dcache_stall = VX_dcache_rsp.delay;

    VX_dram_req_rsp_inter #(
        .NUMBER_BANKS(`DCACHE_BANKS),
        .NUM_WORDS_PER_BLOCK(`DCACHE_NUM_WORDS_PER_BLOCK)
    ) VX_dram_req_rsp();

    reg dcache_i_m_ready;
    assign VX_dram_req_rsp.i_m_ready = dcache_i_m_ready;

    VX_dmem_controller dmem_ctrl (
        .clk                   (clk),
        .reset                 (reset),
        .VX_dram_req_rsp       (VX_dram_req_rsp),
        .VX_dram_req_rsp_icache(VX_dram_req_rsp_icache),
        .VX_icache_req         (VX_icache_req),
        .VX_icache_rsp         (VX_icache_rsp),
        .VX_dcache_req         (VX_dcache_req),
        .VX_dcache_rsp         (VX_dcache_rsp)
    );

    // Each ready flag is registered from the matching DRAM request-valid
    // signal: 1 when the request was valid at the clock edge, otherwise 0.
    // Non-blocking assignments are used because this is clocked logic;
    // blocking assignments here can race with other processes that read
    // the registers in simulation.
    always @(posedge clk, posedge reset) begin
        if (reset) begin
            icache_i_m_ready <= 0;
            dcache_i_m_ready <= 0;
        end else begin
            if (VX_dram_req_rsp_icache.o_m_valid) begin
                icache_i_m_ready <= 1;
                // $display("cache_simX.v: setting icache_i_m_ready = %d", icache_i_m_ready);
            end else begin
                icache_i_m_ready <= 0;
            end

            if (VX_dram_req_rsp.o_m_valid) begin
                dcache_i_m_ready <= 1;
            end else begin
                dcache_i_m_ready <= 0;
            end
        end
    end

endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
1057
simX/core.cpp
1057
simX/core.cpp
File diff suppressed because it is too large
Load Diff
100
simX/core.h
Normal file
100
simX/core.h
Normal file
@@ -0,0 +1,100 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
#include <unordered_map>
|
||||
#include <set>
|
||||
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
#include "decode.h"
|
||||
#include "mem.h"
|
||||
#include "warp.h"
|
||||
#include "trace.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Core {
|
||||
public:
|
||||
Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id = 0);
|
||||
~Core();
|
||||
|
||||
bool interrupt(Word r0);
|
||||
bool running() const;
|
||||
|
||||
void getCacheDelays(trace_inst_t *);
|
||||
void warpScheduler();
|
||||
void fetch();
|
||||
void decode();
|
||||
void scheduler();
|
||||
void execute_unit();
|
||||
void load_store();
|
||||
void writeback();
|
||||
|
||||
void step();
|
||||
|
||||
void printStats() const;
|
||||
|
||||
Word id() const {
|
||||
return id_;
|
||||
}
|
||||
|
||||
Warp& warp(int i) {
|
||||
return warps_[i];
|
||||
}
|
||||
|
||||
Decoder& decoder() {
|
||||
return decoder_;
|
||||
}
|
||||
|
||||
MemoryUnit& mem() {
|
||||
return mem_;
|
||||
}
|
||||
|
||||
const ArchDef& arch() const {
|
||||
return arch_;
|
||||
}
|
||||
|
||||
Word interruptEntry() const {
|
||||
return interruptEntry_;
|
||||
}
|
||||
|
||||
unsigned long num_instructions() const {
|
||||
return num_instructions_;
|
||||
}
|
||||
|
||||
unsigned long num_steps() const {
|
||||
return steps_;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
bool renameTable_[32][32];
|
||||
bool vecRenameTable_[32];
|
||||
bool stalled_warps_[32];
|
||||
bool foundSchedule_;
|
||||
|
||||
Word id_;
|
||||
const ArchDef &arch_;
|
||||
Decoder &decoder_;
|
||||
MemoryUnit &mem_;
|
||||
std::vector<Warp> warps_;
|
||||
std::unordered_map<Word, std::set<Warp *>> barriers_;
|
||||
int schedule_w_;
|
||||
uint64_t steps_;
|
||||
uint64_t num_instructions_;
|
||||
Word interruptEntry_;
|
||||
bool release_warp_;
|
||||
int release_warp_num_;
|
||||
|
||||
trace_inst_t inst_in_fetch_;
|
||||
trace_inst_t inst_in_decode_;
|
||||
trace_inst_t inst_in_scheduler_;
|
||||
trace_inst_t inst_in_exe_;
|
||||
trace_inst_t inst_in_lsu_;
|
||||
trace_inst_t inst_in_wb_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
42
simX/debug.h
Normal file
42
simX/debug.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#pragma once
|
||||
|
||||
// Uncomment to enable debug output; a message is printed when its level
// is <= USE_DEBUG.
//#define USE_DEBUG 9

#if defined(USE_DEBUG)

#include <iostream>
#include <iomanip>

// Emits its argument verbatim (debug-only code).
#define DX(x) x

// Prints "DEBUG <file>:<line>: <x>" followed by a newline and a flush.
#define D(lvl, x)                                                                                   \
  do {                                                                                              \
    if ((lvl) <= USE_DEBUG) {                                                                       \
      std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x << std::endl;  \
    }                                                                                               \
  } while(0)

// Same header as D, but without the trailing newline.
#define DPH(lvl, x)                                                                    \
  do {                                                                                 \
    if ((lvl) <= USE_DEBUG) {                                                          \
      std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x;  \
    }                                                                                  \
  } while(0)

// Prints x with no header and no newline.
#define DPN(lvl, x)            \
  do {                         \
    if ((lvl) <= USE_DEBUG) {  \
      std::cout << x;          \
    }                          \
  } while(0)

// Prints x unconditionally, with no level check.
#define D_RAW(x)     \
  do {               \
    std::cout << x;  \
  } while (0)

#else

// Debug disabled: every macro expands to nothing and its arguments are
// not evaluated.
#define DX(x)
#define D(lvl, x)   do {} while(0)
#define DPH(lvl, x) do {} while(0)
#define DPN(lvl, x) do {} while(0)
#define D_RAW(x)    do {} while(0)

#endif
|
||||
293
simX/decode.cpp
Normal file
293
simX/decode.cpp
Normal file
@@ -0,0 +1,293 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "util.h"
|
||||
#include "decode.h"
|
||||
#include "archdef.h"
|
||||
#include "instr.h"
|
||||
#include "trace.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
struct InstTableEntry_t {
|
||||
const char *opString;
|
||||
bool controlFlow;
|
||||
InstType iType;
|
||||
};
|
||||
|
||||
static const std::unordered_map<int, struct InstTableEntry_t> sc_instTable = {
|
||||
{Opcode::NOP, {"nop" , false, InstType::N_TYPE}},
|
||||
{Opcode::R_INST, {"r_type", false, InstType::R_TYPE}},
|
||||
{Opcode::L_INST, {"load" , false, InstType::I_TYPE}},
|
||||
{Opcode::I_INST, {"i_type", false, InstType::I_TYPE}},
|
||||
{Opcode::S_INST, {"store" , false, InstType::S_TYPE}},
|
||||
{Opcode::B_INST, {"branch", true , InstType::B_TYPE}},
|
||||
{Opcode::LUI_INST, {"lui" , false, InstType::U_TYPE}},
|
||||
{Opcode::AUIPC_INST, {"auipc" , false, InstType::U_TYPE}},
|
||||
{Opcode::JAL_INST, {"jal" , true , InstType::J_TYPE}},
|
||||
{Opcode::JALR_INST, {"jalr" , true , InstType::I_TYPE}},
|
||||
{Opcode::SYS_INST, {"SYS" , true , InstType::I_TYPE}},
|
||||
{Opcode::FENCE, {"fence" , true , InstType::I_TYPE}},
|
||||
{Opcode::PJ_INST, {"pred j", true , InstType::R_TYPE}},
|
||||
{Opcode::GPGPU, {"gpgpu" , false, InstType::R_TYPE}},
|
||||
{Opcode::VSET_ARITH, {"vsetvl", false, InstType::V_TYPE}},
|
||||
{Opcode::VL, {"vl" , false, InstType::V_TYPE}},
|
||||
{Opcode::VS, {"vs" , false, InstType::V_TYPE}}
|
||||
};
|
||||
|
||||
// Streams the name of the instruction's opcode. The stream is switched to
// decimal formatting first; sc_instTable.at() throws std::out_of_range for
// an opcode that is not in the table.
std::ostream &vortex::operator<<(std::ostream &os, Instr &instr) {
  os << std::dec;
  const char *name = sc_instTable.at(instr.opcode_).opString;
  os << name;
  return os;
}
|
||||
|
||||
// Sets up the field widths, bit offsets and masks used by decode().
// Each offset is written as "previous field offset + previous field width".
Decoder::Decoder(const ArchDef &arch) {
  // Field widths, in bits.
  inst_s_   = arch.getWordSize() * 8;
  opcode_s_ = 7;
  reg_s_    = 5;
  func3_s_  = 3;
  mop_s_    = 3;
  vmask_s_  = 1;

  // Scalar fields: opcode | rd | func3 | rs1 | rs2 | func7.
  shift_opcode_ = 0;
  shift_rd_     = opcode_s_;
  shift_func3_  = shift_rd_ + reg_s_;
  shift_rs1_    = shift_func3_ + func3_s_;
  shift_rs2_    = shift_rs1_ + reg_s_;
  shift_func7_  = shift_rs2_ + reg_s_;

  // Immediates start at the offset of the field they overlay.
  shift_j_u_immed_ = shift_func3_;
  shift_s_b_immed_ = shift_func7_;
  shift_i_immed_   = shift_rs2_;

  // Vector fields.
  shift_vset_immed_ = shift_rs2_;
  shift_vmask_      = shift_func7_;
  shift_vmop_       = shift_vmask_ + vmask_s_;
  shift_vnf_        = shift_vmop_ + mop_s_;
  shift_func6_      = shift_func7_ + 1;
  shift_vset_       = shift_func7_ + 6;

  // Masks applied after shifting.
  opcode_mask_ = 0x7f;
  reg_mask_    = 0x1f;
  func3_mask_  = 0x7;
  func6_mask_  = 0x3f;
  func7_mask_  = 0x7f;
  i_imm_mask_  = 0xfff;
  s_imm_mask_  = 0xfff;
  b_imm_mask_  = 0x1fff;
  u_imm_mask_  = 0xfffff;
  j_imm_mask_  = 0xfffff;
  v_imm_mask_  = 0x7ff;
}
|
||||
|
||||
std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, trace_inst_t *trace_inst) {
|
||||
Word code(readWord(v, idx, inst_s_ / 8));
|
||||
|
||||
// std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n";
|
||||
auto instr = std::make_shared<Instr>();
|
||||
|
||||
Opcode op = (Opcode)((code >> shift_opcode_) & opcode_mask_);
|
||||
// std::cout << "opcode: " << op << "\n";
|
||||
instr->setOpcode(op);
|
||||
|
||||
Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5,
|
||||
bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3;
|
||||
|
||||
// std::cout << "op: " << std::hex << op << " what " << sc_instTable[op].iType << "\n";
|
||||
switch (sc_instTable.at(op).iType) {
|
||||
case InstType::N_TYPE:
|
||||
break;
|
||||
|
||||
case InstType::R_TYPE:
|
||||
instr->setPred((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setFunc3((code >> shift_func3_) & func3_mask_);
|
||||
instr->setFunc7((code >> shift_func7_) & func7_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::I_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setFunc7((code >> shift_func7_) & func7_mask_);
|
||||
func3 = (code >> shift_func3_) & func3_mask_;
|
||||
instr->setFunc3(func3);
|
||||
|
||||
if ((func3 == 5) && (op != L_INST)) {
|
||||
// std::cout << "func7: " << func7 << "\n";
|
||||
instr->setSrcImm(signExt(((code >> shift_rs2_) & reg_mask_), 5, reg_mask_));
|
||||
} else {
|
||||
instr->setSrcImm(signExt(code >> shift_i_immed_, 12, i_imm_mask_));
|
||||
}
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::S_TYPE:
|
||||
// std::cout << "************STORE\n";
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setFunc3((code >> shift_func3_) & func3_mask_);
|
||||
|
||||
dest_bits = (code >> shift_rd_) & reg_mask_;
|
||||
imm_bits = (code >> shift_s_b_immed_ & func7_mask_);
|
||||
imeed = (imm_bits << reg_s_) | dest_bits;
|
||||
// std::cout << "ENC: store imeed: " << imeed << "\n";
|
||||
instr->setSrcImm(signExt(imeed, 12, s_imm_mask_));
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::B_TYPE:
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setFunc3((code >> shift_func3_) & func3_mask_);
|
||||
|
||||
dest_bits = (code >> shift_rd_) & reg_mask_;
|
||||
imm_bits = (code >> shift_s_b_immed_ & func7_mask_);
|
||||
|
||||
bit_11 = dest_bits & 0x1;
|
||||
bits_4_1 = dest_bits >> 1;
|
||||
bit_10_5 = imm_bits & 0x3f;
|
||||
bit_12 = imm_bits >> 6;
|
||||
|
||||
imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
|
||||
|
||||
instr->setSrcImm(signExt(imeed, 13, b_imm_mask_));
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::U_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcImm(signExt(code >> shift_j_u_immed_, 20, u_imm_mask_));
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::J_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
|
||||
// [20 | 10:1 | 11 | 19:12]
|
||||
|
||||
unordered = code >> shift_j_u_immed_;
|
||||
|
||||
bits_19_12 = unordered & 0xff;
|
||||
bit_11 = (unordered >> 8) & 0x1;
|
||||
bits_10_1 = (unordered >> 9) & 0x3ff;
|
||||
bit_20 = (unordered >> 19) & 0x1;
|
||||
|
||||
imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
|
||||
|
||||
if (bit_20) {
|
||||
imeed |= ~j_imm_mask_;
|
||||
}
|
||||
|
||||
instr->setSrcImm(imeed);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::V_TYPE:
|
||||
D(3, "Entered here: instr type = vector" << op);
|
||||
switch (op) {
|
||||
case Opcode::VSET_ARITH: //TODO: arithmetic ops
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
func3 = (code >> shift_func3_) & func3_mask_;
|
||||
instr->setFunc3(func3);
|
||||
D(3, "Entered here: instr type = vector");
|
||||
|
||||
if (func3 == 7) {
|
||||
D(3, "Entered here: imm instr");
|
||||
instr->setVsetImm(!(code >> shift_vset_));
|
||||
if (instr->getVsetImm()) {
|
||||
Word immed = (code >> shift_rs2_) & v_imm_mask_;
|
||||
D(3, "immed" << immed);
|
||||
instr->setSrcImm(immed); //TODO
|
||||
instr->setVlmul(immed & 0x3);
|
||||
D(3, "lmul " << (immed & 0x3));
|
||||
instr->setVediv((immed >> 4) & 0x3);
|
||||
D(3, "ediv " << ((immed >> 4) & 0x3));
|
||||
instr->setVsew((immed >> 2) & 0x3);
|
||||
D(3, "sew " << ((immed >> 2) & 0x3));
|
||||
} else {
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
}
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
} else {
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setVmask((code >> shift_vmask_) & 0x1);
|
||||
instr->setFunc6((code >> shift_func6_) & func6_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
}
|
||||
break;
|
||||
|
||||
case Opcode::VL:
|
||||
D(3, "vector load instr");
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setVmask((code >> shift_vmask_));
|
||||
instr->setVmop((code >> shift_vmop_) & func3_mask_);
|
||||
instr->setVnf((code >> shift_vnf_) & func3_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->vd = ((code >> shift_rd_) & reg_mask_);
|
||||
//trace_inst->vs2 = ((code>>shift_rs2_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case Opcode::VS:
|
||||
instr->setVs3((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setVmask((code >> shift_vmask_));
|
||||
instr->setVmop((code >> shift_vmop_) & func3_mask_);
|
||||
instr->setVnf((code >> shift_vnf_) & func3_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
//trace_inst->vd = ((code>>shift_rd_) & reg_mask_);
|
||||
trace_inst->vs1 = ((code >> shift_rd_) & reg_mask_); //vs3
|
||||
break;
|
||||
|
||||
default:
|
||||
std::cout << "Inavlid opcode.\n";
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unrecognized argument class in word decoder.\n";
|
||||
std::abort();
|
||||
}
|
||||
|
||||
D(2, "Decoded instr 0x" << std::hex << code << " into: " << instr << std::flush);
|
||||
|
||||
return instr;
|
||||
}
|
||||
58
simX/decode.h
Normal file
58
simX/decode.h
Normal file
@@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "util.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef;
|
||||
class Instr;
|
||||
class trace_inst_t;
|
||||
|
||||
class Decoder {
|
||||
public:
|
||||
Decoder(const ArchDef &);
|
||||
|
||||
virtual std::shared_ptr<Instr> decode(const std::vector<Byte> &v, Size &n, trace_inst_t * trace_inst);
|
||||
|
||||
private:
|
||||
|
||||
Word inst_s_;
|
||||
Word opcode_s_;
|
||||
Word reg_s_;
|
||||
Word func3_s_;
|
||||
Word shift_opcode_;
|
||||
Word shift_rd_;
|
||||
Word shift_rs1_;
|
||||
Word shift_rs2_;
|
||||
Word shift_func3_;
|
||||
Word shift_func7_;
|
||||
Word shift_j_u_immed_;
|
||||
Word shift_s_b_immed_;
|
||||
Word shift_i_immed_;
|
||||
|
||||
Word reg_mask_;
|
||||
Word func3_mask_;
|
||||
Word func6_mask_;
|
||||
Word func7_mask_;
|
||||
Word opcode_mask_;
|
||||
Word i_imm_mask_;
|
||||
Word s_imm_mask_;
|
||||
Word b_imm_mask_;
|
||||
Word u_imm_mask_;
|
||||
Word j_imm_mask_;
|
||||
Word v_imm_mask_;
|
||||
|
||||
//Vector
|
||||
Word shift_vset_;
|
||||
Word shift_vset_immed_;
|
||||
Word shift_vmask_;
|
||||
Word shift_vmop_;
|
||||
Word shift_vnf_;
|
||||
Word shift_func6_;
|
||||
Word vmask_s_;
|
||||
Word mop_s_;
|
||||
};
|
||||
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user