merging changes from OPAE branch making this branch

This commit is contained in:
Blaise Tine
2020-03-27 20:19:16 -04:00
parent 39516a6f98
commit 9b1b8789ac
267 changed files with 498191 additions and 166 deletions

52
old_rtl/Makefile Normal file
View File

@@ -0,0 +1,52 @@
# Build the Verilator simulation model of Vortex.v.
#
#   make / make all / make RUNFILE - verilate with VCD_OFF, then compile obj_dir
#   make w                         - same, but with the WNO suppressions and DEB flags
#   make debug                     - verilate with verilator_bin_dbg and VCD_OUTPUT, then compile
#   make clean                     - remove the generated obj_dir
all: RUNFILE

# None of these targets is the name of a file its recipe creates; declaring
# them phony keeps a stray file called e.g. "clean" or "w" from masking them.
.PHONY: all VERILATOR VERILATORnoWarnings compdebug RUNFILE debug w clean

# /rf2_256x128_wm1/
BaseMEM=../models/memory/cln28hpm
# Include search path handed to Verilator.
INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Isimulate
# Top-level Verilog source.
FILE=Vortex.v
# C++ test bench linked into the generated executable.
EXE=--exe ./simulate/test_bench.cpp
COMP=--compiler gcc
# Warning suppressions used by the "w" and "debug" flows.
WNO=-Wno-UNDRIVEN -Wno-PINMISSING -Wno-STMTDLY -Wno-WIDTH -Wno-UNSIGNED -Wno-UNOPTFLAT
# WNO=
# LIGHTW=
# Minimal suppression used by the default flow.
LIGHTW=-Wno-UNOPTFLAT
# LIB=-LDFLAGS '-L/usr/local/systemc/'
LIB=
CF=-CFLAGS '-std=c++11 -O3'
DEB=--trace --prof-cfuncs -DVL_DEBUG=1
# Compile the C++ that Verilator generated into obj_dir.
# $(MAKE) (rather than a literal "make") passes the parent's flags down to
# the sub-make. Because it is reached through a variable, the recipes below
# prefix the line with '+' so make still treats it as a recursive invocation.
MAKECPP=(cd obj_dir && $(MAKE) -j -f VVortex.mk OPT='-DVL_DEBUG' VL_DEBUG=1 DVL_DEBUG=1)
# -LDFLAGS '-lsystemc'

# Verilate with waveform dumping disabled (writes simulate/tb_debug.h).
VERILATOR:
	echo "#define VCD_OFF" > simulate/tb_debug.h
	verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(LIGHTW)

# Same as VERILATOR but with the full warning-suppression list and debug flags.
VERILATORnoWarnings:
	echo "#define VCD_OFF" > simulate/tb_debug.h
	verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(WNO) $(DEB)

# Verilate with the debug Verilator binary and waveform dumping enabled.
compdebug:
	echo "#define VCD_OUTPUT" > simulate/tb_debug.h
	verilator_bin_dbg $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) -CFLAGS '-std=c++11 -DVL_DEBUG' $(WNO) $(DEB)

RUNFILE: VERILATOR
	+$(MAKECPP)

debug: compdebug
	+$(MAKECPP)

w: VERILATORnoWarnings
	+$(MAKECPP)

clean:
	rm -rf obj_dir

139
old_rtl/VX_alu.v Normal file
View File

@@ -0,0 +1,139 @@
`include "VX_define.v"
// VX_alu: purely combinational ALU, including multiply / divide / remainder.
//
// Inputs:
//   in_1            first operand
//   in_2            second operand when in_rs2_src is not `RS2_IMMED
//   in_rs2_src      when equal to `RS2_IMMED, in_itype_immed is the second operand
//   in_itype_immed  immediate second operand
//   in_upper_immed  20-bit upper immediate; placed in bits [31:12], low 12 bits zero
//   in_alu_op       operation select, compared against the ALU-op macros below
//   in_curr_PC      added to the upper immediate for `AUIPC_ALU
// Output:
//   out_alu_result  32-bit result; 32'h0 for any in_alu_op not listed in the case
//
// When SYN_FUNC is defined a single 64x64 multiplier is shared by all multiply
// ops (its operands are sign- or zero-extended according to in_alu_op);
// otherwise three separate products are formed.
module VX_alu(
    input wire[31:0] in_1,
    input wire[31:0] in_2,
    input wire in_rs2_src,
    input wire[31:0] in_itype_immed,
    input wire[19:0] in_upper_immed,
    input wire[4:0] in_alu_op,
    input wire[31:0] in_curr_PC,
    output reg[31:0] out_alu_result
);

    // ---- Operand selection (identical for both build variants) ----
    wire       which_in2;
    wire[31:0] ALU_in1;
    wire[31:0] ALU_in2;
    wire[31:0] upper_immed;

    assign which_in2   = in_rs2_src == `RS2_IMMED;
    assign ALU_in1     = in_1;
    assign ALU_in2     = which_in2 ? in_itype_immed : in_2;
    assign upper_immed = {in_upper_immed, {12{1'b0}}};

    // ---- Multiplier ----
    // Only part of each 64-bit product is consumed, hence the lint waiver.
    /* verilator lint_off UNUSED */
    wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};

`ifdef SYN_FUNC
    wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};

    // Operand 1 is zero-extended for the unsigned ops, sign-extended otherwise.
    wire[63:0] ALU_in1_mult = (in_alu_op == `MULHU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in1} : alu_in1_signed;
    // Operand 2 is additionally zero-extended for MULHSU (signed x unsigned).
    wire[63:0] ALU_in2_mult = (in_alu_op == `MULHU || in_alu_op == `MULHSU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in2} : alu_in2_signed;

    wire[63:0] mult_result = ALU_in1_mult * ALU_in2_mult;

    wire[31:0] mul_lo    = mult_result[31:0];
    wire[31:0] mulh_hi   = mult_result[63:32];
    wire[31:0] mulhsu_hi = mult_result[63:32];
    wire[31:0] mulhu_hi  = mult_result[63:32];
`else
    wire[63:0] mult_unsigned_result  = ALU_in1 * ALU_in2;
    wire[63:0] mult_signed_result    = $signed(ALU_in1) * $signed(ALU_in2);
    wire[63:0] mult_signed_un_result = alu_in1_signed * ALU_in2;

    wire[31:0] mul_lo    = mult_signed_result[31:0];
    wire[31:0] mulh_hi   = mult_signed_result[63:32];
    wire[31:0] mulhsu_hi = mult_signed_un_result[63:32];
    wire[31:0] mulhu_hi  = mult_unsigned_result[63:32];
`endif
    /* verilator lint_on UNUSED */

    // ---- Result mux ----
    // @(*) instead of an explicit list: the previous list named only
    // in_alu_op / ALU_in1 / ALU_in2 although the block also reads upper_immed,
    // in_curr_PC and the multiplier outputs, so an event-driven simulator
    // would not re-evaluate e.g. AUIPC when only in_curr_PC changed.
    always @(*) begin
        case(in_alu_op)
            `ADD:       out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
            `SUB:       out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
            `SLLA:      out_alu_result = ALU_in1 << ALU_in2[4:0];
            `SLT:       out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
            `SLTU:      out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
            `XOR:       out_alu_result = ALU_in1 ^ ALU_in2;
            `SRL:       out_alu_result = ALU_in1 >> ALU_in2[4:0];
            `SRA:       out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
            `OR:        out_alu_result = ALU_in1 | ALU_in2;
            `AND:       out_alu_result = ALU_in2 & ALU_in1;
            // Unsigned comparison flag: 0 when in1 >= in2, all ones otherwise.
            `SUBU:      out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
            `LUI_ALU:   out_alu_result = upper_immed;
            `AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
            `MUL:       out_alu_result = mul_lo;
            `MULH:      out_alu_result = mulh_hi;
            `MULHSU:    out_alu_result = mulhsu_hi;
            `MULHU:     out_alu_result = mulhu_hi;
            // Division by zero yields all ones; remainder by zero yields the dividend.
            // The inner $signed() keeps the operation signed even though the
            // other arm of the conditional is an unsigned literal/operand.
            `DIV:       out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2));
            `DIVU:      out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2;
            `REM:       out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2));
            `REMU:      out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
            default:    out_alu_result = 32'h0;
        endcase // in_alu_op
    end

endmodule // VX_alu

133
old_rtl/VX_back_end.v Normal file
View File

@@ -0,0 +1,133 @@
`include "VX_define.v"
// VX_back_end: structural wrapper that wires together the GPR stage, the
// load/store unit, the execute unit, the GPGPU instruction unit, the CSR pipe
// and the writeback arbiter. It contains no logic of its own apart from
// copying the internal writeback bundle onto the VX_writeback_inter port.
module VX_back_end (
    input  wire clk,
    input  wire reset,
    input  wire schedule_delay,
    output wire out_mem_delay,
    output wire gpr_stage_delay,
    VX_jal_response_inter     VX_jal_rsp,
    VX_branch_response_inter  VX_branch_rsp,
    VX_frE_to_bckE_req_inter  VX_bckE_req,
    VX_wb_inter               VX_writeback_inter,
    VX_warp_ctl_inter         VX_warp_ctl,
    VX_dcache_response_inter  VX_dcache_rsp,
    VX_dcache_request_inter   VX_dcache_req
);

    // ------------------------------------------------------------------
    // Internal interface bundles and handshake wires
    // ------------------------------------------------------------------

    // Writeback bundle produced by VX_wb; mirrored onto the module port below
    VX_wb_inter             VX_writeback_temp();

    // Declared but not connected to any instance in this module
    VX_mw_wb_inter          VX_mw_wb();
    VX_mem_req_inter        VX_exe_mem_req();
    VX_mem_req_inter        VX_mem_req();

    // LSU request in, memory writeback out
    VX_lsu_req_inter        VX_lsu_req();
    VX_inst_mem_wb_inter    VX_mem_wb();

    // Execute unit request in, execute writeback out
    VX_exec_unit_req_inter  VX_exec_unit_req();
    VX_inst_exec_wb_inter   VX_inst_exec_wb();

    // GPGPU unit request
    VX_gpu_inst_req_inter   VX_gpu_inst_req();

    // CSR unit request in, CSR writeback out
    VX_csr_req_inter        VX_csr_req();
    VX_csr_wb_inter         VX_csr_wb();

    wire no_slot_mem;    // VX_wb <-> load_store_unit
    wire no_slot_csr;    // VX_wb <-> VX_csr_pipe
    wire stall_gpr_csr;  // VX_csr_pipe <-> VX_gpr_stage

    // ------------------------------------------------------------------
    // Mirror the internal writeback bundle, field by field, onto the port
    // ------------------------------------------------------------------
    assign VX_writeback_inter.wb_valid    = VX_writeback_temp.wb_valid;
    assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num;
    assign VX_writeback_inter.wb          = VX_writeback_temp.wb;
    assign VX_writeback_inter.rd          = VX_writeback_temp.rd;
    assign VX_writeback_inter.write_data  = VX_writeback_temp.write_data;

    // ------------------------------------------------------------------
    // Register read / dispatch to the functional units
    // ------------------------------------------------------------------
    VX_gpr_stage VX_gpr_stage(
        .clk               (clk),
        .reset             (reset),
        .schedule_delay    (schedule_delay),
        .VX_writeback_inter(VX_writeback_temp),
        .VX_bckE_req       (VX_bckE_req),
        .VX_exec_unit_req  (VX_exec_unit_req),
        .VX_lsu_req        (VX_lsu_req),
        .VX_gpu_inst_req   (VX_gpu_inst_req),
        .VX_csr_req        (VX_csr_req),
        .stall_gpr_csr     (stall_gpr_csr),
        .memory_delay      (out_mem_delay),
        .gpr_stage_delay   (gpr_stage_delay)
    );

    // ------------------------------------------------------------------
    // Functional units
    // ------------------------------------------------------------------
    VX_lsu load_store_unit(
        .clk          (clk),
        .reset        (reset),
        .VX_lsu_req   (VX_lsu_req),
        .VX_mem_wb    (VX_mem_wb),
        .VX_dcache_rsp(VX_dcache_rsp),
        .VX_dcache_req(VX_dcache_req),
        .out_delay    (out_mem_delay),
        .no_slot_mem  (no_slot_mem)
    );

    VX_execute_unit VX_execUnit(
        .clk             (clk),
        .reset           (reset),
        .VX_exec_unit_req(VX_exec_unit_req),
        .VX_inst_exec_wb (VX_inst_exec_wb),
        .VX_jal_rsp      (VX_jal_rsp),
        .VX_branch_rsp   (VX_branch_rsp)
    );

    VX_gpgpu_inst VX_gpgpu_inst(
        .VX_gpu_inst_req(VX_gpu_inst_req),
        .VX_warp_ctl    (VX_warp_ctl)
    );

    // (An earlier combinational VX_csr_wrapper hookup was left commented out
    //  in favour of the pipelined VX_csr_pipe below.)
    VX_csr_pipe VX_csr_pipe(
        .clk          (clk),
        .reset        (reset),
        .no_slot_csr  (no_slot_csr),
        .VX_csr_req   (VX_csr_req),
        .VX_writeback (VX_writeback_temp),
        .VX_csr_wb    (VX_csr_wb),
        .stall_gpr_csr(stall_gpr_csr)
    );

    // ------------------------------------------------------------------
    // Writeback: takes the three unit results, drives VX_writeback_temp
    // ------------------------------------------------------------------
    VX_writeback VX_wb(
        .clk               (clk),
        .reset             (reset),
        .VX_mem_wb         (VX_mem_wb),
        .VX_inst_exec_wb   (VX_inst_exec_wb),
        .VX_csr_wb         (VX_csr_wb),
        .VX_writeback_inter(VX_writeback_temp),
        .no_slot_mem       (no_slot_mem),
        .no_slot_csr       (no_slot_csr)
    );

endmodule

22
old_rtl/VX_countones.v Normal file
View File

@@ -0,0 +1,22 @@
// VX_countones: combinational population count.
//
// Parameter:
//   N       width of the input vector (default 10)
// Ports:
//   valids  N-bit input vector
//   count   number of bits of `valids` that are 1; $clog2(N)+1 bits wide
module VX_countones
#(
    parameter N = 10
)
(
    input  wire[N-1:0]       valids,
    output reg[$clog2(N):0]  count
);

    integer bit_idx;

    // Start from zero and add one for every set bit; the running total is a
    // plain sum, so the scan direction does not affect the result.
    always @(*) begin
        count = 0;
        for (bit_idx = 0; bit_idx < N; bit_idx = bit_idx + 1) begin
            if (valids[bit_idx])
                count = count + 1;
        end
    end

endmodule

82
old_rtl/VX_csr_data.v Normal file
View File

@@ -0,0 +1,82 @@
`include "../VX_define.v"
// VX_csr_data: CSR storage plus free-running cycle and retired-instruction
// counters.
//
// Ports:
//   clk, reset            clock and asynchronous, active-high reset
//   in_read_csr_address   CSR address to read (combinational read)
//   in_write_valid        when high, a CSR write is performed on the clock edge
//   in_write_csr_data     write data; only bits [11:0] are stored
//   in_write_csr_address  CSR address to write
//   out_read_csr_data     read result (see the read mux at the bottom)
//   in_writeback_valid    when high, instret is incremented on the clock edge
//
// Read map:
//   12'hC00 -> cycle[31:0]      12'hC80 -> cycle[63:32]
//   12'hC02 -> instret[31:0]    12'hC82 -> instret[63:32]
//   otherwise -> zero-extended 12-bit entry of the csr array
module VX_csr_data (
    input wire clk, // Clock
    input wire reset,
    input wire[11:0] in_read_csr_address,
    input wire in_write_valid,
    input wire[31:0] in_write_csr_data,
    input wire[11:0] in_write_csr_address,
    output wire[31:0] out_read_csr_data,
    // For instruction retire counting
    input wire in_writeback_valid
);

    // NOTE(review): the array holds 1024 entries of 12 bits, while the
    // address ports are 12 bits wide (0..4095) and the write data is 32 bits.
    // Addresses >= 1024 therefore index outside the array and the upper 20
    // data bits are dropped - confirm this sizing is intended.
    reg[11:0] csr[1023:0];
    reg[63:0] cycle;
    reg[63:0] instret;

    wire read_cycle;
    wire read_cycleh;
    wire read_instret;
    wire read_instreth;

    assign read_cycle    = in_read_csr_address == 12'hC00;
    assign read_cycleh   = in_read_csr_address == 12'hC80;
    assign read_instret  = in_read_csr_address == 12'hC02;
    assign read_instreth = in_read_csr_address == 12'hC82;

    integer curr_e;
    always @(posedge clk or posedge reset) begin
        if (reset) begin
            // Clear every entry with an ordinary non-blocking assignment.
            // (This used to be a procedural continuous `assign`, which is not
            // permitted on an array word and, where a tool accepts it, keeps
            // forcing the value and so overrides the CSR writes below.)
            for (curr_e = 0; curr_e < 1024; curr_e = curr_e + 1) begin
                csr[curr_e] <= 0;
            end
            cycle   <= 0;
            instret <= 0;
        end else begin
            cycle <= cycle + 1;
            if (in_write_valid) begin
                csr[in_write_csr_address] <= in_write_csr_data[11:0];
            end
            if (in_writeback_valid) begin
                instret <= instret + 1;
            end
        end
    end

    // Counter reads take priority over the array contents.
    assign out_read_csr_data = read_cycle    ? cycle[31:0]    :
                               read_cycleh   ? cycle[63:32]   :
                               read_instret  ? instret[31:0]  :
                               read_instreth ? instret[63:32] :
                               {{20{1'b0}}, csr[in_read_csr_address]};

endmodule

84
old_rtl/VX_csr_handler.v Normal file
View File

@@ -0,0 +1,84 @@
// VX_csr_handler: CSR storage with cycle / retired-instruction counters.
//
// The decode-side read address is registered, so the value on
// out_decode_csr_data corresponds to the address presented on the previous
// clock edge. Writes come from VX_csr_w_req and store only the low 12 bits
// of the result.
//
// Ports:
//   clk                    clock
//   in_decode_csr_address  CSR address to read
//   VX_csr_w_req           write request bundle (is_csr, csr_address, csr_result)
//   in_wb_valid            when high, instret is incremented on the clock edge
//   out_decode_csr_data    read data for the registered address
module VX_csr_handler (
    input  wire        clk,
    input  wire[11:0]  in_decode_csr_address, // done
    VX_csr_write_request_inter VX_csr_w_req,
    input  wire        in_wb_valid,
    output wire[31:0]  out_decode_csr_data // done
);

    // ---- Unpack the write request ----
    wire        in_mem_is_csr      = VX_csr_w_req.is_csr;
    wire[11:0]  in_mem_csr_address = VX_csr_w_req.csr_address;
    /* verilator lint_off UNUSED */
    wire[31:0]  in_mem_csr_result  = VX_csr_w_req.csr_result;
    /* verilator lint_on UNUSED */

    // ---- State ----
    reg[1024:0][11:0] csr;
    reg[63:0]         cycle;
    reg[63:0]         instret;
    reg[11:0]         decode_csr_address;

    // There is no reset port; counters and the registered address start at 0.
    initial begin
        decode_csr_address = 0;
        cycle              = 0;
        instret            = 0;
    end

    // Single clocked process: counters, read-address register and CSR write.
    always @(posedge clk) begin
        decode_csr_address <= in_decode_csr_address;
        cycle              <= cycle + 1;

        if (in_wb_valid)
            instret <= instret + 1;

        if (in_mem_is_csr)
            csr[in_mem_csr_address] <= in_mem_csr_result[11:0];
    end

    // ---- Read path ----
    wire[11:0] data_read     = csr[decode_csr_address];

    wire       read_cycle    = decode_csr_address == 12'hC00;
    wire       read_cycleh   = decode_csr_address == 12'hC80;
    wire       read_instret  = decode_csr_address == 12'hC02;
    wire       read_instreth = decode_csr_address == 12'hC82;

    // Counter addresses win over the stored entry, which is zero-extended.
    /* verilator lint_off WIDTH */
    assign out_decode_csr_data =
        read_cycle    ? cycle[31:0]    :
        read_cycleh   ? cycle[63:32]   :
        read_instret  ? instret[31:0]  :
        read_instreth ? instret[63:32] :
                        {{20{1'b0}}, data_read};
    /* verilator lint_on WIDTH */

endmodule // VX_csr_handler

105
old_rtl/VX_csr_pipe.v Normal file
View File

@@ -0,0 +1,105 @@
// VX_csr_pipe: two-stage CSR read-modify-write pipeline.
//
// Stage 1 reads the addressed CSR from VX_csr_data (with forwarding from
// stage 2) and computes the updated value according to VX_csr_req.alu_op.
// Stage 2 (behind the csr_reg_s2 pipeline register) writes the updated value
// back into VX_csr_data and presents the value that was read on VX_csr_wb,
// replicated once per thread. Addresses 12'h20 and 12'h21 instead return the
// per-thread index and the warp number respectively.
//
// Ports:
//   clk, reset     clock / reset, passed to VX_csr_data and the pipeline register
//   no_slot_csr    stalls the pipeline register while high
//   VX_csr_req     incoming CSR request
//   VX_writeback   writeback bundle; any wb_valid bit counts as a retired instruction
//   VX_csr_wb      CSR result towards writeback
//   stall_gpr_csr  high when a valid CSR request is presented while no_slot_csr is high
module VX_csr_pipe (
    input wire clk, // Clock
    input wire reset,
    input wire no_slot_csr,
    VX_csr_req_inter VX_csr_req,
    VX_wb_inter VX_writeback,
    VX_csr_wb_inter VX_csr_wb,
    output wire stall_gpr_csr
);

    // ---- Stage-2 (registered) signals ----
    wire[`NT_M1:0] valid_s2;
    wire[`NW_M1:0] warp_num_s2;
    wire[4:0]      rd_s2;
    wire[1:0]      wb_s2;
    wire           is_csr_s2;
    wire[11:0]     csr_address_s2;
    wire[31:0]     csr_read_data_s2;
    wire[31:0]     csr_updated_data_s2;

    wire[31:0]     csr_read_data_unqual;
    wire[31:0]     csr_read_data;

    assign stall_gpr_csr = no_slot_csr && VX_csr_req.is_csr && |(VX_csr_req.valid);

    // Forward the value stage 2 is about to write when stage 1 reads the same
    // address. The forward is qualified with is_csr_s2 because that is the
    // write enable given to VX_csr_data: without it, a non-CSR entry sitting
    // in stage 2 with a matching address field would forward data that is
    // never actually written.
    assign csr_read_data = (is_csr_s2 && (csr_address_s2 == VX_csr_req.csr_address)) ? csr_updated_data_s2 : csr_read_data_unqual;

    wire writeback = |VX_writeback.wb_valid;

    VX_csr_data VX_csr_data(
        .clk                 (clk),
        .reset               (reset),
        .in_read_csr_address (VX_csr_req.csr_address),
        .in_write_valid      (is_csr_s2),
        .in_write_csr_data   (csr_updated_data_s2),
        .in_write_csr_address(csr_address_s2),
        .out_read_csr_data   (csr_read_data_unqual),
        .in_writeback_valid  (writeback)
    );

    // ---- Stage 1: compute the value to write back ----
    reg[31:0] csr_updated_data;
    always @(*) begin
        case(VX_csr_req.alu_op)
            `CSR_ALU_RW: csr_updated_data = VX_csr_req.csr_mask;
            `CSR_ALU_RS: csr_updated_data = csr_read_data | VX_csr_req.csr_mask;
            // 32'hFFFFFFFF - mask is the bitwise complement of mask
            `CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - VX_csr_req.csr_mask);
            default:     csr_updated_data = 32'hdeadbeef;
        endcase
    end

    // ---- Pipeline register between stage 1 and stage 2 ----
    // Width, field by field, in the order they are concatenated below:
    //   valid (`NT) + warp_num (`NW_M1 + 1) + rd (5) + wb (2) + is_csr (1)
    //   + csr_address (12) + csr_read_data and csr_updated_data (64)
    // The is_csr term was previously counted as 5 bits, making the register
    // four bits wider than the buses connected to it.
    wire zero = 0;
    VX_generic_register #(.N(`NT + `NW_M1 + 1 + 5 + 2 + 1 + 12 + 64)) csr_reg_s2 (
        .clk  (clk),
        .reset(reset),
        .stall(no_slot_csr),
        .flush(zero),
        .in   ({VX_csr_req.valid, VX_csr_req.warp_num, VX_csr_req.rd, VX_csr_req.wb, VX_csr_req.is_csr, VX_csr_req.csr_address, csr_read_data   , csr_updated_data   }),
        .out  ({valid_s2        , warp_num_s2        , rd_s2        , wb_s2        , is_csr_s2        , csr_address_s2        , csr_read_data_s2, csr_updated_data_s2})
    );

    // ---- Stage 2: build the per-thread result vector ----
    wire[`NT_M1:0][31:0] final_csr_data;
    wire[`NT_M1:0][31:0] thread_ids;
    wire[`NT_M1:0][31:0] warp_ids;
    wire[`NT_M1:0][31:0] csr_vec_read_data_s2;

    genvar cur_t;
    for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
        assign thread_ids[cur_t] = cur_t;
    end

    genvar cur_tw;
    for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
        // zero-extend the warp number to 32 bits
        assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2};
    end

    genvar cur_v;
    for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin
        assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
    end

    wire thread_select = csr_address_s2 == 12'h20;
    wire warp_select   = csr_address_s2 == 12'h21;

    assign final_csr_data = thread_select ? thread_ids :
                            warp_select   ? warp_ids   :
                                            csr_vec_read_data_s2;

    assign VX_csr_wb.valid      = valid_s2;
    assign VX_csr_wb.warp_num   = warp_num_s2;
    assign VX_csr_wb.rd         = rd_s2;
    assign VX_csr_wb.wb         = wb_s2;
    assign VX_csr_wb.csr_result = final_csr_data;

endmodule

38
old_rtl/VX_csr_wrapper.v Normal file
View File

@@ -0,0 +1,38 @@
`include "VX_define.v"
// VX_csr_wrapper: combinational CSR responder.
//
// Passes valid / warp_num / rd / wb straight from the request to the
// writeback bundle. The result is, per thread, the thread index when the
// request address is 12'h20, the (zero-extended) warp number when it is
// 12'h21, and 0 for any other address.
module VX_csr_wrapper (
    VX_csr_req_inter VX_csr_req,
    VX_csr_wb_inter  VX_csr_wb
);

    wire[`NT_M1:0][31:0] thread_ids;
    wire[`NT_M1:0][31:0] warp_ids;

    // One pass over the threads fills both lookup vectors.
    genvar lane;
    for (lane = 0; lane < `NT; lane = lane + 1) begin
        assign thread_ids[lane] = lane;
        assign warp_ids[lane]   = {{(31-`NW_M1){1'b0}}, VX_csr_req.warp_num};
    end

    wire thread_select = VX_csr_req.csr_address == 12'h20;
    wire warp_select   = VX_csr_req.csr_address == 12'h21;

    // Result selection
    assign VX_csr_wb.csr_result = thread_select ? thread_ids :
                                  warp_select   ? warp_ids   :
                                                  0;

    // Pass-through fields
    assign VX_csr_wb.wb       = VX_csr_req.wb;
    assign VX_csr_wb.rd       = VX_csr_req.rd;
    assign VX_csr_wb.warp_num = VX_csr_req.warp_num;
    assign VX_csr_wb.valid    = VX_csr_req.valid;

endmodule

361
old_rtl/VX_decode.v Normal file
View File

@@ -0,0 +1,361 @@
`include "VX_define.v"
// VX_decode: combinational instruction decoder.
//
// Takes the fetched instruction word, its PC, warp number and thread-valid
// mask from fd_inst_meta_de and produces the back-end request
// (VX_frE_to_bckE_req), the warp-stall request (VX_wstall), the join
// notification (VX_join) and terminate_sim, which is raised when the
// instruction word equals 32'h00000073.
module VX_decode(
    // Fetch Inputs
    VX_inst_meta_inter        fd_inst_meta_de,
    // Outputs
    VX_frE_to_bckE_req_inter  VX_frE_to_bckE_req,
    VX_wstall_inter           VX_wstall,
    VX_join_inter             VX_join,
    output wire               terminate_sim
);

    // ------------------------------------------------------------------
    // Fetch metadata
    // ------------------------------------------------------------------
    wire[31:0]     in_instruction = fd_inst_meta_de.instruction;
    wire[31:0]     in_curr_PC     = fd_inst_meta_de.inst_pc;
    wire[`NW_M1:0] in_warp_num    = fd_inst_meta_de.warp_num;
    wire[`NT_M1:0] in_valid       = fd_inst_meta_de.valid;
    wire           any_valid      = |in_valid;   // at least one active thread

    // ------------------------------------------------------------------
    // Raw instruction fields
    // ------------------------------------------------------------------
    wire[6:0]  curr_opcode = in_instruction[6:0];
    wire[4:0]  rd_bits     = in_instruction[11:7];
    wire[2:0]  func3       = in_instruction[14:12];
    wire[4:0]  rs1_bits    = in_instruction[19:15];
    wire[4:0]  rs2_bits    = in_instruction[24:20];
    wire[6:0]  func7       = in_instruction[31:25];
    wire[11:0] u_12        = in_instruction[31:20];

    assign VX_frE_to_bckE_req.valid    = fd_inst_meta_de.valid;
    assign VX_frE_to_bckE_req.warp_num = in_warp_num;
    assign VX_frE_to_bckE_req.curr_PC  = in_curr_PC;
    assign VX_frE_to_bckE_req.PC_next  = in_curr_PC + 32'h4;
    assign VX_frE_to_bckE_req.rd       = rd_bits;
    assign VX_frE_to_bckE_req.rs1      = rs1_bits;
    assign VX_frE_to_bckE_req.rs2      = rs2_bits;

    // ------------------------------------------------------------------
    // Instruction class, derived from the opcode
    // ------------------------------------------------------------------
    wire is_rtype = (curr_opcode == `R_INST);
    wire is_linst = (curr_opcode == `L_INST);
    wire is_itype = (curr_opcode == `ALU_INST) || is_linst;
    wire is_stype = (curr_opcode == `S_INST);
    wire is_btype = (curr_opcode == `B_INST);
    wire is_jal   = (curr_opcode == `JAL_INST);
    wire is_jalr  = (curr_opcode == `JALR_INST);
    wire is_lui   = (curr_opcode == `LUI_INST);
    wire is_auipc = (curr_opcode == `AUIPC_INST);
    wire is_sys   = (curr_opcode == `SYS_INST);
    wire is_gpgpu = (curr_opcode == `GPGPU_INST);

    wire is_csr       = is_sys && (func3 != 0);
    wire is_csr_immed = (is_csr) && (func3[2] == 1);
    wire is_e_inst    = in_instruction == 32'h00000073;

    // GPGPU sub-operations, selected by func3
    wire is_tmc     = is_gpgpu && (func3 == 0); // Goes to BE
    wire is_wspawn  = is_gpgpu && (func3 == 1); // Goes to BE
    wire is_split   = is_gpgpu && (func3 == 2); // Goes to BE
    wire is_join    = is_gpgpu && (func3 == 3); // Doesn't go to BE
    wire is_barrier = is_gpgpu && (func3 == 4); // Goes to BE

    assign VX_join.is_join       = is_join;
    assign VX_join.join_warp_num = in_warp_num;

    assign VX_frE_to_bckE_req.is_tmc     = is_tmc;
    assign VX_frE_to_bckE_req.is_wspawn  = is_wspawn;
    assign VX_frE_to_bckE_req.is_split   = is_split;
    assign VX_frE_to_bckE_req.is_barrier = is_barrier;
    assign VX_frE_to_bckE_req.is_csr     = is_csr;
    assign VX_frE_to_bckE_req.csr_immed  = is_csr_immed;

    // ------------------------------------------------------------------
    // Write-back source, second-operand source, memory access type
    // ------------------------------------------------------------------
    assign VX_frE_to_bckE_req.wb =
        (is_jal || is_jalr || is_e_inst)                        ? `WB_JAL :
        is_linst                                                ? `WB_MEM :
        (is_itype || is_rtype || is_lui || is_auipc || is_csr)  ? `WB_ALU :
                                                                  `NO_WB;

    assign VX_frE_to_bckE_req.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;

    // func3 is forwarded as the access type for loads / stores
    assign VX_frE_to_bckE_req.mem_read  = (is_linst) ? func3 : `NO_MEM_READ;
    assign VX_frE_to_bckE_req.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;

    // ------------------------------------------------------------------
    // Upper immediate: instruction bits [31:12] for LUI / AUIPC, else 0
    // ------------------------------------------------------------------
    reg[19:0] temp_upper_immed;
    always @(*) begin
        case(curr_opcode)
            `LUI_INST,
            `AUIPC_INST: temp_upper_immed = in_instruction[31:12];
            default:     temp_upper_immed = 20'h0;
        endcase // curr_opcode
    end
    assign VX_frE_to_bckE_req.upper_immed = temp_upper_immed;

    // ------------------------------------------------------------------
    // Jumps
    // ------------------------------------------------------------------
    // JAL: sign-extended {imm[20], imm[19:12], imm[11], imm[10:1], 0}
    wire[31:0] jal_1_offset = {{11{in_instruction[31]}},
                               in_instruction[31],
                               in_instruction[19:12],
                               in_instruction[20],
                               in_instruction[30:21],
                               1'b0};
    // JALR: sign-extended 12-bit immediate
    wire[31:0] jal_2_offset = {{20{u_12[11]}}, u_12};

    // SYS opcode with func3 == 0 and immediate < 2 is treated as a jump
    wire       jal_sys_jal  = (func3 == 3'h0) && (u_12 < 12'h2);
    wire[31:0] jal_sys_off  = jal_sys_jal ? 32'hb0000000 : 32'hdeadbeef;

    reg        temp_jal;
    reg[31:0]  temp_jal_offset;
    always @(*) begin
        case(curr_opcode)
            `JAL_INST: begin
                temp_jal        = any_valid;
                temp_jal_offset = jal_1_offset;
            end
            `JALR_INST: begin
                temp_jal        = any_valid;
                temp_jal_offset = jal_2_offset;
            end
            `SYS_INST: begin
                temp_jal        = jal_sys_jal && any_valid;
                temp_jal_offset = jal_sys_off;
            end
            default: begin
                temp_jal        = 1'b0;
                temp_jal_offset = 32'hdeadbeef;
            end
        endcase
    end

    assign VX_frE_to_bckE_req.jalQual    = is_jal;
    assign VX_frE_to_bckE_req.jal        = temp_jal;
    assign VX_frE_to_bckE_req.jal_offset = temp_jal_offset;

    wire ebreak = is_sys && (jal_sys_jal && any_valid);
    assign VX_frE_to_bckE_req.ebreak = ebreak;
    wire out_ebreak = ebreak;

    assign terminate_sim = is_e_inst;

    // ------------------------------------------------------------------
    // CSR address: the 12-bit immediate when func3 != 0 and the immediate
    // is at least 2, otherwise the placeholder 12'h55
    // ------------------------------------------------------------------
    assign VX_frE_to_bckE_req.csr_address =
        ((func3 != 3'h0) && (u_12 >= 12'h2)) ? u_12 : 12'h55;

    // ------------------------------------------------------------------
    // Immediate operand
    // ------------------------------------------------------------------
    // Immediate shifts (func3 1 or 5) use only the 5-bit shift amount
    wire       alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
    wire[11:0] alu_tempp   = alu_shift_i ? {{7{1'b0}}, rs2_bits} : u_12;

    reg[31:0] temp_itype_immed;
    always @(*) begin
        case(curr_opcode)
            `ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
            `S_INST:   temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31:25], in_instruction[11:7]};
            `L_INST:   temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31:20]};
            `B_INST:   temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
            default:   temp_itype_immed = 32'hdeadbeef;
        endcase
    end
    assign VX_frE_to_bckE_req.itype_immed = temp_itype_immed;

    // ------------------------------------------------------------------
    // Branch type and control-flow stall
    // ------------------------------------------------------------------
    reg[2:0] temp_branch_type;
    reg      temp_branch_stall;
    always @(*) begin
        case(curr_opcode)
            `B_INST: begin
                temp_branch_stall = any_valid;
                case(func3)
                    3'h0:    temp_branch_type = `BEQ;
                    3'h1:    temp_branch_type = `BNE;
                    3'h4:    temp_branch_type = `BLT;
                    3'h5:    temp_branch_type = `BGT;
                    3'h6:    temp_branch_type = `BLTU;
                    3'h7:    temp_branch_type = `BGTU;
                    default: temp_branch_type = `NO_BRANCH;
                endcase
            end
            `JAL_INST,
            `JALR_INST: begin
                temp_branch_type  = `NO_BRANCH;
                temp_branch_stall = any_valid;
            end
            default: begin
                temp_branch_type  = `NO_BRANCH;
                temp_branch_stall = 1'b0;
            end
        endcase
    end
    assign VX_frE_to_bckE_req.branch_type = temp_branch_type;

    // Stall the warp on any control-flow or thread-mask changing instruction
    assign VX_wstall.wstall   = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
    assign VX_wstall.warp_num = in_warp_num;

    // ------------------------------------------------------------------
    // ALU operation
    // ------------------------------------------------------------------
    // Base integer op from func3 (func7 picks ADD/SUB and SRL/SRA)
    reg[4:0] alu_op;
    always @(*) begin
        case(func3)
            3'h0:    alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB);
            3'h1:    alu_op = `SLLA;
            3'h2:    alu_op = `SLT;
            3'h3:    alu_op = `SLTU;
            3'h4:    alu_op = `XOR;
            3'h5:    alu_op = (func7 == 7'h0) ? `SRL : `SRA;
            3'h6:    alu_op = `OR;
            3'h7:    alu_op = `AND;
            default: alu_op = `NO_ALU;
        endcase
    end

    // Multiply / divide op from func3
    reg[4:0] mul_alu;
    always @(*) begin
        case(func3)
            3'h0:    mul_alu = `MUL;
            3'h1:    mul_alu = `MULH;
            3'h2:    mul_alu = `MULHSU;
            3'h3:    mul_alu = `MULHU;
            3'h4:    mul_alu = `DIV;
            3'h5:    mul_alu = `DIVU;
            3'h6:    mul_alu = `REM;
            3'h7:    mul_alu = `REMU;
            default: mul_alu = `NO_ALU;
        endcase
    end

    // CSR op from the low two bits of func3
    wire[1:0] csr_type = func3[1:0];
    reg[4:0]  csr_alu;
    always @(*) begin
        case(csr_type)
            2'h1:    csr_alu = `CSR_ALU_RW;
            2'h2:    csr_alu = `CSR_ALU_RS;
            2'h3:    csr_alu = `CSR_ALU_RC;
            default: csr_alu = `NO_ALU;
        endcase
    end

    // Priority selection by instruction class. Branches use SUB for the
    // branch types below `BLTU and SUBU for the rest.
    wire[4:0] temp_final_alu;
    assign temp_final_alu =
        is_btype               ? ((temp_branch_type < `BLTU) ? `SUB : `SUBU) :
        is_lui                 ? `LUI_ALU   :
        is_auipc               ? `AUIPC_ALU :
        is_csr                 ? csr_alu    :
        (is_stype || is_linst) ? `ADD       :
                                 alu_op;

    // R-type with func7[0] set selects the multiply / divide op instead
    assign VX_frE_to_bckE_req.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;

endmodule

269
old_rtl/VX_define.v Normal file
View File

@@ -0,0 +1,269 @@
`include "./VX_define_synth.v"
// Highest thread index; used as the MSB of per-thread vectors ([`NT_M1:0]).
`define NT_M1 (`NT-1)
// NW_M1 is actually log2(NW), not NW-1: a [`NW_M1:0] vector is therefore
// log2(NW)+1 bits wide.
`define NW_M1 (`CLOG2(`NW))
// Uncomment the below line if NW=1
// `define ONLY
// `define SYN 1
// `define ASIC 1
// `define SYN_FUNC 1
`define NUM_BARRIERS 4
// 7-bit opcode values; the decoder compares them against instruction bits [6:0].
`define R_INST 7'd51
`define L_INST 7'd3
`define ALU_INST 7'd19
`define S_INST 7'd35
`define B_INST 7'd99
`define LUI_INST 7'd55
`define AUIPC_INST 7'd23
`define JAL_INST 7'd111
`define JALR_INST 7'd103
`define SYS_INST 7'd115
`define GPGPU_INST 7'h6b
// Write-back source select (2 bits).
// NOTE(review): these four macros are defined again, with identical values,
// under the WRITEBACK heading further down.
`define WB_ALU 2'h1
`define WB_MEM 2'h2
`define WB_JAL 2'h3
`define NO_WB 2'h0
// Second ALU operand select (unsized literals).
`define RS2_IMMED 1
`define RS2_REG 0
// Load type; 3'h7 means "no load". The decoder forwards func3 directly as
// this value for load instructions.
`define NO_MEM_READ 3'h7
`define LB_MEM_READ 3'h0
`define LH_MEM_READ 3'h1
`define LW_MEM_READ 3'h2
`define LBU_MEM_READ 3'h4
`define LHU_MEM_READ 3'h5
// Store type; 3'h7 means "no store". The decoder forwards func3 directly as
// this value for store instructions.
`define NO_MEM_WRITE 3'h7
`define SB_MEM_WRITE 3'h0
`define SH_MEM_WRITE 3'h1
`define SW_MEM_WRITE 3'h2
// Branch type (3 bits). The decoder relies on the ordering: types numerically
// below `BLTU use `SUB, the others use `SUBU.
`define NO_BRANCH 3'h0
`define BEQ 3'h1
`define BNE 3'h2
`define BLT 3'h3
`define BGT 3'h4
`define BLTU 3'h5
`define BGTU 3'h6
// ALU operation select (5 bits).
// NOTE(review): `NO_ALU and `CSR_ALU_RC share the value 5'd15, so a consumer
// that matches on `CSR_ALU_RC cannot tell it apart from `NO_ALU - confirm
// this overlap is intended.
`define NO_ALU 5'd15
`define ADD 5'd0
`define SUB 5'd1
`define SLLA 5'd2
`define SLT 5'd3
`define SLTU 5'd4
`define XOR 5'd5
`define SRL 5'd6
`define SRA 5'd7
`define OR 5'd8
`define AND 5'd9
`define SUBU 5'd10
`define LUI_ALU 5'd11
`define AUIPC_ALU 5'd12
`define CSR_ALU_RW 5'd13
`define CSR_ALU_RS 5'd14
`define CSR_ALU_RC 5'd15
`define MUL 5'd16
`define MULH 5'd17
`define MULHSU 5'd18
`define MULHU 5'd19
`define DIV 5'd20
`define DIVU 5'd21
`define REM 5'd22
`define REMU 5'd23
// WRITEBACK (repeat of the write-back select values defined above)
`define WB_ALU 2'h1
`define WB_MEM 2'h2
`define WB_JAL 2'h3
`define NO_WB 2'h0
// JAL
`define JUMP 1'h1
`define NO_JUMP 1'h0
// STALLS
`define STALL 1'h1
`define NO_STALL 1'h0
// Branch outcome
`define TAKEN 1'b1
`define NOT_TAKEN 1'b0
// Register index 0
`define ZERO_REG 5'h0
// CLOG2(x): constant ceil(log2(x)) for 2 < x <= 1024, written as a conditional
// chain so it can be used where $clog2 is not available.
//   - any x <= 2 (including 1) yields 1, never 0
//   - any x > 1024 yields the sentinel -199
// The whole expansion and every use of the argument are parenthesized so the
// macro can be embedded in a larger expression: without the outer
// parentheses, a trailing operator at the use site (for example
// `CLOG2(n)-1) would bind only to the final -199 branch of the chain.
`define CLOG2(x) \
((((x) <= 2) ? 1 : \
((x) <= 4) ? 2 : \
((x) <= 8) ? 3 : \
((x) <= 16) ? 4 : \
((x) <= 32) ? 5 : \
((x) <= 64) ? 6 : \
((x) <= 128) ? 7 : \
((x) <= 256) ? 8 : \
((x) <= 512) ? 9 : \
((x) <= 1024) ? 10 : \
-199))
// `define PARAM
// oooooo
// ---------------------------------------------------------------------
// Instruction cache configuration
// ---------------------------------------------------------------------
// Total size, in bytes
`define ICACHE_SIZE 4096
`define ICACHE_WAYS 2
// Block (line) size, in bytes
`define ICACHE_BLOCK 64
`define ICACHE_BANKS 4
// NOTE(review): the three macros below that expand to a bare `CLOG2(...)
// (no surrounding parentheses here) are only safe inside larger expressions
// if `CLOG2 itself expands to a fully parenthesized expression - confirm.
`define ICACHE_LOG_NUM_BANKS `CLOG2(`ICACHE_BANKS)
// 32-bit words of one block held by each bank: block bytes / (banks * 4)
`define ICACHE_NUM_WORDS_PER_BLOCK (`ICACHE_BLOCK / (`ICACHE_BANKS * 4))
`define ICACHE_NUM_REQ 1
`define ICACHE_LOG_NUM_REQ `CLOG2(`ICACHE_NUM_REQ)
//set this to 1 if CACHE_WAYS is 1
`define ICACHE_WAY_INDEX `CLOG2(`ICACHE_WAYS)
//`define ICACHE_WAY_INDEX 1
// Bytes of one block held by each bank
`define ICACHE_BLOCK_PER_BANK (`ICACHE_BLOCK / `ICACHE_BANKS)
// Offset
// Number of offset bits, and where they sit in the address: directly above
// the 2 low address bits and the bank-select bits.
`define ICACHE_OFFSET_NB (`CLOG2(`ICACHE_NUM_WORDS_PER_BLOCK))
`define ICACHE_ADDR_OFFSET_ST (2+$clog2(`ICACHE_BANKS))
`define ICACHE_ADDR_OFFSET_ED (`ICACHE_ADDR_OFFSET_ST+(`ICACHE_OFFSET_NB)-1)
`define ICACHE_ADDR_OFFSET_RNG `ICACHE_ADDR_OFFSET_ED:`ICACHE_ADDR_OFFSET_ST
// NOTE(review): the "-1" here follows a `CLOG2 expansion directly; the
// resulting range is only the intended (offset bits - 1):0 if `CLOG2 expands
// to a fully parenthesized expression - confirm. `ICACHE_OFFSET_ED below
// computes the same bound with $clog2.
`define ICACHE_OFFSET_SIZE_RNG (`CLOG2(`ICACHE_NUM_WORDS_PER_BLOCK)-1):0
`define ICACHE_OFFSET_ST 0
`define ICACHE_OFFSET_ED ($clog2(`ICACHE_NUM_WORDS_PER_BLOCK)-1)
// Index
// Number of sets: size / (ways * block size)
// `define ICACHE_NUM_IND (`ICACHE_SIZE / (`ICACHE_WAYS * `ICACHE_BLOCK_PER_BANK))
`define ICACHE_NUM_IND (`ICACHE_SIZE / (`ICACHE_WAYS * `ICACHE_BLOCK))
`define ICACHE_IND_NB ($clog2(`ICACHE_NUM_IND))
// Index bits sit directly above the offset bits in the address
`define ICACHE_IND_ST (`ICACHE_ADDR_OFFSET_ED+1)
`define ICACHE_IND_ED (`ICACHE_IND_ST+`ICACHE_IND_NB-1)
`define ICACHE_ADDR_IND_RNG `ICACHE_IND_ED:`ICACHE_IND_ST
`define ICACHE_IND_SIZE_RNG `ICACHE_IND_NB-1:0
`define ICACHE_IND_SIZE_START 0
`define ICACHE_IND_SIZE_END `ICACHE_IND_NB-1
// Tag
// Tag is everything above the index bits, up to address bit 31
`define ICACHE_ADDR_TAG_RNG 31:(`ICACHE_IND_ED+1)
`define ICACHE_TAG_SIZE_RNG (32-(`ICACHE_IND_ED+1)-1):0
`define ICACHE_TAG_SIZE_START 0
`define ICACHE_TAG_SIZE_END (32-(`ICACHE_IND_ED+1)-1)
`define ICACHE_ADDR_TAG_START (`ICACHE_IND_ED+1)
`define ICACHE_ADDR_TAG_END 31
// ---------------------------------------------------------------------
// Data cache configuration (same layout as the ICACHE_* macros, but the
// logarithms here use $clog2 and there is one request per thread)
// ---------------------------------------------------------------------
// Total size, in bytes
`define DCACHE_SIZE 4096
`define DCACHE_WAYS 2
// Block (line) size, in bytes
`define DCACHE_BLOCK 64
`define DCACHE_BANKS 4
`define DCACHE_LOG_NUM_BANKS $clog2(`DCACHE_BANKS)
// 32-bit words of one block held by each bank: block bytes / (banks * 4)
`define DCACHE_NUM_WORDS_PER_BLOCK (`DCACHE_BLOCK / (`DCACHE_BANKS * 4))
`define DCACHE_NUM_REQ `NT
`define DCACHE_LOG_NUM_REQ $clog2(`DCACHE_NUM_REQ)
//set this to 1 if CACHE_WAYS is 1
`define DCACHE_WAY_INDEX $clog2(`DCACHE_WAYS)
//`define DCACHE_WAY_INDEX 1
// Bytes of one block held by each bank
`define DCACHE_BLOCK_PER_BANK (`DCACHE_BLOCK / `DCACHE_BANKS)
// Offset
// Offset bits sit directly above the 2 low address bits and the bank-select bits
`define DCACHE_OFFSET_NB ($clog2(`DCACHE_NUM_WORDS_PER_BLOCK))
`define DCACHE_ADDR_OFFSET_ST (2+$clog2(`DCACHE_BANKS))
`define DCACHE_ADDR_OFFSET_ED (`DCACHE_ADDR_OFFSET_ST+(`DCACHE_OFFSET_NB)-1)
`define DCACHE_ADDR_OFFSET_RNG `DCACHE_ADDR_OFFSET_ED:`DCACHE_ADDR_OFFSET_ST
`define DCACHE_OFFSET_SIZE_RNG ($clog2(`DCACHE_NUM_WORDS_PER_BLOCK)-1):0
`define DCACHE_OFFSET_ST 0
`define DCACHE_OFFSET_ED ($clog2(`DCACHE_NUM_WORDS_PER_BLOCK)-1)
// Index
// Number of sets: size / (ways * block size)
// `define DCACHE_NUM_IND (`DCACHE_SIZE / (`DCACHE_WAYS * `DCACHE_BLOCK_PER_BANK))
`define DCACHE_NUM_IND (`DCACHE_SIZE / (`DCACHE_WAYS * `DCACHE_BLOCK))
`define DCACHE_IND_NB ($clog2(`DCACHE_NUM_IND))
// Index bits sit directly above the offset bits in the address
`define DCACHE_IND_ST (`DCACHE_ADDR_OFFSET_ED+1)
`define DCACHE_IND_ED (`DCACHE_IND_ST+`DCACHE_IND_NB-1)
`define DCACHE_ADDR_IND_RNG `DCACHE_IND_ED:`DCACHE_IND_ST
`define DCACHE_IND_SIZE_RNG `DCACHE_IND_NB-1:0
`define DCACHE_IND_SIZE_START 0
`define DCACHE_IND_SIZE_END `DCACHE_IND_NB-1
// Tag
// Tag is everything above the index bits, up to address bit 31
`define DCACHE_ADDR_TAG_RNG 31:(`DCACHE_IND_ED+1)
`define DCACHE_TAG_SIZE_RNG (32-(`DCACHE_IND_ED+1)-1):0
`define DCACHE_TAG_SIZE_START 0
`define DCACHE_TAG_SIZE_END (32-(`DCACHE_IND_ED+1)-1)
`define DCACHE_ADDR_TAG_START (`DCACHE_IND_ED+1)
`define DCACHE_ADDR_TAG_END 31
// Mask
// 32-bit masks with the low (block size - 1) bits cleared, i.e. all ones
// above the within-block byte offset when the block size is a power of two.
`define DCACHE_MEM_REQ_ADDR_MASK (32'hffffffff - (`DCACHE_BLOCK-1))
`define ICACHE_MEM_REQ_ADDR_MASK (32'hffffffff - (`ICACHE_BLOCK-1))
// ---------------------------------------------------------------------
// Shared memory configuration
// ---------------------------------------------------------------------
//`define SHARED_MEMORY_SIZE 4096
// Total size, in bytes
`define SHARED_MEMORY_SIZE 8192
`define SHARED_MEMORY_BANKS 4
//`define SHARED_MEMORY_BYTES_PER_READ 16
//`define SHARED_MEMORY_HEIGHT ((`SHARED_MEMORY_SIZE) / (`SHARED_MEMORY_BANKS * `SHARED_MEMORY_BYTES_PER_READ))
//`define SHARED_MEMORY_SIZE 16384
//`define SHARED_MEMORY_BANKS 8
`define SHARED_MEMORY_BYTES_PER_READ 16
//`define SHARED_MEMORY_BITS_PER_BANK 3
// NOTE(review): bare `CLOG2(...) without surrounding parentheses here; safe
// inside larger expressions only if `CLOG2 expands to a fully parenthesized
// expression - confirm.
`define SHARED_MEMORY_BITS_PER_BANK `CLOG2(`SHARED_MEMORY_BANKS)
// One request per thread
`define SHARED_MEMORY_NUM_REQ `NT
// 32-bit words returned by one read
`define SHARED_MEMORY_WORDS_PER_READ (`SHARED_MEMORY_BYTES_PER_READ / 4)
`define SHARED_MEMORY_LOG_WORDS_PER_READ $clog2(`SHARED_MEMORY_WORDS_PER_READ)
// Rows per bank: size / (banks * bytes per read)
`define SHARED_MEMORY_HEIGHT ((`SHARED_MEMORY_SIZE) / (`SHARED_MEMORY_BANKS * `SHARED_MEMORY_BYTES_PER_READ))
// Address bit positions, from low to high: 2 low bits, then bank select,
// then word-within-read (block offset), then row index.
`define SHARED_MEMORY_BANK_OFFSET_ST (2)
`define SHARED_MEMORY_BANK_OFFSET_ED (2+$clog2(`SHARED_MEMORY_BANKS)-1)
`define SHARED_MEMORY_BLOCK_OFFSET_ST (`SHARED_MEMORY_BANK_OFFSET_ED + 1)
`define SHARED_MEMORY_BLOCK_OFFSET_ED (`SHARED_MEMORY_BLOCK_OFFSET_ST +`SHARED_MEMORY_LOG_WORDS_PER_READ-1)
`define SHARED_MEMORY_INDEX_OFFSET_ST (`SHARED_MEMORY_BLOCK_OFFSET_ED + 1)
`define SHARED_MEMORY_INDEX_OFFSET_ED (`SHARED_MEMORY_INDEX_OFFSET_ST + $clog2(`SHARED_MEMORY_HEIGHT)-1)

View File

@@ -0,0 +1,2 @@
// NT sizes the per-thread vectors ([`NT_M1:0], i.e. NT entries).
`define NT 4
// NW is the value whose log2 defines `NW_M1, the MSB of warp-number fields.
// NOTE(review): presumably NT = threads per warp and NW = number of warps -
// confirm against the scheduler.
`define NW 8

View File

@@ -0,0 +1,188 @@
`include "VX_define.v"
// VX_dmem_controller
// Steers data-side requests (VX_dcache_req) to either the shared memory or the
// data cache, forwards instruction-side requests (VX_icache_req) to the
// instruction cache, and merges the results back onto VX_dcache_rsp /
// VX_icache_rsp. The memory-side ports of the two caches are exposed through
// VX_dram_req_rsp (data cache) and VX_dram_req_rsp_icache (instruction cache).
module VX_dmem_controller (
input wire clk,
input wire reset,
// MEM-RAM
VX_dram_req_rsp_inter VX_dram_req_rsp,
VX_dram_req_rsp_inter VX_dram_req_rsp_icache,
// MEM-Processor
VX_icache_request_inter VX_icache_req,
VX_icache_response_inter VX_icache_rsp,
VX_dcache_request_inter VX_dcache_req,
VX_dcache_response_inter VX_dcache_rsp
);
// A request targets shared memory when address bits [31:24] of lane 0 are 0xFF.
// Only lane 0's address is inspected; the decision applies to every lane.
wire to_shm = VX_dcache_req.out_cache_driver_in_address[0][31:24] == 8'hFF;
// Per-lane valid bits, gated so that exactly one of the two targets sees them.
wire[`NT_M1:0] sm_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{to_shm}};
wire[`NT_M1:0] cache_driver_in_valid = VX_dcache_req.out_cache_driver_in_valid & {`NT{~to_shm}};
// High when the data-cache request is a write and at least one lane is valid.
wire read_or_write = (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|cache_driver_in_valid);
wire[`NT_M1:0][31:0] cache_driver_in_address = VX_dcache_req.out_cache_driver_in_address;
// The read/write opcodes are forced to the "no operation" encodings whenever
// the corresponding target has no valid lane.
wire[2:0] cache_driver_in_mem_read = !(|cache_driver_in_valid) ? `NO_MEM_READ : VX_dcache_req.out_cache_driver_in_mem_read;
wire[2:0] cache_driver_in_mem_write = !(|cache_driver_in_valid) ? `NO_MEM_WRITE : VX_dcache_req.out_cache_driver_in_mem_write;
wire[`NT_M1:0][31:0] cache_driver_in_data = VX_dcache_req.out_cache_driver_in_data;
wire[2:0] sm_driver_in_mem_read = !(|sm_driver_in_valid) ? `NO_MEM_READ : VX_dcache_req.out_cache_driver_in_mem_read;
wire[2:0] sm_driver_in_mem_write = !(|sm_driver_in_valid) ? `NO_MEM_WRITE : VX_dcache_req.out_cache_driver_in_mem_write;
wire[`NT_M1:0][31:0] cache_driver_out_data;
wire[`NT_M1:0][31:0] sm_driver_out_data;
// Driven by the shared memory's out_valid port below; not consumed in this module.
wire[`NT_M1:0] cache_driver_out_valid; // Not used for now
wire sm_delay;
wire cache_delay;
// I_Cache Signals
wire[31:0] icache_instruction_out;
wire icache_delay;
wire icache_driver_in_valid = VX_icache_req.out_cache_driver_in_valid;
wire[31:0] icache_driver_in_address = VX_icache_req.pc_address;
wire[2:0] icache_driver_in_mem_read = !(|icache_driver_in_valid) ? `NO_MEM_READ : VX_icache_req.out_cache_driver_in_mem_read;
wire[2:0] icache_driver_in_mem_write = !(|icache_driver_in_valid) ? `NO_MEM_WRITE : VX_icache_req.out_cache_driver_in_mem_write;
wire[31:0] icache_driver_in_data = VX_icache_req.out_cache_driver_in_data;
wire read_or_write_ic = (VX_icache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|icache_driver_in_valid);
// NOTE: valid_read_cache is not referenced anywhere else in this module.
wire valid_read_cache = !cache_delay && cache_driver_in_valid[0];
// Shared memory: receives the same address/data buses as the data cache but
// its own gated valid bits and opcodes.
VX_shared_memory #(
.SM_SIZE (`SHARED_MEMORY_SIZE),
.SM_BANKS (`SHARED_MEMORY_BANKS),
.SM_BYTES_PER_READ (`SHARED_MEMORY_BYTES_PER_READ),
.SM_WORDS_PER_READ (`SHARED_MEMORY_WORDS_PER_READ),
.SM_LOG_WORDS_PER_READ (`SHARED_MEMORY_LOG_WORDS_PER_READ),
.SM_BANK_OFFSET_START (`SHARED_MEMORY_BANK_OFFSET_ST),
.SM_BANK_OFFSET_END (`SHARED_MEMORY_BANK_OFFSET_ED),
.SM_BLOCK_OFFSET_START (`SHARED_MEMORY_BLOCK_OFFSET_ST),
.SM_BLOCK_OFFSET_END (`SHARED_MEMORY_BLOCK_OFFSET_ED),
.SM_INDEX_START (`SHARED_MEMORY_INDEX_OFFSET_ST),
.SM_INDEX_END (`SHARED_MEMORY_INDEX_OFFSET_ED),
.SM_HEIGHT (`SHARED_MEMORY_HEIGHT),
.NUM_REQ (`SHARED_MEMORY_NUM_REQ),
.BITS_PER_BANK (`SHARED_MEMORY_BITS_PER_BANK)
)
shared_memory
(
.clk (clk),
.reset (reset),
.in_valid (sm_driver_in_valid),
.in_address(cache_driver_in_address),
.in_data (cache_driver_in_data),
.mem_read (sm_driver_in_mem_read),
.mem_write (sm_driver_in_mem_write),
.out_valid (cache_driver_out_valid),
.out_data (sm_driver_out_data),
.stall (sm_delay)
);
// Data cache: configured from the `DCACHE_* macros; memory side goes to
// VX_dram_req_rsp.
VX_d_cache#(
.CACHE_SIZE (`DCACHE_SIZE),
.CACHE_WAYS (`DCACHE_WAYS),
.CACHE_BLOCK (`DCACHE_BLOCK),
.CACHE_BANKS (`DCACHE_BANKS),
.LOG_NUM_BANKS (`DCACHE_LOG_NUM_BANKS),
.NUM_REQ (`DCACHE_NUM_REQ),
.LOG_NUM_REQ (`DCACHE_LOG_NUM_REQ),
.NUM_IND (`DCACHE_NUM_IND),
.CACHE_WAY_INDEX (`DCACHE_WAY_INDEX),
.NUM_WORDS_PER_BLOCK (`DCACHE_NUM_WORDS_PER_BLOCK),
.OFFSET_SIZE_START (`DCACHE_OFFSET_ST),
.OFFSET_SIZE_END (`DCACHE_OFFSET_ED),
.TAG_SIZE_START (`DCACHE_TAG_SIZE_START),
.TAG_SIZE_END (`DCACHE_TAG_SIZE_END),
.IND_SIZE_START (`DCACHE_IND_SIZE_START),
.IND_SIZE_END (`DCACHE_IND_SIZE_END),
.ADDR_TAG_START (`DCACHE_ADDR_TAG_START),
.ADDR_TAG_END (`DCACHE_ADDR_TAG_END),
.ADDR_OFFSET_START (`DCACHE_ADDR_OFFSET_ST),
.ADDR_OFFSET_END (`DCACHE_ADDR_OFFSET_ED),
.ADDR_IND_START (`DCACHE_IND_ST),
.ADDR_IND_END (`DCACHE_IND_ED),
.MEM_ADDR_REQ_MASK (`DCACHE_MEM_REQ_ADDR_MASK)
)
dcache
(
.clk (clk),
.rst (reset),
.i_p_valid (cache_driver_in_valid),
.i_p_addr (cache_driver_in_address),
.i_p_writedata (cache_driver_in_data),
.i_p_read_or_write (read_or_write),
.i_p_mem_read (cache_driver_in_mem_read),
.i_p_mem_write (cache_driver_in_mem_write),
.o_p_readdata (cache_driver_out_data),
.o_p_delay (cache_delay),
.o_m_evict_addr (VX_dram_req_rsp.o_m_evict_addr),
.o_m_read_addr (VX_dram_req_rsp.o_m_read_addr),
.o_m_valid (VX_dram_req_rsp.o_m_valid),
.o_m_writedata (VX_dram_req_rsp.o_m_writedata),
.o_m_read_or_write (VX_dram_req_rsp.o_m_read_or_write),
.i_m_readdata (VX_dram_req_rsp.i_m_readdata),
.i_m_ready (VX_dram_req_rsp.i_m_ready)
);
// Instruction cache: a second VX_d_cache instance, configured from the
// `ICACHE_* macros; memory side goes to VX_dram_req_rsp_icache.
VX_d_cache#(
.CACHE_SIZE (`ICACHE_SIZE),
.CACHE_WAYS (`ICACHE_WAYS),
.CACHE_BLOCK (`ICACHE_BLOCK),
.CACHE_BANKS (`ICACHE_BANKS),
.LOG_NUM_BANKS (`ICACHE_LOG_NUM_BANKS),
.NUM_REQ (`ICACHE_NUM_REQ),
.LOG_NUM_REQ (`ICACHE_LOG_NUM_REQ),
.NUM_IND (`ICACHE_NUM_IND),
.CACHE_WAY_INDEX (`ICACHE_WAY_INDEX),
.NUM_WORDS_PER_BLOCK (`ICACHE_NUM_WORDS_PER_BLOCK),
.OFFSET_SIZE_START (`ICACHE_OFFSET_ST),
.OFFSET_SIZE_END (`ICACHE_OFFSET_ED),
.TAG_SIZE_START (`ICACHE_TAG_SIZE_START),
.TAG_SIZE_END (`ICACHE_TAG_SIZE_END),
.IND_SIZE_START (`ICACHE_IND_SIZE_START),
.IND_SIZE_END (`ICACHE_IND_SIZE_END),
.ADDR_TAG_START (`ICACHE_ADDR_TAG_START),
.ADDR_TAG_END (`ICACHE_ADDR_TAG_END),
.ADDR_OFFSET_START (`ICACHE_ADDR_OFFSET_ST),
.ADDR_OFFSET_END (`ICACHE_ADDR_OFFSET_ED),
.ADDR_IND_START (`ICACHE_IND_ST),
.ADDR_IND_END (`ICACHE_IND_ED),
.MEM_ADDR_REQ_MASK (`ICACHE_MEM_REQ_ADDR_MASK)
) icache
(
.clk (clk),
.rst (reset),
.i_p_valid (icache_driver_in_valid),
.i_p_addr (icache_driver_in_address),
.i_p_writedata (icache_driver_in_data),
.i_p_read_or_write (read_or_write_ic),
.i_p_mem_read (icache_driver_in_mem_read),
.i_p_mem_write (icache_driver_in_mem_write),
.o_p_readdata (icache_instruction_out),
.o_p_delay (icache_delay),
.o_m_evict_addr (VX_dram_req_rsp_icache.o_m_evict_addr),
.o_m_read_addr (VX_dram_req_rsp_icache.o_m_read_addr),
.o_m_valid (VX_dram_req_rsp_icache.o_m_valid),
.o_m_writedata (VX_dram_req_rsp_icache.o_m_writedata),
.o_m_read_or_write (VX_dram_req_rsp_icache.o_m_read_or_write),
.i_m_readdata (VX_dram_req_rsp_icache.i_m_readdata),
.i_m_ready (VX_dram_req_rsp_icache.i_m_ready)
);
// Response merge: the data source is chosen combinationally from to_shm (i.e.
// from the request currently presented), and the core is stalled while either
// the shared memory or the data cache reports a delay.
assign VX_dcache_rsp.in_cache_driver_out_data = to_shm ? sm_driver_out_data : cache_driver_out_data;
assign VX_dcache_rsp.delay = sm_delay || cache_delay;
assign VX_icache_rsp.instruction = icache_instruction_out;
assign VX_icache_rsp.delay = icache_delay;
endmodule

168
old_rtl/VX_execute_unit.v Normal file
View File

@@ -0,0 +1,168 @@
`include "VX_define.v"
// VX_execute_unit
// Runs one VX_alu per lane on the incoming execute request, drives the
// writeback interface combinationally, and produces registered JAL and branch
// responses.
module VX_execute_unit (
input wire clk,
input wire reset,
// Request
VX_exec_unit_req_inter VX_exec_unit_req,
// Output
// Writeback
VX_inst_exec_wb_inter VX_inst_exec_wb,
// JAL Response
VX_jal_response_inter VX_jal_rsp,
// Branch Response
VX_branch_response_inter VX_branch_rsp
);
// Local aliases of the request fields.
wire[`NT_M1:0][31:0] in_a_reg_data;
wire[`NT_M1:0][31:0] in_b_reg_data;
wire[4:0] in_alu_op;
wire in_rs2_src;
wire[31:0] in_itype_immed;
// NOTE: in_branch_type is assigned below but not read; the branch decision
// uses VX_exec_unit_req.branch_type directly.
wire[2:0] in_branch_type;
wire[19:0] in_upper_immed;
wire in_jal;
wire[31:0] in_jal_offset;
wire[31:0] in_curr_PC;
assign in_a_reg_data = VX_exec_unit_req.a_reg_data;
assign in_b_reg_data = VX_exec_unit_req.b_reg_data;
assign in_alu_op = VX_exec_unit_req.alu_op;
assign in_rs2_src = VX_exec_unit_req.rs2_src;
assign in_itype_immed = VX_exec_unit_req.itype_immed;
assign in_branch_type = VX_exec_unit_req.branch_type;
assign in_upper_immed = VX_exec_unit_req.upper_immed;
assign in_jal = VX_exec_unit_req.jal;
assign in_jal_offset = VX_exec_unit_req.jal_offset;
assign in_curr_PC = VX_exec_unit_req.curr_PC;
wire[`NT_M1:0][31:0] alu_result;
// One ALU per lane; every lane shares the same opcode, immediates and PC and
// differs only in its a/b register operands.
genvar index_out_reg;
generate
for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1)
begin
VX_alu vx_alu(
// .in_reg_data (in_reg_data[1:0]),
.in_1 (in_a_reg_data[index_out_reg]),
.in_2 (in_b_reg_data[index_out_reg]),
.in_rs2_src (in_rs2_src),
.in_itype_immed(in_itype_immed),
.in_upper_immed(in_upper_immed),
.in_alu_op (in_alu_op),
.in_curr_PC (in_curr_PC),
.out_alu_result(alu_result[index_out_reg])
);
end
endgenerate
// Select one valid lane; its ALU result decides the branch direction and its
// a_reg_data provides the JAL base address.
wire [$clog2(`NT)-1:0] jal_branch_use_index;
wire jal_branch_found_valid;
VX_generic_priority_encoder #(.N(`NT)) choose_alu_result(
.valids(VX_exec_unit_req.valid),
.index (jal_branch_use_index),
.found (jal_branch_found_valid)
);
wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index];
reg temp_branch_dir;
// Branch direction from the selected lane's ALU result: equality branches
// test for zero, the ordered branches test bit 31.
// NOTE(review): the signed and unsigned variants use the same bit-31 test
// here, so any signed/unsigned distinction must already be encoded in the
// ALU result - confirm in VX_alu.
always @(*)
begin
case(VX_exec_unit_req.branch_type)
`BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN;
`BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN;
`BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN;
`BLTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN;
`NO_BRANCH: temp_branch_dir = `NOT_TAKEN;
default: temp_branch_dir = `NOT_TAKEN;
endcase // in_branch_type
end
// PC_next replicated into every lane; written back instead of the ALU result
// when the request is a JAL.
wire[`NT_M1:0][31:0] duplicate_PC_data;
genvar i;
generate
for (i = 0; i < `NT; i=i+1)
begin
assign duplicate_PC_data[i] = VX_exec_unit_req.PC_next;
end
endgenerate
// VX_inst_exec_wb_inter VX_inst_exec_wb_temp();
// Local (pre-register) copies of the JAL and branch responses.
// JAL Response
VX_jal_response_inter VX_jal_rsp_temp();
// Branch Response
VX_branch_response_inter VX_branch_rsp_temp();
// Actual Writeback
// Writeback is driven directly from the request (no pipeline register here).
assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd;
assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb;
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid;
assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;
assign VX_inst_exec_wb.exec_wb_pc = in_curr_PC;
// Jal rsp
// Destination = selected lane's a_reg_data + jal_offset.
assign VX_jal_rsp_temp.jal = in_jal;
assign VX_jal_rsp_temp.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
assign VX_jal_rsp_temp.jal_warp_num = VX_exec_unit_req.warp_num;
// Branch rsp
// A branch is reported only when the request is a branch and at least one
// lane is valid. Destination = curr_PC + (itype_immed << 1).
assign VX_branch_rsp_temp.valid_branch = (VX_exec_unit_req.branch_type != `NO_BRANCH) && (|VX_exec_unit_req.valid);
assign VX_branch_rsp_temp.branch_dir = temp_branch_dir;
assign VX_branch_rsp_temp.branch_warp_num = VX_exec_unit_req.warp_num;
assign VX_branch_rsp_temp.branch_dest = $signed(VX_exec_unit_req.curr_PC) + ($signed(VX_exec_unit_req.itype_immed) << 1); // itype_immed = branch_offset
// Constant 0 used to tie off stall/flush on the response registers.
wire zero = 0;
// VX_generic_register #(.N(174)) exec_reg(
// .clk (clk),
// .reset(reset),
// .stall(zero),
// .flush(zero),
// .in ({VX_inst_exec_wb_temp.rd, VX_inst_exec_wb_temp.wb, VX_inst_exec_wb_temp.wb_valid, VX_inst_exec_wb_temp.wb_warp_num, VX_inst_exec_wb_temp.alu_result, VX_inst_exec_wb_temp.exec_wb_pc}),
// .out ({VX_inst_exec_wb.rd , VX_inst_exec_wb.wb , VX_inst_exec_wb.wb_valid , VX_inst_exec_wb.wb_warp_num , VX_inst_exec_wb.alu_result , VX_inst_exec_wb.exec_wb_pc })
// );
// JAL response register, never stalled or flushed. The width presumably
// breaks down as jal (1) + jal_dest (32) + warp number (`NW_M1 + 1) - the
// field widths live in the interface declarations; confirm there.
VX_generic_register #(.N(33 + `NW_M1 + 1)) jal_reg(
.clk (clk),
.reset(reset),
.stall(zero),
.flush(zero),
.in ({VX_jal_rsp_temp.jal, VX_jal_rsp_temp.jal_dest, VX_jal_rsp_temp.jal_warp_num}),
.out ({VX_jal_rsp.jal , VX_jal_rsp.jal_dest , VX_jal_rsp.jal_warp_num})
);
// Branch response register, never stalled or flushed. The width presumably
// breaks down as valid (1) + dir (1) + dest (32) + warp number (`NW_M1 + 1) -
// confirm against the interface declarations.
VX_generic_register #(.N(34 + `NW_M1 + 1)) branch_reg(
.clk (clk),
.reset(reset),
.stall(zero),
.flush(zero),
.in ({VX_branch_rsp_temp.valid_branch, VX_branch_rsp_temp.branch_dir, VX_branch_rsp_temp.branch_warp_num, VX_branch_rsp_temp.branch_dest}),
.out ({VX_branch_rsp.valid_branch , VX_branch_rsp.branch_dir , VX_branch_rsp.branch_warp_num , VX_branch_rsp.branch_dest })
);
// always @(*) begin
// case(in_alu_op)
// `CSR_ALU_RW: out_csr_result = in_csr_mask;
// `CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask;
// `CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask);
// default: out_csr_result = 32'hdeadbeef;
// endcase
// end
// assign out_is_csr = VX_exec_unit_req.is_csr;
// assign out_csr_address = VX_exec_unit_req.csr_address;
endmodule

103
old_rtl/VX_fetch.v Normal file
View File

@@ -0,0 +1,103 @@
`include "VX_define.v"
// VX_fetch
// Wraps the warp scheduler: forwards warp-control, stall, join, JAL and branch
// events to it, issues an instruction-cache read for the scheduled warp's PC,
// and hands the fetched instruction plus its metadata to the next stage.
module VX_fetch (
input wire clk,
input wire reset,
VX_wstall_inter VX_wstall,
VX_join_inter VX_join,
input wire schedule_delay,
VX_icache_response_inter icache_response,
VX_icache_request_inter icache_request,
output wire out_ebreak,
VX_jal_response_inter VX_jal_rsp,
VX_branch_response_inter VX_branch_rsp,
VX_inst_meta_inter fe_inst_meta_fd,
VX_warp_ctl_inter VX_warp_ctl
);
// Locals
// The scheduler is stalled while the pipeline reports a scheduling delay or
// the instruction cache is still busy.
wire pipe_stall;
assign pipe_stall = schedule_delay || icache_response.delay;
wire[`NT_M1:0] thread_mask;
wire[`NW_M1:0] warp_num;
wire[31:0] warp_pc;
wire scheduled_warp;
VX_warp_scheduler warp_scheduler(
.clk (clk),
.reset (reset),
.stall (pipe_stall),
.is_barrier (VX_warp_ctl.is_barrier),
.barrier_id (VX_warp_ctl.barrier_id),
.num_warps (VX_warp_ctl.num_warps),
.barrier_warp_num (VX_warp_ctl.warp_num),
// Wspawn
.wspawn (VX_warp_ctl.wspawn),
// NOTE: the port name is spelled "wsapwn_pc"; it presumably matches the
// declaration in VX_warp_scheduler - verify there before renaming.
.wsapwn_pc (VX_warp_ctl.wspawn_pc),
.wspawn_new_active(VX_warp_ctl.wspawn_new_active),
// CTM
.ctm (VX_warp_ctl.change_mask),
.ctm_mask (VX_warp_ctl.thread_mask),
.ctm_warp_num (VX_warp_ctl.warp_num),
// WHALT
.whalt (VX_warp_ctl.ebreak),
.whalt_warp_num (VX_warp_ctl.warp_num),
// Wstall
.wstall (VX_wstall.wstall),
.wstall_warp_num (VX_wstall.warp_num),
// Join
.is_join (VX_join.is_join),
.join_warp_num (VX_join.join_warp_num),
// Split
.is_split (VX_warp_ctl.is_split),
.dont_split (VX_warp_ctl.dont_split),
.split_new_mask (VX_warp_ctl.split_new_mask),
.split_later_mask (VX_warp_ctl.split_later_mask),
.split_save_pc (VX_warp_ctl.split_save_pc),
.split_warp_num (VX_warp_ctl.warp_num),
// JAL
.jal (VX_jal_rsp.jal),
.jal_dest (VX_jal_rsp.jal_dest),
.jal_warp_num (VX_jal_rsp.jal_warp_num),
// Branch
.branch_valid (VX_branch_rsp.valid_branch),
.branch_dir (VX_branch_rsp.branch_dir),
.branch_dest (VX_branch_rsp.branch_dest),
.branch_warp_num (VX_branch_rsp.branch_warp_num),
// Outputs
.thread_mask (thread_mask),
.warp_num (warp_num),
.warp_pc (warp_pc),
.out_ebreak (out_ebreak),
.scheduled_warp (scheduled_warp)
);
// always @(*) begin
// $display("Inside verilog instr: %h, pc: %h", icache_response.instruction, warp_pc);
// end
// Instruction-cache request: always a read (`LW_MEM_READ, no write, zero data)
// of the scheduled warp's PC; valid only when a warp was scheduled and there is
// no scheduling delay.
assign icache_request.pc_address = warp_pc;
assign icache_request.out_cache_driver_in_valid = !schedule_delay && scheduled_warp;
assign icache_request.out_cache_driver_in_mem_read = `LW_MEM_READ;
assign icache_request.out_cache_driver_in_mem_write = `NO_MEM_WRITE;
assign icache_request.out_cache_driver_in_data = 32'b0;
// Metadata for the next stage. The instruction is forced to zero when no
// thread is active in the scheduled warp.
assign fe_inst_meta_fd.warp_num = warp_num;
assign fe_inst_meta_fd.valid = thread_mask;
assign fe_inst_meta_fd.instruction = (thread_mask == 0) ? 32'b0 : icache_response.instruction;
assign fe_inst_meta_fd.inst_pc = warp_pc;
endmodule

89
old_rtl/VX_front_end.v Normal file
View File

@@ -0,0 +1,89 @@
`include "VX_define.v"
// VX_front_end
// Structural top of the front end: VX_fetch -> VX_f_d_reg -> VX_decode ->
// VX_d_e_reg. The decoded request leaves through VX_bckE_req.
module VX_front_end (
input wire clk,
input wire reset,
input wire schedule_delay,
VX_warp_ctl_inter VX_warp_ctl,
VX_icache_response_inter icache_response_fe,
VX_icache_request_inter icache_request_fe,
VX_jal_response_inter VX_jal_rsp,
VX_branch_response_inter VX_branch_rsp,
VX_frE_to_bckE_req_inter VX_bckE_req,
output wire fetch_ebreak
);
// Stage-to-stage interface instances.
VX_inst_meta_inter fe_inst_meta_fd();
VX_frE_to_bckE_req_inter VX_frE_to_bckE_req();
VX_inst_meta_inter fd_inst_meta_de();
// Both pipeline registers are frozen by schedule_delay.
wire total_freeze = schedule_delay;
/* verilator lint_off UNUSED */
// wire real_fetch_ebreak;
/* verilator lint_on UNUSED */
wire vortex_ebreak;
wire terminate_sim;
// fetch_ebreak is raised either by the fetch stage (vortex_ebreak) or by the
// decoder (terminate_sim).
assign fetch_ebreak = vortex_ebreak || terminate_sim;
// Warp-stall and join signals connect the decoder and the fetch stage.
VX_wstall_inter VX_wstall();
VX_join_inter VX_join();
VX_fetch vx_fetch(
.clk (clk),
.reset (reset),
.VX_wstall (VX_wstall),
.VX_join (VX_join),
.schedule_delay (schedule_delay),
.VX_jal_rsp (VX_jal_rsp),
.icache_response (icache_response_fe),
.VX_warp_ctl (VX_warp_ctl),
.icache_request (icache_request_fe),
.VX_branch_rsp (VX_branch_rsp),
.out_ebreak (vortex_ebreak), // fetch_ebreak
.fe_inst_meta_fd (fe_inst_meta_fd)
);
// Fetch/decode pipeline register.
VX_f_d_reg vx_f_d_reg(
.clk (clk),
.reset (reset),
.in_freeze (total_freeze),
.fe_inst_meta_fd(fe_inst_meta_fd),
.fd_inst_meta_de(fd_inst_meta_de)
);
// Decoder (instantiated without clk/reset connections).
VX_decode vx_decode(
.fd_inst_meta_de (fd_inst_meta_de),
.VX_frE_to_bckE_req(VX_frE_to_bckE_req),
.VX_wstall (VX_wstall),
.VX_join (VX_join),
.terminate_sim (terminate_sim)
);
// The branch-stall input of the decode/execute register is tied to 0.
wire no_br_stall = 0;
// Decode/execute pipeline register.
VX_d_e_reg vx_d_e_reg(
.clk (clk),
.reset (reset),
.in_branch_stall(no_br_stall),
.in_freeze (total_freeze),
.VX_frE_to_bckE_req(VX_frE_to_bckE_req),
.VX_bckE_req (VX_bckE_req)
);
endmodule

View File

@@ -0,0 +1,27 @@
`include "../VX_define.v"
// VX_generic_priority_encoder
// Combinational priority encoder over an N-bit request vector.
//   valids : request bits
//   index  : position of the lowest-numbered set bit (0 when none is set)
//   found  : 1 when at least one bit of `valids` is set
module VX_generic_priority_encoder
#(
    parameter N = 1
)
(
    input wire[N-1:0] valids,
    output reg[(`CLOG2(N))-1:0] index,
    output reg found
);

    integer bit_pos;

    always @(*) begin
        // Defaults for the "nothing set" case.
        found = 0;
        index = 0;
        // Scan upwards and latch only the first hit, so the lowest set bit
        // wins and later hits cannot overwrite it.
        for (bit_pos = 0; bit_pos < N; bit_pos = bit_pos + 1) begin
            if (valids[bit_pos] && !found) begin
                found = 1;
                index = bit_pos[(`CLOG2(N))-1:0];
            end
        end
    end

endmodule

View File

@@ -0,0 +1,34 @@
// VX_generic_register
// N-bit pipeline register.
//   reset : asynchronous, active high; clears the register
//   flush : synchronous clear; takes priority over stall
//   stall : when high (and flush is low) the stored value is held
//   in    : value captured on a rising clock edge otherwise
//   out   : current stored value
module VX_generic_register
#(
    parameter N = 1
)
(
    input clk,
    input reset,
    input stall,
    input flush,
    input[N-1:0] in,
    output [N-1:0] out
);

    reg[N-1:0] value;

    assign out = value;

    always @(posedge clk or posedge reset) begin
        if (reset)
            value <= {N{1'b0}};
        else begin
            if (flush)
                value <= {N{1'b0}};
            else if (!stall)
                value <= in;
        end
    end

endmodule

View File

@@ -0,0 +1,38 @@
// VX_generic_stack
// Small hardware stack with 2**DEPTH entries of WIDTH bits.
//   push : store q1 at slot `ptr` and q2 at slot `ptr + 1`, then advance the
//          pointer by two (push takes priority over pop)
//   pop  : move the pointer back by one
//   d    : entry just below the pointer, i.e. the current top of stack
//   reset: synchronous; clears the pointer and every entry
// All slot numbers wrap modulo 2**DEPTH, matching the DEPTH-bit pointer.
module VX_generic_stack
#(
    parameter WIDTH = 40,
    parameter DEPTH = 2
)
(
    input wire clk,
    input wire reset,
    input wire push,
    input wire pop,
    // Declared as nets: `input reg` is not legal Verilog-2001 and callers can
    // drive a wire input from any expression.
    input wire [WIDTH - 1:0] q1,
    input wire [WIDTH - 1:0] q2,
    output wire[WIDTH - 1:0] d
);

    reg [DEPTH - 1:0] ptr;
    reg [WIDTH - 1:0] stack [0:(1 << DEPTH) - 1];

    // Slot numbers are computed in DEPTH-bit nets so they wrap together with
    // `ptr`. Writing `stack[ptr - 1]` directly evaluates the index at 32 bits
    // (the unsized literal widens it); with ptr == 0 that lands outside the
    // array instead of on the last slot, which made the top of a full stack
    // unreadable.
    wire [DEPTH - 1:0] second_slot = ptr + 1'b1;
    wire [DEPTH - 1:0] top_slot = ptr - 1'b1;

    integer i;

    always @(posedge clk) begin
        if (reset) begin
            ptr <= 0;
            for (i = 0; i < (1 << DEPTH); i=i+1) stack[i] <= 0;
        end else if (push) begin
            stack[ptr] <= q1;
            stack[second_slot] <= q2;
            ptr <= ptr + 2;
        end else if (pop) begin
            ptr <= ptr - 1;
        end
    end

    assign d = stack[top_slot];

endmodule

85
old_rtl/VX_gpgpu_inst.v Normal file
View File

@@ -0,0 +1,85 @@
`include "VX_define.v"
// VX_gpgpu_inst
// Purely combinational translation of a GPU-control instruction request
// (thread-mask change, warp spawn, barrier, split) into warp-control signals.
module VX_gpgpu_inst (
// Input
VX_gpu_inst_req_inter VX_gpu_inst_req,
// Output
VX_warp_ctl_inter VX_warp_ctl
);
wire[`NT_M1:0] curr_valids = VX_gpu_inst_req.valid;
wire is_split = (VX_gpu_inst_req.is_split);
// New thread mask for a mask change: lane t is enabled when t is below the
// value held in lane 0's a_reg_data.
wire[`NT_M1:0] tmc_new_mask;
genvar curr_t;
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1)
begin
assign tmc_new_mask[curr_t] = curr_t < VX_gpu_inst_req.a_reg_data[0];
end
// The instruction is live when at least one lane is valid.
wire valid_inst = (|curr_valids);
assign VX_warp_ctl.warp_num = VX_gpu_inst_req.warp_num;
assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc) && valid_inst;
assign VX_warp_ctl.thread_mask = VX_gpu_inst_req.is_tmc ? tmc_new_mask : 0;
// assign VX_warp_ctl.ebreak = (VX_gpu_inst_req.a_reg_data[0] == 0) && valid_inst;
// ebreak is raised when a mask change leaves no thread enabled.
assign VX_warp_ctl.ebreak = VX_warp_ctl.change_mask && (VX_warp_ctl.thread_mask == 0);
// Warp spawn: the target PC comes from rd2, and warp w becomes active when w
// is below the value held in lane 0's a_reg_data.
// NOTE(review): unlike change_mask and is_barrier, wspawn is not gated with
// valid_inst - confirm this is intended.
wire wspawn = VX_gpu_inst_req.is_wspawn;
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
wire[`NW-1:0] wspawn_new_active;
genvar curr_w;
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1)
begin
assign wspawn_new_active[curr_w] = curr_w < VX_gpu_inst_req.a_reg_data[0];
end
// Barrier: the id is taken from lane 0's a_reg_data (adapted to the width of
// barrier_id by the assignment) and the warp count is rd2 - 1, keeping the low
// $clog2(`NW)+1 bits.
assign VX_warp_ctl.is_barrier = VX_gpu_inst_req.is_barrier && valid_inst;
assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0];
wire[31:0] num_warps_m1 = VX_gpu_inst_req.rd2 - 1;
assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NW):0];
assign VX_warp_ctl.wspawn = wspawn;
assign VX_warp_ctl.wspawn_pc = wspawn_pc;
assign VX_warp_ctl.wspawn_new_active = wspawn_new_active;
// Split: each valid lane is sorted by its predicate (a_reg_data == 1).
// Lanes with a true predicate form the mask to run now; the remaining valid
// lanes form the mask to run later.
wire[`NT_M1:0] split_new_use_mask;
wire[`NT_M1:0] split_new_later_mask;
// VX_gpu_inst_req.pc
genvar curr_s_t;
for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin
wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
end
// Number of valid lanes; a split is only signalled when more than one lane
// is valid.
wire[$clog2(`NT):0] num_valids;
VX_countones #(.N(`NT)) valids_counter (
.valids(curr_valids),
.count (num_valids)
);
// wire[`NW_M1:0] num_valids = $countones(curr_valids);
assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
// dont_split is set when the "run now" mask is empty or has every lane set.
// NOTE(review): the all-ones comparison is against all `NT lanes, not against
// curr_valids, so a partially valid warp whose valid lanes all agree is not
// covered by it - confirm this is intended.
assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NT{1'b1}}));
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;
assign VX_warp_ctl.split_warp_num = VX_gpu_inst_req.warp_num;
// VX_gpu_inst_req.is_wspawn
// VX_gpu_inst_req.is_split
// VX_gpu_inst_req.is_barrier
endmodule

172
old_rtl/VX_gpr.v Normal file
View File

@@ -0,0 +1,172 @@
`include "VX_define.v"
// VX_gpr
// Register-file storage with two read ports (a / b operands for every lane)
// and one write port fed from the writeback interface.
//   valid_write_request : qualifies the writeback as a real write
//   VX_gpr_read         : read addresses (rs1 / rs2)
//   VX_writeback_inter  : write address (rd), write data, per-lane write
//                         valids (wb_valid) and writeback kind (wb)
//   out_a_reg_data      : per-lane data read at rs1
//   out_b_reg_data      : per-lane data read at rs2
// Without `ASIC a behavioural dual-port RAM is used; with `ASIC the storage is
// built from rf2_32x128_wm1 macros, one pair (a / b read port) per group of
// four lanes.
module VX_gpr (
input wire clk,
input wire reset,
input wire valid_write_request,
VX_gpr_read_inter VX_gpr_read,
VX_wb_inter VX_writeback_inter,
output reg[`NT_M1:0][31:0] out_a_reg_data,
output reg[`NT_M1:0][31:0] out_b_reg_data
);
wire write_enable;
`ifndef ASIC
// Behavioural path: a write needs a valid request, a non-zero wb and a
// destination other than register 0; wb_valid acts as the per-lane enable.
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0)) && (VX_writeback_inter.rd != 0);
byte_enabled_simple_dual_port_ram first_ram(
.we (write_enable),
.clk (clk),
.reset (reset),
.waddr (VX_writeback_inter.rd),
.raddr1(VX_gpr_read.rs1),
.raddr2(VX_gpr_read.rs2),
.be (VX_writeback_inter.wb_valid),
.wdata (VX_writeback_inter.write_data),
.q1 (out_a_reg_data),
.q2 (out_b_reg_data)
);
`else
// ASIC path: writes to register 0 are not suppressed here; instead the write
// data is forced to zero when rd == 0 (see to_write below).
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0));
// NOTE: going_to_write is only referenced by the commented-out cenb below.
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
// Per-lane 32-bit write mask: all zeros for a lane that is being written, all
// ones otherwise (presumably WENB is active low - confirm in the macro
// datasheet).
wire[`NT_M1:0][31:0] write_bit_mask;
genvar curr_t;
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
assign write_bit_mask[curr_t] = {32{~local_write}};
end
// Chip-enable inputs of both ports are tied to 0.
// wire cenb = !going_to_write;
wire cenb = 0;
// wire cena_1 = (VX_gpr_read.rs1 == 0);
// wire cena_2 = (VX_gpr_read.rs2 == 0);
wire cena_1 = 0;
wire cena_2 = 0;
wire[`NT_M1:0][31:0] temp_a;
wire[`NT_M1:0][31:0] temp_b;
`ifndef SYN
// Simulation only: replace unknown (X) read bits with 0 so that never-written
// entries read as zero.
genvar thread;
genvar curr_bit;
for (thread = 0; thread < `NT; thread = thread + 1)
begin
for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1)
begin
assign out_a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
assign out_b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
end
end
`else
assign out_a_reg_data = temp_a;
assign out_b_reg_data = temp_b;
`endif
wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
// One pair of RAM macros per group of four lanes. The loop bound must be the
// `NT macro; the previous text used an apostrophe ('NT), which is not a macro
// reference and does not compile when ASIC is defined.
// NOTE(review): the AA / AB connections below slice the rs1 / rs2 / rd
// address signals with the lane range [curr_base_thread+3 : curr_base_thread],
// while TAA / TAB are 5 bits wide - verify that the address wiring is what
// the macro expects.
genvar curr_base_thread;
for (curr_base_thread = 0; curr_base_thread < `NT; curr_base_thread=curr_base_thread+4)
begin
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(VX_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(VX_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
end
`endif
endmodule

223
old_rtl/VX_gpr_stage.v Normal file
View File

@@ -0,0 +1,223 @@
`include "VX_define.v"
module VX_gpr_stage (
input wire clk,
input wire reset,
input wire schedule_delay,
input wire memory_delay,
input wire stall_gpr_csr,
output wire gpr_stage_delay,
// inputs
// Instruction Information
VX_frE_to_bckE_req_inter VX_bckE_req,
// WriteBack inputs
VX_wb_inter VX_writeback_inter,
// Outputs
VX_exec_unit_req_inter VX_exec_unit_req,
VX_lsu_req_inter VX_lsu_req,
VX_gpu_inst_req_inter VX_gpu_inst_req,
VX_csr_req_inter VX_csr_req
);
wire[31:0] curr_PC = VX_bckE_req.curr_PC;
wire[2:0] branchType = VX_bckE_req.branch_type;
wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ);
wire jalQual = VX_bckE_req.jalQual;
VX_gpr_read_inter VX_gpr_read();
assign VX_gpr_read.rs1 = VX_bckE_req.rs1;
assign VX_gpr_read.rs2 = VX_bckE_req.rs2;
assign VX_gpr_read.warp_num = VX_bckE_req.warp_num;
`ifndef ASIC
VX_gpr_jal_inter VX_gpr_jal();
assign VX_gpr_jal.is_jal = VX_bckE_req.jalQual;
assign VX_gpr_jal.curr_PC = VX_bckE_req.curr_PC;
`else
VX_gpr_jal_inter VX_gpr_jal();
assign VX_gpr_jal.is_jal = VX_exec_unit_req.jalQual;
assign VX_gpr_jal.curr_PC = VX_exec_unit_req.curr_PC;
`endif
VX_gpr_data_inter VX_gpr_datf();
VX_gpr_wrapper vx_grp_wrapper(
.clk (clk),
.reset (reset),
.VX_writeback_inter(VX_writeback_inter),
.VX_gpr_read (VX_gpr_read),
.VX_gpr_jal (VX_gpr_jal),
.out_a_reg_data (VX_gpr_datf.a_reg_data),
.out_b_reg_data (VX_gpr_datf.b_reg_data)
);
// assign VX_bckE_req.is_csr = is_csr;
// assign VX_bckE_req_out.csr_mask = (VX_bckE_req.sr_immed == 1'b1) ? {27'h0, VX_bckE_req.rs1} : VX_gpr_data.a_reg_data[0];
// Outputs
VX_exec_unit_req_inter VX_exec_unit_req_temp();
VX_lsu_req_inter VX_lsu_req_temp();
VX_gpu_inst_req_inter VX_gpu_inst_req_temp();
VX_csr_req_inter VX_csr_req_temp();
VX_inst_multiplex VX_inst_mult(
.VX_bckE_req (VX_bckE_req),
.VX_gpr_data (VX_gpr_datf),
.VX_exec_unit_req(VX_exec_unit_req_temp),
.VX_lsu_req (VX_lsu_req_temp),
.VX_gpu_inst_req (VX_gpu_inst_req_temp),
.VX_csr_req (VX_csr_req_temp)
);
wire is_lsu = (|VX_lsu_req_temp.valid);
wire stall_rest = 0;
wire flush_rest = schedule_delay;
wire stall_lsu = memory_delay;
wire flush_lsu = schedule_delay && !stall_lsu;
assign gpr_stage_delay = stall_lsu || (stall_gpr_csr && VX_bckE_req.is_csr && (|VX_bckE_req.valid));
`ifdef ASIC
wire delayed_lsu_last_cycle;
VX_generic_register #(.N(1)) delayed_reg (
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(stall_rest),
.in (stall_lsu),
.out (delayed_lsu_last_cycle)
);
wire[`NT_M1:0][31:0] temp_store_data;
wire[`NT_M1:0][31:0] temp_base_address; // A reg data
wire[`NT_M1:0][31:0] real_store_data;
wire[`NT_M1:0][31:0] real_base_address; // A reg data
wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;
VX_generic_register #(.N(`NT*32*2)) lsu_data(
.clk (clk),
.reset(reset),
.stall(!store_curr_real),
.flush(stall_rest),
.in ({real_store_data, real_base_address}),
.out ({temp_store_data, temp_base_address})
);
assign real_store_data = VX_lsu_req_temp.store_data;
assign real_base_address = VX_lsu_req_temp.base_address;
assign VX_lsu_req.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
VX_generic_register #(.N(77 + `NW_M1 + 1 + (`NT))) lsu_reg(
.clk (clk),
.reset(reset),
.stall(stall_lsu),
.flush(flush_lsu),
.in ({VX_lsu_req_temp.valid, VX_lsu_req_temp.lsu_pc, VX_lsu_req_temp.warp_num, VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write, VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}),
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc ,VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
);
VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
);
assign VX_exec_unit_req.a_reg_data = real_base_address;
assign VX_exec_unit_req.b_reg_data = real_store_data;
VX_generic_register #(.N(36 + `NW_M1 + 1 + (`NT))) gpu_inst_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.in ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num, VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc, VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier, VX_gpu_inst_req_temp.pc_next}),
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next })
);
assign VX_gpu_inst_req.a_reg_data = real_base_address;
assign VX_gpu_inst_req.rd2 = real_store_data;
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
.clk (clk),
.reset(reset),
.stall(stall_gpr_csr),
.flush(flush_rest),
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
);
// assign
`else
// 341
VX_generic_register #(.N(77 + `NW_M1 + 1 + 65*(`NT))) lsu_reg(
.clk (clk),
.reset(reset),
.stall(stall_lsu),
.flush(flush_lsu),
.in ({VX_lsu_req_temp.valid, VX_lsu_req_temp.lsu_pc, VX_lsu_req_temp.warp_num, VX_lsu_req_temp.store_data, VX_lsu_req_temp.base_address, VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write, VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}),
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc , VX_lsu_req.warp_num , VX_lsu_req.store_data , VX_lsu_req.base_address , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
);
VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.a_reg_data, VX_exec_unit_req_temp.b_reg_data, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
);
VX_generic_register #(.N(68 + `NW_M1 + 1 + 33*(`NT))) gpu_inst_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.in ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num, VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc, VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier, VX_gpu_inst_req_temp.pc_next, VX_gpu_inst_req_temp.a_reg_data, VX_gpu_inst_req_temp.rd2}),
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 })
);
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
.clk (clk),
.reset(reset),
.stall(stall_gpr_csr),
.flush(flush_rest),
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
);
`endif
endmodule

70
old_rtl/VX_gpr_wrapper.v Normal file
View File

@@ -0,0 +1,70 @@
`include "VX_define.v"
// VX_gpr_wrapper
// Instantiates one VX_gpr register file per warp (`NW of them) and selects
// the read data of a single warp for the two operand outputs.
//   * out_a_reg_data: the current PC replicated to every thread when
//     VX_gpr_jal.is_jal is set, otherwise operand A of the selected warp.
//   * out_b_reg_data: operand B of the selected warp.
// The selecting warp number is VX_gpr_read.warp_num, or (when ASIC is
// defined) a copy of it delayed through a never-stalled, never-flushed
// VX_generic_register.
module VX_gpr_wrapper (
    input wire clk,
    input wire reset,
    VX_gpr_read_inter VX_gpr_read,
    VX_wb_inter VX_writeback_inter,
    VX_gpr_jal_inter VX_gpr_jal,
    output wire[`NT_M1:0][31:0] out_a_reg_data,
    output wire[`NT_M1:0][31:0] out_b_reg_data
);

    // Read data of every per-warp register file, indexed [warp][thread].
    wire[`NW-1:0][`NT_M1:0][31:0] temp_a_reg_data;
    wire[`NW-1:0][`NT_M1:0][31:0] temp_b_reg_data;

    // Current PC broadcast to every thread lane.
    wire[`NT_M1:0][31:0] jal_data;

    genvar lane;
    for (lane = 0; lane <= `NT_M1; lane = lane + 1) begin
        assign jal_data[lane] = VX_gpr_jal.curr_PC;
    end

`ifdef ASIC

    // Select with the registered warp number.
    wire zer = 0;
    wire[`NW_M1:0] old_warp_num;

    VX_generic_register #(`NW_M1+1) store_wn(
        .clk  (clk),
        .reset(reset),
        .stall(zer),
        .flush(zer),
        .in   (VX_gpr_read.warp_num),
        .out  (old_warp_num)
    );

    assign out_a_reg_data = VX_gpr_jal.is_jal ? jal_data : temp_a_reg_data[old_warp_num];
    assign out_b_reg_data = temp_b_reg_data[old_warp_num];

`else

    // Select directly with the requesting warp number.
    assign out_a_reg_data = VX_gpr_jal.is_jal ? jal_data : temp_a_reg_data[VX_gpr_read.warp_num];
    assign out_b_reg_data = temp_b_reg_data[VX_gpr_read.warp_num];

`endif

    genvar w;
    generate
        for (w = 0; w < `NW; w = w + 1) begin
            // Only the register file of the warp being written back sees the write.
            wire valid_write_request = (VX_writeback_inter.wb_warp_num == w);

            VX_gpr vx_gpr(
                .clk                (clk),
                .reset              (reset),
                .valid_write_request(valid_write_request),
                .VX_gpr_read        (VX_gpr_read),
                .VX_writeback_inter (VX_writeback_inter),
                .out_a_reg_data     (temp_a_reg_data[w]),
                .out_b_reg_data     (temp_b_reg_data[w])
            );
        end
    endgenerate

endmodule

View File

@@ -0,0 +1,95 @@
`include "VX_define.v"
// VX_inst_multiplex
// Purely combinational fan-out of one back-end request (VX_bckE_req) plus the
// register operands (VX_gpr_data) to the four unit request interfaces.
// The instruction fields are copied to every unit; only the per-thread
// `valid` vector is gated so that a unit sees valid threads only for
// instructions of its own class:
//   LSU  : mem_read or mem_write differs from the NO_MEM_* encodings
//   GPU  : is_wspawn | is_tmc | is_barrier | is_split
//   CSR  : is_csr
//   EXEC : none of the above
module VX_inst_multiplex (
    // Inputs
    VX_frE_to_bckE_req_inter VX_bckE_req,
    VX_gpr_data_inter VX_gpr_data,
    // Outputs
    VX_exec_unit_req_inter VX_exec_unit_req,
    VX_lsu_req_inter VX_lsu_req,
    VX_gpu_inst_req_inter VX_gpu_inst_req,
    VX_csr_req_inter VX_csr_req
);

    // Instruction class decode.
    wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
    wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
    wire is_csr = VX_bckE_req.is_csr;

    // Class flags replicated across all thread lanes so they can be ANDed
    // with the per-thread valid vector.
    wire[`NT_M1:0] is_mem_mask = {`NT{is_mem}};
    wire[`NT_M1:0] is_gpu_mask = {`NT{is_gpu}};
    wire[`NT_M1:0] is_csr_mask = {`NT{is_csr}};

    // ---------------------------------------------------------------- LSU
    assign VX_lsu_req.valid        = VX_bckE_req.valid & is_mem_mask;
    assign VX_lsu_req.warp_num     = VX_bckE_req.warp_num;
    assign VX_lsu_req.lsu_pc       = VX_bckE_req.curr_PC;
    assign VX_lsu_req.base_address = VX_gpr_data.a_reg_data;
    assign VX_lsu_req.store_data   = VX_gpr_data.b_reg_data;
    assign VX_lsu_req.offset       = VX_bckE_req.itype_immed;
    assign VX_lsu_req.mem_read     = VX_bckE_req.mem_read;
    assign VX_lsu_req.mem_write    = VX_bckE_req.mem_write;
    assign VX_lsu_req.rd           = VX_bckE_req.rd;
    assign VX_lsu_req.wb           = VX_bckE_req.wb;

    // ------------------------------------------------------- Execute unit
    // NOTE(review): no exec-unit wspawn / is_csr / csr_* fields are driven
    // in this module; if VX_exec_unit_req_inter declares them, confirm who
    // drives them.
    assign VX_exec_unit_req.valid       = VX_bckE_req.valid & ~(is_mem_mask | is_gpu_mask | is_csr_mask);
    assign VX_exec_unit_req.warp_num    = VX_bckE_req.warp_num;
    assign VX_exec_unit_req.curr_PC     = VX_bckE_req.curr_PC;
    assign VX_exec_unit_req.PC_next     = VX_bckE_req.PC_next;
    assign VX_exec_unit_req.rd          = VX_bckE_req.rd;
    assign VX_exec_unit_req.wb          = VX_bckE_req.wb;
    assign VX_exec_unit_req.a_reg_data  = VX_gpr_data.a_reg_data;
    assign VX_exec_unit_req.b_reg_data  = VX_gpr_data.b_reg_data;
    assign VX_exec_unit_req.alu_op      = VX_bckE_req.alu_op;
    assign VX_exec_unit_req.rs1         = VX_bckE_req.rs1;
    assign VX_exec_unit_req.rs2         = VX_bckE_req.rs2;
    assign VX_exec_unit_req.rs2_src     = VX_bckE_req.rs2_src;
    assign VX_exec_unit_req.itype_immed = VX_bckE_req.itype_immed;
    assign VX_exec_unit_req.upper_immed = VX_bckE_req.upper_immed;
    assign VX_exec_unit_req.branch_type = VX_bckE_req.branch_type;
    assign VX_exec_unit_req.jalQual     = VX_bckE_req.jalQual;
    assign VX_exec_unit_req.jal         = VX_bckE_req.jal;
    assign VX_exec_unit_req.jal_offset  = VX_bckE_req.jal_offset;
    assign VX_exec_unit_req.ebreak      = VX_bckE_req.ebreak;

    // ---------------------------------------------------------------- GPU
    assign VX_gpu_inst_req.valid      = VX_bckE_req.valid & is_gpu_mask;
    assign VX_gpu_inst_req.warp_num   = VX_bckE_req.warp_num;
    assign VX_gpu_inst_req.is_wspawn  = VX_bckE_req.is_wspawn;
    assign VX_gpu_inst_req.is_tmc     = VX_bckE_req.is_tmc;
    assign VX_gpu_inst_req.is_split   = VX_bckE_req.is_split;
    assign VX_gpu_inst_req.is_barrier = VX_bckE_req.is_barrier;
    assign VX_gpu_inst_req.pc_next    = VX_bckE_req.PC_next;
    assign VX_gpu_inst_req.a_reg_data = VX_gpr_data.a_reg_data;
    // Only thread 0's operand B is forwarded.
    assign VX_gpu_inst_req.rd2        = VX_gpr_data.b_reg_data[0];

    // ---------------------------------------------------------------- CSR
    assign VX_csr_req.valid       = VX_bckE_req.valid & is_csr_mask;
    assign VX_csr_req.warp_num    = VX_bckE_req.warp_num;
    assign VX_csr_req.rd          = VX_bckE_req.rd;
    assign VX_csr_req.wb          = VX_bckE_req.wb;
    assign VX_csr_req.alu_op      = VX_bckE_req.alu_op;
    assign VX_csr_req.is_csr      = VX_bckE_req.is_csr;
    assign VX_csr_req.csr_address = VX_bckE_req.csr_address;
    assign VX_csr_req.csr_immed   = VX_bckE_req.csr_immed;
    assign VX_csr_req.csr_mask    = VX_bckE_req.csr_mask;

endmodule

106
old_rtl/VX_lsu.v Normal file
View File

@@ -0,0 +1,106 @@
`include "VX_define.v"
// VX_lsu
// Load/store unit. Computes the per-thread address (base + offset), latches
// the whole request in `lsu_buffer`, and presents the latched request to the
// data cache. The cache response data is forwarded to the memory write-back
// interface together with the latched destination/warp/PC information.
//
// out_delay is raised when the data cache reports a delay or when write-back
// has no free slot for memory results (no_slot_mem); it also freezes
// `lsu_buffer`.
module VX_lsu (
    input wire clk,
    input wire reset,
    input wire no_slot_mem,
    VX_lsu_req_inter VX_lsu_req,
    // Write back to GPR
    VX_inst_mem_wb_inter VX_mem_wb,
    VX_dcache_response_inter VX_dcache_rsp,
    VX_dcache_request_inter VX_dcache_req,
    output wire out_delay
);

    assign out_delay = VX_dcache_rsp.delay || no_slot_mem;

    // Per-thread effective addresses.
    wire[`NT_M1:0][31:0] address;

    VX_lsu_addr_gen VX_lsu_addr_gen
    (
        .base_address(VX_lsu_req.base_address),
        .offset      (VX_lsu_req.offset),
        .address     (address)
    );

    // Latched copy of the request, as seen by the cache and write-back.
    wire[`NT_M1:0][31:0] use_address;
    wire[`NT_M1:0][31:0] use_store_data;
    wire[`NT_M1:0]       use_valid;
    wire[2:0]            use_mem_read;
    wire[2:0]            use_mem_write;
    wire[4:0]            use_rd;
    wire[`NW_M1:0]       use_warp_num;
    wire[1:0]            use_wb;
    wire[31:0]           use_pc;

    wire zero = 0;

    // Width: NT*32 (address) + NT*32 (store data) + NT (valid)
    //        + 3 + 3 + 5 (mem_read, mem_write, rd) + warp_num + 2 (wb) + 32 (pc).
    VX_generic_register #(.N(45 + `NW_M1 + 1 + `NT*65)) lsu_buffer(
        .clk  (clk),
        .reset(reset),
        .stall(out_delay),
        .flush(zero),
        .in   ({address    , VX_lsu_req.store_data, VX_lsu_req.valid, VX_lsu_req.mem_read, VX_lsu_req.mem_write, VX_lsu_req.rd, VX_lsu_req.warp_num, VX_lsu_req.wb, VX_lsu_req.lsu_pc}),
        .out  ({use_address, use_store_data       , use_valid       , use_mem_read       , use_mem_write       , use_rd       , use_warp_num       , use_wb       , use_pc           })
    );

    // Per-thread wiring to the cache request and from the cache response.
    genvar tid;
    for (tid = 0; tid <= `NT_M1; tid = tid + 1) begin
        assign VX_dcache_req.out_cache_driver_in_address[tid] = use_address[tid];
        assign VX_dcache_req.out_cache_driver_in_data[tid]    = use_store_data[tid];
        assign VX_dcache_req.out_cache_driver_in_valid[tid]   = use_valid[tid];
        assign VX_mem_wb.loaded_data[tid]                     = VX_dcache_rsp.in_cache_driver_out_data[tid];
    end

    assign VX_dcache_req.out_cache_driver_in_mem_read  = use_mem_read;
    assign VX_dcache_req.out_cache_driver_in_mem_write = use_mem_write;

    // Write-back side: the wb code is forced to zero while the cache is
    // still delaying.
    assign VX_mem_wb.rd          = use_rd;
    assign VX_mem_wb.wb          = use_wb & {2{!VX_dcache_rsp.delay}};
    assign VX_mem_wb.wb_valid    = use_valid;
    assign VX_mem_wb.wb_warp_num = use_warp_num;
    assign VX_mem_wb.mem_wb_pc   = use_pc;

endmodule // Memory

17
old_rtl/VX_lsu_addr_gen.v Normal file
View File

@@ -0,0 +1,17 @@
`include "VX_define.v"
// VX_lsu_addr_gen
// Combinational address generation: every thread lane adds the shared
// 32-bit offset to its own base address.
module VX_lsu_addr_gen (
    input wire[`NT_M1:0][31:0] base_address,
    input wire[31:0] offset,
    output wire[`NT_M1:0][31:0] address
);

    genvar lane;
    generate
        for (lane = 0; lane < `NT; lane = lane + 1) begin
            assign address[lane] = offset + base_address[lane];
        end
    endgenerate

endmodule

View File

@@ -0,0 +1,20 @@
`include "VX_define.v"
// VX_priority_encoder
// Combinational priority encoder over `NW request bits.
//   found : 1 when at least one bit of `valids` is set.
//   index : position of the lowest-numbered set bit (0 when none is set).
module VX_priority_encoder (
    input wire[`NW-1:0] valids,
    output reg[`NW_M1:0] index,
    output reg found
);

    integer pos;

    always @(*) begin
        index = 0;
        found = 0;
        // Scan upwards and latch only the first hit, so bit 0 has the
        // highest priority.
        for (pos = 0; pos < `NW; pos = pos + 1) begin
            if (!found && valids[pos]) begin
                index = pos[`NW_M1:0];
                found = 1;
            end
        end
    end

endmodule

View File

@@ -0,0 +1,32 @@
`include "../VX_define.v"
// VX_priority_encoder_w_mask
// Parameterised priority encoder that also produces a one-hot mask.
//   found : 1 when at least one bit of `valids` is set.
//   index : position of the highest-numbered set bit (0 when none is set).
//   mask  : one-hot vector with bit `index` set when found, otherwise 0.
module VX_priority_encoder_w_mask
#(
    parameter N = 10
)
(
    input wire[N-1:0] valids,
    output reg [N-1:0] mask,
    output reg[(`CLOG2(N))-1:0] index,
    output reg found
);

    integer pos;

    always @(valids) begin
        index = 0;
        found = 0;
        // Scan downwards and keep only the first hit, so the most
        // significant set bit wins.
        for (pos = N - 1; pos >= 0; pos = pos - 1) begin
            if (!found && valids[pos]) begin
                index = pos[(`CLOG2(N))-1:0];
                found = 1;
            end
        end
    end

    assign mask = found ? (1 << index) : 0;

endmodule

69
old_rtl/VX_scheduler.v Normal file
View File

@@ -0,0 +1,69 @@
`include "VX_define.v"
// VX_scheduler
// Register scoreboard. `rename_table` holds one busy bit per architectural
// register (32) per warp (`NW). A bit is set when an instruction that writes
// a non-zero destination register is let through, and cleared when a
// write-back to that register arrives.
//
// schedule_delay is raised when
//   * a source register of the incoming instruction is still busy and the
//     request has at least one valid thread, or
//   * the instruction is a load/store while memory_delay or gpr_stage_delay
//     is asserted.
module VX_scheduler (
    input wire clk,
    input wire reset,
    input wire memory_delay,
    input wire gpr_stage_delay,
    VX_frE_to_bckE_req_inter VX_bckE_req,
    VX_wb_inter VX_writeback_inter,
    output wire schedule_delay
);

    // Busy bits, indexed [warp][register].
    reg[31:0] rename_table[`NW-1:0];

    // A write-back that really updates a register (x0 is never tracked).
    wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0);

    // The incoming instruction will produce a register result.
    wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0);

    // Busy bits of the requesting warp, then of the two source registers.
    wire[31:0] warp_busy = rename_table[VX_bckE_req.warp_num];
    wire rs1_rename = warp_busy[VX_bckE_req.rs1];
    wire rs2_rename = warp_busy[VX_bckE_req.rs2];

    wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
    wire is_load  = (VX_bckE_req.mem_read != `NO_MEM_READ);
    wire is_mem   = is_load || is_store;

    // Same-cycle write-back match on a source register. Currently masked
    // out below by pass_enable = 0.
    wire rs1_pass = (valid_wb && (VX_writeback_inter.rd == VX_bckE_req.rs1));
    wire rs2_pass = (valid_wb && (VX_writeback_inter.rd == VX_bckE_req.rs2));
    wire pass_enable = 0;

    // rs2 is only a real source for register-register ops, stores, barriers
    // and wspawn.
    wire using_rs2 = (VX_bckE_req.rs2_src == `RS2_REG) || is_store || VX_bckE_req.is_barrier || VX_bckE_req.is_wspawn;

    wire rs1_rename_qual = (VX_bckE_req.rs1 != 0) && (rs1_rename || (rs1_pass && pass_enable));
    wire rs2_rename_qual = (VX_bckE_req.rs2 != 0 && using_rs2) && (rs2_rename || (rs2_pass && pass_enable));

    wire rename_valid = rs1_rename_qual || rs2_rename_qual;

    assign schedule_delay = (rename_valid && (|VX_bckE_req.valid)) || (is_mem && (memory_delay || gpr_stage_delay));

    integer w;

    always @(posedge clk or posedge reset) begin
        if (reset) begin
            for (w = 0; w < `NW; w = w + 1) begin
                rename_table[w] <= 32'b0;
            end
        end else begin
            // Order matters: if the same bit is both cleared and set in one
            // cycle, the later assignment (set) takes effect.
            if (valid_wb)
                rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] <= 0;
            if (wb_inc && !schedule_delay)
                rename_table[VX_bckE_req.warp_num][VX_bckE_req.rd] <= 1;
        end
    end

endmodule

86
old_rtl/VX_warp.v Normal file
View File

@@ -0,0 +1,86 @@
`include "VX_define.v"
// VX_warp
// State of a single warp: its program counter and per-thread valid mask.
//
//   out_PC    : PC presented this cycle. Jump destination when in_jal is
//               set, else branch destination when in_branch_dir is set,
//               else the stored PC.
//   out_valid : thread mask presented this cycle. in_thread_mask when
//               in_change_mask is set, else all zero while stalled, else the
//               stored mask.
//
// The stored mask powers up (and resets) with only thread 0 active. `remove`
// clears it; `in_change_mask` replaces it with in_thread_mask.
// The stored PC resets to 0, is loaded with in_wspawn_pc on in_wspawn, and
// otherwise follows out_PC (+4 when not stalled).
module VX_warp (
    input wire clk,
    input wire reset,
    input wire stall,
    input wire remove,
    input wire[`NT_M1:0] in_thread_mask,
    input wire in_change_mask,
    input wire in_jal,
    input wire[31:0] in_jal_dest,
    input wire in_branch_dir,
    input wire[31:0] in_branch_dest,
    input wire in_wspawn,
    input wire[31:0] in_wspawn_pc,
    output wire[31:0] out_PC,
    output wire[`NT_M1:0] out_valid
);

    reg[31:0] real_PC;
    var[31:0] temp_PC;
    var[31:0] use_PC;
    reg[`NT_M1:0] valid;
    reg[`NT_M1:0] valid_zero;   // constant all-zero mask, set once below

    integer ini_cur_th = 0;

    // Power-on state: PC 0, only thread 0 active.
    initial begin
        real_PC = 0;
        for (ini_cur_th = 1; ini_cur_th < `NT; ini_cur_th=ini_cur_th+1) begin
            valid[ini_cur_th]      = 0; // threads 1..NT-1 start inactive
            valid_zero[ini_cur_th] = 0;
        end
        valid[0]      = 1;              // thread 0 starts active
        valid_zero[0] = 0;
    end

    // Stored thread mask.
    // The block is sensitive to posedge reset, so it needs an explicit reset
    // branch; without one a reset edge would evaluate remove/in_change_mask
    // as if it were a clock edge. Reset restores the power-on mask.
    always @(posedge clk, posedge reset) begin
        if (reset) begin
            valid <= 1;
        end else if (remove) begin
            valid <= valid_zero;
        end else if (in_change_mask) begin
            valid <= in_thread_mask;
        end
    end

    genvar out_cur_th;
    generate
        for (out_cur_th = 0; out_cur_th < `NT; out_cur_th = out_cur_th+1)
            assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th] : stall ? 1'b0 : valid[out_cur_th];
    endgenerate

    // PC selection for this cycle: jump beats branch beats sequential.
    always @(*) begin
        if (in_jal == 1'b1) begin
            temp_PC = in_jal_dest;
        end else if (in_branch_dir == 1'b1) begin
            temp_PC = in_branch_dest;
        end else begin
            temp_PC = real_PC;
        end
    end

    assign use_PC = temp_PC;
    assign out_PC = temp_PC;

    // Stored PC.
    always @(posedge clk or posedge reset) begin
        if (reset) begin
            real_PC <= 0;
        end else if (in_wspawn == 1'b1) begin
            real_PC <= in_wspawn_pc;
        end else if (!stall) begin
            real_PC <= use_PC + 32'h4;
        end else begin
            // Stalled: hold the selected PC so a redirect is not lost.
            real_PC <= use_PC;
        end
    end

endmodule

321
old_rtl/VX_warp_scheduler.v Normal file
View File

@@ -0,0 +1,321 @@
`include "VX_define.v"
// VX_warp_scheduler
// Chooses which warp issues each cycle and keeps the per-warp PC, thread
// mask, active/stalled flags, barrier membership and split/join stacks.
//
// Outputs
//   warp_num       : warp chosen by the priority encoder over `use_active`.
//   warp_pc        : PC of that warp (the wspawn PC for a freshly spawned warp).
//   thread_mask    : its thread mask, forced to 0 whenever global_stall is set.
//   scheduled_warp : a warp was really scheduled this cycle (ignores `stall`).
//   out_ebreak     : no warp is active any more.
//
// After reset only warp 0 is active, with thread 0 enabled and a stored PC of
// 0x80000000 - 4.
//
// NOTE: the port name `wsapwn_pc` (sic) is part of the module interface and
// is kept as is.
module VX_warp_scheduler (
    input wire clk, // Clock
    input wire reset,
    input wire stall,
    // Wspawn
    input wire wspawn,
    input wire[31:0] wsapwn_pc,
    input wire[`NW-1:0] wspawn_new_active,
    // CTM
    input wire ctm,
    input wire[`NT_M1:0] ctm_mask,
    input wire[`NW_M1:0] ctm_warp_num,
    // WHALT
    input wire whalt,
    input wire[`NW_M1:0] whalt_warp_num,
    input wire is_barrier,
    input wire[31:0] barrier_id,
    input wire[$clog2(`NW):0] num_warps,
    input wire[`NW_M1:0] barrier_warp_num,
    // WSTALL
    input wire wstall,
    input wire[`NW_M1:0] wstall_warp_num,
    // Split
    input wire is_split,
    input wire dont_split,
    input wire[`NT_M1:0] split_new_mask,
    input wire[`NT_M1:0] split_later_mask,
    input wire[31:0] split_save_pc,
    input wire[`NW_M1:0] split_warp_num,
    // Join
    input wire is_join,
    input wire[`NW_M1:0] join_warp_num,
    // JAL
    input wire jal,
    input wire[31:0] jal_dest,
    input wire[`NW_M1:0] jal_warp_num,
    // Branch
    input wire branch_valid,
    input wire branch_dir,
    input wire[31:0] branch_dest,
    input wire[`NW_M1:0] branch_warp_num,
    output wire[`NT_M1:0] thread_mask,
    output wire[`NW_M1:0] warp_num,
    output wire[31:0] warp_pc,
    output wire out_ebreak,
    output wire scheduled_warp
);

    wire update_use_wspawn;
    wire update_visible_active;

    // Top of each warp's split/join stack: {fall-through flag, PC, thread mask}.
    wire[(1+32+`NT_M1):0] d[`NW-1:0];
    wire join_fall;
    wire[31:0] join_pc;
    wire[`NT_M1:0] join_tm;

    wire in_wspawn = wspawn;
    wire in_ctm = ctm;
    wire in_whalt = whalt;
    wire in_wstall = wstall;

    reg[`NW-1:0] warp_active;      // warps that exist
    reg[`NW-1:0] warp_stalled;     // warps waiting on a control instruction
    reg[`NW-1:0] visible_active;   // warps still to be issued in this round
    wire[`NW-1:0] use_active;
    wire wstall_this_cycle;

    reg[`NT_M1:0] thread_masks[`NW-1:0];
    reg[31:0] warp_pcs[`NW-1:0];

    // barriers
    reg[`NW-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
    wire reached_barrier_limit;
    wire[`NW-1:0] curr_barrier_mask;
    wire[$clog2(`NW):0] curr_barrier_count;

    // wsapwn
    reg[31:0] use_wsapwn_pc;
    reg[`NW-1:0] use_wsapwn;

    wire[`NW_M1:0] warp_to_schedule;
    wire schedule;
    wire hazard;
    wire global_stall;
    wire real_schedule;
    wire[31:0] new_pc;

    reg[`NW-1:0] total_barrier_stall;
    reg didnt_split;

    /* verilator lint_off UNUSED */
    // wire[$clog2(`NW):0] num_active;
    /* verilator lint_on UNUSED */

    integer curr_w_help;
    integer curr_barrier;
    integer curr_b;

    // Scheduler state. Several of the `if` groups below can fire in the same
    // cycle and write the same registers; with non-blocking assignments the
    // textually last write wins, so the order of the groups is significant.
    always @(posedge clk or posedge reset) begin
        if (reset) begin
            for (curr_barrier = 0; curr_barrier < `NUM_BARRIERS; curr_barrier=curr_barrier+1) begin
                barrier_stall_mask[curr_barrier] <= 0;
            end
            use_wsapwn_pc     <= 0;
            use_wsapwn        <= 0;
            warp_pcs[0]       <= (32'h80000000 - 4);
            warp_active[0]    <= 1; // Activating first warp
            visible_active[0] <= 1; // Activating first warp
            thread_masks[0]   <= 1; // Activating first thread in first warp
            warp_stalled      <= 0;
            didnt_split       <= 0;
            for (curr_w_help = 1; curr_w_help < `NW; curr_w_help=curr_w_help+1) begin
                warp_pcs[curr_w_help]       <= 0;
                warp_active[curr_w_help]    <= 0; // all other warps start inactive
                visible_active[curr_w_help] <= 0;
                thread_masks[curr_w_help]   <= 1; // thread 0 enabled once the warp is spawned
            end
        end else begin
            // Spawning warps: every newly active warp except warp 0 starts
            // from the wspawn PC the first time it is scheduled.
            if (wspawn) begin
                warp_active   <= wspawn_new_active;
                use_wsapwn_pc <= wsapwn_pc;
                use_wsapwn    <= wspawn_new_active & (~`NW'b1);
            end

            if (is_barrier) begin
                warp_stalled[barrier_warp_num] <= 0;
                if (reached_barrier_limit) begin
                    // Barrier complete: release every warp waiting on it.
                    barrier_stall_mask[barrier_id] <= 0;
                end else begin
                    barrier_stall_mask[barrier_id][barrier_warp_num] <= 1;
                end
            end else if (ctm) begin
                thread_masks[ctm_warp_num] <= ctm_mask;
                warp_stalled[ctm_warp_num] <= 0;
            end else if (is_join && !didnt_split) begin
                if (!join_fall) begin
                    warp_pcs[join_warp_num] <= join_pc;
                end
                thread_masks[join_warp_num] <= join_tm;
                didnt_split                 <= 0;
            end else if (is_split) begin
                warp_stalled[split_warp_num] <= 0;
                if (!dont_split) begin
                    thread_masks[split_warp_num] <= split_new_mask;
                    didnt_split                  <= 0;
                end else begin
                    didnt_split                  <= 1;
                end
            end

            if (whalt) begin
                warp_active[whalt_warp_num]    <= 0;
                visible_active[whalt_warp_num] <= 0;
            end

            if (update_use_wspawn) begin
                use_wsapwn[warp_to_schedule]   <= 0;
                thread_masks[warp_to_schedule] <= 1;
            end

            // Stalling the scheduling of warps
            if (wstall) begin
                warp_stalled[wstall_warp_num]   <= 1;
                visible_active[wstall_warp_num] <= 0;
            end

            // Refilling active warps
            if (update_visible_active) begin
                visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall);
            end

            // Don't change state if stall
            if (!global_stall && real_schedule && (thread_mask != 0)) begin
                visible_active[warp_to_schedule] <= 0;
                warp_pcs[warp_to_schedule]       <= new_pc;
            end

            // Jal
            if (jal) begin
                warp_pcs[jal_warp_num]     <= jal_dest;
                warp_stalled[jal_warp_num] <= 0;
            end

            // Branch
            if (branch_valid) begin
                if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest;
                warp_stalled[branch_warp_num] <= 0;
            end
        end
    end

    VX_countones #(.N(`NW)) barrier_count(
        .valids(curr_barrier_mask),
        .count (curr_barrier_count)
    );

    wire[$clog2(`NW):0] count_visible_active;

    VX_countones #(.N(`NW)) num_visible(
        .valids(visible_active),
        .count (count_visible_active)
    );

    assign curr_barrier_mask     = barrier_stall_mask[barrier_id][`NW-1:0];
    assign reached_barrier_limit = curr_barrier_count == (num_warps);

    // NOTE(review): flagged "Maybe bug" by the original author.
    assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule);

    // Warps held by any barrier: OR of every barrier's stall mask.
    // The loop covers all `NUM_BARRIERS entries instead of a hard-coded
    // [0..3], so the logic stays correct when `NUM_BARRIERS is not 4.
    always @(*) begin
        total_barrier_stall = 0;
        for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b = curr_b + 1) begin
            total_barrier_stall = total_barrier_stall | barrier_stall_mask[curr_b];
        end
    end

    assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);

    // Entries pushed on a divergent split:
    //   q1 - fall-through marker with the warp's current (pre-split) mask
    //   q2 - saved PC with the mask of the threads that run later
    wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0         , thread_masks[split_warp_num]};
    wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask};

    assign {join_fall, join_pc, join_tm} = d[join_warp_num];

    // One split/join stack per warp.
    genvar curr_warp;
    for (curr_warp = 0; curr_warp < `NW; curr_warp = curr_warp + 1) begin
        wire correct_warp_s = (curr_warp == split_warp_num);
        wire correct_warp_j = (curr_warp == join_warp_num);
        wire push = (is_split && !dont_split) && correct_warp_s;
        wire pop  = is_join && correct_warp_j;
        VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT)+1)) ipdom_stack(
            .clk  (clk),
            .reset(reset),
            .push (push),
            .pop  (pop),
            .d    (d[curr_warp]),
            .q1   (q1),
            .q2   (q2)
        );
    end

    // A jump or taken branch for the warp being scheduled makes its stored
    // PC stale this cycle.
    wire should_jal = (jal && (warp_to_schedule == jal_warp_num));
    wire should_bra = (branch_dir && (warp_to_schedule == branch_warp_num));
    assign hazard = (should_jal || should_bra) && schedule;

    assign real_schedule  = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule];
    assign global_stall   = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);
    assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || is_join);

    wire real_use_wspawn = use_wsapwn[warp_to_schedule];

    assign warp_pc     = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
    assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NT'b1 : thread_masks[warp_to_schedule]);
    assign warp_num    = warp_to_schedule;

    assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;

    assign new_pc = warp_pc + 4;

    // When no warp is left in the current round, look straight at the
    // refreshed candidate set instead of waiting a cycle for the refill.
    assign use_active = (count_visible_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall)) : visible_active;

    // Choosing a warp to schedule
    VX_priority_encoder choose_schedule(
        .valids(use_active),
        .index (warp_to_schedule),
        .found (schedule)
    );

    wire ebreak = (warp_active == 0);
    assign out_ebreak = ebreak;

endmodule

111
old_rtl/VX_writeback.v Normal file
View File

@@ -0,0 +1,111 @@
`include "VX_define.v"
// VX_writeback
// Arbitrates the three write-back sources onto the single register-file
// write port with fixed priority: execute unit > CSR unit > memory (LSU).
// The selected write-back is registered for one cycle before it reaches
// VX_writeback_inter.
//
//   no_slot_mem : a memory write-back is pending but lost arbitration.
//   no_slot_csr : a CSR write-back is pending but lost to the execute unit.
module VX_writeback (
    input wire clk,
    input wire reset,
    // Mem WB info
    VX_inst_mem_wb_inter VX_mem_wb,
    // EXEC Unit WB info
    VX_inst_exec_wb_inter VX_inst_exec_wb,
    // CSR Unit WB info
    VX_csr_wb_inter VX_csr_wb,
    // Actual WB to GPR
    VX_wb_inter VX_writeback_inter,
    output wire no_slot_mem,
    output wire no_slot_csr
);

    // Selected (not yet registered) write-back.
    VX_wb_inter VX_writeback_tempp();

    // A source requests the port when its wb code is non-zero and at least
    // one thread is valid.
    wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid);
    wire mem_wb  = (VX_mem_wb.wb != 0) && (|VX_mem_wb.wb_valid);
    wire csr_wb  = (VX_csr_wb.wb != 0) && (|VX_csr_wb.valid);

    assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
    assign no_slot_csr = csr_wb && (exec_wb);

    assign VX_writeback_tempp.write_data = exec_wb ? VX_inst_exec_wb.alu_result :
                                           csr_wb  ? VX_csr_wb.csr_result :
                                           mem_wb  ? VX_mem_wb.loaded_data :
                                           0;

    assign VX_writeback_tempp.wb_valid = exec_wb ? VX_inst_exec_wb.wb_valid :
                                         csr_wb  ? VX_csr_wb.valid :
                                         mem_wb  ? VX_mem_wb.wb_valid :
                                         0;

    assign VX_writeback_tempp.rd = exec_wb ? VX_inst_exec_wb.rd :
                                   csr_wb  ? VX_csr_wb.rd :
                                   mem_wb  ? VX_mem_wb.rd :
                                   0;

    assign VX_writeback_tempp.wb = exec_wb ? VX_inst_exec_wb.wb :
                                   csr_wb  ? VX_csr_wb.wb :
                                   mem_wb  ? VX_mem_wb.wb :
                                   0;

    assign VX_writeback_tempp.wb_warp_num = exec_wb ? VX_inst_exec_wb.wb_warp_num :
                                            csr_wb  ? VX_csr_wb.warp_num :
                                            mem_wb  ? VX_mem_wb.wb_warp_num :
                                            0;

    // No PC is available for CSR write-backs; 32'hdeadbeef is used as a
    // placeholder there and when nothing is written back.
    assign VX_writeback_tempp.wb_pc = exec_wb ? VX_inst_exec_wb.exec_wb_pc :
                                      csr_wb  ? 32'hdeadbeef :
                                      mem_wb  ? VX_mem_wb.mem_wb_pc :
                                      32'hdeadbeef;

    wire zero = 0;

    wire[`NT-1:0][31:0] use_wb_data;

    // Remembers that the previous cycle's winner was the memory unit.
    // The original had `end begin` here (missing `else`), which overwrote
    // the reset value in the same pass, and used blocking assignments in a
    // clocked block; both are corrected.
    reg prev_is_mem;
    always @(posedge clk, posedge reset) begin
        if (reset) begin
            prev_is_mem <= 0;
        end else begin
            prev_is_mem <= mem_wb && !no_slot_mem;
        end
    end

    // Width: NT*32 (write_data) + NT (wb_valid) + 5 (rd) + 2 (wb)
    //        + warp_num + 32 (wb_pc).
    VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register(
        .clk  (clk),
        .reset(reset),
        .stall(zero),
        .flush(zero),
        .in   ({VX_writeback_tempp.write_data, VX_writeback_tempp.wb_valid, VX_writeback_tempp.rd, VX_writeback_tempp.wb, VX_writeback_tempp.wb_warp_num, VX_writeback_tempp.wb_pc}),
        .out  ({use_wb_data                  , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc})
    );

    // Captures thread 0's data of the last write-back to register 28.
    // Not read anywhere in this module; presumably a debug probe.
    reg[31:0] last_data_wb;
    always @(posedge clk) begin
        if ((|VX_writeback_inter.wb_valid) && (VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd == 28)) begin
            last_data_wb <= use_wb_data[0];
        end
    end

`ifdef SYN
    // With SYN defined, the data for a memory write-back is taken from the
    // unregistered selection one cycle later.
    assign VX_writeback_inter.write_data = prev_is_mem ? VX_writeback_tempp.write_data : use_wb_data;
`else
    assign VX_writeback_inter.write_data = use_wb_data;
`endif

endmodule // VX_writeback

249
old_rtl/Vortex.v Normal file
View File

@@ -0,0 +1,249 @@
`include "../VX_define.v"
module Vortex
/*#(
parameter CACHE_SIZE = 4096, // Bytes
parameter CACHE_WAYS = 2,
parameter CACHE_BLOCK = 128, // Bytes
parameter CACHE_BANKS = 8,
parameter NUM_WORDS_PER_BLOCK = 4
)*/
(
input wire clk,
input wire reset,
input wire[31:0] icache_response_instruction,
output wire[31:0] icache_request_pc_address,
// IO
output wire io_valid,
output wire[31:0] io_data,
// Req D Mem
output reg [31:0] o_m_read_addr_d,
output reg [31:0] o_m_evict_addr_d,
output reg o_m_valid_d,
output reg [31:0] o_m_writedata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
output reg o_m_read_or_write_d,
// Rsp D Mem
input wire [31:0] i_m_readdata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
input wire i_m_ready_d,
// Req I Mem
output reg [31:0] o_m_read_addr_i,
output reg [31:0] o_m_evict_addr_i,
output reg o_m_valid_i,
output reg [31:0] o_m_writedata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
output reg o_m_read_or_write_i,
// Rsp I Mem
input wire [31:0] i_m_readdata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
input wire i_m_ready_i,
output wire out_ebreak
);
reg[31:0] icache_banks = `ICACHE_BANKS;
reg[31:0] icache_num_words_per_block = `ICACHE_NUM_WORDS_PER_BLOCK;
reg[31:0] dcache_banks = `DCACHE_BANKS;
reg[31:0] dcache_num_words_per_block = `DCACHE_NUM_WORDS_PER_BLOCK;
reg[31:0] number_threads = `NT;
reg[31:0] number_warps = `NW;
always @(posedge clk) begin
icache_banks <= icache_banks;
icache_num_words_per_block <= icache_num_words_per_block;
dcache_banks <= dcache_banks;
dcache_num_words_per_block <= dcache_num_words_per_block;
number_threads <= number_threads;
number_warps <= number_warps;
end
wire memory_delay;
wire gpr_stage_delay;
wire schedule_delay;
// Dcache Interface
VX_dcache_response_inter VX_dcache_rsp();
VX_dcache_request_inter VX_dcache_req();
wire temp_io_valid = (!memory_delay) && (|VX_dcache_req.out_cache_driver_in_valid) && (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (VX_dcache_req.out_cache_driver_in_address[0] == 32'h00010000);
wire[31:0] temp_io_data = VX_dcache_req.out_cache_driver_in_data[0];
assign io_valid = temp_io_valid;
assign io_data = temp_io_data;
VX_dram_req_rsp_inter #(
.NUMBER_BANKS(`DCACHE_BANKS),
.NUM_WORDS_PER_BLOCK(`DCACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp();
VX_icache_response_inter icache_response_fe();
VX_icache_request_inter icache_request_fe();
VX_dram_req_rsp_inter #(
.NUMBER_BANKS(`ICACHE_BANKS),
.NUM_WORDS_PER_BLOCK(`ICACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp_icache();
//assign icache_response_fe.instruction = icache_response_instruction;
assign icache_request_pc_address = icache_request_fe.pc_address;
// Need to fix this so that it is only 1 set of outputs
// o_m Values
// L2 Cache
/*
assign VX_L2cache_req.out_cache_driver_in_valid = VX_dram_req_rsp.o_m_valid || VX_dram_req_rsp_icache.o_m_valid; // Ask about this (width)
// Ask about the adress
assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? icache_request_fe.pc_address: VX_dcache_req.out_cache_driver_in_address;
//assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_read_addr: VX_dram_req_rsp.o_m_read_addr;
//assign VX_L2cache_req.out_cache_driver_in_address = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_evict_addr : VX_dram_req_rsp.o_m_evict_addr;
assign VX_L2cache_req.out_cache_driver_in_mem_read = (VX_dram_req_rsp_icache.o_m_valid) ? (VX_dram_req_rsp_icache.o_m_read_or_write ? icache_request_fe.out_cache_driver_in_mem_write : icache_request_fe.out_cache_driver_in_mem_read)
: (VX_dram_req_rsp.o_m_read_or_write ? VX_dcache_req.out_cache_driver_in_mem_write : VX_dcache_req.out_cache_driver_in_mem_read);
//assign VX_dram_req_rsp.i_m_ready = i_m_ready && !VX_dram_req_rsp_icache.o_m_valid && VX_dram_req_rsp.o_m_valid;
//assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready && VX_dram_req_rsp_icache.o_m_valid;
genvar cur_bank;
genvar cur_word;
for (cur_bank = 0; cur_bank < CACHE_BANKS; cur_bank = cur_bank + 1) begin
for (cur_word = 0; cur_word < NUM_WORDS_PER_BLOCK; cur_word = cur_word + 1) begin
assign VX_L2cache_req.out_cache_driver_in_data[cur_bank][cur_word] = (VX_dram_req_rsp_icache.o_m_valid) ? VX_dram_req_rsp_icache.o_m_writedata[cur_bank][cur_word]
: VX_dram_req_rsp.o_m_writedata[cur_bank][cur_word];
assign VX_dram_req_rsp.i_m_readdata[cur_bank][cur_word] = VX_dram_req_rsp_L2.i_m_readdata[cur_bank][cur_word]; // fill in correct response data
assign VX_dram_req_rsp_icache.i_m_readdata[cur_bank][cur_word] = VX_dram_req_rsp_L2.i_m_readdata[cur_bank][cur_word]; // fill in correct response data
end
end
*/
assign o_m_valid_i = VX_dram_req_rsp_icache.o_m_valid;
assign o_m_valid_d = VX_dram_req_rsp.o_m_valid;
assign o_m_read_addr_i = VX_dram_req_rsp_icache.o_m_read_addr;
assign o_m_read_addr_d = VX_dram_req_rsp.o_m_read_addr;
assign o_m_evict_addr_i = VX_dram_req_rsp_icache.o_m_evict_addr;
assign o_m_evict_addr_d = VX_dram_req_rsp.o_m_evict_addr;
assign o_m_read_or_write_i = VX_dram_req_rsp_icache.o_m_read_or_write;
assign o_m_read_or_write_d = VX_dram_req_rsp.o_m_read_or_write;
assign VX_dram_req_rsp.i_m_ready = i_m_ready_d;
assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready_i;
genvar curr_bank;
genvar curr_word;
/*
for (curr_bank = 0; curr_bank < CACHE_BANKS; curr_bank = curr_bank + 1) begin
for (curr_word = 0; curr_word < NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin
assign o_m_writedata_i[curr_bank][curr_word] = VX_dram_req_rsp_icache.o_m_writedata[curr_bank][curr_word];
assign o_m_writedata_d[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word];
assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata_d[curr_bank][curr_word]; // fixed
assign VX_dram_req_rsp_icache.i_m_readdata[curr_bank][curr_word] = i_m_readdata_i[curr_bank][curr_word]; // fixed
end
end
*/
for (curr_bank = 0; curr_bank < `DCACHE_BANKS; curr_bank = curr_bank + 1) begin
for (curr_word = 0; curr_word < `DCACHE_NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin
assign o_m_writedata_d[curr_bank][curr_word] = VX_dram_req_rsp.o_m_writedata[curr_bank][curr_word];
assign VX_dram_req_rsp.i_m_readdata[curr_bank][curr_word] = i_m_readdata_d[curr_bank][curr_word]; // fixed
end
end
for (curr_bank = 0; curr_bank < `ICACHE_BANKS; curr_bank = curr_bank + 1) begin
for (curr_word = 0; curr_word < `ICACHE_NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin
assign o_m_writedata_i[curr_bank][curr_word] = VX_dram_req_rsp_icache.o_m_writedata[curr_bank][curr_word];
assign VX_dram_req_rsp_icache.i_m_readdata[curr_bank][curr_word] = i_m_readdata_i[curr_bank][curr_word]; // fixed
end
end
/////////////////////////////////////////////////////////////////////////
// Front-end to Back-end
VX_frE_to_bckE_req_inter VX_bckE_req(); // New instruction request to EXE/MEM
// Back-end to Front-end
VX_wb_inter VX_writeback_inter(); // Writeback to GPRs
VX_branch_response_inter VX_branch_rsp(); // Branch Resolution to Fetch
VX_jal_response_inter VX_jal_rsp(); // Jump resolution to Fetch
// CSR Buses
// VX_csr_write_request_inter VX_csr_w_req();
VX_warp_ctl_inter VX_warp_ctl();
VX_front_end vx_front_end(
.clk (clk),
.reset (reset),
.VX_warp_ctl (VX_warp_ctl),
.VX_bckE_req (VX_bckE_req),
.schedule_delay (schedule_delay),
.icache_response_fe (icache_response_fe),
.icache_request_fe (icache_request_fe),
.VX_jal_rsp (VX_jal_rsp),
.VX_branch_rsp (VX_branch_rsp),
.fetch_ebreak (out_ebreak)
);
VX_scheduler schedule(
.clk (clk),
.reset (reset),
.memory_delay (memory_delay),
.gpr_stage_delay (gpr_stage_delay),
.VX_bckE_req (VX_bckE_req),
.VX_writeback_inter(VX_writeback_inter),
.schedule_delay (schedule_delay)
);
VX_back_end vx_back_end(
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
.VX_warp_ctl (VX_warp_ctl),
.VX_bckE_req (VX_bckE_req),
.VX_jal_rsp (VX_jal_rsp),
.VX_branch_rsp (VX_branch_rsp),
.VX_dcache_rsp (VX_dcache_rsp),
.VX_dcache_req (VX_dcache_req),
.VX_writeback_inter (VX_writeback_inter),
.out_mem_delay (memory_delay),
.gpr_stage_delay (gpr_stage_delay)
);
VX_dmem_controller VX_dmem_controller(
.clk (clk),
.reset (reset),
.VX_dram_req_rsp (VX_dram_req_rsp),
.VX_dram_req_rsp_icache (VX_dram_req_rsp_icache),
.VX_icache_req (icache_request_fe),
.VX_icache_rsp (icache_response_fe),
.VX_dcache_req (VX_dcache_req),
.VX_dcache_rsp (VX_dcache_rsp)
);
// VX_csr_handler vx_csr_handler(
// .clk (clk),
// .in_decode_csr_address(decode_csr_address),
// .VX_csr_w_req (VX_csr_w_req),
// .in_wb_valid (VX_writeback_inter.wb_valid[0]),
// .out_decode_csr_data (csr_decode_csr_data)
// );
endmodule // Vortex

View File

@@ -0,0 +1,53 @@
`include "VX_define.v"
// Register storage with one write port and two combinational read ports.
//
// The array holds 32 entries; each entry contains (`NT_M1 + 1) lanes of
// 32 bits, stored as four bytes per lane.
//
//   we, be    - a lane is written only when `we` is high and its `be` bit
//               is set; all four bytes of the lane are written together.
//   waddr     - entry written on the rising clock edge.
//   raddr1/2  - entries presented (unregistered) on q1 / q2.
//   reset     - asynchronous, active high; clears every entry.
//
// There is no write-to-read bypass: q1/q2 show the stored value, so data
// written in a cycle is visible on the read ports only after the clock edge.
module byte_enabled_simple_dual_port_ram
(
    input                        we, clk,
    input  wire                  reset,
    input  wire[4:0]             waddr, raddr1, raddr2,
    input  wire[`NT_M1:0]        be,
    input  wire[`NT_M1:0][31:0]  wdata,
    output reg[`NT_M1:0][31:0]   q1, q2
);

    //     Lane        Byte  Bit
    logic [`NT_M1:0][3:0][7:0] GPR[31:0];

    integer entry; // reset loop index over the 32 entries
    integer lane;  // write loop index over the lanes

    always @(posedge clk or posedge reset) begin
        if (reset) begin
            for (entry = 0; entry < 32; entry = entry + 1) begin
                GPR[entry] <= 0;
            end
        end else if (we) begin
            for (lane = 0; lane <= `NT_M1; lane = lane + 1) begin
                if (be[lane]) begin
                    GPR[waddr][lane][0] <= wdata[lane][7:0];
                    GPR[waddr][lane][1] <= wdata[lane][15:8];
                    GPR[waddr][lane][2] <= wdata[lane][23:16];
                    GPR[waddr][lane][3] <= wdata[lane][31:24];
                end
            end
        end
    end

    // Combinational read ports.
    assign q1 = GPR[raddr1];
    assign q2 = GPR[raddr2];

endmodule

12
old_rtl/cache/Makefile vendored Normal file
View File

@@ -0,0 +1,12 @@
# Verilator build of the stand-alone data-cache simulation
# (VX_d_cache_encapsulate.v driven by d_cache_test_bench.cpp).

# None of these targets name a file they produce, so keep them phony.
.PHONY: all VERILATOR RUNFILE clean

all: RUNFILE

# Translate the RTL and the C++ test bench into obj_dir/.
VERILATOR:
	verilator --compiler gcc --Wno-UNOPTFLAT -Wall --trace -cc VX_d_cache_encapsulate.v -Irtl --exe d_cache_test_bench.cpp -CFLAGS -std=c++11

# Compile the generated model; $(MAKE) propagates flags such as -n to the sub-make.
RUNFILE: VERILATOR
	cd obj_dir && $(MAKE) -j -f VVX_d_cache_encapsulate.mk

# Remove the whole generated directory; succeeds even if it does not exist.
clean:
	rm -rf obj_dir

46
old_rtl/cache/Notes vendored Normal file
View File

@@ -0,0 +1,46 @@
Notes
8 kB L1 Data Cache | 16 kB L1 I cache (maybe)
[tag index offset_remaining_block bank wordOffset], use a blocksize of 128 bytes between memory and cache. So each bank gets 16 bytes.
total offset is 8 bits
4 bits new offset, 2 bits block, 2 bits word offset
xxxxxxxIIIIIIIIoobbbyy
9876543210
bbbyyyyy
o = index into block offset
b = bank
y = word offset
I = index into cache
6 index bits (64 indices) with no ways || 16 indices with 4 ways
Rest of the bits are tag bits
blocks / banks = 16 bytes, 8 banks. 128 bytes. 256 indexes (height). width is 16 bytes. 4 words per block (per bank). 17 bit tag
gtkwave ___.vcd
// Splitting it up
// word byte
wire[127:0][3:0] data_from_ram;
// word byte bank
wire[15:0][3:0] bank_data_n[3:0]
integer i;
for (i = 0; i < something; i+=8)
{
bank_data_n[0][i/8] = data_from_ram[i+0]
bank_data_n[1][i/8] = data_from_ram[i+1]
bank_data_n[2][i/8] = data_from_ram[i+2]
bank_data_n[3][i/8] = data_from_ram[i+3]
bank_data_n[4][i/8] = data_from_ram[i+4]
bank_data_n[5][i/8] = data_from_ram[i+5]
bank_data_n[6][i/8] = data_from_ram[i+6]
bank_data_n[7][i/8] = data_from_ram[i+7]
}
With the cache: on a miss, go to memory, grab all of the data, and replace that data in the cache. Then generate a new request and feed it into the cache (this one will hit), and return that result.

253
old_rtl/cache/VX_Cache_Bank.v vendored Normal file
View File

@@ -0,0 +1,253 @@
// To Do: Change way_id_out to an internal register which holds when in between access and finished.
// Also add a bit about whether the "Way ID" is valid / being held or if it is just the default
// Also make sure all possible output states are transmitted back to the bank correctly
`include "../VX_define.v"
// `include "VX_cache_data.v"
// One cache bank: qualifies a single word access against the tag/valid state
// held in VX_cache_data_per_index, formats load data, builds the byte write
// enables for stores and for block fills, and exposes the selected line for
// eviction.
//
// Behaviour by `state` (encodings below):
//   CACHE_IDLE    with valid_in : processor access; `hit` is raised when the
//                                 selected line is valid and its tag equals o_tag.
//   RECIV_MEM_RSP with valid_in : block fill; every word of the line is
//                                 written from fetched_writedata.
//
// Ports:
//   read_or_write     - 0 = read, 1 = write
//   i_p_mem_read      - load type, compared against the `L*_MEM_READ macros
//   i_p_mem_write     - store type, compared against the `S*_MEM_WRITE macros
//   actual_index      - line index inside the bank
//   o_tag             - tag of the access (tag written on a fill)
//   block_offset      - word select inside the line
//   byte_select       - byte position inside the selected word
//   writedata         - store data from the processor
//   fetched_writedata - full line delivered by memory
//   evicted_way       - way forwarded to the storage as way_to_update
//   readdata          - formatted load data (0 when not an IDLE access)
//   eviction_wb       - dirty bit of the selected line
//   eviction_addr     - {tag of selected line, actual_index, zero block offset}
//   data_evicted      - contents of the selected line
module VX_Cache_Bank
#(
    parameter CACHE_SIZE          = 4096, // Bytes
    parameter CACHE_WAYS          = 1,
    parameter CACHE_BLOCK         = 128,  // Bytes
    parameter CACHE_BANKS         = 8,
    parameter LOG_NUM_BANKS       = 3,
    parameter NUM_REQ             = 8,
    parameter LOG_NUM_REQ         = 3,
    parameter NUM_IND             = 8,
    parameter CACHE_WAY_INDEX     = 1,
    parameter NUM_WORDS_PER_BLOCK = 4,
    parameter OFFSET_SIZE_START   = 0,
    parameter OFFSET_SIZE_END     = 1,
    parameter TAG_SIZE_START      = 0,
    parameter TAG_SIZE_END        = 16,
    parameter IND_SIZE_START      = 0,
    parameter IND_SIZE_END        = 7,
    parameter ADDR_TAG_START      = 15,
    parameter ADDR_TAG_END        = 31,
    parameter ADDR_OFFSET_START   = 5,
    parameter ADDR_OFFSET_END     = 6,
    parameter ADDR_IND_START      = 7,
    parameter ADDR_IND_END        = 14
)
(
    clk,
    rst,
    state,
    read_or_write, // Read = 0 | Write = 1
    i_p_mem_read,
    i_p_mem_write,
    valid_in,
    actual_index,
    o_tag,
    block_offset,
    writedata,
    fetched_writedata,
    byte_select,
    readdata,
    hit,
    eviction_wb,   // Need to evict
    eviction_addr, // What's the eviction tag
    data_evicted,
    evicted_way
);

    // Controller state encodings (must match the values driven on `state`).
    localparam CACHE_IDLE    = 0; // Idle
    localparam SEND_MEM_REQ  = 1; // Write back this block into memory
    localparam RECIV_MEM_RSP = 2;

    // Number of low address bits zeroed in eviction_addr.
    localparam BLOCK_NUM_BITS = `CLOG2(CACHE_BLOCK);

    // Inputs
    input wire       rst;
    input wire       clk;
    input wire [3:0] state;

    // Reading Data
    input wire[IND_SIZE_END:IND_SIZE_START]       actual_index;
    input wire[TAG_SIZE_END:TAG_SIZE_START]       o_tag; // During a fill, o_tag is the new tag
    input wire[OFFSET_SIZE_END:OFFSET_SIZE_START] block_offset;
    input wire[31:0]                              writedata;
    input wire                                    valid_in;
    input wire                                    read_or_write; // Specifies if it is a read or write operation
    input wire[NUM_WORDS_PER_BLOCK-1:0][31:0]     fetched_writedata;
    input wire[2:0]                               i_p_mem_read;
    input wire[2:0]                               i_p_mem_write;
    input wire[1:0]                               byte_select;
    input wire[CACHE_WAY_INDEX-1:0]               evicted_way;

    // Outputs
    output wire[31:0] readdata;
    output wire       hit;

    // Eviction Data (Notice)
    output wire       eviction_wb;   // Need to evict
    output wire[31:0] eviction_addr; // What's the eviction tag

    // Eviction Data (Extraction)
    output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_evicted;

    // State of the line currently selected inside the storage.
    wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use;
    wire[TAG_SIZE_END:TAG_SIZE_START]   tag_use;
    wire[TAG_SIZE_END:TAG_SIZE_START]   eviction_tag;
    wire                                valid_use;
    wire                                dirty_use;

    wire access;
    wire write_from_mem;
    wire miss;
    wire[CACHE_WAY_INDEX-1:0] way_to_update;

    // A miss is only reported for a valid line whose tag differs.
    // NOTE(review): for an invalid line `miss` stays 0, so a store access
    // still raises write enables below even though `hit` is 0 - confirm the
    // controller relies on the following fill overwriting the line.
    assign miss = (tag_use != o_tag) && valid_use && valid_in;

    assign data_evicted = data_use;
    assign eviction_wb  = (dirty_use != 1'b0);
    assign eviction_tag = tag_use;

    assign access         = (state == CACHE_IDLE) && valid_in;
    assign write_from_mem = (state == RECIV_MEM_RSP) && valid_in; // TODO
    assign hit            = (access && (tag_use == o_tag) && valid_use);

    assign eviction_addr = {eviction_tag, actual_index, {(BLOCK_NUM_BITS){1'b0}}}; // Fix with actual data

    // The way forwarded to the storage is independent of the word index, so
    // it is driven exactly once here (it used to be re-assigned in every
    // iteration of the per-word generate loop, multiply driving the net).
    assign way_to_update = evicted_way;

    // Decode of the load / store type.
    wire lw  = (i_p_mem_read == `LW_MEM_READ);
    wire lb  = (i_p_mem_read == `LB_MEM_READ);
    wire lh  = (i_p_mem_read == `LH_MEM_READ);
    wire lhu = (i_p_mem_read == `LHU_MEM_READ);
    wire lbu = (i_p_mem_read == `LBU_MEM_READ);

    wire sw = (i_p_mem_write == `SW_MEM_WRITE);
    wire sb = (i_p_mem_write == `SB_MEM_WRITE);
    wire sh = (i_p_mem_write == `SH_MEM_WRITE);

    // Decode of the byte position inside the word.
    wire b0 = (byte_select == 0);
    wire b1 = (byte_select == 1);
    wire b2 = (byte_select == 2);
    wire b3 = (byte_select == 3);

    // Selected word shifted so the addressed byte sits at bit 0
    // (word loads always take the word unshifted).
    wire[31:0] data_unQual = (b0 || lw) ? (data_use[block_offset]      ) :
                             b1         ? (data_use[block_offset] >> 8 ) :
                             b2         ? (data_use[block_offset] >> 16) :
                                          (data_use[block_offset] >> 24);

    // Sign- or zero-extended variants of the shifted word.
    wire[31:0] lb_data  = (data_unQual[7] ) ? (data_unQual | 32'hFFFFFF00) : (data_unQual & 32'hFF);
    wire[31:0] lh_data  = (data_unQual[15]) ? (data_unQual | 32'hFFFF0000) : (data_unQual & 32'hFFFF);
    wire[31:0] lbu_data = (data_unQual & 32'hFF);
    wire[31:0] lhu_data = (data_unQual & 32'hFFFF);
    wire[31:0] lw_data  = (data_unQual);

    // Store data moved into the byte lanes selected by byte_select; for
    // byte_select == 0 the data is passed through and the write mask limits
    // which bytes are stored.
    wire[31:0] sw_data = writedata;
    wire[31:0] sb_data = b1 ? {{16{1'b0}}, writedata[7:0], { 8{1'b0}}} :
                         b2 ? {{ 8{1'b0}}, writedata[7:0], {16{1'b0}}} :
                         b3 ? {{ 0{1'b0}}, writedata[7:0], {24{1'b0}}} :
                              writedata;
    wire[31:0] sh_data = b2 ? {writedata[15:0], {16{1'b0}}} : writedata;

    wire[31:0] use_write_data = sb ? sb_data :
                                sh ? sh_data :
                                     sw_data;

    wire[31:0] data_Qual = lb  ? lb_data  :
                           lh  ? lh_data  :
                           lhu ? lhu_data :
                           lbu ? lbu_data :
                                 lw_data;

    assign readdata = (access) ? data_Qual : 32'b0; // Fix with actual data

    // Byte write masks for byte and half-word stores.
    wire[3:0] sb_mask = (b0 ? 4'b0001 : (b1 ? 4'b0010 : (b2 ? 4'b0100 : 4'b1000)));
    wire[3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);

    wire[NUM_WORDS_PER_BLOCK-1:0][3:0]  we;
    wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write;

    // Per-word write enable and write data. A fill writes every word from
    // memory; a processor store writes only the addressed word.
    genvar g;
    for (g = 0; g < NUM_WORDS_PER_BLOCK; g = g + 1) begin
        wire normal_write = (read_or_write && ((access && (block_offset == g))) && !miss);

        assign we[g] = (write_from_mem)     ? 4'b1111 :
                       (normal_write && sw) ? 4'b1111 :
                       (normal_write && sb) ? sb_mask :
                       (normal_write && sh) ? sh_mask :
                                              4'b0000;

        assign data_write[g] = write_from_mem ? fetched_writedata[g] : use_write_data;
    end

    VX_cache_data_per_index #(
        .CACHE_WAYS         (CACHE_WAYS),
        .NUM_IND            (NUM_IND),
        .CACHE_WAY_INDEX    (CACHE_WAY_INDEX),
        .NUM_WORDS_PER_BLOCK(NUM_WORDS_PER_BLOCK),
        .TAG_SIZE_START     (TAG_SIZE_START),
        .TAG_SIZE_END       (TAG_SIZE_END),
        .IND_SIZE_START     (IND_SIZE_START),
        .IND_SIZE_END       (IND_SIZE_END)) data_structures(
        .clk          (clk),
        .rst          (rst),
        .valid_in     (valid_in),
        .state        (state),
        // Inputs
        .addr         (actual_index),
        .we           (we),
        .evict        (write_from_mem),
        .data_write   (data_write),
        .tag_write    (o_tag),
        .way_to_update(way_to_update),
        // Outputs
        .tag_use      (tag_use),
        .data_use     (data_use),
        .valid_use    (valid_use),
        .dirty_use    (dirty_use)
    );

endmodule

30
old_rtl/cache/VX_cache_bank_valid.v vendored Normal file
View File

@@ -0,0 +1,30 @@
`include "../VX_define.v"
// Builds, for every bank, the mask of requests that target it.
//
// Parameters:
//   NUMBER_BANKS  - number of banks.
//   LOG_NUM_BANKS - number of address bits used to select a bank.
//   NUM_REQ       - number of simultaneous requests.
//
// Ports:
//   i_p_valid          - one valid bit per request.
//   i_p_addr           - one 32-bit address per request; bits
//                        [2+LOG_NUM_BANKS-1:2] select the bank.
//   thread_track_banks - thread_track_banks[b][r] equals i_p_valid[r] when
//                        request r maps to bank b, and is 0 otherwise.
module VX_cache_bank_valid
#(
    parameter NUMBER_BANKS  = 8,
    parameter LOG_NUM_BANKS = 3,
    parameter NUM_REQ       = 1
)
(
    input  wire [NUM_REQ-1:0]                       i_p_valid,
    input  wire [NUM_REQ-1:0][31:0]                 i_p_addr,
    output reg  [NUMBER_BANKS - 1 : 0][NUM_REQ-1:0] thread_track_banks
);

    integer t_id;

    // The single-bank case is selected at elaboration time so that the bank
    // part-select below is never elaborated when LOG_NUM_BANKS is 0 (the
    // range would then be reversed, which tools may reject even in a branch
    // that can never execute).
    generate
        if (NUMBER_BANKS != 1) begin : multi_bank
            always @(*) begin
                thread_track_banks = 0;
                for (t_id = 0; t_id < NUM_REQ; t_id = t_id + 1) begin
                    thread_track_banks[i_p_addr[t_id][2+LOG_NUM_BANKS-1:2]][t_id] = i_p_valid[t_id];
                end
            end
        end else begin : single_bank
            // Every request maps to bank 0.
            always @(*) begin
                thread_track_banks = 0;
                for (t_id = 0; t_id < NUM_REQ; t_id = t_id + 1) begin
                    thread_track_banks[0][t_id] = i_p_valid[t_id];
                end
            end
        end
    endgenerate

endmodule

233
old_rtl/cache/VX_cache_data.v vendored Normal file
View File

@@ -0,0 +1,233 @@
`include "../VX_define.v"
// Storage for a single cache way: for every index it keeps one data block
// (NUM_WORDS_PER_BLOCK 32-bit words), a tag, a valid bit and a dirty bit.
//
// Two implementations are selected with the SYN macro:
//   SYN undefined - behavioural register arrays with combinational reads.
//   SYN defined   - rf2_32x128_wm1 (data) and rf2_32x19_wm0 (tag/dirty/valid)
//                   memory macros.
//
// Ports:
//   addr       - index being read and (on a write) written.
//   we         - byte write enables, four per word.
//   evict      - replaces the line's tag with tag_write, marks it valid and
//                clears its dirty bit (VX_cache_data_per_index drives this
//                from its per-way "write from memory" signal).
//   data_write - write data, one word per block word.
//   tag_write  - tag stored when evict is set.
//   tag_use / data_use / valid_use / dirty_use - contents at addr.
module VX_cache_data
#(
    parameter NUM_IND             = 8,
    parameter NUM_WORDS_PER_BLOCK = 4,
    parameter TAG_SIZE_START      = 0,
    parameter TAG_SIZE_END        = 16,
    parameter IND_SIZE_START      = 0,
    parameter IND_SIZE_END        = 7
)
(
    input wire clk, rst, // Clock
    // Addr
    input wire[IND_SIZE_END:IND_SIZE_START] addr,
    // WE
    input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we,
    input wire evict,
    // Data
    input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write,
    input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write,
    output wire[TAG_SIZE_END:TAG_SIZE_START] tag_use,
    output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use,
    output wire valid_use,
    output wire dirty_use
);

    // Any byte enable set.
    wire currently_writing = (|we);
    // The dirty bit is rewritten when a clean line is written or on evict.
    wire update_dirty = ((!dirty_use) && currently_writing) || (evict);
    // evict clears the dirty bit; otherwise it follows "something was written".
    wire dirt_new = evict ? 0 : (|we);

`ifndef SYN

    // (3:0) 4 bytes
    reg[NUM_WORDS_PER_BLOCK-1:0][3:0][7:0] data[NUM_IND-1:0]; // Actual Data
    reg[TAG_SIZE_END:TAG_SIZE_START] tag[NUM_IND-1:0];
    reg valid[NUM_IND-1:0];
    reg dirty[NUM_IND-1:0];

    // Combinational read port.
    assign data_use  = data[addr];
    assign tag_use   = tag[addr];
    assign valid_use = valid[addr];
    assign dirty_use = dirty[addr];

    integer f;
    integer ini_ind;

    // Asynchronous reset clears every entry; otherwise the entry at addr is
    // updated on the rising clock edge.
    always @(posedge clk, posedge rst) begin : update_all
        if (rst) begin
            for (ini_ind = 0; ini_ind < NUM_IND; ini_ind=ini_ind+1) begin
                data[ini_ind]  <= 0;
                tag[ini_ind]   <= 0;
                valid[ini_ind] <= 0;
                dirty[ini_ind] <= 0;
            end
        end else begin
            if (update_dirty) dirty[addr] <= dirt_new; // Write Port
            if (evict) tag[addr]   <= tag_write;
            if (evict) valid[addr] <= 1;
            // Byte-granular data write.
            for (f = 0; f < NUM_WORDS_PER_BLOCK; f = f + 1) begin
                if (we[f][0]) data[addr][f][0] <= data_write[f][7 :0 ];
                if (we[f][1]) data[addr][f][1] <= data_write[f][15:8 ];
                if (we[f][2]) data[addr][f][2] <= data_write[f][23:16];
                if (we[f][3]) data[addr][f][3] <= data_write[f][31:24];
            end
        end
    end

`else

    // NOTE(review): pin polarities and the 5-bit / 128-bit / 19-bit constants
    // below are dictated by the memory macros, which are not visible here -
    // confirm against the macro documentation before changing parameters.
    wire[IND_SIZE_END:IND_SIZE_START] use_addr = addr;

    wire cena = 1;
    wire cenb_d = (|we);

    wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_d = data_write;
    wire[NUM_WORDS_PER_BLOCK-1:0][31:0] write_bit_mask_d;
    wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_d;

    // Expand each byte enable to the eight mask bits of its own byte,
    // inverted as in the original expression. The previous form,
    // {32{~we[cur_b]}}, replicated the whole 4-bit enable vector and was
    // truncated to 32 bits, so mask bit i followed we[i % 4] instead of the
    // enable of the byte it belongs to.
    // Assumes WENB is a per-bit mask where 0 allows the write, as the
    // inversion of `we` implies - TODO confirm.
    genvar cur_b;
    for (cur_b = 0; cur_b < NUM_WORDS_PER_BLOCK; cur_b=cur_b+1) begin
        assign write_bit_mask_d[cur_b] = {{8{~we[cur_b][3]}},
                                          {8{~we[cur_b][2]}},
                                          {8{~we[cur_b][1]}},
                                          {8{~we[cur_b][0]}}};
    end

    assign data_use = data_out_d;

    // Using ASIC MEM
    /* verilator lint_off PINCONNECTEMPTY */
    rf2_32x128_wm1 data (
        .CENYA(),
        .AYA(),
        .CENYB(),
        .WENYB(),
        .AYB(),
        .QA(data_out_d),
        .SOA(),
        .SOB(),
        .CLKA(clk),
        .CENA(cena),
        .AA(use_addr),
        .CLKB(clk),
        .CENB(cenb_d),
        .WENB(write_bit_mask_d),
        .AB(use_addr),
        .DB(wdata_d),
        .EMAA(3'b011),
        .EMASA(1'b0),
        .EMAB(3'b011),
        .TENA(1'b1),
        .TCENA(1'b0),
        .TAA(5'b0),
        .TENB(1'b1),
        .TCENB(1'b0),
        .TWENB(128'b0),
        .TAB(5'b0),
        .TDB(128'b0),
        .RET1N(1'b1),
        .SIA(2'b0),
        .SEA(1'b0),
        .DFTRAMBYP(1'b0),
        .SIB(2'b0),
        .SEB(1'b0),
        .COLLDISN(1'b1)
    );
    /* verilator lint_on PINCONNECTEMPTY */

    // Tag / dirty / valid are packed into one 19-bit word: {tag, dirty, valid}.
    // NOTE(review): the 17-bit tag width is hard-coded here rather than
    // derived from TAG_SIZE_END/TAG_SIZE_START.
    wire[16:0] old_tag;
    wire old_valid;
    wire old_dirty;

    // Fields that are not being updated are written back unchanged.
    wire[16:0] new_tag = evict ? tag_write : old_tag;
    wire new_valid = evict ? 1 : old_valid;
    wire new_dirty = update_dirty ? dirt_new : old_dirty;

    wire cenb_m = (evict || update_dirty);
    // Not connected: the only reference (.WENB below) is commented out.
    wire[19-1:0][31:0] write_bit_mask_m = cenb_m ? 19'b0 : 19'b1;

    // Try to fix the error in memory connection, modified by Lingjun Zhu on Oct. 28 2019
    // wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_m = {new_tag, new_dirty, new_valid};
    // wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_m;
    wire[19-1:0] wdata_m = {new_tag, new_dirty, new_valid};
    wire[19-1:0] data_out_m;

    assign {old_tag, old_dirty, old_valid} = data_out_m;

    assign dirty_use = old_dirty;
    assign valid_use = old_valid;
    assign tag_use = old_tag;

    /* verilator lint_off PINCONNECTEMPTY */
    rf2_32x19_wm0 meta (
        .CENYA(),
        .AYA(),
        .CENYB(),
        // .WENYB(),
        .AYB(),
        .QA(data_out_m),
        .SOA(),
        .SOB(),
        .CLKA(clk),
        .CENA(cena),
        .AA(use_addr),
        .CLKB(clk),
        .CENB(cenb_m),
        // .WENB(write_bit_mask_m),
        .AB(use_addr),
        .DB(wdata_m),
        .EMAA(3'b011),
        .EMASA(1'b0),
        .EMAB(3'b011),
        .TENA(1'b1),
        .TCENA(1'b0),
        .TAA(5'b0),
        .TENB(1'b1),
        .TCENB(1'b0),
        // .TWENB(128'b0),
        .TAB(5'b0),
        .TDB(19'b0),
        .RET1N(1'b1),
        .SIA(2'b0),
        .SEA(1'b0),
        .DFTRAMBYP(1'b0),
        .SIB(2'b0),
        .SEB(1'b0),
        .COLLDISN(1'b1)
    );
    /* verilator lint_on PINCONNECTEMPTY */

`endif

endmodule

163
old_rtl/cache/VX_cache_data_per_index.v vendored Normal file
View File

@@ -0,0 +1,163 @@
`include "../VX_define.v"
// Bundles CACHE_WAYS instances of VX_cache_data that share one index and
// presents the contents of a single selected way.
//
// Way selection:
//   state == CACHE_IDLE : the way whose valid tag equals tag_write
//                         (way 0 when CACHE_WAYS is 1).
//   any other state     : way_to_update.
//
// Writes (we / evict) are steered only to the selected way; every way
// receives the same write data and tag.
module VX_cache_data_per_index
#(
    parameter CACHE_WAYS          = 1,
    parameter NUM_IND             = 8,
    parameter CACHE_WAY_INDEX     = 1,
    parameter NUM_WORDS_PER_BLOCK = 4,
    parameter TAG_SIZE_START      = 0,
    parameter TAG_SIZE_END        = 16,
    parameter IND_SIZE_START      = 0,
    parameter IND_SIZE_END        = 7
)
(
    input wire clk, // Clock
    input wire rst,
    input wire valid_in,
    input wire [3:0] state,
    // Addr
    input wire[IND_SIZE_END:IND_SIZE_START] addr,
    // WE
    input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we,
    input wire evict,
    input wire[CACHE_WAY_INDEX-1:0] way_to_update,
    // Data
    input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data
    input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write,
    output wire[TAG_SIZE_END:TAG_SIZE_START] tag_use,
    output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use,
    output wire valid_use,
    output wire dirty_use
);

    // Controller state encodings.
    localparam CACHE_IDLE    = 0; // Idle
    localparam SEND_MEM_REQ  = 1; // Write back this block into memory
    localparam RECIV_MEM_RSP = 2;

    // Read side: contents of every way at `addr`.
    wire [CACHE_WAYS-1:0][TAG_SIZE_END:TAG_SIZE_START]   tag_use_per_way;
    wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] data_use_per_way;
    wire [CACHE_WAYS-1:0]                                valid_use_per_way;
    wire [CACHE_WAYS-1:0]                                dirty_use_per_way;
    wire [CACHE_WAYS-1:0]                                hit_per_way;

    // Write side: controls steered to every way.
    wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][3:0]  we_per_way;
    wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] data_write_per_way;
    wire [CACHE_WAYS-1:0]                                write_from_mem_per_way;

    wire                       invalid_found;
    wire [CACHE_WAY_INDEX-1:0] way_index;
    wire [CACHE_WAY_INDEX-1:0] invalid_index;

    // Index of the hitting way and of an invalid way. With a single way both
    // indices are constant and no encoder is needed.
    if (CACHE_WAYS == 1) begin
        assign way_index     = 0;
        assign invalid_found = !valid_use_per_way;
        assign invalid_index = 0;
    end
    else begin
        VX_generic_priority_encoder #(.N(CACHE_WAYS)) valid_index
        (
            .valids(~valid_use_per_way),
            .index (invalid_index),
            .found (invalid_found)
        );

        VX_generic_priority_encoder #(.N(CACHE_WAYS)) way_indexing
        (
            .valids(hit_per_way),
            .index (way_index),
            .found ()
        );
    end

    // Way presented on the outputs and targeted by writes.
    wire[CACHE_WAY_INDEX-1:0] way_use_Qual;
    assign way_use_Qual = (state == CACHE_IDLE) ? way_index : way_to_update;

    assign tag_use   = tag_use_per_way[way_use_Qual];
    assign data_use  = data_use_per_way[way_use_Qual];
    assign valid_use = valid_use_per_way[way_use_Qual];
    assign dirty_use = dirty_use_per_way[way_use_Qual];

    genvar w;
    for (w = 0; w < CACHE_WAYS; w = w + 1) begin : each_way
        // High when this way is the one currently selected.
        wire way_selected = (w == way_use_Qual);

        assign hit_per_way[w]            = valid_use_per_way[w] && (tag_use_per_way[w] == tag_write);
        assign write_from_mem_per_way[w] = evict && way_selected;
        assign we_per_way[w]             = way_selected ? (we) : 0;
        assign data_write_per_way[w]     = data_write;

        VX_cache_data #(
            .NUM_IND            (NUM_IND),
            .NUM_WORDS_PER_BLOCK(NUM_WORDS_PER_BLOCK),
            .TAG_SIZE_START     (TAG_SIZE_START),
            .TAG_SIZE_END       (TAG_SIZE_END),
            .IND_SIZE_START     (IND_SIZE_START),
            .IND_SIZE_END       (IND_SIZE_END)) data_structures(
            .clk       (clk),
            .rst       (rst),
            // Inputs
            .addr      (addr),
            .we        (we_per_way[w]),
            .evict     (write_from_mem_per_way[w]),
            .data_write(data_write_per_way[w]),
            .tag_write (tag_write),
            // Outputs
            .tag_use   (tag_use_per_way[w]),
            .data_use  (data_use_per_way[w]),
            .valid_use (valid_use_per_way[w]),
            .dirty_use (dirty_use_per_way[w])
        );
    end

endmodule

387
old_rtl/cache/VX_d_cache.v vendored Normal file
View File

@@ -0,0 +1,387 @@
// Cache Memory (8way 4word) //
// i_ means input port //
// o_ means output port //
// _p_ means data exchange with processor //
// _m_ means data exchange with memory //
// TO DO:
// - Send in a response from memory of what the data is from the test bench
`include "../VX_define.v"
//`include "VX_priority_encoder.v"
// `include "VX_Cache_Bank.v"
//`include "cache_set.v"
module VX_d_cache
#(
parameter CACHE_SIZE = 4096, // Bytes
parameter CACHE_WAYS = 1,
parameter CACHE_BLOCK = 128, // Bytes
parameter CACHE_BANKS = 8,
parameter LOG_NUM_BANKS = 3,
parameter NUM_REQ = 8,
parameter LOG_NUM_REQ = 3,
parameter NUM_IND = 8,
parameter CACHE_WAY_INDEX = 1,
parameter NUM_WORDS_PER_BLOCK = 4,
parameter OFFSET_SIZE_START = 0,
parameter OFFSET_SIZE_END = 1,
parameter TAG_SIZE_START = 0,
parameter TAG_SIZE_END = 16,
parameter IND_SIZE_START = 0,
parameter IND_SIZE_END = 7,
parameter ADDR_TAG_START = 15,
parameter ADDR_TAG_END = 31,
parameter ADDR_OFFSET_START = 5,
parameter ADDR_OFFSET_END = 6,
parameter ADDR_IND_START = 7,
parameter ADDR_IND_END = 14,
parameter MEM_ADDR_REQ_MASK = 32'hffffffc0
)
(
clk,
rst,
i_p_addr,
//i_p_byte_en,
i_p_writedata,
i_p_read_or_write, // 0 = Read | 1 = Write
i_p_mem_read,
i_p_mem_write,
i_p_valid,
//i_p_write,
o_p_readdata,
o_p_delay, // 0 = all threads done | 1 = Still threads that need to
o_m_evict_addr,
o_m_read_addr,
o_m_writedata,
o_m_read_or_write, // 0 = Read | 1 = Write
o_m_valid,
i_m_readdata,
i_m_ready
);
//parameter NUMBER_BANKS = `CACHE_BANKS;
//localparam NUM_WORDS_PER_BLOCK = `CACHE_BLOCK / (`CACHE_BANKS*4);
//localparam CACHE_BLOCK_PER_BANK = (`CACHE_BLOCK / `CACHE_BANKS);
localparam CACHE_IDLE = 0; // Idle
localparam SEND_MEM_REQ = 1; // Write back this block into memory
localparam RECIV_MEM_RSP = 2;
//parameter cache_entry = 9;
input wire clk, rst;
input wire [NUM_REQ-1:0] i_p_valid;
input wire [NUM_REQ-1:0][31:0] i_p_addr; // FIXME
input wire [NUM_REQ-1:0][31:0] i_p_writedata;
input wire i_p_read_or_write; //, i_p_write;
output reg [NUM_REQ-1:0][31:0] o_p_readdata;
output wire o_p_delay;
output reg [31:0] o_m_evict_addr; // Address is xxxxxxxxxxoooobbbyy
output reg [31:0] o_m_read_addr;
output reg o_m_valid;
output reg[CACHE_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata;
output reg o_m_read_or_write; //, o_m_write;
input wire[CACHE_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata;
input wire i_m_ready;
input wire[2:0] i_p_mem_read;
input wire[2:0] i_p_mem_write;
// Buffer for final data
reg [NUM_REQ-1:0][31:0] final_data_read;
reg [NUM_REQ-1:0][31:0] new_final_data_read;
wire[NUM_REQ-1:0][31:0] new_final_data_read_Qual;
assign o_p_readdata = new_final_data_read_Qual;
reg[CACHE_WAY_INDEX-1:0] global_way_to_evict;
wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] thread_track_banks; // Valid thread mask per bank
wire[CACHE_BANKS - 1 : 0][LOG_NUM_REQ-1:0] index_per_bank; // Index of thread each bank will try to service
wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] use_mask_per_bank; // A mask of index_per_bank
wire[CACHE_BANKS - 1 : 0] valid_per_bank; // Valid request going to each bank
wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] threads_serviced_per_bank; // Bank successfully serviced per bank
wire[CACHE_BANKS-1:0][31:0] readdata_per_bank; // Data read from each bank
wire[CACHE_BANKS-1:0] hit_per_bank; // Whether each bank got a hit or a miss
wire[CACHE_BANKS-1:0] eviction_wb;
reg[CACHE_BANKS-1:0] eviction_wb_old;
// wire[CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] evicted_way_new;
// reg [CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] evicted_way_old;
// wire[CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] way_used;
// Internal State
reg [3:0] state;
wire[3:0] new_state;
wire[NUM_REQ-1:0] use_valid; // Valid used throught the code
reg[NUM_REQ-1:0] stored_valid; // Saving the threads still left (bank conflict or bank miss)
wire[NUM_REQ-1:0] new_stored_valid; // New stored valid
reg[CACHE_BANKS - 1 : 0][31:0] eviction_addr_per_bank;
reg[31:0] miss_addr;
// reg[31:0] evict_addr;
wire curr_processor_request_valid = (|i_p_valid);
assign use_valid = (stored_valid == 0) ? i_p_valid : stored_valid;
VX_cache_bank_valid #(.NUMBER_BANKS (CACHE_BANKS),
.LOG_NUM_BANKS (LOG_NUM_BANKS),
.NUM_REQ (NUM_REQ)) multip_banks(
.i_p_valid (use_valid),
.i_p_addr (i_p_addr),
.thread_track_banks(thread_track_banks)
);
reg[NUM_REQ-1:0] threads_serviced_Qual;
reg[NUM_REQ-1:0] debug_hit_per_bank_mask[CACHE_BANKS-1:0];
genvar bid;
for (bid = 0; bid < CACHE_BANKS; bid=bid+1)
begin
wire[NUM_REQ-1:0] use_threads_track_banks = thread_track_banks[bid];
wire[LOG_NUM_REQ-1:0] use_thread_index = index_per_bank[bid];
wire use_write_final_data = hit_per_bank[bid];
wire[31:0] use_data_final_data = readdata_per_bank[bid];
VX_priority_encoder_w_mask #(.N(NUM_REQ)) choose_thread(
.valids(use_threads_track_banks),
.mask (use_mask_per_bank[bid]),
.index (index_per_bank[bid]),
.found (valid_per_bank[bid])
);
assign debug_hit_per_bank_mask[bid] = {NUM_REQ{hit_per_bank[bid]}};
assign threads_serviced_per_bank[bid] = use_mask_per_bank[bid] & debug_hit_per_bank_mask[bid];
end
integer test_bid;
always @(*) begin
new_final_data_read = 0;
for (test_bid=0; test_bid < CACHE_BANKS; test_bid=test_bid+1)
begin
if (hit_per_bank[test_bid]) begin
new_final_data_read[index_per_bank[test_bid]] = readdata_per_bank[test_bid];
end
end
end
wire[CACHE_BANKS - 1 : 0] detect_bank_miss;
//assign threads_serviced_Qual = threads_serviced_per_bank[0] | threads_serviced_per_bank[1] |
// threads_serviced_per_bank[2] | threads_serviced_per_bank[3] |
// threads_serviced_per_bank[4] | threads_serviced_per_bank[5] |
// threads_serviced_per_bank[6] | threads_serviced_per_bank[7];
integer bbid;
always @(*) begin
threads_serviced_Qual = 0;
for (bbid = 0; bbid < CACHE_BANKS; bbid=bbid+1)
begin
threads_serviced_Qual = threads_serviced_Qual | threads_serviced_per_bank[bbid];
end
end
genvar tid;
for (tid = 0; tid < NUM_REQ; tid =tid+1)
begin
assign new_final_data_read_Qual[tid] = threads_serviced_Qual[tid] ? new_final_data_read[tid] : final_data_read[tid];
end
assign detect_bank_miss = (valid_per_bank & ~hit_per_bank);
wire delay;
assign delay = (new_stored_valid != 0) || (state != CACHE_IDLE); // add other states
assign o_p_delay = delay;
wire[CACHE_BANKS - 1 : 0][LOG_NUM_REQ-1:0] send_index_to_bank = index_per_bank;
wire[LOG_NUM_BANKS-1:0] miss_bank_index;
wire miss_found;
VX_generic_priority_encoder #(.N(CACHE_BANKS)) get_miss_index
(
.valids(detect_bank_miss),
.index (miss_bank_index),
.found (miss_found)
);
assign new_state = ((state == CACHE_IDLE) && (|detect_bank_miss)) ? SEND_MEM_REQ :
(state == SEND_MEM_REQ) ? RECIV_MEM_RSP :
((state == RECIV_MEM_RSP) && !i_m_ready) ? RECIV_MEM_RSP :
CACHE_IDLE;
// Handle if there is more than one miss
assign new_stored_valid = use_valid & (~threads_serviced_Qual);
wire update_global_way_to_evict = ((state == RECIV_MEM_RSP) && (new_state == CACHE_IDLE)) && (CACHE_WAYS > 1);
///////////////////////////////////////////////////////////////////////
genvar cur_t;
integer init_b;
always @(posedge clk, posedge rst) begin
if (rst) begin
final_data_read <= 0;
// new_final_data_read = 0;
state <= 0;
stored_valid <= 0;
// eviction_addr_per_bank <= 0;
miss_addr <= 0;
// evict_addr <= 0;
// threads_serviced_Qual = 0;
// for (init_b = 0; init_b < NUMBER_BANKS; init_b=init_b+1)
// begin
// debug_hit_per_bank_mask[init_b] <= 0;
// end
// evicted_way_old <= 0;
// eviction_wb_old <= 0;
global_way_to_evict <= 0;
end else begin
global_way_to_evict <= (update_global_way_to_evict) ? (global_way_to_evict+1) : global_way_to_evict;
state <= new_state;
stored_valid <= new_stored_valid;
if (state == CACHE_IDLE) begin
if (miss_found) begin
miss_addr <= i_p_addr[send_index_to_bank[miss_bank_index]];
// evict_addr <= eviction_addr_per_bank[miss_bank_index];
end else begin
miss_addr <= 0;
// evict_addr <= 0;
end
end
final_data_read <= new_final_data_read_Qual;
// evicted_way_old <= evicted_way_new;
// eviction_wb_old <= eviction_wb;
end
end
genvar bank_id;
generate
for (bank_id = 0; bank_id < CACHE_BANKS; bank_id = bank_id + 1)
begin
wire[31:0] bank_addr = (state == SEND_MEM_REQ) ? miss_addr :
(state == RECIV_MEM_RSP) ? miss_addr :
i_p_addr[send_index_to_bank[bank_id]];
// assign evicted_way_new[bank_id] = (state == SEND_MEM_REQ) ? way_used[bank_id] :
// (state == RECIV_MEM_RSP) ? evicted_way_old[bank_id] :
// 0;
wire[1:0] byte_select = bank_addr[1:0];
wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START];
`ifdef SYN_FUNC
wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = 0;
wire[IND_SIZE_END:IND_SIZE_START] cache_index = 0;
`else
wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = bank_addr[ADDR_OFFSET_END:ADDR_OFFSET_START];
wire[IND_SIZE_END:IND_SIZE_START] cache_index = bank_addr[ADDR_IND_END:ADDR_IND_START];
`endif
wire normal_valid_in = valid_per_bank[bank_id];
wire use_valid_in = ((state == RECIV_MEM_RSP) && i_m_ready) ? 1'b1 :
((state == RECIV_MEM_RSP) && !i_m_ready) ? 1'b0 :
((state == SEND_MEM_REQ)) ? 1'b0 :
normal_valid_in;
VX_Cache_Bank #(
.CACHE_SIZE (CACHE_SIZE),
.CACHE_WAYS (CACHE_WAYS),
.CACHE_BLOCK (CACHE_BLOCK),
.CACHE_BANKS (CACHE_BANKS),
.LOG_NUM_BANKS (LOG_NUM_BANKS),
.NUM_REQ (NUM_REQ),
.LOG_NUM_REQ (LOG_NUM_REQ),
.NUM_IND (NUM_IND),
.CACHE_WAY_INDEX (CACHE_WAY_INDEX),
.NUM_WORDS_PER_BLOCK (NUM_WORDS_PER_BLOCK),
.OFFSET_SIZE_START (OFFSET_SIZE_START),
.OFFSET_SIZE_END (OFFSET_SIZE_END),
.TAG_SIZE_START (TAG_SIZE_START),
.TAG_SIZE_END (TAG_SIZE_END),
.IND_SIZE_START (IND_SIZE_START),
.IND_SIZE_END (IND_SIZE_END),
.ADDR_TAG_START (ADDR_TAG_START),
.ADDR_TAG_END (ADDR_TAG_END),
.ADDR_OFFSET_START (ADDR_OFFSET_START),
.ADDR_OFFSET_END (ADDR_OFFSET_END),
.ADDR_IND_START (ADDR_IND_START),
.ADDR_IND_END (ADDR_IND_END)
) bank_structure (
.clk (clk),
.rst (rst),
.state (state),
.valid_in (use_valid_in),
.actual_index (cache_index),
.o_tag (cache_tag),
.block_offset (cache_offset),
.writedata (i_p_writedata[send_index_to_bank[bank_id]]),
.read_or_write (i_p_read_or_write),
.i_p_mem_read (i_p_mem_read),
.i_p_mem_write (i_p_mem_write),
.byte_select (byte_select),
.hit (hit_per_bank[bank_id]),
.readdata (readdata_per_bank[bank_id]), // Data read
.eviction_addr (eviction_addr_per_bank[bank_id]),
.data_evicted (o_m_writedata[bank_id]),
.eviction_wb (eviction_wb[bank_id]), // Something needs to be written back
.fetched_writedata(i_m_readdata[bank_id]), // Data From memory
.evicted_way (global_way_to_evict)
);
end
endgenerate
// Mem Rsp
// Req to mem:
assign o_m_evict_addr = (eviction_addr_per_bank[0]) & MEM_ADDR_REQ_MASK; // Could be anything because tag+index are same
assign o_m_read_addr = miss_addr & MEM_ADDR_REQ_MASK;
assign o_m_valid = (state == SEND_MEM_REQ);
assign o_m_read_or_write = (state == SEND_MEM_REQ) && (|eviction_wb);
//end
endmodule

118
old_rtl/cache/VX_d_cache_encapsulate.v vendored Normal file
View File

@@ -0,0 +1,118 @@
`include "../VX_define.v"
// `define NUM_WORDS_PER_BLOCK 4
// Wrapper around a single VX_d_cache instance (`dcache`).
// The wrapper's array ports are unpacked arrays (one element per thread, or
// per bank/word); the VX_d_cache instance is connected to packed `*_inter`
// arrays instead. The generate loops below copy element by element between
// the two representations; scalar ports are passed straight through.
// NOTE(review): several ports are declared `output reg` yet are driven by
// `assign` statements or directly by the instance's ports. That is only legal
// under SystemVerilog rules -- confirm every tool that compiles this file
// parses it as SystemVerilog.
module VX_d_cache_encapsulate (
clk,
rst,
i_p_initial_request,
i_p_addr,
i_p_writedata,
i_p_read_or_write,
i_p_valid,
o_p_readdata,
o_p_readdata_valid,
o_p_waitrequest,
o_m_addr,
o_m_writedata,
o_m_read_or_write,
o_m_valid,
i_m_readdata,
i_m_ready
);
// Sizes the bank dimension of the memory-side data arrays of this wrapper.
// NOTE(review): it is not forwarded to the VX_d_cache instance below, so the
// instance uses its own default -- confirm the two agree.
parameter NUMBER_BANKS = 8;
//parameter cache_entry = 9;
input wire clk, rst;
// Core-side request ports: one element per thread (`NT_M1:0).
input wire i_p_valid[`NT_M1:0];
input wire [31:0] i_p_addr[`NT_M1:0];
input wire i_p_initial_request;
input wire [31:0] i_p_writedata[`NT_M1:0];
input wire i_p_read_or_write;
// Memory-side response: one 32-bit word per [bank][word-in-block].
input wire [31:0] i_m_readdata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0];
input wire i_m_ready;
// Core-side response ports: one element per thread.
output reg [31:0] o_p_readdata[`NT_M1:0];
output reg o_p_readdata_valid[`NT_M1:0] ;
output reg o_p_waitrequest;
// Memory-side request ports.
output reg [31:0] o_m_addr;
output reg o_m_valid;
output reg [31:0] o_m_writedata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0];
output reg o_m_read_or_write;
// Packed-array mirrors of the unpacked ports; these are what the VX_d_cache
// instance is actually connected to.
wire [`NT_M1:0] i_p_valid_inter;
wire [`NT_M1:0][31:0] i_p_addr_inter;
wire [`NT_M1:0][31:0] i_p_writedata_inter;
reg [`NT_M1:0][31:0] o_p_readdata_inter;
reg [`NT_M1:0] o_p_readdata_valid_inter;
reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter;
wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter;
// Per-thread copy: inputs unpacked -> packed, outputs packed -> unpacked.
// The loop bound `NT is presumably `NT_M1 + 1 (both come from VX_define.v).
genvar curr_thraed;
for (curr_thraed = 0; curr_thraed < `NT; curr_thraed = curr_thraed + 1) begin
assign i_p_valid_inter[curr_thraed] = i_p_valid[curr_thraed];
assign i_p_addr_inter[curr_thraed] = i_p_addr[curr_thraed];
assign i_p_writedata_inter[curr_thraed] = i_p_writedata[curr_thraed];
assign o_p_readdata[curr_thraed] = o_p_readdata_inter[curr_thraed];
assign o_p_readdata_valid[curr_thraed] = o_p_readdata_valid_inter[curr_thraed];
end
// Per-bank, per-word copy of the memory-side data in both directions.
genvar curr_bank;
genvar curr_word;
for (curr_bank = 0; curr_bank < NUMBER_BANKS; curr_bank = curr_bank + 1) begin
for (curr_word = 0; curr_word < `NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin
assign o_m_writedata[curr_bank][curr_word] = o_m_writedata_inter[curr_bank][curr_word];
assign i_m_readdata_inter[curr_bank][curr_word] = i_m_readdata[curr_bank][curr_word];
end
end
// The wrapped cache. Array ports use the packed `*_inter` signals; scalar
// ports connect directly to this module's ports.
VX_d_cache dcache(
.clk (clk),
.rst (rst),
.i_p_valid (i_p_valid_inter),
.i_p_addr (i_p_addr_inter),
.i_p_initial_request(i_p_initial_request),
.i_p_writedata (i_p_writedata_inter),
.i_p_read_or_write (i_p_read_or_write),
.o_p_readdata (o_p_readdata_inter),
.o_p_readdata_valid (o_p_readdata_valid_inter),
.o_p_waitrequest (o_p_waitrequest),
.o_m_addr (o_m_addr),
.o_m_valid (o_m_valid),
.o_m_writedata (o_m_writedata_inter),
.o_m_read_or_write (o_m_read_or_write),
.i_m_readdata (i_m_readdata_inter),
.i_m_ready (i_m_ready)
);
endmodule

58
old_rtl/cache/VX_d_cache_tb.v vendored Normal file
View File

@@ -0,0 +1,58 @@
`include "VX_define.v"
`include "VX_d_cache.v"
// Skeleton test bench for VX_d_cache: it instantiates the cache and generates
// a free-running clock, nothing more.
// NOTE(review): as written the bench applies no stimulus. `reset` is set to 0
// once and never pulsed, no request inputs are ever driven, and there is no
// $finish, so a simulation of this module never ends on its own.
module VX_d_cache_tb;
parameter NUMBER_BANKS = 8;
reg clk, reset, im_ready;
// Core-side request signals (one entry per thread).
reg [`NT_M1:0] i_p_valid;
reg [`NT_M1:0][13:0] i_p_addr; // FIXME
reg i_p_initial_request;
reg [`NT_M1:0][31:0] i_p_writedata;
reg i_p_read_or_write; //, i_p_write;
// Signals connected to the cache's core-side outputs.
reg [`NT_M1:0][31:0] o_p_readdata;
reg [`NT_M1:0] o_p_readdata_valid;
reg o_p_waitrequest;
// Signals connected to the cache's memory-side ports.
// NOTE(review): addresses here are 14 bits wide and the data buses are
// NUMBER_BANKS*32 bits -- confirm these match the VX_d_cache port widths.
reg [13:0] o_m_addr; // Only one address is sent out at a time to memory
reg o_m_valid;
reg [(NUMBER_BANKS * 32) - 1:0] o_m_writedata;
reg o_m_read_or_write; //, o_m_write;
reg [(NUMBER_BANKS * 32) - 1:0] i_m_readdata; // Read Data that is passed from the memory module back to the controller
// Device under test.
VX_d_cache d_cache(.clk(clk),
.rst(reset),
.i_p_initial_request(i_p_initial_request),
.i_p_addr(i_p_addr),
.i_p_writedata(i_p_writedata),
.i_p_read_or_write(i_p_read_or_write), // 0 = Read | 1 = Write
.i_p_valid(i_p_valid),
.o_p_readdata(o_p_readdata),
.o_p_readdata_valid(o_p_readdata_valid),
.o_p_waitrequest(o_p_waitrequest), // 0 = all threads done | 1 = Still threads that need to
.o_m_addr(o_m_addr),
.o_m_writedata(o_m_writedata),
.o_m_read_or_write(o_m_read_or_write), // 0 = Read | 1 = Write
.o_m_valid(o_m_valid),
.i_m_readdata(i_m_readdata),
.i_m_ready(im_ready)
//cnt_r,
//cnt_w,
//cnt_hit_r,
//cnt_hit_w
);
// Initial values: clock low, reset de-asserted (reset is never raised).
initial
begin
clk = 0;
reset = 0;
end
// Clock generator: toggles every 5 time units (period of 10).
always
#5 clk = ! clk;
endmodule

24
old_rtl/cache/VX_generic_pe.v vendored Normal file
View File

@@ -0,0 +1,24 @@
// Combinational priority encoder over an N-bit request vector.
//   valids : request bits
//   index  : position of the lowest-numbered set bit of `valids` (0 when none is set)
//   found  : 1 when at least one bit of `valids` is set
module VX_generic_pe
#(
    parameter N = 8
)
(
    input  wire[N-1:0]         valids,
    output reg[$clog2(N)-1:0]  index,
    output reg                 found
);

    parameter my_secret = 0;

    integer i;

    always @(*) begin
        // Defaults cover the "nothing set" case.
        index = 0;
        found = 0;
        // Scan upwards and latch only the first hit, so bit 0 has the
        // highest priority.
        for (i = 0; i < N; i = i + 1) begin
            if (valids[i] && !found) begin
                index = i[$clog2(N)-1:0];
                found = 1;
            end
        end
    end

endmodule

233
old_rtl/cache/cache_set.v vendored Normal file
View File

@@ -0,0 +1,233 @@
// To Do: Change way_id_out to an internal register which holds when in between access and finished.
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default
// Also make sure all possible output states are transmitted back to the bank correctly
// `include "VX_define.v"
// One cache set holding one 32-bit data word, a 2-bit tag, a valid bit and a
// "clean" bit per way. The caller selects what happens on a clock edge with
// the `access`, `find_evict`, `write_from_mem` and `idle` inputs.
//
// NOTE(review): this block reads as work in progress:
//   * the reset branch is empty, so no state is ever initialised by `rst`;
//   * the way lookups are written out for ways 0..3 and the victim is picked
//     with counter[1:0], so `ways_per_set` cannot really be changed from 4;
//   * `cache_entry` is not referenced anywhere in this module;
//   * several registers are assigned more than once in the same clock edge;
//     with non-blocking assignments the textually last executed one wins.
module cache_set(clk,
rst,
// These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes
access, // First
find_evict,
write_from_mem,
idle,
// entry,
o_tag,
writedata,
//byte_en,
write,
//word_en,
//way_id_in,
//way_id_out,
readdata,
//wb_addr,
hit,
eviction_wb,
eviction_tag,
//eviction_data,
//modify,
miss
//valid_data
//read_miss
);
parameter cache_entry = 14;
parameter ways_per_set = 4;
input wire clk, rst;
input wire access;
input wire find_evict;
input wire write_from_mem;
input wire idle;
//input wire [cache_entry-1:0] entry;
// Tag of the request, compared against the stored per-way tags.
input wire [1:0] o_tag;
input wire [31:0] writedata;
//input wire [3:0] byte_en;
input wire write; // 0 == False
//input wire [3:0] word_en;
//input wire read_miss;
//input wire [1:0] way_id_in;
//output reg [1:0] way_id_out;
output reg [31:0] readdata;
//output reg [3:0] hit;
output reg hit;
output reg miss;
output wire eviction_wb;
output wire [1:0] eviction_tag;
// Copy of the data word of the way currently selected by counter[1:0],
// refreshed on every clock edge (see the end of the always block).
reg [31:0] eviction_data;
//output wire [22:0] wb_addr;
//output wire modify, valid_data;
//wire [2:0] i_tag;
//dirty;
//wire [24-cache_entry:0] write_tag_data;
// Table for one set
reg [2:0] counter; // Determines which to evict
reg valid [ways_per_set-1:0];
reg [1:0] tag [ways_per_set-1:0];
reg clean [ways_per_set-1:0];
reg [31:0] data [ways_per_set-1:0];
// A write-back is signalled when `miss` is set and the way selected by
// counter[1:0] is valid and not clean.
assign eviction_wb = miss && clean[counter[1:0]] != 1'b1 && valid[counter[1:0]] == 1'b1;
assign eviction_tag = tag[counter[1:0]];
//assign eviction_data = data[counter[1:0]];
//assign hit = valid_data && (o_tag == i_tag);
//assign modify = valid_data && (o_tag != i_tag) && dirty;
//assign miss = !valid_data || ((o_tag != i_tag) && !dirty);
//assign wb_addr = {i_tag, entry};
always @(posedge clk) begin
// NOTE(review): empty reset branch -- valid/tag/clean/data/counter are never
// cleared, and the logic below still runs while rst is high.
if (rst) begin
end
// NOTE(review): this first find_evict lookup has no lasting effect: the
// second `if (find_evict)` block further down always assigns readdata (it has
// a final else) and, being later, overrides whatever is scheduled here.
if (find_evict) begin
if (tag[0] == o_tag && valid[0]) begin
readdata <= data[0];
end else if (tag[1] == o_tag && valid[1]) begin
readdata <= data[1];
end else if (tag[2] == o_tag && valid[2]) begin
readdata <= data[2];
end else if (tag[3] == o_tag && valid[3]) begin
readdata <= data[3];
end
end else if (access) begin
//tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12];
// Same expression as the unconditional counter update at the end of this
// always block, so this assignment is redundant.
counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC
// Ways are checked in priority order 0, 1, 2, 3. For the matching way:
//   read  + clean     -> hit, readdata = stored word
//   read  + not clean -> reported as a miss with readdata = 0
//   write             -> store writedata, clear clean, set hit
//                        (miss is left unchanged on a write hit)
// Hit in First Column
if (tag[0] == o_tag && valid[0]) begin
if (write == 1'b0) begin // if it is a read
if (clean[0] == 1'b1 ) begin
//hit <= 4'b0001;
hit <= 1'b1;
readdata <= data[0];
miss <= 1'b0;
end else begin
//hit <= 4'b0000; // SHOULD PROBABLY TRACK WHERE THIS MISS IS IN A DIFFERENT VARIABLE
hit <= 1'b0;
readdata <= 32'b0;
miss <= 1'b1;
end
end else if (write == 1'b1) begin
data[0] <= writedata;
clean[0] <= 1'b0;
//hit <= 4'b0001;
hit <= 1'b1;
end
end
// Hit in Second Column
else if (tag[1] == o_tag && valid[1]) begin
if (write == 1'b0) begin // if it is a read
if (clean[1] == 1'b1 ) begin
//hit <= 4'b0010;
hit <= 1'b1;
readdata <= data[1];
miss <= 1'b0;
end else begin
//hit <= 4'b0000;
hit <= 1'b0;
readdata <= 32'b0;
miss <= 1'b1;
end
end else if (write == 1'b1) begin
data[1] <= writedata;
clean[1] <= 1'b0;
//hit <= 4'b0010;
hit <= 1'b1;
end
end
// Hit in Third Column
else if (tag[2] == o_tag && valid[2]) begin
if (write == 1'b0) begin // if it is a read
if (clean[2] == 1'b1 ) begin
//hit <= 4'b0100;
hit <= 1'b1;
readdata <= data[2];
miss <= 1'b0;
end else begin
//hit <= 4'b0000;
hit <= 1'b0;
readdata <= 32'b0;
miss <= 1'b1;
end
end else if (write == 1'b1) begin
data[2] <= writedata;
clean[2] <= 1'b0;
//hit <= 4'b0100;
hit <= 1'b1;
end
end
// Hit in Fourth Column
else if (tag[3] == o_tag && valid[3]) begin
if (write == 1'b0) begin // if it is a read
if (clean[3] == 1'b1 ) begin
//hit <= 4'b1000;
hit <= 1'b1;
readdata <= data[3];
miss <= 1'b0;
end else begin
//hit <= 4'b0000;
hit <= 1'b0;
readdata <= 32'b0;
miss <= 1'b1;
end
end else if (write == 1'b1) begin
data[3] <= writedata;
clean[3] <= 1'b0;
//hit <= 4'b1000;
hit <= 1'b1;
end
end
// Miss
// No valid way matched: flag the miss and overwrite the victim way's data and
// clean bit. Tag and valid are NOT updated here (only in write_from_mem).
// NOTE(review): a write miss marks the way clean here, whereas the
// write_from_mem path below marks a written way not clean -- confirm intent.
else begin
//way_id_out <= counter;
miss <= 1'b1;
if (write == 1'b0) begin // Read Miss
clean[counter[1:0]] <= 1'b1;
data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS
end else if (write == 1'b1) begin // Write Miss
clean[counter[1:0]] <= 1'b1;
data[counter[1:0]] <= writedata;
end
end
end
// Fill of the victim way: install the tag, mark it valid and report a hit.
// This is an independent `if`, so when asserted together with `access` its
// assignments override the ones scheduled above for the same registers.
if (write_from_mem) begin
tag[counter[1:0]] <= o_tag;
valid[counter[1:0]] <= 1'b1;
hit <= 1'b1;
if (write == 1'b0) begin // Read Miss
clean[counter[1:0]] <= 1'b1;
data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS
end else if (write == 1'b1) begin // Write Miss
clean[counter[1:0]] <= 1'b0;
data[counter[1:0]] <= writedata;
end
end
if (idle) begin // Set "way" register equal to invalid value
hit <= 1'b1; // set to know it is ready
miss <= 1'b0;
readdata <= 32'hFFFFFFFF;
end
// Effective find_evict behaviour. Ways are checked in the order 3, 1, 2, 0
// (different from the 0..3 order used elsewhere); with no match the latched
// eviction_data is returned. hit/miss are forced to 1/0.
if (find_evict) begin // Keep "way" value the same !!!! Fix. Need to send back data with matching tag. Also need to ensure evicted data doesnt get lost
if (tag[3] == o_tag && valid[3]) begin
readdata <= data[3];
end else if (tag[1] == o_tag && valid[1]) begin
readdata <= data[1];
end else if (tag[2] == o_tag && valid[2]) begin
readdata <= data[2];
end else if (tag[0] == o_tag && valid[0]) begin
readdata <= data[0];
end else begin
readdata <= eviction_data;
end
hit <= 1'b1;
miss <= 1'b0;
end
// Runs on every clock edge regardless of mode: the low two counter bits
// advance by one, so the victim way rotates every cycle (not only on misses,
// as the original comment notes), and the current victim's data is latched.
counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC
eviction_data <= data[counter[1:0]];
end
endmodule

29
old_rtl/cache/d_cache_test_bench.cpp vendored Normal file
View File

@@ -0,0 +1,29 @@
#include "d_cache_test_bench.h"
//#define NUM_TESTS 46
// Entry point of the d-cache test bench.
// Forwards the command line to Verilator, enables tracing support, runs the
// simulation once and reports the verdict on stderr.
// Returns 0 when simulate() reports success and 1 otherwise, so scripts and
// CI can detect a failing run from the exit status.
int main(int argc, char **argv)
{
    Verilated::commandArgs(argc, argv);
    Verilated::traceEverOn(true);

    VX_d_cache v;
    const bool passed = v.simulate();

    //if ( curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl;
    //if (!curr) std::cerr << RED << "Test Failed: " << testing << std::endl;
    if (passed) {
        std::cerr << GREEN << "Test Passed: " << std::endl;
    } else {
        std::cerr << RED << "Test Failed: " << std::endl;
    }

    // Previously this was an unconditional `return 0`, which hid failures.
    return passed ? 0 : 1;
}

355
old_rtl/cache/d_cache_test_bench.h vendored Normal file
View File

@@ -0,0 +1,355 @@
// C++ libraries
#include <utility>
#include <iostream>
#include <map>
#include <iterator>
#include <iomanip>
#include <fstream>
#include <unistd.h>
#include <vector>
#include <math.h>
#include <algorithm>
#include "VX_define.h"
#include "VVX_d_cache_encapsulate.h"
#include "verilated.h"
#include "d_cache_test_bench_debug.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#endif
// void set_Index (auto & var, int index, int size, auto val)
// {
// int real_shift
// }
// C++ driver for the Verilated VX_d_cache_encapsulate model.
// Owns the model (and, when VCD_OUTPUT is defined, a VCD tracer), toggles the
// clock, feeds a fixed sequence of requests and answers the cache's memory
// requests.
class VX_d_cache
{
public:
VX_d_cache();
~VX_d_cache();
// Runs the whole request sequence and returns the overall pass/fail flag.
bool simulate();
// Drives the request selected by the first argument when the second is true,
// services a pending memory request, and returns true when the cache reports
// o_p_waitrequest == 0.
bool operation(int, bool);
// Verilated model instance; allocated in the constructor.
VVX_d_cache_encapsulate * vx_d_cache_;
// Initialised to 0 by the constructor; not otherwise used in this header.
long int curr_cycle;
// Clock cycles simulated so far; also used to derive VCD timestamps.
int stats_total_cycles = 0;
// Number of times a memory request (o_m_valid) was serviced; the final report
// charges 20 extra cycles for each.
int stats_dram_accesses = 0;
#ifdef VCD_OUTPUT
// Waveform writer; only present when VCD_OUTPUT is defined.
VerilatedVcdC *m_trace;
#endif
};
// Creates the Verilated model and zeroes the counters. When VCD_OUTPUT is
// defined, also creates the VCD writer, attaches it to the model with a
// hierarchy depth of 99 and opens "trace.vcd".
VX_d_cache::VX_d_cache()
    : vx_d_cache_(new VVX_d_cache_encapsulate),
      curr_cycle(0),
      stats_total_cycles(0),
      stats_dram_accesses(0)
{
#ifdef VCD_OUTPUT
    m_trace = new VerilatedVcdC;
    vx_d_cache_->trace(m_trace, 99);
    m_trace->open("trace.vcd");
#endif
    //this->results.open("../results.txt");
}
// Releases everything the constructor allocated.
// The waveform is closed (flushed) while the model it observes still exists,
// and the tracer object itself is freed -- previously it was leaked and
// close() was called only after the model had already been deleted.
VX_d_cache::~VX_d_cache()
{
#ifdef VCD_OUTPUT
    m_trace->close();
    delete m_trace;
    m_trace = nullptr;
#endif
    delete this->vx_d_cache_;
    this->vx_d_cache_ = nullptr;
}
// Drives one step of the test sequence and services the memory port.
//
//   counter_value : index of the request to present (0..4)
//   do_op         : true when a new request should be driven this cycle;
//                   false leaves the previously driven inputs in place and
//                   only de-asserts i_p_initial_request
//
// Returns true when the cache reports o_p_waitrequest == 0.
//
// The five requests used to be five copy-pasted if/else branches (one of them
// wrapped in a redundant outer loop whose index was shadowed by the inner
// loop); they are now rows of a table applied by a single loop. The values
// driven onto the model are unchanged.
bool VX_d_cache::operation(int counter_value, bool do_op) {
    // One core-side request: direction plus address/store-data per thread slot.
    struct Stimulus {
        bool         is_write;
        unsigned int addr[4];
        unsigned int data[4];
    };

    // Bank / index remarks are carried over from the original comments.
    // NOTE(review): they were not fully consistent (0x30001c18 was labelled
    // bank 6 in one place and bank 5 in another) -- verify against the cache's
    // address mapping.
    static const Stimulus kStimuli[] = {
        // 0: write to banks 1-4 at index 64 (slot 3 is serviced first, then
        //    the other three banks at once).
        { true,  { 0x30001004, 0x30001008, 0x3000100c, 0x30010010 },
                 { 0x7f6f8f6f, 0x7f6f8f6f, 0x7f6f8f6f, 0x7f6f8f6f } },
        // 1: write to banks 4-7 at index 108.
        { true,  { 0x30001c14, 0x30001c18, 0x30001c1c, 0x30001c10 },
                 { 0xd1d2d2d3, 0xd1d2d2d3, 0xd1d2d2d3, 0xd1d2d2d3 } },
        // 2: read back a mix of the addresses written by steps 0 and 1.
        { false, { 0x30001004, 0x30001c18, 0x3000100c, 0x30001c1c },
                 { 0x23232332, 0x23232332, 0x23232332, 0x23232332 } },
        // 3: write to banks 1-3 and 5 (evictions will need to take place).
        { true,  { 0x20001004, 0x20001008, 0x2000100c, 0x20001c14 },
                 { 0xaaaabbb0, 0xaaaabbb1, 0xaaaabbb2, 0xaaaabbb3 } },
        // 4: read the addresses that step 3 just overwrote.
        { false, { 0x20001004, 0x20001008, 0x2000100c, 0x20001c14 },
                 { 0x23232332, 0x23232332, 0x23232332, 0x23232332 } },
    };
    // Alternative scenario kept for reference (several threads hitting the
    // same bank): steps 3/4 may instead write then read addresses
    // 0x30001f00, 0x30001c00, 0x30001a00 (bank 0) and 0x30001904 (bank 1)
    // with store data 0xaaaabbb0..0xaaaabbb3.
    const int kNumStimuli = static_cast<int>(sizeof(kStimuli) / sizeof(kStimuli[0]));

    // Dimensions of the memory response bus filled in below.
    const int kMemBanks = 8;
    const int kWordsPerBlock = 4;

    vx_d_cache_->i_p_initial_request = do_op ? 1 : 0;

    if (do_op && counter_value >= 0 && counter_value < kNumStimuli) {
        const Stimulus &req = kStimuli[counter_value];
        vx_d_cache_->i_p_read_or_write = req.is_write ? 1 : 0;
        vx_d_cache_->i_m_ready = 0;
        for (int j = 0; j < NT; j++) {
            // Threads beyond the fourth reuse slot 3, exactly as the original
            // trailing `else` branches did.
            // NOTE(review): slots 0..3 are indexed unconditionally, which
            // presumes NT >= 4 -- confirm.
            const int slot = (j < 3) ? j : 3;
            vx_d_cache_->i_p_valid[j] = 1;
            vx_d_cache_->i_p_writedata[j] = req.data[slot];
            vx_d_cache_->i_m_readdata[j][0] = 1;
            vx_d_cache_->i_p_addr[slot] = req.addr[slot];
        }
    }

    // Handle Memory Accesses
    // The fill pattern is computed before the cycle counter is advanced below.
    unsigned int read_data_from_mem = 0x1111 + counter_value + this->stats_total_cycles;
    if (vx_d_cache_->o_m_valid) {
        this->stats_dram_accesses = this->stats_dram_accesses + 1; // (assuming memory access takes 20 cycles)
        this->stats_total_cycles += 1;

        // Spend one extra clock cycle before presenting the response.
        vx_d_cache_->clk = 0;
        vx_d_cache_->eval();
#ifdef VCD_OUTPUT
        m_trace->dump(2*this->stats_total_cycles);
#endif
        vx_d_cache_->clk = 1;
        vx_d_cache_->eval();
#ifdef VCD_OUTPUT
        m_trace->dump((2*this->stats_total_cycles)+1);
#endif

        vx_d_cache_->i_m_ready = 1;
        for (int bank = 0; bank < kMemBanks; bank++) {
            for (int word = 0; word < kWordsPerBlock; word++) {
                vx_d_cache_->i_m_readdata[bank][word] = read_data_from_mem;
            }
        }
    } else {
        vx_d_cache_->i_m_ready = 0;
    }

    return vx_d_cache_->o_p_waitrequest == 0;
}
// Runs the complete request sequence against the cache model.
//
// Each loop iteration is one clock cycle: rising edge, operation() (drive a
// request / service memory), falling edge. The next request is issued only
// after operation() reports the previous one finished.
//
// Returns true when all requests completed, false when the cycle limit was
// reached first. Previously the function always returned true, and the reset
// was never applied because rst was set to 1 and back to 0 with no eval() in
// between.
bool VX_d_cache::simulate()
{
    const int kNumOperations = 5;
    const int kMaxCycles = 5000;

    // Reset sequence. The model is evaluated once with rst low so the rising
    // edge of rst is observed, and the clock is pulsed once while rst is high
    // so the reset takes effect whether it is sampled on the reset edge or on
    // the clock edge. These evaluations are not counted as cycles and are not
    // written to the trace.
    vx_d_cache_->clk = 0;
    vx_d_cache_->rst = 0;
    vx_d_cache_->eval();
    vx_d_cache_->rst = 1;
    vx_d_cache_->eval();
    vx_d_cache_->clk = 1;
    vx_d_cache_->eval();
    vx_d_cache_->clk = 0;
    vx_d_cache_->eval();
    vx_d_cache_->rst = 0;
    vx_d_cache_->eval();

    bool do_operation = true;
    int other_counter = 0;

    while (other_counter < kNumOperations)
    {
        vx_d_cache_->clk = 1;
        vx_d_cache_->eval();
#ifdef VCD_OUTPUT
        m_trace->dump(2*this->stats_total_cycles);
#endif

        const bool out_operation = operation(other_counter, do_operation);

        vx_d_cache_->clk = 0;
        vx_d_cache_->eval();
#ifdef VCD_OUTPUT
        m_trace->dump((2*this->stats_total_cycles)+1);
#endif

        // Move to the next request only once the cache accepted this one;
        // otherwise keep waiting without re-driving it.
        if (out_operation) {
            other_counter++;
        }
        do_operation = out_operation;

        ++(this->stats_total_cycles);
        if (this->stats_total_cycles > kMaxCycles) {
            break; // safety net against a hung cache
        }
    }

    std::cerr << "New Total Cycles: " << (this->stats_total_cycles + (this->stats_dram_accesses * 20)) << "\n";

    return other_counter >= kNumOperations;
}

View File

@@ -0,0 +1 @@
// Compile-time switch: when defined, code guarded by `#ifdef VCD_OUTPUT` in
// the including test bench (VCD waveform tracing) is compiled in.
// NOTE(review): presumably this is the test bench's *_debug.h header -- confirm.
#define VCD_OUTPUT

View File

@@ -0,0 +1,18 @@
`include "../VX_define.v"
`ifndef VX_BRANCH_RSP
`define VX_BRANCH_RSP
// Signal bundle describing a branch outcome. No modports are declared, so
// the interface itself does not constrain signal direction.
// Field notes below are inferred from the names -- TODO confirm with users.
interface VX_branch_response_inter ();
wire valid_branch; // presumably: the fields below are meaningful
wire branch_dir; // presumably: taken / not taken
wire[31:0] branch_dest; // presumably: 32-bit target address
wire[`NW_M1:0] branch_warp_num; // warp the branch belongs to
endinterface
`endif

View File

@@ -0,0 +1,24 @@
`include "../VX_define.v"
`ifndef VX_CSR_REQ
`define VX_CSR_REQ
// Signal bundle for a CSR request. No modports are declared.
// `valid` has one bit per thread (`NT_M1:0); `warp_num` is `NW_M1:0 wide.
// NOTE(review): `rd`/`wb` look like the destination register and write-back
// select used by the other pipeline bundles -- confirm encodings.
interface VX_csr_req_inter ();
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[4:0] rd;
wire[1:0] wb;
wire[4:0] alu_op;
wire is_csr;
wire[11:0] csr_address;
wire csr_immed;
wire[31:0] csr_mask;
endinterface
`endif

View File

@@ -0,0 +1,21 @@
`include "../VX_define.v"
`ifndef VX_CSR_WB_REQ
`define VX_CSR_WB_REQ
// Signal bundle carrying a CSR result towards write-back (inferred from the
// name). No modports are declared.
// `csr_result` holds one 32-bit word per thread, matching the per-thread
// `valid` mask.
interface VX_csr_wb_inter ();
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[4:0] rd;
wire[1:0] wb;
wire[`NT_M1:0][31:0] csr_result;
endinterface
`endif

View File

@@ -0,0 +1,19 @@
`include "../VX_define.v"
`ifndef VX_DCACHE_REQ
`define VX_DCACHE_REQ
// Signal bundle for a data-cache request. No modports are declared.
// Address, data and valid are per thread (`NT_M1:0); mem_read / mem_write
// are shared 3-bit fields.
// NOTE(review): the 3-bit encodings are defined elsewhere -- confirm.
interface VX_dcache_request_inter ();
wire[`NT_M1:0][31:0] out_cache_driver_in_address;
wire[2:0] out_cache_driver_in_mem_read;
wire[2:0] out_cache_driver_in_mem_write;
wire[`NT_M1:0] out_cache_driver_in_valid;
wire[`NT_M1:0][31:0] out_cache_driver_in_data;
endinterface
`endif

View File

@@ -0,0 +1,16 @@
`include "../VX_define.v"
`ifndef VX_DCACHE_RSP
`define VX_DCACHE_RSP
// Signal bundle for a data-cache response: one 32-bit word per thread plus a
// single `delay` flag (presumably a stall indication -- confirm).
// No modports are declared.
interface VX_dcache_response_inter ();
wire[`NT_M1:0][31:0] in_cache_driver_out_data;
wire delay;
endinterface
`endif

View File

@@ -0,0 +1,27 @@
`include "../VX_define.v"
`ifndef VX_DRAM_REQ_RSP_INTER
`define VX_DRAM_REQ_RSP_INTER
// Request and response signals between a cache and memory, bundled together.
// The data buses are packed as [bank][word][31:0], sized by the two
// parameters. No modports are declared.
interface VX_dram_req_rsp_inter #(
parameter NUMBER_BANKS = 8,
parameter NUM_WORDS_PER_BLOCK = 4) ();
// Req
wire [31:0] o_m_evict_addr;
wire [31:0] o_m_read_addr;
wire o_m_valid;
wire[NUMBER_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata;
wire o_m_read_or_write;
// Rsp
wire[NUMBER_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata;
wire i_m_ready;
endinterface
`endif

View File

@@ -0,0 +1,51 @@
`include "../VX_define.v"
`ifndef VX_EXE_UNIT_REQ_INTER
`define VX_EXE_UNIT_REQ_INTER
// Signal bundle for a request to the execute unit (inferred from the name).
// Register operands are per thread (`NT_M1:0); control fields are shared by
// all threads. No modports are declared.
interface VX_exec_unit_req_inter ();
// Meta
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[31:0] curr_PC;
wire[31:0] PC_next;
// Write Back Info
wire[4:0] rd;
wire[1:0] wb;
// Data and alu op
wire[`NT_M1:0][31:0] a_reg_data;
wire[`NT_M1:0][31:0] b_reg_data;
wire[4:0] alu_op;
wire[4:0] rs1;
wire[4:0] rs2;
wire rs2_src;
wire[31:0] itype_immed;
wire[19:0] upper_immed;
// Branch type
wire[2:0] branch_type;
// Jal info
wire jalQual;
wire jal;
wire[31:0] jal_offset;
// Lint waiver: these two wires are allowed to be unused.
/* verilator lint_off UNUSED */
wire ebreak;
wire wspawn;
/* verilator lint_on UNUSED */
// CSR info
wire is_csr;
wire[11:0] csr_address;
wire csr_immed;
wire[31:0] csr_mask;
endinterface
`endif

View File

@@ -0,0 +1,46 @@
`include "../VX_define.v"
`ifndef VX_FrE_to_BE_INTER
`define VX_FrE_to_BE_INTER
// Signal bundle passed from the front end to the back end (inferred from the
// name): decoded instruction fields plus the per-thread valid mask and the
// warp number. No modports are declared.
interface VX_frE_to_bckE_req_inter ();
wire[11:0] csr_address;
wire is_csr;
wire csr_immed;
wire[31:0] csr_mask;
wire[4:0] rd;
wire[4:0] rs1;
wire[4:0] rs2;
wire[4:0] alu_op;
wire[1:0] wb;
wire rs2_src;
wire[31:0] itype_immed;
wire[2:0] mem_read;
wire[2:0] mem_write;
wire[2:0] branch_type;
wire[19:0] upper_immed;
wire[31:0] curr_PC;
// Lint waiver: ebreak is allowed to be unused.
/* verilator lint_off UNUSED */
wire ebreak;
/* verilator lint_on UNUSED */
wire jalQual;
wire jal;
wire[31:0] jal_offset;
wire[31:0] PC_next;
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
// GPGPU stuff
wire is_wspawn;
wire is_tmc;
wire is_split;
wire is_barrier;
endinterface
`endif

View File

@@ -0,0 +1,18 @@
`include "../VX_define.v"
`ifndef VX_GPR_CLONE_INTER
`define VX_GPR_CLONE_INTER
// Signal bundle flagging a register-file "clone" operation for one warp
// (semantics inferred from the names -- confirm). Both wires are covered by
// an UNUSED lint waiver. No modports are declared.
interface VX_gpr_clone_inter ();
/* verilator lint_off UNUSED */
wire is_clone;
wire[`NW_M1:0] warp_num;
/* verilator lint_on UNUSED */
endinterface
`endif

View File

@@ -0,0 +1,14 @@
`include "../VX_define.v"
`ifndef VX_gpr_data_INTER
`define VX_gpr_data_INTER
// Two register operands, each one 32-bit word per thread (`NT_M1:0).
// No modports are declared.
interface VX_gpr_data_inter ();
wire[`NT_M1:0][31:0] a_reg_data;
wire[`NT_M1:0][31:0] b_reg_data;
endinterface
`endif

View File

@@ -0,0 +1,14 @@
`include "../VX_define.v"
`ifndef VX_GPR_JAL_INTER
`define VX_GPR_JAL_INTER
// A JAL flag together with a 32-bit current PC.
// NOTE(review): presumably lets the register stage substitute the PC as an
// operand for JAL -- confirm with the consumer. No modports are declared.
interface VX_gpr_jal_inter ();
wire is_jal;
wire[31:0] curr_PC;
endinterface
`endif

View File

@@ -0,0 +1,17 @@
`include "../VX_define.v"
`ifndef VX_GPR_READ
`define VX_GPR_READ
// Register-file read request: two 5-bit source register numbers and the warp
// to read from. No modports are declared.
interface VX_gpr_read_inter ();
wire[4:0] rs1;
wire[4:0] rs2;
wire[`NW_M1:0] warp_num;
endinterface
`endif

View File

@@ -0,0 +1,18 @@
`include "../VX_define.v"
`ifndef VX_GPR_WSPAWN_INTER
`define VX_GPR_WSPAWN_INTER
// Signal bundle flagging a warp-spawn for the register file, with the warp
// concerned in `which_wspawn` (semantics inferred from the names -- confirm).
// The wires are covered by an UNUSED lint waiver. No modports are declared.
interface VX_gpr_wspawn_inter ();
/* verilator lint_off UNUSED */
wire is_wspawn;
wire[`NW_M1:0] which_wspawn;
// wire[`NW_M1:0] warp_num;
/* verilator lint_on UNUSED */
endinterface
`endif

View File

@@ -0,0 +1,27 @@
`include "../VX_define.v"
`ifndef VX_GPU_INST_REQ_IN
`define VX_GPU_INST_REQ_IN
// Signal bundle for a GPGPU control instruction request; at most one of the
// is_* flags is presumably set per request (confirm). No modports declared.
// `a_reg_data` is per thread, while `rd2` is a single 32-bit value.
interface VX_gpu_inst_req_inter();
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire is_wspawn;
wire is_tmc;
wire is_split;
wire is_barrier;
wire[31:0] pc_next;
wire[`NT_M1:0][31:0] a_reg_data;
wire[31:0] rd2;
endinterface
`endif

View File

@@ -0,0 +1,19 @@
`include "../VX_define.v"
`ifndef VX_ICACHE_REQ
`define VX_ICACHE_REQ
// Signal bundle for an instruction-cache request: one 32-bit PC address plus
// the same mem_read / mem_write / valid / data fields as the data-cache
// request bundle, but scalar rather than per thread. No modports declared.
interface VX_icache_request_inter ();
wire[31:0] pc_address;
wire[2:0] out_cache_driver_in_mem_read;
wire[2:0] out_cache_driver_in_mem_write;
wire out_cache_driver_in_valid;
wire[31:0] out_cache_driver_in_data;
endinterface
`endif

View File

@@ -0,0 +1,18 @@
`include "../VX_define.v"
`ifndef VX_ICACHE_RSP
`define VX_ICACHE_RSP
// Signal bundle for an instruction-cache response: the fetched 32-bit
// instruction and a `delay` flag (presumably a stall indication -- confirm).
// No modports are declared.
interface VX_icache_response_inter ();
// wire ready;
// wire stall;
wire[31:0] instruction;
wire delay;
endinterface
`endif

View File

@@ -0,0 +1,21 @@
`include "../VX_define.v"
`ifndef VX_EXEC_UNIT_WB_INST_INTER
`define VX_EXEC_UNIT_WB_INST_INTER
// Write-back candidate coming from the execute unit (inferred from the
// name): one 32-bit ALU result per thread plus destination register,
// write-back select, per-thread valid mask, warp number and PC.
// No modports are declared.
interface VX_inst_exec_wb_inter ();
wire[`NT_M1:0][31:0] alu_result;
wire[31:0] exec_wb_pc;
wire[4:0] rd;
wire[1:0] wb;
wire[`NT_M1:0] wb_valid;
wire[`NW_M1:0] wb_warp_num;
endinterface
`endif

View File

@@ -0,0 +1,21 @@
`include "../VX_define.v"
`ifndef VX_MEM_WB_INST_INTER
`define VX_MEM_WB_INST_INTER
// Write-back candidate coming from the memory path (inferred from the name):
// one 32-bit loaded word per thread plus destination register, write-back
// select, per-thread valid mask, warp number and PC. No modports declared.
interface VX_inst_mem_wb_inter ();
wire[`NT_M1:0][31:0] loaded_data;
wire[31:0] mem_wb_pc;
wire[4:0] rd;
wire[1:0] wb;
wire[`NT_M1:0] wb_valid;
wire[`NW_M1:0] wb_warp_num;
endinterface
`endif

View File

@@ -0,0 +1,16 @@
`include "../VX_define.v"
`ifndef VX_F_D_INTER
`define VX_F_D_INTER
// A fetched instruction with its metadata: instruction word, its PC, the
// warp it belongs to and the per-thread valid mask. The include guard
// (VX_F_D_INTER) suggests it links fetch and decode -- confirm.
// No modports are declared.
interface VX_inst_meta_inter ();
wire[31:0] instruction;
wire[31:0] inst_pc;
wire[`NW_M1:0] warp_num;
wire[`NT_M1:0] valid;
endinterface
`endif

View File

@@ -0,0 +1,17 @@
`include "../VX_define.v"
`ifndef VX_JAL_RSP
`define VX_JAL_RSP
// Signal bundle describing a jump: a `jal` flag, a 32-bit destination and
// the warp concerned. No modports are declared.
interface VX_jal_response_inter ();
wire jal;
wire[31:0] jal_dest;
wire[`NW_M1:0] jal_warp_num;
endinterface
`endif

View File

@@ -0,0 +1,17 @@
`include "../VX_define.v"
`ifndef VX_JOIN_INTER
`define VX_JOIN_INTER
// Signal bundle flagging a join for one warp (semantics inferred from the
// names -- confirm). No modports are declared.
interface VX_join_inter ();
wire is_join;
wire[`NW_M1:0] join_warp_num;
endinterface
`endif

View File

@@ -0,0 +1,24 @@
`include "../VX_define.v"
`ifndef VX_LSU_REQ_INTER
`define VX_LSU_REQ_INTER
// Signal bundle for a load/store-unit request. Store data and base address
// are per thread; the offset and control fields are shared by all threads.
// No modports are declared.
interface VX_lsu_req_inter ();
wire[`NT_M1:0] valid;
wire[31:0] lsu_pc;
wire[`NW_M1:0] warp_num;
wire[`NT_M1:0][31:0] store_data;
wire[`NT_M1:0][31:0] base_address; // A reg data
wire[31:0] offset; // itype_immed
wire[2:0] mem_read;
wire[2:0] mem_write;
wire[4:0] rd;
wire[1:0] wb;
endinterface
`endif

View File

@@ -0,0 +1,28 @@
`include "../VX_define.v"
`ifndef VX_MEM_REQ_IN
`define VX_MEM_REQ_IN
// Signal bundle for a memory-stage request (inferred from the name).
// `alu_result` and `rd2` are per thread; the remaining fields are shared by
// all threads. No modports are declared.
interface VX_mem_req_inter ();
wire[`NT_M1:0][31:0] alu_result;
wire[2:0] mem_read;
wire[2:0] mem_write;
wire[4:0] rd;
wire[1:0] wb;
wire[4:0] rs1;
wire[4:0] rs2;
wire[`NT_M1:0][31:0] rd2;
wire[31:0] PC_next;
wire[31:0] curr_PC;
wire[31:0] branch_offset;
wire[2:0] branch_type;
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
endinterface
`endif

View File

@@ -0,0 +1,22 @@
`include "../VX_define.v"
`ifndef VX_MW_WB_INTER
`define VX_MW_WB_INTER
// Signal bundle carrying both the ALU result and the memory result (one
// 32-bit word per thread each) with the write-back control fields.
// NOTE(review): `wb` presumably selects which result is written -- confirm.
// No modports are declared.
interface VX_mw_wb_inter ();
wire[`NT_M1:0][31:0] alu_result;
wire[`NT_M1:0][31:0] mem_result;
wire[4:0] rd;
wire[1:0] wb;
wire[31:0] PC_next;
wire[`NT_M1:0] valid;
wire [`NW_M1:0] warp_num;
endinterface
`endif

View File

@@ -0,0 +1,36 @@
`include "../VX_define.v"
`ifndef VX_WARP_CTL_INTER
`define VX_WARP_CTL_INTER
// Warp-control signal bundle: thread-mask change, warp spawn, ebreak,
// barrier and split information for the warp in `warp_num`.
// `wspawn_new_active` has one bit per warp (`NW-1:0) and `num_warps` is
// $clog2(`NW)+1 bits wide. Field semantics are inferred from the names --
// confirm with the scheduler. No modports are declared.
interface VX_warp_ctl_inter ();
wire[`NW_M1:0] warp_num;
wire change_mask;
wire[`NT_M1:0] thread_mask;
wire wspawn;
wire[31:0] wspawn_pc;
wire[`NW-1:0] wspawn_new_active;
wire ebreak;
// barrier
wire is_barrier;
wire[31:0] barrier_id;
wire[$clog2(`NW):0] num_warps;
// split
wire is_split;
wire dont_split;
wire[`NW_M1:0] split_warp_num;
wire[`NT_M1:0] split_new_mask;
wire[`NT_M1:0] split_later_mask;
wire[31:0] split_save_pc;
endinterface
`endif

View File

@@ -0,0 +1,21 @@
`include "../VX_define.v"
`ifndef VX_WB_INTER
`define VX_WB_INTER
// Write-back signal bundle: one 32-bit word per thread to write, plus the
// destination register, write-back select, per-thread valid mask, warp
// number and PC. No modports are declared.
interface VX_wb_inter ();
wire[`NT_M1:0][31:0] write_data;
wire[31:0] wb_pc;
wire[4:0] rd;
wire[1:0] wb;
wire[`NT_M1:0] wb_valid;
wire[`NW_M1:0] wb_warp_num;
endinterface
`endif

View File

@@ -0,0 +1,15 @@
`include "../VX_define.v"
`ifndef VX_WSTALL_INTER
`define VX_WSTALL_INTER
// Port-less, modport-less signal bundle pairing a single stall flag with a warp identifier.
interface VX_wstall_inter();
// Stall flag; the polarity and the driver are not visible in this file.
wire wstall;
// Warp the flag refers to (NW_M1+1 bits).
wire[`NW_M1:0] warp_num;
endinterface
`endif

124
old_rtl/modelsim/Makefile Normal file
View File

@@ -0,0 +1,124 @@
# ModelSim batch flow for the Vortex testbench (vortex_tb).
#
#   make / make sim  compile everything, then run vortex_tb in batch mode
#   make comp        compile $(SRC) into the $(LIB) library
#   make lib         create the $(LIB) library
#
# vlib, vlog and vsim must be on PATH (see the "setup" note further down).

# Default goal: the first target in the file.
ALL: sim

# These names are commands, not files produced by their recipes.
.PHONY: ALL lib comp sim

#TOOL INPUT
# Everything handed to vlog: the DPI C++ glue, the testbench, and the RTL.
SRC = \
    vortex_dpi.cpp \
    vortex_tb.v \
    ../VX_define.v \
    ../VX_define_synth.v \
    ../interfaces/VX_branch_response_inter.v \
    ../interfaces/VX_csr_req_inter.v \
    ../interfaces/VX_csr_wb_inter.v \
    ../interfaces/VX_dcache_request_inter.v \
    ../interfaces/VX_dcache_response_inter.v \
    ../interfaces/VX_dram_req_rsp_inter.v \
    ../interfaces/VX_exec_unit_req_inter.v \
    ../interfaces/VX_frE_to_bckE_req_inter.v \
    ../interfaces/VX_gpr_clone_inter.v \
    ../interfaces/VX_gpr_data_inter.v \
    ../interfaces/VX_gpr_jal_inter.v \
    ../interfaces/VX_gpr_read_inter.v \
    ../interfaces/VX_gpr_wspawn_inter.v \
    ../interfaces/VX_gpu_inst_req_inter.v \
    ../interfaces/VX_icache_request_inter.v \
    ../interfaces/VX_icache_response_inter.v \
    ../interfaces/VX_inst_exec_wb_inter.v \
    ../interfaces/VX_inst_mem_wb_inter.v \
    ../interfaces/VX_inst_meta_inter.v \
    ../interfaces/VX_jal_response_inter.v \
    ../interfaces/VX_join_inter.v \
    ../interfaces/VX_lsu_req_inter.v \
    ../interfaces/VX_mem_req_inter.v \
    ../interfaces/VX_mw_wb_inter.v \
    ../interfaces/VX_warp_ctl_inter.v \
    ../interfaces/VX_wb_inter.v \
    ../interfaces/VX_wstall_inter.v \
    ../VX_alu.v \
    ../VX_back_end.v \
    ../VX_csr_handler.v \
    ../VX_csr_wrapper.v \
    ../VX_decode.v \
    ../VX_dmem_controller.v \
    ../VX_execute_unit.v \
    ../VX_fetch.v \
    ../VX_front_end.v \
    ../VX_generic_priority_encoder.v \
    ../VX_generic_register.v \
    ../VX_generic_stack.v \
    ../VX_gpgpu_inst.v \
    ../VX_gpr.v \
    ../VX_gpr_stage.v \
    ../VX_gpr_wrapper.v \
    ../VX_inst_multiplex.v \
    ../VX_lsu.v \
    ../VX_lsu_addr_gen.v \
    ../VX_priority_encoder.v \
    ../VX_priority_encoder_w_mask.v \
    ../VX_scheduler.v \
    ../VX_warp.v \
    ../VX_countones.v \
    ../VX_warp_scheduler.v \
    ../VX_writeback.v \
    ../Vortex.v \
    ../byte_enabled_simple_dual_port_ram.v \
    ../cache/VX_Cache_Bank.v \
    ../cache/VX_cache_bank_valid.v \
    ../cache/VX_cache_data.v \
    ../cache/VX_d_cache.v \
    ../cache/VX_generic_pe.v \
    ../cache/cache_set.v \
    ../cache/VX_cache_data_per_index.v \
    ../pipe_regs/VX_d_e_reg.v \
    ../pipe_regs/VX_f_d_reg.v \
    ../shared_memory/VX_bank_valids.v \
    ../shared_memory/VX_priority_encoder_sm.v \
    ../shared_memory/VX_shared_memory.v \
    ../shared_memory/VX_shared_memory_block.v \
    ../../models/memory/cln28hpm/rf2_128x128_wm1/rf2_128x128_wm1.v \
    ../../models/memory/cln28hpm/rf2_256x128_wm1/rf2_256x128_wm1.v \
    ../../models/memory/cln28hpm/rf2_256x19_wm0/rf2_256x19_wm0.v \
    ../../models/memory/cln28hpm/rf2_32x128_wm1/rf2_32x128_wm1.v \
    ../../models/memory/cln28hpm/rf2_32x19_wm0/rf2_32x19_wm0.v
# Not compiled at present:
# ../../models/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.v
# vortex_dpi.h

# Script passed to vsim: record a VCD of the testbench and of /vortex,
# run until the testbench finishes, then quit without prompting.
CMD= \
    -do "VoptFlow = 0; \
    vcd file vortex.vcd; \
    vcd add -r /vortex_tb/*; \
    vcd add -r /vortex/*; \
    run -all; \
    quit -f"

# vlog language options.
OPT=-sv -sv12compat

# Name of the ModelSim library, which is also the directory vlib creates.
LIB = vortex_lib

# Optional vsim log-file switch; disabled by default.
# LOG=-logfile vortex_tb.log
LOG=

# setup: source cshrc.modelsim

# Convenience alias for creating the library.
lib: $(LIB)

# The library directory is a real file target, so vlib only runs when it is missing.
$(LIB):
	vlib $(LIB)

# Order-only prerequisite: the library has to exist before vlog can write into
# it, but the directory's timestamp must never force anything to rebuild.
comp: | $(LIB)
	vlog $(OPT) -work $(LIB) $(SRC)
# Alternative that also emits a DPI header:
# vlog -O0 -dpiheader vortex_dpi.h $(OPT) -work $(LIB) $(SRC)

# Batch simulation; everything vsim prints on stdout is captured in vortex_sim.log.
sim: comp
	vsim -novopt vortex_tb $(LOG) -c -lib $(LIB) $(CMD) > vortex_sim.log
# Variant without -novopt:
# vsim vortex_tb $(LOG) -c -lib $(LIB) $(CMD) > vortex_sim.log

View File

@@ -0,0 +1,8 @@
# ModelSim environment setup in csh syntax (setenv / if-then-endif); meant to be
# sourced into the current shell so the variables persist.

# Append the ModelSim ms106a tool directory to the command search path.
setenv PATH "${PATH}:/tools/mentor/modelsim/ms106a/modeltech/bin"
# NOTE(review): confirm in the ModelSim documentation what MTI_VCO_MODE=1 selects.
setenv MTI_VCO_MODE 1
# Put the licence server first in LM_LICENSE_FILE, keeping any existing entries after it.
if (${?LM_LICENSE_FILE}) then
setenv LM_LICENSE_FILE "1717@ece-linlic.ece.gatech.edu:${LM_LICENSE_FILE}"
else
setenv LM_LICENSE_FILE "1717@ece-linlic.ece.gatech.edu"
endif
# Same licence server for MGLS_LICENSE_FILE; any existing value is overwritten.
setenv MGLS_LICENSE_FILE 1717@ece-linlic.ece.gatech.edu

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,328 @@
// #include <iostream>
// #include "VX_define.h"
#include <../simulate/ram.h>
#include <stdio.h>
#include <math.h>
#include "svdpi.h"
#include "../simulate/VX_define.h"
// #include "vortex_dpi.h"
// C-linkage prototypes for the functions defined below. The names are unmangled
// so that they match the names used by the SystemVerilog DPI-C import declarations.
extern "C" {
// Loads the program image named by filename and resets the driver state.
void load_file (char * filename);
// Instruction-side and data-side memory bus models. Both take the same argument list:
// clock level, read/evict addresses, request valid, write-back data, the
// read_or_write flag, the cache geometry, and the two outputs (read data, ready).
void ibus_driver (bool clk, unsigned o_m_read_addr, unsigned o_m_evict_addr, bool o_m_valid, svLogicVecVal * o_m_writedata, bool o_m_read_or_write, unsigned cache_banks, unsigned num_words_per_block, svLogicVecVal * i_m_readdata, bool * i_m_ready);
void dbus_driver (bool clk, unsigned o_m_read_addr, unsigned o_m_evict_addr, bool o_m_valid, svLogicVecVal * o_m_writedata, bool o_m_read_or_write, unsigned cache_banks, unsigned num_words_per_block, svLogicVecVal * i_m_readdata, bool * i_m_ready);
// Prints io_data as a character on stderr when io_valid is set and clk is low.
void io_handler (bool clk, bool io_valid, unsigned io_data);
// Prints the end-of-run banner; the argument is the cycle count reported by the caller.
void gracefulExit(int);
}
// Memory model shared by both bus drivers (type RAM; presumably declared in ../simulate/ram.h).
RAM ram;
// Data-bus state: set by dbus_driver when it accepts a request, cleared when the
// next call returns the block that starts at refill_addr.
bool refill;
unsigned refill_addr;
// Instruction-bus counterpart, used by ibus_driver in the same way.
bool i_refill;
unsigned i_refill_addr;
// Reset to 0 in load_file() and printed by gracefulExit(); nothing visible in this
// file increments it.
unsigned num_cycles;
// Forward declarations of the helpers defined just below.
unsigned getIndex(int, int, int);
unsigned calculate_bits_per_bank_num(int);
// Row-major flattening: position of element (r, c) in a table that has numCols
// columns per row. The arithmetic is done in int and converted on return.
unsigned getIndex(int r, int c, int numCols)
{
    const int rowStart = r * numCols;
    return rowStart + c;
}
// Builds a mask whose lowest `num` bits are set (num = 3 gives 0b111).
// A num of zero or less yields 0.
unsigned calculate_bits_per_bank_num(int num)
{
    int mask = 0;
    for (int left = num; left > 0; --left)
    {
        // Shift in one more set bit per requested position.
        mask = (mask << 1) | 1;
    }
    return mask;
}
// DPI entry point called by the testbench before simulation starts.
// Prints a banner on stderr, hands the file name to loadHexImpl() together with
// the shared RAM model (presumably filling it from the hex file -- see
// ../simulate/ram.h), and returns the driver state to idle: the cycle counter
// is zeroed and no refill is pending on either bus.
void load_file(char * filename)
{
    fputs("\n\n\n\n**********************\n", stderr);
    loadHexImpl(filename, &ram);

    num_cycles = 0;
    refill = false;
    i_refill = false;
}
// floor(log2(value)) using integer shifts only; returns 0 for value 0 or 1.
static unsigned bus_floor_log2(unsigned value)
{
    unsigned bits = 0;
    while (value > 1)
    {
        value >>= 1;
        ++bits;
    }
    return bits;
}

// Maps a byte address to its slot in the flattened [bank][word] array.
// The bank is taken from the low bits of the word address and the word offset
// from the bits above them; both are also returned for the trace output.
static unsigned bus_word_index(unsigned addr, unsigned cache_banks, unsigned bank_bits,
                               unsigned num_words_per_block,
                               unsigned * bank_num, unsigned * offset_num)
{
    const unsigned addr_without_byte = addr >> 2;
    const unsigned addr_without_bank = addr_without_byte >> bank_bits;

    *bank_num   = addr_without_byte & (cache_banks - 1);
    *offset_num = addr_without_bank & (num_words_per_block - 1);
    return getIndex(*bank_num, *offset_num, num_words_per_block);
}

// Shared implementation of the instruction- and data-side memory bus models.
//
// Every call first drives the defaults: ready low and every read-data word set
// to the filler value 0x506070. Nothing else happens while clk is high. While
// clk is low:
//   * if a refill is pending, ready is raised and the block starting at
//     pending_addr is read from `ram` into i_m_readdata;
//   * otherwise, if o_m_valid is set, the evicted block is written back to
//     `ram` first when o_m_read_or_write is set, and a refill of the block at
//     o_m_read_addr is scheduled for the next call.
//
// refill_pending / pending_addr are the per-bus state owned by the caller.
// bval is cleared for every word written because the initial contents of a DPI
// output argument are undetermined; a stale bval bit would turn the word into
// X or Z on the SystemVerilog side.
static void bus_driver_step(bool clk, unsigned o_m_read_addr, unsigned o_m_evict_addr, bool o_m_valid,
                            svLogicVecVal * o_m_writedata, bool o_m_read_or_write,
                            unsigned cache_banks, unsigned num_words_per_block,
                            svLogicVecVal * i_m_readdata, bool * i_m_ready,
                            bool & refill_pending, unsigned & pending_addr)
{
    s_vpi_vecval * real_i_m_readdata = (s_vpi_vecval *) i_m_readdata;
    const unsigned num_words = cache_banks * num_words_per_block;

    // Default values
    (*i_m_ready) = false;
    for (unsigned word = 0; word < num_words; word++)
    {
        real_i_m_readdata[word].aval = 0x506070;
        real_i_m_readdata[word].bval = 0;
    }

    if (clk)
    {
        // Do nothing on positive edge
        return;
    }

    // Loop-invariant: number of address bits that select the bank.
    const unsigned bits_per_bank = bus_floor_log2(cache_banks);

    if (refill_pending)
    {
        // Answer the request accepted on the previous call.
        refill_pending = false;
        *i_m_ready = true;
        for (unsigned curr_e = 0; curr_e < num_words; curr_e++)
        {
            unsigned new_addr = pending_addr + (4 * curr_e);
            unsigned bank_num;
            unsigned offset_num;
            unsigned index = bus_word_index(new_addr, cache_banks, bits_per_bank,
                                            num_words_per_block, &bank_num, &offset_num);
            unsigned value;
            ram.getWord(new_addr, &value);
            fprintf(stdout, "-------- (%x) i_m_readdata[%d][%d] (%d) = %x\n", new_addr, bank_num, offset_num, curr_e, value);
            real_i_m_readdata[index].aval = value;
            real_i_m_readdata[index].bval = 0;
        }
        return;
    }

    if (!o_m_valid)
    {
        return;
    }

    if (o_m_read_or_write)
    {
        // Write the evicted block back before scheduling the refill.
        s_vpi_vecval * real_o_m_writedata = (s_vpi_vecval *) o_m_writedata;
        for (unsigned curr_e = 0; curr_e < num_words; curr_e++)
        {
            unsigned new_addr = (o_m_evict_addr) + (4 * curr_e);
            unsigned bank_num;
            unsigned offset_num;
            unsigned index = bus_word_index(new_addr, cache_banks, bits_per_bank,
                                            num_words_per_block, &bank_num, &offset_num);
            unsigned new_value = real_o_m_writedata[index].aval;
            ram.writeWord( new_addr, &new_value);
            fprintf(stdout, "+++++++ (%x) writeback[%d][%d] (%d) = %x\n", new_addr, bank_num, offset_num, curr_e, new_value);
        }
    }

    // Respond next cycle
    refill_pending = true;
    pending_addr = o_m_read_addr;
}

// Instruction-side bus model; see bus_driver_step() for the protocol.
// Its state is kept in i_refill / i_refill_addr.
void ibus_driver(bool clk, unsigned o_m_read_addr, unsigned o_m_evict_addr, bool o_m_valid, svLogicVecVal * o_m_writedata, bool o_m_read_or_write, unsigned cache_banks, unsigned num_words_per_block, svLogicVecVal * i_m_readdata, bool * i_m_ready)
{
    bus_driver_step(clk, o_m_read_addr, o_m_evict_addr, o_m_valid, o_m_writedata, o_m_read_or_write,
                    cache_banks, num_words_per_block, i_m_readdata, i_m_ready,
                    i_refill, i_refill_addr);
}

// Data-side bus model; see bus_driver_step() for the protocol.
// Its state is kept in refill / refill_addr.
void dbus_driver(bool clk, unsigned o_m_read_addr, unsigned o_m_evict_addr, bool o_m_valid, svLogicVecVal * o_m_writedata, bool o_m_read_or_write, unsigned cache_banks, unsigned num_words_per_block, svLogicVecVal * i_m_readdata, bool * i_m_ready)
{
    bus_driver_step(clk, o_m_read_addr, o_m_evict_addr, o_m_valid, o_m_writedata, o_m_read_or_write,
                    cache_banks, num_words_per_block, i_m_readdata, i_m_ready,
                    refill, refill_addr);
}
// Console output hook: emits the low byte of io_data as one character on stderr.
// It only acts while clk is low and io_valid is set; every other call is a no-op.
void io_handler(bool clk, bool io_valid, unsigned io_data)
{
    if (clk || !io_valid)
    {
        return;
    }

    const uint32_t data_write = (uint32_t) (io_data);
    fprintf(stderr, "%c", (char) data_write);
    // Flush straight away so the text interleaves correctly with simulator output.
    fflush(stderr);
}
// End-of-simulation hook: prints a closing banner and both cycle counters on stderr.
// cycles is the count supplied by the caller (the testbench passes its cycle_num).
void gracefulExit(int cycles)
{
fprintf(stderr, "*********************\n\n");
// NOTE(review): within the code visible in this file num_cycles is only ever reset
// to 0 (in load_file), so the "DPI Cycle Num" figure will print as 0 unless it is
// updated elsewhere.
fprintf(stderr, "DPI Cycle Num: %d\tVerilog Cycle Num: %d\n", num_cycles, cycles);
}

View File

@@ -0,0 +1,8 @@
#ifndef VORTEX_DPI_H
#define VORTEX_DPI_H

// C-linkage prototypes of the DPI functions imported by the testbench.
// svLogicVecVal comes from the simulator's svdpi.h, which must be included
// before this header.
extern "C" {
void load_file (char * filename);
void dbus_driver(bool clk, unsigned o_m_read_addr, unsigned o_m_evict_addr, bool o_m_valid, svLogicVecVal * o_m_writedata, bool o_m_read_or_write, unsigned cache_banks, unsigned num_words_per_block, svLogicVecVal * i_m_readdata, bool * i_m_ready);
void ibus_driver(bool clk, unsigned o_m_read_addr, unsigned o_m_evict_addr, bool o_m_valid, svLogicVecVal * o_m_writedata, bool o_m_read_or_write, unsigned cache_banks, unsigned num_words_per_block, svLogicVecVal * i_m_readdata, bool * i_m_ready);
void io_handler (bool clk, bool io_valid, unsigned io_data);
// Takes the cycle count reported by the testbench. This matches the definition
// in vortex_dpi.cpp and the DPI import, both of which pass one int.
void gracefulExit(int cycles);
}

#endif // VORTEX_DPI_H

View File

@@ -0,0 +1,160 @@
`include "../VX_define.v"
//`define NUMBER_BANKS 8
//`define NUM_WORDS_PER_BLOCK 4
`define ARM_UD_MODEL
`timescale 1ns/1ps
// DPI-C imports: each line binds a SystemVerilog function name to the C function
// of the same name (the C side is declared extern "C" in vortex_dpi.cpp).
// Hands the name of the program image to the C side.
import "DPI-C" load_file = function void load_file(input string filename);
/*
import "DPI-C" ibus_driver = function void ibus_driver(input logic clk, input int pc_addr,
output int instruction);
*/
// Instruction-side memory bus model. The request signals go in; the read data
// (an ICACHE_BANKS x ICACHE_NUM_WORDS_PER_BLOCK array of 32-bit words) and the
// ready flag come back as outputs.
import "DPI-C" ibus_driver = function void ibus_driver( input logic clk,
input int o_m_read_addr,
input int o_m_evict_addr,
input logic o_m_valid,
input reg[31:0] o_m_writedata[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
input logic o_m_read_or_write,
input int cache_banks,
input int words_per_block,
// Rsp
output reg[31:0] i_m_readdata[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
output logic i_m_ready);
// Data-side memory bus model; same argument list, sized with the DCACHE_* macros.
import "DPI-C" dbus_driver = function void dbus_driver( input logic clk,
input int o_m_read_addr,
input int o_m_evict_addr,
input logic o_m_valid,
input reg[31:0] o_m_writedata[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
input logic o_m_read_or_write,
input int cache_banks,
input int words_per_block,
// Rsp
output reg[31:0] i_m_readdata[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0],
output logic i_m_ready);
// Character output hook, and the end-of-run hook that receives the testbench cycle count.
import "DPI-C" io_handler = function void io_handler(input logic clk, input logic io_valid, input int io_data);
import "DPI-C" gracefulExit = function void gracefulExit(input int cycle_num);
// Top-level testbench: instantiates Vortex, generates clock and reset, and connects
// the core's instruction/data memory ports and IO port to the DPI-C functions
// imported above. It has no ports of its own.
module vortex_tb (
);
// Rising clock edges counted so far; passed to gracefulExit() at the end of the run.
int cycle_num;
reg clk;
reg reset;
// Connected to the DUT's icache ports only; nothing else in this module uses them.
reg[31:0] icache_response_instruction;
reg[31:0] icache_request_pc_address;
// IO
reg io_valid;
reg[31:0] io_data;
// Data-side memory port (suffix _d).
// Req
reg [31:0] o_m_read_addr_d;
reg [31:0] o_m_evict_addr_d;
reg o_m_valid_d;
reg [31:0] o_m_writedata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0];
reg o_m_read_or_write_d;
// Rsp
reg [31:0] i_m_readdata_d[`DCACHE_BANKS - 1:0][`DCACHE_NUM_WORDS_PER_BLOCK-1:0];
reg i_m_ready_d;
// Instruction-side memory port (suffix _i).
// Req
reg [31:0] o_m_read_addr_i;
reg [31:0] o_m_evict_addr_i;
reg o_m_valid_i;
reg [31:0] o_m_writedata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0];
reg o_m_read_or_write_i;
// Rsp
reg [31:0] i_m_readdata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0];
reg i_m_ready_i;
// Ends the simulation when it is seen high on a rising clock edge (see below).
reg out_ebreak;
// hi and temp are declared but not referenced anywhere else in this module.
reg[31:0] hi;
integer temp;
// Start-up: load the program image through DPI, enable dumping, and set the
// initial clock/reset values.
initial begin
// $fdumpfile("vortex1.vcd");
load_file("../../runtime/mains/simple/vx_simple_main.hex");
// load_file("../../emulator/riscv_tests/rv32ui-p-add.hex");
//load_file("../../kernel/vortex_test.hex");
// No $dumpfile call is made here; the modelsim Makefile's -do script names the VCD file.
$dumpvars(0, vortex_tb);
reset = 1;
clk = 0;
// NOTE(review): clk and reset are also written by the always block at the end of
// this module, which clears reset as soon as it sees it high. Two processes
// writing the same variables is a race, so the order of the time-5 events is
// simulator dependent. Confirm how long reset is really held.
#5 reset = 1;
clk = 1;
// NOTE(review): this assignment happens at the same simulation time as the clk = 1
// above; its order relative to the cycle_num increment on posedge clk is not defined.
cycle_num = 0;
end
Vortex vortex(
.clk (clk),
.reset (reset),
.icache_response_instruction (icache_response_instruction),
.icache_request_pc_address (icache_request_pc_address),
.io_valid (io_valid),
.io_data (io_data),
.o_m_read_addr_d (o_m_read_addr_d),
.o_m_evict_addr_d (o_m_evict_addr_d),
.o_m_valid_d (o_m_valid_d),
.o_m_writedata_d (o_m_writedata_d),
.o_m_read_or_write_d (o_m_read_or_write_d),
.i_m_readdata_d (i_m_readdata_d),
.i_m_ready_d (i_m_ready_d),
.o_m_read_addr_i (o_m_read_addr_i),
.o_m_evict_addr_i (o_m_evict_addr_i),
.o_m_valid_i (o_m_valid_i),
.o_m_writedata_i (o_m_writedata_i),
.o_m_read_or_write_i (o_m_read_or_write_i),
.i_m_readdata_i (i_m_readdata_i),
.i_m_ready_i (i_m_ready_i),
.out_ebreak (out_ebreak)
);
// Memory and IO models run once per falling edge. clk is passed so the C side can
// see the level; the C functions only act when it is low.
always @(negedge clk) begin
ibus_driver(clk, o_m_read_addr_i, o_m_evict_addr_i, o_m_valid_i, o_m_writedata_i, o_m_read_or_write_i, `ICACHE_BANKS, `ICACHE_NUM_WORDS_PER_BLOCK, i_m_readdata_i, i_m_ready_i);
dbus_driver(clk, o_m_read_addr_d, o_m_evict_addr_d, o_m_valid_d, o_m_writedata_d, o_m_read_or_write_d, `DCACHE_BANKS, `DCACHE_NUM_WORDS_PER_BLOCK, i_m_readdata_d, i_m_ready_d);
io_handler (clk, io_valid, io_data);
end
// Termination: when out_ebreak is high on a rising edge, report the cycle count
// and finish 40 time units later.
always @(posedge clk) begin
if (out_ebreak) begin
gracefulExit(cycle_num);
#40 $finish;
end
end
// Cycle counter.
always @(posedge clk) begin
cycle_num = cycle_num + 1;
end
// Clock generator, which also releases reset. Each trigger schedules an inversion
// of clk 5 time units later, and that inversion re-triggers the block, so the
// period is 10 time units. When reset is seen high it is cleared and clk is
// forced low first.
always @(clk, posedge reset) begin
if (reset) begin
reset = 0;
clk = 0;
end
#5 clk <= ~clk;
end
endmodule

1084
old_rtl/modelsim/work/_info Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,4 @@
m255
K4
z0
cModel Technology

View File

@@ -0,0 +1,36 @@
`include "../VX_define.v"

// Pipeline register between two VX_frE_to_bckE_req_inter bundles (the module name
// suggests the decode -> execute boundary). Every field of VX_frE_to_bckE_req is
// packed into one vector, passed through a VX_generic_register, and unpacked in
// the same order onto VX_bckE_req.
//   stall follows in_freeze.
//   flush is asserted while in_branch_stall equals the STALL macro.
// What stall and flush do to the stored value is defined by VX_generic_register.
module VX_d_e_reg (
	input wire clk,
	input wire reset,
	input wire in_branch_stall,
	input wire in_freeze,

	VX_frE_to_bckE_req_inter VX_frE_to_bckE_req,
	VX_frE_to_bckE_req_inter VX_bckE_req
);

	wire stall;
	wire flush;

	assign stall = in_freeze;
	assign flush = (in_branch_stall == `STALL);

	// Register width: 233 bits plus warp_num (NW_M1 + 1 bits) plus NT bits
	// (presumably the per-thread valid vector -- confirm against the interface).
	// The field order below must be identical on the input and output sides.
	VX_generic_register #(.N(233 + `NW_M1 + 1 + `NT)) d_e_reg
	(
		.clk  (clk),
		.reset(reset),
		.stall(stall),
		.flush(flush),
		.in   ({
			VX_frE_to_bckE_req.csr_address,
			VX_frE_to_bckE_req.jalQual,
			VX_frE_to_bckE_req.ebreak,
			VX_frE_to_bckE_req.is_csr,
			VX_frE_to_bckE_req.csr_immed,
			VX_frE_to_bckE_req.csr_mask,
			VX_frE_to_bckE_req.rd,
			VX_frE_to_bckE_req.rs1,
			VX_frE_to_bckE_req.rs2,
			VX_frE_to_bckE_req.alu_op,
			VX_frE_to_bckE_req.wb,
			VX_frE_to_bckE_req.rs2_src,
			VX_frE_to_bckE_req.itype_immed,
			VX_frE_to_bckE_req.mem_read,
			VX_frE_to_bckE_req.mem_write,
			VX_frE_to_bckE_req.branch_type,
			VX_frE_to_bckE_req.upper_immed,
			VX_frE_to_bckE_req.curr_PC,
			VX_frE_to_bckE_req.jal,
			VX_frE_to_bckE_req.jal_offset,
			VX_frE_to_bckE_req.PC_next,
			VX_frE_to_bckE_req.valid,
			VX_frE_to_bckE_req.warp_num,
			VX_frE_to_bckE_req.is_wspawn,
			VX_frE_to_bckE_req.is_tmc,
			VX_frE_to_bckE_req.is_split,
			VX_frE_to_bckE_req.is_barrier
		}),
		.out  ({
			VX_bckE_req.csr_address,
			VX_bckE_req.jalQual,
			VX_bckE_req.ebreak,
			VX_bckE_req.is_csr,
			VX_bckE_req.csr_immed,
			VX_bckE_req.csr_mask,
			VX_bckE_req.rd,
			VX_bckE_req.rs1,
			VX_bckE_req.rs2,
			VX_bckE_req.alu_op,
			VX_bckE_req.wb,
			VX_bckE_req.rs2_src,
			VX_bckE_req.itype_immed,
			VX_bckE_req.mem_read,
			VX_bckE_req.mem_write,
			VX_bckE_req.branch_type,
			VX_bckE_req.upper_immed,
			VX_bckE_req.curr_PC,
			VX_bckE_req.jal,
			VX_bckE_req.jal_offset,
			VX_bckE_req.PC_next,
			VX_bckE_req.valid,
			VX_bckE_req.warp_num,
			VX_bckE_req.is_wspawn,
			VX_bckE_req.is_tmc,
			VX_bckE_req.is_split,
			VX_bckE_req.is_barrier
		})
	);

endmodule

View File

@@ -0,0 +1,28 @@
`include "../VX_define.v"

// Pipeline register between two VX_inst_meta_inter bundles (the module name
// suggests the fetch -> decode boundary). The instruction, its PC, the warp number
// and the valid bits of fe_inst_meta_fd are packed into one vector, passed through
// a VX_generic_register, and unpacked in the same order onto fd_inst_meta_de.
//   stall is asserted while in_freeze equals 1.
//   flush is tied low; this register is never flushed.
module VX_f_d_reg (
	input wire clk,
	input wire reset,
	input wire in_freeze,

	VX_inst_meta_inter fe_inst_meta_fd,
	VX_inst_meta_inter fd_inst_meta_de
);

	wire flush;
	wire stall;

	assign flush = 1'b0;
	assign stall = (in_freeze == 1'b1);

	// Register width: 64 bits plus warp_num (NW_M1 + 1 bits) plus NT bits
	// (presumably instruction + PC, then the per-thread valid vector -- confirm
	// against the interface).
	VX_generic_register #(.N(64 + `NW_M1 + 1 + `NT)) f_d_reg
	(
		.clk  (clk),
		.reset(reset),
		.stall(stall),
		.flush(flush),
		.in   ({
			fe_inst_meta_fd.instruction,
			fe_inst_meta_fd.inst_pc,
			fe_inst_meta_fd.warp_num,
			fe_inst_meta_fd.valid
		}),
		.out  ({
			fd_inst_meta_de.instruction,
			fd_inst_meta_de.inst_pc,
			fd_inst_meta_de.warp_num,
			fd_inst_meta_de.valid
		})
	);

endmodule

70
old_rtl/quartus/Makefile Normal file
View File

@@ -0,0 +1,70 @@
# Quartus Prime command-line flow for the Vortex project.
#
#   make / make all  project creation, synthesis, fit, assembly and timing analysis
#   make syn|fit|asm|sta  run the flow up to and including that stage
#   make program     download $(PROJECT).sof over JTAG
#   make clean       remove everything the flow generated
#
# Stage ordering is tracked with *.chg stamp files: a stage reruns when its stamp
# is newer than its report, and each stage refreshes the stamps of its successors.

PROJECT = Vortex
TOP_LEVEL_ENTITY = Vortex
SRC_FILE = Vortex.v
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf

# RTL whose modification must retrigger synthesis. These are the directories that
# project.tcl adds Verilog files from; $(wildcard) yields nothing for a missing one.
SOURCE_FILES := $(wildcard ../*.v ../interfaces/*.v ../pipe_regs/*.v)

# Location of the Quartus executables; override on the command line if needed.
QUARTUS_BIN ?= /tools/reconfig/intel/18.0/quartus/bin

# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N4F45I3SG

# Executable Configuration
SYN_ARGS = --read_settings_files=on
FIT_ARGS = --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --do_report_timing

# These names are commands, not files produced by their recipes.
.PHONY: all syn fit asm sta smart program clean

# Build targets
all: smart.log $(PROJECT).asm.rpt $(PROJECT).sta.rpt

syn: smart.log $(PROJECT).syn.rpt

fit: smart.log $(PROJECT).fit.rpt

asm: smart.log $(PROJECT).asm.rpt

sta: smart.log $(PROJECT).sta.rpt

smart: smart.log

# Target implementations
# Writes the word "done" into the stamp file named after it.
STAMP = echo done >

$(PROJECT).syn.rpt: syn.chg $(SOURCE_FILES)
	$(QUARTUS_BIN)/quartus_syn $(PROJECT) $(SYN_ARGS)
	$(STAMP) fit.chg

$(PROJECT).fit.rpt: fit.chg $(PROJECT).syn.rpt
	$(QUARTUS_BIN)/quartus_fit $(PROJECT) $(FIT_ARGS)
	$(STAMP) asm.chg
	$(STAMP) sta.chg

$(PROJECT).asm.rpt: asm.chg $(PROJECT).fit.rpt
	$(QUARTUS_BIN)/quartus_asm $(PROJECT) $(ASM_ARGS)

$(PROJECT).sta.rpt: sta.chg $(PROJECT).fit.rpt
	$(QUARTUS_BIN)/quartus_sta $(PROJECT) $(STA_ARGS)

smart.log: $(PROJECT_FILES)
	$(QUARTUS_BIN)/quartus_sh --determine_smart_action $(PROJECT) > smart.log

# Project initialization
$(PROJECT_FILES):
	$(QUARTUS_BIN)/quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc

# Initial stamps, created only when missing.
syn.chg:
	$(STAMP) syn.chg

fit.chg:
	$(STAMP) fit.chg

sta.chg:
	$(STAMP) sta.chg

asm.chg:
	$(STAMP) asm.chg

# quartus_pgm is taken from PATH on purpose: the machine attached to the board may
# not have the tools installed under $(QUARTUS_BIN).
program: $(PROJECT).sof
	quartus_pgm --no_banner --mode=jtag -o "P;$(PROJECT).sof"

clean:
	rm -rf *.rpt *.chg *.qsf *.qpf smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db output_files tmp-clearbox

View File

@@ -0,0 +1,30 @@
# -------------------------------------------------------------------------- #
#
# Copyright (C) 2018 Intel Corporation. All rights reserved.
# Your use of Intel Corporation's design tools, logic functions
# and other software and tools, and its AMPP partner logic
# functions, and any output files from any of the foregoing
# (including device programming or simulation files), and any
# associated documentation or information are expressly subject
# to the terms and conditions of the Intel Program License
# Subscription Agreement, the Intel Quartus Prime License Agreement,
# the Intel FPGA IP License Agreement, or other applicable license
# agreement, including, without limitation, that your use is for
# the sole purpose of programming logic devices manufactured by
# Intel and sold by Intel or its authorized distributors. Please
# refer to the applicable agreement for further details.
#
# -------------------------------------------------------------------------- #
#
# Quartus Prime
# Version 18.0.0 Build 219 04/25/2018 SJ Pro Edition
# Date created = 00:18:19 September 11, 2019
#
# -------------------------------------------------------------------------- #
QUARTUS_VERSION = "18.0"
DATE = "00:18:19 September 11, 2019"
# Revisions
PROJECT_REVISION = "VX_gpr_syn"

View File

@@ -0,0 +1,63 @@
set_global_assignment -name ORIGINAL_QUARTUS_VERSION 18.0.0
set_global_assignment -name PROJECT_CREATION_TIME_DATE "00:18:19 SEPTEMBER 11, 2019"
set_global_assignment -name LAST_QUARTUS_VERSION "18.0.0 Pro Edition"
set_global_assignment -name FAMILY "Arria 10"
set_global_assignment -name DEVICE 10AX115N4F45I3SG
set_global_assignment -name TOP_LEVEL_ENTITY VX_gpr_syn
set_global_assignment -name SEARCH_PATH ../
set_global_assignment -name VERILOG_FILE ../VX_define.v
set_global_assignment -name VERILOG_FILE ../byte_enabled_simple_dual_port_ram.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_branch_response_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_csr_write_request_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_dcache_request_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_dcache_response_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_forward_csr_response_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_forward_exe_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_forward_mem_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_forward_reqeust_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_forward_response_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_forward_wb_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_frE_to_bckE_req_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_gpr_clone_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_gpr_jal_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_gpr_read_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_gpr_wspawn_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_icache_request_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_icache_response_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_inst_mem_wb_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_inst_meta_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_jal_response_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_mem_req_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_mw_wb_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_warp_ctl_inter.v
set_global_assignment -name VERILOG_FILE ../interfaces/VX_wb_inter.v
set_global_assignment -name VERILOG_FILE ../pipe_regs/VX_d_e_reg.v
set_global_assignment -name VERILOG_FILE ../pipe_regs/VX_e_m_reg.v
set_global_assignment -name VERILOG_FILE ../pipe_regs/VX_f_d_reg.v
set_global_assignment -name VERILOG_FILE ../pipe_regs/VX_m_w_reg.v
set_global_assignment -name VERILOG_FILE ../VX_alu.v
set_global_assignment -name VERILOG_FILE ../VX_back_end.v
set_global_assignment -name VERILOG_FILE ../VX_context.v
set_global_assignment -name VERILOG_FILE ../VX_context_slave.v
set_global_assignment -name VERILOG_FILE ../VX_csr_handler.v
set_global_assignment -name VERILOG_FILE ../VX_decode.v
set_global_assignment -name VERILOG_FILE ../VX_execute.v
set_global_assignment -name VERILOG_FILE ../VX_fetch.v
set_global_assignment -name VERILOG_FILE ../VX_forwarding.v
set_global_assignment -name VERILOG_FILE ../VX_front_end.v
set_global_assignment -name VERILOG_FILE ../VX_generic_register.v
set_global_assignment -name VERILOG_FILE ../VX_gpr.v
set_global_assignment -name VERILOG_FILE ../VX_gpr_wrapper.v
set_global_assignment -name VERILOG_FILE ../VX_gpr_syn.v
set_global_assignment -name VERILOG_FILE ../VX_memory.v
set_global_assignment -name VERILOG_FILE ../VX_register_file.v
set_global_assignment -name VERILOG_FILE ../VX_register_file_master_slave.v
set_global_assignment -name VERILOG_FILE ../VX_register_file_slave.v
set_global_assignment -name VERILOG_FILE ../VX_warp.v
set_global_assignment -name VERILOG_FILE ../VX_writeback.v
set_global_assignment -name VERILOG_FILE ../Vortex.v
set_global_assignment -name SDC_FILE vortex.sdc
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL

1
old_rtl/quartus/asm.chg Normal file
View File

@@ -0,0 +1 @@
done

1
old_rtl/quartus/fit.chg Normal file
View File

@@ -0,0 +1 @@
done

1
old_rtl/quartus/map.chg Normal file
View File

@@ -0,0 +1 @@
Wed Sep 11 00:18:22 2019

View File

@@ -0,0 +1,88 @@
# Creates (or overwrites) a Quartus project and fills in its settings.
#
# Usage: quartus_sh -t project.tcl -project <name> -family <family> -device <part>
#                                  -top <entity> [-sdc <file>] [-src <file>]
#
# NOTE(review): -sdc and -src are accepted but not used. The SDC file is fixed
# to vortex.sdc and the Verilog sources come from the list below. Confirm
# whether the value passed with -sdc should replace the fixed name.

package require cmdline

set options { \
{ "project.arg" "" "Project name" } \
{ "family.arg" "" "Device family name" } \
{ "device.arg" "" "Device name" } \
{ "top.arg" "" "Top level module" } \
{ "sdc.arg" "" "Timing Design Constraints file" } \
{ "src.arg" "" "Verilog source file" } \
}

array set opts [::cmdline::getoptions quartus(args) $options]

project_new $opts(project) -overwrite

set_global_assignment -name FAMILY $opts(family)
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name SEARCH_PATH ../

# Verilog sources, in the order they are added to the project.
# Each file is listed exactly once.
set verilog_files {
    ../VX_define.v
    ../byte_enabled_simple_dual_port_ram.v
    ../interfaces/VX_branch_response_inter.v
    ../interfaces/VX_csr_write_request_inter.v
    ../interfaces/VX_dcache_request_inter.v
    ../interfaces/VX_dcache_response_inter.v
    ../interfaces/VX_forward_csr_response_inter.v
    ../interfaces/VX_forward_exe_inter.v
    ../interfaces/VX_forward_mem_inter.v
    ../interfaces/VX_forward_reqeust_inter.v
    ../interfaces/VX_forward_response_inter.v
    ../interfaces/VX_forward_wb_inter.v
    ../interfaces/VX_frE_to_bckE_req_inter.v
    ../interfaces/VX_gpr_clone_inter.v
    ../interfaces/VX_gpr_jal_inter.v
    ../interfaces/VX_gpr_read_inter.v
    ../interfaces/VX_gpr_wspawn_inter.v
    ../interfaces/VX_icache_request_inter.v
    ../interfaces/VX_icache_response_inter.v
    ../interfaces/VX_inst_mem_wb_inter.v
    ../interfaces/VX_inst_meta_inter.v
    ../interfaces/VX_jal_response_inter.v
    ../interfaces/VX_mem_req_inter.v
    ../interfaces/VX_mw_wb_inter.v
    ../interfaces/VX_warp_ctl_inter.v
    ../interfaces/VX_wb_inter.v
    ../pipe_regs/VX_d_e_reg.v
    ../pipe_regs/VX_e_m_reg.v
    ../pipe_regs/VX_f_d_reg.v
    ../pipe_regs/VX_m_w_reg.v
    ../VX_alu.v
    ../VX_back_end.v
    ../VX_context.v
    ../VX_context_slave.v
    ../VX_csr_handler.v
    ../VX_decode.v
    ../VX_execute.v
    ../VX_fetch.v
    ../VX_forwarding.v
    ../VX_front_end.v
    ../VX_generic_register.v
    ../VX_gpr.v
    ../VX_gpr_wrapper.v
    ../VX_gpr_syn.v
    ../VX_memory.v
    ../VX_register_file.v
    ../VX_register_file_master_slave.v
    ../VX_register_file_slave.v
    ../VX_warp.v
    ../VX_writeback.v
    ../Vortex.v
}

foreach verilog_file $verilog_files {
    set_global_assignment -name VERILOG_FILE $verilog_file
}

set_global_assignment -name SDC_FILE vortex.sdc
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL

project_close

# Disabled: adding the single file given with -src.
# set_global_assignment -name VERILOG_FILE $opts(src)

27
old_rtl/quartus/smart.log Normal file
View File

@@ -0,0 +1,27 @@
Info (292036): Thank you for using the Quartus Prime software 30-day evaluation. You have 0 days remaining (until Sep 11, 2019) to use the Quartus Prime software with compilation and simulation support.
Info: *******************************************************************
Info: Running Quartus Prime Shell
Info: Version 18.0.0 Build 219 04/25/2018 SJ Pro Edition
Info: Copyright (C) 2018 Intel Corporation. All rights reserved.
Info: Your use of Intel Corporation's design tools, logic functions
Info: and other software and tools, and its AMPP partner logic
Info: functions, and any output files from any of the foregoing
Info: (including device programming or simulation files), and any
Info: associated documentation or information are expressly subject
Info: to the terms and conditions of the Intel Program License
Info: Subscription Agreement, the Intel Quartus Prime License Agreement,
Info: the Intel FPGA IP License Agreement, or other applicable license
Info: agreement, including, without limitation, that your use is for
Info: the sole purpose of programming logic devices manufactured by
Info: Intel and sold by Intel or its authorized distributors. Please
Info: refer to the applicable agreement for further details.
Info: Processing started: Wed Sep 11 00:18:22 2019
Info: Command: quartus_sh --determine_smart_action VX_gpr_syn
Info: Quartus(args): VX_gpr_syn
Info: SMART_ACTION = SOURCE
Info (23030): Evaluation of Tcl script /tools/reconfig/intel/18.0/quartus/common/tcl/internal/qsh_smart.tcl was successful
Info: Quartus Prime Shell was successful. 0 errors, 0 warnings
Info: Peak virtual memory: 687 megabytes
Info: Processing ended: Wed Sep 11 00:18:22 2019
Info: Elapsed time: 00:00:00
Info: Total CPU time (on all processors): 00:00:00

1
old_rtl/quartus/sta.chg Normal file
View File

@@ -0,0 +1 @@
done

1
old_rtl/quartus/syn.chg Normal file
View File

@@ -0,0 +1 @@
done

View File

@@ -0,0 +1,40 @@
# Project setup: load the Quartus "flow" package, register the RTL sources and
# the SDC constraint file, then apply the global compile settings.
load_package flow

# Verilog sources, registered in their original order.
foreach rtl_file { ../VX_gpr_wrapper.v ../VX_gpr.v } {
    set_global_assignment -name VERILOG_FILE $rtl_file
}

# Remaining global assignments as name/value pairs, applied top to bottom.
foreach { setting value } {
    SDC_FILE                 vortex.sdc
    VERILOG_INPUT_VERSION    SYSTEMVERILOG_2009
    MAX_CORE_JUNCTION_TEMP   80
    PROJECT_OUTPUT_DIRECTORY output_files
    NUM_PARALLEL_PROCESSORS  ALL
} {
    set_global_assignment -name $setting $value
}
# pins configuration
package require cmdline

# make_all_pins_virtual ?-exclude pinList?
#
# Assigns VIRTUAL_PIN ON to every pin of the design except those named in
# pinList, then exports the assignments.
#
# Arguments:
#   -exclude pinList   Tcl list (whitespace-separated, no commas) of pin names
#                      to leave untouched. Names are compared exactly against
#                      the full path reported by get_name_info. Defaults to
#                      the empty list, i.e. every pin becomes virtual.
#
# Side effects: removes all existing VIRTUAL_PIN instance assignments and runs
# the "map" tool through execute_module before the pins are enumerated
# (presumably so that get_names can see the design's pins -- confirm).
proc make_all_pins_virtual { args } {
    set options {
        { "exclude.arg" "" "List of signals to exclude" }
    }

    # Parse the arguments passed to this proc. getoptions takes the NAME of a
    # variable in the caller's scope; the global quartus(args) array element
    # is not visible here and does not carry the -exclude list anyway.
    array set opts [::cmdline::getoptions args $options]

    # The option is declared as "exclude.arg", so the parsed key is "exclude".
    set excluded_pins $opts(exclude)

    remove_all_instance_assignments -name VIRTUAL_PIN
    execute_module -tool map

    set name_ids [get_names -filter * -node_type pin]
    foreach_in_collection name_id $name_ids {
        set pin_name [get_name_info -info full_path $name_id]

        if { [lsearch -exact $excluded_pins $pin_name] == -1 } {
            post_message "Making VIRTUAL_PIN assignment to $pin_name"
            set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
        } else {
            post_message "Skipping VIRTUAL_PIN assignment to $pin_name"
        }
    }

    export_assignments
}

# Keep clk and reset as real pins. Tcl list elements are separated by
# whitespace only; a comma would become part of the pin name ("clk,") and the
# exact-match lookup inside the proc would never find it.
make_all_pins_virtual -exclude { clk reset }

View File

@@ -0,0 +1 @@
# Define the clock "clk" on the input port clk, with the period given as a
# 400 MHz frequency. -waveform lists the edge times within one period: first
# entry is the rising edge, second the falling edge.
# NOTE(review): 400 MHz is a 2.5 ns period; if the waveform values are in ns,
# a falling edge at 1.0 is a 40% duty cycle rather than 50% -- confirm intent.
create_clock -name clk \
    -period "400 MHz" \
    -waveform { 0.0 1.0 } \
    [get_ports clk]

Some files were not shown because too many files have changed in this diff Show More