New Cache Design Passing All Tests

This commit is contained in:
felsabbagh3
2020-03-04 23:24:32 -08:00
parent aa1a0ee376
commit a86a403ca9
12 changed files with 194 additions and 103 deletions

View File

@@ -49,6 +49,7 @@ module VX_bank (
wire dfpq_full;
wire[31:0] dfpq_addr_st0;
wire[`BANK_LINE_SIZE_RNG][31:0] dfpq_filldata_st0;
reg dfpq_hazard_st0;
assign dram_fill_accept = !dfpq_full;
@@ -76,6 +77,7 @@ module VX_bank (
wire [`NW_M1:0] reqq_req_warp_num_st0;
wire [2:0] reqq_req_mem_read_st0;
wire [2:0] reqq_req_mem_write_st0;
reg reqq_hazard_st0;
assign reqq_push = !delay_req && (|bank_valids);
@@ -119,6 +121,7 @@ module VX_bank (
wire [`NW_M1:0] mrvq_warp_num_st0;
wire [2:0] mrvq_mem_read_st0;
wire [2:0] mrvq_mem_write_st0;
reg mrvq_hazard_st0;
wire miss_add;
wire[31:0] miss_add_addr;
@@ -165,9 +168,27 @@ module VX_bank (
wire stall_bank_pipe;
assign dfpq_pop = !dfpq_empty && !stall_bank_pipe;
assign mrvq_pop = !dfpq_pop && mrvq_valid_st0 && !stall_bank_pipe;
assign reqq_pop = !mrvq_pop && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0];
assign dfpq_pop = !dfpq_empty && !stall_bank_pipe && !dfpq_hazard_st0;
assign mrvq_pop = !dfpq_pop && mrvq_valid_st0 && !stall_bank_pipe && !mrvq_hazard_st0;
assign reqq_pop = !mrvq_pop && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !reqq_hazard_st0;
integer st1_cycle;
always @(*) begin
assign dfpq_hazard_st0 = 0;
assign mrvq_hazard_st0 = 0;
assign reqq_hazard_st0 = 0;
for (st1_cycle = 0; st1_cycle < `STAGE_1_CYCLES; st1_cycle = st1_cycle + 1) begin
if (valid_st1[st1_cycle] && going_to_write_st1[st1_cycle]) begin
if (dfpq_addr_st0 [31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) assign dfpq_hazard_st0 = 1;
if (mrvq_addr_st0 [31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) assign mrvq_hazard_st0 = 1;
if (reqq_req_addr_st0[31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) assign reqq_hazard_st0 = 1;
end
end
end
wire qual_is_fill_st0;
@@ -176,13 +197,15 @@ module VX_bank (
wire [31:0] qual_writeword_st0;
wire [`BANK_LINE_SIZE_RNG][31:0] qual_writedata_st0;
wire [`REQ_INST_META_SIZE-1:0] qual_inst_meta_st0;
wire qual_going_to_write_st0;
wire valid_st1[`STAGE_1_CYCLES-1:0];
wire [31:0] addr_st1[`STAGE_1_CYCLES-1:0];
wire [31:0] writeword_st1[`STAGE_1_CYCLES-1:0];
wire [`REQ_INST_META_SIZE-1:0] inst_meta_st1[`STAGE_1_CYCLES-1:0];
wire is_fill_st1[`STAGE_1_CYCLES-1:0];
wire [`BANK_LINE_SIZE_RNG][31:0] writedata_st1[`STAGE_1_CYCLES-1:0];
wire valid_st1 [`STAGE_1_CYCLES-1:0];
wire going_to_write_st1[`STAGE_1_CYCLES-1:0];
wire [31:0] addr_st1 [`STAGE_1_CYCLES-1:0];
wire [31:0] writeword_st1 [`STAGE_1_CYCLES-1:0];
wire [`REQ_INST_META_SIZE-1:0] inst_meta_st1 [`STAGE_1_CYCLES-1:0];
wire is_fill_st1 [`STAGE_1_CYCLES-1:0];
wire [`BANK_LINE_SIZE_RNG][31:0] writedata_st1 [`STAGE_1_CYCLES-1:0];
assign qual_is_fill_st0 = dfpq_pop;
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop;
@@ -202,25 +225,30 @@ module VX_bank (
reqq_pop ? {reqq_req_rd_st0, reqq_req_wb_st0, reqq_req_warp_num_st0, reqq_req_mem_read_st0, reqq_req_mem_write_st0, reqq_req_tid_st0} :
0;
VX_generic_register #(.N( 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_c0 (
assign qual_going_to_write_st0 = dfpq_pop ? 1 :
(mrvq_pop && (mrvq_mem_write_st0 != `NO_MEM_WRITE)) ? 1 :
(reqq_pop && (reqq_req_mem_write_st0 != `NO_MEM_WRITE)) ? 1 :
0;
VX_generic_register #(.N( 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_c0 (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
.flush(0),
.in ({qual_valid_st0, qual_addr_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({valid_st1[0] , addr_st1[0] , writeword_st1[0] , inst_meta_st1[0] , is_fill_st1[0] , writedata_st1[0]})
.in ({qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({going_to_write_st1[0] , valid_st1[0] , addr_st1[0] , writeword_st1[0] , inst_meta_st1[0] , is_fill_st1[0] , writedata_st1[0]})
);
genvar curr_stage;
generate
for (curr_stage = 1; curr_stage < `STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin
VX_generic_register #(.N( 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_cc (
VX_generic_register #(.N( 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_cc (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
.flush(0),
.in ({valid_st1[curr_stage-1], addr_st1[curr_stage-1], writeword_st1[curr_stage-1], inst_meta_st1[curr_stage-1], is_fill_st1[curr_stage-1] , writedata_st1[curr_stage-1]}),
.out ({valid_st1[curr_stage] , addr_st1[curr_stage] , writeword_st1[curr_stage] , inst_meta_st1[curr_stage] , is_fill_st1[curr_stage] , writedata_st1[curr_stage] })
.in ({going_to_write_st1[curr_stage-1], valid_st1[curr_stage-1], addr_st1[curr_stage-1], writeword_st1[curr_stage-1], inst_meta_st1[curr_stage-1], is_fill_st1[curr_stage-1] , writedata_st1[curr_stage-1]}),
.out ({going_to_write_st1[curr_stage] , valid_st1[curr_stage] , addr_st1[curr_stage] , writeword_st1[curr_stage] , inst_meta_st1[curr_stage] , is_fill_st1[curr_stage] , writedata_st1[curr_stage] })
);
end
endgenerate
@@ -239,6 +267,7 @@ module VX_bank (
wire [2:0] mem_read_st1e;
wire [2:0] mem_write_st1e;
wire [`vx_clog2(`NUMBER_REQUESTS)-1:0] tid_st1e;
wire fill_saw_dirty_st1e;
assign {rd_st1e, wb_st1e, warp_num_st1e, mem_read_st1e, mem_write_st1e, tid_st1e} = inst_meta_st1[`STAGE_1_CYCLES-1];
@@ -266,7 +295,8 @@ module VX_bank (
.readdata_st1e (readdata_st1e),
.readtag_st1e (readtag_st1e),
.miss_st1e (miss_st1e),
.dirty_st1e (dirty_st1e)
.dirty_st1e (dirty_st1e),
.fill_saw_dirty_st1e(fill_saw_dirty_st1e)
);
wire qual_valid_st1e_2 = valid_st1[`STAGE_1_CYCLES-1] && !is_fill_st1[`STAGE_1_CYCLES-1];
@@ -281,14 +311,15 @@ module VX_bank (
wire[`REQ_INST_META_SIZE-1:0] inst_meta_st2;
wire[`TAG_SELECT_SIZE_RNG] readtag_st2;
wire is_fill_st2;
wire fill_saw_dirty_st2;
VX_generic_register #(.N( 1 + 1 + 32 + 32 + 32 + (`BANK_LINE_SIZE_WORDS * 32) + 1 + 1 + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS)) st_1e_2 (
VX_generic_register #(.N( 1 + 1 + 1 + 32 + 32 + 32 + (`BANK_LINE_SIZE_WORDS * 32) + 1 + 1 + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS)) st_1e_2 (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
.flush(0),
.in ({is_fill_st1[`STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1[`STAGE_1_CYCLES-1], writeword_st1[`STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[`STAGE_1_CYCLES-1]}),
.out ({is_fill_st2 , valid_st2 , addr_st2 , writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
.in ({fill_saw_dirty_st1e, is_fill_st1[`STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1[`STAGE_1_CYCLES-1], writeword_st1[`STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[`STAGE_1_CYCLES-1]}),
.out ({fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
);
@@ -324,7 +355,7 @@ module VX_bank (
);
// Enqueue to DWB Queue
wire dwbq_push = valid_st2 && miss_st2 && dirty_st2;
wire dwbq_push = (valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2;
wire[31:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]};
wire[`BANK_LINE_SIZE_RNG][31:0] dwbq_req_data = readdata_st2;
wire dwbq_empty;

View File

@@ -39,7 +39,7 @@
`define DFQQ_SIZE `REQQ_SIZE
// Dram knobs
`define SIMULATED_DRAM_LATENCY_CYCLES 50
`define SIMULATED_DRAM_LATENCY_CYCLES 10
// ========================================= Configurable Knobs =========================================
@@ -89,8 +89,8 @@
`define BANK_SIZE_BYTES `CACHE_SIZE_BYTES/`NUMBER_BANKS
`define BANK_LINE_COUNT `BANK_SIZE_BYTES/`BANK_LINE_SIZE_BYTES
`define BANK_LINE_SIZE_WORDS `BANK_LINE_SIZE_BYTES / `WORD_SIZE_BYTES
`define BANK_LINE_COUNT (`BANK_SIZE_BYTES/`BANK_LINE_SIZE_BYTES)
`define BANK_LINE_SIZE_WORDS (`BANK_LINE_SIZE_BYTES / `WORD_SIZE_BYTES)
`define BANK_LINE_SIZE_RNG `BANK_LINE_SIZE_WORDS-1:0
// Offset is fixed
@@ -106,7 +106,7 @@
`define WORD_SELECT_ADDR_START 1+`OFFSET_ADDR_END
`define WORD_SELECT_ADDR_END `WORD_SELECT_SIZE_END+`OFFSET_ADDR_END
`define WORD_SELECT_ADDR_RNG `WORD_SELECT_ADDR_END:`WORD_SELECT_ADDR_START
`define WORD_SELECT_SIZE_RNG `WORD_SELECT_SIZE_END-1:`WORD_SELECT_SIZE_END
`define WORD_SELECT_SIZE_RNG `WORD_SELECT_SIZE_END-1:0
`define BANK_SELECT_NUM_BITS $clog2(`NUMBER_BANKS)
`define BANK_SELECT_SIZE_END `BANK_SELECT_NUM_BITS

View File

@@ -69,6 +69,7 @@ module VX_cache_req_queue (
wire push_qual = reqq_push && !reqq_full;
wire pop_qual = reqq_pop && use_empty && !out_empty && !reqq_empty;
VX_generic_queue #(.DATAW( (`NUMBER_REQUESTS * (1+32+32)) + 5 + 2 + (`NW_M1+1) + 3 + 3 ), .SIZE(`REQQ_SIZE)) reqq_queue(
.clk (clk),
.reset (reset),
@@ -81,15 +82,16 @@ module VX_cache_req_queue (
);
wire[`NUMBER_REQUESTS-1:0] real_out_per_valids = out_per_valids & {`NUMBER_REQUESTS{~reqq_empty}};
assign qual_valids = use_empty ? out_per_valids : out_empty ? 0 : use_per_valids;
assign qual_addr = use_empty ? out_per_addr : use_per_addr;
assign qual_writedata = use_empty ? out_per_writedata : use_per_writedata;
assign qual_rd = use_empty ? out_per_rd : use_per_rd;
assign qual_wb = use_empty ? out_per_wb : use_per_wb;
assign qual_warp_num = use_empty ? out_per_warp_num : use_per_warp_num;
assign qual_mem_read = use_empty ? out_per_mem_read : use_per_mem_read;
assign qual_mem_write = use_empty ? out_per_mem_write : use_per_mem_write;
assign qual_valids = use_empty ? real_out_per_valids : out_empty ? 0 : use_per_valids;
assign qual_addr = use_empty ? out_per_addr : use_per_addr;
assign qual_writedata = use_empty ? out_per_writedata : use_per_writedata;
assign qual_rd = use_empty ? out_per_rd : use_per_rd;
assign qual_wb = use_empty ? out_per_wb : use_per_wb;
assign qual_warp_num = use_empty ? out_per_warp_num : use_per_warp_num;
assign qual_mem_read = use_empty ? out_per_mem_read : use_per_mem_read;
assign qual_mem_write = use_empty ? out_per_mem_write : use_per_mem_write;
wire[`vx_clog2(`NUMBER_REQUESTS)-1:0] qual_request_index;
wire qual_has_request;

View File

@@ -24,7 +24,7 @@ module VX_cache_wb_sel_merge (
);
reg [`NUMBER_BANKS-1:0] per_bank_wb_pop_unqual;
assign per_bank_wb_pop = per_bank_wb_pop_unqual & {`NUMBER_BANKS{core_no_wb_slot}};
assign per_bank_wb_pop = per_bank_wb_pop_unqual & {`NUMBER_BANKS{~core_no_wb_slot}};
wire[`NUMBER_BANKS-1:0] bank_wants_wb;
genvar curr_bank;
@@ -51,8 +51,10 @@ module VX_cache_wb_sel_merge (
genvar this_bank;
generate
always @(*) begin
assign core_wb_valid = 0;
assign core_wb_readdata = 0;
for (this_bank = 0; this_bank < `NUMBER_BANKS; this_bank = this_bank + 1) begin
if (found_bank && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index]) && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])) begin
if (found_bank && (per_bank_wb_valid[this_bank]) && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index]) && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])) begin
assign core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
assign core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
assign per_bank_wb_pop_unqual[this_bank] = 1;

View File

@@ -21,7 +21,8 @@ module VX_tag_data_access (
output wire[`BANK_LINE_SIZE_RNG][31:0] readdata_st1e,
output wire[`TAG_SELECT_SIZE_RNG] readtag_st1e,
output wire miss_st1e,
output wire dirty_st1e
output wire dirty_st1e,
output wire fill_saw_dirty_st1e
);
@@ -46,6 +47,8 @@ module VX_tag_data_access (
wire[`BANK_LINE_SIZE_RNG][3:0] use_write_enable;
wire[`BANK_LINE_SIZE_RNG][31:0] use_write_data;
wire fill_sent;
VX_tag_data_structure VX_tag_data_structure(
.clk (clk),
.reset (reset),
@@ -59,7 +62,8 @@ module VX_tag_data_access (
.write_enable(use_write_enable),
.write_fill (writefill_st1e),
.write_addr (writeaddr_st1e),
.write_data (use_write_data)
.write_data (use_write_data),
.fill_sent (fill_sent)
);
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`BANK_LINE_SIZE_WORDS*32) )) s0_1_c0 (
@@ -89,7 +93,10 @@ module VX_tag_data_access (
assign use_read_valid_st1e = read_valid_st1c[`STAGE_1_CYCLES-1];
assign use_read_dirty_st1e = read_dirty_st1c[`STAGE_1_CYCLES-1];
assign use_read_tag_st1e = read_tag_st1c [`STAGE_1_CYCLES-1];
assign use_read_data_st1e = read_data_st1c [`STAGE_1_CYCLES-1];
genvar curr_w;
for (curr_w = 0; curr_w < `BANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[`STAGE_1_CYCLES-1][curr_w][31:0];
// assign use_read_data_st1e = read_data_st1c [`STAGE_1_CYCLES-1];
/////////////////////// LOAD LOGIC ///////////////////
@@ -107,10 +114,17 @@ module VX_tag_data_access (
wire b2 = (byte_select == 2);
wire b3 = (byte_select == 3);
wire[31:0] data_unQual = (b0 || lw) ? (use_read_data_st1e[block_offset]) :
b1 ? (use_read_data_st1e[block_offset] >> 8) :
b2 ? (use_read_data_st1e[block_offset] >> 16) :
(use_read_data_st1e[block_offset] >> 24);
wire[31:0] w0 = read_data_st1c[`STAGE_1_CYCLES-1][0][31:0];
wire[31:0] w1 = read_data_st1c[`STAGE_1_CYCLES-1][1][31:0];
wire[31:0] w2 = read_data_st1c[`STAGE_1_CYCLES-1][2][31:0];
wire[31:0] w3 = read_data_st1c[`STAGE_1_CYCLES-1][3][31:0];
wire[31:0] data_unmod = read_data_st1c[`STAGE_1_CYCLES-1][block_offset][31:0];
wire[31:0] data_unQual = (b0 || lw) ? (data_unmod) :
b1 ? (data_unmod >> 8) :
b2 ? (data_unmod >> 16) :
(data_unmod >> 24);
wire[31:0] lb_data = (data_unQual[7] ) ? (data_unQual | 32'hFFFFFF00) : (data_unQual & 32'hFF);
@@ -151,8 +165,8 @@ module VX_tag_data_access (
wire[3:0] sb_mask = (b0 ? 4'b0001 : (b1 ? 4'b0010 : (b2 ? 4'b0100 : 4'b1000)));
wire[3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);
wire should_write = (sw || sb || sh) && valid_req_st1e && !miss_st1e;
wire force_write = writefill_st1e && valid_req_st1e;
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e;
wire force_write = writefill_st1e && valid_req_st1e && miss_st1e;
wire[`BANK_LINE_SIZE_RNG][3:0] we;
wire[`BANK_LINE_SIZE_RNG][31:0] data_write;
@@ -161,13 +175,13 @@ module VX_tag_data_access (
for (g = 0; g < `BANK_LINE_SIZE_WORDS; g = g + 1) begin : write_enables
wire normal_write = (block_offset == g) && should_write;
assign we[g] = (force_write) ? 4'b1111 :
assign we[g] = (force_write) ? 4'b1111 :
(normal_write && sw) ? 4'b1111 :
(normal_write && sb) ? sb_mask :
(normal_write && sh) ? sh_mask :
4'b0000;
assign data_write[g] = force_write ? writedata_st1e : use_write_dat ;
assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat;
end
endgenerate
@@ -181,6 +195,8 @@ module VX_tag_data_access (
assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e;
assign readdata_st1e = use_read_data_st1e;
assign readtag_st1e = use_read_tag_st1e;
assign fill_sent = miss_st1e;
assign fill_saw_dirty_st1e = force_write && dirty_st1e;
endmodule

View File

@@ -11,7 +11,8 @@ module VX_tag_data_structure (
input wire[`BANK_LINE_SIZE_RNG][3:0] write_enable,
input wire write_fill,
input wire[31:0] write_addr,
input wire[`BANK_LINE_SIZE_RNG][31:0] write_data
input wire[`BANK_LINE_SIZE_RNG][31:0] write_data,
input wire fill_sent
);
@@ -38,7 +39,9 @@ module VX_tag_data_structure (
end else begin
dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 1;
end
end
end else if (fill_sent) begin
dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
end
for (f = 0; f < `BANK_LINE_SIZE_WORDS; f = f + 1) begin
if (write_enable[f][0]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][0] <= write_data[f][7 :0 ];

View File

@@ -8,6 +8,8 @@ module VX_fetch (
VX_join_inter VX_join,
input wire schedule_delay,
input wire icache_stage_delay,
input wire[`NW_M1:0] icache_stage_wid,
input wire[`NT-1:0] icache_stage_valids,
output wire out_ebreak,
VX_jal_response_inter VX_jal_rsp,
@@ -40,7 +42,7 @@ module VX_fetch (
// Locals
assign pipe_stall = schedule_delay || icache_stage_delay || stall_might_be_branch;
assign pipe_stall = schedule_delay || icache_stage_delay || (stall_might_be_branch && (icache_stage_wid == warp_num)) ;
VX_warp_scheduler warp_scheduler(
.clk (clk),

View File

@@ -37,6 +37,9 @@ wire icache_stage_delay;
wire vortex_ebreak;
wire terminate_sim;
wire[`NW_M1:0] icache_stage_wid;
wire[`NT-1:0] icache_stage_valids;
assign fetch_ebreak = vortex_ebreak || terminate_sim;
@@ -46,6 +49,8 @@ VX_join_inter VX_join();
VX_fetch vx_fetch(
.clk (clk),
.reset (reset),
.icache_stage_wid (icache_stage_wid),
.icache_stage_valids(icache_stage_valids),
.VX_wstall (VX_wstall),
.VX_join (VX_join),
.schedule_delay (schedule_delay),
@@ -74,6 +79,8 @@ VX_icache_stage VX_icache_stage(
.clk (clk),
.reset (reset),
.icache_stage_delay(icache_stage_delay),
.icache_stage_valids(icache_stage_valids),
.icache_stage_wid (icache_stage_wid),
.fe_inst_meta_fi (fe_inst_meta_fi2),
.fe_inst_meta_id (fe_inst_meta_id),
.icache_response (icache_response_fe),

View File

@@ -39,7 +39,7 @@ module VX_generic_queue
tail <= tail+1;
end
if (pop) begin
if (pop && !empty) begin
head <= head + 1;
end

View File

@@ -4,6 +4,8 @@ module VX_icache_stage (
input wire clk,
input wire reset,
output wire icache_stage_delay,
output wire[`NW_M1:0] icache_stage_wid,
output wire[`NT-1:0] icache_stage_valids,
VX_inst_meta_inter fe_inst_meta_fi,
VX_inst_meta_inter fe_inst_meta_id,
VX_icache_response_inter icache_response,
@@ -27,5 +29,8 @@ module VX_icache_stage (
assign fe_inst_meta_id.warp_num = fe_inst_meta_fi.warp_num;
assign fe_inst_meta_id.valid = fe_inst_meta_fi.valid & {`NT{!icache_stage_delay}};
assign icache_stage_wid = fe_inst_meta_fi.warp_num;
assign icache_stage_valids = fe_inst_meta_fi.valid;
endmodule

View File

@@ -87,7 +87,9 @@ int main(int argc, char **argv)
#else
char testing[] = "../../emulator/riscv_tests/rv32ui-p-auipc.hex";
// char testing[] = "../../runtime/mains/simple/vx_simple_main.hex";
// char testing[] = "../../emulator/riscv_tests/rv32ui-p-lw.hex";
char testing[] = "../../emulator/riscv_tests/rv32ui-p-sw.hex";
Vortex v;
// const char *testing;

View File

@@ -28,6 +28,14 @@ double sc_time_stamp()
return time_stamp / 1000.0;
}
// One in-flight DRAM read tracked by the simulated memory model.
struct dram_req_t
{
    int cycles_left;    // remaining cycles before the response may be delivered
    int data_length;    // number of 32-bit words held in `data`
    unsigned base_addr; // address of the first word of the request
    unsigned * data;    // word buffer for the response; not owned or freed by this struct
};
class Vortex
{
public:
@@ -69,6 +77,7 @@ class Vortex
int debug_end_wait;
int debug_debugAddr;
double stats_sim_time;
std::vector<dram_req_t> dram_req_vec;
#ifdef VCD_OUTPUT
VerilatedVcdC *m_trace;
#endif
@@ -235,65 +244,77 @@ void Vortex::io_handler()
/**
 * Simulated DRAM model for the cache's dram_req / dram_fill_rsp interface.
 *
 * Each call does three things, in this order:
 *   1. Ages every queued read by one cycle and picks the first
 *      (lowest-index) entry whose latency has reached zero.
 *   2. Samples the model's request outputs: a read is copied out of `ram`
 *      and queued with `dram_expected_lat` cycles of delay; a write is
 *      applied to `ram` immediately.
 *   3. If the model accepts fills and an entry is ready, drives the fill
 *      response and retires that entry; otherwise drives the response low.
 *
 * NOTE(review): presumably called once per simulated clock cycle — confirm
 * against the caller.
 *
 * @return always false.
 */
bool Vortex::dbus_driver()
{
    // Age the queued reads and remember the first one that is ready.
    int dequeue_index = -1;
    bool dequeue_valid = false;
    const int pending = static_cast<int>(this->dram_req_vec.size());
    for (int i = 0; i < pending; i++)
    {
        if (this->dram_req_vec[i].cycles_left > 0)
        {
            this->dram_req_vec[i].cycles_left -= 1;
        }

        if ((this->dram_req_vec[i].cycles_left == 0) && (!dequeue_valid))
        {
            dequeue_index = i;
            dequeue_valid = true;
        }
    }

    if (vortex->dram_req)
    {
        if (vortex->dram_req_read)
        {
            // Snapshot the requested words now; they are returned later,
            // once the entry's latency has elapsed.
            dram_req_t dram_req;
            dram_req.cycles_left = vortex->dram_expected_lat;
            dram_req.data_length = vortex->dram_req_size / 4; // request size is divided into 4-byte words
            dram_req.base_addr = vortex->dram_req_addr;
            dram_req.data = static_cast<unsigned *>(malloc(dram_req.data_length * sizeof(unsigned)));

            for (int i = 0; i < dram_req.data_length; i++)
            {
                unsigned curr_addr = dram_req.base_addr + (i * 4);
                unsigned data_rd;
                ram.getWord(curr_addr, &data_rd);
                dram_req.data[i] = data_rd;
            }

            this->dram_req_vec.push_back(dram_req);
        }

        if (vortex->dram_req_write)
        {
            // Writes are applied to the backing RAM immediately.
            unsigned base_addr = vortex->dram_req_addr;
            unsigned data_length = vortex->dram_req_size / 4;

            for (unsigned i = 0; i < data_length; i++)
            {
                unsigned curr_addr = base_addr + (i * 4);
                unsigned data_wr = vortex->dram_req_data[i];
                ram.writeWord(curr_addr, &data_wr);
            }
        }
    }

    if (vortex->dram_fill_accept && dequeue_valid)
    {
        vortex->dram_fill_rsp = 1;
        vortex->dram_fill_rsp_addr = this->dram_req_vec[dequeue_index].base_addr;

        for (int i = 0; i < this->dram_req_vec[dequeue_index].data_length; i++)
        {
            vortex->dram_fill_rsp_data[i] = this->dram_req_vec[dequeue_index].data[i];
        }

        // The buffer was malloc'ed when the read was queued; release it
        // before dropping the entry.
        free(this->dram_req_vec[dequeue_index].data);
        this->dram_req_vec.erase(this->dram_req_vec.begin() + dequeue_index);
    }
    else
    {
        vortex->dram_fill_rsp = 0;
        vortex->dram_fill_rsp_addr = 0;
    }

    return false;
}
@@ -430,4 +451,4 @@ bool Vortex::simulate(std::string file_to_simulate)
return (status == 1);
// return (1 == 1);
}
}