OPAE hw snooping fixes

This commit is contained in:
Blaise Tine
2020-04-04 05:07:45 -07:00
parent 1f63139ce5
commit 07ec0ef344
9 changed files with 96 additions and 79 deletions

View File

@@ -68,6 +68,7 @@ vortex_afu.json
../../rtl/VX_cache/VX_cache_miss_resrv.v ../../rtl/VX_cache/VX_cache_miss_resrv.v
../../rtl/VX_cache/VX_fill_invalidator.v ../../rtl/VX_cache/VX_fill_invalidator.v
../../rtl/VX_cache/VX_tag_data_structure.v ../../rtl/VX_cache/VX_tag_data_structure.v
../../rtl/VX_cache/VX_prefetcher.v
../../rtl/cache/VX_generic_pe.v ../../rtl/cache/VX_generic_pe.v
../../rtl/cache/cache_set.v ../../rtl/cache/cache_set.v
../../rtl/cache/VX_d_cache.v ../../rtl/cache/VX_d_cache.v

View File

@@ -14,7 +14,7 @@
"cmd-type-read": 1, "cmd-type-read": 1,
"cmd-type-write": 2, "cmd-type-write": 2,
"cmd-type-run": 3, "cmd-type-run": 3,
"cmd-type-snoop": 4, "cmd-type-clflush": 4,
"afu-top-interface": "afu-top-interface":
{ {

View File

@@ -34,7 +34,9 @@ module vortex_afu #(
); );
localparam AVS_RD_QUEUE_SIZE = 16; localparam AVS_RD_QUEUE_SIZE = 16;
localparam VX_SNOOPING_DELAY = 300;
localparam VX_SNOOP_DELAY = 300;
localparam VX_SNOOP_LEVELS = 2;
localparam AFU_ID_L = 16'h0002; // AFU ID Lower localparam AFU_ID_L = 16'h0002; // AFU ID Lower
localparam AFU_ID_H = 16'h0004; // AFU ID Higher localparam AFU_ID_H = 16'h0004; // AFU ID Higher
@@ -42,7 +44,7 @@ localparam AFU_ID_H = 16'h0004; // AFU ID Higher
localparam CMD_TYPE_READ = `AFU_IMAGE_CMD_TYPE_READ; localparam CMD_TYPE_READ = `AFU_IMAGE_CMD_TYPE_READ;
localparam CMD_TYPE_WRITE = `AFU_IMAGE_CMD_TYPE_WRITE; localparam CMD_TYPE_WRITE = `AFU_IMAGE_CMD_TYPE_WRITE;
localparam CMD_TYPE_RUN = `AFU_IMAGE_CMD_TYPE_RUN; localparam CMD_TYPE_RUN = `AFU_IMAGE_CMD_TYPE_RUN;
localparam CMD_TYPE_SNOOP = `AFU_IMAGE_CMD_TYPE_SNOOP; localparam CMD_TYPE_CLFLUSH = `AFU_IMAGE_CMD_TYPE_CLFLUSH;
localparam MMIO_CSR_CMD = `AFU_IMAGE_MMIO_CSR_CMD; localparam MMIO_CSR_CMD = `AFU_IMAGE_MMIO_CSR_CMD;
localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS; localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS;
@@ -52,13 +54,12 @@ localparam MMIO_CSR_DATA_SIZE = `AFU_IMAGE_MMIO_CSR_DATA_SIZE;
logic [127:0] afu_id = `AFU_ACCEL_UUID; logic [127:0] afu_id = `AFU_ACCEL_UUID;
typedef enum logic[2:0] { typedef enum logic[3:0] {
STATE_IDLE, STATE_IDLE,
STATE_READ, STATE_READ,
STATE_WRITE, STATE_WRITE,
STATE_RUN, STATE_RUN,
STATE_SNOOP1, STATE_CLFLUSH
STATE_SNOOP2
} state_t; } state_t;
state_t state; state_t state;
@@ -192,7 +193,8 @@ logic [31:0] cci_write_ctr;
logic [31:0] avs_read_ctr; logic [31:0] avs_read_ctr;
logic [31:0] avs_write_ctr; logic [31:0] avs_write_ctr;
logic [31:0] vx_snoop_ctr; logic [31:0] vx_snoop_ctr;
logic [31:0] vx_snoop_delay; logic [9:0] vx_snoop_delay;
logic [1:0] vx_snoop_level;
logic vx_reset; logic vx_reset;
always_ff @(posedge clk) always_ff @(posedge clk)
@@ -210,21 +212,21 @@ begin
STATE_IDLE: begin STATE_IDLE: begin
case (csr_cmd) case (csr_cmd)
CMD_TYPE_READ: begin CMD_TYPE_READ: begin
$display("%t: CMD READ: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size); $display("%t: STATE READ: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
state <= STATE_READ; state <= STATE_READ;
end end
CMD_TYPE_WRITE: begin CMD_TYPE_WRITE: begin
$display("%t: CMD WRITE: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size); $display("%t: STATE WRITE: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
state <= STATE_WRITE; state <= STATE_WRITE;
end end
CMD_TYPE_RUN: begin CMD_TYPE_RUN: begin
$display("%t: CMD START", $time); $display("%t: STATE START", $time);
vx_reset <= 1; vx_reset <= 1;
state <= STATE_RUN; state <= STATE_RUN;
end end
CMD_TYPE_SNOOP: begin CMD_TYPE_CLFLUSH: begin
$display("%t: CMD SNOOP: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size); $display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
state <= STATE_SNOOP1; state <= STATE_CLFLUSH;
end end
endcase endcase
end end
@@ -251,16 +253,8 @@ begin
end end
end end
STATE_SNOOP1: begin STATE_CLFLUSH: begin
if (vx_snoop_delay >= VX_SNOOPING_DELAY) if (vx_snoop_level >= VX_SNOOP_LEVELS)
begin
// TODO: Allow both RUN and SNOOP states to use the AVS bus
state <= STATE_SNOOP2;
end
end
STATE_SNOOP2: begin
if (vx_snoop_delay >= VX_SNOOPING_DELAY)
begin begin
state <= STATE_IDLE; state <= STATE_IDLE;
end end
@@ -322,7 +316,7 @@ begin
end end
end end
STATE_RUN: begin STATE_RUN, STATE_CLFLUSH: begin
if (vx_dram_req_read if (vx_dram_req_read
&& !vx_dram_req_delay) && !vx_dram_req_delay)
begin begin
@@ -350,15 +344,20 @@ begin
end end
// Vortex DRAM requests stalling // Vortex DRAM requests stalling
assign vx_dram_req_delay = !((STATE_RUN == state)
&& !avs_waitrequest
&& !avs_raq_full
&& !avs_rdq_full);
// Vortex DRAM fill response logic vortex_enabled;
always_comb always_comb
begin begin
vx_dram_fill_rsp = (STATE_RUN == state) && !avs_rdq_empty && vx_dram_fill_accept; vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
vx_dram_req_delay = !vortex_enabled || avs_waitrequest || avs_raq_full || avs_rdq_full;
end
// Vortex DRAM fill response
always_comb
begin
vx_dram_fill_rsp = vortex_enabled && !avs_rdq_empty && vx_dram_fill_accept;
vx_dram_fill_rsp_addr = (avs_raq_dout << 6); vx_dram_fill_rsp_addr = (avs_raq_dout << 6);
{>>{vx_dram_fill_rsp_data}} = avs_rdq_dout; {>>{vx_dram_fill_rsp_data}} = avs_rdq_dout;
end end
@@ -522,35 +521,39 @@ begin
vx_snp_req <= 0; vx_snp_req <= 0;
vx_snoop_ctr <= 0; vx_snoop_ctr <= 0;
vx_snoop_delay <= 0; vx_snoop_delay <= 0;
vx_snoop_level <= 0;
end end
else begin else begin
if (STATE_IDLE == state) if (STATE_IDLE == state)
begin begin
vx_snoop_ctr <= 0; vx_snoop_ctr <= 0;
vx_snoop_delay <= 0; vx_snoop_delay <= 0;
vx_snoop_level <= 0;
end end
vx_snp_req <= 0; vx_snp_req <= 0;
if ((STATE_SNOOP1 == state if ((STATE_CLFLUSH == state)
|| STATE_SNOOP2 == state)
&& vx_snoop_ctr < csr_data_size && vx_snoop_ctr < csr_data_size
&& vx_snp_req_delay) && vx_snoop_level < VX_SNOOP_LEVELS
&& !vx_snp_req_delay)
begin begin
vx_snp_req <= 1; vx_snp_req_addr <= (csr_mem_addr + vx_snoop_ctr) << 6;
vx_snoop_ctr <= vx_snoop_ctr + 1; vx_snp_req <= 1;
vx_snoop_ctr <= vx_snoop_ctr + 1;
end end
if ((vx_snoop_ctr >= csr_data_size) if ((vx_snoop_ctr == csr_data_size)
&& (vx_snoop_delay < VX_SNOOPING_DELAY)) && (vx_snoop_delay < VX_SNOOP_DELAY))
begin begin
vx_snoop_delay <= vx_snoop_delay + 1; vx_snoop_delay <= vx_snoop_delay + 1;
end end
if (vx_snoop_delay >= VX_SNOOPING_DELAY) if (vx_snoop_delay == VX_SNOOP_DELAY)
begin begin
vx_snoop_ctr <= 0; vx_snoop_ctr <= 0;
vx_snoop_delay <= 0; vx_snoop_delay <= 0;
vx_snoop_level <= vx_snoop_level + 1;
end end
end end
end end

View File

@@ -27,6 +27,7 @@ add wave -noupdate -label avs_raq_full /ase_top/ase_top_generic/platform_shim_cc
add wave -noupdate -label avs_rdq_full /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_full add wave -noupdate -label avs_rdq_full /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_full
add wave -noupdate -label avs_raq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_empty add wave -noupdate -label avs_raq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_empty
add wave -noupdate -label avs_rdq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_empty add wave -noupdate -label avs_rdq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_empty
add wave -noupdate -label vortex_enabled /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vortex_enabled
add wave -noupdate -label vx_reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/reset add wave -noupdate -label vx_reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/reset
add wave -noupdate -label vx_dram_req_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_read add wave -noupdate -label vx_dram_req_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_read
add wave -noupdate -label vx_dram_req_write /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_write add wave -noupdate -label vx_dram_req_write /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_write
@@ -49,7 +50,7 @@ add wave -noupdate -label warp_stalled {/ase_top/ase_top_generic/platform_shim_c
add wave -noupdate -label warp_lock {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_lock} add wave -noupdate -label warp_lock {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_lock}
add wave -noupdate -label use_active {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/use_active} add wave -noupdate -label use_active {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/use_active}
TreeUpdate [SetDefaultTree] TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 2} {620643200 ps} 0} WaveRestoreCursors {{Cursor 2} {293228800 ps} 0}
quietly wave cursor active 1 quietly wave cursor active 1
configure wave -namecolwidth 195 configure wave -namecolwidth 195
configure wave -valuecolwidth 100 configure wave -valuecolwidth 100
@@ -65,4 +66,4 @@ configure wave -griddelta 40
configure wave -timeline 0 configure wave -timeline 0
configure wave -timelineunits ps configure wave -timelineunits ps
update update
WaveRestoreZoom {620460856 ps} {620825544 ps} WaveRestoreZoom {293046456 ps} {293411144 ps}

View File

@@ -22,8 +22,8 @@
#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ #define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ
#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE #define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE
#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN #define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN
#define CMD_TYPE_SNOOP AFU_IMAGE_CMD_TYPE_SNOOP #define CMD_TYPE_CLFLUSH AFU_IMAGE_CMD_TYPE_CLFLUSH
#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4) #define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4)
#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4) #define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4)
@@ -313,7 +313,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_SNOOP)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
// Wait for the write operation to finish // Wait for the write operation to finish
if (vx_ready_wait(hdevice, -1) != 0) if (vx_ready_wait(hdevice, -1) != 0)

Binary file not shown.

View File

@@ -27,7 +27,7 @@ uint64_t shuffle(int i, uint64_t value) {
return (value << i) | (value & ((1 << i)-1));; return (value << i) | (value & ((1 << i)-1));;
} }
int run_memcpy_test(vx_buffer_h sbuf, int run_memcopy_test(vx_buffer_h sbuf,
vx_buffer_h dbuf, vx_buffer_h dbuf,
uint32_t address, uint32_t address,
uint64_t value, uint64_t value,
@@ -105,7 +105,7 @@ int run_snoop_test(vx_device_h device) {
// upload program // upload program
std::cout << "upload program" << std::endl; std::cout << "upload program" << std::endl;
ret = vx_upload_kernel_file(device, "rv32ui-p-lw.bin"); ret = vx_upload_kernel_file(device, "snooping.bin");
if (ret != 0) { if (ret != 0) {
return ret; return ret;
} }
@@ -124,9 +124,9 @@ int run_snoop_test(vx_device_h device) {
return ret; return ret;
} }
// send snooping request // flush the caches
std::cout << "flush the caches" << std::endl; std::cout << "flush the caches" << std::endl;
ret = vx_flush_caches(device, 0x80002000, 64); ret = vx_flush_caches(device, 0x10000000, 64*8);
if (ret != 0) { if (ret != 0) {
return ret; return ret;
} }
@@ -181,15 +181,15 @@ int main(int argc, char *argv[]) {
// run tests // run tests
if (0 == test || -1 == test) { if (0 == test || -1 == test) {
std::cout << "run memcpy test" << std::endl; std::cout << "run memcopy test" << std::endl;
ret = run_memcpy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1); ret = run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
if (ret != 0) { if (ret != 0) {
cleanup(); cleanup();
return ret; return ret;
} }
ret = run_memcpy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8); ret = run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
if (ret != 0) { if (ret != 0) {
cleanup(); cleanup();
return ret; return ret;

Binary file not shown.

View File

@@ -6,45 +6,47 @@ module VX_generic_queue_ll
parameter SIZE = 277 parameter SIZE = 277
) )
( (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire push, input wire push,
input wire[DATAW-1:0] in_data, input wire [DATAW-1:0] in_data,
input wire pop, input wire pop,
output wire[DATAW-1:0] out_data, output wire [DATAW-1:0] out_data,
output wire empty, output wire empty,
output wire full output wire full
); );
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
if (SIZE == 0) begin if (SIZE == 0) begin
assign empty = 1; assign empty = 1;
assign out_data = 0; assign out_data = 0;
assign full = 0; assign full = 0;
end else begin
`ifdef QUEUE_FORCE_MLAB end else begin // (SIZE > 0)
`ifdef QUEUE_FORCE_MLAB
(* syn_ramstyle = "mlab" *) reg[DATAW-1:0] data[SIZE-1:0]; (* syn_ramstyle = "mlab" *) reg[DATAW-1:0] data[SIZE-1:0];
`else `else
reg[DATAW-1:0] data[SIZE-1:0]; reg[ DATAW-1:0] data[SIZE-1:0];
`endif `endif
reg[DATAW-1:0] curr_r, head_r; reg [DATAW-1:0] head_r;
reg[$clog2(SIZE+1)-1:0] size_r; reg [$clog2(SIZE+1)-1:0] size_r;
reg[$clog2(SIZE)-1:0] wr_ctr_r; wire reading;
reg[$clog2(SIZE)-1:0] rd_ptr_r, rd_next_ptr_r; wire writing;
reg empty_r, full_r, bypass_r;
wire reading, writing;
assign reading = pop && !empty; assign reading = pop && !empty;
assign writing = push && !full; assign writing = push && !full;
if (SIZE == 1) begin if (SIZE == 1) begin
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
size_r <= 0; size_r <= 0;
head_r <= 0;
end else begin end else begin
if (writing && !reading) begin if (writing && !reading) begin
size_r <= 1; size_r <= 1;
@@ -59,9 +61,19 @@ module VX_generic_queue_ll
end end
assign out_data = head_r; assign out_data = head_r;
assign empty = (size_r == 0); assign empty = (size_r == 0);
assign full = (size_r != 0) && !pop; assign full = (size_r != 0) && !pop;
end else begin
end else begin // (SIZE > 1)
reg [DATAW-1:0] curr_r;
reg [$clog2(SIZE)-1:0] wr_ctr_r;
reg [$clog2(SIZE)-1:0] rd_ptr_r;
reg [$clog2(SIZE)-1:0] rd_next_ptr_r;
reg empty_r;
reg full_r;
reg bypass_r;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
wr_ctr_r <= 0; wr_ctr_r <= 0;
@@ -99,9 +111,10 @@ module VX_generic_queue_ll
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
rd_ptr_r <= 0; curr_r <= 0;
rd_ptr_r <= 0;
rd_next_ptr_r <= 1; rd_next_ptr_r <= 1;
bypass_r <= 0; bypass_r <= 0;
end else begin end else begin
if (reading) begin if (reading) begin
if (SIZE == 2) begin if (SIZE == 2) begin
@@ -123,7 +136,6 @@ module VX_generic_queue_ll
assign empty = empty_r; assign empty = empty_r;
assign full = full_r; assign full = full_r;
end end
end end
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */