From e01c411b2043fe8f6ea1538e32f6521c599796d7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 1 Jun 2020 23:06:13 -0700 Subject: [PATCH 1/3] opae rtl fixes --- driver/rtlsim/Makefile | 1 + driver/tests/basic/common.h | 4 +- driver/tests/basic/kernel.bin | Bin 6548 -> 6556 bytes driver/tests/demo/demo.cpp | 28 +++- hw/Makefile | 4 +- hw/opae/vortex_afu.sv | 216 +++++++++++++++---------- hw/rtl/VX_define.vh | 1 + hw/rtl/VX_lsu_unit.v | 2 +- hw/rtl/Vortex.v | 2 +- hw/rtl/cache/VX_bank.v | 16 +- hw/rtl/cache/VX_cache_core_rsp_merge.v | 3 + hw/rtl/cache/VX_snp_forwarder.v | 2 +- hw/rtl/cache/VX_tag_data_access.v | 2 +- hw/rtl/cache/VX_tag_data_structure.v | 3 +- hw/rtl/libs/VX_generic_queue.v | 24 ++- hw/simulate/simulator.cpp | 5 +- 16 files changed, 192 insertions(+), 121 deletions(-) mode change 100755 => 100644 driver/tests/basic/kernel.bin diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 6627faba..c7d97a74 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -34,6 +34,7 @@ RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../ VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE) VL_FLAGS += -Wno-DECLFILENAME VL_FLAGS += --x-initial unique +VL_FLAGS += --x-assign unique # Enable Verilator multithreaded simulation #THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))') diff --git a/driver/tests/basic/common.h b/driver/tests/basic/common.h index 3fdb128c..69bd8c1c 100644 --- a/driver/tests/basic/common.h +++ b/driver/tests/basic/common.h @@ -1,8 +1,8 @@ #ifndef _COMMON_H_ #define _COMMON_H_ -#define DEV_MEM_SRC_ADDR 0x10000000 -#define DEV_MEM_DST_ADDR 0x20000000 +#define DEV_MEM_SRC_ADDR 0x10000040 +#define DEV_MEM_DST_ADDR 0x20000080 #define NUM_BLOCKS 64 #endif \ No newline at end of file diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin old mode 100755 new mode 100644 index e08ac81c4ea77f265c0f5ad0318a14af3f3bf6a6..cdd3dcc0bde6f3935f75856eca286890f4fe5168 GIT binary patch delta 285 zcmWlTF-yZh0EJ(!cR3;`Y0owmp%kNn;v$D+%_5F2Vo)4}jvZ9|0iH;Evz(xVP!Za? zN>JCfL_~1+Cpe~q2u^x^@c7{I4ev_c%Cl<*nAYaZ>Cz!-`cKFaPYdRw2?o&`jzoJn zl5Q_H*2^(i4J-v?^VX~6Ks|@T5e60EIDg@C5#i#w^;A(%Hetzd)YG>47>%Iv#AbB6A?b1|#LIe>a7k;73anapE>i!_S>rtaM!uw}>`LIA(?eU}L zxHK`ECuZ#Ivusy*?`E3>PG&l!Y806&Pc|=+-ft}SR^kRcB50O&$ZKgfgS~BYFb9>{ Su2rGT3A;)CiYn0KGSolM)=r`T delta 272 zcmbPZJjJ+vIx7Ri4wHfE z-ZMC8i34e2)>bK?I??wG4e~(Dkk8;CCd|nAUs+9%L0P2{sE$EUn1vNY3r%KcP}qEY zIg>H#7pCNZ4=m>F3{1xD&lx6*nKBr&FK1=g&dwk(u}7MdVRCyrgYu$n3==mfF}2EW zyduZQ))EF344eFeF-q1YOqkO%%$W111K1X&i44M#nhwbfoJ_(jE|c?_ltn=*I>9PX sWh6HrU|J(2vWEfaQXmMCU})F_#43^u4U-F5XKa=bYhY%{5M^ip0A5r{G5`Po diff --git a/driver/tests/demo/demo.cpp b/driver/tests/demo/demo.cpp index f2d40002..290d888e 100644 --- a/driver/tests/demo/demo.cpp +++ b/driver/tests/demo/demo.cpp @@ -91,7 +91,7 @@ int run_test(vx_device_h device, int ref = i + i; int cur = buf_ptr[i]; if (cur != ref) { - std::cout << "error at 0x" << std::hex << (buf_ptr + i) + std::cout << "error at value " << i << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; ++errors; } @@ -150,23 +150,39 @@ int main(int argc, char *argv[]) { RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); kernel_arg.dst_ptr = value; + std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; + std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + // allocate shared memory std::cout << "allocate shared memory" << std::endl; uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer)); - // populate source buffer values - std::cout << "populate source buffer values" << std::endl; + // populate source buffer0 values + std::cout << "populate source buffer0 values" << std::endl; { auto buf_ptr = (int*)vx_host_ptr(buffer); for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i; + buf_ptr[i] = i-1; } } - // upload source buffers - std::cout << "upload source buffers" << std::endl; + // upload source buffer0 + std::cout << "upload source buffer0" << std::endl; RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0)); + + // populate source buffer1 values + std::cout << "populate source buffer1 values" << std::endl; + { + auto buf_ptr = (int*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = i+1; + } + } + + // upload source buffer1 + std::cout << "upload source buffer1" << std::endl; RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0)); // upload kernel argument diff --git a/hw/Makefile b/hw/Makefile index be559a68..789e1628 100644 --- a/hw/Makefile +++ b/hw/Makefile @@ -4,8 +4,8 @@ CF += -std=c++11 -fms-extensions VF += --language 1800-2009 --assert -Wall -Wpedantic VF += -Wno-DECLFILENAME -VF += --x-initial unique - +VF += --x-initial unique +VF += --x-assign unique VF += -exe $(SRCS) $(INCLUDE) #MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4 diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 1d0cbdc7..e86dd071 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -1,6 +1,16 @@ +`ifndef NOPAE `include "platform_if.vh" import local_mem_cfg_pkg::*; `include "afu_json_info.vh" +`include "VX_define.vh" +`else +`include "vortex_afu.vh" +/* verilator lint_off IMPORTSTAR */ +import ccip_if_pkg::*; +import local_mem_cfg_pkg::*; +/* verilator lint_on IMPORTSTAR */ +`endif + `include "VX_define.vh" `define VX_TO_DRAM_ADDR(x) x[`VX_DRAM_ADDR_WIDTH-1:(`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH)] @@ -93,55 +103,68 @@ logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; logic vx_snp_req_ready; logic vx_snp_rsp_valid; +`DEBUG_BEGIN logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; +`DEBUG_END logic vx_snp_rsp_ready; +logic vx_reset; logic vx_busy; // AVS Queues ///////////////////////////////////////////////////////////////// logic avs_rtq_push; logic avs_rtq_pop; +`DEBUG_BEGIN logic avs_rtq_empty; logic avs_rtq_full; +`DEBUG_BEGIN logic avs_rdq_push; logic avs_rdq_pop; t_local_mem_data avs_rdq_dout; logic avs_rdq_empty; +`DEBUG_BEGIN logic avs_rdq_full; +`DEBUG_END // CSR variables ////////////////////////////////////////////////////////////// -logic [2:0] csr_cmd; -t_ccip_clAddr csr_io_addr; -t_local_mem_addr csr_mem_addr; -t_ccip_clAddr csr_data_size; +logic [2:0] csr_cmd; +t_ccip_clAddr csr_io_addr; +logic[DRAM_ADDR_WIDTH-1:0] csr_mem_addr; +logic[DRAM_ADDR_WIDTH-1:0] csr_data_size; // MMIO controller //////////////////////////////////////////////////////////// -t_ccip_c0_ReqMmioHdr mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); +`IGNORE_WARNINGS_BEGIN +t_ccip_c0_ReqMmioHdr mmio_hdr; +`IGNORE_WARNINGS_END +assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); + +t_if_ccip_c2_Tx mmio_tx; +assign af2cp_sTxPort.c2 = mmio_tx; always_ff @(posedge clk) begin if (SoftReset) begin - af2cp_sTxPort.c2.hdr <= 0; - af2cp_sTxPort.c2.data <= 0; - af2cp_sTxPort.c2.mmioRdValid <= 0; - csr_cmd <= 0; - csr_io_addr <= 0; - csr_mem_addr <= 0; - csr_data_size <= 0; + mmio_tx.hdr <= 0; + mmio_tx.data <= 0; + mmio_tx.mmioRdValid <= 0; + csr_cmd <= 0; + csr_io_addr <= 0; + csr_mem_addr <= 0; + csr_data_size <= 0; end else begin csr_cmd <= 0; - af2cp_sTxPort.c2.mmioRdValid <= 0; + mmio_tx.mmioRdValid <= 0; // serve MMIO write request if (cp2af_sRxPort.c0.mmioWrValid) begin - case (mmioHdr.address) + case (mmio_hdr.address) MMIO_CSR_IO_ADDR: begin csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); `ifdef DBG_PRINT_OPAE @@ -168,7 +191,7 @@ begin end default: begin // user-defined CSRs - //if (mmioHdr.addres >= MMIO_CSR_USER) begin + //if (mmio_hdr.addres >= MMIO_CSR_USER) begin // write Vortex CRS //end end @@ -177,10 +200,10 @@ begin // serve MMIO read requests if (cp2af_sRxPort.c0.mmioRdValid) begin - af2cp_sTxPort.c2.hdr.tid <= mmioHdr.tid; // copy TID - case (mmioHdr.address) + mmio_tx.hdr.tid <= mmio_hdr.tid; // copy TID + case (mmio_hdr.address) // AFU header - 16'h0000: af2cp_sTxPort.c2.data <= { + 16'h0000: mmio_tx.data <= { 4'b0001, // Feature type = AFU 8'b0, // reserved 4'b0, // afu minor revision = 0 @@ -190,37 +213,31 @@ begin 4'b0, // afu major revision = 0 12'b0 // feature ID = 0 }; - AFU_ID_L: af2cp_sTxPort.c2.data <= afu_id[63:0]; // afu id low - AFU_ID_H: af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi - 16'h0006: af2cp_sTxPort.c2.data <= 64'h0; // next AFU - 16'h0008: af2cp_sTxPort.c2.data <= 64'h0; // reserved + AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low + AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi + 16'h0006: mmio_tx.data <= 64'h0; // next AFU + 16'h0008: mmio_tx.data <= 64'h0; // reserved MMIO_CSR_STATUS: begin `ifdef DBG_PRINT_OPAE - if (state != af2cp_sTxPort.c2.data) begin + if (state != mmio_tx.data) begin $display("%t: STATUS: state=%0d", $time, state); end `endif - af2cp_sTxPort.c2.data <= state; + mmio_tx.data <= {60'b0, state}; end - default: af2cp_sTxPort.c2.data <= 64'h0; + default: mmio_tx.data <= 64'h0; endcase - af2cp_sTxPort.c2.mmioRdValid <= 1; // post response + mmio_tx.mmioRdValid <= 1; // post response end end end // COMMAND FSM //////////////////////////////////////////////////////////////// -t_ccip_clAddr cci_wr_req_ctr; -logic [DRAM_ADDR_WIDTH-1:0] avs_rd_req_ctr; -logic [DRAM_ADDR_WIDTH-1:0] avs_wr_req_ctr; -logic vx_reset; - logic cmd_read_done; logic cmd_write_done; logic cmd_clflush_done; - -logic cmd_run_done = !vx_busy; +logic cmd_run_done; always_ff @(posedge clk) begin @@ -260,6 +277,9 @@ begin `endif state <= STATE_CLFLUSH; end + default: begin + state <= state; + end endcase end @@ -291,6 +311,10 @@ begin end end + default: begin + state <= state; + end + endcase end end @@ -304,7 +328,9 @@ t_cci_rdq_data cci_rdq_dout; logic cci_dram_rd_req_fire; logic cci_dram_wr_req_fire; logic vx_dram_rd_req_fire; +`DEBUG_BEGIN logic vx_dram_wr_req_fire; +`DEBUG_END logic vx_dram_rd_rsp_fire; t_local_mem_byte_mask vx_dram_req_byteen_; @@ -315,15 +341,17 @@ logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; logic cci_dram_rd_req_enable, cci_dram_wr_req_enable; logic vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable; +logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr; + assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state); assign cci_dram_rd_req_enable = (state == STATE_READ) && (avs_pending_reads < AVS_RD_QUEUE_SIZE) - && (avs_rd_req_ctr != 0); + && (cci_dram_rd_req_ctr != 0); assign cci_dram_wr_req_enable = (state == STATE_WRITE) && !cci_rdq_empty - && (avs_wr_req_ctr != 0); + && (cci_dram_wr_req_ctr < csr_data_size); assign vx_dram_req_enable = vortex_enabled && (avs_pending_reads < AVS_RD_QUEUE_SIZE); assign vx_dram_rd_req_enable = vx_dram_req_enable && vx_dram_req_valid && ~vx_dram_req_rw; @@ -338,24 +366,22 @@ assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && ~avs_waitrequest; assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready; assign avs_pending_reads_next = avs_pending_reads - + ((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && ~avs_rdq_pop) ? 1 : - (~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0; - -assign cmd_write_done = (0 == avs_wr_req_ctr); + + (((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && ~avs_rdq_pop) ? 1 : + (~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0); if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin - assign vx_dram_req_offset = {{VX_DRAM_LINE_LW{1'b0}}, vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]} << VX_DRAM_LINE_LW; - assign vx_dram_req_byteen_ = vx_dram_req_byteen << ({(VX_DRAM_LINE_LW - 3)'(0), vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]} << (VX_DRAM_LINE_LW - 3)); + assign vx_dram_req_offset = ((DRAM_LINE_LW)'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0])) << VX_DRAM_LINE_LW; + assign vx_dram_req_byteen_ = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]) << (VX_DRAM_LINE_LW - 3)); end else begin assign vx_dram_req_offset = 0; - assign vx_dram_req_byteen_ = 64'hffffffffffffffff; + assign vx_dram_req_byteen_ = vx_dram_req_byteen; end always_comb begin case (state) CMD_TYPE_READ: avs_address = cci_dram_rd_req_addr; - CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr; + CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout))); default: avs_address = `VX_TO_DRAM_ADDR(vx_dram_req_addr); endcase @@ -367,51 +393,53 @@ begin case (state) CMD_TYPE_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)]; - default: avs_writedata = vx_dram_req_data << vx_dram_req_offset; + default: avs_writedata = (DRAM_LINE_WIDTH)'(vx_dram_req_data) << vx_dram_req_offset; endcase end assign avs_read = cci_dram_rd_req_enable || vx_dram_rd_req_enable; assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable; +assign cmd_write_done = (cci_dram_wr_req_ctr >= csr_data_size); + always_ff @(posedge clk) begin if (SoftReset) begin mem_bank_select <= 0; avs_burstcount <= 1; - avs_rd_req_ctr <= 0; - avs_wr_req_ctr <= 0; - avs_pending_reads <= 0; cci_dram_rd_req_addr <= 0; cci_dram_wr_req_addr <= 0; + cci_dram_rd_req_ctr <= 0; + cci_dram_wr_req_ctr <= 0; + avs_pending_reads <= 0; end else begin if (state == STATE_IDLE) begin if (CMD_TYPE_READ == csr_cmd) begin cci_dram_rd_req_addr <= csr_mem_addr; - avs_rd_req_ctr <= csr_data_size; + cci_dram_rd_req_ctr <= csr_data_size; end else if (CMD_TYPE_WRITE == csr_cmd) begin cci_dram_wr_req_addr <= csr_mem_addr; - avs_wr_req_ctr <= csr_data_size; + cci_dram_wr_req_ctr <= 0; end end if (cci_dram_rd_req_fire) begin cci_dram_rd_req_addr <= cci_dram_rd_req_addr + 1; - avs_rd_req_ctr <= avs_rd_req_ctr - 1; + cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - 1; `ifdef DBG_PRINT_OPAE - $display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (avs_rd_req_ctr - 1), avs_pending_reads_next); + $display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (cci_dram_rd_req_ctr - 1), avs_pending_reads_next); `endif end if (cci_dram_wr_req_fire) begin - cci_dram_wr_req_addr <= ((cci_dram_wr_req_addr + 1) & ~(CCI_RD_WINDOW_SIZE-1)) | t_cci_rdq_tag'(cci_rdq_dout); - avs_wr_req_ctr <= avs_wr_req_ctr - 1; + cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == (DRAM_ADDR_WIDTH)'(CCI_RD_WINDOW_SIZE-1)) ? (DRAM_ADDR_WIDTH)'(CCI_RD_WINDOW_SIZE) : 0); + cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + 1; `ifdef DBG_PRINT_OPAE - $display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (avs_wr_req_ctr - 1)); + $display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1)); `endif end @@ -441,7 +469,7 @@ assign vx_dram_req_ready = vx_dram_req_enable && !avs_waitrequest; assign vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty; if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin - assign vx_dram_rsp_data = (avs_rdq_dout >> vx_dram_rsp_offset); + assign vx_dram_rsp_data = (`VX_DRAM_LINE_WIDTH)'(avs_rdq_dout >> vx_dram_rsp_offset); end else begin assign vx_dram_rsp_data = avs_rdq_dout; end @@ -462,7 +490,8 @@ VX_generic_queue #( .pop (avs_rtq_pop), .data_out ({vx_dram_rsp_tag, vx_dram_rsp_offset}), .empty (avs_rtq_empty), - .full (avs_rtq_full) + .full (avs_rtq_full), + `UNUSED_PIN (size) ); // AVS data read response queue /////////////////////////////////////////////// @@ -483,25 +512,27 @@ VX_generic_queue #( .pop (avs_rdq_pop), .data_out (avs_rdq_dout), .empty (avs_rdq_empty), - .full (avs_rdq_full) + .full (avs_rdq_full), + `UNUSED_PIN (size) ); // CCI-P Read Request /////////////////////////////////////////////////////////// logic [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads, cci_pending_reads_next; -t_ccip_clAddr cci_rd_req_addr, cci_rd_req_ctr, cci_rd_req_ctr_next; +logic [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr, cci_rd_req_ctr_next; +t_ccip_clAddr cci_rd_req_addr; t_cci_rdq_tag cci_rd_rsp_ctr; logic cci_rd_req_fire, cci_rd_rsp_fire; logic cci_rd_req_enable, cci_rd_req_wait; -logic cci_rdq_full, cci_rdq_push, cci_rdq_pop; +logic cci_rdq_push, cci_rdq_pop; t_cci_rdq_data cci_rdq_din; always_comb begin af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0); af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr; - af2cp_sTxPort.c0.hdr.mdata = t_cci_rdq_tag'(cci_rd_req_ctr); + af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(t_cci_rdq_tag'(cci_rd_req_ctr)); end assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull; @@ -514,8 +545,8 @@ assign cci_rdq_push = cci_rd_rsp_fire; assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)}; assign cci_pending_reads_next = cci_pending_reads - + (cci_rd_req_fire && ~cci_rdq_pop) ? 1 : - (~cci_rd_req_fire && cci_rdq_pop) ? -1 : 0; + + ((cci_rd_req_fire && ~cci_rdq_pop) ? 1 : + (~cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && ~cci_rd_req_wait; @@ -549,7 +580,7 @@ begin if (cci_rd_req_fire) begin cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr_next; - if (t_cci_rdq_tag'(cci_rd_req_ctr) == (CCI_RD_WINDOW_SIZE-1)) begin + if (t_cci_rdq_tag'(cci_rd_req_ctr) == t_cci_rdq_tag'(CCI_RD_WINDOW_SIZE-1)) begin cci_rd_req_wait <= 1; // end current request batch end `ifdef DBG_PRINT_OPAE @@ -559,7 +590,7 @@ begin if (cci_rd_rsp_fire) begin cci_rd_rsp_ctr <= cci_rd_rsp_ctr + 1; - if (cci_rd_rsp_ctr == (CCI_RD_WINDOW_SIZE-1)) begin + if (cci_rd_rsp_ctr == t_cci_rdq_tag'(CCI_RD_WINDOW_SIZE-1)) begin cci_rd_req_wait <= 0; // restart new request batch end `ifdef DBG_PRINT_OPAE @@ -589,12 +620,14 @@ VX_generic_queue #( .pop (cci_rdq_pop), .data_out (cci_rdq_dout), .empty (cci_rdq_empty), - .full (cci_rdq_full) + `UNUSED_PIN (full), + `UNUSED_PIN (size) ); // CCI-P Write Request ////////////////////////////////////////////////////////// logic [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes, cci_pending_writes_next; +logic [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; t_ccip_clAddr cci_wr_req_addr; logic cci_wr_req_enable, cci_wr_rsp_fire; @@ -609,8 +642,8 @@ assign cci_wr_req_fire = af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull; assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; assign cci_pending_writes_next = cci_pending_writes - + (cci_wr_req_fire && ~cci_wr_rsp_fire) ? 1 : - (~cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0; + + ((cci_wr_req_fire && ~cci_wr_rsp_fire) ? 1 : + (~cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0); assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); @@ -660,7 +693,8 @@ end logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size; logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr; +logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_req_ctr_next; +logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr, snp_rsp_ctr_next; logic vx_snp_req_fire, vx_snp_rsp_fire; @@ -674,6 +708,10 @@ end assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready; assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready; + +assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + 1) : snp_req_ctr; +assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - 1) : snp_rsp_ctr; + assign cmd_clflush_done = (0 == snp_rsp_ctr); always_ff @(posedge clk) @@ -691,38 +729,40 @@ begin if ((STATE_IDLE == state) && (CMD_TYPE_CLFLUSH == csr_cmd)) begin vx_snp_req_addr <= snp_req_baseaddr; - snp_req_ctr <= snp_req_size; + vx_snp_req_tag <= 0; + snp_req_ctr <= 0; snp_rsp_ctr <= snp_req_size; vx_snp_req_valid <= (snp_req_size != 0); vx_snp_rsp_ready <= (snp_req_size != 0); end if ((STATE_CLFLUSH == state) - && (0 == snp_rsp_ctr)) begin - vx_snp_rsp_ready <= 0; + && (snp_req_ctr_next >= snp_req_size)) begin + vx_snp_req_valid <= 0; end if ((STATE_CLFLUSH == state) - && (0 == snp_req_ctr)) begin - vx_snp_req_valid <= 0; + && (0 == snp_rsp_ctr_next)) begin + vx_snp_rsp_ready <= 0; end if (vx_snp_req_fire) begin + assert(snp_req_ctr < snp_req_size); vx_snp_req_addr <= vx_snp_req_addr + 1; - vx_snp_req_tag <= snp_req_ctr[`VX_SNP_TAG_WIDTH-1:0]; - snp_req_ctr <= snp_req_ctr - 1; + vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next); + snp_req_ctr <= snp_req_ctr_next; `ifdef DBG_PRINT_OPAE - $display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), vx_snp_req_tag, (snp_req_ctr - 1)); + $display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next), (snp_req_size - snp_req_ctr_next)); `endif end if ((STATE_CLFLUSH == state) && vx_snp_rsp_fire) begin assert(snp_rsp_ctr != 0); - snp_rsp_ctr <= snp_rsp_ctr - 1; + snp_rsp_ctr <= snp_rsp_ctr_next; `ifdef DBG_PRINT_OPAE - $display("%t: AFU Snp Rsp: tag=%0d, rem=%0d", $time, vx_snp_rsp_tag, (snp_rsp_ctr - 1)); + $display("%t: AFU Snp Rsp: tag=%0d, rem=%0d", $time, vx_snp_rsp_tag, snp_rsp_ctr_next); `endif end end @@ -730,6 +770,8 @@ end // Vortex binding ///////////////////////////////////////////////////////////// +assign cmd_run_done = !vx_busy; + Vortex_Socket #() vx_socket ( .clk (clk), .reset (vx_reset), @@ -761,23 +803,23 @@ Vortex_Socket #() vx_socket ( .snp_rsp_ready (vx_snp_rsp_ready), // I/O request - .io_req_valid (), - .io_req_rw (), - .io_req_byteen (), - .io_req_addr (), - .io_req_data (), - .io_req_tag (), + `UNUSED_PIN (io_req_valid), + `UNUSED_PIN (io_req_rw), + `UNUSED_PIN (io_req_byteen), + `UNUSED_PIN (io_req_addr), + `UNUSED_PIN (io_req_data), + `UNUSED_PIN (io_req_tag), .io_req_ready (1), // I/O response .io_rsp_valid (0), .io_rsp_data (0), .io_rsp_tag (0), - .io_rsp_ready (), + `UNUSED_PIN (io_rsp_ready), // status .busy (vx_busy), - .ebreak () + `UNUSED_PIN (ebreak) ); -endmodule +endmodule \ No newline at end of file diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index cd4c5029..45033d57 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -48,6 +48,7 @@ `define CLOG2(x) $clog2(x) `define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0)) `define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1) +`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1)))) `define MIN(x, y) ((x < y) ? (x) : (y)) `define MAX(x, y) ((x > y) ? (x) : (y)) diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 8b460b96..4a387d81 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -70,7 +70,7 @@ module VX_lsu_unit #( for (i = 0; i < `NUM_THREADS; ++i) begin assign mem_req_addr[i] = use_address[i][31:2]; - assign mem_req_offset[i] = {3'b0, use_address[i][1:0]} << 3; + assign mem_req_offset[i] = (5'(use_address[i][1:0])) << 3; assign mem_req_byteen[i] = (wmask << use_address[i][1:0]); assign mem_req_data[i] = (use_store_data[i] << mem_req_offset[i]); end diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 6861eca8..e285ab74 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -123,7 +123,7 @@ module Vortex #( assign io_req_tag = io_core_req_if.core_req_tag[0]; assign io_core_req_if.core_req_ready = io_req_ready; - assign io_core_rsp_if.core_rsp_valid = {{`NUM_THREADS-1{1'b0}}, io_rsp_valid}; + assign io_core_rsp_if.core_rsp_valid = {{(`NUM_THREADS-1){1'b0}}, io_rsp_valid}; assign io_core_rsp_if.core_rsp_data[0] = io_rsp_data; assign io_core_rsp_if.core_rsp_tag = io_rsp_tag; assign io_rsp_ready = io_core_rsp_if.core_rsp_ready; diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 5485ce18..f0280aff 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -718,12 +718,12 @@ module VX_bank #( `ifdef DBG_PRINT_CACHE_BANK if (NUM_BANKS == 1) begin always_ff @(posedge clk) begin - /*if (core_req_valid && core_req_ready) begin + if (core_req_valid && core_req_ready) begin $display("%t: bank%01d%01d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(core_req_addr), core_req_tag); end if (core_rsp_valid && core_rsp_ready) begin $display("%t: bank%01d%01d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); - end*/ + end if (dram_fill_req_valid && dram_fill_req_ready) begin $display("%t: bank%01d%01d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_req_addr)); end @@ -733,21 +733,21 @@ module VX_bank #( if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin $display("%t: bank%01d%01d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_rsp_addr), dram_fill_rsp_data); end - /*if (snp_req_valid && snp_req_ready) begin + if (snp_req_valid && snp_req_ready) begin $display("%t: bank%01d%01d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(snp_req_addr), snp_req_tag); end if (snp_rsp_valid && snp_rsp_ready) begin $display("%t: bank%01d%01d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); - end*/ + end end end else begin always_ff @(posedge clk) begin - /*if ((|core_req_valid) && core_req_ready) begin + if ((|core_req_valid) && core_req_ready) begin $display("%t: bank%01d%01d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag); end if (core_rsp_valid && core_rsp_ready) begin $display("%t: bank%01d%01d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); - end*/ + end if (dram_fill_req_valid && dram_fill_req_ready) begin $display("%t: bank%01d%01d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID)); end @@ -757,12 +757,12 @@ module VX_bank #( if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin $display("%t: bank%01d%01d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data); end - /*if (snp_req_valid && snp_req_ready) begin + if (snp_req_valid && snp_req_ready) begin $display("%t: bank%01d%01d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_tag); end if (snp_rsp_valid && snp_rsp_ready) begin $display("%t: bank%01d%01d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); - end*/ + end end end `endif diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index c72a5c8b..27fd9e76 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -46,6 +46,7 @@ module VX_cache_core_rsp_merge #( assign core_rsp_tag = per_bank_core_rsp_tag[main_bank_index]; always @(*) begin core_rsp_valid = 0; + core_rsp_data = 0; for (i = 0; i < NUM_BANKS; i++) begin if (per_bank_core_rsp_valid[i] && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin @@ -60,6 +61,8 @@ module VX_cache_core_rsp_merge #( end else begin always @(*) begin core_rsp_valid = 0; + core_rsp_data = 0; + core_rsp_tag = 0; for (i = 0; i < NUM_BANKS; i++) begin if (per_bank_core_rsp_valid[i] && !core_rsp_valid[per_bank_core_rsp_tid[i]] diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 2d8570dc..fe8ac9ba 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -97,7 +97,7 @@ module VX_snp_forwarder #( always @(posedge clk) begin if (reset) begin fwdin_sel <= 0; - end else begin + end else if (NUM_REQUESTS > 1) begin fwdin_sel <= fwdin_sel + 1; end end diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index 37ad7bc9..97397e1c 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -136,7 +136,7 @@ module VX_tag_data_access #( end assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || ~DRAM_ENABLE; // If shared memory, always valid - assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE; // Dirty only applies in Dcache + assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writetag_st1e; // Tag is always the same in SM assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1]; assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1]; diff --git a/hw/rtl/cache/VX_tag_data_structure.v b/hw/rtl/cache/VX_tag_data_structure.v index b76ef266..1d32f37d 100644 --- a/hw/rtl/cache/VX_tag_data_structure.v +++ b/hw/rtl/cache/VX_tag_data_structure.v @@ -33,7 +33,7 @@ module VX_tag_data_structure #( reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0][7:0] data [`BANK_LINE_COUNT-1:0]; reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0]; reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0]; - reg dirty[`BANK_LINE_COUNT-1:0]; + reg [`BANK_LINE_COUNT-1:0] dirty; reg [`BANK_LINE_COUNT-1:0] valid; assign read_valid = valid [read_addr]; @@ -49,6 +49,7 @@ module VX_tag_data_structure #( if (reset) begin for (i = 0; i < `BANK_LINE_COUNT; i++) begin valid[i] <= 0; + dirty[i] <= 0; end end else if (!stall_bank_pipe) begin if (do_write) begin diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 40f130bb..bb7f4639 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -15,6 +15,8 @@ module VX_generic_queue #( output wire full, output wire [`LOG2UP(SIZE+1)-1:0] size ); + `STATIC_ASSERT(0 == SIZE || `ISPOW2(SIZE), "must be 0 or power of 2!"); + if (SIZE == 0) begin assign empty = 1; @@ -88,6 +90,7 @@ module VX_generic_queue #( if (writing) begin data[wr_ptr_a] <= data_in; wr_ptr_r <= wr_ptr_r + 1; + if (!reading) begin size_r <= size_r + 1; end @@ -120,16 +123,17 @@ module VX_generic_queue #( always @(posedge clk) begin if (reset) begin - size_r <= 0; - empty_r <= 1; - full_r <= 0; wr_ptr_r <= 0; rd_ptr_r <= 0; rd_ptr_next_r <= 1; + empty_r <= 1; + full_r <= 0; + size_r <= 0; end else begin if (writing) begin data[wr_ptr_r] <= data_in; wr_ptr_r <= wr_ptr_r + 1; + if (!reading) begin empty_r <= 0; if (size_r == SIZE-1) begin @@ -140,15 +144,17 @@ module VX_generic_queue #( end if (reading) begin - rd_ptr_r <= rd_ptr_next_r; - if (SIZE == 2) begin - rd_ptr_next_r <= ~rd_ptr_next_r; - end else if (SIZE > 2) begin + rd_ptr_r <= rd_ptr_next_r; + + if (SIZE > 2) begin rd_ptr_next_r <= rd_ptr_r + 2; + end else begin // (SIZE == 2); + rd_ptr_next_r <= ~rd_ptr_next_r; end if (!writing) begin if (size_r == 1) begin + assert(rd_ptr_next_r == wr_ptr_r); empty_r <= 1; end; full_r <= 0; @@ -156,7 +162,9 @@ module VX_generic_queue #( end end - bypass_r <= writing && (empty_r || (1 == size_r) && reading); + bypass_r <= writing + && (empty_r || ((1 == size_r) && reading)); // empty or about to go empty + curr_r <= data_in; head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r]; end diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index f8314a34..a02c820a 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -10,9 +10,8 @@ double sc_time_stamp() { } Simulator::Simulator() { - // force random values for unitialized signals - const char* args[] = {"", "+verilator+rand+reset+2", "+verilator+seed+50"}; - Verilated::commandArgs(3, args); + // force random values for unitialized signals + Verilated::randReset(2); ram_ = nullptr; vortex_ = new VVortex_Socket(); From 9b186dcc6e4e4e3a86e0e3e1b1020a07c042ba0c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 2 Jun 2020 05:32:50 -0700 Subject: [PATCH 2/3] fixed L2 cache --- hw/opae/README | 2 +- hw/opae/sources.txt | 15 ++++++------ hw/rtl/cache/VX_bank.v | 38 +++++++++++++++++------------- hw/rtl/cache/VX_cache_config.vh | 6 ++--- hw/rtl/cache/VX_cache_miss_resrv.v | 4 ++-- hw/rtl/cache/VX_tag_data_access.v | 18 ++++++++------ 6 files changed, 46 insertions(+), 37 deletions(-) diff --git a/hw/opae/README b/hw/opae/README index ffcf562b..d5aadd97 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -82,4 +82,4 @@ ps -u tinebp kill -9 # fixing device resource busy issue when deleting /build_ase/ -lsof +D build_ase \ No newline at end of file +- \ No newline at end of file diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 83a93aca..7b75973f 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -7,6 +7,7 @@ vortex_afu.json +define+NUM_CORES=2 +define+NUM_WARPS=4 +define+NUM_THREADS=4 ++define+L2_ENABLE=0 +define+DNUM_BANKS=4 +define+INUM_BANKS=1 @@ -16,13 +17,13 @@ vortex_afu.json +define+IDFPQ_SIZE=16 +define+SDFPQ_SIZE=0 -+define+DBG_PRINT_CORE_ICACHE -+define+DBG_PRINT_CORE_DCACHE -+define+DBG_PRINT_CACHE_BANK -+define+DBG_PRINT_CACHE_SNP -+define+DBG_PRINT_CACHE_MSRQ -+define+DBG_PRINT_DRAM -+define+DBG_PRINT_OPAE +#+define+DBG_PRINT_CORE_ICACHE +#+define+DBG_PRINT_CORE_DCACHE +#+define+DBG_PRINT_CACHE_BANK +#+define+DBG_PRINT_CACHE_SNP +#+define+DBG_PRINT_CACHE_MSRQ +#+define+DBG_PRINT_DRAM +#+define+DBG_PRINT_OPAE +incdir+. +incdir+../rtl diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index f0280aff..1ff01d33 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -230,7 +230,7 @@ module VX_bank #( wire mrvq_valid_st0; wire[`REQS_BITS-1:0] mrvq_tid_st0; wire [`LINE_ADDR_WIDTH-1:0] mrvq_addr_st0; - wire [`WORD_SELECT_WIDTH-1:0] mrvq_wsel_st0; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] mrvq_wsel_st0; wire [`WORD_WIDTH-1:0] mrvq_writeword_st0; wire [`REQ_TAG_WIDTH-1:0] mrvq_tag_st0; wire mrvq_rw_st0; @@ -287,7 +287,7 @@ module VX_bank #( wire qual_is_fill_st0; wire qual_valid_st0; wire [`LINE_ADDR_WIDTH-1:0] qual_addr_st0; - wire [`WORD_SELECT_WIDTH-1:0] qual_wsel_st0; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] qual_wsel_st0; wire qual_from_mrvq_st0; wire [`WORD_WIDTH-1:0] qual_writeword_st0; @@ -298,7 +298,7 @@ module VX_bank #( wire valid_st1 [STAGE_1_CYCLES-1:0]; wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0]; - wire [`WORD_SELECT_WIDTH-1:0] wsel_st1 [STAGE_1_CYCLES-1:0]; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1 [STAGE_1_CYCLES-1:0]; wire [`WORD_WIDTH-1:0] writeword_st1 [STAGE_1_CYCLES-1:0]; wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0]; wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0]; @@ -313,18 +313,22 @@ module VX_bank #( mrvq_pop_unqual ? mrvq_addr_st0 : reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : snrq_pop_unqual ? snrq_addr_st0 : - 0; - - assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] : - mrvq_pop_unqual ? mrvq_wsel_st0 : - 0; + 0; + if (`WORD_SELECT_WIDTH != 0) begin + assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] : + mrvq_pop_unqual ? mrvq_wsel_st0 : + 0; + end else begin + `UNUSED_VAR(mrvq_wsel_st0) + assign qual_wsel_st0 = 0; + end assign qual_writedata_st0 = dfpq_pop_unqual ? dfpq_filldata_st0 : 57; assign qual_inst_meta_st0 = mrvq_pop_unqual ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_rw_st0, mrvq_byteen_st0, mrvq_tid_st0} : reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_rw_st0, reqq_req_byteen_st0, reqq_req_tid_st0} : snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} : - 0; + 0; assign qual_going_to_write_st0 = dfpq_pop_unqual ? 1 : (mrvq_pop_unqual && mrvq_rw_st0) ? 1 : @@ -333,11 +337,11 @@ module VX_bank #( assign qual_is_snp_st0 = mrvq_pop_unqual ? mrvq_is_snp_st0 : snrq_pop_unqual ? 1 : - 0; + 0; assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 : reqq_pop_unqual ? reqq_req_writeword_st0 : - 0; + 0; assign qual_from_mrvq_st0 = mrvq_pop_unqual; @@ -348,7 +352,7 @@ module VX_bank #( ) VX_generic_register #( - .N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `WORD_SELECT_WIDTH + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) + .N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) ) s0_1_c0 ( .clk (clk), .reset (reset), @@ -361,7 +365,7 @@ module VX_bank #( genvar i; for (i = 1; i < STAGE_1_CYCLES; i++) begin VX_generic_register #( - .N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `WORD_SELECT_WIDTH + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) + .N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) ) s0_1_cc ( .clk (clk), .reset(reset), @@ -428,7 +432,7 @@ module VX_bank #( .valid_req_st1e (valid_st1e), .writefill_st1e (is_fill_st1[STAGE_1_CYCLES-1]), .writeaddr_st1e (addr_st1[STAGE_1_CYCLES-1]), - .writewsel_st1e (wsel_st1[STAGE_1_CYCLES-1]), + .wordsel_st1e (wsel_st1[STAGE_1_CYCLES-1]), .writeword_st1e (writeword_st1[STAGE_1_CYCLES-1]), .writedata_st1e (writedata_st1[STAGE_1_CYCLES-1]), @@ -458,7 +462,7 @@ module VX_bank #( wire from_mrvq_st1e_st2 = from_mrvq_st1e && !is_snp_st1e; wire valid_st2; - wire [`WORD_SELECT_WIDTH-1:0] wsel_st2; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2; wire [`WORD_WIDTH-1:0] writeword_st2; wire [`WORD_WIDTH-1:0] readword_st2; wire [`BANK_LINE_WIDTH-1:0] readdata_st2; @@ -478,7 +482,7 @@ module VX_bank #( wire mrvq_init_ready_state_hazard_st1e_st1; VX_generic_register #( - .N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `WORD_SELECT_WIDTH + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH) + .N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH) ) st_1e_2 ( .clk (clk), .reset(reset), @@ -512,7 +516,7 @@ module VX_bank #( assign recover_mrvq_state_st2 = miss_add && from_mrvq_st2; wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2; - wire [`WORD_SELECT_WIDTH-1:0] miss_add_wsel = wsel_st2; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel = wsel_st2; wire [`WORD_WIDTH-1:0] miss_add_data = writeword_st2; assign {miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_tid} = inst_meta_st2; wire miss_add_is_snp = is_snp_st2; diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index 52af265e..7d163a73 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -8,8 +8,8 @@ // tag rw byteen tid `define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS) -// data metadata word_sel is_snp -`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `WORD_SELECT_WIDTH + 1) +// data metadata word_sel is_snp +`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1) `define REQS_BITS `LOG2UP(NUM_REQUESTS) @@ -48,7 +48,7 @@ `define TAG_SELECT_ADDR_START (1+`LINE_SELECT_ADDR_END) `define TAG_SELECT_ADDR_END 31 -`define WORD_SELECT_WIDTH `LOG2UP(`BANK_LINE_WORDS) +`define WORD_SELECT_WIDTH `CLOG2(`BANK_LINE_WORDS) `define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE)) diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 260b0098..23631381 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -25,7 +25,7 @@ module VX_cache_miss_resrv #( input wire miss_add, input wire from_mrvq, input wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr, - input wire[`WORD_SELECT_WIDTH-1:0] miss_add_wsel, + input wire[`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel, input wire[`WORD_WIDTH-1:0] miss_add_data, input wire[`REQS_BITS-1:0] miss_add_tid, input wire[`REQ_TAG_WIDTH-1:0] miss_add_tag, @@ -46,7 +46,7 @@ module VX_cache_miss_resrv #( input wire miss_resrv_pop, output wire miss_resrv_valid_st0, output wire[`LINE_ADDR_WIDTH-1:0] miss_resrv_addr_st0, - output wire[`WORD_SELECT_WIDTH-1:0] miss_resrv_wsel_st0, + output wire[`UP(`WORD_SELECT_WIDTH)-1:0] miss_resrv_wsel_st0, output wire[`WORD_WIDTH-1:0] miss_resrv_data_st0, output wire[`REQS_BITS-1:0] miss_resrv_tid_st0, output wire[`REQ_TAG_WIDTH-1:0] miss_resrv_tag_st0, diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index 97397e1c..00279c0f 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -38,7 +38,7 @@ module VX_tag_data_access #( `IGNORE_WARNINGS_BEGIN input wire mem_rw_st1e, input wire[WORD_SIZE-1:0] mem_byteen_st1e, - input wire[`WORD_SELECT_WIDTH-1:0] writewsel_st1e, + input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_st1e, `IGNORE_WARNINGS_END output wire[`WORD_WIDTH-1:0] readword_st1e, @@ -141,7 +141,11 @@ module VX_tag_data_access #( assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1]; assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1]; - assign readword_st1e = use_read_data_st1e[writewsel_st1e * `WORD_WIDTH +: `WORD_WIDTH]; + if (`WORD_SELECT_WIDTH != 0) begin + assign readword_st1e = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH]; + end else begin + assign readword_st1e = use_read_data_st1e; + end wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] we; wire [`BANK_LINE_WIDTH-1:0] data_write; @@ -150,15 +154,15 @@ module VX_tag_data_access #( && valid_req_st1e && use_read_valid_st1e && !miss_st1e - && !is_snp_st1e; + && !is_snp_st1e + && !real_writefill; for (i = 0; i < `BANK_LINE_WORDS; i++) begin - wire normal_write = ((writewsel_st1e == `WORD_SELECT_WIDTH'(i)) || (`BANK_LINE_WORDS == 1)) - && should_write - && !real_writefill; + wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i))) + && should_write; assign we[i] = real_writefill ? {WORD_SIZE{1'b1}} : - normal_write ? mem_byteen_st1e: + normal_write ? mem_byteen_st1e : {WORD_SIZE{1'b0}}; assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1e; From 04fc34b8489b556093468a318f2c83d61b076110 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 3 Jun 2020 03:05:45 -0700 Subject: [PATCH 3/3] minor update --- driver/opae/vortex.cpp | 6 +- hw/opae/sources.txt | 2 +- hw/rtl/VX_config.vh | 32 +-- hw/rtl/VX_dmem_ctrl.v | 14 +- hw/rtl/cache/VX_cache.v | 6 +- hw/rtl/cache/VX_cache_miss_resrv.v | 4 +- hw/rtl/libs/VX_generic_queue.v | 260 ++++++++---------- hw/syn/quartus/top/Makefile | 8 +- .../quartus/top/{vortex.sdc => project.sdc} | 0 hw/syn/quartus/vortex/Makefile | 6 +- .../vortex/{vortex.sdc => project.sdc} | 0 hw/syn/quartus/vortex/timing.tcl | 3 +- 12 files changed, 165 insertions(+), 176 deletions(-) rename hw/syn/quartus/top/{vortex.sdc => project.sdc} (100%) rename hw/syn/quartus/vortex/{vortex.sdc => project.sdc} (100%) diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index eba1a7c5..d6f018df 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -246,8 +246,12 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { for (;;) { CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_STATUS, &data)); - if (0 == data || 0 == timeout) + if (0 == data || 0 == timeout) { + if (data != 0) { + fprintf(stdout, "ready-wait timed out: status=%ld\n", data); + } break; + } nanosleep(&sleep_time, nullptr); timeout -= sleep_time_ms; }; diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 7b75973f..664af3ca 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -7,7 +7,7 @@ vortex_afu.json +define+NUM_CORES=2 +define+NUM_WARPS=4 +define+NUM_THREADS=4 -+define+L2_ENABLE=0 ++define+L2_ENABLE=1 +define+DNUM_BANKS=4 +define+INUM_BANKS=1 diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index bee56a25..1c3628e7 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -12,7 +12,7 @@ `endif `ifndef NUM_WARPS -`define NUM_WARPS 8 +`define NUM_WARPS 4 `endif `ifndef NUM_THREADS @@ -87,7 +87,7 @@ // Number of banks {1, 2, 4, 8,...} `ifndef DNUM_BANKS -`define DNUM_BANKS 8 +`define DNUM_BANKS 4 `endif // Size of a word in bytes @@ -107,12 +107,12 @@ // Miss Reserv Queue Knob `ifndef DMRVQ_SIZE -`define DMRVQ_SIZE (`NUM_WARPS*`NUM_THREADS) +`define DMRVQ_SIZE `MAX(`NUM_WARPS*`NUM_THREADS, 8) `endif // Dram Fill Rsp Queue Size `ifndef DDFPQ_SIZE -`define DDFPQ_SIZE 32 +`define DDFPQ_SIZE 16 `endif // Snoop Req Queue Size @@ -137,7 +137,7 @@ // Prefetcher `ifndef DPRFQ_SIZE -`define DPRFQ_SIZE 32 +`define DPRFQ_SIZE 16 `endif `ifndef DPRFQ_STRIDE @@ -178,12 +178,12 @@ // Miss Reserv Queue Knob `ifndef IMRVQ_SIZE -`define IMRVQ_SIZE `ICREQ_SIZE +`define IMRVQ_SIZE `MAX(`ICREQ_SIZE, 8) `endif // Dram Fill Rsp Queue Size `ifndef IDFPQ_SIZE -`define IDFPQ_SIZE 32 +`define IDFPQ_SIZE 16 `endif // Core Writeback Queue Size @@ -203,7 +203,7 @@ // Prefetcher `ifndef IPRFQ_SIZE -`define IPRFQ_SIZE 32 +`define IPRFQ_SIZE 16 `endif `ifndef IPRFQ_STRIDE @@ -276,17 +276,17 @@ // Core Request Queue Size `ifndef L2CREQ_SIZE -`define L2CREQ_SIZE 32 +`define L2CREQ_SIZE 16 `endif // Miss Reserv Queue Knob `ifndef L2MRVQ_SIZE -`define L2MRVQ_SIZE 32 +`define L2MRVQ_SIZE `MAX(`L2CREQ_SIZE, 8) `endif // Dram Fill Rsp Queue Size `ifndef L2DFPQ_SIZE -`define L2DFPQ_SIZE 32 +`define L2DFPQ_SIZE 16 `endif // Snoop Req Queue Size @@ -311,7 +311,7 @@ // Prefetcher `ifndef L2PRFQ_SIZE -`define L2PRFQ_SIZE 32 +`define L2PRFQ_SIZE 16 `endif `ifndef L2PRFQ_STRIDE @@ -347,17 +347,17 @@ // Core Request Queue Size `ifndef L3CREQ_SIZE -`define L3CREQ_SIZE 32 +`define L3CREQ_SIZE 16 `endif // Miss Reserv Queue Knob `ifndef L3MRVQ_SIZE -`define L3MRVQ_SIZE `L3CREQ_SIZE +`define L3MRVQ_SIZE `MAX(`L3CREQ_SIZE, 8) `endif // Dram Fill Rsp Queue Size `ifndef L3DFPQ_SIZE -`define L3DFPQ_SIZE 32 +`define L3DFPQ_SIZE 16 `endif // Snoop Req Queue Size @@ -382,7 +382,7 @@ // Prefetcher `ifndef L3PRFQ_SIZE -`define L3PRFQ_SIZE 32 +`define L3PRFQ_SIZE 16 `endif `ifndef L3PRFQ_STRIDE diff --git a/hw/rtl/VX_dmem_ctrl.v b/hw/rtl/VX_dmem_ctrl.v index b7ecf5eb..c34bd055 100644 --- a/hw/rtl/VX_dmem_ctrl.v +++ b/hw/rtl/VX_dmem_ctrl.v @@ -60,13 +60,13 @@ module VX_dmem_ctrl # ( .NUM_REQUESTS (`SNUM_REQUESTS), .STAGE_1_CYCLES (`SSTAGE_1_CYCLES), .CREQ_SIZE (`SCREQ_SIZE), - .MRVQ_SIZE (1), - .DFPQ_SIZE (0), - .SNRQ_SIZE (0), + .MRVQ_SIZE (8), + .DFPQ_SIZE (1), + .SNRQ_SIZE (1), .CWBQ_SIZE (`SCWBQ_SIZE), - .DWBQ_SIZE (0), - .DFQQ_SIZE (0), - .PRFQ_SIZE (0), + .DWBQ_SIZE (1), + .DFQQ_SIZE (1), + .PRFQ_SIZE (1), .PRFQ_STRIDE (0), .SNOOP_FORWARDING (0), .DRAM_ENABLE (0), @@ -223,7 +223,7 @@ module VX_dmem_ctrl # ( .CREQ_SIZE (`ICREQ_SIZE), .MRVQ_SIZE (`IMRVQ_SIZE), .DFPQ_SIZE (`IDFPQ_SIZE), - .SNRQ_SIZE (0), + .SNRQ_SIZE (1), .CWBQ_SIZE (`ICWBQ_SIZE), .DWBQ_SIZE (`IDWBQ_SIZE), .DFQQ_SIZE (`IDFQQ_SIZE), diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 64e31d28..59fbcd2f 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -125,13 +125,13 @@ module VX_cache #( `DEBUG_BLOCK( wire[31:0] debug_core_req_use_pc; - wire[1:0] debug_core_req_wb; - wire[2:0] debug_core_req_rmask; + wire[1:0] debug_core_req_wb; wire[4:0] debug_core_req_rd; wire[`NW_BITS-1:0] debug_core_req_warp_num; + wire[`LOG2UP(CREQ_SIZE)-1:0] debug_core_req_idx; if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin - assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rmask, debug_core_req_rd, debug_core_req_warp_num} = core_req_tag[0]; + assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0]; end ) wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid; diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 23631381..2636b344 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -64,8 +64,10 @@ module VX_cache_miss_resrv #( reg [`LOG2UP(MRVQ_SIZE+1)-1:0] size; + `STATIC_ASSERT(MRVQ_SIZE > 5, "invalid size"); + assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE)); - assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); + assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-1)); wire enqueue_possible = !miss_resrv_full; wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index bb7f4639..5176685c 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -15,166 +15,150 @@ module VX_generic_queue #( output wire full, output wire [`LOG2UP(SIZE+1)-1:0] size ); - `STATIC_ASSERT(0 == SIZE || `ISPOW2(SIZE), "must be 0 or power of 2!"); + `STATIC_ASSERT(`ISPOW2(SIZE), "must be 0 or power of 2!"); - if (SIZE == 0) begin + reg [`LOG2UP(SIZE+1)-1:0] size_r; + wire reading; + wire writing; - assign empty = 1; - assign data_out = 0; - assign full = 0; - assign size = 0; + assign reading = pop && !empty; + assign writing = push && !full; - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - `UNUSED_VAR (push) - `UNUSED_VAR (pop) - `UNUSED_VAR (data_in) + if (SIZE == 1) begin // (SIZE == 1) + + reg [DATAW-1:0] head_r; + + always @(posedge clk) begin + if (reset) begin + head_r <= 0; + size_r <= 0; + end else begin + if (writing && !reading) begin + size_r <= 1; + end else if (reading && !writing) begin + size_r <= 0; + end + + if (writing) begin + head_r <= data_in; + end + end + end + + assign data_out = head_r; + assign empty = (size_r == 0); + assign full = (size_r != 0); + assign size = size_r; + + end else begin // (SIZE > 1) - end else begin // (SIZE > 0) - `ifdef QUEUE_FORCE_MLAB (* syn_ramstyle = "mlab" *) reg [DATAW-1:0] data [SIZE-1:0]; `else reg [DATAW-1:0] data [SIZE-1:0]; `endif - reg [`LOG2UP(SIZE+1)-1:0] size_r; - wire reading; - wire writing; + if (0 == BUFFERED_OUTPUT) begin - assign reading = pop && !empty; - assign writing = push && !full; + reg [`LOG2UP(SIZE):0] wr_ptr_r; + reg [`LOG2UP(SIZE):0] rd_ptr_r; - if (SIZE == 1) begin // (SIZE == 1) - - reg [DATAW-1:0] head_r; + wire [`LOG2UP(SIZE)-1:0] wr_ptr_a = wr_ptr_r[`LOG2UP(SIZE)-1:0]; + wire [`LOG2UP(SIZE)-1:0] rd_ptr_a = rd_ptr_r[`LOG2UP(SIZE)-1:0]; always @(posedge clk) begin if (reset) begin - head_r <= 0; - size_r <= 0; + rd_ptr_r <= 0; + wr_ptr_r <= 0; + size_r <= 0; end else begin - if (writing && !reading) begin - size_r <= 1; - end else if (reading && !writing) begin - size_r <= 0; + if (writing) begin + data[wr_ptr_a] <= data_in; + wr_ptr_r <= wr_ptr_r + 1; + + if (!reading) begin + size_r <= size_r + 1; + end end - if (writing) begin - head_r <= data_in; + if (reading) begin + rd_ptr_r <= rd_ptr_r + 1; + if (!writing) begin + size_r <= size_r - 1; + end end + end + end + + assign data_out = data[rd_ptr_a]; + assign empty = (wr_ptr_r == rd_ptr_r); + assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[`LOG2UP(SIZE)] != rd_ptr_r[`LOG2UP(SIZE)]); + assign size = size_r; + + end else begin + + reg [DATAW-1:0] head_r; + reg [DATAW-1:0] curr_r; + reg [`LOG2UP(SIZE)-1:0] wr_ptr_r; + reg [`LOG2UP(SIZE)-1:0] rd_ptr_r; + reg [`LOG2UP(SIZE)-1:0] rd_ptr_next_r; + reg empty_r; + reg full_r; + reg bypass_r; + + always @(posedge clk) begin + if (reset) begin + wr_ptr_r <= 0; + rd_ptr_r <= 0; + rd_ptr_next_r <= 1; + empty_r <= 1; + full_r <= 0; + size_r <= 0; + end else begin + if (writing) begin + data[wr_ptr_r] <= data_in; + wr_ptr_r <= wr_ptr_r + 1; + + if (!reading) begin + empty_r <= 0; + if (size_r == SIZE-1) begin + full_r <= 1; + end + size_r <= size_r + 1; + end + end + + if (reading) begin + rd_ptr_r <= rd_ptr_next_r; + + if (SIZE > 2) begin + rd_ptr_next_r <= rd_ptr_r + 2; + end else begin // (SIZE == 2); + rd_ptr_next_r <= ~rd_ptr_next_r; + end + + if (!writing) begin + if (size_r == 1) begin + assert(rd_ptr_next_r == wr_ptr_r); + empty_r <= 1; + end; + full_r <= 0; + size_r <= size_r - 1; + end + end + + bypass_r <= writing + && (empty_r || ((1 == size_r) && reading)); // empty or about to go empty + + curr_r <= data_in; + head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r]; end - end + end - assign data_out = head_r; - assign empty = (size_r == 0); - assign full = (size_r != 0); + assign data_out = bypass_r ? curr_r : head_r; + assign empty = empty_r; + assign full = full_r; assign size = size_r; - - end else begin // (SIZE > 1) - - if (0 == BUFFERED_OUTPUT) begin - - reg [`LOG2UP(SIZE):0] wr_ptr_r; - reg [`LOG2UP(SIZE):0] rd_ptr_r; - - wire [`LOG2UP(SIZE)-1:0] wr_ptr_a = wr_ptr_r[`LOG2UP(SIZE)-1:0]; - wire [`LOG2UP(SIZE)-1:0] rd_ptr_a = rd_ptr_r[`LOG2UP(SIZE)-1:0]; - - always @(posedge clk) begin - if (reset) begin - rd_ptr_r <= 0; - wr_ptr_r <= 0; - size_r <= 0; - end else begin - if (writing) begin - data[wr_ptr_a] <= data_in; - wr_ptr_r <= wr_ptr_r + 1; - - if (!reading) begin - size_r <= size_r + 1; - end - end - - if (reading) begin - rd_ptr_r <= rd_ptr_r + 1; - if (!writing) begin - size_r <= size_r - 1; - end - end - end - end - - assign data_out = data[rd_ptr_a]; - assign empty = (wr_ptr_r == rd_ptr_r); - assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[`LOG2UP(SIZE)] != rd_ptr_r[`LOG2UP(SIZE)]); - assign size = size_r; - - end else begin - - reg [DATAW-1:0] head_r; - reg [DATAW-1:0] curr_r; - reg [`LOG2UP(SIZE)-1:0] wr_ptr_r; - reg [`LOG2UP(SIZE)-1:0] rd_ptr_r; - reg [`LOG2UP(SIZE)-1:0] rd_ptr_next_r; - reg empty_r; - reg full_r; - reg bypass_r; - - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= 0; - rd_ptr_r <= 0; - rd_ptr_next_r <= 1; - empty_r <= 1; - full_r <= 0; - size_r <= 0; - end else begin - if (writing) begin - data[wr_ptr_r] <= data_in; - wr_ptr_r <= wr_ptr_r + 1; - - if (!reading) begin - empty_r <= 0; - if (size_r == SIZE-1) begin - full_r <= 1; - end - size_r <= size_r + 1; - end - end - - if (reading) begin - rd_ptr_r <= rd_ptr_next_r; - - if (SIZE > 2) begin - rd_ptr_next_r <= rd_ptr_r + 2; - end else begin // (SIZE == 2); - rd_ptr_next_r <= ~rd_ptr_next_r; - end - - if (!writing) begin - if (size_r == 1) begin - assert(rd_ptr_next_r == wr_ptr_r); - empty_r <= 1; - end; - full_r <= 0; - size_r <= size_r - 1; - end - end - - bypass_r <= writing - && (empty_r || ((1 == size_r) && reading)); // empty or about to go empty - - curr_r <= data_in; - head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r]; - end - end - - assign data_out = bypass_r ? curr_r : head_r; - assign empty = empty_r; - assign full = full_r; - assign size = size_r; - end end end diff --git a/hw/syn/quartus/top/Makefile b/hw/syn/quartus/top/Makefile index 62b5cdd9..8feaf127 100644 --- a/hw/syn/quartus/top/Makefile +++ b/hw/syn/quartus/top/Makefile @@ -1,6 +1,6 @@ -PROJECT = Vortex_Socket -TOP_LEVEL_ENTITY = Vortex_Socket -SRC_FILE = Vortex_Socket.v +PROJECT = vortex_afu +TOP_LEVEL_ENTITY = vortex_afu +SRC_FILE = vortex_afu.sv PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Part, Family @@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache" + quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/top/vortex.sdc b/hw/syn/quartus/top/project.sdc similarity index 100% rename from hw/syn/quartus/top/vortex.sdc rename to hw/syn/quartus/top/project.sdc diff --git a/hw/syn/quartus/vortex/Makefile b/hw/syn/quartus/vortex/Makefile index 370d7320..62b5cdd9 100644 --- a/hw/syn/quartus/vortex/Makefile +++ b/hw/syn/quartus/vortex/Makefile @@ -1,6 +1,6 @@ -PROJECT = Vortex -TOP_LEVEL_ENTITY = Vortex -SRC_FILE = Vortex.v +PROJECT = Vortex_Socket +TOP_LEVEL_ENTITY = Vortex_Socket +SRC_FILE = Vortex_Socket.v PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Part, Family diff --git a/hw/syn/quartus/vortex/vortex.sdc b/hw/syn/quartus/vortex/project.sdc similarity index 100% rename from hw/syn/quartus/vortex/vortex.sdc rename to hw/syn/quartus/vortex/project.sdc diff --git a/hw/syn/quartus/vortex/timing.tcl b/hw/syn/quartus/vortex/timing.tcl index d5408ad1..411379dc 100644 --- a/hw/syn/quartus/vortex/timing.tcl +++ b/hw/syn/quartus/vortex/timing.tcl @@ -1,4 +1,4 @@ -project_open Vortex +project_open Vortex_Socket set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL @@ -6,7 +6,6 @@ create_timing_netlist read_sdc update_timing_netlist - foreach_in_collection op [get_available_operating_conditions] { set_operating_conditions $op