`include "platform_if.vh" import local_mem_cfg_pkg::*; `include "afu_json_info.vh" `include "VX_define.vh" module vortex_afu #( parameter NUM_LOCAL_MEM_BANKS = 2 ) ( // global signals input clk, input SoftReset, // IF signals between CCI and AFU input t_if_ccip_Rx cp2af_sRxPort, output t_if_ccip_Tx af2cp_sTxPort, // Avalon signals for local memory access output t_local_mem_data avs_writedata, input t_local_mem_data avs_readdata, output t_local_mem_addr avs_address, input logic avs_waitrequest, output logic avs_write, output logic avs_read, output t_local_mem_byte_mask avs_byteenable, output t_local_mem_burst_cnt avs_burstcount, input avs_readdatavalid, output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select ); localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr); localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data); localparam DRAM_TAG_WIDTH = `L3DRAM_TAG_WIDTH; `STATIC_ASSERT(DRAM_ADDR_WIDTH == `L3DRAM_ADDR_WIDTH, "invalid vortex dram bus!") `STATIC_ASSERT(DRAM_LINE_WIDTH == `L3DRAM_LINE_WIDTH, "invalid vortex dram bus!") localparam AVS_RD_QUEUE_SIZE = 16; localparam CCI_RD_WINDOW_SIZE = 8; localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE; localparam VX_SNOOP_DELAY = 1000; localparam VX_SNOOP_LEVELS = 2; localparam AFU_ID_L = 16'h0002; // AFU ID Lower localparam AFU_ID_H = 16'h0004; // AFU ID Higher localparam CMD_TYPE_READ = `AFU_IMAGE_CMD_TYPE_READ; localparam CMD_TYPE_WRITE = `AFU_IMAGE_CMD_TYPE_WRITE; localparam CMD_TYPE_RUN = `AFU_IMAGE_CMD_TYPE_RUN; localparam CMD_TYPE_CLFLUSH = `AFU_IMAGE_CMD_TYPE_CLFLUSH; localparam MMIO_CSR_CMD = `AFU_IMAGE_MMIO_CSR_CMD; localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS; localparam MMIO_CSR_IO_ADDR = `AFU_IMAGE_MMIO_CSR_IO_ADDR; localparam MMIO_CSR_MEM_ADDR = `AFU_IMAGE_MMIO_CSR_MEM_ADDR; localparam MMIO_CSR_DATA_SIZE = `AFU_IMAGE_MMIO_CSR_DATA_SIZE; logic [127:0] afu_id = `AFU_ACCEL_UUID; typedef enum logic[3:0] { STATE_IDLE, STATE_READ, STATE_WRITE, STATE_START, STATE_RUN, STATE_CLFLUSH } state_t; typedef logic [`LOG2UP(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag; typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data; state_t state; // Vortex ports /////////////////////////////////////////////////////////////// logic vx_dram_req_read; logic vx_dram_req_write; logic [DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; logic [DRAM_LINE_WIDTH-1:0] vx_dram_req_data; logic [DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; logic vx_dram_req_ready; logic vx_dram_rsp_valid; logic [DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; logic [DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; logic vx_dram_rsp_ready; logic vx_snp_req_valid; logic [DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; logic vx_snp_req_ready; logic vx_busy; // AVS Queues ///////////////////////////////////////////////////////////////// logic avs_rtq_push; logic [DRAM_TAG_WIDTH-1:0] avs_rtq_din; logic avs_rtq_pop; logic [DRAM_TAG_WIDTH-1:0] avs_rtq_dout; logic avs_rtq_empty; logic avs_rtq_full; logic avs_rdq_push; t_local_mem_data avs_rdq_din; logic avs_rdq_pop; t_local_mem_data avs_rdq_dout; logic avs_rdq_empty; logic avs_rdq_full; // CSR variables ////////////////////////////////////////////////////////////// logic [2:0] csr_cmd; t_ccip_clAddr csr_io_addr; t_local_mem_addr csr_mem_addr; logic [DRAM_ADDR_WIDTH-1:0] csr_data_size; // MMIO controller //////////////////////////////////////////////////////////// t_ccip_c0_ReqMmioHdr mmioHdr; always_comb begin mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); end always_ff @(posedge clk) begin if (SoftReset) begin af2cp_sTxPort.c2.hdr <= 0; af2cp_sTxPort.c2.data <= 0; af2cp_sTxPort.c2.mmioRdValid <= 0; csr_cmd <= 0; csr_io_addr <= 0; csr_mem_addr <= 0; csr_data_size <= 0; end else begin csr_cmd <= 0; af2cp_sTxPort.c2.mmioRdValid <= 0; // serve MMIO write request if (cp2af_sRxPort.c0.mmioWrValid) begin case (mmioHdr.address) MMIO_CSR_IO_ADDR: begin csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); $display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data)); end MMIO_CSR_MEM_ADDR: begin csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data); $display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data)); end MMIO_CSR_DATA_SIZE: begin csr_data_size <= $bits(csr_data_size)'(cp2af_sRxPort.c0.data); $display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'(cp2af_sRxPort.c0.data)); end MMIO_CSR_CMD: begin csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data); $display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data)); end default: begin // user-defined CSRs //if (mmioHdr.addres >= MMIO_CSR_USER) begin // write Vortex CRS //end end endcase end // serve MMIO read requests if (cp2af_sRxPort.c0.mmioRdValid) begin af2cp_sTxPort.c2.hdr.tid <= mmioHdr.tid; // copy TID case (mmioHdr.address) // AFU header 16'h0000: af2cp_sTxPort.c2.data <= { 4'b0001, // Feature type = AFU 8'b0, // reserved 4'b0, // afu minor revision = 0 7'b0, // reserved 1'b1, // end of DFH list = 1 24'b0, // next DFH offset = 0 4'b0, // afu major revision = 0 12'b0 // feature ID = 0 }; AFU_ID_L: af2cp_sTxPort.c2.data <= afu_id[63:0]; // afu id low AFU_ID_H: af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi 16'h0006: af2cp_sTxPort.c2.data <= 64'h0; // next AFU 16'h0008: af2cp_sTxPort.c2.data <= 64'h0; // reserved MMIO_CSR_STATUS: begin if (state != af2cp_sTxPort.c2.data) begin $display("%t: STATUS: state=%0d", $time, state); end af2cp_sTxPort.c2.data <= state; end default: af2cp_sTxPort.c2.data <= 64'h0; endcase af2cp_sTxPort.c2.mmioRdValid <= 1; // post response end end end // COMMAND FSM //////////////////////////////////////////////////////////////// logic [DRAM_ADDR_WIDTH-1:0] cci_write_ctr; logic [DRAM_ADDR_WIDTH-1:0] avs_read_ctr; logic [DRAM_ADDR_WIDTH-1:0] avs_write_ctr; logic [DRAM_ADDR_WIDTH-1:0] snp_req_ctr; logic [9:0] snp_req_delay; logic vx_reset; always_ff @(posedge clk) begin if (SoftReset) begin state <= STATE_IDLE; vx_reset <= 0; end else begin vx_reset <= 0; case (state) STATE_IDLE: begin case (csr_cmd) CMD_TYPE_READ: begin $display("%t: STATE READ: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size); state <= STATE_READ; end CMD_TYPE_WRITE: begin $display("%t: STATE WRITE: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size); state <= STATE_WRITE; end CMD_TYPE_RUN: begin $display("%t: STATE START", $time); vx_reset <= 1; state <= STATE_START; end CMD_TYPE_CLFLUSH: begin $display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size); state <= STATE_CLFLUSH; end endcase end STATE_READ: begin if (cci_write_ctr >= csr_data_size) begin state <= STATE_IDLE; end end STATE_WRITE: begin if (avs_write_ctr >= csr_data_size) begin state <= STATE_IDLE; end end STATE_START: begin // vortex reset cycle state <= STATE_RUN; end STATE_RUN: begin if (!vx_busy) begin state <= STATE_IDLE; end end STATE_CLFLUSH: begin if (snp_req_delay >= VX_SNOOP_DELAY) begin state <= STATE_IDLE; end end endcase end end // AVS Controller ///////////////////////////////////////////////////////////// logic cci_rdq_empty; t_cci_rdq_data cci_rdq_dout; logic cci_rdq_pop; logic [DRAM_TAG_WIDTH-1:0] dram_req_tag; t_ccip_clAddr next_avs_address; always_comb begin next_avs_address = csr_mem_addr + {avs_write_ctr[DRAM_ADDR_WIDTH-1:$bits(t_cci_rdq_tag)], t_cci_rdq_tag'(cci_rdq_dout)}; cci_rdq_pop = (state == STATE_WRITE && !cci_rdq_empty && !avs_waitrequest && avs_write_ctr < csr_data_size); end always_ff @(posedge clk) begin if (SoftReset) begin mem_bank_select <= 0; avs_burstcount <= 1; avs_byteenable <= 64'hffffffffffffffff; avs_read <= 0; avs_write <= 0; avs_read_ctr <= 0; avs_write_ctr <= 0; end else begin avs_read <= 0; avs_write <= 0; case (state) STATE_IDLE: begin avs_read_ctr <= 0; avs_write_ctr <= 0; end STATE_READ: begin if (!avs_rtq_full && !avs_rdq_full && !avs_waitrequest && avs_read_ctr < csr_data_size) begin avs_address <= csr_mem_addr + avs_read_ctr; avs_read_ctr <= avs_read_ctr + 1; avs_read <= 1; $display("%t: AVS Rd Req: addr=%h", $time, csr_mem_addr + avs_read_ctr); end end STATE_WRITE: begin if (cci_rdq_pop) begin avs_writedata <= cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)]; avs_address <= next_avs_address; avs_write_ctr <= avs_write_ctr + 1; avs_write <= 1; $display("%t: AVS Wr Req: addr=%h (%0d/%0d)", $time, next_avs_address, avs_write_ctr + 1, csr_data_size); end end STATE_RUN, STATE_CLFLUSH: begin if (vx_dram_req_read && vx_dram_req_ready) begin avs_address <= vx_dram_req_addr; dram_req_tag <= vx_dram_req_tag; avs_read <= 1; $display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr); end if (vx_dram_req_write && vx_dram_req_ready) begin avs_address <= vx_dram_req_addr; avs_writedata <= vx_dram_req_data; avs_write <= 1; $display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr); end end endcase if (avs_readdatavalid) begin $display("%t: AVS Rd Rsp", $time); end end end // Vortex DRAM requests stalling logic vortex_enabled; always_comb begin vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state); vx_dram_req_ready = vortex_enabled && !avs_waitrequest && !avs_rtq_full && !avs_rdq_full; end // Vortex DRAM fill response always_comb begin vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty; vx_dram_rsp_tag = avs_rtq_dout; vx_dram_rsp_data = avs_rdq_dout; end // AVS address read request queue ///////////////////////////////////////////// logic cci_wr_req; always_comb begin avs_rtq_pop = vx_dram_rsp_valid || cci_wr_req; avs_rtq_din = dram_req_tag; avs_rtq_push = avs_read; end VX_generic_queue #( .DATAW(DRAM_TAG_WIDTH), .SIZE(AVS_RD_QUEUE_SIZE) ) avs_rd_req_queue ( .clk (clk), .reset (SoftReset), .push (avs_rtq_push), .data_in (avs_rtq_din), .pop (avs_rtq_pop), .data_out (avs_rtq_dout), .empty (avs_rtq_empty), .full (avs_rtq_full) ); // AVS data read response queue /////////////////////////////////////////////// always_comb begin avs_rdq_pop = avs_rtq_pop; avs_rdq_din = avs_readdata; avs_rdq_push = avs_readdatavalid; end VX_generic_queue #( .DATAW(DRAM_LINE_WIDTH), .SIZE(AVS_RD_QUEUE_SIZE) ) avs_rd_rsp_queue ( .clk (clk), .reset (SoftReset), .push (avs_rdq_push), .data_in (avs_rdq_din), .pop (avs_rdq_pop), .data_out (avs_rdq_dout), .empty (avs_rdq_empty), .full (avs_rdq_full) ); // CCI Read Request /////////////////////////////////////////////////////////// t_ccip_c0_ReqMemHdr cci_read_hdr; logic [DRAM_ADDR_WIDTH-1:0] cci_read_ctr; t_cci_rdq_tag cci_rdq_ctr; logic cci_rdq_full; logic cci_rdq_push; t_cci_rdq_data cci_rdq_din; logic cci_read_wait; always_comb begin cci_read_hdr = t_ccip_c0_ReqMemHdr'(0); cci_read_hdr.address = csr_io_addr + cci_read_ctr; cci_read_hdr.mdata = t_cci_rdq_tag'(cci_read_ctr); cci_rdq_push = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)}; end // Send read requests to CCI always_ff @(posedge clk) begin if (SoftReset) begin af2cp_sTxPort.c0.hdr <= 0; af2cp_sTxPort.c0.valid <= 0; cci_read_ctr <= 0; cci_rdq_ctr <= 0; cci_read_wait <= 0; end else begin af2cp_sTxPort.c0.valid <= 0; if (STATE_IDLE == state) begin cci_read_ctr <= 0; cci_rdq_ctr <= 0; cci_read_wait <= 0; end if (STATE_WRITE == state && !cp2af_sRxPort.c0TxAlmFull // ensure read queue not full && !cci_rdq_full // ensure destination queue not full && !cci_read_wait // ensure the last batch has arrived && cci_read_ctr < csr_data_size) // ensure not done begin af2cp_sTxPort.c0.hdr <= cci_read_hdr; af2cp_sTxPort.c0.valid <= 1; cci_read_ctr <= cci_read_ctr + 1; if (cci_read_ctr == (CCI_RD_WINDOW_SIZE-1)) begin cci_read_wait <= 1; // end current request batch end $display("%t: CCI Rd Req: addr=%h", $time, cci_read_hdr.address); end if (cci_rdq_push) begin cci_rdq_ctr <= cci_rdq_ctr + 1; if (cci_rdq_ctr == (CCI_RD_WINDOW_SIZE-1)) begin cci_read_wait <= 0; // restart new request batch end $display("%t: CCI Rd Rsp: idx=%d, ctr=%d", $time, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata), cci_rdq_ctr); end end end VX_generic_queue #( .DATAW($bits(t_ccip_clData) + $bits(t_cci_rdq_tag)), .SIZE(CCI_RD_QUEUE_SIZE) ) cci_rd_req_queue ( .clk (clk), .reset (SoftReset), .push (cci_rdq_push), .data_in (cci_rdq_din), .pop (cci_rdq_pop), .data_out (cci_rdq_dout), .empty (cci_rdq_empty), .full (cci_rdq_full) ); // CCI Write Request ////////////////////////////////////////////////////////// t_ccip_c1_ReqMemHdr cci_write_hdr; logic cci_write_wait; always_comb begin cci_wr_req = (STATE_READ == state) && !avs_rdq_empty && !cp2af_sRxPort.c1TxAlmFull && !cci_write_wait && cci_write_ctr < csr_data_size; cci_write_hdr = t_ccip_c1_ReqMemHdr'(0); cci_write_hdr.address = csr_io_addr + cci_write_ctr; cci_write_hdr.sop = 1; // single line write mode end // Send write requests to CCI always_ff @(posedge clk) begin if (SoftReset) begin af2cp_sTxPort.c1.hdr <= 0; af2cp_sTxPort.c1.data <= 0; af2cp_sTxPort.c1.valid <= 0; cci_write_ctr <= 0; cci_write_wait <= 0; end else begin af2cp_sTxPort.c1.valid <= 0; if (STATE_IDLE == state) begin cci_write_ctr <= 0; end if (cci_wr_req) begin af2cp_sTxPort.c1.hdr <= cci_write_hdr; af2cp_sTxPort.c1.data <= t_ccip_clData'(avs_rdq_dout); af2cp_sTxPort.c1.valid <= 1; cci_write_wait <= 1; $display("%t: CCI Wr Req: addr=%h", $time, cci_write_hdr.address); end if (cci_write_wait && cp2af_sRxPort.c1.rspValid) begin cci_write_ctr <= cci_write_ctr + 1; cci_write_wait <= 0; $display("%t: CCI Wr Rsp (%0d/%0d)", $time, cci_write_ctr + 1, csr_data_size); end end end // Vortex cache snooping ////////////////////////////////////////////////////// always_ff @(posedge clk) begin if (SoftReset) begin vx_snp_req_valid <= 0; snp_req_ctr <= 0; snp_req_delay <= 0; end else begin if (STATE_IDLE == state) begin snp_req_ctr <= 0; snp_req_delay <= 0; end vx_snp_req_valid <= 0; if ((STATE_CLFLUSH == state) && (snp_req_ctr < csr_data_size) && vx_snp_req_ready) begin vx_snp_req_addr <= csr_mem_addr + snp_req_ctr; vx_snp_req_valid <= 1; snp_req_ctr <= snp_req_ctr + 1; end if (snp_req_ctr == csr_data_size) begin snp_req_delay <= snp_req_delay + 1; end end end // Vortex binding ///////////////////////////////////////////////////////////// Vortex_Socket #() vx_socket ( .clk (clk), .reset (vx_reset), // DRAM request .dram_req_write (vx_dram_req_write), .dram_req_read (vx_dram_req_read), .dram_req_addr (vx_dram_req_addr), .dram_req_data (vx_dram_req_data), .dram_req_tag (vx_dram_req_tag), .dram_req_ready (vx_dram_req_ready), // DRAM response .dram_rsp_valid (vx_dram_rsp_valid), .dram_rsp_data (vx_dram_rsp_data), .dram_rsp_tag (vx_dram_rsp_tag), .dram_rsp_ready (vx_dram_rsp_ready), // Cache snooping .snp_req_valid (vx_snp_req_valid), .snp_req_addr (vx_snp_req_addr), .snp_req_ready (vx_snp_req_ready), // I/O request .io_req_read (), .io_req_write (), .io_req_addr (), .io_req_data (), .io_req_byteen (), .io_req_tag (), .io_req_ready (0), // I/O response .io_rsp_valid (0), .io_rsp_data (0), .io_rsp_tag (0), .io_rsp_ready (), // status .busy (vx_busy), .ebreak () ); endmodule