fixed FPU handshake, optimized writeback's critical path
This commit is contained in:
@@ -1,603 +0,0 @@
|
|||||||
//
|
|
||||||
// Copyright (c) 2017, Intel Corporation
|
|
||||||
// All rights reserved.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// Redistributions of source code must retain the above copyright notice, this
|
|
||||||
// list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// Neither the name of the Intel Corporation nor the names of its contributors
|
|
||||||
// may be used to endorse or promote products derived from this software
|
|
||||||
// without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
// POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
|
|
||||||
// Read from the memory locations first and then write to the memory locations
|
|
||||||
|
|
||||||
`include "platform_if.vh"
|
|
||||||
`include "afu_json_info.vh"
|
|
||||||
|
|
||||||
|
|
||||||
module ccip_std_afu
|
|
||||||
(
|
|
||||||
// CCI-P Clocks and Resets
|
|
||||||
input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock
|
|
||||||
input logic pClkDiv2, // 200MHz - CCI-P clock domain.
|
|
||||||
input logic pClkDiv4, // 100MHz - CCI-P clock domain.
|
|
||||||
input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
|
|
||||||
input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
|
|
||||||
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
|
|
||||||
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
|
|
||||||
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
|
|
||||||
|
|
||||||
// Interface structures
|
|
||||||
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
|
|
||||||
output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// Run the entire design at the standard CCI-P frequency (400 MHz).
|
|
||||||
//
|
|
||||||
// Local aliases for the CCI-P primary clock and the active-high soft reset.
logic clk, reset;
assign clk   = pClk;
assign reset = pck_cp2af_softReset;

// Cache-line-wide (512-bit) data registers: rd_data is captured from the
// c0 Rx data bus, wr_data is the value sent out on the c1 Tx data bus.
logic [511:0] rd_data;
logic [511:0] wr_data;

// Control flags shared between the CSR logic, the state machine and the
// read/write request logic further down in this module.
logic get_write_addr;   // 1 until the write address CSR has been received
logic do_update;        // handshake between read capture and the FSM
logic rd_end_of_list;   // read counter reached its terminal value
logic rd_needed;        // a read request is pending
logic wr_needed;        // a write request is pending

// Number of read requests issued so far.
logic [15:0] cnt_list_length;
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Register requests.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
//
|
|
||||||
// The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
|
|
||||||
// registered. Here we register pck_cp2af_sRx and assign it to sRx.
|
|
||||||
// We also assign pck_af2cp_sTx to sTx here but don't register it.
|
|
||||||
// The code below never uses combinational logic to write sTx.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Registered copy of the incoming CCI-P Rx port and the (unregistered)
// alias of the outgoing Tx port.
t_if_ccip_Rx sRx;
t_if_ccip_Tx sTx;

// sTx is forwarded to the port as-is.
assign pck_af2cp_sTx = sTx;

// One pipeline stage on everything received from the FIU.
always_ff @(posedge clk)
    sRx <= pck_cp2af_sRx;
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// CSR (MMIO) handling.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
// AFU identifier. The value comes from the AFU_ACCEL_UUID macro, which is
// provided by afu_json_info.vh (generated from the AFU's JSON file).
logic [127:0] afu_id = `AFU_ACCEL_UUID;

//
// A valid AFU must implement a device feature list starting at MMIO
// address 0. Each entry begins with five 64-bit words: a device feature
// header, two AFU UUID words and two reserved words.
//

// MMIO request decode, all taken from the registered Rx port.
logic is_csr_read;                  // MMIO read request present this cycle
logic is_csr_write;                 // MMIO write request present this cycle
t_ccip_c0_ReqMmioHdr mmio_req_hdr;  // c0 Rx header viewed as an MMIO header

assign is_csr_read  = sRx.c0.mmioRdValid;
assign is_csr_write = sRx.c0.mmioWrValid;

// The MMIO request header is overlaid on the c0 response header, so the
// same bits are simply reinterpreted with a cast.
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// Implement the device feature list by responding to MMIO reads.
|
|
||||||
//
|
|
||||||
|
|
||||||
// MMIO read responder: every MMIO read gets a response one cycle later.
// Only the device feature header and the two AFU ID words return non-zero
// data; every other address (including the reserved words at 6 and 8)
// reads back as zero.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c2.mmioRdValid <= 1'b0;
    end
    else
    begin
        // Respond to each read request, echoing its transaction ID so the
        // host can match the response to the request.
        sTx.c2.mmioRdValid <= is_csr_read;
        sTx.c2.hdr.tid <= mmio_req_hdr.tid;

        // Default response data; the case below overrides it (last
        // non-blocking assignment wins) for the implemented registers.
        sTx.c2.data <= t_ccip_mmioData'(0);

        // MMIO addresses index 32-bit objects, so 64-bit registers sit at
        // even addresses.
        case (mmio_req_hdr.address)
          // AFU DFH (device feature header): a trivial one-entry list.
          0:
            begin
                // Feature type is AFU
                sTx.c2.data[63:60] <= 4'h1;
                // End of list (last entry in list)
                sTx.c2.data[40] <= 1'b1;
            end

          // AFU_ID_L
          2: sTx.c2.data <= afu_id[63:0];

          // AFU_ID_H
          4: sTx.c2.data <= afu_id[127:64];

          // DFH_RSVD0 (6), DFH_RSVD1 (8) and anything else: zero
          default: ;
        endcase
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// CSR write handling. Host software must tell the AFU the memory address
|
|
||||||
// to which it should be writing. The address is set by writing a CSR.
|
|
||||||
//
|
|
||||||
|
|
||||||
// We use MMIO address 0 to set the memory address. The read and
|
|
||||||
// write MMIO spaces are logically separate so we are free to use
|
|
||||||
// whatever we like. This may not be good practice for cleanly
|
|
||||||
// organizing the MMIO address space, but it is legal.
|
|
||||||
// True when the host writes MMIO address 0 while the AFU is still waiting
// for the write address (get_write_addr is set).
logic is_mem_addr_csr_write;
assign is_mem_addr_csr_write =
    is_csr_write &&
    (mmio_req_hdr.address == t_ccip_mmioAddr'(0)) &&
    get_write_addr;

// Memory address to which this AFU will write.
t_ccip_clAddr write_mem_addr;

// Latch the write address from the first qualifying CSR write after reset.
// get_write_addr is cleared at the same time and is only set again by
// reset, so later writes to MMIO address 0 no longer match this decode.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        get_write_addr <= 1'b1;
    end
    else
    begin
        if (is_mem_addr_csr_write)
        begin
            get_write_addr <= 1'b0;
            write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
        end
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
//
//   Main AFU logic
//
// =========================================================================

//
// States of the read / update / write-back sequence. The type and the
// state register are declared ahead of the read-address CSR logic because
// that logic needs to observe the current state.
//
typedef enum logic [1:0]
{
    STATE_IDLE,
    STATE_READ,
    STATE_UPDATE,
    STATE_WRITE
}
t_state;

t_state state;


//
// Read-address CSR. MMIO address 0 is shared with the write address: a
// write to address 0 is treated as the read address once get_write_addr
// has been cleared, i.e. after the write address has been received.
//
logic is_mem_addr_csr_read;
assign is_mem_addr_csr_read = !get_write_addr && is_csr_write &&
                              (mmio_req_hdr.address == t_ccip_mmioAddr'(0));

// Memory address from which this AFU will read, and the flag that kicks
// off the sequence.
logic start_read;
t_ccip_clAddr read_mem_addr;

// start_read is owned entirely by this process. Previously the state
// machine cleared it from a second always_ff, which gave the variable two
// procedural drivers. The clear condition is unchanged: state is
// STATE_READ and rd_needed is set. A CSR write in the same cycle takes
// priority over the clear.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        start_read <= 1'b0;
    end
    else if (is_mem_addr_csr_read)
    begin
        read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
        start_read <= 1'b1;
    end
    else if ((state == STATE_READ) && rd_needed)
    begin
        start_read <= 1'b0;
    end
end


//
// State machine.
//
//   IDLE   -> READ    when start_read is set
//   READ   -> UPDATE  when rd_needed is set
//   UPDATE -> WRITE   when do_update is set
//   WRITE  -> IDLE    when rd_end_of_list is set,
//          -> READ    otherwise, once wr_needed is set
//
// This process writes only `state`. rd_end_of_list is not reset here: it
// is reassigned on every clock by the read-address logic below, so
// resetting it in this process as well would create a second driver.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        state <= STATE_IDLE;
    end
    else
    begin
        case (state)
          STATE_IDLE:
            begin
                // The sequence begins once the read address CSR has been
                // written (start_read set).
                if (start_read)
                begin
                    state <= STATE_READ;
                    $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
                end
            end

          STATE_READ:
            begin
                if (rd_needed)
                begin
                    // A read has been scheduled; move on to the update step.
                    state <= STATE_UPDATE;
                    $display("AFU reading data and pointing to next read address...");
                end
            end

          STATE_UPDATE:
            begin
                // Wait for the read-capture logic to signal do_update.
                if (do_update)
                begin
                    state <= STATE_WRITE;
                    $display("AFU performing comutations on the read values...");
                end
            end

          STATE_WRITE:
            begin
                // Finish when the end of the list has been reached;
                // otherwise go back for another read once a write has
                // been scheduled.
                if (rd_end_of_list)
                begin
                    state <= STATE_IDLE;
                    $display("AFU done...");
                end
                else
                begin
                    if (wr_needed)
                    begin
                        state <= STATE_READ;
                        $display("AFU reading again from read address...");
                    end
                end
            end
        endcase
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Read logic.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
//
|
|
||||||
// READ REQUEST
|
|
||||||
//
|
|
||||||
|
|
||||||
// Registered copy of the c1 (write) response-valid flag; used below as
// one of the triggers for the next read request.
logic addr_next_valid;

// Next read address.
t_ccip_clAddr addr_next;

always_ff @(posedge clk)
begin
    // End of list: the read counter equals 'h10 (hexadecimal, i.e. 16
    // decimal).
    // NOTE(review): the original comment said "read 10 times" -- confirm
    // whether 10 or 16 reads are intended.
    rd_end_of_list <= (cnt_list_length == 16'h0010);

    // Placeholder increment of zero: the address is simply held.
    // NOTE(review): addr_next is never loaded anywhere in this block;
    // presumably a real stride was meant to replace the 0 -- confirm.
    addr_next <= addr_next + 0;

    // A write response arrived in the previous cycle.
    addr_next_valid <= sRx.c1.rspValid;
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Since back pressure may prevent an immediate read request, we must
|
|
||||||
// record whether a read is needed and hold it until the request can
|
|
||||||
// be sent to the FIU.
|
|
||||||
//
|
|
||||||
// Address used for the pending read request.
t_ccip_clAddr rd_addr;

// Tracks whether a read request is pending.
//   * While pending, the flag follows sRx.c0TxAlmFull: it stays set as
//     long as the channel is almost full and clears in the first cycle
//     the channel can accept the request.
//   * While idle, a new read is scheduled when start_read is set or when
//     addr_next_valid is set and the end of the list has not been
//     reached. The address is chosen in the same cycle: the CSR-supplied
//     address for a fresh start, addr_next otherwise.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        rd_needed <= 1'b0;
    end
    else if (rd_needed)
    begin
        rd_needed <= sRx.c0TxAlmFull;
    end
    else
    begin
        rd_addr <= (start_read ? read_mem_addr : addr_next);
        rd_needed <= (start_read || (addr_next_valid && ! rd_end_of_list));
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Emit read requests to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Read header defines the request to the FIU. It uses the CCI-P
// (t_ccip_*) header type, consistent with every other CCI-P type in this
// module; the previous t_cci_* spelling did not match that convention.
t_ccip_c0_ReqMemHdr rd_hdr;

always_comb
begin
    // Start from an all-zero header so unused fields are well defined.
    rd_hdr = t_ccip_c0_ReqMemHdr'(0);

    // Read request type
    rd_hdr.req_type = eREQ_RDLINE_I;
    // Address of the line(s) to read.
    // NOTE(review): the original comment claims MPF virtual addressing is
    // enabled, but no MPF instance is visible in this module -- confirm
    // what address space rd_addr is expected to be in.
    rd_hdr.address = rd_addr;
    // Let the FIU pick the channel
    rd_hdr.vc_sel = eVC_VA;
    // Read 4 lines (the size of an entry in the list)
    rd_hdr.cl_len = eCL_LEN_4;
end
|
|
||||||
|
|
||||||
// Drive the c0 (read request) Tx channel and count issued requests.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        cnt_list_length <= 0;
        sTx.c0.valid <= 1'b0;
    end
    else
    begin
        // The header is presented every cycle; only `valid` qualifies it.
        sTx.c0.hdr <= rd_hdr;

        // A request is issued when one is pending and the FIU is not
        // signalling almost-full on c0.
        sTx.c0.valid <= (rd_needed && ! sRx.c0TxAlmFull);

        // Count each issued request.
        if (rd_needed && ! sRx.c0TxAlmFull)
        begin
            $display("Incrementing read count...");
            cnt_list_length <= cnt_list_length + 1;
        end
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// READ RESPONSE HANDLING
|
|
||||||
//
|
|
||||||
|
|
||||||
//
|
|
||||||
// Receive data (read responses).
|
|
||||||
//
|
|
||||||
// Capture read data and produce the value to write back.
//   STATE_READ   : sample the c0 Rx data bus into rd_data, raise do_update
//   STATE_UPDATE : wr_data = rd_data + 1, drop do_update
// NOTE(review): the data bus is sampled on every STATE_READ cycle without
// looking at sRx.c0.rspValid -- confirm that this is intended.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        do_update <= 1'b0;
    end
    else
    begin
        case (state)
          STATE_READ:
            begin
                do_update <= 1'b1;
                rd_data <= sRx.c0.data;
            end

          STATE_UPDATE:
            begin
                // Update the read data and stage it for the write-back.
                do_update <= 1'b0;
                wr_data <= rd_data + 1;
            end

          default: ;
        endcase
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Write logic.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// WRITE REQUEST
|
|
||||||
//
|
|
||||||
|
|
||||||
// Registered copy of the c0 (read) response-valid flag; used below as one
// of the triggers for the next write request.
logic wr_addr_next_valid;

// Next write address.
t_ccip_clAddr wr_addr_next;

always_ff @(posedge clk)
begin
    // Placeholder increment of zero: the address is simply held.
    // NOTE(review): wr_addr_next is never loaded anywhere in this block;
    // presumably a real stride was meant to replace the 0 -- confirm.
    wr_addr_next <= wr_addr_next + 0;

    // A read response arrived in the previous cycle.
    wr_addr_next_valid <= sRx.c0.rspValid;
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Since back pressure may prevent an immediate write request, we must
|
|
||||||
// record whether a write is needed and hold it until the request can
|
|
||||||
// be sent to the FIU.
|
|
||||||
//
|
|
||||||
// Address used for the pending write request.
t_ccip_clAddr wr_addr;

// Tracks whether a write request is pending.
//   * While pending, the flag follows sRx.c1TxAlmFull: it stays set as
//     long as the channel is almost full and clears in the first cycle
//     the channel can accept the request.
//   * While idle, a new write is scheduled when start_write is set or
//     when wr_addr_next_valid is set. The address is chosen in the same
//     cycle: the CSR-supplied address when start_write is set,
//     wr_addr_next otherwise.
// NOTE(review): start_write is not declared or driven anywhere in the
// visible part of this module -- it needs a declaration and a driver
// before this will elaborate.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        wr_needed <= 1'b0;
    end
    else if (wr_needed)
    begin
        wr_needed <= sRx.c1TxAlmFull;
    end
    else
    begin
        wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
        wr_needed <= (start_write || wr_addr_next_valid);
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Emit write requests to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Write header defines the request to the FIU.
t_ccip_c1_ReqMemHdr wr_hdr;

always_comb
begin
    // Start from an all-zero header so unused fields are well defined.
    // The cast uses the same type as the declaration above (it previously
    // named a differently spelled t_cci_* type).
    wr_hdr = t_ccip_c1_ReqMemHdr'(0);

    // Write request type. This previously used the c0 *read* opcode
    // eREQ_RDLINE_I, which is not a c1 request type.
    wr_hdr.req_type = eREQ_WRLINE_I;
    // Address of the line(s) to write.
    // NOTE(review): the original comment claims MPF virtual addressing is
    // enabled, but no MPF instance is visible in this module -- confirm
    // what address space wr_addr is expected to be in.
    wr_hdr.address = wr_addr;
    // Let the FIU pick the channel
    wr_hdr.vc_sel = eVC_VA;
    // Write 4 lines (the size of an entry in the list).
    // NOTE(review): only a single 512-bit wr_data beat is ever sent with
    // sop set, while cl_len requests a 4-line packet -- confirm whether
    // eCL_LEN_1 is what is actually intended here.
    wr_hdr.cl_len = eCL_LEN_4;
    // Start of packet
    wr_hdr.sop = 1'b1;
end
|
|
||||||
|
|
||||||
// Drive the c1 (write request) Tx channel.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c1.valid <= 1'b0;
    end
    else
    begin
        // A request is issued when one is pending and the FIU is not
        // signalling almost-full on c1.
        sTx.c1.valid <= (wr_needed && ! sRx.c1TxAlmFull);

        // Header and data are presented every cycle; only `valid`
        // qualifies them. The data assignment is now non-blocking like
        // the rest of this process; it was previously a blocking `=`
        // mixed into the always_ff.
        sTx.c1.hdr <= wr_hdr;
        sTx.c1.data <= t_ccip_clData'(wr_data);
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// WRITE RESPONSE HANDLING
|
|
||||||
//
|
|
||||||
|
|
||||||
// Apurve: Check if a signal is to be sent to read to start reading in case
|
|
||||||
// write response does not work
|
|
||||||
//
|
|
||||||
// Send data (write requests).
|
|
||||||
//
|
|
||||||
//always_ff @(posedge clk)
|
|
||||||
//begin
|
|
||||||
// if (state == STATE_WRITE)
|
|
||||||
// begin
|
|
||||||
// rd_data <= sRx.c0.data;
|
|
||||||
// end
|
|
||||||
// if (state == STATE_UPDATE)
|
|
||||||
// begin
|
|
||||||
// // Update the write data and put it in the write data to be written
|
|
||||||
// wr_data <= rd_data + 1;
|
|
||||||
// end
|
|
||||||
//end
|
|
||||||
|
|
||||||
endmodule
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
{
|
|
||||||
"version": 1,
|
|
||||||
"afu-image": {
|
|
||||||
"power": 0,
|
|
||||||
"afu-top-interface":
|
|
||||||
{
|
|
||||||
"name": "ccip_std_afu"
|
|
||||||
},
|
|
||||||
"accelerator-clusters":
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"name": "cci_hello",
|
|
||||||
"total-contexts": 1,
|
|
||||||
"accelerator-type-uuid": "c6aa954a-9b91-4a37-abc1-1d9f0709dcc3"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,653 +0,0 @@
|
|||||||
//
|
|
||||||
// Copyright (c) 2017, Intel Corporation
|
|
||||||
// All rights reserved.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// Redistributions of source code must retain the above copyright notice, this
|
|
||||||
// list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// Neither the name of the Intel Corporation nor the names of its contributors
|
|
||||||
// may be used to endorse or promote products derived from this software
|
|
||||||
// without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
// POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
|
|
||||||
// Read from the memory locations first and then write to the memory locations
|
|
||||||
|
|
||||||
`include "platform_if.vh"
|
|
||||||
`include "afu_json_info.vh"
|
|
||||||
|
|
||||||
|
|
||||||
module ccip_std_afu
|
|
||||||
(
|
|
||||||
// CCI-P Clocks and Resets
|
|
||||||
input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock
|
|
||||||
input logic pClkDiv2, // 200MHz - CCI-P clock domain.
|
|
||||||
input logic pClkDiv4, // 100MHz - CCI-P clock domain.
|
|
||||||
input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
|
|
||||||
input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
|
|
||||||
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
|
|
||||||
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
|
|
||||||
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
|
|
||||||
|
|
||||||
// Interface structures
|
|
||||||
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
|
|
||||||
output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// Run the entire design at the standard CCI-P frequency (400 MHz).
|
|
||||||
//
|
|
||||||
logic clk;
|
|
||||||
assign clk = pClk;
|
|
||||||
|
|
||||||
logic reset;
|
|
||||||
assign reset = pck_cp2af_softReset;
|
|
||||||
|
|
||||||
logic [511:0] wr_data;
|
|
||||||
logic [511:0] rd_data;
|
|
||||||
|
|
||||||
logic do_update;
|
|
||||||
logic start_read;
|
|
||||||
logic start_write;
|
|
||||||
logic wr_addr_next_valid;
|
|
||||||
logic addr_next_valid;
|
|
||||||
logic rd_end_of_list;
|
|
||||||
logic rd_needed;
|
|
||||||
logic wr_needed;
|
|
||||||
logic read_req;
|
|
||||||
logic write_req;
|
|
||||||
logic [15:0] cnt_list_length;
|
|
||||||
t_ccip_clAddr rd_addr;
|
|
||||||
t_ccip_clAddr wr_addr;
|
|
||||||
t_ccip_clAddr addr_next;
|
|
||||||
t_ccip_clAddr wr_addr_next;
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Register requests.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
//
|
|
||||||
// The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
|
|
||||||
// registered. Here we register pck_cp2af_sRx and assign it to sRx.
|
|
||||||
// We also assign pck_af2cp_sTx to sTx here but don't register it.
|
|
||||||
// The code below never uses combinational logic to write sTx.
|
|
||||||
//
|
|
||||||
|
|
||||||
t_if_ccip_Rx sRx;
|
|
||||||
always_ff @(posedge clk)
|
|
||||||
begin
|
|
||||||
sRx <= pck_cp2af_sRx;
|
|
||||||
end
|
|
||||||
|
|
||||||
t_if_ccip_Tx sTx;
|
|
||||||
assign pck_af2cp_sTx = sTx;
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// CSR (MMIO) handling.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
// The AFU ID is a unique ID for a given program. Here we generated
|
|
||||||
// one with the "uuidgen" program and stored it in the AFU's JSON file.
|
|
||||||
// ASE and synthesis setup scripts automatically invoke afu_json_mgr
|
|
||||||
// to extract the UUID into afu_json_info.vh.
|
|
||||||
logic [127:0] afu_id = `AFU_ACCEL_UUID;
|
|
||||||
|
|
||||||
//
|
|
||||||
// A valid AFU must implement a device feature list, starting at MMIO
|
|
||||||
// address 0. Every entry in the feature list begins with 5 64-bit
|
|
||||||
// words: a device feature header, two AFU UUID words and two reserved
|
|
||||||
// words.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Is a CSR read request active this cycle?
|
|
||||||
logic is_csr_read;
|
|
||||||
assign is_csr_read = sRx.c0.mmioRdValid;
|
|
||||||
|
|
||||||
// Is a CSR write request active this cycle?
|
|
||||||
logic is_csr_write;
|
|
||||||
assign is_csr_write = sRx.c0.mmioWrValid;
|
|
||||||
|
|
||||||
// The MMIO request header is overlayed on the normal c0 memory read
|
|
||||||
// response data structure. Cast the c0Rx header to an MMIO request
|
|
||||||
// header.
|
|
||||||
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
|
|
||||||
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// Implement the device feature list by responding to MMIO reads.
|
|
||||||
//
|
|
||||||
|
|
||||||
always_ff @(posedge clk)
|
|
||||||
begin
|
|
||||||
if (reset)
|
|
||||||
begin
|
|
||||||
sTx.c2.mmioRdValid <= 1'b0;
|
|
||||||
end
|
|
||||||
else
|
|
||||||
begin
|
|
||||||
// Always respond with something for every read request
|
|
||||||
sTx.c2.mmioRdValid <= is_csr_read;
|
|
||||||
|
|
||||||
// The unique transaction ID matches responses to requests
|
|
||||||
sTx.c2.hdr.tid <= mmio_req_hdr.tid;
|
|
||||||
|
|
||||||
// Addresses are of 32-bit objects in MMIO space. Addresses
|
|
||||||
// of 64-bit objects are thus multiples of 2.
|
|
||||||
case (mmio_req_hdr.address)
|
|
||||||
0: // AFU DFH (device feature header)
|
|
||||||
begin
|
|
||||||
// Here we define a trivial feature list. In this
|
|
||||||
// example, our AFU is the only entry in this list.
|
|
||||||
sTx.c2.data <= t_ccip_mmioData'(0);
|
|
||||||
// Feature type is AFU
|
|
||||||
sTx.c2.data[63:60] <= 4'h1;
|
|
||||||
// End of list (last entry in list)
|
|
||||||
sTx.c2.data[40] <= 1'b1;
|
|
||||||
end
|
|
||||||
|
|
||||||
// AFU_ID_L
|
|
||||||
2: sTx.c2.data <= afu_id[63:0];
|
|
||||||
|
|
||||||
// AFU_ID_H
|
|
||||||
4: sTx.c2.data <= afu_id[127:64];
|
|
||||||
|
|
||||||
// DFH_RSVD0
|
|
||||||
6: sTx.c2.data <= t_ccip_mmioData'(0);
|
|
||||||
|
|
||||||
// DFH_RSVD1
|
|
||||||
8: sTx.c2.data <= t_ccip_mmioData'(0);
|
|
||||||
|
|
||||||
// Updated by apurve to check fpgaReadMMIO
|
|
||||||
10: sTx.c2.data <= t_ccip_mmioData'(start_read);
|
|
||||||
|
|
||||||
default: sTx.c2.data <= t_ccip_mmioData'(0);
|
|
||||||
endcase
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// CSR write handling. Host software must tell the AFU the memory address
|
|
||||||
// to which it should be writing. The address is set by writing a CSR.
|
|
||||||
//
|
|
||||||
|
|
||||||
// We use MMIO address 0 to set the memory address. The read and
|
|
||||||
// write MMIO spaces are logically separate so we are free to use
|
|
||||||
// whatever we like. This may not be good practice for cleanly
|
|
||||||
// organizing the MMIO address space, but it is legal.
|
|
||||||
logic is_mem_addr_csr_write;
|
|
||||||
assign is_mem_addr_csr_write = is_csr_write &&
|
|
||||||
(mmio_req_hdr.address == t_ccip_mmioAddr'(0));
|
|
||||||
|
|
||||||
// Memory address to which this AFU will write.
|
|
||||||
t_ccip_clAddr write_mem_addr;
|
|
||||||
|
|
||||||
always_ff @(posedge clk)
|
|
||||||
begin
|
|
||||||
if (reset)
|
|
||||||
begin
|
|
||||||
start_write <= 1'b0;
|
|
||||||
end
|
|
||||||
else if (is_mem_addr_csr_write)
|
|
||||||
begin
|
|
||||||
write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
|
|
||||||
start_write <= 1'b1;
|
|
||||||
//$display("Write mem address is 0x%x", t_ccip_clAddr'(write_mem_addr));
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
// We use MMIO address 8 to set the memory address for reading data.
|
|
||||||
logic is_mem_addr_csr_read;
|
|
||||||
assign is_mem_addr_csr_read = is_csr_write &&
|
|
||||||
(mmio_req_hdr.address == t_ccip_mmioAddr'(2));
|
|
||||||
|
|
||||||
// Memory address from which this AFU will read.
|
|
||||||
t_ccip_clAddr read_mem_addr;
|
|
||||||
|
|
||||||
//logic start_traversal = 'b0;
|
|
||||||
//t_ccip_clAddr start_traversal_addr;
|
|
||||||
|
|
||||||
always_ff @(posedge clk)
|
|
||||||
begin
|
|
||||||
if (reset)
|
|
||||||
begin
|
|
||||||
start_read <= 1'b0;
|
|
||||||
end
|
|
||||||
else if (is_mem_addr_csr_read)
|
|
||||||
begin
|
|
||||||
read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
|
|
||||||
start_read <= 1'b1;
|
|
||||||
//$display("Read mem address is 0x%x", t_ccip_clAddr'(read_mem_addr));
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Main AFU logic
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
//
|
|
||||||
// States in our simple example.
|
|
||||||
//
|
|
||||||
//typedef enum logic [0:0]
|
|
||||||
typedef enum logic [1:0]
|
|
||||||
{
|
|
||||||
STATE_IDLE,
|
|
||||||
STATE_READ,
|
|
||||||
STATE_UPDATE,
|
|
||||||
STATE_WRITE
|
|
||||||
}
|
|
||||||
t_state;
|
|
||||||
|
|
||||||
t_state state;
|
|
||||||
|
|
||||||
//
|
|
||||||
// State machine
|
|
||||||
//
|
|
||||||
// Main sequencer.
//
//   IDLE   -> READ    when start_read is set
//   READ   -> UPDATE  when no read is pending (!rd_needed) and do_update is set
//   UPDATE -> WRITE   once do_update has been cleared; raises wr_needed
//   WRITE  -> IDLE    when rd_end_of_list is set
//   WRITE  -> READ    otherwise, once wr_needed has dropped
//
// The $display calls fire on every clock spent in the corresponding state.
//
// NOTE(review): rd_end_of_list, wr_needed, start_write and write_req are
// also assigned from other always_ff blocks in this module. Variables
// written by an always_ff are expected to have a single driving process;
// confirm and consolidate ownership.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        state          <= STATE_IDLE;
        rd_end_of_list <= 1'b0;
    end
    else
    begin
        case (state)
            STATE_IDLE:
            begin
                // Leave idle once the host has programmed the read address.
                if (start_read)
                begin
                    state <= STATE_READ;
                    $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
                end
            end

            STATE_READ:
            begin
                $display("AFU in READ...");
                $display("do_update is %d...",do_update);
                $display("addr_next_valid is %d...",addr_next_valid);
                $display("rd_needed is %d...",rd_needed);

                // Read data has been captured and no read is outstanding.
                if (!rd_needed && do_update)
                begin
                    state <= STATE_UPDATE;
                    $display("AFU moving to UPDATE...");
                end
            end

            STATE_UPDATE:
            begin
                $display("AFU in UPDATE...");

                // do_update is cleared by the read-response block once
                // wr_data has been produced.
                if (!do_update)
                begin
                    state     <= STATE_WRITE;
                    wr_needed <= 1'b1;
                    $display("AFU moving to WRITE...");
                end
            end

            STATE_WRITE:
            begin
                $display("AFU in WRITE...");

                if (rd_end_of_list)
                begin
                    // Last element handled: stop.
                    state <= STATE_IDLE;
                    $display("AFU done...");
                end
                else if (!wr_needed)
                begin
                    // Write finished: go around for the next read.
                    state <= STATE_READ;
                    $display("AFU moving to READ from WRITE...");
                    start_write <= 1'b0;
                    write_req   <= 1'b0;
                end
            end
        endcase
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Read logic.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
//
|
|
||||||
// READ REQUEST
|
|
||||||
//
|
|
||||||
|
|
||||||
// Did a write response just arrive
|
|
||||||
|
|
||||||
// Next read address
|
|
||||||
|
|
||||||
// On every write response (sRx.c1.rspValid): mark the next read address as
// valid, refresh addr_next from rd_addr, and re-evaluate the end-of-list
// condition (list ends once cnt_list_length equals 5).
//
// Both arms of the addr_next selection evaluate to rd_addr (the increment
// is 0), so the same address is re-read on each iteration.
//
// NOTE(review): addr_next_valid has no reset in this block and is also
// cleared by the read-request block; rd_end_of_list is also reset by the
// state machine block. Confirm the intended single owner of each.
always_ff @(posedge clk)
begin
    if (sRx.c1.rspValid)
    begin
        // The guard already guarantees rspValid is 1 here.
        addr_next_valid <= 1'b1;

        addr_next <= (addr_next_valid ? rd_addr + 0 : rd_addr);

        rd_end_of_list <= (cnt_list_length == 'h5);
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Since back pressure may prevent an immediate read request, we must
|
|
||||||
// record whether a read is needed and hold it until the request can
|
|
||||||
// be sent to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Tracks whether a read is wanted / outstanding.
//
//  - Once set, rd_needed stays set until a read response arrives
//    (sRx.c0.rspValid), i.e. it is not cleared merely because the request
//    could be sent.
//  - While clear and in STATE_READ, it is set either for the first read
//    (start_read) or when addr_next_valid is set, the list has not ended
//    and the c0 channel is not almost full. rd_addr is selected at the same
//    time: the CSR-programmed address for the first read, addr_next after.
always_ff @(posedge clk)
begin
    if (reset)
        rd_needed <= 1'b0;
    else if (rd_needed)
        rd_needed <= !sRx.c0.rspValid;
    else if (state == STATE_READ)
    begin
        rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list)));
        rd_addr   <= (start_read ? read_mem_addr : addr_next);
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Emit read requests to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Read header defines the request to the FIU
|
|
||||||
// Header used for read requests on channel c0.
t_ccip_c0_ReqMemHdr rd_hdr;

// Every field is zero except the address. The request type, virtual
// channel and line count are therefore whatever encoding 0 selects for
// each of them (the explicit assignments were left commented out).
always_comb
begin
    rd_hdr         = t_ccip_c0_ReqMemHdr'(0);
    rd_hdr.address = rd_addr;
end
|
|
||||||
|
|
||||||
// Send read requests to the FIU
|
|
||||||
// Issue read requests on channel c0.
//
// A request is sent for exactly one cycle when, in STATE_READ, a read is
// needed (rd_needed), the channel is not almost full and no request has
// been sent yet for this iteration (!read_req). Sending a request also
// bumps cnt_list_length, sets read_req and clears addr_next_valid.
//
// sTx.c0.valid now defaults to 0 on every cycle. Previously it was only
// updated while in STATE_READ and held its last value in every other
// state, so a request could have been repeated if the state machine left
// STATE_READ while valid was high.
//
// NOTE(review): read_req is also cleared by the read-response block and
// addr_next_valid is also set by the next-address block; confirm the
// intended single owner of each.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c0.valid    <= 1'b0;
        cnt_list_length <= 0;
        read_req        <= 1'b0;
    end
    else
    begin
        // Default: no request this cycle.
        sTx.c0.valid <= 1'b0;

        if ((state == STATE_READ) && rd_needed && !sRx.c0TxAlmFull && !read_req)
        begin
            sTx.c0.valid    <= 1'b1;
            sTx.c0.hdr      <= rd_hdr;
            cnt_list_length <= cnt_list_length + 1;
            read_req        <= 1'b1;
            // Prints the count before this cycle's increment takes effect.
            $display("Incrementing read count...%d",cnt_list_length);
            $display("Read address is 0x%x...",rd_hdr.address);
            addr_next_valid <= 1'b0;
        end
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// READ RESPONSE HANDLING
|
|
||||||
//
|
|
||||||
|
|
||||||
//
|
|
||||||
// Receive data (read responses).
|
|
||||||
//
|
|
||||||
// Read-response capture and data update.
//
//  1. When no update is pending and a c0 response arrives, latch the line
//     into rd_data and set do_update.
//  2. In STATE_UPDATE with do_update set, produce wr_data = rd_data + 2,
//     clear do_update and read_req, and clear start_read so later reads
//     use addr_next instead of the CSR-programmed address.
//
// The $display calls print the register values from before this clock
// edge's non-blocking updates.
//
// NOTE(review): read_req and start_read are also assigned from other
// always_ff blocks (read-request block / read-address CSR block).
always_ff @(posedge clk)
begin
    if (reset)
    begin
        do_update <= 1'b0;
    end
    else
    begin
        if (sRx.c0.rspValid && !do_update)
        begin
            do_update <= 1'b1;
            rd_data   <= sRx.c0.data;
            $display("rd data is %d...",rd_data);
        end

        if (do_update && (state == STATE_UPDATE))
        begin
            wr_data   <= rd_data + 2;
            do_update <= 1'b0;
            read_req  <= 1'b0;
            $display("write data is %d...",wr_data);

            // First read done; subsequent reads come from addr_next.
            start_read <= 1'b0;
        end
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Write logic.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// WRITE REQUEST
|
|
||||||
//
|
|
||||||
|
|
||||||
// Did a write response just arrive
|
|
||||||
|
|
||||||
// Next write address
|
|
||||||
|
|
||||||
// On every read response (sRx.c0.rspValid): mark the next write address as
// valid and refresh wr_addr_next from wr_addr.
//
// Both arms of the selection evaluate to wr_addr (the increment is 0), so
// the same address is rewritten on each iteration.
//
// NOTE(review): wr_addr_next_valid has no reset in this block and is also
// cleared by the write-request block; confirm the intended single owner.
always_ff @(posedge clk)
begin
    if (sRx.c0.rspValid)
    begin
        // The guard already guarantees rspValid is 1 here.
        wr_addr_next_valid <= 1'b1;

        wr_addr_next <= (wr_addr_next_valid ? wr_addr + 0 : wr_addr);
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Since back pressure may prevent an immediate write request, we must
|
|
||||||
// record whether a write is needed and hold it until the request can
|
|
||||||
// be sent to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Tracks whether a write is wanted / outstanding.
//
//  - Once set, wr_needed stays set until a write response arrives
//    (sRx.c1.rspValid).
//  - While clear (in any state), it is set again when start_write is set,
//    or when wr_addr_next_valid is set and the c1 channel is not almost
//    full. wr_addr is selected at the same time: the CSR-programmed address
//    while start_write is set, wr_addr_next otherwise.
//
// NOTE(review): wr_needed is also set by the state machine on the
// UPDATE -> WRITE transition; confirm the intended single owner.
always_ff @(posedge clk)
begin
    if (reset)
        wr_needed <= 1'b0;
    else if (wr_needed)
        wr_needed <= !sRx.c1.rspValid;
    else
    begin
        wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
        wr_addr   <= (start_write ? write_mem_addr : wr_addr_next);
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Emit write requests to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Write header defines the request to the FIU
|
|
||||||
// Header used for write requests on channel c1.
t_ccip_c1_ReqMemHdr wr_hdr;

// Every field is zero except the address and the start-of-packet flag.
// The request type, virtual channel and line count are therefore whatever
// encoding 0 selects for each of them (the explicit assignments were left
// commented out).
always_comb
begin
    wr_hdr         = t_ccip_c1_ReqMemHdr'(0);
    wr_hdr.address = wr_addr;
    wr_hdr.sop     = 1'b1;   // single-line write: always start of packet
end
|
|
||||||
|
|
||||||
// Send write requests to the FIU
|
|
||||||
// Issue write requests on channel c1.
//
// A request is sent for exactly one cycle when, in STATE_WRITE, a write is
// needed (wr_needed), the channel is not almost full and no request has
// been sent yet for this iteration (!write_req). Sending a request also
// sets write_req and clears wr_addr_next_valid.
//
// sTx.c1.valid now defaults to 0 on every cycle. Previously it was only
// updated while in STATE_WRITE and held its last value in every other
// state; because the state machine can leave STATE_WRITE as soon as
// rd_end_of_list is set, a high valid could have been held and the write
// repeated on every following clock.
//
// NOTE(review): write_req is also cleared by the state machine and
// wr_addr_next_valid is also set by the next-write-address block; confirm
// the intended single owner of each.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c1.valid <= 1'b0;
        write_req    <= 1'b0;
    end
    else
    begin
        // Default: no request this cycle.
        sTx.c1.valid <= 1'b0;

        if ((state == STATE_WRITE) && wr_needed && !sRx.c1TxAlmFull && !write_req)
        begin
            sTx.c1.valid       <= 1'b1;
            sTx.c1.hdr         <= wr_hdr;
            sTx.c1.data        <= t_ccip_clData'(wr_data);
            write_req          <= 1'b1;
            wr_addr_next_valid <= 1'b0;
            $display("Write address is 0x%x...", wr_hdr.address);
        end
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// WRITE RESPONSE HANDLING
|
|
||||||
//
|
|
||||||
|
|
||||||
// Apurve: Check if a signal is to be sent to read to start reading in case
|
|
||||||
// write response does not work
|
|
||||||
//
|
|
||||||
// Send data (write requests).
|
|
||||||
//
|
|
||||||
//always_ff @(posedge clk)
|
|
||||||
//begin
|
|
||||||
// if (state == STATE_WRITE)
|
|
||||||
// begin
|
|
||||||
// rd_data <= sRx.c0.data;
|
|
||||||
// end
|
|
||||||
// if (state == STATE_UPDATE)
|
|
||||||
// begin
|
|
||||||
// // Update the write data and put it in the write data to be written
|
|
||||||
// wr_data <= rd_data + 1;
|
|
||||||
// end
|
|
||||||
//end
|
|
||||||
|
|
||||||
endmodule
|
|
||||||
@@ -1,621 +0,0 @@
|
|||||||
//
|
|
||||||
// Copyright (c) 2017, Intel Corporation
|
|
||||||
// All rights reserved.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// Redistributions of source code must retain the above copyright notice, this
|
|
||||||
// list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// Neither the name of the Intel Corporation nor the names of its contributors
|
|
||||||
// may be used to endorse or promote products derived from this software
|
|
||||||
// without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
// POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
|
|
||||||
// Read from the memory locations first and then write to the memory locations
|
|
||||||
|
|
||||||
`include "platform_if.vh"
|
|
||||||
`include "afu_json_info.vh"
|
|
||||||
|
|
||||||
|
|
||||||
module ccip_std_afu
|
|
||||||
(
|
|
||||||
// CCI-P Clocks and Resets
|
|
||||||
input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock
|
|
||||||
input logic pClkDiv2, // 200MHz - CCI-P clock domain.
|
|
||||||
input logic pClkDiv4, // 100MHz - CCI-P clock domain.
|
|
||||||
input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
|
|
||||||
input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
|
|
||||||
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
|
|
||||||
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
|
|
||||||
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
|
|
||||||
|
|
||||||
// Interface structures
|
|
||||||
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
|
|
||||||
output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// Run the entire design at the standard CCI-P frequency (400 MHz).
|
|
||||||
//
|
|
||||||
// Local clock / reset aliases: the whole design runs on pClk and uses the
// CCI-P soft reset.
logic clk;
logic reset;
assign clk   = pClk;
assign reset = pck_cp2af_softReset;

// Cache-line sized data registers: last line read, and the line to write.
logic [511:0] rd_data, wr_data;

// Handshake / bookkeeping flags shared between the always blocks below.
logic do_update;
logic start_read, start_write;
logic addr_next_valid, wr_addr_next_valid;
logic rd_end_of_list;
logic rd_needed, wr_needed;

// Number of read requests issued so far.
logic [15:0] cnt_list_length;

// Current and next cache-line addresses for each direction.
t_ccip_clAddr rd_addr, addr_next;
t_ccip_clAddr wr_addr, wr_addr_next;
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Register requests.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
//
|
|
||||||
// The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
|
|
||||||
// registered. Here we register pck_cp2af_sRx and assign it to sRx.
|
|
||||||
// We also assign pck_af2cp_sTx to sTx here but don't register it.
|
|
||||||
// The code below never uses combinational logic to write sTx.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Registered copy of the incoming CCI-P Rx port (one clock of delay).
t_if_ccip_Rx sRx;
always_ff @(posedge clk)
    sRx <= pck_cp2af_sRx;

// Outgoing Tx port. Driven straight from sTx, whose fields are written by
// the always_ff blocks in this module.
t_if_ccip_Tx sTx;
assign pck_af2cp_sTx = sTx;
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// CSR (MMIO) handling.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
// The AFU ID is a unique ID for a given program. Here we generated
|
|
||||||
// one with the "uuidgen" program and stored it in the AFU's JSON file.
|
|
||||||
// ASE and synthesis setup scripts automatically invoke afu_json_mgr
|
|
||||||
// to extract the UUID into afu_json_info.vh.
|
|
||||||
// 128-bit AFU UUID taken from the AFU_ACCEL_UUID macro.
logic [127:0] afu_id = `AFU_ACCEL_UUID;

//
// A valid AFU must implement a device feature list, starting at MMIO
// address 0. Every entry in the feature list begins with 5 64-bit
// words: a device feature header, two AFU UUID words and two reserved
// words.
//

// MMIO request strobes for the current (registered) cycle.
logic is_csr_read;    // host MMIO read request present
logic is_csr_write;   // host MMIO write request present
assign is_csr_read  = sRx.c0.mmioRdValid;
assign is_csr_write = sRx.c0.mmioWrValid;

// MMIO requests reuse the c0 response header bits, so reinterpret the c0
// header as an MMIO request header to reach its address and tid fields.
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// Implement the device feature list by responding to MMIO reads.
|
|
||||||
//
|
|
||||||
|
|
||||||
// MMIO read responder (channel c2).
//
// mmioRdValid follows is_csr_read with one clock of delay, so every MMIO
// read receives a response. The tid and data registers are refreshed on
// every clock from the current request header; they only matter when
// mmioRdValid is high.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c2.mmioRdValid <= 1'b0;
    end
    else
    begin
        sTx.c2.mmioRdValid <= is_csr_read;

        // Echo the transaction ID so the response can be matched.
        sTx.c2.hdr.tid <= mmio_req_hdr.tid;

        // Addresses are of 32-bit objects in MMIO space. Addresses
        // of 64-bit objects are thus multiples of 2.
        case (mmio_req_hdr.address)
            // AFU DFH (device feature header): a one-entry feature list.
            0:
            begin
                sTx.c2.data        <= t_ccip_mmioData'(0);
                sTx.c2.data[63:60] <= 4'h1;   // feature type: AFU
                sTx.c2.data[40]    <= 1'b1;   // end of list
            end

            // AFU_ID_L / AFU_ID_H
            2:  sTx.c2.data <= afu_id[63:0];
            4:  sTx.c2.data <= afu_id[127:64];

            // DFH_RSVD0 / DFH_RSVD1
            6:  sTx.c2.data <= t_ccip_mmioData'(0);
            8:  sTx.c2.data <= t_ccip_mmioData'(0);

            // Debug read-back of the start_read flag.
            10: sTx.c2.data <= t_ccip_mmioData'(start_read);

            default: sTx.c2.data <= t_ccip_mmioData'(0);
        endcase
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// CSR write handling. Host software must tell the AFU the memory address
|
|
||||||
// to which it should be writing. The address is set by writing a CSR.
|
|
||||||
//
|
|
||||||
|
|
||||||
// We use MMIO address 0 to set the memory address. The read and
|
|
||||||
// write MMIO spaces are logically separate so we are free to use
|
|
||||||
// whatever we like. This may not be good practice for cleanly
|
|
||||||
// organizing the MMIO address space, but it is legal.
|
|
||||||
// Strobe: the host is performing an MMIO write to MMIO address 0.
logic is_mem_addr_csr_write;
assign is_mem_addr_csr_write =
    is_csr_write && (mmio_req_hdr.address == t_ccip_mmioAddr'(0));

// Cache-line address the AFU writes to, loaded from the MMIO write payload.
t_ccip_clAddr write_mem_addr;

// Capture the write address and raise start_write when the CSR is written.
// Reset clears only start_write; write_mem_addr keeps whatever it held.
// NOTE(review): start_write is also cleared by the main state machine on
// WRITE -> READ; confirm which block should own it.
always_ff @(posedge clk)
begin
    if (reset)
        start_write <= 1'b0;
    else if (is_mem_addr_csr_write)
    begin
        start_write    <= 1'b1;
        write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
// Strobe: the host is performing an MMIO *write* to MMIO address 2 this
// cycle. Despite the "_read" suffix this decodes a CSR write; it loads the
// address the AFU will read from.
// NOTE(review): the previous comment called this "MMIO address 8"; that
// matches address 2 only if MMIO addresses count 32-bit words - confirm.
logic is_mem_addr_csr_read;
assign is_mem_addr_csr_read =
    is_csr_write && (mmio_req_hdr.address == t_ccip_mmioAddr'(2));

// Cache-line address the AFU reads from, loaded from the MMIO write payload.
t_ccip_clAddr read_mem_addr;

// Capture the read address and raise start_read when the CSR is written.
// Reset clears only start_read; read_mem_addr keeps whatever it held.
// NOTE(review): start_read is also cleared by the read-response block
// while in STATE_UPDATE; confirm which block should own it.
always_ff @(posedge clk)
begin
    if (reset)
        start_read <= 1'b0;
    else if (is_mem_addr_csr_read)
    begin
        start_read    <= 1'b1;
        read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Main AFU logic
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
//
|
|
||||||
// States in our simple example.
|
|
||||||
//
|
|
||||||
//typedef enum logic [0:0]
|
|
||||||
// Phases of one read / modify / write iteration. The encodings are written
// out explicitly; they are the same values the implicit numbering assigns.
typedef enum logic [1:0]
{
    STATE_IDLE   = 2'd0,   // waiting for start_read
    STATE_READ   = 2'd1,   // issuing a read / waiting for its data
    STATE_UPDATE = 2'd2,   // computing wr_data from rd_data
    STATE_WRITE  = 2'd3    // issuing the write
}
t_state;

// Current phase of the AFU.
t_state state;
|
|
||||||
|
|
||||||
//
|
|
||||||
// State machine
|
|
||||||
//
|
|
||||||
// Main sequencer.
//
//   IDLE   -> READ    when start_read is set
//   READ   -> UPDATE  when no read is pending (!rd_needed) and do_update is set
//   UPDATE -> WRITE   once do_update has been cleared; raises wr_needed
//   WRITE  -> IDLE    when rd_end_of_list is set
//   WRITE  -> READ    otherwise, once wr_needed has dropped
//
// The $display calls fire on every clock spent in the corresponding state.
//
// NOTE(review): rd_end_of_list, wr_needed and start_write are also
// assigned from other always_ff blocks in this module. Variables written
// by an always_ff are expected to have a single driving process; confirm
// and consolidate ownership.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        state          <= STATE_IDLE;
        rd_end_of_list <= 1'b0;
    end
    else
    begin
        case (state)
            STATE_IDLE:
            begin
                // Leave idle once the host has programmed the read address.
                if (start_read)
                begin
                    state <= STATE_READ;
                    $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
                end
            end

            STATE_READ:
            begin
                $display("AFU in READ...");

                // Read data has been captured and no read is pending.
                if (!rd_needed && do_update)
                begin
                    state <= STATE_UPDATE;
                    $display("AFU moving to UPDATE...");
                end
            end

            STATE_UPDATE:
            begin
                $display("AFU in UPDATE...");

                // do_update is cleared by the read-response block once
                // wr_data has been produced.
                if (!do_update)
                begin
                    state     <= STATE_WRITE;
                    wr_needed <= 1'b1;
                    $display("AFU moving to WRITE...");
                end
            end

            STATE_WRITE:
            begin
                $display("AFU in WRITE...");

                if (rd_end_of_list)
                begin
                    // Last element handled: stop.
                    state <= STATE_IDLE;
                    $display("AFU done...");
                end
                else if (!wr_needed)
                begin
                    // Write sent: go around for the next read.
                    state <= STATE_READ;
                    $display("AFU moving to READ from WRITE...");
                    start_write <= 1'b0;
                end
            end
        endcase
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Read logic.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
//
|
|
||||||
// READ REQUEST
|
|
||||||
//
|
|
||||||
|
|
||||||
// Did a write response just arrive
|
|
||||||
|
|
||||||
// Next read address
|
|
||||||
|
|
||||||
// Updated on every clock (no reset, no enable):
//  - addr_next_valid mirrors the write-response valid bit, delayed by one
//    clock;
//  - addr_next follows rd_addr (the increment is 0, so the same line is
//    read again on the next iteration);
//  - rd_end_of_list is high while cnt_list_length equals 5.
//
// NOTE(review): rd_end_of_list is also reset by the state machine block;
// confirm the intended single owner.
always_ff @(posedge clk)
begin
    addr_next_valid <= sRx.c1.rspValid;
    addr_next       <= rd_addr + 0;
    rd_end_of_list  <= (cnt_list_length == 'h5);
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Since back pressure may prevent an immediate read request, we must
|
|
||||||
// record whether a read is needed and hold it until the request can
|
|
||||||
// be sent to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Tracks whether a read request still has to be sent.
//
//  - While set, rd_needed is held only as long as the c0 channel is almost
//    full; it drops on the first clock where the channel can accept the
//    request.
//  - While clear (in any state), it is set when start_read is set, or when
//    addr_next_valid is set, the list has not ended and the channel is not
//    almost full. rd_addr is selected at the same time: the CSR-programmed
//    address while start_read is set, addr_next otherwise.
always_ff @(posedge clk)
begin
    if (reset)
        rd_needed <= 1'b0;
    else if (rd_needed)
        rd_needed <= sRx.c0TxAlmFull;
    else
    begin
        rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list)));
        rd_addr   <= (start_read ? read_mem_addr : addr_next);
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Emit read requests to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Read header defines the request to the FIU
|
|
||||||
// Header used for read requests on channel c0.
t_ccip_c0_ReqMemHdr rd_hdr;

// Every field is zero except the address. The request type, virtual
// channel and line count are therefore whatever encoding 0 selects for
// each of them (the explicit assignments were left commented out).
always_comb
begin
    rd_hdr         = t_ccip_c0_ReqMemHdr'(0);
    rd_hdr.address = rd_addr;
end
|
|
||||||
|
|
||||||
// Send read requests to the FIU
|
|
||||||
// Issue read requests on channel c0.
//
// A request is sent when, in STATE_READ, a read is needed (rd_needed) and
// the channel is not almost full. Each request bumps cnt_list_length.
//
// sTx.c0.valid now defaults to 0 on every cycle. Previously it was only
// updated while in STATE_READ and held its last value in every other
// state, so a request could have been repeated if the state machine left
// STATE_READ while valid was high.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c0.valid    <= 1'b0;
        cnt_list_length <= 0;
    end
    else
    begin
        // Default: no request this cycle.
        sTx.c0.valid <= 1'b0;

        if ((state == STATE_READ) && rd_needed && !sRx.c0TxAlmFull)
        begin
            sTx.c0.valid    <= 1'b1;
            sTx.c0.hdr      <= rd_hdr;
            cnt_list_length <= cnt_list_length + 1;
            // Prints the count before this cycle's increment takes effect.
            $display("Incrementing read count...%d",cnt_list_length);
            $display("Read address is 0x%x...",rd_hdr.address);
        end
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// READ RESPONSE HANDLING
|
|
||||||
//
|
|
||||||
|
|
||||||
//
|
|
||||||
// Receive data (read responses).
|
|
||||||
//
|
|
||||||
// Read-response capture and data update.
//
//  1. Whenever a c0 response arrives, latch the line into rd_data and set
//     do_update.
//  2. On every clock spent in STATE_UPDATE, produce wr_data = rd_data + 2,
//     clear do_update, and clear start_read so later reads use addr_next
//     instead of the CSR-programmed address. If both conditions hold on the
//     same clock, the clear of do_update wins because it is assigned last.
//
// The $display prints wr_data from before this clock edge's update.
//
// NOTE(review): start_read is also assigned by the read-address CSR block;
// confirm the intended single owner.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        do_update <= 1'b0;
    end
    else
    begin
        if (sRx.c0.rspValid)
        begin
            do_update <= 1'b1;
            rd_data   <= sRx.c0.data;
        end

        if (state == STATE_UPDATE)
        begin
            wr_data   <= rd_data + 2;
            do_update <= 1'b0;
            $display("write data is %d...",wr_data);

            // First read done; subsequent reads come from addr_next.
            start_read <= 1'b0;
        end
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
//
|
|
||||||
// Write logic.
|
|
||||||
//
|
|
||||||
// =========================================================================
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// WRITE REQUEST
|
|
||||||
//
|
|
||||||
|
|
||||||
// Did a write response just arrive
|
|
||||||
|
|
||||||
// Next write address
|
|
||||||
|
|
||||||
// Updated on every clock (no reset, no enable):
//  - wr_addr_next_valid mirrors the read-response valid bit, delayed by
//    one clock;
//  - wr_addr_next follows wr_addr (the increment is 0, so the same line is
//    written again on the next iteration).
always_ff @(posedge clk)
begin
    wr_addr_next_valid <= sRx.c0.rspValid;
    wr_addr_next       <= wr_addr + 0;
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Since back pressure may prevent an immediate write request, we must
|
|
||||||
// record whether a write is needed and hold it until the request can
|
|
||||||
// be sent to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Tracks whether a write request still has to be sent.
//
//  - While set, wr_needed is held only as long as the c1 channel is almost
//    full; it drops on the first clock where the channel can accept the
//    request.
//  - While clear (in any state), it is set when start_write is set, or
//    when wr_addr_next_valid is set and the channel is not almost full.
//    wr_addr is selected at the same time: the CSR-programmed address while
//    start_write is set, wr_addr_next otherwise.
//
// NOTE(review): wr_needed is also set by the state machine on the
// UPDATE -> WRITE transition; confirm the intended single owner.
always_ff @(posedge clk)
begin
    if (reset)
        wr_needed <= 1'b0;
    else if (wr_needed)
        wr_needed <= sRx.c1TxAlmFull;
    else
    begin
        wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
        wr_addr   <= (start_write ? write_mem_addr : wr_addr_next);
    end
end
|
|
||||||
|
|
||||||
//
|
|
||||||
// Emit write requests to the FIU.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Write header defines the request to the FIU
|
|
||||||
// Header used for write requests on channel c1.
t_ccip_c1_ReqMemHdr wr_hdr;

// Every field is zero except the address and the start-of-packet flag.
// The request type, virtual channel and line count are therefore whatever
// encoding 0 selects for each of them (the explicit assignments were left
// commented out).
always_comb
begin
    wr_hdr         = t_ccip_c1_ReqMemHdr'(0);
    wr_hdr.address = wr_addr;
    wr_hdr.sop     = 1'b1;   // single-line write: always start of packet
end
|
|
||||||
|
|
||||||
// Send write requests to the FIU
|
|
||||||
// Drive the write request channel (sTx.c1).
// Outside of reset, the channel is only updated while state == STATE_WRITE;
// in any other state sTx.c1.valid, hdr and data keep their previous values.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c1.valid <= 1'b0;
    end
    else if (state == STATE_WRITE)
    begin
        // Request is valid only when a write is pending and the FIU's
        // write channel is not almost full.
        sTx.c1.valid <= (wr_needed && !sRx.c1TxAlmFull);

        // Header and payload are captured in the same cycle the request
        // is raised; otherwise they are left untouched.
        if (wr_needed && !sRx.c1TxAlmFull)
        begin
            sTx.c1.hdr <= wr_hdr;
            sTx.c1.data <= t_ccip_clData'(wr_data);
        end
    end
end
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// WRITE RESPONSE HANDLING
|
|
||||||
//
|
|
||||||
|
|
||||||
// Apurve: Check if a signal is to be sent to read to start reading in case
|
|
||||||
// write response does not work
|
|
||||||
//
|
|
||||||
// Send data (write requests).
|
|
||||||
//
|
|
||||||
//always_ff @(posedge clk)
|
|
||||||
//begin
|
|
||||||
// if (state == STATE_WRITE)
|
|
||||||
// begin
|
|
||||||
// rd_data <= sRx.c0.data;
|
|
||||||
// end
|
|
||||||
// if (state == STATE_UPDATE)
|
|
||||||
// begin
|
|
||||||
// // Update the write data and put it in the write data to be written
|
|
||||||
// wr_data <= rd_data + 1;
|
|
||||||
// end
|
|
||||||
//end
|
|
||||||
|
|
||||||
endmodule
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
cci_hello.json
|
|
||||||
cci_hello_afu.sv
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
#!/bin/sh

##
## Setup ASE environment using ../rtl/sources.txt.
##
## All command-line arguments are forwarded to afu_sim_setup.
##

# Absolute path to this script (readlink -f resolves symlinks)
SCRIPT=$(readlink -f "$0")
SCRIPT_PATH=$(dirname "$SCRIPT")

# "$@" is quoted so each caller argument is passed through as exactly one
# word; an unquoted $@ would re-split arguments containing whitespace and
# expand glob characters.
afu_sim_setup --sources="${SCRIPT_PATH}/../rtl/sources.txt" "$@"
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
include ../../common/sw/common_include.mk

# Primary test name
TEST = cci_hello

# Build directory
OBJDIR = obj
CFLAGS += -I./$(OBJDIR)
CPPFLAGS += -I./$(OBJDIR)

# Files and folders
SRCS = $(TEST).c
OBJS = $(addprefix $(OBJDIR)/,$(patsubst %.c,%.o,$(SRCS)))

# Targets: both $(TEST) (linked with FPGA_LIBS) and $(TEST)_ase (linked with
# ASE_LIBS) are built by default.
all: $(TEST) $(TEST)_ase

# AFU info from JSON file, including AFU UUID
AFU_JSON_INFO = $(OBJDIR)/afu_json_info.h

# objdir is an order-only prerequisite, so it is excluded from $^ and never
# forces a rebuild of the header.
$(AFU_JSON_INFO): ../hw/rtl/$(TEST).json | objdir
	afu_json_mgr json-info --afu-json=$^ --c-hdr=$@

# Every object depends on the generated header being present.
$(OBJS): $(AFU_JSON_INFO)

$(TEST): $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS) $(FPGA_LIBS)

$(TEST)_ase: $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS) $(ASE_LIBS)

$(OBJDIR)/%.o: %.c | objdir
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -rf $(TEST) $(TEST)_ase $(OBJDIR)

objdir:
	@mkdir -p $(OBJDIR)

# objdir names no real file; declaring it phony keeps the build working even
# if a file or directory called "objdir" exists in this directory.
.PHONY: all clean objdir
|
|
||||||
@@ -1,210 +0,0 @@
|
|||||||
//
|
|
||||||
// Copyright (c) 2017, Intel Corporation
|
|
||||||
// All rights reserved.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// Redistributions of source code must retain the above copyright notice, this
|
|
||||||
// list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// Neither the name of the Intel Corporation nor the names of its contributors
|
|
||||||
// may be used to endorse or promote products derived from this software
|
|
||||||
// without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
// POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <uuid/uuid.h>
|
|
||||||
|
|
||||||
#include <opae/fpga.h>
|
|
||||||
|
|
||||||
// State from the AFU's JSON file, extracted using OPAE's afu_json_mgr script
|
|
||||||
#include "afu_json_info.h"
|
|
||||||
|
|
||||||
#define CACHELINE_BYTES 64
|
|
||||||
#define CL(x) ((x) * CACHELINE_BYTES)
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// Search for an accelerator matching the requested UUID and connect to it.
|
|
||||||
//
|
|
||||||
static fpga_handle connect_to_accel(const char *accel_uuid)
|
|
||||||
{
|
|
||||||
fpga_properties filter = NULL;
|
|
||||||
fpga_guid guid;
|
|
||||||
fpga_token accel_token;
|
|
||||||
uint32_t num_matches;
|
|
||||||
fpga_handle accel_handle;
|
|
||||||
fpga_result r;
|
|
||||||
|
|
||||||
// Don't print verbose messages in ASE by default
|
|
||||||
//setenv("ASE_LOG", "0", 0);
|
|
||||||
|
|
||||||
// Set up a filter that will search for an accelerator
|
|
||||||
fpgaGetProperties(NULL, &filter);
|
|
||||||
fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
|
|
||||||
|
|
||||||
// Add the desired UUID to the filter
|
|
||||||
uuid_parse(accel_uuid, guid);
|
|
||||||
fpgaPropertiesSetGUID(filter, guid);
|
|
||||||
|
|
||||||
// Do the search across the available FPGA contexts
|
|
||||||
num_matches = 1;
|
|
||||||
fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
|
|
||||||
|
|
||||||
// Not needed anymore
|
|
||||||
fpgaDestroyProperties(&filter);
|
|
||||||
|
|
||||||
if (num_matches < 1)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "Accelerator %s not found!\n", accel_uuid);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open accelerator
|
|
||||||
r = fpgaOpen(accel_token, &accel_handle, 0);
|
|
||||||
assert(FPGA_OK == r);
|
|
||||||
|
|
||||||
// Done with token
|
|
||||||
fpgaDestroyToken(&accel_token);
|
|
||||||
|
|
||||||
return accel_handle;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// Allocate a buffer in I/O memory, shared with the FPGA.
|
|
||||||
//
|
|
||||||
/*
 * Allocate a buffer shared with the FPGA and look up its I/O address.
 *
 * accel_handle: open accelerator handle.
 * size:         requested buffer size in bytes; must be positive.
 * wsid:         out - workspace ID of the buffer, needed to release it.
 * io_addr:      out - address of the buffer as seen by the accelerator.
 *
 * Returns the host pointer to the buffer, or NULL on any failure. On
 * failure no buffer remains allocated.
 */
static volatile void* alloc_buffer(fpga_handle accel_handle,
                                   ssize_t size,
                                   uint64_t *wsid,
                                   uint64_t *io_addr)
{
    fpga_result r;
    volatile void* buf = NULL;

    // A non-positive size would be converted to a huge unsigned length.
    if (size <= 0) return NULL;

    r = fpgaPrepareBuffer(accel_handle, (uint64_t)size, (void*)&buf, wsid, 0);
    if (FPGA_OK != r) return NULL;

    // Get the I/O address of the buffer in the accelerator. Checked
    // explicitly (not with assert) so the failure is not ignored under
    // NDEBUG, and the buffer is released so it does not leak.
    r = fpgaGetIOAddress(accel_handle, *wsid, io_addr);
    if (FPGA_OK != r)
    {
        fpgaReleaseBuffer(accel_handle, *wsid);
        return NULL;
    }

    return buf;
}
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
|
||||||
{
|
|
||||||
fpga_handle accel_handle;
|
|
||||||
volatile char *buf;
|
|
||||||
volatile char *buf_r;
|
|
||||||
uint64_t wsid1;
|
|
||||||
uint64_t wsid2;
|
|
||||||
uint64_t buf_pa;
|
|
||||||
uint64_t ret_buf_pa;
|
|
||||||
uint64_t buf_rpa;
|
|
||||||
uint64_t ret_buf_rpa;
|
|
||||||
fpga_result r;
|
|
||||||
|
|
||||||
// Find and connect to the accelerator
|
|
||||||
accel_handle = connect_to_accel(AFU_ACCEL_UUID);
|
|
||||||
|
|
||||||
// Allocate a single page memory buffer for write
|
|
||||||
buf = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
|
|
||||||
&wsid1, &buf_pa);
|
|
||||||
// Allocate a single page memory buffer for read
|
|
||||||
buf_r = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
|
|
||||||
&wsid2, &buf_rpa);
|
|
||||||
assert(NULL != buf);
|
|
||||||
|
|
||||||
//// Set the low byte of the shared buffer to 0. The FPGA will write
|
|
||||||
//// a non-zero value to it.
|
|
||||||
//buf[0] = 0;
|
|
||||||
|
|
||||||
// Set the low byte of the shared buffer buf_r to 0. The FPGA will read
|
|
||||||
// the values and write to buf address
|
|
||||||
buf[0] = 5;
|
|
||||||
buf_r[0] = 5;
|
|
||||||
|
|
||||||
// Tell the accelerator the address of the buffer using cache line
|
|
||||||
// addresses. The accelerator will respond by writing to the buffer.
|
|
||||||
r = fpgaWriteMMIO64(accel_handle, 0, 0, buf_pa / CL(1));
|
|
||||||
printf("Write address is %08lx\n", buf_pa);
|
|
||||||
printf("Write address div 64 is %08lx\n", buf_pa/ CL(1));
|
|
||||||
assert(FPGA_OK == r);
|
|
||||||
|
|
||||||
// Wait for response from FPGA. Check using fpgaReadMMIO
|
|
||||||
//r = fpgaReadMMIO64(accel_handle, 0, 0, &ret_buf_pa);
|
|
||||||
//printf("Returned write is %08lx\n", ret_buf_pa);
|
|
||||||
//assert(FPGA_OK == r);
|
|
||||||
|
|
||||||
///////////////////// Added to check fpgaRead
|
|
||||||
// Wait for response from FPGA. Check using fpgaReadMMIO
|
|
||||||
r = fpgaReadMMIO64(accel_handle, 0, 5 * sizeof(uint64_t), &ret_buf_rpa);
|
|
||||||
printf("Returned read at 10 is %08lx\n", ret_buf_rpa);
|
|
||||||
assert(FPGA_OK == r);
|
|
||||||
///////////////////////////////////////////////
|
|
||||||
|
|
||||||
|
|
||||||
// Tell the accelerator the address of the buffer using cache line
|
|
||||||
// addresses. The accelerator will read from the buffer.
|
|
||||||
// Write the address to MMIO 1
|
|
||||||
r = fpgaWriteMMIO64(accel_handle, 0, sizeof(uint64_t), buf_rpa / CL(1));
|
|
||||||
printf("Read address is %08lx\n", buf_rpa);
|
|
||||||
printf("Read address div64 is %08lx\n", buf_rpa / CL(1));
|
|
||||||
assert(FPGA_OK == r);
|
|
||||||
|
|
||||||
// Wait for response from FPGA. Check using fpgaReadMMIO
|
|
||||||
//r = fpgaReadMMIO64(accel_handle, 0, sizeof(uint64_t), &ret_buf_rpa);
|
|
||||||
//printf("Returned write is %08lx\n", ret_buf_rpa);
|
|
||||||
//assert(FPGA_OK == r);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Update this
|
|
||||||
// Spin, waiting for the value in memory to change to something non-zero.
|
|
||||||
while (5 == buf[0])
|
|
||||||
{
|
|
||||||
// A well-behaved program would use _mm_pause(), nanosleep() or
|
|
||||||
// equivalent to save power here.
|
|
||||||
};
|
|
||||||
|
|
||||||
// Print the string written by the FPGA
|
|
||||||
printf("%d\n", buf[0]);
|
|
||||||
|
|
||||||
do {
|
|
||||||
//printf("%d\n", buf[0]);
|
|
||||||
} while (10 != buf[0]);
|
|
||||||
|
|
||||||
// Done
|
|
||||||
fpgaReleaseBuffer(accel_handle, wsid1);
|
|
||||||
fpgaReleaseBuffer(accel_handle, wsid2);
|
|
||||||
fpgaClose(accel_handle);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
//
|
|
||||||
// Generated by afu_json_mgr from ../hw/rtl/cci_hello.json
|
|
||||||
//
|
|
||||||
|
|
||||||
#ifndef __AFU_JSON_INFO__
|
|
||||||
#define __AFU_JSON_INFO__
|
|
||||||
|
|
||||||
#define AFU_ACCEL_NAME "cci_hello"
|
|
||||||
#define AFU_ACCEL_UUID "C6AA954A-9B91-4A37-ABC1-1D9F0709DCC3"
|
|
||||||
#define AFU_IMAGE_POWER 0
|
|
||||||
#define AFU_TOP_IFC "ccip_std_afu"
|
|
||||||
|
|
||||||
#endif // __AFU_JSON_INFO__
|
|
||||||
Binary file not shown.
@@ -38,7 +38,7 @@ module VX_commit #(
|
|||||||
.count (num_commits)
|
.count (num_commits)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign cmt_to_csr_if.valid = (| commited_mask);
|
assign cmt_to_csr_if.valid = (| commited_mask);
|
||||||
assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num;
|
assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num;
|
||||||
assign cmt_to_csr_if.num_commits = num_commits;
|
assign cmt_to_csr_if.num_commits = num_commits;
|
||||||
|
|
||||||
@@ -46,16 +46,16 @@ module VX_commit #(
|
|||||||
|
|
||||||
integer i;
|
integer i;
|
||||||
|
|
||||||
reg [`FFG_BITS-1:0] fflags;
|
fflags_t fflags;
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
fflags = 0;
|
fflags = 0;
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||||
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
|
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
|
||||||
fflags[0] |= fpu_commit_if.fflags[i][0];
|
fflags.NX |= fpu_commit_if.fflags[i].NX;
|
||||||
fflags[1] |= fpu_commit_if.fflags[i][1];
|
fflags.UF |= fpu_commit_if.fflags[i].UF;
|
||||||
fflags[2] |= fpu_commit_if.fflags[i][2];
|
fflags.OF |= fpu_commit_if.fflags[i].OF;
|
||||||
fflags[3] |= fpu_commit_if.fflags[i][3];
|
fflags.DZ |= fpu_commit_if.fflags[i].DZ;
|
||||||
fflags[4] |= fpu_commit_if.fflags[i][4];
|
fflags.NV |= fpu_commit_if.fflags[i].NV;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -71,8 +71,8 @@ module VX_csr_data #(
|
|||||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
|
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
|
||||||
|
|
||||||
default: begin
|
default: begin
|
||||||
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
|
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
|
||||||
end
|
end
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -38,7 +38,7 @@
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`define LATENCY_IDIV 22
|
`define LATENCY_IDIV 23
|
||||||
`define LATENCY_IMUL 2
|
`define LATENCY_IMUL 2
|
||||||
|
|
||||||
`define LATENCY_FDIV 16
|
`define LATENCY_FDIV 16
|
||||||
@@ -201,13 +201,6 @@
|
|||||||
`define FRM_DYN 3'b111 // dynamic mode
|
`define FRM_DYN 3'b111 // dynamic mode
|
||||||
`define FRM_BITS 3
|
`define FRM_BITS 3
|
||||||
|
|
||||||
`define FFG_NX 0 // inexact
|
|
||||||
`define FFG_UF 1 // underflow
|
|
||||||
`define FFG_OF 2 // overflow
|
|
||||||
`define FFG_DZ 3 // division by zero
|
|
||||||
`define FFG_NV 4 // invalid
|
|
||||||
`define FFG_BITS 5
|
|
||||||
|
|
||||||
`define GPU_TMC 3'h0
|
`define GPU_TMC 3'h0
|
||||||
`define GPU_WSPAWN 3'h1
|
`define GPU_WSPAWN 3'h1
|
||||||
`define GPU_SPLIT 3'h2
|
`define GPU_SPLIT 3'h2
|
||||||
@@ -440,4 +433,14 @@ typedef struct packed {
|
|||||||
logic is_quiet;
|
logic is_quiet;
|
||||||
} fp_type_t;
|
} fp_type_t;
|
||||||
|
|
||||||
|
typedef struct packed {
|
||||||
|
logic NV; // Invalid
|
||||||
|
logic DZ; // Divide by zero
|
||||||
|
logic OF; // Overflow
|
||||||
|
logic UF; // Underflow
|
||||||
|
logic NX; // Inexact
|
||||||
|
} fflags_t;
|
||||||
|
|
||||||
|
`define FFG_BITS $bits(fflags_t)
|
||||||
|
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -28,13 +28,16 @@ module VX_issue #(
|
|||||||
|
|
||||||
wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag;
|
||||||
|
|
||||||
wire gpr_busy = ~gpr_read_if.in_ready;
|
wire schedule_delay;
|
||||||
wire alu_busy = ~alu_req_if.ready;
|
|
||||||
wire lsu_busy = ~lsu_req_if.ready;
|
wire gpr_busy = ~gpr_read_if.in_ready;
|
||||||
wire csr_busy = ~csr_req_if.ready;
|
|
||||||
wire mul_busy = ~mul_req_if.ready;
|
wire ex_busy = (~alu_req_if.ready && (decode_if.ex_type == `EX_ALU))
|
||||||
wire fpu_busy = ~mul_req_if.ready;
|
|| (~lsu_req_if.ready && (decode_if.ex_type == `EX_LSU))
|
||||||
wire gpu_busy = ~gpu_req_if.ready;
|
|| (~csr_req_if.ready && (decode_if.ex_type == `EX_CSR))
|
||||||
|
|| (~mul_req_if.ready && (decode_if.ex_type == `EX_MUL))
|
||||||
|
|| (~fpu_req_if.ready && (decode_if.ex_type == `EX_FPU))
|
||||||
|
|| (~gpu_req_if.ready && (decode_if.ex_type == `EX_GPU));
|
||||||
|
|
||||||
VX_scheduler #(
|
VX_scheduler #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
@@ -44,14 +47,10 @@ module VX_issue #(
|
|||||||
.decode_if (decode_if),
|
.decode_if (decode_if),
|
||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
.cmt_to_issue_if(cmt_to_issue_if),
|
.cmt_to_issue_if(cmt_to_issue_if),
|
||||||
.gpr_busy (gpr_busy),
|
.ex_busy (ex_busy),
|
||||||
.alu_busy (alu_busy),
|
.gpr_busy (gpr_busy),
|
||||||
.lsu_busy (lsu_busy),
|
.issue_tag (issue_tag),
|
||||||
.csr_busy (csr_busy),
|
.schedule_delay (schedule_delay)
|
||||||
.mul_busy (mul_busy),
|
|
||||||
.fpu_busy (fpu_busy),
|
|
||||||
.gpu_busy (gpu_busy),
|
|
||||||
.issue_tag (issue_tag)
|
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_gpr_stage #(
|
VX_gpr_stage #(
|
||||||
@@ -66,8 +65,8 @@ module VX_issue #(
|
|||||||
VX_decode_if decode_tmp_if();
|
VX_decode_if decode_tmp_if();
|
||||||
VX_gpr_read_if gpr_read_tmp_if();
|
VX_gpr_read_if gpr_read_tmp_if();
|
||||||
|
|
||||||
wire stall = ~alu_req_if.ready || ~decode_if.ready;
|
wire stall = schedule_delay;
|
||||||
wire flush = alu_req_if.ready && ~decode_if.ready;
|
wire flush = schedule_delay && ~ex_busy;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||||
@@ -80,17 +79,19 @@ module VX_issue #(
|
|||||||
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_read_tmp_if.rs1_data, gpr_read_tmp_if.rs2_data, gpr_read_tmp_if.rs3_data})
|
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_read_tmp_if.rs1_data, gpr_read_tmp_if.rs2_data, gpr_read_tmp_if.rs3_data})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assign decode_if.ready = ~stall;
|
||||||
|
|
||||||
VX_issue_demux issue_demux (
|
VX_issue_demux issue_demux (
|
||||||
.decode_if (decode_tmp_if),
|
.decode_if (decode_tmp_if),
|
||||||
.gpr_read_if (gpr_read_tmp_if),
|
.gpr_read_if(gpr_read_tmp_if),
|
||||||
.issue_tag (issue_tmp_tag),
|
.issue_tag (issue_tmp_tag),
|
||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
.csr_req_if (csr_req_if),
|
.csr_req_if (csr_req_if),
|
||||||
.mul_req_if (mul_req_if),
|
.mul_req_if (mul_req_if),
|
||||||
.fpu_req_if (fpu_req_if),
|
.fpu_req_if (fpu_req_if),
|
||||||
.gpu_req_if (gpu_req_if)
|
.gpu_req_if (gpu_req_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
|
|||||||
@@ -8,64 +8,52 @@ module VX_scheduler #(
|
|||||||
|
|
||||||
VX_decode_if decode_if,
|
VX_decode_if decode_if,
|
||||||
VX_wb_if writeback_if,
|
VX_wb_if writeback_if,
|
||||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||||
|
input wire ex_busy,
|
||||||
input wire gpr_busy,
|
input wire gpr_busy,
|
||||||
input wire alu_busy,
|
output wire [`ISTAG_BITS-1:0] issue_tag,
|
||||||
input wire lsu_busy,
|
output wire schedule_delay
|
||||||
input wire csr_busy,
|
|
||||||
input wire mul_busy,
|
|
||||||
input wire fpu_busy,
|
|
||||||
input wire gpu_busy,
|
|
||||||
output wire [`ISTAG_BITS-1:0] issue_tag
|
|
||||||
);
|
);
|
||||||
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||||
reg [`NUM_THREADS-1:0] inuse_registers [`NUM_WARPS-1:0][`NUM_REGS-1:0];
|
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||||
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||||
|
|
||||||
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.warp_num] & decode_if.reg_use_mask;
|
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.warp_num] & decode_if.reg_use_mask;
|
||||||
wire inuse_hazard = (inuse_mask != 0);
|
wire inuse_hazard = (inuse_mask != 0);
|
||||||
|
|
||||||
wire exu_stalled = (alu_busy && (decode_if.ex_type == `EX_ALU))
|
|
||||||
|| (lsu_busy && (decode_if.ex_type == `EX_LSU))
|
|
||||||
|| (csr_busy && (decode_if.ex_type == `EX_CSR))
|
|
||||||
|| (mul_busy && (decode_if.ex_type == `EX_MUL))
|
|
||||||
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|
|
||||||
|| (gpu_busy && (decode_if.ex_type == `EX_GPU));
|
|
||||||
|
|
||||||
wire issue_buf_full;
|
wire issue_buf_full;
|
||||||
|
|
||||||
wire stall = (gpr_busy || exu_stalled || inuse_hazard || issue_buf_full) && decode_if.valid;
|
wire stall = gpr_busy || ex_busy || inuse_hazard || issue_buf_full;
|
||||||
|
|
||||||
wire acquire_rd = decode_if.valid && (decode_if.wb != 0) && ~stall;
|
wire issue_fire = decode_if.valid && ~stall;
|
||||||
|
|
||||||
|
wire acquire_rd = issue_fire && (decode_if.wb != 0);
|
||||||
|
|
||||||
wire release_rd = writeback_if.valid;
|
wire release_rd = writeback_if.valid;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.thread_mask;
|
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.warp_num, writeback_if.rd}] & ~writeback_if.thread_mask;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
integer i, w;
|
for (integer w = 0; w < `NUM_WARPS; w++) begin
|
||||||
for (w = 0; w < `NUM_WARPS; w++) begin
|
for (integer i = 0; i < `NUM_REGS; i++) begin
|
||||||
for (i = 0; i < `NUM_REGS; i++) begin
|
inuse_registers[w * `NUM_REGS + i] <= 0;
|
||||||
inuse_registers[w][i] <= 0;
|
|
||||||
end
|
end
|
||||||
inuse_reg_mask[w] <= `NUM_REGS'(0);
|
inuse_reg_mask[w] <= `NUM_REGS'(0);
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
if (acquire_rd) begin
|
if (acquire_rd) begin
|
||||||
inuse_registers[decode_if.warp_num][decode_if.rd] <= decode_if.thread_mask;
|
inuse_registers[{decode_if.warp_num, decode_if.rd}] <= decode_if.thread_mask;
|
||||||
inuse_reg_mask[decode_if.warp_num][decode_if.rd] <= 1;
|
inuse_reg_mask[decode_if.warp_num][decode_if.rd] <= 1;
|
||||||
end
|
end
|
||||||
if (release_rd) begin
|
if (release_rd) begin
|
||||||
assert(inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] != 0);
|
assert(inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||||
inuse_registers[writeback_if.warp_num][writeback_if.rd] <= inuse_registers_n;
|
inuse_registers[{writeback_if.warp_num, writeback_if.rd}] <= inuse_registers_n;
|
||||||
inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] <= (| inuse_registers_n);
|
inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] <= (| inuse_registers_n);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
wire issue_fire = decode_if.valid && ~stall;
|
|
||||||
|
|
||||||
VX_cam_buffer #(
|
VX_cam_buffer #(
|
||||||
.DATAW ($bits(issue_data_t)),
|
.DATAW ($bits(issue_data_t)),
|
||||||
.SIZE (`ISSUEQ_SIZE),
|
.SIZE (`ISSUEQ_SIZE),
|
||||||
@@ -82,14 +70,14 @@ module VX_scheduler #(
|
|||||||
.full (issue_buf_full)
|
.full (issue_buf_full)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign decode_if.ready = ~stall;
|
assign schedule_delay = stall;
|
||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (stall) begin
|
if (decode_if.valid && stall) begin
|
||||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, gpr=%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b",
|
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b, gpr_busy=%b",
|
||||||
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full, inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1],
|
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full,
|
||||||
inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], gpr_busy, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
|
inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy, gpr_busy);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -18,113 +18,131 @@ module VX_writeback #(
|
|||||||
// outputs
|
// outputs
|
||||||
VX_wb_if writeback_if
|
VX_wb_if writeback_if
|
||||||
);
|
);
|
||||||
|
reg [`ISSUEQ_SIZE-1:0] wb_valid_table, wb_valid_table_n;
|
||||||
|
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] wb_data_table, wb_data_table_n;
|
||||||
|
reg [`ISSUEQ_SIZE-1:0][`NW_BITS-1:0] wb_warp_num_table, wb_warp_num_table_n;
|
||||||
|
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] wb_thread_mask_table, wb_thread_mask_table_n;
|
||||||
|
reg [`ISSUEQ_SIZE-1:0][31:0] wb_curr_PC_table, wb_curr_PC_table_n;
|
||||||
|
reg [`ISSUEQ_SIZE-1:0][`NR_BITS-1:0] wb_rd_table, wb_rd_table_n;
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0][31:0] wb_data_table [`ISSUEQ_SIZE-1:0];
|
reg [`NUM_THREADS-1:0][31:0] wb_data, wb_data_n;
|
||||||
reg [`NW_BITS-1:0] wb_warp_num_table [`ISSUEQ_SIZE-1:0];
|
reg [`NW_BITS-1:0] wb_warp_num, wb_warp_num_n;
|
||||||
reg [`NUM_THREADS-1:0] wb_thread_mask_table [`ISSUEQ_SIZE-1:0];
|
reg [`NUM_THREADS-1:0] wb_thread_mask, wb_thread_mask_n;
|
||||||
reg [31:0] wb_curr_PC_table [`ISSUEQ_SIZE-1:0];
|
reg [31:0] wb_curr_PC, wb_curr_PC_n;
|
||||||
reg [`NR_BITS-1:0] wb_rd_table [`ISSUEQ_SIZE-1:0];
|
reg [`NR_BITS-1:0] wb_rd, wb_rd_n;
|
||||||
|
|
||||||
reg [`ISSUEQ_SIZE-1:0] wb_valid_table;
|
reg [`ISTAG_BITS-1:0] wb_index;
|
||||||
reg [`ISSUEQ_SIZE-1:0] wb_valid_table_n;
|
reg [`ISTAG_BITS-1:0] wb_index_n;
|
||||||
|
|
||||||
reg [`ISTAG_BITS-1:0] wb_index;
|
|
||||||
wire [`ISTAG_BITS-1:0] wb_index_n;
|
|
||||||
|
|
||||||
reg wb_valid;
|
reg wb_valid;
|
||||||
wire wb_valid_n;
|
reg wb_valid_n;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
wb_valid_table_n = wb_valid_table;
|
wb_valid_table_n = wb_valid_table;
|
||||||
|
wb_warp_num_table_n = wb_warp_num_table;
|
||||||
|
wb_thread_mask_table_n = wb_thread_mask_table;
|
||||||
|
wb_curr_PC_table_n = wb_curr_PC_table;
|
||||||
|
wb_rd_table_n = wb_rd_table;
|
||||||
|
wb_data_table_n = wb_data_table;
|
||||||
|
|
||||||
if (wb_valid) begin
|
if (wb_valid) begin
|
||||||
wb_valid_table_n[wb_index] = 0;
|
wb_valid_table_n[wb_index] = 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (alu_commit_if.valid) begin
|
if (alu_commit_if.valid) begin
|
||||||
wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb;
|
wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb;
|
||||||
|
wb_thread_mask_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.thread_mask;
|
||||||
|
wb_data_table_n [alu_commit_if.issue_tag] = alu_commit_if.data;
|
||||||
|
wb_warp_num_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.warp_num;
|
||||||
|
wb_curr_PC_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.curr_PC;
|
||||||
|
wb_rd_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.rd;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (lsu_commit_if.valid) begin
|
if (lsu_commit_if.valid) begin
|
||||||
wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb;
|
wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb;
|
||||||
|
wb_thread_mask_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.thread_mask;
|
||||||
|
wb_data_table_n [lsu_commit_if.issue_tag] = lsu_commit_if.data;
|
||||||
|
wb_warp_num_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.warp_num;
|
||||||
|
wb_curr_PC_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.curr_PC;
|
||||||
|
wb_rd_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.rd;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (csr_commit_if.valid) begin
|
if (csr_commit_if.valid) begin
|
||||||
wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb;
|
wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb;
|
||||||
|
wb_thread_mask_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.thread_mask;
|
||||||
|
wb_data_table_n [csr_commit_if.issue_tag] = csr_commit_if.data;
|
||||||
|
wb_warp_num_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.warp_num;
|
||||||
|
wb_curr_PC_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.curr_PC;
|
||||||
|
wb_rd_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.rd;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (mul_commit_if.valid) begin
|
if (mul_commit_if.valid) begin
|
||||||
wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb;
|
wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb;
|
||||||
|
wb_thread_mask_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.thread_mask;
|
||||||
|
wb_data_table_n [mul_commit_if.issue_tag] = mul_commit_if.data;
|
||||||
|
wb_warp_num_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.warp_num;
|
||||||
|
wb_curr_PC_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.curr_PC;
|
||||||
|
wb_rd_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.rd;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (fpu_commit_if.valid) begin
|
if (fpu_commit_if.valid) begin
|
||||||
wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb;
|
wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb;
|
||||||
|
wb_thread_mask_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.thread_mask;
|
||||||
|
wb_data_table_n [fpu_commit_if.issue_tag] = fpu_commit_if.data;
|
||||||
|
wb_warp_num_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.warp_num;
|
||||||
|
wb_curr_PC_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.curr_PC;
|
||||||
|
wb_rd_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.rd;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
VX_priority_encoder #(
|
integer i;
|
||||||
.N(`ISSUEQ_SIZE)
|
|
||||||
) wb_select (
|
always @(*) begin
|
||||||
.data_in (wb_valid_table_n),
|
wb_index_n = 0;
|
||||||
.data_out (wb_index_n),
|
wb_valid_n = 0;
|
||||||
.valid_out (wb_valid_n)
|
for (i = `ISSUEQ_SIZE-1; i >= 0; i--) begin
|
||||||
);
|
if (wb_valid_table_n[i]) begin
|
||||||
|
wb_index_n = `ISTAG_BITS'(i);
|
||||||
|
wb_valid_n = 1;
|
||||||
|
wb_thread_mask_n= wb_thread_mask_table_n[i];
|
||||||
|
wb_warp_num_n = wb_warp_num_table_n[i];
|
||||||
|
wb_curr_PC_n = wb_curr_PC_table_n[i];
|
||||||
|
wb_rd_n = wb_rd_table_n[i];
|
||||||
|
wb_data_n = wb_data_table_n[i];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
wb_valid_table <= 0;
|
wb_valid_table <= 0;
|
||||||
wb_index <= 0;
|
wb_index <= 0;
|
||||||
wb_valid <= 0;
|
wb_valid <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (alu_commit_if.valid) begin
|
wb_valid_table <= wb_valid_table_n;
|
||||||
wb_data_table [alu_commit_if.issue_tag] <= alu_commit_if.data;
|
wb_thread_mask_table <= wb_thread_mask_table_n;
|
||||||
wb_warp_num_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.warp_num;
|
wb_warp_num_table <= wb_warp_num_table_n;
|
||||||
wb_thread_mask_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.thread_mask;
|
wb_curr_PC_table <= wb_curr_PC_table_n;
|
||||||
wb_curr_PC_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.curr_PC;
|
wb_rd_table <= wb_rd_table_n;
|
||||||
wb_rd_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd;
|
wb_data_table <= wb_data_table_n;
|
||||||
end
|
|
||||||
|
|
||||||
if (lsu_commit_if.valid) begin
|
|
||||||
wb_data_table [lsu_commit_if.issue_tag] <= lsu_commit_if.data;
|
|
||||||
wb_warp_num_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.warp_num;
|
|
||||||
wb_thread_mask_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.thread_mask;
|
|
||||||
wb_curr_PC_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.curr_PC;
|
|
||||||
wb_rd_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd;
|
|
||||||
end
|
|
||||||
|
|
||||||
if (csr_commit_if.valid) begin
|
|
||||||
wb_data_table [csr_commit_if.issue_tag] <= csr_commit_if.data;
|
|
||||||
wb_warp_num_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.warp_num;
|
|
||||||
wb_thread_mask_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.thread_mask;
|
|
||||||
wb_curr_PC_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.curr_PC;
|
|
||||||
wb_rd_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd;
|
|
||||||
end
|
|
||||||
|
|
||||||
if (mul_commit_if.valid) begin
|
|
||||||
wb_data_table [mul_commit_if.issue_tag] <= mul_commit_if.data;
|
|
||||||
wb_warp_num_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.warp_num;
|
|
||||||
wb_thread_mask_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.thread_mask;
|
|
||||||
wb_curr_PC_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.curr_PC;
|
|
||||||
wb_rd_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.rd;
|
|
||||||
end
|
|
||||||
|
|
||||||
if (fpu_commit_if.valid) begin
|
wb_index <= wb_index_n;
|
||||||
wb_data_table [fpu_commit_if.issue_tag] <= fpu_commit_if.data;
|
wb_valid <= wb_valid_n && writeback_if.ready;
|
||||||
wb_warp_num_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.warp_num;
|
wb_thread_mask <= wb_thread_mask_n;
|
||||||
wb_thread_mask_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.thread_mask;
|
wb_warp_num <= wb_warp_num_n;
|
||||||
wb_curr_PC_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.curr_PC;
|
wb_curr_PC <= wb_curr_PC_n;
|
||||||
wb_rd_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd;
|
wb_rd <= wb_rd_n;
|
||||||
end
|
wb_data <= wb_data_n;
|
||||||
|
|
||||||
wb_valid_table <= wb_valid_table_n;
|
|
||||||
wb_index <= wb_index_n;
|
|
||||||
wb_valid <= wb_valid_n && writeback_if.ready;
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
// writeback request
|
// writeback request
|
||||||
assign writeback_if.valid = wb_valid;
|
assign writeback_if.valid = wb_valid;
|
||||||
assign writeback_if.warp_num = wb_warp_num_table [wb_index];
|
assign writeback_if.thread_mask = wb_thread_mask;
|
||||||
assign writeback_if.thread_mask = wb_thread_mask_table [wb_index];
|
assign writeback_if.warp_num = wb_warp_num;
|
||||||
assign writeback_if.curr_PC = wb_curr_PC_table [wb_index];
|
assign writeback_if.curr_PC = wb_curr_PC;
|
||||||
assign writeback_if.rd = wb_rd_table [wb_index];
|
assign writeback_if.rd = wb_rd;
|
||||||
assign writeback_if.data = wb_data_table [wb_index];
|
assign writeback_if.data = wb_data;
|
||||||
|
|
||||||
// commit back-pressure
|
// commit back-pressure
|
||||||
assign alu_commit_if.ready = 1'b1;
|
assign alu_commit_if.ready = 1'b1;
|
||||||
|
|||||||
2
hw/rtl/cache/VX_tag_data_access.v
vendored
2
hw/rtl/cache/VX_tag_data_access.v
vendored
@@ -26,11 +26,13 @@ module VX_tag_data_access #(
|
|||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
|
`IGNORE_WARNINGS_BEGIN
|
||||||
input wire[31:0] debug_pc_st1e,
|
input wire[31:0] debug_pc_st1e,
|
||||||
input wire debug_wb_st1e,
|
input wire debug_wb_st1e,
|
||||||
input wire[`NR_BITS-1:0] debug_rd_st1e,
|
input wire[`NR_BITS-1:0] debug_rd_st1e,
|
||||||
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
|
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
|
||||||
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
|
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
|
||||||
|
`IGNORE_WARNINGS_END
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
input wire stall,
|
input wire stall,
|
||||||
|
|||||||
@@ -5,8 +5,8 @@ module VX_fp_fpga (
|
|||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
output wire in_ready,
|
|
||||||
input wire in_valid,
|
input wire in_valid,
|
||||||
|
output wire in_ready,
|
||||||
|
|
||||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||||
|
|
||||||
@@ -19,7 +19,7 @@ module VX_fp_fpga (
|
|||||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||||
|
|
||||||
output wire has_fflags,
|
output wire has_fflags,
|
||||||
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
|
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||||
|
|
||||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||||
|
|
||||||
@@ -29,31 +29,30 @@ module VX_fp_fpga (
|
|||||||
localparam NUM_FPC = 12;
|
localparam NUM_FPC = 12;
|
||||||
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
||||||
|
|
||||||
reg [FPC_BITS-1:0] core_select;
|
|
||||||
|
|
||||||
wire [NUM_FPC-1:0] core_in_ready;
|
wire [NUM_FPC-1:0] core_in_ready;
|
||||||
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] core_result;
|
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] core_result;
|
||||||
wire fpnew_has_fflags;
|
wire fpnew_has_fflags;
|
||||||
wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fpnew_fflags;
|
fflags_t fpnew_fflags;
|
||||||
wire [NUM_FPC-1:0][`ISTAG_BITS-1:0] core_out_tag;
|
wire [NUM_FPC-1:0][`ISTAG_BITS-1:0] core_out_tag;
|
||||||
wire [NUM_FPC-1:0] core_out_ready;
|
wire [NUM_FPC-1:0] core_out_ready;
|
||||||
wire [NUM_FPC-1:0] core_out_valid;
|
wire [NUM_FPC-1:0] core_out_valid;
|
||||||
|
|
||||||
reg negate_output;
|
reg [FPC_BITS-1:0] core_select;
|
||||||
|
reg fmadd_negate;
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
core_select = 0;
|
core_select = 0;
|
||||||
negate_output = 0;
|
fmadd_negate = 0;
|
||||||
case (op)
|
case (op)
|
||||||
`FPU_ADD: core_select = 1;
|
`FPU_ADD: core_select = 1;
|
||||||
`FPU_SUB: core_select = 2;
|
`FPU_SUB: core_select = 2;
|
||||||
`FPU_MUL: core_select = 3;
|
`FPU_MUL: core_select = 3;
|
||||||
`FPU_MADD: core_select = 4;
|
`FPU_MADD: core_select = 4;
|
||||||
`FPU_MSUB: core_select = 5;
|
`FPU_MSUB: core_select = 5;
|
||||||
`FPU_NMSUB: begin core_select = 4; negate_output = 1; end
|
`FPU_NMSUB: begin core_select = 4; fmadd_negate = 1; end
|
||||||
`FPU_NMADD: begin core_select = 5; negate_output = 1; end
|
`FPU_NMADD: begin core_select = 5; fmadd_negate = 1; end
|
||||||
`FPU_DIV: core_select = 6;
|
`FPU_DIV: core_select = 6;
|
||||||
`FPU_SQRT: core_select = 7;
|
`FPU_SQRT: core_select = 7;
|
||||||
`FPU_CVTWS: core_select = 8;
|
`FPU_CVTWS: core_select = 8;
|
||||||
@@ -130,7 +129,7 @@ module VX_fp_fpga (
|
|||||||
.in_valid (in_valid && (core_select == 4)),
|
.in_valid (in_valid && (core_select == 4)),
|
||||||
.in_ready (core_in_ready[4]),
|
.in_ready (core_in_ready[4]),
|
||||||
.in_tag (in_tag),
|
.in_tag (in_tag),
|
||||||
.negate (negate_output),
|
.negate (fmadd_negate),
|
||||||
.dataa (dataa),
|
.dataa (dataa),
|
||||||
.datab (datab),
|
.datab (datab),
|
||||||
.datac (datac),
|
.datac (datac),
|
||||||
@@ -146,7 +145,7 @@ module VX_fp_fpga (
|
|||||||
.in_valid (in_valid && (core_select == 5)),
|
.in_valid (in_valid && (core_select == 5)),
|
||||||
.in_ready (core_in_ready[5]),
|
.in_ready (core_in_ready[5]),
|
||||||
.in_tag (in_tag),
|
.in_tag (in_tag),
|
||||||
.negate (negate_output),
|
.negate (fmadd_negate),
|
||||||
.dataa (dataa),
|
.dataa (dataa),
|
||||||
.datab (datab),
|
.datab (datab),
|
||||||
.datac (datac),
|
.datac (datac),
|
||||||
@@ -250,10 +249,21 @@ module VX_fp_fpga (
|
|||||||
assign core_out_ready[i] = out_ready && (i == fp_index);
|
assign core_out_ready[i] = out_ready && (i == fp_index);
|
||||||
end
|
end
|
||||||
|
|
||||||
assign has_fflags = fpnew_has_fflags && (fp_index == 0);
|
wire tmp_valid = fp_valid;
|
||||||
assign fflags = fpnew_fflags;
|
wire [`ISTAG_BITS-1:0] tmp_tag = core_out_tag[fp_index];
|
||||||
assign out_tag = core_out_tag[fp_index];
|
wire [`NUM_THREADS-1:0][31:0] tmp_result = core_result[fp_index];
|
||||||
assign result = core_result[fp_index];
|
wire tmp_has_fflags = fpnew_has_fflags && (fp_index == 0);
|
||||||
assign out_valid = fp_valid;
|
fflags_t [`NUM_THREADS-1:0] tmp_flags = fpnew_fflags;
|
||||||
|
|
||||||
|
VX_generic_register #(
|
||||||
|
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + `FFG_BITS)
|
||||||
|
) nc_reg (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.stall (stall),
|
||||||
|
.flush (1'b0),
|
||||||
|
.in ({tmp_valid, tmp_tag, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||||
|
.out ({out_valid, out_tag, result, has_fflags, fflags})
|
||||||
|
);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -17,7 +17,7 @@ module VX_fp_noncomp (
|
|||||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||||
|
|
||||||
output wire has_fflags,
|
output wire has_fflags,
|
||||||
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
|
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||||
|
|
||||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||||
|
|
||||||
@@ -178,7 +178,7 @@ module VX_fp_noncomp (
|
|||||||
|
|
||||||
reg tmp_valid;
|
reg tmp_valid;
|
||||||
reg tmp_has_fflags;
|
reg tmp_has_fflags;
|
||||||
reg [`NUM_THREADS-1:0][`FFG_BITS-1:0] tmp_fflags;
|
fflags_t [`NUM_THREADS-1:0] tmp_fflags;
|
||||||
reg [`NUM_THREADS-1:0][31:0] tmp_result;
|
reg [`NUM_THREADS-1:0][31:0] tmp_result;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
@@ -199,27 +199,27 @@ module VX_fp_noncomp (
|
|||||||
case (op)
|
case (op)
|
||||||
`FPU_CLASS: begin
|
`FPU_CLASS: begin
|
||||||
tmp_result[i] = fclass_mask[i];
|
tmp_result[i] = fclass_mask[i];
|
||||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||||
end
|
end
|
||||||
`FPU_MVXW,`FPU_MVWX: begin
|
`FPU_MVXW,`FPU_MVWX: begin
|
||||||
tmp_result[i] = dataa[i];
|
tmp_result[i] = dataa[i];
|
||||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||||
end
|
end
|
||||||
`FPU_MIN,`FPU_MAX: begin
|
`FPU_MIN,`FPU_MAX: begin
|
||||||
tmp_result[i] = fminmax_res[i];
|
tmp_result[i] = fminmax_res[i];
|
||||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = {a_type[i][0] | b_type[i][0], 4'h0};
|
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0};
|
||||||
end
|
end
|
||||||
`FPU_SGNJ,`FPU_SGNJN,`FPU_SGNJX: begin
|
`FPU_SGNJ,`FPU_SGNJN,`FPU_SGNJX: begin
|
||||||
tmp_result[i] = fsgnj_res[i];
|
tmp_result[i] = fsgnj_res[i];
|
||||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||||
end
|
end
|
||||||
`FPU_CMP: begin
|
`FPU_CMP: begin
|
||||||
tmp_result[i] = fcmp_res[i];
|
tmp_result[i] = fcmp_res[i];
|
||||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = fcmp_excp[i];
|
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = fcmp_excp[i];
|
||||||
end
|
end
|
||||||
default: begin
|
default: begin
|
||||||
tmp_result[i] = 32'hdeadbeaf;
|
tmp_result[i] = 32'hdeadbeaf;
|
||||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||||
tmp_valid = 1'b0;
|
tmp_valid = 1'b0;
|
||||||
end
|
end
|
||||||
endcase
|
endcase
|
||||||
@@ -230,7 +230,7 @@ module VX_fp_noncomp (
|
|||||||
assign in_ready = ~stall;
|
assign in_ready = ~stall;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + `FFG_BITS)
|
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
|
||||||
) nc_reg (
|
) nc_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|||||||
@@ -11,8 +11,8 @@ module VX_fpnew #(
|
|||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
output wire in_ready,
|
|
||||||
input wire in_valid,
|
input wire in_valid,
|
||||||
|
output wire in_ready,
|
||||||
|
|
||||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||||
|
|
||||||
@@ -25,7 +25,7 @@ module VX_fpnew #(
|
|||||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||||
|
|
||||||
output wire has_fflags,
|
output wire has_fflags,
|
||||||
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
|
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||||
|
|
||||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||||
|
|
||||||
@@ -75,7 +75,7 @@ module VX_fpnew #(
|
|||||||
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
|
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] fpu_result;
|
wire [`NUM_THREADS-1:0][31:0] fpu_result;
|
||||||
fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1];
|
fpnew_pkg::status_t [0:`NUM_THREADS-1] fpu_status;
|
||||||
|
|
||||||
wire is_class_op_i, is_class_op_o;
|
wire is_class_op_i, is_class_op_o;
|
||||||
assign is_class_op_i = (op == `FPU_CLASS);
|
assign is_class_op_i = (op == `FPU_CLASS);
|
||||||
@@ -194,7 +194,8 @@ module VX_fpnew #(
|
|||||||
`ENABLE_TRACING
|
`ENABLE_TRACING
|
||||||
|
|
||||||
assign fpu_in_valid = in_valid;
|
assign fpu_in_valid = in_valid;
|
||||||
assign in_ready = fpu_in_ready;
|
assign in_ready = fpu_in_ready
|
||||||
|
|| ~in_valid; // fix fpnews's in_ready containing in_valid;
|
||||||
|
|
||||||
assign fpu_in_tag = in_tag;
|
assign fpu_in_tag = in_tag;
|
||||||
assign out_tag = fpu_out_tag;
|
assign out_tag = fpu_out_tag;
|
||||||
@@ -202,14 +203,7 @@ module VX_fpnew #(
|
|||||||
assign result = fpu_result;
|
assign result = fpu_result;
|
||||||
|
|
||||||
assign has_fflags = fpu_has_fflags_o;
|
assign has_fflags = fpu_has_fflags_o;
|
||||||
|
assign fflags = fpu_status;
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
assign fflags[i][`FFG_NX] = fpu_status[i].NX;
|
|
||||||
assign fflags[i][`FFG_UF] = fpu_status[i].UF;
|
|
||||||
assign fflags[i][`FFG_OF] = fpu_status[i].OF;
|
|
||||||
assign fflags[i][`FFG_DZ] = fpu_status[i].DZ;
|
|
||||||
assign fflags[i][`FFG_NV] = fpu_status[i].NV;
|
|
||||||
end
|
|
||||||
|
|
||||||
assign out_valid = fpu_out_valid;
|
assign out_valid = fpu_out_valid;
|
||||||
assign fpu_out_ready = out_ready;
|
assign fpu_out_ready = out_ready;
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ interface VX_cmt_to_csr_if ();
|
|||||||
wire [`NE_BITS:0] num_commits;
|
wire [`NE_BITS:0] num_commits;
|
||||||
|
|
||||||
wire has_fflags;
|
wire has_fflags;
|
||||||
wire [`FFG_BITS-1:0] fflags;
|
fflags_t fflags;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
||||||
|
|||||||
@@ -7,9 +7,9 @@ interface VX_fpu_to_cmt_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [`NUM_THREADS-1:0][31:0] data;
|
wire [`NUM_THREADS-1:0][31:0] data;
|
||||||
wire has_fflags;
|
wire has_fflags;
|
||||||
wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags;
|
fflags_t [`NUM_THREADS-1:0] fflags;
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ module VX_index_queue #(
|
|||||||
input wire [`LOG2UP(SIZE)-1:0] read_addr,
|
input wire [`LOG2UP(SIZE)-1:0] read_addr,
|
||||||
output wire [DATAW-1:0] read_data
|
output wire [DATAW-1:0] read_data
|
||||||
);
|
);
|
||||||
`USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0];
|
reg [DATAW-1:0] entries [SIZE-1:0];
|
||||||
reg [SIZE-1:0] valid;
|
reg [SIZE-1:0] valid;
|
||||||
reg [`LOG2UP(SIZE):0] rd_ptr, wr_ptr;
|
reg [`LOG2UP(SIZE):0] rd_ptr, wr_ptr;
|
||||||
|
|
||||||
@@ -38,7 +38,7 @@ module VX_index_queue #(
|
|||||||
valid <= 0;
|
valid <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (enqueue) begin
|
if (enqueue) begin
|
||||||
data[wr_a] <= write_data;
|
entries[wr_a] <= write_data;
|
||||||
valid[wr_a] <= 1;
|
valid[wr_a] <= 1;
|
||||||
wr_ptr <= wr_ptr + 1;
|
wr_ptr <= wr_ptr + 1;
|
||||||
end
|
end
|
||||||
@@ -52,6 +52,6 @@ module VX_index_queue #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
assign write_addr = wr_a;
|
assign write_addr = wr_a;
|
||||||
assign read_data = data[read_addr];
|
assign read_data = entries[read_addr];
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -14,7 +14,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
|||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||||
|
|
||||||
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||||
|
|
||||||
FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
|
FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
set_time_format -unit ns -decimal_places 3
|
set_time_format -unit ns -decimal_places 3
|
||||||
|
|
||||||
create_clock -name {clk} -period "300 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
create_clock -name {clk} -period "200 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||||
|
|
||||||
derive_pll_clocks -create_base_clocks
|
derive_pll_clocks -create_base_clocks
|
||||||
derive_clock_uncertainty
|
derive_clock_uncertainty
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
project_open Vortex_Socket
|
project_open VX_pipeline
|
||||||
|
|
||||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user