Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit d47cccc157
1300 changed files with 247321 additions and 311189 deletions

View File

@@ -1,195 +1,155 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
module VX_cluster #(
module VX_cluster import VX_gpu_pkg::*; #(
parameter CLUSTER_ID = 0
) (
`SCOPE_IO_VX_cluster
`SCOPE_IO_DECL
// Clock
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [`L2_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`L2_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`L2_MEM_DATA_WIDTH-1:0] mem_req_data,
output wire [`L2_MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
`ifdef PERF_ENABLE
VX_mem_perf_if.master mem_perf_if,
VX_mem_perf_if.slave perf_memsys_total_if,
`endif
// Memory response
input wire mem_rsp_valid,
input wire [`L2_MEM_DATA_WIDTH-1:0] mem_rsp_data,
input wire [`L2_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
VX_dcr_bus_if.slave dcr_bus_if,
// Memory
VX_mem_bus_if.master mem_bus_if,
// simulation helper signals
output wire sim_ebreak,
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
// Status
output wire busy
);
`STATIC_ASSERT((`L2_ENABLE == 0 || `NUM_CORES > 1), ("invalid parameter"))
output wire busy
);
wire [`NUM_CORES-1:0] per_core_mem_req_valid;
wire [`NUM_CORES-1:0] per_core_mem_req_rw;
wire [`NUM_CORES-1:0][`DCACHE_MEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
wire [`NUM_CORES-1:0][`DCACHE_MEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_req_data;
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
wire [`NUM_CORES-1:0] per_core_mem_req_ready;
`ifdef SCOPE
localparam scope_socket = 0;
`SCOPE_IO_SWITCH (scope_socket + `NUM_SOCKETS);
`endif
wire [`NUM_CORES-1:0] per_core_mem_rsp_valid;
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
`ifdef GBAR_ENABLE
wire [`NUM_CORES-1:0] per_core_busy;
VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS]();
VX_gbar_bus_if gbar_bus_if();
for (genvar i = 0; i < `NUM_CORES; i++) begin
`RESET_RELAY (gbar_reset, reset);
`RESET_RELAY (core_reset);
VX_gbar_arb #(
.NUM_REQS (`NUM_SOCKETS),
.OUT_REG ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure
) gbar_arb (
.clk (clk),
.reset (gbar_reset),
.bus_in_if (per_socket_gbar_bus_if),
.bus_out_if (gbar_bus_if)
);
VX_core #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) core (
`SCOPE_BIND_VX_cluster_core(i)
VX_gbar_unit #(
.INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID))
) gbar_unit (
.clk (clk),
.reset (gbar_reset),
.gbar_bus_if (gbar_bus_if)
);
`endif
.clk (clk),
.reset (core_reset),
.mem_req_valid (per_core_mem_req_valid[i]),
.mem_req_rw (per_core_mem_req_rw [i]),
.mem_req_byteen (per_core_mem_req_byteen[i]),
.mem_req_addr (per_core_mem_req_addr [i]),
.mem_req_data (per_core_mem_req_data [i]),
.mem_req_tag (per_core_mem_req_tag [i]),
.mem_req_ready (per_core_mem_req_ready[i]),
.mem_rsp_valid (per_core_mem_rsp_valid[i]),
.mem_rsp_data (per_core_mem_rsp_data [i]),
.mem_rsp_tag (per_core_mem_rsp_tag [i]),
.mem_rsp_ready (per_core_mem_rsp_ready[i]),
.busy (per_core_busy [i])
);
end
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_ARB_TAG_WIDTH)
) per_socket_dcache_bus_if[`NUM_SOCKETS * DCACHE_NUM_REQS]();
assign busy = (| per_core_busy);
VX_mem_bus_if #(
.DATA_SIZE (ICACHE_WORD_SIZE),
.TAG_WIDTH (ICACHE_ARB_TAG_WIDTH)
) per_socket_icache_bus_if[`NUM_SOCKETS]();
`RESET_RELAY (mem_unit_reset, reset);
VX_mem_unit #(
.CLUSTER_ID (CLUSTER_ID)
) mem_unit (
.clk (clk),
.reset (mem_unit_reset),
if (`L2_ENABLE) begin
`ifdef PERF_ENABLE
VX_perf_cache_if perf_l2cache_if();
.mem_perf_if (mem_perf_if),
`endif
`RESET_RELAY (l2_reset);
.dcache_bus_if (per_socket_dcache_bus_if),
.icache_bus_if (per_socket_icache_bus_if),
VX_cache #(
.CACHE_ID (`L2_CACHE_ID),
.CACHE_SIZE (`L2_CACHE_SIZE),
.CACHE_LINE_SIZE (`L2_CACHE_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
.NUM_PORTS (`L2_NUM_PORTS),
.WORD_SIZE (`L2_WORD_SIZE),
.NUM_REQS (`L2_NUM_REQS),
.CREQ_SIZE (`L2_CREQ_SIZE),
.CRSQ_SIZE (`L2_CRSQ_SIZE),
.MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE),
.MREQ_SIZE (`L2_MREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`L1_MEM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.MEM_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
.NC_ENABLE (1)
) l2cache (
`SCOPE_BIND_VX_cluster_l2cache
.clk (clk),
.reset (l2_reset),
.mem_bus_if (mem_bus_if)
);
///////////////////////////////////////////////////////////////////////////
wire [`NUM_SOCKETS-1:0] per_socket_sim_ebreak;
wire [`NUM_SOCKETS-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_socket_sim_wb_value;
assign sim_ebreak = per_socket_sim_ebreak[0];
assign sim_wb_value = per_socket_sim_wb_value[0];
`UNUSED_VAR (per_socket_sim_ebreak)
`UNUSED_VAR (per_socket_sim_wb_value)
VX_dcr_bus_if socket_dcr_bus_tmp_if();
assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr;
assign socket_dcr_bus_tmp_if.write_data = dcr_bus_if.write_data;
wire [`NUM_SOCKETS-1:0] per_socket_busy;
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
// Generate all sockets
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
`RESET_RELAY (socket_reset, reset);
VX_socket #(
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
) socket (
`SCOPE_IO_BIND (scope_socket+i)
.clk (clk),
.reset (socket_reset),
`ifdef PERF_ENABLE
.perf_cache_if (perf_l2cache_if),
.mem_perf_if (perf_memsys_total_if),
`endif
.dcr_bus_if (socket_dcr_bus_if),
.dcache_bus_if (per_socket_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
.icache_bus_if (per_socket_icache_bus_if[i]),
`ifdef GBAR_ENABLE
.gbar_bus_if (per_socket_gbar_bus_if[i]),
`endif
// Core request
.core_req_valid (per_core_mem_req_valid),
.core_req_rw (per_core_mem_req_rw),
.core_req_byteen (per_core_mem_req_byteen),
.core_req_addr (per_core_mem_req_addr),
.core_req_data (per_core_mem_req_data),
.core_req_tag (per_core_mem_req_tag),
.core_req_ready (per_core_mem_req_ready),
// Core response
.core_rsp_valid (per_core_mem_rsp_valid),
.core_rsp_data (per_core_mem_rsp_data),
.core_rsp_tag (per_core_mem_rsp_tag),
.core_rsp_ready (per_core_mem_rsp_ready),
`UNUSED_PIN (core_rsp_tmask),
// Memory request
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
// Memory response
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_ready (mem_rsp_ready)
.sim_ebreak (per_socket_sim_ebreak[i]),
.sim_wb_value (per_socket_sim_wb_value[i]),
.busy (per_socket_busy[i])
);
end else begin
`RESET_RELAY (mem_arb_reset);
VX_mem_arb #(
.NUM_REQS (`NUM_CORES),
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`L1_MEM_TAG_WIDTH),
.TYPE ("R"),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (mem_arb_reset),
// Core request
.req_valid_in (per_core_mem_req_valid),
.req_rw_in (per_core_mem_req_rw),
.req_byteen_in (per_core_mem_req_byteen),
.req_addr_in (per_core_mem_req_addr),
.req_data_in (per_core_mem_req_data),
.req_tag_in (per_core_mem_req_tag),
.req_ready_in (per_core_mem_req_ready),
// Memory request
.req_valid_out (mem_req_valid),
.req_rw_out (mem_req_rw),
.req_byteen_out (mem_req_byteen),
.req_addr_out (mem_req_addr),
.req_data_out (mem_req_data),
.req_tag_out (mem_req_tag),
.req_ready_out (mem_req_ready),
// Core response
.rsp_valid_out (per_core_mem_rsp_valid),
.rsp_data_out (per_core_mem_rsp_data),
.rsp_tag_out (per_core_mem_rsp_tag),
.rsp_ready_out (per_core_mem_rsp_ready),
// Memory response
.rsp_valid_in (mem_rsp_valid),
.rsp_tag_in (mem_rsp_tag),
.rsp_data_in (mem_rsp_data),
.rsp_ready_in (mem_rsp_ready)
);
end
`BUFFER_BUSY (busy, (| per_socket_busy), (`NUM_SOCKETS > 1));
endmodule