Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactoring minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
This commit is contained in:
327
hw/rtl/Vortex.sv
327
hw/rtl/Vortex.sv
@@ -1,7 +1,20 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module Vortex (
|
||||
`SCOPE_IO_Vortex
|
||||
module Vortex import VX_gpu_pkg::*; (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
@@ -22,204 +35,186 @@ module Vortex (
|
||||
input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// DCR write request
|
||||
input wire dcr_wr_valid,
|
||||
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
|
||||
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
`STATIC_ASSERT((`L3_ENABLE == 0 || `NUM_CLUSTERS > 1), ("invalid parameter"))
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_rw;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_ready;
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
cache_perf_t perf_l3cache;
|
||||
mem_perf_t mem_perf;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready;
|
||||
assign mem_perf_if.icache = 'x;
|
||||
assign mem_perf_if.dcache = 'x;
|
||||
assign mem_perf_if.l2cache = 'x;
|
||||
assign mem_perf_if.l3cache = perf_l3cache;
|
||||
assign mem_perf_if.smem = 'x;
|
||||
assign mem_perf_if.mem = mem_perf;
|
||||
`endif
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L2_LINE_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH)
|
||||
) per_cluster_mem_bus_if[`NUM_CLUSTERS]();
|
||||
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L3_LINE_SIZE),
|
||||
.TAG_WIDTH (L3_MEM_TAG_WIDTH)
|
||||
) mem_bus_if();
|
||||
|
||||
`RESET_RELAY (cluster_reset);
|
||||
`RESET_RELAY (l3_reset, reset);
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID(i)
|
||||
) cluster (
|
||||
`SCOPE_BIND_Vortex_cluster(i)
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ("l3cache"),
|
||||
.CACHE_SIZE (`L3_CACHE_SIZE),
|
||||
.LINE_SIZE (`L3_LINE_SIZE),
|
||||
.NUM_BANKS (`L3_NUM_BANKS),
|
||||
.NUM_WAYS (`L3_NUM_WAYS),
|
||||
.WORD_SIZE (L3_WORD_SIZE),
|
||||
.NUM_REQS (L3_NUM_REQS),
|
||||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_REG (2),
|
||||
.MEM_OUT_REG (2),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L3_ENABLED)
|
||||
) l3cache (
|
||||
.clk (clk),
|
||||
.reset (l3_reset),
|
||||
|
||||
.clk (clk),
|
||||
.reset (cluster_reset),
|
||||
|
||||
.mem_req_valid (per_cluster_mem_req_valid [i]),
|
||||
.mem_req_rw (per_cluster_mem_req_rw [i]),
|
||||
.mem_req_byteen (per_cluster_mem_req_byteen[i]),
|
||||
.mem_req_addr (per_cluster_mem_req_addr [i]),
|
||||
.mem_req_data (per_cluster_mem_req_data [i]),
|
||||
.mem_req_tag (per_cluster_mem_req_tag [i]),
|
||||
.mem_req_ready (per_cluster_mem_req_ready [i]),
|
||||
|
||||
.mem_rsp_valid (per_cluster_mem_rsp_valid [i]),
|
||||
.mem_rsp_data (per_cluster_mem_rsp_data [i]),
|
||||
.mem_rsp_tag (per_cluster_mem_rsp_tag [i]),
|
||||
.mem_rsp_ready (per_cluster_mem_rsp_ready [i]),
|
||||
|
||||
.busy (per_cluster_busy [i])
|
||||
);
|
||||
end
|
||||
|
||||
assign busy = (| per_cluster_busy);
|
||||
|
||||
if (`L3_ENABLE) begin
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_l3cache_if();
|
||||
.cache_perf (perf_l3cache),
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (l3_reset);
|
||||
.core_bus_if (per_cluster_mem_bus_if),
|
||||
.mem_bus_if (mem_bus_if)
|
||||
);
|
||||
|
||||
assign mem_req_valid = mem_bus_if.req_valid;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_byteen= mem_bus_if.req_data.byteen;
|
||||
assign mem_req_addr = mem_bus_if.req_data.addr;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_bus_if.rsp_ready;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
wire sim_ebreak /* verilator public */;
|
||||
wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value /* verilator public */;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_sim_ebreak;
|
||||
wire [`NUM_CLUSTERS-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_cluster_sim_wb_value;
|
||||
assign sim_ebreak = per_cluster_sim_ebreak[0];
|
||||
assign sim_wb_value = per_cluster_sim_wb_value[0];
|
||||
`UNUSED_VAR (per_cluster_sim_ebreak)
|
||||
`UNUSED_VAR (per_cluster_sim_wb_value)
|
||||
|
||||
VX_dcr_bus_if dcr_bus_if();
|
||||
assign dcr_bus_if.write_valid = dcr_wr_valid;
|
||||
assign dcr_bus_if.write_addr = dcr_wr_addr;
|
||||
assign dcr_bus_if.write_data = dcr_wr_data;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (i)
|
||||
) cluster (
|
||||
`SCOPE_IO_BIND (i)
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L3_CACHE_ID),
|
||||
.CACHE_SIZE (`L3_CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L3_CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L3_NUM_BANKS),
|
||||
.NUM_PORTS (`L3_NUM_PORTS),
|
||||
.WORD_SIZE (`L3_WORD_SIZE),
|
||||
.NUM_REQS (`L3_NUM_REQS),
|
||||
.CREQ_SIZE (`L3_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L3_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) l3cache (
|
||||
`SCOPE_BIND_Vortex_l3cache
|
||||
|
||||
.clk (clk),
|
||||
.reset (l3_reset),
|
||||
.reset (cluster_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_l3cache_if),
|
||||
.mem_perf_if (mem_perf_if),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_cluster_mem_req_valid),
|
||||
.core_req_rw (per_cluster_mem_req_rw),
|
||||
.core_req_byteen (per_cluster_mem_req_byteen),
|
||||
.core_req_addr (per_cluster_mem_req_addr),
|
||||
.core_req_data (per_cluster_mem_req_data),
|
||||
.core_req_tag (per_cluster_mem_req_tag),
|
||||
.core_req_ready (per_cluster_mem_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_cluster_mem_rsp_valid),
|
||||
.core_rsp_data (per_cluster_mem_rsp_data),
|
||||
.core_rsp_tag (per_cluster_mem_rsp_tag),
|
||||
.core_rsp_ready (per_cluster_mem_rsp_ready),
|
||||
`UNUSED_PIN (core_rsp_tmask),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (mem_req_valid),
|
||||
.mem_req_rw (mem_req_rw),
|
||||
.mem_req_byteen (mem_req_byteen),
|
||||
.mem_req_addr (mem_req_addr),
|
||||
.mem_req_data (mem_req_data),
|
||||
.mem_req_tag (mem_req_tag),
|
||||
.mem_req_ready (mem_req_ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (`L3_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`L3_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Core request
|
||||
.req_valid_in (per_cluster_mem_req_valid),
|
||||
.req_rw_in (per_cluster_mem_req_rw),
|
||||
.req_byteen_in (per_cluster_mem_req_byteen),
|
||||
.req_addr_in (per_cluster_mem_req_addr),
|
||||
.req_data_in (per_cluster_mem_req_data),
|
||||
.req_tag_in (per_cluster_mem_req_tag),
|
||||
.req_ready_in (per_cluster_mem_req_ready),
|
||||
|
||||
// Memory request
|
||||
.req_valid_out (mem_req_valid),
|
||||
.req_rw_out (mem_req_rw),
|
||||
.req_byteen_out (mem_req_byteen),
|
||||
.req_addr_out (mem_req_addr),
|
||||
.req_data_out (mem_req_data),
|
||||
.req_tag_out (mem_req_tag),
|
||||
.req_ready_out (mem_req_ready),
|
||||
|
||||
// Core response
|
||||
.rsp_valid_out (per_cluster_mem_rsp_valid),
|
||||
.rsp_data_out (per_cluster_mem_rsp_data),
|
||||
.rsp_tag_out (per_cluster_mem_rsp_tag),
|
||||
.rsp_ready_out (per_cluster_mem_rsp_ready),
|
||||
|
||||
// Memory response
|
||||
.rsp_valid_in (mem_rsp_valid),
|
||||
.rsp_tag_in (mem_rsp_tag),
|
||||
.rsp_data_in (mem_rsp_data),
|
||||
.rsp_ready_in (mem_rsp_ready)
|
||||
);
|
||||
.dcr_bus_if (cluster_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_cluster_mem_bus_if[i]),
|
||||
|
||||
.sim_ebreak (per_cluster_sim_ebreak[i]),
|
||||
.sim_wb_value (per_cluster_sim_wb_value[i]),
|
||||
|
||||
.busy (per_cluster_busy[i])
|
||||
);
|
||||
end
|
||||
|
||||
`SCOPE_ASSIGN (reset, reset);
|
||||
`SCOPE_ASSIGN (mem_req_fire, mem_req_valid && mem_req_ready);
|
||||
`SCOPE_ASSIGN (mem_req_addr, `TO_FULL_ADDR(mem_req_addr));
|
||||
`SCOPE_ASSIGN (mem_req_rw, mem_req_rw);
|
||||
`SCOPE_ASSIGN (mem_req_byteen, mem_req_byteen);
|
||||
`SCOPE_ASSIGN (mem_req_data, mem_req_data);
|
||||
`SCOPE_ASSIGN (mem_req_tag, mem_req_tag);
|
||||
`SCOPE_ASSIGN (mem_rsp_fire, mem_rsp_valid && mem_rsp_ready);
|
||||
`SCOPE_ASSIGN (mem_rsp_data, mem_rsp_data);
|
||||
`SCOPE_ASSIGN (mem_rsp_tag, mem_rsp_tag);
|
||||
`SCOPE_ASSIGN (busy, busy);
|
||||
`BUFFER_BUSY (busy, (| per_cluster_busy), (`NUM_CLUSTERS > 1));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_pending_reads <= '0;
|
||||
end else begin
|
||||
perf_mem_pending_reads <= $signed(perf_mem_pending_reads) +
|
||||
`PERF_CTR_BITS'($signed(2'(mem_req_fire && ~mem_bus_if.req_data.rw) - 2'(mem_rsp_fire)));
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
mem_perf <= '0;
|
||||
end else begin
|
||||
if (mem_req_fire && ~mem_bus_if.req_data.rw) begin
|
||||
mem_perf.reads <= mem_perf.reads + `PERF_CTR_BITS'(1);
|
||||
end
|
||||
if (mem_req_fire && mem_bus_if.req_data.rw) begin
|
||||
mem_perf.writes <= mem_perf.writes + `PERF_CTR_BITS'(1);
|
||||
end
|
||||
mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads;
|
||||
end
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_MEM
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_valid && mem_req_ready) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw)
|
||||
dpi_trace("%d: MEM Wr Req: addr=%0h, tag=%0h, byteen=%0h data=%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data);
|
||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
||||
else
|
||||
dpi_trace("%d: MEM Rd Req: addr=%0h, tag=%0h, byteen=%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen);
|
||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
||||
end
|
||||
if (mem_rsp_valid && mem_rsp_ready) begin
|
||||
dpi_trace("%d: MEM Rsp: tag=%0h, data=%0h\n", $time, mem_rsp_tag, mem_rsp_data);
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag, mem_rsp_data));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
`ifndef NDEBUG
|
||||
`ifdef SIMULATION
|
||||
always @(posedge clk) begin
|
||||
$fflush(); // flush stdout buffer
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
||||
Reference in New Issue
Block a user