270 lines
9.5 KiB
Verilog
270 lines
9.5 KiB
Verilog
`include "VX_define.vh"
|
|
`include "VX_cache_config.vh"
|
|
|
|
module Vortex
|
|
#(
|
|
parameter CORE_ID = 0
|
|
) (
|
|
`ifdef SINGLE_CORE_BENCH
|
|
|
|
// Clock
|
|
input wire clk,
|
|
input wire reset,
|
|
|
|
// IO
|
|
output wire io_valid,
|
|
output wire [31:0] io_data,
|
|
|
|
// DRAM Dcache Req
|
|
output wire dram_req_read,
|
|
output wire dram_req_write,
|
|
output wire [31:0] dram_req_addr,
|
|
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
|
input wire dram_req_full,
|
|
|
|
// DRAM Dcache Rsp
|
|
input wire dram_rsp_valid,
|
|
input wire [31:0] dram_rsp_addr,
|
|
input wire [`DBANK_LINE_SIZE-1:0] dram_rsp_data,
|
|
output wire dram_rsp_ready,
|
|
|
|
// DRAM Icache Req
|
|
output wire I_dram_req_read,
|
|
output wire I_dram_req_write,
|
|
output wire [31:0] I_dram_req_addr,
|
|
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
|
input wire I_dram_req_full,
|
|
|
|
// DRAM Icache Rsp
|
|
input wire I_dram_rsp_valid,
|
|
input wire [31:0] I_dram_rsp_addr,
|
|
input wire [`IBANK_LINE_SIZE-1:0] I_dram_rsp_data,
|
|
output wire I_dram_rsp_ready,
|
|
|
|
// LLC Snooping
|
|
input wire snp_req_valid,
|
|
input wire [31:0] snp_req_addr,
|
|
output wire snp_req_full,
|
|
|
|
output wire out_ebreak
|
|
|
|
`else
|
|
|
|
input wire clk,
|
|
input wire reset,
|
|
// IO
|
|
output wire io_valid,
|
|
output wire[31:0] io_data,
|
|
|
|
// DRAM Dcache Req
|
|
output wire dram_req_read,
|
|
output wire dram_req_write,
|
|
output wire [31:0] dram_req_addr,
|
|
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
|
input wire dram_req_full,
|
|
|
|
// DRAM Dcache Rsp
|
|
input wire dram_rsp_valid,
|
|
input wire [31:0] dram_rsp_addr,
|
|
input wire [`DBANK_LINE_SIZE-1:0] dram_rsp_data,
|
|
output wire dram_rsp_ready,
|
|
|
|
// DRAM Icache Req
|
|
output wire I_dram_req_read,
|
|
output wire I_dram_req_write,
|
|
output wire [31:0] I_dram_req_addr,
|
|
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
|
input wire I_dram_req_full,
|
|
|
|
// DRAM Icache Rsp
|
|
output wire I_dram_rsp_ready,
|
|
input wire I_dram_rsp_valid,
|
|
input wire [31:0] I_dram_rsp_addr,
|
|
input wire [`IBANK_LINE_SIZE-1:0] I_dram_rsp_data,
|
|
|
|
input wire snp_req_valid,
|
|
input wire [31:0] snp_req_addr,
|
|
output wire snp_req_full,
|
|
|
|
output wire out_ebreak
|
|
`endif
|
|
);
|
|
`DEBUG_BEGIN
|
|
wire scheduler_empty;
|
|
`DEBUG_END
|
|
|
|
wire memory_delay;
|
|
wire exec_delay;
|
|
wire gpr_stage_delay;
|
|
wire schedule_delay;
|
|
|
|
// Dcache Interface
|
|
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp();
|
|
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req();
|
|
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_qual();
|
|
|
|
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_req();
|
|
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_res();
|
|
|
|
assign vx_gpu_dcache_dram_res.dram_rsp_valid = dram_rsp_valid;
|
|
assign vx_gpu_dcache_dram_res.dram_rsp_addr = dram_rsp_addr;
|
|
|
|
assign dram_req_write = vx_gpu_dcache_dram_req.dram_req_write;
|
|
assign dram_req_read = vx_gpu_dcache_dram_req.dram_req_read;
|
|
assign dram_req_addr = vx_gpu_dcache_dram_req.dram_req_addr;
|
|
assign dram_rsp_ready = vx_gpu_dcache_dram_req.dram_rsp_ready;
|
|
|
|
assign vx_gpu_dcache_dram_req.dram_req_full = dram_req_full;
|
|
|
|
genvar i;
|
|
generate
|
|
for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
|
|
assign vx_gpu_dcache_dram_res.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
|
|
assign dram_req_data[i * 32 +: 32] = vx_gpu_dcache_dram_req.dram_req_data[i];
|
|
end
|
|
endgenerate
|
|
|
|
wire temp_io_valid = (!memory_delay)
|
|
&& (|vx_dcache_req.core_req_valid)
|
|
&& (vx_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE)
|
|
&& (vx_dcache_req.core_req_addr[0] == 32'h00010000);
|
|
|
|
wire[31:0] temp_io_data = vx_dcache_req.core_req_writedata[0];
|
|
assign io_valid = temp_io_valid;
|
|
assign io_data = temp_io_data;
|
|
|
|
assign vx_dcache_req_qual.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}};
|
|
assign vx_dcache_req_qual.core_req_addr = vx_dcache_req.core_req_addr;
|
|
assign vx_dcache_req_qual.core_req_writedata = vx_dcache_req.core_req_writedata;
|
|
assign vx_dcache_req_qual.core_req_mem_read = vx_dcache_req.core_req_mem_read;
|
|
assign vx_dcache_req_qual.core_req_mem_write = vx_dcache_req.core_req_mem_write;
|
|
assign vx_dcache_req_qual.core_req_rd = vx_dcache_req.core_req_rd;
|
|
assign vx_dcache_req_qual.core_req_wb = vx_dcache_req.core_req_wb;
|
|
assign vx_dcache_req_qual.core_req_warp_num = vx_dcache_req.core_req_warp_num;
|
|
assign vx_dcache_req_qual.core_req_pc = vx_dcache_req.core_req_pc;
|
|
assign vx_dcache_req_qual.core_no_wb_slot = vx_dcache_req.core_no_wb_slot;
|
|
|
|
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_rsp();
|
|
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_req();
|
|
|
|
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_req();
|
|
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_res();
|
|
|
|
assign vx_gpu_icache_dram_res.dram_rsp_valid = I_dram_rsp_valid;
|
|
assign vx_gpu_icache_dram_res.dram_rsp_addr = I_dram_rsp_addr;
|
|
|
|
assign I_dram_req_write = vx_gpu_icache_dram_req.dram_req_write;
|
|
assign I_dram_req_read = vx_gpu_icache_dram_req.dram_req_read;
|
|
assign I_dram_req_addr = vx_gpu_icache_dram_req.dram_req_addr;
|
|
assign I_dram_rsp_ready = vx_gpu_icache_dram_req.dram_rsp_ready;
|
|
|
|
assign vx_gpu_icache_dram_req.dram_req_full = I_dram_req_full;
|
|
|
|
genvar j;
|
|
generate
|
|
for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin
|
|
assign vx_gpu_icache_dram_res.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32];
|
|
assign I_dram_req_data[j * 32 +: 32] = vx_gpu_icache_dram_req.dram_req_data[j];
|
|
end
|
|
endgenerate
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
// Front-end to Back-end
|
|
VX_frE_to_bckE_req_if vx_bckE_req(); // New instruction request to EXE/MEM
|
|
|
|
// Back-end to Front-end
|
|
VX_wb_if vx_writeback_if(); // Writeback to GPRs
|
|
VX_branch_response_if vx_branch_rsp(); // Branch Resolution to Fetch
|
|
VX_jal_response_if vx_jal_rsp(); // Jump resolution to Fetch
|
|
|
|
// CSR Buses
|
|
// VX_csr_write_request_if vx_csr_w_req();
|
|
|
|
VX_warp_ctl_if vx_warp_ctl();
|
|
VX_gpu_snp_req_rsp_if vx_gpu_icache_snp_req();
|
|
VX_gpu_snp_req_rsp_if vx_gpu_dcache_snp_req();
|
|
|
|
assign vx_gpu_dcache_snp_req.snp_req_valid = snp_req_valid;
|
|
assign vx_gpu_dcache_snp_req.snp_req_addr = snp_req_addr;
|
|
assign snp_req_full = vx_gpu_dcache_snp_req.snp_req_full;
|
|
|
|
VX_front_end vx_front_end(
|
|
.clk (clk),
|
|
.reset (reset),
|
|
.vx_warp_ctl (vx_warp_ctl),
|
|
.vx_bckE_req (vx_bckE_req),
|
|
.schedule_delay (schedule_delay),
|
|
.vx_icache_rsp (vx_icache_rsp),
|
|
.vx_icache_req (vx_icache_req),
|
|
.vx_jal_rsp (vx_jal_rsp),
|
|
.vx_branch_rsp (vx_branch_rsp),
|
|
.fetch_ebreak (out_ebreak)
|
|
);
|
|
|
|
VX_scheduler schedule(
|
|
.clk (clk),
|
|
.reset (reset),
|
|
.memory_delay (memory_delay),
|
|
.exec_delay (exec_delay),
|
|
.gpr_stage_delay (gpr_stage_delay),
|
|
.vx_bckE_req (vx_bckE_req),
|
|
.vx_writeback_if (vx_writeback_if),
|
|
.schedule_delay (schedule_delay),
|
|
.is_empty (scheduler_empty)
|
|
);
|
|
|
|
VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
|
|
.clk (clk),
|
|
.reset (reset),
|
|
.schedule_delay (schedule_delay),
|
|
.vx_warp_ctl (vx_warp_ctl),
|
|
.vx_bckE_req (vx_bckE_req),
|
|
.vx_jal_rsp (vx_jal_rsp),
|
|
.vx_branch_rsp (vx_branch_rsp),
|
|
.vx_dcache_rsp (vx_dcache_rsp),
|
|
.vx_dcache_req (vx_dcache_req),
|
|
.vx_writeback_if (vx_writeback_if),
|
|
.out_mem_delay (memory_delay),
|
|
.out_exec_delay (exec_delay),
|
|
.gpr_stage_delay (gpr_stage_delay)
|
|
);
|
|
|
|
VX_dmem_controller vx_dmem_controller(
|
|
.clk (clk),
|
|
.reset (reset),
|
|
|
|
// Dram <-> Dcache
|
|
.vx_gpu_dcache_dram_req (vx_gpu_dcache_dram_req),
|
|
.vx_gpu_dcache_dram_res (vx_gpu_dcache_dram_res),
|
|
.vx_gpu_dcache_snp_req (vx_gpu_dcache_snp_req),
|
|
|
|
// Dram <-> Icache
|
|
.vx_gpu_icache_dram_req (vx_gpu_icache_dram_req),
|
|
.vx_gpu_icache_dram_res (vx_gpu_icache_dram_res),
|
|
.vx_gpu_icache_snp_req (vx_gpu_icache_snp_req),
|
|
|
|
// Core <-> Icache
|
|
.vx_icache_req (vx_icache_req),
|
|
.vx_icache_rsp (vx_icache_rsp),
|
|
|
|
// Core <-> Dcache
|
|
.vx_dcache_req (vx_dcache_req_qual),
|
|
.vx_dcache_rsp (vx_dcache_rsp)
|
|
);
|
|
|
|
// VX_csr_handler vx_csr_handler(
|
|
// .clk (clk),
|
|
// .in_decode_csr_address(decode_csr_address),
|
|
// .vx_csr_w_req (vx_csr_w_req),
|
|
// .in_wb_valid (vx_writeback_if.wb_valid[0]),
|
|
// .out_decode_csr_data (csr_decode_csr_data)
|
|
// );
|
|
|
|
endmodule // Vortex
|
|
|
|
|
|
|
|
|
|
|