`include "VX_define.vh"
`include "VX_gpu_pkg.sv"

// TODO: move VX_define constants to parameters, and then parameterize in blackbox

// ----------------------------------------------------------------------------
// Vortex
//
// Wrapper that exposes a single VX_core through flat, TileLink-style
// (channel A request / channel D response) ports so that it can be
// instantiated as a blackbox from the Chisel tile.
//
// What this module does:
//   * adapts the core's icache / dcache / smem VX_mem_bus_if interfaces to the
//     imem_0_*, dmem_* (packed, one slice per lane) and smem_<n>_* ports;
//   * filters write acknowledgements (AccessAck) out of the D channels, since
//     the core does not accept them, and sinks them on the core's behalf;
//   * holds the core in reset for a few cycles after `reset` deasserts while
//     a small FSM programs the DCRs (device configuration registers),
//     including the startup address BOOTROM_HANG100;
//   * drives `cease` from the core's `busy` output.
//
// Parameters:
//   CORE_ID          forwarded to VX_core.
//   BOOTROM_HANG100  value written to the startup-address DCR.
//   NUM_THREADS      sizes the packed dmem_* ports.  The adapter logic below
//                    is written for exactly 4 lanes (the smem_0..smem_3 ports
//                    are fixed), so NUM_THREADS and DCACHE_NUM_REQS are both
//                    expected to be 4.
//
// Inputs that are currently unused by the logic in this file: reset_vector,
// interrupts_debug/mtip/meip/seip, traceStall, and the *_d_bits_size ports.
// ----------------------------------------------------------------------------
module Vortex import VX_gpu_pkg::*; #(
    parameter CORE_ID         = 0,
    parameter BOOTROM_HANG100 = 32'h10100,
    parameter NUM_THREADS     = 0
) (
    /* adapt to CoreIO bundle at src/main/scala/tile/Core.scala */

    input                                               clock,
    input                                               reset,

    // input hartid,
    input  [31:0]                                       reset_vector,
    input                                               interrupts_debug,
    input                                               interrupts_mtip,
    input                                               interrupts_msip,
    input                                               interrupts_meip,
    input                                               interrupts_seip,

    // imem ------------------------------------------------
    input                                               imem_0_a_ready,
    input                                               imem_0_d_valid,
    input  [2:0]                                        imem_0_d_bits_opcode,
    input  [3:0]                                        imem_0_d_bits_size,
    input  [ICACHE_TAG_WIDTH-1:0]                       imem_0_d_bits_source,
    input  [31:0]                                       imem_0_d_bits_data,
    output                                              imem_0_a_valid,
    output [2:0]                                        imem_0_a_bits_opcode,
    output [3:0]                                        imem_0_a_bits_size,
    output [ICACHE_TAG_WIDTH-1:0]                       imem_0_a_bits_source,
    output [31:0]                                       imem_0_a_bits_address,
    output [3:0]                                        imem_0_a_bits_mask,
    output [31:0]                                       imem_0_a_bits_data,
    output                                              imem_0_d_ready,

    // dmem ------------------------------------------------
    input  [NUM_THREADS - 1:0]                          dmem_d_valid,
    input  [(NUM_THREADS * 3) - 1:0]                    dmem_d_bits_opcode,
    input  [(NUM_THREADS * 4) - 1:0]                    dmem_d_bits_size,
    input  [(NUM_THREADS * DCACHE_NOSM_TAG_WIDTH) - 1:0] dmem_d_bits_source,
    input  [(NUM_THREADS * 32) - 1:0]                   dmem_d_bits_data,
    output [NUM_THREADS - 1:0]                          dmem_d_ready,

    input  [NUM_THREADS - 1:0]                          dmem_a_ready,
    output [NUM_THREADS - 1:0]                          dmem_a_valid,
    output [(NUM_THREADS * 3) - 1:0]                    dmem_a_bits_opcode,
    output [(NUM_THREADS * 4) - 1:0]                    dmem_a_bits_size,
    output [(NUM_THREADS * DCACHE_NOSM_TAG_WIDTH) - 1:0] dmem_a_bits_source,
    output [(NUM_THREADS * 32) - 1:0]                   dmem_a_bits_address,
    output [(NUM_THREADS * 4) - 1:0]                    dmem_a_bits_mask,
    output [(NUM_THREADS * 32) - 1:0]                   dmem_a_bits_data,

    // smem ------------------------------------------------
    input                                               smem_0_a_ready,
    input                                               smem_0_d_valid,
    input  [2:0]                                        smem_0_d_bits_opcode,
    input  [3:0]                                        smem_0_d_bits_size,
    input  [DCACHE_NOSM_TAG_WIDTH-1:0]                  smem_0_d_bits_source,
    input  [31:0]                                       smem_0_d_bits_data,
    output                                              smem_0_a_valid,
    output [2:0]                                        smem_0_a_bits_opcode,
    output [3:0]                                        smem_0_a_bits_size,
    output [DCACHE_NOSM_TAG_WIDTH-1:0]                  smem_0_a_bits_source,
    output [31:0]                                       smem_0_a_bits_address,
    output [3:0]                                        smem_0_a_bits_mask,
    output [31:0]                                       smem_0_a_bits_data,
    output                                              smem_0_d_ready,

    input                                               smem_1_a_ready,
    input                                               smem_1_d_valid,
    input  [2:0]                                        smem_1_d_bits_opcode,
    input  [3:0]                                        smem_1_d_bits_size,
    input  [DCACHE_NOSM_TAG_WIDTH-1:0]                  smem_1_d_bits_source,
    input  [31:0]                                       smem_1_d_bits_data,
    output                                              smem_1_a_valid,
    output [2:0]                                        smem_1_a_bits_opcode,
    output [3:0]                                        smem_1_a_bits_size,
    output [DCACHE_NOSM_TAG_WIDTH-1:0]                  smem_1_a_bits_source,
    output [31:0]                                       smem_1_a_bits_address,
    output [3:0]                                        smem_1_a_bits_mask,
    output [31:0]                                       smem_1_a_bits_data,
    output                                              smem_1_d_ready,

    input                                               smem_2_a_ready,
    input                                               smem_2_d_valid,
    input  [2:0]                                        smem_2_d_bits_opcode,
    input  [3:0]                                        smem_2_d_bits_size,
    input  [DCACHE_NOSM_TAG_WIDTH-1:0]                  smem_2_d_bits_source,
    input  [31:0]                                       smem_2_d_bits_data,
    output                                              smem_2_a_valid,
    output [2:0]                                        smem_2_a_bits_opcode,
    output [3:0]                                        smem_2_a_bits_size,
    output [DCACHE_NOSM_TAG_WIDTH-1:0]                  smem_2_a_bits_source,
    output [31:0]                                       smem_2_a_bits_address,
    output [3:0]                                        smem_2_a_bits_mask,
    output [31:0]                                       smem_2_a_bits_data,
    output                                              smem_2_d_ready,

    input                                               smem_3_a_ready,
    input                                               smem_3_d_valid,
    input  [2:0]                                        smem_3_d_bits_opcode,
    input  [3:0]                                        smem_3_d_bits_size,
    input  [DCACHE_NOSM_TAG_WIDTH-1:0]                  smem_3_d_bits_source,
    input  [31:0]                                       smem_3_d_bits_data,
    output                                              smem_3_a_valid,
    output [2:0]                                        smem_3_a_bits_opcode,
    output [3:0]                                        smem_3_a_bits_size,
    output [DCACHE_NOSM_TAG_WIDTH-1:0]                  smem_3_a_bits_source,
    output [31:0]                                       smem_3_a_bits_address,
    output [3:0]                                        smem_3_a_bits_mask,
    output [31:0]                                       smem_3_a_bits_data,
    output                                              smem_3_d_ready,

    // input fpu_fcsr_flags_valid,
    // input [4:0] fpu_fcsr_flags_bits,
    //
    // input [63:0] fpu_store_data,
    // input [31:0] fpu_toint_data,
    // input fpu_fcsr_rdy,
    // input fpu_nack_mem,
    // input fpu_illegal_rm,
    // input fpu_dec_wen,
    // input fpu_dec_ldst,
    // input fpu_dec_ren1,
    // input fpu_dec_ren2,
    // input fpu_dec_ren3,
    // input fpu_dec_swap12,
    // input fpu_dec_swap23,
    // input [1:0] fpu_dec_typeTagIn,
    // input [1:0] fpu_dec_typeTagOut,
    // input fpu_dec_fromint,
    // input fpu_dec_toint,
    // input fpu_dec_fastpipe,
    // input fpu_dec_fma,
    // input fpu_dec_div,
    // input fpu_dec_sqrt,
    // input fpu_dec_wflags,
    // input fpu_sboard_set,
    // input fpu_sboard_clr,
    // input [4:0] fpu_sboard_clra,
    // output fpu_hartid,
    // output [31:0] fpu_time,
    // output [31:0] fpu_inst,
    // output [31:0] fpu_fromint_data,
    // output [2:0] fpu_fcsr_rm,
    // output fpu_dmem_resp_val,
    // output [2:0] fpu_dmem_resp_type,
    // output [4:0] fpu_dmem_resp_tag,
    // output fpu_valid,
    // output fpu_killx,
    // output fpu_killm,
    // output fpu_keep_clock_enabled,

    output                                              cease,
    input                                               traceStall,
    output                                              wfi
);

    // ------------------------------------------------------------------------
    // Local constants
    // ------------------------------------------------------------------------

    // Number of dmem/smem lanes handled by the adapters below.  Fixed at 4
    // because the smem_0..smem_3 ports are fixed in the port list.
    localparam NUM_TL_LANES = 4;

    // TileLink channel A opcodes driven by this adapter.
    localparam [2:0] TL_A_PUT_FULL    = 3'd0;  // PutFullData
    localparam [2:0] TL_A_PUT_PARTIAL = 3'd1;  // PutPartialData
    localparam [2:0] TL_A_GET         = 3'd4;  // Get
    // TileLink channel D opcode that is filtered out (write acknowledgement).
    localparam [2:0] TL_D_ACCESS_ACK  = 3'd0;  // AccessAck
    // Every request is a single 32-bit word: size = log2(4 bytes).
    // NOTE: MAKE SURE TO CHANGE CONSTANT WIDTH FOR SIZE!
    localparam [3:0] TL_SIZE_WORD     = 4'd2;

    // ------------------------------------------------------------------------
    // Interrupts
    // ------------------------------------------------------------------------
    // A rising edge on interrupts_msip loads a 6-cycle down counter;
    // intr_reset is high while the counter is non-zero.  intr_reset is not
    // currently folded into core_reset (see the commented-out term there).

    logic [3:0] intr_counter;
    logic       msip_1d, intr_reset;
    logic       busy;

    assign intr_reset = |intr_counter;

    always @(posedge clock) begin
        msip_1d <= interrupts_msip;
        if (reset) begin
            intr_counter <= 4'h0;
        end else if (~msip_1d && interrupts_msip) begin // rising edge
            intr_counter <= 4'h6;
        end else begin
            intr_counter <= intr_counter > 0 ? intr_counter - 4'h1 : 4'h0;
        end
    end

    // ------------------------------------------------------------------------
    // TL <-> Vortex core-cache interface adapter
    // ------------------------------------------------------------------------

    VX_mem_bus_if #(
        .DATA_SIZE (ICACHE_WORD_SIZE),
        .TAG_WIDTH (ICACHE_TAG_WIDTH)
    ) icache_bus_if();

    // NOTE(hansung): need to use DCACHE_NOSM_TAG_WIDTH here instead of
    // DCACHE_TAG_WIDTH; the latter is only used inside the core to
    // differentiate between requests going to the cache vs. sharedmem.
    // FIXME: DCACHE_NUM_REQS is assumed to be the same as NUM_LANES as of
    // now.
    VX_mem_bus_if #(
        .DATA_SIZE (DCACHE_WORD_SIZE),
        .TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
    ) dcache_bus_if[DCACHE_NUM_REQS]();

    VX_mem_bus_if #(
        .DATA_SIZE (DCACHE_WORD_SIZE),
        .TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
    ) smem_bus_if[DCACHE_NUM_REQS]();

    // always @(posedge clock) begin
    //   `ASSERT(DCACHE_NUM_REQS == NUM_THREADS, "DCACHE_NUM_REQS doesn't match NUM_THREADS");
    // end

    // imem -------------------------------------------------------------------

    // D channel -> core response.
    assign icache_bus_if.rsp_valid     = imem_0_d_valid;
    // TODO: hardcoded DCACHE_WORD_SIZE = 4
    assign icache_bus_if.rsp_data.data = imem_0_d_bits_data;
    assign icache_bus_if.rsp_data.tag  = imem_0_d_bits_source[ICACHE_TAG_WIDTH-1:0];
    assign imem_0_d_ready              = icache_bus_if.rsp_ready;

    // always @(posedge clock) begin
    //   if (icache_req_if.valid && icache_req_if.ready)
    //     icache_rsp_if.tag <= icache_req_if.tag;
    // end

    // Core request -> A channel.  Instruction fetches are always full-word Gets.
    assign imem_0_a_bits_source  = {32'b0, icache_bus_if.req_data.tag}[ICACHE_TAG_WIDTH-1:0];
    assign imem_0_a_valid        = icache_bus_if.req_valid;
    // The core supplies a word address; append 2'b0 to form the byte address.
    assign imem_0_a_bits_address = {icache_bus_if.req_data.addr, 2'b0};
    assign icache_bus_if.req_ready = imem_0_a_ready;
    assign imem_0_a_bits_data    = 32'd0;
    assign imem_0_a_bits_mask    = 4'hf;
    // assign imem_0_a_bits_corrupt = 1'b0;
    // assign imem_0_a_bits_param = 3'd0;
    assign imem_0_a_bits_size    = TL_SIZE_WORD; // 32b
    assign imem_0_a_bits_opcode  = TL_A_GET;     // Get

    // dmem -------------------------------------------------------------------
    // One adapter per lane; lane i uses slice i of every packed dmem_* port.

    genvar i;
    generate
        for (i = 0; i < NUM_TL_LANES; i++) begin : g_dmem_lane
            // Vortex core does not accept write acks; filter them out here
            assign dcache_bus_if[i].rsp_valid =
                (dmem_d_valid[i] && (dmem_d_bits_opcode[i * 3 +: 3] !== TL_D_ACCESS_ACK));
            assign dcache_bus_if[i].rsp_data.data = dmem_d_bits_data[i * 32 +: 32];
            assign dcache_bus_if[i].rsp_data.tag  =
                dmem_d_bits_source[i * DCACHE_NOSM_TAG_WIDTH +: DCACHE_NOSM_TAG_WIDTH];

            // When there's a write ACK coming back, ready bit should always
            // be 1 to accept them because core does not accept them on their
            // own
            assign dmem_d_ready[i] = dcache_bus_if[i].rsp_ready ||
                (dmem_d_valid[i] && (dmem_d_bits_opcode[i * 3 +: 3] == TL_D_ACCESS_ACK));

            assign dmem_a_valid[i] = dcache_bus_if[i].req_valid;
            // Word address from the core -> byte address on the bus.
            assign dmem_a_bits_address[i * 32 +: 32] = {dcache_bus_if[i].req_data.addr, 2'b0};
            assign dmem_a_bits_data[i * 32 +: 32]    = dcache_bus_if[i].req_data.data;
            assign dmem_a_bits_source[i * DCACHE_NOSM_TAG_WIDTH +: DCACHE_NOSM_TAG_WIDTH] =
                dcache_bus_if[i].req_data.tag;

            // Translate Vortex rw/byteen to TileLink opcode: a write with all
            // byte enables set is PutFull, any other write is PutPartial, and
            // a read is Get.
            assign dmem_a_bits_opcode[i * 3 +: 3] = dcache_bus_if[i].req_data.rw
                ? (&dcache_bus_if[i].req_data.byteen ? TL_A_PUT_FULL : TL_A_PUT_PARTIAL)
                : TL_A_GET;

            // Vortex cache requests are single-fixed-size
            assign dmem_a_bits_size[i * 4 +: 4] = TL_SIZE_WORD;
            /* $countones(dcache_req_if.byteen[0]) === 'd4 ? 2'd2 :
               ($countones(dcache_req_if.byteen[0]) === 'd2 ? 2'd1 : 2'd0); */

            // byteen can be directly used as TL mask
            assign dmem_a_bits_mask[i * 4 +: 4] = dcache_bus_if[i].req_data.byteen;

            assign dcache_bus_if[i].req_ready = dmem_a_ready[i];
        end
    endgenerate

    // we assume all lanes always have the same tag; otherwise the sourceId
    // logic in the Chisel tile breaks
    // NOTE: not working at the moment but this doesn't seem to be a problem
    // always @(*) begin
    //   for (i = 0; i < 4; i++) begin
    //     assert(dcache_req_if.tag[0] == dcache_req_if.tag[i])
    //   end
    // end

    // smem -------------------------------------------------------------------
    // Same adaptation as dmem, but the smem ports are individually named
    // (smem_<n>_*) rather than packed, so a token-pasting macro stands in for
    // the generate loop.  For lane n it: filters AccessAck out of the core's
    // response and always accepts it on the D channel, forwards the request
    // as a word-sized Get / PutFull / PutPartial, and uses byteen as the mask.
`define VORTEX_SMEM_LANE(n) \
    assign smem_bus_if[n].rsp_valid     = (smem_``n``_d_valid && (smem_``n``_d_bits_opcode !== TL_D_ACCESS_ACK)); \
    assign smem_bus_if[n].rsp_data.data = smem_``n``_d_bits_data; \
    assign smem_bus_if[n].rsp_data.tag  = smem_``n``_d_bits_source; \
    assign smem_``n``_d_ready           = smem_bus_if[n].rsp_ready || (smem_``n``_d_valid && (smem_``n``_d_bits_opcode == TL_D_ACCESS_ACK)); \
    assign smem_``n``_a_valid           = smem_bus_if[n].req_valid; \
    assign smem_``n``_a_bits_address    = {smem_bus_if[n].req_data.addr, 2'b0}; \
    assign smem_``n``_a_bits_data       = smem_bus_if[n].req_data.data; \
    assign smem_``n``_a_bits_source     = smem_bus_if[n].req_data.tag; \
    assign smem_``n``_a_bits_opcode     = smem_bus_if[n].req_data.rw \
        ? (&smem_bus_if[n].req_data.byteen ? TL_A_PUT_FULL : TL_A_PUT_PARTIAL) \
        : TL_A_GET; \
    assign smem_``n``_a_bits_size       = TL_SIZE_WORD; \
    assign smem_``n``_a_bits_mask       = smem_bus_if[n].req_data.byteen; \
    assign smem_bus_if[n].req_ready     = smem_``n``_a_ready;

    `VORTEX_SMEM_LANE(0)
    `VORTEX_SMEM_LANE(1)
    `VORTEX_SMEM_LANE(2)
    `VORTEX_SMEM_LANE(3)

`undef VORTEX_SMEM_LANE

    /* fpu */
    // assign {fpu_hartid, fpu_time, fpu_inst, fpu_fromint_data, fpu_fcsr_rm, fpu_dmem_resp_val, fpu_dmem_resp_type,
    //   fpu_dmem_resp_tag, fpu_valid, fpu_killx, fpu_killm, fpu_keep_clock_enabled} = '0;

    // The core reporting "not busy" is surfaced as `cease`.
    assign cease = ~busy;
    assign wfi   = 1'b0; // FIXME: unused

    // ------------------------------------------------------------------------
    // Simulation-only trace: print every accepted dmem store whose byte
    // address has 0xC in its top nibble.
    // ------------------------------------------------------------------------
`ifndef SYNTHESIS
    generate
        for (i = 0; i < NUM_TL_LANES; i++) begin : g_heap_store_trace
            always @(posedge clock) begin
                if (dcache_bus_if[i].req_valid && dcache_bus_if[i].req_ready && dcache_bus_if[i].req_data.rw) begin
                    // anything that starts with 0xC is heap address
                    if ({dcache_bus_if[i].req_data.addr, 2'b0}[31:28] == 4'hc) begin
                        $display("[%d] STORE HEAP MEM: CORE=%d, THREAD=%d, ADDRESS=0x%X, DATA=0x%08X",
                            $time(), CORE_ID, i, {dcache_bus_if[i].req_data.addr, 2'b0}, dcache_bus_if[i].req_data.data);
                    end
                end
            end
        end
    endgenerate
`endif

    // ------------------------------------------------------------------------
    // Reset sequencing
    // ------------------------------------------------------------------------

    // Driven by VX_core; not consumed anywhere in this module.
    logic sim_ebreak;
    logic [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value;

    logic [3:0] reset_start_counter;
    logic core_reset;
    logic dcr_reset;

    // Loaded with 10 while `reset` is high, then counts down to zero once
    // `reset` is released.
    always @(posedge clock) begin
        if (reset) begin
            reset_start_counter <= 4'ha;
        end else begin
            if (reset_start_counter > 4'h0) begin
                reset_start_counter <= reset_start_counter - 4'h1;
            end
        end
    end

    // Delay reset signal by a few cycles to make time for resetting the DCR
    // (device configuration registers).
    assign core_reset = reset || (reset_start_counter != 4'h0); // || intr_reset;
    // High only during the extra cycles after `reset` is released.  Not
    // currently consumed anywhere in this module.
    assign dcr_reset  = !reset && (reset_start_counter != 4'h0);

    // ------------------------------------------------------------------------
    // DCR programming FSM
    // ------------------------------------------------------------------------
    // A small FSM that tries to set DCR "properly" in the same order as
    // defined in VX_types.vh.
    //
    // DCR is a device configuration register that holds (among other things)
    // the startup address for the kernel, nominally set to 0x80000000.
    //
    // The state register reuses the DCR address macros as state encodings and
    // steps through one write per state until it parks in
    // `VX_DCR_BASE_STATE_END.
    // NOTE(review): this relies on the `VX_DCR_BASE_* values being distinct
    // and non-zero, and on the sequence finishing before reset_start_counter
    // expires - confirm against VX_types.vh.

    // TODO: Original Vortex code buffers dcr_bus by one cycle when
    // SOCKET_SIZE > 1, as below.  Might want to check if we need to do the
    // same
    // `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));

    logic [`VX_DCR_ADDR_BITS-1:0]  dcr_state;
    logic [`VX_DCR_ADDR_BITS-1:0]  dcr_state_n;
    logic                          dcr_write_valid;
    logic [`VX_DCR_ADDR_WIDTH-1:0] dcr_write_addr;
    logic [`VX_DCR_DATA_WIDTH-1:0] dcr_write_data;

    always @(posedge clock) begin
        if (reset) begin
            dcr_state <= `VX_DCR_ADDR_BITS'h000;
        end else begin
            dcr_state <= dcr_state_n;
        end
    end

    always @(*) begin
        // Defaults: hold state, no write.  These also cover any state value
        // not listed in the case below.
        dcr_state_n     = dcr_state;
        dcr_write_valid = 1'b0;
        dcr_write_addr  = `VX_DCR_ADDR_WIDTH'b0;
        dcr_write_data  = `VX_DCR_DATA_WIDTH'b0;

        case (dcr_state)
            // Post-reset idle state: just advance.
            `VX_DCR_ADDR_BITS'h000: begin
                dcr_state_n = `VX_DCR_BASE_STATE_BEGIN;
            end
            // Write the startup address.
            `VX_DCR_BASE_STATE_BEGIN: begin
                dcr_state_n     = `VX_DCR_BASE_STARTUP_ADDR1;
                dcr_write_valid = 1'b1;
                dcr_write_addr  = `VX_DCR_BASE_STARTUP_ADDR0;
                dcr_write_data  = BOOTROM_HANG100;
            end
            `VX_DCR_BASE_STARTUP_ADDR1: begin
                dcr_state_n     = `VX_DCR_BASE_MPM_CLASS;
                dcr_write_valid = 1'b1;
                dcr_write_addr  = `VX_DCR_BASE_STARTUP_ADDR1;
                // FIXME: not sure what this does
                dcr_write_data  = `VX_DCR_DATA_WIDTH'h0;
            end
            `VX_DCR_BASE_MPM_CLASS: begin
                dcr_state_n     = `VX_DCR_BASE_STATE_END;
                dcr_write_valid = 1'b1;
                dcr_write_addr  = `VX_DCR_BASE_MPM_CLASS;
                dcr_write_data  = `VX_DCR_DATA_WIDTH'h0;
            end
            // Terminal state: stay here, no further writes.
            `VX_DCR_BASE_STATE_END: begin
                dcr_state_n     = dcr_state;
                dcr_write_valid = 1'b0;
            end
        endcase
    end

    VX_dcr_bus_if dcr_bus_if();
    assign dcr_bus_if.write_valid = dcr_write_valid;
    assign dcr_bus_if.write_addr  = dcr_write_addr;
    assign dcr_bus_if.write_data  = dcr_write_data;

    // ------------------------------------------------------------------------
    // Core
    // ------------------------------------------------------------------------

    VX_core #(
        .CORE_ID (CORE_ID)
    ) core (
        `SCOPE_IO_BIND  (0) // TODO: should be socket id

        .clk            (clock),
        .reset          (core_reset),

    `ifdef PERF_ENABLE
        // NOTE unused
        .mem_perf_if    (mem_perf_tmp_if),
    `endif

        .dcr_bus_if     (dcr_bus_if),

        .smem_bus_if    (smem_bus_if),

        .dcache_bus_if  (dcache_bus_if),

        .icache_bus_if  (icache_bus_if),

    `ifdef GBAR_ENABLE
        // NOTE unused
        .gbar_bus_if    (per_core_gbar_bus_if[i]),
    `endif

        .sim_ebreak     (sim_ebreak),
        .sim_wb_value   (sim_wb_value),
        .busy           (busy)
    );

    // VX_dcache_req_if #(
    //     .NUM_REQS  (`DCACHE_NUM_REQS),
    //     .WORD_SIZE (`DCACHE_WORD_SIZE),
    //     .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
    // ) dcache_req_if();

    // VX_dcache_rsp_if #(
    //     .NUM_REQS  (`DCACHE_NUM_REQS),
    //     .WORD_SIZE (`DCACHE_WORD_SIZE),
    //     .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
    // ) dcache_rsp_if();
    //
    // VX_icache_req_if #(
    //     .WORD_SIZE (`ICACHE_WORD_SIZE),
    //     .TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
    // ) icache_req_if();

    // VX_icache_rsp_if #(
    //     .WORD_SIZE (`ICACHE_WORD_SIZE),
    //     .TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
    // ) icache_rsp_if();

    // VX_pipeline #(
    //     .CORE_ID(CORE_ID)
    // ) pipeline (
    //     `SCOPE_BIND_VX_core_pipeline
    // `ifdef PERF_ENABLE
    //     .perf_memsys_if (perf_memsys_if),
    // `endif
    //     .clk(clock),
    //     .reset(reset || intr_reset),
    //     .irq(1'b0/*intr_reset*/),
    //     // Dcache core request
    //     .dcache_req_valid  (dcache_req_if.valid),
    //     .dcache_req_rw     (dcache_req_if.rw),
    //     .dcache_req_byteen (dcache_req_if.byteen),
    //     .dcache_req_addr   (dcache_req_if.addr),
    //     .dcache_req_data   (dcache_req_if.data),
    //     .dcache_req_tag    (dcache_req_if.tag),
    //     .dcache_req_ready  (dcache_req_if.ready),
    //     // Dcache core reponse
    //     .dcache_rsp_valid  (dcache_rsp_if.valid),
    //     .dcache_rsp_tmask  (dcache_rsp_if.tmask),
    //     .dcache_rsp_data   (dcache_rsp_if.data),
    //     .dcache_rsp_tag    (dcache_rsp_if.tag),
    //     .dcache_rsp_ready  (dcache_rsp_if.ready),
    //     // Icache core request
    //     .icache_req_valid  (icache_req_if.valid),
    //     .icache_req_addr   (icache_req_if.addr),
    //     .icache_req_tag    (icache_req_if.tag),
    //     .icache_req_ready  (icache_req_if.ready),
    //     // Icache core reponse
    //     .icache_rsp_valid  (icache_rsp_if.valid),
    //     .icache_rsp_data   (icache_rsp_if.data),
    //     .icache_rsp_tag    (icache_rsp_if.tag),
    //     .icache_rsp_ready  (icache_rsp_if.ready),
    //     // Status
    //     .busy(busy)
    // );

    // ------------------------------------------------------------------------
    // Simulation-only end-of-work notice.  The embedded @(negedge clock)
    // timing control is not synthesizable, so the whole block is excluded
    // from synthesis.
    // ------------------------------------------------------------------------
`ifndef SYNTHESIS
    always @(*) begin
        if (busy === 1'b0) begin
            $display("---------------- no more active warps ----------------");
            @(negedge clock);
            // TODO: lane assumed to be 4
            // `ifndef SYNTHESIS
            // for (integer j = 0; j < `NUM_WARPS; j++) begin
            //   $display("warp %2d", j);
            //   for (integer k = 0; k < `NUM_REGS; k += 1)
            //     $display("x%2d: %08x %08x %08x %08x", k,
            //       pipeline.issue.gpr_stage.iports[/*thread*/0].dp_ram1.not_out_reg.reg_dump.ram[j * `NUM_REGS + k],
            //       pipeline.issue.gpr_stage.iports[/*thread*/1].dp_ram1.not_out_reg.reg_dump.ram[j * `NUM_REGS + k],
            //       pipeline.issue.gpr_stage.iports[/*thread*/2].dp_ram1.not_out_reg.reg_dump.ram[j * `NUM_REGS + k],
            //       pipeline.issue.gpr_stage.iports[/*thread*/3].dp_ram1.not_out_reg.reg_dump.ram[j * `NUM_REGS + k]);
            // end
            // `endif
            // @(posedge clock) $finish();
        end
    end
`endif

endmodule : Vortex