Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
153
sim/simx/core.h
153
sim/simx/core.h
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
@@ -11,101 +24,104 @@
|
||||
#include <simobject.h>
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
#include "arch.h"
|
||||
#include "decode.h"
|
||||
#include "mem.h"
|
||||
#include "warp.h"
|
||||
#include "pipeline.h"
|
||||
#include "cache.h"
|
||||
#include "sharedmem.h"
|
||||
#include "cache_sim.h"
|
||||
#include "shared_mem.h"
|
||||
#include "ibuffer.h"
|
||||
#include "scoreboard.h"
|
||||
#include "exeunit.h"
|
||||
#include "tex_unit.h"
|
||||
#include "operand.h"
|
||||
#include "dispatcher.h"
|
||||
#include "exe_unit.h"
|
||||
#include "dcrs.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Cluster;
|
||||
|
||||
class Core : public SimObject<Core> {
|
||||
public:
|
||||
struct PerfStats {
|
||||
uint64_t cycles;
|
||||
uint64_t instrs;
|
||||
uint64_t ibuf_stalls;
|
||||
uint64_t scrb_stalls;
|
||||
uint64_t alu_stalls;
|
||||
uint64_t lsu_stalls;
|
||||
uint64_t csr_stalls;
|
||||
uint64_t fpu_stalls;
|
||||
uint64_t gpu_stalls;
|
||||
uint64_t sfu_stalls;
|
||||
uint64_t ifetches;
|
||||
uint64_t loads;
|
||||
uint64_t stores;
|
||||
uint64_t branches;
|
||||
uint64_t mem_reads;
|
||||
uint64_t mem_writes;
|
||||
uint64_t mem_latency;
|
||||
uint64_t tex_reads;
|
||||
uint64_t tex_latency;
|
||||
uint64_t ifetch_latency;
|
||||
uint64_t load_latency;
|
||||
|
||||
PerfStats()
|
||||
: instrs(0)
|
||||
: cycles(0)
|
||||
, instrs(0)
|
||||
, ibuf_stalls(0)
|
||||
, scrb_stalls(0)
|
||||
, alu_stalls(0)
|
||||
, lsu_stalls(0)
|
||||
, csr_stalls(0)
|
||||
, fpu_stalls(0)
|
||||
, gpu_stalls(0)
|
||||
, sfu_stalls(0)
|
||||
, ifetches(0)
|
||||
, loads(0)
|
||||
, stores(0)
|
||||
, branches(0)
|
||||
, mem_reads(0)
|
||||
, mem_writes(0)
|
||||
, mem_latency(0)
|
||||
, tex_reads(0)
|
||||
, tex_latency(0)
|
||||
, ifetch_latency(0)
|
||||
, load_latency(0)
|
||||
{}
|
||||
};
|
||||
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
SimPort<MemReq> MemReqPort;
|
||||
std::vector<SimPort<MemReq>> icache_req_ports;
|
||||
std::vector<SimPort<MemRsp>> icache_rsp_ports;
|
||||
|
||||
std::vector<SimPort<MemReq>> dcache_req_ports;
|
||||
std::vector<SimPort<MemRsp>> dcache_rsp_ports;
|
||||
|
||||
Core(const SimContext& ctx,
|
||||
uint32_t core_id,
|
||||
Cluster* cluster,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs,
|
||||
SharedMem::Ptr sharedmem);
|
||||
|
||||
Core(const SimContext& ctx, const ArchDef &arch, uint32_t id);
|
||||
~Core();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
bool running() const;
|
||||
|
||||
void reset();
|
||||
|
||||
void tick();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
bool running() const;
|
||||
|
||||
void resume();
|
||||
|
||||
uint32_t id() const {
|
||||
return id_;
|
||||
return core_id_;
|
||||
}
|
||||
|
||||
const Decoder& decoder() {
|
||||
return decoder_;
|
||||
}
|
||||
|
||||
const ArchDef& arch() const {
|
||||
const Arch& arch() const {
|
||||
return arch_;
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
uint32_t getIRegValue(int reg) const {
|
||||
return warps_.at(0)->getIRegValue(reg);
|
||||
const DCRS& dcrs() const {
|
||||
return dcrs_;
|
||||
}
|
||||
|
||||
uint32_t get_csr(uint32_t addr, uint32_t tid, uint32_t wid);
|
||||
|
||||
void set_csr(uint32_t addr, uint32_t value, uint32_t tid, uint32_t wid);
|
||||
|
||||
WarpMask wspawn(uint32_t num_warps, uint32_t nextPC);
|
||||
void wspawn(uint32_t num_warps, Word nextPC);
|
||||
|
||||
WarpMask barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
|
||||
|
||||
AddrType get_addr_type(uint64_t addr);
|
||||
|
||||
void icache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
@@ -113,19 +129,22 @@ public:
|
||||
|
||||
void dcache_write(const void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
uint32_t tex_read(uint32_t unit, uint32_t lod, uint32_t u, uint32_t v, std::vector<mem_addr_size_t>* mem_addrs);
|
||||
void dcache_amo_reserve(uint64_t addr);
|
||||
|
||||
bool dcache_amo_check(uint64_t addr);
|
||||
|
||||
void trigger_ecall();
|
||||
|
||||
void trigger_ebreak();
|
||||
|
||||
bool check_exit() const;
|
||||
bool check_exit(Word* exitcode, bool riscv_test) const;
|
||||
|
||||
private:
|
||||
|
||||
void schedule();
|
||||
void fetch();
|
||||
void decode();
|
||||
void issue();
|
||||
void execute();
|
||||
void commit();
|
||||
|
||||
@@ -133,49 +152,55 @@ private:
|
||||
|
||||
void cout_flush();
|
||||
|
||||
uint32_t id_;
|
||||
const ArchDef arch_;
|
||||
uint32_t core_id_;
|
||||
const Arch& arch_;
|
||||
const DCRS &dcrs_;
|
||||
|
||||
const Decoder decoder_;
|
||||
MemoryUnit mmu_;
|
||||
RAM smem_;
|
||||
std::vector<TexUnit> tex_units_;
|
||||
|
||||
std::vector<std::shared_ptr<Warp>> warps_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<uint32_t> csrs_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<Byte> fcsrs_;
|
||||
std::vector<IBuffer> ibuffers_;
|
||||
Scoreboard scoreboard_;
|
||||
std::vector<Operand::Ptr> operands_;
|
||||
std::vector<Dispatcher::Ptr> dispatchers_;
|
||||
std::vector<ExeUnit::Ptr> exe_units_;
|
||||
Cache::Ptr icache_;
|
||||
Cache::Ptr dcache_;
|
||||
SharedMem::Ptr shared_mem_;
|
||||
Switch<MemReq, MemRsp>::Ptr l1_mem_switch_;
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> dcache_switch_;
|
||||
SharedMem::Ptr sharedmem_;
|
||||
|
||||
PipelineLatch fetch_latch_;
|
||||
PipelineLatch decode_latch_;
|
||||
|
||||
HashTable<pipeline_trace_t*> pending_icache_;
|
||||
std::vector<pipeline_trace_t*> committed_traces_;
|
||||
WarpMask active_warps_;
|
||||
WarpMask stalled_warps_;
|
||||
uint32_t last_schedule_wid_;
|
||||
uint64_t issued_instrs_;
|
||||
uint64_t committed_instrs_;
|
||||
uint32_t csr_tex_unit_;
|
||||
bool ecall_;
|
||||
bool ebreak_;
|
||||
bool exited_;
|
||||
|
||||
uint64_t pending_ifetches_;
|
||||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
std::vector<std::vector<CSRs>> csrs_;
|
||||
|
||||
PerfStats perf_stats_;
|
||||
uint64_t perf_mem_pending_reads_;
|
||||
|
||||
Cluster* cluster_;
|
||||
|
||||
uint32_t commit_exe_;
|
||||
|
||||
friend class Warp;
|
||||
friend class LsuUnit;
|
||||
friend class AluUnit;
|
||||
friend class CsrUnit;
|
||||
friend class FpuUnit;
|
||||
friend class GpuUnit;
|
||||
friend class SfuUnit;
|
||||
friend class TexUnit;
|
||||
friend class RasterAgent;
|
||||
friend class RopAgent;
|
||||
friend class TexAgent;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
} // namespace vortex
|
||||
|
||||
Reference in New Issue
Block a user