Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit d47cccc157
1300 changed files with 247321 additions and 311189 deletions

View File

@@ -1,3 +1,16 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
@@ -11,101 +24,104 @@
#include <simobject.h>
#include "debug.h"
#include "types.h"
#include "archdef.h"
#include "arch.h"
#include "decode.h"
#include "mem.h"
#include "warp.h"
#include "pipeline.h"
#include "cache.h"
#include "sharedmem.h"
#include "cache_sim.h"
#include "shared_mem.h"
#include "ibuffer.h"
#include "scoreboard.h"
#include "exeunit.h"
#include "tex_unit.h"
#include "operand.h"
#include "dispatcher.h"
#include "exe_unit.h"
#include "dcrs.h"
namespace vortex {
class Cluster;
class Core : public SimObject<Core> {
public:
struct PerfStats {
uint64_t cycles;
uint64_t instrs;
uint64_t ibuf_stalls;
uint64_t scrb_stalls;
uint64_t alu_stalls;
uint64_t lsu_stalls;
uint64_t csr_stalls;
uint64_t fpu_stalls;
uint64_t gpu_stalls;
uint64_t sfu_stalls;
uint64_t ifetches;
uint64_t loads;
uint64_t stores;
uint64_t branches;
uint64_t mem_reads;
uint64_t mem_writes;
uint64_t mem_latency;
uint64_t tex_reads;
uint64_t tex_latency;
uint64_t ifetch_latency;
uint64_t load_latency;
PerfStats()
: instrs(0)
: cycles(0)
, instrs(0)
, ibuf_stalls(0)
, scrb_stalls(0)
, alu_stalls(0)
, lsu_stalls(0)
, csr_stalls(0)
, fpu_stalls(0)
, gpu_stalls(0)
, sfu_stalls(0)
, ifetches(0)
, loads(0)
, stores(0)
, branches(0)
, mem_reads(0)
, mem_writes(0)
, mem_latency(0)
, tex_reads(0)
, tex_latency(0)
, ifetch_latency(0)
, load_latency(0)
{}
};
SimPort<MemRsp> MemRspPort;
SimPort<MemReq> MemReqPort;
std::vector<SimPort<MemReq>> icache_req_ports;
std::vector<SimPort<MemRsp>> icache_rsp_ports;
std::vector<SimPort<MemReq>> dcache_req_ports;
std::vector<SimPort<MemRsp>> dcache_rsp_ports;
Core(const SimContext& ctx,
uint32_t core_id,
Cluster* cluster,
const Arch &arch,
const DCRS &dcrs,
SharedMem::Ptr sharedmem);
Core(const SimContext& ctx, const ArchDef &arch, uint32_t id);
~Core();
void attach_ram(RAM* ram);
bool running() const;
void reset();
void tick();
void attach_ram(RAM* ram);
bool running() const;
void resume();
uint32_t id() const {
return id_;
return core_id_;
}
const Decoder& decoder() {
return decoder_;
}
const ArchDef& arch() const {
const Arch& arch() const {
return arch_;
}
const PerfStats& perf_stats() const {
return perf_stats_;
}
uint32_t getIRegValue(int reg) const {
return warps_.at(0)->getIRegValue(reg);
const DCRS& dcrs() const {
return dcrs_;
}
uint32_t get_csr(uint32_t addr, uint32_t tid, uint32_t wid);
void set_csr(uint32_t addr, uint32_t value, uint32_t tid, uint32_t wid);
WarpMask wspawn(uint32_t num_warps, uint32_t nextPC);
void wspawn(uint32_t num_warps, Word nextPC);
WarpMask barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
void barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
AddrType get_addr_type(uint64_t addr);
void icache_read(void* data, uint64_t addr, uint32_t size);
@@ -113,19 +129,22 @@ public:
void dcache_write(const void* data, uint64_t addr, uint32_t size);
uint32_t tex_read(uint32_t unit, uint32_t lod, uint32_t u, uint32_t v, std::vector<mem_addr_size_t>* mem_addrs);
void dcache_amo_reserve(uint64_t addr);
bool dcache_amo_check(uint64_t addr);
void trigger_ecall();
void trigger_ebreak();
bool check_exit() const;
bool check_exit(Word* exitcode, bool riscv_test) const;
private:
void schedule();
void fetch();
void decode();
void issue();
void execute();
void commit();
@@ -133,49 +152,55 @@ private:
void cout_flush();
uint32_t id_;
const ArchDef arch_;
uint32_t core_id_;
const Arch& arch_;
const DCRS &dcrs_;
const Decoder decoder_;
MemoryUnit mmu_;
RAM smem_;
std::vector<TexUnit> tex_units_;
std::vector<std::shared_ptr<Warp>> warps_;
std::vector<WarpMask> barriers_;
std::vector<uint32_t> csrs_;
std::vector<WarpMask> barriers_;
std::vector<Byte> fcsrs_;
std::vector<IBuffer> ibuffers_;
Scoreboard scoreboard_;
std::vector<Operand::Ptr> operands_;
std::vector<Dispatcher::Ptr> dispatchers_;
std::vector<ExeUnit::Ptr> exe_units_;
Cache::Ptr icache_;
Cache::Ptr dcache_;
SharedMem::Ptr shared_mem_;
Switch<MemReq, MemRsp>::Ptr l1_mem_switch_;
std::vector<Switch<MemReq, MemRsp>::Ptr> dcache_switch_;
SharedMem::Ptr sharedmem_;
PipelineLatch fetch_latch_;
PipelineLatch decode_latch_;
HashTable<pipeline_trace_t*> pending_icache_;
std::vector<pipeline_trace_t*> committed_traces_;
WarpMask active_warps_;
WarpMask stalled_warps_;
uint32_t last_schedule_wid_;
uint64_t issued_instrs_;
uint64_t committed_instrs_;
uint32_t csr_tex_unit_;
bool ecall_;
bool ebreak_;
bool exited_;
uint64_t pending_ifetches_;
std::unordered_map<int, std::stringstream> print_bufs_;
std::vector<std::vector<CSRs>> csrs_;
PerfStats perf_stats_;
uint64_t perf_mem_pending_reads_;
Cluster* cluster_;
uint32_t commit_exe_;
friend class Warp;
friend class LsuUnit;
friend class AluUnit;
friend class CsrUnit;
friend class FpuUnit;
friend class GpuUnit;
friend class SfuUnit;
friend class TexUnit;
friend class RasterAgent;
friend class RopAgent;
friend class TexAgent;
};
} // namespace vortex
} // namespace vortex