Vortex 2.0 changes:

+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit d47cccc157
1300 changed files with 247321 additions and 311189 deletions
@@ -1,3 +1,16 @@
+// Copyright © 2019-2023
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #pragma once

 #include <string>
@@ -11,101 +24,104 @@
 #include <simobject.h>
 #include "debug.h"
 #include "types.h"
-#include "archdef.h"
+#include "arch.h"
 #include "decode.h"
 #include "mem.h"
 #include "warp.h"
 #include "pipeline.h"
-#include "cache.h"
-#include "sharedmem.h"
+#include "cache_sim.h"
+#include "shared_mem.h"
 #include "ibuffer.h"
 #include "scoreboard.h"
-#include "exeunit.h"
-#include "tex_unit.h"
+#include "operand.h"
+#include "dispatcher.h"
+#include "exe_unit.h"
+#include "dcrs.h"

 namespace vortex {

+class Cluster;
+
 class Core : public SimObject<Core> {
 public:
  // Performance counters kept by a Core (see the perf_stats_ member).
  // Every counter is a uint64_t and is set to 0 by the default constructor.
  // This change adds cycles, sfu_stalls, ifetches, ifetch_latency and
  // load_latency, and removes csr_stalls, gpu_stalls, branches, mem_reads,
  // mem_writes, mem_latency, tex_reads and tex_latency.
  // NOTE(review): the meaning of each counter is inferred from its name only
  // (e.g. *_stalls presumably count stalls attributed to that unit) - confirm
  // against the implementation that updates them.
  struct PerfStats {
+    uint64_t cycles;
    uint64_t instrs;
    uint64_t ibuf_stalls;
    uint64_t scrb_stalls;
    uint64_t alu_stalls;
    uint64_t lsu_stalls;
-    uint64_t csr_stalls;
    uint64_t fpu_stalls;
-    uint64_t gpu_stalls;
+    uint64_t sfu_stalls;
+    uint64_t ifetches;
    uint64_t loads;
    uint64_t stores;
-    uint64_t branches;
-    uint64_t mem_reads;
-    uint64_t mem_writes;
-    uint64_t mem_latency;
-    uint64_t tex_reads;
-    uint64_t tex_latency;
+    uint64_t ifetch_latency;
+    uint64_t load_latency;

    PerfStats() 
-      : instrs(0)
+      : cycles(0)
+      , instrs(0)
      , ibuf_stalls(0)
      , scrb_stalls(0)
      , alu_stalls(0)
      , lsu_stalls(0)
-      , csr_stalls(0)
      , fpu_stalls(0)
-      , gpu_stalls(0)
+      , sfu_stalls(0)
+      , ifetches(0)
      , loads(0)
      , stores(0)
-      , branches(0)
-      , mem_reads(0)
-      , mem_writes(0)
-      , mem_latency(0)
-      , tex_reads(0)
-      , tex_latency(0)
+      , ifetch_latency(0)
+      , load_latency(0)
    {}
  };

-  SimPort<MemRsp> MemRspPort;
-  SimPort<MemReq> MemReqPort;
+  std::vector<SimPort<MemReq>> icache_req_ports;
+  std::vector<SimPort<MemRsp>> icache_rsp_ports;
+
+  std::vector<SimPort<MemReq>> dcache_req_ports;
+  std::vector<SimPort<MemRsp>> dcache_rsp_ports;
+
+  Core(const SimContext& ctx, 
+       uint32_t core_id, 
+       Cluster* cluster,
+       const Arch &arch, 
+       const DCRS &dcrs,
+       SharedMem::Ptr  sharedmem);

-  Core(const SimContext& ctx, const ArchDef &arch, uint32_t id);
  ~Core();

-  void attach_ram(RAM* ram);
-
-  bool running() const;
-
  void reset();

  void tick();

+  void attach_ram(RAM* ram);
+
+  bool running() const;
+
+  void resume();
+
  // Returns this core's identifier. The backing member is renamed from id_
  // to core_id_ by this change; the accessor's signature is unchanged.
  uint32_t id() const {
-    return id_;
+    return core_id_;
  }

-  const Decoder& decoder() {
-    return decoder_;
-  }
-
-  const ArchDef& arch() const {
+  const Arch& arch() const {
    return arch_;
  }

-  const PerfStats& perf_stats() const {
-    return perf_stats_;
-  } 
-
-  uint32_t getIRegValue(int reg) const {
-    return warps_.at(0)->getIRegValue(reg);
+  const DCRS& dcrs() const {
+    return dcrs_;
  }

  uint32_t get_csr(uint32_t addr, uint32_t tid, uint32_t wid);
  
  void set_csr(uint32_t addr, uint32_t value, uint32_t tid, uint32_t wid);

-  WarpMask wspawn(uint32_t num_warps, uint32_t nextPC);
+  void wspawn(uint32_t num_warps, Word nextPC);
  
-  WarpMask barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
+  void barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
+
+  AddrType get_addr_type(uint64_t addr);

  void icache_read(void* data, uint64_t addr, uint32_t size);

@@ -113,19 +129,22 @@ public:

  void dcache_write(const void* data, uint64_t addr, uint32_t size);

-  uint32_t tex_read(uint32_t unit, uint32_t lod, uint32_t u, uint32_t v, std::vector<mem_addr_size_t>* mem_addrs);
+  void dcache_amo_reserve(uint64_t addr);
+
+  bool dcache_amo_check(uint64_t addr);

  void trigger_ecall();

  void trigger_ebreak();

-  bool check_exit() const;
+  bool check_exit(Word* exitcode, bool riscv_test) const;

 private:

  // Private helpers named after pipeline stages; this change adds issue()
  // between decode() and execute().
  // NOTE(review): presumably each is invoked once per tick() in pipeline
  // order - the call site is not visible here, confirm in the implementation.
  void schedule();
  void fetch();
  void decode();
+  void issue();
  void execute();
  void commit();
  
@@ -133,49 +152,55 @@ private:

  void cout_flush();

-  uint32_t id_;
-  const ArchDef arch_;
+  uint32_t core_id_;
+  const Arch& arch_;
+  const DCRS &dcrs_;
+  
  const Decoder decoder_;
  MemoryUnit mmu_;
-  RAM smem_;
-  std::vector<TexUnit> tex_units_;

  std::vector<std::shared_ptr<Warp>> warps_;  
-  std::vector<WarpMask> barriers_;  
-  std::vector<uint32_t> csrs_;
+  std::vector<WarpMask> barriers_;
  std::vector<Byte> fcsrs_;
  std::vector<IBuffer> ibuffers_;
  Scoreboard scoreboard_;
+  std::vector<Operand::Ptr> operands_;
+  std::vector<Dispatcher::Ptr> dispatchers_;
  std::vector<ExeUnit::Ptr> exe_units_;
-  Cache::Ptr icache_;
-  Cache::Ptr dcache_;
-  SharedMem::Ptr shared_mem_;
-  Switch<MemReq, MemRsp>::Ptr l1_mem_switch_;
-  std::vector<Switch<MemReq, MemRsp>::Ptr> dcache_switch_;
+  SharedMem::Ptr sharedmem_;

  PipelineLatch fetch_latch_;
  PipelineLatch decode_latch_;
  
  HashTable<pipeline_trace_t*> pending_icache_;
+  std::vector<pipeline_trace_t*> committed_traces_;
  WarpMask active_warps_;
  WarpMask stalled_warps_;
-  uint32_t last_schedule_wid_;
  uint64_t issued_instrs_;
  uint64_t committed_instrs_;
-  uint32_t csr_tex_unit_;
-  bool ecall_;
-  bool ebreak_;
+  bool exited_;
+
+  uint64_t pending_ifetches_;

  std::unordered_map<int, std::stringstream> print_bufs_;
+
+  std::vector<std::vector<CSRs>> csrs_;
  
  PerfStats perf_stats_;
-  uint64_t perf_mem_pending_reads_;
+  
+  Cluster* cluster_;

+  uint32_t commit_exe_;
+
+  friend class Warp;
  friend class LsuUnit;
  friend class AluUnit;
-  friend class CsrUnit;
  friend class FpuUnit;
-  friend class GpuUnit;
+  friend class SfuUnit;
+  friend class TexUnit;
+  friend class RasterAgent;
+  friend class RopAgent;
+  friend class TexAgent;
 };

-} // namespace vortex
+} // namespace vortex