Vortex 2.0 changes:

+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactory minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit c1e168fdbe
1309 changed files with 247412 additions and 311463 deletions
--- a/hw/unittest/mem_streamer/Makefile
+++ b/hw/unittest/mem_streamer/Makefile
@@ -0,0 +1,65 @@
+# Builds and runs the mem_streamer unit test: Verilator compiles the
+# VX_mem_scheduler RTL together with the C++ testbench sources listed in SRCS.
+DESTDIR ?= .
+RTL_DIR = ../../rtl
+DPI_DIR = ../../dpi
+
+# Optional extras: CONFIGS is forwarded to both the C++ compiler and Verilator,
+# PARAMS to Verilator only.
+CONFIGS +=
+PARAMS +=
+
+CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
+CXXFLAGS += -fPIC -Wno-maybe-uninitialized
+CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
+CXXFLAGS += $(CONFIGS)
+
+LDFLAGS +=
+
+# Only applied when DEBUG is defined (see the conditional further down).
+DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
+
+RTL_PKGS +=
+
+RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
+
+SRCS = memsim.cpp ram.cpp
+SRCS += $(DPI_DIR)/util_dpi.cpp
+
+TOP = VX_mem_scheduler
+
+VL_FLAGS = --exe
+VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
+VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
+VL_FLAGS += --x-initial unique --x-assign unique
+VL_FLAGS += -DSIMULATION
+VL_FLAGS += $(CONFIGS)
+VL_FLAGS += $(PARAMS)
+VL_FLAGS += $(RTL_INCLUDE)
+VL_FLAGS += $(RTL_PKGS)
+VL_FLAGS += --cc $(TOP) --top-module $(TOP)
+
+# Number of parallel jobs handed to Verilator via -j. Defaults to the host CPU
+# count: try python3 first, then python, and fall back to a single job when
+# neither interpreter is installed.
+THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())' 2>/dev/null || python -c 'import multiprocessing as mp; print(mp.cpu_count())' 2>/dev/null || echo 1)
+VL_FLAGS += -j $(THREADS)
+#VL_FLAGS += --threads $(THREADS)
+
+# Debugging
+ifdef DEBUG
+	VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
+	CXXFLAGS += -g -O0 $(DBG_FLAGS)
+else
+	VL_FLAGS += -DNDEBUG
+	CXXFLAGS += -O2 -DNDEBUG
+endif
+
+PROJECT = mem_streamer
+
+# These targets are commands, not files; keep a stray file of the same name
+# from masking them.
+.PHONY: all run waves clean
+
+all: $(DESTDIR)/$(PROJECT)
+
+# NOTE(review): the ../ prefix presumably compensates for Verilator placing the
+# executable relative to its obj_dir build directory - confirm against the
+# Verilator documentation.
+$(DESTDIR)/$(PROJECT): $(SRCS)
+	verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
+
+run: $(DESTDIR)/$(PROJECT)
+	$(DESTDIR)/$(PROJECT)
+
+waves: trace.vcd
+	gtkwave -o trace.vcd
+
+clean:
+	rm -rf obj_dir $(DESTDIR)/$(PROJECT)
--- a/hw/unittest/mem_streamer/memsim.cpp
+++ b/hw/unittest/mem_streamer/memsim.cpp
@@ -0,0 +1,166 @@
+// Copyright © 2019-2023
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <random>
+#include "memsim.h"
+#include "ram.h"
+
+// Default trace window bounds; either macro may be overridden at compile time
+// (e.g. -DTRACE_START_TIME=100).
+#ifndef TRACE_START_TIME
+#define TRACE_START_TIME 0ull
+#endif
+
+#ifndef TRACE_STOP_TIME
+#define TRACE_STOP_TIME -1ull
+#endif
+
+// Manual tracing flag, used by sim_trace_enabled() outside the trace window.
+static bool trace_enabled = false;
+// Seed the window from the macros above so that a compile-time override takes
+// effect; the defaults are identical to the previously hard-coded literals.
+static uint64_t trace_start_time = TRACE_START_TIME;
+static uint64_t trace_stop_time = TRACE_STOP_TIME;
+// Current simulation time in evaluation ticks.
+static uint64_t timestamp = 0;
+
+// Reports the current simulation timestamp as a double.
+double sc_time_stamp() {
+	const double now = static_cast<double>(timestamp);
+	return now;
+}
+
+// Tracing is on while the timestamp lies inside [trace_start_time, trace_stop_time);
+// outside that window the manually set trace_enabled flag decides.
+bool sim_trace_enabled() {
+	const bool inside_window = (timestamp >= trace_start_time) && (timestamp < trace_stop_time);
+	return inside_window ? true : trace_enabled;
+}
+
+// Stores the manual tracing flag that sim_trace_enabled() falls back to
+// outside the trace window.
+void sim_trace_enable (bool enable) {
+	trace_enabled = enable;
+}
+
+// Returns a pseudo-random integer in the inclusive range [min, max], drawn from rand().
+//
+// The span is computed with unsigned arithmetic. attach_core() passes an upper
+// bound of 0x80000000, which does not fit in a signed int, and the former signed
+// `max - min + 1` overflowed for it (undefined behaviour). Unsigned wrap-around
+// is well defined and produces the same values for every range with max >= min
+// that did not overflow before.
+int generate_rand (int min, int max) {
+	const unsigned lo   = static_cast<unsigned>(min);
+	const unsigned span = static_cast<unsigned>(max) - lo + 1u;
+	const unsigned draw = static_cast<unsigned>(rand());
+	// span wraps to 0 when the range covers every unsigned value (or when
+	// max == min - 1); skip the modulo rather than divide by zero.
+	const unsigned offset = (span != 0u) ? (draw % span) : draw;
+	return static_cast<int>(lo + offset);
+}
+
+// Builds a random subset of the low four bits of `mask`: each set bit is kept
+// or cleared at random, each clear bit stays clear.
+int generate_rand_mask (int mask) {
+	int subset = 0;
+	for (int pos = 0; pos < 4; ++pos) {
+		const int bit = (mask >> pos) & 0x1;
+		// generate_rand(0, 0) is still called for clear bits, so the number
+		// of rand() draws does not depend on the mask.
+		const int chosen = generate_rand (0, bit);
+		subset |= (chosen << pos);
+	}
+	return subset;
+}
+
+// Instantiates the Verilated VX_mem_scheduler model and, when VCD_OUTPUT is
+// defined, attaches a VCD tracer that writes to "trace.vcd".
+MemSim::MemSim() {
+	msu_ = new VVX_mem_scheduler();
+
+	// Enable tracing
+	Verilated::traceEverOn(true);
+
+#ifdef VCD_OUTPUT
+	trace_ = new VerilatedVcdC;
+	// Register the model with the tracer, then open the dump file. The former
+	// code referred to the nonexistent names cache_ and race_ here, so it could
+	// not compile with VCD_OUTPUT defined.
+	msu_->trace(trace_, 99);
+	trace_->open("trace.vcd");
+#endif
+}
+
+// Closes and frees the VCD tracer (VCD_OUTPUT builds only), then frees the model.
+MemSim::~MemSim() {
+#ifdef VCD_OUTPUT
+	trace_->close();
+	delete trace_;	// allocated with new in the constructor; was never freed
+#endif
+	delete msu_;
+}
+
+// Evaluates the model once and advances the simulation timestamp by one tick.
+//
+// The timestamp advances whether or not VCD_OUTPUT is defined: run() loops
+// until sc_time_stamp() reaches SIM_TIME, so tying the increment to the trace
+// dump made builds without VCD_OUTPUT loop forever.
+void MemSim::eval() {
+	msu_->eval();
+#ifdef VCD_OUTPUT
+	trace_->dump(timestamp);
+#endif
+	++timestamp;
+}
+
+// Drives one clock period: evaluates the model with clk low, then with clk high.
+void MemSim::step() {
+	const int clk_levels[] = {0, 1};
+	for (int level : clk_levels) {
+		msu_->clk = level;
+		this->eval();
+	}
+}
+
+// Holds reset high for one clock period, then releases it and clocks once more.
+void MemSim::reset() {
+	for (int asserted = 1; asserted >= 0; --asserted) {
+		msu_->reset = asserted;
+		this->step();
+	}
+}
+
+// Plays the role of the core: always accepts responses and, whenever the
+// scheduler reports req_ready, drives a freshly randomised request.
+void MemSim::attach_core() {
+	// Independent of the request side, so it can be driven up front.
+	msu_->rsp_ready = true;
+
+	if (!msu_->req_ready)
+		return;	// leave the previously driven request untouched
+
+	// Field order is kept as-is: every call consumes the next rand() value.
+	msu_->req_valid = generate_rand(0, 1);
+	msu_->req_rw = generate_rand(0, 1);
+	msu_->req_mask = generate_rand(0x1, 0xF);
+	msu_->req_byteen = 0x1;
+	msu_->req_addr = generate_rand(0, 0x10000000);
+	msu_->req_data = generate_rand(0x60000000, 0x80000000);
+	msu_->req_tag = generate_rand(0x00, 0xFF);
+}
+
+// Connects the scheduler's memory port to the RAM model for one cycle:
+// forwards the outgoing request, drives the RAM's scheduled response back into
+// the model, and reports the model's mem_rsp_ready to the RAM via halt_rsp().
+void MemSim::attach_ram (RAM *ram) {
+	// Request path: snapshot the scheduler outputs and offer them to the RAM.
+	req_t mem_req;
+	mem_req.valid = msu_->mem_req_valid;
+	mem_req.rw = msu_->mem_req_rw;
+	mem_req.byteen = msu_->mem_req_byteen;
+	mem_req.addr = msu_->mem_req_addr;
+	mem_req.data = msu_->mem_req_data;
+	mem_req.tag = msu_->mem_req_tag;
+	msu_->mem_req_ready = ram->is_ready();
+	ram->insert_req(mem_req);
+
+	// Response path: drive whatever the RAM schedules this cycle.
+	rsp_t mem_rsp = ram->schedule_rsp();
+	msu_->mem_rsp_valid = mem_rsp.valid;
+	msu_->mem_rsp_data = mem_rsp.data;
+	msu_->mem_rsp_tag = mem_rsp.tag;
+
+	mem_rsp.ready = msu_->mem_rsp_ready;
+	std::cout << "MEMSIM: mem_rsp_ready: " << mem_rsp.ready << "\n";
+
+	ram->halt_rsp(mem_rsp);
+}
+
+// Resets the model, then clocks it until sc_time_stamp() reaches SIM_TIME,
+// servicing the core side and the RAM side after every clock period.
+void MemSim::run(RAM *ram) {
+	for (this->reset(); sc_time_stamp() < SIM_TIME; ) {
+		this->step();
+
+		std::cout << "=========================" << "\n"
+			<< "Cycle: " << sc_time_stamp() << "\n";
+
+		this->attach_core();
+		this->attach_ram(ram);
+	}
+}
+
+// Test entry point: hands the command line to Verilator, then runs the
+// simulation against a freshly constructed RAM model.
+int main (int argc, char** argv, char** env) {
+	(void)env;	// not used by this testbench
+
+	Verilated::commandArgs(argc, argv);
+
+	MemSim memsim;
+	RAM ram;
+	memsim.run(&ram);
+
+	return 0;
+}
--- a/hw/unittest/mem_streamer/memsim.h
+++ b/hw/unittest/mem_streamer/memsim.h
@@ -0,0 +1,49 @@
+// Copyright © 2019-2023
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <iostream>
+#include <unordered_map>
+#include <vector>
+#include <verilated.h>
+#include <verilated_vcd_c.h>
+#include "VVX_mem_scheduler.h"
+#include "VVX_mem_scheduler__Syms.h"
+#include "ram.h"
+
+// Simulation length: MemSim::run() keeps clocking while sc_time_stamp() is below this value.
+#define SIM_TIME 5000
+
+// Returns a pseudo-random integer in the inclusive range [min, max].
+int generate_rand (int min, int max);
+// Returns a random subset of the low four bits of `mask`.
+int generate_rand_mask (int mask);
+
+// Testbench driver around the Verilated VX_mem_scheduler model. It clocks the
+// model, feeds it randomised core requests and connects its memory port to a
+// RAM model.
+class MemSim {
+public:
+    MemSim();
+    virtual ~MemSim();
+
+    // msu_ is an owning raw pointer that the destructor deletes, so an
+    // implicit copy would end in a double delete. Copying is disabled.
+    MemSim(const MemSim&) = delete;
+    MemSim& operator=(const MemSim&) = delete;
+
+    // Resets the model and simulates until SIM_TIME, using `ram` as the memory.
+    void run(RAM *ram);
+
+private:
+    VVX_mem_scheduler *msu_;    // Verilated model, allocated in the constructor
+#ifdef VCD_OUTPUT
+    VerilatedVcdC *trace_;      // VCD tracer, present only in VCD_OUTPUT builds
+#endif
+
+    void eval();                // one model evaluation
+    void step();                // one clock period (clk low, then high)
+    void reset();               // reset asserted for one period, then released
+
+    void attach_core();         // drive core-side request/response signals
+    void attach_ram(RAM *ram);  // exchange memory request/response with `ram`
+};
--- a/hw/unittest/mem_streamer/ram.cpp
+++ b/hw/unittest/mem_streamer/ram.cpp
@@ -0,0 +1,123 @@
+// Copyright © 2019-2023
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ram.h"
+#include "memsim.h"
+
+// Starts with an empty request queue, no response in flight and no stall.
+RAM::RAM()
+    : ram_()
+    , is_rsp_active_(false)
+    , is_rsp_stall_(false) {
+}
+
+// Reports whether a queued request already targets req.addr, logging a
+// message when one does.
+bool RAM::check_duplicate_req(req_t req) {
+    for (const req_t &entry : ram_) {
+        if (entry.addr != req.addr)
+            continue;
+        std::cout<<"RAM: Duplicate entry. Do not insert..."<<std::endl;
+        return true;
+    }
+    return false;
+}
+
+// Advances the latency countdown of every queued request by one cycle (unless
+// responses are stalled) and returns the index of the last entry whose
+// countdown has reached zero, or -1 when there is none.
+int RAM::simulate_cycle_delay() {
+
+    std::cout<<"RAM: # entries: "<<ram_.size()<<std::endl;
+
+    int ready_index = -1;
+    int index = 0;
+
+    for (req_t &entry : ram_) {
+        const bool tick = !is_rsp_stall_ && (entry.cycles_left > 0);
+        if (tick)
+            entry.cycles_left -= 1;
+
+        std::cout<<"RAM: # cycles left: "<<entry.cycles_left<<std::endl;
+
+        if (entry.cycles_left == 0)
+            ready_index = index;
+
+        ++index;
+    }
+    return ready_index;
+}
+
+// Queues a read request. Requests whose address is already queued, invalid
+// requests and writes are dropped. The stored tag keeps only its two low bits
+// and the latency countdown starts at MEM_LATENCY.
+void RAM::insert_req(req_t req) {
+    // The duplicate check (and its log line) runs first, exactly as in a
+    // short-circuit chain.
+    if (this->check_duplicate_req(req))
+        return;
+    if (!req.valid || req.rw)
+        return;
+
+    req_t entry;
+    entry.valid = req.valid;
+    entry.rw = req.rw;
+    entry.byteen = req.byteen;
+    entry.addr = req.addr;
+    entry.data = req.data;
+    entry.tag = req.tag & 0x3;
+
+    // Store metadata
+    entry.cycles_left = MEM_LATENCY;
+
+    std::cout<<"RAM: Insert entry... "<<std::endl;
+    std::cout<<"Write? : "<<req.rw<<std::endl;
+    ram_.push_back(entry);
+}
+
+// Always reports ready: returns 0b1111 (the low four bits set).
+uint8_t RAM::is_ready() {
+    // return generate_rand(0b1000, 0b1111);
+    const uint8_t all_ready = 0xF;
+    return all_ready;
+}
+
+// Advances the request latencies by one cycle and, if no response is currently
+// in flight and a request has finished waiting, builds a response for it.
+//
+// Returns a response with valid == 1 when one was scheduled; otherwise an
+// all-zero response (valid == false).
+//
+// NOTE(review): while is_rsp_active_ is set the previously scheduled response
+// is not driven again - an invalid response is returned instead, and
+// halt_rsp() only clears the flag for a valid one. Confirm that this is the
+// intended handshake.
+rsp_t RAM::schedule_rsp() {
+    // Value-initialise so that every early-return path hands back defined
+    // fields; the former code returned an uninitialised struct while a
+    // response was still active.
+    rsp_t rsp{};
+    const int dequeue_index = this->simulate_cycle_delay();
+
+    if (is_rsp_active_ || dequeue_index == -1)
+        return rsp;
+
+    std::cout<<"RAM: Scheduling response... "<<std::endl;
+
+    req_t &entry = ram_[dequeue_index];
+
+    is_rsp_active_ = true;
+    rsp.valid   = 1;
+    rsp.mask    = generate_rand_mask(entry.valid);
+    rsp.data    = generate_rand(0x20000000, 0x30000000);
+    rsp.tag     = entry.tag;
+
+    // Print the masks in hex, then restore the stream flags so the hex base
+    // does not leak into every later std::cout message.
+    const std::ios_base::fmtflags saved_flags = std::cout.flags();
+    std::cout<<std::hex;
+    std::cout<<"RAM: Response mask: "<<+rsp.mask<<" | Required mask: "<<+entry.valid<<std::endl;
+    std::cout.flags(saved_flags);
+
+    // Clear the bits that this response has just served.
+    entry.rsp_sent_mask = rsp.mask;
+    entry.valid = entry.valid & ~entry.rsp_sent_mask;
+
+    if (0 == entry.valid) {
+        // `entry` dangles after the erase; it is not used past this point.
+        ram_.erase(ram_.begin() + dequeue_index);
+        is_rsp_stall_ = false;
+        std::cout<<"RAM: Clear entry... "<<std::endl;
+    } else {
+        is_rsp_stall_ = true;
+        std::cout<<"RAM: Stall... "<<std::endl;
+    }
+
+    return rsp;
+}
+
+// Schedule response for only one cycle: once the active response passed in is
+// both valid and ready, the in-flight flag is cleared.
+void RAM::halt_rsp(rsp_t rsp) {
+    const bool accepted = is_rsp_active_ && rsp.valid && rsp.ready;
+    if (!accepted)
+        return;
+
+    std::cout<<"RAM: Halt response..."<<std::endl;
+    is_rsp_active_ = false;
+}
--- a/hw/unittest/mem_streamer/ram.h
+++ b/hw/unittest/mem_streamer/ram.h
@@ -0,0 +1,64 @@
+// Copyright © 2019-2023
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <iostream>
+#include <vector>
+
+// Initial value of req_t::cycles_left for a newly queued request.
+#define MEM_LATENCY 4
+
+// One memory request plus the bookkeeping the RAM model keeps for it.
+//
+// Every field has a default so that a freshly declared req_t never holds
+// indeterminate values; the code that builds requests fills in only a subset
+// of the fields before copying the struct. A named struct is used because
+// default member initialisers are not allowed in an unnamed struct that gets
+// its name from a typedef.
+struct req_t {
+    uint8_t     valid         = 0;
+    bool        rw            = false;
+    uint8_t     byteen        = 0;
+    uint32_t    addr          = 0;
+    uint32_t    data          = 0;
+    uint8_t     tag           = 0;
+    uint8_t     ready         = 0;
+
+    // Metadata
+    uint8_t     rsp_sent_mask = 0;    // mask carried by the most recent response
+    double      cycles_left   = 0;    // remaining latency before a response may be scheduled
+};
+
+// One memory response as exchanged between the RAM model and the testbench.
+//
+// Every field has a default so that a default-constructed rsp_t is a
+// well-defined "no response" (valid == false) rather than indeterminate data.
+// A named struct is used because default member initialisers are not allowed
+// in an unnamed struct that gets its name from a typedef.
+struct rsp_t {
+    bool        valid = false;
+    uint8_t     mask  = 0;
+    uint32_t    data  = 0;
+    uint8_t     tag   = 0;
+    bool        ready = false;
+};
+
+// Simple RAM model for the testbench: queues read requests, counts down a
+// fixed latency for each and hands out responses.
+class RAM {
+
+    public:
+        RAM();
+
+        // Ready value reported for incoming requests.
+        uint8_t is_ready();
+        // Queues `req` unless it is invalid, a write, or a duplicate address.
+        void    insert_req(req_t req);
+        // Advances latencies by one cycle and returns the response for it.
+        rsp_t   schedule_rsp();
+        // Clears the in-flight flag once `rsp` is valid and ready.
+        void    halt_rsp(rsp_t rsp);
+
+    private:
+        // Queued requests still waiting to be answered.
+        std::vector<req_t> ram_;
+
+        bool is_rsp_active_;    // a response has been scheduled and not yet halted
+        bool is_rsp_stall_;     // latency countdown is paused
+
+        bool    check_duplicate_req(req_t req);
+        int     simulate_cycle_delay();
+
+};
+
+//////////////////////////////////////////////////////