Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
This commit is contained in:
3
hw/.gitignore
vendored
3
hw/.gitignore
vendored
@@ -1 +1,2 @@
|
||||
obj_dir/*
|
||||
VX_config.h
|
||||
VX_types.h
|
||||
13
hw/Makefile
13
hw/Makefile
@@ -1,12 +1,17 @@
|
||||
RTL_DIR=./rtl
|
||||
SCRIPT_DIR=./scripts
|
||||
|
||||
all: VX_config.h
|
||||
all: config
|
||||
|
||||
config: VX_config.h VX_types.h
|
||||
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
|
||||
|
||||
clean:
|
||||
rm -f VX_config.h
|
||||
VX_types.h: $(RTL_DIR)/VX_types.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h
|
||||
|
||||
.PHONY: VX_config.h
|
||||
clean:
|
||||
rm -f VX_config.h VX_types.h
|
||||
|
||||
.PHONY: VX_config.h VX_types.h
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <unordered_map>
|
||||
@@ -5,167 +18,323 @@
|
||||
#include <mutex>
|
||||
#include <iostream>
|
||||
#include <rvfloats.h>
|
||||
#include <util.h>
|
||||
#include "svdpi.h"
|
||||
#include "verilated_vpi.h"
|
||||
#include "VX_config.h"
|
||||
|
||||
extern "C" {
|
||||
void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
|
||||
void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
|
||||
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
|
||||
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
|
||||
void dpi_fclss(bool enable, int a, int* result);
|
||||
void dpi_fsgnj(bool enable, int a, int b, int* result);
|
||||
void dpi_fsgnjn(bool enable, int a, int b, int* result);
|
||||
void dpi_fsgnjx(bool enable, int a, int b, int* result);
|
||||
|
||||
void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags);
|
||||
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
}
|
||||
|
||||
void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fadd_s(a, b, (*frm & 0x7), fflags);
|
||||
// Widens a 32-bit value to 64 bits.
// When FPU_RV64F is defined, the upper 32 bits of the result are set to all
// ones; otherwise the value is returned zero-extended and otherwise unchanged.
inline uint64_t nan_box(uint32_t value) {
  uint64_t boxed = value;
#ifdef FPU_RV64F
  boxed |= 0xffffffff00000000;
#endif
  return boxed;
}
|
||||
|
||||
void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsub_s(a, b, (*frm & 0x7), fflags);
|
||||
// Reports whether a 64-bit register value carries the boxing pattern.
// With FPU_RV64F defined: true only when the upper 32 bits are all ones.
// Without FPU_RV64F: the argument is ignored and the answer is always true.
inline bool is_nan_boxed(uint64_t value) {
#ifdef FPU_RV64F
  const uint32_t upper_word = uint32_t(value >> 32);
  return upper_word == 0xffffffff;
#else
  __unused (value);
  return true;
#endif
}
|
||||
|
||||
void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmul_s(a, b, (*frm & 0x7), fflags);
|
||||
inline int64_t check_boxing(int64_t a) {
|
||||
if (!is_nan_boxed(a)) {
|
||||
return nan_box(0x7fc00000); // NaN
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmadd_s(a, b, c, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fadd_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fadd_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmsub_s(a, b, c, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsub_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fsub_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fnmadd_s(a, b, c, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmul_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fnmsub_s(a, b, c, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fdiv_s(a, b, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsqrt_s(a, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_ftoi_s(a, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_ftou_s(a, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fdiv_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_itof_s(a, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fsqrt_s(check_boxing(a), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
|
||||
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_utof_s(a, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftol_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftol_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftoi_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftoi_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_flt_s(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftolu_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftolu_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftou_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fle_s(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ltof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_itof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_feq_s(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_lutof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_utof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_lutof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_utof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmin_s(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_ftod((int32_t)check_boxing(a));
|
||||
} else {
|
||||
*result = nan_box(rv_dtof(a));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fmax_s(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fclss_d(a);
|
||||
} else {
|
||||
*result = rv_fclss_s(check_boxing(a));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fclss(bool enable, int a, int* result) {
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fclss_s(a);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnj_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnj_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnj(bool enable, int a, int b, int* result) {
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsgnj_s(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjn_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjn_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjn(bool enable, int a, int b, int* result) {
|
||||
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsgnjn_s(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjx_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjx_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjx(bool enable, int a, int b, int* result) {
|
||||
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
*result = rv_fsgnjx_s(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_flt_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_flt_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fle_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_fle_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_feq_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_feq_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmin_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmin_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmax_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmax_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
}
|
||||
@@ -1,31 +1,47 @@
|
||||
`ifndef FLOAT_DPI
|
||||
`define FLOAT_DPI
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import "DPI-C" function void dpi_fadd(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsub(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmul(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmadd(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmsub(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fnmadd(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fnmsub(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
`ifndef FLOAT_DPI_VH
|
||||
`define FLOAT_DPI_VH
|
||||
|
||||
import "DPI-C" function void dpi_fdiv(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsqrt(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
`include "VX_config.vh"
|
||||
|
||||
import "DPI-C" function void dpi_ftoi(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_ftou(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_itof(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_utof(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmadd(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmsub(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fnmadd(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fnmsub(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
|
||||
import "DPI-C" function void dpi_fclss(input logic enable, input int a, output int result);
|
||||
import "DPI-C" function void dpi_fsgnj(input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" function void dpi_fsgnjn(input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" function void dpi_fsgnjx(input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" function void dpi_fdiv(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsqrt(input logic enable, input int dst_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
|
||||
import "DPI-C" function void dpi_flt(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fle(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_feq(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmin(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmax(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_ftoi(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_ftou(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_itof(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_utof(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_f2f(input logic enable, input int dst_fmt, input longint a, output longint result);
|
||||
|
||||
`endif
|
||||
import "DPI-C" function void dpi_fclss(input logic enable, input int dst_fmt, input longint a, output longint result);
|
||||
import "DPI-C" function void dpi_fsgnj(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
|
||||
import "DPI-C" function void dpi_fsgnjn(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
|
||||
import "DPI-C" function void dpi_fsgnjx(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
|
||||
|
||||
import "DPI-C" function void dpi_flt(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fle(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_feq(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmin(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmax(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
|
||||
`endif
|
||||
|
||||
@@ -1,23 +1,57 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <iostream>
|
||||
|
||||
#include "svdpi.h"
|
||||
#include "verilated_vpi.h"
|
||||
#include "VX_config.h"
|
||||
|
||||
#include "uuid_gen.h"
|
||||
|
||||
// Machine-word type aliases used by the integer DPI helpers (dpi_imul / dpi_idiv).
//   iword_t / uword_t   - signed / unsigned type of one machine word
//   idword_t / udword_t - signed / unsigned type twice as wide as a word
// With XLEN_64 defined a word is 64 bits and the double word uses the
// compiler's 128-bit integer types; otherwise a word is 32 bits and the
// double word is 64 bits.
#ifdef XLEN_64
|
||||
#define iword_t int64_t
|
||||
#define uword_t uint64_t
|
||||
#define idword_t __int128_t
|
||||
#define udword_t __uint128_t
|
||||
#else
|
||||
#define iword_t int32_t
|
||||
#define uword_t uint32_t
|
||||
#define idword_t int64_t
|
||||
#define udword_t uint64_t
|
||||
#endif
|
||||
|
||||
// Default trace verbosity threshold, used only when the build does not define
// DEBUG_LEVEL itself. dpi_trace() returns early for messages whose level is
// greater than this value.
#ifndef DEBUG_LEVEL
|
||||
#define DEBUG_LEVEL 3
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
void dpi_imul(bool enable, int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth);
|
||||
void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* remainder);
|
||||
void dpi_imul(bool enable, bool is_signed_a, bool is_signed_b, iword_t a, iword_t b, iword_t* resultl, iword_t* resulth);
|
||||
void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotient, iword_t* remainder);
|
||||
|
||||
int dpi_register();
|
||||
void dpi_assert(int inst, bool cond, int delay);
|
||||
|
||||
void dpi_trace(const char* format, ...);
|
||||
void dpi_trace(int level, const char* format, ...);
|
||||
void dpi_trace_start();
|
||||
void dpi_trace_stop();
|
||||
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid, uint64_t PC);
|
||||
}
|
||||
|
||||
bool sim_trace_enabled();
|
||||
@@ -93,49 +127,54 @@ void dpi_assert(int inst, bool cond, int delay) {
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_imul(bool enable, int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth) {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void dpi_imul(bool enable, bool is_signed_a, bool is_signed_b, iword_t a, iword_t b, iword_t* resultl, iword_t* resulth) {
|
||||
if (!enable)
|
||||
return;
|
||||
udword_t first = *(uword_t*)&a;
|
||||
udword_t second = *(uword_t*)&b;
|
||||
|
||||
udword_t mask = udword_t(-1) << (8 * sizeof(iword_t));
|
||||
|
||||
uint64_t first = *(uint32_t*)&a;
|
||||
uint64_t second = *(uint32_t*)&b;
|
||||
|
||||
if (is_signed_a && (first & 0x80000000)) {
|
||||
first |= 0xFFFFFFFF00000000;
|
||||
if (is_signed_a && a < 0) {
|
||||
first |= mask;
|
||||
}
|
||||
|
||||
if (is_signed_b && (second & 0x80000000)) {
|
||||
second |= 0xFFFFFFFF00000000;
|
||||
if (is_signed_b && b < 0) {
|
||||
second |= mask;
|
||||
}
|
||||
|
||||
uint64_t result;
|
||||
udword_t result;
|
||||
if (is_signed_a || is_signed_b) {
|
||||
result = (int64_t)first * (int64_t)second;
|
||||
result = idword_t(first) * idword_t(second);
|
||||
} else {
|
||||
result = first * second;
|
||||
}
|
||||
|
||||
*resultl = result & 0xFFFFFFFF;
|
||||
*resulth = (result >> 32) & 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
*resultl = iword_t(result);
|
||||
*resulth = iword_t(result >> (8 * sizeof(iword_t)));
|
||||
}
|
||||
|
||||
void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* remainder) {
|
||||
void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotient, iword_t* remainder) {
|
||||
if (!enable)
|
||||
return;
|
||||
|
||||
uint32_t dividen = *(uint32_t*)&a;
|
||||
uint32_t divisor = *(uint32_t*)&b;
|
||||
uword_t dividen = a;
|
||||
uword_t divisor = b;
|
||||
|
||||
auto inf_neg = uword_t(1) << (XLEN-1);
|
||||
|
||||
if (is_signed) {
|
||||
if (b == 0) {
|
||||
*quotient = -1;
|
||||
*remainder = dividen;
|
||||
} else if (dividen == 0x80000000 && divisor == 0xffffffff) {
|
||||
} else if (dividen == inf_neg && divisor == -1) {
|
||||
*remainder = 0;
|
||||
*quotient = dividen;
|
||||
} else {
|
||||
*quotient = (int32_t)dividen / (int32_t)divisor;
|
||||
*remainder = (int32_t)dividen % (int32_t)divisor;
|
||||
*quotient = (iword_t)dividen / (iword_t)divisor;
|
||||
*remainder = (iword_t)dividen % (iword_t)divisor;
|
||||
}
|
||||
} else {
|
||||
if (b == 0) {
|
||||
@@ -148,7 +187,11 @@ void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* rem
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_trace(const char* format, ...) {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void dpi_trace(int level, const char* format, ...) {
|
||||
if (level > DEBUG_LEVEL)
|
||||
return;
|
||||
if (!sim_trace_enabled())
|
||||
return;
|
||||
va_list va;
|
||||
@@ -163,4 +206,28 @@ void dpi_trace_start() {
|
||||
|
||||
// DPI entry point: turns simulation tracing off by calling
// sim_trace_enable(false). Takes no arguments and returns nothing.
void dpi_trace_stop() {
|
||||
sim_trace_enable(false);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::unordered_map<uint32_t, std::shared_ptr<vortex::UUIDGenerator>> g_uuid_gens;
|
||||
|
||||
uint64_t dpi_uuid_gen(bool reset, int wid, uint64_t PC) {
|
||||
if (reset) {
|
||||
g_uuid_gens.clear();
|
||||
return 0;
|
||||
}
|
||||
std::shared_ptr<vortex::UUIDGenerator> uuid_gen;
|
||||
auto it = g_uuid_gens.find(wid);
|
||||
if (it == g_uuid_gens.end()) {
|
||||
uuid_gen = std::make_shared<vortex::UUIDGenerator>();
|
||||
g_uuid_gens.emplace(wid, uuid_gen);
|
||||
} else {
|
||||
uuid_gen = it->second;
|
||||
}
|
||||
uint32_t instr_uuid = uuid_gen->get_uuid(PC);
|
||||
uint32_t instr_id = instr_uuid & 0xffff;
|
||||
uint32_t instr_ref = instr_uuid >> 16;
|
||||
uint64_t uuid = (uint64_t(instr_ref) << 32) | (wid << 16) | instr_id;
|
||||
return uuid;
|
||||
}
|
||||
@@ -1,14 +1,37 @@
|
||||
`ifndef UTIL_DPI
|
||||
`define UTIL_DPI
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import "DPI-C" function void dpi_imul(input logic enable, input int a, input int b, input logic is_signed_a, input logic is_signed_b, output int resultl, output int resulth);
|
||||
import "DPI-C" function void dpi_idiv(input logic enable, input int a, input int b, input logic is_signed, output int quotient, output int remainder);
|
||||
`ifndef UTIL_DPI_VH
|
||||
`define UTIL_DPI_VH
|
||||
|
||||
`include "VX_config.vh"
|
||||
|
||||
`ifdef XLEN_64
|
||||
`define INT_TYPE longint
|
||||
`else
|
||||
`define INT_TYPE int
|
||||
`endif
|
||||
|
||||
import "DPI-C" function void dpi_imul(input logic enable, input logic is_signed_a, input logic is_signed_b, input `INT_TYPE a, input `INT_TYPE b, output `INT_TYPE resultl, output `INT_TYPE resulth);
|
||||
import "DPI-C" function void dpi_idiv(input logic enable, input logic is_signed, input `INT_TYPE a, input `INT_TYPE b, output `INT_TYPE quotient, output `INT_TYPE remainder);
|
||||
|
||||
import "DPI-C" function int dpi_register();
|
||||
import "DPI-C" function void dpi_assert(int inst, input logic cond, input int delay);
|
||||
|
||||
import "DPI-C" function void dpi_trace(input string format /*verilator sformat*/);
|
||||
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
|
||||
import "DPI-C" function void dpi_trace_start();
|
||||
import "DPI-C" function void dpi_trace_stop();
|
||||
|
||||
`endif
|
||||
import "DPI-C" function longint dpi_uuid_gen(input logic reset, input int wid, input longint PC);
|
||||
|
||||
`endif
|
||||
|
||||
1
hw/rtl/.gitignore
vendored
1
hw/rtl/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
/VX_user_config.vh
|
||||
@@ -1,235 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_alu_unit: per-thread 32-bit integer ALU with branch resolution.
//
// Requests arrive on alu_req_if. The combinational result for every thread,
// plus the branch comparison flags and branch target, are captured in one
// pipeline register. The registered values drive branch_ctl_if (branch ops)
// and alu_commit_if (results). When EXT_M_ENABLE is defined, requests flagged
// by INST_ALU_IS_MUL are routed to a VX_muldiv instance instead, and the two
// result streams share alu_commit_if with the ALU taking priority.
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_alu_req_if.slave alu_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_branch_ctl_if.master branch_ctl_if,
|
||||
VX_commit_if.master alu_commit_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
// Per-thread intermediate results. sub_result is 33 bits wide: bit 32 of the
// difference is used below as the "less than" flag.
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] add_result;
|
||||
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
||||
|
||||
wire ready_in;
|
||||
|
||||
// Decode: op_type is reinterpreted both as an ALU opcode and as a branch
// opcode; is_br_op (derived from op_mod) tells which interpretation applies.
`UNUSED_VAR (alu_req_if.op_mod)
|
||||
wire is_br_op = `INST_ALU_IS_BR(alu_req_if.op_mod);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(alu_req_if.op_type);
|
||||
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_BITS'(alu_req_if.op_type);
|
||||
wire alu_signed = `INST_ALU_SIGNED(alu_op);
|
||||
wire [1:0] alu_op_class = `INST_ALU_OP_CLASS(alu_op);
|
||||
wire is_sub = (alu_op == `INST_ALU_SUB);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||
|
||||
// Operand selection. PC and imm are scalar and replicated to every thread.
// alu_in2_less feeds the subtract/compare path: for branch ops it stays on
// rs2 even when use_imm is set, because the immediate goes to the adder
// (add_result supplies br_dest below).
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
|
||||
// Adder: (rs1 or PC) + (rs2 or imm).
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
end
|
||||
|
||||
// Subtract/compare: operands are widened to 33 bits, replicating bit 31 when
// alu_signed is set and prepending 0 otherwise, then subtracted.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
|
||||
assign sub_result[i] = sub_in1 - sub_in2;
|
||||
end
|
||||
|
||||
// Right shift: the same 33-bit widening lets a single arithmetic shift (>>>)
// produce both results; the shift amount is the low 5 bits of rs2/imm and the
// result is truncated back to 32 bits.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
assign shr_result[i] = 32'($signed(shr_in1) >>> alu_in2_imm[i][4:0]);
|
||||
end
|
||||
|
||||
// Bitwise ops; the default arm is the left shift (SLL per the note below).
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`INST_ALU_SLL,
|
||||
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// Final result mux, selected by the 2-bit operation class.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (alu_op_class)
|
||||
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
2'b01: alu_result[i] = {31'b0, sub_result[i][32]}; // SLTU, SLT
|
||||
2'b10: alu_result[i] = is_sub ? sub_result[i][31:0] // SUB
|
||||
: shr_result[i]; // SRL, SRA
|
||||
// 2'b11,
|
||||
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// branch
|
||||
|
||||
// JAL/JALR: the value sent to commit is next_PC (replicated per thread)
// instead of the ALU result.
wire is_jal = is_br_op && (br_op == `INST_BR_JAL || br_op == `INST_BR_JALR);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
||||
|
||||
// Branch target and comparison are taken from the single thread selected by
// alu_req_if.tid.
wire [31:0] br_dest = add_result[alu_req_if.tid];
|
||||
wire [32:0] cmp_result = sub_result[alu_req_if.tid];
|
||||
|
||||
wire is_less = cmp_result[32];
|
||||
wire is_equal = ~(| cmp_result[31:0]);
|
||||
|
||||
// output
|
||||
|
||||
wire alu_valid_in;
|
||||
wire alu_ready_in;
|
||||
wire alu_valid_out;
|
||||
wire alu_ready_out;
|
||||
wire [`UUID_BITS-1:0] alu_uuid;
|
||||
wire [`NW_BITS-1:0] alu_wid;
|
||||
wire [`NUM_THREADS-1:0] alu_tmask;
|
||||
wire [31:0] alu_PC;
|
||||
wire [`NR_BITS-1:0] alu_rd;
|
||||
wire alu_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_data;
|
||||
|
||||
wire [`INST_BR_BITS-1:0] br_op_r;
|
||||
wire [31:0] br_dest_r;
|
||||
wire is_less_r;
|
||||
wire is_equal_r;
|
||||
wire is_br_op_r;
|
||||
|
||||
// The output register may load when its current content is being consumed
// or when it holds nothing valid.
assign alu_ready_in = alu_ready_out || ~alu_valid_out;
|
||||
|
||||
// Output pipeline register. DATAW is the sum of the widths of the fields in
// data_in, in the same order; data_out unpacks them in that order.
// NOTE(review): RESETW(1) presumably limits reset to the leading valid bit;
// confirm against VX_pipe_register.
VX_pipe_register #(
|
||||
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (alu_ready_in),
|
||||
.data_in ({alu_valid_in, alu_req_if.uuid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, is_less, is_equal, br_dest}),
|
||||
.data_out ({alu_valid_out, alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data, is_br_op_r, br_op_r, is_less_r, is_equal_r, br_dest_r})
|
||||
);
|
||||
|
||||
// Branch decision from the registered opcode and flags: pick the less-than or
// equality flag, optionally invert it (br_neg), and force taken when the
// opcode is flagged static.
`UNUSED_VAR (br_op_r)
|
||||
wire br_neg = `INST_BR_NEG(br_op_r);
|
||||
wire br_less = `INST_BR_LESS(br_op_r);
|
||||
wire br_static = `INST_BR_STATIC(br_op_r);
|
||||
|
||||
// The branch is reported only in the cycle the registered ALU result is
// valid and accepted downstream.
assign branch_ctl_if.valid = alu_valid_out && alu_ready_out && is_br_op_r;
|
||||
assign branch_ctl_if.taken = ((br_less ? is_less_r : is_equal_r) ^ br_neg) | br_static;
|
||||
assign branch_ctl_if.wid = alu_wid;
|
||||
assign branch_ctl_if.dest = br_dest_r;
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
// Multiply/divide path: same request fields, handled by VX_muldiv.
wire mul_valid_in;
|
||||
wire mul_ready_in;
|
||||
wire mul_valid_out;
|
||||
wire mul_ready_out;
|
||||
wire [`UUID_BITS-1:0] mul_uuid;
|
||||
wire [`NW_BITS-1:0] mul_wid;
|
||||
wire [`NUM_THREADS-1:0] mul_tmask;
|
||||
wire [31:0] mul_PC;
|
||||
wire [`NR_BITS-1:0] mul_rd;
|
||||
wire mul_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_data;
|
||||
|
||||
wire [`INST_MUL_BITS-1:0] mul_op = `INST_MUL_BITS'(alu_req_if.op_type);
|
||||
|
||||
VX_muldiv muldiv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Inputs
|
||||
.alu_op (mul_op),
|
||||
.uuid_in (alu_req_if.uuid),
|
||||
.wid_in (alu_req_if.wid),
|
||||
.tmask_in (alu_req_if.tmask),
|
||||
.PC_in (alu_req_if.PC),
|
||||
.rd_in (alu_req_if.rd),
|
||||
.wb_in (alu_req_if.wb),
|
||||
.alu_in1 (alu_req_if.rs1_data),
|
||||
.alu_in2 (alu_req_if.rs2_data),
|
||||
|
||||
// Outputs
|
||||
.wid_out (mul_wid),
|
||||
.uuid_out (mul_uuid),
|
||||
.tmask_out (mul_tmask),
|
||||
.PC_out (mul_PC),
|
||||
.rd_out (mul_rd),
|
||||
.wb_out (mul_wb),
|
||||
.data_out (mul_data),
|
||||
|
||||
// handshake
|
||||
.valid_in (mul_valid_in),
|
||||
.ready_in (mul_ready_in),
|
||||
.valid_out (mul_valid_out),
|
||||
.ready_out (mul_ready_out)
|
||||
);
|
||||
|
||||
wire is_mul_op = `INST_ALU_IS_MUL(alu_req_if.op_mod);
|
||||
|
||||
// Request steering: a request goes to exactly one of the two paths, and the
// requester sees the ready of the path it targets.
assign ready_in = is_mul_op ? mul_ready_in : alu_ready_in;
|
||||
|
||||
assign alu_valid_in = alu_req_if.valid && ~is_mul_op;
|
||||
assign mul_valid_in = alu_req_if.valid && is_mul_op;
|
||||
|
||||
// Commit mux: the ALU output is forwarded whenever it is valid, otherwise the
// muldiv output is.
assign alu_commit_if.valid = alu_valid_out || mul_valid_out;
|
||||
assign alu_commit_if.uuid = alu_valid_out ? alu_uuid : mul_uuid;
|
||||
assign alu_commit_if.wid = alu_valid_out ? alu_wid : mul_wid;
|
||||
assign alu_commit_if.tmask = alu_valid_out ? alu_tmask : mul_tmask;
|
||||
assign alu_commit_if.PC = alu_valid_out ? alu_PC : mul_PC;
|
||||
assign alu_commit_if.rd = alu_valid_out ? alu_rd : mul_rd;
|
||||
assign alu_commit_if.wb = alu_valid_out ? alu_wb : mul_wb;
|
||||
assign alu_commit_if.data = alu_valid_out ? alu_data : mul_data;
|
||||
|
||||
assign alu_ready_out = alu_commit_if.ready;
|
||||
assign mul_ready_out = alu_commit_if.ready & ~alu_valid_out; // ALU takes priority
|
||||
|
||||
`else
|
||||
|
||||
// No M extension: the ALU path is wired straight to the commit interface.
assign ready_in = alu_ready_in;
|
||||
|
||||
assign alu_valid_in = alu_req_if.valid;
|
||||
|
||||
assign alu_commit_if.valid = alu_valid_out;
|
||||
assign alu_commit_if.uuid = alu_uuid;
|
||||
assign alu_commit_if.wid = alu_wid;
|
||||
assign alu_commit_if.tmask = alu_tmask;
|
||||
assign alu_commit_if.PC = alu_PC;
|
||||
assign alu_commit_if.rd = alu_rd;
|
||||
assign alu_commit_if.wb = alu_wb;
|
||||
assign alu_commit_if.data = alu_data;
|
||||
|
||||
assign alu_ready_out = alu_commit_if.ready;
|
||||
|
||||
`endif
|
||||
|
||||
// eop is tied high for every commit from this unit.
assign alu_commit_if.eop = 1'b1;
|
||||
|
||||
// can accept new request?
|
||||
assign alu_req_if.ready = ready_in;
|
||||
|
||||
// Optional debug trace: prints one line per resolved branch.
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
dpi_trace("%d: core%0d-branch: wid=%0d, PC=%0h, taken=%b, dest=%0h (#%0d)\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, alu_commit_if.PC, branch_ctl_if.taken, branch_ctl_if.dest, alu_uuid);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
@@ -1,159 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_cache_arb: arbitrates NUM_REQS cache request streams (each LANES wide)
// onto one output stream and routes the single response stream back to the
// requester that issued it.
//
// The outgoing tag is LOG_NUM_REQS bits wider than the incoming one. On the
// response path the requester index is read from tag bits
// [TAG_SEL_IDX +: LOG_NUM_REQS] and those bits are removed again before the
// tag is handed back. With NUM_REQS == 1 the module is a pure pass-through.
module VX_cache_arb #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LANES = 1,
|
||||
parameter DATA_SIZE = 1,
|
||||
parameter TAG_IN_WIDTH = 1,
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
parameter TYPE = "R",
|
||||
|
||||
// Derived widths: DATA_SIZE is in bytes (DATA_WIDTH = 8 * DATA_SIZE) and
// the address drops the CLOG2(DATA_SIZE) low bits of a 32-bit address.
localparam ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)),
|
||||
localparam DATA_WIDTH = (8 * DATA_SIZE),
|
||||
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
localparam TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// input requests
|
||||
input wire [NUM_REQS-1:0][LANES-1:0] req_valid_in,
|
||||
input wire [NUM_REQS-1:0][LANES-1:0] req_rw_in,
|
||||
input wire [NUM_REQS-1:0][LANES-1:0][DATA_SIZE-1:0] req_byteen_in,
|
||||
input wire [NUM_REQS-1:0][LANES-1:0][ADDR_WIDTH-1:0] req_addr_in,
|
||||
input wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0] req_data_in,
|
||||
input wire [NUM_REQS-1:0][LANES-1:0][TAG_IN_WIDTH-1:0] req_tag_in,
|
||||
output wire [NUM_REQS-1:0][LANES-1:0] req_ready_in,
|
||||
|
||||
// output request
|
||||
output wire [LANES-1:0] req_valid_out,
|
||||
output wire [LANES-1:0] req_rw_out,
|
||||
output wire [LANES-1:0][DATA_SIZE-1:0] req_byteen_out,
|
||||
output wire [LANES-1:0][ADDR_WIDTH-1:0] req_addr_out,
|
||||
output wire [LANES-1:0][DATA_WIDTH-1:0] req_data_out,
|
||||
output wire [LANES-1:0][TAG_OUT_WIDTH-1:0] req_tag_out,
|
||||
input wire [LANES-1:0] req_ready_out,
|
||||
|
||||
// input response
|
||||
input wire rsp_valid_in,
|
||||
input wire [LANES-1:0] rsp_tmask_in,
|
||||
input wire [LANES-1:0][DATA_WIDTH-1:0] rsp_data_in,
|
||||
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
|
||||
output wire rsp_ready_in,
|
||||
|
||||
// output responses
|
||||
output wire [NUM_REQS-1:0] rsp_valid_out,
|
||||
output wire [NUM_REQS-1:0][LANES-1:0] rsp_tmask_out,
|
||||
output wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0] rsp_data_out,
|
||||
output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
|
||||
input wire [NUM_REQS-1:0] rsp_ready_out
|
||||
);
|
||||
// Packed payload widths. A request is {tag, addr, rw, byteen, data} per lane;
// a response is {tmask, tag, data} for all lanes together.
localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
|
||||
localparam RSP_DATAW = LANES * (1 + DATA_WIDTH) + TAG_IN_WIDTH;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [NUM_REQS-1:0][LANES-1:0][REQ_DATAW-1:0] req_data_in_merged;
|
||||
wire [LANES-1:0][REQ_DATAW-1:0] req_data_out_merged;
|
||||
|
||||
// Widen each requester's tag with its index i, then pack all request
// fields of that lane into one vector for the arbiter.
// NOTE(review): VX_bits_insert is not shown here; it is expected to place
// sel_in at bit POS, matching the rsp_sel slice below - confirm.
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
for (genvar j = 0; j < LANES; ++j) begin
|
||||
wire [TAG_OUT_WIDTH-1:0] req_tag_in_w;
|
||||
|
||||
VX_bits_insert #(
|
||||
.N (TAG_IN_WIDTH),
|
||||
.S (LOG_NUM_REQS),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) bits_insert (
|
||||
.data_in (req_tag_in[i][j]),
|
||||
.sel_in (LOG_NUM_REQS'(i)),
|
||||
.data_out (req_tag_in_w)
|
||||
);
|
||||
|
||||
assign req_data_in_merged[i][j] = {req_tag_in_w, req_addr_in[i][j], req_rw_in[i][j], req_byteen_in[i][j], req_data_in[i][j]};
|
||||
end
|
||||
end
|
||||
|
||||
// Request arbiter over the packed payloads.
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LANES (LANES),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (BUFFERED_REQ),
|
||||
.TYPE (TYPE)
|
||||
) req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in (req_data_in_merged),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out),
|
||||
.data_out (req_data_out_merged),
|
||||
.ready_out (req_ready_out)
|
||||
);
|
||||
|
||||
// Unpack the granted request in the same field order it was packed.
for (genvar i = 0; i < LANES; ++i) begin
|
||||
assign {req_tag_out[i], req_addr_out[i], req_rw_out[i], req_byteen_out[i], req_data_out[i]} = req_data_out_merged[i];
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out_merged;
|
||||
|
||||
// The destination requester is the index field carried in the response tag.
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[TAG_SEL_IDX +: LOG_NUM_REQS];
|
||||
|
||||
wire [TAG_IN_WIDTH-1:0] rsp_tag_in_w;
|
||||
|
||||
// Narrow the tag back to TAG_IN_WIDTH before returning it.
VX_bits_remove #(
|
||||
.N (TAG_OUT_WIDTH),
|
||||
.S (LOG_NUM_REQS),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) bits_remove (
|
||||
.data_in (rsp_tag_in),
|
||||
.data_out (rsp_tag_in_w)
|
||||
);
|
||||
|
||||
// The response is one stream (LANES = 1 at the demux) carrying every
// lane's mask and data; it is steered to the requester chosen by rsp_sel.
VX_stream_demux #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LANES (1),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (BUFFERED_RSP)
|
||||
) rsp_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel_in (rsp_sel),
|
||||
.valid_in (rsp_valid_in),
|
||||
.data_in ({rsp_tmask_in, rsp_tag_in_w, rsp_data_in}),
|
||||
.ready_in (rsp_ready_in),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out (rsp_data_out_merged),
|
||||
.ready_out (rsp_ready_out)
|
||||
);
|
||||
|
||||
// Unpack each requester's response in the order it was packed above.
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign {rsp_tmask_out[i], rsp_tag_out[i], rsp_data_out[i]} = rsp_data_out_merged[i];
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
// Single requester: no arbitration and no tag change, every signal is
// wired straight through in both directions.
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
assign req_valid_out = req_valid_in;
|
||||
assign req_tag_out = req_tag_in;
|
||||
assign req_addr_out = req_addr_in;
|
||||
assign req_rw_out = req_rw_in;
|
||||
assign req_byteen_out = req_byteen_in;
|
||||
assign req_data_out = req_data_in;
|
||||
assign req_ready_in = req_ready_out;
|
||||
|
||||
assign rsp_valid_out = rsp_valid_in;
|
||||
assign rsp_tmask_out = rsp_tmask_in;
|
||||
assign rsp_tag_out = rsp_tag_in;
|
||||
assign rsp_data_out = rsp_data_in;
|
||||
assign rsp_ready_in = rsp_ready_out;
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -1,195 +1,155 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_cluster #(
|
||||
module VX_cluster import VX_gpu_pkg::*; #(
|
||||
parameter CLUSTER_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_cluster
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [`L2_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`L2_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`L2_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L2_MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.master mem_perf_if,
|
||||
VX_mem_perf_if.slave perf_memsys_total_if,
|
||||
`endif
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`L2_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L2_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
VX_dcr_bus_if.slave dcr_bus_if,
|
||||
|
||||
// Memory
|
||||
VX_mem_bus_if.master mem_bus_if,
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak,
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
`STATIC_ASSERT((`L2_ENABLE == 0 || `NUM_CORES > 1), ("invalid parameter"))
|
||||
output wire busy
|
||||
);
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_valid;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_rw;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_req_data;
|
||||
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_ready;
|
||||
`ifdef SCOPE
|
||||
localparam scope_socket = 0;
|
||||
`SCOPE_IO_SWITCH (scope_socket + `NUM_SOCKETS);
|
||||
`endif
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_mem_rsp_valid;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
|
||||
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
|
||||
`ifdef GBAR_ENABLE
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_busy;
|
||||
VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS]();
|
||||
VX_gbar_bus_if gbar_bus_if();
|
||||
|
||||
for (genvar i = 0; i < `NUM_CORES; i++) begin
|
||||
`RESET_RELAY (gbar_reset, reset);
|
||||
|
||||
`RESET_RELAY (core_reset);
|
||||
VX_gbar_arb #(
|
||||
.NUM_REQS (`NUM_SOCKETS),
|
||||
.OUT_REG ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure
|
||||
) gbar_arb (
|
||||
.clk (clk),
|
||||
.reset (gbar_reset),
|
||||
.bus_in_if (per_socket_gbar_bus_if),
|
||||
.bus_out_if (gbar_bus_if)
|
||||
);
|
||||
|
||||
VX_core #(
|
||||
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
|
||||
) core (
|
||||
`SCOPE_BIND_VX_cluster_core(i)
|
||||
VX_gbar_unit #(
|
||||
.INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID))
|
||||
) gbar_unit (
|
||||
.clk (clk),
|
||||
.reset (gbar_reset),
|
||||
.gbar_bus_if (gbar_bus_if)
|
||||
);
|
||||
`endif
|
||||
|
||||
.clk (clk),
|
||||
.reset (core_reset),
|
||||
|
||||
.mem_req_valid (per_core_mem_req_valid[i]),
|
||||
.mem_req_rw (per_core_mem_req_rw [i]),
|
||||
.mem_req_byteen (per_core_mem_req_byteen[i]),
|
||||
.mem_req_addr (per_core_mem_req_addr [i]),
|
||||
.mem_req_data (per_core_mem_req_data [i]),
|
||||
.mem_req_tag (per_core_mem_req_tag [i]),
|
||||
.mem_req_ready (per_core_mem_req_ready[i]),
|
||||
|
||||
.mem_rsp_valid (per_core_mem_rsp_valid[i]),
|
||||
.mem_rsp_data (per_core_mem_rsp_data [i]),
|
||||
.mem_rsp_tag (per_core_mem_rsp_tag [i]),
|
||||
.mem_rsp_ready (per_core_mem_rsp_ready[i]),
|
||||
|
||||
.busy (per_core_busy [i])
|
||||
);
|
||||
end
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_ARB_TAG_WIDTH)
|
||||
) per_socket_dcache_bus_if[`NUM_SOCKETS * DCACHE_NUM_REQS]();
|
||||
|
||||
assign busy = (| per_core_busy);
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (ICACHE_ARB_TAG_WIDTH)
|
||||
) per_socket_icache_bus_if[`NUM_SOCKETS]();
|
||||
|
||||
`RESET_RELAY (mem_unit_reset, reset);
|
||||
|
||||
VX_mem_unit #(
|
||||
.CLUSTER_ID (CLUSTER_ID)
|
||||
) mem_unit (
|
||||
.clk (clk),
|
||||
.reset (mem_unit_reset),
|
||||
|
||||
if (`L2_ENABLE) begin
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_l2cache_if();
|
||||
.mem_perf_if (mem_perf_if),
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (l2_reset);
|
||||
.dcache_bus_if (per_socket_dcache_bus_if),
|
||||
|
||||
.icache_bus_if (per_socket_icache_bus_if),
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L2_CACHE_ID),
|
||||
.CACHE_SIZE (`L2_CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L2_CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L2_NUM_BANKS),
|
||||
.NUM_PORTS (`L2_NUM_PORTS),
|
||||
.WORD_SIZE (`L2_WORD_SIZE),
|
||||
.NUM_REQS (`L2_NUM_REQS),
|
||||
.CREQ_SIZE (`L2_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L2_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L2_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`L1_MEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) l2cache (
|
||||
`SCOPE_BIND_VX_cluster_l2cache
|
||||
|
||||
.clk (clk),
|
||||
.reset (l2_reset),
|
||||
.mem_bus_if (mem_bus_if)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`NUM_SOCKETS-1:0] per_socket_sim_ebreak;
|
||||
wire [`NUM_SOCKETS-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_socket_sim_wb_value;
|
||||
assign sim_ebreak = per_socket_sim_ebreak[0];
|
||||
assign sim_wb_value = per_socket_sim_wb_value[0];
|
||||
`UNUSED_VAR (per_socket_sim_ebreak)
|
||||
`UNUSED_VAR (per_socket_sim_wb_value)
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_tmp_if();
|
||||
assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
|
||||
assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr;
|
||||
assign socket_dcr_bus_tmp_if.write_data = dcr_bus_if.write_data;
|
||||
|
||||
wire [`NUM_SOCKETS-1:0] per_socket_busy;
|
||||
|
||||
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
|
||||
|
||||
// Generate all sockets
|
||||
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
|
||||
|
||||
`RESET_RELAY (socket_reset, reset);
|
||||
|
||||
VX_socket #(
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
|
||||
) socket (
|
||||
`SCOPE_IO_BIND (scope_socket+i)
|
||||
.clk (clk),
|
||||
.reset (socket_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_l2cache_if),
|
||||
.mem_perf_if (perf_memsys_total_if),
|
||||
`endif
|
||||
|
||||
.dcr_bus_if (socket_dcr_bus_if),
|
||||
|
||||
.dcache_bus_if (per_socket_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
|
||||
.icache_bus_if (per_socket_icache_bus_if[i]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[i]),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_core_mem_req_valid),
|
||||
.core_req_rw (per_core_mem_req_rw),
|
||||
.core_req_byteen (per_core_mem_req_byteen),
|
||||
.core_req_addr (per_core_mem_req_addr),
|
||||
.core_req_data (per_core_mem_req_data),
|
||||
.core_req_tag (per_core_mem_req_tag),
|
||||
.core_req_ready (per_core_mem_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_core_mem_rsp_valid),
|
||||
.core_rsp_data (per_core_mem_rsp_data),
|
||||
.core_rsp_tag (per_core_mem_rsp_tag),
|
||||
.core_rsp_ready (per_core_mem_rsp_ready),
|
||||
`UNUSED_PIN (core_rsp_tmask),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (mem_req_valid),
|
||||
.mem_req_rw (mem_req_rw),
|
||||
.mem_req_byteen (mem_req_byteen),
|
||||
.mem_req_addr (mem_req_addr),
|
||||
.mem_req_data (mem_req_data),
|
||||
.mem_req_tag (mem_req_tag),
|
||||
.mem_req_ready (mem_req_ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (mem_rsp_valid),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_ready (mem_rsp_ready)
|
||||
.sim_ebreak (per_socket_sim_ebreak[i]),
|
||||
.sim_wb_value (per_socket_sim_wb_value[i]),
|
||||
.busy (per_socket_busy[i])
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L1_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Core request
|
||||
.req_valid_in (per_core_mem_req_valid),
|
||||
.req_rw_in (per_core_mem_req_rw),
|
||||
.req_byteen_in (per_core_mem_req_byteen),
|
||||
.req_addr_in (per_core_mem_req_addr),
|
||||
.req_data_in (per_core_mem_req_data),
|
||||
.req_tag_in (per_core_mem_req_tag),
|
||||
.req_ready_in (per_core_mem_req_ready),
|
||||
|
||||
// Memory request
|
||||
.req_valid_out (mem_req_valid),
|
||||
.req_rw_out (mem_req_rw),
|
||||
.req_byteen_out (mem_req_byteen),
|
||||
.req_addr_out (mem_req_addr),
|
||||
.req_data_out (mem_req_data),
|
||||
.req_tag_out (mem_req_tag),
|
||||
.req_ready_out (mem_req_ready),
|
||||
|
||||
// Core response
|
||||
.rsp_valid_out (per_core_mem_rsp_valid),
|
||||
.rsp_data_out (per_core_mem_rsp_data),
|
||||
.rsp_tag_out (per_core_mem_rsp_tag),
|
||||
.rsp_ready_out (per_core_mem_rsp_ready),
|
||||
|
||||
// Memory response
|
||||
.rsp_valid_in (mem_rsp_valid),
|
||||
.rsp_tag_in (mem_rsp_tag),
|
||||
.rsp_data_in (mem_rsp_data),
|
||||
.rsp_ready_in (mem_rsp_ready)
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
`BUFFER_BUSY (busy, (| per_socket_busy), (`NUM_SOCKETS > 1));
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave alu_commit_if,
|
||||
VX_commit_if.slave ld_commit_if,
|
||||
VX_commit_if.slave st_commit_if,
|
||||
VX_commit_if.slave csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.slave fpu_commit_if,
|
||||
`endif
|
||||
VX_commit_if.slave gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if.master writeback_if,
|
||||
VX_cmt_to_csr_if.master cmt_to_csr_if
|
||||
);
|
||||
// CSRs update
|
||||
|
||||
wire alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready;
|
||||
wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready;
|
||||
wire st_commit_fire = st_commit_if.valid && st_commit_if.ready;
|
||||
wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
|
||||
`endif
|
||||
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
|
||||
wire commit_fire = alu_commit_fire
|
||||
|| ld_commit_fire
|
||||
|| st_commit_fire
|
||||
|| csr_commit_fire
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| fpu_commit_fire
|
||||
`endif
|
||||
|| gpu_commit_fire;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [(6*`NUM_THREADS)-1:0] commit_tmask;
|
||||
`else
|
||||
wire [(5*`NUM_THREADS)-1:0] commit_tmask;
|
||||
`endif
|
||||
|
||||
wire [$clog2($bits(commit_tmask)+1)-1:0] commit_size;
|
||||
|
||||
assign commit_tmask = {
|
||||
{`NUM_THREADS{alu_commit_fire}} & alu_commit_if.tmask,
|
||||
{`NUM_THREADS{ld_commit_fire}} & ld_commit_if.tmask,
|
||||
{`NUM_THREADS{st_commit_fire}} & st_commit_if.tmask,
|
||||
{`NUM_THREADS{csr_commit_fire}} & csr_commit_if.tmask,
|
||||
`ifdef EXT_F_ENABLE
|
||||
{`NUM_THREADS{fpu_commit_fire}} & fpu_commit_if.tmask,
|
||||
`endif
|
||||
{`NUM_THREADS{gpu_commit_fire}} & gpu_commit_if.tmask
|
||||
};
|
||||
|
||||
`POP_COUNT(commit_size, commit_tmask);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + $bits(commit_size)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({commit_fire, commit_size}),
|
||||
.data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
|
||||
);
|
||||
|
||||
// Writeback
|
||||
|
||||
VX_writeback #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) writeback (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
.writeback_if (writeback_if)
|
||||
);
|
||||
|
||||
// store commits don't writeback
|
||||
assign st_commit_if.ready = 1'b1;
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_commit_if.valid && alu_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd);
|
||||
`TRACE_ARRAY1D(alu_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace(" (#%0d)\n", alu_commit_if.uuid);
|
||||
end
|
||||
if (ld_commit_if.valid && ld_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.wb, ld_commit_if.rd);
|
||||
`TRACE_ARRAY1D(ld_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace(" (#%0d)\n", ld_commit_if.uuid);
|
||||
end
|
||||
if (st_commit_if.valid && st_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d (#%0d)\n", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd, st_commit_if.uuid);
|
||||
end
|
||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd);
|
||||
`TRACE_ARRAY1D(csr_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace(" (#%0d)\n", csr_commit_if.uuid);
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd);
|
||||
`TRACE_ARRAY1D(fpu_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace(" (#%0d)\n", fpu_commit_if.uuid);
|
||||
end
|
||||
`endif
|
||||
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.wb, gpu_commit_if.rd);
|
||||
`TRACE_ARRAY1D(gpu_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace(" (#%0d)\n", gpu_commit_if.uuid);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,49 @@
|
||||
`ifndef VX_CONFIG
|
||||
`define VX_CONFIG
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef XLEN
|
||||
`ifndef VX_CONFIG_VH
|
||||
`define VX_CONFIG_VH
|
||||
|
||||
// MIN(x, y): the smaller of x and y. Arguments are substituted textually and
// appear more than once in the expansion, so pass side-effect-free expressions.
`ifndef MIN
|
||||
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
`endif
|
||||
|
||||
// MAX(x, y): the larger of x and y (same multiple-expansion caveat as MIN).
`ifndef MAX
|
||||
`define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
`endif
|
||||
|
||||
// CLAMP(x, lo, hi): hi when x > hi, otherwise lo when x < lo, otherwise x.
// The upper bound is tested first.
`ifndef CLAMP
|
||||
`define CLAMP(x, lo, hi) (((x) > (hi)) ? (hi) : (((x) < (lo)) ? (lo) : (x)))
|
||||
`endif
|
||||
|
||||
// UP(x): x when it is non-zero, otherwise 1.
`ifndef UP
|
||||
`define UP(x) (((x) != 0) ? (x) : 1)
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// 32 bit XLEN as default.
|
||||
`ifndef XLEN_32
|
||||
`ifndef XLEN_64
|
||||
`define XLEN_32
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
`define XLEN 64
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_32
|
||||
`define XLEN 32
|
||||
`endif
|
||||
|
||||
@@ -25,54 +67,127 @@
|
||||
`define NUM_BARRIERS 4
|
||||
`endif
|
||||
|
||||
`ifndef L2_ENABLE
|
||||
`define L2_ENABLE 0
|
||||
`ifndef SOCKET_SIZE
|
||||
`define SOCKET_SIZE `MIN(4, `NUM_CORES)
|
||||
`endif
|
||||
|
||||
`ifndef L3_ENABLE
|
||||
`define L3_ENABLE 0
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_ENABLED 1
|
||||
`else
|
||||
`define L2_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifndef SM_ENABLE
|
||||
`define SM_ENABLE 1
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_ENABLED 1
|
||||
`else
|
||||
`define L3_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef L1_DISABLE
|
||||
`define ICACHE_DISABLE
|
||||
`define DCACHE_DISABLE
|
||||
`endif
|
||||
|
||||
`ifndef MEM_BLOCK_SIZE
|
||||
`define MEM_BLOCK_SIZE 64
|
||||
`endif
|
||||
|
||||
`ifndef L1_BLOCK_SIZE
|
||||
`define L1_BLOCK_SIZE ((`L2_ENABLE || `L3_ENABLE) ? 16 : `MEM_BLOCK_SIZE)
|
||||
`ifndef MEM_ADDR_WIDTH
|
||||
`ifdef XLEN_64
|
||||
`define MEM_ADDR_WIDTH 48
|
||||
`else
|
||||
`define MEM_ADDR_WIDTH 32
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef L1_LINE_SIZE
|
||||
`ifdef L1_DISABLE
|
||||
`define L1_LINE_SIZE ((`L2_ENABLED || `L3_ENABLED) ? 4 : `MEM_BLOCK_SIZE)
|
||||
`else
|
||||
`define L1_LINE_SIZE ((`L2_ENABLED || `L3_ENABLED) ? 16 : `MEM_BLOCK_SIZE)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 64'h180000000
|
||||
`endif
|
||||
|
||||
`ifndef STACK_BASE_ADDR
|
||||
`define STACK_BASE_ADDR 64'h1FF000000
|
||||
`endif
|
||||
|
||||
`else
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 32'h80000000
|
||||
`endif
|
||||
|
||||
`ifndef IO_BASE_ADDR
|
||||
`define IO_BASE_ADDR 32'hFF000000
|
||||
`ifndef STACK_BASE_ADDR
|
||||
`define STACK_BASE_ADDR 32'hFF000000
|
||||
`endif
|
||||
|
||||
`ifndef IO_ADDR_SIZE
|
||||
`define IO_ADDR_SIZE (32'hFFFFFFFF - `IO_BASE_ADDR + 1)
|
||||
`endif
|
||||
|
||||
`ifndef IO_COUT_ADDR
|
||||
`define IO_COUT_ADDR (32'hFFFFFFFF - `MEM_BLOCK_SIZE + 1)
|
||||
`endif
|
||||
|
||||
`ifndef IO_COUT_SIZE
|
||||
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
`ifndef IO_CSR_ADDR
|
||||
`define IO_CSR_ADDR `IO_BASE_ADDR
|
||||
`endif
|
||||
|
||||
`ifndef SMEM_BASE_ADDR
|
||||
`define SMEM_BASE_ADDR `IO_BASE_ADDR
|
||||
`define SMEM_BASE_ADDR `STACK_BASE_ADDR
|
||||
`endif
|
||||
|
||||
`ifndef SMEM_LOG_SIZE
|
||||
`define SMEM_LOG_SIZE 14
|
||||
`endif
|
||||
|
||||
`ifndef IO_BASE_ADDR
|
||||
`define IO_BASE_ADDR (`SMEM_BASE_ADDR + (1 << `SMEM_LOG_SIZE))
|
||||
`endif
|
||||
|
||||
`ifndef IO_COUT_ADDR
|
||||
`define IO_COUT_ADDR `IO_BASE_ADDR
|
||||
`endif
|
||||
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
|
||||
|
||||
`ifndef IO_CSR_ADDR
|
||||
`define IO_CSR_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
|
||||
`endif
|
||||
`define IO_CSR_SIZE (4 * 64 * `NUM_CORES * `NUM_CLUSTERS)
|
||||
|
||||
`ifndef STACK_LOG2_SIZE
|
||||
`define STACK_LOG2_SIZE 13
|
||||
`endif
|
||||
`define STACK_SIZE (1 << `STACK_LOG2_SIZE)
|
||||
|
||||
`define RESET_DELAY 8
|
||||
|
||||
// Default STALL_TIMEOUT, used only when the macro is not already defined.
// NOTE(review): the scale factor is 1 raised to (L2_ENABLED + L3_ENABLED),
// which is 1 for every exponent, so the expression always evaluates to 100000
// regardless of the cache configuration. Confirm whether a base greater than 1
// was intended before relying on the L2/L3 scaling.
`ifndef STALL_TIMEOUT
|
||||
`define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
|
||||
`endif
|
||||
|
||||
`ifndef FPU_FPNEW
|
||||
`ifndef FPU_DSP
|
||||
`ifndef FPU_DPI
|
||||
`ifdef SYNTHESIS
|
||||
`define FPU_DSP
|
||||
`else
|
||||
`define FPU_DPI
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`ifndef DPI_DISABLE
|
||||
`define IMUL_DPI
|
||||
`define IDIV_DPI
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef DEBUG_LEVEL
|
||||
`define DEBUG_LEVEL 3
|
||||
`endif
|
||||
|
||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||
|
||||
`ifndef EXT_M_DISABLE
|
||||
`define EXT_M_ENABLE
|
||||
`endif
|
||||
@@ -81,230 +196,278 @@
|
||||
`define EXT_F_ENABLE
|
||||
`endif
|
||||
|
||||
// Device identification
|
||||
`ifdef EXT_D_ENABLE
|
||||
`define FLEN_64
|
||||
`else
|
||||
`define FLEN_32
|
||||
`endif
|
||||
|
||||
`ifdef FLEN_64
|
||||
`define FLEN 64
|
||||
`endif
|
||||
|
||||
`ifdef FLEN_32
|
||||
`define FLEN 32
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
`ifdef FLEN_32
|
||||
`define FPU_RV64F
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`define ISA_STD_A 0
|
||||
`define ISA_STD_C 2
|
||||
`define ISA_STD_D 3
|
||||
`define ISA_STD_E 4
|
||||
`define ISA_STD_F 5
|
||||
`define ISA_STD_H 7
|
||||
`define ISA_STD_I 8
|
||||
`define ISA_STD_N 13
|
||||
`define ISA_STD_Q 16
|
||||
`define ISA_STD_S 18
|
||||
`define ISA_STD_U 20
|
||||
|
||||
`define ISA_EXT_TEX 0
|
||||
`define ISA_EXT_RASTER 1
|
||||
`define ISA_EXT_ROP 2
|
||||
|
||||
`ifdef EXT_A_ENABLE
|
||||
`define EXT_A_ENABLED 1
|
||||
`else
|
||||
`define EXT_A_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_C_ENABLE
|
||||
`define EXT_C_ENABLED 1
|
||||
`else
|
||||
`define EXT_C_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_D_ENABLE
|
||||
`define EXT_D_ENABLED 1
|
||||
`else
|
||||
`define EXT_D_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define EXT_F_ENABLED 1
|
||||
`else
|
||||
`define EXT_F_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
`define EXT_M_ENABLED 1
|
||||
`else
|
||||
`define EXT_M_ENABLED 0
|
||||
`endif
|
||||
|
||||
`define ISA_X_ENABLED 0
|
||||
|
||||
`define MISA_EXT 0
|
||||
|
||||
// MISA_STD: bitmask of standard ISA extensions, one bit per letter (bit 0 = A ... bit 25 = Z).
// A, C, D, F and M follow their EXT_x_ENABLED flags, X follows ISA_X_ENABLED, I and U are always set.
`define MISA_STD (`EXT_A_ENABLED << 0) /* A - Atomic Instructions extension */ \
|
||||
| (0 << 1) /* B - Tentatively reserved for Bit operations extension */ \
|
||||
| (`EXT_C_ENABLED << 2) /* C - Compressed extension */ \
|
||||
| (`EXT_D_ENABLED << 3) /* D - Double precision floating-point extension */ \
|
||||
| (0 << 4) /* E - RV32E base ISA */ \
|
||||
| (`EXT_F_ENABLED << 5) /* F - Single precision floating-point extension */ \
|
||||
| (0 << 6) /* G - Additional standard extensions present */ \
|
||||
| (0 << 7) /* H - Hypervisor mode implemented */ \
|
||||
| (1 << 8) /* I - RV32I/64I/128I base ISA */ \
|
||||
| (0 << 9) /* J - Reserved */ \
|
||||
| (0 << 10) /* K - Reserved */ \
|
||||
| (0 << 11) /* L - Tentatively reserved for Decimal Floating-Point extension */ \
|
||||
| (`EXT_M_ENABLED << 12) /* M - Integer Multiply/Divide extension */ \
|
||||
| (0 << 13) /* N - User level interrupts supported */ \
|
||||
| (0 << 14) /* O - Reserved */ \
|
||||
| (0 << 15) /* P - Tentatively reserved for Packed-SIMD extension */ \
|
||||
| (0 << 16) /* Q - Quad-precision floating-point extension */ \
|
||||
| (0 << 17) /* R - Reserved */ \
|
||||
| (0 << 18) /* S - Supervisor mode implemented */ \
|
||||
| (0 << 19) /* T - Tentatively reserved for Transactional Memory extension */ \
|
||||
| (1 << 20) /* U - User mode implemented */ \
|
||||
| (0 << 21) /* V - Tentatively reserved for Vector extension */ \
|
||||
| (0 << 22) /* W - Reserved */ \
|
||||
| (`ISA_X_ENABLED << 23) /* X - Non-standard extensions present */ \
|
||||
| (0 << 24) /* Y - Reserved */ \
|
||||
| (0 << 25) /* Z - Reserved */
|
||||
|
||||
// Device identification //////////////////////////////////////////////////////
|
||||
|
||||
`define VENDOR_ID 0
|
||||
`define ARCHITECTURE_ID 0
|
||||
`define IMPLEMENTATION_ID 0
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Pipeline Configuration /////////////////////////////////////////////////////
|
||||
|
||||
`ifndef LATENCY_IMUL
|
||||
`define LATENCY_IMUL 3
|
||||
// Issue width
|
||||
`ifndef ISSUE_WIDTH
|
||||
`define ISSUE_WIDTH `MIN(`NUM_WARPS, 4)
|
||||
`endif
|
||||
|
||||
`ifndef LATENCY_FNCP
|
||||
`define LATENCY_FNCP 2
|
||||
// Number of ALU units
|
||||
`ifndef NUM_ALU_LANES
|
||||
`define NUM_ALU_LANES `UP(`NUM_THREADS / 2)
|
||||
`endif
|
||||
`ifndef NUM_ALU_BLOCKS
|
||||
`define NUM_ALU_BLOCKS `UP(`ISSUE_WIDTH / 1)
|
||||
`endif
|
||||
|
||||
`ifndef LATENCY_FMA
|
||||
`define LATENCY_FMA 4
|
||||
// Number of FPU units
|
||||
`ifndef NUM_FPU_LANES
|
||||
`define NUM_FPU_LANES `UP(`NUM_THREADS / 2)
|
||||
`endif
|
||||
`ifndef NUM_FPU_BLOCKS
|
||||
`define NUM_FPU_BLOCKS `UP(`ISSUE_WIDTH / 1)
|
||||
`endif
|
||||
|
||||
`ifndef LATENCY_FDIV
|
||||
`ifdef ALTERA_S10
|
||||
`define LATENCY_FDIV 34
|
||||
`else
|
||||
`define LATENCY_FDIV 15
|
||||
`endif
|
||||
// Number of LSU units
|
||||
`ifndef NUM_LSU_LANES
|
||||
`define NUM_LSU_LANES `MIN(`NUM_THREADS, 4)
|
||||
`endif
|
||||
|
||||
`ifndef LATENCY_FSQRT
|
||||
`ifdef ALTERA_S10
|
||||
`define LATENCY_FSQRT 25
|
||||
`else
|
||||
`define LATENCY_FSQRT 10
|
||||
// Number of SFU units
|
||||
`ifndef NUM_SFU_LANES
|
||||
`define NUM_SFU_LANES `MIN(`NUM_THREADS, 4)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef LATENCY_FDIVSQRT
|
||||
`define LATENCY_FDIVSQRT 32
|
||||
`endif
|
||||
|
||||
`ifndef LATENCY_FCVT
|
||||
`define LATENCY_FCVT 5
|
||||
`endif
|
||||
|
||||
`define RESET_DELAY 6
|
||||
|
||||
// CSR Addresses //////////////////////////////////////////////////////////////
|
||||
|
||||
// User Floating-Point CSRs
|
||||
`define CSR_FFLAGS 12'h001
|
||||
`define CSR_FRM 12'h002
|
||||
`define CSR_FCSR 12'h003
|
||||
|
||||
`define CSR_SATP 12'h180
|
||||
|
||||
`define CSR_PMPCFG0 12'h3A0
|
||||
`define CSR_PMPADDR0 12'h3B0
|
||||
|
||||
`define CSR_MSTATUS 12'h300
|
||||
`define CSR_MISA 12'h301
|
||||
`define CSR_MEDELEG 12'h302
|
||||
`define CSR_MIDELEG 12'h303
|
||||
`define CSR_MIE 12'h304
|
||||
`define CSR_MTVEC 12'h305
|
||||
|
||||
`define CSR_MEPC 12'h341
|
||||
|
||||
// Machine Performance-monitoring counters
|
||||
`define CSR_MPM_BASE 12'hB00
|
||||
`define CSR_MPM_BASE_H 12'hB80
|
||||
// PERF: pipeline
|
||||
`define CSR_MCYCLE 12'hB00
|
||||
`define CSR_MCYCLE_H 12'hB80
|
||||
`define CSR_MPM_RESERVED 12'hB01
|
||||
`define CSR_MPM_RESERVED_H 12'hB81
|
||||
`define CSR_MINSTRET 12'hB02
|
||||
`define CSR_MINSTRET_H 12'hB82
|
||||
`define CSR_MPM_IBUF_ST 12'hB03
|
||||
`define CSR_MPM_IBUF_ST_H 12'hB83
|
||||
`define CSR_MPM_SCRB_ST 12'hB04
|
||||
`define CSR_MPM_SCRB_ST_H 12'hB84
|
||||
`define CSR_MPM_ALU_ST 12'hB05
|
||||
`define CSR_MPM_ALU_ST_H 12'hB85
|
||||
`define CSR_MPM_LSU_ST 12'hB06
|
||||
`define CSR_MPM_LSU_ST_H 12'hB86
|
||||
`define CSR_MPM_CSR_ST 12'hB07
|
||||
`define CSR_MPM_CSR_ST_H 12'hB87
|
||||
`define CSR_MPM_FPU_ST 12'hB08
|
||||
`define CSR_MPM_FPU_ST_H 12'hB88
|
||||
`define CSR_MPM_GPU_ST 12'hB09
|
||||
`define CSR_MPM_GPU_ST_H 12'hB89
|
||||
// PERF: decode
|
||||
`define CSR_MPM_LOADS 12'hB0A
|
||||
`define CSR_MPM_LOADS_H 12'hB8A
|
||||
`define CSR_MPM_STORES 12'hB0B
|
||||
`define CSR_MPM_STORES_H 12'hB8B
|
||||
`define CSR_MPM_BRANCHES 12'hB0C
|
||||
`define CSR_MPM_BRANCHES_H 12'hB8C
|
||||
// PERF: icache
|
||||
`define CSR_MPM_ICACHE_READS 12'hB0D // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB8D
|
||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0E // read misses
|
||||
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8E
|
||||
// PERF: dcache
|
||||
`define CSR_MPM_DCACHE_READS 12'hB0F // total reads
|
||||
`define CSR_MPM_DCACHE_READS_H 12'hB8F
|
||||
`define CSR_MPM_DCACHE_WRITES 12'hB10 // total writes
|
||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB90
|
||||
`define CSR_MPM_DCACHE_MISS_R 12'hB11 // read misses
|
||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB91
|
||||
`define CSR_MPM_DCACHE_MISS_W 12'hB12 // write misses
|
||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB92
|
||||
`define CSR_MPM_DCACHE_BANK_ST 12'hB13 // bank conflicts
|
||||
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB93
|
||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB14 // MSHR stalls
|
||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB94
|
||||
// PERF: smem
|
||||
`define CSR_MPM_SMEM_READS 12'hB15 // total reads
|
||||
`define CSR_MPM_SMEM_READS_H 12'hB95
|
||||
`define CSR_MPM_SMEM_WRITES 12'hB16 // total writes
|
||||
`define CSR_MPM_SMEM_WRITES_H 12'hB96
|
||||
`define CSR_MPM_SMEM_BANK_ST 12'hB17 // bank conflicts
|
||||
`define CSR_MPM_SMEM_BANK_ST_H 12'hB97
|
||||
// PERF: memory
|
||||
`define CSR_MPM_MEM_READS 12'hB18 // memory reads
|
||||
`define CSR_MPM_MEM_READS_H 12'hB98
|
||||
`define CSR_MPM_MEM_WRITES 12'hB19 // memory writes
|
||||
`define CSR_MPM_MEM_WRITES_H 12'hB99
|
||||
`define CSR_MPM_MEM_LAT 12'hB1A // memory latency
|
||||
`define CSR_MPM_MEM_LAT_H 12'hB9A
|
||||
// PERF: texunit
|
||||
`define CSR_MPM_TEX_READS 12'hB1B // texture accesses
|
||||
`define CSR_MPM_TEX_READS_H 12'hB9B
|
||||
`define CSR_MPM_TEX_LAT 12'hB1C // texture latency
|
||||
`define CSR_MPM_TEX_LAT_H 12'hB9C
|
||||
|
||||
// Machine Information Registers
|
||||
`define CSR_MVENDORID 12'hF11
|
||||
`define CSR_MARCHID 12'hF12
|
||||
`define CSR_MIMPID 12'hF13
|
||||
`define CSR_MHARTID 12'hF14
|
||||
|
||||
// User SIMT CSRs
|
||||
`define CSR_WTID 12'hCC0
|
||||
`define CSR_LTID 12'hCC1
|
||||
`define CSR_GTID 12'hCC2
|
||||
`define CSR_LWID 12'hCC3
|
||||
`define CSR_GWID `CSR_MHARTID
|
||||
`define CSR_GCID 12'hCC5
|
||||
`define CSR_TMASK 12'hCC4
|
||||
|
||||
// Machine SIMT CSRs
|
||||
`define CSR_NT 12'hFC0
|
||||
`define CSR_NW 12'hFC1
|
||||
`define CSR_NC 12'hFC2
|
||||
|
||||
////////// Texture Units //////////////////////////////////////////////////////
|
||||
|
||||
`define NUM_TEX_UNITS 2
|
||||
`define TEX_SUBPIXEL_BITS 8
|
||||
|
||||
`define TEX_DIM_BITS 15
|
||||
`define TEX_LOD_MAX `TEX_DIM_BITS
|
||||
`define TEX_LOD_BITS 4
|
||||
|
||||
`define TEX_FXD_BITS 32
|
||||
`define TEX_FXD_FRAC (`TEX_DIM_BITS+`TEX_SUBPIXEL_BITS)
|
||||
|
||||
`define TEX_STATE_ADDR 0
|
||||
`define TEX_STATE_WIDTH 1
|
||||
`define TEX_STATE_HEIGHT 2
|
||||
`define TEX_STATE_FORMAT 3
|
||||
`define TEX_STATE_FILTER 4
|
||||
`define TEX_STATE_WRAPU 5
|
||||
`define TEX_STATE_WRAPV 6
|
||||
`define TEX_STATE_MIPOFF(lod) (7+(lod))
|
||||
`define NUM_TEX_STATES (`TEX_STATE_MIPOFF(`TEX_LOD_MAX)+1)
|
||||
|
||||
`define CSR_TEX_UNIT 12'hFD0
|
||||
|
||||
`define CSR_TEX_STATE_BEGIN 12'hFD1
|
||||
`define CSR_TEX_ADDR (`CSR_TEX_STATE_BEGIN+`TEX_STATE_ADDR)
|
||||
`define CSR_TEX_WIDTH (`CSR_TEX_STATE_BEGIN+`TEX_STATE_WIDTH)
|
||||
`define CSR_TEX_HEIGHT (`CSR_TEX_STATE_BEGIN+`TEX_STATE_HEIGHT)
|
||||
`define CSR_TEX_FORMAT (`CSR_TEX_STATE_BEGIN+`TEX_STATE_FORMAT)
|
||||
`define CSR_TEX_FILTER (`CSR_TEX_STATE_BEGIN+`TEX_STATE_FILTER)
|
||||
`define CSR_TEX_WRAPU (`CSR_TEX_STATE_BEGIN+`TEX_STATE_WRAPU)
|
||||
`define CSR_TEX_WRAPV (`CSR_TEX_STATE_BEGIN+`TEX_STATE_WRAPV)
|
||||
`define CSR_TEX_MIPOFF(lod) (`CSR_TEX_STATE_BEGIN+`TEX_STATE_MIPOFF(lod))
|
||||
`define CSR_TEX_STATE_END (`CSR_TEX_STATE_BEGIN+`NUM_TEX_STATES)
|
||||
|
||||
`define CSR_TEX_STATE(addr) ((addr) - `CSR_TEX_STATE_BEGIN)
|
||||
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
// Size of Instruction Buffer
|
||||
`ifndef IBUF_SIZE
|
||||
`define IBUF_SIZE 2
|
||||
`define IBUF_SIZE (2 * (`NUM_WARPS / `ISSUE_WIDTH))
|
||||
`endif
|
||||
|
||||
// Size of LSU Request Queue
|
||||
`ifndef LSUQ_SIZE
|
||||
`define LSUQ_SIZE (`NUM_WARPS * 2)
|
||||
`define LSUQ_SIZE (2 * (`NUM_THREADS / `NUM_LSU_LANES))
|
||||
`endif
|
||||
|
||||
// LSU Duplicate Address Check
|
||||
`ifdef LSU_DUP
|
||||
`define LSU_DUP_ENABLED 1
|
||||
`else
|
||||
`define LSU_DUP_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
`define GBAR_ENABLED 1
|
||||
`else
|
||||
`define GBAR_ENABLED 0
|
||||
`endif
|
||||
|
||||
// Default LATENCY_IMUL when it is not already defined:
// 4 when VIVADO is defined, 3 when QUARTUS is defined, and 4 when neither is.
// The two tool checks are independent, so the inner fallback only fires when
// neither tool macro has set a value.
`ifndef LATENCY_IMUL
|
||||
`ifdef VIVADO
|
||||
`define LATENCY_IMUL 4
|
||||
`endif
|
||||
`ifdef QUARTUS
|
||||
`define LATENCY_IMUL 3
|
||||
`endif
|
||||
`ifndef LATENCY_IMUL
|
||||
`define LATENCY_IMUL 4
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// Floating-Point Units ///////////////////////////////////////////////////////
|
||||
|
||||
// Size of FPU Request Queue
|
||||
`ifndef FPUQ_SIZE
|
||||
`define FPUQ_SIZE 8
|
||||
`ifndef FPU_REQ_QUEUE_SIZE
|
||||
`define FPU_REQ_QUEUE_SIZE (2 * (`NUM_THREADS / `NUM_FPU_LANES))
|
||||
`endif
|
||||
|
||||
// Texture Unit Request Queue
|
||||
`ifndef TEXQ_SIZE
|
||||
`define TEXQ_SIZE (`NUM_WARPS * 2)
|
||||
// FNCP Latency
|
||||
`ifndef LATENCY_FNCP
|
||||
`define LATENCY_FNCP 2
|
||||
`endif
|
||||
|
||||
// FMA Latency
|
||||
`ifndef LATENCY_FMA
|
||||
`ifdef FPU_DPI
|
||||
`define LATENCY_FMA 4
|
||||
`endif
|
||||
`ifdef FPU_FPNEW
|
||||
`define LATENCY_FMA 4
|
||||
`endif
|
||||
`ifdef FPU_DSP
|
||||
`ifdef QUARTUS
|
||||
`define LATENCY_FMA 4
|
||||
`endif
|
||||
`ifdef VIVADO
|
||||
`define LATENCY_FMA 16
|
||||
`endif
|
||||
`ifndef LATENCY_FMA
|
||||
`define LATENCY_FMA 4
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// FDIV Latency
|
||||
`ifndef LATENCY_FDIV
|
||||
`ifdef FPU_DPI
|
||||
`define LATENCY_FDIV 15
|
||||
`endif
|
||||
`ifdef FPU_FPNEW
|
||||
`define LATENCY_FDIV 16
|
||||
`endif
|
||||
`ifdef FPU_DSP
|
||||
`ifdef QUARTUS
|
||||
`define LATENCY_FDIV 15
|
||||
`endif
|
||||
`ifdef VIVADO
|
||||
`define LATENCY_FDIV 28
|
||||
`endif
|
||||
`ifndef LATENCY_FDIV
|
||||
`define LATENCY_FDIV 16
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// FSQRT Latency
|
||||
`ifndef LATENCY_FSQRT
|
||||
`ifdef FPU_DPI
|
||||
`define LATENCY_FSQRT 10
|
||||
`endif
|
||||
`ifdef FPU_FPNEW
|
||||
`define LATENCY_FSQRT 16
|
||||
`endif
|
||||
`ifdef FPU_DSP
|
||||
`ifdef QUARTUS
|
||||
`define LATENCY_FSQRT 10
|
||||
`endif
|
||||
`ifdef VIVADO
|
||||
`define LATENCY_FSQRT 28
|
||||
`endif
|
||||
`ifndef LATENCY_FSQRT
|
||||
`define LATENCY_FSQRT 16
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// FCVT Latency
|
||||
`ifndef LATENCY_FCVT
|
||||
`define LATENCY_FCVT 5
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef ICACHE_SIZE
|
||||
`define ICACHE_SIZE 16384
|
||||
// Cache Enable
|
||||
`ifndef ICACHE_DISABLE
|
||||
`define ICACHE_ENABLE
|
||||
`endif
|
||||
`ifdef ICACHE_ENABLE
|
||||
`define ICACHE_ENABLED 1
|
||||
`else
|
||||
`define ICACHE_ENABLED 0
|
||||
`define NUM_ICACHES 0
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef ICACHE_CREQ_SIZE
|
||||
`define ICACHE_CREQ_SIZE 0
|
||||
// Number of Cache Units
|
||||
`ifndef NUM_ICACHES
|
||||
`define NUM_ICACHES `UP(`NUM_CORES / 4)
|
||||
`endif
|
||||
|
||||
// Cache Size
|
||||
`ifndef ICACHE_SIZE
|
||||
`define ICACHE_SIZE 16384
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
@@ -314,7 +477,7 @@
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef ICACHE_MSHR_SIZE
|
||||
`define ICACHE_MSHR_SIZE `NUM_WARPS
|
||||
`define ICACHE_MSHR_SIZE 16
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
@@ -327,26 +490,38 @@
|
||||
`define ICACHE_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Number of Associative Ways
|
||||
`ifndef ICACHE_NUM_WAYS
|
||||
`define ICACHE_NUM_WAYS 2
|
||||
`endif
|
||||
|
||||
// Dcache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
// Cache Enable
|
||||
`ifndef DCACHE_DISABLE
|
||||
`define DCACHE_ENABLE
|
||||
`endif
|
||||
`ifdef DCACHE_ENABLE
|
||||
`define DCACHE_ENABLED 1
|
||||
`else
|
||||
`define DCACHE_ENABLED 0
|
||||
`define NUM_DCACHES 0
|
||||
`define DCACHE_NUM_BANKS 1
|
||||
`endif
|
||||
|
||||
// Number of Cache Units
|
||||
`ifndef NUM_DCACHES
|
||||
`define NUM_DCACHES `UP(`NUM_CORES / 4)
|
||||
`endif
|
||||
|
||||
// Cache Size
|
||||
`ifndef DCACHE_SIZE
|
||||
`define DCACHE_SIZE 16384
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
// Number of Banks
|
||||
`ifndef DCACHE_NUM_BANKS
|
||||
`define DCACHE_NUM_BANKS `NUM_THREADS
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef DCACHE_NUM_PORTS
|
||||
`define DCACHE_NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef DCACHE_CREQ_SIZE
|
||||
`define DCACHE_CREQ_SIZE 0
|
||||
`define DCACHE_NUM_BANKS (`NUM_LSU_LANES)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
@@ -356,7 +531,7 @@
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef DCACHE_MSHR_SIZE
|
||||
`define DCACHE_MSHR_SIZE `LSUQ_SIZE
|
||||
`define DCACHE_MSHR_SIZE 16
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
@@ -369,54 +544,42 @@
|
||||
`define DCACHE_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Number of Associative Ways
|
||||
`ifndef DCACHE_NUM_WAYS
|
||||
`define DCACHE_NUM_WAYS 2
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
||||
// per thread stack size
|
||||
`ifndef STACK_LOG2_SIZE
|
||||
`define STACK_LOG2_SIZE 10
|
||||
`ifndef SM_DISABLE
|
||||
`define SM_ENABLE
|
||||
`endif
|
||||
`define STACK_SIZE (1 << `STACK_LOG2_SIZE)
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef SMEM_SIZE
|
||||
`define SMEM_SIZE (`STACK_SIZE * `NUM_WARPS * `NUM_THREADS)
|
||||
`ifdef SM_ENABLE
|
||||
`define SM_ENABLED 1
|
||||
`else
|
||||
`define SM_ENABLED 0
|
||||
`define SMEM_NUM_BANKS 1
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
// Number of Banks
|
||||
`ifndef SMEM_NUM_BANKS
|
||||
`define SMEM_NUM_BANKS `NUM_THREADS
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef SMEM_CREQ_SIZE
|
||||
`define SMEM_CREQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef SMEM_CRSQ_SIZE
|
||||
`define SMEM_CRSQ_SIZE 2
|
||||
`define SMEM_NUM_BANKS (`NUM_LSU_LANES)
|
||||
`endif
|
||||
|
||||
// L2cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
// Cache Size
|
||||
`ifndef L2_CACHE_SIZE
|
||||
`define L2_CACHE_SIZE 131072
|
||||
`ifdef ALTERA_S10
|
||||
`define L2_CACHE_SIZE 2097152
|
||||
`else
|
||||
`define L2_CACHE_SIZE 1048576
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
// Number of Banks
|
||||
`ifndef L2_NUM_BANKS
|
||||
`define L2_NUM_BANKS ((`NUM_CORES < 4) ? `NUM_CORES : 4)
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef L2_NUM_PORTS
|
||||
`define L2_NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L2_CREQ_SIZE
|
||||
`define L2_CREQ_SIZE 0
|
||||
`define L2_NUM_BANKS 2
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
@@ -439,26 +602,25 @@
|
||||
`define L2_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Number of Associative Ways
|
||||
`ifndef L2_NUM_WAYS
|
||||
`define L2_NUM_WAYS 4
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
// Cache Size
|
||||
`ifndef L3_CACHE_SIZE
|
||||
`ifdef ALTERA_S10
|
||||
`define L3_CACHE_SIZE 2097152
|
||||
`else
|
||||
`define L3_CACHE_SIZE 1048576
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
// Number of Banks
|
||||
`ifndef L3_NUM_BANKS
|
||||
`define L3_NUM_BANKS ((`NUM_CLUSTERS < 4) ? `NUM_CORES : 4)
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef L3_NUM_PORTS
|
||||
`define L3_NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L3_CREQ_SIZE
|
||||
`define L3_CREQ_SIZE 0
|
||||
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
@@ -481,4 +643,9 @@
|
||||
`define L3_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
`endif
|
||||
// Number of Associative Ways
|
||||
`ifndef L3_NUM_WAYS
|
||||
`define L3_NUM_WAYS 4
|
||||
`endif
|
||||
|
||||
`endif // VX_CONFIG_VH
|
||||
|
||||
@@ -1,156 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_core #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_core
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [`DCACHE_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`DCACHE_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L1_MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L1_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if();
|
||||
`endif
|
||||
|
||||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`L1_MEM_TAG_WIDTH)
|
||||
) mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`L1_MEM_TAG_WIDTH)
|
||||
) mem_rsp_if();
|
||||
|
||||
assign mem_req_valid = mem_req_if.valid;
|
||||
assign mem_req_rw = mem_req_if.rw;
|
||||
assign mem_req_byteen= mem_req_if.byteen;
|
||||
assign mem_req_addr = mem_req_if.addr;
|
||||
assign mem_req_data = mem_req_if.data;
|
||||
assign mem_req_tag = mem_req_if.tag;
|
||||
assign mem_req_if.ready = mem_req_ready;
|
||||
|
||||
assign mem_rsp_if.valid = mem_rsp_valid;
|
||||
assign mem_rsp_if.data = mem_rsp_data;
|
||||
assign mem_rsp_if.tag = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_rsp_if.ready;
|
||||
|
||||
//--
|
||||
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_rsp_if();
|
||||
|
||||
VX_icache_req_if #(
|
||||
.WORD_SIZE (`ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_req_if();
|
||||
|
||||
VX_icache_rsp_if #(
|
||||
.WORD_SIZE (`ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_rsp_if();
|
||||
|
||||
VX_pipeline #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) pipeline (
|
||||
`SCOPE_BIND_VX_core_pipeline
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
`endif
|
||||
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
|
||||
// Dcache core request
|
||||
.dcache_req_valid (dcache_req_if.valid),
|
||||
.dcache_req_rw (dcache_req_if.rw),
|
||||
.dcache_req_byteen (dcache_req_if.byteen),
|
||||
.dcache_req_addr (dcache_req_if.addr),
|
||||
.dcache_req_data (dcache_req_if.data),
|
||||
.dcache_req_tag (dcache_req_if.tag),
|
||||
.dcache_req_ready (dcache_req_if.ready),
|
||||
|
||||
// Dcache core response
|
||||
.dcache_rsp_valid (dcache_rsp_if.valid),
|
||||
.dcache_rsp_tmask (dcache_rsp_if.tmask),
|
||||
.dcache_rsp_data (dcache_rsp_if.data),
|
||||
.dcache_rsp_tag (dcache_rsp_if.tag),
|
||||
.dcache_rsp_ready (dcache_rsp_if.ready),
|
||||
|
||||
// Icache core request
|
||||
.icache_req_valid (icache_req_if.valid),
|
||||
.icache_req_addr (icache_req_if.addr),
|
||||
.icache_req_tag (icache_req_if.tag),
|
||||
.icache_req_ready (icache_req_if.ready),
|
||||
|
||||
// Icache core response
|
||||
.icache_rsp_valid (icache_rsp_if.valid),
|
||||
.icache_rsp_data (icache_rsp_if.data),
|
||||
.icache_rsp_tag (icache_rsp_if.tag),
|
||||
.icache_rsp_ready (icache_rsp_if.ready),
|
||||
|
||||
// Status
|
||||
.busy(busy)
|
||||
);
|
||||
|
||||
//--
|
||||
|
||||
VX_mem_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) mem_unit (
|
||||
`SCOPE_BIND_VX_core_mem_unit
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
`endif
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core <-> Dcache
|
||||
.dcache_req_if (dcache_req_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
|
||||
// Core <-> Icache
|
||||
.icache_req_if (icache_req_if),
|
||||
.icache_rsp_if (icache_rsp_if),
|
||||
|
||||
// Memory
|
||||
.mem_req_if (mem_req_if),
|
||||
.mem_rsp_if (mem_rsp_if)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,265 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// CSR register file for one core.
//
// Read port  : combinational; `read_addr`/`read_wid` select the value driven on `read_data`.
// Write port : clocked; `write_addr`/`write_wid`/`write_data` are applied when `write_enable` is set.
//
// State kept here:
//   - fcsr                 : one {frm, fflags} entry per warp
//   - satp/mstatus/...     : single registers (not indexed by warp)
//   - csr_cycle/csr_instret: 64-bit counters
// Performance counters are not stored here; they are read through the perf_* interfaces.
module VX_csr_data #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

`ifdef PERF_ENABLE
`ifdef EXT_TEX_ENABLE
    VX_perf_tex_if.slave        perf_tex_if,
`endif
    VX_perf_memsys_if.slave     perf_memsys_if,
    VX_perf_pipeline_if.slave   perf_pipeline_if,
`endif

    VX_cmt_to_csr_if.slave      cmt_to_csr_if,
    VX_fetch_to_csr_if.slave    fetch_to_csr_if,

`ifdef EXT_F_ENABLE
    VX_fpu_to_csr_if.slave      fpu_to_csr_if,
`endif
`ifdef EXT_TEX_ENABLE
    VX_tex_csr_if.master        tex_csr_if,
`endif

    input wire                      read_enable,
    input wire [`UUID_BITS-1:0]     read_uuid,
    input wire[`CSR_ADDR_BITS-1:0]  read_addr,
    input wire[`NW_BITS-1:0]        read_wid,
    output wire[31:0]               read_data,

    input wire                      write_enable,
    input wire [`UUID_BITS-1:0]     write_uuid,
    input wire[`CSR_ADDR_BITS-1:0]  write_addr,
    input wire[`NW_BITS-1:0]        write_wid,
    input wire[31:0]                write_data,

    input wire busy
);
    import fpu_types::*;

    // ------------------------------------------------------------------
    // Storage
    // ------------------------------------------------------------------

    // machine-level CSRs (written by software only, never reset here)
    reg [`CSR_WIDTH-1:0] csr_satp;
    reg [`CSR_WIDTH-1:0] csr_mstatus;
    reg [`CSR_WIDTH-1:0] csr_medeleg;
    reg [`CSR_WIDTH-1:0] csr_mideleg;
    reg [`CSR_WIDTH-1:0] csr_mie;
    reg [`CSR_WIDTH-1:0] csr_mtvec;
    reg [`CSR_WIDTH-1:0] csr_mepc;
    reg [`CSR_WIDTH-1:0] csr_pmpcfg [0:0];
    reg [`CSR_WIDTH-1:0] csr_pmpaddr [0:0];

    // free-running counters
    reg [63:0] csr_cycle;
    reg [63:0] csr_instret;

    // per-warp floating-point control/status: upper field = frm, lower field = fflags
    reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr;

    // ------------------------------------------------------------------
    // Write port
    // ------------------------------------------------------------------

    always @(posedge clk) begin
        if (reset) begin
            fcsr <= '0;
        end else begin
        `ifdef EXT_F_ENABLE
            // flags reported by the FPU are OR-ed into the warp's existing fflags
            if (fpu_to_csr_if.write_enable) begin
                fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fpu_to_csr_if.write_fflags
                                                                 | fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0];
            end
        `endif
            // software write; kept after the FPU update so that, being the later
            // nonblocking assignment, it takes precedence when both hit the same bits
            if (write_enable) begin
                case (write_addr)
                    `CSR_FFLAGS:   fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
                    `CSR_FRM:      fcsr[write_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS] <= write_data[`INST_FRM_BITS-1:0];
                    `CSR_FCSR:     fcsr[write_wid] <= write_data[`FFLAGS_BITS+`INST_FRM_BITS-1:0];
                    `CSR_SATP:     csr_satp       <= write_data[`CSR_WIDTH-1:0];
                    `CSR_MSTATUS:  csr_mstatus    <= write_data[`CSR_WIDTH-1:0];
                    `CSR_MEDELEG:  csr_medeleg    <= write_data[`CSR_WIDTH-1:0];
                    `CSR_MIDELEG:  csr_mideleg    <= write_data[`CSR_WIDTH-1:0];
                    `CSR_MIE:      csr_mie        <= write_data[`CSR_WIDTH-1:0];
                    `CSR_MTVEC:    csr_mtvec      <= write_data[`CSR_WIDTH-1:0];
                    `CSR_MEPC:     csr_mepc       <= write_data[`CSR_WIDTH-1:0];
                    `CSR_PMPCFG0:  csr_pmpcfg[0]  <= write_data[`CSR_WIDTH-1:0];
                    `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data[`CSR_WIDTH-1:0];
                    default: begin
                    `ifdef EXT_TEX_ENABLE
                        // texture CSRs are forwarded through tex_csr_if; anything else is an error
                        `ASSERT((write_addr == `CSR_TEX_UNIT)
                             || (write_addr >= `CSR_TEX_STATE_BEGIN
                              && write_addr < `CSR_TEX_STATE_END),
                                ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
                    `else
                        // write_enable is known to be set here, so this always reports the bad address
                        `ASSERT(~write_enable, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
                    `endif
                    end
                endcase
            end
        end
    end

    `UNUSED_VAR (write_data)

    // TEX CSRs: the write request is passed through unchanged
`ifdef EXT_TEX_ENABLE
    assign tex_csr_if.write_enable = write_enable;
    assign tex_csr_if.write_addr   = write_addr;
    assign tex_csr_if.write_data   = write_data;
    assign tex_csr_if.write_uuid   = write_uuid;
`endif

    // ------------------------------------------------------------------
    // Counters
    // ------------------------------------------------------------------

    always @(posedge clk) begin
        if (reset) begin
            csr_instret <= 0;
            csr_cycle   <= 0;
        end else begin
            // instructions retired: advance by the size reported with each commit
            if (cmt_to_csr_if.valid) begin
                csr_instret <= csr_instret + 64'(cmt_to_csr_if.commit_size);
            end
            // cycles: only counted while `busy` is asserted
            if (busy) begin
                csr_cycle <= csr_cycle + 1;
            end
        end
    end

    // ------------------------------------------------------------------
    // Read port
    // ------------------------------------------------------------------

    // address falls inside one of the two 32-entry performance-counter windows
    wire read_addr_in_mpm = (read_addr >= `CSR_MPM_BASE && read_addr < (`CSR_MPM_BASE + 32))
                         || (read_addr >= `CSR_MPM_BASE_H && read_addr < (`CSR_MPM_BASE_H + 32));

    // address belongs to the texture unit (only when that extension is built in)
`ifdef EXT_TEX_ENABLE
    wire read_addr_in_tex = (read_addr == `CSR_TEX_UNIT)
                         || (read_addr >= `CSR_TEX_STATE_BEGIN
                          && read_addr < `CSR_TEX_STATE_END);
`else
    wire read_addr_in_tex = 0;
`endif

    // Emits the two case items that expose one wide counter as a low/high CSR pair:
    // `lo` reads bits [31:0], `hi` reads the bits above 31 up to PERF_CTR_BITS.
    `define CSR_DATA_READ_64(lo, hi, src) \
        lo : read_data_r = src[31:0]; \
        hi : read_data_r = 32'(src[`PERF_CTR_BITS-1:32])

    reg [31:0] read_data_r;
    reg read_addr_valid_r;

    always @(*) begin
        read_data_r       = 'x;
        read_addr_valid_r = 1;
        case (read_addr)
            // floating-point state of the reading warp
            `CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFLAGS_BITS-1:0]);
            `CSR_FRM    : read_data_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS]);
            `CSR_FCSR   : read_data_r = 32'(fcsr[read_wid]);

            // identification; per-thread adjustment is not done in this module
            `CSR_WTID   ,
            `CSR_LTID   ,
            `CSR_LWID   : read_data_r = 32'(read_wid);
            `CSR_GTID   ,
            /*`CSR_MHARTID ,*/
            `CSR_GWID   : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
            `CSR_GCID   : read_data_r = CORE_ID;

            `CSR_TMASK  : read_data_r = 32'(fetch_to_csr_if.thread_masks[read_wid]);

            // machine configuration
            `CSR_NT     : read_data_r = `NUM_THREADS;
            `CSR_NW     : read_data_r = `NUM_WARPS;
            `CSR_NC     : read_data_r = `NUM_CORES * `NUM_CLUSTERS;

            `CSR_DATA_READ_64(`CSR_MCYCLE,   `CSR_MCYCLE_H,   csr_cycle);
            `CSR_DATA_READ_64(`CSR_MINSTRET, `CSR_MINSTRET_H, csr_instret);

        `ifdef PERF_ENABLE
            // PERF: pipeline
            `CSR_DATA_READ_64(`CSR_MPM_IBUF_ST, `CSR_MPM_IBUF_ST_H, perf_pipeline_if.ibf_stalls);
            `CSR_DATA_READ_64(`CSR_MPM_SCRB_ST, `CSR_MPM_SCRB_ST_H, perf_pipeline_if.scb_stalls);
            `CSR_DATA_READ_64(`CSR_MPM_ALU_ST,  `CSR_MPM_ALU_ST_H,  perf_pipeline_if.alu_stalls);
            `CSR_DATA_READ_64(`CSR_MPM_LSU_ST,  `CSR_MPM_LSU_ST_H,  perf_pipeline_if.lsu_stalls);
            `CSR_DATA_READ_64(`CSR_MPM_CSR_ST,  `CSR_MPM_CSR_ST_H,  perf_pipeline_if.csr_stalls);
        `ifdef EXT_F_ENABLE
            `CSR_DATA_READ_64(`CSR_MPM_FPU_ST,  `CSR_MPM_FPU_ST_H,  perf_pipeline_if.fpu_stalls);
        `else
            `CSR_MPM_FPU_ST     : read_data_r = '0;
            `CSR_MPM_FPU_ST_H   : read_data_r = '0;
        `endif
            `CSR_DATA_READ_64(`CSR_MPM_GPU_ST,  `CSR_MPM_GPU_ST_H,  perf_pipeline_if.gpu_stalls);
            // PERF: decode
            `CSR_DATA_READ_64(`CSR_MPM_LOADS,    `CSR_MPM_LOADS_H,    perf_pipeline_if.loads);
            `CSR_DATA_READ_64(`CSR_MPM_STORES,   `CSR_MPM_STORES_H,   perf_pipeline_if.stores);
            `CSR_DATA_READ_64(`CSR_MPM_BRANCHES, `CSR_MPM_BRANCHES_H, perf_pipeline_if.branches);
            // PERF: icache
            `CSR_DATA_READ_64(`CSR_MPM_ICACHE_READS,  `CSR_MPM_ICACHE_READS_H,  perf_memsys_if.icache_reads);
            `CSR_DATA_READ_64(`CSR_MPM_ICACHE_MISS_R, `CSR_MPM_ICACHE_MISS_R_H, perf_memsys_if.icache_read_misses);
            // PERF: dcache
            `CSR_DATA_READ_64(`CSR_MPM_DCACHE_READS,   `CSR_MPM_DCACHE_READS_H,   perf_memsys_if.dcache_reads);
            `CSR_DATA_READ_64(`CSR_MPM_DCACHE_WRITES,  `CSR_MPM_DCACHE_WRITES_H,  perf_memsys_if.dcache_writes);
            `CSR_DATA_READ_64(`CSR_MPM_DCACHE_MISS_R,  `CSR_MPM_DCACHE_MISS_R_H,  perf_memsys_if.dcache_read_misses);
            `CSR_DATA_READ_64(`CSR_MPM_DCACHE_MISS_W,  `CSR_MPM_DCACHE_MISS_W_H,  perf_memsys_if.dcache_write_misses);
            `CSR_DATA_READ_64(`CSR_MPM_DCACHE_BANK_ST, `CSR_MPM_DCACHE_BANK_ST_H, perf_memsys_if.dcache_bank_stalls);
            `CSR_DATA_READ_64(`CSR_MPM_DCACHE_MSHR_ST, `CSR_MPM_DCACHE_MSHR_ST_H, perf_memsys_if.dcache_mshr_stalls);
            // PERF: smem
            `CSR_DATA_READ_64(`CSR_MPM_SMEM_READS,   `CSR_MPM_SMEM_READS_H,   perf_memsys_if.smem_reads);
            `CSR_DATA_READ_64(`CSR_MPM_SMEM_WRITES,  `CSR_MPM_SMEM_WRITES_H,  perf_memsys_if.smem_writes);
            `CSR_DATA_READ_64(`CSR_MPM_SMEM_BANK_ST, `CSR_MPM_SMEM_BANK_ST_H, perf_memsys_if.smem_bank_stalls);
            // PERF: memory
            `CSR_DATA_READ_64(`CSR_MPM_MEM_READS,  `CSR_MPM_MEM_READS_H,  perf_memsys_if.mem_reads);
            `CSR_DATA_READ_64(`CSR_MPM_MEM_WRITES, `CSR_MPM_MEM_WRITES_H, perf_memsys_if.mem_writes);
            `CSR_DATA_READ_64(`CSR_MPM_MEM_LAT,    `CSR_MPM_MEM_LAT_H,    perf_memsys_if.mem_latency);
        `ifdef EXT_TEX_ENABLE
            // PERF: texunit
            `CSR_DATA_READ_64(`CSR_MPM_TEX_READS, `CSR_MPM_TEX_READS_H, perf_tex_if.mem_reads);
            `CSR_DATA_READ_64(`CSR_MPM_TEX_LAT,   `CSR_MPM_TEX_LAT_H,   perf_tex_if.mem_latency);
        `endif
            // PERF: reserved
            `CSR_MPM_RESERVED   : read_data_r = '0;
            `CSR_MPM_RESERVED_H : read_data_r = '0;
        `endif

            // machine-level CSRs
            `CSR_SATP      : read_data_r = 32'(csr_satp);

            `CSR_MSTATUS   : read_data_r = 32'(csr_mstatus);
            `CSR_MISA      : read_data_r = `ISA_CODE;
            `CSR_MEDELEG   : read_data_r = 32'(csr_medeleg);
            `CSR_MIDELEG   : read_data_r = 32'(csr_mideleg);
            `CSR_MIE       : read_data_r = 32'(csr_mie);
            `CSR_MTVEC     : read_data_r = 32'(csr_mtvec);

            `CSR_MEPC      : read_data_r = 32'(csr_mepc);

            `CSR_PMPCFG0   : read_data_r = 32'(csr_pmpcfg[0]);
            `CSR_PMPADDR0  : read_data_r = 32'(csr_pmpaddr[0]);

            `CSR_MVENDORID : read_data_r = `VENDOR_ID;
            `CSR_MARCHID   : read_data_r = `ARCHITECTURE_ID;
            `CSR_MIMPID    : read_data_r = `IMPLEMENTATION_ID;

            default: begin
                // unlisted addresses are accepted only inside the perf-counter or
                // texture windows; read_data_r keeps its 'x default in that case
                read_addr_valid_r = read_addr_in_mpm || read_addr_in_tex;
            end
        endcase
    end

    `undef CSR_DATA_READ_64

    `RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: %0h (#%0d)", $time, read_addr, read_uuid))

    assign read_data = read_data_r;

    // rounding mode of the warp the FPU is asking about
`ifdef EXT_F_ENABLE
    assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS];
`endif

endmodule
|
||||
@@ -1,151 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// CSR execution unit.
//
// Stage 0 reads the addressed CSR from VX_csr_data and computes the value the
// instruction would write back (read-write / read-set / read-clear).
// A single pipeline register carries the request to stage 1, where the old
// value is committed to the destination register and the new value is
// written into VX_csr_data.
module VX_csr_unit #(
    parameter CORE_ID = 0
) (
    input wire                  clk,
    input wire                  reset,

`ifdef PERF_ENABLE
`ifdef EXT_TEX_ENABLE
    VX_perf_tex_if.slave        perf_tex_if,
`endif
    VX_perf_memsys_if.slave     perf_memsys_if,
    VX_perf_pipeline_if.slave   perf_pipeline_if,
`endif

    VX_cmt_to_csr_if.slave      cmt_to_csr_if,
    VX_fetch_to_csr_if.slave    fetch_to_csr_if,
    VX_csr_req_if.slave         csr_req_if,
    VX_commit_if.master         csr_commit_if,

`ifdef EXT_F_ENABLE
    VX_fpu_to_csr_if.slave      fpu_to_csr_if,
    input wire[`NUM_WARPS-1:0]  fpu_pending,
`endif
`ifdef EXT_TEX_ENABLE
    VX_tex_csr_if.master        tex_csr_if,
`endif

    output wire[`NUM_WARPS-1:0] pending,
    input wire                  busy
);
    // ---- signals leaving the pipeline register (stage 1) ----
    wire                      csr_we_s1;
    wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
    wire [31:0]               csr_read_data_s1;
    wire [31:0]               csr_updated_data_s1;

    // ---- stage 0 ----
    wire [31:0] csr_read_data;

    // source operand: zero-extended immediate or rs1
    wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.imm) : csr_req_if.rs1_data;

    // the CSR is updated when the stage-1 instruction is valid and flagged as writing
    wire write_enable = csr_commit_if.valid && csr_we_s1;

    VX_csr_data #(
        .CORE_ID (CORE_ID)
    ) csr_data (
        .clk            (clk),
        .reset          (reset),
    `ifdef PERF_ENABLE
    `ifdef EXT_TEX_ENABLE
        .perf_tex_if    (perf_tex_if),
    `endif
        .perf_memsys_if (perf_memsys_if),
        .perf_pipeline_if(perf_pipeline_if),
    `endif
        .cmt_to_csr_if  (cmt_to_csr_if),
        .fetch_to_csr_if(fetch_to_csr_if),
    `ifdef EXT_F_ENABLE
        .fpu_to_csr_if  (fpu_to_csr_if),
    `endif
    `ifdef EXT_TEX_ENABLE
        .tex_csr_if     (tex_csr_if),
    `endif
        // read side is driven by the incoming request
        .read_enable    (csr_req_if.valid),
        .read_uuid      (csr_req_if.uuid),
        .read_addr      (csr_req_if.addr),
        .read_wid       (csr_req_if.wid),
        .read_data      (csr_read_data),
        // write side is driven by the instruction sitting in stage 1
        .write_enable   (write_enable),
        .write_uuid     (csr_commit_if.uuid),
        .write_addr     (csr_addr_s1),
        .write_wid      (csr_commit_if.wid),
        .write_data     (csr_updated_data_s1),
        .busy           (busy)
    );

    // Forwarding: when stage 1 holds a valid instruction for the same CSR and the
    // same warp, its updated value is used instead of the register-file read.
    wire same_csr_addr = (csr_addr_s1 == csr_req_if.addr);
    wire same_warp     = (csr_commit_if.wid == csr_req_if.wid);
    wire write_hazard  = same_csr_addr
                      && same_warp
                      && csr_commit_if.valid;

    wire [31:0] csr_read_data_qual = write_hazard ? csr_updated_data_s1 : csr_read_data;

    // New CSR value and write flag for the stage-0 instruction.
    reg [31:0] csr_updated_data;
    reg        csr_we_s0_unqual;

    wire csr_req_data_nonzero = (csr_req_data != 0);

    always @(*) begin
        case (csr_req_if.op_type)
        `INST_CSR_RW: begin
            // read-write: always writes, new value is the operand itself
            csr_we_s0_unqual = 1;
            csr_updated_data = csr_req_data;
        end
        `INST_CSR_RS: begin
            // read-set: writes only when the operand is non-zero
            csr_we_s0_unqual = csr_req_data_nonzero;
            csr_updated_data = csr_read_data_qual | csr_req_data;
        end
        //`INST_CSR_RC
        default: begin
            // read-clear: writes only when the operand is non-zero
            csr_we_s0_unqual = csr_req_data_nonzero;
            csr_updated_data = csr_read_data_qual & ~csr_req_data;
        end
        endcase
    end

    // Input side is held off while the FPU still has work pending for this warp.
`ifdef EXT_F_ENABLE
    wire req_blocked = fpu_pending[csr_req_if.wid];
`else
    wire req_blocked = 0;
`endif

    wire csr_req_valid = csr_req_if.valid && !req_blocked;

    // Output side is stalled while a valid commit has not been accepted.
    wire commit_blocked = ~csr_commit_if.ready && csr_commit_if.valid;

    VX_pipe_register #(
        .DATAW  (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32),
        .RESETW (1)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (!commit_blocked),
        .data_in  ({csr_req_valid, csr_req_if.uuid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.rd, csr_req_if.wb, csr_we_s0_unqual, csr_req_if.addr, csr_read_data_qual, csr_updated_data}),
        .data_out ({csr_commit_if.valid, csr_commit_if.uuid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_we_s1, csr_addr_s1, csr_read_data_s1, csr_updated_data_s1})
    );

    // Per-thread result: thread-id CSRs are specialised with the lane index,
    // every other CSR returns the same value on all lanes.
    wire is_wtid_s1     = (csr_addr_s1 == `CSR_WTID);
    wire is_lgtid_s1    = (csr_addr_s1 == `CSR_LTID
                        || csr_addr_s1 == `CSR_GTID);

    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        assign csr_commit_if.data[i] = is_wtid_s1  ? i :
                                       is_lgtid_s1 ? (csr_read_data_s1 * `NUM_THREADS + i) :
                                                     csr_read_data_s1;
    end

    assign csr_commit_if.eop = 1'b1;

    // can accept new request?
    assign csr_req_if.ready = ~commit_blocked && ~req_blocked;

    // Per-warp "CSR instruction in flight" flag: set when a request is accepted,
    // cleared when its commit is accepted. The set is written last so it wins
    // if both target the same warp in the same cycle.
    wire commit_fire = csr_commit_if.valid && csr_commit_if.ready;
    wire req_fire    = csr_req_if.valid && csr_req_if.ready;

    reg [`NUM_WARPS-1:0] pending_r;

    always @(posedge clk) begin
        if (reset) begin
            pending_r <= 0;
        end else begin
            if (commit_fire) begin
                pending_r[csr_commit_if.wid] <= 0;
            end
            if (req_fire) begin
                pending_r[csr_req_if.wid] <= 1;
            end
        end
    end

    assign pending = pending_r;

endmodule
|
||||
@@ -1,495 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
`include "VX_trace_instr.vh"
|
||||
`endif
|
||||
|
||||
// Register-selection helpers used by VX_decode.
// Each one assigns the decoded register index <r> to the matching <r>_r output.
// When the FP extension is built in, the register space is doubled and an extra
// MSB selects the bank: 0 = integer register, 1 = floating-point register.
`ifdef EXT_F_ENABLE
    `define USED_IREG(r) \
        r``_r = {1'b0, ``r}

    `define USED_FREG(r) \
        r``_r = {1'b1, ``r}
`else
    `define USED_IREG(r) \
        r``_r = ``r
`endif

// Instruction decoder.
//
// Purely combinational: takes the fetched instruction word from ifetch_rsp_if
// and produces, on decode_if, the execution unit (ex_type), the operation
// (op_type / op_mod), the register indices, the immediate and the
// use_PC / use_imm / wb flags.
//
// Side outputs:
//   - wstall_if : asserted with `stalled` set for instructions flagged is_wstall
//                 (jumps, branches, system ops, tmc/pred, split, barrier)
//   - join_if   : pulses when a JOIN instruction is accepted
//   - perf_decode_if (PERF_ENABLE only): running load / store / branch counts
module VX_decode #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

`ifdef PERF_ENABLE
    VX_perf_pipeline_if.decode perf_decode_if,
`endif

    // inputs
    VX_ifetch_rsp_if.slave ifetch_rsp_if,

    // outputs
    VX_decode_if.master decode_if,
    VX_wstall_if.master wstall_if,
    VX_join_if.master   join_if
);
    `UNUSED_PARAM (CORE_ID)
    `UNUSED_VAR (clk)
    `UNUSED_VAR (reset)

    reg [`EX_BITS-1:0]       ex_type;
    reg [`INST_OP_BITS-1:0]  op_type;
    reg [`INST_MOD_BITS-1:0] op_mod;
    reg [`NR_BITS-1:0]       rd_r, rs1_r, rs2_r, rs3_r;
    reg [31:0]               imm;
    reg use_rd, use_PC, use_imm;
    reg is_join, is_wstall;

    // raw instruction fields
    wire [31:0] instr  = ifetch_rsp_if.data;
    wire [6:0]  opcode = instr[6:0];
    wire [1:0]  func2  = instr[26:25];
    wire [2:0]  func3  = instr[14:12];
    wire [6:0]  func7  = instr[31:25];
    wire [11:0] u_12   = instr[31:20];

    wire [4:0] rd  = instr[11:7];
    wire [4:0] rs1 = instr[19:15];
    wire [4:0] rs2 = instr[24:20];
    wire [4:0] rs3 = instr[31:27];

    // immediates for the different instruction formats
    wire [19:0] upper_imm = {func7, rs2, rs1, func3};
    // shift-immediate forms (func3 == 3'b001 / 3'b101) only use the 5-bit shift amount
    wire [11:0] alu_imm   = (func3[0] && ~func3[1]) ? {{7{1'b0}}, rs2} : u_12;
    wire [11:0] s_imm     = {func7, rd};
    wire [12:0] b_imm     = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
    wire [20:0] jal_imm   = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};

    `UNUSED_VAR (rs3)

    always @(*) begin

        // defaults: an unrecognised instruction decodes with ex_type = 0 and no register use
        ex_type   = 0;
        op_type   = 'x;
        op_mod    = 0;
        rd_r      = 0;
        rs1_r     = 0;
        rs2_r     = 0;
        rs3_r     = 0;
        imm       = 'x;
        use_imm   = 0;
        use_PC    = 0;
        use_rd    = 0;
        is_join   = 0;
        is_wstall = 0;

        case (opcode)
            `INST_I: begin
                ex_type = `EX_ALU;
                case (func3)
                    3'h0: op_type = `INST_OP_BITS'(`INST_ALU_ADD);
                    3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
                    3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
                    3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
                    3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
                    3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
                    3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
                    3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
                    default:;
                endcase
                use_rd  = 1;
                use_imm = 1;
                imm     = {{20{alu_imm[11]}}, alu_imm};
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            `INST_R: begin
                ex_type = `EX_ALU;
            // Multiply/divide encodings (func7[0] set) belong to the M extension, so
            // they are gated by the M-extension switch rather than the FP one; gating
            // on EXT_F_ENABLE made MUL/DIV fall through to the base ALU table whenever
            // FP was disabled.
            // NOTE(review): assumes EXT_M_ENABLE is the M-extension switch provided by
            // VX_config.vh - confirm before merging.
            `ifdef EXT_M_ENABLE
                if (func7[0]) begin
                    case (func3)
                        3'h0: op_type = `INST_OP_BITS'(`INST_MUL_MUL);
                        3'h1: op_type = `INST_OP_BITS'(`INST_MUL_MULH);
                        3'h2: op_type = `INST_OP_BITS'(`INST_MUL_MULHSU);
                        3'h3: op_type = `INST_OP_BITS'(`INST_MUL_MULHU);
                        3'h4: op_type = `INST_OP_BITS'(`INST_MUL_DIV);
                        3'h5: op_type = `INST_OP_BITS'(`INST_MUL_DIVU);
                        3'h6: op_type = `INST_OP_BITS'(`INST_MUL_REM);
                        3'h7: op_type = `INST_OP_BITS'(`INST_MUL_REMU);
                        default:;
                    endcase
                    op_mod = 2;
                end else
            `endif
                begin
                    case (func3)
                        3'h0: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SUB) : `INST_OP_BITS'(`INST_ALU_ADD);
                        3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
                        3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
                        3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
                        3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
                        3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
                        3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
                        3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
                        default:;
                    endcase
                end
                use_rd = 1;
                `USED_IREG (rd);
                `USED_IREG (rs1);
                `USED_IREG (rs2);
            end
            `INST_LUI: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_ALU_LUI);
                use_rd  = 1;
                use_imm = 1;
                imm     = {upper_imm, 12'(0)};
                `USED_IREG (rd);
                rs1_r   = 0;
            end
            `INST_AUIPC: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
                use_rd  = 1;
                use_imm = 1;
                use_PC  = 1;
                imm     = {upper_imm, 12'(0)};
                `USED_IREG (rd);
            end
            `INST_JAL: begin
                ex_type   = `EX_ALU;
                op_type   = `INST_OP_BITS'(`INST_BR_JAL);
                op_mod    = 1;
                use_rd    = 1;
                use_imm   = 1;
                use_PC    = 1;
                is_wstall = 1;
                imm       = {{11{jal_imm[20]}}, jal_imm};
                `USED_IREG (rd);
            end
            `INST_JALR: begin
                ex_type   = `EX_ALU;
                op_type   = `INST_OP_BITS'(`INST_BR_JALR);
                op_mod    = 1;
                use_rd    = 1;
                use_imm   = 1;
                is_wstall = 1;
                imm       = {{20{u_12[11]}}, u_12};
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            `INST_B: begin
                ex_type = `EX_ALU;
                case (func3)
                    3'h0: op_type = `INST_OP_BITS'(`INST_BR_EQ);
                    3'h1: op_type = `INST_OP_BITS'(`INST_BR_NE);
                    3'h4: op_type = `INST_OP_BITS'(`INST_BR_LT);
                    3'h5: op_type = `INST_OP_BITS'(`INST_BR_GE);
                    3'h6: op_type = `INST_OP_BITS'(`INST_BR_LTU);
                    3'h7: op_type = `INST_OP_BITS'(`INST_BR_GEU);
                    default:;
                endcase
                op_mod    = 1;
                use_imm   = 1;
                use_PC    = 1;
                is_wstall = 1;
                imm       = {{19{b_imm[12]}}, b_imm};
                `USED_IREG (rs1);
                `USED_IREG (rs2);
            end
            `INST_FENCE: begin
                ex_type = `EX_LSU;
                op_mod  = `INST_MOD_BITS'(1);
            end
            `INST_SYS : begin
                if (func3[1:0] != 0) begin
                    // CSR access: the CSR address travels in the low bits of imm,
                    // and for the immediate forms the 5-bit operand sits right above it
                    ex_type = `EX_CSR;
                    op_type = `INST_OP_BITS'(func3[1:0]);
                    use_rd  = 1;
                    use_imm = func3[2];
                    imm[`CSR_ADDR_BITS-1:0] = u_12; // addr
                    `USED_IREG (rd);
                    if (func3[2]) begin
                        imm[`CSR_ADDR_BITS +: `NRI_BITS] = rs1; // imm
                    end else begin
                        `USED_IREG (rs1);
                    end
                end else begin
                    // environment calls / returns are issued to the ALU as branch-class ops
                    ex_type = `EX_ALU;
                    case (u_12)
                        12'h000: op_type = `INST_OP_BITS'(`INST_BR_ECALL);
                        12'h001: op_type = `INST_OP_BITS'(`INST_BR_EBREAK);
                        12'h002: op_type = `INST_OP_BITS'(`INST_BR_URET);
                        12'h102: op_type = `INST_OP_BITS'(`INST_BR_SRET);
                        12'h302: op_type = `INST_OP_BITS'(`INST_BR_MRET);
                        default:;
                    endcase
                    op_mod    = 1;
                    use_rd    = 1;
                    use_imm   = 1;
                    use_PC    = 1;
                    is_wstall = 1;
                    imm       = 32'd4;
                    `USED_IREG (rd);
                end
            end
        `ifdef EXT_F_ENABLE
            `INST_FL,
        `endif
            `INST_L: begin
                ex_type = `EX_LSU;
                op_type = `INST_OP_BITS'({1'b0, func3});
                use_rd  = 1;
                imm     = {{20{u_12[11]}}, u_12};
            `ifdef EXT_F_ENABLE
                // opcode[2] distinguishes the FP load opcode from the integer one
                if (opcode[2]) begin
                    `USED_FREG (rd);
                end else
            `endif
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
        `ifdef EXT_F_ENABLE
            `INST_FS,
        `endif
            `INST_S: begin
                ex_type = `EX_LSU;
                op_type = `INST_OP_BITS'({1'b1, func3});
                imm     = {{20{s_imm[11]}}, s_imm};
                `USED_IREG (rs1);
            `ifdef EXT_F_ENABLE
                // opcode[2] distinguishes the FP store opcode from the integer one
                if (opcode[2]) begin
                    `USED_FREG (rs2);
                end else
            `endif
                `USED_IREG (rs2);
            end
        `ifdef EXT_F_ENABLE
            `INST_FMADD,
            `INST_FMSUB,
            `INST_FNMSUB,
            `INST_FNMADD: begin
                ex_type = `EX_FPU;
                op_type = `INST_OP_BITS'(opcode[3:0]);
                op_mod  = func3;
                use_rd  = 1;
                `USED_FREG (rd);
                `USED_FREG (rs1);
                `USED_FREG (rs2);
                `USED_FREG (rs3);
            end
            `INST_FCI: begin
                ex_type = `EX_FPU;
                op_mod  = func3;
                use_rd  = 1;
                case (func7)
                    7'h00, // FADD
                    7'h04, // FSUB
                    7'h08, // FMUL
                    7'h0C: begin // FDIV
                        op_type = `INST_OP_BITS'(func7[3:0]);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h2C: begin
                        op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                    end
                    7'h50: begin
                        op_type = `INST_OP_BITS'(`INST_FPU_CMP);
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h60: begin
                        op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTWUS) : `INST_OP_BITS'(`INST_FPU_CVTWS);
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                    end
                    7'h68: begin
                        op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTSWU) : `INST_OP_BITS'(`INST_FPU_CVTSW);
                        `USED_FREG (rd);
                        `USED_IREG (rs1);
                    end
                    7'h10: begin
                        // FSGNJ=0, FSGNJN=1, FSGNJX=2
                        op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                        op_mod  = {1'b0, func3[1:0]};
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h14: begin
                        // FMIN=3, FMAX=4
                        op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                        op_mod  = func3[0] ? 4 : 3;
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h70: begin
                        if (func3[0]) begin
                            // FCLASS
                            op_type = `INST_OP_BITS'(`INST_FPU_CLASS);
                        end else begin
                            // FMV.X.W=5
                            op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                            op_mod  = 5;
                        end
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                    end
                    7'h78: begin
                        // FMV.W.X=6
                        op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                        op_mod  = 6;
                        `USED_FREG (rd);
                        `USED_IREG (rs1);
                    end
                    default:;
                endcase
            end
        `endif
            `INST_GPGPU: begin
                ex_type = `EX_GPU;
                case (func3)
                    3'h0: begin
                        op_type   = rs2[0] ? `INST_OP_BITS'(`INST_GPU_PRED) : `INST_OP_BITS'(`INST_GPU_TMC);
                        is_wstall = 1;
                        `USED_IREG (rs1);
                    end
                    3'h1: begin
                        op_type = `INST_OP_BITS'(`INST_GPU_WSPAWN);
                        `USED_IREG (rs1);
                        `USED_IREG (rs2);
                    end
                    3'h2: begin
                        op_type   = `INST_OP_BITS'(`INST_GPU_SPLIT);
                        is_wstall = 1;
                        `USED_IREG (rs1);
                    end
                    3'h3: begin
                        op_type = `INST_OP_BITS'(`INST_GPU_JOIN);
                        is_join = 1;
                    end
                    3'h4: begin
                        op_type   = `INST_OP_BITS'(`INST_GPU_BAR);
                        is_wstall = 1;
                        `USED_IREG (rs1);
                        `USED_IREG (rs2);
                    end
                    3'h5: begin
                        // issued to the LSU as a word load with op_mod = 2 and no destination register
                        ex_type = `EX_LSU;
                        op_type = `INST_OP_BITS'(`INST_LSU_LW);
                        op_mod  = `INST_MOD_BITS'(2);
                        `USED_IREG (rs1);
                    end
                    default:;
                endcase
            end
            `INST_GPU: begin
                case (func3)
                `ifdef EXT_TEX_ENABLE
                    3'h0: begin
                        ex_type = `EX_GPU;
                        op_type = `INST_OP_BITS'(`INST_GPU_TEX);
                        op_mod  = `INST_MOD_BITS'(func2);
                        use_rd  = 1;
                        `USED_IREG (rd);
                        `USED_IREG (rs1);
                        `USED_IREG (rs2);
                        `USED_IREG (rs3);
                    end
                `endif
                    default:;
                endcase
            end
            default:;
        endcase
    end

    `UNUSED_VAR (func2)

    // disable write to integer register r0
    wire wb = use_rd && (| rd_r);

    assign decode_if.valid   = ifetch_rsp_if.valid;
    assign decode_if.uuid    = ifetch_rsp_if.uuid;
    assign decode_if.wid     = ifetch_rsp_if.wid;
    assign decode_if.tmask   = ifetch_rsp_if.tmask;
    assign decode_if.PC      = ifetch_rsp_if.PC;
    assign decode_if.ex_type = ex_type;
    assign decode_if.op_type = op_type;
    assign decode_if.op_mod  = op_mod;
    assign decode_if.wb      = wb;
    assign decode_if.rd      = rd_r;
    assign decode_if.rs1     = rs1_r;
    assign decode_if.rs2     = rs2_r;
    assign decode_if.rs3     = rs3_r;
    assign decode_if.imm     = imm;
    assign decode_if.use_PC  = use_PC;
    assign decode_if.use_imm = use_imm;

    ///////////////////////////////////////////////////////////////////////////

    // an instruction leaves the fetch stage when it is valid and decode accepts it
    wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;

    assign join_if.valid = ifetch_rsp_fire && is_join;
    assign join_if.wid   = ifetch_rsp_if.wid;

    assign wstall_if.valid   = ifetch_rsp_fire;
    assign wstall_if.wid     = ifetch_rsp_if.wid;
    assign wstall_if.stalled = is_wstall;

    // back-pressure is passed straight through from the consumer of decode_if
    assign ifetch_rsp_if.ready = decode_if.ready;

`ifdef PERF_ENABLE
    // Counters are advanced by the number of active threads of each accepted instruction.
    wire [$clog2(`NUM_THREADS+1)-1:0] perf_loads_per_cycle;
    wire [$clog2(`NUM_THREADS+1)-1:0] perf_stores_per_cycle;
    wire [$clog2(`NUM_THREADS+1)-1:0] perf_branches_per_cycle;

    // LSU memory ops are split into loads / stores by whether they write back a register
    wire [`NUM_THREADS-1:0] perf_loads_per_mask = decode_if.tmask & {`NUM_THREADS{decode_if.ex_type == `EX_LSU && `INST_LSU_IS_MEM(decode_if.op_mod) && decode_if.wb}};
    wire [`NUM_THREADS-1:0] perf_stores_per_mask = decode_if.tmask & {`NUM_THREADS{decode_if.ex_type == `EX_LSU && `INST_LSU_IS_MEM(decode_if.op_mod) && ~decode_if.wb}};
    wire [`NUM_THREADS-1:0] perf_branches_per_mask = decode_if.tmask & {`NUM_THREADS{decode_if.ex_type == `EX_ALU && `INST_ALU_IS_BR(decode_if.op_mod)}};

    `POP_COUNT(perf_loads_per_cycle, perf_loads_per_mask);
    `POP_COUNT(perf_stores_per_cycle, perf_stores_per_mask);
    `POP_COUNT(perf_branches_per_cycle, perf_branches_per_mask);

    reg [`PERF_CTR_BITS-1:0] perf_loads;
    reg [`PERF_CTR_BITS-1:0] perf_stores;
    reg [`PERF_CTR_BITS-1:0] perf_branches;

    always @(posedge clk) begin
        if (reset) begin
            perf_loads    <= 0;
            perf_stores   <= 0;
            perf_branches <= 0;
        end else begin
            if (decode_if.valid && decode_if.ready) begin
                perf_loads    <= perf_loads    + `PERF_CTR_BITS'(perf_loads_per_cycle);
                perf_stores   <= perf_stores   + `PERF_CTR_BITS'(perf_stores_per_cycle);
                perf_branches <= perf_branches + `PERF_CTR_BITS'(perf_branches_per_cycle);
            end
        end
    end

    assign perf_decode_if.loads    = perf_loads;
    assign perf_decode_if.stores   = perf_stores;
    assign perf_decode_if.branches = perf_branches;
`endif

`ifdef DBG_TRACE_CORE_PIPELINE
    // one trace line per accepted instruction
    always @(posedge clk) begin
        if (decode_if.valid && decode_if.ready) begin
            dpi_trace("%d: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.PC);
            trace_ex_type(decode_if.ex_type);
            dpi_trace(", op=");
            trace_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
            dpi_trace(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b (#%0d)\n",
                decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.use_PC, decode_if.use_imm, decode_if.uuid);
        end
    end
`endif

endmodule
|
||||
@@ -1,416 +1,425 @@
|
||||
`ifndef VX_DEFINE
|
||||
`define VX_DEFINE
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`include "VX_config.vh"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define NW_BITS `LOG2UP(`NUM_WARPS)
|
||||
|
||||
`define NT_BITS `LOG2UP(`NUM_THREADS)
|
||||
|
||||
`define NC_BITS `LOG2UP(`NUM_CORES)
|
||||
|
||||
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
|
||||
|
||||
`define NUM_IREGS 32
|
||||
|
||||
`define NRI_BITS `LOG2UP(`NUM_IREGS)
|
||||
|
||||
`define NTEX_BITS `LOG2UP(`NUM_TEX_UNITS)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define NUM_REGS (2 * `NUM_IREGS)
|
||||
`else
|
||||
`define NUM_REGS `NUM_IREGS
|
||||
`endif
|
||||
|
||||
`define NR_BITS `LOG2UP(`NUM_REGS)
|
||||
|
||||
`define CSR_ADDR_BITS 12
|
||||
|
||||
`define CSR_WIDTH 12
|
||||
|
||||
`define PERF_CTR_BITS 44
|
||||
|
||||
`define UUID_BITS 44
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_FPU 3'h4
|
||||
`define EX_GPU 3'h5
|
||||
`define EX_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_LUI 7'b0110111
|
||||
`define INST_AUIPC 7'b0010111
|
||||
`define INST_JAL 7'b1101111
|
||||
`define INST_JALR 7'b1100111
|
||||
`define INST_B 7'b1100011 // branch instructions
|
||||
`define INST_L 7'b0000011 // load instructions
|
||||
`define INST_S 7'b0100011 // store instructions
|
||||
`define INST_I 7'b0010011 // immediate instructions
|
||||
`define INST_R 7'b0110011 // register instructions
|
||||
`define INST_FENCE 7'b0001111 // Fence instructions
|
||||
`define INST_SYS 7'b1110011 // system instructions
|
||||
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
|
||||
`define INST_GPGPU 7'b1101011
|
||||
`define INST_GPU 7'b1011011
|
||||
|
||||
`define INST_TEX 7'b0101011
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_FRM_RNE 3'b000 // round to nearest even
|
||||
`define INST_FRM_RTZ 3'b001 // round to zero
|
||||
`define INST_FRM_RDN 3'b010 // round to -inf
|
||||
`define INST_FRM_RUP 3'b011 // round to +inf
|
||||
`define INST_FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define INST_FRM_DYN 3'b111 // dynamic mode
|
||||
`define INST_FRM_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_OP_BITS 4
|
||||
`define INST_MOD_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_ALU_ADD 4'b0000
|
||||
`define INST_ALU_LUI 4'b0010
|
||||
`define INST_ALU_AUIPC 4'b0011
|
||||
`define INST_ALU_SLTU 4'b0100
|
||||
`define INST_ALU_SLT 4'b0101
|
||||
`define INST_ALU_SRL 4'b1000
|
||||
`define INST_ALU_SRA 4'b1001
|
||||
`define INST_ALU_SUB 4'b1011
|
||||
`define INST_ALU_AND 4'b1100
|
||||
`define INST_ALU_OR 4'b1101
|
||||
`define INST_ALU_XOR 4'b1110
|
||||
`define INST_ALU_SLL 4'b1111
|
||||
`define INST_ALU_OTHER 4'b0111
|
||||
`define INST_ALU_BITS 4
|
||||
`define INST_ALU_OP(x) x[`INST_ALU_BITS-1:0]
|
||||
`define INST_ALU_OP_CLASS(x) x[3:2]
|
||||
`define INST_ALU_SIGNED(x) x[0]
|
||||
`define INST_ALU_IS_BR(x) x[0]
|
||||
`define INST_ALU_IS_MUL(x) x[1]
|
||||
|
||||
`define INST_BR_EQ 4'b0000
|
||||
`define INST_BR_NE 4'b0010
|
||||
`define INST_BR_LTU 4'b0100
|
||||
`define INST_BR_GEU 4'b0110
|
||||
`define INST_BR_LT 4'b0101
|
||||
`define INST_BR_GE 4'b0111
|
||||
`define INST_BR_JAL 4'b1000
|
||||
`define INST_BR_JALR 4'b1001
|
||||
`define INST_BR_ECALL 4'b1010
|
||||
`define INST_BR_EBREAK 4'b1011
|
||||
`define INST_BR_URET 4'b1100
|
||||
`define INST_BR_SRET 4'b1101
|
||||
`define INST_BR_MRET 4'b1110
|
||||
`define INST_BR_OTHER 4'b1111
|
||||
`define INST_BR_BITS 4
|
||||
`define INST_BR_NEG(x) x[1]
|
||||
`define INST_BR_LESS(x) x[2]
|
||||
`define INST_BR_STATIC(x) x[3]
|
||||
|
||||
`define INST_MUL_MUL 3'h0
|
||||
`define INST_MUL_MULH 3'h1
|
||||
`define INST_MUL_MULHSU 3'h2
|
||||
`define INST_MUL_MULHU 3'h3
|
||||
`define INST_MUL_DIV 3'h4
|
||||
`define INST_MUL_DIVU 3'h5
|
||||
`define INST_MUL_REM 3'h6
|
||||
`define INST_MUL_REMU 3'h7
|
||||
`define INST_MUL_BITS 3
|
||||
`define INST_MUL_IS_DIV(x) x[2]
|
||||
|
||||
`define INST_FMT_B 3'b000
|
||||
`define INST_FMT_H 3'b001
|
||||
`define INST_FMT_W 3'b010
|
||||
`define INST_FMT_BU 3'b100
|
||||
`define INST_FMT_HU 3'b101
|
||||
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LH 4'b0001
|
||||
`define INST_LSU_LW 4'b0010
|
||||
`define INST_LSU_LBU 4'b0100
|
||||
`define INST_LSU_LHU 4'b0101
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SH 4'b1001
|
||||
`define INST_LSU_SW 4'b1010
|
||||
`define INST_LSU_BITS 4
|
||||
`define INST_LSU_FMT(x) x[2:0]
|
||||
`define INST_LSU_WSIZE(x) x[1:0]
|
||||
`define INST_LSU_IS_MEM(x) (3'h0 == x)
|
||||
`define INST_LSU_IS_FENCE(x) (3'h1 == x)
|
||||
`define INST_LSU_IS_PREFETCH(x) (3'h2 == x)
|
||||
|
||||
`define INST_FENCE_BITS 1
|
||||
`define INST_FENCE_D 1'h0
|
||||
`define INST_FENCE_I 1'h1
|
||||
|
||||
`define INST_CSR_RW 2'h1
|
||||
`define INST_CSR_RS 2'h2
|
||||
`define INST_CSR_RC 2'h3
|
||||
`define INST_CSR_OTHER 2'h0
|
||||
`define INST_CSR_BITS 2
|
||||
|
||||
`define INST_FPU_ADD 4'h0
|
||||
`define INST_FPU_SUB 4'h4
|
||||
`define INST_FPU_MUL 4'h8
|
||||
`define INST_FPU_DIV 4'hC
|
||||
`define INST_FPU_CVTWS 4'h1 // FCVT.W.S
|
||||
`define INST_FPU_CVTWUS 4'h5 // FCVT.WU.S
|
||||
`define INST_FPU_CVTSW 4'h9 // FCVT.S.W
|
||||
`define INST_FPU_CVTSWU 4'hD // FCVT.S.WU
|
||||
`define INST_FPU_SQRT 4'h2
|
||||
`define INST_FPU_CLASS 4'h6
|
||||
`define INST_FPU_CMP 4'hA
|
||||
`define INST_FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define INST_FPU_MADD 4'h3
|
||||
`define INST_FPU_MSUB 4'h7
|
||||
`define INST_FPU_NMSUB 4'hB
|
||||
`define INST_FPU_NMADD 4'hF
|
||||
`define INST_FPU_BITS 4
|
||||
|
||||
`define INST_GPU_TMC 4'h0
|
||||
`define INST_GPU_WSPAWN 4'h1
|
||||
`define INST_GPU_SPLIT 4'h2
|
||||
`define INST_GPU_JOIN 4'h3
|
||||
`define INST_GPU_BAR 4'h4
|
||||
`define INST_GPU_PRED 4'h5
|
||||
`define INST_GPU_TEX 4'h6
|
||||
`define INST_GPU_BITS 4
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
`define ISA_EXT_M (1 << 12)
|
||||
`else
|
||||
`define ISA_EXT_M 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define ISA_EXT_F (1 << 5)
|
||||
`else
|
||||
`define ISA_EXT_F 0
|
||||
`endif
|
||||
|
||||
// Bitmask of implemented ISA extensions, one bit per letter: bit 0 = 'A'
// through bit 25 = 'Z'. The F and M bits come from `ISA_EXT_F / `ISA_EXT_M,
// which are (1 << 5) / (1 << 12) when the extension is enabled and 0 otherwise.
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
|
||||
| (0 << 1) // B - Tentatively reserved for Bit operations extension \
|
||||
| (0 << 2) // C - Compressed extension \
|
||||
| (0 << 3) // D - Double precision floating-point extension \
|
||||
| (0 << 4) // E - RV32E base ISA \
|
||||
|`ISA_EXT_F // F - Single precision floating-point extension \
|
||||
| (0 << 6) // G - Additional standard extensions present \
|
||||
| (0 << 7) // H - Hypervisor mode implemented \
|
||||
| (1 << 8) // I - RV32I/64I/128I base ISA \
|
||||
| (0 << 9) // J - Reserved \
|
||||
| (0 << 10) // K - Reserved \
|
||||
| (0 << 11) // L - Tentatively reserved for Decimal Floating-Point extension \
|
||||
|`ISA_EXT_M // M - Integer Multiply/Divide extension \
|
||||
| (0 << 13) // N - User level interrupts supported \
|
||||
| (0 << 14) // O - Reserved \
|
||||
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
|
||||
| (0 << 16) // Q - Quad-precision floating-point extension \
|
||||
| (0 << 17) // R - Reserved \
|
||||
| (0 << 18) // S - Supervisor mode implemented \
|
||||
| (0 << 19) // T - Tentatively reserved for Transactional Memory extension \
|
||||
| (1 << 20) // U - User mode implemented \
|
||||
| (0 << 21) // V - Tentatively reserved for Vector extension \
|
||||
| (0 << 22) // W - Reserved \
|
||||
| (1 << 23) // X - Non-standard extensions present \
|
||||
| (0 << 24) // Y - Reserved \
|
||||
| (0 << 25) // Z - Reserved
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// non-cacheable tag bits
|
||||
`define NC_TAG_BIT 1
|
||||
|
||||
// texture tag bits
|
||||
`define TEX_TAG_BIT 1
|
||||
|
||||
// cache address type bits
|
||||
`define CACHE_ADDR_TYPE_BITS (`NC_TAG_BIT + `SM_ENABLE)
|
||||
|
||||
////////////////////////// Icache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
|
||||
// Word size in bytes
|
||||
`define ICACHE_WORD_SIZE 4
|
||||
|
||||
// Block size in bytes
|
||||
`define ICACHE_LINE_SIZE `L1_BLOCK_SIZE
|
||||
|
||||
// TAG sharing enable
|
||||
`define ICACHE_CORE_TAG_ID_BITS `NW_BITS
|
||||
|
||||
// Core request tag bits
|
||||
`define ICACHE_CORE_TAG_WIDTH (`UUID_BITS + `ICACHE_CORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
`define ICACHE_MEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define ICACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE))
|
||||
|
||||
// Memory request tag bits
|
||||
`define ICACHE_MEM_TAG_WIDTH `CLOG2(`ICACHE_MSHR_SIZE)
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
|
||||
|
||||
// Word size in bytes
|
||||
`define DCACHE_WORD_SIZE 4
|
||||
|
||||
// Block size in bytes
|
||||
`define DCACHE_LINE_SIZE `L1_BLOCK_SIZE
|
||||
|
||||
// Core request tag bits
|
||||
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
`define LSU_TAG_ID_BITS `MAX(`LSUQ_ADDR_BITS, 2)
|
||||
`define LSU_TEX_DCACHE_TAG_BITS (`UUID_BITS + `LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS)
|
||||
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS + `TEX_TAG_BIT)
|
||||
`else
|
||||
`define LSU_TAG_ID_BITS `LSUQ_ADDR_BITS
|
||||
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS)
|
||||
`endif
|
||||
`define DCACHE_CORE_TAG_WIDTH (`UUID_BITS + `DCACHE_CORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
`define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define DCACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define DCACHE_MEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define DCACHE_NUM_REQS `NUM_THREADS
|
||||
|
||||
// Memory request tag bits
|
||||
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DCACHE_WORD_SIZE)
|
||||
`define _DNC_MEM_TAG_WIDTH ($clog2(`DCACHE_NUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCACHE_CORE_TAG_WIDTH)
|
||||
`define DCACHE_MEM_TAG_WIDTH `MAX((`CLOG2(`DCACHE_NUM_BANKS) + `CLOG2(`DCACHE_MSHR_SIZE) + `NC_TAG_BIT), `_DNC_MEM_TAG_WIDTH)
|
||||
|
||||
// Merged D-cache/I-cache memory tag
|
||||
`define L1_MEM_TAG_WIDTH (`MAX(`ICACHE_MEM_TAG_WIDTH, `DCACHE_MEM_TAG_WIDTH) + `CLOG2(2))
|
||||
|
||||
////////////////////////// SM Configurable Knobs //////////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define SMEM_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
|
||||
|
||||
// Word size in bytes
|
||||
`define SMEM_WORD_SIZE 4
|
||||
|
||||
// bank address offset
|
||||
`define SMEM_BANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SMEM_WORD_SIZE)
|
||||
|
||||
// Input request size
|
||||
`define SMEM_NUM_REQS `NUM_THREADS
|
||||
|
||||
////////////////////////// L2cache Configurable Knobs /////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define L2_CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
|
||||
|
||||
// Word size in bytes
|
||||
`define L2_WORD_SIZE `DCACHE_LINE_SIZE
|
||||
|
||||
// Block size in bytes
|
||||
`define L2_CACHE_LINE_SIZE ((`L2_ENABLE) ? `MEM_BLOCK_SIZE : `L2_WORD_SIZE)
|
||||
|
||||
// Input request tag bits
|
||||
`define L2_CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
|
||||
|
||||
// Memory request data bits
|
||||
`define L2_MEM_DATA_WIDTH (`L2_CACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define L2_MEM_ADDR_WIDTH (32 - `CLOG2(`L2_CACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define L2_MEM_BYTEEN_WIDTH `L2_CACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define L2_NUM_REQS `NUM_CORES
|
||||
|
||||
// Memory request tag bits
|
||||
`define _L2_MEM_ADDR_RATIO_W $clog2(`L2_CACHE_LINE_SIZE / `L2_WORD_SIZE)
|
||||
`define _L2_NC_MEM_TAG_WIDTH ($clog2(`L2_NUM_REQS) + `_L2_MEM_ADDR_RATIO_W + `L1_MEM_TAG_WIDTH)
|
||||
`define _L2_MEM_TAG_WIDTH `MAX((`CLOG2(`L2_NUM_BANKS) + `CLOG2(`L2_MSHR_SIZE) + `NC_TAG_BIT), `_L2_NC_MEM_TAG_WIDTH)
|
||||
`define L2_MEM_TAG_WIDTH ((`L2_ENABLE) ? `_L2_MEM_TAG_WIDTH : (`L1_MEM_TAG_WIDTH + `CLOG2(`L2_NUM_REQS)))
|
||||
|
||||
////////////////////////// L3cache Configurable Knobs /////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define L3_CACHE_ID 0
|
||||
|
||||
// Word size in bytes
|
||||
`define L3_WORD_SIZE `L2_CACHE_LINE_SIZE
|
||||
|
||||
// Block size in bytes
|
||||
`define L3_CACHE_LINE_SIZE ((`L3_ENABLE) ? `MEM_BLOCK_SIZE : `L3_WORD_SIZE)
|
||||
|
||||
// Input request tag bits
|
||||
`define L3_CORE_TAG_WIDTH (`L2_CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
|
||||
|
||||
// Memory request data bits
|
||||
`define L3_MEM_DATA_WIDTH (`L3_CACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define L3_MEM_ADDR_WIDTH (32 - `CLOG2(`L3_CACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define L3_MEM_BYTEEN_WIDTH `L3_CACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define L3_NUM_REQS `NUM_CLUSTERS
|
||||
|
||||
// Memory request tag bits
|
||||
`define _L3_MEM_ADDR_RATIO_W $clog2(`L3_CACHE_LINE_SIZE / `L3_WORD_SIZE)
|
||||
`define _L3_NC_MEM_TAG_WIDTH ($clog2(`L3_NUM_REQS) + `_L3_MEM_ADDR_RATIO_W + `L2_MEM_TAG_WIDTH)
|
||||
`define _L3_MEM_TAG_WIDTH `MAX((`CLOG2(`L3_NUM_BANKS) + `CLOG2(`L3_MSHR_SIZE) + `NC_TAG_BIT), `_L3_NC_MEM_TAG_WIDTH)
|
||||
`define L3_MEM_TAG_WIDTH ((`L3_ENABLE) ? `_L3_MEM_TAG_WIDTH : (`L2_MEM_TAG_WIDTH + `CLOG2(`L3_NUM_REQS)))
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_MEM_BYTEEN_WIDTH
|
||||
`define VX_MEM_ADDR_WIDTH `L3_MEM_ADDR_WIDTH
|
||||
`define VX_MEM_DATA_WIDTH `L3_MEM_DATA_WIDTH
|
||||
`define VX_MEM_TAG_WIDTH `L3_MEM_TAG_WIDTH
|
||||
`define VX_CORE_TAG_WIDTH `L3_CORE_TAG_WIDTH
|
||||
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "VX_fpu_types.vh"
|
||||
`include "VX_gpu_types.vh"
|
||||
|
||||
`endif
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_DEFINE_VH
|
||||
`define VX_DEFINE_VH
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`include "VX_config.vh"
|
||||
`include "VX_types.vh"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Index-field sizes derived from the configured element counts.
// <X>_BITS is `CLOG2 of the count; <X>_WIDTH passes that through `UP()
// (defined outside this chunk; presumably clamps to a minimum of 1 - confirm).
// Each _WIDTH is grouped with its own _BITS; text macros expand at the point
// of use, so the definition order has no effect on users of these names.
`define NW_BITS `CLOG2(`NUM_WARPS)
`define NW_WIDTH `UP(`NW_BITS)

`define NT_BITS `CLOG2(`NUM_THREADS)
`define NT_WIDTH `UP(`NT_BITS)

`define NC_BITS `CLOG2(`NUM_CORES)
`define NC_WIDTH `UP(`NC_BITS)

`define NB_BITS `CLOG2(`NUM_BARRIERS)
`define NB_WIDTH `UP(`NB_BITS)
|
||||
|
||||
`define NUM_IREGS 32
|
||||
|
||||
`define NRI_BITS `CLOG2(`NUM_IREGS)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define NUM_REGS (2 * `NUM_IREGS)
|
||||
`else
|
||||
`define NUM_REGS `NUM_IREGS
|
||||
`endif
|
||||
|
||||
`define NR_BITS `CLOG2(`NUM_REGS)
|
||||
|
||||
`define PERF_CTR_BITS 44
|
||||
|
||||
`ifndef NDEBUG
|
||||
`define UUID_WIDTH 44
|
||||
`else
|
||||
`define UUID_WIDTH 1
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Execution-unit identifiers. ALU, LSU and SFU are always counted in
// `NUM_EX_UNITS; the FPU is added through `EXT_F_ENABLED (defined outside
// this chunk; presumably 1 when the F extension is enabled, else 0 - confirm).
// `EX_BITS is the number of bits needed to encode a unit identifier.
`define EX_ALU 0
|
||||
`define EX_LSU 1
|
||||
`define EX_SFU 2
|
||||
`define EX_FPU 3
|
||||
|
||||
`define NUM_EX_UNITS (3 + `EXT_F_ENABLED)
|
||||
`define EX_BITS `CLOG2(`NUM_EX_UNITS)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_LUI 7'b0110111
|
||||
`define INST_AUIPC 7'b0010111
|
||||
`define INST_JAL 7'b1101111
|
||||
`define INST_JALR 7'b1100111
|
||||
`define INST_B 7'b1100011 // branch instructions
|
||||
`define INST_L 7'b0000011 // load instructions
|
||||
`define INST_S 7'b0100011 // store instructions
|
||||
`define INST_I 7'b0010011 // immediate instructions
|
||||
`define INST_R 7'b0110011 // register instructions
|
||||
`define INST_FENCE 7'b0001111 // Fence instructions
|
||||
`define INST_SYS 7'b1110011 // system instructions
|
||||
|
||||
// RV64I instruction specific opcodes (for any W instruction)
|
||||
`define INST_I_W 7'b0011011 // W type immediate instructions
|
||||
`define INST_R_W 7'b0111011 // W type register instructions
|
||||
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
|
||||
// Custom extension opcodes
|
||||
`define INST_EXT1 7'b0001011 // 0x0B
|
||||
`define INST_EXT2 7'b0101011 // 0x2B
|
||||
`define INST_EXT3 7'b1011011 // 0x5B
|
||||
`define INST_EXT4 7'b1111011 // 0x7B
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_FRM_RNE 3'b000 // round to nearest even
|
||||
`define INST_FRM_RTZ 3'b001 // round to zero
|
||||
`define INST_FRM_RDN 3'b010 // round to -inf
|
||||
`define INST_FRM_RUP 3'b011 // round to +inf
|
||||
`define INST_FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define INST_FRM_DYN 3'b111 // dynamic mode
|
||||
`define INST_FRM_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_OP_BITS 4
|
||||
`define INST_MOD_BITS 3
|
||||
`define INST_FMT_BITS 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_ALU_ADD 4'b0000
|
||||
`define INST_ALU_LUI 4'b0010
|
||||
`define INST_ALU_AUIPC 4'b0011
|
||||
`define INST_ALU_SLTU 4'b0100
|
||||
`define INST_ALU_SLT 4'b0101
|
||||
`define INST_ALU_SUB 4'b0111
|
||||
`define INST_ALU_SRL 4'b1000
|
||||
`define INST_ALU_SRA 4'b1001
|
||||
`define INST_ALU_AND 4'b1100
|
||||
`define INST_ALU_OR 4'b1101
|
||||
`define INST_ALU_XOR 4'b1110
|
||||
`define INST_ALU_SLL 4'b1111
|
||||
// NOTE(review): same encoding (4'b0111) as `INST_ALU_SUB defined earlier in this
// list - confirm the overlap is intentional before decoding on either name.
`define INST_ALU_OTHER 4'b0111
|
||||
`define INST_ALU_BITS 4
|
||||
`define INST_ALU_CLASS(op) op[3:2]
|
||||
`define INST_ALU_SIGNED(op) op[0]
|
||||
`define INST_ALU_IS_SUB(op) op[1]
|
||||
`define INST_ALU_IS_BR(mod) mod[0]
|
||||
`define INST_ALU_IS_M(mod) mod[1]
|
||||
`define INST_ALU_IS_W(mod) mod[2]
|
||||
|
||||
`define INST_BR_EQ 4'b0000
|
||||
`define INST_BR_NE 4'b0010
|
||||
`define INST_BR_LTU 4'b0100
|
||||
`define INST_BR_GEU 4'b0110
|
||||
`define INST_BR_LT 4'b0101
|
||||
`define INST_BR_GE 4'b0111
|
||||
`define INST_BR_JAL 4'b1000
|
||||
`define INST_BR_JALR 4'b1001
|
||||
`define INST_BR_ECALL 4'b1010
|
||||
`define INST_BR_EBREAK 4'b1011
|
||||
`define INST_BR_URET 4'b1100
|
||||
`define INST_BR_SRET 4'b1101
|
||||
`define INST_BR_MRET 4'b1110
|
||||
`define INST_BR_OTHER 4'b1111
|
||||
`define INST_BR_BITS 4
|
||||
`define INST_BR_CLASS(op) {1'b0, ~op[3]}
|
||||
`define INST_BR_IS_NEG(op) op[1]
|
||||
`define INST_BR_IS_LESS(op) op[2]
|
||||
`define INST_BR_IS_STATIC(op) op[3]
|
||||
|
||||
`define INST_M_MUL 3'b000
|
||||
`define INST_M_MULHU 3'b001
|
||||
`define INST_M_MULH 3'b010
|
||||
`define INST_M_MULHSU 3'b011
|
||||
`define INST_M_DIV 3'b100
|
||||
`define INST_M_DIVU 3'b101
|
||||
`define INST_M_REM 3'b110
|
||||
`define INST_M_REMU 3'b111
|
||||
`define INST_M_BITS 3
|
||||
`define INST_M_SIGNED(op) (~op[0])
|
||||
`define INST_M_IS_MULX(op) (~op[2])
|
||||
`define INST_M_IS_MULH(op) (op[1:0] != 0)
|
||||
`define INST_M_SIGNED_A(op) (op[1:0] != 1)
|
||||
`define INST_M_IS_REM(op) op[1]
|
||||
|
||||
`define INST_FMT_B 3'b000
|
||||
`define INST_FMT_H 3'b001
|
||||
`define INST_FMT_W 3'b010
|
||||
`define INST_FMT_D 3'b011
|
||||
`define INST_FMT_BU 3'b100
|
||||
`define INST_FMT_HU 3'b101
|
||||
`define INST_FMT_WU 3'b110
|
||||
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LH 4'b0001
|
||||
`define INST_LSU_LW 4'b0010
|
||||
`define INST_LSU_LD 4'b0011 // new for RV64I LD
|
||||
`define INST_LSU_LBU 4'b0100
|
||||
`define INST_LSU_LHU 4'b0101
|
||||
`define INST_LSU_LWU 4'b0110 // new for RV64I LWU
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SH 4'b1001
|
||||
`define INST_LSU_SW 4'b1010
|
||||
`define INST_LSU_SD 4'b1011 // new for RV64I SD
|
||||
`define INST_LSU_FENCE 4'b1111
|
||||
`define INST_LSU_BITS 4
|
||||
`define INST_LSU_FMT(op) op[2:0]
|
||||
`define INST_LSU_WSIZE(op) op[1:0]
|
||||
// True for any opcode of the form 4'b11xx. Of the INST_LSU_* opcodes defined
// just above, only `INST_LSU_FENCE (4'b1111) matches.
`define INST_LSU_IS_FENCE(op) (op[3:2] == 3)
|
||||
|
||||
`define INST_FENCE_BITS 1
|
||||
`define INST_FENCE_D 1'h0
|
||||
`define INST_FENCE_I 1'h1
|
||||
|
||||
`define INST_FPU_ADD 4'b0000
|
||||
`define INST_FPU_SUB 4'b0001
|
||||
`define INST_FPU_MUL 4'b0010
|
||||
`define INST_FPU_DIV 4'b0011
|
||||
`define INST_FPU_SQRT 4'b0100
|
||||
`define INST_FPU_CMP 4'b0101 // mod: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_F2F 4'b0110
|
||||
`define INST_FPU_MISC 4'b0111 // mod: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_F2I 4'b1000
|
||||
`define INST_FPU_F2U 4'b1001
|
||||
`define INST_FPU_I2F 4'b1010
|
||||
`define INST_FPU_U2F 4'b1011
|
||||
`define INST_FPU_MADD 4'b1100
|
||||
`define INST_FPU_MSUB 4'b1101
|
||||
`define INST_FPU_NMSUB 4'b1110
|
||||
`define INST_FPU_NMADD 4'b1111
|
||||
`define INST_FPU_BITS 4
|
||||
// NOTE(review): selects bit 4 of `mod`, but `INST_MOD_BITS is 3 in this header,
// so a `INST_MOD_BITS-wide argument has no bit 4 - confirm the width of the
// signal this macro is meant to be applied to.
`define INST_FPU_IS_W(mod) (mod[4])
|
||||
`define INST_FPU_IS_CLASS(op, mod) (op == `INST_FPU_MISC && mod == 3)
|
||||
`define INST_FPU_IS_MVXW(op, mod) (op == `INST_FPU_MISC && mod == 4)
|
||||
|
||||
`define INST_SFU_TMC 4'h0
|
||||
`define INST_SFU_WSPAWN 4'h1
|
||||
`define INST_SFU_SPLIT 4'h2
|
||||
`define INST_SFU_JOIN 4'h3
|
||||
`define INST_SFU_BAR 4'h4
|
||||
`define INST_SFU_PRED 4'h5
|
||||
`define INST_SFU_CSRRW 4'h6
|
||||
`define INST_SFU_CSRRS 4'h7
|
||||
`define INST_SFU_CSRRC 4'h8
|
||||
`define INST_SFU_TEX 4'h9
|
||||
`define INST_SFU_RASTER 4'hA
|
||||
`define INST_SFU_ROP 4'hB
|
||||
`define INST_SFU_CMOV 4'hC
|
||||
`define INST_SFU_BITS 4
|
||||
// Maps f3 = 1 / 2 / 3 onto `INST_SFU_CSRRW / `INST_SFU_CSRRS / `INST_SFU_CSRRC
// (4'h6 / 4'h7 / 4'h8). The arithmetic is 4 bits wide; any other f3 value lands
// on a non-CSR opcode (e.g. f3 = 5 gives 4'hA), so callers are presumably
// expected to pass only 1..3 - confirm at the call site.
`define INST_SFU_CSR(f3) (4'h6 + 4'(f3) - 4'h1)
|
||||
`define INST_SFU_IS_WCTL(op) (op <= 5)
|
||||
`define INST_SFU_IS_CSR(op) (op >= 6 && op <= 8)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// non-cacheable tag bits
|
||||
`define NC_TAG_BITS 1
|
||||
|
||||
// cache address type bits
|
||||
`ifdef SM_ENABLE
|
||||
`define CACHE_ADDR_TYPE_BITS (`NC_TAG_BITS + 1)
|
||||
`else
|
||||
`define CACHE_ADDR_TYPE_BITS `NC_TAG_BITS
|
||||
`endif
|
||||
|
||||
// Evaluates to `CLOG2(ceil(I / O)) when I > O, and to 0 otherwise
// ((I + O - 1) / O is the integer ceiling of I / O). The name suggests this is
// the select-field width for an arbiter with I inputs and O outputs.
`define ARB_SEL_BITS(I, O) ((I > O) ? `CLOG2((I + O - 1) / O) : 0)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Tag-width arithmetic for a single cache instance.
//   CACHE_MEM_TAG_WIDTH       : CLOG2(mshr_size) + CLOG2(num_banks) + `NC_TAG_BITS.
//   CACHE_NC_BYPASS_TAG_WIDTH : CLOG2(num_reqs) + CLOG2(line_size / word_size) + tag_width.
//   CACHE_BYPASS_TAG_WIDTH    : CACHE_NC_BYPASS_TAG_WIDTH plus `NC_TAG_BITS.
//   CACHE_NC_MEM_TAG_WIDTH    : the larger of CACHE_MEM_TAG_WIDTH and
//                               CACHE_NC_BYPASS_TAG_WIDTH (via `MAX).
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks) \
|
||||
(`CLOG2(mshr_size) + `CLOG2(num_banks) + `NC_TAG_BITS)
|
||||
|
||||
`define CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \
|
||||
(`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width)
|
||||
|
||||
`define CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \
|
||||
(`CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) + `NC_TAG_BITS)
|
||||
|
||||
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width) \
|
||||
`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width))
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Tag-width arithmetic for a cluster of num_caches caches shared by num_inputs
// requesters. These wrap the single-cache macros with extra `ARB_SEL_BITS terms:
//   CACHE_CLUSTER_CORE_ARB_TAG : tag_width + ARB_SEL_BITS(num_inputs, UP(num_caches)).
//   CACHE_CLUSTER_MEM_ARB_TAG  : tag_width + ARB_SEL_BITS(UP(num_caches), 1).
// The remaining macros apply CACHE_CLUSTER_MEM_ARB_TAG to the corresponding
// single-cache width, using CACHE_CLUSTER_CORE_ARB_TAG(tag_width, ...) wherever
// the single-cache formula takes a request tag width.
`define CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(num_inputs, `UP(num_caches)))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(`UP(num_caches), 1))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG((`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG((`CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)) + `NC_TAG_BITS), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_NC_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches))), num_caches)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L2_LINE_SIZE `L1_LINE_SIZE
|
||||
`endif
|
||||
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L3_LINE_SIZE `L2_LINE_SIZE
|
||||
`endif
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
|
||||
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
|
||||
// NOTE(review): L3_MEM_TAG_WIDTH is written without a backtick, so it expands to
// a plain identifier rather than a macro - presumably a parameter/localparam
// that must be visible wherever `VX_MEM_TAG_WIDTH is used; confirm.
`define VX_MEM_TAG_WIDTH L3_MEM_TAG_WIDTH
|
||||
|
||||
`define VX_DCR_ADDR_WIDTH `VX_DCR_ADDR_BITS
|
||||
`define VX_DCR_DATA_WIDTH 32
|
||||
|
||||
// Widens x to `MEM_ADDR_WIDTH bits by appending zero low-order bits, i.e. x
// ends up in the most-significant $bits(x) bits of the result.
`define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Drives dst from src, optionally through one register stage.
//   enable true  : src is sampled on posedge clk into __busy (cleared to 1'b0
//                  while reset is asserted), so dst follows src one cycle later.
//   enable false : __busy is a continuous assignment of src.
// Relies on `clk` and `reset` being visible in the expanding scope. The
// expansion has no trailing semicolon; the caller supplies it.
// NOTE(review): the internal signal has the fixed name __busy, so two
// expansions in the same scope would declare it twice - confirm usage.
`define BUFFER_BUSY(dst, src, enable) \
|
||||
logic __busy; \
|
||||
if (enable) begin \
|
||||
always @(posedge clk) begin \
|
||||
if (reset) begin \
|
||||
__busy <= 1'b0; \
|
||||
end else begin \
|
||||
__busy <= src; \
|
||||
end \
|
||||
end \
|
||||
end else begin \
|
||||
assign __busy = src; \
|
||||
end \
|
||||
assign dst = __busy
|
||||
|
||||
// POP_COUNT_EX instantiates a VX_popcount named __<out> with N = $bits(in) and
// MODEL = model, wiring `in` to data_in and `out` to data_out. The expansion
// has no trailing semicolon; the caller supplies it.
// POP_COUNT is POP_COUNT_EX with model fixed to 1.
`define POP_COUNT_EX(out, in, model) \
|
||||
VX_popcount #( \
|
||||
.N ($bits(in)), \
|
||||
.MODEL (model) \
|
||||
) __``out ( \
|
||||
.data_in (in), \
|
||||
.data_out (out) \
|
||||
)
|
||||
|
||||
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
|
||||
|
||||
// Connects two memory-bus interfaces whose req_data / rsp_data types match.
// Request path: src -> dst (req_valid, req_data), with req_ready returned
// dst -> src. Response path: dst -> src (rsp_valid, rsp_data), with rsp_ready
// returned src -> dst. No trailing semicolon; the caller supplies it.
`define ASSIGN_VX_MEM_BUS_IF(dst, src) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data = src.req_data; \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data = dst.rsp_data; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
// Field-by-field variant of ASSIGN_VX_MEM_BUS_IF for interfaces whose tag
// widths differ: TD is the dst tag width, TS the src tag width.
//   Request  : when TD != TS the src tag is placed in the upper TS bits of the
//              dst tag and the low (TD - TS) bits are zero; otherwise the tag
//              is copied unchanged.
//   Response : the src tag is the upper TS bits of the dst tag
//              (tag[TD-1 -: TS]), which undoes the request-side padding.
// NOTE(review): the (TD-TS) replication count presumably requires TD >= TS -
// confirm at the call sites.
`define ASSIGN_VX_MEM_BUS_IF_X(dst, src, TD, TS) \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data.rw = src.req_data.rw; \
|
||||
assign dst.req_data.byteen = src.req_data.byteen; \
|
||||
assign dst.req_data.addr = src.req_data.addr; \
|
||||
assign dst.req_data.data = src.req_data.data; \
|
||||
if (TD != TS) \
|
||||
assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \
|
||||
else \
|
||||
assign dst.req_data.tag = src.req_data.tag; \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data.data = dst.rsp_data.data; \
|
||||
assign src.rsp_data.tag = dst.rsp_data.tag[TD-1 -: TS]; \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
// Declares a new VX_dcr_bus_if instance named dst and drives its write_valid /
// write_addr / write_data from src.
//   enable true  : the three fields are captured on posedge clk into __<dst>
//                  (this register has no reset term), adding one cycle.
//   enable false : the fields pass through a continuous assignment.
// Relies on `clk` being visible in the expanding scope. No trailing semicolon;
// the caller supplies it.
`define BUFFER_DCR_BUS_IF(dst, src, enable) \
|
||||
logic [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __``dst; \
|
||||
if (enable) begin \
|
||||
always @(posedge clk) begin \
|
||||
__``dst <= {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
end else begin \
|
||||
assign __``dst = {src.write_valid, src.write_addr, src.write_data}; \
|
||||
end \
|
||||
VX_dcr_bus_if dst(); \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = __``dst
|
||||
|
||||
// Sums one counter field across an array of interfaces and registers the sum.
//   1. Gathers src[0..count-1].field into a count x width packed array.
//   2. Feeds it to a VX_reduce instance configured with OP("+").
//   3. Registers the reduced value on posedge clk (cleared to '0 on reset).
//   4. Assigns the registered value to dst.field.
// Helper names are built by token-pasting src/dst with field. Relies on `clk`
// and `reset` being visible in the expanding scope. No trailing semicolon; the
// caller supplies it.
`define PERF_REDUCE(dst, src, field, width, count) \
|
||||
wire [count-1:0][width-1:0] __reduce_add_i_``src``field; \
|
||||
wire [width-1:0] __reduce_add_o_``dst``field; \
|
||||
reg [width-1:0] __reduce_add_r_``dst``field; \
|
||||
for (genvar __i = 0; __i < count; ++__i) begin \
|
||||
assign __reduce_add_i_``src``field[__i] = ``src[__i].``field; \
|
||||
end \
|
||||
VX_reduce #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_``dst``field ( \
|
||||
__reduce_add_i_``src``field, \
|
||||
__reduce_add_o_``dst``field \
|
||||
); \
|
||||
always @(posedge clk) begin \
|
||||
if (reset) begin \
|
||||
__reduce_add_r_``dst``field <= '0; \
|
||||
end else begin \
|
||||
__reduce_add_r_``dst``field <= __reduce_add_o_``dst``field; \
|
||||
end \
|
||||
end \
|
||||
assign ``dst.``field = __reduce_add_r_``dst``field
|
||||
|
||||
// Applies PERF_REDUCE, at `PERF_CTR_BITS width, to each of the eight cache
// counter fields listed below, summing src[0..count-1] into dst.
// No trailing semicolon after the last expansion; the caller supplies it.
`define PERF_CACHE_ADD(dst, src, count) \
|
||||
`PERF_REDUCE (dst, src, reads, `PERF_CTR_BITS, count); \
|
||||
`PERF_REDUCE (dst, src, writes, `PERF_CTR_BITS, count); \
|
||||
`PERF_REDUCE (dst, src, read_misses, `PERF_CTR_BITS, count); \
|
||||
`PERF_REDUCE (dst, src, write_misses, `PERF_CTR_BITS, count); \
|
||||
`PERF_REDUCE (dst, src, bank_stalls, `PERF_CTR_BITS, count); \
|
||||
`PERF_REDUCE (dst, src, mshr_stalls, `PERF_CTR_BITS, count); \
|
||||
`PERF_REDUCE (dst, src, mem_stalls, `PERF_CTR_BITS, count); \
|
||||
`PERF_REDUCE (dst, src, crsp_stalls, `PERF_CTR_BITS, count)
|
||||
|
||||
// Builds dst from src and block_idx, selected at elaboration by block_size:
//   block_size == 1          : dst = src.
//   block_size == `NUM_WARPS : dst = block_idx resized to `NW_WIDTH bits.
//   otherwise                : dst = { src[`NW_WIDTH-1 : CLOG2(block_size)],
//                              block_idx resized to CLOG2(block_size) bits }.
// The expansion ends with `end`, so it needs no trailing semicolon.
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
|
||||
if (block_size != 1) begin \
|
||||
if (block_size != `NUM_WARPS) begin \
|
||||
assign dst = {src[`NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \
|
||||
end else begin \
|
||||
assign dst = `NW_WIDTH'(block_idx); \
|
||||
end \
|
||||
end else begin \
|
||||
assign dst = src; \
|
||||
end
|
||||
|
||||
// Concatenates the fields of `data` into one packed vector, with `tid`
// inserted between data.rd and data.rs1_data. The field order fixes the bit
// layout, so any consumer that unpacks the vector must use the same order.
`define TO_DISPATCH_DATA(data, tid) \
|
||||
{data.uuid, data.wis, data.tmask, data.op_type, data.op_mod, data.wb, data.use_PC, data.use_imm, data.PC, data.imm, data.rd, tid, data.rs1_data, data.rs2_data, data.rs3_data}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`endif // VX_DEFINE_VH
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_dispatch
// Steers the current ibuffer_if entry to one execution-unit request interface
// according to ibuffer_if.ex_type: ALU, LSU, CSR, FPU (only when EXT_F_ENABLE
// is defined) or GPU. Each path passes through its own VX_skid_buffer with
// OUT_REG=1. ibuffer_if.ready is the ready_in of the buffer selected by
// ex_type.
module VX_dispatch (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_ibuffer_if.slave ibuffer_if,
|
||||
VX_gpr_rsp_if.slave gpr_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if.master alu_req_if,
|
||||
VX_lsu_req_if.master lsu_req_if,
|
||||
VX_csr_req_if.master csr_req_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if.master fpu_req_if,
|
||||
`endif
|
||||
VX_gpu_req_if.master gpu_req_if
|
||||
);
|
||||
// tid is the cnt_o output of the VX_lzc instance below, computed over the
// thread mask of the current instruction.
wire [`NT_BITS-1:0] tid;
|
||||
// ready_in of each skid buffer; muxed into ibuffer_if.ready at the bottom.
wire alu_req_ready;
|
||||
wire lsu_req_ready;
|
||||
wire csr_req_ready;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_req_ready;
|
||||
`endif
|
||||
wire gpu_req_ready;
|
||||
|
||||
// NOTE(review): presumably yields the index of one active thread in tmask;
// VX_lzc is defined elsewhere -- confirm which end of the mask it counts from.
VX_lzc #(
|
||||
.N (`NUM_THREADS)
|
||||
) tid_select (
|
||||
.in_i (ibuffer_if.tmask),
|
||||
.cnt_o (tid),
|
||||
`UNUSED_PIN (valid_o)
|
||||
);
|
||||
|
||||
// PC of the following instruction: fixed 4-byte step on a 32-bit PC.
wire [31:0] next_PC = ibuffer_if.PC + 4;
|
||||
|
||||
// ALU unit
|
||||
|
||||
wire alu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_ALU);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
// DATAW is the sum of the widths of the fields in data_in/data_out; the two
// concatenations list the same fields in the same order.
VX_skid_buffer #(
|
||||
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (alu_req_valid),
|
||||
.ready_in (alu_req_ready),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, alu_op_type, ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.uuid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.valid_out (alu_req_if.valid),
|
||||
.ready_out (alu_req_if.ready)
|
||||
);
|
||||
|
||||
// lsu unit
|
||||
|
||||
wire lsu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU);
|
||||
wire [`INST_LSU_BITS-1:0] lsu_op_type = `INST_LSU_BITS'(ibuffer_if.op_type);
|
||||
// Fence and prefetch are both decoded from op_mod.
wire lsu_is_fence = `INST_LSU_IS_FENCE(ibuffer_if.op_mod);
|
||||
wire lsu_is_prefetch = `INST_LSU_IS_PREFETCH(ibuffer_if.op_mod);
|
||||
|
||||
// imm becomes the LSU offset, rs1_data the base address and rs2_data the
// store data (see the data_out field names).
VX_skid_buffer #(
|
||||
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32) + 1),
|
||||
.OUT_REG (1)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (lsu_req_valid),
|
||||
.ready_in (lsu_req_ready),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, lsu_op_type, lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, lsu_is_prefetch}),
|
||||
.data_out ({lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data, lsu_req_if.is_prefetch}),
|
||||
.valid_out (lsu_req_if.valid),
|
||||
.ready_out (lsu_req_if.ready)
|
||||
);
|
||||
|
||||
// csr unit
|
||||
|
||||
wire csr_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_CSR);
|
||||
wire [`INST_CSR_BITS-1:0] csr_op_type = `INST_CSR_BITS'(ibuffer_if.op_type);
|
||||
// The immediate carries two fields: the CSR address in its low CSR_ADDR_BITS
// bits and an NRI_BITS-wide immediate directly above it.
wire [`CSR_ADDR_BITS-1:0] csr_addr = ibuffer_if.imm[`CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = ibuffer_if.imm[`CSR_ADDR_BITS +: `NRI_BITS];
|
||||
// Only the rs1 value of thread 'tid' is forwarded to the CSR unit.
wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32),
|
||||
.OUT_REG (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, csr_op_type, csr_addr, ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, csr_imm, csr_rs1_data}),
|
||||
.data_out ({csr_req_if.uuid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.imm, csr_req_if.rs1_data}),
|
||||
.valid_out (csr_req_if.valid),
|
||||
.ready_out (csr_req_if.ready)
|
||||
);
|
||||
|
||||
// fpu unit
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU);
|
||||
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
// The FPU path forwards all three source operands.
VX_skid_buffer #(
|
||||
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (fpu_req_valid),
|
||||
.ready_in (fpu_req_ready),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, fpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.uuid, fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.valid_out (fpu_req_if.valid),
|
||||
.ready_out (fpu_req_if.ready)
|
||||
);
|
||||
`else
|
||||
`UNUSED_VAR (gpr_rsp_if.rs3_data)
|
||||
`endif
|
||||
|
||||
// gpu unit
|
||||
|
||||
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
|
||||
wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
// The GPU path forwards next_PC, tid and all three source operands.
VX_skid_buffer #(
|
||||
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({gpu_req_if.uuid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.op_mod, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
||||
// can take next request?
|
||||
// Forward the ready of the buffer that matches ex_type. Any ex_type not
// listed (including EX_FPU when EXT_F_ENABLE is undefined) takes the default
// branch: the entry is accepted right away and no buffer sees a valid.
reg ready_r;
|
||||
always @(*) begin
|
||||
case (ibuffer_if.ex_type)
|
||||
`EX_ALU: ready_r = alu_req_ready;
|
||||
`EX_LSU: ready_r = lsu_req_ready;
|
||||
`EX_CSR: ready_r = csr_req_ready;
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: ready_r = fpu_req_ready;
|
||||
`endif
|
||||
`EX_GPU: ready_r = gpu_req_ready;
|
||||
default: ready_r = 1'b1; // ignore NOPs
|
||||
endcase
|
||||
end
|
||||
assign ibuffer_if.ready = ready_r;
|
||||
|
||||
endmodule
|
||||
@@ -1,237 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_execute
// Execution-stage wrapper. Instantiates the ALU, LSU, CSR, FPU (only when
// EXT_F_ENABLE is defined) and GPU units and connects their request and
// commit interfaces. When EXT_TEX_ENABLE is defined, the LSU and the texture
// port of the GPU unit share the single dcache port through a VX_cache_arb;
// otherwise the LSU is wired to the dcache port directly.
module VX_execute #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_execute
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
|
||||
// commit interface
|
||||
VX_cmt_to_csr_if.slave cmt_to_csr_if,
|
||||
|
||||
// fetch interface
|
||||
VX_fetch_to_csr_if.slave fetch_to_csr_if,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
// inputs
|
||||
VX_alu_req_if.slave alu_req_if,
|
||||
VX_lsu_req_if.slave lsu_req_if,
|
||||
VX_csr_req_if.slave csr_req_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if.slave fpu_req_if,
|
||||
`endif
|
||||
VX_gpu_req_if.slave gpu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_branch_ctl_if.master branch_ctl_if,
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master alu_commit_if,
|
||||
VX_commit_if.master ld_commit_if,
|
||||
VX_commit_if.master st_commit_if,
|
||||
VX_commit_if.master csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.master fpu_commit_if,
|
||||
`endif
|
||||
VX_commit_if.master gpu_commit_if,
|
||||
|
||||
input wire busy
|
||||
);
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
|
||||
// Private dcache request/response interfaces for the two requesters (LSU and
// texture). All four use the same NUM_REQS, WORD_SIZE and TAG_WIDTH.
VX_dcache_req_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
|
||||
) lsu_dcache_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
|
||||
) lsu_dcache_rsp_if();
|
||||
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
|
||||
) tex_dcache_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
|
||||
) tex_dcache_rsp_if();
|
||||
|
||||
VX_tex_csr_if tex_csr_if();
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_tex_if perf_tex_if();
|
||||
`endif
|
||||
|
||||
// Two-requester arbiter in front of the dcache port. Every *_in/*_out pair is
// concatenated as {tex, lsu}, which places the LSU in the low-order slot and
// the texture requester in the high-order slot.
VX_cache_arb #(
|
||||
.NUM_REQS (2),
|
||||
.LANES (`NUM_THREADS),
|
||||
.DATA_SIZE (4),
|
||||
.TAG_IN_WIDTH (`LSU_TEX_DCACHE_TAG_BITS),
|
||||
.TAG_SEL_IDX (`NC_TAG_BIT + `SM_ENABLE)
|
||||
) tex_lsu_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Tex/LSU request
|
||||
.req_valid_in ({tex_dcache_req_if.valid, lsu_dcache_req_if.valid}),
|
||||
.req_rw_in ({tex_dcache_req_if.rw, lsu_dcache_req_if.rw}),
|
||||
.req_byteen_in ({tex_dcache_req_if.byteen, lsu_dcache_req_if.byteen}),
|
||||
.req_addr_in ({tex_dcache_req_if.addr, lsu_dcache_req_if.addr}),
|
||||
.req_data_in ({tex_dcache_req_if.data, lsu_dcache_req_if.data}),
|
||||
.req_tag_in ({tex_dcache_req_if.tag, lsu_dcache_req_if.tag}),
|
||||
.req_ready_in ({tex_dcache_req_if.ready, lsu_dcache_req_if.ready}),
|
||||
|
||||
// Dcache request
|
||||
.req_valid_out (dcache_req_if.valid),
|
||||
.req_rw_out (dcache_req_if.rw),
|
||||
.req_byteen_out (dcache_req_if.byteen),
|
||||
.req_addr_out (dcache_req_if.addr),
|
||||
.req_data_out (dcache_req_if.data),
|
||||
.req_tag_out (dcache_req_if.tag),
|
||||
.req_ready_out (dcache_req_if.ready),
|
||||
|
||||
// Dcache response
|
||||
.rsp_valid_in (dcache_rsp_if.valid),
|
||||
.rsp_tmask_in (dcache_rsp_if.tmask),
|
||||
.rsp_tag_in (dcache_rsp_if.tag),
|
||||
.rsp_data_in (dcache_rsp_if.data),
|
||||
.rsp_ready_in (dcache_rsp_if.ready),
|
||||
|
||||
// Tex/LSU response
|
||||
.rsp_valid_out ({tex_dcache_rsp_if.valid, lsu_dcache_rsp_if.valid}),
|
||||
.rsp_tmask_out ({tex_dcache_rsp_if.tmask, lsu_dcache_rsp_if.tmask}),
|
||||
.rsp_data_out ({tex_dcache_rsp_if.data, lsu_dcache_rsp_if.data}),
|
||||
.rsp_tag_out ({tex_dcache_rsp_if.tag, lsu_dcache_rsp_if.tag}),
|
||||
.rsp_ready_out ({tex_dcache_rsp_if.ready, lsu_dcache_rsp_if.ready})
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
// Per-warp pending flags exchanged between the two units:
//   csr_unit.pending -> csr_pending -> fpu_unit.csr_pending
//   fpu_unit.pending -> fpu_pending -> csr_unit.fpu_pending
wire [`NUM_WARPS-1:0] csr_pending;
|
||||
wire [`NUM_WARPS-1:0] fpu_pending;
|
||||
VX_fpu_to_csr_if fpu_to_csr_if();
|
||||
`endif
|
||||
|
||||
// alu_reset, lsu_reset, csr_reset and gpu_reset are introduced by RESET_RELAY
// (defined elsewhere) and used as the reset input of the matching unit below.
`RESET_RELAY (alu_reset);
|
||||
`RESET_RELAY (lsu_reset);
|
||||
`RESET_RELAY (csr_reset);
|
||||
`RESET_RELAY (gpu_reset);
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
.alu_req_if (alu_req_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.alu_commit_if (alu_commit_if)
|
||||
);
|
||||
|
||||
// The LSU talks to the arbiter-side interfaces when texture support is
// enabled and to the module's dcache ports otherwise. Loads and stores commit
// on separate interfaces.
VX_lsu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) lsu_unit (
|
||||
`SCOPE_BIND_VX_execute_lsu_unit
|
||||
.clk (clk),
|
||||
.reset (lsu_reset),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.dcache_req_if (lsu_dcache_req_if),
|
||||
.dcache_rsp_if (lsu_dcache_rsp_if),
|
||||
`else
|
||||
.dcache_req_if (dcache_req_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
`endif
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.st_commit_if (st_commit_if)
|
||||
);
|
||||
|
||||
// The set of CSR unit ports depends on PERF_ENABLE, EXT_TEX_ENABLE and
// EXT_F_ENABLE; without the F extension its 'pending' output is left unused.
VX_csr_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_unit (
|
||||
.clk (clk),
|
||||
.reset (csr_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.perf_tex_if (perf_tex_if),
|
||||
`endif
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if(perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.fpu_pending (fpu_pending),
|
||||
.pending (csr_pending),
|
||||
`else
|
||||
`UNUSED_PIN (pending),
|
||||
`endif
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.tex_csr_if (tex_csr_if),
|
||||
`endif
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`RESET_RELAY (fpu_reset);
|
||||
|
||||
VX_fpu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.csr_pending (csr_pending),
|
||||
.pending (fpu_pending)
|
||||
);
|
||||
`endif
|
||||
|
||||
// With texture support the GPU unit additionally gets the texture CSR
// interface and the texture-side dcache interfaces of the arbiter.
VX_gpu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) gpu_unit (
|
||||
`SCOPE_BIND_VX_execute_gpu_unit
|
||||
.clk (clk),
|
||||
.reset (gpu_reset),
|
||||
.gpu_req_if (gpu_req_if),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_tex_if (perf_tex_if),
|
||||
`endif
|
||||
.tex_csr_if (tex_csr_if),
|
||||
.dcache_req_if (tex_dcache_req_if),
|
||||
.dcache_rsp_if (tex_dcache_rsp_if),
|
||||
`endif
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.gpu_commit_if (gpu_commit_if)
|
||||
);
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
// ebreak is high in a cycle where an ALU request is accepted (valid && ready),
// INST_ALU_IS_BR(op_mod) holds, and op_type is INST_BR_EBREAK or
// INST_BR_ECALL. The net carries a "verilator public" attribute.
wire ebreak /* verilator public */;
|
||||
assign ebreak = alu_req_if.valid && alu_req_if.ready
|
||||
&& `INST_ALU_IS_BR(alu_req_if.op_mod)
|
||||
&& (`INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_EBREAK
|
||||
|| `INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_ECALL);
|
||||
|
||||
endmodule
|
||||
@@ -1,68 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_fetch
// Fetch-stage wrapper made of two sub-blocks connected by the internal
// ifetch_req_if interface: VX_warp_sched produces fetch requests and
// VX_icache_stage turns them into icache traffic and returns ifetch_rsp_if.
// The module contains no logic of its own.
module VX_fetch #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_fetch
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Icache interface
|
||||
VX_icache_req_if.master icache_req_if,
|
||||
VX_icache_rsp_if.slave icache_rsp_if,
|
||||
|
||||
// inputs
|
||||
VX_wstall_if.slave wstall_if,
|
||||
VX_join_if.slave join_if,
|
||||
VX_branch_ctl_if.slave branch_ctl_if,
|
||||
VX_warp_ctl_if.slave warp_ctl_if,
|
||||
|
||||
// outputs
|
||||
VX_ifetch_rsp_if.master ifetch_rsp_if,
|
||||
|
||||
// csr interface
|
||||
VX_fetch_to_csr_if.master fetch_to_csr_if,
|
||||
|
||||
// busy status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
// Internal link from the warp scheduler to the icache stage.
VX_ifetch_req_if ifetch_req_if();
|
||||
|
||||
// The scheduler receives every control input of this module (warp control,
// stall, join, branch) and drives ifetch_req_if, fetch_to_csr_if and busy.
VX_warp_sched #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) warp_sched (
|
||||
`SCOPE_BIND_VX_fetch_warp_sched
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
|
||||
.ifetch_req_if (ifetch_req_if),
|
||||
|
||||
.fetch_to_csr_if (fetch_to_csr_if),
|
||||
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
// The icache stage sits between ifetch_req_if/ifetch_rsp_if on the pipeline
// side and icache_req_if/icache_rsp_if on the cache side.
VX_icache_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) icache_stage (
|
||||
`SCOPE_BIND_VX_fetch_icache_stage
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.icache_rsp_if (icache_rsp_if),
|
||||
.icache_req_if (icache_req_if),
|
||||
|
||||
.ifetch_req_if (ifetch_req_if),
|
||||
.ifetch_rsp_if (ifetch_rsp_if)
|
||||
);
|
||||
|
||||
endmodule
|
||||
@@ -1,219 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_fpu_unit
// Wraps one of three FPU back-ends (VX_fpu_dpi, VX_fpu_fpnew or VX_fpu_fpga,
// chosen by FPU_DPI / FPU_FPNEW) behind the fpu_req_if / fpu_commit_if
// handshakes. Request metadata is parked in a VX_index_buffer and looked up
// again by the tag the back-end returns. Exception flags are OR-merged across
// active threads and written to the CSR interface when the result commits.
// 'pending' exposes one flag per warp; 'csr_pending' blocks new requests of a
// warp while it is set.
module VX_fpu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_fpu_req_if.slave fpu_req_if,
|
||||
VX_fpu_to_csr_if.master fpu_to_csr_if,
|
||||
VX_commit_if.master fpu_commit_if,
|
||||
|
||||
input wire[`NUM_WARPS-1:0] csr_pending,
|
||||
output wire[`NUM_WARPS-1:0] pending
|
||||
);
|
||||
import fpu_types::*;
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
// Width of the request tag: address width of the metadata buffer.
localparam FPUQ_BITS = `LOG2UP(`FPUQ_SIZE);
|
||||
|
||||
// Handshake wires to/from the selected FPU back-end.
wire ready_in;
|
||||
wire valid_out;
|
||||
wire ready_out;
|
||||
|
||||
// Metadata read back from the index buffer for the response being returned.
wire [`UUID_BITS-1:0] rsp_uuid;
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
wire [31:0] rsp_PC;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
|
||||
wire has_fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags;
|
||||
wire [`NUM_THREADS-1:0][31:0] result;
|
||||
|
||||
wire [FPUQ_BITS-1:0] tag_in, tag_out;
|
||||
wire fpuq_full;
|
||||
|
||||
// A slot is acquired when a request is accepted and released when the
// back-end's response is taken.
wire fpuq_push = fpu_req_if.valid && fpu_req_if.ready;
|
||||
wire fpuq_pop = valid_out && ready_out;
|
||||
|
||||
// tag_in (write_addr) accompanies the request into the back-end; tag_out comes
// back with the response and is used both to read and to release the slot.
VX_index_buffer #(
|
||||
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
|
||||
.SIZE (`FPUQ_SIZE)
|
||||
) req_metadata (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_slot (fpuq_push),
|
||||
.write_addr (tag_in),
|
||||
.read_addr (tag_out),
|
||||
.release_addr (tag_out),
|
||||
.write_data ({fpu_req_if.uuid, fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
|
||||
.read_data ({rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
||||
.release_slot (fpuq_pop),
|
||||
.full (fpuq_full),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
// Ready and valid_in apply the same two gates: the metadata buffer must have
// room and no CSR operation may be pending for the requesting warp.
assign fpu_req_if.ready = ready_in && ~fpuq_full && !csr_pending[fpu_req_if.wid];
|
||||
|
||||
wire valid_in = fpu_req_if.valid && ~fpuq_full && !csr_pending[fpu_req_if.wid];
|
||||
|
||||
// resolve dynamic FRM from CSR
|
||||
// When op_mod equals INST_FRM_DYN the rounding mode read from the CSR
// interface for this warp is used; otherwise op_mod itself is the mode.
assign fpu_to_csr_if.read_wid = fpu_req_if.wid;
|
||||
wire [`INST_FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `INST_FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod;
|
||||
|
||||
// Back-end selection. All three instances share the same port hookup; only
// the module name and parameters differ.
`ifdef FPU_DPI
|
||||
|
||||
VX_fpu_dpi #(
|
||||
.TAGW (FPUQ_BITS)
|
||||
) fpu_dpi (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
|
||||
.tag_in (tag_in),
|
||||
|
||||
.op_type (fpu_req_if.op_type),
|
||||
.frm (fpu_frm),
|
||||
|
||||
.dataa (fpu_req_if.rs1_data),
|
||||
.datab (fpu_req_if.rs2_data),
|
||||
.datac (fpu_req_if.rs3_data),
|
||||
.result (result),
|
||||
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
|
||||
.tag_out (tag_out),
|
||||
|
||||
.ready_out (ready_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
||||
`elsif FPU_FPNEW
|
||||
|
||||
VX_fpu_fpnew #(
|
||||
.FMULADD (1),
|
||||
.FDIVSQRT (1),
|
||||
.FNONCOMP (1),
|
||||
.FCONV (1),
|
||||
.TAGW (FPUQ_BITS)
|
||||
) fpu_fpnew (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
|
||||
.tag_in (tag_in),
|
||||
|
||||
.op_type (fpu_req_if.op_type),
|
||||
.frm (fpu_frm),
|
||||
|
||||
.dataa (fpu_req_if.rs1_data),
|
||||
.datab (fpu_req_if.rs2_data),
|
||||
.datac (fpu_req_if.rs3_data),
|
||||
.result (result),
|
||||
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
|
||||
.tag_out (tag_out),
|
||||
|
||||
.ready_out (ready_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
VX_fpu_fpga #(
|
||||
.TAGW (FPUQ_BITS)
|
||||
) fpu_fpga (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
|
||||
.tag_in (tag_in),
|
||||
|
||||
.op_type (fpu_req_if.op_type),
|
||||
.frm (fpu_frm),
|
||||
|
||||
.dataa (fpu_req_if.rs1_data),
|
||||
.datab (fpu_req_if.rs2_data),
|
||||
.datac (fpu_req_if.rs3_data),
|
||||
.result (result),
|
||||
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
|
||||
.tag_out (tag_out),
|
||||
|
||||
.ready_out (ready_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
// Registered copies of has_fflags and the merged flags (outputs of pipe_reg).
reg has_fflags_r;
|
||||
fflags_t fflags_r;
|
||||
|
||||
// Merge the per-thread flags into one set: each flag is the OR of that flag
// over the threads enabled in rsp_tmask; disabled threads contribute nothing.
fflags_t rsp_fflags;
|
||||
always @(*) begin
|
||||
rsp_fflags = '0;
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (rsp_tmask[i]) begin
|
||||
rsp_fflags.NX |= fflags[i].NX;
|
||||
rsp_fflags.UF |= fflags[i].UF;
|
||||
rsp_fflags.OF |= fflags[i].OF;
|
||||
rsp_fflags.DZ |= fflags[i].DZ;
|
||||
rsp_fflags.NV |= fflags[i].NV;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// The output register holds while it contains a valid response that the
// commit side has not accepted.
wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;
|
||||
|
||||
// data_in and data_out list the same fields in the same order; DATAW is the
// sum of their widths.
// NOTE(review): RESETW=1 presumably resets only the leading valid bit --
// confirm against VX_pipe_register.
VX_pipe_register #(
|
||||
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({valid_out, rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
|
||||
.data_out ({fpu_commit_if.valid, fpu_commit_if.uuid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
|
||||
);
|
||||
|
||||
// eop is tied high on every commit from this unit.
assign fpu_commit_if.eop = 1'b1;
|
||||
|
||||
// The back-end may deliver a response whenever the output register is free
// to load.
assign ready_out = ~stall_out;
|
||||
|
||||
// CSR fflags Update
|
||||
// Flags are written in the cycle the result commits, and only when the
// operation reported flags.
assign fpu_to_csr_if.write_enable = fpu_commit_if.valid && fpu_commit_if.ready && has_fflags_r;
|
||||
assign fpu_to_csr_if.write_wid = fpu_commit_if.wid;
|
||||
assign fpu_to_csr_if.write_fflags = fflags_r;
|
||||
|
||||
// pending request
|
||||
// One flag per warp: set when a request of that warp is accepted, cleared
// when a response of that warp commits. The set is assigned last, so it wins
// when both events hit the same warp in one cycle.
// NOTE(review): a single bit cannot count several outstanding requests of one
// warp -- the first commit would clear the flag while others are still in
// flight. Confirm that the issue logic never has two FPU requests of the same
// warp in flight, or replace the flag with a per-warp counter.
reg [`NUM_WARPS-1:0] pending_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pending_r <= 0;
|
||||
end else begin
|
||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||
pending_r[fpu_commit_if.wid] <= 0;
|
||||
end
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
pending_r[fpu_req_if.wid] <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
assign pending = pending_r;
|
||||
|
||||
endmodule
|
||||
@@ -1,91 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_gpr_stage
// General-purpose register file. For every thread there is one VX_dp_ram per
// read port (rs1, rs2, and rs3 when EXT_F_ENABLE is defined). All RAMs of a
// thread receive the same write address, data and enable, so each read port
// reads from its own identical copy. Entries are addressed by {wid, register}.
module VX_gpr_stage #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_gpr_req_if.slave gpr_req_if,
|
||||
|
||||
// outputs
|
||||
VX_gpr_rsp_if.master gpr_rsp_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
// One entry per (warp, register) pair.
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
|
||||
|
||||
// ensure r0 never gets written, which can happen before the reset
|
||||
wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
|
||||
|
||||
// Per-thread write enable: the global enable gated by the writeback mask.
wire [`NUM_THREADS-1:0] wren;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign wren[i] = write_enable && writeback_if.tmask[i];
|
||||
end
|
||||
|
||||
// Addresses are the warp id concatenated above the register index.
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2;
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
|
||||
// Per thread: dp_ram1 serves rs1 and dp_ram2 serves rs2. Both are 32 bits
// wide and instantiated with INIT_ENABLE=1, INIT_VALUE=0.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (32),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
) dp_ram1 (
|
||||
.clk (clk),
|
||||
.wren (wren[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr (raddr1),
|
||||
.rdata (gpr_rsp_if.rs1_data[i])
|
||||
);
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (32),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
) dp_ram2 (
|
||||
.clk (clk),
|
||||
.wren (wren[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr (raddr2),
|
||||
.rdata (gpr_rsp_if.rs2_data[i])
|
||||
);
|
||||
end
|
||||
|
||||
// Third read port, present only with the F extension. Without it rs3_data is
// driven with 'x and the rs3 request field is marked unused.
`ifdef EXT_F_ENABLE
|
||||
wire [$clog2(RAM_SIZE)-1:0] raddr3;
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (32),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
) dp_ram3 (
|
||||
.clk (clk),
|
||||
.wren (wren[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr (raddr3),
|
||||
.rdata (gpr_rsp_if.rs3_data[i])
|
||||
);
|
||||
end
|
||||
`else
|
||||
`UNUSED_VAR (gpr_req_if.rs3)
|
||||
assign gpr_rsp_if.rs3_data = 'x;
|
||||
`endif
|
||||
|
||||
// Writeback is never back-pressured by the register file.
assign writeback_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
||||
218
hw/rtl/VX_gpu_pkg.sv
Normal file
218
hw/rtl/VX_gpu_pkg.sv
Normal file
@@ -0,0 +1,218 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_GPU_PKG_VH
|
||||
`define VX_GPU_PKG_VH
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
package VX_gpu_pkg;
|
||||
|
||||
// Packed record types of VX_gpu_pkg. Field order defines the bit layout (the
// first field is the most significant), so it must not be rearranged.

// tmc_t: a valid flag plus a NUM_THREADS-wide thread mask.
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
} tmc_t;
|
||||
|
||||
// wspawn_t: a valid flag, a NUM_WARPS-wide warp mask and an XLEN-wide pc.
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_WARPS-1:0] wmask;
|
||||
logic [`XLEN-1:0] pc;
|
||||
} wspawn_t;
|
||||
|
||||
// split_t: valid and is_dvg flags, two NUM_THREADS-wide masks (then/else) and
// an XLEN-wide next_pc.
// NOTE(review): is_dvg presumably marks a diverged split -- confirm in the
// split/join logic.
typedef struct packed {
|
||||
logic valid;
|
||||
logic is_dvg;
|
||||
logic [`NUM_THREADS-1:0] then_tmask;
|
||||
logic [`NUM_THREADS-1:0] else_tmask;
|
||||
logic [`XLEN-1:0] next_pc;
|
||||
} split_t;
|
||||
|
||||
// join_t: valid and is_dvg flags only.
typedef struct packed {
|
||||
logic valid;
|
||||
logic is_dvg;
|
||||
} join_t;
|
||||
|
||||
// barrier_t: valid flag, NB_WIDTH-wide id, is_global flag and size_m1.
// size_m1 is MAX(NW_WIDTH, NC_WIDTH) bits wide when GBAR_ENABLE is defined and
// NW_WIDTH bits wide otherwise, so the struct width depends on that define.
// NOTE(review): the name suggests size_m1 holds "size minus one" -- confirm.
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NB_WIDTH-1:0] id;
|
||||
logic is_global;
|
||||
`ifdef GBAR_ENABLE
|
||||
logic [`MAX(`NW_WIDTH, `NC_WIDTH)-1:0] size_m1;
|
||||
`else
|
||||
logic [`NW_WIDTH-1:0] size_m1;
|
||||
`endif
|
||||
} barrier_t;
|
||||
|
||||
// base_dcrs_t: an XLEN-wide startup_addr and an 8-bit mpm_class.
typedef struct packed {
|
||||
logic [`XLEN-1:0] startup_addr;
|
||||
logic [7:0] mpm_class;
|
||||
} base_dcrs_t;
|
||||
|
||||
// Cache-related constants. The sections form a chain: the L1 memory tag width
// (max of icache and dcache) is the L2 core tag width, and the L2 memory tag
// width is the L3 core tag width. Each level's word size is the line size of
// the level below it. UNUSED lint warnings are disabled for the whole section.
/* verilator lint_off UNUSED */
|
||||
|
||||
////////////////////////// Icache Parameters //////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
localparam ICACHE_WORD_SIZE = 4;
|
||||
// Word address width: byte address width minus the byte-offset bits of a word.
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
|
||||
|
||||
// Block size in bytes
|
||||
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Core request tag Id bits
|
||||
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
|
||||
|
||||
// Core request tag bits
|
||||
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
|
||||
// Tag width after arbitration: CLOG2(SOCKET_SIZE) bits are added to the tag.
localparam ICACHE_ARB_TAG_WIDTH = (ICACHE_TAG_WIDTH + `CLOG2(`SOCKET_SIZE));
|
||||
|
||||
// Memory request data bits
|
||||
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
// A different width macro is used depending on whether the icache is enabled.
`ifdef ICACHE_ENABLE
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
|
||||
`else
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_ARB_TAG_WIDTH, `NUM_SOCKETS, `NUM_ICACHES);
|
||||
`endif
|
||||
|
||||
////////////////////////// Dcache Parameters //////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
localparam DCACHE_WORD_SIZE = (`XLEN / 8);
|
||||
localparam DCACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(DCACHE_WORD_SIZE));
|
||||
|
||||
// Block size in bytes
|
||||
localparam DCACHE_LINE_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Input request size
|
||||
localparam DCACHE_NUM_REQS = `MAX(`DCACHE_NUM_BANKS, `SMEM_NUM_BANKS);
|
||||
|
||||
// Memory request size
|
||||
localparam LSU_MEM_REQS = `NUM_LSU_LANES;
|
||||
|
||||
// Batch select bits
|
||||
// Number of batches is LSU_MEM_REQS divided by DCACHE_NUM_REQS, rounded up.
localparam DCACHE_NUM_BATCHES = ((LSU_MEM_REQS + DCACHE_NUM_REQS - 1) / DCACHE_NUM_REQS);
|
||||
localparam DCACHE_BATCH_SEL_BITS = `CLOG2(DCACHE_NUM_BATCHES);
|
||||
|
||||
// Core request tag Id bits
|
||||
localparam LSUQ_TAG_BITS = (`CLOG2(`LSUQ_SIZE) + DCACHE_BATCH_SEL_BITS);
|
||||
localparam DCACHE_TAG_ID_BITS = (LSUQ_TAG_BITS + `CACHE_ADDR_TYPE_BITS);
|
||||
|
||||
// Core request tag bits
|
||||
localparam DCACHE_TAG_WIDTH = (`UUID_WIDTH + DCACHE_TAG_ID_BITS);
|
||||
// SM_ENABLED is subtracted as a number of bits.
// NOTE(review): presumably 0 or 1, i.e. one tag bit is dropped when shared
// memory is enabled -- confirm the macro's value range.
localparam DCACHE_NOSM_TAG_WIDTH = (DCACHE_TAG_WIDTH - `SM_ENABLED);
|
||||
localparam DCACHE_ARB_TAG_WIDTH = (DCACHE_NOSM_TAG_WIDTH + `CLOG2(`SOCKET_SIZE));
|
||||
|
||||
// Memory request data bits
|
||||
localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef DCACHE_ENABLE
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_ARB_TAG_WIDTH, `NUM_SOCKETS, `NUM_DCACHES);
|
||||
`else
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_BYPASS_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_ARB_TAG_WIDTH, `NUM_SOCKETS, `NUM_DCACHES);
|
||||
`endif
|
||||
|
||||
/////////////////////////////// L1 Parameters /////////////////////////////
|
||||
|
||||
// Common L1 memory tag width: the wider of the icache and dcache tags.
localparam L1_MEM_TAG_WIDTH = `MAX(ICACHE_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
|
||||
|
||||
localparam NUM_L1_OUTPUTS = 2;
|
||||
|
||||
/////////////////////////////// L2 Parameters /////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
localparam L2_WORD_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Input request size
|
||||
localparam L2_NUM_REQS = NUM_L1_OUTPUTS;
|
||||
|
||||
// Core request tag bits
|
||||
localparam L2_TAG_WIDTH = L1_MEM_TAG_WIDTH;
|
||||
|
||||
// Memory request data bits
|
||||
localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef L2_ENABLE
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`else
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`endif
|
||||
|
||||
/////////////////////////////// L3 Parameters /////////////////////////////
|
||||
|
||||
// Word size in bytes
|
||||
localparam L3_WORD_SIZE = `L2_LINE_SIZE;
|
||||
|
||||
// Input request size
|
||||
localparam L3_NUM_REQS = `NUM_CLUSTERS;
|
||||
|
||||
// Core request tag bits
|
||||
localparam L3_TAG_WIDTH = L2_MEM_TAG_WIDTH;
|
||||
|
||||
// Memory request data bits
|
||||
localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8);
|
||||
|
||||
// Memory request tag bits
|
||||
`ifdef L3_ENABLE
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`else
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`endif
|
||||
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
/////////////////////////////// Issue parameters //////////////////////////
|
||||
|
||||
// ISSUE_IDX_W : bits used to index one of the ISSUE_WIDTH issue slots.
// ISSUE_RATIO : number of warps per issue slot (integer division).
// ISSUE_WIS_W : bits used to index a warp within its issue slot.
// ISSUE_ADDRW : bits used to address NUM_REGS registers for each of the
//               ISSUE_RATIO warps of a slot.
// NOTE(review): LOG2UP is defined elsewhere; it presumably never returns 0,
// unlike CLOG2 -- the conversion functions in this package shift by the
// difference between the two, so confirm that assumption.
localparam ISSUE_IDX_W = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam ISSUE_RATIO = `NUM_WARPS / `ISSUE_WIDTH;
|
||||
localparam ISSUE_WIS_W = `LOG2UP(ISSUE_RATIO);
|
||||
localparam ISSUE_ADDRW = `LOG2UP(`NUM_REGS * (ISSUE_RATIO));
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
// wid_to_isw: issue-slot index of warp 'wid'.
// With more than one issue slot the result is 'wid' cast to ISSUE_IDX_W bits;
// with a single slot it is always 0.
function logic [ISSUE_IDX_W-1:0] wid_to_isw(input logic [`NW_WIDTH-1:0] wid);
    return (`ISSUE_WIDTH > 1) ? ISSUE_IDX_W'(wid) : ISSUE_IDX_W'(0);
endfunction
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
// Rebuild a warp id from a warp-in-slice index (wis) and an issue index (isw).
// The two fields are concatenated with wis in the upper bits, shifted right by
// (ISSUE_IDX_W - CLOG2(ISSUE_WIDTH)) and resized to `NW_WIDTH bits.
// NOTE(review): the shift is non-zero only where LOG2UP and CLOG2 disagree,
// presumably when ISSUE_WIDTH is 1 - confirm against the macro definitions.
function logic [`NW_WIDTH-1:0] wis_to_wid(
|
||||
input logic [ISSUE_WIS_W-1:0] wis,
|
||||
input logic [ISSUE_IDX_W-1:0] isw
|
||||
);
|
||||
wis_to_wid = `NW_WIDTH'({wis, isw} >> (ISSUE_IDX_W-`CLOG2(`ISSUE_WIDTH)));
|
||||
endfunction
|
||||
|
||||
// Extract the warp-in-slice index (wis) from a warp id: the warp id is shifted
// right by CLOG2(ISSUE_WIDTH) bits and the result resized to ISSUE_WIS_W bits.
function logic [ISSUE_WIS_W-1:0] wid_to_wis(
|
||||
input logic [`NW_WIDTH-1:0] wid
|
||||
);
|
||||
wid_to_wis = ISSUE_WIS_W'(wid >> `CLOG2(`ISSUE_WIDTH));
|
||||
endfunction
|
||||
|
||||
// Form an ISSUE_ADDRW-bit address from a register id (rid) and a warp-in-slice
// index (wis). rid is placed above wis, the pair is shifted right by
// (ISSUE_WIS_W - CLOG2(ISSUE_RATIO)) and the result resized to ISSUE_ADDRW bits.
// NOTE(review): as in wis_to_wid, the shift presumably only drops a padding
// bit when ISSUE_RATIO is 1 - confirm against the LOG2UP/CLOG2 definitions.
function logic [ISSUE_ADDRW-1:0] wis_to_addr(
|
||||
input logic [`NR_BITS-1:0] rid,
|
||||
input logic [ISSUE_WIS_W-1:0] wis
|
||||
);
|
||||
wis_to_addr = ISSUE_ADDRW'({rid, wis} >> (ISSUE_WIS_W-`CLOG2(ISSUE_RATIO)));
|
||||
endfunction
|
||||
|
||||
endpackage
|
||||
|
||||
`endif // VX_GPU_PKG_VH
|
||||
@@ -1,43 +0,0 @@
|
||||
`ifndef VX_GPU_TYPES
|
||||
`define VX_GPU_TYPES
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Packed record types for the four warp-control operations produced by
// VX_gpu_unit, plus one `define per type giving its width in bits.
// Field order matters: VX_gpu_unit concatenates these structs into a flat
// vector and unpacks them again after its pipeline register.
package gpu_types;
|
||||
|
||||
// Thread-mask change (driven for both TMC and PRED operations in VX_gpu_unit)
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_THREADS-1:0] tmask; // one bit per thread
|
||||
} gpu_tmc_t;
|
||||
|
||||
`define GPU_TMC_BITS $bits(gpu_types::gpu_tmc_t)
|
||||
|
||||
// Warp spawn request
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_WARPS-1:0] wmask; // one bit per warp
|
||||
logic [31:0] pc; // 32-bit program counter carried with the request
|
||||
} gpu_wspawn_t;
|
||||
|
||||
`define GPU_WSPAWN_BITS $bits(gpu_types::gpu_wspawn_t)
|
||||
|
||||
// Split (branch divergence) record
typedef struct packed {
|
||||
logic valid;
|
||||
logic diverged; // VX_gpu_unit sets this when both masks below are non-zero
|
||||
logic [`NUM_THREADS-1:0] then_tmask; // threads taking the branch
|
||||
logic [`NUM_THREADS-1:0] else_tmask; // active threads not taking the branch
|
||||
logic [31:0] pc; // VX_gpu_unit stores the request's next_PC here
|
||||
} gpu_split_t;
|
||||
|
||||
`define GPU_SPLIT_BITS $bits(gpu_types::gpu_split_t)
|
||||
|
||||
// Barrier request
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NB_BITS-1:0] id; // barrier identifier
|
||||
logic [`NW_BITS-1:0] size_m1; // VX_gpu_unit writes (rs2 - 1) here
|
||||
} gpu_barrier_t;
|
||||
|
||||
`define GPU_BARRIER_BITS $bits(gpu_types::gpu_barrier_t)
|
||||
|
||||
endpackage
|
||||
|
||||
`endif
|
||||
@@ -1,220 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// GPU execution unit.
//
// Decodes the warp-control operations (wspawn, tmc, split, barrier, pred)
// carried by gpu_req_if, packs their results into a single vector, registers
// it, and drives warp_ctl_if / gpu_commit_if from the registered copy.
// When EXT_TEX_ENABLE is defined the unit also forwards texture requests to
// VX_tex_unit and multiplexes texture responses onto the same output register.
module VX_gpu_unit #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_VX_gpu_unit

    input wire clk,
    input wire reset,

    // Incoming request
    VX_gpu_req_if.slave     gpu_req_if,

`ifdef EXT_TEX_ENABLE
    // Texture performance counters
`ifdef PERF_ENABLE
    VX_perf_tex_if.master   perf_tex_if,
`endif
    VX_dcache_req_if.master dcache_req_if,
    VX_dcache_rsp_if.slave  dcache_rsp_if,
    VX_tex_csr_if.slave     tex_csr_if,
`endif

    // Results
    VX_warp_ctl_if.master   warp_ctl_if,
    VX_commit_if.master     gpu_commit_if
);
    import gpu_types::*;

    `UNUSED_PARAM (CORE_ID)

    // Width of the packed warp-control payload and of the shared response
    // data path (wide enough for either per-thread data or that payload).
    localparam WCTL_DATAW = `GPU_TMC_BITS + `GPU_WSPAWN_BITS + `GPU_SPLIT_BITS + `GPU_BARRIER_BITS;
    localparam RSP_DATAW  = `MAX(`NUM_THREADS * 32, WCTL_DATAW);

    // Response selected for the output register
    wire                    rsp_valid;
    wire [`UUID_BITS-1:0]   rsp_uuid;
    wire [`NW_BITS-1:0]     rsp_wid;
    wire [`NUM_THREADS-1:0] rsp_tmask;
    wire [31:0]             rsp_PC;
    wire [`NR_BITS-1:0]     rsp_rd;
    wire                    rsp_wb;

    wire [RSP_DATAW-1:0] rsp_data, rsp_data_r;

    // Decoded warp-control records
    gpu_tmc_t     tmc;
    gpu_wspawn_t  wspawn;
    gpu_barrier_t barrier;
    gpu_split_t   split;

    wire [WCTL_DATAW-1:0] warp_ctl_data;
    wire is_warp_ctl;

    wire stall_in, stall_out;

    // Operation decode
    wire is_wspawn = (gpu_req_if.op_type == `INST_GPU_WSPAWN);
    wire is_tmc    = (gpu_req_if.op_type == `INST_GPU_TMC);
    wire is_split  = (gpu_req_if.op_type == `INST_GPU_SPLIT);
    wire is_bar    = (gpu_req_if.op_type == `INST_GPU_BAR);
    wire is_pred   = (gpu_req_if.op_type == `INST_GPU_PRED);

    // Scalar operands are read from the thread selected by gpu_req_if.tid
    wire [31:0] rs1_data = gpu_req_if.rs1_data[gpu_req_if.tid];
    wire [31:0] rs2_data = gpu_req_if.rs2_data[gpu_req_if.tid];

    // Per-thread predicate: an active thread is "taken" when its rs1 is non-zero
    wire [`NUM_THREADS-1:0] taken_tmask;
    wire [`NUM_THREADS-1:0] not_taken_tmask;

    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        wire taken = (gpu_req_if.rs1_data[i] != 0);
        assign taken_tmask[i]     = gpu_req_if.tmask[i] & taken;
        assign not_taken_tmask[i] = gpu_req_if.tmask[i] & ~taken;
    end

    // ---- tmc / pred -------------------------------------------------------
    // For pred, fall back to the current mask when no thread is taken.

    wire [`NUM_THREADS-1:0] pred_mask = (taken_tmask != 0) ? taken_tmask : gpu_req_if.tmask;

    assign tmc.valid = is_tmc || is_pred;
    assign tmc.tmask = is_pred ? pred_mask : rs1_data[`NUM_THREADS-1:0];

    // ---- wspawn -----------------------------------------------------------
    // Warp i is selected when i < rs1; the spawned warps start at rs2.

    wire [31:0] wspawn_pc = rs2_data;
    wire [`NUM_WARPS-1:0] wspawn_wmask;
    for (genvar i = 0; i < `NUM_WARPS; i++) begin
        assign wspawn_wmask[i] = (i < rs1_data);
    end
    assign wspawn.valid = is_wspawn;
    assign wspawn.wmask = wspawn_wmask;
    assign wspawn.pc    = wspawn_pc;

    // ---- split ------------------------------------------------------------
    // Diverged only when both the taken and the not-taken sets are non-empty.

    assign split.valid      = is_split;
    assign split.diverged   = (| taken_tmask) && (| not_taken_tmask);
    assign split.then_tmask = taken_tmask;
    assign split.else_tmask = not_taken_tmask;
    assign split.pc         = gpu_req_if.next_PC;

    // ---- barrier ----------------------------------------------------------

    assign barrier.valid   = is_bar;
    assign barrier.id      = rs1_data[`NB_BITS-1:0];
    assign barrier.size_m1 = (`NW_BITS)'(rs2_data - 1);

    // Pack all four records; the order must match the unpack further down
    assign warp_ctl_data = {tmc, wspawn, split, barrier};

    // ---- texture ----------------------------------------------------------

`ifdef EXT_TEX_ENABLE

    `UNUSED_VAR (gpu_req_if.op_mod)

    VX_tex_req_if tex_req_if();
    VX_tex_rsp_if tex_rsp_if();

    wire is_tex = (gpu_req_if.op_type == `INST_GPU_TEX);

    // Forward the request to the texture unit
    assign tex_req_if.valid = gpu_req_if.valid && is_tex;
    assign tex_req_if.uuid  = gpu_req_if.uuid;
    assign tex_req_if.wid   = gpu_req_if.wid;
    assign tex_req_if.tmask = gpu_req_if.tmask;
    assign tex_req_if.PC    = gpu_req_if.PC;
    assign tex_req_if.rd    = gpu_req_if.rd;
    assign tex_req_if.wb    = gpu_req_if.wb;

    assign tex_req_if.unit      = gpu_req_if.op_mod[`NTEX_BITS-1:0];
    assign tex_req_if.coords[0] = gpu_req_if.rs1_data;
    assign tex_req_if.coords[1] = gpu_req_if.rs2_data;
    assign tex_req_if.lod       = gpu_req_if.rs3_data;

    VX_tex_unit #(
        .CORE_ID(CORE_ID)
    ) tex_unit (
        .clk           (clk),
        .reset         (reset),
    `ifdef PERF_ENABLE
        .perf_tex_if   (perf_tex_if),
    `endif
        .tex_req_if    (tex_req_if),
        .tex_csr_if    (tex_csr_if),
        .tex_rsp_if    (tex_rsp_if),
        .dcache_req_if (dcache_req_if),
        .dcache_rsp_if (dcache_rsp_if)
    );

    assign tex_rsp_if.ready = !stall_out;

    // A texture request waits for the texture unit; any other request waits
    // while a texture response is using the output register or it is stalled.
    assign stall_in = (is_tex && ~tex_req_if.ready)
                   || (~is_tex && (tex_rsp_if.valid || stall_out));

    assign is_warp_ctl = !is_tex && !tex_rsp_if.valid;

    // Texture responses take priority over warp-control requests
    assign rsp_valid = tex_rsp_if.valid || (gpu_req_if.valid && ~is_tex);
    assign rsp_uuid  = tex_rsp_if.valid ? tex_rsp_if.uuid  : gpu_req_if.uuid;
    assign rsp_wid   = tex_rsp_if.valid ? tex_rsp_if.wid   : gpu_req_if.wid;
    assign rsp_tmask = tex_rsp_if.valid ? tex_rsp_if.tmask : gpu_req_if.tmask;
    assign rsp_PC    = tex_rsp_if.valid ? tex_rsp_if.PC    : gpu_req_if.PC;
    assign rsp_rd    = tex_rsp_if.rd;
    assign rsp_wb    = tex_rsp_if.valid && tex_rsp_if.wb;
    assign rsp_data  = tex_rsp_if.valid ? RSP_DATAW'(tex_rsp_if.data) : RSP_DATAW'(warp_ctl_data);

`else

    `UNUSED_VAR (gpu_req_if.op_mod)
    `UNUSED_VAR (gpu_req_if.rs3_data)
    `UNUSED_VAR (gpu_req_if.wb)
    `UNUSED_VAR (gpu_req_if.rd)

    // Without the texture extension every request is a warp-control request
    assign stall_in    = stall_out;
    assign is_warp_ctl = 1;

    assign rsp_valid = gpu_req_if.valid;
    assign rsp_uuid  = gpu_req_if.uuid;
    assign rsp_wid   = gpu_req_if.wid;
    assign rsp_tmask = gpu_req_if.tmask;
    assign rsp_PC    = gpu_req_if.PC;
    assign rsp_rd    = 0;
    assign rsp_wb    = 0;
    assign rsp_data  = RSP_DATAW'(warp_ctl_data);

`endif

    wire is_warp_ctl_r;

    // ---- output register --------------------------------------------------
    // Hold the register while a valid commit has not been accepted.

    assign stall_out = ~gpu_commit_if.ready && gpu_commit_if.valid;

    VX_pipe_register #(
        .DATAW  (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1),
        .RESETW (1)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall_out),
        .data_in  ({rsp_valid,           rsp_uuid,           rsp_wid,           rsp_tmask,           rsp_PC,           rsp_rd,           rsp_wb,           rsp_data,   is_warp_ctl}),
        .data_out ({gpu_commit_if.valid, gpu_commit_if.uuid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, rsp_data_r, is_warp_ctl_r})
    );

    assign gpu_commit_if.data = rsp_data_r[(`NUM_THREADS * 32)-1:0];
    assign gpu_commit_if.eop  = 1'b1;

    // ---- warp control response --------------------------------------------
    // Unpack in the same order used to build warp_ctl_data.

    assign {warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier} = rsp_data_r[WCTL_DATAW-1:0];

    assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready && is_warp_ctl_r;
    assign warp_ctl_if.wid   = gpu_commit_if.wid;

    // Accept a new request whenever the input side is not stalled
    assign gpu_req_if.ready = ~stall_in;

    `SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
    `SCOPE_ASSIGN (gpu_rsp_uuid, gpu_commit_if.uuid);
    `SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc.valid);
    `SCOPE_ASSIGN (gpu_rsp_wspawn, warp_ctl_if.wspawn.valid);
    `SCOPE_ASSIGN (gpu_rsp_split, warp_ctl_if.split.valid);
    `SCOPE_ASSIGN (gpu_rsp_barrier, warp_ctl_if.barrier.valid);

endmodule
|
||||
@@ -1,210 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Per-warp instruction buffer.
//
// Each warp owns an output register (q_data_out) backed by a VX_elastic_buffer
// of `IBUF_SIZE entries. Occupancy is tracked per warp with a counter and
// full / empty / almost-empty flags. A round-robin arbiter chooses the warp
// to dequeue next, and the "_n" (next-cycle) selection is also exported to the
// scoreboard through the *_n ports of ibuffer_if.
module VX_ibuffer #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // decoded instructions in
    VX_decode_if.slave   decode_if,

    // buffered instructions out
    VX_ibuffer_if.master ibuffer_if
);

    `UNUSED_PARAM (CORE_ID)

    // DATAW is the width of the packed instruction record (see q_data_in)
    localparam DATAW   = `UUID_BITS + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1;
    localparam ADDRW   = $clog2(`IBUF_SIZE+1);
    localparam NWARPSW = $clog2(`NUM_WARPS+1);

    // Per-warp occupancy state
    reg [`NUM_WARPS-1:0][ADDRW-1:0] used_r;
    reg [`NUM_WARPS-1:0] full_r, empty_r, alm_empty_r;

    wire [`NUM_WARPS-1:0] q_full, q_empty, q_alm_empty;
    wire [DATAW-1:0] q_data_in;
    wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
    reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;

    wire enq_fire = decode_if.valid && decode_if.ready;
    wire deq_fire = ibuffer_if.valid && ibuffer_if.ready;

    for (genvar i = 0; i < `NUM_WARPS; ++i) begin

        wire writing = enq_fire && (i == decode_if.wid);
        wire reading = deq_fire && (i == ibuffer_if.wid);

        // True when the output register will be free to take the incoming
        // instruction directly (already empty, or its last entry is leaving).
        wire going_empty = empty_r[i] || (alm_empty_r[i] && reading);

        VX_elastic_buffer #(
            .DATAW   (DATAW),
            .SIZE    (`IBUF_SIZE),
            .OUT_REG (1)
        ) queue (
            .clk      (clk),
            .reset    (reset),
            .valid_in (writing && !going_empty),
            .data_in  (q_data_in),
            .ready_out(reading),
            .data_out (q_data_prev[i]),
            `UNUSED_PIN (ready_in),
            `UNUSED_PIN (valid_out)
        );

        always @(posedge clk) begin
            if (reset) begin
                used_r[i]      <= 0;
                full_r[i]      <= 0;
                empty_r[i]     <= 1;
                alm_empty_r[i] <= 1;
            end else begin
                // A simultaneous write and read leaves the flags unchanged
                if (writing && !reading) begin
                    empty_r[i] <= 0;
                    if (used_r[i] == 1)
                        alm_empty_r[i] <= 0;
                    if (used_r[i] == ADDRW'(`IBUF_SIZE))
                        full_r[i] <= 1;
                end else if (reading && !writing) begin
                    full_r[i] <= 0;
                    if (used_r[i] == ADDRW'(1))
                        empty_r[i] <= 1;
                    if (used_r[i] == ADDRW'(2))
                        alm_empty_r[i] <= 1;
                end
                // +1 on write only, -1 on read only, unchanged otherwise
                used_r[i] <= used_r[i] + ADDRW'($signed(2'(writing) - 2'(reading)));
            end

            // Output register: take the new instruction directly when the
            // register is free, otherwise refill from the backing buffer on a read.
            if (writing && going_empty) begin
                q_data_out[i] <= q_data_in;
            end else if (reading) begin
                q_data_out[i] <= q_data_prev[i];
            end
        end

        assign q_full[i]      = full_r[i];
        assign q_empty[i]     = empty_r[i];
        assign q_alm_empty[i] = alm_empty_r[i];
    end

    ///////////////////////////////////////////////////////////////////////////

    reg [`NUM_WARPS-1:0] valid_table, valid_table_n;
    reg [`NW_BITS-1:0] deq_wid, deq_wid_n;
    reg [`NW_BITS-1:0] deq_wid_rr, deq_wid_rr_n;
    reg deq_valid, deq_valid_n;
    reg [DATAW-1:0] deq_instr, deq_instr_n;
    reg [NWARPSW-1:0] num_warps;

    `UNUSED_VAR (deq_instr)

    // Next-cycle set of warps holding at least one instruction.
    // The enqueue update is applied last, so it wins for the same warp.
    always @(*) begin
        valid_table_n = valid_table;
        if (deq_fire) begin
            valid_table_n[deq_wid] = !q_alm_empty[deq_wid];
        end
        if (enq_fire) begin
            valid_table_n[decode_if.wid] = 1;
        end
    end

    // Round-robin choice among the warps that will be valid next cycle
    VX_rr_arbiter #(
        .NUM_REQS (`NUM_WARPS)
    ) rr_arbiter (
        .clk          (clk),
        .reset        (reset),
        .requests     (valid_table_n),
        .grant_index  (deq_wid_rr_n),
        `UNUSED_PIN (grant_valid),
        `UNUSED_PIN (grant_onehot),
        `UNUSED_PIN (enable)
    );

    // Select the instruction to present next cycle:
    //  - several warps buffered : follow the round-robin pick
    //  - exactly one, not draining : stay on the current warp
    //  - otherwise : forward the incoming instruction
    always @(*) begin
        if (num_warps > 1) begin
            deq_valid_n = 1;
            deq_wid_n   = deq_wid_rr;
            deq_instr_n = q_data_out[deq_wid_rr];
        end else if (num_warps == 1 && !(deq_fire && q_alm_empty[deq_wid])) begin
            deq_valid_n = 1;
            deq_wid_n   = deq_wid;
            deq_instr_n = deq_fire ? q_data_prev[deq_wid] : q_data_out[deq_wid];
        end else begin
            deq_valid_n = enq_fire;
            deq_wid_n   = decode_if.wid;
            deq_instr_n = q_data_in;
        end
    end

    // A warp joins the active set when it enqueues into an empty buffer and
    // leaves when its last entry is dequeued with no same-warp enqueue.
    wire warp_added   = enq_fire && q_empty[decode_if.wid];
    wire warp_removed = deq_fire && ~(enq_fire && decode_if.wid == deq_wid) && q_alm_empty[deq_wid];

    always @(posedge clk) begin
        if (reset) begin
            valid_table <= 0;
            deq_valid   <= 0;
            num_warps   <= 0;
        end else begin
            valid_table <= valid_table_n;
            deq_valid   <= deq_valid_n;

            if (warp_added && !warp_removed) begin
                num_warps <= num_warps + NWARPSW'(1);
            end else if (warp_removed && !warp_added) begin
                num_warps <= num_warps - NWARPSW'(1);
            end
        end

        // These registers are not reset
        deq_wid    <= deq_wid_n;
        deq_wid_rr <= deq_wid_rr_n;
        deq_instr  <= deq_instr_n;
    end

    assign decode_if.ready = ~q_full[decode_if.wid];

    // Packed instruction record; the unpack below must use the same order
    assign q_data_in = {decode_if.uuid,
                        decode_if.tmask,
                        decode_if.PC,
                        decode_if.ex_type,
                        decode_if.op_type,
                        decode_if.op_mod,
                        decode_if.wb,
                        decode_if.use_PC,
                        decode_if.use_imm,
                        decode_if.imm,
                        decode_if.rd,
                        decode_if.rs1,
                        decode_if.rs2,
                        decode_if.rs3};

    assign ibuffer_if.valid = deq_valid;
    assign ibuffer_if.wid   = deq_wid;
    assign {ibuffer_if.uuid,
            ibuffer_if.tmask,
            ibuffer_if.PC,
            ibuffer_if.ex_type,
            ibuffer_if.op_type,
            ibuffer_if.op_mod,
            ibuffer_if.wb,
            ibuffer_if.use_PC,
            ibuffer_if.use_imm,
            ibuffer_if.imm,
            ibuffer_if.rd,
            ibuffer_if.rs1,
            ibuffer_if.rs2,
            ibuffer_if.rs3} = deq_instr;

    // Scoreboard forwarding: the register ids are the four lowest fields of
    // the record (rs3 lowest, then rs2, rs1, rd).
    assign ibuffer_if.wid_n = deq_wid_n;
    assign ibuffer_if.rd_n  = deq_instr_n[3*`NR_BITS +: `NR_BITS];
    assign ibuffer_if.rs1_n = deq_instr_n[2*`NR_BITS +: `NR_BITS];
    assign ibuffer_if.rs2_n = deq_instr_n[1*`NR_BITS +: `NR_BITS];
    assign ibuffer_if.rs3_n = deq_instr_n[0*`NR_BITS +: `NR_BITS];

endmodule
|
||||
@@ -1,102 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Instruction-cache stage.
//
// Issues fetch requests to the instruction cache, using the warp id as the
// low bits of the request tag. The PC, thread mask and uuid of each request
// are stored in a small RAM indexed by warp id and read back with the tag of
// the returning response.
module VX_icache_stage #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_VX_icache_stage

    input wire clk,
    input wire reset,

    // Icache interface
    VX_icache_req_if.master icache_req_if,
    VX_icache_rsp_if.slave  icache_rsp_if,

    // fetch request
    VX_ifetch_req_if.slave  ifetch_req_if,

    // fetch response
    VX_ifetch_rsp_if.master ifetch_rsp_if
);

    `UNUSED_PARAM (CORE_ID)
    `UNUSED_VAR (reset)

    // Passed to the output register as its DEPTH
    localparam OUT_REG = 0;

    wire [`NW_BITS-1:0] req_tag, rsp_tag;

    wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;

    assign req_tag = ifetch_req_if.wid;
    assign rsp_tag = icache_rsp_if.tag[`NW_BITS-1:0];

    // Request metadata read back for the responding warp
    wire [`UUID_BITS-1:0]   rsp_uuid;
    wire [31:0]             rsp_PC;
    wire [`NUM_THREADS-1:0] rsp_tmask;

    VX_dp_ram #(
        .DATAW  (32 + `NUM_THREADS + `UUID_BITS),
        .SIZE   (`NUM_WARPS),
        .LUTRAM (1)
    ) req_metadata (
        .clk   (clk),
        .wren  (icache_req_fire),
        .waddr (req_tag),
        .wdata ({ifetch_req_if.PC, ifetch_req_if.tmask, ifetch_req_if.uuid}),
        .raddr (rsp_tag),
        .rdata ({rsp_PC, rsp_tmask, rsp_uuid})
    );

    `RUNTIME_ASSERT((!ifetch_req_if.valid || ifetch_req_if.PC >= `STARTUP_ADDR),
        ("%t: *** invalid PC=%0h, wid=%0d, tmask=%b (#%0d)", $time, ifetch_req_if.PC, ifetch_req_if.wid, ifetch_req_if.tmask, ifetch_req_if.uuid))

    // Icache request: bits [31:2] of the PC, tagged with {uuid, warp id}
    assign icache_req_if.valid = ifetch_req_if.valid;
    assign icache_req_if.addr  = ifetch_req_if.PC[31:2];
    assign icache_req_if.tag   = {ifetch_req_if.uuid, req_tag};

    // The fetch request is accepted exactly when the cache accepts it
    assign ifetch_req_if.ready = icache_req_if.ready;

    wire [`NW_BITS-1:0] rsp_wid = rsp_tag;

    // With OUT_REG == 0, stall while a valid response is not being accepted
    wire stall_out = ~ifetch_rsp_if.ready && (0 == OUT_REG && ifetch_rsp_if.valid);

    VX_pipe_register #(
        .DATAW  (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `UUID_BITS),
        .RESETW (1),
        .DEPTH  (OUT_REG)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall_out),
        .data_in  ({icache_rsp_if.valid, rsp_wid,           rsp_tmask,           rsp_PC,           icache_rsp_if.data, rsp_uuid}),
        .data_out ({ifetch_rsp_if.valid, ifetch_rsp_if.wid, ifetch_rsp_if.tmask, ifetch_rsp_if.PC, ifetch_rsp_if.data, ifetch_rsp_if.uuid})
    );

    // Accept a new cache response whenever the output side is not stalled
    assign icache_rsp_if.ready = ~stall_out;

    `SCOPE_ASSIGN (icache_req_fire, icache_req_fire);
    `SCOPE_ASSIGN (icache_req_uuid, ifetch_req_if.uuid);
    `SCOPE_ASSIGN (icache_req_addr, {icache_req_if.addr, 2'b0});
    `SCOPE_ASSIGN (icache_req_tag,  req_tag);

    `SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready);
    `SCOPE_ASSIGN (icache_rsp_uuid, rsp_uuid);
    `SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data);
    `SCOPE_ASSIGN (icache_rsp_tag,  rsp_tag);

`ifdef DBG_TRACE_CORE_ICACHE
    // Debug trace of accepted requests and delivered responses
    always @(posedge clk) begin
        if (icache_req_fire) begin
            dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h (#%0d)\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, ifetch_req_if.uuid);
        end
        if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
            dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, data=%0h (#%0d)\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.data, ifetch_rsp_if.uuid);
        end
    end
`endif

endmodule
|
||||
@@ -1,68 +0,0 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Stack of (q1, q2) entry pairs.
//
// A push stores {q2, q1} at wr_ptr and records ~pair in a per-slot flag.
// The output d shows q1 when the top slot's flag is set and q2 otherwise.
// A pop on a slot whose flag is clear only sets the flag (so the next read
// returns q1); a pop on a slot whose flag is set moves both pointers down.
// NOTE(review): the flag array is not reset; it is only written on push/pop.
module VX_ipdom_stack #(
    parameter WIDTH = 1,
    parameter DEPTH = 1
) (
    input  wire               clk,
    input  wire               reset,
    input  wire               pair,
    input  wire [WIDTH - 1:0] q1,
    input  wire [WIDTH - 1:0] q2,
    output wire [WIDTH - 1:0] d,
    input  wire               push,
    input  wire               pop,
    output wire               index,
    output wire               empty,
    output wire               full
);
    localparam ADDRW = $clog2(DEPTH);

    // Per-slot flag: set when the slot has a single remaining entry to return
    reg is_part [DEPTH-1:0];

    reg [ADDRW-1:0] rd_ptr, wr_ptr;

    wire [WIDTH-1:0] d1, d2;

    // Flag of the slot currently at the top of the stack
    assign index = is_part[rd_ptr];

    // Pointer update: push has priority over pop
    always @(posedge clk) begin
        if (reset) begin
            rd_ptr <= 0;
            wr_ptr <= 0;
        end else if (push) begin
            rd_ptr <= wr_ptr;
            wr_ptr <= wr_ptr + ADDRW'(1);
        end else if (pop) begin
            // move down by one only when the top slot's flag is set
            wr_ptr <= wr_ptr - ADDRW'(index);
            rd_ptr <= rd_ptr - ADDRW'(index);
        end
    end

    // Entry storage: q1 in the low half of the word, q2 in the high half
    VX_dp_ram #(
        .DATAW  (WIDTH * 2),
        .SIZE   (DEPTH),
        .LUTRAM (1)
    ) store (
        .clk   (clk),
        .wren  (push),
        .waddr (wr_ptr),
        .wdata ({q2, q1}),
        .raddr (rd_ptr),
        .rdata ({d2, d1})
    );

    // Flag update (no reset): push records ~pair, pop marks the slot
    always @(posedge clk) begin
        if (push) begin
            is_part[wr_ptr] <= ~pair;
        end else if (pop) begin
            is_part[rd_ptr] <= 1;
        end
    end

    assign d     = index ? d1 : d2;
    assign empty = (wr_ptr == ADDRW'(0));
    assign full  = (wr_ptr == ADDRW'(DEPTH-1));

endmodule
|
||||
@@ -1,256 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Issue stage.
//
// Instantiates the instruction buffer, scoreboard, register-file stage and
// dispatcher and wires them together. An instruction leaves the buffer only
// when both the scoreboard and the dispatcher are ready for it. Optional
// sections add stall counters (PERF_ENABLE) and a per-unit issue trace
// (DBG_TRACE_CORE_PIPELINE).
module VX_issue #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_VX_issue

    input wire clk,
    input wire reset,

`ifdef PERF_ENABLE
    VX_perf_pipeline_if.issue perf_issue_if,
`endif

    VX_decode_if.slave      decode_if,
    VX_writeback_if.slave   writeback_if,

    VX_alu_req_if.master    alu_req_if,
    VX_lsu_req_if.master    lsu_req_if,
    VX_csr_req_if.master    csr_req_if,
`ifdef EXT_F_ENABLE
    VX_fpu_req_if.master    fpu_req_if,
`endif
    VX_gpu_req_if.master    gpu_req_if
);
    // Internal interfaces between the sub-blocks
    VX_ibuffer_if   ibuffer_if();
    VX_gpr_req_if   gpr_req_if();
    VX_gpr_rsp_if   gpr_rsp_if();
    VX_writeback_if sboard_wb_if();
    VX_ibuffer_if   scoreboard_if();
    VX_ibuffer_if   dispatch_if();

    // Register read request for the instruction at the buffer head
    assign gpr_req_if.wid = ibuffer_if.wid;
    assign gpr_req_if.rs1 = ibuffer_if.rs1;
    assign gpr_req_if.rs2 = ibuffer_if.rs2;
    assign gpr_req_if.rs3 = ibuffer_if.rs3;

    // Writeback fields forwarded to the scoreboard
    assign sboard_wb_if.valid = writeback_if.valid;
    assign sboard_wb_if.uuid  = writeback_if.uuid;
    assign sboard_wb_if.wid   = writeback_if.wid;
    assign sboard_wb_if.PC    = writeback_if.PC;
    assign sboard_wb_if.rd    = writeback_if.rd;
    assign sboard_wb_if.eop   = writeback_if.eop;

    // Scoreboard view of the buffer head; valid only when dispatch can take it
    assign scoreboard_if.valid = ibuffer_if.valid && dispatch_if.ready;
    assign scoreboard_if.uuid  = ibuffer_if.uuid;
    assign scoreboard_if.wid   = ibuffer_if.wid;
    assign scoreboard_if.PC    = ibuffer_if.PC;
    assign scoreboard_if.wb    = ibuffer_if.wb;
    assign scoreboard_if.rd    = ibuffer_if.rd;
    assign scoreboard_if.rd_n  = ibuffer_if.rd_n;
    assign scoreboard_if.rs1_n = ibuffer_if.rs1_n;
    assign scoreboard_if.rs2_n = ibuffer_if.rs2_n;
    assign scoreboard_if.rs3_n = ibuffer_if.rs3_n;
    assign scoreboard_if.wid_n = ibuffer_if.wid_n;

    // Dispatch view of the buffer head; valid only when the scoreboard allows it
    assign dispatch_if.valid   = ibuffer_if.valid && scoreboard_if.ready;
    assign dispatch_if.uuid    = ibuffer_if.uuid;
    assign dispatch_if.wid     = ibuffer_if.wid;
    assign dispatch_if.tmask   = ibuffer_if.tmask;
    assign dispatch_if.PC      = ibuffer_if.PC;
    assign dispatch_if.ex_type = ibuffer_if.ex_type;
    assign dispatch_if.op_type = ibuffer_if.op_type;
    assign dispatch_if.op_mod  = ibuffer_if.op_mod;
    assign dispatch_if.wb      = ibuffer_if.wb;
    assign dispatch_if.rd      = ibuffer_if.rd;
    assign dispatch_if.rs1     = ibuffer_if.rs1;
    assign dispatch_if.imm     = ibuffer_if.imm;
    assign dispatch_if.use_PC  = ibuffer_if.use_PC;
    assign dispatch_if.use_imm = ibuffer_if.use_imm;

    // The instruction issues when both consumers are ready
    assign ibuffer_if.ready = scoreboard_if.ready && dispatch_if.ready;

    `RESET_RELAY (ibuf_reset);
    `RESET_RELAY (scoreboard_reset);
    `RESET_RELAY (gpr_reset);
    `RESET_RELAY (dispatch_reset);

    VX_ibuffer #(
        .CORE_ID(CORE_ID)
    ) ibuffer (
        .clk        (clk),
        .reset      (ibuf_reset),
        .decode_if  (decode_if),
        .ibuffer_if (ibuffer_if)
    );

    VX_scoreboard #(
        .CORE_ID(CORE_ID)
    ) scoreboard (
        .clk         (clk),
        .reset       (scoreboard_reset),
        .writeback_if(sboard_wb_if),
        .ibuffer_if  (scoreboard_if)
    );

    VX_gpr_stage #(
        .CORE_ID(CORE_ID)
    ) gpr_stage (
        .clk          (clk),
        .reset        (gpr_reset),
        .writeback_if (writeback_if),
        .gpr_req_if   (gpr_req_if),
        .gpr_rsp_if   (gpr_rsp_if)
    );

    VX_dispatch dispatch (
        .clk        (clk),
        .reset      (dispatch_reset),
        .ibuffer_if (dispatch_if),
        .gpr_rsp_if (gpr_rsp_if),
        .alu_req_if (alu_req_if),
        .lsu_req_if (lsu_req_if),
        .csr_req_if (csr_req_if),
    `ifdef EXT_F_ENABLE
        .fpu_req_if (fpu_req_if),
    `endif
        .gpu_req_if (gpu_req_if)
    );

    // Scope taps
    `SCOPE_ASSIGN (issue_fire,       ibuffer_if.valid && ibuffer_if.ready);
    `SCOPE_ASSIGN (issue_uuid,       ibuffer_if.uuid);
    `SCOPE_ASSIGN (issue_tmask,      ibuffer_if.tmask);
    `SCOPE_ASSIGN (issue_ex_type,    ibuffer_if.ex_type);
    `SCOPE_ASSIGN (issue_op_type,    ibuffer_if.op_type);
    `SCOPE_ASSIGN (issue_op_mod,     ibuffer_if.op_mod);
    `SCOPE_ASSIGN (issue_wb,         ibuffer_if.wb);
    `SCOPE_ASSIGN (issue_rd,         ibuffer_if.rd);
    `SCOPE_ASSIGN (issue_rs1,        ibuffer_if.rs1);
    `SCOPE_ASSIGN (issue_rs2,        ibuffer_if.rs2);
    `SCOPE_ASSIGN (issue_rs3,        ibuffer_if.rs3);
    `SCOPE_ASSIGN (issue_imm,        ibuffer_if.imm);
    `SCOPE_ASSIGN (issue_use_pc,     ibuffer_if.use_PC);
    `SCOPE_ASSIGN (issue_use_imm,    ibuffer_if.use_imm);
    `SCOPE_ASSIGN (scoreboard_delay, !scoreboard_if.ready);
    `SCOPE_ASSIGN (dispatch_delay,   !dispatch_if.ready);
    `SCOPE_ASSIGN (gpr_rs1,          gpr_rsp_if.rs1_data);
    `SCOPE_ASSIGN (gpr_rs2,          gpr_rsp_if.rs2_data);
    `SCOPE_ASSIGN (gpr_rs3,          gpr_rsp_if.rs3_data);
    `SCOPE_ASSIGN (writeback_valid,  writeback_if.valid);
    `SCOPE_ASSIGN (writeback_uuid,   writeback_if.uuid);
    `SCOPE_ASSIGN (writeback_tmask,  writeback_if.tmask);
    `SCOPE_ASSIGN (writeback_rd,     writeback_if.rd);
    `SCOPE_ASSIGN (writeback_data,   writeback_if.data);
    `SCOPE_ASSIGN (writeback_eop,    writeback_if.eop);

`ifdef PERF_ENABLE
    // Stall counters: each one counts cycles with valid asserted and ready low
    reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
    reg [`PERF_CTR_BITS-1:0] perf_scb_stalls;
    reg [`PERF_CTR_BITS-1:0] perf_alu_stalls;
    reg [`PERF_CTR_BITS-1:0] perf_lsu_stalls;
    reg [`PERF_CTR_BITS-1:0] perf_csr_stalls;
    reg [`PERF_CTR_BITS-1:0] perf_gpu_stalls;
`ifdef EXT_F_ENABLE
    reg [`PERF_CTR_BITS-1:0] perf_fpu_stalls;
`endif

    always @(posedge clk) begin
        if (reset) begin
            perf_ibf_stalls <= 0;
            perf_scb_stalls <= 0;
            perf_alu_stalls <= 0;
            perf_lsu_stalls <= 0;
            perf_csr_stalls <= 0;
            perf_gpu_stalls <= 0;
        `ifdef EXT_F_ENABLE
            perf_fpu_stalls <= 0;
        `endif
        end else begin
            // instruction buffer refused a decoded instruction
            if (decode_if.valid & ~decode_if.ready) begin
                perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'd1;
            end
            // scoreboard held the instruction back
            if (scoreboard_if.valid & ~scoreboard_if.ready) begin
                perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'd1;
            end
            // dispatch stalled: attribute the cycle to the target unit
            if (dispatch_if.valid & ~dispatch_if.ready) begin
                case (dispatch_if.ex_type)
                `EX_ALU: perf_alu_stalls <= perf_alu_stalls + `PERF_CTR_BITS'd1;
            `ifdef EXT_F_ENABLE
                `EX_FPU: perf_fpu_stalls <= perf_fpu_stalls + `PERF_CTR_BITS'd1;
            `endif
                `EX_LSU: perf_lsu_stalls <= perf_lsu_stalls + `PERF_CTR_BITS'd1;
                `EX_CSR: perf_csr_stalls <= perf_csr_stalls + `PERF_CTR_BITS'd1;
                // every remaining ex_type is counted as a GPU stall
                default: perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'd1;
                endcase
            end
        end
    end

    assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
    assign perf_issue_if.scb_stalls = perf_scb_stalls;
    assign perf_issue_if.alu_stalls = perf_alu_stalls;
    assign perf_issue_if.lsu_stalls = perf_lsu_stalls;
    assign perf_issue_if.csr_stalls = perf_csr_stalls;
    assign perf_issue_if.gpu_stalls = perf_gpu_stalls;
`ifdef EXT_F_ENABLE
    assign perf_issue_if.fpu_stalls = perf_fpu_stalls;
`endif
`endif

`ifdef DBG_TRACE_CORE_PIPELINE
    // One trace record per request accepted by an execution unit
    always @(posedge clk) begin
        if (alu_req_if.valid && alu_req_if.ready) begin
            dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
                $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd);
            `TRACE_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
            dpi_trace(", rs2_data=");
            `TRACE_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", alu_req_if.uuid);
        end
        if (lsu_req_if.valid && lsu_req_if.ready) begin
            dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
                $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset);
            `TRACE_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
            dpi_trace(", data=");
            `TRACE_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", lsu_req_if.uuid);
        end
        if (csr_req_if.valid && csr_req_if.ready) begin
            dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
                $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
            `TRACE_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", csr_req_if.uuid);
        end
    `ifdef EXT_F_ENABLE
        if (fpu_req_if.valid && fpu_req_if.ready) begin
            dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
                $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd);
            `TRACE_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
            dpi_trace(", rs2_data=");
            `TRACE_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
            dpi_trace(", rs3_data=");
            `TRACE_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", fpu_req_if.uuid);
        end
    `endif
        if (gpu_req_if.valid && gpu_req_if.ready) begin
            dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
                $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd);
            `TRACE_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
            dpi_trace(", rs2_data=");
            `TRACE_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
            dpi_trace(", rs3_data=");
            `TRACE_ARRAY1D(gpu_req_if.rs3_data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", gpu_req_if.uuid);
        end
    end
`endif

endmodule
|
||||
@@ -1,372 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_lsu_unit
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
|
||||
// inputs
|
||||
VX_lsu_req_if.slave lsu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_commit_if.master ld_commit_if,
|
||||
VX_commit_if.master st_commit_if
|
||||
);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = 32 - MEM_ASHIFT;
|
||||
localparam REQ_ASHIFT = `CLOG2(`DCACHE_WORD_SIZE);
|
||||
|
||||
// The address-range checks in this module compare block addresses
// (addr >> MEM_ASHIFT), so the region base addresses and the shared-memory
// size must be multiples of the memory block size. The modulus therefore has
// to be the block size in bytes, not the shift amount MEM_ASHIFT.
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT(`SMEM_SIZE == `MEM_BLOCK_SIZE * (`SMEM_SIZE / `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
wire req_valid;
|
||||
wire [`UUID_BITS-1:0] req_uuid;
|
||||
wire [`NUM_THREADS-1:0] req_tmask;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_addr;
|
||||
wire [`INST_LSU_BITS-1:0] req_type;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_data;
|
||||
wire [`NR_BITS-1:0] req_rd;
|
||||
wire req_wb;
|
||||
wire [`NW_BITS-1:0] req_wid;
|
||||
wire [31:0] req_pc;
|
||||
wire req_is_dup;
|
||||
wire req_is_prefetch;
|
||||
|
||||
wire mbuf_empty;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`CACHE_ADDR_TYPE_BITS-1:0] lsu_addr_type, req_addr_type;
|
||||
|
||||
// full address calculation
|
||||
wire [`NUM_THREADS-1:0][31:0] full_addr;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign full_addr[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
|
||||
end
|
||||
|
||||
// Detect requests where every active thread targets the same address.
// addr_matches[i] is set when thread i+1 is either inactive or has the same
// base address as thread 0. Comparing base addresses is sufficient because
// the offset added to form full_addr is common to all threads.
|
||||
wire [`NUM_THREADS-2:0] addr_matches;
|
||||
for (genvar i = 0; i < (`NUM_THREADS-1); i++) begin
|
||||
assign addr_matches[i] = (lsu_req_if.base_addr[i+1] == lsu_req_if.base_addr[0]) || ~lsu_req_if.tmask[i+1];
|
||||
end
|
||||
|
||||
// A request is flagged as duplicate only when thread 0 itself is active and
// all other threads match it (or are masked off).
wire lsu_is_dup = lsu_req_if.tmask[0] && (& addr_matches);
|
||||
|
||||
// Classify each thread's address by comparing its block address
// (bits [MEM_ASHIFT +: MEM_ADDRW]) against the I/O and shared-memory ranges.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
// Non-cacheable: block address at or above the I/O base.
|
||||
wire is_addr_nc = (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] >= MEM_ADDRW'(`IO_BASE_ADDR >> MEM_ASHIFT));
|
||||
if (`SM_ENABLE) begin
|
||||
// Shared memory: block address in [SMEM_BASE_ADDR - SMEM_SIZE, SMEM_BASE_ADDR).
|
||||
wire is_addr_sm = (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] >= MEM_ADDRW'((`SMEM_BASE_ADDR - `SMEM_SIZE) >> MEM_ASHIFT))
|
||||
& (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] < MEM_ADDRW'(`SMEM_BASE_ADDR >> MEM_ASHIFT));
|
||||
// Address type is {non-cacheable, shared-memory}; the SM flag is the LSB.
assign lsu_addr_type[i] = {is_addr_nc, is_addr_sm};
|
||||
end else begin
|
||||
// Without shared memory the address type carries only the NC flag.
assign lsu_addr_type[i] = is_addr_nc;
|
||||
end
|
||||
end
|
||||
|
||||
// A fence is held back while the request register still holds a valid
// request or the metadata buffer is not empty.
|
||||
wire fence_wait = lsu_req_if.is_fence && (req_valid || !mbuf_empty);
|
||||
|
||||
// ready_in is assigned further down from req_dep_ready && dcache_req_ready.
wire ready_in;
|
||||
// The request register stalls only when it holds a valid request that
// cannot advance this cycle.
wire stall_in = ~ready_in && req_valid;
|
||||
|
||||
// The incoming request is not latched as valid while a fence is waiting.
wire lsu_valid = lsu_req_if.valid && ~fence_wait;
|
||||
|
||||
// Prefetches are marked as write-back requests here; the write-back flag
// stored in the metadata buffer (req_wb2) masks prefetches out again.
wire lsu_wb = lsu_req_if.wb | lsu_req_if.is_prefetch;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) req_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_in),
|
||||
.data_in ({lsu_valid, lsu_is_dup, lsu_req_if.is_prefetch, lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_wb, lsu_req_if.store_data}),
|
||||
.data_out ({req_valid, req_is_dup, req_is_prefetch, req_uuid, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data})
|
||||
);
|
||||
|
||||
// Can accept new request?
|
||||
assign lsu_req_if.ready = ~stall_in && ~fence_wait;
|
||||
|
||||
wire [`UUID_BITS-1:0] rsp_uuid;
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [31:0] rsp_pc;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
wire [`INST_LSU_BITS-1:0] rsp_type;
|
||||
wire rsp_is_dup;
|
||||
wire rsp_is_prefetch;
|
||||
|
||||
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
|
||||
wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
reg [`NUM_THREADS-1:0] req_sent_mask;
|
||||
reg is_req_start;
|
||||
|
||||
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
||||
wire mbuf_full;
|
||||
|
||||
`UNUSED_VAR (rsp_type)
|
||||
`UNUSED_VAR (rsp_is_prefetch)
|
||||
|
||||
wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_offset, rsp_offset;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign req_offset[i] = req_addr[i][1:0];
|
||||
end
|
||||
|
||||
wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
|
||||
|
||||
wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
|
||||
// Allocate a metadata-buffer slot when the buffer has room, at least one
// selected thread of a valid request sees the dcache ready, the request is
// in its first submission cycle, and the request has req_wb set.
wire mbuf_push = ~mbuf_full
|
||||
&& (| ({`NUM_THREADS{req_valid}} & req_tmask_dup & dcache_req_if.ready))
|
||||
&& is_req_start // first submission only
|
||||
&& req_wb; // loads only
|
||||
|
||||
// Release the slot when the response being accepted clears the last
// outstanding thread bit recorded for that slot.
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
|
||||
|
||||
assign mbuf_raddr = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: `LSUQ_ADDR_BITS];
|
||||
`UNUSED_VAR (dcache_rsp_if.tag)
|
||||
|
||||
// do not writeback from software prefetch
|
||||
wire req_wb2 = req_wb && ~req_is_prefetch;
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`UUID_BITS + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1 + 1),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) req_metadata (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_addr (mbuf_waddr),
|
||||
.acquire_slot (mbuf_push),
|
||||
.read_addr (mbuf_raddr),
|
||||
.write_data ({req_uuid, req_wid, req_pc, req_tmask, req_rd, req_wb2, req_type, req_offset, req_is_dup, req_is_prefetch}),
|
||||
.read_data ({rsp_uuid, rsp_wid, rsp_pc, rsp_tmask, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup, rsp_is_prefetch}),
|
||||
.release_addr (mbuf_raddr),
|
||||
.release_slot (mbuf_pop),
|
||||
.full (mbuf_full),
|
||||
.empty (mbuf_empty)
|
||||
);
|
||||
|
||||
wire dcache_req_ready = &(dcache_req_if.ready | req_sent_mask | ~req_tmask_dup);
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_sent_mask_n = req_sent_mask | dcache_req_fire;
|
||||
|
||||
// Tracks which threads of the current request the dcache has already
// accepted. On reset, or once every selected thread has been sent
// (dcache_req_ready), the mask is cleared and is_req_start is raised for the
// next request. Otherwise newly fired threads are accumulated and
// is_req_start stays high only while nothing has been sent yet.
always @(posedge clk) begin
|
||||
if (reset || dcache_req_ready) begin
|
||||
req_sent_mask <= '0;
|
||||
is_req_start <= 1'b1;
|
||||
end else begin
|
||||
req_sent_mask <= req_sent_mask_n;
|
||||
is_req_start <= !(|req_sent_mask_n);
|
||||
end
|
||||
end
|
||||
|
||||
// A request may take several cycles to submit all of its threads, so the
// metadata-buffer index acquired on the first cycle has to be remembered.
|
||||
reg [`LSUQ_ADDR_BITS-1:0] req_tag_hold;
|
||||
// On the first submission cycle use the live write index; afterwards use the
// index captured when the slot was acquired.
wire [`LSUQ_ADDR_BITS-1:0] req_tag = is_req_start ? mbuf_waddr : req_tag_hold;
|
||||
always @(posedge clk) begin
|
||||
if (mbuf_push) begin
|
||||
req_tag_hold <= mbuf_waddr;
|
||||
end
|
||||
end
|
||||
|
||||
// Per-slot mask of threads whose response is still outstanding.
// rsp_rem_mask_n is the mask for the responding slot after removing the
// threads covered by the current response.
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.tmask;
|
||||
|
||||
// NOTE(review): rsp_rem_mask has no reset; this presumably relies on a slot
// being written at push before any response can reference it - confirm.
always @(posedge clk) begin
|
||||
// On allocation, record the threads that will actually issue a request.
if (mbuf_push) begin
|
||||
rsp_rem_mask[mbuf_waddr] <= req_tmask_dup;
|
||||
end
|
||||
// On each accepted response, clear the threads that were answered.
if (dcache_rsp_fire) begin
|
||||
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
|
||||
end
|
||||
end
|
||||
|
||||
// Ensure all dependencies for the request are resolved before issuing it:
// - requests with req_wb set need a free metadata-buffer slot, which is only
//   checked on the first submission cycle (is_req_start);
// - requests without req_wb (stores) need the store commit port to be ready.
|
||||
wire req_dep_ready = (req_wb && ~(mbuf_full && is_req_start))
|
||||
|| (~req_wb && st_commit_if.ready);
|
||||
|
||||
// DCache Request
|
||||
|
||||
// Build the per-thread dcache request: byte enables, aligned store data,
// valid/rw/address and the request tag.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
reg [3:0] mem_req_byteen;
|
||||
reg [31:0] mem_req_data;
|
||||
|
||||
// Byte enables. They start as all ones when req_wb is set (the read path,
// since rw = ~req_wb) and all zeros otherwise; the access size then sets the
// bytes touched: size 0 enables the byte at req_offset, size 1 enables that
// byte plus the odd byte of the same halfword, any other size enables all four.
always @(*) begin
|
||||
mem_req_byteen = {4{req_wb}};
|
||||
case (`INST_LSU_WSIZE(req_type))
|
||||
0: mem_req_byteen[req_offset[i]] = 1;
|
||||
1: begin
|
||||
mem_req_byteen[req_offset[i]] = 1;
|
||||
mem_req_byteen[{req_offset[i][1], 1'b1}] = 1;
|
||||
end
|
||||
default : mem_req_byteen = {4{1'b1}};
|
||||
endcase
|
||||
end
|
||||
|
||||
// Store data alignment: shift the low bytes of the thread's data up to the
// byte lane selected by req_offset. The lanes below the offset keep the
// unshifted data; presumably they are ignored because their byte enables are
// clear for sub-word stores - confirm against the cache.
always @(*) begin
|
||||
mem_req_data = req_data[i];
|
||||
case (req_offset[i])
|
||||
1: mem_req_data[31:8] = req_data[i][23:0];
|
||||
2: mem_req_data[31:16] = req_data[i][15:0];
|
||||
3: mem_req_data[31:24] = req_data[i][7:0];
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
// A thread's request is valid when the staged request is valid, its
// dependencies are ready, the thread is selected after duplicate folding,
// and it has not already been sent in an earlier cycle.
assign dcache_req_if.valid[i] = req_valid && req_dep_ready && req_tmask_dup[i] && !req_sent_mask[i];
|
||||
assign dcache_req_if.rw[i] = ~req_wb;
|
||||
// Word address: the two low byte-offset bits are dropped.
assign dcache_req_if.addr[i] = req_addr[i][31:2];
|
||||
assign dcache_req_if.byteen[i] = mem_req_byteen;
|
||||
assign dcache_req_if.data[i] = mem_req_data;
|
||||
// Tag layout (MSB to LSB): uuid, metadata-buffer index, address type.
assign dcache_req_if.tag[i] = {req_uuid, `LSU_TAG_ID_BITS'(req_tag), req_addr_type[i]};
|
||||
end
|
||||
|
||||
assign ready_in = req_dep_ready && dcache_req_ready;
|
||||
|
||||
// send store commit
|
||||
|
||||
// A store is committed straight from the request stage, in the cycle where
// the staged request is a store (~req_wb) and every selected thread has been
// sent or is being accepted (dcache_req_ready).
wire is_store_rsp = req_valid && ~req_wb && dcache_req_ready;
|
||||
|
||||
assign st_commit_if.valid = is_store_rsp;
|
||||
assign st_commit_if.uuid = req_uuid;
|
||||
assign st_commit_if.wid = req_wid;
|
||||
assign st_commit_if.tmask = req_tmask;
|
||||
assign st_commit_if.PC = req_pc;
|
||||
// Stores write no destination register, so rd, wb and data are tied to zero.
assign st_commit_if.rd = 0;
|
||||
assign st_commit_if.wb = 0;
|
||||
// A store commit is always marked as end-of-packet.
assign st_commit_if.eop = 1'b1;
|
||||
assign st_commit_if.data = 0;
|
||||
|
||||
// load response formatting
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] rsp_data;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask_qual;
|
||||
|
||||
// Format the load data returned to each thread.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
// Thread 0, and every thread of a duplicate request, reads lane 0 of the
// response; other threads read their own lane.
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
|
||||
// Select the halfword with offset bit 1, then the byte within it with bit 0.
wire [15:0] rsp_data16 = rsp_offset[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
|
||||
wire [7:0] rsp_data8 = rsp_offset[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
||||
// Sign- or zero-extend to 32 bits according to the load format; any other
// format returns the full word unchanged.
always @(*) begin
|
||||
case (`INST_LSU_FMT(rsp_type))
|
||||
`INST_FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
|
||||
`INST_FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
|
||||
default: rsp_data[i] = rsp_data32;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.tmask;
|
||||
|
||||
// send load commit
|
||||
|
||||
wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
|
||||
.RESETW (1)
|
||||
) rsp_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!load_rsp_stall),
|
||||
.data_in ({dcache_rsp_if.valid, rsp_uuid, rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.uuid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
||||
);
|
||||
|
||||
// Can accept new cache response?
|
||||
assign dcache_rsp_if.ready = ~load_rsp_stall;
|
||||
|
||||
// scope registration
|
||||
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_fire);
|
||||
`SCOPE_ASSIGN (dcache_req_uuid, req_uuid);
|
||||
`SCOPE_ASSIGN (dcache_req_addr, req_addr);
|
||||
`SCOPE_ASSIGN (dcache_req_rw, ~req_wb);
|
||||
`SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen);
|
||||
`SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN (dcache_req_tag, req_tag);
|
||||
`SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.tmask & {`NUM_THREADS{dcache_rsp_fire}});
|
||||
`SCOPE_ASSIGN (dcache_rsp_uuid, rsp_uuid);
|
||||
`SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr);
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
// Simulation-only watchdog for outstanding loads. Each metadata-buffer slot
// has one entry, laid out (MSB to LSB) as {wid, PC, rd, uuid, issue time, valid}.
reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + `NR_BITS + `UUID_BITS + 64 + 1)-1:0] pending_reqs;
|
||||
// NOTE(review): 1 ** n is always 1, so this timeout is a constant 10000
// regardless of L2/L3. Presumably a larger base was intended to scale the
// timeout with the cache hierarchy depth - confirm before changing.
wire [63:0] delay_timeout = 10000 * (1 ** (`L2_ENABLE + `L3_ENABLE));
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pending_reqs <= '0;
|
||||
// The 'else' was missing here: without it the push/pop bookkeeping also ran
// during reset and could override the clear above.
end else begin
|
||||
// Record the request and its issue time when a slot is allocated.
if (mbuf_push) begin
|
||||
pending_reqs[mbuf_waddr] <= {req_wid, req_pc, req_rd, req_uuid, $time, 1'b1};
|
||||
end
|
||||
// Clear the entry when the slot is released.
if (mbuf_pop) begin
|
||||
pending_reqs[mbuf_raddr] <= '0;
|
||||
end
|
||||
end
|
||||
|
||||
// Flag any valid entry that has been pending for delay_timeout or longer.
for (integer i = 0; i < `LSUQ_SIZE; ++i) begin
|
||||
if (pending_reqs[i][0]) begin
|
||||
`ASSERT(($time - pending_reqs[i][1 +: 64]) < delay_timeout,
|
||||
("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+`UUID_BITS+`NR_BITS+32 +: `NW_BITS],
|
||||
pending_reqs[i][1+64+`UUID_BITS+`NR_BITS +: 32],
|
||||
pending_reqs[i][1+64+`UUID_BITS +: `NR_BITS],
|
||||
pending_reqs[i][1+64 +: `UUID_BITS]));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_DCACHE
|
||||
wire dcache_req_fire_any = (| dcache_req_fire);
|
||||
always @(posedge clk) begin
|
||||
if (lsu_req_if.valid && fence_wait) begin
|
||||
dpi_trace("%d: *** D$%0d fence wait\n", $time, CORE_ID);
|
||||
end
|
||||
if (dcache_req_fire_any) begin
|
||||
if (dcache_req_if.rw[0]) begin
|
||||
dpi_trace("%d: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
|
||||
`TRACE_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
dpi_trace(", data=");
|
||||
`TRACE_ARRAY1D(dcache_req_if.data, `NUM_THREADS);
|
||||
dpi_trace(", (#%0d)\n", req_uuid);
|
||||
end else begin
|
||||
dpi_trace("%d: D$%0d Rd Req: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_is_prefetch, req_wid, req_pc, dcache_req_fire);
|
||||
`TRACE_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
dpi_trace(", rd=%0d, is_dup=%b (#%0d)\n", req_rd, req_is_dup, req_uuid);
|
||||
end
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
dpi_trace("%d: D$%0d Rsp: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, tag=%0h, rd=%0d, data=",
|
||||
$time, CORE_ID, rsp_is_prefetch, rsp_wid, rsp_pc, dcache_rsp_if.tmask, mbuf_raddr, rsp_rd);
|
||||
`TRACE_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
|
||||
dpi_trace(", is_dup=%b (#%0d)\n", rsp_is_dup, rsp_uuid);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
@@ -1,146 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_mem_arb #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter ADDR_WIDTH = 1,
|
||||
parameter TAG_IN_WIDTH = 1,
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
parameter TYPE = "P",
|
||||
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// input requests
|
||||
input wire [NUM_REQS-1:0] req_valid_in,
|
||||
input wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] req_tag_in,
|
||||
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] req_addr_in,
|
||||
input wire [NUM_REQS-1:0] req_rw_in,
|
||||
input wire [NUM_REQS-1:0][DATA_SIZE-1:0] req_byteen_in,
|
||||
input wire [NUM_REQS-1:0][DATA_WIDTH-1:0] req_data_in,
|
||||
output wire [NUM_REQS-1:0] req_ready_in,
|
||||
|
||||
// output request
|
||||
output wire req_valid_out,
|
||||
output wire [TAG_OUT_WIDTH-1:0] req_tag_out,
|
||||
output wire [ADDR_WIDTH-1:0] req_addr_out,
|
||||
output wire req_rw_out,
|
||||
output wire [DATA_SIZE-1:0] req_byteen_out,
|
||||
output wire [DATA_WIDTH-1:0] req_data_out,
|
||||
input wire req_ready_out,
|
||||
|
||||
// input response
|
||||
input wire rsp_valid_in,
|
||||
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
|
||||
input wire [DATA_WIDTH-1:0] rsp_data_in,
|
||||
output wire rsp_ready_in,
|
||||
|
||||
// output responses
|
||||
output wire [NUM_REQS-1:0] rsp_valid_out,
|
||||
output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
|
||||
output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_out,
|
||||
input wire [NUM_REQS-1:0] rsp_ready_out
|
||||
);
|
||||
localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
|
||||
localparam RSP_DATAW = TAG_IN_WIDTH + DATA_WIDTH;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
// Request path: widen each requester's tag with its index, pack all request
// fields into one vector per requester, and arbitrate down to one output.
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in_merged;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
wire [TAG_OUT_WIDTH-1:0] req_tag_in_w;
|
||||
|
||||
// Insert the requester index i (LOG_NUM_REQS bits) into the tag at bit
// position TAG_SEL_IDX. Presumably the tag bits above that position are
// shifted up - confirm against VX_bits_insert.
VX_bits_insert #(
|
||||
.N (TAG_IN_WIDTH),
|
||||
.S (LOG_NUM_REQS),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) bits_insert (
|
||||
.data_in (req_tag_in[i]),
|
||||
.sel_in (LOG_NUM_REQS'(i)),
|
||||
.data_out (req_tag_in_w)
|
||||
);
|
||||
|
||||
// Field order here must match the unpacking on req_arb.data_out below.
assign req_data_in_merged[i] = {req_tag_in_w, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
|
||||
end
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (BUFFERED_REQ),
|
||||
.TYPE (TYPE)
|
||||
) req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in (req_data_in_merged),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out),
|
||||
.data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
|
||||
.ready_out (req_ready_out)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Response path: recover the requester index from the response tag, strip
// it from the tag, and route the response to that requester.
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out_merged;
|
||||
|
||||
// Requester index that was inserted at TAG_SEL_IDX on the request path.
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[TAG_SEL_IDX +: LOG_NUM_REQS];
|
||||
|
||||
wire [TAG_IN_WIDTH-1:0] rsp_tag_in_w;
|
||||
|
||||
// Reduce the tag from TAG_OUT_WIDTH back to TAG_IN_WIDTH; presumably this
// removes the LOG_NUM_REQS index bits at TAG_SEL_IDX - confirm against
// VX_bits_remove.
VX_bits_remove #(
|
||||
.N (TAG_OUT_WIDTH),
|
||||
.S (LOG_NUM_REQS),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) bits_remove (
|
||||
.data_in (rsp_tag_in),
|
||||
.data_out (rsp_tag_in_w)
|
||||
);
|
||||
|
||||
VX_stream_demux #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (BUFFERED_RSP)
|
||||
) rsp_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel_in (rsp_sel),
|
||||
.valid_in (rsp_valid_in),
|
||||
.data_in ({rsp_tag_in_w, rsp_data_in}),
|
||||
.ready_in (rsp_ready_in),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out (rsp_data_out_merged),
|
||||
.ready_out (rsp_ready_out)
|
||||
);
|
||||
|
||||
// Unpack each requester's {tag, data} pair in the order it was packed above.
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_data_out_merged[i];
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
// Single requester: no arbitration is needed, so the request and response
// ports are wired straight through and clk/reset are unused.
// NOTE(review): the tag assignments assume `CLOG2(1) is 0 so that
// TAG_OUT_WIDTH equals TAG_IN_WIDTH - TODO confirm.
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
assign req_valid_out = req_valid_in;
|
||||
assign req_tag_out = req_tag_in;
|
||||
assign req_addr_out = req_addr_in;
|
||||
assign req_rw_out = req_rw_in;
|
||||
assign req_byteen_out = req_byteen_in;
|
||||
assign req_data_out = req_data_in;
|
||||
// Back-pressure flows in the opposite direction to the payload.
assign req_ready_in = req_ready_out;
|
||||
|
||||
assign rsp_valid_out = rsp_valid_in;
|
||||
assign rsp_tag_out = rsp_tag_in;
|
||||
assign rsp_data_out = rsp_data_in;
|
||||
assign rsp_ready_in = rsp_ready_out;
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -1,420 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_mem_unit # (
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_mem_unit
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if.master perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Core <-> Dcache
|
||||
VX_dcache_req_if.slave dcache_req_if,
|
||||
VX_dcache_rsp_if.master dcache_rsp_if,
|
||||
|
||||
// Core <-> Icache
|
||||
VX_icache_req_if.slave icache_req_if,
|
||||
VX_icache_rsp_if.master icache_rsp_if,
|
||||
|
||||
// Memory
|
||||
VX_mem_req_if.master mem_req_if,
|
||||
VX_mem_rsp_if.slave mem_rsp_if
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_icache_if(), perf_dcache_if(), perf_smem_if();
|
||||
`endif
|
||||
|
||||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`ICACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
|
||||
) icache_mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
|
||||
) icache_mem_rsp_if();
|
||||
|
||||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
|
||||
) dcache_mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
|
||||
) dcache_mem_rsp_if();
|
||||
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) dcache_req_tmp_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) dcache_rsp_tmp_if();
|
||||
|
||||
`RESET_RELAY (icache_reset);
|
||||
`RESET_RELAY (dcache_reset);
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`ICACHE_ID),
|
||||
.CACHE_SIZE (`ICACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`ICACHE_LINE_SIZE),
|
||||
.NUM_BANKS (1),
|
||||
.WORD_SIZE (`ICACHE_WORD_SIZE),
|
||||
.NUM_REQS (1),
|
||||
.CREQ_SIZE (`ICACHE_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`ICACHE_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`ICACHE_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICACHE_CORE_TAG_ID_BITS),
|
||||
.MEM_TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
|
||||
) icache (
|
||||
`SCOPE_BIND_VX_mem_unit_icache
|
||||
|
||||
.clk (clk),
|
||||
.reset (icache_reset),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (icache_req_if.valid),
|
||||
.core_req_rw (1'b0),
|
||||
.core_req_byteen ('b0),
|
||||
.core_req_addr (icache_req_if.addr),
|
||||
.core_req_data ('x),
|
||||
.core_req_tag (icache_req_if.tag),
|
||||
.core_req_ready (icache_req_if.ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (icache_rsp_if.valid),
|
||||
.core_rsp_data (icache_rsp_if.data),
|
||||
.core_rsp_tag (icache_rsp_if.tag),
|
||||
.core_rsp_ready (icache_rsp_if.ready),
|
||||
`UNUSED_PIN (core_rsp_tmask),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_icache_if),
|
||||
`endif
|
||||
|
||||
// Memory Request
|
||||
.mem_req_valid (icache_mem_req_if.valid),
|
||||
.mem_req_rw (icache_mem_req_if.rw),
|
||||
.mem_req_byteen (icache_mem_req_if.byteen),
|
||||
.mem_req_addr (icache_mem_req_if.addr),
|
||||
.mem_req_data (icache_mem_req_if.data),
|
||||
.mem_req_tag (icache_mem_req_if.tag),
|
||||
.mem_req_ready (icache_mem_req_if.ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (icache_mem_rsp_if.valid),
|
||||
.mem_rsp_data (icache_mem_rsp_if.data),
|
||||
.mem_rsp_tag (icache_mem_rsp_if.tag),
|
||||
.mem_rsp_ready (icache_mem_rsp_if.ready)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`DCACHE_ID),
|
||||
.CACHE_SIZE (`DCACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`DCACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`DCACHE_NUM_BANKS),
|
||||
.NUM_PORTS (`DCACHE_NUM_PORTS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.CREQ_SIZE (`DCACHE_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
|
||||
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
|
||||
.MEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) dcache (
|
||||
`SCOPE_BIND_VX_mem_unit_dcache
|
||||
|
||||
.clk (clk),
|
||||
.reset (dcache_reset),
|
||||
|
||||
// Core req
|
||||
.core_req_valid (dcache_req_tmp_if.valid),
|
||||
.core_req_rw (dcache_req_tmp_if.rw),
|
||||
.core_req_byteen (dcache_req_tmp_if.byteen),
|
||||
.core_req_addr (dcache_req_tmp_if.addr),
|
||||
.core_req_data (dcache_req_tmp_if.data),
|
||||
.core_req_tag (dcache_req_tmp_if.tag),
|
||||
.core_req_ready (dcache_req_tmp_if.ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (dcache_rsp_tmp_if.valid),
|
||||
.core_rsp_tmask (dcache_rsp_tmp_if.tmask),
|
||||
.core_rsp_data (dcache_rsp_tmp_if.data),
|
||||
.core_rsp_tag (dcache_rsp_tmp_if.tag),
|
||||
.core_rsp_ready (dcache_rsp_tmp_if.ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_dcache_if),
|
||||
`endif
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (dcache_mem_req_if.valid),
|
||||
.mem_req_rw (dcache_mem_req_if.rw),
|
||||
.mem_req_byteen (dcache_mem_req_if.byteen),
|
||||
.mem_req_addr (dcache_mem_req_if.addr),
|
||||
.mem_req_data (dcache_mem_req_if.data),
|
||||
.mem_req_tag (dcache_mem_req_if.tag),
|
||||
.mem_req_ready (dcache_mem_req_if.ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (dcache_mem_rsp_if.valid),
|
||||
.mem_rsp_data (dcache_mem_rsp_if.data),
|
||||
.mem_rsp_tag (dcache_mem_rsp_if.tag),
|
||||
.mem_rsp_ready (dcache_mem_rsp_if.ready)
|
||||
);
|
||||
|
||||
if (`SM_ENABLE) begin
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) smem_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) smem_rsp_if();
|
||||
|
||||
`RESET_RELAY (smem_arb_reset);
|
||||
`RESET_RELAY (smem_reset);
|
||||
|
||||
VX_smem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.LANES (`NUM_THREADS),
|
||||
.DATA_SIZE (4),
|
||||
.TAG_IN_WIDTH (`DCACHE_CORE_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (0), // SM flag
|
||||
.TYPE ("P"),
|
||||
.BUFFERED_REQ (2),
|
||||
.BUFFERED_RSP (1)
|
||||
) smem_arb (
|
||||
.clk (clk),
|
||||
.reset (smem_arb_reset),
|
||||
|
||||
// input request
|
||||
.req_valid_in (dcache_req_if.valid),
|
||||
.req_rw_in (dcache_req_if.rw),
|
||||
.req_byteen_in (dcache_req_if.byteen),
|
||||
.req_addr_in (dcache_req_if.addr),
|
||||
.req_data_in (dcache_req_if.data),
|
||||
.req_tag_in (dcache_req_if.tag),
|
||||
.req_ready_in (dcache_req_if.ready),
|
||||
|
||||
// output requests
|
||||
.req_valid_out ({smem_req_if.valid, dcache_req_tmp_if.valid}),
|
||||
.req_rw_out ({smem_req_if.rw, dcache_req_tmp_if.rw}),
|
||||
.req_byteen_out ({smem_req_if.byteen, dcache_req_tmp_if.byteen}),
|
||||
.req_addr_out ({smem_req_if.addr, dcache_req_tmp_if.addr}),
|
||||
.req_data_out ({smem_req_if.data, dcache_req_tmp_if.data}),
|
||||
.req_tag_out ({smem_req_if.tag, dcache_req_tmp_if.tag}),
|
||||
.req_ready_out ({smem_req_if.ready, dcache_req_tmp_if.ready}),
|
||||
|
||||
// input responses
|
||||
.rsp_valid_in ({smem_rsp_if.valid, dcache_rsp_tmp_if.valid}),
|
||||
.rsp_tmask_in ({smem_rsp_if.tmask, dcache_rsp_tmp_if.tmask}),
|
||||
.rsp_data_in ({smem_rsp_if.data, dcache_rsp_tmp_if.data}),
|
||||
.rsp_tag_in ({smem_rsp_if.tag, dcache_rsp_tmp_if.tag}),
|
||||
.rsp_ready_in ({smem_rsp_if.ready, dcache_rsp_tmp_if.ready}),
|
||||
|
||||
// output response
|
||||
.rsp_valid_out (dcache_rsp_if.valid),
|
||||
.rsp_tmask_out (dcache_rsp_if.tmask),
|
||||
.rsp_tag_out (dcache_rsp_if.tag),
|
||||
.rsp_data_out (dcache_rsp_if.data),
|
||||
.rsp_ready_out (dcache_rsp_if.ready)
|
||||
);
|
||||
|
||||
VX_shared_mem #(
|
||||
.CACHE_ID (`SMEM_ID),
|
||||
.CACHE_SIZE (`SMEM_SIZE),
|
||||
.NUM_BANKS (`SMEM_NUM_BANKS),
|
||||
.WORD_SIZE (`SMEM_WORD_SIZE),
|
||||
.NUM_REQS (`SMEM_NUM_REQS),
|
||||
.CREQ_SIZE (`SMEM_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`SMEM_CRSQ_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
|
||||
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
|
||||
.BANK_ADDR_OFFSET (`SMEM_BANK_ADDR_OFFSET)
|
||||
) smem (
|
||||
.clk (clk),
|
||||
.reset (smem_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_smem_if),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (smem_req_if.valid),
|
||||
.core_req_rw (smem_req_if.rw),
|
||||
.core_req_byteen (smem_req_if.byteen),
|
||||
.core_req_addr (smem_req_if.addr),
|
||||
.core_req_data (smem_req_if.data),
|
||||
.core_req_tag (smem_req_if.tag),
|
||||
.core_req_ready (smem_req_if.ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (smem_rsp_if.valid),
|
||||
.core_rsp_tmask (smem_rsp_if.tmask),
|
||||
.core_rsp_data (smem_rsp_if.data),
|
||||
.core_rsp_tag (smem_rsp_if.tag),
|
||||
.core_rsp_ready (smem_rsp_if.ready)
|
||||
);
|
||||
end else begin
|
||||
// core to D-cache request
|
||||
for (genvar i = 0; i < `DCACHE_NUM_REQS; ++i) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW ((32-`CLOG2(`DCACHE_WORD_SIZE)) + 1 + `DCACHE_WORD_SIZE + (8*`DCACHE_WORD_SIZE) + `DCACHE_CORE_TAG_WIDTH)
|
||||
) req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (dcache_req_if.valid[i]),
|
||||
.data_in ({dcache_req_if.addr[i], dcache_req_if.rw[i], dcache_req_if.byteen[i], dcache_req_if.data[i], dcache_req_if.tag[i]}),
|
||||
.ready_in (dcache_req_if.ready[i]),
|
||||
.valid_out (dcache_req_tmp_if.valid[i]),
|
||||
.data_out ({dcache_req_tmp_if.addr[i], dcache_req_tmp_if.rw[i], dcache_req_tmp_if.byteen[i], dcache_req_tmp_if.data[i], dcache_req_tmp_if.tag[i]}),
|
||||
.ready_out (dcache_req_tmp_if.ready[i])
|
||||
);
|
||||
end
|
||||
|
||||
// D-cache to core response
|
||||
assign dcache_rsp_if.valid = dcache_rsp_tmp_if.valid;
|
||||
assign dcache_rsp_if.tmask = dcache_rsp_tmp_if.tmask;
|
||||
assign dcache_rsp_if.tag = dcache_rsp_tmp_if.tag;
|
||||
assign dcache_rsp_if.data = dcache_rsp_tmp_if.data;
|
||||
assign dcache_rsp_tmp_if.ready = dcache_rsp_if.ready;
|
||||
end
|
||||
|
||||
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DCACHE_MEM_TAG_WIDTH'(icache_mem_req_if.tag);
|
||||
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_rsp_tag;
|
||||
assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`ICACHE_MEM_TAG_WIDTH-1:0];
|
||||
`UNUSED_VAR (icache_mem_rsp_tag)
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`DCACHE_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (2)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Source request
|
||||
.req_valid_in ({dcache_mem_req_if.valid, icache_mem_req_if.valid}),
|
||||
.req_rw_in ({dcache_mem_req_if.rw, icache_mem_req_if.rw}),
|
||||
.req_byteen_in ({dcache_mem_req_if.byteen, icache_mem_req_if.byteen}),
|
||||
.req_addr_in ({dcache_mem_req_if.addr, icache_mem_req_if.addr}),
|
||||
.req_data_in ({dcache_mem_req_if.data, icache_mem_req_if.data}),
|
||||
.req_tag_in ({dcache_mem_req_if.tag, icache_mem_req_tag}),
|
||||
.req_ready_in ({dcache_mem_req_if.ready, icache_mem_req_if.ready}),
|
||||
|
||||
// Memory request
|
||||
.req_valid_out (mem_req_if.valid),
|
||||
.req_rw_out (mem_req_if.rw),
|
||||
.req_byteen_out (mem_req_if.byteen),
|
||||
.req_addr_out (mem_req_if.addr),
|
||||
.req_data_out (mem_req_if.data),
|
||||
.req_tag_out (mem_req_if.tag),
|
||||
.req_ready_out (mem_req_if.ready),
|
||||
|
||||
// Source response
|
||||
.rsp_valid_out ({dcache_mem_rsp_if.valid, icache_mem_rsp_if.valid}),
|
||||
.rsp_data_out ({dcache_mem_rsp_if.data, icache_mem_rsp_if.data}),
|
||||
.rsp_tag_out ({dcache_mem_rsp_if.tag, icache_mem_rsp_tag}),
|
||||
.rsp_ready_out ({dcache_mem_rsp_if.ready, icache_mem_rsp_if.ready}),
|
||||
|
||||
// Memory response
|
||||
.rsp_valid_in (mem_rsp_if.valid),
|
||||
.rsp_tag_in (mem_rsp_if.tag),
|
||||
.rsp_data_in (mem_rsp_if.data),
|
||||
.rsp_ready_in (mem_rsp_if.ready)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
`UNUSED_VAR (perf_dcache_if.mem_stalls)
|
||||
`UNUSED_VAR (perf_dcache_if.crsp_stalls)
|
||||
|
||||
assign perf_memsys_if.icache_reads = perf_icache_if.reads;
|
||||
assign perf_memsys_if.icache_read_misses = perf_icache_if.read_misses;
|
||||
assign perf_memsys_if.dcache_reads = perf_dcache_if.reads;
|
||||
assign perf_memsys_if.dcache_writes = perf_dcache_if.writes;
|
||||
assign perf_memsys_if.dcache_read_misses = perf_dcache_if.read_misses;
|
||||
assign perf_memsys_if.dcache_write_misses= perf_dcache_if.write_misses;
|
||||
assign perf_memsys_if.dcache_bank_stalls = perf_dcache_if.bank_stalls;
|
||||
assign perf_memsys_if.dcache_mshr_stalls = perf_dcache_if.mshr_stalls;
|
||||
|
||||
if (`SM_ENABLE) begin
|
||||
assign perf_memsys_if.smem_reads = perf_smem_if.reads;
|
||||
assign perf_memsys_if.smem_writes = perf_smem_if.writes;
|
||||
assign perf_memsys_if.smem_bank_stalls = perf_smem_if.bank_stalls;
|
||||
end else begin
|
||||
assign perf_memsys_if.smem_reads = 0;
|
||||
assign perf_memsys_if.smem_writes = 0;
|
||||
assign perf_memsys_if.smem_bank_stalls = 0;
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_pending_reads <= 0;
|
||||
end else begin
|
||||
perf_mem_pending_reads <= perf_mem_pending_reads +
|
||||
`PERF_CTR_BITS'($signed(2'((mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw) && !(mem_rsp_if.valid && mem_rsp_if.ready)) -
|
||||
2'((mem_rsp_if.valid && mem_rsp_if.ready) && !(mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw))));
|
||||
end
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_lat;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_reads <= 0;
|
||||
perf_mem_writes <= 0;
|
||||
perf_mem_lat <= 0;
|
||||
end else begin
|
||||
if (mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw) begin
|
||||
perf_mem_reads <= perf_mem_reads + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (mem_req_if.valid && mem_req_if.ready && mem_req_if.rw) begin
|
||||
perf_mem_writes <= perf_mem_writes + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
perf_mem_lat <= perf_mem_lat + perf_mem_pending_reads;
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_memsys_if.mem_reads = perf_mem_reads;
|
||||
assign perf_memsys_if.mem_writes = perf_mem_writes;
|
||||
assign perf_memsys_if.mem_latency = perf_mem_lat;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
@@ -1,226 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_muldiv (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
input wire [`INST_MUL_BITS-1:0] alu_op,
|
||||
input wire [`UUID_BITS-1:0] uuid_in,
|
||||
input wire [`NW_BITS-1:0] wid_in,
|
||||
input wire [`NUM_THREADS-1:0] tmask_in,
|
||||
input wire [31:0] PC_in,
|
||||
input wire [`NR_BITS-1:0] rd_in,
|
||||
input wire wb_in,
|
||||
input wire [`NUM_THREADS-1:0][31:0] alu_in1,
|
||||
input wire [`NUM_THREADS-1:0][31:0] alu_in2,
|
||||
|
||||
// Outputs
|
||||
output wire [`UUID_BITS-1:0] uuid_out,
|
||||
output wire [`NW_BITS-1:0] wid_out,
|
||||
output wire [`NUM_THREADS-1:0] tmask_out,
|
||||
output wire [31:0] PC_out,
|
||||
output wire [`NR_BITS-1:0] rd_out,
|
||||
output wire wb_out,
|
||||
output wire [`NUM_THREADS-1:0][31:0] data_out,
|
||||
|
||||
// handshake
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
output wire valid_out,
|
||||
input wire ready_out
|
||||
);
|
||||
|
||||
wire is_div_op = `INST_MUL_IS_DIV(alu_op);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
||||
wire [`UUID_BITS-1:0] mul_uuid_out;
|
||||
wire [`NW_BITS-1:0] mul_wid_out;
|
||||
wire [`NUM_THREADS-1:0] mul_tmask_out;
|
||||
wire [31:0] mul_PC_out;
|
||||
wire [`NR_BITS-1:0] mul_rd_out;
|
||||
wire mul_wb_out;
|
||||
|
||||
wire stall_out;
|
||||
|
||||
wire mul_valid_out;
|
||||
wire mul_valid_in = valid_in && !is_div_op;
|
||||
wire mul_ready_in = ~stall_out || ~mul_valid_out;
|
||||
|
||||
wire is_mulh_in = (alu_op != `INST_MUL_MUL);
|
||||
wire is_signed_mul_a = (alu_op != `INST_MUL_MULHU);
|
||||
wire is_signed_mul_b = (alu_op != `INST_MUL_MULHU && alu_op != `INST_MUL_MULHSU);
|
||||
|
||||
`ifdef IMUL_DPI
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result_tmp;
|
||||
|
||||
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] mul_resultl, mul_resulth;
|
||||
always @(*) begin
|
||||
dpi_imul (mul_fire_in, alu_in1[i], alu_in2[i], is_signed_mul_a, is_signed_mul_b, mul_resultl, mul_resulth);
|
||||
end
|
||||
assign mul_result_tmp[i] = is_mulh_in ? mul_resulth : mul_resultl;
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, mul_result_tmp}),
|
||||
.data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_result})
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
wire is_mulh_out;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] mul_in1 = {is_signed_mul_a & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] mul_in2 = {is_signed_mul_b & alu_in2[i][31], alu_in2[i]};
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [65:0] mul_result_tmp;
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
VX_multiplier #(
|
||||
.WIDTHA (33),
|
||||
.WIDTHB (33),
|
||||
.WIDTHP (66),
|
||||
.SIGNED (1),
|
||||
.LATENCY (`LATENCY_IMUL)
|
||||
) multiplier (
|
||||
.clk (clk),
|
||||
.enable (mul_ready_in),
|
||||
.dataa (mul_in1),
|
||||
.datab (mul_in2),
|
||||
.result (mul_result_tmp)
|
||||
);
|
||||
|
||||
assign mul_result[i] = is_mulh_out ? mul_result_tmp[63:32] : mul_result_tmp[31:0];
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_mulh_in}),
|
||||
.data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] div_result;
|
||||
wire [`UUID_BITS-1:0] div_uuid_out;
|
||||
wire [`NW_BITS-1:0] div_wid_out;
|
||||
wire [`NUM_THREADS-1:0] div_tmask_out;
|
||||
wire [31:0] div_PC_out;
|
||||
wire [`NR_BITS-1:0] div_rd_out;
|
||||
wire div_wb_out;
|
||||
|
||||
wire is_rem_op_in = (alu_op == `INST_MUL_REM) || (alu_op == `INST_MUL_REMU);
|
||||
wire is_signed_div = (alu_op == `INST_MUL_DIV) || (alu_op == `INST_MUL_REM);
|
||||
wire div_valid_in = valid_in && is_div_op;
|
||||
wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL
|
||||
wire div_ready_in;
|
||||
wire div_valid_out;
|
||||
|
||||
`ifdef IDIV_DPI
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] div_result_tmp;
|
||||
|
||||
wire div_fire_in = div_valid_in && div_ready_in;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
dpi_idiv (div_fire_in, alu_in1[i], alu_in2[i], is_signed_div, div_quotient, div_remainder);
|
||||
end
|
||||
assign div_result_tmp[i] = is_rem_op_in ? div_remainder : div_quotient;
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) div_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (div_ready_in),
|
||||
.data_in ({div_valid_in, uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, div_result_tmp}),
|
||||
.data_out ({div_valid_out, div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_result})
|
||||
);
|
||||
|
||||
assign div_ready_in = div_ready_out || ~div_valid_out;
|
||||
|
||||
`else
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp;
|
||||
wire is_rem_op_out;
|
||||
|
||||
// Multi-lane serial divider producing both quotient and remainder.
// The instruction metadata rides along in the tag and comes back on tag_out
// together with the result; is_rem_op_out then picks remainder vs. quotient.
VX_serial_div #(
    .WIDTHN (32),
    .WIDTHD (32),
    .WIDTHQ (32),
    .WIDTHR (32),
    .LANES  (`NUM_THREADS),
    // TAGW must equal the width of the tag_in concatenation below:
    // uuid + wid + tmask + PC(32) + rd + wb(1) + is_rem_op(1).
    // The uuid field is `UUID_BITS wide (see uuid_in), not a fixed 64 bits.
    .TAGW   (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1)
) divide (
    .clk        (clk),
    .reset      (reset),
    .valid_in   (div_valid_in),
    .ready_in   (div_ready_in),
    .signed_mode(is_signed_div),
    .tag_in     ({uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_rem_op_in}),
    .numer      (alu_in1),
    .denom      (alu_in2),
    .quotient   (div_result_tmp),
    .remainder  (rem_result_tmp),
    .ready_out  (div_ready_out),
    .valid_out  (div_valid_out),
    .tag_out    ({div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out})
);
|
||||
|
||||
assign div_result = is_rem_op_out ? rem_result_tmp : div_result_tmp;
|
||||
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire rsp_valid = mul_valid_out || div_valid_out;
|
||||
wire [`UUID_BITS-1:0] rsp_uuid = mul_valid_out ? mul_uuid_out : div_uuid_out;
|
||||
wire [`NW_BITS-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out;
|
||||
wire [31:0] rsp_PC = mul_valid_out ? mul_PC_out : div_PC_out;
|
||||
wire [`NR_BITS-1:0] rsp_rd = mul_valid_out ? mul_rd_out : div_rd_out;
|
||||
wire rsp_wb = mul_valid_out ? mul_wb_out : div_wb_out;
|
||||
wire [`NUM_THREADS-1:0][31:0] rsp_data = mul_valid_out ? mul_result : div_result;
|
||||
|
||||
assign stall_out = ~ready_out && valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({rsp_valid, rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
|
||||
.data_out ({valid_out, uuid_out, wid_out, tmask_out, PC_out, rd_out, wb_out, data_out})
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
assign ready_in = is_div_op ? div_ready_in : mul_ready_in;
|
||||
|
||||
endmodule
|
||||
@@ -1,261 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_pipeline #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_pipeline
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Dcache core request
|
||||
output wire [`NUM_THREADS-1:0] dcache_req_valid,
|
||||
output wire [`NUM_THREADS-1:0] dcache_req_rw,
|
||||
output wire [`NUM_THREADS-1:0][3:0] dcache_req_byteen,
|
||||
output wire [`NUM_THREADS-1:0][29:0] dcache_req_addr,
|
||||
output wire [`NUM_THREADS-1:0][31:0] dcache_req_data,
|
||||
output wire [`NUM_THREADS-1:0][`DCACHE_CORE_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
input wire [`NUM_THREADS-1:0] dcache_req_ready,
|
||||
|
||||
// Dcache core response
|
||||
input wire dcache_rsp_valid,
|
||||
input wire [`NUM_THREADS-1:0] dcache_rsp_tmask,
|
||||
input wire [`NUM_THREADS-1:0][31:0] dcache_rsp_data,
|
||||
input wire [`DCACHE_CORE_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
output wire dcache_rsp_ready,
|
||||
|
||||
// Icache core request
|
||||
output wire icache_req_valid,
|
||||
output wire [29:0] icache_req_addr,
|
||||
output wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_req_tag,
|
||||
input wire icache_req_ready,
|
||||
|
||||
// Icache core response
|
||||
input wire icache_rsp_valid,
|
||||
input wire [31:0] icache_rsp_data,
|
||||
input wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_rsp_tag,
|
||||
output wire icache_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
//
|
||||
// Dcache request
|
||||
//
|
||||
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_req_if();
|
||||
|
||||
assign dcache_req_valid = dcache_req_if.valid;
|
||||
assign dcache_req_rw = dcache_req_if.rw;
|
||||
assign dcache_req_byteen = dcache_req_if.byteen;
|
||||
assign dcache_req_addr = dcache_req_if.addr;
|
||||
assign dcache_req_data = dcache_req_if.data;
|
||||
assign dcache_req_tag = dcache_req_if.tag;
|
||||
assign dcache_req_if.ready = dcache_req_ready;
|
||||
|
||||
//
|
||||
// Dcache response
|
||||
//
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_rsp_if();
|
||||
|
||||
assign dcache_rsp_if.valid = dcache_rsp_valid;
|
||||
assign dcache_rsp_if.tmask = dcache_rsp_tmask;
|
||||
assign dcache_rsp_if.data = dcache_rsp_data;
|
||||
assign dcache_rsp_if.tag = dcache_rsp_tag;
|
||||
assign dcache_rsp_ready = dcache_rsp_if.ready;
|
||||
|
||||
//
|
||||
// Icache request
|
||||
//
|
||||
|
||||
VX_icache_req_if #(
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_req_if();
|
||||
|
||||
assign icache_req_valid = icache_req_if.valid;
|
||||
assign icache_req_addr = icache_req_if.addr;
|
||||
assign icache_req_tag = icache_req_if.tag;
|
||||
assign icache_req_if.ready = icache_req_ready;
|
||||
|
||||
//
|
||||
// Icache response
|
||||
//
|
||||
|
||||
VX_icache_rsp_if #(
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_rsp_if();
|
||||
|
||||
assign icache_rsp_if.valid = icache_rsp_valid;
|
||||
assign icache_rsp_if.data = icache_rsp_data;
|
||||
assign icache_rsp_if.tag = icache_rsp_tag;
|
||||
assign icache_rsp_ready = icache_rsp_if.ready;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_fetch_to_csr_if fetch_to_csr_if();
|
||||
VX_cmt_to_csr_if cmt_to_csr_if();
|
||||
VX_decode_if decode_if();
|
||||
VX_branch_ctl_if branch_ctl_if();
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
VX_ifetch_rsp_if ifetch_rsp_if();
|
||||
VX_alu_req_if alu_req_if();
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_csr_req_if csr_req_if();
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if fpu_req_if();
|
||||
`endif
|
||||
VX_gpu_req_if gpu_req_if();
|
||||
VX_writeback_if writeback_if();
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
VX_commit_if alu_commit_if();
|
||||
VX_commit_if ld_commit_if();
|
||||
VX_commit_if st_commit_if();
|
||||
VX_commit_if csr_commit_if();
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if fpu_commit_if();
|
||||
`endif
|
||||
VX_commit_if gpu_commit_if();
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_pipeline_if perf_pipeline_if();
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (fetch_reset);
|
||||
`RESET_RELAY (decode_reset);
|
||||
`RESET_RELAY (issue_reset);
|
||||
`RESET_RELAY (execute_reset);
|
||||
`RESET_RELAY (commit_reset);
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) fetch (
|
||||
`SCOPE_BIND_VX_pipeline_fetch
|
||||
.clk (clk),
|
||||
.reset (fetch_reset),
|
||||
.icache_req_if (icache_req_if),
|
||||
.icache_rsp_if (icache_rsp_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
VX_decode #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (decode_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_decode_if (perf_pipeline_if.decode),
|
||||
`endif
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
.decode_if (decode_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if)
|
||||
);
|
||||
|
||||
VX_issue #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) issue (
|
||||
`SCOPE_BIND_VX_pipeline_issue
|
||||
|
||||
.clk (clk),
|
||||
.reset (issue_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_issue_if (perf_pipeline_if.issue),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_req_if (fpu_req_if),
|
||||
`endif
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
VX_execute #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) execute (
|
||||
`SCOPE_BIND_VX_pipeline_execute
|
||||
|
||||
.clk (clk),
|
||||
.reset (execute_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
|
||||
.dcache_req_if (dcache_req_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_req_if (fpu_req_if),
|
||||
`endif
|
||||
.gpu_req_if (gpu_req_if),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.st_commit_if (st_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
VX_commit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) commit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.st_commit_if (st_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if),
|
||||
.cmt_to_csr_if (cmt_to_csr_if)
|
||||
);
|
||||
|
||||
endmodule
|
||||
@@ -1,5 +1,18 @@
|
||||
`ifndef VX_PLATFORM
|
||||
`define VX_PLATFORM
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_PLATFORM_VH
|
||||
`define VX_PLATFORM_VH
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "util_dpi.vh"
|
||||
@@ -9,8 +22,36 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`ifdef VIVADO
|
||||
`define STRING
|
||||
`else
|
||||
`define STRING string
|
||||
`endif
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
`ifndef NDEBUG
|
||||
`define DEBUG_BLOCK(x) x
|
||||
`else
|
||||
`define DEBUG_BLOCK(x)
|
||||
`endif
|
||||
`define IGNORE_UNOPTFLAT_BEGIN
|
||||
`define IGNORE_UNOPTFLAT_END
|
||||
`define IGNORE_UNUSED_BEGIN
|
||||
`define IGNORE_UNUSED_END
|
||||
`define IGNORE_WARNINGS_BEGIN
|
||||
`define IGNORE_WARNINGS_END
|
||||
`define UNUSED_PARAM(x)
|
||||
`define UNUSED_SPARAM(x)
|
||||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define UNUSED_ARG(x) x
|
||||
`define TRACE(level, args) $write args
|
||||
`else
|
||||
`ifdef VERILATOR
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
`define TRACING_OFF /* verilator tracing_off */
|
||||
`ifndef NDEBUG
|
||||
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
@@ -19,6 +60,10 @@
|
||||
`define DEBUG_BLOCK(x)
|
||||
`endif
|
||||
|
||||
`define IGNORE_UNOPTFLAT_BEGIN /* verilator lint_off UNOPTFLAT */
|
||||
|
||||
// Closes a region opened with IGNORE_UNOPTFLAT_BEGIN by turning the
// UNOPTFLAT lint check back on (it previously repeated lint_off, leaving
// the warning disabled for the rest of the compilation unit).
`define IGNORE_UNOPTFLAT_END /* verilator lint_on UNOPTFLAT */
|
||||
|
||||
`define IGNORE_UNUSED_BEGIN /* verilator lint_off UNUSED */
|
||||
|
||||
`define IGNORE_UNUSED_END /* verilator lint_on UNUSED */
|
||||
@@ -30,7 +75,9 @@
|
||||
/* verilator lint_off UNDRIVEN */ \
|
||||
/* verilator lint_off DECLFILENAME */ \
|
||||
/* verilator lint_off IMPLICIT */ \
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
/* verilator lint_off PINMISSING */ \
|
||||
/* verilator lint_off IMPORTSTAR */ \
|
||||
/* verilator lint_off UNSIGNED */
|
||||
|
||||
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
|
||||
/* verilator lint_on PINCONNECTEMPTY */ \
|
||||
@@ -39,68 +86,80 @@
|
||||
/* verilator lint_on UNDRIVEN */ \
|
||||
/* verilator lint_on DECLFILENAME */ \
|
||||
/* verilator lint_on IMPLICIT */ \
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
/* verilator lint_on PINMISSING */ \
|
||||
/* verilator lint_on IMPORTSTAR */ \
|
||||
/* verilator lint_on UNSIGNED */
|
||||
|
||||
`define UNUSED_PARAM(x) /* verilator lint_off UNUSED */ \
|
||||
localparam __``x = x; \
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
`define UNUSED_VAR(x) always @(x) begin end
|
||||
`define UNUSED_SPARAM(x) /* verilator lint_off UNUSED */ \
|
||||
localparam `STRING __``x = x; \
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
|
||||
. x () \
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
`define UNUSED_VAR(x) if (1) begin \
|
||||
/* verilator lint_off UNUSED */ \
|
||||
wire [$bits(x)-1:0] __x = x; \
|
||||
/* verilator lint_on UNUSED */ \
|
||||
end
|
||||
|
||||
`define ERROR(msg) \
|
||||
$error msg
|
||||
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
|
||||
. x () \
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
`define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
/* verilator lint_on UNUSED */
|
||||
`define TRACE(level, args) dpi_trace(level, $sformatf args)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
`ifdef SIMULATION
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
if (!(cond)) $error msg; \
|
||||
endgenerate
|
||||
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) begin \
|
||||
assert(cond) else $error msg; \
|
||||
end
|
||||
`define ERROR(msg) \
|
||||
$error msg
|
||||
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
`define TRACING_OFF /* verilator tracing_off */
|
||||
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
|
||||
`else // SYNTHESIS
|
||||
|
||||
`define DEBUG_BLOCK(x)
|
||||
`define IGNORE_UNUSED_BEGIN
|
||||
`define IGNORE_UNUSED_END
|
||||
`define IGNORE_WARNINGS_BEGIN
|
||||
`define IGNORE_WARNINGS_END
|
||||
`define UNUSED_PARAM(x)
|
||||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define ERROR(msg)
|
||||
`define ASSERT(cond, msg) if (cond);
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
`define RUNTIME_ASSERT(cond, msg)
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
|
||||
`endif // SYNTHESIS
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) begin \
|
||||
assert(cond) else $error msg; \
|
||||
end
|
||||
`else
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
`define ERROR(msg) //
|
||||
`define ASSERT(cond, msg) //
|
||||
`define RUNTIME_ASSERT(cond, msg)
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef QUARTUS
|
||||
`define MAX_FANOUT 4
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
`define DISABLE_BRAM (* ramstyle = "logic" *)
|
||||
`define PRESERVE_REG (* preserve *)
|
||||
`define PRESERVE_NET (* preserve *)
|
||||
`elsif VIVADO
|
||||
`define MAX_FANOUT 4
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
`define USE_FAST_BRAM (* ram_style = "distributed" *)
|
||||
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
|
||||
`define DISABLE_BRAM (* ram_style = "registers" *)
|
||||
`define PRESERVE_NET (* keep = "true" *)
|
||||
`else
|
||||
`define MAX_FANOUT 4
|
||||
`define IF_DATA_SIZE(x) x.DATA_WIDTH
|
||||
`define USE_FAST_BRAM
|
||||
`define NO_RW_RAM_CHECK
|
||||
`define DISABLE_BRAM
|
||||
`define PRESERVE_REG
|
||||
`define PRESERVE_NET
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -112,52 +171,105 @@
|
||||
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
|
||||
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
|
||||
|
||||
`define ABS(x) (($signed(x) < 0) ? (-$signed(x)) : (x));
|
||||
`define ABS(x) (((x) < 0) ? (-(x)) : (x));
|
||||
|
||||
`ifndef MIN
|
||||
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
`define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
`endif
|
||||
|
||||
`define UP(x) (((x) > 0) ? (x) : 1)
|
||||
`ifndef MAX
|
||||
`define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
`endif
|
||||
|
||||
`ifndef CLAMP
|
||||
`define CLAMP(x, lo, hi) (((x) > (hi)) ? (hi) : (((x) < (lo)) ? (lo) : (x)))
|
||||
`endif
|
||||
|
||||
`ifndef UP
|
||||
`define UP(x) (((x) != 0) ? (x) : 1)
|
||||
`endif
|
||||
|
||||
// Selects the upper s bits of x, i.e. x[$bits(x)-1 -: s].
// s is parenthesized so an expression argument (e.g. A+B) is subtracted as a whole.
`define RTRIM(x, s) x[$bits(x)-1:($bits(x)-(s))]
|
||||
|
||||
// Selects the lower s bits of x, i.e. x[s-1:0].
// s is parenthesized so an expression argument keeps its intended precedence.
`define LTRIM(x, s) x[(s)-1:0]
|
||||
|
||||
`define TRACE_ARRAY1D(a, m) \
|
||||
dpi_trace("{"); \
|
||||
for (integer i = (m-1); i >= 0; --i) begin \
|
||||
if (i != (m-1)) dpi_trace(", "); \
|
||||
dpi_trace("0x%0h", a[i]); \
|
||||
`define TRACE_ARRAY1D(lvl, arr, m) \
|
||||
`TRACE(lvl, ("{")); \
|
||||
for (integer __i = (m-1); __i >= 0; --__i) begin \
|
||||
if (__i != (m-1)) `TRACE(lvl, (", ")); \
|
||||
`TRACE(lvl, ("0x%0h", arr[__i])); \
|
||||
end \
|
||||
dpi_trace("}"); \
|
||||
`TRACE(lvl, ("}"));
|
||||
|
||||
`define TRACE_ARRAY2D(a, m, n) \
|
||||
dpi_trace("{"); \
|
||||
for (integer i = n-1; i >= 0; --i) begin \
|
||||
if (i != (n-1)) dpi_trace(", "); \
|
||||
dpi_trace("{"); \
|
||||
for (integer j = (m-1); j >= 0; --j) begin \
|
||||
if (j != (m-1)) dpi_trace(", "); \
|
||||
dpi_trace("0x%0h", a[i][j]); \
|
||||
`define TRACE_ARRAY2D(lvl, arr, m, n) \
|
||||
`TRACE(lvl, ("{")); \
|
||||
for (integer __i = n-1; __i >= 0; --__i) begin \
|
||||
if (__i != (n-1)) `TRACE(lvl, (", ")); \
|
||||
`TRACE(lvl, ("{")); \
|
||||
for (integer __j = (m-1); __j >= 0; --__j) begin \
|
||||
if (__j != (m-1)) `TRACE(lvl, (", "));\
|
||||
`TRACE(lvl, ("0x%0h", arr[__i][__j])); \
|
||||
end \
|
||||
dpi_trace("}"); \
|
||||
`TRACE(lvl, ("}")); \
|
||||
end \
|
||||
dpi_trace("}")
|
||||
`TRACE(lvl, ("}"))
|
||||
|
||||
`define RESET_RELAY(signal) \
|
||||
wire signal; \
|
||||
VX_reset_relay __``signal ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.reset_o (signal) \
|
||||
`define RESET_RELAY_EX(dst, src, size, fanout) \
|
||||
wire [size-1:0] dst; \
|
||||
VX_reset_relay #(.N(size), .MAX_FANOUT(fanout)) __``dst ( \
|
||||
.clk (clk), \
|
||||
.reset (src), \
|
||||
.reset_o (dst) \
|
||||
)
|
||||
|
||||
`define POP_COUNT(out, in) \
|
||||
VX_popcount #( \
|
||||
.N ($bits(in)) \
|
||||
) __``out ( \
|
||||
.in_i (in), \
|
||||
.cnt_o (out) \
|
||||
)
|
||||
`define RESET_RELAY_EN(dst, src, enable) \
|
||||
`RESET_RELAY_EX (dst, src, 1, ((enable) ? 0 : -1))
|
||||
|
||||
`endif
|
||||
`define RESET_RELAY(dst, src) \
|
||||
`RESET_RELAY_EX (dst, src, 1, 0)
|
||||
|
||||
// size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2
|
||||
`define OUT_REG_TO_EB_SIZE(out_reg) `MIN(out_reg, 2)
|
||||
|
||||
// reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2
|
||||
// Bit 0 of out_reg contributes 0/1; (out_reg >> 2) << 1 adds 2 once out_reg reaches 4.
// The argument is parenthesized so expression arguments bind correctly around & and >>.
`define OUT_REG_TO_EB_REG(out_reg) (((out_reg) & 1) + (((out_reg) >> 2) << 1))
|
||||
|
||||
`define REPEAT(n,f,s) `_REPEAT_``n(f,s)
|
||||
`define _REPEAT_0(f,s)
|
||||
`define _REPEAT_1(f,s) `f(0)
|
||||
`define _REPEAT_2(f,s) `f(1) `s `_REPEAT_1(f,s)
|
||||
`define _REPEAT_3(f,s) `f(2) `s `_REPEAT_2(f,s)
|
||||
`define _REPEAT_4(f,s) `f(3) `s `_REPEAT_3(f,s)
|
||||
`define _REPEAT_5(f,s) `f(4) `s `_REPEAT_4(f,s)
|
||||
`define _REPEAT_6(f,s) `f(5) `s `_REPEAT_5(f,s)
|
||||
`define _REPEAT_7(f,s) `f(6) `s `_REPEAT_6(f,s)
|
||||
`define _REPEAT_8(f,s) `f(7) `s `_REPEAT_7(f,s)
|
||||
`define _REPEAT_9(f,s) `f(8) `s `_REPEAT_8(f,s)
|
||||
`define _REPEAT_10(f,s) `f(9) `s `_REPEAT_9(f,s)
|
||||
`define _REPEAT_11(f,s) `f(10) `s `_REPEAT_10(f,s)
|
||||
`define _REPEAT_12(f,s) `f(11) `s `_REPEAT_11(f,s)
|
||||
`define _REPEAT_13(f,s) `f(12) `s `_REPEAT_12(f,s)
|
||||
`define _REPEAT_14(f,s) `f(13) `s `_REPEAT_13(f,s)
|
||||
`define _REPEAT_15(f,s) `f(14) `s `_REPEAT_14(f,s)
|
||||
`define _REPEAT_16(f,s) `f(15) `s `_REPEAT_15(f,s)
|
||||
`define _REPEAT_17(f,s) `f(16) `s `_REPEAT_16(f,s)
|
||||
`define _REPEAT_18(f,s) `f(17) `s `_REPEAT_17(f,s)
|
||||
`define _REPEAT_19(f,s) `f(18) `s `_REPEAT_18(f,s)
|
||||
`define _REPEAT_20(f,s) `f(19) `s `_REPEAT_19(f,s)
|
||||
`define _REPEAT_21(f,s) `f(20) `s `_REPEAT_20(f,s)
|
||||
`define _REPEAT_22(f,s) `f(21) `s `_REPEAT_21(f,s)
|
||||
`define _REPEAT_23(f,s) `f(22) `s `_REPEAT_22(f,s)
|
||||
`define _REPEAT_24(f,s) `f(23) `s `_REPEAT_23(f,s)
|
||||
`define _REPEAT_25(f,s) `f(24) `s `_REPEAT_24(f,s)
|
||||
`define _REPEAT_26(f,s) `f(25) `s `_REPEAT_25(f,s)
|
||||
`define _REPEAT_27(f,s) `f(26) `s `_REPEAT_26(f,s)
|
||||
`define _REPEAT_28(f,s) `f(27) `s `_REPEAT_27(f,s)
|
||||
`define _REPEAT_29(f,s) `f(28) `s `_REPEAT_28(f,s)
|
||||
`define _REPEAT_30(f,s) `f(29) `s `_REPEAT_29(f,s)
|
||||
`define _REPEAT_31(f,s) `f(30) `s `_REPEAT_30(f,s)
|
||||
`define _REPEAT_32(f,s) `f(31) `s `_REPEAT_31(f,s)
|
||||
|
||||
`define REPEAT_COMMA ,
|
||||
`define REPEAT_SEMICOLON ;
|
||||
|
||||
`endif // VX_PLATFORM_VH
|
||||
|
||||
@@ -1,89 +1,68 @@
|
||||
`ifndef VX_SCOPE
|
||||
`define VX_SCOPE
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_SCOPE_VH
|
||||
`define VX_SCOPE_VH
|
||||
|
||||
`ifdef SCOPE
|
||||
|
||||
`include "scope-defs.vh"
|
||||
`define SCOPE_IO_DECL \
|
||||
input wire scope_reset, \
|
||||
input wire scope_bus_in, \
|
||||
output wire scope_bus_out,
|
||||
|
||||
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
|
||||
`define SCOPE_IO_SWITCH(__count) \
|
||||
wire scope_bus_in_w [__count]; \
|
||||
wire scope_bus_out_w [__count]; \
|
||||
`RESET_RELAY_EX(scope_reset_w, scope_reset, __count, 4); \
|
||||
VX_scope_switch #( \
|
||||
.N (__count) \
|
||||
) scope_switch ( \
|
||||
.clk (clk), \
|
||||
.reset (scope_reset), \
|
||||
.req_in (scope_bus_in), \
|
||||
.rsp_out (scope_bus_out), \
|
||||
.req_out (scope_bus_in_w), \
|
||||
.rsp_in (scope_bus_out_w) \
|
||||
);
|
||||
|
||||
`define SCOPE_SIZE 1024
|
||||
`define SCOPE_IO_BIND(__i) \
|
||||
.scope_reset (scope_reset_w[__i]), \
|
||||
.scope_bus_in (scope_bus_in_w[__i]), \
|
||||
.scope_bus_out (scope_bus_out_w[__i]),
|
||||
|
||||
`define SCOPE_IO_UNUSED() \
|
||||
`UNUSED_VAR (scope_reset); \
|
||||
`UNUSED_VAR (scope_bus_in); \
|
||||
assign scope_bus_out = 0;
|
||||
|
||||
`define SCOPE_IO_UNUSED_W(__i) \
|
||||
`UNUSED_VAR (scope_reset_w[__i]); \
|
||||
`UNUSED_VAR (scope_bus_in_w[__i]); \
|
||||
assign scope_bus_out_w[__i] = 0;
|
||||
|
||||
`else
|
||||
|
||||
`define SCOPE_IO_VX_icache_stage
|
||||
`define SCOPE_IO_DECL
|
||||
|
||||
`define SCOPE_IO_VX_fetch
|
||||
`define SCOPE_IO_SWITCH(__count)
|
||||
|
||||
`define SCOPE_BIND_VX_fetch_icache_stage
|
||||
`define SCOPE_IO_BIND(__i)
|
||||
|
||||
`define SCOPE_BIND_VX_fetch_warp_sched
|
||||
`define SCOPE_IO_UNUSED_W(__i)
|
||||
|
||||
`define SCOPE_IO_VX_warp_sched
|
||||
|
||||
`define SCOPE_IO_VX_pipeline
|
||||
|
||||
`define SCOPE_BIND_VX_pipeline_fetch
|
||||
|
||||
`define SCOPE_IO_VX_core
|
||||
|
||||
`define SCOPE_BIND_VX_core_pipeline
|
||||
|
||||
`define SCOPE_IO_VX_cluster
|
||||
|
||||
`define SCOPE_BIND_VX_cluster_core(__i__)
|
||||
|
||||
`define SCOPE_IO_Vortex
|
||||
|
||||
`define SCOPE_BIND_Vortex_cluster(__i__)
|
||||
|
||||
`define SCOPE_BIND_afu_vortex
|
||||
|
||||
`define SCOPE_IO_VX_lsu_unit
|
||||
|
||||
`define SCOPE_IO_VX_gpu_unit
|
||||
|
||||
`define SCOPE_IO_VX_execute
|
||||
|
||||
`define SCOPE_BIND_VX_execute_lsu_unit
|
||||
|
||||
`define SCOPE_BIND_VX_execute_gpu_unit
|
||||
|
||||
`define SCOPE_BIND_VX_pipeline_execute
|
||||
|
||||
`define SCOPE_IO_VX_issue
|
||||
|
||||
`define SCOPE_BIND_VX_pipeline_issue
|
||||
|
||||
`define SCOPE_IO_VX_bank
|
||||
|
||||
`define SCOPE_IO_VX_cache
|
||||
|
||||
`define SCOPE_BIND_VX_cache_bank(__i__)
|
||||
|
||||
`define SCOPE_BIND_Vortex_l3cache
|
||||
|
||||
`define SCOPE_BIND_VX_cluster_l2cache
|
||||
|
||||
`define SCOPE_IO_VX_mem_unit
|
||||
|
||||
`define SCOPE_BIND_VX_mem_unit_dcache
|
||||
|
||||
`define SCOPE_BIND_VX_core_mem_unit
|
||||
|
||||
`define SCOPE_BIND_VX_mem_unit_icache
|
||||
|
||||
`define SCOPE_BIND_VX_mem_unit_smem
|
||||
|
||||
`define SCOPE_DECL_SIGNALS
|
||||
|
||||
`define SCOPE_DATA_LIST
|
||||
|
||||
`define SCOPE_UPDATE_LIST
|
||||
|
||||
`define SCOPE_TRIGGER
|
||||
|
||||
`define SCOPE_ASSIGN(d,s)
|
||||
`define SCOPE_IO_UNUSED(__i)
|
||||
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`endif // VX_SCOPE_VH
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Register scoreboard. Keeps one in-use bit per (warp, register) pair:
//  - the bit is set when an instruction with ibuffer_if.wb is accepted on ibuffer_if;
//  - the bit is cleared when a writeback flagged eop is accepted on writeback_if.
// ibuffer_if.ready is driven low while any of the registered rd/rs1/rs2/rs3
// lookups reports an in-use register.
module VX_scoreboard #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_ibuffer_if.slave ibuffer_if,
|
||||
VX_writeback_if.slave writeback_if
|
||||
);
|
||||
// Current and next-state in-use table, indexed [warp][register].
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
|
||||
wire reserve_reg = ibuffer_if.valid && ibuffer_if.ready && ibuffer_if.wb;
|
||||
|
||||
wire release_reg = writeback_if.valid && writeback_if.ready && writeback_if.eop;
|
||||
|
||||
// Next-state computation. The release is applied after the reserve, so when
// both address the same bit in one cycle the bit ends up cleared.
always @(*) begin
|
||||
inuse_regs_n = inuse_regs;
|
||||
if (reserve_reg) begin
|
||||
inuse_regs_n[ibuffer_if.wid][ibuffer_if.rd] = 1;
|
||||
end
|
||||
if (release_reg) begin
|
||||
inuse_regs_n[writeback_if.wid][writeback_if.rd] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
inuse_regs <= '0;
|
||||
end else begin
|
||||
inuse_regs <= inuse_regs_n;
|
||||
end
|
||||
end
|
||||
|
||||
// Registered in-use lookups. They index the next-state table (inuse_regs_n)
// with the ibuffer_if *_n fields, and are not cleared by reset.
reg deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3;
|
||||
|
||||
always @(posedge clk) begin
|
||||
deq_inuse_rd <= inuse_regs_n[ibuffer_if.wid_n][ibuffer_if.rd_n];
|
||||
deq_inuse_rs1 <= inuse_regs_n[ibuffer_if.wid_n][ibuffer_if.rs1_n];
|
||||
deq_inuse_rs2 <= inuse_regs_n[ibuffer_if.wid_n][ibuffer_if.rs2_n];
|
||||
deq_inuse_rs3 <= inuse_regs_n[ibuffer_if.wid_n][ibuffer_if.rs3_n];
|
||||
end
|
||||
|
||||
// Writebacks are always accepted.
assign writeback_if.ready = 1'b1;
|
||||
|
||||
// Ready only when none of rd/rs1/rs2/rs3 is flagged in use.
assign ibuffer_if.ready = ~(deq_inuse_rd
|
||||
| deq_inuse_rs1
|
||||
| deq_inuse_rs2
|
||||
| deq_inuse_rs3);
|
||||
|
||||
`UNUSED_VAR (writeback_if.PC)
|
||||
|
||||
reg [31:0] deadlock_ctr;
|
||||
// NOTE(review): 1 ** n evaluates to 1 for every n, so this timeout is always
// 10000 regardless of L2_ENABLE / L3_ENABLE. A base other than 1 may have been
// intended to scale the timeout with the cache hierarchy -- confirm.
wire [31:0] deadlock_timeout = 10000 * (1 ** (`L2_ENABLE + `L3_ENABLE));
|
||||
|
||||
// Simulation checks: optional stall trace, a check that a released register
// was actually marked in use, and a stall counter compared against
// deadlock_timeout. The counter increments on every cycle where ibuffer_if is
// valid but not ready, clears when a valid instruction is accepted, and holds
// its value otherwise.
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
deadlock_ctr <= 0;
|
||||
end else begin
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
dpi_trace("%d: *** core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b (#%0d)\n",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3, ibuffer_if.uuid);
|
||||
end
|
||||
`endif
|
||||
if (release_reg) begin
|
||||
`ASSERT(inuse_regs[writeback_if.wid][writeback_if.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd,writeback_if.uuid));
|
||||
end
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
deadlock_ctr <= deadlock_ctr + 1;
|
||||
`ASSERT(deadlock_ctr < deadlock_timeout,
|
||||
("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b (#%0d)",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3, ibuffer_if.uuid));
|
||||
end else if (ibuffer_if.valid && ibuffer_if.ready) begin
|
||||
deadlock_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -1,160 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Shared-memory request/response arbiter.
// Requests: each lane's request is routed by VX_stream_demux to one of NUM_REQS
// outputs; the output is selected by the LOG_NUM_REQS tag bits starting at
// TAG_SEL_IDX, and the forwarded tag (VX_bits_remove output) is TAG_OUT_WIDTH wide.
// Responses: the NUM_REQS response streams are merged into one by
// VX_stream_arbiter; each input tag is widened back to TAG_IN_WIDTH by
// VX_bits_insert with the input index as sel_in.
// When NUM_REQS is not greater than 1, all ports are wired straight through.
module VX_smem_arb #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LANES = 1,
|
||||
parameter DATA_SIZE = 1,
|
||||
parameter TAG_IN_WIDTH = 1,
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
parameter TYPE = "P",
|
||||
|
||||
// Derived widths; computed from the parameters above.
parameter ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)),
|
||||
parameter DATA_WIDTH = (8 * DATA_SIZE),
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// input request
|
||||
input wire [LANES-1:0] req_valid_in,
|
||||
input wire [LANES-1:0] req_rw_in,
|
||||
input wire [LANES-1:0][DATA_SIZE-1:0] req_byteen_in,
|
||||
input wire [LANES-1:0][ADDR_WIDTH-1:0] req_addr_in,
|
||||
input wire [LANES-1:0][DATA_WIDTH-1:0] req_data_in,
|
||||
input wire [LANES-1:0][TAG_IN_WIDTH-1:0] req_tag_in,
|
||||
output wire [LANES-1:0] req_ready_in,
|
||||
|
||||
// output requests
|
||||
output wire [NUM_REQS-1:0][LANES-1:0] req_valid_out,
|
||||
output wire [NUM_REQS-1:0][LANES-1:0] req_rw_out,
|
||||
output wire [NUM_REQS-1:0][LANES-1:0][DATA_SIZE-1:0] req_byteen_out,
|
||||
output wire [NUM_REQS-1:0][LANES-1:0][ADDR_WIDTH-1:0] req_addr_out,
|
||||
output wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0] req_data_out,
|
||||
output wire [NUM_REQS-1:0][LANES-1:0][TAG_OUT_WIDTH-1:0] req_tag_out,
|
||||
input wire [NUM_REQS-1:0][LANES-1:0] req_ready_out,
|
||||
|
||||
// input responses
|
||||
input wire [NUM_REQS-1:0] rsp_valid_in,
|
||||
input wire [NUM_REQS-1:0][LANES-1:0] rsp_tmask_in,
|
||||
input wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0] rsp_data_in,
|
||||
input wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] rsp_tag_in,
|
||||
output wire [NUM_REQS-1:0] rsp_ready_in,
|
||||
|
||||
// output response
|
||||
output wire rsp_valid_out,
|
||||
output wire [LANES-1:0] rsp_tmask_out,
|
||||
output wire [LANES-1:0][DATA_WIDTH-1:0] rsp_data_out,
|
||||
output wire [TAG_IN_WIDTH-1:0] rsp_tag_out,
|
||||
input wire rsp_ready_out
|
||||
);
|
||||
// Per-lane request bundle: {tag, addr, rw, byteen, data}.
localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
|
||||
// Response bundle: {tag, per-lane tmask, per-lane data}.
localparam RSP_DATAW = LANES * (1 + DATA_WIDTH) + TAG_IN_WIDTH;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [LANES-1:0][REQ_DATAW-1:0] req_data_in_merged;
|
||||
wire [NUM_REQS-1:0][LANES-1:0][REQ_DATAW-1:0] req_data_out_merged;
|
||||
|
||||
wire [LANES-1:0][LOG_NUM_REQS-1:0] req_sel;
|
||||
wire [LANES-1:0][TAG_OUT_WIDTH-1:0] req_tag_in_w;
|
||||
|
||||
// Per lane: take the selector bits from the tag, produce the narrowed tag,
// and pack the request fields into one bundle.
for (genvar i = 0; i < LANES; ++i) begin
|
||||
assign req_sel[i] = req_tag_in[i][TAG_SEL_IDX +: LOG_NUM_REQS];
|
||||
|
||||
VX_bits_remove #(
|
||||
.N (TAG_IN_WIDTH),
|
||||
.S (LOG_NUM_REQS),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) bits_remove (
|
||||
.data_in (req_tag_in[i]),
|
||||
.data_out (req_tag_in_w[i])
|
||||
);
|
||||
|
||||
assign req_data_in_merged[i] = {req_tag_in_w[i], req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
|
||||
end
|
||||
|
||||
VX_stream_demux #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LANES (LANES),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (BUFFERED_REQ)
|
||||
) req_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel_in (req_sel),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in (req_data_in_merged),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out),
|
||||
.data_out (req_data_out_merged),
|
||||
.ready_out (req_ready_out)
|
||||
);
|
||||
|
||||
// Unpack each demuxed bundle back into the per-output, per-lane request fields.
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
for (genvar j = 0; j < LANES; ++j) begin
|
||||
assign {req_tag_out[i][j], req_addr_out[i][j], req_rw_out[i][j], req_byteen_out[i][j], req_data_out[i][j]} = req_data_out_merged[i][j];
|
||||
end
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_in_merged;
|
||||
|
||||
// Per response input: widen the tag with the input index and pack the bundle.
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
wire [TAG_IN_WIDTH-1:0] rsp_tag_in_w;
|
||||
|
||||
VX_bits_insert #(
|
||||
.N (TAG_OUT_WIDTH),
|
||||
.S (LOG_NUM_REQS),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) bits_insert (
|
||||
.data_in (rsp_tag_in[i]),
|
||||
.sel_in (LOG_NUM_REQS'(i)),
|
||||
.data_out (rsp_tag_in_w)
|
||||
);
|
||||
|
||||
assign rsp_data_in_merged[i] = {rsp_tag_in_w, rsp_tmask_in[i], rsp_data_in[i]};
|
||||
end
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LANES (1),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (BUFFERED_RSP),
|
||||
.TYPE (TYPE)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_valid_in),
|
||||
.data_in (rsp_data_in_merged),
|
||||
.ready_in (rsp_ready_in),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out ({rsp_tag_out, rsp_tmask_out, rsp_data_out}),
|
||||
.ready_out (rsp_ready_out)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
// Single-output case: no demux/arbiter is instantiated, signals pass through.
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
assign req_valid_out = req_valid_in;
|
||||
assign req_tag_out = req_tag_in;
|
||||
assign req_addr_out = req_addr_in;
|
||||
assign req_rw_out = req_rw_in;
|
||||
assign req_byteen_out = req_byteen_in;
|
||||
assign req_data_out = req_data_in;
|
||||
assign req_ready_in = req_ready_out;
|
||||
|
||||
assign rsp_valid_out = rsp_valid_in;
|
||||
assign rsp_tmask_out = rsp_tmask_in;
|
||||
assign rsp_tag_out = rsp_tag_in;
|
||||
assign rsp_data_out = rsp_data_in;
|
||||
assign rsp_ready_in = rsp_ready_out;
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
187
hw/rtl/VX_socket.sv
Normal file
187
hw/rtl/VX_socket.sv
Normal file
@@ -0,0 +1,187 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Socket: groups SOCKET_SIZE VX_core instances behind shared arbiters.
//  - one VX_mem_arb per dcache request port, plus one for the icache, connects
//    the per-core buses to the socket-level bus ports;
//  - with GBAR_ENABLE defined, a VX_gbar_arb merges the per-core gbar buses;
//  - the DCR bus goes through BUFFER_DCR_BUS_IF and is shared by every core;
//  - busy is the OR of all per-core busy flags, passed through BUFFER_BUSY.
module VX_socket import VX_gpu_pkg::*; #(
|
||||
parameter SOCKET_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.slave mem_perf_if,
|
||||
`endif
|
||||
|
||||
VX_dcr_bus_if.slave dcr_bus_if,
|
||||
|
||||
VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS],
|
||||
|
||||
VX_mem_bus_if.master icache_bus_if,
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if.master gbar_bus_if,
|
||||
`endif
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak,
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE]();
|
||||
|
||||
`RESET_RELAY (gbar_arb_reset, reset);
|
||||
|
||||
VX_gbar_arb #(
|
||||
.NUM_REQS (`SOCKET_SIZE),
|
||||
.OUT_REG ((`SOCKET_SIZE > 1) ? 2 : 0)
|
||||
) gbar_arb (
|
||||
.clk (clk),
|
||||
.reset (gbar_arb_reset),
|
||||
.bus_in_if (per_core_gbar_bus_if),
|
||||
.bus_out_if (gbar_bus_if)
|
||||
);
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Flat array of per-core dcache buses: core j, request port i lives at
// index j * DCACHE_NUM_REQS + i.
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
|
||||
) per_core_dcache_bus_if[`SOCKET_SIZE * DCACHE_NUM_REQS]();
|
||||
|
||||
`RESET_RELAY (dcache_arb_reset, reset);
|
||||
|
||||
// One arbiter per dcache request port: port i of every core is gathered into
// per_core_dcache_bus_tmp_if and arbitrated onto dcache_bus_if[i].
// NOTE(review): unlike icache_arb below, NUM_OUTPUTS is not set here, so the
// module default applies -- presumably 1, confirm against VX_mem_arb.
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_ARB_TAG_WIDTH)
|
||||
) dcache_bus_tmp_if[1]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
|
||||
) per_core_dcache_bus_tmp_if[`SOCKET_SIZE]();
|
||||
|
||||
for (genvar j = 0; j < `SOCKET_SIZE; ++j) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (per_core_dcache_bus_tmp_if[j], per_core_dcache_bus_if[j * DCACHE_NUM_REQS + i]);
|
||||
end
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (`CACHE_ADDR_TYPE_BITS),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG_REQ ((`SOCKET_SIZE > 1) ? 2 : 0),
|
||||
.OUT_REG_RSP ((`SOCKET_SIZE > 1) ? 2 : 0)
|
||||
) dcache_arb (
|
||||
.clk (clk),
|
||||
.reset (dcache_arb_reset),
|
||||
.bus_in_if (per_core_dcache_bus_tmp_if),
|
||||
.bus_out_if (dcache_bus_tmp_if)
|
||||
);
|
||||
|
||||
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i], dcache_bus_tmp_if[0]);
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (ICACHE_TAG_WIDTH)
|
||||
) per_core_icache_bus_if[`SOCKET_SIZE]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (ICACHE_ARB_TAG_WIDTH)
|
||||
) icache_bus_tmp_if[1]();
|
||||
|
||||
`RESET_RELAY (icache_arb_reset, reset);
|
||||
|
||||
// Single arbiter merging every core's icache bus onto icache_bus_if.
VX_mem_arb #(
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATA_SIZE (ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (ICACHE_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (0),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG_REQ ((`SOCKET_SIZE > 1) ? 2 : 0),
|
||||
.OUT_REG_RSP ((`SOCKET_SIZE > 1) ? 2 : 0)
|
||||
) icache_arb (
|
||||
.clk (clk),
|
||||
.reset (icache_arb_reset),
|
||||
.bus_in_if (per_core_icache_bus_if),
|
||||
.bus_out_if (icache_bus_tmp_if)
|
||||
);
|
||||
|
||||
`ASSIGN_VX_MEM_BUS_IF (icache_bus_if, icache_bus_tmp_if[0]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Only core index 0 drives the socket-level simulation helper outputs; the
// remaining per-core values are left unconnected at this level.
wire [`SOCKET_SIZE-1:0] per_core_sim_ebreak;
|
||||
wire [`SOCKET_SIZE-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_core_sim_wb_value;
|
||||
assign sim_ebreak = per_core_sim_ebreak[0];
|
||||
assign sim_wb_value = per_core_sim_wb_value[0];
|
||||
`UNUSED_VAR (per_core_sim_ebreak)
|
||||
`UNUSED_VAR (per_core_sim_wb_value)
|
||||
|
||||
wire [`SOCKET_SIZE-1:0] per_core_busy;
|
||||
|
||||
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));
|
||||
|
||||
`SCOPE_IO_SWITCH (`SOCKET_SIZE)
|
||||
|
||||
// Generate all cores
|
||||
for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin
|
||||
|
||||
`RESET_RELAY (core_reset, reset);
|
||||
|
||||
// CORE_ID is derived from the socket index and the position inside the socket.
VX_core #(
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i)
|
||||
) core (
|
||||
`SCOPE_IO_BIND (i)
|
||||
|
||||
.clk (clk),
|
||||
.reset (core_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
`endif
|
||||
|
||||
.dcr_bus_if (core_dcr_bus_if),
|
||||
|
||||
.dcache_bus_if (per_core_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
|
||||
.icache_bus_if (per_core_icache_bus_if[i]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_core_gbar_bus_if[i]),
|
||||
`endif
|
||||
|
||||
.sim_ebreak (per_core_sim_ebreak[i]),
|
||||
.sim_wb_value (per_core_sim_wb_value[i]),
|
||||
.busy (per_core_busy[i])
|
||||
);
|
||||
end
|
||||
|
||||
`BUFFER_BUSY (busy, (| per_core_busy), (`SOCKET_SIZE > 1));
|
||||
|
||||
endmodule
|
||||
@@ -1,148 +0,0 @@
|
||||
`ifndef VX_TRACE_INSTR
|
||||
`define VX_TRACE_INSTR
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Prints the name of the execution unit encoded by ex_type through dpi_trace.
// Any code other than the five EX_* values listed below is printed as "NOP".
task trace_ex_type (input [`EX_BITS-1:0] ex_type);
    case (ex_type)
        `EX_ALU: dpi_trace("ALU");
        `EX_LSU: dpi_trace("LSU");
        `EX_CSR: dpi_trace("CSR");
        `EX_FPU: dpi_trace("FPU");
        `EX_GPU: dpi_trace("GPU");
        default: dpi_trace("NOP");
    endcase
endtask
|
||||
|
||||
// Prints the mnemonic of an instruction through dpi_trace.
//   ex_type : execution unit selector (EX_* codes)
//   op_type : operation code, cast to the width of the selected unit's opcode
//   op_mod  : modifier; selects the ALU sub-group (branch / mul / plain ALU),
//             the LSU sub-group (0 = load/store, 1 = fence) and the FPU MISC
//             operation
// Every encoding that is not recognized prints "?". This includes the LSU
// path with op_mod other than 0 or 1 and the FPU MISC path with op_mod
// outside 0..6, so a trace line always carries an operation field.
task trace_ex_op (
    input [`EX_BITS-1:0]       ex_type,
    input [`INST_OP_BITS-1:0]  op_type,
    input [`INST_MOD_BITS-1:0] op_mod
);
    case (ex_type)
    `EX_ALU: begin
        if (`INST_ALU_IS_BR(op_mod)) begin
            case (`INST_BR_BITS'(op_type))
                `INST_BR_EQ:    dpi_trace("BEQ");
                `INST_BR_NE:    dpi_trace("BNE");
                `INST_BR_LT:    dpi_trace("BLT");
                `INST_BR_GE:    dpi_trace("BGE");
                `INST_BR_LTU:   dpi_trace("BLTU");
                `INST_BR_GEU:   dpi_trace("BGEU");
                `INST_BR_JAL:   dpi_trace("JAL");
                `INST_BR_JALR:  dpi_trace("JALR");
                `INST_BR_ECALL: dpi_trace("ECALL");
                `INST_BR_EBREAK:dpi_trace("EBREAK");
                `INST_BR_URET:  dpi_trace("URET");
                `INST_BR_SRET:  dpi_trace("SRET");
                `INST_BR_MRET:  dpi_trace("MRET");
                default:        dpi_trace("?");
            endcase
        end else if (`INST_ALU_IS_MUL(op_mod)) begin
            case (`INST_MUL_BITS'(op_type))
                `INST_MUL_MUL:   dpi_trace("MUL");
                `INST_MUL_MULH:  dpi_trace("MULH");
                `INST_MUL_MULHSU:dpi_trace("MULHSU");
                `INST_MUL_MULHU: dpi_trace("MULHU");
                `INST_MUL_DIV:   dpi_trace("DIV");
                `INST_MUL_DIVU:  dpi_trace("DIVU");
                `INST_MUL_REM:   dpi_trace("REM");
                `INST_MUL_REMU:  dpi_trace("REMU");
                default:         dpi_trace("?");
            endcase
        end else begin
            case (`INST_ALU_BITS'(op_type))
                `INST_ALU_ADD:   dpi_trace("ADD");
                `INST_ALU_SUB:   dpi_trace("SUB");
                `INST_ALU_SLL:   dpi_trace("SLL");
                `INST_ALU_SRL:   dpi_trace("SRL");
                `INST_ALU_SRA:   dpi_trace("SRA");
                `INST_ALU_SLT:   dpi_trace("SLT");
                `INST_ALU_SLTU:  dpi_trace("SLTU");
                `INST_ALU_XOR:   dpi_trace("XOR");
                `INST_ALU_OR:    dpi_trace("OR");
                `INST_ALU_AND:   dpi_trace("AND");
                `INST_ALU_LUI:   dpi_trace("LUI");
                `INST_ALU_AUIPC: dpi_trace("AUIPC");
                default:         dpi_trace("?");
            endcase
        end
    end
    `EX_LSU: begin
        if (op_mod == 0) begin
            case (`INST_LSU_BITS'(op_type))
                `INST_LSU_LB: dpi_trace("LB");
                `INST_LSU_LH: dpi_trace("LH");
                `INST_LSU_LW: dpi_trace("LW");
                `INST_LSU_LBU:dpi_trace("LBU");
                `INST_LSU_LHU:dpi_trace("LHU");
                `INST_LSU_SB: dpi_trace("SB");
                `INST_LSU_SH: dpi_trace("SH");
                `INST_LSU_SW: dpi_trace("SW");
                default:      dpi_trace("?");
            endcase
        end else if (op_mod == 1) begin
            case (`INST_FENCE_BITS'(op_type))
                `INST_FENCE_D: dpi_trace("DFENCE");
                `INST_FENCE_I: dpi_trace("IFENCE");
                default:       dpi_trace("?");
            endcase
        end else begin
            // Unknown LSU modifier: report it like any other unknown encoding.
            dpi_trace("?");
        end
    end
    `EX_CSR: begin
        case (`INST_CSR_BITS'(op_type))
            `INST_CSR_RW: dpi_trace("CSRW");
            `INST_CSR_RS: dpi_trace("CSRS");
            `INST_CSR_RC: dpi_trace("CSRC");
            default:      dpi_trace("?");
        endcase
    end
    `EX_FPU: begin
        // NOTE(review): no MSUB entry is listed between MADD and NMSUB; if such
        // an opcode is defined elsewhere it is reported as "?" -- confirm.
        case (`INST_FPU_BITS'(op_type))
            `INST_FPU_ADD:   dpi_trace("ADD");
            `INST_FPU_SUB:   dpi_trace("SUB");
            `INST_FPU_MUL:   dpi_trace("MUL");
            `INST_FPU_DIV:   dpi_trace("DIV");
            `INST_FPU_SQRT:  dpi_trace("SQRT");
            `INST_FPU_MADD:  dpi_trace("MADD");
            `INST_FPU_NMSUB: dpi_trace("NMSUB");
            `INST_FPU_NMADD: dpi_trace("NMADD");
            `INST_FPU_CVTWS: dpi_trace("CVTWS");
            `INST_FPU_CVTWUS:dpi_trace("CVTWUS");
            `INST_FPU_CVTSW: dpi_trace("CVTSW");
            `INST_FPU_CVTSWU:dpi_trace("CVTSWU");
            `INST_FPU_CLASS: dpi_trace("CLASS");
            `INST_FPU_CMP:   dpi_trace("CMP");
            `INST_FPU_MISC: begin
                case (op_mod)
                    0: dpi_trace("SGNJ");
                    1: dpi_trace("SGNJN");
                    2: dpi_trace("SGNJX");
                    3: dpi_trace("MIN");
                    4: dpi_trace("MAX");
                    5: dpi_trace("MVXW");
                    6: dpi_trace("MVWX");
                    // Unknown MISC modifier: report it instead of printing nothing.
                    default: dpi_trace("?");
                endcase
            end
            default:         dpi_trace("?");
        endcase
    end
    `EX_GPU: begin
        case (`INST_GPU_BITS'(op_type))
            `INST_GPU_TMC:   dpi_trace("TMC");
            `INST_GPU_WSPAWN:dpi_trace("WSPAWN");
            `INST_GPU_SPLIT: dpi_trace("SPLIT");
            `INST_GPU_JOIN:  dpi_trace("JOIN");
            `INST_GPU_BAR:   dpi_trace("BAR");
            `INST_GPU_PRED:  dpi_trace("PRED");
            `INST_GPU_TEX:   dpi_trace("TEX");
            default:         dpi_trace("?");
        endcase
    end
    default: dpi_trace("?");
    endcase
endtask
|
||||
|
||||
`endif
|
||||
177
hw/rtl/VX_types.vh
Normal file
177
hw/rtl/VX_types.vh
Normal file
@@ -0,0 +1,177 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_TYPES_VH
|
||||
`define VX_TYPES_VH
|
||||
|
||||
// Device configuration registers
|
||||
|
||||
// CSR and DCR addresses are both 12 bits wide.
`define VX_CSR_ADDR_BITS 12
|
||||
`define VX_DCR_ADDR_BITS 12
|
||||
|
||||
// Base DCR state range. STATE_BEGIN equals the first register address and
// STATE_END is one past the last register address.
`define VX_DCR_BASE_STATE_BEGIN 12'h001
|
||||
`define VX_DCR_BASE_STARTUP_ADDR0 12'h001
|
||||
`define VX_DCR_BASE_STARTUP_ADDR1 12'h002
|
||||
`define VX_DCR_BASE_MPM_CLASS 12'h003
|
||||
`define VX_DCR_BASE_STATE_END 12'h004
|
||||
|
||||
// VX_DCR_BASE_STATE(addr) turns a DCR address into an offset from STATE_BEGIN.
// VX_DCR_BASE_STATE_COUNT is END - BEGIN, i.e. 3 with the values above.
`define VX_DCR_BASE_STATE(addr) ((addr) - `VX_DCR_BASE_STATE_BEGIN)
|
||||
`define VX_DCR_BASE_STATE_COUNT (`VX_DCR_BASE_STATE_END-`VX_DCR_BASE_STATE_BEGIN)
|
||||
|
||||
// Machine Performance-monitoring counters classes
|
||||
|
||||
`define VX_DCR_MPM_CLASS_NONE 0
|
||||
`define VX_DCR_MPM_CLASS_CORE 1
|
||||
`define VX_DCR_MPM_CLASS_MEM 2
|
||||
`define VX_DCR_MPM_CLASS_TEX 3
|
||||
`define VX_DCR_MPM_CLASS_RASTER 4
|
||||
`define VX_DCR_MPM_CLASS_ROP 5
|
||||
|
||||
// User Floating-Point CSRs
|
||||
|
||||
`define VX_CSR_FFLAGS 12'h001
|
||||
`define VX_CSR_FRM 12'h002
|
||||
`define VX_CSR_FCSR 12'h003
|
||||
|
||||
`define VX_CSR_SATP 12'h180
|
||||
|
||||
`define VX_CSR_PMPCFG0 12'h3A0
|
||||
`define VX_CSR_PMPADDR0 12'h3B0
|
||||
|
||||
`define VX_CSR_MSTATUS 12'h300
|
||||
`define VX_CSR_MISA 12'h301
|
||||
`define VX_CSR_MEDELEG 12'h302
|
||||
`define VX_CSR_MIDELEG 12'h303
|
||||
`define VX_CSR_MIE 12'h304
|
||||
`define VX_CSR_MTVEC 12'h305
|
||||
|
||||
`define VX_CSR_MEPC 12'h341
|
||||
|
||||
`define VX_CSR_MNSTATUS 12'h744
|
||||
|
||||
// Base addresses of the performance-counter ranges. Every counter below has an
// _H companion located 12'h080 above its base address (presumably the upper
// half of a wider counter -- confirm against the CSR unit).
`define VX_CSR_MPM_BASE 12'hB00
|
||||
`define VX_CSR_MPM_BASE_H 12'hB80
|
||||
|
||||
// Machine Performance-monitoring core counters
|
||||
// PERF: Standard
|
||||
`define VX_CSR_MCYCLE 12'hB00
|
||||
`define VX_CSR_MCYCLE_H 12'hB80
|
||||
`define VX_CSR_MPM_RESERVED 12'hB01
|
||||
`define VX_CSR_MPM_RESERVED_H 12'hB81
|
||||
`define VX_CSR_MINSTRET 12'hB02
|
||||
`define VX_CSR_MINSTRET_H 12'hB82
|
||||
// PERF: pipeline
|
||||
`define VX_CSR_MPM_IBUF_ST 12'hB03
|
||||
`define VX_CSR_MPM_IBUF_ST_H 12'hB83
|
||||
`define VX_CSR_MPM_SCRB_ST 12'hB04
|
||||
`define VX_CSR_MPM_SCRB_ST_H 12'hB84
|
||||
`define VX_CSR_MPM_ALU_ST 12'hB05
|
||||
`define VX_CSR_MPM_ALU_ST_H 12'hB85
|
||||
`define VX_CSR_MPM_LSU_ST 12'hB06
|
||||
`define VX_CSR_MPM_LSU_ST_H 12'hB86
|
||||
`define VX_CSR_MPM_FPU_ST 12'hB07
|
||||
`define VX_CSR_MPM_FPU_ST_H 12'hB87
|
||||
`define VX_CSR_MPM_SFU_ST 12'hB08
|
||||
`define VX_CSR_MPM_SFU_ST_H 12'hB88
|
||||
// PERF: memory
|
||||
// Address 12'hB09 / 12'hB89 is not assigned in the core counter set.
`define VX_CSR_MPM_IFETCHES 12'hB0A
|
||||
`define VX_CSR_MPM_IFETCHES_H 12'hB8A
|
||||
`define VX_CSR_MPM_LOADS 12'hB0B
|
||||
`define VX_CSR_MPM_LOADS_H 12'hB8B
|
||||
`define VX_CSR_MPM_STORES 12'hB0C
|
||||
`define VX_CSR_MPM_STORES_H 12'hB8C
|
||||
`define VX_CSR_MPM_IFETCH_LAT 12'hB0D
|
||||
`define VX_CSR_MPM_IFETCH_LAT_H 12'hB8D
|
||||
`define VX_CSR_MPM_LOAD_LAT 12'hB0E
|
||||
`define VX_CSR_MPM_LOAD_LAT_H 12'hB8E
|
||||
|
||||
// Machine Performance-monitoring memory counters
|
||||
// NOTE(review): addresses 12'hB03..12'hB0E below are also used by the core
// counter set above. Presumably the active set is chosen through
// VX_DCR_BASE_MPM_CLASS (CLASS_CORE vs CLASS_MEM) -- confirm.
// PERF: icache
|
||||
`define VX_CSR_MPM_ICACHE_READS 12'hB03 // total reads
|
||||
`define VX_CSR_MPM_ICACHE_READS_H 12'hB83
|
||||
`define VX_CSR_MPM_ICACHE_MISS_R 12'hB04 // read misses
|
||||
`define VX_CSR_MPM_ICACHE_MISS_R_H 12'hB84
|
||||
// PERF: dcache
|
||||
`define VX_CSR_MPM_DCACHE_READS 12'hB05 // total reads
|
||||
`define VX_CSR_MPM_DCACHE_READS_H 12'hB85
|
||||
`define VX_CSR_MPM_DCACHE_WRITES 12'hB06 // total writes
|
||||
`define VX_CSR_MPM_DCACHE_WRITES_H 12'hB86
|
||||
`define VX_CSR_MPM_DCACHE_MISS_R 12'hB07 // read misses
|
||||
`define VX_CSR_MPM_DCACHE_MISS_R_H 12'hB87
|
||||
`define VX_CSR_MPM_DCACHE_MISS_W 12'hB08 // write misses
|
||||
`define VX_CSR_MPM_DCACHE_MISS_W_H 12'hB88
|
||||
`define VX_CSR_MPM_DCACHE_BANK_ST 12'hB09 // bank conflicts
|
||||
`define VX_CSR_MPM_DCACHE_BANK_ST_H 12'hB89
|
||||
`define VX_CSR_MPM_DCACHE_MSHR_ST 12'hB0A // MSHR stalls
|
||||
`define VX_CSR_MPM_DCACHE_MSHR_ST_H 12'hB8A
|
||||
// PERF: smem
|
||||
`define VX_CSR_MPM_SMEM_READS 12'hB0B // memory reads
|
||||
`define VX_CSR_MPM_SMEM_READS_H 12'hB8B
|
||||
`define VX_CSR_MPM_SMEM_WRITES 12'hB0C // memory writes
|
||||
`define VX_CSR_MPM_SMEM_WRITES_H 12'hB8C
|
||||
`define VX_CSR_MPM_SMEM_BANK_ST 12'hB0D // bank conflicts
|
||||
`define VX_CSR_MPM_SMEM_BANK_ST_H 12'hB8D
|
||||
// PERF: l2cache
|
||||
`define VX_CSR_MPM_L2CACHE_READS 12'hB0E // total reads
|
||||
`define VX_CSR_MPM_L2CACHE_READS_H 12'hB8E
|
||||
`define VX_CSR_MPM_L2CACHE_WRITES 12'hB0F // total writes
|
||||
`define VX_CSR_MPM_L2CACHE_WRITES_H 12'hB8F
|
||||
`define VX_CSR_MPM_L2CACHE_MISS_R 12'hB10 // read misses
|
||||
`define VX_CSR_MPM_L2CACHE_MISS_R_H 12'hB90
|
||||
`define VX_CSR_MPM_L2CACHE_MISS_W 12'hB11 // write misses
|
||||
`define VX_CSR_MPM_L2CACHE_MISS_W_H 12'hB91
|
||||
`define VX_CSR_MPM_L2CACHE_BANK_ST 12'hB12 // bank conflicts
|
||||
`define VX_CSR_MPM_L2CACHE_BANK_ST_H 12'hB92
|
||||
`define VX_CSR_MPM_L2CACHE_MSHR_ST 12'hB13 // MSHR stalls
|
||||
`define VX_CSR_MPM_L2CACHE_MSHR_ST_H 12'hB93
|
||||
// PERF: l3cache
|
||||
`define VX_CSR_MPM_L3CACHE_READS 12'hB14 // total reads
|
||||
`define VX_CSR_MPM_L3CACHE_READS_H 12'hB94
|
||||
`define VX_CSR_MPM_L3CACHE_WRITES 12'hB15 // total writes
|
||||
`define VX_CSR_MPM_L3CACHE_WRITES_H 12'hB95
|
||||
`define VX_CSR_MPM_L3CACHE_MISS_R 12'hB16 // read misses
|
||||
`define VX_CSR_MPM_L3CACHE_MISS_R_H 12'hB96
|
||||
`define VX_CSR_MPM_L3CACHE_MISS_W 12'hB17 // write misses
|
||||
`define VX_CSR_MPM_L3CACHE_MISS_W_H 12'hB97
|
||||
`define VX_CSR_MPM_L3CACHE_BANK_ST 12'hB18 // bank conflicts
|
||||
`define VX_CSR_MPM_L3CACHE_BANK_ST_H 12'hB98
|
||||
`define VX_CSR_MPM_L3CACHE_MSHR_ST 12'hB19 // MSHR stalls
|
||||
`define VX_CSR_MPM_L3CACHE_MSHR_ST_H 12'hB99
|
||||
// PERF: memory
|
||||
`define VX_CSR_MPM_MEM_READS 12'hB1A // total reads
|
||||
`define VX_CSR_MPM_MEM_READS_H 12'hB9A
|
||||
`define VX_CSR_MPM_MEM_WRITES 12'hB1B // total writes
|
||||
`define VX_CSR_MPM_MEM_WRITES_H 12'hB9B
|
||||
`define VX_CSR_MPM_MEM_LAT 12'hB1C // memory latency
|
||||
`define VX_CSR_MPM_MEM_LAT_H 12'hB9C
|
||||
|
||||
// Machine Information Registers
|
||||
|
||||
`define VX_CSR_MVENDORID 12'hF11
|
||||
`define VX_CSR_MARCHID 12'hF12
|
||||
`define VX_CSR_MIMPID 12'hF13
|
||||
`define VX_CSR_MHARTID 12'hF14
|
||||
|
||||
// GPGPU CSRs
|
||||
|
||||
`define VX_CSR_THREAD_ID 12'hCC0
|
||||
`define VX_CSR_WARP_ID 12'hCC1
|
||||
`define VX_CSR_CORE_ID 12'hCC2
|
||||
`define VX_CSR_WARP_MASK 12'hCC3
|
||||
`define VX_CSR_THREAD_MASK 12'hCC4 // warning! this value is also used in LLVM
|
||||
|
||||
`define VX_CSR_NUM_THREADS 12'hFC0
|
||||
`define VX_CSR_NUM_WARPS 12'hFC1
|
||||
`define VX_CSR_NUM_CORES 12'hFC2
|
||||
|
||||
`endif // VX_TYPES_VH
|
||||
@@ -1,254 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_warp_sched #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_warp_sched
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_warp_ctl_if.slave warp_ctl_if,
|
||||
VX_wstall_if.slave wstall_if,
|
||||
VX_join_if.slave join_if,
|
||||
VX_branch_ctl_if.slave branch_ctl_if,
|
||||
|
||||
VX_ifetch_req_if.master ifetch_req_if,
|
||||
|
||||
VX_fetch_to_csr_if.master fetch_to_csr_if,
|
||||
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire join_else;
|
||||
wire [31:0] join_pc;
|
||||
wire [`NUM_THREADS-1:0] join_tmask;
|
||||
|
||||
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // real active warps (updated when a warp is activated or disabled)
|
||||
reg [`NUM_WARPS-1:0] stalled_warps; // asserted when a branch/gpgpu instructions are issued
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks;
|
||||
reg [`NUM_WARPS-1:0][31:0] warp_pcs;
|
||||
|
||||
// barriers
|
||||
reg [`NUM_BARRIERS-1:0][`NUM_WARPS-1:0] barrier_masks; // warps waiting on barrier
|
||||
wire reached_barrier_limit; // the expected number of warps reached the barrier
|
||||
|
||||
// wspawn
|
||||
reg [31:0] wspawn_pc;
|
||||
reg [`NUM_WARPS-1:0] use_wspawn;
|
||||
|
||||
wire [`NW_BITS-1:0] schedule_wid;
|
||||
wire [`NUM_THREADS-1:0] schedule_tmask;
|
||||
wire [31:0] schedule_pc;
|
||||
wire schedule_valid;
|
||||
wire warp_scheduled;
|
||||
|
||||
reg [`UUID_BITS-1:0] issued_instrs;
|
||||
|
||||
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;
|
||||
|
||||
wire tmc_active = (warp_ctl_if.tmc.tmask != 0);
|
||||
|
||||
always @(*) begin
|
||||
active_warps_n = active_warps;
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
active_warps_n = warp_ctl_if.wspawn.wmask;
|
||||
end
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
active_warps_n[warp_ctl_if.wid] = tmc_active;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
barrier_masks <= '0;
|
||||
use_wspawn <= '0;
|
||||
stalled_warps <= '0;
|
||||
warp_pcs <= '0;
|
||||
active_warps <= '0;
|
||||
thread_masks <= '0;
|
||||
issued_instrs <= '0;
|
||||
|
||||
// activate first warp
|
||||
warp_pcs[0] <= `STARTUP_ADDR;
|
||||
active_warps[0] <= 1;
|
||||
thread_masks[0] <= 1;
|
||||
end else begin
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
|
||||
wspawn_pc <= warp_ctl_if.wspawn.pc;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid) begin
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
if (reached_barrier_limit) begin
|
||||
barrier_masks[warp_ctl_if.barrier.id] <= 0;
|
||||
end else begin
|
||||
barrier_masks[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
if (warp_ctl_if.split.diverged) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_tmask;
|
||||
end
|
||||
end
|
||||
|
||||
// Branch
|
||||
if (branch_ctl_if.valid) begin
|
||||
if (branch_ctl_if.taken) begin
|
||||
warp_pcs[branch_ctl_if.wid] <= branch_ctl_if.dest;
|
||||
end
|
||||
stalled_warps[branch_ctl_if.wid] <= 0;
|
||||
end
|
||||
|
||||
if (warp_scheduled) begin
|
||||
// stall the warp until decode stage
|
||||
stalled_warps[schedule_wid] <= 1;
|
||||
|
||||
// release wspawn
|
||||
use_wspawn[schedule_wid] <= 0;
|
||||
if (use_wspawn[schedule_wid]) begin
|
||||
thread_masks[schedule_wid] <= 1;
|
||||
end
|
||||
|
||||
issued_instrs <= issued_instrs + 1;
|
||||
end
|
||||
|
||||
if (ifetch_req_fire) begin
|
||||
warp_pcs[ifetch_req_if.wid] <= ifetch_req_if.PC + 4;
|
||||
end
|
||||
|
||||
if (wstall_if.valid) begin
|
||||
stalled_warps[wstall_if.wid] <= wstall_if.stalled;
|
||||
end
|
||||
|
||||
// join handling
|
||||
if (join_if.valid) begin
|
||||
if (join_else) begin
|
||||
warp_pcs[join_if.wid] <= join_pc;
|
||||
end
|
||||
thread_masks[join_if.wid] <= join_tmask;
|
||||
end
|
||||
|
||||
active_warps <= active_warps_n;
|
||||
end
|
||||
end
|
||||
|
||||
// export thread mask register
|
||||
assign fetch_to_csr_if.thread_masks = thread_masks;
|
||||
|
||||
// calculate active barrier status
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [`NW_BITS:0] active_barrier_count;
|
||||
`IGNORE_UNUSED_END
|
||||
wire [`NUM_WARPS-1:0] barrier_mask = barrier_masks[warp_ctl_if.barrier.id];
|
||||
`POP_COUNT(active_barrier_count, barrier_mask);
|
||||
|
||||
assign reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
|
||||
|
||||
reg [`NUM_WARPS-1:0] barrier_stalls;
|
||||
always @(*) begin
|
||||
barrier_stalls = barrier_masks[0];
|
||||
for (integer i = 1; i < `NUM_BARRIERS; ++i) begin
|
||||
barrier_stalls |= barrier_masks[i];
|
||||
end
|
||||
end
|
||||
|
||||
// split/join stack management
|
||||
|
||||
wire [(32+`NUM_THREADS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire ipdom_index [`NUM_WARPS-1:0];
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
wire push = warp_ctl_if.valid
|
||||
&& warp_ctl_if.split.valid
|
||||
&& (i == warp_ctl_if.wid);
|
||||
|
||||
wire pop = join_if.valid && (i == join_if.wid);
|
||||
|
||||
wire [`NUM_THREADS-1:0] else_tmask = warp_ctl_if.split.else_tmask;
|
||||
wire [`NUM_THREADS-1:0] orig_tmask = thread_masks[warp_ctl_if.wid];
|
||||
|
||||
wire [(32+`NUM_THREADS)-1:0] q_else = {warp_ctl_if.split.pc, else_tmask};
|
||||
wire [(32+`NUM_THREADS)-1:0] q_end = {32'b0, orig_tmask};
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (32+`NUM_THREADS),
|
||||
.DEPTH (2 ** (`NT_BITS+1))
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push),
|
||||
.pop (pop),
|
||||
.pair (warp_ctl_if.split.diverged),
|
||||
.q1 (q_end),
|
||||
.q2 (q_else),
|
||||
.d (ipdom_data[i]),
|
||||
.index (ipdom_index[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full)
|
||||
);
|
||||
end
|
||||
|
||||
assign {join_pc, join_tmask} = ipdom_data[join_if.wid];
|
||||
assign join_else = ~ipdom_index[join_if.wid];
|
||||
|
||||
// schedule the next ready warp
|
||||
|
||||
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~(stalled_warps | barrier_stalls);
|
||||
|
||||
VX_lzc #(
|
||||
.N (`NUM_WARPS)
|
||||
) wid_select (
|
||||
.in_i (ready_warps),
|
||||
.cnt_o (schedule_wid),
|
||||
.valid_o (schedule_valid)
|
||||
);
|
||||
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + 32)-1:0] schedule_data;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign schedule_data[i] = {(use_wspawn[i] ? `NUM_THREADS'(1) : thread_masks[i]),
|
||||
(use_wspawn[i] ? wspawn_pc : warp_pcs[i])};
|
||||
end
|
||||
|
||||
assign {schedule_tmask, schedule_pc} = schedule_data[schedule_wid];
|
||||
|
||||
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
|
||||
assign warp_scheduled = schedule_valid && ~stall_out;
|
||||
|
||||
wire [`UUID_BITS-1:0] instr_uuid = (issued_instrs * `NUM_CORES * `NUM_CLUSTERS) + `UUID_BITS'(CORE_ID);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `UUID_BITS + `NUM_THREADS + 32 + `NW_BITS),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({schedule_valid, instr_uuid, schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({ifetch_req_if.valid, ifetch_req_if.uuid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
|
||||
);
|
||||
|
||||
assign busy = (active_warps != 0);
|
||||
|
||||
`SCOPE_ASSIGN (wsched_scheduled, warp_scheduled);
|
||||
`SCOPE_ASSIGN (wsched_schedule_uuid, instr_uuid);
|
||||
`SCOPE_ASSIGN (wsched_active_warps, active_warps);
|
||||
`SCOPE_ASSIGN (wsched_stalled_warps, stalled_warps);
|
||||
`SCOPE_ASSIGN (wsched_schedule_wid, schedule_wid);
|
||||
`SCOPE_ASSIGN (wsched_schedule_tmask, schedule_tmask);
|
||||
`SCOPE_ASSIGN (wsched_schedule_pc, schedule_pc);
|
||||
|
||||
endmodule
|
||||
@@ -1,113 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_writeback: merges the register-writeback results of the execution
// units (ALU, LD, CSR, GPU, plus FPU when EXT_F_ENABLE is defined) into the
// single writeback_if output. The per-unit commit interfaces feed a
// VX_stream_arbiter whose selected entry is registered by a VX_pipe_register
// before being driven onto writeback_if.
module VX_writeback #(
|
||||
// CORE_ID is not used by this module's logic (see `UNUSED_PARAM below).
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave alu_commit_if,
|
||||
VX_commit_if.slave ld_commit_if,
|
||||
VX_commit_if.slave csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.slave fpu_commit_if,
|
||||
`endif
|
||||
VX_commit_if.slave gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if.master writeback_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
// Width of one arbiter entry: {wid, PC, tmask, rd, data, eop}, matching the
// field order used in rsp_data and in the arbiter's data_out below.
localparam DATAW = `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1;
|
||||
// Number of arbitrated sources: five with the FPU, four without.
`ifdef EXT_F_ENABLE
|
||||
localparam NUM_RSPS = 5;
|
||||
`else
|
||||
localparam NUM_RSPS = 4;
|
||||
`endif
|
||||
|
||||
// Fields of the entry selected by the arbiter (input to the output register).
wire wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_wid;
|
||||
wire [31:0] wb_PC;
|
||||
wire [`NUM_THREADS-1:0] wb_tmask;
|
||||
wire [`NR_BITS-1:0] wb_rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] wb_data;
|
||||
wire wb_eop;
|
||||
|
||||
// Per-source arbiter request signals.
wire [NUM_RSPS-1:0] rsp_valid;
|
||||
wire [NUM_RSPS-1:0][DATAW-1:0] rsp_data;
|
||||
wire [NUM_RSPS-1:0] rsp_ready;
|
||||
wire stall;
|
||||
|
||||
// A source only requests arbitration when its commit is valid AND carries a
// register writeback (wb). Concatenation order puts LD at index 0, then FPU
// (when enabled), ALU, CSR, and GPU at the top index; the ready assignments
// further down rely on this same ordering.
assign rsp_valid = {
|
||||
gpu_commit_if.valid && gpu_commit_if.wb,
|
||||
csr_commit_if.valid && csr_commit_if.wb,
|
||||
alu_commit_if.valid && alu_commit_if.wb,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if.valid && fpu_commit_if.wb,
|
||||
`endif
|
||||
ld_commit_if.valid && ld_commit_if.wb
|
||||
};
|
||||
|
||||
// Payload per source, in the same source order as rsp_valid.
assign rsp_data = {
|
||||
{gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.rd, gpu_commit_if.data, gpu_commit_if.eop},
|
||||
{csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.rd, csr_commit_if.data, csr_commit_if.eop},
|
||||
{alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.rd, alu_commit_if.data, alu_commit_if.eop},
|
||||
`ifdef EXT_F_ENABLE
|
||||
{fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.rd, fpu_commit_if.data, fpu_commit_if.eop},
|
||||
`endif
|
||||
{ ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.rd, ld_commit_if.data, ld_commit_if.eop}
|
||||
};
|
||||
|
||||
// Selects one requesting source per transfer; the arbiter is back-pressured
// whenever the output register is stalled.
// NOTE(review): TYPE "R" presumably selects round-robin arbitration and
// BUFFERED(1) an output buffer -- confirm against VX_stream_arbiter.
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_RSPS),
|
||||
.DATAW (DATAW),
|
||||
.BUFFERED (1),
|
||||
.TYPE ("R")
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_valid),
|
||||
.data_in (rsp_data),
|
||||
.ready_in (rsp_ready),
|
||||
.valid_out (wb_valid),
|
||||
.data_out ({wb_wid, wb_PC, wb_tmask, wb_rd, wb_data, wb_eop}),
|
||||
.ready_out (~stall)
|
||||
);
|
||||
|
||||
// A commit is accepted either when the arbiter takes it or when it needs no
// register writeback at all (~wb), so non-writeback commits never block here.
// The rsp_ready indices shift by one depending on whether the FPU slot exists.
assign ld_commit_if.ready = rsp_ready[0] || ~ld_commit_if.wb;
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign fpu_commit_if.ready = rsp_ready[1] || ~fpu_commit_if.wb;
|
||||
assign alu_commit_if.ready = rsp_ready[2] || ~alu_commit_if.wb;
|
||||
assign csr_commit_if.ready = rsp_ready[3] || ~csr_commit_if.wb;
|
||||
assign gpu_commit_if.ready = rsp_ready[4] || ~gpu_commit_if.wb;
|
||||
`else
|
||||
assign alu_commit_if.ready = rsp_ready[1] || ~alu_commit_if.wb;
|
||||
assign csr_commit_if.ready = rsp_ready[2] || ~csr_commit_if.wb;
|
||||
assign gpu_commit_if.ready = rsp_ready[3] || ~gpu_commit_if.wb;
|
||||
`endif
|
||||
|
||||
// Stall only when the output currently holds a valid entry that the consumer
// has not accepted; an empty output register can always be refilled.
assign stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
// Output register: the valid bit plus the DATAW-bit payload, held while stalled.
// NOTE(review): RESETW(1) presumably resets only the leading valid bit --
// confirm against VX_pipe_register.
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data, wb_eop}),
|
||||
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data, writeback_if.eop})
|
||||
);
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
// Records, per destination register, the thread-0 data word of the most recent
// accepted writeback. The array is exposed via the verilator public pragma and
// has no reset, so entries are undefined until first written.
|
||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||
always @(posedge clk) begin
|
||||
if (writeback_if.valid && writeback_if.ready) begin
|
||||
last_wb_value[writeback_if.rd] <= writeback_if.data[0];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
369
hw/rtl/Vortex.sv
369
hw/rtl/Vortex.sv
@@ -1,7 +1,20 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module Vortex (
|
||||
`SCOPE_IO_Vortex
|
||||
module Vortex import VX_gpu_pkg::*; (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
@@ -22,204 +35,224 @@ module Vortex (
|
||||
input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// DCR write request
|
||||
input wire dcr_wr_valid,
|
||||
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
|
||||
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
`STATIC_ASSERT((`L3_ENABLE == 0 || `NUM_CLUSTERS > 1), ("invalid parameter"))
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_rw;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_ready;
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if[`NUM_CLUSTERS]();
|
||||
VX_mem_perf_if perf_memsys_total_if();
|
||||
VX_cache_perf_if perf_l3cache_if();
|
||||
`endif
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready;
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L3_LINE_SIZE),
|
||||
.TAG_WIDTH (L3_MEM_TAG_WIDTH)
|
||||
) mem_bus_if();
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
assign mem_req_valid = mem_bus_if.req_valid;
|
||||
assign mem_req_rw = mem_bus_if.req_data.rw;
|
||||
assign mem_req_byteen= mem_bus_if.req_data.byteen;
|
||||
assign mem_req_addr = mem_bus_if.req_data.addr;
|
||||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_bus_if.rsp_ready;
|
||||
|
||||
`RESET_RELAY (cluster_reset);
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
wire sim_ebreak /* verilator public */;
|
||||
wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value /* verilator public */;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_sim_ebreak;
|
||||
wire [`NUM_CLUSTERS-1:0][`NUM_REGS-1:0][`XLEN-1:0] per_cluster_sim_wb_value;
|
||||
assign sim_ebreak = per_cluster_sim_ebreak[0];
|
||||
assign sim_wb_value = per_cluster_sim_wb_value[0];
|
||||
`UNUSED_VAR (per_cluster_sim_ebreak)
|
||||
`UNUSED_VAR (per_cluster_sim_wb_value)
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L2_LINE_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH)
|
||||
) per_cluster_mem_bus_if[`NUM_CLUSTERS]();
|
||||
|
||||
VX_dcr_bus_if dcr_bus_if();
|
||||
assign dcr_bus_if.write_valid = dcr_wr_valid;
|
||||
assign dcr_bus_if.write_addr = dcr_wr_addr;
|
||||
assign dcr_bus_if.write_data = dcr_wr_data;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID(i)
|
||||
.CLUSTER_ID (i)
|
||||
) cluster (
|
||||
`SCOPE_BIND_Vortex_cluster(i)
|
||||
`SCOPE_IO_BIND (i)
|
||||
|
||||
.clk (clk),
|
||||
.reset (cluster_reset),
|
||||
|
||||
.mem_req_valid (per_cluster_mem_req_valid [i]),
|
||||
.mem_req_rw (per_cluster_mem_req_rw [i]),
|
||||
.mem_req_byteen (per_cluster_mem_req_byteen[i]),
|
||||
.mem_req_addr (per_cluster_mem_req_addr [i]),
|
||||
.mem_req_data (per_cluster_mem_req_data [i]),
|
||||
.mem_req_tag (per_cluster_mem_req_tag [i]),
|
||||
.mem_req_ready (per_cluster_mem_req_ready [i]),
|
||||
|
||||
.mem_rsp_valid (per_cluster_mem_rsp_valid [i]),
|
||||
.mem_rsp_data (per_cluster_mem_rsp_data [i]),
|
||||
.mem_rsp_tag (per_cluster_mem_rsp_tag [i]),
|
||||
.mem_rsp_ready (per_cluster_mem_rsp_ready [i]),
|
||||
|
||||
.busy (per_cluster_busy [i])
|
||||
);
|
||||
end
|
||||
|
||||
assign busy = (| per_cluster_busy);
|
||||
|
||||
if (`L3_ENABLE) begin
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_l3cache_if();
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (l3_reset);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L3_CACHE_ID),
|
||||
.CACHE_SIZE (`L3_CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L3_CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L3_NUM_BANKS),
|
||||
.NUM_PORTS (`L3_NUM_PORTS),
|
||||
.WORD_SIZE (`L3_WORD_SIZE),
|
||||
.NUM_REQS (`L3_NUM_REQS),
|
||||
.CREQ_SIZE (`L3_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L3_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) l3cache (
|
||||
`SCOPE_BIND_Vortex_l3cache
|
||||
|
||||
.clk (clk),
|
||||
.reset (l3_reset),
|
||||
.reset (cluster_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_l3cache_if),
|
||||
.mem_perf_if (mem_perf_if[i]),
|
||||
.perf_memsys_total_if (perf_memsys_total_if),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_cluster_mem_req_valid),
|
||||
.core_req_rw (per_cluster_mem_req_rw),
|
||||
.core_req_byteen (per_cluster_mem_req_byteen),
|
||||
.core_req_addr (per_cluster_mem_req_addr),
|
||||
.core_req_data (per_cluster_mem_req_data),
|
||||
.core_req_tag (per_cluster_mem_req_tag),
|
||||
.core_req_ready (per_cluster_mem_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_cluster_mem_rsp_valid),
|
||||
.core_rsp_data (per_cluster_mem_rsp_data),
|
||||
.core_rsp_tag (per_cluster_mem_rsp_tag),
|
||||
.core_rsp_ready (per_cluster_mem_rsp_ready),
|
||||
`UNUSED_PIN (core_rsp_tmask),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (mem_req_valid),
|
||||
.mem_req_rw (mem_req_rw),
|
||||
.mem_req_byteen (mem_req_byteen),
|
||||
.mem_req_addr (mem_req_addr),
|
||||
.mem_req_data (mem_req_data),
|
||||
.mem_req_tag (mem_req_tag),
|
||||
.mem_req_ready (mem_req_ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (`L3_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`L3_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Core request
|
||||
.req_valid_in (per_cluster_mem_req_valid),
|
||||
.req_rw_in (per_cluster_mem_req_rw),
|
||||
.req_byteen_in (per_cluster_mem_req_byteen),
|
||||
.req_addr_in (per_cluster_mem_req_addr),
|
||||
.req_data_in (per_cluster_mem_req_data),
|
||||
.req_tag_in (per_cluster_mem_req_tag),
|
||||
.req_ready_in (per_cluster_mem_req_ready),
|
||||
|
||||
// Memory request
|
||||
.req_valid_out (mem_req_valid),
|
||||
.req_rw_out (mem_req_rw),
|
||||
.req_byteen_out (mem_req_byteen),
|
||||
.req_addr_out (mem_req_addr),
|
||||
.req_data_out (mem_req_data),
|
||||
.req_tag_out (mem_req_tag),
|
||||
.req_ready_out (mem_req_ready),
|
||||
|
||||
// Core response
|
||||
.rsp_valid_out (per_cluster_mem_rsp_valid),
|
||||
.rsp_data_out (per_cluster_mem_rsp_data),
|
||||
.rsp_tag_out (per_cluster_mem_rsp_tag),
|
||||
.rsp_ready_out (per_cluster_mem_rsp_ready),
|
||||
|
||||
// Memory response
|
||||
.rsp_valid_in (mem_rsp_valid),
|
||||
.rsp_tag_in (mem_rsp_tag),
|
||||
.rsp_data_in (mem_rsp_data),
|
||||
.rsp_ready_in (mem_rsp_ready)
|
||||
);
|
||||
.dcr_bus_if (cluster_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_cluster_mem_bus_if[i]),
|
||||
|
||||
.sim_ebreak (per_cluster_sim_ebreak[i]),
|
||||
.sim_wb_value (per_cluster_sim_wb_value[i]),
|
||||
|
||||
.busy (per_cluster_busy[i])
|
||||
);
|
||||
end
|
||||
|
||||
`SCOPE_ASSIGN (reset, reset);
|
||||
`SCOPE_ASSIGN (mem_req_fire, mem_req_valid && mem_req_ready);
|
||||
`SCOPE_ASSIGN (mem_req_addr, `TO_FULL_ADDR(mem_req_addr));
|
||||
`SCOPE_ASSIGN (mem_req_rw, mem_req_rw);
|
||||
`SCOPE_ASSIGN (mem_req_byteen, mem_req_byteen);
|
||||
`SCOPE_ASSIGN (mem_req_data, mem_req_data);
|
||||
`SCOPE_ASSIGN (mem_req_tag, mem_req_tag);
|
||||
`SCOPE_ASSIGN (mem_rsp_fire, mem_rsp_valid && mem_rsp_ready);
|
||||
`SCOPE_ASSIGN (mem_rsp_data, mem_rsp_data);
|
||||
`SCOPE_ASSIGN (mem_rsp_tag, mem_rsp_tag);
|
||||
`SCOPE_ASSIGN (busy, busy);
|
||||
`BUFFER_BUSY (busy, (| per_cluster_busy), (`NUM_CLUSTERS > 1));
|
||||
|
||||
`RESET_RELAY (l3_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ("l3cache"),
|
||||
.CACHE_SIZE (`L3_CACHE_SIZE),
|
||||
.LINE_SIZE (`L3_LINE_SIZE),
|
||||
.NUM_BANKS (`L3_NUM_BANKS),
|
||||
.NUM_WAYS (`L3_NUM_WAYS),
|
||||
.WORD_SIZE (L3_WORD_SIZE),
|
||||
.NUM_REQS (L3_NUM_REQS),
|
||||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_REG (2),
|
||||
.MEM_OUT_REG (2),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L3_ENABLED)
|
||||
) l3cache (
|
||||
.clk (clk),
|
||||
.reset (l3_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf_if (perf_l3cache_if),
|
||||
`endif
|
||||
|
||||
.core_bus_if (per_cluster_mem_bus_if),
|
||||
.mem_bus_if (mem_bus_if)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, icache_reads, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, icache_read_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_reads, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_writes, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_read_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_write_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_bank_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, dcache_mshr_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, smem_reads, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, smem_writes, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, smem_bank_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_reads, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_writes, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_read_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_write_misses, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_bank_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
`PERF_REDUCE (perf_memsys_total_if, mem_perf_if, l2cache_mshr_stalls, `PERF_CTR_BITS, `NUM_CLUSTERS);
|
||||
|
||||
`ifdef L3_ENABLE
|
||||
assign perf_memsys_total_if.l3cache_reads = perf_l3cache_if.reads;
|
||||
assign perf_memsys_total_if.l3cache_writes = perf_l3cache_if.writes;
|
||||
assign perf_memsys_total_if.l3cache_read_misses = perf_l3cache_if.read_misses;
|
||||
assign perf_memsys_total_if.l3cache_write_misses= perf_l3cache_if.write_misses;
|
||||
assign perf_memsys_total_if.l3cache_bank_stalls = perf_l3cache_if.bank_stalls;
|
||||
assign perf_memsys_total_if.l3cache_mshr_stalls = perf_l3cache_if.mshr_stalls;
|
||||
`else
|
||||
assign perf_memsys_total_if.l3cache_reads = '0;
|
||||
assign perf_memsys_total_if.l3cache_writes = '0;
|
||||
assign perf_memsys_total_if.l3cache_read_misses = '0;
|
||||
assign perf_memsys_total_if.l3cache_write_misses= '0;
|
||||
assign perf_memsys_total_if.l3cache_bank_stalls = '0;
|
||||
assign perf_memsys_total_if.l3cache_mshr_stalls = '0;
|
||||
`endif
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_pending_reads <= '0;
|
||||
end else begin
|
||||
perf_mem_pending_reads <= $signed(perf_mem_pending_reads) +
|
||||
`PERF_CTR_BITS'($signed(2'(mem_req_fire && ~mem_bus_if.req_data.rw) - 2'(mem_rsp_fire)));
|
||||
end
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_lat;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_reads <= '0;
|
||||
perf_mem_writes <= '0;
|
||||
perf_mem_lat <= '0;
|
||||
end else begin
|
||||
if (mem_req_fire && ~mem_bus_if.req_data.rw) begin
|
||||
perf_mem_reads <= perf_mem_reads + `PERF_CTR_BITS'(1);
|
||||
end
|
||||
if (mem_req_fire && mem_bus_if.req_data.rw) begin
|
||||
perf_mem_writes <= perf_mem_writes + `PERF_CTR_BITS'(1);
|
||||
end
|
||||
perf_mem_lat <= perf_mem_lat + perf_mem_pending_reads;
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_memsys_total_if.mem_reads = perf_mem_reads;
|
||||
assign perf_memsys_total_if.mem_writes = perf_mem_writes;
|
||||
assign perf_memsys_total_if.mem_latency = perf_mem_lat;
|
||||
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_MEM
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_valid && mem_req_ready) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw)
|
||||
dpi_trace("%d: MEM Wr Req: addr=%0h, tag=%0h, byteen=%0h data=%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data);
|
||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data));
|
||||
else
|
||||
dpi_trace("%d: MEM Rd Req: addr=%0h, tag=%0h, byteen=%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen);
|
||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen));
|
||||
end
|
||||
if (mem_rsp_valid && mem_rsp_ready) begin
|
||||
dpi_trace("%d: MEM Rsp: tag=%0h, data=%0h\n", $time, mem_rsp_tag, mem_rsp_data);
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag, mem_rsp_data));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
`ifndef NDEBUG
|
||||
`ifdef SIMULATION
|
||||
always @(posedge clk) begin
|
||||
$fflush(); // flush stdout buffer
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
||||
@@ -1,65 +1,91 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module Vortex_axi #(
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = 32,
|
||||
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
|
||||
parameter AXI_STROBE_WIDTH = (`VX_MEM_DATA_WIDTH / 8)
|
||||
module Vortex_axi import VX_gpu_pkg::*; #(
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = `XLEN,
|
||||
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
|
||||
parameter AXI_NUM_BANKS = 1
|
||||
)(
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// AXI write request address channel
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
|
||||
output wire [7:0] m_axi_awlen,
|
||||
output wire [2:0] m_axi_awsize,
|
||||
output wire [1:0] m_axi_awburst,
|
||||
output wire m_axi_awlock,
|
||||
output wire [3:0] m_axi_awcache,
|
||||
output wire [2:0] m_axi_awprot,
|
||||
output wire [3:0] m_axi_awqos,
|
||||
output wire m_axi_awvalid,
|
||||
input wire m_axi_awready,
|
||||
output wire m_axi_awvalid [AXI_NUM_BANKS],
|
||||
input wire m_axi_awready [AXI_NUM_BANKS],
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [AXI_NUM_BANKS],
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_awid [AXI_NUM_BANKS],
|
||||
output wire [7:0] m_axi_awlen [AXI_NUM_BANKS],
|
||||
output wire [2:0] m_axi_awsize [AXI_NUM_BANKS],
|
||||
output wire [1:0] m_axi_awburst [AXI_NUM_BANKS],
|
||||
output wire [1:0] m_axi_awlock [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_awcache [AXI_NUM_BANKS],
|
||||
output wire [2:0] m_axi_awprot [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_awqos [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_awregion [AXI_NUM_BANKS],
|
||||
|
||||
// AXI write request data channel
|
||||
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
|
||||
output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb,
|
||||
output wire m_axi_wlast,
|
||||
output wire m_axi_wvalid,
|
||||
input wire m_axi_wready,
|
||||
output wire m_axi_wvalid [AXI_NUM_BANKS],
|
||||
input wire m_axi_wready [AXI_NUM_BANKS],
|
||||
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata [AXI_NUM_BANKS],
|
||||
output wire [AXI_DATA_WIDTH/8-1:0] m_axi_wstrb [AXI_NUM_BANKS],
|
||||
output wire m_axi_wlast [AXI_NUM_BANKS],
|
||||
|
||||
// AXI write response channel
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_bid,
|
||||
input wire [1:0] m_axi_bresp,
|
||||
input wire m_axi_bvalid,
|
||||
output wire m_axi_bready,
|
||||
input wire m_axi_bvalid [AXI_NUM_BANKS],
|
||||
output wire m_axi_bready [AXI_NUM_BANKS],
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_bid [AXI_NUM_BANKS],
|
||||
input wire [1:0] m_axi_bresp [AXI_NUM_BANKS],
|
||||
|
||||
// AXI read request channel
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
|
||||
output wire [7:0] m_axi_arlen,
|
||||
output wire [2:0] m_axi_arsize,
|
||||
output wire [1:0] m_axi_arburst,
|
||||
output wire m_axi_arlock,
|
||||
output wire [3:0] m_axi_arcache,
|
||||
output wire [2:0] m_axi_arprot,
|
||||
output wire [3:0] m_axi_arqos,
|
||||
output wire m_axi_arvalid,
|
||||
input wire m_axi_arready,
|
||||
output wire m_axi_arvalid [AXI_NUM_BANKS],
|
||||
input wire m_axi_arready [AXI_NUM_BANKS],
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr [AXI_NUM_BANKS],
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_arid [AXI_NUM_BANKS],
|
||||
output wire [7:0] m_axi_arlen [AXI_NUM_BANKS],
|
||||
output wire [2:0] m_axi_arsize [AXI_NUM_BANKS],
|
||||
output wire [1:0] m_axi_arburst [AXI_NUM_BANKS],
|
||||
output wire [1:0] m_axi_arlock [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_arcache [AXI_NUM_BANKS],
|
||||
output wire [2:0] m_axi_arprot [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_arqos [AXI_NUM_BANKS],
|
||||
output wire [3:0] m_axi_arregion [AXI_NUM_BANKS],
|
||||
|
||||
// AXI read response channel
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
|
||||
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
|
||||
input wire [1:0] m_axi_rresp,
|
||||
input wire m_axi_rlast,
|
||||
input wire m_axi_rvalid,
|
||||
output wire m_axi_rready,
|
||||
input wire m_axi_rvalid [AXI_NUM_BANKS],
|
||||
output wire m_axi_rready [AXI_NUM_BANKS],
|
||||
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata [AXI_NUM_BANKS],
|
||||
input wire m_axi_rlast [AXI_NUM_BANKS],
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_rid [AXI_NUM_BANKS],
|
||||
input wire [1:0] m_axi_rresp [AXI_NUM_BANKS],
|
||||
|
||||
// DCR write request
|
||||
input wire dcr_wr_valid,
|
||||
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
|
||||
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
`STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH))
|
||||
`STATIC_ASSERT((AXI_ADDR_WIDTH >= `XLEN), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH))
|
||||
//`STATIC_ASSERT((AXI_TID_WIDTH >= `VX_MEM_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, `VX_MEM_TAG_WIDTH))
|
||||
|
||||
wire mem_req_valid;
|
||||
wire mem_req_rw;
|
||||
wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen;
|
||||
@@ -72,16 +98,33 @@ module Vortex_axi #(
|
||||
wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data;
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
wire [`XLEN-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS];
|
||||
wire [`XLEN-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
assign m_axi_awaddr[i] = `XLEN'(m_axi_awaddr_unqual[i]);
|
||||
assign m_axi_araddr[i] = `XLEN'(m_axi_araddr_unqual[i]);
|
||||
|
||||
assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]);
|
||||
assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]);
|
||||
|
||||
assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]);
|
||||
assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]);
|
||||
end
|
||||
|
||||
VX_axi_adapter #(
|
||||
.VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
|
||||
.VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.VX_BYTEEN_WIDTH (AXI_STROBE_WIDTH),
|
||||
.AXI_DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
|
||||
.AXI_TID_WIDTH (AXI_TID_WIDTH),
|
||||
.AXI_STROBE_WIDTH (AXI_STROBE_WIDTH)
|
||||
.DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`XLEN),
|
||||
.TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.NUM_BANKS (AXI_NUM_BANKS),
|
||||
.OUT_REG_RSP((AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -98,9 +141,11 @@ module Vortex_axi #(
|
||||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready),
|
||||
|
||||
.m_axi_awid (m_axi_awid),
|
||||
.m_axi_awaddr (m_axi_awaddr),
|
||||
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
.m_axi_awaddr (m_axi_awaddr_unqual),
|
||||
.m_axi_awid (m_axi_awid_unqual),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
.m_axi_awsize (m_axi_awsize),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
@@ -108,22 +153,23 @@ module Vortex_axi #(
|
||||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
.m_axi_awregion (m_axi_awregion),
|
||||
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
|
||||
.m_axi_bid (m_axi_bid),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
.m_axi_bid (m_axi_bid_unqual),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
|
||||
.m_axi_arid (m_axi_arid),
|
||||
.m_axi_araddr (m_axi_araddr),
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
.m_axi_araddr (m_axi_araddr_unqual),
|
||||
.m_axi_arid (m_axi_arid_unqual),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
.m_axi_arsize (m_axi_arsize),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
@@ -131,18 +177,21 @@ module Vortex_axi #(
|
||||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
.m_axi_arregion (m_axi_arregion),
|
||||
|
||||
.m_axi_rid (m_axi_rid),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rresp (m_axi_rresp),
|
||||
.m_axi_rlast (m_axi_rlast),
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready)
|
||||
.m_axi_rready (m_axi_rready),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rlast (m_axi_rlast) ,
|
||||
.m_axi_rid (m_axi_rid_unqual),
|
||||
.m_axi_rresp (m_axi_rresp)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (1)
|
||||
|
||||
Vortex vortex (
|
||||
`SCOPE_IO_BIND (0)
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -159,7 +208,11 @@ module Vortex_axi #(
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready),
|
||||
|
||||
.dcr_wr_valid (dcr_wr_valid),
|
||||
.dcr_wr_addr (dcr_wr_addr),
|
||||
.dcr_wr_data (dcr_wr_data),
|
||||
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
||||
@@ -1,176 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_avs_wrapper
//
// Bridges one memory request/response port onto AVS_BANKS banked AVS
// interfaces (waitrequest / readdatavalid style signalling).
//
// * Requests are steered to a bank using the low BANK_ADDRW bits of
//   mem_req_addr; the full, unmodified address is driven to every bank.
// * For each read accepted, the request tag is pushed into a per-bank FIFO
//   so it can be paired with the read data returned by that bank.
// * Read data returned by each bank is buffered in a per-bank FIFO and the
//   banks are merged onto the single response port by VX_stream_arbiter.
//
// Parameters:
//   AVS_DATA_WIDTH       data width of each bank, in bits
//   AVS_ADDR_WIDTH       address width of the request port and of each bank
//   AVS_BURST_WIDTH      width of avs_burstcount (always driven with 1)
//   AVS_BANKS            number of banks
//   REQ_TAG_WIDTH        width of the request/response tag
//   RD_QUEUE_SIZE        depth of the per-bank tag and read-data FIFOs
//   AVS_BYTEENW          derived: byte-enable width
//   RD_QUEUE_ADDR_WIDTH  derived: width of the pending-read counter
module VX_avs_wrapper #(
    parameter AVS_DATA_WIDTH      = 1,
    parameter AVS_ADDR_WIDTH      = 1,
    parameter AVS_BURST_WIDTH     = 1,
    parameter AVS_BANKS           = 1,
    parameter REQ_TAG_WIDTH       = 1,
    parameter RD_QUEUE_SIZE       = 1,

    parameter AVS_BYTEENW         = (AVS_DATA_WIDTH / 8),
    parameter RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1)
) (
    input wire                          clk,
    input wire                          reset,

    // Memory request
    input wire                          mem_req_valid,
    input wire                          mem_req_rw,
    input wire [AVS_BYTEENW-1:0]        mem_req_byteen,
    input wire [AVS_ADDR_WIDTH-1:0]     mem_req_addr,
    input wire [AVS_DATA_WIDTH-1:0]     mem_req_data,
    input wire [REQ_TAG_WIDTH-1:0]      mem_req_tag,
    output wire                         mem_req_ready,

    // Memory response
    output wire                         mem_rsp_valid,
    output wire [AVS_DATA_WIDTH-1:0]    mem_rsp_data,
    output wire [REQ_TAG_WIDTH-1:0]     mem_rsp_tag,
    input wire                          mem_rsp_ready,

    // AVS bus
    output wire [AVS_DATA_WIDTH-1:0]    avs_writedata [AVS_BANKS],
    input wire [AVS_DATA_WIDTH-1:0]     avs_readdata [AVS_BANKS],
    output wire [AVS_ADDR_WIDTH-1:0]    avs_address [AVS_BANKS],
    input wire                          avs_waitrequest [AVS_BANKS],
    output wire                         avs_write [AVS_BANKS],
    output wire                         avs_read [AVS_BANKS],
    output wire [AVS_BYTEENW-1:0]       avs_byteenable [AVS_BANKS],
    output wire [AVS_BURST_WIDTH-1:0]   avs_burstcount [AVS_BANKS],
    // Explicit net type, matching every other port; an untyped port fails
    // to compile when `default_nettype none is in effect.
    input wire                          avs_readdatavalid [AVS_BANKS]
);

    localparam BANK_ADDRW = `LOG2UP(AVS_BANKS);

    // Requests handling

    wire [AVS_BANKS-1:0] avs_reqq_push, avs_reqq_pop, avs_reqq_ready;
    wire [AVS_BANKS-1:0][REQ_TAG_WIDTH-1:0] avs_reqq_tag_out;
    wire [AVS_BANKS-1:0] req_queue_going_full;
    wire [AVS_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
    wire [BANK_ADDRW-1:0] req_bank_sel;

    // Target bank = low address bits (bank 0 when there is a single bank).
    if (AVS_BANKS >= 2) begin
        assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
    end else begin
        assign req_bank_sel = 0;
    end

    for (genvar i = 0; i < AVS_BANKS; i++) begin
        // A bank can take a request when its read queue has room and the
        // bank is not asserting waitrequest.
        assign avs_reqq_ready[i] = !req_queue_going_full[i] && !avs_waitrequest[i];
        // Only reads are tracked: a tag is queued when a read is accepted.
        assign avs_reqq_push[i]  = mem_req_valid && !mem_req_rw && avs_reqq_ready[i] && (req_bank_sel == i);
    end

    for (genvar i = 0; i < AVS_BANKS; i++) begin
        // Counts reads pushed but not yet popped; its 'full' output gates
        // the issue of new requests to this bank.
        VX_pending_size #(
            .SIZE (RD_QUEUE_SIZE)
        ) pending_size (
            .clk   (clk),
            .reset (reset),
            .incr  (avs_reqq_push[i]),
            .decr  (avs_reqq_pop[i]),
            .full  (req_queue_going_full[i]),
            .size  (req_queue_size[i]),
            `UNUSED_PIN (empty)
        );
        `UNUSED_VAR (req_queue_size)

        // Tags of the outstanding reads of this bank, in issue order.
        VX_fifo_queue #(
            .DATAW (REQ_TAG_WIDTH),
            .SIZE  (RD_QUEUE_SIZE)
        ) rd_req_queue (
            .clk      (clk),
            .reset    (reset),
            .push     (avs_reqq_push[i]),
            .pop      (avs_reqq_pop[i]),
            .data_in  (mem_req_tag),
            .data_out (avs_reqq_tag_out[i]),
            `UNUSED_PIN (empty),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );
    end

    for (genvar i = 0; i < AVS_BANKS; i++) begin
        // read/write are held for the selected bank regardless of
        // waitrequest; waitrequest only affects mem_req_ready. Writes are
        // gated by the read-queue occupancy too, same as reads.
        assign avs_read[i]       = mem_req_valid && !mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
        assign avs_write[i]      = mem_req_valid && mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
        assign avs_address[i]    = mem_req_addr;
        assign avs_byteenable[i] = mem_req_byteen;
        assign avs_writedata[i]  = mem_req_data;
        assign avs_burstcount[i] = AVS_BURST_WIDTH'(1); // single-beat accesses only
    end

    // The requester is ready when the bank it is addressing is ready.
    if (AVS_BANKS >= 2) begin
        assign mem_req_ready = avs_reqq_ready[req_bank_sel];
    end else begin
        assign mem_req_ready = avs_reqq_ready;
    end

    // Responses handling

    wire [AVS_BANKS-1:0] rsp_arb_valid_in;
    wire [AVS_BANKS-1:0][AVS_DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in;
    wire [AVS_BANKS-1:0] rsp_arb_ready_in;

    wire [AVS_BANKS-1:0][AVS_DATA_WIDTH-1:0] avs_rspq_data_out;
    wire [AVS_BANKS-1:0] avs_rspq_empty;

    for (genvar i = 0; i < AVS_BANKS; i++) begin
        // Buffers read data returned by the bank. It is popped together
        // with the tag queue so data and tag stay paired.
        VX_fifo_queue #(
            .DATAW (AVS_DATA_WIDTH),
            .SIZE  (RD_QUEUE_SIZE)
        ) rd_rsp_queue (
            .clk      (clk),
            .reset    (reset),
            .push     (avs_readdatavalid[i]),
            .pop      (avs_reqq_pop[i]),
            .data_in  (avs_readdata[i]),
            .data_out (avs_rspq_data_out[i]),
            .empty    (avs_rspq_empty[i]),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );
    end

    for (genvar i = 0; i < AVS_BANKS; i++) begin
        assign rsp_arb_valid_in[i] = !avs_rspq_empty[i];
        assign rsp_arb_data_in[i]  = {avs_rspq_data_out[i], avs_reqq_tag_out[i]};
        // Retire the oldest read of a bank once the arbiter accepts it.
        assign avs_reqq_pop[i]     = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
    end

    // Merges the per-bank responses onto the single response port.
    VX_stream_arbiter #(
        .NUM_REQS (AVS_BANKS),
        .DATAW    (AVS_DATA_WIDTH + REQ_TAG_WIDTH),
        .TYPE     ("R")
    ) rsp_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (rsp_arb_valid_in),
        .data_in   (rsp_arb_data_in),
        .ready_in  (rsp_arb_ready_in),
        .valid_out (mem_rsp_valid),
        .data_out  ({mem_rsp_data, mem_rsp_tag}),
        .ready_out (mem_rsp_ready)
    );

`ifdef DBG_TRACE_AFU
    // Debug-only trace of accepted requests and delivered responses.
    always @(posedge clk) begin
        if (mem_req_valid && mem_req_ready) begin
            if (mem_req_rw) begin
                dpi_trace("%d: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, mem_req_data);
            end else begin
                dpi_trace("%d: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, req_queue_size);
            end
        end
        if (mem_rsp_valid && mem_rsp_ready) begin
            dpi_trace("%d: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d\n", $time, mem_rsp_tag, mem_rsp_data, req_queue_size);
        end
    end
`endif

endmodule
|
||||
@@ -1,181 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_to_mem
//
// Adapts a memory request/response port of one data width (SRC) to a port
// of another data width (DST). The widths are compared by their log2:
//
//   DST wider    : one SRC access maps to one DST access; the low D address
//                  bits select the SRC-sized lane inside the DST word and
//                  are carried in the tag so the response can pick it back.
//   DST narrower : one SRC access is issued as P = 2**D consecutive DST
//                  beats; response beats are collected and delivered as a
//                  single SRC-wide response on the last beat.
//   same width   : pass-through.
//
// In all cases the address is truncated or zero-extended to DST_ADDR_WIDTH.
module VX_to_mem #(
    parameter SRC_DATA_WIDTH = 1,
    parameter SRC_ADDR_WIDTH = 1,
    parameter DST_DATA_WIDTH = 1,
    parameter DST_ADDR_WIDTH = 1,
    parameter SRC_TAG_WIDTH  = 1,
    parameter DST_TAG_WIDTH  = 1,
    parameter SRC_DATA_SIZE  = (SRC_DATA_WIDTH / 8),
    parameter DST_DATA_SIZE  = (DST_DATA_WIDTH / 8)
) (
    input wire                          clk,
    input wire                          reset,

    // SRC-side request
    input wire                          mem_req_valid_in,
    input wire [SRC_ADDR_WIDTH-1:0]     mem_req_addr_in,
    input wire                          mem_req_rw_in,
    input wire [SRC_DATA_SIZE-1:0]      mem_req_byteen_in,
    input wire [SRC_DATA_WIDTH-1:0]     mem_req_data_in,
    input wire [SRC_TAG_WIDTH-1:0]      mem_req_tag_in,
    output wire                         mem_req_ready_in,

    // DST-side request
    output wire                         mem_req_valid_out,
    output wire [DST_ADDR_WIDTH-1:0]    mem_req_addr_out,
    output wire                         mem_req_rw_out,
    output wire [DST_DATA_SIZE-1:0]     mem_req_byteen_out,
    output wire [DST_DATA_WIDTH-1:0]    mem_req_data_out,
    output wire [DST_TAG_WIDTH-1:0]     mem_req_tag_out,
    input wire                          mem_req_ready_out,

    // DST-side response
    input wire                          mem_rsp_valid_in,
    input wire [DST_DATA_WIDTH-1:0]     mem_rsp_data_in,
    input wire [DST_TAG_WIDTH-1:0]      mem_rsp_tag_in,
    output wire                         mem_rsp_ready_in,

    // SRC-side response
    output wire                         mem_rsp_valid_out,
    output wire [SRC_DATA_WIDTH-1:0]    mem_rsp_data_out,
    output wire [SRC_TAG_WIDTH-1:0]     mem_rsp_tag_out,
    input wire                          mem_rsp_ready_out
);
    `STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))

    localparam DST_LDATAW = $clog2(DST_DATA_WIDTH);
    localparam SRC_LDATAW = $clog2(SRC_DATA_WIDTH);
    localparam D = `ABS(DST_LDATAW - SRC_LDATAW);   // log2 of the width ratio
    localparam P = 2**D;                            // width ratio

    `UNUSED_VAR (mem_rsp_tag_in)

    if (DST_LDATAW > SRC_LDATAW) begin

        // ---- DST wider than SRC: lane select --------------------------

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // Lane index on the request side (low address bits) and on the
        // response side (low tag bits, as packed into mem_req_tag_out).
        wire [D-1:0] req_idx = mem_req_addr_in[D-1:0];
        wire [D-1:0] rsp_idx = mem_rsp_tag_in[D-1:0];

        // DST word address: SRC address with the lane bits dropped.
        wire [SRC_ADDR_WIDTH-D-1:0] mem_req_addr_in_qual = mem_req_addr_in[SRC_ADDR_WIDTH-1:D];

        // DST response word viewed as P SRC-sized lanes.
        wire [P-1:0][SRC_DATA_WIDTH-1:0] mem_rsp_data_in_w = mem_rsp_data_in;

        if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin
            `UNUSED_VAR (mem_req_addr_in_qual)
            assign mem_req_addr_out = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin
            assign mem_req_addr_out = DST_ADDR_WIDTH'(mem_req_addr_in_qual);
        end else begin
            assign mem_req_addr_out = mem_req_addr_in_qual;
        end

        assign mem_req_valid_out  = mem_req_valid_in;
        assign mem_req_rw_out     = mem_req_rw_in;
        // Move byte enables and data into the selected lane:
        // shift by req_idx * SRC bytes and req_idx * SRC bits respectively.
        assign mem_req_byteen_out = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
        assign mem_req_data_out   = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW);
        // Carry the lane index in the low tag bits.
        assign mem_req_tag_out    = DST_TAG_WIDTH'({mem_req_tag_in, req_idx});
        assign mem_req_ready_in   = mem_req_ready_out;

        assign mem_rsp_valid_out  = mem_rsp_valid_in;
        assign mem_rsp_data_out   = mem_rsp_data_in_w[rsp_idx];
        assign mem_rsp_tag_out    = SRC_TAG_WIDTH'(mem_rsp_tag_in[SRC_TAG_WIDTH+D-1:D]);
        assign mem_rsp_ready_in   = mem_rsp_ready_out;

    end else if (DST_LDATAW < SRC_LDATAW) begin

        // ---- DST narrower than SRC: split into P beats ----------------

        // Beat counters; D bits wide, so they wrap to 0 after P beats.
        reg [D-1:0] req_ctr, rsp_ctr;

        // Response beats collected so far (registered / with current beat).
        reg [P-1:0][DST_DATA_WIDTH-1:0] mem_rsp_data_out_r, mem_rsp_data_out_n;

        wire mem_req_out_fire = mem_req_valid_out && mem_req_ready_out;
        wire mem_rsp_in_fire  = mem_rsp_valid_in && mem_rsp_ready_in;

        // SRC request data and byte enables viewed as P DST-sized slices.
        wire [P-1:0][DST_DATA_WIDTH-1:0] mem_req_data_in_w   = mem_req_data_in;
        wire [P-1:0][DST_DATA_SIZE-1:0]  mem_req_byteen_in_w = mem_req_byteen_in;

        // Merge the incoming beat into the collected response word.
        always @(*) begin
            mem_rsp_data_out_n = mem_rsp_data_out_r;
            if (mem_rsp_in_fire) begin
                mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_in;
            end
        end

        always @(posedge clk) begin
            if (reset) begin
                req_ctr <= 0;
                rsp_ctr <= 0;
            end else begin
                if (mem_req_out_fire) begin
                    req_ctr <= req_ctr + 1;
                end
                if (mem_rsp_in_fire) begin
                    rsp_ctr <= rsp_ctr + 1;
                end
            end
            // Data register is not reset; it is fully rewritten per burst.
            mem_rsp_data_out_r <= mem_rsp_data_out_n;
        end

        // Tag of the most recently accepted response beat, used to check
        // that all beats of one response carry the same tag.
        reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;
        wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_w;

        always @(posedge clk) begin
            if (mem_rsp_in_fire) begin
                mem_rsp_tag_in_r <= mem_rsp_tag_in;
            end
        end
        // Expected tag: on the first beat anything goes, afterwards the tag
        // latched from the previous beat.
        assign mem_rsp_tag_in_w = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_in;
        // cur = tag arriving now, expected = tag of the beats already seen.
        `RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_w == mem_rsp_tag_in),
            ("%t: *** out-of-order memory response! cur=%d, expected=%d", $time, mem_rsp_tag_in, mem_rsp_tag_in_w))

        // DST address: SRC address with the beat index appended as low bits.
        wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr};

        if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin
            `UNUSED_VAR (mem_req_addr_in_qual)
            assign mem_req_addr_out = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin
            assign mem_req_addr_out = DST_ADDR_WIDTH'(mem_req_addr_in_qual);
        end else begin
            assign mem_req_addr_out = mem_req_addr_in_qual;
        end

        assign mem_req_valid_out  = mem_req_valid_in;
        assign mem_req_rw_out     = mem_req_rw_in;
        assign mem_req_byteen_out = mem_req_byteen_in_w[req_ctr];
        assign mem_req_data_out   = mem_req_data_in_w[req_ctr];
        assign mem_req_tag_out    = DST_TAG_WIDTH'(mem_req_tag_in);
        // The SRC request is consumed only when its last beat is accepted.
        assign mem_req_ready_in   = mem_req_ready_out && (req_ctr == (P-1));

        // The SRC response is presented on the last beat, using the
        // combinational merge so the final beat is included.
        assign mem_rsp_valid_out  = mem_rsp_valid_in && (rsp_ctr == (P-1));
        assign mem_rsp_data_out   = mem_rsp_data_out_n;
        assign mem_rsp_tag_out    = SRC_TAG_WIDTH'(mem_rsp_tag_in);
        assign mem_rsp_ready_in   = mem_rsp_ready_out;

    end else begin

        // ---- Same data width: pass-through ----------------------------

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin
            `UNUSED_VAR (mem_req_addr_in)
            assign mem_req_addr_out = mem_req_addr_in[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > SRC_ADDR_WIDTH) begin
            assign mem_req_addr_out = DST_ADDR_WIDTH'(mem_req_addr_in);
        end else begin
            assign mem_req_addr_out = mem_req_addr_in;
        end

        assign mem_req_valid_out  = mem_req_valid_in;
        assign mem_req_rw_out     = mem_req_rw_in;
        assign mem_req_byteen_out = mem_req_byteen_in;
        assign mem_req_data_out   = mem_req_data_in;
        assign mem_req_tag_out    = DST_TAG_WIDTH'(mem_req_tag_in);
        assign mem_req_ready_in   = mem_req_ready_out;

        assign mem_rsp_valid_out  = mem_rsp_valid_in;
        assign mem_rsp_data_out   = mem_rsp_data_in;
        assign mem_rsp_tag_out    = SRC_TAG_WIDTH'(mem_rsp_tag_in);
        assign mem_rsp_ready_in   = mem_rsp_ready_out;

    end

endmodule
|
||||
@@ -241,4 +241,4 @@ typedef union packed {
|
||||
t_ccip_c0_ReqMmioHdr reqMmioHdr;
|
||||
} t_if_ccip_c0_RxHdr;
|
||||
|
||||
endpackage
|
||||
endpackage
|
||||
@@ -45,4 +45,4 @@ begin
|
||||
pck_af2cp_sTx_T1 = pck_af2cp_sTx_T0_q;
|
||||
end
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
@@ -58,4 +58,4 @@ package local_mem_cfg_pkg;
|
||||
|
||||
endpackage // local_mem_cfg_pkg
|
||||
|
||||
`endif // PLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
`endif // PLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
1093
hw/rtl/afu/opae/vortex_afu.sv
Normal file
1093
hw/rtl/afu/opae/vortex_afu.sv
Normal file
File diff suppressed because it is too large
Load Diff
39
hw/rtl/afu/opae/vortex_afu.vh
Normal file
39
hw/rtl/afu/opae/vortex_afu.vh
Normal file
@@ -0,0 +1,39 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VORTEX_AFU_VH
`define VORTEX_AFU_VH

// Accelerator identity.
`define AFU_ACCEL_NAME              "vortex_afu"
`define AFU_ACCEL_UUID              128'h35F9452B_25C2_434C_93D5_6F8C60DB361C

// Command codes; AFU_IMAGE_CMD_MAX_VALUE equals the highest code defined.
`define AFU_IMAGE_CMD_MEM_READ      1
`define AFU_IMAGE_CMD_MEM_WRITE     2
`define AFU_IMAGE_CMD_RUN           3
`define AFU_IMAGE_CMD_DCR_WRITE     4
`define AFU_IMAGE_CMD_MAX_VALUE     4

// MMIO register identifiers.
// NOTE(review): the addressing unit of these values is not visible in this
// header -- confirm against the MMIO decoder that consumes them.
`define AFU_IMAGE_MMIO_CMD_TYPE     10
`define AFU_IMAGE_MMIO_CMD_ARG0     12
`define AFU_IMAGE_MMIO_CMD_ARG1     14
`define AFU_IMAGE_MMIO_CMD_ARG2     16
`define AFU_IMAGE_MMIO_STATUS       18
`define AFU_IMAGE_MMIO_SCOPE_READ   20
`define AFU_IMAGE_MMIO_SCOPE_WRITE  22
`define AFU_IMAGE_MMIO_DEV_CAPS     24
`define AFU_IMAGE_MMIO_ISA_CAPS     26

// Image attributes.
`define AFU_IMAGE_POWER             0
`define AFU_TOP_IFC                 "ccip_std_afu_avalon_mm"

`endif // VORTEX_AFU_VH
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,44 +0,0 @@
|
||||
`ifndef __VORTEX_AFU__
`define __VORTEX_AFU__

`include "ccip_if_pkg.sv"

// Local-memory platform configuration. These are defined before
// local_mem_cfg_pkg.sv is included so the package sees them; each numeric
// value is only a fallback used when not already defined elsewhere.
`define PLATFORM_PROVIDES_LOCAL_MEMORY

`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
`define PLATFORM_PARAM_LOCAL_MEMORY_BANKS 2
`endif

`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH 26
`endif

`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH 512
`endif

`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4
`endif

`include "local_mem_cfg_pkg.sv"

// Accelerator identity.
`define AFU_ACCEL_NAME              "vortex_afu"
`define AFU_ACCEL_UUID              128'h35f9452b_25c2_434c_93d5_6f8c60db361c

// Command codes.
`define AFU_IMAGE_CMD_MEM_READ      1
`define AFU_IMAGE_CMD_MEM_WRITE     2
`define AFU_IMAGE_CMD_RUN           3

// MMIO register identifiers, listed in ascending numeric order.
`define AFU_IMAGE_MMIO_CMD_TYPE     10
`define AFU_IMAGE_MMIO_IO_ADDR      12
`define AFU_IMAGE_MMIO_MEM_ADDR     14
`define AFU_IMAGE_MMIO_DATA_SIZE    16
`define AFU_IMAGE_MMIO_STATUS       18
`define AFU_IMAGE_MMIO_SCOPE_READ   20
`define AFU_IMAGE_MMIO_SCOPE_WRITE  22
`define AFU_IMAGE_MMIO_DEV_CAPS     24

// Image attributes.
`define AFU_IMAGE_POWER             0
`define AFU_TOP_IFC                 "ccip_std_afu_avalon_mm"

`endif
|
||||
419
hw/rtl/afu/xrt/VX_afu_ctrl.sv
Normal file
419
hw/rtl/afu/xrt/VX_afu_ctrl.sv
Normal file
@@ -0,0 +1,419 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "vortex_afu.vh"
|
||||
|
||||
module VX_afu_ctrl #(
|
||||
parameter AXI_ADDR_WIDTH = 8,
|
||||
parameter AXI_DATA_WIDTH = 32,
|
||||
parameter AXI_NUM_BANKS = 1
|
||||
) (
|
||||
// axi4 lite slave signals
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk_en,
|
||||
|
||||
input wire s_axi_awvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
|
||||
output wire s_axi_awready,
|
||||
|
||||
input wire s_axi_wvalid,
|
||||
input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata,
|
||||
input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
|
||||
output wire s_axi_wready,
|
||||
|
||||
output wire s_axi_bvalid,
|
||||
output wire [1:0] s_axi_bresp,
|
||||
input wire s_axi_bready,
|
||||
|
||||
input wire s_axi_arvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
|
||||
output wire s_axi_arready,
|
||||
|
||||
output wire s_axi_rvalid,
|
||||
output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata,
|
||||
output wire [1:0] s_axi_rresp,
|
||||
input wire s_axi_rready,
|
||||
|
||||
output wire ap_reset,
|
||||
output wire ap_start,
|
||||
input wire ap_done,
|
||||
input wire ap_ready,
|
||||
input wire ap_idle,
|
||||
output wire interrupt,
|
||||
|
||||
`ifdef SCOPE
|
||||
input wire scope_bus_in,
|
||||
output wire scope_bus_out,
|
||||
`endif
|
||||
|
||||
output wire [63:0] mem_base [AXI_NUM_BANKS],
|
||||
|
||||
output wire dcr_wr_valid,
|
||||
output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
|
||||
output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data
|
||||
);
|
||||
|
||||
// Address Info
|
||||
// 0x00 : Control signals
|
||||
// bit 0 - ap_start (Read/Write/COH)
|
||||
// bit 1 - ap_done (Read/COR)
|
||||
// bit 2 - ap_idle (Read)
|
||||
// bit 3 - ap_ready (Read)
|
||||
// bit 4 - ap_reset (Write)
|
||||
// bit 7 - auto_restart (Read/Write)
|
||||
// others - reserved
|
||||
// 0x04 : Global Interrupt Enable Register
|
||||
// bit 0 - Global Interrupt Enable (Read/Write)
|
||||
// others - reserved
|
||||
// 0x08 : IP Interrupt Enable Register (Read/Write)
|
||||
// bit 0 - Channel 0 (ap_done)
|
||||
// bit 1 - Channel 1 (ap_ready)
|
||||
// others - reserved
|
||||
// 0x0c : IP Interrupt Status Register (Read/TOW)
|
||||
// bit 0 - Channel 0 (ap_done)
|
||||
// bit 1 - Channel 1 (ap_ready)
|
||||
// others - reserved
|
||||
// 0x10 : Low 32-bit Data signal of DEV_CAPS
|
||||
// 0x14 : High 32-bit Data signal of DEV_CAPS
|
||||
// 0x18 : Control signal of DEV_CAPS
|
||||
// 0x1C : Low 32-bit Data signal of ISA_CAPS
|
||||
// 0x20 : High 32-bit Data signal of ISA_CAPS
|
||||
// 0x24 : Control signal of ISA_CAPS
|
||||
// 0x28 : Low 32-bit Data signal of DCR
|
||||
// 0x2C : High 32-bit Data signal of DCR
|
||||
// 0x30 : Control signal of DCR
|
||||
// 0x34 : Low 32-bit Data signal of SCP
|
||||
// 0x38 : High 32-bit Data signal of SCP
|
||||
// 0x3C : Control signal of SCP
|
||||
// 0x40 : Low 32-bit Data signal of MEM
|
||||
// 0x44 : High 32-bit Data signal of MEM
|
||||
// 0x48 : Control signal of MEM
|
||||
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
|
||||
|
||||
// Parameters
|
||||
localparam
|
||||
ADDR_AP_CTRL = 8'h00,
|
||||
ADDR_GIE = 8'h04,
|
||||
ADDR_IER = 8'h08,
|
||||
ADDR_ISR = 8'h0C,
|
||||
|
||||
ADDR_DEV_0 = 8'h10,
|
||||
ADDR_DEV_1 = 8'h14,
|
||||
ADDR_DEV_CTRL = 8'h18,
|
||||
|
||||
ADDR_ISA_0 = 8'h1C,
|
||||
ADDR_ISA_1 = 8'h20,
|
||||
ADDR_ISA_CTRL = 8'h24,
|
||||
|
||||
ADDR_DCR_0 = 8'h28,
|
||||
ADDR_DCR_1 = 8'h2C,
|
||||
ADDR_DCR_CTRL = 8'h30,
|
||||
|
||||
ADDR_SCP_0 = 8'h34,
|
||||
ADDR_SCP_1 = 8'h38,
|
||||
ADDR_SCP_CTRL = 8'h3C,
|
||||
|
||||
ADDR_MEM_0 = 8'h40,
|
||||
ADDR_MEM_1 = 8'h44,
|
||||
ADDR_MEM_CTRL = 8'h48,
|
||||
|
||||
ADDR_BITS = 8;
|
||||
|
||||
localparam
|
||||
WSTATE_IDLE = 2'd0,
|
||||
WSTATE_DATA = 2'd1,
|
||||
WSTATE_RESP = 2'd2;
|
||||
|
||||
localparam
|
||||
RSTATE_IDLE = 2'd0,
|
||||
RSTATE_DATA = 2'd1;
|
||||
|
||||
// device caps
|
||||
wire [63:0] dev_caps = {16'b0,
|
||||
8'(`SM_ENABLED ? `SMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
8'(`NUM_THREADS),
|
||||
8'(`IMPLEMENTATION_ID)};
|
||||
|
||||
wire [63:0] isa_caps = {32'(`MISA_EXT),
|
||||
2'(`CLOG2(`XLEN)-4),
|
||||
30'(`MISA_STD)};
|
||||
|
||||
reg [1:0] wstate;
|
||||
reg [ADDR_BITS-1:0] waddr;
|
||||
wire [31:0] wmask;
|
||||
wire s_axi_aw_fire;
|
||||
wire s_axi_w_fire;
|
||||
|
||||
reg [1:0] rstate;
|
||||
reg [31:0] rdata;
|
||||
wire [ADDR_BITS-1:0] raddr;
|
||||
wire s_axi_ar_fire;
|
||||
|
||||
reg ap_reset_r;
|
||||
reg ap_start_r;
|
||||
reg auto_restart_r;
|
||||
reg gie_r;
|
||||
reg [1:0] ier_r;
|
||||
reg [1:0] isr_r;
|
||||
reg [63:0] mem_r [AXI_NUM_BANKS];
|
||||
reg [31:0] dcra_r;
|
||||
reg [31:0] dcrv_r;
|
||||
reg dcr_wr_valid_r;
|
||||
|
||||
`ifdef SCOPE
|
||||
|
||||
reg [63:0] scope_bus_wdata;
|
||||
reg [63:0] scope_bus_rdata;
|
||||
reg [5:0] scope_bus_ctr;
|
||||
|
||||
reg cmd_scope_reading;
|
||||
reg cmd_scope_writing;
|
||||
reg scope_bus_out_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
cmd_scope_reading <= 0;
|
||||
cmd_scope_writing <= 0;
|
||||
scope_bus_ctr <= '0;
|
||||
scope_bus_out_r <= 0;
|
||||
end else if (clk_en) begin
|
||||
if (s_axi_w_fire && waddr == ADDR_SCP_0) begin
|
||||
scope_bus_wdata[31:0] <= (s_axi_wdata & wmask) | (scope_bus_wdata[31:0] & ~wmask);
|
||||
end
|
||||
if (s_axi_w_fire && waddr == ADDR_SCP_1) begin
|
||||
scope_bus_wdata[63:32] <= (s_axi_wdata & wmask) | (scope_bus_wdata[63:32] & ~wmask);
|
||||
cmd_scope_writing <= 1;
|
||||
scope_bus_out_r <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
if (scope_bus_in) begin
|
||||
cmd_scope_reading <= 1;
|
||||
scope_bus_ctr <= 63;
|
||||
end
|
||||
if (cmd_scope_reading) begin
|
||||
scope_bus_rdata <= {scope_bus_rdata[62:0], scope_bus_in};
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_reading <= 0;
|
||||
end
|
||||
end
|
||||
if (cmd_scope_writing) begin
|
||||
scope_bus_out_r <= 1'(scope_bus_wdata >> scope_bus_ctr);
|
||||
scope_bus_ctr <= scope_bus_ctr - 1;
|
||||
if (scope_bus_ctr == 0) begin
|
||||
cmd_scope_writing <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign scope_bus_out = scope_bus_out_r;
|
||||
|
||||
`endif
|
||||
|
||||
// AXI Write
|
||||
|
||||
assign s_axi_awready = (wstate == WSTATE_IDLE);
|
||||
assign s_axi_wready = (wstate == WSTATE_DATA);
|
||||
assign s_axi_bvalid = (wstate == WSTATE_RESP);
|
||||
assign s_axi_bresp = 2'b00; // OKAY
|
||||
|
||||
assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready;
|
||||
assign s_axi_w_fire = s_axi_wvalid && s_axi_wready;
|
||||
|
||||
for (genvar i = 0; i < 4; ++i) begin
|
||||
assign wmask[8 * i +: 8] = {8{s_axi_wstrb[i]}};
|
||||
end
|
||||
|
||||
// wstate
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wstate <= WSTATE_IDLE;
|
||||
end else if (clk_en) begin
|
||||
case (wstate)
|
||||
WSTATE_IDLE: wstate <= s_axi_awvalid ? WSTATE_DATA : WSTATE_IDLE;
|
||||
WSTATE_DATA: wstate <= s_axi_wvalid ? WSTATE_RESP : WSTATE_DATA;
|
||||
WSTATE_RESP: wstate <= s_axi_bready ? WSTATE_IDLE : WSTATE_RESP;
|
||||
default: wstate <= WSTATE_IDLE;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// waddr
|
||||
always @(posedge clk) begin
|
||||
if (clk_en) begin
|
||||
if (s_axi_aw_fire)
|
||||
waddr <= s_axi_awaddr[ADDR_BITS-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
// wdata
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
ap_start_r <= 0;
|
||||
ap_reset_r <= 0;
|
||||
auto_restart_r <= 0;
|
||||
|
||||
gie_r <= 0;
|
||||
ier_r <= '0;
|
||||
isr_r <= '0;
|
||||
|
||||
dcra_r <= '0;
|
||||
dcrv_r <= '0;
|
||||
dcr_wr_valid_r <= 0;
|
||||
|
||||
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
mem_r[i] <= '0;
|
||||
end
|
||||
end else if (clk_en) begin
|
||||
if (ap_ready)
|
||||
ap_start_r <= auto_restart_r;
|
||||
|
||||
dcr_wr_valid_r <= 0;
|
||||
|
||||
if (s_axi_w_fire) begin
|
||||
case (waddr)
|
||||
ADDR_AP_CTRL: begin
|
||||
if (s_axi_wstrb[0]) begin
|
||||
if (s_axi_wdata[0])
|
||||
ap_start_r <= 1;
|
||||
if (s_axi_wdata[4])
|
||||
ap_reset_r <= 1;
|
||||
if (s_axi_wdata[7])
|
||||
auto_restart_r <= 1;
|
||||
end
|
||||
end
|
||||
ADDR_GIE: begin
|
||||
if (s_axi_wstrb[0])
|
||||
gie_r <= s_axi_wdata[0];
|
||||
end
|
||||
ADDR_IER: begin
|
||||
if (s_axi_wstrb[0])
|
||||
ier_r <= s_axi_wdata[1:0];
|
||||
end
|
||||
ADDR_ISR: begin
|
||||
if (s_axi_wstrb[0])
|
||||
isr_r <= isr_r ^ s_axi_wdata[1:0];
|
||||
end
|
||||
ADDR_DCR_0: begin
|
||||
dcra_r <= (s_axi_wdata & wmask) | (dcra_r & ~wmask);
|
||||
end
|
||||
ADDR_DCR_1: begin
|
||||
dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask);
|
||||
dcr_wr_valid_r <= 1;
|
||||
end
|
||||
default: begin
|
||||
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
if (waddr == (ADDR_MEM_0 + i * 12)) begin
|
||||
mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask);
|
||||
end
|
||||
if (waddr == (ADDR_MEM_1 + i * 12)) begin
|
||||
mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask);
|
||||
end
|
||||
end
|
||||
end
|
||||
endcase
|
||||
|
||||
if (ier_r[0] & ap_done)
|
||||
isr_r[0] <= 1'b1;
|
||||
if (ier_r[1] & ap_ready)
|
||||
isr_r[1] <= 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// AXI Read
// The read address is accepted while the read FSM is idle; read data is
// presented while the FSM is in the data state. Every read responds OKAY.

assign raddr         = s_axi_araddr[ADDR_BITS-1:0];
assign s_axi_ar_fire = s_axi_arvalid && s_axi_arready;

assign s_axi_arready = (rstate == RSTATE_IDLE);
assign s_axi_rvalid  = (rstate == RSTATE_DATA);
assign s_axi_rdata   = rdata;
assign s_axi_rresp   = 2'b00; // OKAY
|
||||
|
||||
// rstate
// Read-channel FSM: IDLE moves to DATA when a read address is presented,
// DATA returns to IDLE once the read data handshake completes.
always @(posedge clk) begin
    if (reset) begin
        rstate <= RSTATE_IDLE;
    end else if (clk_en) begin
        case (rstate)
        RSTATE_IDLE: begin
            rstate <= s_axi_arvalid ? RSTATE_DATA : RSTATE_IDLE;
        end
        RSTATE_DATA: begin
            rstate <= (s_axi_rvalid && s_axi_rready) ? RSTATE_IDLE : RSTATE_DATA;
        end
        default: begin
            // recover from any unexpected encoding
            rstate <= RSTATE_IDLE;
        end
        endcase
    end
end
|
||||
|
||||
// rdata
// Read data register, captured on the cycle the read address handshake
// fires. Addresses without a case item below, and bits not explicitly
// assigned, read back as zero.
always @(posedge clk) begin
    if (clk_en && s_axi_ar_fire) begin
        rdata <= '0; // default; overridden by the selected register below
        case (raddr)
        ADDR_AP_CTRL: begin
            rdata[0] <= ap_start_r;
            rdata[1] <= ap_done;
            rdata[2] <= ap_idle;
            rdata[3] <= ap_ready;
            rdata[7] <= auto_restart_r;
        end
        // interrupt registers
        ADDR_GIE: rdata <= 32'(gie_r);
        ADDR_IER: rdata <= 32'(ier_r);
        ADDR_ISR: rdata <= 32'(isr_r);
        // 64-bit capability words, exposed as two 32-bit halves each
        ADDR_DEV_0: rdata <= dev_caps[31:0];
        ADDR_DEV_1: rdata <= dev_caps[63:32];
        ADDR_ISA_0: rdata <= isa_caps[31:0];
        ADDR_ISA_1: rdata <= isa_caps[63:32];
    `ifdef SCOPE
        // scope bus read data, two 32-bit halves
        ADDR_SCP_0: rdata <= scope_bus_rdata[31:0];
        ADDR_SCP_1: rdata <= scope_bus_rdata[63:32];
    `endif
        default:;
        endcase
    end
end
|
||||
|
||||
// Kernel control outputs
assign ap_start = ap_start_r;
assign ap_reset = ap_reset_r;

// Interrupt is asserted while globally enabled and any status bit is set
assign interrupt = gie_r & (| isr_r);

// Per-bank memory base addresses
assign mem_base = mem_r;

// DCR write port (address/data resized to the DCR bus widths)
assign dcr_wr_valid = dcr_wr_valid_r;
assign dcr_wr_addr  = `VX_DCR_ADDR_WIDTH'(dcra_r);
assign dcr_wr_data  = `VX_DCR_DATA_WIDTH'(dcrv_r);
|
||||
|
||||
endmodule
|
||||
412
hw/rtl/afu/xrt/VX_afu_wrap.sv
Normal file
412
hw/rtl/afu/xrt/VX_afu_wrap.sv
Normal file
@@ -0,0 +1,412 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "vortex_afu.vh"
|
||||
|
||||
module VX_afu_wrap #(
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = 16,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = 512
|
||||
) (
|
||||
// System signals
|
||||
input wire ap_clk,
|
||||
input wire ap_rst_n,
|
||||
|
||||
// AXI4 master interface
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
output wire s_axi_ctrl_awready,
|
||||
input wire [C_S_AXI_CTRL_ADDR_WIDTH-1:0] s_axi_ctrl_awaddr,
|
||||
input wire s_axi_ctrl_wvalid,
|
||||
output wire s_axi_ctrl_wready,
|
||||
input wire [C_S_AXI_CTRL_DATA_WIDTH-1:0] s_axi_ctrl_wdata,
|
||||
input wire [C_S_AXI_CTRL_DATA_WIDTH/8-1:0] s_axi_ctrl_wstrb,
|
||||
input wire s_axi_ctrl_arvalid,
|
||||
output wire s_axi_ctrl_arready,
|
||||
input wire [C_S_AXI_CTRL_ADDR_WIDTH-1:0] s_axi_ctrl_araddr,
|
||||
output wire s_axi_ctrl_rvalid,
|
||||
input wire s_axi_ctrl_rready,
|
||||
output wire [C_S_AXI_CTRL_DATA_WIDTH-1:0] s_axi_ctrl_rdata,
|
||||
output wire [1:0] s_axi_ctrl_rresp,
|
||||
output wire s_axi_ctrl_bvalid,
|
||||
input wire s_axi_ctrl_bready,
|
||||
output wire [1:0] s_axi_ctrl_bresp,
|
||||
|
||||
output wire interrupt
|
||||
);
|
||||
localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS;
|
||||
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_RUN = 1;
|
||||
|
||||
wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ID_WIDTH-1:0] m_axi_mem_awid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [7:0] m_axi_mem_awlen_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_wvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_wready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_DATA_WIDTH-1:0] m_axi_mem_wdata_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_DATA_WIDTH/8-1:0] m_axi_mem_wstrb_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_wlast_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_bvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_bready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ID_WIDTH-1:0] m_axi_mem_bid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [1:0] m_axi_mem_bresp_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_arvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_arready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ID_WIDTH-1:0] m_axi_mem_arid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [7:0] m_axi_mem_arlen_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_rvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_rready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_DATA_WIDTH-1:0] m_axi_mem_rdata_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_rlast_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ID_WIDTH-1:0] m_axi_mem_rid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
// convert memory interface to array
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
|
||||
wire clk = ap_clk;
|
||||
wire reset = ~ap_rst_n;
|
||||
|
||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
reg [15:0] vx_pending_writes;
|
||||
reg vx_busy_wait;
|
||||
reg vx_running;
|
||||
|
||||
wire vx_busy;
|
||||
|
||||
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
wire dcr_wr_valid;
|
||||
wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr;
|
||||
wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data;
|
||||
|
||||
reg state;
|
||||
|
||||
wire ap_reset;
|
||||
wire ap_start;
|
||||
wire ap_idle = ~vx_running;
|
||||
wire ap_done = ~(state == STATE_RUN || vx_pending_writes != 0);
|
||||
wire ap_ready = 1'b1;
|
||||
|
||||
`ifdef SCOPE
|
||||
wire scope_bus_in;
|
||||
wire scope_bus_out;
|
||||
wire scope_reset = reset;
|
||||
`endif
|
||||
|
||||
// AFU run-control state machine.
//   STATE_IDLE: waits for ap_start, then enters STATE_RUN with vx_running
//               cleared.
//   STATE_RUN : once vx_reset_ctr reaches RESET_DELAY-1, sets vx_running and
//               vx_busy_wait; then waits for vx_busy to assert, and finally
//               returns to STATE_IDLE when vx_busy deasserts.
always @(posedge ap_clk) begin
    if (reset || ap_reset) begin
        state        <= STATE_IDLE;
        vx_running   <= 0;
        vx_busy_wait <= 0;
    end else begin
        case (state)
        STATE_IDLE: begin
            if (ap_start) begin
            `ifdef DBG_TRACE_AFU
                `TRACE(2, ("%d: STATE RUN\n", $time));
            `endif
                vx_running <= 0;
                state      <= STATE_RUN;
            end
        end
        STATE_RUN: begin
            if (vx_running) begin
                if (vx_busy_wait) begin
                    // first wait for the processor to report busy
                    if (vx_busy) begin
                        vx_busy_wait <= 0;
                    end
                end else if (~vx_busy) begin
                    // processor is no longer busy: execution has ended
                    state <= STATE_IDLE;
                `ifdef DBG_TRACE_AFU
                    `TRACE(2, ("%d: AFU: End execution\n", $time));
                    `TRACE(2, ("%d: STATE IDLE\n", $time));
                `endif
                end
            end else if (vx_reset_ctr == (`RESET_DELAY-1)) begin
                // reset sequence is complete: start the processor
            `ifdef DBG_TRACE_AFU
                `TRACE(2, ("%d: AFU: Begin execution\n", $time));
            `endif
                vx_running   <= 1;
                vx_busy_wait <= 1;
            end
        end
        endcase
    end
end
|
||||
|
||||
// Outstanding-write tracking.
// vx_pending_writes counts write-data handshakes that have not yet received
// a write response; ap_done is held low while it is non-zero.
//
// Handshakes are counted per bank. OR-reducing them across banks would count
// two banks firing in the same cycle as a single event, letting the counter
// drift (and eventually underflow) when more than one bank is configured.
// NOTE(review): counting W beats against B responses assumes one data beat
// per write transaction - confirm against Vortex_axi.
reg m_axi_mem_wfire; // any bank completed a write-data handshake this cycle
reg m_axi_mem_bfire; // any bank completed a write-response handshake this cycle
reg [$bits(vx_pending_writes)-1:0] m_axi_mem_wfire_cnt; // number of banks with a W handshake
reg [$bits(vx_pending_writes)-1:0] m_axi_mem_bfire_cnt; // number of banks with a B handshake

always @(*) begin
    m_axi_mem_wfire     = 0;
    m_axi_mem_bfire     = 0;
    m_axi_mem_wfire_cnt = '0;
    m_axi_mem_bfire_cnt = '0;
    for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
        if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
            m_axi_mem_wfire     = 1;
            m_axi_mem_wfire_cnt = m_axi_mem_wfire_cnt + 1;
        end
        if (m_axi_mem_bvalid_a[i] && m_axi_mem_bready_a[i]) begin
            m_axi_mem_bfire     = 1;
            m_axi_mem_bfire_cnt = m_axi_mem_bfire_cnt + 1;
        end
    end
end

always @(posedge ap_clk) begin
    if (reset || ap_reset) begin
        vx_pending_writes <= '0;
    end else if (m_axi_mem_wfire || m_axi_mem_bfire) begin
        // net change this cycle: writes issued minus responses received
        vx_pending_writes <= vx_pending_writes + m_axi_mem_wfire_cnt - m_axi_mem_bfire_cnt;
    end
end
|
||||
|
||||
// Reset-delay counter: free-runs while in STATE_RUN and is held at zero in
// any other state. The state machine compares it against RESET_DELAY-1.
always @(posedge ap_clk) begin
    if (state == STATE_RUN)
        vx_reset_ctr <= vx_reset_ctr + 1;
    else
        vx_reset_ctr <= '0;
end
|
||||
|
||||
// AXI4-Lite control/status register block. It is reset by the external reset
// as well as by the ap_reset it generates itself.
VX_afu_ctrl #(
    .AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
    .AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
    .AXI_NUM_BANKS  (C_M_AXI_MEM_NUM_BANKS)
) afu_ctrl (
    .clk            (ap_clk),
    .reset          (reset || ap_reset),
    .clk_en         (1'b1),

    // write address channel
    .s_axi_awvalid  (s_axi_ctrl_awvalid),
    .s_axi_awready  (s_axi_ctrl_awready),
    .s_axi_awaddr   (s_axi_ctrl_awaddr),

    // write data channel
    .s_axi_wvalid   (s_axi_ctrl_wvalid),
    .s_axi_wready   (s_axi_ctrl_wready),
    .s_axi_wdata    (s_axi_ctrl_wdata),
    .s_axi_wstrb    (s_axi_ctrl_wstrb),

    // write response channel
    .s_axi_bvalid   (s_axi_ctrl_bvalid),
    .s_axi_bready   (s_axi_ctrl_bready),
    .s_axi_bresp    (s_axi_ctrl_bresp),

    // read address channel
    .s_axi_arvalid  (s_axi_ctrl_arvalid),
    .s_axi_arready  (s_axi_ctrl_arready),
    .s_axi_araddr   (s_axi_ctrl_araddr),

    // read data channel
    .s_axi_rvalid   (s_axi_ctrl_rvalid),
    .s_axi_rready   (s_axi_ctrl_rready),
    .s_axi_rdata    (s_axi_ctrl_rdata),
    .s_axi_rresp    (s_axi_ctrl_rresp),

    // kernel control handshake and interrupt
    .ap_start       (ap_start),
    .ap_reset       (ap_reset),
    .ap_done        (ap_done),
    .ap_ready       (ap_ready),
    .ap_idle        (ap_idle),
    .interrupt      (interrupt),

`ifdef SCOPE
    // scope bus (directions are crossed between the two ends)
    .scope_bus_in   (scope_bus_out),
    .scope_bus_out  (scope_bus_in),
`endif

    // per-bank memory base addresses
    .mem_base       (mem_base),

    // DCR write port
    .dcr_wr_valid   (dcr_wr_valid),
    .dcr_wr_addr    (dcr_wr_addr),
    .dcr_wr_data    (dcr_wr_data)
);
|
||||
|
||||
// Raw XLEN-wide addresses produced by vortex_axi, one per bank.
wire [`XLEN-1:0] m_axi_mem_araddr_w [C_M_AXI_MEM_NUM_BANKS];
wire [`XLEN-1:0] m_axi_mem_awaddr_w [C_M_AXI_MEM_NUM_BANKS];

// Rebase every bank's read/write address by that bank's mem_base. Both
// operands are resized to the external AXI address width before the add.
for (genvar bank = 0; bank < C_M_AXI_MEM_NUM_BANKS; ++bank) begin
    assign m_axi_mem_araddr_a[bank] = C_M_AXI_MEM_ADDR_WIDTH'(mem_base[bank]) + C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_w[bank]);
    assign m_axi_mem_awaddr_a[bank] = C_M_AXI_MEM_ADDR_WIDTH'(mem_base[bank]) + C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_w[bank]);
end
|
||||
|
||||
`SCOPE_IO_SWITCH (2)
|
||||
|
||||
// Vortex processor with AXI memory interface.
// Held in reset until the run-control state machine sets vx_running.
// Addresses leave the core XLEN wide (m_axi_mem_*addr_w) and are rebased by
// mem_base outside this instance.
Vortex_axi #(
    .AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
    .AXI_ADDR_WIDTH (`XLEN),
    .AXI_TID_WIDTH  (C_M_AXI_MEM_ID_WIDTH),
    .AXI_NUM_BANKS  (C_M_AXI_MEM_NUM_BANKS)
) vortex_axi (
    `SCOPE_IO_BIND  (1)

    .clk            (ap_clk),
    .reset          (reset || ap_reset || ~vx_running),

    // write address channel
    .m_axi_awvalid  (m_axi_mem_awvalid_a),
    .m_axi_awready  (m_axi_mem_awready_a),
    .m_axi_awaddr   (m_axi_mem_awaddr_w),
    .m_axi_awid     (m_axi_mem_awid_a),
    // AWLEN must be driven: m_axi_mem_awlen_a feeds the m_axi_mem_<i>_awlen
    // output ports and has no other driver (mirrors m_axi_arlen below).
    .m_axi_awlen    (m_axi_mem_awlen_a),
    `UNUSED_PIN (m_axi_awsize),
    `UNUSED_PIN (m_axi_awburst),
    `UNUSED_PIN (m_axi_awlock),
    `UNUSED_PIN (m_axi_awcache),
    `UNUSED_PIN (m_axi_awprot),
    `UNUSED_PIN (m_axi_awqos),
    `UNUSED_PIN (m_axi_awregion),

    // write data channel
    .m_axi_wvalid   (m_axi_mem_wvalid_a),
    .m_axi_wready   (m_axi_mem_wready_a),
    .m_axi_wdata    (m_axi_mem_wdata_a),
    .m_axi_wstrb    (m_axi_mem_wstrb_a),
    .m_axi_wlast    (m_axi_mem_wlast_a),

    // write response channel
    .m_axi_bvalid   (m_axi_mem_bvalid_a),
    .m_axi_bready   (m_axi_mem_bready_a),
    .m_axi_bid      (m_axi_mem_bid_a),
    .m_axi_bresp    (m_axi_mem_bresp_a),

    // read address channel
    .m_axi_arvalid  (m_axi_mem_arvalid_a),
    .m_axi_arready  (m_axi_mem_arready_a),
    .m_axi_araddr   (m_axi_mem_araddr_w),
    .m_axi_arid     (m_axi_mem_arid_a),
    .m_axi_arlen    (m_axi_mem_arlen_a),
    `UNUSED_PIN (m_axi_arsize),
    `UNUSED_PIN (m_axi_arburst),
    `UNUSED_PIN (m_axi_arlock),
    `UNUSED_PIN (m_axi_arcache),
    `UNUSED_PIN (m_axi_arprot),
    `UNUSED_PIN (m_axi_arqos),
    `UNUSED_PIN (m_axi_arregion),

    // read data channel
    .m_axi_rvalid   (m_axi_mem_rvalid_a),
    .m_axi_rready   (m_axi_mem_rready_a),
    .m_axi_rdata    (m_axi_mem_rdata_a),
    .m_axi_rlast    (m_axi_mem_rlast_a),
    .m_axi_rid      (m_axi_mem_rid_a),
    .m_axi_rresp    (m_axi_mem_rresp_a),

    // DCR write port driven by the control register block
    .dcr_wr_valid   (dcr_wr_valid),
    .dcr_wr_addr    (dcr_wr_addr),
    .dcr_wr_data    (dcr_wr_data),

    .busy           (vx_busy)
);
|
||||
|
||||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
`ifdef SCOPE
|
||||
`define TRIGGERS { \
|
||||
reset, \
|
||||
ap_start, \
|
||||
ap_done, \
|
||||
ap_idle, \
|
||||
interrupt, \
|
||||
vx_busy_wait, \
|
||||
vx_busy, \
|
||||
vx_running \
|
||||
}
|
||||
|
||||
`define PROBES { \
|
||||
vx_pending_writes \
|
||||
}
|
||||
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (0),
|
||||
.TRIGGERW ($bits(`TRIGGERS)),
|
||||
.PROBEW ($bits(`PROBES))
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset_w[0]),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers(`TRIGGERS),
|
||||
.probes(`PROBES),
|
||||
.bus_in(scope_bus_in_w[0]),
|
||||
.bus_out(scope_bus_out_w[0])
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_afu ila_afu_inst (
|
||||
.clk (ap_clk),
|
||||
.probe0 ({
|
||||
ap_start,
|
||||
ap_done,
|
||||
ap_idle,
|
||||
interrupt
|
||||
}),
|
||||
.probe1 ({
|
||||
vx_pending_writes,
|
||||
vx_busy_wait,
|
||||
vx_busy,
|
||||
vx_running
|
||||
})
|
||||
);
|
||||
`endif
|
||||
`else
|
||||
`SCOPE_IO_UNUSED_W(0)
|
||||
`endif
|
||||
|
||||
`ifdef SIMULATION
|
||||
`ifndef VERILATOR
|
||||
// Assertions inside vortex_axi are switched off at time zero and re-enabled
// once RESET_DELAY-1 clock cycles have been counted after the external reset
// is released.
reg [`CLOG2(`RESET_DELAY+1)-1:0] assert_delay_ctr;
reg assert_enabled;

initial begin
    $assertoff(0, vortex_axi);
end

always @(posedge ap_clk) begin
    if (reset) begin
        assert_enabled   <= 0;
        assert_delay_ctr <= '0;
    end else if (~assert_enabled) begin
        if (assert_delay_ctr == (`RESET_DELAY-1)) begin
            assert_enabled <= 1;
            $asserton(0, vortex_axi); // enable assertions
        end else begin
            assert_delay_ctr <= assert_delay_ctr + 1;
        end
    end
end
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_AFU
|
||||
// Debug trace of the AXI memory traffic: one message per completed
// valid/ready handshake on each bank's AW, W, AR and R channels.
always @(posedge ap_clk) begin
    for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
        // write address accepted
        if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin
            `TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
        end
        // write data accepted
        if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
            `TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%0h\n", $time, i, m_axi_mem_wdata_a[i]));
        end
        // read address accepted
        if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
            `TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]));
        end
        // read data returned (labelled "AFU" like the other messages here)
        if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
            `TRACE(2, ("%d: AFU Rd Rsp [%0d]: data=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]));
        end
    end
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
85
hw/rtl/afu/xrt/vortex_afu.v
Normal file
85
hw/rtl/afu/xrt/vortex_afu.v
Normal file
@@ -0,0 +1,85 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "vortex_afu.vh"
|
||||
|
||||
// Top-level AFU module. It contains no logic of its own: every parameter and
// port is passed straight through to the VX_afu_wrap instance.
module vortex_afu #(
    parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
    parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
    parameter C_M_AXI_MEM_ID_WIDTH    = `M_AXI_MEM_ID_WIDTH,
    parameter C_M_AXI_MEM_ADDR_WIDTH  = 64,
    parameter C_M_AXI_MEM_DATA_WIDTH  = `VX_MEM_DATA_WIDTH
) (
    // System signals
    input  wire                                 ap_clk,
    input  wire                                 ap_rst_n,

    // AXI4 master interface (one port group per memory bank)
    `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),

    // AXI4-Lite slave interface
    input  wire                                 s_axi_ctrl_awvalid,
    output wire                                 s_axi_ctrl_awready,
    input  wire [C_S_AXI_CTRL_ADDR_WIDTH-1:0]   s_axi_ctrl_awaddr,
    input  wire                                 s_axi_ctrl_wvalid,
    output wire                                 s_axi_ctrl_wready,
    input  wire [C_S_AXI_CTRL_DATA_WIDTH-1:0]   s_axi_ctrl_wdata,
    input  wire [C_S_AXI_CTRL_DATA_WIDTH/8-1:0] s_axi_ctrl_wstrb,
    input  wire                                 s_axi_ctrl_arvalid,
    output wire                                 s_axi_ctrl_arready,
    input  wire [C_S_AXI_CTRL_ADDR_WIDTH-1:0]   s_axi_ctrl_araddr,
    output wire                                 s_axi_ctrl_rvalid,
    input  wire                                 s_axi_ctrl_rready,
    output wire [C_S_AXI_CTRL_DATA_WIDTH-1:0]   s_axi_ctrl_rdata,
    output wire [1:0]                           s_axi_ctrl_rresp,
    output wire                                 s_axi_ctrl_bvalid,
    input  wire                                 s_axi_ctrl_bready,
    output wire [1:0]                           s_axi_ctrl_bresp,

    output wire                                 interrupt
);

    VX_afu_wrap #(
        .C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
        .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
        .C_M_AXI_MEM_ID_WIDTH    (C_M_AXI_MEM_ID_WIDTH),
        .C_M_AXI_MEM_ADDR_WIDTH  (C_M_AXI_MEM_ADDR_WIDTH),
        .C_M_AXI_MEM_DATA_WIDTH  (C_M_AXI_MEM_DATA_WIDTH)
    ) afu_wrap (
        .ap_clk             (ap_clk),
        .ap_rst_n           (ap_rst_n),

        // memory banks
        `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),

        // control: write address
        .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
        .s_axi_ctrl_awready (s_axi_ctrl_awready),
        .s_axi_ctrl_awaddr  (s_axi_ctrl_awaddr),

        // control: write data
        .s_axi_ctrl_wvalid  (s_axi_ctrl_wvalid),
        .s_axi_ctrl_wready  (s_axi_ctrl_wready),
        .s_axi_ctrl_wdata   (s_axi_ctrl_wdata),
        .s_axi_ctrl_wstrb   (s_axi_ctrl_wstrb),

        // control: write response
        .s_axi_ctrl_bvalid  (s_axi_ctrl_bvalid),
        .s_axi_ctrl_bready  (s_axi_ctrl_bready),
        .s_axi_ctrl_bresp   (s_axi_ctrl_bresp),

        // control: read address
        .s_axi_ctrl_arvalid (s_axi_ctrl_arvalid),
        .s_axi_ctrl_arready (s_axi_ctrl_arready),
        .s_axi_ctrl_araddr  (s_axi_ctrl_araddr),

        // control: read data
        .s_axi_ctrl_rvalid  (s_axi_ctrl_rvalid),
        .s_axi_ctrl_rready  (s_axi_ctrl_rready),
        .s_axi_ctrl_rdata   (s_axi_ctrl_rdata),
        .s_axi_ctrl_rresp   (s_axi_ctrl_rresp),

        .interrupt          (interrupt)
    );

endmodule
|
||||
108
hw/rtl/afu/xrt/vortex_afu.vh
Normal file
108
hw/rtl/afu/xrt/vortex_afu.vh
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VORTEX_AFU_VH
|
||||
`define VORTEX_AFU_VH
|
||||
|
||||
`ifndef M_AXI_MEM_NUM_BANKS
|
||||
`define M_AXI_MEM_NUM_BANKS 1
|
||||
`endif
|
||||
|
||||
`ifndef M_AXI_MEM_ID_WIDTH
|
||||
`define M_AXI_MEM_ID_WIDTH 32
|
||||
`endif
|
||||
|
||||
// GEN_AXI_MEM(i): port declarations for memory bank <i> of the AXI4 master
// interface (m_axi_mem_<i>_*). Expands without a trailing comma so it can be
// repeated with a comma separator inside a module port list. Widths come from
// the enclosing module's C_M_AXI_MEM_* parameters.
`define GEN_AXI_MEM(i) \
    output wire                                m_axi_mem_``i``_awvalid, \
    input  wire                                m_axi_mem_``i``_awready, \
    output wire [C_M_AXI_MEM_ADDR_WIDTH-1:0]   m_axi_mem_``i``_awaddr, \
    output wire [C_M_AXI_MEM_ID_WIDTH-1:0]     m_axi_mem_``i``_awid, \
    output wire [7:0]                          m_axi_mem_``i``_awlen, \
    output wire                                m_axi_mem_``i``_wvalid, \
    input  wire                                m_axi_mem_``i``_wready, \
    output wire [C_M_AXI_MEM_DATA_WIDTH-1:0]   m_axi_mem_``i``_wdata, \
    output wire [C_M_AXI_MEM_DATA_WIDTH/8-1:0] m_axi_mem_``i``_wstrb, \
    output wire                                m_axi_mem_``i``_wlast, \
    output wire                                m_axi_mem_``i``_arvalid, \
    input  wire                                m_axi_mem_``i``_arready, \
    output wire [C_M_AXI_MEM_ADDR_WIDTH-1:0]   m_axi_mem_``i``_araddr, \
    output wire [C_M_AXI_MEM_ID_WIDTH-1:0]     m_axi_mem_``i``_arid, \
    output wire [7:0]                          m_axi_mem_``i``_arlen, \
    input  wire                                m_axi_mem_``i``_rvalid, \
    output wire                                m_axi_mem_``i``_rready, \
    input  wire [C_M_AXI_MEM_DATA_WIDTH-1:0]   m_axi_mem_``i``_rdata, \
    input  wire                                m_axi_mem_``i``_rlast, \
    input  wire [C_M_AXI_MEM_ID_WIDTH-1:0]     m_axi_mem_``i``_rid, \
    input  wire [1:0]                          m_axi_mem_``i``_rresp, \
    input  wire                                m_axi_mem_``i``_bvalid, \
    output wire                                m_axi_mem_``i``_bready, \
    input  wire [1:0]                          m_axi_mem_``i``_bresp, \
    input  wire [C_M_AXI_MEM_ID_WIDTH-1:0]     m_axi_mem_``i``_bid
|
||||
|
||||
// AXI_MEM_ARGS(i): named port connections that pass memory bank <i>'s
// m_axi_mem_<i>_* signals through to an instance with identically named
// ports. Connections are grouped by channel (AW, W, B, AR, R). Expands
// without a trailing comma.
`define AXI_MEM_ARGS(i) \
    .m_axi_mem_``i``_awvalid (m_axi_mem_``i``_awvalid), \
    .m_axi_mem_``i``_awready (m_axi_mem_``i``_awready), \
    .m_axi_mem_``i``_awaddr  (m_axi_mem_``i``_awaddr), \
    .m_axi_mem_``i``_awid    (m_axi_mem_``i``_awid), \
    .m_axi_mem_``i``_awlen   (m_axi_mem_``i``_awlen), \
    .m_axi_mem_``i``_wvalid  (m_axi_mem_``i``_wvalid), \
    .m_axi_mem_``i``_wready  (m_axi_mem_``i``_wready), \
    .m_axi_mem_``i``_wdata   (m_axi_mem_``i``_wdata), \
    .m_axi_mem_``i``_wstrb   (m_axi_mem_``i``_wstrb), \
    .m_axi_mem_``i``_wlast   (m_axi_mem_``i``_wlast), \
    .m_axi_mem_``i``_bvalid  (m_axi_mem_``i``_bvalid), \
    .m_axi_mem_``i``_bready  (m_axi_mem_``i``_bready), \
    .m_axi_mem_``i``_bresp   (m_axi_mem_``i``_bresp), \
    .m_axi_mem_``i``_bid     (m_axi_mem_``i``_bid), \
    .m_axi_mem_``i``_arvalid (m_axi_mem_``i``_arvalid), \
    .m_axi_mem_``i``_arready (m_axi_mem_``i``_arready), \
    .m_axi_mem_``i``_araddr  (m_axi_mem_``i``_araddr), \
    .m_axi_mem_``i``_arid    (m_axi_mem_``i``_arid), \
    .m_axi_mem_``i``_arlen   (m_axi_mem_``i``_arlen), \
    .m_axi_mem_``i``_rvalid  (m_axi_mem_``i``_rvalid), \
    .m_axi_mem_``i``_rready  (m_axi_mem_``i``_rready), \
    .m_axi_mem_``i``_rdata   (m_axi_mem_``i``_rdata), \
    .m_axi_mem_``i``_rlast   (m_axi_mem_``i``_rlast), \
    .m_axi_mem_``i``_rid     (m_axi_mem_``i``_rid), \
    .m_axi_mem_``i``_rresp   (m_axi_mem_``i``_rresp)
|
||||
|
||||
// AXI_MEM_TO_ARRAY(i): ties memory bank <i>'s flat m_axi_mem_<i>_* ports to
// element [i] of the m_axi_mem_*_a arrays. Module outputs are driven from the
// arrays; module inputs drive the arrays. Assignments are grouped by channel
// (AW, W, B, AR, R). The final statement has no semicolon - the macro user
// supplies the separator/terminator.
`define AXI_MEM_TO_ARRAY(i) \
    assign m_axi_mem_``i``_awvalid = m_axi_mem_awvalid_a[i]; \
    assign m_axi_mem_awready_a[i]  = m_axi_mem_``i``_awready; \
    assign m_axi_mem_``i``_awaddr  = m_axi_mem_awaddr_a[i]; \
    assign m_axi_mem_``i``_awid    = m_axi_mem_awid_a[i]; \
    assign m_axi_mem_``i``_awlen   = m_axi_mem_awlen_a[i]; \
    assign m_axi_mem_``i``_wvalid  = m_axi_mem_wvalid_a[i]; \
    assign m_axi_mem_wready_a[i]   = m_axi_mem_``i``_wready; \
    assign m_axi_mem_``i``_wdata   = m_axi_mem_wdata_a[i]; \
    assign m_axi_mem_``i``_wstrb   = m_axi_mem_wstrb_a[i]; \
    assign m_axi_mem_``i``_wlast   = m_axi_mem_wlast_a[i]; \
    assign m_axi_mem_bvalid_a[i]   = m_axi_mem_``i``_bvalid; \
    assign m_axi_mem_``i``_bready  = m_axi_mem_bready_a[i]; \
    assign m_axi_mem_bresp_a[i]    = m_axi_mem_``i``_bresp; \
    assign m_axi_mem_bid_a[i]      = m_axi_mem_``i``_bid; \
    assign m_axi_mem_``i``_arvalid = m_axi_mem_arvalid_a[i]; \
    assign m_axi_mem_arready_a[i]  = m_axi_mem_``i``_arready; \
    assign m_axi_mem_``i``_araddr  = m_axi_mem_araddr_a[i]; \
    assign m_axi_mem_``i``_arid    = m_axi_mem_arid_a[i]; \
    assign m_axi_mem_``i``_arlen   = m_axi_mem_arlen_a[i]; \
    assign m_axi_mem_rvalid_a[i]   = m_axi_mem_``i``_rvalid; \
    assign m_axi_mem_``i``_rready  = m_axi_mem_rready_a[i]; \
    assign m_axi_mem_rdata_a[i]    = m_axi_mem_``i``_rdata; \
    assign m_axi_mem_rlast_a[i]    = m_axi_mem_``i``_rlast; \
    assign m_axi_mem_rid_a[i]      = m_axi_mem_``i``_rid; \
    assign m_axi_mem_rresp_a[i]    = m_axi_mem_``i``_rresp
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`endif // VORTEX_AFU_VH
|
||||
511
hw/rtl/cache/VX_bank.sv
vendored
511
hw/rtl/cache/VX_bank.sv
vendored
@@ -1,511 +0,0 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_bank #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter CACHE_LINE_SIZE = 1,
|
||||
// Number of bankS
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of ports per banks
|
||||
parameter NUM_PORTS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
|
||||
// Core Request Queue Size
|
||||
parameter CREQ_SIZE = 1,
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 1,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 1,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 1,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
|
||||
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
) (
|
||||
`SCOPE_IO_VX_bank
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire perf_read_misses,
|
||||
output wire perf_write_misses,
|
||||
output wire perf_mshr_stalls,
|
||||
`endif
|
||||
|
||||
// Core Request
|
||||
input wire core_req_valid,
|
||||
input wire [NUM_PORTS-1:0] core_req_pmask,
|
||||
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] core_req_wsel,
|
||||
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_req_tid,
|
||||
input wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
|
||||
input wire core_req_rw,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr,
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
output wire core_rsp_valid,
|
||||
output wire [NUM_PORTS-1:0] core_rsp_pmask,
|
||||
output wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_rsp_tid,
|
||||
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire core_rsp_ready,
|
||||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [NUM_PORTS-1:0] mem_req_pmask,
|
||||
output wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen,
|
||||
output wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
|
||||
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// flush
|
||||
input wire flush_enable,
|
||||
input wire [`LINE_SELECT_BITS-1:0] flush_addr
|
||||
);
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [`DBG_CACHE_REQ_IDW-1:0] req_id_sel, req_id_st0, req_id_st1;
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
wire [NUM_PORTS-1:0] creq_pmask;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] creq_wsel;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] creq_tid;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] creq_tag;
|
||||
wire creq_rw;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
|
||||
|
||||
wire creq_valid, creq_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH)),
|
||||
.SIZE (CREQ_SIZE)
|
||||
) core_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ready_in (core_req_ready),
|
||||
.valid_in (core_req_valid),
|
||||
.data_in ({core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid, core_req_tag}),
|
||||
.data_out ({creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid, creq_tag}),
|
||||
.ready_out (creq_ready),
|
||||
.valid_out (creq_valid)
|
||||
);
|
||||
|
||||
wire mreq_alm_full;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
||||
wire crsq_valid, crsq_ready;
|
||||
wire crsq_stall;
|
||||
|
||||
wire mshr_valid;
|
||||
wire mshr_ready;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id;
|
||||
wire mshr_alm_full;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_dequeue_id;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] mshr_tag;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mshr_wsel;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] mshr_tid;
|
||||
wire [NUM_PORTS-1:0] mshr_pmask;
|
||||
|
||||
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
|
||||
wire is_read_st0, is_read_st1;
|
||||
wire is_write_st0, is_write_st1;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel_st0, wsel_st1;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
|
||||
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
|
||||
wire valid_st0, valid_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_mshr_st0, is_mshr_st1;
|
||||
wire miss_st0, miss_st1;
|
||||
wire is_flush_st0;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
|
||||
// prevent read-during-write hazard when accessing tags/data block RAMs
|
||||
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
|
||||
wire rdw_write_hazard = valid_st0 && is_write_st0 && ~creq_rw;
|
||||
|
||||
// determine which queue to pop next in priority order:
// flush (highest) > mshr dequeue > memory response > core request (lowest).
// A *_grant signal means no higher-priority source is active this cycle;
// the matching *_enable additionally requires that source's own valid.
|
||||
wire mshr_grant = !flush_enable; // mshr only yields to a flush
|
||||
wire mshr_enable = mshr_grant && mshr_valid;
|
||||
|
||||
wire mrsq_grant = !flush_enable && !mshr_enable; // memory response yields to flush and mshr
|
||||
wire mrsq_enable = mrsq_grant && mem_rsp_valid;
|
||||
wire creq_grant = !flush_enable && !mshr_enable && !mrsq_enable; // core request goes last
|
||||
|
||||
wire creq_enable = creq_grant && creq_valid;
|
||||
|
||||
assign mshr_ready = mshr_grant
|
||||
&& !rdw_fill_hazard // prevent read-during-write hazard
|
||||
&& !crsq_stall; // ensure core_rsp_queue not full
|
||||
|
||||
|
||||
assign mem_rsp_ready = mrsq_grant
|
||||
&& !crsq_stall; // ensure core_rsp_queue not full
|
||||
|
||||
assign creq_ready = creq_grant
|
||||
&& !rdw_write_hazard // prevent read-during-write hazard
|
||||
&& !mreq_alm_full // ensure mem_req_queue not full
|
||||
&& !mshr_alm_full // ensure mshr not full
|
||||
&& !crsq_stall; // ensure core_rsp_queue not full
|
||||
|
||||
wire flush_fire = flush_enable;
|
||||
wire mshr_fire = mshr_valid && mshr_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire creq_fire = creq_valid && creq_ready;
|
||||
|
||||
assign req_id_sel = mshr_enable ? mshr_tag[0][`CACHE_REQ_ID_RNG] : creq_tag[0][`CACHE_REQ_ID_RNG];
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_sel;
|
||||
assign wdata_sel[(NUM_PORTS * `WORD_WIDTH)-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[(NUM_PORTS * `WORD_WIDTH)-1:0] : creq_data;
|
||||
for (genvar i = NUM_PORTS * `WORD_WIDTH; i < `CACHE_LINE_WIDTH; ++i) begin
|
||||
assign wdata_sel[i] = mem_rsp_data[i];
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!crsq_stall),
|
||||
.data_in ({
|
||||
flush_fire || mshr_fire || mem_rsp_fire || creq_fire,
|
||||
flush_enable,
|
||||
mshr_enable,
|
||||
mrsq_enable,
|
||||
creq_enable && ~creq_rw,
|
||||
creq_enable && creq_rw,
|
||||
flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : (mshr_valid ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : creq_addr)),
|
||||
wdata_sel,
|
||||
mshr_valid ? mshr_wsel : creq_wsel,
|
||||
creq_byteen,
|
||||
mshr_valid ? mshr_tid : creq_tid,
|
||||
mshr_valid ? mshr_pmask : creq_pmask,
|
||||
mshr_valid ? mshr_tag : creq_tag,
|
||||
mshr_valid ? mshr_dequeue_id : mem_rsp_id
|
||||
}),
|
||||
.data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
|
||||
);
|
||||
|
||||
assign req_id_st0 = tag_st0[0][`CACHE_REQ_ID_RNG];
|
||||
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_flush_st0);
|
||||
|
||||
wire tag_match_st0;
|
||||
|
||||
VX_tag_access #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
|
||||
) tag_access (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.req_id (req_id_st0),
|
||||
|
||||
.stall (crsq_stall),
|
||||
|
||||
// lookup / fill / flush
|
||||
.lookup (do_lookup_st0),
|
||||
.addr (addr_st0),
|
||||
.fill (do_fill_st0),
|
||||
.flush (do_flush_st0),
|
||||
.tag_match (tag_match_st0)
|
||||
);
|
||||
|
||||
// core request miss: a read or write whose tag lookup did not match
|
||||
assign miss_st0 = (is_read_st0 || is_write_st0) && ~tag_match_st0;
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_a_st0 = (is_read_st0 || is_write_st0) ? mshr_alloc_id : mshr_id_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!crsq_stall),
|
||||
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, miss_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_a_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, miss_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
assign req_id_st1 = tag_st1[0][`CACHE_REQ_ID_RNG];
|
||||
|
||||
wire do_read_st0 = valid_st0 && is_read_st0;
|
||||
wire do_read_st1 = valid_st1 && is_read_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_write_st1 = valid_st1 && is_write_st1;
|
||||
wire do_mshr_st1 = valid_st1 && is_mshr_st1;
|
||||
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data_st1 = wdata_st1[0 +: NUM_PORTS * `WORD_WIDTH];
|
||||
`UNUSED_VAR (wdata_st1)
|
||||
|
||||
VX_data_access #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE(CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_PORTS (NUM_PORTS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE)
|
||||
) data_access (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.req_id (req_id_st1),
|
||||
|
||||
.stall (crsq_stall),
|
||||
|
||||
.read (do_read_st1 || do_mshr_st1),
|
||||
.fill (do_fill_st1),
|
||||
.write (do_write_st1 && !miss_st1),
|
||||
.addr (addr_st1),
|
||||
.wsel (wsel_st1),
|
||||
.pmask (pmask_st1),
|
||||
.byteen (byteen_st1),
|
||||
.fill_data (wdata_st1),
|
||||
.write_data (creq_data_st1),
|
||||
.read_data (rdata_st1)
|
||||
);
|
||||
|
||||
wire mshr_allocate = do_read_st0 && !crsq_stall;
|
||||
wire mshr_replay = do_fill_st0 && !crsq_stall;
|
||||
wire mshr_lookup = mshr_allocate;
|
||||
wire mshr_release = do_read_st1 && !miss_st1 && !crsq_stall;
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
) mshr_pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (creq_fire && ~creq_rw),
|
||||
.decr (mshr_fire || mshr_release),
|
||||
.full (mshr_alm_full),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
VX_miss_resrv #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_PORTS (NUM_PORTS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
|
||||
) miss_resrv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.deq_req_id (req_id_sel),
|
||||
.lkp_req_id (req_id_st0),
|
||||
.rel_req_id (req_id_st1),
|
||||
|
||||
// allocate
|
||||
.allocate_valid (mshr_allocate),
|
||||
.allocate_addr (addr_st0),
|
||||
.allocate_data ({wsel_st0, tag_st0, req_tid_st0, pmask_st0}),
|
||||
.allocate_id (mshr_alloc_id),
|
||||
`UNUSED_PIN (allocate_ready),
|
||||
|
||||
// lookup
|
||||
.lookup_valid (mshr_lookup),
|
||||
.lookup_replay (mshr_replay),
|
||||
.lookup_id (mshr_alloc_id),
|
||||
.lookup_addr (addr_st0),
|
||||
.lookup_match (mshr_pending_st0),
|
||||
|
||||
// fill
|
||||
.fill_valid (mem_rsp_fire),
|
||||
.fill_id (mem_rsp_id),
|
||||
.fill_addr (mem_rsp_addr),
|
||||
|
||||
// dequeue
|
||||
.dequeue_valid (mshr_valid),
|
||||
.dequeue_id (mshr_dequeue_id),
|
||||
.dequeue_addr (mshr_addr),
|
||||
.dequeue_data ({mshr_wsel, mshr_tag, mshr_tid, mshr_pmask}),
|
||||
.dequeue_ready (mshr_ready),
|
||||
|
||||
// release
|
||||
.release_valid (mshr_release),
|
||||
.release_id (mshr_id_st1)
|
||||
);
|
||||
|
||||
// Enqueue core response
|
||||
|
||||
wire [NUM_PORTS-1:0] crsq_pmask;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] crsq_tag;
|
||||
|
||||
assign crsq_valid = (do_read_st1 && !miss_st1)
|
||||
|| do_mshr_st1;
|
||||
|
||||
assign crsq_stall = crsq_valid && !crsq_ready;
|
||||
|
||||
assign crsq_pmask = pmask_st1;
|
||||
assign crsq_tid = req_tid_st1;
|
||||
assign crsq_data = rdata_st1;
|
||||
assign crsq_tag = tag_st1;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.OUT_REG (1)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (crsq_valid),
|
||||
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
|
||||
.ready_in (crsq_ready),
|
||||
.valid_out (core_rsp_valid),
|
||||
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
// Enqueue memory request
|
||||
|
||||
wire mreq_push, mreq_pop, mreq_empty;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mreq_wsel;
|
||||
wire [NUM_PORTS-1:0] mreq_pmask;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
|
||||
wire mreq_rw;
|
||||
|
||||
assign mreq_push = (do_read_st1 && miss_st1 && !mshr_pending_st1)
|
||||
|| do_write_st1;
|
||||
|
||||
assign mreq_pop = mem_req_valid && mem_req_ready;
|
||||
|
||||
assign mreq_rw = WRITE_ENABLE && is_write_st1;
|
||||
assign mreq_addr = addr_st1;
|
||||
assign mreq_id = mshr_id_st1;
|
||||
assign mreq_pmask= pmask_st1;
|
||||
assign mreq_wsel = wsel_st1;
|
||||
assign mreq_byteen = byteen_st1;
|
||||
assign mreq_data = creq_data_st1;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
|
||||
.SIZE (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-2),
|
||||
.OUT_REG (1 == NUM_BANKS)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (mreq_push),
|
||||
.pop (mreq_pop),
|
||||
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_pmask, mreq_byteen, mreq_wsel, mreq_data}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_pmask, mem_req_byteen, mem_req_wsel, mem_req_data}),
|
||||
.empty (mreq_empty),
|
||||
.alm_full (mreq_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign mem_req_valid = !mreq_empty;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`SCOPE_ASSIGN (valid_st0, valid_st0);
|
||||
`SCOPE_ASSIGN (valid_st1, valid_st1);
|
||||
`SCOPE_ASSIGN (is_fill_st0, is_fill_st0);
|
||||
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
|
||||
`SCOPE_ASSIGN (miss_st0, miss_st0);
|
||||
`SCOPE_ASSIGN (crsq_stall, crsq_stall);
|
||||
`SCOPE_ASSIGN (mreq_alm_full, mreq_alm_full);
|
||||
`SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full);
|
||||
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign perf_read_misses = do_read_st1 && miss_st1;
|
||||
assign perf_write_misses = do_write_st1 && miss_st1;
|
||||
assign perf_mshr_stalls = mshr_alm_full;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_BANK
|
||||
wire crsq_fire = crsq_valid && crsq_ready;
|
||||
wire pipeline_stall = (mshr_valid || mem_rsp_valid || creq_valid)
|
||||
&& ~(mshr_fire || mem_rsp_fire || creq_fire);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (pipeline_stall) begin
|
||||
dpi_trace("%d: *** cache%0d:%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, CACHE_ID, BANK_ID, crsq_stall, mreq_alm_full, mshr_alm_full);
|
||||
end
|
||||
if (flush_enable) begin
|
||||
dpi_trace("%d: cache%0d:%0d flush: addr=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
dpi_trace("%d: cache%0d:%0d fill-rsp: addr=%0h, id=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data);
|
||||
end
|
||||
if (mshr_fire) begin
|
||||
dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, req_id_sel);
|
||||
end
|
||||
if (creq_fire) begin
|
||||
if (creq_rw)
|
||||
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, req_id_sel);
|
||||
else
|
||||
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, req_id_sel);
|
||||
end
|
||||
if (crsq_fire) begin
|
||||
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, req_id_st1);
|
||||
end
|
||||
if (mreq_push) begin
|
||||
if (is_write_st1)
|
||||
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, req_id_st1);
|
||||
else
|
||||
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, req_id_st1);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
1047
hw/rtl/cache/VX_cache.sv
vendored
1047
hw/rtl/cache/VX_cache.sv
vendored
File diff suppressed because it is too large
Load Diff
549
hw/rtl/cache/VX_cache_bank.sv
vendored
Normal file
549
hw/rtl/cache/VX_cache_bank.sv
vendored
Normal file
@@ -0,0 +1,549 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_bank #(
|
||||
parameter `STRING INSTANCE_ID= "",
|
||||
parameter BANK_ID = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 1,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 16,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 1,
|
||||
// Miss Reservation Queue (MSHR) Size
|
||||
parameter MSHR_SIZE = 1,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 1,
|
||||
|
||||
// Enable cache writes
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 0,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE),
|
||||
parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS),
|
||||
parameter WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire perf_read_misses,
|
||||
output wire perf_write_misses,
|
||||
output wire perf_mshr_stalls,
|
||||
`endif
|
||||
|
||||
// Core Request
|
||||
input wire core_req_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire core_req_rw,
|
||||
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel,
|
||||
input wire [WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [`CS_WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx,
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
output wire core_rsp_valid,
|
||||
output wire [`CS_WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [TAG_WIDTH-1:0] core_rsp_tag,
|
||||
output wire [REQ_SEL_WIDTH-1:0] core_rsp_idx,
|
||||
input wire core_rsp_ready,
|
||||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire mem_req_rw,
|
||||
output wire [WORD_SEL_WIDTH-1:0] mem_req_wsel,
|
||||
output wire [WORD_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_WORD_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// initialization
|
||||
input wire init_enable,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] init_line_sel
|
||||
);
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [`UP(UUID_WIDTH)-1:0] req_uuid_sel, req_uuid_st0, req_uuid_st1;
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
wire crsq_stall;
|
||||
wire mshr_alm_full;
|
||||
wire mreq_alm_full;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
||||
|
||||
wire replay_valid;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] replay_addr;
|
||||
wire replay_rw;
|
||||
wire [WORD_SEL_WIDTH-1:0] replay_wsel;
|
||||
wire [WORD_SIZE-1:0] replay_byteen;
|
||||
wire [`CS_WORD_WIDTH-1:0] replay_data;
|
||||
wire [TAG_WIDTH-1:0] replay_tag;
|
||||
wire [REQ_SEL_WIDTH-1:0] replay_idx;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
|
||||
wire replay_ready;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
|
||||
wire valid_sel, valid_st0, valid_st1;
|
||||
wire is_init_st0;
|
||||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_replay_st0, is_replay_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_tail_st0, mshr_tail_st1;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
|
||||
wire rdw_hazard_st0;
|
||||
reg rdw_hazard_st1;
|
||||
|
||||
wire pipe_stall = crsq_stall || rdw_hazard_st1;
|
||||
|
||||
// inputs arbitration (fixed priority): init > mshr replay > memory fill > core request.
|
||||
// cache initialization (init_enable) pre-empts every other input; among the remaining inputs,
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
// handle memory responses next to prevent deadlock with potential memory request from a miss.
// A *_grant signal means no higher-priority source is active this cycle;
// the matching *_enable additionally requires that source's own valid.
|
||||
wire replay_grant = ~init_enable;
|
||||
wire replay_enable = replay_grant && replay_valid;
|
||||
|
||||
wire fill_grant = ~init_enable && ~replay_enable;
|
||||
wire fill_enable = fill_grant && mem_rsp_valid;
|
||||
|
||||
wire creq_grant = ~init_enable && ~replay_enable && ~fill_enable;
|
||||
wire creq_enable = creq_grant && core_req_valid;
|
||||
|
||||
// ready = granted by the arbiter and not blocked by back-pressure
assign replay_ready = replay_grant
|
||||
&& ~rdw_hazard_st0 // hold off the replay while a fill occupies stage 0
|
||||
&& ~pipe_stall; // pipeline frozen (crsq_stall || rdw_hazard_st1)
|
||||
|
||||
assign mem_rsp_ready = fill_grant
|
||||
&& ~pipe_stall; // pipeline frozen (crsq_stall || rdw_hazard_st1)
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_alm_full // memory request queue is almost full
|
||||
&& ~mshr_alm_full // mshr pending-size counter reports full
|
||||
&& ~pipe_stall; // pipeline frozen (crsq_stall || rdw_hazard_st1)
|
||||
|
||||
wire init_fire = init_enable;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
wire [TAG_WIDTH-1:0] mshr_creq_tag = replay_enable ? replay_tag : core_req_tag;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_sel = mshr_creq_tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_sel = 0;
|
||||
end
|
||||
|
||||
`UNUSED_VAR (mshr_creq_tag)
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || core_req_fire;
|
||||
|
||||
assign addr_sel = init_enable ? `CS_LINE_ADDR_WIDTH'(init_line_sel) :
|
||||
(replay_valid ? replay_addr :
|
||||
(mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
// Low word of the stage-0 data input. The selection order must match addr_sel and the
// input arbiter: replay first, then memory response, then core request. replay_valid
// has to be tested before mem_rsp_valid because, when both are asserted, the arbiter
// grants the replay, and a replayed write needs replay_data as its write data.
// When WRITE_ENABLE is 0 this word always carries the memory response data.
assign data_sel[`CS_WORD_WIDTH-1:0] = !WRITE_ENABLE ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : (replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data));
|
||||
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
|
||||
assign data_sel[i] = mem_rsp_data[i];
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({
|
||||
valid_sel,
|
||||
init_enable,
|
||||
replay_enable,
|
||||
fill_enable,
|
||||
creq_enable,
|
||||
addr_sel,
|
||||
data_sel,
|
||||
replay_valid ? replay_rw : core_req_rw,
|
||||
replay_valid ? replay_byteen : core_req_byteen,
|
||||
replay_valid ? replay_wsel : core_req_wsel,
|
||||
replay_valid ? replay_idx : core_req_idx,
|
||||
replay_valid ? replay_tag : core_req_tag,
|
||||
replay_id
|
||||
}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_creq_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_st0 = tag_st0[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_st0 = 0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
|
||||
`RESET_RELAY (tag_reset, reset);
|
||||
|
||||
VX_cache_tags #(
|
||||
.INSTANCE_ID(INSTANCE_ID),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_tags (
|
||||
.clk (clk),
|
||||
.reset (tag_reset),
|
||||
|
||||
.req_uuid (req_uuid_st0),
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
// lookup / fill / init
|
||||
.lookup (do_lookup_st0),
|
||||
.line_addr (addr_st0),
|
||||
.fill (do_fill_st0),
|
||||
.init (do_init_st0),
|
||||
.way_sel (way_sel_st0),
|
||||
.tag_matches(tag_matches_st0)
|
||||
);
|
||||
|
||||
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_replay_st0, is_fill_st0, is_creq_st0, rw_st0, addr_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_tail_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_replay_st1, is_fill_st1, is_creq_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_tail_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag hit
|
||||
wire is_hit_st1 = (| tag_matches_st1);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_st1 = 0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st1 = valid_st1 && is_creq_st1 && ~rw_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_creq_st1 && rw_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
|
||||
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
|
||||
|
||||
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
|
||||
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
|
||||
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
|
||||
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
|
||||
// ensure an mshr replay always gets a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
|
||||
|
||||
// detect BRAM's read-during-write hazard
// rdw_hazard_st0: a fill is in stage 0; it gates replay_ready so no mshr replay is accepted that cycle.
// rdw_hazard_st1: registered flag, set when a core read in stage 0 and a write hit in stage 1
// target the same line address; it is one of the two terms of pipe_stall.
// The "&& ~rdw_hazard_st1" term forces the next value to 0 whenever the flag is currently 1,
// so the flag is never asserted two cycles in a row.
// NOTE(review): this register has no reset branch; it settles to 0 once do_creq_rd_st0 is low - confirm that is sufficient at power-up.
|
||||
assign rdw_hazard_st0 = do_fill_st0; // after a fill
|
||||
always @(posedge clk) begin
|
||||
rdw_hazard_st1 <= (do_creq_rd_st0 && do_write_hit_st1 && (addr_st0 == addr_st1))
|
||||
&& ~rdw_hazard_st1; // after a write to same address
|
||||
end
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
|
||||
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
|
||||
|
||||
`RESET_RELAY (data_reset, reset);
|
||||
|
||||
VX_cache_data #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_data (
|
||||
.clk (clk),
|
||||
.reset (data_reset),
|
||||
|
||||
.req_uuid (req_uuid_st1),
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
.read (do_read_hit_st1 || do_replay_rd_st1),
|
||||
.fill (do_fill_st1),
|
||||
.write (do_write_hit_st1 || do_replay_wr_st1),
|
||||
.way_sel (way_sel_st1 | tag_matches_st1),
|
||||
.line_addr (addr_st1),
|
||||
.wsel (wsel_st1),
|
||||
.byteen (byteen_st1),
|
||||
.fill_data (fill_data_st1),
|
||||
.write_data (write_data_st1),
|
||||
.read_data (read_data_st1)
|
||||
);
|
||||
|
||||
// MSHR control strobes. Every core request (read or write) that advances through stage 0
// allocates an entry and looks up entries with the same line address; the same request is
// finalized when it advances through stage 1. All strobes are suppressed while pipe_stall is high.
wire [MSHR_SIZE-1:0] mshr_matches_st0; // per-entry address match returned by the mshr lookup
|
||||
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
|
||||
wire mshr_lookup_st0 = mshr_allocate_st0; // lookup is issued together with the allocation
|
||||
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
|
||||
// release on a hit, or on a write with no other pending entry for the same line.
// NOTE(review): presumably a finalize without release keeps the entry for a later replay - confirm in VX_cache_mshr.
wire mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
) mshr_pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (core_req_fire),
|
||||
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
|
||||
.full (mshr_alm_full),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
`RESET_RELAY (mshr_reset, reset);
|
||||
|
||||
VX_cache_mshr #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.BANK_ID (BANK_ID),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
|
||||
) cache_mshr (
|
||||
.clk (clk),
|
||||
.reset (mshr_reset),
|
||||
|
||||
.deq_req_uuid (req_uuid_sel),
|
||||
.lkp_req_uuid (req_uuid_st0),
|
||||
.fin_req_uuid (req_uuid_st1),
|
||||
|
||||
// memory fill
|
||||
.fill_valid (mem_rsp_fire),
|
||||
.fill_id (mem_rsp_id),
|
||||
.fill_addr (mem_rsp_addr),
|
||||
|
||||
// dequeue
|
||||
.dequeue_valid (replay_valid),
|
||||
.dequeue_addr (replay_addr),
|
||||
.dequeue_rw (replay_rw),
|
||||
.dequeue_data ({replay_wsel, replay_byteen, replay_data, replay_tag, replay_idx}),
|
||||
.dequeue_id (replay_id),
|
||||
.dequeue_ready (replay_ready),
|
||||
|
||||
// allocate
|
||||
.allocate_valid (mshr_allocate_st0),
|
||||
.allocate_addr (addr_st0),
|
||||
.allocate_rw (rw_st0),
|
||||
.allocate_data ({wsel_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
|
||||
.allocate_id (mshr_alloc_id_st0),
|
||||
.allocate_tail (mshr_tail_st0),
|
||||
`UNUSED_PIN (allocate_ready),
|
||||
|
||||
// lookup
|
||||
.lookup_valid (mshr_lookup_st0),
|
||||
.lookup_addr (addr_st0),
|
||||
.lookup_matches (mshr_matches_st0),
|
||||
|
||||
// finalize
|
||||
.finalize_valid (mshr_finalize_st1),
|
||||
.finalize_release(mshr_release_st1),
|
||||
.finalize_pending(mshr_pending_st1),
|
||||
.finalize_id (mshr_id_st1),
|
||||
.finalize_tail (mshr_tail_st1)
|
||||
);
|
||||
|
||||
// ignore allocated id from mshr matches:
// clear the bit at index mshr_alloc_id_st0 from the lookup result, then OR-reduce what is left.
// NOTE(review): presumably the entry being allocated this cycle would otherwise match its own address - confirm in VX_cache_mshr.
|
||||
wire [MSHR_SIZE-1:0] lookup_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
assign lookup_matches[i] = (i != mshr_alloc_id_st0) && mshr_matches_st0[i];
|
||||
end
|
||||
assign mshr_pending_st0 = (| lookup_matches); // set when any other mshr entry matched the lookup address
|
||||
|
||||
// schedule core response
|
||||
|
||||
wire crsq_valid, crsq_ready;
|
||||
wire [`CS_WORD_WIDTH-1:0] crsq_data;
|
||||
wire [REQ_SEL_WIDTH-1:0] crsq_idx;
|
||||
wire [TAG_WIDTH-1:0] crsq_tag;
|
||||
|
||||
assign crsq_valid = do_read_hit_st1 || do_replay_rd_st1;
|
||||
assign crsq_idx = req_idx_st1;
|
||||
assign crsq_data = read_data_st1;
|
||||
assign crsq_tag = tag_st1;
|
||||
|
||||
`RESET_RELAY (crsp_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.OUT_REG (CORE_OUT_REG)
|
||||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (crsp_reset),
|
||||
.valid_in (crsq_valid && ~rdw_hazard_st1),
|
||||
.ready_in (crsq_ready),
|
||||
.data_in ({crsq_tag, crsq_data, crsq_idx}),
|
||||
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
|
||||
.valid_out (core_rsp_valid),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
assign crsq_stall = crsq_valid && ~crsq_ready;
|
||||
|
||||
// schedule memory request
|
||||
|
||||
wire mreq_push, mreq_pop, mreq_empty;
|
||||
wire [`CS_WORD_WIDTH-1:0] mreq_data;
|
||||
wire [WORD_SIZE-1:0] mreq_byteen;
|
||||
wire [WORD_SEL_WIDTH-1:0] mreq_wsel;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
|
||||
wire mreq_rw;
|
||||
|
||||
// push a memory request when stage 1 holds either:
// - a core read miss with no other mshr entry already pending for the same line, or
// - any core write, whether it hit or missed.
assign mreq_push = (do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1;
|
||||
|
||||
assign mreq_pop = mem_req_valid && mem_req_ready;
|
||||
|
||||
assign mreq_rw = WRITE_ENABLE && rw_st1;
|
||||
assign mreq_addr = addr_st1;
|
||||
assign mreq_id = mshr_id_st1;
|
||||
assign mreq_wsel = wsel_st1;
|
||||
assign mreq_byteen = byteen_st1;
|
||||
assign mreq_data = write_data_st1;
|
||||
|
||||
`RESET_RELAY (mreq_reset, reset);
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH),
|
||||
.DEPTH (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-2),
|
||||
.OUT_REG (MEM_OUT_REG)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (mreq_reset),
|
||||
.push (mreq_push),
|
||||
.pop (mreq_pop),
|
||||
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_byteen, mreq_wsel, mreq_data}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}),
|
||||
.empty (mreq_empty),
|
||||
.alm_full (mreq_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign mem_req_valid = ~mreq_empty;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign perf_read_misses = do_read_miss_st1;
|
||||
assign perf_write_misses = do_write_miss_st1;
|
||||
assign perf_mshr_stalls = mshr_alm_full;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_BANK
|
||||
wire crsq_fire = crsq_valid && crsq_ready;
|
||||
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire);
|
||||
always @(posedge clk) begin
|
||||
if (pipeline_stall) begin
|
||||
`TRACE(3, ("%d: *** %s-bank%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsq_stall, mreq_alm_full, mshr_alm_full));
|
||||
end
|
||||
if (init_enable) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d init: addr=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(init_line_sel, BANK_ID)));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
end
|
||||
if (replay_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
|
||||
end
|
||||
if (core_req_fire) begin
|
||||
if (core_req_rw)
|
||||
`TRACE(2, ("%d: %s-bank%0d core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
|
||||
else
|
||||
`TRACE(2, ("%d: %s-bank%0d core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
|
||||
end
|
||||
if (crsq_fire) begin
|
||||
`TRACE(2, ("%d: %s-bank%0d core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_idx, crsq_data, req_uuid_st1));
|
||||
end
|
||||
if (mreq_push) begin
|
||||
if (do_creq_wr_st1)
|
||||
`TRACE(2, ("%d: %s-bank%0d writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_addr, BANK_ID), mreq_byteen, mreq_data, req_uuid_st1));
|
||||
else
|
||||
`TRACE(2, ("%d: %s-bank%0d fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_addr, BANK_ID), mreq_id, req_uuid_st1));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
348
hw/rtl/cache/VX_cache_bypass.sv
vendored
Normal file
348
hw/rtl/cache/VX_cache_bypass.sv
vendored
Normal file
@@ -0,0 +1,348 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// VX_cache_bypass: shim placed on both the core side and the memory side of a cache.
// Core requests selected for bypass (tag bit NC_TAG_BIT set, or every request when
// PASSTHRU != 0) are arbitrated and sent straight to the memory request output;
// memory responses flagged the same way are returned straight to the core response
// output. All other traffic is forwarded between the *_in and *_out ports.
module VX_cache_bypass #(
|
||||
parameter NUM_REQS = 1,
|
||||
// Index of the non-cacheable flag bit inside the core and memory tags
parameter NC_TAG_BIT = 0,
|
||||
|
||||
// When non-zero, the core tag carries an NC flag bit that is stripped before the cache
parameter NC_ENABLE = 0,
|
||||
// When non-zero, every core request is treated as a bypass request
parameter PASSTHRU = 0,
|
||||
|
||||
parameter CORE_ADDR_WIDTH = 1,
|
||||
parameter CORE_DATA_SIZE = 1,
|
||||
parameter CORE_TAG_IN_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
parameter MEM_DATA_SIZE = 1,
|
||||
parameter MEM_TAG_IN_WIDTH = 1,
|
||||
parameter MEM_TAG_OUT_WIDTH = 1,
|
||||
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// Derived widths (data sizes are in bytes)
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
||||
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
||||
parameter CORE_TAG_OUT_WIDTH= CORE_TAG_IN_WIDTH - NC_ENABLE
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Core request in
|
||||
input wire [NUM_REQS-1:0] core_req_valid_in,
|
||||
input wire [NUM_REQS-1:0] core_req_rw_in,
|
||||
input wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_in,
|
||||
input wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_in,
|
||||
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_in,
|
||||
input wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_req_tag_in,
|
||||
output wire [NUM_REQS-1:0] core_req_ready_in,
|
||||
|
||||
// Core request out
|
||||
output wire [NUM_REQS-1:0] core_req_valid_out,
|
||||
output wire [NUM_REQS-1:0] core_req_rw_out,
|
||||
output wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_out,
|
||||
output wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_out,
|
||||
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_out,
|
||||
output wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_req_tag_out,
|
||||
input wire [NUM_REQS-1:0] core_req_ready_out,
|
||||
|
||||
// Core response in
|
||||
input wire [NUM_REQS-1:0] core_rsp_valid_in,
|
||||
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in,
|
||||
input wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_rsp_tag_in,
|
||||
output wire [NUM_REQS-1:0] core_rsp_ready_in,
|
||||
|
||||
// Core response out
|
||||
output wire [NUM_REQS-1:0] core_rsp_valid_out,
|
||||
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out,
|
||||
output wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out,
|
||||
input wire [NUM_REQS-1:0] core_rsp_ready_out,
|
||||
|
||||
// Memory request in
|
||||
input wire mem_req_valid_in,
|
||||
input wire mem_req_rw_in,
|
||||
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
|
||||
input wire [MEM_DATA_SIZE-1:0] mem_req_byteen_in,
|
||||
input wire [MEM_DATA_WIDTH-1:0] mem_req_data_in,
|
||||
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
|
||||
output wire mem_req_ready_in,
|
||||
|
||||
// Memory request out
|
||||
output wire mem_req_valid_out,
|
||||
output wire mem_req_rw_out,
|
||||
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
|
||||
output wire [MEM_DATA_SIZE-1:0] mem_req_byteen_out,
|
||||
output wire [MEM_DATA_WIDTH-1:0] mem_req_data_out,
|
||||
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
|
||||
input wire mem_req_ready_out,
|
||||
|
||||
// Memory response in
|
||||
input wire mem_rsp_valid_in,
|
||||
input wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_in,
|
||||
input wire [MEM_TAG_OUT_WIDTH-1:0] mem_rsp_tag_in,
|
||||
output wire mem_rsp_ready_in,
|
||||
|
||||
// Memory response out
|
||||
output wire mem_rsp_valid_out,
|
||||
output wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_out,
|
||||
output wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_out,
|
||||
input wire mem_rsp_ready_out
|
||||
);
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
|
||||
// Width of one packed request entry fed to the bypass mux: {tag, data, byteen, addr, rw}
localparam MUX_DATAW = CORE_TAG_IN_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1;
|
||||
|
||||
localparam WORDS_PER_LINE = MEM_DATA_SIZE / CORE_DATA_SIZE;
|
||||
localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
|
||||
// Bypass memory tag id layout (LSB first): core tag id, word select, request index
localparam CORE_TAG_ID_BITS = CORE_TAG_IN_WIDTH - UUID_WIDTH;
|
||||
localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS;
|
||||
|
||||
localparam MEM_TAG_OUT_NC_WIDTH = MEM_TAG_OUT_WIDTH - 1 + NC_ENABLE;
|
||||
|
||||
// core request handling
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid_in_nc;
|
||||
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire core_req_nc_valid;
|
||||
|
||||
// Per-request bypass flag: forced on in pass-through mode, otherwise read from the tag
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU != 0) begin
|
||||
assign core_req_nc_idxs[i] = 1'b1;
|
||||
end else begin
|
||||
assign core_req_nc_idxs[i] = core_req_tag_in[i][NC_TAG_BIT];
|
||||
end
|
||||
end
|
||||
|
||||
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs;
|
||||
|
||||
// Asserted when any core request (bypass or not) is accepted; used to unlock the arbiter
wire core_req_in_fire = | (core_req_valid_in & core_req_ready_in);
|
||||
|
||||
// Selects one of the valid bypass requests to drive the single memory request port
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.TYPE (PASSTHRU ? "R" : "P"),
|
||||
.LOCK_ENABLE (1)
|
||||
) req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (core_req_in_fire),
|
||||
.requests (core_req_valid_in_nc),
|
||||
.grant_index (core_req_nc_idx),
|
||||
.grant_onehot (core_req_nc_sel),
|
||||
.grant_valid (core_req_nc_valid)
|
||||
);
|
||||
|
||||
// Requests not flagged for bypass are forwarded unchanged (apart from the tag) to the cache
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs;
|
||||
assign core_req_rw_out = core_req_rw_in;
|
||||
assign core_req_addr_out = core_req_addr_in;
|
||||
assign core_req_byteen_out = core_req_byteen_in;
|
||||
assign core_req_data_out = core_req_data_in;
|
||||
|
||||
// Narrow the tag from CORE_TAG_IN_WIDTH to CORE_TAG_OUT_WIDTH.
// NOTE(review): presumably VX_bits_remove drops S bits at POS (helper defined elsewhere) - confirm
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_bits_remove #(
|
||||
.N (CORE_TAG_IN_WIDTH),
|
||||
.S (NC_ENABLE),
|
||||
.POS (NC_TAG_BIT)
|
||||
) core_req_tag_nc_remove (
|
||||
.data_in (core_req_tag_in[i]),
|
||||
.data_out (core_req_tag_out[i])
|
||||
);
|
||||
end
|
||||
|
||||
// A bypass request is accepted only when the cache-side memory request is idle,
// the memory port is ready and the arbiter granted this lane; others follow the cache
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i])
|
||||
: core_req_ready_out[i];
|
||||
end
|
||||
|
||||
// memory request handling
|
||||
|
||||
// The cache-side memory request takes priority: every mem_req_*_out mux below
// selects the mem_req_*_in value whenever mem_req_valid_in is asserted
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
|
||||
assign mem_req_ready_in = mem_req_ready_out;
|
||||
|
||||
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
|
||||
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
|
||||
wire core_req_rw_in_sel;
|
||||
|
||||
// Pack each request and pick the granted one by index
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
|
||||
end
|
||||
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_idx];
|
||||
|
||||
// Low CORE_TAG_ID_BITS of the selected tag; the UUID_WIDTH bits above them are handled separately
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_tag_in_sel[CORE_TAG_ID_BITS-1:0];
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
|
||||
// The bypass address drops the low WSEL_BITS (word-within-line select) of the core address
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[WSEL_BITS +: MEM_ADDR_WIDTH];
|
||||
|
||||
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
||||
|
||||
wire [WSEL_BITS-1:0] req_wsel = core_req_addr_in_sel[WSEL_BITS-1:0];
|
||||
|
||||
// Place the core word in its slot of the memory line: byte enables are zero
// for all other words and their data is left as don't-care
always @(*) begin
|
||||
mem_req_byteen_in_r = '0;
|
||||
mem_req_byteen_in_r[req_wsel] = core_req_byteen_in_sel;
|
||||
|
||||
mem_req_data_in_r = 'x;
|
||||
mem_req_data_in_r[req_wsel] = core_req_data_in_sel;
|
||||
end
|
||||
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
|
||||
// Record request index and word select in the tag so the response can be routed back
if (NUM_REQS > 1) begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
|
||||
end else begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
|
||||
end
|
||||
end else begin
|
||||
// Core word and memory line have the same size: no word placement needed
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id});
|
||||
end else begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id});
|
||||
end
|
||||
end
|
||||
|
||||
wire [MEM_TAG_OUT_NC_WIDTH-1:0] mem_req_tag_bypass;
|
||||
|
||||
// NOTE(review): the concatenation below is UUID_WIDTH + MEM_TAG_ID_BITS wide; if
// MEM_TAG_OUT_NC_WIDTH is larger it is zero-extended, while the response path reads the
// UUID from the top UUID_WIDTH bits of the tag - confirm the widths always match
if (UUID_WIDTH != 0) begin
|
||||
assign mem_req_tag_bypass = {core_req_tag_in_sel[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
|
||||
end else begin
|
||||
assign mem_req_tag_bypass = mem_req_tag_id_bypass;
|
||||
end
|
||||
|
||||
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_bypass_nc;
|
||||
wire [(MEM_TAG_IN_WIDTH + 1)-1:0] mem_req_tag_in_nc;
|
||||
|
||||
// Widen the bypass tag to MEM_TAG_OUT_WIDTH (adds one bit only when NC_ENABLE is 0).
// NOTE(review): presumably VX_bits_insert inserts S bits of sel_in at POS - confirm
VX_bits_insert #(
|
||||
.N (MEM_TAG_OUT_NC_WIDTH),
|
||||
.S (NC_ENABLE ? 0 : 1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_req_tag_bypass_nc_insert (
|
||||
.data_in (mem_req_tag_bypass),
|
||||
.sel_in (1'b0),
|
||||
.data_out (mem_req_tag_bypass_nc)
|
||||
);
|
||||
|
||||
// Cache-side memory tag gets a 0 inserted at NC_TAG_BIT, marking it as a cached request
VX_bits_insert #(
|
||||
.N (MEM_TAG_IN_WIDTH),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_req_tag_in_nc_insert (
|
||||
.data_in (mem_req_tag_in),
|
||||
.sel_in (1'b0),
|
||||
.data_out (mem_req_tag_in_nc)
|
||||
);
|
||||
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : mem_req_tag_bypass_nc;
|
||||
|
||||
// core response handling
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_in_nc;
|
||||
|
||||
// A memory response belongs to the bypass path in pass-through mode, or when its NC bit is set
wire is_mem_rsp_nc;
|
||||
if (PASSTHRU != 0) begin
|
||||
assign is_mem_rsp_nc = mem_rsp_valid_in;
|
||||
end else begin
|
||||
assign is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
|
||||
end
|
||||
|
||||
// Widen cache response tags back to CORE_TAG_IN_WIDTH with a zero at the NC position
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_bits_insert #(
|
||||
.N (CORE_TAG_OUT_WIDTH),
|
||||
.S (NC_ENABLE),
|
||||
.POS (NC_TAG_BIT)
|
||||
) core_rsp_tag_in_nc_insert (
|
||||
.data_in (core_rsp_tag_in[i]),
|
||||
.sel_in ('0),
|
||||
.data_out (core_rsp_tag_in_nc[i])
|
||||
);
|
||||
end
|
||||
|
||||
wire [MEM_TAG_OUT_NC_WIDTH-1:0] mem_rsp_tag_in_nc;
|
||||
|
||||
// Inverse of mem_req_tag_bypass_nc_insert on the response tag
VX_bits_remove #(
|
||||
.N (MEM_TAG_OUT_WIDTH),
|
||||
.S (NC_ENABLE ? 0 : 1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_rsp_tag_in_nc_remove (
|
||||
.data_in (mem_rsp_tag_in),
|
||||
.data_out (mem_rsp_tag_in_nc)
|
||||
);
|
||||
|
||||
// Recover the originating request lane from the tag id field
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign rsp_idx = mem_rsp_tag_in_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
|
||||
end else begin
|
||||
assign rsp_idx = 1'b0;
|
||||
end
|
||||
|
||||
// One-hot valid for the lane that receives the bypass response
reg [NUM_REQS-1:0] rsp_nc_valid_r;
|
||||
always @(*) begin
|
||||
rsp_nc_valid_r = '0;
|
||||
rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc;
|
||||
end
|
||||
|
||||
assign core_rsp_valid_out = core_rsp_valid_in | rsp_nc_valid_r;
|
||||
assign core_rsp_ready_in = core_rsp_ready_out;
|
||||
|
||||
// Cache responses win on each lane; otherwise the lane carries the bypass data/tag
if (WORDS_PER_LINE > 1) begin
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_in_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ?
|
||||
core_rsp_data_in[i] : mem_rsp_data_in[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
|
||||
end
|
||||
end
|
||||
|
||||
// Rebuild the core tag from the bypass response: UUID from the top bits, id from the low bits
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in_nc[i] : {mem_rsp_tag_in_nc[MEM_TAG_OUT_NC_WIDTH-1 -: UUID_WIDTH], mem_rsp_tag_in_nc[CORE_TAG_ID_BITS-1:0]};
|
||||
end else begin
|
||||
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in_nc[i] : mem_rsp_tag_in_nc[CORE_TAG_ID_BITS-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
// memory response handling
|
||||
|
||||
// Only responses without the NC flag are forwarded to the cache; none in pass-through mode
if (PASSTHRU != 0) begin
|
||||
assign mem_rsp_valid_out = 1'b0;
|
||||
end else begin
|
||||
assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT];
|
||||
end
|
||||
|
||||
assign mem_rsp_data_out = mem_rsp_data_in;
|
||||
|
||||
// Inverse of mem_req_tag_in_nc_insert: restore the cache's original memory tag
VX_bits_remove #(
|
||||
.N (MEM_TAG_IN_WIDTH + 1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_rsp_tag_out_remove (
|
||||
.data_in (mem_rsp_tag_in[(MEM_TAG_IN_WIDTH + 1)-1:0]),
|
||||
.data_out (mem_rsp_tag_out)
|
||||
);
|
||||
|
||||
// A bypass response is consumed only when its target lane has no cache response pending
// and the core is ready on that lane; cached responses follow the cache's ready
assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in[rsp_idx] && core_rsp_ready_out[rsp_idx]) : mem_rsp_ready_out;
|
||||
|
||||
endmodule
|
||||
368
hw/rtl/cache/VX_cache_cluster.sv
vendored
Normal file
368
hw/rtl/cache/VX_cache_cluster.sv
vendored
Normal file
@@ -0,0 +1,368 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// VX_cache_cluster: groups NUM_CACHES VX_cache_wrap instances behind two layers of
// VX_mem_arb. On the core side, one arbiter per request lane maps NUM_INPUTS core buses
// onto the NUM_CACHES caches; on the memory side, a single arbiter merges the caches'
// memory buses into mem_bus_if.
module VX_cache_cluster #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
// Number of cache units; 0 selects pass-through mode (see PASSTHRU below)
parameter NUM_UNITS = 1,
|
||||
// Number of core-side clients sharing the cluster
parameter NUM_INPUTS = 1,
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
|
||||
// Number of requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 16384,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 4,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_cache_perf_if.master cache_perf_if,
|
||||
`endif
|
||||
|
||||
// Core buses, flattened so that index (input * NUM_REQS + lane) addresses one bus
VX_mem_bus_if.slave core_bus_if [NUM_INPUTS * NUM_REQS],
|
||||
VX_mem_bus_if.master mem_bus_if
|
||||
);
|
||||
// NOTE(review): `UP presumably clamps to a minimum of 1 so one wrapper still exists
// in pass-through mode - macro defined elsewhere, confirm
localparam NUM_CACHES = `UP(NUM_UNITS);
|
||||
localparam PASSTHRU = (NUM_UNITS == 0);
|
||||
// Tag width after the core-side arbiter has appended its selection bits
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
|
||||
// Per-cache memory tag width, chosen by the PASSTHRU / NC_ENABLE combination
localparam MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
|
||||
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// One perf interface per cache, combined into cache_perf_if by the macro below
VX_cache_perf_if perf_cache_unit_if[NUM_CACHES]();
|
||||
`PERF_CACHE_ADD (cache_perf_if, perf_cache_unit_if, NUM_CACHES);
|
||||
`endif
|
||||
|
||||
// Memory-side bus of each cache
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH)
|
||||
) cache_mem_bus_if[NUM_CACHES]();
|
||||
|
||||
// Core-side buses after arbitration; index (cache * NUM_REQS + lane)
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
|
||||
|
||||
|
||||
// Core-side arbitration, one arbiter per request lane i
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus_tmp_if[NUM_INPUTS]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH)
|
||||
) arb_core_bus_tmp_if[NUM_CACHES]();
|
||||
|
||||
// Gather lane i of every input
for (genvar j = 0; j < NUM_INPUTS; ++j) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
|
||||
end
|
||||
|
||||
`RESET_RELAY (cache_arb_reset, reset);
|
||||
|
||||
// Output registers are requested only when inputs actually outnumber caches
VX_mem_arb #(
|
||||
.NUM_INPUTS (NUM_INPUTS),
|
||||
.NUM_OUTPUTS (NUM_CACHES),
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG_REQ ((NUM_INPUTS != NUM_CACHES) ? 2 : 0),
|
||||
.OUT_REG_RSP ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
|
||||
) cache_arb (
|
||||
.clk (clk),
|
||||
.reset (cache_arb_reset),
|
||||
.bus_in_if (core_bus_tmp_if),
|
||||
.bus_out_if (arb_core_bus_tmp_if)
|
||||
);
|
||||
|
||||
// Scatter the arbitrated lane i to each cache k
for (genvar k = 0; k < NUM_CACHES; ++k) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (arb_core_bus_if[k * NUM_REQS + i], arb_core_bus_tmp_if[k]);
|
||||
end
|
||||
end
|
||||
|
||||
// Cache instances; each receives a contiguous slice of NUM_REQS arbitrated core buses
for (genvar i = 0; i < NUM_CACHES; ++i) begin
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.CRSQ_SIZE (CRSQ_SIZE),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH),
|
||||
// Output register settings are forced to 2 whenever the adjacent arbiter is active
.CORE_OUT_REG ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_REG),
|
||||
.MEM_OUT_REG ((NUM_CACHES > 1) ? 2 : MEM_OUT_REG),
|
||||
.NC_ENABLE (NC_ENABLE),
|
||||
.PASSTHRU (PASSTHRU)
|
||||
) cache_wrap (
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf_if (perf_cache_unit_if[i]),
|
||||
`endif
|
||||
.clk (clk),
|
||||
.reset (cache_reset),
|
||||
.core_bus_if (arb_core_bus_if[i * NUM_REQS +: NUM_REQS]),
|
||||
.mem_bus_if (cache_mem_bus_if[i])
|
||||
);
|
||||
end
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
// Merged memory bus; its tag is the per-cache tag plus the memory arbiter's selection bits
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH + `ARB_SEL_BITS(NUM_CACHES, 1))
|
||||
) mem_bus_tmp_if[1]();
|
||||
|
||||
// Memory-side arbitration across the caches
VX_mem_arb #(
|
||||
.NUM_INPUTS (NUM_CACHES),
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG_REQ ((NUM_CACHES > 1) ? 2 : 0),
|
||||
.OUT_REG_RSP ((NUM_CACHES > 1) ? 2 : 0)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (mem_arb_reset),
|
||||
.bus_in_if (cache_mem_bus_if),
|
||||
.bus_out_if (mem_bus_tmp_if)
|
||||
);
|
||||
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]);
|
||||
|
||||
endmodule
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// VX_cache_cluster_top: flat-port wrapper around VX_cache_cluster.
// Exposes the cluster's core and memory buses as plain packed-array ports (no
// SystemVerilog interfaces on the boundary) and converts them to/from VX_mem_bus_if
// instances internally.
//
// Fixes relative to the previous revision:
//  - NC_ENABLE is now forwarded to VX_cache_cluster. It was used to compute
//    MEM_TAG_WIDTH here but the cluster silently fell back to its own default.
//  - Under PERF_ENABLE the cluster's perf port was tied to an undeclared identifier
//    (perf_icache_if); it is now connected to a locally declared interface instance.
//
// NOTE(review): VX_cache_cluster builds its merged memory bus with
// MEM_TAG_WIDTH + `ARB_SEL_BITS(NUM_CACHES, 1) tag bits before assigning it to
// mem_bus_if. The MEM_TAG_WIDTH default below does not include that term - confirm the
// port width is sufficient when NUM_UNITS > 1.
module VX_cache_cluster_top #(
    parameter `STRING INSTANCE_ID = "",

    // Number of cache units (0 selects pass-through) and of core-side clients
    parameter NUM_UNITS     = 2,
    parameter NUM_INPUTS    = 4,
    parameter TAG_SEL_IDX   = 0,

    // Number of Word requests per cycle
    parameter NUM_REQS      = 4,

    // Size of cache in bytes
    parameter CACHE_SIZE    = 16384,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE     = 16,
    // Number of banks
    parameter NUM_BANKS     = 4,
    // Number of associative ways
    parameter NUM_WAYS      = 4,
    // Size of a word in bytes
    parameter WORD_SIZE     = 4,

    // Core Response Queue Size
    parameter CRSQ_SIZE     = 2,
    // Miss Reserv Queue Knob
    parameter MSHR_SIZE     = 16,
    // Memory Response Queue Size
    parameter MRSQ_SIZE     = 0,
    // Memory Request Queue Size
    parameter MREQ_SIZE     = 4,

    // Enable cache writeable
    parameter WRITE_ENABLE  = 1,

    // Request debug identifier
    parameter UUID_WIDTH    = 0,

    // core request tag size
    parameter TAG_WIDTH     = 16,

    // enable bypass for non-cacheable addresses
    parameter NC_ENABLE     = 1,

    // Core response output register
    parameter CORE_OUT_REG  = 2,

    // Memory request output register
    parameter MEM_OUT_REG   = 2,

    // Derived parameters (mirror the localparams computed inside VX_cache_cluster)
    parameter NUM_CACHES    = `UP(NUM_UNITS),
    parameter PASSTHRU      = (NUM_UNITS == 0),
    parameter ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES),
    parameter MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
                                                      `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) :
                                         (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
                                                      `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS))
) (
    input wire clk,
    input wire reset,

    // Core request
    input  wire [NUM_INPUTS-1:0][NUM_REQS-1:0]                          core_req_valid,
    input  wire [NUM_INPUTS-1:0][NUM_REQS-1:0]                          core_req_rw,
    input  wire [NUM_INPUTS-1:0][NUM_REQS-1:0][WORD_SIZE-1:0]           core_req_byteen,
    input  wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
    input  wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0]      core_req_data,
    input  wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0]           core_req_tag,
    output wire [NUM_INPUTS-1:0][NUM_REQS-1:0]                          core_req_ready,

    // Core response
    output wire [NUM_INPUTS-1:0][NUM_REQS-1:0]                          core_rsp_valid,
    output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0]      core_rsp_data,
    output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0]           core_rsp_tag,
    input  wire [NUM_INPUTS-1:0][NUM_REQS-1:0]                          core_rsp_ready,

    // Memory request
    output wire                             mem_req_valid,
    output wire                             mem_req_rw,
    output wire [LINE_SIZE-1:0]             mem_req_byteen,
    output wire [`CS_MEM_ADDR_WIDTH-1:0]    mem_req_addr,
    output wire [`CS_LINE_WIDTH-1:0]        mem_req_data,
    output wire [MEM_TAG_WIDTH-1:0]         mem_req_tag,
    input  wire                             mem_req_ready,

    // Memory response
    input  wire                             mem_rsp_valid,
    input  wire [`CS_LINE_WIDTH-1:0]        mem_rsp_data,
    input  wire [MEM_TAG_WIDTH-1:0]         mem_rsp_tag,
    output wire                             mem_rsp_ready
);
    // Interface instances that carry the flat ports into the cluster.
    // core_bus_if is indexed as (input * NUM_REQS + request lane).
    VX_mem_bus_if #(
        .DATA_SIZE (WORD_SIZE),
        .TAG_WIDTH (TAG_WIDTH)
    ) core_bus_if[NUM_INPUTS * NUM_REQS]();

    VX_mem_bus_if #(
        .DATA_SIZE (LINE_SIZE),
        .TAG_WIDTH (MEM_TAG_WIDTH)
    ) mem_bus_if();

`ifdef PERF_ENABLE
    // This wrapper has no perf port; the cluster's perf interface is terminated locally.
    VX_cache_perf_if cache_perf_if();
`endif

    // Core request
    for (genvar i = 0; i < NUM_INPUTS; ++i) begin
        for (genvar r = 0; r < NUM_REQS; ++r) begin
            assign core_bus_if[i * NUM_REQS + r].req_valid       = core_req_valid[i][r];
            assign core_bus_if[i * NUM_REQS + r].req_data.rw     = core_req_rw[i][r];
            assign core_bus_if[i * NUM_REQS + r].req_data.byteen = core_req_byteen[i][r];
            assign core_bus_if[i * NUM_REQS + r].req_data.addr   = core_req_addr[i][r];
            assign core_bus_if[i * NUM_REQS + r].req_data.data   = core_req_data[i][r];
            assign core_bus_if[i * NUM_REQS + r].req_data.tag    = core_req_tag[i][r];
            assign core_req_ready[i][r] = core_bus_if[i * NUM_REQS + r].req_ready;
        end
    end

    // Core response
    for (genvar i = 0; i < NUM_INPUTS; ++i) begin
        for (genvar r = 0; r < NUM_REQS; ++r) begin
            assign core_rsp_valid[i][r] = core_bus_if[i * NUM_REQS + r].rsp_valid;
            assign core_rsp_data[i][r]  = core_bus_if[i * NUM_REQS + r].rsp_data.data;
            assign core_rsp_tag[i][r]   = core_bus_if[i * NUM_REQS + r].rsp_data.tag;
            assign core_bus_if[i * NUM_REQS + r].rsp_ready = core_rsp_ready[i][r];
        end
    end

    // Memory request
    assign mem_req_valid  = mem_bus_if.req_valid;
    assign mem_req_rw     = mem_bus_if.req_data.rw;
    assign mem_req_byteen = mem_bus_if.req_data.byteen;
    assign mem_req_addr   = mem_bus_if.req_data.addr;
    assign mem_req_data   = mem_bus_if.req_data.data;
    assign mem_req_tag    = mem_bus_if.req_data.tag;
    assign mem_bus_if.req_ready = mem_req_ready;

    // Memory response
    assign mem_bus_if.rsp_valid     = mem_rsp_valid;
    assign mem_bus_if.rsp_data.data = mem_rsp_data;
    assign mem_bus_if.rsp_data.tag  = mem_rsp_tag;
    assign mem_rsp_ready = mem_bus_if.rsp_ready;

    VX_cache_cluster #(
        .INSTANCE_ID    (INSTANCE_ID),
        .NUM_UNITS      (NUM_UNITS),
        .NUM_INPUTS     (NUM_INPUTS),
        .TAG_SEL_IDX    (TAG_SEL_IDX),
        .CACHE_SIZE     (CACHE_SIZE),
        .LINE_SIZE      (LINE_SIZE),
        .NUM_BANKS      (NUM_BANKS),
        .NUM_WAYS       (NUM_WAYS),
        .WORD_SIZE      (WORD_SIZE),
        .NUM_REQS       (NUM_REQS),
        .CRSQ_SIZE      (CRSQ_SIZE),
        .MSHR_SIZE      (MSHR_SIZE),
        .MRSQ_SIZE      (MRSQ_SIZE),
        .MREQ_SIZE      (MREQ_SIZE),
        .TAG_WIDTH      (TAG_WIDTH),
        .UUID_WIDTH     (UUID_WIDTH),
        .WRITE_ENABLE   (WRITE_ENABLE),
        // Must match the NC_ENABLE used for MEM_TAG_WIDTH above
        .NC_ENABLE      (NC_ENABLE),
        .CORE_OUT_REG   (CORE_OUT_REG),
        .MEM_OUT_REG    (MEM_OUT_REG)
    ) cache (
    `ifdef PERF_ENABLE
        .cache_perf_if  (cache_perf_if),
    `endif
        .clk            (clk),
        .reset          (reset),
        .core_bus_if    (core_bus_if),
        .mem_bus_if     (mem_bus_if)
    );

endmodule
|
||||
152
hw/rtl/cache/VX_cache_data.sv
vendored
Normal file
152
hw/rtl/cache/VX_cache_data.sv
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// Data store of a single cache bank.
//
// All ways of a cache line live in one RAM row (VX_sp_ram "data_store"),
// laid out as [word][way][bits] so that the way multiplexing is the last
// step of a read. The RAM is read every cycle at the row selected by the
// low `CS_LINE_SEL_BITS bits of line_addr; the word is then picked with
// wsel and the way with the one-hot way_sel.
//
// Writes:
//   - fill  : the whole line (fill_data) is written, all byte enables set,
//             into the way(s) flagged in way_sel.
//   - write : write_data is replicated to every word slot and only the
//             word selected by wsel gets its byteen enables.
// When the cache is read-only and direct-mapped (WRITE_ENABLE == 0 and
// NUM_WAYS == 1) the RAM uses a single write-enable bit driven by fill.
module VX_cache_data #(
    parameter `STRING INSTANCE_ID= "",
    parameter BANK_ID           = 0,
    // Size of cache in bytes
    parameter CACHE_SIZE        = 1024,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE         = 16,
    // Number of banks
    parameter NUM_BANKS         = 1,
    // Number of associative ways
    parameter NUM_WAYS          = 1,
    // Size of a word in bytes
    parameter WORD_SIZE         = 1,
    // Enable cache writeable
    parameter WRITE_ENABLE      = 1,
    // Request debug identifier
    parameter UUID_WIDTH        = 0
) (
    input wire clk,
    input wire reset,

`IGNORE_UNUSED_BEGIN
    // request identifier, only consumed by the debug trace
    input wire[`UP(UUID_WIDTH)-1:0] req_uuid,
`IGNORE_UNUSED_END

    // pipeline stall, only consumed by the debug trace
    input wire stall,

    input wire read,
    input wire fill,
    input wire write,
    input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
    input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
    input wire [WORD_SIZE-1:0] byteen,
    input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
    input wire [`CS_WORD_WIDTH-1:0] write_data,
    input wire [NUM_WAYS-1:0] way_sel,

    output wire [`CS_WORD_WIDTH-1:0] read_data
);
    `UNUSED_SPARAM (INSTANCE_ID)
    `UNUSED_PARAM (BANK_ID)
    `UNUSED_PARAM (WORD_SIZE)
    `UNUSED_VAR (reset)
    `UNUSED_VAR (line_addr)
    `UNUSED_VAR (read)

    // One enable bit per byte of every way when partial writes are possible,
    // otherwise a single enable bit for the whole row.
    localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;

    wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
    wire [BYTEENW-1:0] wren;

    if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
        reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
        reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;

        // core write: broadcast the word, enable only the selected slot
        always @(*) begin
            wdata_r = {`CS_WORDS_PER_LINE{write_data}};
            wren_r  = '0;
            wren_r[wsel] = byteen;
        end

        // order the data layout to perform ways multiplexing last
        // this allows performing onehot encoding of the way index in parallel with BRAM read.
        wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
        for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
            assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
            for (genvar j = 0; j < NUM_WAYS; ++j) begin
                // a fill enables every byte of the word; the way mask keeps
                // the other ways of the row untouched
                assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i])
                                    & {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}};
            end
        end
        assign wren = wren_w;
    end else begin
        `UNUSED_VAR (write)
        `UNUSED_VAR (byteen)
        `UNUSED_VAR (write_data)
        assign wdata = fill_data;
        assign wren  = fill;
    end

    wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;

    wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];

    VX_sp_ram #(
        .DATAW (`CS_LINE_WIDTH * NUM_WAYS),
        .SIZE  (`CS_LINES_PER_BANK),
        .WRENW (BYTEENW),
        .NO_RWCHECK (1)
    ) data_store (
        .clk   (clk),
        .read  (1'b1),
        .write (write || fill),
        .wren  (wren),
        .addr  (line_sel),
        .wdata (wdata),
        .rdata (rdata)
    );

    // word selection
    wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;

    if (`CS_WORDS_PER_LINE > 1) begin
        assign per_way_rdata = rdata[wsel];
    end else begin
        `UNUSED_VAR (wsel)
        assign per_way_rdata = rdata;
    end

    // way selection
    // The encoder and the index wire are only generated for set-associative
    // configurations: with a single way `CLOG2(NUM_WAYS) is zero, and an
    // index declared as [`CLOG2(NUM_WAYS)-1:0] would become [-1:0].
    if (NUM_WAYS > 1) begin
        wire [`CLOG2(NUM_WAYS)-1:0] way_idx;

        VX_onehot_encoder #(
            .N (NUM_WAYS)
        ) way_enc (
            .data_in  (way_sel),
            .data_out (way_idx),
            `UNUSED_PIN (valid_out)
        );

        assign read_data = per_way_rdata[way_idx];
    end else begin
        `UNUSED_VAR (way_sel)
        assign read_data = per_way_rdata[0];
    end

    `UNUSED_VAR (stall)

`ifdef DBG_TRACE_CACHE_DATA
    always @(posedge clk) begin
        if (fill && ~stall) begin
            `TRACE(3, ("%d: %s-bank%0d data-fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
        end
        if (read && ~stall) begin
            `TRACE(3, ("%d: %s-bank%0d data-read: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, read_data, req_uuid));
        end
        if (write && ~stall) begin
            `TRACE(3, ("%d: %s-bank%0d data-write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, byteen, write_data, req_uuid));
        end
    end
`endif

endmodule
|
||||
97
hw/rtl/cache/VX_cache_define.vh
vendored
97
hw/rtl/cache/VX_cache_define.vh
vendored
@@ -1,72 +1,65 @@
|
||||
`ifndef VX_CACHE_DEFINE
|
||||
`define VX_CACHE_DEFINE
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`ifndef VX_CACHE_DEFINE_VH
|
||||
`define VX_CACHE_DEFINE_VH
|
||||
|
||||
// cache request identifier
|
||||
`define DBG_CACHE_REQ_IDW 44
|
||||
`include "VX_define.vh"
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQS)
|
||||
`define CS_REQ_SEL_BITS `CLOG2(NUM_REQS)
|
||||
|
||||
`define PORTS_BITS `LOG2UP(NUM_PORTS)
|
||||
`define CS_WORD_WIDTH (8 * WORD_SIZE)
|
||||
`define CS_LINE_WIDTH (8 * LINE_SIZE)
|
||||
`define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS)
|
||||
`define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS)
|
||||
|
||||
// tag valid tid word_sel
|
||||
`define MSHR_DATA_WIDTH ((CORE_TAG_WIDTH + 1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
|
||||
`define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS))
|
||||
`define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE)
|
||||
|
||||
`define WORD_WIDTH (8 * WORD_SIZE)
|
||||
|
||||
`define CACHE_LINE_WIDTH (8 * CACHE_LINE_SIZE)
|
||||
|
||||
`define BANK_SIZE (CACHE_SIZE / NUM_BANKS)
|
||||
`define LINES_PER_BANK (`BANK_SIZE / CACHE_LINE_SIZE)
|
||||
`define WORDS_PER_LINE (CACHE_LINE_SIZE / WORD_SIZE)
|
||||
|
||||
`define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE))
|
||||
`define MEM_ADDR_WIDTH (32-`CLOG2(CACHE_LINE_SIZE))
|
||||
`define LINE_ADDR_WIDTH (`MEM_ADDR_WIDTH-`CLOG2(NUM_BANKS))
|
||||
`define CS_WORD_ADDR_WIDTH (`MEM_ADDR_WIDTH-`CLOG2(WORD_SIZE))
|
||||
`define CS_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH-`CLOG2(LINE_SIZE))
|
||||
`define CS_LINE_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH-`CLOG2(NUM_BANKS))
|
||||
|
||||
// Word select
|
||||
`define WORD_SELECT_BITS `CLOG2(`WORDS_PER_LINE)
|
||||
`define WORD_SELECT_ADDR_START 0
|
||||
`define WORD_SELECT_ADDR_END (`WORD_SELECT_ADDR_START+`WORD_SELECT_BITS-1)
|
||||
`define CS_WORD_SEL_BITS `CLOG2(`CS_WORDS_PER_LINE)
|
||||
`define CS_WORD_SEL_ADDR_START 0
|
||||
`define CS_WORD_SEL_ADDR_END (`CS_WORD_SEL_ADDR_START+`CS_WORD_SEL_BITS-1)
|
||||
|
||||
// Bank select
|
||||
`define BANK_SELECT_BITS `CLOG2(NUM_BANKS)
|
||||
`define BANK_SELECT_ADDR_START (1+`WORD_SELECT_ADDR_END+BANK_ADDR_OFFSET)
|
||||
`define BANK_SELECT_ADDR_END (`BANK_SELECT_ADDR_START+`BANK_SELECT_BITS-1)
|
||||
`define CS_BANK_SEL_BITS `CLOG2(NUM_BANKS)
|
||||
`define CS_BANK_SEL_ADDR_START (1+`CS_WORD_SEL_ADDR_END)
|
||||
`define CS_BANK_SEL_ADDR_END (`CS_BANK_SEL_ADDR_START+`CS_BANK_SEL_BITS-1)
|
||||
|
||||
// Line select
|
||||
`define LINE_SELECT_BITS `CLOG2(`LINES_PER_BANK)
|
||||
`define LINE_SELECT_ADDR_START (1+`BANK_SELECT_ADDR_END)
|
||||
`define LINE_SELECT_ADDR_END (`LINE_SELECT_ADDR_START-BANK_ADDR_OFFSET+`LINE_SELECT_BITS-1)
|
||||
`define CS_LINE_SEL_BITS `CLOG2(`CS_LINES_PER_BANK)
|
||||
`define CS_LINE_SEL_ADDR_START (1+`CS_BANK_SEL_ADDR_END)
|
||||
`define CS_LINE_SEL_ADDR_END (`CS_LINE_SEL_ADDR_START+`CS_LINE_SEL_BITS-1)
|
||||
|
||||
// Tag select
|
||||
`define TAG_SELECT_BITS (`WORD_ADDR_WIDTH-1-`LINE_SELECT_ADDR_END)
|
||||
`define TAG_SELECT_ADDR_START (1+`LINE_SELECT_ADDR_END)
|
||||
`define TAG_SELECT_ADDR_END (`WORD_ADDR_WIDTH-1)
|
||||
`define CS_TAG_SEL_BITS (`CS_WORD_ADDR_WIDTH-1-`CS_LINE_SEL_ADDR_END)
|
||||
`define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END)
|
||||
`define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1)
|
||||
|
||||
`define SELECT_BANK_ID(x) x[`BANK_SELECT_ADDR_END : `BANK_SELECT_ADDR_START]
|
||||
`define SELECT_LINE_ADDR0(x) x[`WORD_ADDR_WIDTH-1 : `LINE_SELECT_ADDR_START]
|
||||
`define SELECT_LINE_ADDRX(x) {x[`WORD_ADDR_WIDTH-1 : `LINE_SELECT_ADDR_START], x[`BANK_SELECT_ADDR_START-1 : 1+`WORD_SELECT_ADDR_END]}
|
||||
|
||||
`define LINE_TAG_ADDR(x) x[`LINE_ADDR_WIDTH-1 : `LINE_SELECT_BITS]
|
||||
|
||||
`define CACHE_REQ_ID_RNG CORE_TAG_WIDTH-1 : (CORE_TAG_WIDTH-`DBG_CACHE_REQ_IDW)
|
||||
`define CS_LINE_TAG_ADDR(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CORE_RSP_TAGS ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS)
|
||||
`define CS_LINE_TO_MEM_ADDR(x, i) {x, `CS_BANK_SEL_BITS'(i)}
|
||||
`define CS_MEM_ADDR_TO_BANK_ID(x) x[0 +: `CS_BANK_SEL_BITS]
|
||||
`define CS_MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
|
||||
`define CS_MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `CS_BANK_SEL_BITS]
|
||||
|
||||
`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
|
||||
`define CS_LINE_TO_FULL_ADDR(x, i) {x, (`XLEN-$bits(x))'(i << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))}
|
||||
`define CS_MEM_TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)}
|
||||
|
||||
`define MEM_ADDR_TO_BANK_ID(x) x[0 +: `BANK_SELECT_BITS]
|
||||
|
||||
`define MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
|
||||
|
||||
`define MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `BANK_SELECT_BITS]
|
||||
|
||||
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
`endif
|
||||
`endif // VX_CACHE_DEFINE_VH
|
||||
|
||||
51
hw/rtl/cache/VX_cache_init.sv
vendored
Normal file
51
hw/rtl/cache/VX_cache_init.sv
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// Line-address sequencer used to initialize a cache bank.
//
// After reset, valid_out is asserted and addr_out counts upward from zero,
// one line index per cycle. valid_out drops on the cycle after the last
// index (2 ** `CS_LINE_SEL_BITS - 1) has been presented and stays low
// until the next reset.
module VX_cache_init #(
    // Size of cache in bytes
    parameter CACHE_SIZE = 1024,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE  = 16,
    // Number of banks
    parameter NUM_BANKS  = 1,
    // Number of associative ways
    parameter NUM_WAYS   = 1
) (
    input  wire                         clk,
    input  wire                         reset,
    output wire [`CS_LINE_SEL_BITS-1:0] addr_out,
    output wire                         valid_out
);
    reg                         enabled;
    reg [`CS_LINE_SEL_BITS-1:0] line_ctr;

    // high while the counter points at the last line of the bank
    wire last_line = (line_ctr == ((2 ** `CS_LINE_SEL_BITS)-1));

    always @(posedge clk) begin
        if (reset) begin
            line_ctr <= '0;
            enabled  <= 1;
        end else if (enabled) begin
            // the counter wraps back to zero as the sequencer turns off
            line_ctr <= line_ctr + `CS_LINE_SEL_BITS'(1);
            if (last_line) begin
                enabled <= 0;
            end
        end
    end

    assign valid_out = enabled;
    assign addr_out  = line_ctr;

endmodule
|
||||
271
hw/rtl/cache/VX_cache_mshr.sv
vendored
Normal file
271
hw/rtl/cache/VX_cache_mshr.sv
vendored
Normal file
@@ -0,0 +1,271 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// this is an implementation of a pipelined multi-banked cache
|
||||
// we allocate a free slot from the MSHR before processing a core request
|
||||
// and release the slot when we get a cache hit.
|
||||
// during a memory fill response we initiate the replay sequence
|
||||
// and dequeue all associated pending entries.
|
||||
|
||||
// Warning: This MSHR implementation is strongly coupled with the bank pipeline
|
||||
// and as such changes to either module requires careful evaluation.
|
||||
// This implementation makes the following assumptions:
|
||||
// (1) two-cycle pipeline: st0 and st1.
|
||||
// (2) core request flow: st0: allocate / lookup, st1: finalize.
|
||||
// (3) the first dequeue after the fill should happen in st0, when the fill is in st1
|
||||
// this is enforced inside the bank by "rdw_hazard_st0".
|
||||
|
||||
// Miss Status Holding Registers of a cache bank.
//
// Per-entry state:
//   valid_table : entry is in use
//   addr_table  : line address of the request
//   write_table : request is a write
//   next_table / next_index : entry has a successor, and which one; this
//                 chains the requests that target the same line
//   entries RAM : opaque request payload (DATA_WIDTH bits)
//
// Operations:
//   allocate : takes the free slot chosen during the previous cycle
//              (allocate_id) and records address, rw flag and payload.
//              allocate_tail reports the entry matching lookup_addr that
//              has no successor yet.
//   lookup   : lookup_matches flags valid, non-write entries whose address
//              equals lookup_addr.
//   fill     : fill_addr returns the address stored at fill_id and the
//              dequeue sequence is started from that entry.
//   dequeue  : pops the current entry and follows the next_index chain
//              until an entry without successor has been popped.
//   finalize : optionally releases finalize_id and/or links finalize_id
//              behind finalize_tail.
module VX_cache_mshr #(
    parameter `STRING INSTANCE_ID= "",
    parameter BANK_ID           = 0,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE         = 16,
    // Number of banks
    parameter NUM_BANKS         = 1,
    // Miss Reserve Queue size (number of MSHR entries)
    parameter MSHR_SIZE         = 4,
    // Request debug identifier
    parameter UUID_WIDTH        = 0,
    // MSHR parameters
    parameter DATA_WIDTH        = 1,
    parameter MSHR_ADDR_WIDTH   = `LOG2UP(MSHR_SIZE)
) (
    input wire clk,
    input wire reset,

`IGNORE_UNUSED_BEGIN
    // request identifiers, only consumed by assertions and debug traces
    input wire[`UP(UUID_WIDTH)-1:0]     deq_req_uuid,
    input wire[`UP(UUID_WIDTH)-1:0]     lkp_req_uuid,
    input wire[`UP(UUID_WIDTH)-1:0]     fin_req_uuid,
`IGNORE_UNUSED_END

    // allocate
    input wire                          allocate_valid,
    input wire [`CS_LINE_ADDR_WIDTH-1:0] allocate_addr,
    input wire                          allocate_rw,
    input wire [DATA_WIDTH-1:0]         allocate_data,
    output wire [MSHR_ADDR_WIDTH-1:0]   allocate_id,
    output wire [MSHR_ADDR_WIDTH-1:0]   allocate_tail,
    output wire                         allocate_ready,

    // lookup
    input wire                          lookup_valid,
    input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
    output wire [MSHR_SIZE-1:0]         lookup_matches,

    // memory fill
    input wire                          fill_valid,
    input wire [MSHR_ADDR_WIDTH-1:0]    fill_id,
    output wire [`CS_LINE_ADDR_WIDTH-1:0] fill_addr,

    // dequeue
    output wire                         dequeue_valid,
    output wire [`CS_LINE_ADDR_WIDTH-1:0] dequeue_addr,
    output wire                         dequeue_rw,
    output wire [DATA_WIDTH-1:0]        dequeue_data,
    output wire [MSHR_ADDR_WIDTH-1:0]   dequeue_id,
    input wire                          dequeue_ready,

    // finalize
    input wire                          finalize_valid,
    input wire                          finalize_release,
    input wire                          finalize_pending,
    input wire [MSHR_ADDR_WIDTH-1:0]    finalize_id,
    input wire [MSHR_ADDR_WIDTH-1:0]    finalize_tail
);
    `UNUSED_PARAM (BANK_ID)

    reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
    reg [MSHR_ADDR_WIDTH-1:0] next_index [MSHR_SIZE-1:0];

    reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
    reg [MSHR_SIZE-1:0] next_table, next_table_x, next_table_n;
    reg [MSHR_SIZE-1:0] write_table;

    reg allocate_rdy, allocate_rdy_n;
    reg [MSHR_ADDR_WIDTH-1:0] allocate_id_r, allocate_id_n;

    reg dequeue_val, dequeue_val_n;
    reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n;

    wire [MSHR_ADDR_WIDTH-1:0] tail_idx;

    wire allocate_fire = allocate_valid && allocate_ready;
    wire dequeue_fire  = dequeue_valid && dequeue_ready;

    // valid entries holding the looked-up line address
    wire [MSHR_SIZE-1:0] addr_matches;
    for (genvar i = 0; i < MSHR_SIZE; ++i) begin
        assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr);
    end

    // pick the slot for the next allocation among the entries that will be
    // free after this cycle's updates
    VX_lzc #(
        .N (MSHR_SIZE),
        .REVERSE (1)
    ) allocate_sel (
        .data_in   (~valid_table_n),
        .data_out  (allocate_id_n),
        .valid_out (allocate_rdy_n)
    );

    // tail of the chain for the looked-up address: the matching entry that
    // has no successor, taking this cycle's finalize link into account
    VX_onehot_encoder #(
        .N (MSHR_SIZE)
    ) tail_sel (
        .data_in  (addr_matches & ~next_table_x),
        .data_out (tail_idx),
        `UNUSED_PIN (valid_out)
    );

    // next-state logic; the statement order defines the priority when
    // several operations touch the same entry in one cycle
    always @(*) begin
        valid_table_n = valid_table;
        next_table_x  = next_table;
        dequeue_val_n = dequeue_val;
        dequeue_id_n  = dequeue_id;

        // a fill starts the dequeue sequence at the filled entry
        if (fill_valid) begin
            dequeue_val_n = 1;
            dequeue_id_n  = fill_id;
        end

        // pop the current entry and move to its successor, if any
        if (dequeue_fire) begin
            valid_table_n[dequeue_id] = 0;
            if (next_table[dequeue_id]) begin
                dequeue_id_n = next_index[dequeue_id];
            end else begin
                dequeue_val_n = 0;
            end
        end

        if (finalize_valid) begin
            if (finalize_release) begin
                valid_table_n[finalize_id] = 0;
            end
            if (finalize_pending) begin
                next_table_x[finalize_tail] = 1;
            end
        end

        // a freshly allocated entry starts without successor
        next_table_n = next_table_x;
        if (allocate_fire) begin
            valid_table_n[allocate_id] = 1;
            next_table_n[allocate_id]  = 0;
        end
    end

    always @(posedge clk) begin
        if (reset) begin
            valid_table  <= '0;
            allocate_rdy <= 0;
            dequeue_val  <= 0;
        end else begin
            valid_table  <= valid_table_n;
            allocate_rdy <= allocate_rdy_n;
            dequeue_val  <= dequeue_val_n;
        end

        if (allocate_fire) begin
            addr_table[allocate_id]  <= allocate_addr;
            write_table[allocate_id] <= allocate_rw;
        end

        if (finalize_valid && finalize_pending) begin
            next_index[finalize_tail] <= finalize_id;
        end

        dequeue_id_r  <= dequeue_id_n;
        allocate_id_r <= allocate_id_n;
        next_table    <= next_table_n;
    end

    `RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s-bank%0d inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
        `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))

    `RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s-bank%0d invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
        `CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))

    `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s-bank%0d invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, BANK_ID,
        `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))

    // request payload storage
    // The write is gated by allocate_fire, like addr_table/write_table/
    // valid_table above. When allocate_ready is low no slot was selected,
    // so allocate_id_r is stale and writing there on allocate_valid alone
    // could clobber the payload of a pending entry.
    VX_dp_ram #(
        .DATAW  (DATA_WIDTH),
        .SIZE   (MSHR_SIZE),
        .LUTRAM (1)
    ) entries (
        .clk   (clk),
        .read  (1'b1),
        .write (allocate_fire),
        `UNUSED_PIN (wren),
        .waddr (allocate_id_r),
        .wdata (allocate_data),
        .raddr (dequeue_id_r),
        .rdata (dequeue_data)
    );

    assign fill_addr = addr_table[fill_id];

    assign allocate_ready = allocate_rdy;
    assign allocate_id    = allocate_id_r;
    assign allocate_tail  = tail_idx;

    assign dequeue_valid  = dequeue_val;
    assign dequeue_addr   = addr_table[dequeue_id_r];
    assign dequeue_rw     = write_table[dequeue_id_r];
    assign dequeue_id     = dequeue_id_r;

    // pending writes are excluded from the lookup result
    assign lookup_matches = addr_matches & ~write_table;

    `UNUSED_VAR (lookup_valid)

`ifdef DBG_TRACE_CACHE_MSHR
    // dump the table one cycle after any operation, once the registers
    // reflect the update
    reg show_table;
    always @(posedge clk) begin
        if (reset) begin
            show_table <= 0;
        end else begin
            show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
        end
        if (allocate_fire)
            `TRACE(3, ("%d: %s-bank%0d mshr-allocate: addr=0x%0h, tail=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
                `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_tail, allocate_id, lkp_req_uuid));
        if (lookup_valid)
            `TRACE(3, ("%d: %s-bank%0d mshr-lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
                `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_matches, lkp_req_uuid));
        if (finalize_valid)
            `TRACE(3, ("%d: %s-bank%0d mshr-finalize release=%b, pending=%b, tail=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
                finalize_release, finalize_pending, finalize_tail, finalize_id, fin_req_uuid));
        if (fill_valid)
            `TRACE(3, ("%d: %s-bank%0d mshr-fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, BANK_ID,
                `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id));
        if (dequeue_fire)
            `TRACE(3, ("%d: %s-bank%0d mshr-dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
                `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid));
        if (show_table) begin
            `TRACE(3, ("%d: %s-bank%0d mshr-table", $time, INSTANCE_ID, BANK_ID));
            for (integer i = 0; i < MSHR_SIZE; ++i) begin
                if (valid_table[i]) begin
                    `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)));
                    if (write_table[i])
                        `TRACE(3, ("(w)"));
                    else
                        `TRACE(3, ("(r)"));
                    if (next_table[i])
                        `TRACE(3, ("->%0d", next_index[i]));
                end
            end
            `TRACE(3, ("\n"));
        end
    end
`endif

endmodule
|
||||
@@ -1,9 +1,19 @@
|
||||
`ifndef VX_PERF_CACHE_IF
|
||||
`define VX_PERF_CACHE_IF
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_cache_if ();
|
||||
interface VX_cache_perf_if ();
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] reads;
|
||||
wire [`PERF_CTR_BITS-1:0] writes;
|
||||
@@ -37,5 +47,3 @@ interface VX_perf_cache_if ();
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
116
hw/rtl/cache/VX_cache_tags.sv
vendored
Normal file
116
hw/rtl/cache/VX_cache_tags.sv
vendored
Normal file
@@ -0,0 +1,116 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// Tag store of a single cache bank.
//
// One VX_sp_ram per way holds {valid, tag} for every line of the bank.
// All ways are read every cycle at the row selected by the low
// `CS_LINE_SEL_BITS bits of line_addr; tag_matches[i] is set when way i
// holds a valid entry whose tag equals the tag part of line_addr.
//
// Writes:
//   - fill : the way flagged in way_sel receives {1, tag}. With several
//            ways the target is a one-hot pointer that rotates on every
//            non-stalled cycle; with one way, way_sel is simply fill.
//   - init : every way receives {0, tag}, i.e. the row is invalidated.
module VX_cache_tags #(
    parameter `STRING INSTANCE_ID = "",
    parameter BANK_ID       = 0,
    // Size of cache in bytes
    parameter CACHE_SIZE    = 1024,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE     = 16,
    // Number of banks
    parameter NUM_BANKS     = 1,
    // Number of associative ways
    parameter NUM_WAYS      = 1,
    // Size of a word in bytes
    parameter WORD_SIZE     = 1,
    // Request debug identifier
    parameter UUID_WIDTH    = 0
) (
    input wire clk,
    input wire reset,

`IGNORE_UNUSED_BEGIN
    // request identifier, only consumed by the debug trace
    input wire [`UP(UUID_WIDTH)-1:0] req_uuid,
`IGNORE_UNUSED_END

    input wire stall,

    // read/fill
    input wire lookup,
    input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
    input wire fill,
    input wire init,
    output wire [NUM_WAYS-1:0] way_sel,
    output wire [NUM_WAYS-1:0] tag_matches
);
    `UNUSED_SPARAM (INSTANCE_ID)
    `UNUSED_PARAM (BANK_ID)
    `UNUSED_VAR (reset)
    `UNUSED_VAR (lookup)

    // stored entry: valid bit + tag
    localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;

    wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
    wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_TAG_ADDR(line_addr);

    if (NUM_WAYS > 1) begin
        reg [NUM_WAYS-1:0] repl_way;
        // cyclic assignment of replacement way
        always @(posedge clk) begin
            if (reset) begin
                repl_way <= 1;
            end else if (~stall) begin
                // hold the value on stalls to prevent a stalled fill from
                // being written into different ways
                repl_way <= {repl_way[NUM_WAYS-2:0], repl_way[NUM_WAYS-1]};
            end
        end
        for (genvar i = 0; i < NUM_WAYS; ++i) begin
            assign way_sel[i] = fill && repl_way[i];
        end
    end else begin
        `UNUSED_VAR (stall)
        assign way_sel = fill;
    end

    for (genvar i = 0; i < NUM_WAYS; ++i) begin
        wire [`CS_TAG_SEL_BITS-1:0] read_tag;
        wire read_valid;

        VX_sp_ram #(
            .DATAW (TAG_WIDTH),
            .SIZE  (`CS_LINES_PER_BANK),
            .NO_RWCHECK (1)
        ) tag_store (
            .clk   (clk),
            .read  (1'b1),
            .write (way_sel[i] || init),
            `UNUSED_PIN (wren),
            .addr  (line_sel),
            // the valid bit is cleared by init and set by a fill
            .wdata ({~init, line_tag}),
            .rdata ({read_valid, read_tag})
        );

        assign tag_matches[i] = read_valid && (line_tag == read_tag);
    end

`ifdef DBG_TRACE_CACHE_TAG
    always @(posedge clk) begin
        if (fill && ~stall) begin
            `TRACE(3, ("%d: %s-bank%0d tag-fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag));
        end
        if (init) begin
            `TRACE(3, ("%d: %s-bank%0d tag-init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
        end
        if (lookup && ~stall) begin
            if (tag_matches != 0) begin
                // report the way that hit: way_sel is gated by fill and
                // only ever carries the replacement way
                `TRACE(3, ("%d: %s-bank%0d tag-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
            end else begin
                `TRACE(3, ("%d: %s-bank%0d tag-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
            end
        end
    end
`endif

endmodule
|
||||
501
hw/rtl/cache/VX_cache_wrap.sv
vendored
Normal file
501
hw/rtl/cache/VX_cache_wrap.sv
vendored
Normal file
@@ -0,0 +1,501 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_wrap #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 4,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_TAG_BIT = 0,
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
// Force bypass for all requests
|
||||
parameter PASSTHRU = 0,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_REG = 0
|
||||
) (
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_cache_perf_if.master cache_perf_if,
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if.slave core_bus_if [NUM_REQS],
|
||||
VX_mem_bus_if.master mem_bus_if
|
||||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid parameter"))
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam CORE_TAG_X_WIDTH = TAG_WIDTH - NC_ENABLE;
|
||||
localparam MEM_TAG_X_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH)) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
|
||||
localparam NC_BYPASS = (NC_ENABLE || PASSTHRU);
|
||||
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
|
||||
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
|
||||
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
|
||||
assign core_req_data[i] = core_bus_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
|
||||
assign core_bus_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Core response buffering
|
||||
wire [NUM_REQS-1:0] core_rsp_valid_s;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_s;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
`RESET_RELAY (core_rsp_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
||||
.SIZE ((NC_BYPASS && !DIRECT_PASSTHRU) ? `OUT_REG_TO_EB_SIZE(CORE_OUT_REG) : 0),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(CORE_OUT_REG))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (core_rsp_reset),
|
||||
.valid_in (core_rsp_valid_s[i]),
|
||||
.ready_in (core_rsp_ready_s[i]),
|
||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_if[i].rsp_valid),
|
||||
.ready_out (core_bus_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory request buffering
|
||||
wire mem_req_valid_s;
|
||||
wire mem_req_rw_s;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire mem_req_ready_s;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
|
||||
.SIZE ((NC_BYPASS && !DIRECT_PASSTHRU) ? `OUT_REG_TO_EB_SIZE(MEM_OUT_REG) : 0),
|
||||
.OUT_REG (`OUT_REG_TO_EB_REG(MEM_OUT_REG))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
|
||||
.valid_out (mem_bus_if.req_valid),
|
||||
.ready_out (mem_bus_if.req_ready)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Core request
|
||||
wire [NUM_REQS-1:0] core_req_valid_b;
|
||||
wire [NUM_REQS-1:0] core_req_rw_b;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr_b;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_b;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data_b;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_req_tag_b;
|
||||
wire [NUM_REQS-1:0] core_req_ready_b;
|
||||
|
||||
// Core response
|
||||
wire [NUM_REQS-1:0] core_rsp_valid_b;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_b;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_rsp_tag_b;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_b;
|
||||
|
||||
// Memory request
|
||||
wire mem_req_valid_b;
|
||||
wire mem_req_rw_b;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_b;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_b;
|
||||
wire [MEM_TAG_X_WIDTH-1:0] mem_req_tag_b;
|
||||
wire mem_req_ready_b;
|
||||
|
||||
// Memory response
|
||||
wire mem_rsp_valid_b;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_b;
|
||||
wire [MEM_TAG_X_WIDTH-1:0] mem_rsp_tag_b;
|
||||
wire mem_rsp_ready_b;
|
||||
|
||||
if (NC_BYPASS) begin
|
||||
|
||||
`RESET_RELAY (nc_bypass_reset, reset);
|
||||
|
||||
VX_cache_bypass #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NC_TAG_BIT (NC_TAG_BIT),
|
||||
|
||||
.NC_ENABLE (NC_ENABLE),
|
||||
.PASSTHRU (PASSTHRU),
|
||||
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_DATA_SIZE (WORD_SIZE),
|
||||
.CORE_TAG_IN_WIDTH (TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
.MEM_DATA_SIZE (LINE_SIZE),
|
||||
.MEM_TAG_IN_WIDTH (MEM_TAG_X_WIDTH),
|
||||
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
|
||||
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_bypass (
|
||||
.clk (clk),
|
||||
.reset (nc_bypass_reset),
|
||||
|
||||
// Core request in
|
||||
.core_req_valid_in (core_req_valid),
|
||||
.core_req_rw_in (core_req_rw),
|
||||
.core_req_byteen_in (core_req_byteen),
|
||||
.core_req_addr_in (core_req_addr),
|
||||
.core_req_data_in (core_req_data),
|
||||
.core_req_tag_in (core_req_tag),
|
||||
.core_req_ready_in (core_req_ready),
|
||||
|
||||
// Core request out
|
||||
.core_req_valid_out (core_req_valid_b),
|
||||
.core_req_rw_out (core_req_rw_b),
|
||||
.core_req_byteen_out(core_req_byteen_b),
|
||||
.core_req_addr_out (core_req_addr_b),
|
||||
.core_req_data_out (core_req_data_b),
|
||||
.core_req_tag_out (core_req_tag_b),
|
||||
.core_req_ready_out (core_req_ready_b),
|
||||
|
||||
// Core response in
|
||||
.core_rsp_valid_in (core_rsp_valid_b),
|
||||
.core_rsp_data_in (core_rsp_data_b),
|
||||
.core_rsp_tag_in (core_rsp_tag_b),
|
||||
.core_rsp_ready_in (core_rsp_ready_b),
|
||||
|
||||
// Core response out
|
||||
.core_rsp_valid_out (core_rsp_valid_s),
|
||||
.core_rsp_data_out (core_rsp_data_s),
|
||||
.core_rsp_tag_out (core_rsp_tag_s),
|
||||
.core_rsp_ready_out (core_rsp_ready_s),
|
||||
|
||||
// Memory request in
|
||||
.mem_req_valid_in (mem_req_valid_b),
|
||||
.mem_req_rw_in (mem_req_rw_b),
|
||||
.mem_req_addr_in (mem_req_addr_b),
|
||||
.mem_req_byteen_in (mem_req_byteen_b),
|
||||
.mem_req_data_in (mem_req_data_b),
|
||||
.mem_req_tag_in (mem_req_tag_b),
|
||||
.mem_req_ready_in (mem_req_ready_b),
|
||||
|
||||
// Memory request out
|
||||
.mem_req_valid_out (mem_req_valid_s),
|
||||
.mem_req_addr_out (mem_req_addr_s),
|
||||
.mem_req_rw_out (mem_req_rw_s),
|
||||
.mem_req_byteen_out (mem_req_byteen_s),
|
||||
.mem_req_data_out (mem_req_data_s),
|
||||
.mem_req_tag_out (mem_req_tag_s),
|
||||
.mem_req_ready_out (mem_req_ready_s),
|
||||
|
||||
// Memory response in
|
||||
.mem_rsp_valid_in (mem_bus_if.rsp_valid),
|
||||
.mem_rsp_data_in (mem_bus_if.rsp_data.data),
|
||||
.mem_rsp_tag_in (mem_bus_if.rsp_data.tag),
|
||||
.mem_rsp_ready_in (mem_bus_if.rsp_ready),
|
||||
|
||||
// Memory response out
|
||||
.mem_rsp_valid_out (mem_rsp_valid_b),
|
||||
.mem_rsp_data_out (mem_rsp_data_b),
|
||||
.mem_rsp_tag_out (mem_rsp_tag_b),
|
||||
.mem_rsp_ready_out (mem_rsp_ready_b)
|
||||
);
|
||||
end else begin
|
||||
assign core_req_valid_b = core_req_valid;
|
||||
assign core_req_rw_b = core_req_rw;
|
||||
assign core_req_addr_b = core_req_addr;
|
||||
assign core_req_byteen_b= core_req_byteen;
|
||||
assign core_req_data_b = core_req_data;
|
||||
assign core_req_tag_b = core_req_tag;
|
||||
assign core_req_ready = core_req_ready_b;
|
||||
|
||||
assign core_rsp_valid_s = core_rsp_valid_b;
|
||||
assign core_rsp_data_s = core_rsp_data_b;
|
||||
assign core_rsp_tag_s = core_rsp_tag_b;
|
||||
assign core_rsp_ready_b = core_rsp_ready_s;
|
||||
|
||||
assign mem_req_valid_s = mem_req_valid_b;
|
||||
assign mem_req_addr_s = mem_req_addr_b;
|
||||
assign mem_req_rw_s = mem_req_rw_b;
|
||||
assign mem_req_byteen_s = mem_req_byteen_b;
|
||||
assign mem_req_data_s = mem_req_data_b;
|
||||
assign mem_req_ready_b = mem_req_ready_s;
|
||||
|
||||
// Add explicit NC=0 flag to the memory request tag
|
||||
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_WIDTH-1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_req_tag_insert (
|
||||
.data_in (mem_req_tag_b),
|
||||
.sel_in (1'b0),
|
||||
.data_out (mem_req_tag_s)
|
||||
);
|
||||
|
||||
assign mem_rsp_valid_b = mem_bus_if.rsp_valid;
|
||||
assign mem_rsp_data_b = mem_bus_if.rsp_data.data;
|
||||
assign mem_bus_if.rsp_ready = mem_rsp_ready_b;
|
||||
|
||||
// Remove NC flag from the memory response tag
|
||||
|
||||
VX_bits_remove #(
|
||||
.N (MEM_TAG_WIDTH),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_rsp_tag_remove (
|
||||
.data_in (mem_bus_if.rsp_data.tag),
|
||||
.data_out (mem_rsp_tag_b)
|
||||
);
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
|
||||
`UNUSED_VAR (core_req_valid_b)
|
||||
`UNUSED_VAR (core_req_rw_b)
|
||||
`UNUSED_VAR (core_req_addr_b)
|
||||
`UNUSED_VAR (core_req_byteen_b)
|
||||
`UNUSED_VAR (core_req_data_b)
|
||||
`UNUSED_VAR (core_req_tag_b)
|
||||
assign core_req_ready_b = '0;
|
||||
|
||||
assign core_rsp_valid_b = '0;
|
||||
assign core_rsp_data_b = '0;
|
||||
assign core_rsp_tag_b = '0;
|
||||
`UNUSED_VAR (core_rsp_ready_b)
|
||||
|
||||
assign mem_req_valid_b = 0;
|
||||
assign mem_req_addr_b = '0;
|
||||
assign mem_req_rw_b = '0;
|
||||
assign mem_req_byteen_b = '0;
|
||||
assign mem_req_data_b = '0;
|
||||
assign mem_req_tag_b = '0;
|
||||
`UNUSED_VAR (mem_req_ready_b)
|
||||
|
||||
`UNUSED_VAR (mem_rsp_valid_b)
|
||||
`UNUSED_VAR (mem_rsp_data_b)
|
||||
`UNUSED_VAR (mem_rsp_tag_b)
|
||||
assign mem_rsp_ready_b = 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign cache_perf_if.reads = '0;
|
||||
assign cache_perf_if.writes = '0;
|
||||
assign cache_perf_if.read_misses = '0;
|
||||
assign cache_perf_if.write_misses = '0;
|
||||
assign cache_perf_if.bank_stalls = '0;
|
||||
assign cache_perf_if.mshr_stalls = '0;
|
||||
assign cache_perf_if.mem_stalls = '0;
|
||||
assign cache_perf_if.crsp_stalls = '0;
|
||||
`endif
|
||||
|
||||
end else begin
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (CORE_TAG_X_WIDTH)
|
||||
) core_bus_wrap_if[NUM_REQS]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (MEM_TAG_X_WIDTH)
|
||||
) mem_bus_wrap_if();
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_wrap_if[i].req_valid = core_req_valid_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.rw = core_req_rw_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.addr = core_req_addr_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.byteen = core_req_byteen_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.data = core_req_data_b[i];
|
||||
assign core_bus_wrap_if[i].req_data.tag = core_req_tag_b[i];
|
||||
assign core_req_ready_b[i] = core_bus_wrap_if[i].req_ready;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_valid_b[i] = core_bus_wrap_if[i].rsp_valid;
|
||||
assign core_rsp_data_b[i] = core_bus_wrap_if[i].rsp_data.data;
|
||||
assign core_rsp_tag_b[i] = core_bus_wrap_if[i].rsp_data.tag;
|
||||
assign core_bus_wrap_if[i].rsp_ready = core_rsp_ready_b[i];
|
||||
end
|
||||
|
||||
assign mem_req_valid_b = mem_bus_wrap_if.req_valid;
|
||||
assign mem_req_addr_b = mem_bus_wrap_if.req_data.addr;
|
||||
assign mem_req_rw_b = mem_bus_wrap_if.req_data.rw;
|
||||
assign mem_req_byteen_b = mem_bus_wrap_if.req_data.byteen;
|
||||
assign mem_req_data_b = mem_bus_wrap_if.req_data.data;
|
||||
assign mem_req_tag_b = mem_bus_wrap_if.req_data.tag;
|
||||
assign mem_bus_wrap_if.req_ready = mem_req_ready_b;
|
||||
|
||||
assign mem_bus_wrap_if.rsp_valid = mem_rsp_valid_b;
|
||||
assign mem_bus_wrap_if.rsp_data.data = mem_rsp_data_b;
|
||||
assign mem_bus_wrap_if.rsp_data.tag = mem_rsp_tag_b;
|
||||
assign mem_rsp_ready_b = mem_bus_wrap_if.rsp_ready;
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
|
||||
VX_cache #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.CRSQ_SIZE (CRSQ_SIZE),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (CORE_TAG_X_WIDTH),
|
||||
.CORE_OUT_REG (NC_BYPASS ? 1 : CORE_OUT_REG),
|
||||
.MEM_OUT_REG (NC_BYPASS ? 1 : MEM_OUT_REG)
|
||||
) cache (
|
||||
.clk (clk),
|
||||
.reset (cache_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf_if (cache_perf_if),
|
||||
`endif
|
||||
|
||||
.core_bus_if (core_bus_wrap_if),
|
||||
.mem_bus_if (mem_bus_wrap_if)
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_BANK
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
wire [`UP(UUID_WIDTH)-1:0] core_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign core_req_uuid = 0;
|
||||
assign core_rsp_uuid = 0;
|
||||
end
|
||||
|
||||
wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready;
|
||||
wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (core_req_fire) begin
|
||||
if (core_bus_if[i].req_data.rw)
|
||||
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
|
||||
end
|
||||
if (core_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
|
||||
|
||||
if ((UUID_WIDTH != 0) && (NC_BYPASS != 0)) begin
|
||||
assign mem_req_uuid = mem_bus_if.req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign mem_rsp_uuid = mem_bus_if.rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign mem_req_uuid = 0;
|
||||
assign mem_rsp_uuid = 0;
|
||||
end
|
||||
|
||||
wire mem_req_fire = mem_bus_if.req_valid && mem_bus_if.req_ready;
|
||||
wire mem_rsp_fire = mem_bus_if.rsp_valid && mem_bus_if.rsp_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_bus_if.req_data.rw)
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
314
hw/rtl/cache/VX_core_req_bank_sel.sv
vendored
314
hw/rtl/cache/VX_core_req_bank_sel.sv
vendored
@@ -1,314 +0,0 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// -----------------------------------------------------------------------------
// VX_core_req_bank_sel
//
// Combinational dispatcher that steers up to NUM_REQS core requests onto
// NUM_BANKS bank request channels. Each request's target bank is taken from
// its word address. For every bank, the lowest-indexed valid request wins
// (the loops below walk from the highest index down, so the last assignment
// made for a bank comes from the lowest index).
//
// With NUM_PORTS > 1, request r is placed in port slot (r % NUM_PORTS) of its
// bank, and its pmask bit is set only when its line address and rw flag match
// those of the bank's winning request.
//
// clk/reset are only consumed by the optional PERF_ENABLE stall counter.
// -----------------------------------------------------------------------------
module VX_core_req_bank_sel #(
    parameter CACHE_ID          = 0,

    // Bytes per cache line within a bank
    parameter CACHE_LINE_SIZE   = 64,
    // Bytes per word
    parameter WORD_SIZE         = 4,
    // Bank count
    parameter NUM_BANKS         = 4,
    // Port count per bank
    parameter NUM_PORTS         = 1,
    // Word requests accepted per cycle
    parameter NUM_REQS          = 4,
    // Width of the core request tag
    parameter CORE_TAG_WIDTH    = 3,
    // Bank offset from the beginning of the index range
    parameter BANK_ADDR_OFFSET  = 0
) (
    input wire                                      clk,
    input wire                                      reset,

`ifdef PERF_ENABLE
    output wire [`PERF_CTR_BITS-1:0]                bank_stalls,
`endif

    input wire [NUM_REQS-1:0]                       core_req_valid,
    input wire [NUM_REQS-1:0]                       core_req_rw,
    input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
    input wire [NUM_REQS-1:0][WORD_SIZE-1:0]        core_req_byteen,
    input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]      core_req_data,
    input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0]   core_req_tag,
    output wire [NUM_REQS-1:0]                      core_req_ready,

    output wire [NUM_BANKS-1:0]                     per_bank_core_req_valid,
    output wire [NUM_BANKS-1:0][NUM_PORTS-1:0]      per_bank_core_req_pmask,
    output wire [NUM_BANKS-1:0]                     per_bank_core_req_rw,
    output wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr,
    output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel,
    output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen,
    output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data,
    output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid,
    output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
    input  wire [NUM_BANKS-1:0]                     per_bank_core_req_ready
);
    `UNUSED_PARAM (CACHE_ID)
    `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
    `STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value"))

    `UNUSED_VAR (clk)
    `UNUSED_VAR (reset)

    // ---- Per-request address decode -----------------------------------------

    wire [NUM_REQS-1:0][`LINE_ADDR_WIDTH-1:0]       req_line_addr;
    wire [NUM_REQS-1:0][`UP(`WORD_SELECT_BITS)-1:0] req_wsel;
    wire [NUM_REQS-1:0][`UP(`BANK_SELECT_BITS)-1:0] req_bank_id;

    for (genvar r = 0; r < NUM_REQS; ++r) begin
        // line address: which extraction macro applies depends on BANK_ADDR_OFFSET
        if (BANK_ADDR_OFFSET != 0) begin
            assign req_line_addr[r] = `SELECT_LINE_ADDRX(core_req_addr[r]);
        end else begin
            assign req_line_addr[r] = `SELECT_LINE_ADDR0(core_req_addr[r]);
        end

        // word select: low-order bits of the word address
        assign req_wsel[r] = core_req_addr[r][`UP(`WORD_SELECT_BITS)-1:0];

        // bank id: constant zero when there is a single bank
        if (NUM_BANKS > 1) begin
            assign req_bank_id[r] = `SELECT_BANK_ID(core_req_addr[r]);
        end else begin
            assign req_bank_id[r] = 0;
        end
    end

    // ---- Per-bank staging (driven by exactly one generate arm below) --------

    reg [NUM_BANKS-1:0]                                            bank_valid_r;
    reg [NUM_BANKS-1:0][NUM_PORTS-1:0]                             bank_pmask_r;
    reg [NUM_BANKS-1:0]                                            bank_rw_r;
    reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0]                      bank_addr_r;
    reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] bank_wsel_r;
    reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0]              bank_byteen_r;
    reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0]            bank_data_r;
    reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0]             bank_tid_r;
    reg [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0]         bank_tag_r;
    reg [NUM_REQS-1:0]                                             req_ready_r;

    if (NUM_REQS > 1) begin

        if (NUM_PORTS > 1) begin

            // ================= multiple requests, multi-port banks ===========

            // Line address / rw of the winning (lowest-index valid) request per bank.
            reg  [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] bank_head_addr_r;
            reg  [NUM_BANKS-1:0]                       bank_head_rw_r;
            wire [NUM_REQS-1:0]                        req_line_match;

            always @(*) begin
                bank_head_addr_r = 'x;
                bank_head_rw_r   = 'x;
                for (integer r = NUM_REQS-1; r >= 0; --r) begin
                    if (core_req_valid[r]) begin
                        bank_head_addr_r[req_bank_id[r]] = req_line_addr[r];
                        bank_head_rw_r[req_bank_id[r]]   = core_req_rw[r];
                    end
                end
            end

            // A request can share the bank access only if it hits the same
            // line with the same rw flag as the bank's winning request.
            for (genvar r = 0; r < NUM_REQS; ++r) begin
                assign req_line_match[r] = (core_req_rw[r] == bank_head_rw_r[req_bank_id[r]])
                                        && (req_line_addr[r] == bank_head_addr_r[req_bank_id[r]]);
            end

            // Pack requests into their bank's port slots; descending walk so
            // the lowest index ends up owning any contended slot.
            always @(*) begin
                bank_valid_r  = 0;
                bank_pmask_r  = 0;
                bank_rw_r     = 'x;
                bank_addr_r   = 'x;
                bank_wsel_r   = 'x;
                bank_byteen_r = 'x;
                bank_data_r   = 'x;
                bank_tag_r    = 'x;
                bank_tid_r    = 'x;

                for (integer r = NUM_REQS-1; r >= 0; --r) begin
                    if (core_req_valid[r]) begin
                        bank_valid_r[req_bank_id[r]] = 1;
                        bank_rw_r[req_bank_id[r]]    = core_req_rw[r];
                        bank_addr_r[req_bank_id[r]]  = req_line_addr[r];
                        bank_pmask_r[req_bank_id[r]][r % NUM_PORTS]  = req_line_match[r];
                        bank_wsel_r[req_bank_id[r]][r % NUM_PORTS]   = req_wsel[r];
                        bank_byteen_r[req_bank_id[r]][r % NUM_PORTS] = core_req_byteen[r];
                        bank_data_r[req_bank_id[r]][r % NUM_PORTS]   = core_req_data[r];
                        bank_tid_r[req_bank_id[r]][r % NUM_PORTS]    = `REQS_BITS'(r);
                        bank_tag_r[req_bank_id[r]][r % NUM_PORTS]    = core_req_tag[r];
                    end
                end
            end

            if (NUM_PORTS < NUM_REQS) begin

                // Several requests may map onto the same (bank, port) slot, so
                // record which request actually owns each slot.
                reg [NUM_BANKS-1:0][NUM_PORTS-1:0][NUM_REQS-1:0] port_owner_r;

                always @(*) begin
                    port_owner_r = 'x;
                    for (integer r = NUM_REQS-1; r >= 0; --r) begin
                        if (core_req_valid[r]) begin
                            port_owner_r[req_bank_id[r]][r % NUM_PORTS] = (1 << r);
                        end
                    end
                end

                // ready = bank ready, same line as the winner, and slot owner
                always @(*) begin
                    for (integer r = 0; r < NUM_REQS; ++r) begin
                        req_ready_r[r] = per_bank_core_req_ready[req_bank_id[r]]
                                      && req_line_match[r]
                                      && port_owner_r[req_bank_id[r]][r % NUM_PORTS][r];
                    end
                end

            end else begin

                // Every request has a private port slot: no ownership check.
                always @(*) begin
                    for (integer r = 0; r < NUM_REQS; ++r) begin
                        req_ready_r[r] = per_bank_core_req_ready[req_bank_id[r]]
                                      && req_line_match[r];
                    end
                end

            end

        end else begin

            // ================= multiple requests, single-port banks ==========

            always @(*) begin
                bank_valid_r  = 0;
                bank_rw_r     = 'x;
                bank_addr_r   = 'x;
                bank_wsel_r   = 'x;
                bank_byteen_r = 'x;
                bank_data_r   = 'x;
                bank_tag_r    = 'x;
                bank_tid_r    = 'x;

                for (integer r = NUM_REQS-1; r >= 0; --r) begin
                    if (core_req_valid[r]) begin
                        bank_valid_r[req_bank_id[r]]  = 1;
                        bank_tid_r[req_bank_id[r]]    = `REQS_BITS'(r);
                        bank_tag_r[req_bank_id[r]]    = core_req_tag[r];
                        bank_rw_r[req_bank_id[r]]     = core_req_rw[r];
                        bank_addr_r[req_bank_id[r]]   = req_line_addr[r];
                        bank_wsel_r[req_bank_id[r]]   = req_wsel[r];
                        bank_byteen_r[req_bank_id[r]] = core_req_byteen[r];
                        bank_data_r[req_bank_id[r]]   = core_req_data[r];
                    end
                end

                // one port per bank: the port mask is just the valid bit
                bank_pmask_r = bank_valid_r;
            end

            if (NUM_BANKS > 1) begin
                // Only the request selected for a bank sees that bank's ready.
                always @(*) begin
                    req_ready_r = 0;
                    for (integer b = 0; b < NUM_BANKS; ++b) begin
                        if (bank_valid_r[b]) begin
                            req_ready_r[bank_tid_r[b]] = per_bank_core_req_ready[b];
                        end
                    end
                end
            end else begin
                always @(*) begin
                    req_ready_r = 0;
                    req_ready_r[bank_tid_r[0]] = per_bank_core_req_ready;
                end
            end

        end

    end else begin

        // ========================= single request ============================
        // NOTE(review): the NUM_BANKS <= NUM_REQS static assert above presumably
        // leaves only the single-bank arm reachable here - confirm.

        if (NUM_BANKS <= 1) begin

            `UNUSED_VAR (req_bank_id)

            // one request, one bank: straight pass-through
            always @(*) begin
                bank_valid_r  = core_req_valid;
                bank_rw_r     = core_req_rw;
                bank_addr_r   = req_line_addr;
                bank_wsel_r   = req_wsel;
                bank_byteen_r = core_req_byteen;
                bank_data_r   = core_req_data;
                bank_tag_r    = core_req_tag;
                bank_tid_r    = 0;
                req_ready_r   = per_bank_core_req_ready;

                bank_pmask_r  = bank_valid_r;
            end

        end else begin

            // one request steered to the bank selected by its address
            always @(*) begin
                bank_valid_r  = 0;
                bank_rw_r     = 'x;
                bank_addr_r   = 'x;
                bank_wsel_r   = 'x;
                bank_byteen_r = 'x;
                bank_data_r   = 'x;
                bank_tag_r    = 'x;
                bank_tid_r    = 'x;

                bank_valid_r[req_bank_id[0]]  = core_req_valid;
                bank_rw_r[req_bank_id[0]]     = core_req_rw;
                bank_addr_r[req_bank_id[0]]   = req_line_addr;
                bank_wsel_r[req_bank_id[0]]   = req_wsel;
                bank_byteen_r[req_bank_id[0]] = core_req_byteen;
                bank_data_r[req_bank_id[0]]   = core_req_data;
                bank_tag_r[req_bank_id[0]]    = core_req_tag;
                bank_tid_r[req_bank_id[0]]    = 0;
                req_ready_r = per_bank_core_req_ready[req_bank_id[0]];

                bank_pmask_r = bank_valid_r;
            end

        end

    end

    // ---- Drive module outputs from the staging signals ----------------------

    assign core_req_ready           = req_ready_r;

    assign per_bank_core_req_valid  = bank_valid_r;
    assign per_bank_core_req_pmask  = bank_pmask_r;
    assign per_bank_core_req_rw     = bank_rw_r;
    assign per_bank_core_req_addr   = bank_addr_r;
    assign per_bank_core_req_wsel   = bank_wsel_r;
    assign per_bank_core_req_byteen = bank_byteen_r;
    assign per_bank_core_req_data   = bank_data_r;
    assign per_bank_core_req_tid    = bank_tid_r;
    assign per_bank_core_req_tag    = bank_tag_r;

`ifdef PERF_ENABLE
    // A request counts as stalled in a cycle when it is valid, its target
    // bank is ready, and it was nevertheless not accepted.
    reg [NUM_REQS-1:0] req_bank_ready_r;

    always @(*) begin
        req_bank_ready_r = 0;
        for (integer r = 0; r < NUM_REQS; ++r) begin
            if (core_req_valid[r]) begin
                req_bank_ready_r[r] = per_bank_core_req_ready[req_bank_id[r]];
            end
        end
    end

    reg  [`PERF_CTR_BITS-1:0]      stall_count_r;
    wire [$clog2(NUM_REQS+1)-1:0]  stalls_this_cycle;

    wire [NUM_REQS-1:0] stalled_reqs = req_bank_ready_r & ~core_req_ready;
    `POP_COUNT(stalls_this_cycle, stalled_reqs);

    // running total of stalled requests, cleared on reset
    always @(posedge clk) begin
        if (reset) begin
            stall_count_r <= 0;
        end else begin
            stall_count_r <= stall_count_r + `PERF_CTR_BITS'(stalls_this_cycle);
        end
    end

    assign bank_stalls = stall_count_r;
`endif

endmodule
|
||||
350
hw/rtl/cache/VX_core_rsp_merge.sv
vendored
350
hw/rtl/cache/VX_core_rsp_merge.sv
vendored
@@ -1,350 +0,0 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// VX_core_rsp_merge
//
// Collects the word responses produced by the cache banks and presents them
// on the core response interface. The structure is chosen at elaboration time:
//   * NUM_BANKS > 1, CORE_TAG_ID_BITS != 0 : one tag is selected per cycle and
//     every pending bank response carrying the same tag id is sent with it
//     through a single output buffer.
//   * NUM_BANKS > 1, CORE_TAG_ID_BITS == 0 : every requester lane has its own
//     tag and its own output buffer.
//   * NUM_BANKS == 1                       : purely combinational routing.
module VX_core_rsp_merge #(
    parameter CACHE_ID          = 0,

    // Number of word requests per cycle
    parameter NUM_REQS          = 1,
    // Number of banks
    parameter NUM_BANKS         = 1,
    // Number of ports per bank
    parameter NUM_PORTS         = 1,
    // Size of a word in bytes
    parameter WORD_SIZE         = 1,
    // Width of the core request tag
    parameter CORE_TAG_WIDTH    = 1,
    // Width of the tag-id field located in the low bits of the core request tag
    parameter CORE_TAG_ID_BITS  = 0,
    // Output register; 0 puts the output skid buffers in pass-through mode
    parameter OUT_REG           = 0
) (
    input wire clk,
    input wire reset,

    // Per-bank responses
    input  wire [NUM_BANKS-1:0]                                     per_bank_core_rsp_valid,
    input  wire [NUM_BANKS-1:0][NUM_PORTS-1:0]                      per_bank_core_rsp_pmask,
    input  wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0]     per_bank_core_rsp_data,
    input  wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0]      per_bank_core_rsp_tid,
    input  wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0]  per_bank_core_rsp_tag,
    output wire [NUM_BANKS-1:0]                                     per_bank_core_rsp_ready,

    // Core response
    output wire [`CORE_RSP_TAGS-1:0]                                core_rsp_valid,
    output wire [NUM_REQS-1:0]                                      core_rsp_tmask,
    output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0]            core_rsp_tag,
    output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]                     core_rsp_data,
    input  wire [`CORE_RSP_TAGS-1:0]                                core_rsp_ready
);
    `UNUSED_PARAM (CACHE_ID)

    if (NUM_BANKS > 1) begin

        reg [NUM_REQS-1:0]                  core_rsp_valid_unqual;
        reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
        reg [NUM_BANKS-1:0]                 per_bank_core_rsp_ready_r;

        if (CORE_TAG_ID_BITS != 0) begin

            // Only one tag can be returned per cycle on the core response bus:
            // pick a tag first, then forward every bank response that belongs
            // to that tag as a single batch.

            wire [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
            wire                      core_rsp_ready_unqual;

            if (NUM_PORTS > 1) begin

                // *_sent   : ports accepted downstream during this cycle
                // *_sent_r : ports of the current bank response accepted earlier
                // *_sent_n : union of both
                reg  [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
                wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;

                assign per_bank_core_rsp_sent_n = per_bank_core_rsp_sent_r | per_bank_core_rsp_sent;

                always @(posedge clk) begin
                    if (reset) begin
                        per_bank_core_rsp_sent_r <= '0;
                    end else begin
                        for (integer b = 0; b < NUM_BANKS; ++b) begin
                            // once every masked port went out, start over for the next response
                            if (per_bank_core_rsp_sent_n[b] == per_bank_core_rsp_pmask[b]) begin
                                per_bank_core_rsp_sent_r[b] <= '0;
                            end else begin
                                per_bank_core_rsp_sent_r[b] <= per_bank_core_rsp_sent_n[b];
                            end
                        end
                    end
                end

                // A port is pending when its bank is valid, the port is masked in,
                // and it has not been sent yet.
                wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_valid_p;
                for (genvar b = 0; b < NUM_BANKS; ++b) begin
                    for (genvar q = 0; q < NUM_PORTS; ++q) begin
                        assign per_bank_core_rsp_valid_p[b][q] = per_bank_core_rsp_valid[b]
                                                              && per_bank_core_rsp_pmask[b][q]
                                                              && !per_bank_core_rsp_sent_r[b][q];
                    end
                end

                // Tag of the pending port selected by VX_find_first
                VX_find_first #(
                    .N     (NUM_BANKS * NUM_PORTS),
                    .DATAW (CORE_TAG_WIDTH)
                ) find_first (
                    .valid_i (per_bank_core_rsp_valid_p),
                    .data_i  (per_bank_core_rsp_tag),
                    .data_o  (core_rsp_tag_unqual),
                    `UNUSED_PIN (valid_o)
                );

                // Gather every pending port whose tag id equals the selected one
                always @(*) begin
                    core_rsp_valid_unqual  = 0;
                    core_rsp_data_unqual   = 'x;
                    per_bank_core_rsp_sent = 0;

                    for (integer b = 0; b < NUM_BANKS; ++b) begin
                        for (integer q = 0; q < NUM_PORTS; ++q) begin
                            if (per_bank_core_rsp_valid_p[b][q]
                             && (per_bank_core_rsp_tag[b][q][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
                                core_rsp_valid_unqual[per_bank_core_rsp_tid[b][q]] = 1;
                                core_rsp_data_unqual[per_bank_core_rsp_tid[b][q]]  = per_bank_core_rsp_data[b][q];
                                per_bank_core_rsp_sent[b][q] = core_rsp_ready_unqual;
                            end
                        end
                    end
                end

                // A bank is released once all of its masked ports have been sent
                always @(*) begin
                    for (integer b = 0; b < NUM_BANKS; ++b) begin
                        per_bank_core_rsp_ready_r[b] = (per_bank_core_rsp_sent_n[b] == per_bank_core_rsp_pmask[b]);
                    end
                end

            end else begin

                `UNUSED_VAR (per_bank_core_rsp_pmask)

                // Tag of the valid bank selected by VX_find_first
                VX_find_first #(
                    .N     (NUM_BANKS),
                    .DATAW (CORE_TAG_WIDTH)
                ) find_first (
                    .valid_i (per_bank_core_rsp_valid),
                    .data_i  (per_bank_core_rsp_tag),
                    .data_o  (core_rsp_tag_unqual),
                    `UNUSED_PIN (valid_o)
                );

                // Gather every valid bank whose tag id equals the selected one
                always @(*) begin
                    core_rsp_valid_unqual     = 0;
                    core_rsp_data_unqual      = 'x;
                    per_bank_core_rsp_ready_r = 0;

                    for (integer b = 0; b < NUM_BANKS; b++) begin
                        if (per_bank_core_rsp_valid[b]
                         && (per_bank_core_rsp_tag[b][0][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
                            core_rsp_valid_unqual[per_bank_core_rsp_tid[b]] = 1;
                            core_rsp_data_unqual[per_bank_core_rsp_tid[b]]  = per_bank_core_rsp_data[b];
                            per_bank_core_rsp_ready_r[b] = core_rsp_ready_unqual;
                        end
                    end
                end
            end

            wire core_rsp_valid_any = (| per_bank_core_rsp_valid);

            // Single output buffer carrying {lane mask, tag, data}
            VX_skid_buffer #(
                .DATAW    (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
                .PASSTHRU (0 == OUT_REG)
            ) out_sbuf (
                .clk       (clk),
                .reset     (reset),
                .valid_in  (core_rsp_valid_any),
                .data_in   ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}),
                .ready_in  (core_rsp_ready_unqual),
                .valid_out (core_rsp_valid),
                .data_out  ({core_rsp_tmask, core_rsp_tag, core_rsp_data}),
                .ready_out (core_rsp_ready)
            );

        end else begin

            // No tag id: each requester lane returns its own tag through its
            // own output buffer.

            reg  [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
            wire [NUM_REQS-1:0]                     core_rsp_ready_unqual;

            if (NUM_PORTS > 1) begin

                // per lane: {port index, bank index} of the response driving it
                reg [NUM_REQS-1:0][(`PORTS_BITS + `BANK_SELECT_BITS)-1:0] bank_select_table;

                reg  [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
                wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;

                assign per_bank_core_rsp_sent_n = per_bank_core_rsp_sent_r | per_bank_core_rsp_sent;

                always @(posedge clk) begin
                    if (reset) begin
                        per_bank_core_rsp_sent_r <= '0;
                    end else begin
                        for (integer b = 0; b < NUM_BANKS; ++b) begin
                            // once every masked port went out, start over for the next response
                            if (per_bank_core_rsp_sent_n[b] == per_bank_core_rsp_pmask[b]) begin
                                per_bank_core_rsp_sent_r[b] <= '0;
                            end else begin
                                per_bank_core_rsp_sent_r[b] <= per_bank_core_rsp_sent_n[b];
                            end
                        end
                    end
                end

                // Banks are visited from highest to lowest, so on a lane conflict
                // the assignment made for the lowest bank index is the one kept.
                always @(*) begin
                    core_rsp_valid_unqual = '0;
                    core_rsp_tag_unqual   = 'x;
                    core_rsp_data_unqual  = 'x;
                    bank_select_table     = 'x;

                    for (integer b = NUM_BANKS-1; b >= 0; --b) begin
                        for (integer q = 0; q < NUM_PORTS; ++q) begin
                            if (per_bank_core_rsp_valid[b]
                             && per_bank_core_rsp_pmask[b][q]
                             && !per_bank_core_rsp_sent_r[b][q]) begin
                                core_rsp_valid_unqual[per_bank_core_rsp_tid[b][q]] = 1;
                                core_rsp_tag_unqual[per_bank_core_rsp_tid[b][q]]   = per_bank_core_rsp_tag[b][q];
                                core_rsp_data_unqual[per_bank_core_rsp_tid[b][q]]  = per_bank_core_rsp_data[b][q];
                                bank_select_table[per_bank_core_rsp_tid[b][q]]     = {`PORTS_BITS'(q), `BANK_SELECT_BITS'(b)};
                            end
                        end
                    end
                end

                // Mark the {bank, port} feeding each valid lane as sent when that lane is accepted
                always @(*) begin
                    per_bank_core_rsp_sent = '0;
                    for (integer r = 0; r < NUM_REQS; r++) begin
                        if (core_rsp_valid_unqual[r]) begin
                            per_bank_core_rsp_sent[bank_select_table[r][0 +: `BANK_SELECT_BITS]][bank_select_table[r][`BANK_SELECT_BITS +: `PORTS_BITS]] = core_rsp_ready_unqual[r];
                        end
                    end
                end

                // A bank is released once all of its masked ports have been sent
                always @(*) begin
                    for (integer b = 0; b < NUM_BANKS; b++) begin
                        per_bank_core_rsp_ready_r[b] = (per_bank_core_rsp_sent_n[b] == per_bank_core_rsp_pmask[b]);
                    end
                end

            end else begin

                `UNUSED_VAR (per_bank_core_rsp_pmask)

                // per lane: one-hot id of the bank driving it
                reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;

                // Banks are visited from highest to lowest, so on a lane conflict
                // the assignment made for the lowest bank index is the one kept.
                always @(*) begin
                    core_rsp_valid_unqual = 0;
                    core_rsp_tag_unqual   = 'x;
                    core_rsp_data_unqual  = 'x;
                    bank_select_table     = 'x;

                    for (integer b = NUM_BANKS-1; b >= 0; --b) begin
                        if (per_bank_core_rsp_valid[b]) begin
                            core_rsp_valid_unqual[per_bank_core_rsp_tid[b]] = 1;
                            core_rsp_tag_unqual[per_bank_core_rsp_tid[b]]   = per_bank_core_rsp_tag[b];
                            core_rsp_data_unqual[per_bank_core_rsp_tid[b]]  = per_bank_core_rsp_data[b];
                            bank_select_table[per_bank_core_rsp_tid[b]]     = (1 << b);
                        end
                    end
                end

                // A bank is ready when its target lane is ready and that lane selected this bank
                always @(*) begin
                    for (integer b = 0; b < NUM_BANKS; ++b) begin
                        per_bank_core_rsp_ready_r[b] = core_rsp_ready_unqual[per_bank_core_rsp_tid[b]]
                                                    && bank_select_table[per_bank_core_rsp_tid[b]][b];
                    end
                end
            end

            // One output buffer per requester lane carrying {tag, data}
            for (genvar r = 0; r < NUM_REQS; r++) begin
                VX_skid_buffer #(
                    .DATAW    (CORE_TAG_WIDTH + `WORD_WIDTH),
                    .PASSTHRU (0 == OUT_REG)
                ) out_sbuf (
                    .clk       (clk),
                    .reset     (reset),
                    .valid_in  (core_rsp_valid_unqual[r]),
                    .data_in   ({core_rsp_tag_unqual[r], core_rsp_data_unqual[r]}),
                    .ready_in  (core_rsp_ready_unqual[r]),
                    .valid_out (core_rsp_valid[r]),
                    .data_out  ({core_rsp_tag[r],core_rsp_data[r]}),
                    .ready_out (core_rsp_ready[r])
                );
            end

            assign core_rsp_tmask = core_rsp_valid;

        end

        assign per_bank_core_rsp_ready = per_bank_core_rsp_ready_r;

    end else begin

        // Single bank: no arbitration or buffering, only lane routing.

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        `UNUSED_VAR (per_bank_core_rsp_pmask)

        if (NUM_REQS > 1) begin

            reg [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
            reg [NUM_REQS-1:0][`WORD_WIDTH-1:0]          core_rsp_data_unqual;

            if (CORE_TAG_ID_BITS != 0) begin

                reg [NUM_REQS-1:0] core_rsp_tmask_unqual;

                // Shared tag; the bank's tid selects the lane that receives the data
                always @(*) begin
                    core_rsp_tmask_unqual = 0;
                    core_rsp_tmask_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;

                    core_rsp_tag_unqual = per_bank_core_rsp_tag;

                    core_rsp_data_unqual = 'x;
                    core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
                end

                assign core_rsp_valid = per_bank_core_rsp_valid;
                assign core_rsp_tmask = core_rsp_tmask_unqual;
                assign per_bank_core_rsp_ready = core_rsp_ready;

            end else begin

                reg [`CORE_RSP_TAGS-1:0] core_rsp_valid_unqual;

                // Per-lane tag; valid, tag and data are all steered by the bank's tid
                always @(*) begin
                    core_rsp_valid_unqual = 0;
                    core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;

                    core_rsp_tag_unqual = 'x;
                    core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag;

                    core_rsp_data_unqual = 'x;
                    core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
                end

                assign core_rsp_valid = core_rsp_valid_unqual;
                assign core_rsp_tmask = core_rsp_valid_unqual;
                assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid];

            end

            assign core_rsp_tag  = core_rsp_tag_unqual;
            assign core_rsp_data = core_rsp_data_unqual;

        end else begin

            // One bank and one requester: straight pass-through
            `UNUSED_VAR(per_bank_core_rsp_tid)
            assign core_rsp_valid = per_bank_core_rsp_valid;
            assign core_rsp_tmask = per_bank_core_rsp_valid;
            assign core_rsp_tag   = per_bank_core_rsp_tag;
            assign core_rsp_data  = per_bank_core_rsp_data;
            assign per_bank_core_rsp_ready = core_rsp_ready;

        end
    end

endmodule
|
||||
133
hw/rtl/cache/VX_data_access.sv
vendored
133
hw/rtl/cache/VX_data_access.sv
vendored
@@ -1,133 +0,0 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// VX_data_access
//
// Data store of one cache bank. A single-port RAM holds one cache line per
// entry; a fill writes a whole line, a core write updates selected bytes, and
// every port reads the word picked by its `wsel` out of the addressed line.
module VX_data_access #(
    parameter CACHE_ID          = 0,
    parameter BANK_ID           = 0,
    // Cache size in bytes
    parameter CACHE_SIZE        = 1,
    // Line size inside a bank in bytes
    parameter CACHE_LINE_SIZE   = 1,
    // Number of banks
    parameter NUM_BANKS         = 1,
    // Number of ports per bank
    parameter NUM_PORTS         = 1,
    // Word size in bytes
    parameter WORD_SIZE         = 1,
    // Non-zero when the cache accepts writes
    parameter WRITE_ENABLE      = 1,

    parameter WORD_SELECT_BITS  = `UP(`WORD_SELECT_BITS)
) (
    input wire clk,
    input wire reset,

`IGNORE_UNUSED_BEGIN
    // request id, only consumed by the debug trace
    input wire[`DBG_CACHE_REQ_IDW-1:0] req_id,
`IGNORE_UNUSED_END

    input wire stall,

    input wire read,
    input wire fill,
    input wire write,
    input wire[`LINE_ADDR_WIDTH-1:0] addr,
    input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel,
    input wire [NUM_PORTS-1:0] pmask,
    input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen,
    input wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] fill_data,
    input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] write_data,
    output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] read_data
);
    `UNUSED_PARAM (CACHE_ID)
    `UNUSED_PARAM (BANK_ID)
    `UNUSED_PARAM (WORD_SIZE)
    `UNUSED_VAR (reset)
    `UNUSED_VAR (addr)
    `UNUSED_VAR (read)

    // one write-enable bit per line byte when writable, a single bit otherwise
    localparam BYTEENW = WRITE_ENABLE ? CACHE_LINE_SIZE : 1;

    wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] line_rdata;
    wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] line_wdata;
    wire [BYTEENW-1:0] line_wren;

    // RAM index: low bits of the line address
    wire [`LINE_SELECT_BITS-1:0] line_addr = addr[`LINE_SELECT_BITS-1:0];

    if (WRITE_ENABLE) begin
        if (`WORDS_PER_LINE > 1) begin
            // line-wide write data and byte enables assembled from the ports
            reg [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] merged_wdata;
            reg [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0]   merged_wren;

            if (NUM_PORTS > 1) begin
                // place each active port at its word slot; a later port
                // overrides an earlier one that targets the same slot
                always @(*) begin
                    merged_wdata = 'x;
                    merged_wren  = 0;
                    for (integer p = 0; p < NUM_PORTS; ++p) begin
                        if (pmask[p]) begin
                            merged_wdata[wsel[p]] = write_data[p];
                            merged_wren[wsel[p]]  = byteen[p];
                        end
                    end
                end
            end else begin
                `UNUSED_VAR (pmask)
                // replicate the word across the line; only the selected slot is enabled
                always @(*) begin
                    merged_wdata = {`WORDS_PER_LINE{write_data}};
                    merged_wren  = 0;
                    merged_wren[wsel] = byteen;
                end
            end

            assign line_wdata = write ? merged_wdata : fill_data;
            assign line_wren  = write ? merged_wren : {BYTEENW{fill}};
        end else begin
            `UNUSED_VAR (wsel)
            `UNUSED_VAR (pmask)
            assign line_wdata = write ? write_data : fill_data;
            assign line_wren  = write ? byteen : {BYTEENW{fill}};
        end
    end else begin
        // read-only cache: the store is written by fills only
        `UNUSED_VAR (write)
        `UNUSED_VAR (byteen)
        `UNUSED_VAR (pmask)
        `UNUSED_VAR (write_data)
        assign line_wdata = fill_data;
        assign line_wren  = fill;
    end

    VX_sp_ram #(
        .DATAW      (`CACHE_LINE_WIDTH),
        .SIZE       (`LINES_PER_BANK),
        .BYTEENW    (BYTEENW),
        .NO_RWCHECK (1)
    ) data_store (
        .clk   (clk),
        .addr  (line_addr),
        .wren  (line_wren),
        .wdata (line_wdata),
        .rdata (line_rdata)
    );

    // per-port word extraction from the line that was read
    if (`WORDS_PER_LINE > 1) begin
        for (genvar p = 0; p < NUM_PORTS; ++p) begin
            assign read_data[p] = line_rdata[wsel[p]];
        end
    end else begin
        assign read_data = line_rdata;
    end

    `UNUSED_VAR (stall)

`ifdef DBG_TRACE_CACHE_DATA
    always @(posedge clk) begin
        if (fill && ~stall) begin
            dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data);
        end
        if (read && ~stall) begin
            dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, blk_addr=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, read_data, req_id);
        end
        if (write && ~stall) begin
            dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, byteen=%b, blk_addr=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), byteen, line_addr, write_data, req_id);
        end
    end
`endif

endmodule
|
||||
36
hw/rtl/cache/VX_flush_ctrl.sv
vendored
36
hw/rtl/cache/VX_flush_ctrl.sv
vendored
@@ -1,36 +0,0 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// VX_flush_ctrl
//
// Post-reset line sweeper: starting on the cycle after reset is released it
// drives `valid_out` high and steps `addr_out` through every line index from
// 0 to 2**LINE_SELECT_BITS - 1, one index per clock, then drops `valid_out`
// until the next reset.
module VX_flush_ctrl #(
    // Cache size in bytes
    parameter CACHE_SIZE        = 16384,
    // Line size inside a bank in bytes
    parameter CACHE_LINE_SIZE   = 1,
    // Number of banks
    parameter NUM_BANKS         = 1
) (
    input wire  clk,
    input wire  reset,
    output wire [`LINE_SELECT_BITS-1:0] addr_out,
    output wire valid_out
);
    reg                         sweep_active;   // high while the sweep is running
    reg [`LINE_SELECT_BITS-1:0] sweep_line;     // line index currently presented

    // true while the last line index is being presented
    wire sweep_last = (sweep_line == ((2 ** `LINE_SELECT_BITS)-1));

    always @(posedge clk) begin
        if (reset) begin
            sweep_active <= 1;
            sweep_line   <= 0;
        end else if (sweep_active) begin
            if (sweep_last) begin
                sweep_active <= 0;
            end
            // the counter wraps to zero after the last line and then holds
            sweep_line <= sweep_line + 1;
        end
    end

    assign addr_out  = sweep_line;
    assign valid_out = sweep_active;

endmodule
|
||||
234
hw/rtl/cache/VX_miss_resrv.sv
vendored
234
hw/rtl/cache/VX_miss_resrv.sv
vendored
@@ -1,234 +0,0 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// VX_miss_resrv
//
// Miss reservation table of a cache bank. Each of the MSHR_SIZE entries holds
// a line address (addr_table), a valid bit, a ready bit and a request payload
// kept in a small dual-port RAM.
//   allocate : reserve the entry advertised on allocate_id for a new request
//   fill     : a memory fill for entry fill_id arrived; dequeue starts there
//   lookup   : report whether another valid entry holds lookup_addr; with
//              lookup_replay, mark every entry with that address as ready
//   dequeue  : present ready entries one at a time for replay
//   release  : free an entry
module VX_miss_resrv #(
    parameter CACHE_ID          = 0,
    parameter BANK_ID           = 0,

    // Number of word requests per cycle
    parameter NUM_REQS          = 1,

    // Size of line inside a bank in bytes
    parameter CACHE_LINE_SIZE   = 1,
    // Number of banks
    parameter NUM_BANKS         = 1,
    // Number of ports per bank
    parameter NUM_PORTS         = 1,
    // Size of a word in bytes
    parameter WORD_SIZE         = 1,
    // Number of miss reservation entries
    parameter MSHR_SIZE         = 1,
    // Core request tag size
    parameter CORE_TAG_WIDTH    = 1,

    parameter MSHR_ADDR_WIDTH   = $clog2(MSHR_SIZE)
) (
    input wire clk,
    input wire reset,

`IGNORE_UNUSED_BEGIN
    // request ids, only consumed by the debug trace
    input wire[`DBG_CACHE_REQ_IDW-1:0]  deq_req_id,
    input wire[`DBG_CACHE_REQ_IDW-1:0]  lkp_req_id,
    input wire[`DBG_CACHE_REQ_IDW-1:0]  rel_req_id,
`IGNORE_UNUSED_END

    // allocate
    input wire                          allocate_valid,
    input wire [`LINE_ADDR_WIDTH-1:0]   allocate_addr,
    input wire [`MSHR_DATA_WIDTH-1:0]   allocate_data,
    output wire [MSHR_ADDR_WIDTH-1:0]   allocate_id,
    output wire                         allocate_ready,

    // fill
    input wire                          fill_valid,
    input wire [MSHR_ADDR_WIDTH-1:0]    fill_id,
    output wire [`LINE_ADDR_WIDTH-1:0]  fill_addr,

    // lookup
    input wire                          lookup_valid,
    input wire                          lookup_replay,
    input wire [MSHR_ADDR_WIDTH-1:0]    lookup_id,
    input wire [`LINE_ADDR_WIDTH-1:0]   lookup_addr,
    output wire                         lookup_match,

    // dequeue
    output wire                         dequeue_valid,
    output wire [MSHR_ADDR_WIDTH-1:0]   dequeue_id,
    output wire [`LINE_ADDR_WIDTH-1:0]  dequeue_addr,
    output wire [`MSHR_DATA_WIDTH-1:0]  dequeue_data,
    input wire                          dequeue_ready,

    // release
    input wire                          release_valid,
    input wire [MSHR_ADDR_WIDTH-1:0]    release_id
);
    `UNUSED_PARAM (CACHE_ID)
    `UNUSED_PARAM (BANK_ID)

    // Naming: no suffix / _r = registered state, _x = state after this cycle's
    // dequeue and replay, _n = value registered at the next clock edge.
    reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table, addr_table_n;
    reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
    reg [MSHR_SIZE-1:0] ready_table, ready_table_n;

    reg allocate_rdy_r, allocate_rdy_n;
    reg [MSHR_ADDR_WIDTH-1:0] allocate_id_r, allocate_id_n;

    reg dequeue_val_r, dequeue_val_n, dequeue_val_x;
    reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n, dequeue_id_x;

    reg [MSHR_SIZE-1:0] valid_table_x;
    reg [MSHR_SIZE-1:0] ready_table_x;

    wire [MSHR_SIZE-1:0] addr_matches;

    wire allocate_fire = allocate_valid && allocate_ready;

    wire dequeue_fire = dequeue_valid && dequeue_ready;

    // entries whose stored address equals the lookup address (valid or not)
    for (genvar i = 0; i < MSHR_SIZE; ++i) begin
        assign addr_matches[i] = (addr_table[i] == lookup_addr);
    end

    always @(*) begin
        valid_table_x = valid_table;
        ready_table_x = ready_table;
        if (dequeue_fire) begin
            // the entry handed out this cycle no longer competes for dequeue
            valid_table_x[dequeue_id] = 0;
        end
        if (lookup_replay) begin
            // make every entry holding the replayed address eligible for dequeue
            ready_table_x |= addr_matches;
        end
    end

    // next dequeue candidate among valid and ready entries
    // (selection order is defined by VX_lzc)
    VX_lzc #(
        .N (MSHR_SIZE)
    ) dequeue_sel (
        .in_i    (valid_table_x & ready_table_x),
        .cnt_o   (dequeue_id_x),
        .valid_o (dequeue_val_x)
    );

    // next free entry, computed from the post-update valid table
    VX_lzc #(
        .N (MSHR_SIZE)
    ) allocate_sel (
        .in_i    (~valid_table_n),
        .cnt_o   (allocate_id_n),
        .valid_o (allocate_rdy_n)
    );

    always @(*) begin
        valid_table_n = valid_table_x;
        ready_table_n = ready_table_x;
        addr_table_n  = addr_table;
        dequeue_val_n = dequeue_val_r;
        dequeue_id_n  = dequeue_id_r;

        if (dequeue_fire) begin
            dequeue_val_n = dequeue_val_x;
            dequeue_id_n  = dequeue_id_x;
        end

        if (allocate_fire) begin
            valid_table_n[allocate_id] = 1;
            ready_table_n[allocate_id] = 0;
            addr_table_n[allocate_id]  = allocate_addr;
        end

        // a fill takes precedence over the dequeue selection above
        if (fill_valid) begin
            dequeue_val_n = 1;
            dequeue_id_n  = fill_id;
        end

        if (release_valid) begin
            valid_table_n[release_id] = 0;
        end
    end

    always @(posedge clk) begin
        if (reset) begin
            valid_table    <= 0;
            allocate_rdy_r <= 0;
            dequeue_val_r  <= 0;
        end else begin
            valid_table    <= valid_table_n;
            allocate_rdy_r <= allocate_rdy_n;
            dequeue_val_r  <= dequeue_val_n;
        end
        // not reset: these are only meaningful under the valid flags above
        ready_table   <= ready_table_n;
        addr_table    <= addr_table_n;
        dequeue_id_r  <= dequeue_id_n;
        allocate_id_r <= allocate_id_n;

        `ASSERT(!allocate_fire || !valid_table[allocate_id_r], ("runtime error"));
        `ASSERT(!release_valid || valid_table[release_id], ("runtime error"));
    end

    `RUNTIME_ASSERT((!allocate_fire || ~valid_table[allocate_id]), ("%t: *** cache%0d:%0d in-use allocation: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
        `LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id))

    `RUNTIME_ASSERT((!fill_valid || valid_table[fill_id]), ("%t: *** cache%0d:%0d invalid fill: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
        `LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id))

    // Request payload storage. The write is gated by allocate_fire, exactly
    // like the valid/ready/address tables: a request presented while
    // allocate_ready is low must not overwrite the payload of a live entry.
    VX_dp_ram #(
        .DATAW  (`MSHR_DATA_WIDTH),
        .SIZE   (MSHR_SIZE),
        .LUTRAM (1)
    ) entries (
        .clk   (clk),
        .waddr (allocate_id_r),
        .raddr (dequeue_id_r),
        .wren  (allocate_fire),
        .wdata (allocate_data),
        .rdata (dequeue_data)
    );

    assign fill_addr = addr_table[fill_id];

    assign allocate_ready = allocate_rdy_r;
    assign allocate_id    = allocate_id_r;

    assign dequeue_valid  = dequeue_val_r;
    assign dequeue_id     = dequeue_id_r;
    assign dequeue_addr   = addr_table[dequeue_id_r];

    // a match is any valid entry, other than lookup_id itself, with the same address
    wire [MSHR_SIZE-1:0] lookup_entries;
    for (genvar i = 0; i < MSHR_SIZE; ++i) begin
        assign lookup_entries[i] = (i != lookup_id);
    end
    assign lookup_match = |(lookup_entries & valid_table & addr_matches);

    `UNUSED_VAR (lookup_valid)

`ifdef DBG_TRACE_CACHE_MSHR
    always @(posedge clk) begin
        if (allocate_fire || fill_valid || dequeue_fire || lookup_replay || lookup_valid || release_valid) begin
            if (allocate_fire)
                dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID,
                    `LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_req_id);
            if (fill_valid)
                dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d, addr=%0h\n", $time, CACHE_ID, BANK_ID,
                    `LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id, `LINE_TO_BYTE_ADDR(fill_addr, BANK_ID));
            if (dequeue_fire)
                dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID,
                    `LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_id);
            if (lookup_replay)
                dpi_trace("%d: cache%0d:%0d mshr-replay: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID,
                    `LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lkp_req_id);
            if (lookup_valid)
                dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b (#%0d)\n", $time, CACHE_ID, BANK_ID,
                    `LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lookup_match, lkp_req_id);
            if (release_valid)
                dpi_trace("%d: cache%0d:%0d mshr-release id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, release_id, rel_req_id);
            // dump the valid entries; '*' marks the ones that are ready
            dpi_trace("%d: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID);
            for (integer i = 0; i < MSHR_SIZE; ++i) begin
                if (valid_table[i]) begin
                    dpi_trace(" ");
                    if (ready_table[i])
                        dpi_trace("*");
                    dpi_trace("%0d=%0h", i, `LINE_TO_BYTE_ADDR(addr_table[i], BANK_ID));
                end
            end
            dpi_trace("\n");
        end
    end
`endif

endmodule
|
||||
323
hw/rtl/cache/VX_nc_bypass.sv
vendored
323
hw/rtl/cache/VX_nc_bypass.sv
vendored
@@ -1,323 +0,0 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_nc_bypass #(
|
||||
parameter NUM_PORTS = 1,
|
||||
parameter NUM_REQS = 1,
|
||||
parameter NUM_RSP_TAGS = 0,
|
||||
parameter NC_TAG_BIT = 0,
|
||||
|
||||
parameter CORE_ADDR_WIDTH = 1,
|
||||
parameter CORE_DATA_SIZE = 1,
|
||||
parameter CORE_TAG_IN_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
parameter MEM_DATA_SIZE = 1,
|
||||
parameter MEM_TAG_IN_WIDTH = 1,
|
||||
parameter MEM_TAG_OUT_WIDTH = 1,
|
||||
|
||||
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
||||
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
||||
parameter CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
|
||||
parameter MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Core request in
|
||||
input wire [NUM_REQS-1:0] core_req_valid_in,
|
||||
input wire [NUM_REQS-1:0] core_req_rw_in,
|
||||
input wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_in,
|
||||
input wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_in,
|
||||
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_in,
|
||||
input wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_req_tag_in,
|
||||
output wire [NUM_REQS-1:0] core_req_ready_in,
|
||||
|
||||
// Core request out
|
||||
output wire [NUM_REQS-1:0] core_req_valid_out,
|
||||
output wire [NUM_REQS-1:0] core_req_rw_out,
|
||||
output wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_out,
|
||||
output wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_out,
|
||||
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_out,
|
||||
output wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_req_tag_out,
|
||||
input wire [NUM_REQS-1:0] core_req_ready_out,
|
||||
|
||||
// Core response in
|
||||
input wire [NUM_RSP_TAGS-1:0] core_rsp_valid_in,
|
||||
input wire [NUM_REQS-1:0] core_rsp_tmask_in,
|
||||
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in,
|
||||
input wire [NUM_RSP_TAGS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_rsp_tag_in,
|
||||
output wire [NUM_RSP_TAGS-1:0] core_rsp_ready_in,
|
||||
|
||||
// Core response out
|
||||
output wire [NUM_RSP_TAGS-1:0] core_rsp_valid_out,
|
||||
output wire [NUM_REQS-1:0] core_rsp_tmask_out,
|
||||
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out,
|
||||
output wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out,
|
||||
input wire [NUM_RSP_TAGS-1:0] core_rsp_ready_out,
|
||||
|
||||
// Memory request in
|
||||
input wire mem_req_valid_in,
|
||||
input wire mem_req_rw_in,
|
||||
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
|
||||
input wire [NUM_PORTS-1:0] mem_req_pmask_in,
|
||||
input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in,
|
||||
input wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in,
|
||||
input wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in,
|
||||
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
|
||||
output wire mem_req_ready_in,
|
||||
|
||||
// Memory request out
|
||||
output wire mem_req_valid_out,
|
||||
output wire mem_req_rw_out,
|
||||
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
|
||||
output wire [NUM_PORTS-1:0] mem_req_pmask_out,
|
||||
output wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_out,
|
||||
output wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_out,
|
||||
output wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_out,
|
||||
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
|
||||
input wire mem_req_ready_out,
|
||||
|
||||
// Memory response in
|
||||
input wire mem_rsp_valid_in,
|
||||
input wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_in,
|
||||
input wire [MEM_TAG_OUT_WIDTH-1:0] mem_rsp_tag_in,
|
||||
output wire mem_rsp_ready_in,
|
||||
|
||||
// Memory response out
|
||||
output wire mem_rsp_valid_out,
|
||||
output wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_out,
|
||||
output wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_out,
|
||||
input wire mem_rsp_ready_out
|
||||
);
|
||||
`STATIC_ASSERT((NUM_RSP_TAGS == 1 || NUM_RSP_TAGS == NUM_REQS), ("invalid paramter"))
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
localparam CORE_REQ_TIDW = $clog2(NUM_REQS);
|
||||
localparam MUX_DATAW = CORE_TAG_IN_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1;
|
||||
|
||||
localparam CORE_LDATAW = $clog2(CORE_DATA_WIDTH);
|
||||
localparam MEM_LDATAW = $clog2(MEM_DATA_WIDTH);
|
||||
localparam D = MEM_LDATAW - CORE_LDATAW;
|
||||
|
||||
// core request handling
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid_in_nc;
|
||||
wire [NUM_REQS-1:0] core_req_nc_tids;
|
||||
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire core_req_nc_valid;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_nc_tids[i] = core_req_tag_in[i][NC_TAG_BIT];
|
||||
end
|
||||
|
||||
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_tids;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) core_req_sel (
|
||||
.data_in (core_req_valid_in_nc),
|
||||
.index (core_req_nc_tid),
|
||||
.onehot (core_req_nc_sel),
|
||||
.valid_out (core_req_nc_valid)
|
||||
);
|
||||
|
||||
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
|
||||
assign core_req_rw_out = core_req_rw_in;
|
||||
assign core_req_addr_out = core_req_addr_in;
|
||||
assign core_req_byteen_out = core_req_byteen_in;
|
||||
assign core_req_data_out = core_req_data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_bits_remove #(
|
||||
.N (CORE_TAG_IN_WIDTH),
|
||||
.S (1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) core_req_tag_remove (
|
||||
.data_in (core_req_tag_in[i]),
|
||||
.data_out (core_req_tag_out[i])
|
||||
);
|
||||
end
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ?
|
||||
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
|
||||
end
|
||||
end else begin
|
||||
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
|
||||
end
|
||||
|
||||
// memory request handling
|
||||
|
||||
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
|
||||
assign mem_req_ready_in = mem_req_ready_out;
|
||||
|
||||
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_c;
|
||||
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_IN_WIDTH),
|
||||
.S (1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_req_tag_insert (
|
||||
.data_in (mem_req_tag_in),
|
||||
.sel_in ('0),
|
||||
.data_out (mem_req_tag_in_c)
|
||||
);
|
||||
|
||||
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
|
||||
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
|
||||
wire core_req_rw_in_sel;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
|
||||
end
|
||||
|
||||
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_tid];
|
||||
end else begin
|
||||
assign core_req_tag_in_sel = core_req_tag_in;
|
||||
assign core_req_data_in_sel = core_req_data_in;
|
||||
assign core_req_byteen_in_sel = core_req_byteen_in;
|
||||
assign core_req_addr_in_sel = core_req_addr_in;
|
||||
assign core_req_rw_in_sel = core_req_rw_in;
|
||||
end
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
|
||||
|
||||
if (D != 0) begin
|
||||
reg [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in_r;
|
||||
reg [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
||||
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = 0;
|
||||
mem_req_byteen_in_r[0] = core_req_byteen_in_sel;
|
||||
|
||||
mem_req_wsel_in_r = 'x;
|
||||
mem_req_wsel_in_r[0] = req_addr_idx;
|
||||
|
||||
mem_req_data_in_r = 'x;
|
||||
mem_req_data_in_r[0] = core_req_data_in_sel;
|
||||
end
|
||||
|
||||
assign mem_req_pmask_out = mem_req_valid_in ? mem_req_pmask_in : NUM_PORTS'(1'b1);
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_wsel_out = mem_req_valid_in ? mem_req_wsel_in : mem_req_wsel_in_r;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_wsel_in)
|
||||
`UNUSED_VAR (mem_req_pmask_in)
|
||||
assign mem_req_pmask_out = 0;
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
|
||||
assign mem_req_wsel_out = 0;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
|
||||
end
|
||||
|
||||
// core response handling
|
||||
|
||||
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_c;
|
||||
|
||||
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
|
||||
|
||||
for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin
|
||||
VX_bits_insert #(
|
||||
.N (CORE_TAG_OUT_WIDTH),
|
||||
.S (1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) core_rsp_tag_insert (
|
||||
.data_in (core_rsp_tag_in[i]),
|
||||
.sel_in ('0),
|
||||
.data_out (core_rsp_tag_out_c[i])
|
||||
);
|
||||
end
|
||||
|
||||
if (NUM_RSP_TAGS > 1) begin
|
||||
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW];
|
||||
reg [NUM_REQS-1:0] rsp_nc_valid_r;
|
||||
always @(*) begin
|
||||
rsp_nc_valid_r = 0;
|
||||
rsp_nc_valid_r[rsp_tid] = is_mem_rsp_nc;
|
||||
end
|
||||
|
||||
assign core_rsp_valid_out = core_rsp_valid_in | rsp_nc_valid_r;
|
||||
assign core_rsp_tmask_out = core_rsp_tmask_in;
|
||||
assign core_rsp_ready_in = core_rsp_ready_out;
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_IN_WIDTH +: D];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ?
|
||||
core_rsp_data_in[i] : mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_c[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
end
|
||||
end else begin
|
||||
assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc;
|
||||
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_c : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
assign core_rsp_ready_in = core_rsp_ready_out;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW];
|
||||
reg [NUM_REQS-1:0] core_rsp_tmask_in_r;
|
||||
always @(*) begin
|
||||
core_rsp_tmask_in_r = 0;
|
||||
core_rsp_tmask_in_r[rsp_tid] = 1;
|
||||
end
|
||||
assign core_rsp_tmask_out = core_rsp_valid_in ? core_rsp_tmask_in : core_rsp_tmask_in_r;
|
||||
end else begin
|
||||
assign core_rsp_tmask_out = core_rsp_tmask_in || is_mem_rsp_nc;
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_IN_WIDTH +: D];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in ?
|
||||
core_rsp_data_in[i] : mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in ? core_rsp_data_in[i] : mem_rsp_data_in;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// memory response handling
|
||||
|
||||
assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT];
|
||||
assign mem_rsp_data_out = mem_rsp_data_in;
|
||||
|
||||
VX_bits_remove #(
|
||||
.N (MEM_TAG_IN_WIDTH+1),
|
||||
.S (1),
|
||||
.POS (NC_TAG_BIT)
|
||||
) mem_rsp_tag_remove (
|
||||
.data_in (mem_rsp_tag_in[(MEM_TAG_IN_WIDTH+1)-1:0]),
|
||||
.data_out (mem_rsp_tag_out)
|
||||
);
|
||||
|
||||
if (NUM_RSP_TAGS > 1) begin
|
||||
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW];
|
||||
assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in[rsp_tid] && core_rsp_ready_out[rsp_tid]) : mem_rsp_ready_out;
|
||||
end else begin
|
||||
assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in && core_rsp_ready_out) : mem_rsp_ready_out;
|
||||
end
|
||||
|
||||
endmodule
|
||||
371
hw/rtl/cache/VX_shared_mem.sv
vendored
371
hw/rtl/cache/VX_shared_mem.sv
vendored
@@ -1,371 +0,0 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// VX_shared_mem: banked shared memory exposing a cache-style core interface.
//
// Data path, as wired below:
//   1. VX_core_req_bank_sel turns the NUM_REQS core requests into one request
//      slot per bank.
//   2. core_req_queue buffers the whole per-bank batch (output registered).
//   3. every bank is a byte-enabled single-port RAM (data_store).
//   4. read results are regrouped per request lane and pushed into
//      core_rsp_req, one tag per response beat.
//
// A batch leaves core_req_queue once it is write-only, or once the response
// beat covering its last outstanding read bank has been accepted.
module VX_shared_mem #(
    parameter CACHE_ID          = 0,

    // Size of cache in bytes
    parameter CACHE_SIZE        = (1024*16),
    // Number of banks
    parameter NUM_BANKS         = 2,
    // Size of a word in bytes
    parameter WORD_SIZE         = 4,
    // Number of Word requests per cycle
    parameter NUM_REQS          = 4,

    // Core Request Queue Size
    parameter CREQ_SIZE         = 2,
    // Core Response Queue Size
    parameter CRSQ_SIZE         = 2,

    // size of tag id in core request tag
    parameter CORE_TAG_ID_BITS  = 8,

    // core request tag size
    parameter CORE_TAG_WIDTH    = (2 + CORE_TAG_ID_BITS),

    // bank offset from beginning of index range
    parameter BANK_ADDR_OFFSET  = `CLOG2(256)
) (
    input wire clk,
    input wire reset,

    // PERF
`ifdef PERF_ENABLE
    VX_perf_cache_if.master perf_cache_if,
`endif

    // Core request
    input wire [NUM_REQS-1:0]                       core_req_valid,
    input wire [NUM_REQS-1:0]                       core_req_rw,
    input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
    input wire [NUM_REQS-1:0][WORD_SIZE-1:0]        core_req_byteen,
    input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]      core_req_data,
    input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0]   core_req_tag,
    output wire [NUM_REQS-1:0]                      core_req_ready,

    // Core response
    output wire                                     core_rsp_valid,
    output wire [NUM_REQS-1:0]                      core_rsp_tmask,
    output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]     core_rsp_data,
    output wire [CORE_TAG_WIDTH-1:0]                core_rsp_tag,
    input wire                                      core_rsp_ready
);

    `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
    `UNUSED_PARAM (CACHE_ID)
    `UNUSED_PARAM (CORE_TAG_ID_BITS)

    // A "line" is exactly one word here; the cache address macros used below
    // presumably pick this up from VX_cache_define.vh -- TODO confirm.
    localparam CACHE_LINE_SIZE = WORD_SIZE;

    // Per-bank request slots straight out of the bank selector (not yet queued).
    wire [NUM_BANKS-1:0]                        per_bank_core_req_valid_unqual;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_rw_unqual;
    wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0]  per_bank_core_req_addr_unqual;
    wire [NUM_BANKS-1:0][WORD_SIZE-1:0]         per_bank_core_req_byteen_unqual;
    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0]       per_bank_core_req_data_unqual;
    wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0]    per_bank_core_req_tag_unqual;
    wire [NUM_BANKS-1:0][`REQS_BITS-1:0]        per_bank_core_req_tid_unqual;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_ready_unqual;

    VX_core_req_bank_sel #(
        .CACHE_ID        (CACHE_ID),
        .CACHE_LINE_SIZE (WORD_SIZE),
        .NUM_BANKS       (NUM_BANKS),
        .NUM_PORTS       (1),
        .WORD_SIZE       (WORD_SIZE),
        .NUM_REQS        (NUM_REQS),
        .CORE_TAG_WIDTH  (CORE_TAG_WIDTH),
        .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET)
    ) core_req_bank_sel (
        .clk        (clk),
        .reset      (reset),
    `ifdef PERF_ENABLE
        .bank_stalls(perf_cache_if.bank_stalls),
    `endif
        .core_req_valid          (core_req_valid),
        .core_req_rw             (core_req_rw),
        .core_req_addr           (core_req_addr),
        .core_req_byteen         (core_req_byteen),
        .core_req_data           (core_req_data),
        .core_req_tag            (core_req_tag),
        .core_req_ready          (core_req_ready),
        .per_bank_core_req_valid (per_bank_core_req_valid_unqual),
        .per_bank_core_req_tid   (per_bank_core_req_tid_unqual),
        .per_bank_core_req_rw    (per_bank_core_req_rw_unqual),
        .per_bank_core_req_addr  (per_bank_core_req_addr_unqual),
        .per_bank_core_req_byteen(per_bank_core_req_byteen_unqual),
        .per_bank_core_req_tag   (per_bank_core_req_tag_unqual),
        .per_bank_core_req_data  (per_bank_core_req_data_unqual),
        .per_bank_core_req_ready (per_bank_core_req_ready_unqual),
        `UNUSED_PIN (per_bank_core_req_pmask),
        `UNUSED_PIN (per_bank_core_req_wsel)
    );

    // Per-bank request slots after the request queue.
    wire [NUM_BANKS-1:0]                        per_bank_core_req_valid;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_rw;
    wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0]  per_bank_core_req_addr;
    wire [NUM_BANKS-1:0][WORD_SIZE-1:0]         per_bank_core_req_byteen;
    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0]       per_bank_core_req_data;
    wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0]    per_bank_core_req_tag;
    wire [NUM_BANKS-1:0][`REQS_BITS-1:0]        per_bank_core_req_tid;

    wire creq_out_valid, creq_out_ready;
    wire creq_in_valid, creq_in_ready;

    // The fire signals are only consumed by the optional trace code below.
    wire creq_in_fire = creq_in_valid && creq_in_ready;
    `UNUSED_VAR (creq_in_fire)

    wire creq_out_fire = creq_out_valid && creq_out_ready;
    `UNUSED_VAR (creq_out_fire)

    // All banks are enqueued together, so they share one ready signal.
    assign creq_in_valid = (| core_req_valid);
    assign per_bank_core_req_ready_unqual = {NUM_BANKS{creq_in_ready}};

    wire [NUM_BANKS-1:0] core_req_read_mask, core_req_read_mask_unqual;
    wire core_req_writeonly, core_req_writeonly_unqual;

    // Banks holding a read, and whether the batch contains no read at all.
    assign core_req_read_mask_unqual = per_bank_core_req_valid_unqual & ~per_bank_core_req_rw_unqual;
    assign core_req_writeonly_unqual = ~(| core_req_read_mask_unqual);

    VX_elastic_buffer #(
        .DATAW   (NUM_BANKS * (1 + 1 + `LINE_ADDR_WIDTH + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS) + NUM_BANKS + 1),
        .SIZE    (CREQ_SIZE),
        .OUT_REG (1)    // output should be registered for the data_store addr port
    ) core_req_queue (
        .clk       (clk),
        .reset     (reset),
        .ready_in  (creq_in_ready),
        .valid_in  (creq_in_valid),
        .data_in   ({per_bank_core_req_valid_unqual,
                     per_bank_core_req_rw_unqual,
                     per_bank_core_req_addr_unqual,
                     per_bank_core_req_byteen_unqual,
                     per_bank_core_req_data_unqual,
                     per_bank_core_req_tag_unqual,
                     per_bank_core_req_tid_unqual,
                     core_req_read_mask_unqual,
                     core_req_writeonly_unqual}),
        .data_out  ({per_bank_core_req_valid,
                     per_bank_core_req_rw,
                     per_bank_core_req_addr,
                     per_bank_core_req_byteen,
                     per_bank_core_req_data,
                     per_bank_core_req_tag,
                     per_bank_core_req_tid,
                     core_req_read_mask,
                     core_req_writeonly}),
        .ready_out (creq_out_ready),
        .valid_out (creq_out_valid)
    );

    wire crsq_in_valid, crsq_in_ready;
    wire crsq_last_read;

    // Retire the batch when it needs no response, or when the response beat
    // being accepted covers its last pending read bank.
    assign creq_out_ready = core_req_writeonly
                         || (crsq_in_ready && crsq_last_read);

    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;

    for (genvar i = 0; i < NUM_BANKS; i++) begin

        // Byte write enables are only raised for a valid write in this bank.
        wire [WORD_SIZE-1:0] wren = per_bank_core_req_byteen[i]
                                  & {WORD_SIZE{per_bank_core_req_valid[i]
                                            && per_bank_core_req_rw[i]}};

        wire [`LINE_SELECT_BITS-1:0] addr = per_bank_core_req_addr[i][`LINE_SELECT_BITS-1:0];

        VX_sp_ram #(
            .DATAW      (`WORD_WIDTH),
            .SIZE       (`LINES_PER_BANK),
            .BYTEENW    (WORD_SIZE),
            .NO_RWCHECK (1)
        ) data_store (
            .clk   (clk),
            .addr  (addr),
            .wren  (wren),
            .wdata (per_bank_core_req_data[i]),
            .rdata (per_bank_core_rsp_data[i])
        );
    end

    // The core response bus handles a single tag at the time
    // We first need to select the current tag to process,
    // then send all bank responses for that tag as a batch

    reg [NUM_REQS-1:0]                    core_rsp_valids_in;
    reg [NUM_REQS-1:0][`WORD_WIDTH-1:0]   core_rsp_data_in;
    wire [CORE_TAG_WIDTH-1:0]             core_rsp_tag_in;
    // bank_rsp_sel_r: read banks already answered for the current batch.
    // bank_rsp_sel_n: the same set plus the banks answered by this beat.
    reg [NUM_BANKS-1:0]                   bank_rsp_sel_r, bank_rsp_sel_n;

    wire crsq_in_fire = crsq_in_valid && crsq_in_ready;

    assign crsq_last_read = (bank_rsp_sel_n == core_req_read_mask);

    always @(posedge clk) begin
        if (reset) begin
            bank_rsp_sel_r <= 0;
        end else begin
            if (crsq_in_fire) begin
                if (crsq_last_read) begin
                    // batch fully answered: start clean for the next one
                    bank_rsp_sel_r <= 0;
                end else begin
                    bank_rsp_sel_r <= bank_rsp_sel_n;
                end
            end
        end
    end

    // Tag of the first read bank that has not been answered yet.
    VX_find_first #(
        .N     (NUM_BANKS),
        .DATAW (CORE_TAG_WIDTH)
    ) find_first (
        .valid_i (core_req_read_mask & ~bank_rsp_sel_r),
        .data_i  (per_bank_core_req_tag),
        .data_o  (core_rsp_tag_in),
        `UNUSED_PIN (valid_o)
    );

    // Collect every read bank whose tag id matches the selected tag and route
    // its data to the request lane (tid) that issued it.
    always @(*) begin
        core_rsp_valids_in = 0;
        core_rsp_data_in   = 'x;
        bank_rsp_sel_n     = bank_rsp_sel_r;
        for (integer i = 0; i < NUM_BANKS; i++) begin
            if (core_req_read_mask[i]
             && (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
                core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
                core_rsp_data_in[per_bank_core_req_tid[i]]   = per_bank_core_rsp_data[i];
                bank_rsp_sel_n[i] = 1;
            end
        end
    end

    assign crsq_in_valid = creq_out_valid && ~core_req_writeonly;

    // The payload is laid out per *request lane*: the thread mask and the data
    // array are both NUM_REQS entries wide. The buffer therefore has to be
    // sized with NUM_REQS; sizing it with NUM_BANKS truncated the response
    // whenever NUM_BANKS < NUM_REQS, which the STATIC_ASSERT above allows and
    // the default parameters (2 banks, 4 requests) select.
    VX_elastic_buffer #(
        .DATAW (NUM_REQS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
        .SIZE  (CRSQ_SIZE)
    ) core_rsp_req (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (crsq_in_valid),
        .data_in   ({core_rsp_valids_in, core_rsp_data_in, core_rsp_tag_in}),
        .ready_in  (crsq_in_ready),
        .valid_out (core_rsp_valid),
        .data_out  ({core_rsp_tmask, core_rsp_data, core_rsp_tag}),
        .ready_out (core_rsp_ready)
    );

    // Request ids extracted from the tags; only used by the trace code.
`IGNORE_UNUSED_BEGIN
    wire [NUM_BANKS-1:0][`DBG_CACHE_REQ_IDW-1:0] req_id_st0, req_id_st1;
`IGNORE_UNUSED_END

    for (genvar i = 0; i < NUM_BANKS; ++i) begin
        if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
            assign req_id_st0[i] = per_bank_core_req_tag_unqual[i][`CACHE_REQ_ID_RNG];
            assign req_id_st1[i] = per_bank_core_req_tag[i][`CACHE_REQ_ID_RNG];
        end else begin
            assign req_id_st0[i] = 0;
            assign req_id_st1[i] = 0;
        end
    end

`ifdef DBG_TRACE_CACHE_BANK

    // Set when a queued batch mixes more than one tag id.
    reg is_multi_tag_req;
`IGNORE_UNUSED_BEGIN
    reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel;
`IGNORE_UNUSED_END

    VX_find_first #(
        .N     (NUM_BANKS),
        .DATAW (CORE_TAG_WIDTH)
    ) find_first_d (
        .valid_i (per_bank_core_req_valid),
        .data_i  (per_bank_core_req_tag),
        .data_o  (core_req_tag_sel),
        `UNUSED_PIN (valid_o)
    );

    always @(*) begin
        is_multi_tag_req = 0;
        for (integer i = 0; i < NUM_BANKS; ++i) begin
            if (per_bank_core_req_valid[i]
             && (core_req_tag_sel[CORE_TAG_ID_BITS-1:0] != per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
                is_multi_tag_req = creq_out_valid;
            end
        end
    end

    always @(posedge clk) begin
        if (!crsq_in_ready) begin
            dpi_trace("%d: *** cache%0d pipeline-stall\n", $time, CACHE_ID);
        end
        if (is_multi_tag_req) begin
            dpi_trace("%d: *** cache%0d multi-tag request!\n", $time, CACHE_ID);
        end
        if (creq_in_fire) begin
            for (integer i = 0; i < NUM_BANKS; ++i) begin
                if (per_bank_core_req_valid_unqual[i]) begin
                    if (per_bank_core_req_rw_unqual[i]) begin
                        dpi_trace("%d: smem%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h (#%0d)\n",
                            $time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i], req_id_st0[i]);
                    end else begin
                        dpi_trace("%d: smem%0d:%0d core-rd-req: addr=%0h, tag=%0h (#%0d)\n",
                            $time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], req_id_st0[i]);
                    end
                end
            end
        end
        if (creq_out_fire) begin
            for (integer i = 0; i < NUM_BANKS; ++i) begin
                if (per_bank_core_req_valid[i]) begin
                    if (per_bank_core_req_rw[i]) begin
                        dpi_trace("%d: smem%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, data=%0h (#%0d)\n",
                            $time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_data[i], req_id_st1[i]);
                    end else begin
                        dpi_trace("%d: smem%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, data=%0h (#%0d)\n",
                            $time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_rsp_data[i], req_id_st1[i]);
                    end
                end
            end
        end
    end
`endif

`ifdef PERF_ENABLE
    // per cycle: core_reads, core_writes
    wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
    wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;

    // Requests accepted this cycle, split by direction.
    wire [NUM_REQS-1:0] perf_core_reads_per_mask  = core_req_valid & core_req_ready & ~core_req_rw;
    wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;

    `POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
    `POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
    // A response is offered but the core is not taking it.
    wire perf_crsp_stall_per_cycle = core_rsp_valid & ~core_rsp_ready;

    reg [`PERF_CTR_BITS-1:0] perf_core_reads;
    reg [`PERF_CTR_BITS-1:0] perf_core_writes;
    reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;

    always @(posedge clk) begin
        if (reset) begin
            perf_core_reads  <= 0;
            perf_core_writes <= 0;
            perf_crsp_stalls <= 0;
        end else begin
            perf_core_reads  <= perf_core_reads  + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
            perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
            perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
        end
    end

    // Miss, MSHR and memory counters are tied to zero in this module.
    assign perf_cache_if.reads        = perf_core_reads;
    assign perf_cache_if.writes       = perf_core_writes;
    assign perf_cache_if.read_misses  = '0;
    assign perf_cache_if.write_misses = '0;
    assign perf_cache_if.mshr_stalls  = '0;
    assign perf_cache_if.mem_stalls   = '0;
    assign perf_cache_if.crsp_stalls  = perf_crsp_stalls;
`endif

endmodule
|
||||
79
hw/rtl/cache/VX_tag_access.sv
vendored
79
hw/rtl/cache/VX_tag_access.sv
vendored
@@ -1,79 +0,0 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// VX_tag_access: per-bank tag store.
//
// Each entry holds {valid, tag}. A fill writes the request tag with valid=1,
// a flush writes the entry with valid=0, and tag_match reports whether the
// entry read at the addressed line is valid and carries the request tag.
module VX_tag_access #(
    parameter CACHE_ID          = 0,
    parameter BANK_ID           = 0,
    // Size of cache in bytes
    parameter CACHE_SIZE        = 1,
    // Size of line inside a bank in bytes
    parameter CACHE_LINE_SIZE   = 1,
    // Number of banks
    parameter NUM_BANKS         = 1,
    // Size of a word in bytes
    parameter WORD_SIZE         = 1,
    // bank offset from beginning of index range
    parameter BANK_ADDR_OFFSET  = 0
) (
    input wire                          clk,
    input wire                          reset,

`IGNORE_UNUSED_BEGIN
    input wire [`DBG_CACHE_REQ_IDW-1:0] req_id,
`IGNORE_UNUSED_END

    input wire                          stall,

    // read/fill
    input wire                          lookup,
    input wire [`LINE_ADDR_WIDTH-1:0]   addr,
    input wire                          fill,
    input wire                          flush,
    output wire                         tag_match
);

    `UNUSED_PARAM (CACHE_ID)
    `UNUSED_PARAM (BANK_ID)
    `UNUSED_VAR (reset)
    `UNUSED_VAR (lookup)
    `UNUSED_VAR (stall)     // stall is only referenced by the trace code

    // Split the incoming line address into store index and tag.
    wire [`LINE_SELECT_BITS-1:0] set_idx = addr[`LINE_SELECT_BITS-1:0];
    wire [`TAG_SELECT_BITS-1:0]  req_tag = `LINE_TAG_ADDR(addr);

    // Entry currently read from the store.
    wire                         entry_valid;
    wire [`TAG_SELECT_BITS-1:0]  entry_tag;

    // Both fill and flush write the entry; flush clears the valid bit.
    wire                         store_we    = fill || flush;
    wire [`TAG_SELECT_BITS:0]    store_wdata = {!flush, req_tag};
    wire [`TAG_SELECT_BITS:0]    store_rdata;

    assign {entry_valid, entry_tag} = store_rdata;

    VX_sp_ram #(
        .DATAW      (`TAG_SELECT_BITS + 1),
        .SIZE       (`LINES_PER_BANK),
        .NO_RWCHECK (1)
    ) tag_store (
        .clk   (clk),
        .addr  (set_idx),
        .wren  (store_we),
        .wdata (store_wdata),
        .rdata (store_rdata)
    );

    assign tag_match = entry_valid && (req_tag == entry_tag);

`ifdef DBG_TRACE_CACHE_TAG
    always @(posedge clk) begin
        if (fill && !stall) begin
            dpi_trace("%d: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), set_idx, req_tag);
        end
        if (flush) begin
            dpi_trace("%d: cache%0d:%0d tag-flush: addr=%0h, blk_addr=%0d\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), set_idx);
        end
        if (lookup && !stall) begin
            if (tag_match) begin
                dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, blk_addr=%0d, tag_id=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), set_idx, req_tag, req_id);
            end else begin
                dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), set_idx, req_tag, entry_tag, req_id);
            end
        end
    end
`endif

endmodule
|
||||
172
hw/rtl/core/VX_alu_unit.sv
Normal file
172
hw/rtl/core/VX_alu_unit.sv
Normal file
@@ -0,0 +1,172 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_alu_unit: ALU execution stage.
//
// dispatch_unit spreads the issue slots over BLOCK_SIZE execute blocks. Each
// block steers an instruction either to the integer unit or, when EXT_M_ENABLE
// is defined and the op is a mul/div, to the muldiv unit; the two commit
// streams are arbitrated per block and gather_unit maps the per-block results
// back onto the ISSUE_WIDTH commit interfaces.
module VX_alu_unit #(
    parameter CORE_ID = 0
) (
    input wire              clk,
    input wire              reset,

    // Inputs
    VX_dispatch_if.slave    dispatch_if [`ISSUE_WIDTH],

    // Outputs
    VX_commit_if.master     commit_if [`ISSUE_WIDTH],
    VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS]
);

    `UNUSED_PARAM (CORE_ID)

    // Geometry
    localparam BLOCK_SIZE   = `NUM_ALU_BLOCKS;
    localparam NUM_LANES    = `NUM_ALU_LANES;
    localparam PID_BITS     = `CLOG2(`NUM_THREADS / NUM_LANES);
    localparam PID_WIDTH    = `UP(PID_BITS);

    // Response arbiter: one input for the integer unit, one more for muldiv.
    localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
    localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;

    // Set when the unit has fewer blocks than issue slots or fewer lanes than
    // threads; it selects the output-register settings below.
    localparam PARTIAL_BW   = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);

    localparam DISPATCH_OUT_REG = PARTIAL_BW ? 1 : 0;
    localparam RSP_ARB_OUT_REG  = PARTIAL_BW ? 1 : 3;
    localparam GATHER_OUT_REG   = PARTIAL_BW ? 3 : 0;

    // Per-block interfaces between dispatch, execution and gather.
    VX_execute_if #(
        .NUM_LANES (NUM_LANES)
    ) execute_if[BLOCK_SIZE]();

    VX_commit_if #(
        .NUM_LANES (NUM_LANES)
    ) commit_block_if[BLOCK_SIZE]();

    `RESET_RELAY (dispatch_reset, reset);

    VX_dispatch_unit #(
        .BLOCK_SIZE (BLOCK_SIZE),
        .NUM_LANES  (NUM_LANES),
        .OUT_REG    (DISPATCH_OUT_REG)
    ) dispatch_unit (
        .clk        (clk),
        .reset      (dispatch_reset),
        .dispatch_if(dispatch_if),
        .execute_if (execute_if)
    );

    for (genvar blk = 0; blk < BLOCK_SIZE; ++blk) begin

        // High when the incoming instruction belongs to the muldiv unit.
        wire is_muldiv_op;

        // ---- integer path --------------------------------------------

        VX_execute_if #(
            .NUM_LANES (NUM_LANES)
        ) int_execute_if();

        VX_commit_if #(
            .NUM_LANES (NUM_LANES)
        ) int_commit_if();

        assign int_execute_if.data  = execute_if[blk].data;
        assign int_execute_if.valid = execute_if[blk].valid && !is_muldiv_op;

        `RESET_RELAY (int_reset, reset);

        VX_int_unit #(
            .CORE_ID   (CORE_ID),
            .BLOCK_IDX (blk),
            .NUM_LANES (NUM_LANES)
        ) int_unit (
            .clk           (clk),
            .reset         (int_reset),
            .execute_if    (int_execute_if),
            .branch_ctl_if (branch_ctl_if[blk]),
            .commit_if     (int_commit_if)
        );

    `ifdef EXT_M_ENABLE

        // ---- mul/div path --------------------------------------------

        assign is_muldiv_op = `INST_ALU_IS_M(execute_if[blk].data.op_mod);

        `RESET_RELAY (mdv_reset, reset);

        VX_execute_if #(
            .NUM_LANES (NUM_LANES)
        ) mdv_execute_if();

        VX_commit_if #(
            .NUM_LANES (NUM_LANES)
        ) mdv_commit_if();

        assign mdv_execute_if.data  = execute_if[blk].data;
        assign mdv_execute_if.valid = execute_if[blk].valid && is_muldiv_op;

        VX_muldiv_unit #(
            .CORE_ID   (CORE_ID),
            .NUM_LANES (NUM_LANES)
        ) mdv_unit (
            .clk        (clk),
            .reset      (mdv_reset),
            .execute_if (mdv_execute_if),
            .commit_if  (mdv_commit_if)
        );

        // Back-pressure comes from whichever unit the instruction targets.
        assign execute_if[blk].ready = is_muldiv_op ? mdv_execute_if.ready : int_execute_if.ready;

    `else

        // Without the M extension everything goes to the integer unit.
        assign is_muldiv_op = 0;
        assign execute_if[blk].ready = int_execute_if.ready;

    `endif

        // send response

        VX_stream_arb #(
            .NUM_INPUTS (RSP_ARB_SIZE),
            .DATAW      (RSP_ARB_DATAW),
            .OUT_REG    (RSP_ARB_OUT_REG)
        ) rsp_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  ({
            `ifdef EXT_M_ENABLE
                mdv_commit_if.valid,
            `endif
                int_commit_if.valid
            }),
            .ready_in  ({
            `ifdef EXT_M_ENABLE
                mdv_commit_if.ready,
            `endif
                int_commit_if.ready
            }),
            .data_in   ({
            `ifdef EXT_M_ENABLE
                mdv_commit_if.data,
            `endif
                int_commit_if.data
            }),
            .data_out  (commit_block_if[blk].data),
            .valid_out (commit_block_if[blk].valid),
            .ready_out (commit_block_if[blk].ready),
            `UNUSED_PIN (sel_out)
        );
    end

    `RESET_RELAY (commit_reset, reset);

    VX_gather_unit #(
        .BLOCK_SIZE (BLOCK_SIZE),
        .NUM_LANES  (NUM_LANES),
        .OUT_REG    (GATHER_OUT_REG)
    ) gather_unit (
        .clk           (clk),
        .reset         (commit_reset),
        .commit_in_if  (commit_block_if),
        .commit_out_if (commit_if)
    );

endmodule
|
||||
226
hw/rtl/core/VX_commit.sv
Normal file
226
hw/rtl/core/VX_commit.sv
Normal file
@@ -0,0 +1,226 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave alu_commit_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.slave lsu_commit_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.slave fpu_commit_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
VX_commit_if.slave sfu_commit_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_writeback_if.master writeback_if [`ISSUE_WIDTH],
|
||||
VX_commit_csr_if.master commit_csr_if,
|
||||
VX_commit_sched_if.master commit_sched_if,
|
||||
|
||||
// simulation helper signals
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
||||
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
|
||||
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
|
||||
|
||||
// commit arbitration
|
||||
|
||||
VX_commit_if commit_if[`ISSUE_WIDTH]();
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] commit_fire;
|
||||
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] commit_wid;
|
||||
wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] commit_tmask;
|
||||
wire [`ISSUE_WIDTH-1:0] commit_eop;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (`NUM_EX_UNITS),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_REG (1)
|
||||
) commit_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.valid_in ({
|
||||
sfu_commit_if[i].valid,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if[i].valid,
|
||||
`endif
|
||||
alu_commit_if[i].valid,
|
||||
lsu_commit_if[i].valid
|
||||
}),
|
||||
.ready_in ({
|
||||
sfu_commit_if[i].ready,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if[i].ready,
|
||||
`endif
|
||||
alu_commit_if[i].ready,
|
||||
lsu_commit_if[i].ready
|
||||
}),
|
||||
.data_in ({
|
||||
sfu_commit_if[i].data,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if[i].data,
|
||||
`endif
|
||||
alu_commit_if[i].data,
|
||||
lsu_commit_if[i].data
|
||||
}),
|
||||
.data_out (commit_if[i].data),
|
||||
.valid_out (commit_if[i].valid),
|
||||
.ready_out (commit_if[i].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
assign commit_fire[i] = commit_if[i].valid && commit_if[i].ready;
|
||||
assign commit_tmask[i] = {`NUM_THREADS{commit_fire[i]}} & commit_if[i].data.tmask;
|
||||
assign commit_wid[i] = commit_if[i].data.wid;
|
||||
assign commit_eop[i] = commit_if[i].data.eop;
|
||||
end
|
||||
|
||||
// CSRs update
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0][COMMIT_SIZEW-1:0] commit_size, commit_size_r;
|
||||
wire [COMMIT_ALL_SIZEW-1:0] commit_size_all, commit_size_all_r;
|
||||
wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr;
|
||||
|
||||
assign commit_fire_any = (| commit_fire);
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [COMMIT_SIZEW-1:0] pop_count;
|
||||
`POP_COUNT(pop_count, commit_tmask[i]);
|
||||
assign commit_size[i] = pop_count;
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `ISSUE_WIDTH * COMMIT_SIZEW),
|
||||
.RESETW (1)
|
||||
) commit_size_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({commit_fire_any, commit_size}),
|
||||
.data_out ({commit_fire_any_r, commit_size_r})
|
||||
);
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (COMMIT_SIZEW),
|
||||
.DATAW_OUT (COMMIT_ALL_SIZEW),
|
||||
.N (`ISSUE_WIDTH),
|
||||
.OP ("+")
|
||||
) commit_size_reduce (
|
||||
.data_in (commit_size_r),
|
||||
.data_out (commit_size_all)
|
||||
);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + COMMIT_ALL_SIZEW),
|
||||
.RESETW (1)
|
||||
) commit_size_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({commit_fire_any_r, commit_size_all}),
|
||||
.data_out ({commit_fire_any_rr, commit_size_all_r})
|
||||
);
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] instret;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
instret <= '0;
|
||||
end else begin
|
||||
if (commit_fire_any_rr) begin
|
||||
instret <= instret + `PERF_CTR_BITS'(commit_size_all_r);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign commit_csr_if.instret = instret;
|
||||
|
||||
// Committed instructions
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (`ISSUE_WIDTH * (1 + `NW_WIDTH)),
|
||||
.RESETW (`ISSUE_WIDTH)
|
||||
) committed_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({(commit_fire & commit_eop), commit_wid}),
|
||||
.data_out ({commit_sched_if.committed, commit_sched_if.committed_wid})
|
||||
);
|
||||
|
||||
// Writeback
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign writeback_if[i].valid = commit_if[i].valid && commit_if[i].data.wb;
|
||||
assign writeback_if[i].data.uuid = commit_if[i].data.uuid;
|
||||
assign writeback_if[i].data.wis = wid_to_wis(commit_if[i].data.wid);
|
||||
assign writeback_if[i].data.PC = commit_if[i].data.PC;
|
||||
assign writeback_if[i].data.tmask = commit_if[i].data.tmask;
|
||||
assign writeback_if[i].data.rd = commit_if[i].data.rd;
|
||||
assign writeback_if[i].data.data = commit_if[i].data.data;
|
||||
assign writeback_if[i].data.sop = commit_if[i].data.sop;
|
||||
assign writeback_if[i].data.eop = commit_if[i].data.eop;
|
||||
assign commit_if[i].ready = 1'b1;
|
||||
end
|
||||
|
||||
// simulation helper signal to get RISC-V tests Pass/Fail status
|
||||
reg [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value_r;
|
||||
always @(posedge clk) begin
|
||||
if (writeback_if[0].valid) begin
|
||||
sim_wb_value_r[writeback_if[0].data.rd] <= writeback_if[0].data.data[0];
|
||||
end
|
||||
end
|
||||
assign sim_wb_value = sim_wb_value_r;
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (alu_commit_if[i].valid && alu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, alu_commit_if[i].data.wid, alu_commit_if[i].data.PC, alu_commit_if[i].data.tmask, alu_commit_if[i].data.wb, alu_commit_if[i].data.rd, alu_commit_if[i].data.sop, alu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, alu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", alu_commit_if[i].data.uuid));
|
||||
end
|
||||
if (lsu_commit_if[i].valid && lsu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, lsu_commit_if[i].data.wid, lsu_commit_if[i].data.PC, lsu_commit_if[i].data.tmask, lsu_commit_if[i].data.wb, lsu_commit_if[i].data.rd, lsu_commit_if[i].data.sop, lsu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, lsu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", lsu_commit_if[i].data.uuid));
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_commit_if[i].valid && fpu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, fpu_commit_if[i].data.wid, fpu_commit_if[i].data.PC, fpu_commit_if[i].data.tmask, fpu_commit_if[i].data.wb, fpu_commit_if[i].data.rd, fpu_commit_if[i].data.sop, fpu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, fpu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", fpu_commit_if[i].data.uuid));
|
||||
end
|
||||
`endif
|
||||
if (sfu_commit_if[i].valid && sfu_commit_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=SFU, tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", $time, CORE_ID, sfu_commit_if[i].data.wid, sfu_commit_if[i].data.PC, sfu_commit_if[i].data.tmask, sfu_commit_if[i].data.wb, sfu_commit_if[i].data.rd, sfu_commit_if[i].data.sop, sfu_commit_if[i].data.eop));
|
||||
`TRACE_ARRAY1D(1, sfu_commit_if[i].data.data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", sfu_commit_if[i].data.uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
469
hw/rtl/core/VX_core.sv
Normal file
469
hw/rtl/core/VX_core.sv
Normal file
@@ -0,0 +1,469 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`include "VX_fpu_define.vh"
|
||||
`endif
|
||||
|
||||
// VX_core
//
// Single-core top level. Instantiates VX_dcr_data, the VX_schedule /
// VX_fetch / VX_decode / VX_issue / VX_execute / VX_commit stages and
// VX_smem_unit, and connects them through locally declared interfaces.
// When PERF_ENABLE is defined it also counts icache/dcache traffic
// observed on this module's memory ports and publishes the totals on
// pipeline_perf_if.
//
// Parameters:
//   CORE_ID - identifier forwarded unchanged to every sub-module.
//
// Ports:
//   clk, reset     - clock and reset; reset is fanned out through one
//                    `RESET_RELAY per stage (VX_smem_unit takes it directly).
//   mem_perf_if    - (PERF_ENABLE) incoming memory perf counters, passed to
//                    VX_smem_unit.
//   dcr_bus_if     - DCR bus, consumed by VX_dcr_data.
//   dcache_bus_if  - DCACHE_NUM_REQS data-memory ports (VX_smem_unit output).
//   icache_bus_if  - instruction-memory port used by VX_fetch.
//   gbar_bus_if    - (GBAR_ENABLE) barrier bus used by VX_schedule.
//   sim_ebreak     - simulation helper driven by VX_execute.
//   sim_wb_value   - simulation helper driven by VX_commit.
//   busy           - status output driven by VX_schedule.
module VX_core import VX_gpu_pkg::*; #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_DECL

    // Clock
    input wire              clk,
    input wire              reset,

`ifdef PERF_ENABLE
    VX_mem_perf_if.slave    mem_perf_if,
`endif

    VX_dcr_bus_if.slave     dcr_bus_if,

    VX_mem_bus_if.master    dcache_bus_if [DCACHE_NUM_REQS],

    VX_mem_bus_if.master    icache_bus_if,

`ifdef GBAR_ENABLE
    VX_gbar_bus_if.master   gbar_bus_if,
`endif

    // simulation helper signals
    output wire             sim_ebreak,
    output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value,

    // Status
    output wire             busy
);
    // Inter-stage interfaces
    VX_schedule_if      schedule_if();
    VX_fetch_if         fetch_if();
    VX_decode_if        decode_if();
    VX_sched_csr_if     sched_csr_if();
    VX_decode_sched_if  decode_sched_if();
    VX_commit_sched_if  commit_sched_if();
    VX_commit_csr_if    commit_csr_if();
    VX_branch_ctl_if    branch_ctl_if[`NUM_ALU_BLOCKS]();
    VX_warp_ctl_if      warp_ctl_if();

    // One dispatch/commit interface pair per issue slot and execution unit
    VX_dispatch_if      alu_dispatch_if[`ISSUE_WIDTH]();
    VX_commit_if        alu_commit_if[`ISSUE_WIDTH]();

    VX_dispatch_if      lsu_dispatch_if[`ISSUE_WIDTH]();
    VX_commit_if        lsu_commit_if[`ISSUE_WIDTH]();
`ifdef EXT_F_ENABLE
    VX_dispatch_if      fpu_dispatch_if[`ISSUE_WIDTH]();
    VX_commit_if        fpu_commit_if[`ISSUE_WIDTH]();
`endif
    VX_dispatch_if      sfu_dispatch_if[`ISSUE_WIDTH]();
    VX_commit_if        sfu_commit_if[`ISSUE_WIDTH]();

    VX_writeback_if     writeback_if[`ISSUE_WIDTH]();

    // Data-memory bus between VX_execute and VX_smem_unit
    VX_mem_bus_if #(
        .DATA_SIZE (DCACHE_WORD_SIZE),
        .TAG_WIDTH (DCACHE_TAG_WIDTH)
    ) dcache_bus_tmp_if[DCACHE_NUM_REQS]();

`ifdef PERF_ENABLE
    VX_mem_perf_if      mem_perf_tmp_if();
    VX_pipeline_perf_if pipeline_perf_if();
`endif

    // Per-stage reset relays
    `RESET_RELAY (dcr_data_reset, reset);
    `RESET_RELAY (schedule_reset, reset);
    `RESET_RELAY (fetch_reset, reset);
    `RESET_RELAY (decode_reset, reset);
    `RESET_RELAY (issue_reset, reset);
    `RESET_RELAY (execute_reset, reset);
    `RESET_RELAY (commit_reset, reset);

    base_dcrs_t base_dcrs;

    VX_dcr_data dcr_data (
        .clk        (clk),
        .reset      (dcr_data_reset),
        .dcr_bus_if (dcr_bus_if),
        .base_dcrs  (base_dcrs)
    );

    `SCOPE_IO_SWITCH (3)

    VX_schedule #(
        .CORE_ID (CORE_ID)
    ) schedule (
        .clk            (clk),
        .reset          (schedule_reset),

        .base_dcrs      (base_dcrs),

        .warp_ctl_if    (warp_ctl_if),
        .branch_ctl_if  (branch_ctl_if),
        .decode_sched_if(decode_sched_if),
        .commit_sched_if(commit_sched_if),

        .schedule_if    (schedule_if),
    `ifdef GBAR_ENABLE
        .gbar_bus_if    (gbar_bus_if),
    `endif
        .sched_csr_if   (sched_csr_if),

        .busy           (busy)
    );

    VX_fetch #(
        .CORE_ID (CORE_ID)
    ) fetch (
        `SCOPE_IO_BIND  (0)
        .clk            (clk),
        .reset          (fetch_reset),
        .icache_bus_if  (icache_bus_if),
        .schedule_if    (schedule_if),
        .fetch_if       (fetch_if)
    );

    VX_decode #(
        .CORE_ID (CORE_ID)
    ) decode (
        .clk            (clk),
        .reset          (decode_reset),
        .fetch_if       (fetch_if),
        .decode_if      (decode_if),
        .decode_sched_if(decode_sched_if)
    );

    VX_issue #(
        .CORE_ID (CORE_ID)
    ) issue (
        `SCOPE_IO_BIND  (1)

        .clk            (clk),
        .reset          (issue_reset),

    `ifdef PERF_ENABLE
        .perf_issue_if  (pipeline_perf_if.issue),
    `endif

        .decode_if      (decode_if),
        .writeback_if   (writeback_if),

        .alu_dispatch_if(alu_dispatch_if),
        .lsu_dispatch_if(lsu_dispatch_if),
    `ifdef EXT_F_ENABLE
        .fpu_dispatch_if(fpu_dispatch_if),
    `endif
        .sfu_dispatch_if(sfu_dispatch_if)
    );

    VX_execute #(
        .CORE_ID (CORE_ID)
    ) execute (
        `SCOPE_IO_BIND  (2)

        .clk            (clk),
        .reset          (execute_reset),

        .base_dcrs      (base_dcrs),

    `ifdef PERF_ENABLE
        .mem_perf_if    (mem_perf_tmp_if),
        .pipeline_perf_if(pipeline_perf_if),
    `endif

        .dcache_bus_if  (dcache_bus_tmp_if),

    `ifdef EXT_F_ENABLE
        .fpu_dispatch_if(fpu_dispatch_if),
        .fpu_commit_if  (fpu_commit_if),
    `endif

        .commit_csr_if  (commit_csr_if),
        .sched_csr_if   (sched_csr_if),

        .alu_dispatch_if(alu_dispatch_if),
        .lsu_dispatch_if(lsu_dispatch_if),
        .sfu_dispatch_if(sfu_dispatch_if),

        .warp_ctl_if    (warp_ctl_if),
        .branch_ctl_if  (branch_ctl_if),

        .alu_commit_if  (alu_commit_if),
        .lsu_commit_if  (lsu_commit_if),
        .sfu_commit_if  (sfu_commit_if),

        .sim_ebreak     (sim_ebreak)
    );

    VX_commit #(
        .CORE_ID (CORE_ID)
    ) commit (
        .clk            (clk),
        .reset          (commit_reset),

        .alu_commit_if  (alu_commit_if),
        .lsu_commit_if  (lsu_commit_if),
    `ifdef EXT_F_ENABLE
        .fpu_commit_if  (fpu_commit_if),
    `endif
        .sfu_commit_if  (sfu_commit_if),

        .writeback_if   (writeback_if),

        .commit_csr_if  (commit_csr_if),
        .commit_sched_if(commit_sched_if),

        .sim_wb_value   (sim_wb_value)
    );

    VX_smem_unit #(
        .CORE_ID (CORE_ID)
    ) smem_unit (
        .clk                (clk),
        .reset              (reset),
    `ifdef PERF_ENABLE
        .mem_perf_in_if     (mem_perf_if),
        .mem_perf_out_if    (mem_perf_tmp_if),
    `endif
        .dcache_bus_in_if   (dcache_bus_tmp_if),
        .dcache_bus_out_if  (dcache_bus_if)
    );

`ifdef PERF_ENABLE

    // Number of dcache read requests / write requests / responses that
    // completed their handshake this cycle (0..DCACHE_NUM_REQS).
    wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;
    wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_wr_req_per_cycle;

    wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rsp_per_cycle;

    // Signed per-cycle change in outstanding reads (requests - responses).
    // The icache delta takes the values -1, 0 and +1, so it needs two bits:
    // with a single bit, +1 (1'b1) is sign-extended to -1 by the $signed()
    // cast below and request-only cycles would decrement the counter.
    wire [1:0] perf_icache_pending_read_cycle;
    wire [`CLOG2(DCACHE_NUM_REQS+1)+1-1:0] perf_dcache_pending_read_cycle;

    // Running count of reads issued but not yet answered
    reg [`PERF_CTR_BITS-1:0] perf_icache_pending_reads;
    reg [`PERF_CTR_BITS-1:0] perf_dcache_pending_reads;

    reg [`PERF_CTR_BITS-1:0] perf_ifetches;
    reg [`PERF_CTR_BITS-1:0] perf_loads;
    reg [`PERF_CTR_BITS-1:0] perf_stores;

    wire perf_icache_req_fire = icache_bus_if.req_valid & icache_bus_if.req_ready;
    wire perf_icache_rsp_fire = icache_bus_if.rsp_valid & icache_bus_if.rsp_ready;

    wire [DCACHE_NUM_REQS-1:0] perf_dcache_rd_req_fire, perf_dcache_wr_req_fire, perf_dcache_rsp_fire;

    for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
        assign perf_dcache_rd_req_fire[i] = dcache_bus_if[i].req_valid && ~dcache_bus_if[i].req_data.rw && dcache_bus_if[i].req_ready;
        assign perf_dcache_wr_req_fire[i] = dcache_bus_if[i].req_valid && dcache_bus_if[i].req_data.rw && dcache_bus_if[i].req_ready;
        assign perf_dcache_rsp_fire[i] = dcache_bus_if[i].rsp_valid && dcache_bus_if[i].rsp_ready;
    end

    `POP_COUNT(perf_dcache_rd_req_per_cycle, perf_dcache_rd_req_fire);
    `POP_COUNT(perf_dcache_wr_req_per_cycle, perf_dcache_wr_req_fire);
    `POP_COUNT(perf_dcache_rsp_per_cycle, perf_dcache_rsp_fire);

    // Operands are zero-extended to the (wider) left-hand side before the
    // subtraction, so the result is a valid two's-complement delta.
    assign perf_icache_pending_read_cycle = perf_icache_req_fire - perf_icache_rsp_fire;
    assign perf_dcache_pending_read_cycle = perf_dcache_rd_req_per_cycle - perf_dcache_rsp_per_cycle;

    always @(posedge clk) begin
        if (reset) begin
            perf_icache_pending_reads <= '0;
            perf_dcache_pending_reads <= '0;
        end else begin
            perf_icache_pending_reads <= $signed(perf_icache_pending_reads) + `PERF_CTR_BITS'($signed(perf_icache_pending_read_cycle));
            perf_dcache_pending_reads <= $signed(perf_dcache_pending_reads) + `PERF_CTR_BITS'($signed(perf_dcache_pending_read_cycle));
        end
    end

    // Latency accumulators: each cycle adds the number of reads still pending
    reg [`PERF_CTR_BITS-1:0] perf_icache_lat;
    reg [`PERF_CTR_BITS-1:0] perf_dcache_lat;

    always @(posedge clk) begin
        if (reset) begin
            perf_ifetches   <= '0;
            perf_loads      <= '0;
            perf_stores     <= '0;
            perf_icache_lat <= '0;
            perf_dcache_lat <= '0;
        end else begin
            perf_ifetches   <= perf_ifetches   + `PERF_CTR_BITS'(perf_icache_req_fire);
            perf_loads      <= perf_loads      + `PERF_CTR_BITS'(perf_dcache_rd_req_per_cycle);
            perf_stores     <= perf_stores     + `PERF_CTR_BITS'(perf_dcache_wr_req_per_cycle);
            perf_icache_lat <= perf_icache_lat + perf_icache_pending_reads;
            perf_dcache_lat <= perf_dcache_lat + perf_dcache_pending_reads;
        end
    end

    // Each counter is driven exactly once
    assign pipeline_perf_if.ifetches       = perf_ifetches;
    assign pipeline_perf_if.loads          = perf_loads;
    assign pipeline_perf_if.stores         = perf_stores;
    assign pipeline_perf_if.ifetch_latency = perf_icache_lat;
    assign pipeline_perf_if.load_latency   = perf_dcache_lat;

`endif

endmodule
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// VX_core_top
//
// Flat-port wrapper around VX_core: every interface port of VX_core is
// exposed here as plain wires, and the assigns below copy each signal
// between the flat port and the matching interface member.
//
// Parameters:
//   CORE_ID - identifier forwarded to the wrapped VX_core instance.
//
// Ports:
//   dcr_write_*    - DCR write channel, copied onto dcr_bus_if.
//   dcache_req_* / dcache_rsp_* - DCACHE_NUM_REQS data-memory request and
//                    response channels.
//   icache_req_* / icache_rsp_* - instruction-memory request and response
//                    channel.
//   gbar_req_* / gbar_rsp_*     - (GBAR_ENABLE) barrier request/response.
//   sim_ebreak, sim_wb_value    - simulation helper outputs of VX_core.
//   busy           - status output of VX_core.
module VX_core_top
    import VX_gpu_pkg::*;
#(
    parameter CORE_ID = 0
) (
    // Clock
    input wire                              clk,
    input wire                              reset,

    input wire                              dcr_write_valid,
    input wire [`VX_DCR_ADDR_WIDTH-1:0]     dcr_write_addr,
    input wire [`VX_DCR_DATA_WIDTH-1:0]     dcr_write_data,

    output wire [DCACHE_NUM_REQS-1:0]       dcache_req_valid,
    output wire [DCACHE_NUM_REQS-1:0]       dcache_req_rw,
    output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen,
    output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr,
    output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data,
    output wire [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_req_tag,
    input  wire [DCACHE_NUM_REQS-1:0]       dcache_req_ready,

    input wire  [DCACHE_NUM_REQS-1:0]       dcache_rsp_valid,
    input wire  [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_rsp_data,
    input wire  [DCACHE_NUM_REQS-1:0][DCACHE_NOSM_TAG_WIDTH-1:0] dcache_rsp_tag,
    output wire [DCACHE_NUM_REQS-1:0]       dcache_rsp_ready,

    output wire                             icache_req_valid,
    output wire                             icache_req_rw,
    output wire [ICACHE_WORD_SIZE-1:0]      icache_req_byteen,
    output wire [ICACHE_ADDR_WIDTH-1:0]     icache_req_addr,
    output wire [ICACHE_WORD_SIZE*8-1:0]    icache_req_data,
    output wire [ICACHE_TAG_WIDTH-1:0]      icache_req_tag,
    input wire                              icache_req_ready,

    input wire                              icache_rsp_valid,
    input wire [ICACHE_WORD_SIZE*8-1:0]     icache_rsp_data,
    input wire [ICACHE_TAG_WIDTH-1:0]       icache_rsp_tag,
    output wire                             icache_rsp_ready,

`ifdef GBAR_ENABLE
    output wire                             gbar_req_valid,
    output wire [`NB_WIDTH-1:0]             gbar_req_id,
    output wire [`NC_WIDTH-1:0]             gbar_req_size_m1,
    output wire [`NC_WIDTH-1:0]             gbar_req_core_id,
    input wire                              gbar_req_ready,
    input wire                              gbar_rsp_valid,
    input wire [`NB_WIDTH-1:0]              gbar_rsp_id,
`endif

    // simulation helper signals
    output wire                             sim_ebreak,
    output wire [`NUM_REGS-1:0][`XLEN-1:0]  sim_wb_value,

    // Status
    output wire                             busy
);

`ifdef GBAR_ENABLE
    // Barrier bus <-> flat ports
    VX_gbar_bus_if gbar_bus_if();

    assign gbar_req_valid = gbar_bus_if.req_valid;
    assign gbar_req_id = gbar_bus_if.req_id;
    assign gbar_req_size_m1 = gbar_bus_if.req_size_m1;
    assign gbar_req_core_id = gbar_bus_if.req_core_id;
    assign gbar_bus_if.req_ready = gbar_req_ready;
    assign gbar_bus_if.rsp_valid = gbar_rsp_valid;
    assign gbar_bus_if.rsp_id = gbar_rsp_id;
`endif

    // DCR bus <- flat ports
    VX_dcr_bus_if dcr_bus_if();

    assign dcr_bus_if.write_valid = dcr_write_valid;
    assign dcr_bus_if.write_addr = dcr_write_addr;
    assign dcr_bus_if.write_data = dcr_write_data;

    // Data-memory bus <-> flat ports
    VX_mem_bus_if #(
        .DATA_SIZE (DCACHE_WORD_SIZE),
        .TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
    ) dcache_bus_if[DCACHE_NUM_REQS]();

    for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
        assign dcache_req_valid[i] = dcache_bus_if[i].req_valid;
        assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw;
        assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen;
        assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr;
        assign dcache_req_data[i] = dcache_bus_if[i].req_data.data;
        assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag;
        assign dcache_bus_if[i].req_ready = dcache_req_ready[i];

        assign dcache_bus_if[i].rsp_valid = dcache_rsp_valid[i];
        assign dcache_bus_if[i].rsp_data.tag = dcache_rsp_tag[i];
        assign dcache_bus_if[i].rsp_data.data = dcache_rsp_data[i];
        assign dcache_rsp_ready[i] = dcache_bus_if[i].rsp_ready;
    end

    // Instruction-memory bus <-> flat ports
    VX_mem_bus_if #(
        .DATA_SIZE (ICACHE_WORD_SIZE),
        .TAG_WIDTH (ICACHE_TAG_WIDTH)
    ) icache_bus_if();

    assign icache_req_valid = icache_bus_if.req_valid;
    assign icache_req_rw = icache_bus_if.req_data.rw;
    assign icache_req_byteen = icache_bus_if.req_data.byteen;
    assign icache_req_addr = icache_bus_if.req_data.addr;
    assign icache_req_data = icache_bus_if.req_data.data;
    assign icache_req_tag = icache_bus_if.req_data.tag;
    assign icache_bus_if.req_ready = icache_req_ready;

    assign icache_bus_if.rsp_valid = icache_rsp_valid;
    assign icache_bus_if.rsp_data.tag = icache_rsp_tag;
    assign icache_bus_if.rsp_data.data = icache_rsp_data;
    assign icache_rsp_ready = icache_bus_if.rsp_ready;

`ifdef PERF_ENABLE
    // NOTE(review): no driver for this interface is visible in this module
    VX_mem_perf_if mem_perf_if();
`endif

`ifdef SCOPE
    // Scope inputs are tied off; the scope output is unused here
    wire [0:0] scope_reset_w = 1'b0;
    wire [0:0] scope_bus_in_w = 1'b0;
    wire [0:0] scope_bus_out_w;
    `UNUSED_VAR (scope_bus_out_w)
`endif

    VX_core #(
        // Forward the wrapper's parameter instead of hard-coding 0, so that
        // overriding CORE_ID on VX_core_top takes effect.
        .CORE_ID (CORE_ID)
    ) core (
        `SCOPE_IO_BIND (0)
        .clk            (clk),
        .reset          (reset),

    `ifdef PERF_ENABLE
        .mem_perf_if    (mem_perf_if),
    `endif

        .dcr_bus_if     (dcr_bus_if),

        .dcache_bus_if  (dcache_bus_if),

        .icache_bus_if  (icache_bus_if),

    `ifdef GBAR_ENABLE
        .gbar_bus_if    (gbar_bus_if),
    `endif

        .sim_ebreak     (sim_ebreak),
        .sim_wb_value   (sim_wb_value),
        .busy           (busy)
    );

endmodule
|
||||
304
hw/rtl/core/VX_csr_data.sv
Normal file
304
hw/rtl/core/VX_csr_data.sv
Normal file
@@ -0,0 +1,304 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`include "VX_fpu_define.vh"
|
||||
`endif
|
||||
|
||||
// VX_csr_data
//
// CSR storage and read multiplexer.
//   * Write side: under EXT_F_ENABLE keeps one fcsr register per warp
//     ({frm, fflags}), updated by CSR writes and by flags reported on
//     fpu_to_csr_if. Writes to the other listed machine CSRs are accepted
//     and ignored; any other write address trips an assertion.
//   * Read side: purely combinational decode of read_addr into
//     read_data_ro (read-only CSRs and performance counters) and
//     read_data_rw (the fcsr views). An address that matches nothing
//     trips a runtime assertion when read_enable is set.
//
// Parameters:
//   CORE_ID - value returned for `VX_CSR_CORE_ID.
//
// Ports:
//   base_dcrs        - supplies mpm_class, which selects the perf counter bank.
//   mem_perf_if, pipeline_perf_if, sfu_perf_if - (PERF_ENABLE) counter sources.
//   commit_csr_if    - supplies instret.
//   fpu_to_csr_if    - (EXT_F_ENABLE) per-FPU-block fflags write / frm read.
//   cycles, active_warps, thread_masks - values exposed through CSR reads.
//   read_* / write_* - CSR access channels; *_uuid is only used in messages.
module VX_csr_data
    import VX_gpu_pkg::*;
`ifdef EXT_F_ENABLE
    import VX_fpu_pkg::*;
`endif
#(
    parameter CORE_ID = 0
) (
    input wire                          clk,
    input wire                          reset,

    input base_dcrs_t                   base_dcrs,

`ifdef PERF_ENABLE
    VX_mem_perf_if.slave                mem_perf_if,
    VX_pipeline_perf_if.slave           pipeline_perf_if,
    VX_sfu_perf_if.slave                sfu_perf_if,
`endif

    VX_commit_csr_if.slave              commit_csr_if,

`ifdef EXT_F_ENABLE
    VX_fpu_to_csr_if.slave              fpu_to_csr_if [`NUM_FPU_BLOCKS],
`endif

    input wire [`PERF_CTR_BITS-1:0]     cycles,
    input wire [`NUM_WARPS-1:0]         active_warps,
    input wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks,

    input wire                          read_enable,
    input wire [`UUID_WIDTH-1:0]        read_uuid,
    input wire [`NW_WIDTH-1:0]          read_wid,
    input wire [`VX_CSR_ADDR_BITS-1:0]  read_addr,
    output wire [31:0]                  read_data_ro,
    output wire [31:0]                  read_data_rw,

    input wire                          write_enable,
    input wire [`UUID_WIDTH-1:0]        write_uuid,
    input wire [`NW_WIDTH-1:0]          write_wid,
    input wire [`VX_CSR_ADDR_BITS-1:0]  write_addr,
    input wire [31:0]                   write_data
);

    `UNUSED_VAR (reset)
    `UNUSED_VAR (write_wid)
    `UNUSED_VAR (write_data)

    // CSRs Write /////////////////////////////////////////////////////////////

`ifdef EXT_F_ENABLE
    // Per-warp fcsr: bits [FP_FLAGS_BITS-1:0] hold fflags, the bits above hold frm
    reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FP_FLAGS_BITS-1:0] fcsr, fcsr_n;
    wire [`NUM_FPU_BLOCKS-1:0]              fpu_write_enable;
    wire [`NUM_FPU_BLOCKS-1:0][`NW_WIDTH-1:0] fpu_write_wid;
    fflags_t [`NUM_FPU_BLOCKS-1:0]          fpu_write_fflags;

    // Flatten the interface array so it can be indexed by a procedural loop
    for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
        assign fpu_write_enable[i] = fpu_to_csr_if[i].write_enable;
        assign fpu_write_wid[i]    = fpu_to_csr_if[i].write_wid;
        assign fpu_write_fflags[i] = fpu_to_csr_if[i].write_fflags;
    end

    always @(*) begin
        fcsr_n = fcsr;
        for (integer i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
            if (fpu_write_enable[i]) begin
                // Accumulate on the running next-state value (not on the
                // registered fcsr) so that flags from several FPU blocks
                // targeting the same warp in one cycle are all retained.
                fcsr_n[fpu_write_wid[i]][`FP_FLAGS_BITS-1:0] = fcsr_n[fpu_write_wid[i]][`FP_FLAGS_BITS-1:0]
                                                             | fpu_write_fflags[i];
            end
        end
        // An explicit CSR write is applied last and overrides the FPU update
        if (write_enable) begin
            case (write_addr)
                `VX_CSR_FFLAGS: fcsr_n[write_wid][`FP_FLAGS_BITS-1:0] = write_data[`FP_FLAGS_BITS-1:0];
                `VX_CSR_FRM:    fcsr_n[write_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS] = write_data[`INST_FRM_BITS-1:0];
                `VX_CSR_FCSR:   fcsr_n[write_wid] = write_data[`FP_FLAGS_BITS+`INST_FRM_BITS-1:0];
            default:;
            endcase
        end
    end

    // Rounding mode lookup for each FPU block
    for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
        assign fpu_to_csr_if[i].read_frm = fcsr[fpu_to_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS];
    end

    always @(posedge clk) begin
        if (reset) begin
            fcsr <= '0;
        end else begin
            fcsr <= fcsr_n;
        end
    end
`endif

    // Write address check: the listed CSRs are accepted (the fcsr views are
    // handled above, the rest are ignored); anything else asserts.
    always @(posedge clk) begin
        if (write_enable) begin
            case (write_addr)
            `ifdef EXT_F_ENABLE
                `VX_CSR_FFLAGS,
                `VX_CSR_FRM,
                `VX_CSR_FCSR,
            `endif
                `VX_CSR_SATP,
                `VX_CSR_MSTATUS,
                `VX_CSR_MNSTATUS,
                `VX_CSR_MEDELEG,
                `VX_CSR_MIDELEG,
                `VX_CSR_MIE,
                `VX_CSR_MTVEC,
                `VX_CSR_MEPC,
                `VX_CSR_PMPCFG0,
                `VX_CSR_PMPADDR0: /* do nothing!*/;
                default: begin
                    `ASSERT(0, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
                end
            endcase
        end
    end

    // CSRs read //////////////////////////////////////////////////////////////

    reg [31:0] read_data_ro_r;
    reg [31:0] read_data_rw_r;
    reg read_addr_valid_r;

    always @(*) begin
        read_data_ro_r    = '0;
        read_data_rw_r    = '0;
        read_addr_valid_r = 1;
        case (read_addr)
            `VX_CSR_MVENDORID  : read_data_ro_r = 32'(`VENDOR_ID);
            `VX_CSR_MARCHID    : read_data_ro_r = 32'(`ARCHITECTURE_ID);
            `VX_CSR_MIMPID     : read_data_ro_r = 32'(`IMPLEMENTATION_ID);
            // NOTE(review): the result is 32 bits wide; if `XLEN is 64 the
            // shift amount (`XLEN-2) exceeds that width - confirm intended.
            `VX_CSR_MISA       : read_data_ro_r = (((`CLOG2(`XLEN)-4) << (`XLEN-2)) | `MISA_STD);
        `ifdef EXT_F_ENABLE
            `VX_CSR_FFLAGS     : read_data_rw_r = 32'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
            `VX_CSR_FRM        : read_data_rw_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
            `VX_CSR_FCSR       : read_data_rw_r = 32'(fcsr[read_wid]);
        `endif
            `VX_CSR_WARP_ID    : read_data_ro_r = 32'(read_wid);
            `VX_CSR_CORE_ID    : read_data_ro_r = 32'(CORE_ID);
            `VX_CSR_THREAD_MASK: read_data_ro_r = 32'(thread_masks[read_wid]);
            `VX_CSR_WARP_MASK  : read_data_ro_r = 32'(active_warps);
            `VX_CSR_NUM_THREADS: read_data_ro_r = 32'(`NUM_THREADS);
            `VX_CSR_NUM_WARPS  : read_data_ro_r = 32'(`NUM_WARPS);
            `VX_CSR_NUM_CORES  : read_data_ro_r = 32'(`NUM_CORES * `NUM_CLUSTERS);
            `VX_CSR_MCYCLE     : read_data_ro_r = 32'(cycles[31:0]);
            `VX_CSR_MCYCLE_H   : read_data_ro_r = 32'(cycles[`PERF_CTR_BITS-1:32]);
            `VX_CSR_MPM_RESERVED : read_data_ro_r = 'x;
            `VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
            `VX_CSR_MINSTRET   : read_data_ro_r = 32'(commit_csr_if.instret[31:0]);
            `VX_CSR_MINSTRET_H : read_data_ro_r = 32'(commit_csr_if.instret[`PERF_CTR_BITS-1:32]);

            // Accepted but not implemented: always read as zero
            `VX_CSR_SATP,
            `VX_CSR_MSTATUS,
            `VX_CSR_MNSTATUS,
            `VX_CSR_MEDELEG,
            `VX_CSR_MIDELEG,
            `VX_CSR_MIE,
            `VX_CSR_MTVEC,
            `VX_CSR_MEPC,
            `VX_CSR_PMPCFG0,
            `VX_CSR_PMPADDR0 : read_data_ro_r = 32'(0);

            default: begin
                read_addr_valid_r = 0;
                // Performance counter window: 32 addresses from each of the
                // low-word and high-word bases are always valid to read; the
                // value stays zero unless a counter below matches.
                if ((read_addr >= `VX_CSR_MPM_BASE && read_addr < (`VX_CSR_MPM_BASE + 32))
                 || (read_addr >= `VX_CSR_MPM_BASE_H && read_addr < (`VX_CSR_MPM_BASE_H + 32))) begin
                    read_addr_valid_r = 1;
                `ifdef PERF_ENABLE
                    case (base_dcrs.mpm_class)
                    `VX_DCR_MPM_CLASS_CORE: begin
                        case (read_addr)
                        // PERF: pipeline
                        `VX_CSR_MPM_IBUF_ST     : read_data_ro_r = pipeline_perf_if.ibf_stalls[31:0];
                        `VX_CSR_MPM_IBUF_ST_H   : read_data_ro_r = 32'(pipeline_perf_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_SCRB_ST     : read_data_ro_r = pipeline_perf_if.scb_stalls[31:0];
                        `VX_CSR_MPM_SCRB_ST_H   : read_data_ro_r = 32'(pipeline_perf_if.scb_stalls[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_ALU_ST      : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_ALU][31:0];
                        `VX_CSR_MPM_ALU_ST_H    : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_ALU][`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_LSU_ST      : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_LSU][31:0];
                        `VX_CSR_MPM_LSU_ST_H    : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_LSU][`PERF_CTR_BITS-1:32]);
                    `ifdef EXT_F_ENABLE
                        `VX_CSR_MPM_FPU_ST      : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_FPU][31:0];
                        `VX_CSR_MPM_FPU_ST_H    : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_FPU][`PERF_CTR_BITS-1:32]);
                    `else
                        `VX_CSR_MPM_FPU_ST      : read_data_ro_r = '0;
                        `VX_CSR_MPM_FPU_ST_H    : read_data_ro_r = '0;
                    `endif
                        `VX_CSR_MPM_SFU_ST      : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_SFU][31:0];
                        `VX_CSR_MPM_SFU_ST_H    : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_SFU][`PERF_CTR_BITS-1:32]);
                        // PERF: memory
                        `VX_CSR_MPM_IFETCHES    : read_data_ro_r = pipeline_perf_if.ifetches[31:0];
                        `VX_CSR_MPM_IFETCHES_H  : read_data_ro_r = 32'(pipeline_perf_if.ifetches[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_LOADS       : read_data_ro_r = pipeline_perf_if.loads[31:0];
                        `VX_CSR_MPM_LOADS_H     : read_data_ro_r = 32'(pipeline_perf_if.loads[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_STORES      : read_data_ro_r = pipeline_perf_if.stores[31:0];
                        `VX_CSR_MPM_STORES_H    : read_data_ro_r = 32'(pipeline_perf_if.stores[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_IFETCH_LAT  : read_data_ro_r = pipeline_perf_if.ifetch_latency[31:0];
                        `VX_CSR_MPM_IFETCH_LAT_H : read_data_ro_r = 32'(pipeline_perf_if.ifetch_latency[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_LOAD_LAT    : read_data_ro_r = pipeline_perf_if.load_latency[31:0];
                        `VX_CSR_MPM_LOAD_LAT_H  : read_data_ro_r = 32'(pipeline_perf_if.load_latency[`PERF_CTR_BITS-1:32]);
                        default:;
                        endcase
                    end
                    `VX_DCR_MPM_CLASS_MEM: begin
                        case (read_addr)
                        // PERF: icache
                        `VX_CSR_MPM_ICACHE_READS        : read_data_ro_r = mem_perf_if.icache_reads[31:0];
                        `VX_CSR_MPM_ICACHE_READS_H      : read_data_ro_r = 32'(mem_perf_if.icache_reads[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_ICACHE_MISS_R       : read_data_ro_r = mem_perf_if.icache_read_misses[31:0];
                        `VX_CSR_MPM_ICACHE_MISS_R_H     : read_data_ro_r = 32'(mem_perf_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
                        // PERF: dcache
                        `VX_CSR_MPM_DCACHE_READS        : read_data_ro_r = mem_perf_if.dcache_reads[31:0];
                        `VX_CSR_MPM_DCACHE_READS_H      : read_data_ro_r = 32'(mem_perf_if.dcache_reads[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_DCACHE_WRITES       : read_data_ro_r = mem_perf_if.dcache_writes[31:0];
                        `VX_CSR_MPM_DCACHE_WRITES_H     : read_data_ro_r = 32'(mem_perf_if.dcache_writes[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_DCACHE_MISS_R       : read_data_ro_r = mem_perf_if.dcache_read_misses[31:0];
                        `VX_CSR_MPM_DCACHE_MISS_R_H     : read_data_ro_r = 32'(mem_perf_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_DCACHE_MISS_W       : read_data_ro_r = mem_perf_if.dcache_write_misses[31:0];
                        `VX_CSR_MPM_DCACHE_MISS_W_H     : read_data_ro_r = 32'(mem_perf_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_DCACHE_BANK_ST      : read_data_ro_r = mem_perf_if.dcache_bank_stalls[31:0];
                        `VX_CSR_MPM_DCACHE_BANK_ST_H    : read_data_ro_r = 32'(mem_perf_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_DCACHE_MSHR_ST      : read_data_ro_r = mem_perf_if.dcache_mshr_stalls[31:0];
                        `VX_CSR_MPM_DCACHE_MSHR_ST_H    : read_data_ro_r = 32'(mem_perf_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
                        // PERF: smem
                        `VX_CSR_MPM_SMEM_READS          : read_data_ro_r = mem_perf_if.smem_reads[31:0];
                        `VX_CSR_MPM_SMEM_READS_H        : read_data_ro_r = 32'(mem_perf_if.smem_reads[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_SMEM_WRITES         : read_data_ro_r = mem_perf_if.smem_writes[31:0];
                        `VX_CSR_MPM_SMEM_WRITES_H       : read_data_ro_r = 32'(mem_perf_if.smem_writes[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_SMEM_BANK_ST        : read_data_ro_r = mem_perf_if.smem_bank_stalls[31:0];
                        `VX_CSR_MPM_SMEM_BANK_ST_H      : read_data_ro_r = 32'(mem_perf_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
                        // PERF: l2cache
                        `VX_CSR_MPM_L2CACHE_READS       : read_data_ro_r = mem_perf_if.l2cache_reads[31:0];
                        `VX_CSR_MPM_L2CACHE_READS_H     : read_data_ro_r = 32'(mem_perf_if.l2cache_reads[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L2CACHE_WRITES      : read_data_ro_r = mem_perf_if.l2cache_writes[31:0];
                        `VX_CSR_MPM_L2CACHE_WRITES_H    : read_data_ro_r = 32'(mem_perf_if.l2cache_writes[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L2CACHE_MISS_R      : read_data_ro_r = mem_perf_if.l2cache_read_misses[31:0];
                        `VX_CSR_MPM_L2CACHE_MISS_R_H    : read_data_ro_r = 32'(mem_perf_if.l2cache_read_misses[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L2CACHE_MISS_W      : read_data_ro_r = mem_perf_if.l2cache_write_misses[31:0];
                        `VX_CSR_MPM_L2CACHE_MISS_W_H    : read_data_ro_r = 32'(mem_perf_if.l2cache_write_misses[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L2CACHE_BANK_ST     : read_data_ro_r = mem_perf_if.l2cache_bank_stalls[31:0];
                        `VX_CSR_MPM_L2CACHE_BANK_ST_H   : read_data_ro_r = 32'(mem_perf_if.l2cache_bank_stalls[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L2CACHE_MSHR_ST     : read_data_ro_r = mem_perf_if.l2cache_mshr_stalls[31:0];
                        `VX_CSR_MPM_L2CACHE_MSHR_ST_H   : read_data_ro_r = 32'(mem_perf_if.l2cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
                        // PERF: l3cache
                        `VX_CSR_MPM_L3CACHE_READS       : read_data_ro_r = mem_perf_if.l3cache_reads[31:0];
                        `VX_CSR_MPM_L3CACHE_READS_H     : read_data_ro_r = 32'(mem_perf_if.l3cache_reads[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L3CACHE_WRITES      : read_data_ro_r = mem_perf_if.l3cache_writes[31:0];
                        `VX_CSR_MPM_L3CACHE_WRITES_H    : read_data_ro_r = 32'(mem_perf_if.l3cache_writes[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L3CACHE_MISS_R      : read_data_ro_r = mem_perf_if.l3cache_read_misses[31:0];
                        `VX_CSR_MPM_L3CACHE_MISS_R_H    : read_data_ro_r = 32'(mem_perf_if.l3cache_read_misses[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L3CACHE_MISS_W      : read_data_ro_r = mem_perf_if.l3cache_write_misses[31:0];
                        `VX_CSR_MPM_L3CACHE_MISS_W_H    : read_data_ro_r = 32'(mem_perf_if.l3cache_write_misses[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L3CACHE_BANK_ST     : read_data_ro_r = mem_perf_if.l3cache_bank_stalls[31:0];
                        `VX_CSR_MPM_L3CACHE_BANK_ST_H   : read_data_ro_r = 32'(mem_perf_if.l3cache_bank_stalls[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_L3CACHE_MSHR_ST     : read_data_ro_r = mem_perf_if.l3cache_mshr_stalls[31:0];
                        `VX_CSR_MPM_L3CACHE_MSHR_ST_H   : read_data_ro_r = 32'(mem_perf_if.l3cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
                        // PERF: memory
                        `VX_CSR_MPM_MEM_READS           : read_data_ro_r = mem_perf_if.mem_reads[31:0];
                        `VX_CSR_MPM_MEM_READS_H         : read_data_ro_r = 32'(mem_perf_if.mem_reads[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_MEM_WRITES          : read_data_ro_r = mem_perf_if.mem_writes[31:0];
                        `VX_CSR_MPM_MEM_WRITES_H        : read_data_ro_r = 32'(mem_perf_if.mem_writes[`PERF_CTR_BITS-1:32]);
                        `VX_CSR_MPM_MEM_LAT             : read_data_ro_r = mem_perf_if.mem_latency[31:0];
                        `VX_CSR_MPM_MEM_LAT_H           : read_data_ro_r = 32'(mem_perf_if.mem_latency[`PERF_CTR_BITS-1:32]);
                        default:;
                        endcase
                    end
                    default:;
                    endcase
                `endif
                end
            end
        endcase
    end

    assign read_data_ro = read_data_ro_r;
    assign read_data_rw = read_data_rw_r;

    `UNUSED_VAR (base_dcrs)

    `RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid))

`ifdef PERF_ENABLE
    // sfu_perf_if.wctl_stalls is not exposed through any CSR in this module
    wire [`PERF_CTR_BITS-1:0] perf_wctl_stalls = sfu_perf_if.wctl_stalls;
    `UNUSED_VAR (perf_wctl_stalls);
`endif

endmodule
|
||||
181
hw/rtl/core/VX_csr_unit.sv
Normal file
181
hw/rtl/core/VX_csr_unit.sv
Normal file
@@ -0,0 +1,181 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_csr_unit
//
// Executes CSR instructions (CSRRW / CSRRS / CSRRC and their immediate forms)
// arriving on execute_if and returns the value read on commit_if.
//
// Parameters:
//   CORE_ID   - core index; forwarded to VX_csr_data and used to build the
//               global thread id returned for VX_CSR_MHARTID.
//   NUM_LANES - number of thread lanes carried by one execute_if packet.
//
// Flow:
//   1. A request is only considered once the scheduler reports (alm_empty)
//      that the requesting warp has no other pending instructions.
//   2. The CSR is read combinationally through VX_csr_data (thread-id CSRs are
//      produced locally, per lane).
//   3. Read data and instruction metadata are pushed into rsp_buf, whose
//      output drives commit_if.
//   4. The CSR write is committed on the same cycle the request is accepted.
module VX_csr_unit import VX_gpu_pkg::*; #(
    parameter CORE_ID   = 0,
    parameter NUM_LANES = 1
) (
    input wire                  clk,
    input wire                  reset,

    input base_dcrs_t           base_dcrs,

`ifdef PERF_ENABLE
    VX_mem_perf_if.slave        mem_perf_if,
    VX_pipeline_perf_if.slave   pipeline_perf_if,
    VX_sfu_perf_if.slave        sfu_perf_if,
`endif

`ifdef EXT_F_ENABLE
    VX_fpu_to_csr_if.slave      fpu_to_csr_if [`NUM_FPU_BLOCKS],
`endif

    VX_commit_csr_if.slave      commit_csr_if,
    VX_sched_csr_if.slave       sched_csr_if,
    VX_execute_if.slave         execute_if,
    VX_commit_if.master         commit_if
);
    `UNUSED_PARAM (CORE_ID)
    localparam PID_BITS  = `CLOG2(`NUM_THREADS / NUM_LANES);
    localparam PID_WIDTH = `UP(PID_BITS);
    // Width of the response payload; must match the rsp_buf concatenation:
    // uuid, wid, tmask, PC, rd, wb, read data (32 bits per lane), pid, sop, eop.
    localparam DATAW     = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * 32 + PID_WIDTH + 1 + 1;

    `UNUSED_VAR (execute_if.data.rs3_data)

    reg [NUM_LANES-1:0][31:0]   csr_read_data;
    reg [31:0]                  csr_write_data;
    wire [31:0]                 csr_read_data_ro, csr_read_data_rw;
    wire [31:0]                 csr_req_data;
    reg                         csr_rd_enable;
    wire                        csr_wr_enable;
    wire                        csr_req_ready;

    // wait for all pending instructions to complete
    assign sched_csr_if.alm_empty_wid = execute_if.data.wid;
    wire no_pending_instr = sched_csr_if.alm_empty;

    wire csr_req_valid = execute_if.valid && no_pending_instr;
    assign execute_if.ready = csr_req_ready && no_pending_instr;

    // Request accepted by the response buffer this cycle (valid/ready handshake).
    wire csr_req_fire = csr_req_valid && csr_req_ready;

    // The decoder packs the CSR address in the low bits of imm and the
    // 5-bit immediate operand (for the *I forms) right above it.
    wire [`VX_CSR_ADDR_BITS-1:0] csr_addr = execute_if.data.imm[`VX_CSR_ADDR_BITS-1:0];
    wire [`NRI_BITS-1:0] csr_imm = execute_if.data.imm[`VX_CSR_ADDR_BITS +: `NRI_BITS];

    // Only the low 32 bits of rs1 are used; only lane 0 feeds the write path.
    wire [NUM_LANES-1:0][31:0] rs1_data;
    `UNUSED_VAR (rs1_data)
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        assign rs1_data[i] = execute_if.data.rs1_data[i][31:0];
    end

    wire csr_write_enable = (execute_if.data.op_type == `INST_SFU_CSRRW);

    VX_csr_data #(
        .CORE_ID (CORE_ID)
    ) csr_data (
        .clk            (clk),
        .reset          (reset),

        .base_dcrs      (base_dcrs),

    `ifdef PERF_ENABLE
        .mem_perf_if    (mem_perf_if),
        .pipeline_perf_if(pipeline_perf_if),
        .sfu_perf_if    (sfu_perf_if),
    `endif

        .commit_csr_if  (commit_csr_if),
        .cycles         (sched_csr_if.cycles),
        .active_warps   (sched_csr_if.active_warps),
        .thread_masks   (sched_csr_if.thread_masks),

    `ifdef EXT_F_ENABLE
        .fpu_to_csr_if  (fpu_to_csr_if),
    `endif

        .read_enable    (csr_req_valid && csr_rd_enable),
        .read_uuid      (execute_if.data.uuid),
        .read_wid       (execute_if.data.wid),
        .read_addr      (csr_addr),
        .read_data_ro   (csr_read_data_ro),
        .read_data_rw   (csr_read_data_rw),

        // Commit the write only on the cycle the request is accepted.
        // While csr_req_ready is low the request stays pending on execute_if;
        // writing early would let the read data captured on the later accept
        // cycle reflect the already-updated CSR instead of its old value.
        // NOTE(review): assumes VX_csr_data applies write_enable at the clock
        // edge and returns read data combinationally - confirm.
        .write_enable   (csr_req_fire && csr_wr_enable),
        .write_uuid     (execute_if.data.uuid),
        .write_wid      (execute_if.data.wid),
        .write_addr     (csr_addr),
        .write_data     (csr_write_data)
    );

    // CSR read

    // wtid: thread index within the warp; gtid: core/warp/thread index packed
    // into a single 32-bit id.
    wire [NUM_LANES-1:0][31:0] wtid, gtid;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        if (PID_BITS != 0) begin
            // packet 'pid' carries threads [pid*NUM_LANES, pid*NUM_LANES + NUM_LANES)
            assign wtid[i] = 32'(execute_if.data.pid * NUM_LANES + i);
        end else begin
            assign wtid[i] = 32'(i);
        end
        assign gtid[i] = (32'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (32'(execute_if.data.wid) << `NT_BITS) + wtid[i];
    end

    // Thread-id CSRs differ per lane and are answered locally; any other
    // address is read from VX_csr_data and broadcast to every lane.
    always @(*) begin
        csr_rd_enable = 0;
        case (csr_addr)
        `VX_CSR_THREAD_ID : csr_read_data = wtid;
        `VX_CSR_MHARTID   : csr_read_data = gtid;
        default : begin
            csr_read_data = {NUM_LANES{csr_read_data_ro | csr_read_data_rw}};
            csr_rd_enable = 1;
        end
        endcase
    end

    // CSR write

    assign csr_req_data = execute_if.data.use_imm ? 32'(csr_imm) : rs1_data[0];

    // CSRRW always writes; CSRRS/CSRRC only write when the operand is non-zero.
    assign csr_wr_enable = (csr_write_enable || (| csr_req_data));

    always @(*) begin
        case (execute_if.data.op_type)
            `INST_SFU_CSRRW: begin
                csr_write_data = csr_req_data;
            end
            `INST_SFU_CSRRS: begin
                csr_write_data = csr_read_data_rw | csr_req_data;
            end
            //`INST_SFU_CSRRC
            default: begin
                csr_write_data = csr_read_data_rw & ~csr_req_data;
            end
        endcase
    end

    // unlock the warp
    assign sched_csr_if.unlock_warp = csr_req_fire && execute_if.data.eop;
    assign sched_csr_if.unlock_wid = execute_if.data.wid;

    // send response
    wire [NUM_LANES-1:0][31:0] csr_commit_data;

    VX_elastic_buffer #(
        .DATAW (DATAW),
        .SIZE  (2)
    ) rsp_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (csr_req_valid),
        .ready_in  (csr_req_ready),
        .data_in   ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, csr_read_data, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
        .data_out  ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, csr_commit_data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
        .valid_out (commit_if.valid),
        .ready_out (commit_if.ready)
    );

    // CSR values are 32-bit; resize each lane to the register width.
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        assign commit_if.data.data[i] = `XLEN'(csr_commit_data[i]);
    end

endmodule
|
||||
57
hw/rtl/core/VX_dcr_data.sv
Normal file
57
hw/rtl/core/VX_dcr_data.sv
Normal file
@@ -0,0 +1,57 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
// VX_dcr_data
//
// Holds the base device configuration registers (DCRs). Each write observed
// on dcr_bus_if updates the field selected by write_addr; the current
// contents are exported continuously on base_dcrs. The registers are not
// cleared by reset, so a field holds no defined value until it is written.
module VX_dcr_data import VX_gpu_pkg::*; (
    input wire              clk,
    input wire              reset,

    // Inputs
    VX_dcr_bus_if.slave     dcr_bus_if,

    // Outputs
    output base_dcrs_t      base_dcrs
);

    `UNUSED_VAR (reset)

    // register storage
    base_dcrs_t dcrs_r;

    // a DCR write is presented on the bus this cycle
    wire dcr_write = dcr_bus_if.write_valid;

    always @(posedge clk) begin
        if (dcr_write) begin
            case (dcr_bus_if.write_addr)
            // startup address is written 32 bits at a time
            `VX_DCR_BASE_STARTUP_ADDR0 : dcrs_r.startup_addr[31:0] <= dcr_bus_if.write_data;
        `ifdef XLEN_64
            `VX_DCR_BASE_STARTUP_ADDR1 : dcrs_r.startup_addr[63:32] <= dcr_bus_if.write_data;
        `endif
            // only the low byte of the write data is kept
            `VX_DCR_BASE_MPM_CLASS : dcrs_r.mpm_class <= dcr_bus_if.write_data[7:0];
            default:; // writes to other addresses are ignored here
            endcase
        end
    end

    assign base_dcrs = dcrs_r;

`ifdef DBG_TRACE_CORE_PIPELINE
    // log every DCR write
    always @(posedge clk) begin
        if (dcr_write) begin
            `TRACE(1, ("%d: base-dcr: state=", $time));
            trace_base_dcr(1, dcr_bus_if.write_addr);
            `TRACE(1, (", data=0x%0h\n", dcr_bus_if.write_data));
        end
    end
`endif

endmodule
|
||||
552
hw/rtl/core/VX_decode.sv
Normal file
552
hw/rtl/core/VX_decode.sv
Normal file
@@ -0,0 +1,552 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
// Register-usage helper macros for the decoder.
//
// `USED_IREG(x) / `USED_FREG(x) write the register id x into x_r and set
// use_x. With EXT_F_ENABLE the register id gets an extra MSB: 0 for the
// register file selected by USED_IREG, 1 for the one selected by USED_FREG.
//
// CAUTION: each macro expands to TWO statements. Always invoke it inside a
// begin/end block; under a bare if/else only the first statement would be
// controlled by the condition.
`ifdef EXT_F_ENABLE
    `define USED_IREG(x) \
        x``_r = {1'b0, ``x}; \
        use_``x = 1

    `define USED_FREG(x) \
        x``_r = {1'b1, ``x}; \
        use_``x = 1
`else
    `define USED_IREG(x) \
        x``_r = ``x; \
        use_``x = 1
`endif

// VX_decode
//
// Combinational instruction decoder. Takes one fetched instruction from
// fetch_if and produces on decode_if the execution-unit type, operation,
// modifier bits, register ids, immediate and the use_PC / use_imm /
// write-back flags. It also tells the scheduler (decode_sched_if), for each
// accepted instruction, whether is_wstall was raised for it.
//
// op_mod bits set by this module for EX_ALU instructions:
//   op_mod[0] - jump / branch / system (non-CSR) operations
//   op_mod[1] - M-extension operations (func7[0] set)
//   op_mod[2] - RV64 "W" opcodes (INST_I_W / INST_R_W)
// For EX_FPU instructions op_mod carries func3 or a misc sub-operation id.
module VX_decode #(
    parameter CORE_ID = 0
) (
    input wire              clk,
    input wire              reset,

    // inputs
    VX_fetch_if.slave       fetch_if,

    // outputs
    VX_decode_if.master     decode_if,
    VX_decode_sched_if.master decode_sched_if
);

    // Width of the payload passed through req_buf; must match its data_in
    // concatenation below.
    localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;

    `UNUSED_PARAM (CORE_ID)
    `UNUSED_VAR (clk)
    `UNUSED_VAR (reset)

    reg [`EX_BITS-1:0]       ex_type;
    reg [`INST_OP_BITS-1:0]  op_type;
    reg [`INST_MOD_BITS-1:0] op_mod;
    reg [`NR_BITS-1:0]       rd_r, rs1_r, rs2_r, rs3_r;
    reg [`XLEN-1:0]          imm;
    reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
    reg is_wstall;

    // instruction fields
    wire [31:0] instr  = fetch_if.data.instr;
    wire [6:0]  opcode = instr[6:0];
    wire [1:0]  func2  = instr[26:25];
    wire [2:0]  func3  = instr[14:12];
    wire [4:0]  func5  = instr[31:27];
    wire [6:0]  func7  = instr[31:25];
    wire [11:0] u_12   = instr[31:20];

    wire [4:0]  rd     = instr[11:7];
    wire [4:0]  rs1    = instr[19:15];
    wire [4:0]  rs2    = instr[24:20];
    wire [4:0]  rs3    = instr[31:27];

    `UNUSED_VAR (func2)
    `UNUSED_VAR (func5)
    `UNUSED_VAR (rs3)
    `UNUSED_VAR (use_rd)
    `UNUSED_VAR (use_rs1)
    `UNUSED_VAR (use_rs2)
    `UNUSED_VAR (use_rs3)

    // func3 == 001 or 101: immediate shift, the immediate is a shift amount
    wire is_itype_sh = func3[0] && ~func3[1];

    wire [19:0] ui_imm  = instr[31:12];
`ifdef XLEN_64
    // 6-bit shift amount for 64-bit shifts, 5-bit for the W variants
    wire [11:0] i_imm   = is_itype_sh ? {6'b0, instr[25:20]} : u_12;
    wire [11:0] iw_imm  = is_itype_sh ? {7'b0, instr[24:20]} : u_12;
`else
    wire [11:0] i_imm   = is_itype_sh ? {7'b0, instr[24:20]} : u_12;
`endif
    wire [11:0] s_imm   = {func7, rd};
    wire [12:0] b_imm   = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
    wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};

    // ALU operation selected by func3 (shared by register and immediate forms;
    // SUB is only selected when opcode[5] is set)
    reg [`INST_ALU_BITS-1:0] r_type;
    always @(*) begin
        case (func3)
            3'h0: r_type = (opcode[5] && func7[5]) ? `INST_ALU_SUB : `INST_ALU_ADD;
            3'h1: r_type = `INST_ALU_SLL;
            3'h2: r_type = `INST_ALU_SLT;
            3'h3: r_type = `INST_ALU_SLTU;
            3'h4: r_type = `INST_ALU_XOR;
            3'h5: r_type = func7[5] ? `INST_ALU_SRA : `INST_ALU_SRL;
            3'h6: r_type = `INST_ALU_OR;
            3'h7: r_type = `INST_ALU_AND;
        endcase
    end

    // conditional branch operation selected by func3
    reg [`INST_BR_BITS-1:0] b_type;
    always @(*) begin
        case (func3)
            3'h0: b_type = `INST_BR_EQ;
            3'h1: b_type = `INST_BR_NE;
            3'h4: b_type = `INST_BR_LT;
            3'h5: b_type = `INST_BR_GE;
            3'h6: b_type = `INST_BR_LTU;
            3'h7: b_type = `INST_BR_GEU;
            default: b_type = 'x;
        endcase
    end

    // system (non-CSR) operation selected by the 12-bit function field;
    // casts use the declared width of s_type
    reg [`INST_BR_BITS-1:0] s_type;
    always @(*) begin
        case (u_12)
            12'h000: s_type = `INST_BR_BITS'(`INST_BR_ECALL);
            12'h001: s_type = `INST_BR_BITS'(`INST_BR_EBREAK);
            12'h002: s_type = `INST_BR_BITS'(`INST_BR_URET);
            12'h102: s_type = `INST_BR_BITS'(`INST_BR_SRET);
            12'h302: s_type = `INST_BR_BITS'(`INST_BR_MRET);
            default: s_type = 'x;
        endcase
    end

`ifdef EXT_M_ENABLE
    // multiply/divide operation selected by func3
    reg [`INST_M_BITS-1:0] m_type;
    always @(*) begin
        case (func3)
            3'h0: m_type = `INST_M_MUL;
            3'h1: m_type = `INST_M_MULH;
            3'h2: m_type = `INST_M_MULHSU;
            3'h3: m_type = `INST_M_MULHU;
            3'h4: m_type = `INST_M_DIV;
            3'h5: m_type = `INST_M_DIVU;
            3'h6: m_type = `INST_M_REM;
            3'h7: m_type = `INST_M_REMU;
        endcase
    end
`endif

    // main decode
    always @(*) begin

        // defaults; op_type and imm are don't-care unless a case arm sets them
        ex_type   = '0;
        op_type   = 'x;
        op_mod    = '0;
        rd_r      = '0;
        rs1_r     = '0;
        rs2_r     = '0;
        rs3_r     = '0;
        imm       = 'x;
        use_imm   = 0;
        use_PC    = 0;
        use_rd    = 0;
        use_rs1   = 0;
        use_rs2   = 0;
        use_rs3   = 0;
        is_wstall = 0;

        case (opcode)
            `INST_I: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(r_type);
                use_rd  = 1;
                use_imm = 1;
                imm     = {{(`XLEN-12){i_imm[11]}}, i_imm};
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            `INST_R: begin
                ex_type = `EX_ALU;
            `ifdef EXT_M_ENABLE
                if (func7[0]) begin
                    op_type = `INST_OP_BITS'(m_type);
                    op_mod[1] = 1;
                end else
            `endif
                begin
                    op_type = `INST_OP_BITS'(r_type);
                end
                use_rd = 1;
                `USED_IREG (rd);
                `USED_IREG (rs1);
                `USED_IREG (rs2);
            end
        `ifdef XLEN_64
            `INST_I_W: begin
                // ADDIW, SLLIW, SRLIW, SRAIW
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(r_type);
                op_mod[2] = 1;
                use_rd  = 1;
                use_imm = 1;
                imm     = {{(`XLEN-12){iw_imm[11]}}, iw_imm};
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            `INST_R_W: begin
                ex_type = `EX_ALU;
            `ifdef EXT_M_ENABLE
                if (func7[0]) begin
                    // MULW, DIVW, DIVUW, REMW, REMUW
                    op_type = `INST_OP_BITS'(m_type);
                    op_mod[1] = 1;
                end else
            `endif
                begin
                    // ADDW, SUBW, SLLW, SRLW, SRAW
                    op_type = `INST_OP_BITS'(r_type);
                end
                op_mod[2] = 1;
                use_rd = 1;
                `USED_IREG (rd);
                `USED_IREG (rs1);
                `USED_IREG (rs2);
            end
        `endif
            `INST_LUI: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_ALU_LUI);
                use_rd  = 1;
                use_imm = 1;
                // sign-extended {ui_imm, 12'b0}
                imm     = {{(`XLEN-31){ui_imm[19]}}, ui_imm[18:0], 12'(0)};
                `USED_IREG (rd);
            end
            `INST_AUIPC: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
                use_rd  = 1;
                use_imm = 1;
                use_PC  = 1;
                // sign-extended {ui_imm, 12'b0}
                imm     = {{(`XLEN-31){ui_imm[19]}}, ui_imm[18:0], 12'(0)};
                `USED_IREG (rd);
            end
            `INST_JAL: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_BR_JAL);
                op_mod[0] = 1;
                use_rd  = 1;
                use_imm = 1;
                use_PC  = 1;
                is_wstall = 1;
                imm     = {{(`XLEN-21){jal_imm[20]}}, jal_imm};
                `USED_IREG (rd);
            end
            `INST_JALR: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_BR_JALR);
                op_mod[0] = 1;
                use_rd  = 1;
                use_imm = 1;
                is_wstall = 1;
                imm     = {{(`XLEN-12){u_12[11]}}, u_12};
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            `INST_B: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(b_type);
                op_mod[0] = 1;
                use_imm = 1;
                use_PC  = 1;
                is_wstall = 1;
                imm     = {{(`XLEN-13){b_imm[12]}}, b_imm};
                `USED_IREG (rs1);
                `USED_IREG (rs2);
            end
            `INST_FENCE: begin
                ex_type = `EX_LSU;
                op_type = `INST_LSU_FENCE;
            end
            `INST_SYS : begin
                if (func3[1:0] != 0) begin
                    // CSR access: func3[2] selects the immediate form
                    ex_type = `EX_SFU;
                    op_type = `INST_OP_BITS'(`INST_SFU_CSR(func3[1:0]));
                    use_rd  = 1;
                    is_wstall = 1;
                    use_imm = func3[2];
                    imm[`VX_CSR_ADDR_BITS-1:0] = u_12; // addr
                    `USED_IREG (rd);
                    if (func3[2]) begin
                        imm[`VX_CSR_ADDR_BITS +: `NRI_BITS] = rs1; // imm
                    end else begin
                        `USED_IREG (rs1);
                    end
                end else begin
                    // ECALL / EBREAK / xRET
                    ex_type = `EX_ALU;
                    op_type = `INST_OP_BITS'(s_type);
                    op_mod[0] = 1;
                    use_rd  = 1;
                    use_imm = 1;
                    use_PC  = 1;
                    is_wstall = 1;
                    imm     = `XLEN'd4;
                    `USED_IREG (rd);
                end
            end
        `ifdef EXT_F_ENABLE
            `INST_FL,
        `endif
            `INST_L: begin
                ex_type = `EX_LSU;
                op_type = `INST_OP_BITS'({1'b0, func3});
                use_rd  = 1;
                imm     = {{(`XLEN-12){u_12[11]}}, u_12};
                use_imm = 1;
                // opcode[2] tells INST_FL apart from INST_L in this shared arm
            `ifdef EXT_F_ENABLE
                if (opcode[2]) begin
                    `USED_FREG (rd);
                end else
            `endif
                begin
                    // explicit block: the macro expands to two statements
                    `USED_IREG (rd);
                end
                `USED_IREG (rs1);
            end
        `ifdef EXT_F_ENABLE
            `INST_FS,
        `endif
            `INST_S: begin
                ex_type = `EX_LSU;
                op_type = `INST_OP_BITS'({1'b1, func3});
                imm     = {{(`XLEN-12){s_imm[11]}}, s_imm};
                use_imm = 1;
                `USED_IREG (rs1);
                // opcode[2] tells INST_FS apart from INST_S in this shared arm
            `ifdef EXT_F_ENABLE
                if (opcode[2]) begin
                    `USED_FREG (rs2);
                end else
            `endif
                begin
                    // explicit block: the macro expands to two statements
                    `USED_IREG (rs2);
                end
            end
        `ifdef EXT_F_ENABLE
            `INST_FMADD,
            `INST_FMSUB,
            `INST_FNMSUB,
            `INST_FNMADD: begin
                ex_type = `EX_FPU;
                op_type = `INST_OP_BITS'({2'b11, opcode[3:2]});
                op_mod  = `INST_MOD_BITS'(func3);
                imm[0]  = func2[0]; // destination is double?
                use_rd  = 1;
                `USED_FREG (rd);
                `USED_FREG (rs1);
                `USED_FREG (rs2);
                `USED_FREG (rs3);
            end
            `INST_FCI: begin
                ex_type = `EX_FPU;
                op_mod  = `INST_MOD_BITS'(func3);
            `ifdef FLEN_64
                imm[0]  = func2[0]; // destination is double?
            `endif
                use_rd  = 1;
                case (func5)
                    5'b00000, // FADD
                    5'b00001, // FSUB
                    5'b00010, // FMUL
                    5'b00011: begin // FDIV
                        op_type = `INST_OP_BITS'(func5[1:0]);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    5'b00100: begin
                        // NCP: FSGNJ=0, FSGNJN=1, FSGNJX=2
                        op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                        op_mod  = `INST_MOD_BITS'(func3[1:0]);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    5'b00101: begin
                        // NCP: FMIN=6, FMAX=7
                        op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                        op_mod  = func3[0] ? 7 : 6;
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                `ifdef FLEN_64
                    5'b01000: begin
                        // CVT.S.D, CVT.D.S
                        op_type = `INST_OP_BITS'(`INST_FPU_F2F);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                    end
                `endif
                    5'b01011: begin
                        // SQRT
                        op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                    end
                    5'b10100: begin
                        // CMP
                        op_type = `INST_OP_BITS'(`INST_FPU_CMP);
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    5'b11000: begin
                        // CVT.W.X, CVT.WU.X
                        op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_F2U) : `INST_OP_BITS'(`INST_FPU_F2I);
                    `ifdef XLEN_64
                        imm[1] = rs2[1]; // is 64-bit integer
                    `endif
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                    end
                    5'b11010: begin
                        // CVT.X.W, CVT.X.WU
                        op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_U2F) : `INST_OP_BITS'(`INST_FPU_I2F);
                    `ifdef XLEN_64
                        imm[1] = rs2[1]; // is 64-bit integer
                    `endif
                        `USED_FREG (rd);
                        `USED_IREG (rs1);
                    end
                    5'b11100: begin
                        if (func3[0]) begin
                            // NCP: FCLASS=3
                            op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                            op_mod  = 3;
                        end else begin
                            // NCP: FMV.X.W=4
                            op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                            op_mod  = 4;
                        end
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                    end
                    5'b11110: begin
                        // NCP: FMV.W.X=5
                        op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                        op_mod  = 5;
                        `USED_FREG (rd);
                        `USED_IREG (rs1);
                    end
                default:;
                endcase
            end
        `endif
            `INST_EXT1: begin
                case (func7)
                    7'h00: begin
                        ex_type = `EX_SFU;
                        is_wstall = 1;
                        case (func3)
                            3'h0: begin // TMC
                                op_type = `INST_OP_BITS'(`INST_SFU_TMC);
                                `USED_IREG (rs1);
                            end
                            3'h1: begin // WSPAWN
                                op_type = `INST_OP_BITS'(`INST_SFU_WSPAWN);
                                `USED_IREG (rs1);
                                `USED_IREG (rs2);
                            end
                            3'h2: begin // SPLIT
                                op_type = `INST_OP_BITS'(`INST_SFU_SPLIT);
                                use_rd  = 1;
                                `USED_IREG (rs1);
                                `USED_IREG (rd);
                            end
                            3'h3: begin // JOIN
                                op_type = `INST_OP_BITS'(`INST_SFU_JOIN);
                                `USED_IREG (rs1);
                            end
                            3'h4: begin // BAR
                                op_type = `INST_OP_BITS'(`INST_SFU_BAR);
                                `USED_IREG (rs1);
                                `USED_IREG (rs2);
                            end
                            3'h5: begin // PRED
                                op_type = `INST_OP_BITS'(`INST_SFU_PRED);
                                `USED_IREG (rs1);
                                `USED_IREG (rs2);
                            end
                            default:;
                        endcase
                    end
                    default:;
                endcase
            end
            `INST_EXT2: begin
                case (func3)
                    3'h1: begin
                        case (func2)
                            2'h0: begin // CMOV
                                ex_type = `EX_SFU;
                                op_type = `INST_OP_BITS'(`INST_SFU_CMOV);
                                use_rd  = 1;
                                `USED_IREG (rd);
                                `USED_IREG (rs1);
                                `USED_IREG (rs2);
                                `USED_IREG (rs3);
                            end
                            default:;
                        endcase
                    end
                    default:;
                endcase
            end
            default:;
        endcase
    end

    // disable write to integer register r0
    wire wb = use_rd && (rd_r != 0);

    // SIZE = 0 buffer between fetch and decode outputs
    VX_elastic_buffer #(
        .DATAW (DATAW),
        .SIZE  (0)
    ) req_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (fetch_if.valid),
        .ready_in  (fetch_if.ready),
        .data_in   ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_mod, use_PC, imm, use_imm, wb, rd_r, rs1_r, rs2_r, rs3_r}),
        .data_out  ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod, decode_if.data.use_PC, decode_if.data.imm, decode_if.data.use_imm, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}),
        .valid_out (decode_if.valid),
        .ready_out (decode_if.ready)
    );

    ///////////////////////////////////////////////////////////////////////////

    // report each accepted instruction to the scheduler
    wire fetch_fire = fetch_if.valid && fetch_if.ready;

    assign decode_sched_if.valid     = fetch_fire;
    assign decode_sched_if.wid       = fetch_if.data.wid;
    assign decode_sched_if.is_wstall = is_wstall;

    assign fetch_if.ibuf_pop = decode_if.ibuf_pop;

`ifdef DBG_TRACE_CORE_PIPELINE
    always @(posedge clk) begin
        if (decode_if.valid && decode_if.ready) begin
            `TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, decode_if.data.PC, instr));
            trace_ex_type(1, decode_if.data.ex_type);
            `TRACE(1, (", op="));
            trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod, decode_if.data.rd, decode_if.data.rs2, decode_if.data.use_imm, decode_if.data.imm);
            `TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=0x%0h, opds=%b%b%b%b, use_pc=%b, use_imm=%b (#%0d)\n",
                decode_if.data.op_mod, decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, decode_if.data.imm, use_rd, use_rs1, use_rs2, use_rs3, decode_if.data.use_PC, decode_if.data.use_imm, decode_if.data.uuid));
        end
    end
`endif

endmodule
|
||||
227
hw/rtl/core/VX_dispatch.sv
Normal file
227
hw/rtl/core/VX_dispatch.sv
Normal file
@@ -0,0 +1,227 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS],
|
||||
`endif
|
||||
// inputs
|
||||
VX_operands_if.slave operands_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_dispatch_if.master alu_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_dispatch_if.master lsu_dispatch_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_dispatch_if.master fpu_dispatch_if [`ISSUE_WIDTH],
|
||||
`endif
|
||||
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0][`NT_WIDTH-1:0] last_active_tid;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign tids[i] = `NT_WIDTH'(i);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_find_first #(
|
||||
.N (`NUM_THREADS),
|
||||
.DATAW (`NT_WIDTH),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.valid_in (operands_if[i].data.tmask),
|
||||
.data_in (tids),
|
||||
.data_out (last_active_tid[i]),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
end
|
||||
|
||||
// ALU dispatch
|
||||
|
||||
VX_operands_if alu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign alu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_ALU);
|
||||
assign alu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (alu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
.valid_in (alu_operands_if[i].valid),
|
||||
.ready_in (alu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(alu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (alu_dispatch_if[i].data),
|
||||
.valid_out (alu_dispatch_if[i].valid),
|
||||
.ready_out (alu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
// LSU dispatch
|
||||
|
||||
VX_operands_if lsu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign lsu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_LSU);
|
||||
assign lsu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (lsu_reset),
|
||||
.valid_in (lsu_operands_if[i].valid),
|
||||
.ready_in (lsu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(lsu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (lsu_dispatch_if[i].data),
|
||||
.valid_out (lsu_dispatch_if[i].valid),
|
||||
.ready_out (lsu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
// FPU dispatch
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
||||
VX_operands_if fpu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign fpu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_FPU);
|
||||
assign fpu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (fpu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.valid_in (fpu_operands_if[i].valid),
|
||||
.ready_in (fpu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(fpu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (fpu_dispatch_if[i].data),
|
||||
.valid_out (fpu_dispatch_if[i].valid),
|
||||
.ready_out (fpu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
`endif
|
||||
|
||||
// SFU dispatch
|
||||
|
||||
VX_operands_if sfu_operands_if[`ISSUE_WIDTH]();
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign sfu_operands_if[i].valid = operands_if[i].valid && (operands_if[i].data.ex_type == `EX_SFU);
|
||||
assign sfu_operands_if[i].data = operands_if[i].data;
|
||||
|
||||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) sfu_buffer (
|
||||
.clk (clk),
|
||||
.reset (sfu_reset),
|
||||
.valid_in (sfu_operands_if[i].valid),
|
||||
.ready_in (sfu_operands_if[i].ready),
|
||||
.data_in (`TO_DISPATCH_DATA(sfu_operands_if[i].data, last_active_tid[i])),
|
||||
.data_out (sfu_dispatch_if[i].data),
|
||||
.valid_out (sfu_dispatch_if[i].valid),
|
||||
.ready_out (sfu_dispatch_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
// can take next request?
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign operands_if[i].ready = (alu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_ALU))
|
||||
|| (lsu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_LSU))
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| (fpu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_FPU))
|
||||
`endif
|
||||
|| (sfu_operands_if[i].ready && (operands_if[i].data.ex_type == `EX_SFU));
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_n, perf_stalls_r;
|
||||
wire [`ISSUE_WIDTH-1:0] operands_stall;
|
||||
wire [`ISSUE_WIDTH-1:0][`EX_BITS-1:0] operands_ex_type;
|
||||
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign operands_stall[i] = operands_if[i].valid && ~operands_if[i].ready;
|
||||
assign operands_ex_type[i] = operands_if[i].data.ex_type;
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
perf_stalls_n = perf_stalls_r;
|
||||
for (integer i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
if (operands_stall[i]) begin
|
||||
perf_stalls_n[operands_ex_type[i]] += `PERF_CTR_BITS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_stalls_r <= '0;
|
||||
end else begin
|
||||
perf_stalls_r <= perf_stalls_n;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
|
||||
assign perf_stalls[i] = perf_stalls_r[i];
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), operands_if[i].data.PC));
|
||||
trace_ex_type(1, operands_if[i].data.ex_type);
|
||||
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.op_mod, operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
|
||||
`TRACE_ARRAY1D(1, operands_if[i].data.rs1_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs2_data="));
|
||||
`TRACE_ARRAY1D(1, operands_if[i].data.rs2_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs3_data="));
|
||||
`TRACE_ARRAY1D(1, operands_if[i].data.rs3_data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
256
hw/rtl/core/VX_dispatch_unit.sv
Normal file
256
hw/rtl/core/VX_dispatch_unit.sv
Normal file
@@ -0,0 +1,256 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Dispatch unit: takes one VX_dispatch_if per issue slot and drives
// BLOCK_SIZE execute interfaces. When NUM_LANES is smaller than
// `NUM_THREADS, each dispatched instruction is emitted as a sequence of
// NUM_LANES-wide packets tagged with (pid, sop, eop).
module VX_dispatch_unit import VX_gpu_pkg::*; #(
    parameter BLOCK_SIZE = 1,
    parameter NUM_LANES  = 1,
    parameter OUT_REG    = 0,
    parameter MAX_FANOUT = `MAX_FANOUT
) (
    input wire              clk,
    input wire              reset,

    // inputs
    VX_dispatch_if.slave    dispatch_if [`ISSUE_WIDTH],

    // outputs
    VX_execute_if.master    execute_if [BLOCK_SIZE]

);
    // `NUM_THREADS has to be an exact multiple of NUM_LANES
    `STATIC_ASSERT ((`NUM_THREADS == NUM_LANES * (`NUM_THREADS / NUM_LANES)), ("invalid parameter"))

    localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
    localparam NUM_PACKETS  = `NUM_THREADS / NUM_LANES;
    localparam PID_BITS     = `CLOG2(NUM_PACKETS);
    localparam PID_WIDTH    = `UP(PID_BITS);
    localparam BATCH_COUNT  = `ISSUE_WIDTH / BLOCK_SIZE;
    localparam BATCH_COUNT_W= `LOG2UP(BATCH_COUNT);
    localparam ISSUE_W      = `LOG2UP(`ISSUE_WIDTH);
    localparam IN_DATAW     = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
    localparam OUT_DATAW    = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
    localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT/2));

    // bit offsets into the flattened input record
    localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
    localparam DATA_REGS_OFF  = 0;

    // flatten the interface array into plain vectors so that it can be
    // indexed with a run-time issue index
    wire [`ISSUE_WIDTH-1:0]               dispatch_valid;
    wire [`ISSUE_WIDTH-1:0][IN_DATAW-1:0] dispatch_data;
    wire [`ISSUE_WIDTH-1:0]               dispatch_ready;

    for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
        assign dispatch_valid[isw]    = dispatch_if[isw].valid;
        assign dispatch_data[isw]     = dispatch_if[isw].data;
        assign dispatch_if[isw].ready = dispatch_ready[isw];
    end

    // per-block state exported by the generate loop below
    wire [BLOCK_SIZE-1:0][ISSUE_W-1:0]                   issue_indices;
    wire [BLOCK_SIZE-1:0]                                block_ready;
    wire [BLOCK_SIZE-1:0][NUM_LANES-1:0]                 block_tmask;
    wire [BLOCK_SIZE-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] block_regs;
    wire [BLOCK_SIZE-1:0][PID_WIDTH-1:0]                 block_pid;
    wire [BLOCK_SIZE-1:0]                                block_sop;
    wire [BLOCK_SIZE-1:0]                                block_eop;
    wire [BLOCK_SIZE-1:0]                                block_done;

    // the current batch is finished once every block reports done
    wire batch_done = (& block_done);

    // batch counter: advances (and wraps) whenever a batch completes
    logic [BATCH_COUNT_W-1:0] batch_idx;
    if (BATCH_COUNT != 1) begin
        always @(posedge clk) begin
            if (reset) begin
                batch_idx <= '0;
            end else if (batch_done) begin
                batch_idx <= batch_idx + BATCH_COUNT_W'(1);
            end
        end
    end else begin
        assign batch_idx = 0;
        `UNUSED_VAR (batch_done)
    end

    for (genvar blk = 0; blk < BLOCK_SIZE; ++blk) begin

        // issue slot served by this block during the current batch
        wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(blk);
        assign issue_indices[blk] = issue_idx;

        // local view of the selected issue slot
        wire                issue_valid = dispatch_valid[issue_idx];
        wire [IN_DATAW-1:0] issue_data  = dispatch_data[issue_idx];

        wire valid_p;
        wire ready_p;

        if (`NUM_THREADS != NUM_LANES) begin
            // -------- multi-packet mode --------
            reg  [NUM_PACKETS-1:0] sent_mask_p;   // bit set once that packet has fired
            reg                    is_first_p;    // no packet fired yet since last restart
            wire [PID_WIDTH-1:0]   start_p_n;
            wire [PID_WIDTH-1:0]   start_p;
            wire [PID_WIDTH-1:0]   end_p;
            wire                   dispatch_valid_r;

            wire fire_p    = valid_p && ready_p;
            wire is_last_p = (start_p == end_p);
            wire fire_eop  = fire_p && is_last_p;

            // packet tracking restarts on batch completion when batching is
            // in use, otherwise as soon as the last packet fires
            wire restart_p = (BATCH_COUNT != 1) ? batch_done : fire_eop;

            always @(posedge clk) begin
                if (reset || restart_p) begin
                    sent_mask_p <= '0;
                    is_first_p  <= 1;
                end else if (fire_p) begin
                    sent_mask_p[start_p] <= 1;
                    is_first_p  <= 0;
                end
            end

            wire [NUM_PACKETS-1:0][NUM_LANES-1:0]                 per_packet_tmask;
            wire [NUM_PACKETS-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] per_packet_regs;

            // unpack the thread mask and the three operand arrays
            wire [`NUM_THREADS-1:0]            dispatch_tmask    = issue_data[DATA_TMASK_OFF +: `NUM_THREADS];
            wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data = issue_data[DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
            wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data = issue_data[DATA_REGS_OFF + 1 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
            wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data = issue_data[DATA_REGS_OFF + 0 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];

            // regroup per-thread fields into NUM_PACKETS groups of NUM_LANES
            for (genvar p = 0; p < NUM_PACKETS; ++p) begin
                for (genvar l = 0; l < NUM_LANES; ++l) begin
                    localparam tid = p * NUM_LANES + l;
                    assign per_packet_tmask[p][l]   = dispatch_tmask[tid];
                    assign per_packet_regs[p][0][l] = dispatch_rs1_data[tid];
                    assign per_packet_regs[p][1][l] = dispatch_rs2_data[tid];
                    assign per_packet_regs[p][2][l] = dispatch_rs3_data[tid];
                end
            end

            wire [NUM_PACKETS-1:0]                packet_valids;
            wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids;

            // a packet is worth sending only if at least one lane is active
            for (genvar p = 0; p < NUM_PACKETS; ++p) begin
                assign packet_valids[p] = (| per_packet_tmask[p]);
                assign packet_ids[p]    = PID_WIDTH'(p);
            end

            // next packet id among active packets that have not fired yet
            VX_find_first #(
                .N       (NUM_PACKETS),
                .DATAW   (PID_WIDTH),
                .REVERSE (0)
            ) find_first (
                .valid_in (packet_valids & ~sent_mask_p),
                .data_in  (packet_ids),
                .data_out (start_p_n),
                `UNUSED_PIN (valid_out)
            );

            // final packet id among all active packets (search with REVERSE=1)
            VX_find_first #(
                .N       (NUM_PACKETS),
                .DATAW   (PID_WIDTH),
                .REVERSE (1)
            ) find_last (
                .valid_in (packet_valids),
                .data_in  (packet_ids),
                .data_out (end_p),
                `UNUSED_PIN (valid_out)
            );

            // optional register stage on {valid, packet id}, present only
            // when FANOUT_ENABLE is set
            VX_pipe_register #(
                .DATAW  (1 + PID_WIDTH),
                .RESETW (1),
                .DEPTH  (FANOUT_ENABLE ? 1 : 0)
            ) pipe_reg (
                .clk      (clk),
                .reset    (reset || fire_p), // should flush on fire
                .enable   (1'b1),
                .data_in  ({issue_valid, start_p_n}),
                .data_out ({dispatch_valid_r, start_p})
            );

            wire [NUM_LANES-1:0]                 tmask_p = per_packet_tmask[start_p];
            wire [2:0][NUM_LANES-1:0][`XLEN-1:0] regs_p  = per_packet_regs[start_p];

            // with batching, a block pauses once all of its packet bits are set
            wire block_enable = (BATCH_COUNT == 1 || ~(& sent_mask_p));

            assign valid_p          = dispatch_valid_r && block_enable;
            assign block_tmask[blk] = tmask_p;
            assign block_regs[blk]  = regs_p;
            assign block_pid[blk]   = start_p;
            assign block_sop[blk]   = is_first_p;
            assign block_eop[blk]   = is_last_p;
            if (FANOUT_ENABLE) begin
                assign block_ready[blk] = dispatch_valid_r && ready_p && block_enable;
            end else begin
                assign block_ready[blk] = ready_p && block_enable;
            end
            assign block_done[blk]  = ~issue_valid || fire_eop;
        end else begin
            // -------- single-packet mode: the whole warp goes out at once --------
            assign valid_p            = issue_valid;
            assign block_tmask[blk]   = issue_data[DATA_TMASK_OFF +: `NUM_THREADS];
            assign block_regs[blk][0] = issue_data[DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
            assign block_regs[blk][1] = issue_data[DATA_REGS_OFF + 1 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
            assign block_regs[blk][2] = issue_data[DATA_REGS_OFF + 0 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
            assign block_pid[blk]     = '0;
            assign block_sop[blk]     = 1'b1;
            assign block_eop[blk]     = 1'b1;
            assign block_ready[blk]   = ready_p;
            assign block_done[blk]    = ~valid_p || ready_p;
        end

        // issue-slot index handed to wis_to_wid()
        wire [ISSUE_IDX_W-1:0] wsi;
        if (BATCH_COUNT == 1) begin
            assign wsi = blk;
        end else if (BLOCK_SIZE != 1) begin
            assign wsi = {batch_idx, BLOCK_SIZE_W'(blk)};
        end else begin
            assign wsi = batch_idx;
        end

        `RESET_RELAY(buf_out_reset, reset);

        wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(issue_data[DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], wsi);

        // output record: same layout as the input, with wis replaced by wid,
        // per-thread fields replaced by per-lane fields, and {pid, sop, eop}
        // appended
        VX_elastic_buffer #(
            .DATAW   (OUT_DATAW),
            .SIZE    (`OUT_REG_TO_EB_SIZE(OUT_REG)),
            .OUT_REG (`OUT_REG_TO_EB_REG(OUT_REG))
        ) buf_out (
            .clk       (clk),
            .reset     (buf_out_reset),
            .valid_in  (valid_p),
            .ready_in  (ready_p),
            .data_in   ({
                issue_data[IN_DATAW-1 : DATA_TMASK_OFF+`NUM_THREADS+ISSUE_WIS_W],
                block_wid,
                block_tmask[blk],
                issue_data[DATA_TMASK_OFF-1 : DATA_REGS_OFF + 3 * `NUM_THREADS * `XLEN],
                block_regs[blk][0],
                block_regs[blk][1],
                block_regs[blk][2],
                block_pid[blk],
                block_sop[blk],
                block_eop[blk]}),
            .data_out  (execute_if[blk].data),
            .valid_out (execute_if[blk].valid),
            .ready_out (execute_if[blk].ready)
        );
    end

    // an issue slot is released only when its block accepts the last packet;
    // slots that are not part of the current batch stay not-ready
    reg [`ISSUE_WIDTH-1:0] ready_in;
    always @(*) begin
        ready_in = 0;
        for (integer b = 0; b < BLOCK_SIZE; ++b) begin
            ready_in[issue_indices[b]] = block_ready[b] && block_eop[b];
        end
    end
    assign dispatch_ready = ready_in;

endmodule
|
||||
137
hw/rtl/core/VX_execute.sv
Normal file
137
hw/rtl/core/VX_execute.sv
Normal file
@@ -0,0 +1,137 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Execute stage wrapper: instantiates the ALU, LSU, SFU and (when
// EXT_F_ENABLE is defined) FPU units, each behind its own reset relay,
// and wires them to their dispatch/commit interfaces.
module VX_execute import VX_gpu_pkg::*; #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_DECL

    input wire              clk,
    input wire              reset,

    input base_dcrs_t       base_dcrs,

    // Dcache interface
    VX_mem_bus_if.master    dcache_bus_if [DCACHE_NUM_REQS],

    // commit interface
    VX_commit_csr_if.slave  commit_csr_if,

    // fetch interface
    VX_sched_csr_if.slave   sched_csr_if,

`ifdef PERF_ENABLE
    VX_mem_perf_if.slave    mem_perf_if,
    VX_pipeline_perf_if.slave pipeline_perf_if,
`endif

`ifdef EXT_F_ENABLE
    VX_dispatch_if.slave    fpu_dispatch_if [`ISSUE_WIDTH],
    VX_commit_if.master     fpu_commit_if [`ISSUE_WIDTH],
`endif

    VX_dispatch_if.slave    alu_dispatch_if [`ISSUE_WIDTH],
    VX_commit_if.master     alu_commit_if [`ISSUE_WIDTH],
    VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS],

    VX_dispatch_if.slave    lsu_dispatch_if [`ISSUE_WIDTH],
    VX_commit_if.master     lsu_commit_if [`ISSUE_WIDTH],

    VX_dispatch_if.slave    sfu_dispatch_if [`ISSUE_WIDTH],
    VX_commit_if.master     sfu_commit_if [`ISSUE_WIDTH],
    VX_warp_ctl_if.master   warp_ctl_if,

    // simulation helper signals
    output wire             sim_ebreak
);

`ifdef EXT_F_ENABLE
    // link from the FPU blocks to the SFU unit (bound to both below)
    VX_fpu_to_csr_if fpu_to_csr_if[`NUM_FPU_BLOCKS]();
`endif

    // ---------------- ALU ----------------

    `RESET_RELAY (alu_reset, reset);

    VX_alu_unit #(
        .CORE_ID (CORE_ID)
    ) alu_unit (
        .clk            (clk),
        .reset          (alu_reset),
        .dispatch_if    (alu_dispatch_if),
        .branch_ctl_if  (branch_ctl_if),
        .commit_if      (alu_commit_if)
    );

    // ---------------- LSU ----------------

    `SCOPE_IO_SWITCH (1)

    `RESET_RELAY (lsu_reset, reset);

    VX_lsu_unit #(
        .CORE_ID (CORE_ID)
    ) lsu_unit (
        `SCOPE_IO_BIND  (0)
        .clk            (clk),
        .reset          (lsu_reset),
        .cache_bus_if   (dcache_bus_if),
        .dispatch_if    (lsu_dispatch_if),
        .commit_if      (lsu_commit_if)
    );

    // ---------------- FPU (optional) ----------------

`ifdef EXT_F_ENABLE
    `RESET_RELAY (fpu_reset, reset);

    VX_fpu_unit #(
        .CORE_ID (CORE_ID)
    ) fpu_unit (
        .clk            (clk),
        .reset          (fpu_reset),
        .dispatch_if    (fpu_dispatch_if),
        .fpu_to_csr_if  (fpu_to_csr_if),
        .commit_if      (fpu_commit_if)
    );
`endif

    // ---------------- SFU ----------------

    `RESET_RELAY (sfu_reset, reset);

    VX_sfu_unit #(
        .CORE_ID (CORE_ID)
    ) sfu_unit (
        .clk            (clk),
        .reset          (sfu_reset),

    `ifdef PERF_ENABLE
        .mem_perf_if    (mem_perf_if),
        .pipeline_perf_if (pipeline_perf_if),
    `endif

        .base_dcrs      (base_dcrs),

        .dispatch_if    (sfu_dispatch_if),

    `ifdef EXT_F_ENABLE
        .fpu_to_csr_if  (fpu_to_csr_if),
    `endif

        .commit_csr_if  (commit_csr_if),
        .sched_csr_if   (sched_csr_if),
        .warp_ctl_if    (warp_ctl_if),
        .commit_if      (sfu_commit_if)
    );

    // simulation helper signal to get RISC-V tests Pass/Fail status:
    // raised when ALU dispatch slot 0 hands off a branch-class instruction
    // with wis == 0 whose op is EBREAK or ECALL
    wire alu0_fire = alu_dispatch_if[0].valid && alu_dispatch_if[0].ready;
    wire alu0_wis0 = (alu_dispatch_if[0].data.wis == 0);
    wire [`INST_BR_BITS-1:0] alu0_br_op = `INST_BR_BITS'(alu_dispatch_if[0].data.op_type);
    wire alu0_trap_op = (alu0_br_op == `INST_BR_EBREAK)
                     || (alu0_br_op == `INST_BR_ECALL);

    assign sim_ebreak = alu0_fire
                     && alu0_wis0
                     && (`INST_ALU_IS_BR(alu_dispatch_if[0].data.op_mod))
                     && alu0_trap_op;

endmodule
|
||||
184
hw/rtl/core/VX_fetch.sv
Normal file
184
hw/rtl/core/VX_fetch.sv
Normal file
@@ -0,0 +1,184 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Fetch stage: turns scheduler requests into icache read requests and
// converts icache responses into fetch_if transactions. The requesting
// warp's PC and thread mask are parked in a small RAM indexed by warp id
// and read back using the warp id carried in the response tag.
module VX_fetch import VX_gpu_pkg::*; #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_DECL

    input wire              clk,
    input wire              reset,

    // Icache interface
    VX_mem_bus_if.master    icache_bus_if,

    // inputs
    VX_schedule_if.slave    schedule_if,

    // outputs
    VX_fetch_if.master      fetch_if
);
    `UNUSED_PARAM (CORE_ID)
    `UNUSED_VAR (reset)
    localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);

    // ---------------- request-side signals ----------------

    wire                         icache_req_valid;
    wire                         icache_req_ready;
    wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
    wire [ICACHE_TAG_WIDTH-1:0]  icache_req_tag;

    wire icache_req_fire = icache_req_valid && icache_req_ready;

    // issue slot of the warp being scheduled, and the warp id used as tag
    wire [ISW_WIDTH-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
    wire [`NW_WIDTH-1:0] req_tag      = schedule_if.data.wid;

    // ---------------- response-side signals ----------------

    wire [`UUID_WIDTH-1:0]   rsp_uuid;
    wire [`NW_WIDTH-1:0]     rsp_tag;
    wire [`XLEN-1:0]         rsp_PC;
    wire [`NUM_THREADS-1:0]  rsp_tmask;

    // the response tag carries {uuid, warp id}
    assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;

    // per-warp store of {PC, tmask}: written when a request fires,
    // looked up by the warp id of the incoming response
    VX_dp_ram #(
        .DATAW  (`XLEN + `NUM_THREADS),
        .SIZE   (`NUM_WARPS),
        .LUTRAM (1)
    ) tag_store (
        .clk   (clk),
        .read  (1'b1),
        .write (icache_req_fire),
        `UNUSED_PIN (wren),
        .waddr (req_tag),
        .wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
        .raddr (rsp_tag),
        .rdata ({rsp_PC, rsp_tmask})
    );

    // Ensure that the ibuffer doesn't fill up.
    // This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
    // This issue is particularly prevalent when the icache and dcache is disabled and both requests share the same bus.
    // One counter per issue slot: incremented when a request for that slot
    // fires, decremented on the matching ibuf_pop.
    wire [`ISSUE_WIDTH-1:0] pending_ibuf_full;
    for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
        VX_pending_size #(
            .SIZE (`IBUF_SIZE)
        ) pending_reads (
            .clk   (clk),
            .reset (reset),
            .incr  (icache_req_fire && schedule_isw == isw),
            .decr  (fetch_if.ibuf_pop[isw]),
            .full  (pending_ibuf_full[isw]),
            `UNUSED_PIN (size),
            `UNUSED_PIN (empty)
        );
    end

    `RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
        ("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))

    // ---------------- Icache Request ----------------

    // the target slot must have room before another request may be issued
    wire ibuf_ready = ~pending_ibuf_full[schedule_isw];

    assign icache_req_valid  = schedule_if.valid && ibuf_ready;
    assign schedule_if.ready = icache_req_ready && ibuf_ready;
    assign icache_req_addr   = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2];
    assign icache_req_tag    = {schedule_if.data.uuid, req_tag};

    VX_elastic_buffer #(
        .DATAW   (ICACHE_ADDR_WIDTH + ICACHE_TAG_WIDTH),
        .SIZE    (2),
        .OUT_REG (1) // external bus should be registered
    ) req_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (icache_req_valid),
        .ready_in  (icache_req_ready),
        .data_in   ({icache_req_addr, icache_req_tag}),
        .data_out  ({icache_bus_if.req_data.addr, icache_bus_if.req_data.tag}),
        .valid_out (icache_bus_if.req_valid),
        .ready_out (icache_bus_if.req_ready)
    );

    // instruction fetches are reads with all four byte enables set
    assign icache_bus_if.req_data.rw     = 0;
    assign icache_bus_if.req_data.byteen = 4'b1111;
    assign icache_bus_if.req_data.data   = '0;

    // ---------------- Icache Response ----------------

    assign fetch_if.valid      = icache_bus_if.rsp_valid;
    assign fetch_if.data.uuid  = rsp_uuid;
    assign fetch_if.data.wid   = rsp_tag;
    assign fetch_if.data.tmask = rsp_tmask;
    assign fetch_if.data.PC    = rsp_PC;
    assign fetch_if.data.instr = icache_bus_if.rsp_data.data;
    assign icache_bus_if.rsp_ready = fetch_if.ready;

    // ---------------- debug scope taps (core 0 only) ----------------

`ifdef DBG_SCOPE_FETCH
    if (CORE_ID == 0) begin
    `ifdef SCOPE
        wire schedule_fire   = schedule_if.valid && schedule_if.ready;
        wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
        VX_scope_tap #(
            .SCOPE_ID (1),
            .TRIGGERW (4),
            .PROBEW   (3*`UUID_WIDTH + 108)
        ) scope_tap (
            .clk(clk),
            .reset(scope_reset),
            .start(1'b0),
            .stop(1'b0),
            .triggers({
                reset,
                schedule_fire,
                icache_req_fire,
                icache_rsp_fire
            }),
            .probes({
                schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
                icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
                icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
            }),
            .bus_in(scope_bus_in),
            .bus_out(scope_bus_out)
        );
    `endif
    `ifdef CHIPSCOPE
        ila_fetch ila_fetch_inst (
            .clk    (clk),
            .probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
            .probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
            .probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
        );
    `endif
    end
`else
    `SCOPE_IO_UNUSED()
`endif

    // ---------------- optional request/response tracing ----------------

`ifdef DBG_TRACE_CORE_ICACHE
    wire schedule_fire = schedule_if.valid && schedule_if.ready;
    wire fetch_fire    = fetch_if.valid && fetch_if.ready;
    always @(posedge clk) begin
        if (schedule_fire) begin
            `TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, schedule_if.data.PC, schedule_if.data.tmask, schedule_if.data.uuid));
        end
        if (fetch_fire) begin
            `TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, fetch_if.data.PC, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
        end
    end
`endif

endmodule
|
||||
129
hw/rtl/core/VX_gather_unit.sv
Normal file
129
hw/rtl/core/VX_gather_unit.sv
Normal file
@@ -0,0 +1,129 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Gather unit: routes BLOCK_SIZE commit inputs onto `ISSUE_WIDTH commit
// outputs according to the issue-slot index derived from each input, and
// widens the NUM_LANES-wide payload to `NUM_THREADS using the packet id.
module VX_gather_unit import VX_gpu_pkg::*; #(
    parameter BLOCK_SIZE = 1,
    parameter NUM_LANES  = 1,
    parameter OUT_REG    = 0
) (
    input wire              clk,
    input wire              reset,

    // inputs
    VX_commit_if.slave      commit_in_if [BLOCK_SIZE],

    // outputs
    VX_commit_if.master     commit_out_if [`ISSUE_WIDTH]

);
    localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
    localparam PID_BITS     = `CLOG2(`NUM_THREADS / NUM_LANES);
    localparam PID_WIDTH    = `UP(PID_BITS);
    localparam DATAW        = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
    // bit offset of the field that follows uuid in the flattened record
    localparam DATA_WIS_OFF = DATAW - (`UUID_WIDTH + `NW_WIDTH);

    wire [BLOCK_SIZE-1:0]                  commit_in_valid;
    wire [BLOCK_SIZE-1:0][DATAW-1:0]       commit_in_data;
    wire [BLOCK_SIZE-1:0]                  commit_in_ready;
    wire [BLOCK_SIZE-1:0][ISSUE_IDX_W-1:0] commit_in_wsi;

    // flatten the input interfaces and work out each input's target slot
    for (genvar blk = 0; blk < BLOCK_SIZE; ++blk) begin
        assign commit_in_valid[blk]    = commit_in_if[blk].valid;
        assign commit_in_data[blk]     = commit_in_if[blk].data;
        assign commit_in_if[blk].ready = commit_in_ready[blk];

        if (BLOCK_SIZE == `ISSUE_WIDTH) begin
            // one block per issue slot: fixed mapping
            assign commit_in_wsi[blk] = BLOCK_SIZE_W'(blk);
        end else if (BLOCK_SIZE != 1) begin
            // upper bits come from the record, lower bits are the block index
            assign commit_in_wsi[blk] = {commit_in_data[blk][DATA_WIS_OFF+BLOCK_SIZE_W +: (ISSUE_IDX_W-BLOCK_SIZE_W)], BLOCK_SIZE_W'(blk)};
        end else begin
            // single block: the whole index comes from the record
            assign commit_in_wsi[blk] = commit_in_data[blk][DATA_WIS_OFF +: ISSUE_IDX_W];
        end
    end

    reg  [`ISSUE_WIDTH-1:0]            commit_out_valid;
    reg  [`ISSUE_WIDTH-1:0][DATAW-1:0] commit_out_data;
    wire [`ISSUE_WIDTH-1:0]            commit_out_ready;

    // scatter inputs onto their target slots; unaddressed slots stay
    // invalid with don't-care data
    always @(*) begin
        commit_out_valid = '0;
        commit_out_data  = 'x;
        for (integer b = 0; b < BLOCK_SIZE; ++b) begin
            commit_out_valid[commit_in_wsi[b]] = commit_in_valid[b];
            commit_out_data[commit_in_wsi[b]]  = commit_in_data[b];
        end
    end

    // back-pressure follows the slot each input is steered to
    for (genvar blk = 0; blk < BLOCK_SIZE; ++blk) begin
        assign commit_in_ready[blk] = commit_out_ready[commit_in_wsi[blk]];
    end

    for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
        VX_commit_if #(
            .NUM_LANES (NUM_LANES)
        ) commit_tmp_if();

        `RESET_RELAY(commit_out_reset, reset);

        VX_elastic_buffer #(
            .DATAW   (DATAW),
            .SIZE    (`OUT_REG_TO_EB_SIZE(OUT_REG)),
            .OUT_REG (`OUT_REG_TO_EB_REG(OUT_REG))
        ) out_buf (
            .clk       (clk),
            .reset     (commit_out_reset),
            .valid_in  (commit_out_valid[isw]),
            .ready_in  (commit_out_ready[isw]),
            .data_in   (commit_out_data[isw]),
            .data_out  (commit_tmp_if.data),
            .valid_out (commit_tmp_if.valid),
            .ready_out (commit_tmp_if.ready)
        );

        // expand the lane-wide mask/data to thread-wide: lane l of packet
        // pid lands at thread index pid * NUM_LANES + l
        logic [`NUM_THREADS-1:0]            commit_tmask_r;
        logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_r;
        if (PID_BITS != 0) begin
            always @(*) begin
                commit_tmask_r = '0;
                commit_data_r  = 'x;
                for (integer l = 0; l < NUM_LANES; ++l) begin
                    commit_tmask_r[commit_tmp_if.data.pid * NUM_LANES + l] = commit_tmp_if.data.tmask[l];
                    commit_data_r[commit_tmp_if.data.pid * NUM_LANES + l]  = commit_tmp_if.data.data[l];
                end
            end
        end else begin
            // a single packet: lanes already line up with threads
            assign commit_tmask_r = commit_tmp_if.data.tmask;
            assign commit_data_r  = commit_tmp_if.data.data;
        end

        assign commit_out_if[isw].valid = commit_tmp_if.valid;
        assign commit_tmp_if.ready      = commit_out_if[isw].ready;
        assign commit_out_if[isw].data  = {
            commit_tmp_if.data.uuid,
            commit_tmp_if.data.wid,
            commit_tmask_r,
            commit_tmp_if.data.PC,
            commit_tmp_if.data.wb,
            commit_tmp_if.data.rd,
            commit_data_r,
            1'b0, // PID
            commit_tmp_if.data.sop,
            commit_tmp_if.data.eop
        };
    end

endmodule
|
||||
73
hw/rtl/core/VX_ibuffer.sv
Normal file
73
hw/rtl/core/VX_ibuffer.sv
Normal file
@@ -0,0 +1,73 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Instruction buffer: one elastic buffer per issue slot. Each decoded
// instruction is steered to the slot selected by wid_to_isw() of its
// warp id, and a pop pulse is reported per slot on hand-off.
module VX_ibuffer import VX_gpu_pkg::*; #(
    parameter CORE_ID = 0
) (
    input wire              clk,
    input wire              reset,

    // inputs
    VX_decode_if.slave      decode_if,

    // outputs
    VX_ibuffer_if.master    ibuffer_if [`ISSUE_WIDTH]
);
    `UNUSED_PARAM (CORE_ID)
    localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);
    localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4);

    // split the warp id into its issue-slot selector and per-slot warp index
    wire [ISW_WIDTH-1:0]   target_isw;
    wire [ISSUE_WIS_W-1:0] target_wis;
    assign target_isw = wid_to_isw(decode_if.data.wid);
    assign target_wis = wid_to_wis(decode_if.data.wid);

    // record stored in every slot; it is the same for all slots, so it is
    // built once here (the warp id is replaced by its per-slot index)
    wire [DATAW-1:0] decode_payload;
    assign decode_payload = {
        decode_if.data.uuid,
        target_wis,
        decode_if.data.tmask,
        decode_if.data.ex_type,
        decode_if.data.op_type,
        decode_if.data.op_mod,
        decode_if.data.wb,
        decode_if.data.use_PC,
        decode_if.data.use_imm,
        decode_if.data.PC,
        decode_if.data.imm,
        decode_if.data.rd,
        decode_if.data.rs1,
        decode_if.data.rs2,
        decode_if.data.rs3
    };

    // decode is stalled by the readiness of the targeted slot only
    wire [`ISSUE_WIDTH-1:0] slot_ready;
    assign decode_if.ready = slot_ready[target_isw];

    for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
        VX_elastic_buffer #(
            .DATAW   (DATAW),
            .SIZE    (`IBUF_SIZE),
            .OUT_REG (1)
        ) instr_buf (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (decode_if.valid && target_isw == isw),
            .ready_in  (slot_ready[isw]),
            .data_in   (decode_payload),
            .data_out  (ibuffer_if[isw].data),
            .valid_out (ibuffer_if[isw].valid),
            .ready_out (ibuffer_if[isw].ready)
        );

        // pulse whenever an entry leaves this slot's buffer
        assign decode_if.ibuf_pop[isw] = ibuffer_if[isw].valid && ibuffer_if[isw].ready;
    end

endmodule
|
||||
191
hw/rtl/core/VX_int_unit.sv
Normal file
191
hw/rtl/core/VX_int_unit.sv
Normal file
@@ -0,0 +1,191 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Integer ALU and branch-resolution unit.
//
// For every lane the unit evaluates add, subtract/compare, right-shift and
// and/or/xor/shift-left results (plus 32-bit "W" variants), selects one of
// them according to {is_alu_w, op_class}, and buffers the selection together
// with the instruction metadata in `rsp_buf` before driving `commit_if`.
// Branches are resolved from the buffered result of the lane selected by
// `tid` and reported on `branch_ctl_if` one cycle after the commit handshake.
//
// Parameters:
//   CORE_ID   - only used by the debug trace message.
//   BLOCK_IDX - forwarded to `ASSIGN_BLOCKED_WID when computing the branch wid.
//   NUM_LANES - number of lanes processed in parallel.
module VX_int_unit #(
    parameter CORE_ID   = 0,
    parameter BLOCK_IDX = 0,
    parameter NUM_LANES = 1
) (
    input wire              clk,
    input wire              reset,

    // Inputs
    VX_execute_if.slave     execute_if,

    // Outputs
    VX_commit_if.master     commit_if,
    VX_branch_ctl_if.master branch_ctl_if
);

    `UNUSED_PARAM (CORE_ID)
    localparam LANE_BITS      = `CLOG2(NUM_LANES);
    localparam LANE_WIDTH     = `UP(LANE_BITS);
    localparam PID_BITS       = `CLOG2(`NUM_THREADS / NUM_LANES);
    localparam PID_WIDTH      = `UP(PID_BITS);
    localparam SHIFT_IMM_BITS = `CLOG2(`XLEN);

    `UNUSED_VAR (execute_if.data.rs3_data)

    // Full-width (XLEN) per-lane results.
    wire [NUM_LANES-1:0][`XLEN-1:0] add_result;
    wire [NUM_LANES-1:0][`XLEN:0]   sub_result; // +1 bit for branch compare
    wire [NUM_LANES-1:0][`XLEN-1:0] shr_result;
    reg  [NUM_LANES-1:0][`XLEN-1:0] msc_result;

    // 32-bit ("W") per-lane results, sign-extended to XLEN.
    // All four are driven by continuous assignments, hence nets.
    wire [NUM_LANES-1:0][`XLEN-1:0] add_result_w;
    wire [NUM_LANES-1:0][`XLEN-1:0] sub_result_w;
    wire [NUM_LANES-1:0][`XLEN-1:0] shr_result_w;
    wire [NUM_LANES-1:0][`XLEN-1:0] msc_result_w;

    reg  [NUM_LANES-1:0][`XLEN-1:0] alu_result;
    wire [NUM_LANES-1:0][`XLEN-1:0] alu_result_r;

`ifdef XLEN_64
    wire is_alu_w = `INST_ALU_IS_W(execute_if.data.op_mod);
`else
    wire is_alu_w = 0;
`endif

    `UNUSED_VAR (execute_if.data.op_mod)

    // The same op_type field is viewed either as an ALU opcode or as a
    // branch opcode; is_br_op (decoded from op_mod) tells which one applies.
    wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(execute_if.data.op_type);
    wire [`INST_BR_BITS-1:0]   br_op = `INST_BR_BITS'(execute_if.data.op_type);
    wire                    is_br_op = `INST_ALU_IS_BR(execute_if.data.op_mod);
    wire                   is_sub_op = `INST_ALU_IS_SUB(alu_op);
    wire                   is_signed = `INST_ALU_SIGNED(alu_op);
    wire [1:0]              op_class = is_br_op ? `INST_BR_CLASS(alu_op) : `INST_ALU_CLASS(alu_op);

    wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1 = execute_if.data.rs1_data;
    wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2 = execute_if.data.rs2_data;

    // Operand muxing: PC may replace rs1, the immediate may replace rs2.
    // Branches always compare rs1 against rs2, so alu_in2_br ignores the
    // immediate when is_br_op is set (the immediate is then the PC offset).
    wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1_PC  = execute_if.data.use_PC ? {NUM_LANES{execute_if.data.PC}} : alu_in1;
    wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.use_imm ? {NUM_LANES{execute_if.data.imm}} : alu_in2;
    wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br  = (execute_if.data.use_imm && ~is_br_op) ? {NUM_LANES{execute_if.data.imm}} : alu_in2;

    // Addition
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        assign add_result[i]   = alu_in1_PC[i] + alu_in2_imm[i];
        assign add_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0]));
    end

    // Subtraction / comparison: operands are extended by one bit (sign bit
    // when is_signed, zero otherwise) so bit XLEN of the difference gives
    // the "less than" outcome.
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        wire [`XLEN:0] sub_in1 = {is_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
        wire [`XLEN:0] sub_in2 = {is_signed & alu_in2_br[i][`XLEN-1], alu_in2_br[i]};
        assign sub_result[i]   = sub_in1 - sub_in2;
        assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]));
    end

    // Right shifts: the extra top bit makes one arithmetic shift serve both
    // the logical (top bit 0) and arithmetic (top bit = sign) cases.
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
        assign shr_result[i] = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);

        wire [32:0] shr_in1_w = {is_signed && alu_in1[i][31], alu_in1[i][31:0]};
        wire [31:0] shr_res_w = 32'($signed(shr_in1_w) >>> alu_in2_imm[i][4:0]);
        assign shr_result_w[i] = `XLEN'($signed(shr_res_w));
    end

    // Bitwise operations and left shift
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        always @(*) begin
            case (alu_op[1:0])
                2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i];  // AND
                2'b01: msc_result[i] = alu_in1[i] | alu_in2_imm[i];  // OR
                2'b10: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];  // XOR
                2'b11: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]; // SLL
            endcase
        end
        assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0]));
    end

    // Final per-lane result selection
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        // bit 0: "less than" (extended MSB of the difference)
        // bit 1: "equal" (difference is zero), only reported for branches
        wire [`XLEN-1:0] slt_br_result = `XLEN'({is_br_op && ~(| sub_result[i][`XLEN-1:0]), sub_result[i][`XLEN]});
        wire [`XLEN-1:0] sub_slt_br_result = (is_sub_op && ~is_br_op) ? sub_result[i][`XLEN-1:0] : slt_br_result;
        always @(*) begin
            case ({is_alu_w, op_class})
                3'b000: alu_result[i] = add_result[i];      // ADD, LUI, AUIPC
                3'b001: alu_result[i] = sub_slt_br_result;  // SUB, SLTU, SLTI, BR*
                3'b010: alu_result[i] = shr_result[i];      // SRL, SRA, SRLI, SRAI
                3'b011: alu_result[i] = msc_result[i];      // AND, OR, XOR, SLL, SLLI
                3'b100: alu_result[i] = add_result_w[i];    // ADDIW, ADDW
                3'b101: alu_result[i] = sub_result_w[i];    // SUBW
                3'b110: alu_result[i] = shr_result_w[i];    // SRLW, SRAW, SRLIW, SRAIW
                3'b111: alu_result[i] = msc_result_w[i];    // SLLW
            endcase
        end
    end

    // branch

    wire [`XLEN-1:0] PC_r, imm_r;
    wire [`INST_BR_BITS-1:0] br_op_r;
    wire [LANE_WIDTH-1:0] tid, tid_r;
    wire is_br_op_r;

    // Lane whose result decides the branch (lane 0 when there is one lane).
    if (LANE_BITS != 0) begin
        assign tid = execute_if.data.tid[0 +: LANE_BITS];
    end else begin
        assign tid = 0;
    end

    // Buffers the selected result and the instruction metadata between the
    // execute and commit handshakes.
    VX_elastic_buffer #(
        .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + (NUM_LANES * `XLEN) + `XLEN + `XLEN + 1 + `INST_BR_BITS + LANE_WIDTH)
    ) rsp_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (execute_if.valid),
        .ready_in  (execute_if.ready),
        .data_in   ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, execute_if.data.imm, is_br_op, br_op, tid}),
        .data_out  ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, alu_result_r, PC_r, imm_r, is_br_op_r, br_op_r, tid_r}),
        .valid_out (commit_if.valid),
        .ready_out (commit_if.ready)
    );

    `UNUSED_VAR (br_op_r)
    wire is_br_neg    = `INST_BR_IS_NEG(br_op_r);
    wire is_br_less   = `INST_BR_IS_LESS(br_op_r);
    wire is_br_static = `INST_BR_IS_STATIC(br_op_r);
    wire [`XLEN-1:0] br_result = alu_result_r[tid_r];

    wire is_less  = br_result[0];
    wire is_equal = br_result[1];

    // A branch is reported only when the final packet (eop) of a branch
    // instruction completes the commit handshake.
    wire br_enable = is_br_op_r && commit_if.valid && commit_if.ready && commit_if.data.eop;
    wire br_taken  = ((is_br_less ? is_less : is_equal) ^ is_br_neg) | is_br_static;
    wire [`XLEN-1:0] br_dest = is_br_static ? br_result : (PC_r + imm_r);
    wire [`NW_WIDTH-1:0] br_wid;
    `ASSIGN_BLOCKED_WID (br_wid, commit_if.data.wid, BLOCK_IDX, `NUM_ALU_BLOCKS)

    // NOTE(review): no reset width is configured on this register, so it
    // should be confirmed that branch_ctl_if.valid is cleared by reset
    // through VX_pipe_register's defaults.
    VX_pipe_register #(
        .DATAW (1 + `NW_WIDTH + 1 + `XLEN)
    ) branch_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (1'b1),
        .data_in  ({br_enable, br_wid, br_taken, br_dest}),
        .data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
    );

    // Static branches write back the return address (PC + 4) instead of the
    // ALU result.
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? (PC_r + 4) : alu_result_r[i];
    end

    assign commit_if.data.PC = PC_r;

`ifdef DBG_TRACE_CORE_PIPELINE
    // Trace at the commit handshake (br_enable) rather than on the registered
    // branch_ctl_if outputs: branch_ctl_if lags commit_if by one cycle, so
    // sampling commit_if.data.PC/uuid under branch_ctl_if.valid could print
    // the PC/uuid of a different instruction.
    always @(posedge clk) begin
        if (br_enable) begin
            `TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
                $time, CORE_ID, br_wid, commit_if.data.PC, br_taken, br_dest, commit_if.data.uuid));
        end
    end
`endif

endmodule
|
||||
108
hw/rtl/core/VX_ipdom_stack.sv
Normal file
108
hw/rtl/core/VX_ipdom_stack.sv
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Two-values-per-entry stack.
//
// A push stores the pair {q1, q0} in a new entry. Each entry is popped
// twice: the first pop presents q1 on `d` and keeps the entry, the second
// pop presents q0 and releases it. `d_set` is the inverse of the (optionally
// registered) per-entry "already popped once" flag.
//
// Parameters:
//   WIDTH   - width of each stored value.
//   DEPTH   - number of entries.
//   OUT_REG - when non-zero, the RAM output is registered and the flag is
//             delayed through a pipe register of that depth.
//   ADDRW   - pointer width (derived from DEPTH).
module VX_ipdom_stack #(
    parameter WIDTH   = 1,
    parameter DEPTH   = 1,
    parameter OUT_REG = 0,
    parameter ADDRW   = `LOG2UP(DEPTH)
) (
    input  wire             clk,
    input  wire             reset,
    input  wire [WIDTH-1:0] q0,
    input  wire [WIDTH-1:0] q1,
    output wire [WIDTH-1:0] d,
    output wire             d_set,
    input  wire             push,
    input  wire             pop,
    output wire             empty,
    output wire             full
);
    // One flag per entry: cleared by a push, set by the first pop.
    reg popped_once [DEPTH-1:0];

    // top_ptr addresses the most recent entry, next_ptr the next free one.
    reg [ADDRW-1:0] top_ptr, next_ptr;

    reg is_empty, is_full;

    wire [WIDTH-1:0] top_q0, top_q1;

    wire top_popped;      // flag of the entry at top_ptr
    wire top_popped_out;  // same flag, aligned with the RAM read data

    assign top_popped = popped_once[top_ptr];

    // Entry flags (intentionally outside the reset branch, as they are
    // always rewritten by the push that creates the entry).
    always @(posedge clk) begin
        if (push) begin
            popped_once[next_ptr] <= 0;
        end else if (pop) begin
            popped_once[top_ptr] <= 1;
        end
    end

    // Pointer and status tracking.
    always @(posedge clk) begin
        if (reset) begin
            is_full  <= 0;
            is_empty <= 1;
            next_ptr <= '0;
            top_ptr  <= '0;
        end else begin
            `ASSERT(~push || ~full, ("runtime error: writing to a full stack!"));
            `ASSERT(~pop || ~empty, ("runtime error: reading an empty stack!"));
            `ASSERT(~push || ~pop,  ("runtime error: push and pop in same cycle not supported!"));
            if (push) begin
                // the new entry becomes the top of the stack
                top_ptr  <= next_ptr;
                next_ptr <= next_ptr + ADDRW'(1);
                is_empty <= 0;
                is_full  <= (ADDRW'(DEPTH-1) == next_ptr);
            end else if (pop) begin
                // pointers only move back on the second pop of an entry
                next_ptr <= next_ptr - ADDRW'(top_popped);
                top_ptr  <= top_ptr - ADDRW'(top_popped);
                is_empty <= (top_ptr == 0) && (top_popped == 1);
                is_full  <= 0;
            end
        end
    end

    // Entry storage: both values of a pair share one RAM word.
    VX_dp_ram #(
        .DATAW   (WIDTH * 2),
        .SIZE    (DEPTH),
        .OUT_REG (OUT_REG ? 1 : 0),
        .LUTRAM  (OUT_REG ? 0 : 1)
    ) store (
        .clk   (clk),
        .read  (1'b1),
        .write (push),
        `UNUSED_PIN (wren),
        .waddr (next_ptr),
        .wdata ({q1, q0}),
        .raddr (top_ptr),
        .rdata ({top_q1, top_q0})
    );

    // Delay the flag by OUT_REG stages so it lines up with the RAM output.
    VX_pipe_register #(
        .DATAW (1),
        .DEPTH (OUT_REG)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (1'b1),
        .data_in  (top_popped),
        .data_out (top_popped_out)
    );

    assign empty = is_empty;
    assign full  = is_full;
    assign d_set = ~top_popped_out;
    assign d     = top_popped_out ? top_q0 : top_q1;

endmodule
|
||||
180
hw/rtl/core/VX_issue.sv
Normal file
180
hw/rtl/core/VX_issue.sv
Normal file
@@ -0,0 +1,180 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
// Issue stage top level.
//
// Instantiates and connects, in order, the instruction buffer, scoreboard,
// operand stage and dispatcher:
//   decode_if -> ibuffer -> scoreboard -> operands -> dispatch
//             -> {alu,lsu,fpu,sfu}_dispatch_if
// writeback_if is connected to both the scoreboard and the operand stage.
// Each sub-block gets its own relayed copy of `reset`.
//
// Optional sections:
//   DBG_SCOPE_ISSUE - scope tap / ILA probes on issue slot 0 (core 0 only).
//   PERF_ENABLE     - instruction-buffer and scoreboard stall counters.
module VX_issue #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_DECL

    input wire              clk,
    input wire              reset,

`ifdef PERF_ENABLE
    VX_pipeline_perf_if.issue perf_issue_if,
`endif

    VX_decode_if.slave      decode_if,
    VX_writeback_if.slave   writeback_if [`ISSUE_WIDTH],

    VX_dispatch_if.master   alu_dispatch_if [`ISSUE_WIDTH],
    VX_dispatch_if.master   lsu_dispatch_if [`ISSUE_WIDTH],
`ifdef EXT_F_ENABLE
    VX_dispatch_if.master   fpu_dispatch_if [`ISSUE_WIDTH],
`endif
    VX_dispatch_if.master   sfu_dispatch_if [`ISSUE_WIDTH]
);
    // Per-issue-slot links between the sub-blocks.
    VX_ibuffer_if  ibuffer_if [`ISSUE_WIDTH]();
    VX_ibuffer_if  scoreboard_if [`ISSUE_WIDTH]();
    VX_operands_if operands_if [`ISSUE_WIDTH]();

    `RESET_RELAY (ibuf_reset, reset);
    `RESET_RELAY (scoreboard_reset, reset);
    `RESET_RELAY (operands_reset, reset);
    `RESET_RELAY (dispatch_reset, reset);

    VX_ibuffer #(
        .CORE_ID (CORE_ID)
    ) ibuffer (
        .clk        (clk),
        .reset      (ibuf_reset),
        .decode_if  (decode_if),
        .ibuffer_if (ibuffer_if)
    );

    VX_scoreboard #(
        .CORE_ID (CORE_ID)
    ) scoreboard (
        .clk           (clk),
        .reset         (scoreboard_reset),
        .writeback_if  (writeback_if),
        .ibuffer_if    (ibuffer_if),
        .scoreboard_if (scoreboard_if)
    );

    VX_operands #(
        .CORE_ID (CORE_ID)
    ) operands (
        .clk           (clk),
        .reset         (operands_reset),
        .writeback_if  (writeback_if),
        .scoreboard_if (scoreboard_if),
        .operands_if   (operands_if)
    );

    VX_dispatch #(
        .CORE_ID (CORE_ID)
    ) dispatch (
        .clk            (clk),
        .reset          (dispatch_reset),
    `ifdef PERF_ENABLE
        .perf_stalls    (perf_issue_if.dsp_stalls),
    `endif
        .operands_if    (operands_if),
        .alu_dispatch_if(alu_dispatch_if),
        .lsu_dispatch_if(lsu_dispatch_if),
    `ifdef EXT_F_ENABLE
        .fpu_dispatch_if(fpu_dispatch_if),
    `endif
        .sfu_dispatch_if(sfu_dispatch_if)
    );

`ifdef DBG_SCOPE_ISSUE
    if (CORE_ID == 0) begin
    `ifdef SCOPE
        // Trigger conditions, all taken from issue slot 0.
        wire operands_if_fire = operands_if[0].valid && operands_if[0].ready;
        wire operands_if_not_ready = ~operands_if[0].ready;
        wire writeback_if_valid = writeback_if[0].valid;
        // PROBEW is the sum of the widths of the signals listed in .probes,
        // in the same order.
        VX_scope_tap #(
            .SCOPE_ID (2),
            .TRIGGERW (4),
            .PROBEW   (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS +
                1 + `NR_BITS + `XLEN + 1 + 1 + (`NUM_THREADS * 3 * `XLEN) +
                `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
        ) scope_tap (
            .clk(clk),
            .reset(scope_reset),
            .start(1'b0),
            .stop(1'b0),
            .triggers({
                reset,
                operands_if_fire,
                operands_if_not_ready,
                writeback_if_valid
            }),
            .probes({
                operands_if[0].data.uuid,
                operands_if[0].data.tmask,
                operands_if[0].data.ex_type,
                operands_if[0].data.op_type,
                operands_if[0].data.op_mod,
                operands_if[0].data.wb,
                operands_if[0].data.rd,
                operands_if[0].data.imm,
                operands_if[0].data.use_PC,
                operands_if[0].data.use_imm,
                operands_if[0].data.rs1_data,
                operands_if[0].data.rs2_data,
                operands_if[0].data.rs3_data,
                writeback_if[0].data.uuid,
                writeback_if[0].data.tmask,
                writeback_if[0].data.rd,
                writeback_if[0].data.data,
                writeback_if[0].data.eop
            }),
            .bus_in(scope_bus_in),
            .bus_out(scope_bus_out)
        );
    `endif
    `ifdef CHIPSCOPE
        // NOTE(review): these probes reference operands_if / writeback_if
        // without an array index or the `.data` member used by the scope tap
        // above, and `ibuffer.rs*` names the sub-module instance. This looks
        // stale; confirm against the ila_issue probe widths before enabling
        // CHIPSCOPE.
        ila_issue ila_issue_inst (
            .clk    (clk),
            .probe0 ({operands_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, operands_if.PC, operands_if.tmask, operands_if.wid, operands_if.ex_type, operands_if.op_type, operands_if.ready, operands_if.valid}),
            .probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid})
        );
    `endif
    end
`else
    `SCOPE_IO_UNUSED()
`endif

`ifdef PERF_ENABLE
    reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
    reg [`PERF_CTR_BITS-1:0] perf_scb_stalls;

    wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] scoreboard_stalls_per_cycle;
    // Driven by continuous assignments below, so declared as a net.
    wire [`ISSUE_WIDTH-1:0] scoreboard_stalls;
    for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
        // slot i holds a valid instruction that is not being accepted
        assign scoreboard_stalls[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready;
    end
    `POP_COUNT(scoreboard_stalls_per_cycle, scoreboard_stalls);

    always @(posedge clk) begin
        if (reset) begin
            perf_ibf_stalls <= '0;
            perf_scb_stalls <= '0;
        end else begin
            // one count per cycle in which decode is back-pressured
            if (decode_if.valid && ~decode_if.ready) begin
                perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(1);
            end
            // accumulate the per-cycle stalled-slot count
            perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(scoreboard_stalls_per_cycle);
        end
    end

    assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
    assign perf_issue_if.scb_stalls = perf_scb_stalls;
`endif

endmodule
|
||||
647
hw/rtl/core/VX_lsu_unit.sv
Normal file
647
hw/rtl/core/VX_lsu_unit.sv
Normal file
@@ -0,0 +1,647 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_mem_bus_if.master cache_bus_if [DCACHE_NUM_REQS],
|
||||
|
||||
// inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH]
|
||||
);
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_SIZE);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = `XLEN - MEM_ASHIFT;
|
||||
localparam REQ_ASHIFT = `CLOG2(DCACHE_WORD_SIZE);
|
||||
localparam CACHE_TAG_WIDTH = `UUID_WIDTH + (NUM_LANES * `CACHE_ADDR_TYPE_BITS) + LSUQ_TAG_BITS;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) execute_if[BLOCK_SIZE]();
|
||||
|
||||
`RESET_RELAY (dispatch_reset, reset);
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (1)
|
||||
) dispatch_unit (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.dispatch_if(dispatch_if),
|
||||
.execute_if (execute_if)
|
||||
);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_st_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_ld_if();
|
||||
|
||||
`UNUSED_VAR (execute_if[0].data.op_mod)
|
||||
`UNUSED_VAR (execute_if[0].data.use_PC)
|
||||
`UNUSED_VAR (execute_if[0].data.use_imm)
|
||||
`UNUSED_VAR (execute_if[0].data.rs3_data)
|
||||
`UNUSED_VAR (execute_if[0].data.tid)
|
||||
|
||||
`ifdef SM_ENABLE
|
||||
`STATIC_ASSERT((1 << `SMEM_LOG_SIZE) == `MEM_BLOCK_SIZE * ((1 << `SMEM_LOG_SIZE) / `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % (1 << `SMEM_LOG_SIZE)), ("invalid parameter"))
|
||||
localparam SMEM_START_B = MEM_ADDRW'(`XLEN'(`SMEM_BASE_ADDR) >> MEM_ASHIFT);
|
||||
localparam SMEM_END_B = MEM_ADDRW'((`XLEN'(`SMEM_BASE_ADDR) + (1 << `SMEM_LOG_SIZE)) >> MEM_ASHIFT);
|
||||
`endif
|
||||
|
||||
// tag = uuid + addr_type + wid + PC + tmask + rd + op_type + align + is_dup + pid + pkt_addr
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + (NUM_LANES * `CACHE_ADDR_TYPE_BITS) + `NW_WIDTH + `XLEN + NUM_LANES + `NR_BITS + `INST_LSU_BITS + (NUM_LANES * (REQ_ASHIFT)) + `LSU_DUP_ENABLED + PID_WIDTH + LSUQ_SIZEW;
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
wire [NUM_LANES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] lsu_addr_type;
|
||||
|
||||
// full address calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] full_addr;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign full_addr[i] = execute_if[0].data.rs1_data[i][`XLEN-1:0] + execute_if[0].data.imm;
|
||||
end
|
||||
|
||||
// detect duplicate addresses
|
||||
|
||||
wire lsu_is_dup;
|
||||
`ifdef LSU_DUP
|
||||
if (NUM_LANES > 1) begin
|
||||
wire [NUM_LANES-2:0] addr_matches;
|
||||
for (genvar i = 0; i < (NUM_LANES-1); ++i) begin
|
||||
assign addr_matches[i] = (execute_if[0].data.rs1_data[i+1] == execute_if[0].data.rs1_data[0]) || ~execute_if[0].data.tmask[i+1];
|
||||
end
|
||||
assign lsu_is_dup = execute_if[0].data.tmask[0] && (& addr_matches);
|
||||
end else begin
|
||||
assign lsu_is_dup = 0;
|
||||
end
|
||||
`else
|
||||
assign lsu_is_dup = 0;
|
||||
`endif
|
||||
|
||||
// detect address type
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [MEM_ADDRW-1:0] full_addr_b = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
// is non-cacheable I/O address
|
||||
wire is_addr_io = (full_addr_b >= MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT));
|
||||
`ifdef SM_ENABLE
|
||||
// is shared memory address
|
||||
wire is_addr_sm = (full_addr_b >= SMEM_START_B) && (full_addr_b < SMEM_END_B);
|
||||
assign lsu_addr_type[i] = {is_addr_io, is_addr_sm};
|
||||
`else
|
||||
assign lsu_addr_type[i] = is_addr_io;
|
||||
`endif
|
||||
end
|
||||
|
||||
wire mem_req_empty;
|
||||
wire st_rsp_ready;
|
||||
wire lsu_valid, lsu_ready;
|
||||
|
||||
// fence: stall the pipeline until all pending requests are sent
|
||||
wire is_fence = `INST_LSU_IS_FENCE(execute_if[0].data.op_type);
|
||||
wire fence_wait = is_fence && ~mem_req_empty;
|
||||
|
||||
assign lsu_valid = execute_if[0].valid && ~fence_wait;
|
||||
assign execute_if[0].ready = lsu_ready && ~fence_wait;
|
||||
|
||||
// schedule memory request
|
||||
|
||||
wire mem_req_valid;
|
||||
wire [NUM_LANES-1:0] mem_req_mask;
|
||||
wire mem_req_rw;
|
||||
wire [NUM_LANES-1:0][`MEM_ADDR_WIDTH-REQ_ASHIFT-1:0] mem_req_addr;
|
||||
reg [NUM_LANES-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen;
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] mem_req_data;
|
||||
wire [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_rsp_valid;
|
||||
wire [NUM_LANES-1:0] mem_rsp_mask;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] mem_rsp_data;
|
||||
wire [TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_sop;
|
||||
wire mem_rsp_eop;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
assign mem_req_valid = lsu_valid;
|
||||
assign lsu_ready = mem_req_ready
|
||||
&& (~mem_req_rw || st_rsp_ready); // writes commit directly
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign mem_req_mask[i] = execute_if[0].data.tmask[i] && (~lsu_is_dup || (i == 0));
|
||||
end
|
||||
|
||||
assign mem_req_rw = ~execute_if[0].data.wb;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
`UNUSED_VAR (mem_req_fire)
|
||||
`UNUSED_VAR (mem_rsp_fire)
|
||||
|
||||
// address formatting
|
||||
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0];
|
||||
assign mem_req_addr[i] = full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT];
|
||||
end
|
||||
|
||||
// byte enable formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
always @(*) begin
|
||||
mem_req_byteen[i] = '0;
|
||||
case (`INST_LSU_WSIZE(execute_if[0].data.op_type))
|
||||
0: begin // 8-bit
|
||||
mem_req_byteen[i][req_align[i]] = 1'b1;
|
||||
end
|
||||
1: begin // 16 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1;
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
2: begin // 32 bit
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1;
|
||||
mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1;
|
||||
end
|
||||
`endif
|
||||
default : mem_req_byteen[i] = {DCACHE_WORD_SIZE{1'b1}};
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// memory misalignment not supported!
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire lsu_req_fire = execute_if[0].valid && execute_if[0].ready;
|
||||
`RUNTIME_ASSERT((~lsu_req_fire || ~execute_if[0].data.tmask[i] || is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if[0].data.op_type))) == 0),
|
||||
("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
execute_if[0].data.wid, execute_if[0].data.PC, full_addr[i], `INST_LSU_WSIZE(execute_if[0].data.op_type), execute_if[0].data.uuid));
|
||||
end
|
||||
|
||||
// store data formatting
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
always @(*) begin
|
||||
mem_req_data[i] = execute_if[0].data.rs2_data[i];
|
||||
case (req_align[i])
|
||||
1: mem_req_data[i][`XLEN-1:8] = execute_if[0].data.rs2_data[i][`XLEN-9:0];
|
||||
2: mem_req_data[i][`XLEN-1:16] = execute_if[0].data.rs2_data[i][`XLEN-17:0];
|
||||
3: mem_req_data[i][`XLEN-1:24] = execute_if[0].data.rs2_data[i][`XLEN-25:0];
|
||||
`ifdef XLEN_64
|
||||
4: mem_req_data[i][`XLEN-1:32] = execute_if[0].data.rs2_data[i][`XLEN-33:0];
|
||||
5: mem_req_data[i][`XLEN-1:40] = execute_if[0].data.rs2_data[i][`XLEN-41:0];
|
||||
6: mem_req_data[i][`XLEN-1:48] = execute_if[0].data.rs2_data[i][`XLEN-49:0];
|
||||
7: mem_req_data[i][`XLEN-1:56] = execute_if[0].data.rs2_data[i][`XLEN-57:0];
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// track SOP/EOP for out-of-order memory responses
|
||||
|
||||
wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr;
|
||||
wire mem_rsp_sop_pkt, mem_rsp_eop_pkt;
|
||||
|
||||
if (PID_BITS != 0) begin
|
||||
reg [`LSUQ_SIZE-1:0][PID_BITS:0] pkt_ctr;
|
||||
reg [`LSUQ_SIZE-1:0] pkt_sop, pkt_eop;
|
||||
|
||||
wire mem_req_rd_fire = mem_req_fire && execute_if[0].data.wb;
|
||||
wire mem_req_rd_sop_fire = mem_req_rd_fire && execute_if[0].data.sop;
|
||||
wire mem_req_rd_eop_fire = mem_req_rd_fire && execute_if[0].data.eop;
|
||||
wire mem_rsp_eop_fire = mem_rsp_fire && mem_rsp_eop;
|
||||
wire full;
|
||||
|
||||
VX_allocator #(
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) pkt_allocator (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_en (mem_req_rd_eop_fire),
|
||||
.acquire_addr(pkt_waddr),
|
||||
.release_en (mem_rsp_eop_pkt),
|
||||
.release_addr(pkt_raddr),
|
||||
`UNUSED_PIN (empty),
|
||||
.full (full)
|
||||
);
|
||||
|
||||
wire rd_during_wr = mem_req_rd_fire && mem_rsp_eop_fire && (pkt_raddr == pkt_waddr);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pkt_ctr <= '0;
|
||||
pkt_sop <= '0;
|
||||
pkt_eop <= '0;
|
||||
end else begin
|
||||
if (mem_req_rd_sop_fire) begin
|
||||
pkt_sop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (mem_req_rd_eop_fire) begin
|
||||
pkt_eop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
pkt_sop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (mem_rsp_eop_pkt) begin
|
||||
pkt_eop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (~rd_during_wr) begin
|
||||
if (mem_req_rd_fire) begin
|
||||
pkt_ctr[pkt_waddr] <= pkt_ctr[pkt_waddr] + PID_BITS'(1);
|
||||
end
|
||||
if (mem_rsp_eop_fire) begin
|
||||
pkt_ctr[pkt_raddr] <= pkt_ctr[pkt_raddr] - PID_BITS'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr];
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1);
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("allocator full!"))
|
||||
`RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("Oops!"))
|
||||
`UNUSED_VAR (mem_rsp_sop)
|
||||
end else begin
|
||||
assign pkt_waddr = 0;
|
||||
assign mem_rsp_sop_pkt = mem_rsp_sop;
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop;
|
||||
`UNUSED_VAR (pkt_raddr)
|
||||
end
|
||||
|
||||
assign mem_req_tag = {
|
||||
execute_if[0].data.uuid, lsu_addr_type, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, execute_if[0].data.rd, execute_if[0].data.op_type, req_align, execute_if[0].data.pid, pkt_waddr
|
||||
`ifdef LSU_DUP
|
||||
, lsu_is_dup
|
||||
`endif
|
||||
};
|
||||
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_req_valid;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_req_rw;
|
||||
wire [DCACHE_NUM_REQS-1:0][(`XLEN/8)-1:0] cache_req_byteen;
|
||||
wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] cache_req_addr;
|
||||
wire [DCACHE_NUM_REQS-1:0][`XLEN-1:0] cache_req_data;
|
||||
wire [DCACHE_NUM_REQS-1:0][CACHE_TAG_WIDTH-1:0] cache_req_tag;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_req_ready;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_rsp_valid;
|
||||
wire [DCACHE_NUM_REQS-1:0][`XLEN-1:0] cache_rsp_data;
|
||||
wire [DCACHE_NUM_REQS-1:0][CACHE_TAG_WIDTH-1:0] cache_rsp_tag;
|
||||
wire [DCACHE_NUM_REQS-1:0] cache_rsp_ready;
|
||||
|
||||
`RESET_RELAY (mem_scheduler_reset, reset);
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched", CORE_ID)),
|
||||
.NUM_REQS (LSU_MEM_REQS),
|
||||
.NUM_BANKS (DCACHE_NUM_REQS),
|
||||
.ADDR_WIDTH (DCACHE_ADDR_WIDTH),
|
||||
.DATA_WIDTH (`XLEN),
|
||||
.QUEUE_SIZE (`LSUQ_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.MEM_TAG_ID (`UUID_WIDTH + (NUM_LANES * `CACHE_ADDR_TYPE_BITS)),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.RSP_PARTIAL (1),
|
||||
.MEM_OUT_REG (2)
|
||||
) mem_scheduler (
|
||||
.clk (clk),
|
||||
.reset (mem_scheduler_reset),
|
||||
|
||||
// Input request
|
||||
.req_valid (mem_req_valid),
|
||||
.req_rw (mem_req_rw),
|
||||
.req_mask (mem_req_mask),
|
||||
.req_byteen (mem_req_byteen),
|
||||
.req_addr (mem_req_addr),
|
||||
.req_data (mem_req_data),
|
||||
.req_tag (mem_req_tag),
|
||||
.req_empty (mem_req_empty),
|
||||
.req_ready (mem_req_ready),
|
||||
`UNUSED_PIN (write_notify),
|
||||
|
||||
// Output response
|
||||
.rsp_valid (mem_rsp_valid),
|
||||
.rsp_mask (mem_rsp_mask),
|
||||
.rsp_data (mem_rsp_data),
|
||||
.rsp_tag (mem_rsp_tag),
|
||||
.rsp_sop (mem_rsp_sop),
|
||||
.rsp_eop (mem_rsp_eop),
|
||||
.rsp_ready (mem_rsp_ready),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (cache_req_valid),
|
||||
.mem_req_rw (cache_req_rw),
|
||||
.mem_req_byteen (cache_req_byteen),
|
||||
.mem_req_addr (cache_req_addr),
|
||||
.mem_req_data (cache_req_data),
|
||||
.mem_req_tag (cache_req_tag),
|
||||
.mem_req_ready (cache_req_ready),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (cache_rsp_valid),
|
||||
.mem_rsp_data (cache_rsp_data),
|
||||
.mem_rsp_tag (cache_rsp_tag),
|
||||
.mem_rsp_ready (cache_rsp_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
assign cache_bus_if[i].req_valid = cache_req_valid[i];
|
||||
assign cache_bus_if[i].req_data.rw = cache_req_rw[i];
|
||||
assign cache_bus_if[i].req_data.byteen = cache_req_byteen[i];
|
||||
assign cache_bus_if[i].req_data.addr = cache_req_addr[i];
|
||||
assign cache_bus_if[i].req_data.data = cache_req_data[i];
|
||||
assign cache_req_ready[i] = cache_bus_if[i].req_ready;
|
||||
|
||||
assign cache_rsp_valid[i] = cache_bus_if[i].rsp_valid;
|
||||
assign cache_rsp_data[i] = cache_bus_if[i].rsp_data.data;
|
||||
assign cache_bus_if[i].rsp_ready = cache_rsp_ready[i];
|
||||
end
|
||||
|
||||
// cache tag formatting: <uuid, tag, type>
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
wire [`UUID_WIDTH-1:0] cache_req_uuid, cache_rsp_uuid;
|
||||
wire [NUM_LANES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type, cache_rsp_type;
|
||||
wire [`CLOG2(`LSUQ_SIZE)-1:0] cache_req_tag_x, cache_rsp_tag_x;
|
||||
if (DCACHE_NUM_BATCHES > 1) begin
|
||||
|
||||
wire [DCACHE_NUM_BATCHES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type_b, cache_rsp_type_b;
|
||||
wire [`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type_bi, cache_rsp_type_bi;
|
||||
wire [DCACHE_BATCH_SEL_BITS-1:0] cache_req_bid, cache_rsp_bid;
|
||||
|
||||
assign {cache_req_uuid, cache_req_type, cache_req_bid, cache_req_tag_x} = cache_req_tag[i];
|
||||
assign cache_req_type_bi = cache_req_type_b[cache_req_bid];
|
||||
assign cache_bus_if[i].req_data.tag = {cache_req_uuid, cache_req_bid, cache_req_tag_x, cache_req_type_bi};
|
||||
|
||||
assign {cache_rsp_uuid, cache_rsp_bid, cache_rsp_tag_x, cache_rsp_type_bi} = cache_bus_if[i].rsp_data.tag;
|
||||
assign cache_rsp_type_b = {DCACHE_NUM_BATCHES{cache_rsp_type_bi}};
|
||||
assign cache_rsp_tag[i] = {cache_rsp_uuid, cache_rsp_type, cache_rsp_bid, cache_rsp_tag_x};
|
||||
|
||||
for (genvar j = 0; j < DCACHE_NUM_BATCHES; ++j) begin
|
||||
localparam k = j * DCACHE_NUM_REQS + i;
|
||||
if (k < NUM_LANES) begin
|
||||
assign cache_req_type_b[j] = cache_req_type[k];
|
||||
assign cache_rsp_type[k] = cache_rsp_type_b[j];
|
||||
end else begin
|
||||
assign cache_req_type_b[j] = '0;
|
||||
`UNUSED_VAR (cache_rsp_type_b[j])
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
assign {cache_req_uuid, cache_req_type, cache_req_tag_x} = cache_req_tag[i];
|
||||
assign cache_bus_if[i].req_data.tag = {cache_req_uuid, cache_req_tag_x, cache_req_type[i]};
|
||||
|
||||
assign {cache_rsp_uuid, cache_rsp_tag_x, cache_rsp_type[i]} = cache_bus_if[i].rsp_data.tag;
|
||||
assign cache_rsp_tag[i] = {cache_rsp_uuid, cache_rsp_type, cache_rsp_tag_x};
|
||||
|
||||
for (genvar j = 0; j < DCACHE_NUM_REQS; ++j) begin
|
||||
if (i != j) begin
|
||||
`UNUSED_VAR (cache_req_type[j])
|
||||
assign cache_rsp_type[j] = '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [NUM_LANES-1:0][`CACHE_ADDR_TYPE_BITS-1:0] rsp_addr_type;
|
||||
wire [`NW_WIDTH-1:0] rsp_wid;
|
||||
wire [NUM_LANES-1:0] rsp_tmask_uq;
|
||||
wire [`XLEN-1:0] rsp_pc;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire [`INST_LSU_BITS-1:0] rsp_op_type;
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] rsp_align;
|
||||
wire [PID_WIDTH-1:0] rsp_pid;
|
||||
wire rsp_is_dup;
|
||||
|
||||
`ifndef LSU_DUP
|
||||
assign rsp_is_dup = 0;
|
||||
`endif
|
||||
|
||||
assign {
|
||||
rsp_uuid, rsp_addr_type, rsp_wid, rsp_tmask_uq, rsp_pc, rsp_rd, rsp_op_type, rsp_align, rsp_pid, pkt_raddr
|
||||
`ifdef LSU_DUP
|
||||
, rsp_is_dup
|
||||
`endif
|
||||
} = mem_rsp_tag;
|
||||
`UNUSED_VAR (rsp_addr_type)
|
||||
`UNUSED_VAR (rsp_op_type)
|
||||
|
||||
// load response formatting
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] rsp_data;
|
||||
wire [NUM_LANES-1:0] rsp_tmask;
|
||||
|
||||
`ifdef XLEN_64
|
||||
`ifdef EXT_F_ENABLE
|
||||
// apply nan-boxing to flw outputs
|
||||
wire rsp_is_float = rsp_rd[5];
|
||||
`else
|
||||
wire rsp_is_float = 0;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; i++) begin
|
||||
`ifdef XLEN_64
|
||||
wire [63:0] rsp_data64 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? (rsp_align[0][2] ? mem_rsp_data[0][63:32] : mem_rsp_data[0][31:0]) :
|
||||
(rsp_align[i][2] ? mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]);
|
||||
`else
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
|
||||
`endif
|
||||
wire [15:0] rsp_data16 = rsp_align[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
|
||||
wire [7:0] rsp_data8 = rsp_align[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
||||
always @(*) begin
|
||||
case (`INST_LSU_FMT(rsp_op_type))
|
||||
`INST_FMT_B: rsp_data[i] = `XLEN'(signed'(rsp_data8));
|
||||
`INST_FMT_H: rsp_data[i] = `XLEN'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = `XLEN'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = `XLEN'(unsigned'(rsp_data16));
|
||||
`ifdef XLEN_64
|
||||
`INST_FMT_W: rsp_data[i] = rsp_is_float ? (`XLEN'(rsp_data32) | 64'hffffffff00000000) : `XLEN'(signed'(rsp_data32));
|
||||
`INST_FMT_WU: rsp_data[i] = `XLEN'(unsigned'(rsp_data32));
|
||||
`INST_FMT_D: rsp_data[i] = `XLEN'(signed'(rsp_data64));
|
||||
`else
|
||||
`INST_FMT_W: rsp_data[i] = `XLEN'(signed'(rsp_data32));
|
||||
`endif
|
||||
default: rsp_data[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign rsp_tmask = rsp_is_dup ? rsp_tmask_uq : mem_rsp_mask;
|
||||
|
||||
// load commit
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) ld_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_rsp_valid),
|
||||
.ready_in (mem_rsp_ready),
|
||||
.data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
||||
.data_out ({commit_ld_if.data.uuid, commit_ld_if.data.wid, commit_ld_if.data.tmask, commit_ld_if.data.PC, commit_ld_if.data.rd, commit_ld_if.data.data, commit_ld_if.data.pid, commit_ld_if.data.sop, commit_ld_if.data.eop}),
|
||||
.valid_out (commit_ld_if.valid),
|
||||
.ready_out (commit_ld_if.ready)
|
||||
);
|
||||
|
||||
assign commit_ld_if.data.wb = 1'b1;
|
||||
|
||||
// store commit
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) st_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_fire && mem_req_rw),
|
||||
.ready_in (st_rsp_ready),
|
||||
.data_in ({execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, execute_if[0].data.pid, execute_if[0].data.sop, execute_if[0].data.eop}),
|
||||
.data_out ({commit_st_if.data.uuid, commit_st_if.data.wid, commit_st_if.data.tmask, commit_st_if.data.PC, commit_st_if.data.pid, commit_st_if.data.sop, commit_st_if.data.eop}),
|
||||
.valid_out (commit_st_if.valid),
|
||||
.ready_out (commit_st_if.ready)
|
||||
);
|
||||
assign commit_st_if.data.rd = '0;
|
||||
assign commit_st_if.data.wb = 1'b0;
|
||||
assign commit_st_if.data.data = commit_ld_if.data.data; // force arbiter passthru
|
||||
|
||||
// lsu commit
|
||||
|
||||
`RESET_RELAY (commit_reset, reset);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_arb_if[1]();
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_REG (1)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.valid_in ({commit_st_if.valid, commit_ld_if.valid}),
|
||||
.ready_in ({commit_st_if.ready, commit_ld_if.ready}),
|
||||
.data_in ({commit_st_if.data, commit_ld_if.data}),
|
||||
.data_out (commit_arb_if[0].data),
|
||||
.valid_out (commit_arb_if[0].valid),
|
||||
.ready_out (commit_arb_if[0].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
VX_gather_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_REG (3)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
.commit_in_if (commit_arb_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes({execute_if[0].data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
wire [31:0] full_addr_0 = full_addr[0];
|
||||
wire [31:0] mem_req_data_0 = mem_req_data[0];
|
||||
wire [31:0] rsp_data_0 = rsp_data[0];
|
||||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({mem_req_data_0, execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
|
||||
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, rsp_tmask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
|
||||
.probe2 ({cache_bus_if.req_data.data, cache_bus_if.req_data.tag, cache_bus_if.req_data.byteen, cache_bus_if.req_data.addr, cache_bus_if.req_data.rw, cache_bus_if.req_ready, cache_bus_if.req_valid}),
|
||||
.probe3 ({cache_bus_if.rsp_data.data, cache_bus_if.rsp_data.tag, cache_bus_if.rsp_ready, cache_bus_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
if (execute_if[0].valid && fence_wait) begin
|
||||
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID));
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, type=", mem_req_tag, mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, lsu_addr_type, NUM_LANES);
|
||||
`TRACE(1, (", data="));
|
||||
`TRACE_ARRAY1D(1, mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", is_dup=%b (#%0d)\n", lsu_is_dup, execute_if[0].data.uuid));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, type=", mem_req_tag, mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, lsu_addr_type, NUM_LANES);
|
||||
`TRACE(1, (", rd=%0d, is_dup=%b (#%0d)\n", execute_if[0].data.rd, lsu_is_dup, execute_if[0].data.uuid));
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, tag=0x%0h, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, rsp_pc, mem_rsp_mask, mem_rsp_tag, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", is_dup=%b (#%0d)\n", rsp_is_dup, rsp_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
336
hw/rtl/core/VX_muldiv_unit.sv
Normal file
336
hw/rtl/core/VX_muldiv_unit.sv
Normal file
@@ -0,0 +1,336 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_muldiv_unit: integer multiply / divide execution unit.
//
// A request arriving on execute_if is steered to the multiply path when
// `INST_M_IS_MULX(op) is set and to the divide/remainder path otherwise.
// Each path produces a result plus the request's tag, and the two result
// streams are merged by a VX_stream_arb onto commit_if.
//
// Parameters:
//   CORE_ID   - not used by the logic in this module (marked `UNUSED_PARAM).
//   NUM_LANES - number of lanes handled per request; sizes the per-lane
//               operand/result arrays and the thread mask.
//
// Build-time variants selected by macros:
//   IMUL_DPI / IDIV_DPI - results come from dpi_imul / dpi_idiv calls and
//                         are delayed through a shift register.
//   XLEN_64             - enables the 32-bit "W" operation handling and, for
//                         multiplication, the serial multiplier.
module VX_muldiv_unit #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
// PID_BITS is the log2 of the NUM_THREADS / NUM_LANES ratio.
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
// PID_WIDTH is PID_BITS passed through `UP()
// (presumably clamps to a minimum of 1 - confirm against the macro definition).
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
// TAGW: total width of the request metadata that travels with every result,
// in the order {uuid, wid, tmask, PC, rd, wb, pid, sop, eop}.
localparam TAGW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
|
||||
// rs3_data is not read anywhere in this unit.
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
// Operation code: op_type cast to `INST_M_BITS bits.
wire [`INST_M_BITS-1:0] muldiv_op = `INST_M_BITS'(execute_if.data.op_type);
|
||||
|
||||
// is_mulx_op steers the request: 1 = multiply path, 0 = divide path.
wire is_mulx_op = `INST_M_IS_MULX(muldiv_op);
|
||||
// is_signed_op is used as the signedness of multiply operand B and of the divider.
wire is_signed_op = `INST_M_SIGNED(muldiv_op);
|
||||
// is_alu_w is decoded from op_mod only when XLEN_64 is defined, else tied to 0.
// Downstream it selects 32-bit operand handling and sign-extension of
// result bits [31:0] to XLEN.
`ifdef XLEN_64
|
||||
wire is_alu_w = `INST_ALU_IS_W(execute_if.data.op_mod);
|
||||
`else
|
||||
wire is_alu_w = 0;
|
||||
`endif
|
||||
|
||||
// ---- Multiply path: result and tag fields presented to the response arbiter ----
wire [NUM_LANES-1:0][`XLEN-1:0] mul_result_out;
|
||||
wire [`UUID_WIDTH-1:0] mul_uuid_out;
|
||||
wire [`NW_WIDTH-1:0] mul_wid_out;
|
||||
wire [NUM_LANES-1:0] mul_tmask_out;
|
||||
wire [`XLEN-1:0] mul_PC_out;
|
||||
wire [`NR_BITS-1:0] mul_rd_out;
|
||||
wire mul_wb_out;
|
||||
wire [PID_WIDTH-1:0] mul_pid_out;
|
||||
wire mul_sop_out, mul_eop_out;
|
||||
|
||||
// A request enters the multiply path only when it is valid and decoded as MULX.
wire mul_valid_in = execute_if.valid && is_mulx_op;
|
||||
wire mul_ready_in;
|
||||
wire mul_valid_out;
|
||||
wire mul_ready_out;
|
||||
|
||||
// is_mulh_in selects the upper half of the product as the result (see the result muxes below).
wire is_mulh_in = `INST_M_IS_MULH(muldiv_op);
|
||||
// Per-operand signedness: A from `INST_M_SIGNED_A, B from `INST_M_SIGNED.
wire is_signed_mul_a = `INST_M_SIGNED_A(muldiv_op);
|
||||
wire is_signed_mul_b = is_signed_op;
|
||||
|
||||
// IMUL_DPI variant: products come from dpi_imul (defined outside this file;
// presumably a DPI-C import - confirm) and are delayed by a shift register.
`ifdef IMUL_DPI
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] mul_result_tmp;
|
||||
|
||||
// High while a multiply request is being accepted this cycle.
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] mul_resultl, mul_resulth;
|
||||
// For W ops both operands are masked down to their low 32 bits.
wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
|
||||
wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
|
||||
// dpi_imul is called from a combinational block; mul_fire_in is its first
// argument (presumably an enable - confirm against the DPI prototype).
always @(*) begin
|
||||
dpi_imul (mul_fire_in, is_signed_mul_a, is_signed_mul_b, mul_in1, mul_in2, mul_resultl, mul_resulth);
|
||||
end
|
||||
// High word for MULH-type ops; otherwise the low word, sign-extended
// from bit 31 for W ops.
assign mul_result_tmp[i] = is_mulh_in ? mul_resulth : (is_alu_w ? `XLEN'($signed(mul_resultl[31:0])) : mul_resultl);
|
||||
end
|
||||
|
||||
// Valid bit, tag and per-lane results are shifted together through
// `LATENCY_IMUL stages; the register only advances while mul_ready_in is high.
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + (NUM_LANES * `XLEN)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
|
||||
.data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out})
|
||||
);
|
||||
|
||||
// Advance when the output is being taken or no valid output is pending.
assign mul_ready_in = mul_ready_out || ~mul_valid_out;
|
||||
|
||||
// Hardware multiplier variants. Operands are widened to XLEN+1 bits and fed to
// multipliers configured with SIGNED = 1; the raw product is 2*(XLEN+1) bits per lane.
`else
|
||||
|
||||
wire [NUM_LANES-1:0][2*(`XLEN+1)-1:0] mul_result_tmp;
|
||||
// Output-side copies of is_mulh_in / is_alu_w, carried alongside the tag.
wire is_mulh_out;
|
||||
wire is_mul_w_out;
|
||||
|
||||
// XLEN_64 variant: VX_serial_mul driven through a VX_elastic_adapter.
`ifdef XLEN_64
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN:0] mul_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN:0] mul_in2;
|
||||
|
||||
// W ops: bit 31 of the low word is replicated into the upper XLEN-31 bits,
// regardless of the signedness flags. Otherwise the extra top bit is the
// operand's MSB when that operand is signed, and 0 when it is unsigned.
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]} : {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
assign mul_in2[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]} : {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
end
|
||||
|
||||
// NOTE: "mul_strode" is the net wired to the .strobe ports of the adapter and
// the serial multiplier (the name looks like a typo of "strobe").
wire mul_strode;
|
||||
wire mul_busy;
|
||||
|
||||
// Sits between the valid/ready handshakes (input and output side) and the
// strobe/busy pair of the serial multiplier.
VX_elastic_adapter mul_elastic_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mul_valid_in),
|
||||
.ready_in (mul_ready_in),
|
||||
.valid_out (mul_valid_out),
|
||||
.ready_out (mul_ready_out),
|
||||
.strobe (mul_strode),
|
||||
.busy (mul_busy)
|
||||
);
|
||||
|
||||
// All lanes are multiplied by a single VX_serial_mul instance (LANES = NUM_LANES).
VX_serial_mul #(
|
||||
.A_WIDTH (`XLEN+1),
|
||||
.LANES (NUM_LANES),
|
||||
.SIGNED (1)
|
||||
) serial_mul (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.strobe (mul_strode),
|
||||
.busy (mul_busy),
|
||||
|
||||
.dataa (mul_in1),
|
||||
.datab (mul_in2),
|
||||
.result (mul_result_tmp)
|
||||
);
|
||||
|
||||
// Tag register: TAGW bits plus is_mulh_in and is_alu_w, captured when a
// request is accepted (valid && ready). It has no reset term.
reg [TAGW+2-1:0] mul_tag_r;
|
||||
always @(posedge clk) begin
|
||||
if (mul_valid_in && mul_ready_in) begin
|
||||
mul_tag_r <= {execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, is_mulh_in, is_alu_w, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop};
|
||||
end
|
||||
end
|
||||
|
||||
// Unpack in exactly the order used when the tag was captured above.
assign {mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out, is_mul_w_out, mul_pid_out, mul_sop_out, mul_eop_out} = mul_tag_r;
|
||||
|
||||
// Non-XLEN_64 variant: one VX_multiplier per lane with latency `LATENCY_IMUL.
`else
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
// Extra top bit is the operand's MSB when that operand is signed, else 0.
wire [`XLEN:0] mul_in1 = {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
|
||||
wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
|
||||
|
||||
VX_multiplier #(
|
||||
.A_WIDTH (`XLEN+1),
|
||||
.B_WIDTH (`XLEN+1),
|
||||
.R_WIDTH (2*(`XLEN+1)),
|
||||
.SIGNED (1),
|
||||
.LATENCY (`LATENCY_IMUL)
|
||||
) multiplier (
|
||||
.clk (clk),
|
||||
.enable (mul_ready_in),
|
||||
.dataa (mul_in1),
|
||||
.datab (mul_in2),
|
||||
.result (mul_result_tmp[i])
|
||||
);
|
||||
end
|
||||
|
||||
// Tag pipeline: valid bit, tag, is_mulh_in and is_alu_w. It uses the same
// depth macro (`LATENCY_IMUL) and the same enable as the multipliers above.
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, is_mulh_in, is_alu_w}),
|
||||
.data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, is_mulh_out, is_mul_w_out})
|
||||
);
|
||||
|
||||
// Advance when the output is being taken or no valid output is pending.
assign mul_ready_in = mul_ready_out || ~mul_valid_out;
|
||||
|
||||
`endif
|
||||
|
||||
// Output-side result mux shared by both hardware variants: bits
// [2*XLEN-1:XLEN] of the product when is_mulh_out, otherwise the low XLEN
// bits (sign-extended from bit 31 for W ops when XLEN_64 is defined).
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef XLEN_64
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] :
|
||||
(is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) :
|
||||
mul_result_tmp[i][`XLEN-1:0]);
|
||||
`else
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] : mul_result_tmp[i][`XLEN-1:0];
|
||||
`UNUSED_VAR (is_mul_w_out)
|
||||
`endif
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// ---- Divide / remainder path: result and tag fields presented to the arbiter ----
wire [NUM_LANES-1:0][`XLEN-1:0] div_result_out;
|
||||
wire [`UUID_WIDTH-1:0] div_uuid_out;
|
||||
wire [`NW_WIDTH-1:0] div_wid_out;
|
||||
wire [NUM_LANES-1:0] div_tmask_out;
|
||||
wire [`XLEN-1:0] div_PC_out;
|
||||
wire [`NR_BITS-1:0] div_rd_out;
|
||||
wire div_wb_out;
|
||||
wire [PID_WIDTH-1:0] div_pid_out;
|
||||
wire div_sop_out, div_eop_out;
|
||||
|
||||
// is_rem_op picks the remainder instead of the quotient as the result.
wire is_rem_op = `INST_M_IS_REM(muldiv_op);
|
||||
|
||||
// Every valid request that is not a MULX op goes to the divide path.
wire div_valid_in = execute_if.valid && ~is_mulx_op;
|
||||
wire div_ready_in;
|
||||
wire div_valid_out;
|
||||
wire div_ready_out;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_in2;
|
||||
|
||||
// W ops use the low 32 bits of each operand, extended with bit 31 when the op
// is signed and with zeros otherwise; non-W ops use the operand unchanged.
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign div_in1[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]}: execute_if.data.rs1_data[i];
|
||||
assign div_in2[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]}: execute_if.data.rs2_data[i];
|
||||
end
|
||||
|
||||
// IDIV_DPI variant: quotient/remainder come from dpi_idiv (defined outside this
// file; presumably a DPI-C import - confirm) and are delayed by a shift register.
`ifdef IDIV_DPI
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_result_in;
|
||||
// High while a divide request is being accepted this cycle.
wire div_fire_in = div_valid_in && div_ready_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] div_quotient, div_remainder;
|
||||
always @(*) begin
|
||||
dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
|
||||
end
|
||||
// Remainder or quotient; for W ops bits [31:0] are sign-extended to XLEN.
assign div_result_in[i] = is_rem_op ? (is_alu_w ? `XLEN'($signed(div_remainder[31:0])) : div_remainder) :
|
||||
(is_alu_w ? `XLEN'($signed(div_quotient[31:0])) : div_quotient);
|
||||
end
|
||||
|
||||
// NOTE(review): DEPTH below is `LATENCY_IMUL, the multiply latency macro -
// confirm that reusing it for the divide path is intended.
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + (NUM_LANES * `XLEN)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) div_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (div_ready_in),
|
||||
.data_in ({div_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, div_result_in}),
|
||||
.data_out ({div_valid_out, div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_pid_out, div_sop_out, div_eop_out, div_result_out})
|
||||
);
|
||||
|
||||
// Advance when the output is being taken or no valid output is pending.
assign div_ready_in = div_ready_out || ~div_valid_out;
|
||||
|
||||
// Hardware variant: VX_serial_div driven through a VX_elastic_adapter.
`else
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] div_quotient, div_remainder;
|
||||
// Output-side copies of is_rem_op / is_alu_w, carried alongside the tag.
wire is_rem_op_out;
|
||||
wire is_div_w_out;
|
||||
// NOTE: "div_strode" is the net wired to the .strobe ports below
// (the name looks like a typo of "strobe").
wire div_strode;
|
||||
wire div_busy;
|
||||
|
||||
// Sits between the valid/ready handshakes (input and output side) and the
// strobe/busy pair of the serial divider.
VX_elastic_adapter div_elastic_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (div_valid_in),
|
||||
.ready_in (div_ready_in),
|
||||
.valid_out (div_valid_out),
|
||||
.ready_out (div_ready_out),
|
||||
.strobe (div_strode),
|
||||
.busy (div_busy)
|
||||
);
|
||||
|
||||
// All lanes are divided by a single VX_serial_div instance (LANES = NUM_LANES);
// numerator, denominator, quotient and remainder are all XLEN bits wide.
VX_serial_div #(
|
||||
.WIDTHN (`XLEN),
|
||||
.WIDTHD (`XLEN),
|
||||
.WIDTHQ (`XLEN),
|
||||
.WIDTHR (`XLEN),
|
||||
.LANES (NUM_LANES)
|
||||
) serial_div (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.strobe (div_strode),
|
||||
.busy (div_busy),
|
||||
|
||||
.is_signed (is_signed_op),
|
||||
.numer (div_in1),
|
||||
.denom (div_in2),
|
||||
|
||||
.quotient (div_quotient),
|
||||
.remainder (div_remainder)
|
||||
);
|
||||
|
||||
// Tag register: TAGW bits plus is_rem_op and is_alu_w, captured when a
// request is accepted (valid && ready). It has no reset term.
reg [TAGW+2-1:0] div_tag_r;
|
||||
always @(posedge clk) begin
|
||||
if (div_valid_in && div_ready_in) begin
|
||||
div_tag_r <= {execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, is_rem_op, is_alu_w, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop};
|
||||
end
|
||||
end
|
||||
|
||||
// Unpack in exactly the order used when the tag was captured above.
assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out, div_pid_out, div_sop_out, div_eop_out} = div_tag_r;
|
||||
|
||||
// Output-side result mux: remainder or quotient; when XLEN_64 is defined,
// W ops sign-extend bits [31:0] of the selected value to XLEN.
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef XLEN_64
|
||||
assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) :
|
||||
(is_div_w_out ? `XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]);
|
||||
`else
|
||||
assign div_result_out[i] = is_rem_op_out ? div_remainder[i] : div_quotient[i];
|
||||
`UNUSED_VAR (is_div_w_out)
|
||||
`endif
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
// can accept new request?
// Ready is taken from whichever path the current op decodes to.
|
||||
assign execute_if.ready = is_mulx_op ? mul_ready_in : div_ready_in;
|
||||
|
||||
// Merges the two result streams onto commit_if. In every concatenation the
// divide path is listed first and the multiply path last, so the multiply
// path occupies the low-order slot. Each data word is {tag fields, per-lane
// results}, TAGW + NUM_LANES*XLEN bits wide. sel_out is left unconnected.
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (TAGW + (NUM_LANES * `XLEN)),
|
||||
.OUT_REG (1)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({div_valid_out, mul_valid_out}),
|
||||
.ready_in ({div_ready_out, mul_ready_out}),
|
||||
.data_in ({{div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_pid_out, div_sop_out, div_eop_out, div_result_out},
|
||||
{mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out}}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, commit_if.data.data}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
endmodule
|
||||
302
hw/rtl/core/VX_operands.sv
Normal file
302
hw/rtl/core/VX_operands.sv
Normal file
@@ -0,0 +1,302 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_operands import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter CACHE_ENABLE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.master operands_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + `NR_BITS;
|
||||
|
||||
localparam STATE_IDLE = 2'd0;
|
||||
localparam STATE_FETCH1 = 2'd1;
|
||||
localparam STATE_FETCH2 = 2'd2;
|
||||
localparam STATE_FETCH3 = 2'd3;
|
||||
localparam STATE_BITS = 2;
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
|
||||
reg [`NR_BITS-1:0] gpr_rd_rid, gpr_rd_rid_n;
|
||||
reg [ISSUE_WIS_W-1:0] gpr_rd_wis, gpr_rd_wis_n;
|
||||
|
||||
reg [ISSUE_RATIO-1:0][`NUM_THREADS-1:0][`XLEN-1:0] cache_data, cache_data_n;
|
||||
reg [ISSUE_RATIO-1:0][`NR_BITS-1:0] cache_reg, cache_reg_n;
|
||||
reg [ISSUE_RATIO-1:0][`NUM_THREADS-1:0] cache_tmask, cache_tmask_n;
|
||||
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
||||
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
||||
|
||||
reg [STATE_BITS-1:0] state, state_n;
|
||||
reg [`NR_BITS-1:0] rs2, rs2_n;
|
||||
reg [`NR_BITS-1:0] rs3, rs3_n;
|
||||
reg rs2_ready, rs2_ready_n;
|
||||
reg rs3_ready, rs3_ready_n;
|
||||
reg data_ready, data_ready_n;
|
||||
|
||||
wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0);
|
||||
wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0);
|
||||
wire is_rs3_zero = (scoreboard_if[i].data.rs3 == 0);
|
||||
|
||||
VX_operands_if staging_if();
|
||||
|
||||
always @(*) begin
|
||||
state_n = state;
|
||||
rs2_n = rs2;
|
||||
rs3_n = rs3;
|
||||
rs2_ready_n = rs2_ready;
|
||||
rs3_ready_n = rs3_ready;
|
||||
rs1_data_n = rs1_data;
|
||||
rs2_data_n = rs2_data;
|
||||
rs3_data_n = rs3_data;
|
||||
cache_data_n = cache_data;
|
||||
cache_reg_n = cache_reg;
|
||||
cache_tmask_n= cache_tmask;
|
||||
cache_eop_n = cache_eop;
|
||||
gpr_rd_rid_n = gpr_rd_rid;
|
||||
gpr_rd_wis_n = gpr_rd_wis;
|
||||
data_ready_n = data_ready;
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (staging_if.valid && staging_if.ready) begin
|
||||
data_ready_n = 0;
|
||||
end
|
||||
if (scoreboard_if[i].valid && data_ready_n == 0) begin
|
||||
data_ready_n = 1;
|
||||
if (is_rs3_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if[i].data.rs3 == cache_reg[scoreboard_if[i].data.wis] &&
|
||||
(scoreboard_if[i].data.tmask & cache_tmask[scoreboard_if[i].data.wis]) == scoreboard_if[i].data.tmask)) begin
|
||||
rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if[i].data.wis];
|
||||
rs3_ready_n = 1;
|
||||
end else begin
|
||||
rs3_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if[i].data.rs3;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH3;
|
||||
end
|
||||
if (is_rs2_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if[i].data.rs2 == cache_reg[scoreboard_if[i].data.wis] &&
|
||||
(scoreboard_if[i].data.tmask & cache_tmask[scoreboard_if[i].data.wis]) == scoreboard_if[i].data.tmask)) begin
|
||||
rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if[i].data.wis];
|
||||
rs2_ready_n = 1;
|
||||
end else begin
|
||||
rs2_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if[i].data.rs2;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH2;
|
||||
end
|
||||
if (is_rs1_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if[i].data.rs1 == cache_reg[scoreboard_if[i].data.wis] &&
|
||||
(scoreboard_if[i].data.tmask & cache_tmask[scoreboard_if[i].data.wis]) == scoreboard_if[i].data.tmask)) begin
|
||||
rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if[i].data.wis];
|
||||
end else begin
|
||||
gpr_rd_rid_n = scoreboard_if[i].data.rs1;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH1;
|
||||
end
|
||||
end
|
||||
gpr_rd_wis_n = scoreboard_if[i].data.wis;
|
||||
rs2_n = scoreboard_if[i].data.rs2;
|
||||
rs3_n = scoreboard_if[i].data.rs3;
|
||||
end
|
||||
STATE_FETCH1: begin
|
||||
rs1_data_n = gpr_rd_data;
|
||||
if (~rs2_ready) begin
|
||||
gpr_rd_rid_n = rs2;
|
||||
state_n = STATE_FETCH2;
|
||||
end else if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FETCH2: begin
|
||||
rs2_data_n = gpr_rd_data;
|
||||
if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FETCH3: begin
|
||||
rs3_data_n = gpr_rd_data;
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
|
||||
if (CACHE_ENABLE != 0 && writeback_if[i].valid) begin
|
||||
if ((cache_reg[writeback_if[i].data.wis] == writeback_if[i].data.rd)
|
||||
|| (cache_eop[writeback_if[i].data.wis] && writeback_if[i].data.sop)) begin
|
||||
for (integer j = 0; j < `NUM_THREADS; ++j) begin
|
||||
if (writeback_if[i].data.tmask[j]) begin
|
||||
cache_data_n[writeback_if[i].data.wis][j] = writeback_if[i].data.data[j];
|
||||
end
|
||||
end
|
||||
cache_reg_n[writeback_if[i].data.wis] = writeback_if[i].data.rd;
|
||||
cache_eop_n[writeback_if[i].data.wis] = writeback_if[i].data.eop;
|
||||
if (writeback_if[i].data.sop) begin
|
||||
cache_tmask_n[writeback_if[i].data.wis] = writeback_if[i].data.tmask;
|
||||
end else begin
|
||||
cache_tmask_n[writeback_if[i].data.wis] |= writeback_if[i].data.tmask;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
gpr_rd_rid <= '0;
|
||||
gpr_rd_wis <= '0;
|
||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
cache_reg <= '0;
|
||||
data_ready <= 0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
rs2 <= rs2_n;
|
||||
rs3 <= rs3_n;
|
||||
rs2_ready <= rs2_ready_n;
|
||||
rs3_ready <= rs3_ready_n;
|
||||
rs1_data <= rs1_data_n;
|
||||
rs2_data <= rs2_data_n;
|
||||
rs3_data <= rs3_data_n;
|
||||
gpr_rd_rid <= gpr_rd_rid_n;
|
||||
gpr_rd_wis <= gpr_rd_wis_n;
|
||||
cache_data <= cache_data_n;
|
||||
cache_reg <= cache_reg_n;
|
||||
cache_tmask <= cache_tmask_n;
|
||||
cache_eop <= cache_eop_n;
|
||||
data_ready <= data_ready_n;
|
||||
end
|
||||
end
|
||||
|
||||
// GPR banks
|
||||
|
||||
`ifdef GPR_RESET
|
||||
reg wr_enabled = 0;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_enabled <= 1;
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire wr_enabled = 1;
|
||||
`endif
|
||||
|
||||
for (genvar j = 0; j < `NUM_THREADS; ++j) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (`XLEN),
|
||||
.SIZE (`NUM_REGS * ISSUE_RATIO),
|
||||
`ifdef GPR_RESET
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
`UNUSED_PIN (wren),
|
||||
.write (wr_enabled && writeback_if[i].valid && writeback_if[i].data.tmask[j]),
|
||||
.waddr (wis_to_addr(writeback_if[i].data.rd, writeback_if[i].data.wis)),
|
||||
.wdata (writeback_if[i].data.data[j]),
|
||||
.raddr (wis_to_addr(gpr_rd_rid, gpr_rd_wis)),
|
||||
.rdata (gpr_rd_data[j])
|
||||
);
|
||||
end
|
||||
|
||||
// staging buffer
|
||||
|
||||
`RESET_RELAY (stg_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) stg_buf (
|
||||
.clk (clk),
|
||||
.reset (stg_buf_reset),
|
||||
.valid_in (scoreboard_if[i].valid),
|
||||
.ready_in (scoreboard_if[i].ready),
|
||||
.data_in ({
|
||||
scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.wis,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd}),
|
||||
.data_out ({
|
||||
staging_if.data.uuid,
|
||||
staging_if.data.wis,
|
||||
staging_if.data.tmask,
|
||||
staging_if.data.PC,
|
||||
staging_if.data.wb,
|
||||
staging_if.data.ex_type,
|
||||
staging_if.data.op_type,
|
||||
staging_if.data.op_mod,
|
||||
staging_if.data.use_PC,
|
||||
staging_if.data.use_imm,
|
||||
staging_if.data.imm,
|
||||
staging_if.data.rd}),
|
||||
.valid_out (staging_if.valid),
|
||||
.ready_out (staging_if.ready)
|
||||
);
|
||||
|
||||
assign staging_if.data.rs1_data = rs1_data;
|
||||
assign staging_if.data.rs2_data = rs2_data;
|
||||
assign staging_if.data.rs3_data = rs3_data;
|
||||
|
||||
// output buffer
|
||||
|
||||
wire valid_stg, ready_stg;
|
||||
assign valid_stg = staging_if.valid && data_ready;
|
||||
assign staging_if.ready = ready_stg && data_ready;
|
||||
|
||||
`RESET_RELAY (out_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW + (3 * `NUM_THREADS * `XLEN)),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.valid_in (valid_stg),
|
||||
.ready_in (ready_stg),
|
||||
.data_in (staging_if.data),
|
||||
.data_out (operands_if[i].data),
|
||||
.valid_out (operands_if[i].valid),
|
||||
.ready_out (operands_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
endmodule
|
||||
79
hw/rtl/core/VX_pending_instr.sv
Normal file
79
hw/rtl/core/VX_pending_instr.sv
Normal file
@@ -0,0 +1,79 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Per-warp counter of in-flight ("pending") instructions.
//
// Each cycle at most one warp can be incremented (incr/incr_wid) and up to
// DECR_COUNT decrement events can arrive (decr[k]/decr_wid[k]). The decoded
// increment flag and the per-warp decrement count are registered first and
// only applied to the counter on the following cycle, so the counter and the
// empty/alm_empty flags lag the inputs.
//
// Parameters:
//   CTR_WIDTH  - width of each per-warp counter.
//   ALM_EMPTY  - counter value that is reported as "almost empty".
//   DECR_COUNT - number of decrement ports.
//
// Outputs:
//   empty      - every warp's registered "count == 0" flag is set.
//   alm_empty  - registered "count == ALM_EMPTY" flag of warp alm_empty_wid.
module VX_pending_instr #(
    parameter CTR_WIDTH  = 12,
    parameter ALM_EMPTY  = 1,
    parameter DECR_COUNT = 1
) (
    input wire                  clk,
    input wire                  reset,
    input wire                  incr,
    input wire [`NW_WIDTH-1:0]  incr_wid,
    input wire [DECR_COUNT-1:0] decr,
    input wire [DECR_COUNT-1:0][`NW_WIDTH-1:0] decr_wid,
    input wire [`NW_WIDTH-1:0]  alm_empty_wid,
    output wire                 empty,
    output wire                 alm_empty
);
    // enough bits to hold a count in the range 0..DECR_COUNT
    localparam COUNTW = `CLOG2(DECR_COUNT+1);

    reg [`NUM_WARPS-1:0][CTR_WIDTH-1:0]  pending_instrs;
    reg [`NUM_WARPS-1:0][COUNTW-1:0]     decr_cnt;
    reg [`NUM_WARPS-1:0][DECR_COUNT-1:0] decr_mask;
    reg [`NUM_WARPS-1:0] incr_cnt, incr_cnt_n;
    reg [`NUM_WARPS-1:0] alm_empty_r, empty_r;

    // one-hot decode of the warp being incremented this cycle
    always @(*) begin
        incr_cnt_n = 0;
        if (incr) begin
            incr_cnt_n[incr_wid] = 1;
        end
    end

    // decr_mask[w][k] is set when decrement port k targets warp w
    always @(*) begin
        decr_mask = 0;
        for (integer k = 0; k < DECR_COUNT; ++k) begin
            if (decr[k]) begin
                decr_mask[decr_wid[k]][k] = 1;
            end
        end
    end

    for (genvar w = 0; w < `NUM_WARPS; ++w) begin

        // number of decrement ports hitting warp w this cycle
        wire [COUNTW-1:0] decr_cnt_n;
        `POP_COUNT(decr_cnt_n, decr_mask[w]);

        // apply the increment/decrement amounts registered on the previous cycle
        wire [CTR_WIDTH-1:0] incr_amount = CTR_WIDTH'(incr_cnt[w]);
        wire [CTR_WIDTH-1:0] decr_amount = CTR_WIDTH'(decr_cnt[w]);
        wire [CTR_WIDTH-1:0] pending_instrs_n;
        assign pending_instrs_n = pending_instrs[w] + incr_amount - decr_amount;

        always @(posedge clk) begin
            if (reset) begin
                pending_instrs[w] <= '0;
                incr_cnt[w]       <= '0;
                decr_cnt[w]       <= '0;
                empty_r[w]        <= 1;
                alm_empty_r[w]    <= 0;
            end else begin
                pending_instrs[w] <= pending_instrs_n;
                incr_cnt[w]       <= incr_cnt_n[w];
                decr_cnt[w]       <= decr_cnt_n;
                empty_r[w]        <= (pending_instrs_n == 0);
                alm_empty_r[w]    <= (pending_instrs_n == ALM_EMPTY);
            end
        end
    end

    assign empty     = (& empty_r);
    assign alm_empty = alm_empty_r[alm_empty_wid];

endmodule
|
||||
379
hw/rtl/core/VX_schedule.sv
Normal file
379
hw/rtl/core/VX_schedule.sv
Normal file
@@ -0,0 +1,379 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Warp scheduler.
//
// Keeps the per-warp architectural state visible in this module (active bit,
// stall bit, barrier-stall bit, thread mask, PC), updates it from the
// warp-control, branch, decode, CSR and split/join events, selects one ready
// warp per cycle and pushes {tmask, PC, wid} into an output elastic buffer
// that drives schedule_if. It also tracks pending instructions to derive
// `busy`, counts busy cycles, and asserts on a stall timeout.
module VX_schedule import VX_gpu_pkg::*; #(
    parameter CORE_ID = 0
) (
    input wire              clk,
    input wire              reset,

    // configuration
    input base_dcrs_t       base_dcrs,

    // inputs
    VX_warp_ctl_if.slave    warp_ctl_if,
    VX_branch_ctl_if.slave  branch_ctl_if [`NUM_ALU_BLOCKS],
    VX_decode_sched_if.slave decode_sched_if,
    VX_commit_sched_if.slave commit_sched_if,

    // outputs
    VX_schedule_if.master   schedule_if,
`ifdef GBAR_ENABLE
    VX_gbar_bus_if.master   gbar_bus_if,
`endif
    VX_sched_csr_if.master  sched_csr_if,

    // status
    output wire             busy
);
    `UNUSED_PARAM (CORE_ID)

    // ------------------------------------------------------------------
    // per-warp state (registered value, combinational next value)
    // ------------------------------------------------------------------

    reg [`NUM_WARPS-1:0] active_warps, active_warps_n;   // updated when a warp is activated or disabled
    reg [`NUM_WARPS-1:0] stalled_warps, stalled_warps_n; // set when a warp is scheduled, cleared by the unlock events below

    reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks, thread_masks_n;
    reg [`NUM_WARPS-1:0][`XLEN-1:0]        warp_pcs, warp_pcs_n;

    // warp selected for scheduling this cycle
    wire [`NW_WIDTH-1:0]    schedule_wid;
    wire [`NUM_THREADS-1:0] schedule_tmask;
    wire [`XLEN-1:0]        schedule_pc;
    wire                    schedule_valid;
    wire                    schedule_ready;

    // split/join unit outputs
    wire                    join_valid;
    wire                    join_is_dvg;
    wire                    join_is_else;
    wire [`NW_WIDTH-1:0]    join_wid;
    wire [`NUM_THREADS-1:0] join_tmask;
    wire [`XLEN-1:0]        join_pc;

    // performance / bookkeeping counters
    reg [`PERF_CTR_BITS-1:0] cycles;
    reg [`NUM_WARPS-1:0][`UUID_WIDTH-1:0] issued_instrs;

    // handshake on the input side (schedule_fire) and output side
    // (schedule_if_fire) of the output buffer
    wire schedule_fire;
    wire schedule_if_fire;
    assign schedule_fire    = schedule_valid && schedule_ready;
    assign schedule_if_fire = schedule_if.valid && schedule_if.ready;

    // qualified event strobes used by the next-state logic
    wire wspawn_fire   = warp_ctl_if.valid && warp_ctl_if.wspawn.valid;
    wire tmc_fire      = warp_ctl_if.valid && warp_ctl_if.tmc.valid;
    wire split_fire    = warp_ctl_if.valid && warp_ctl_if.split.valid;
    wire barrier_fire  = warp_ctl_if.valid && warp_ctl_if.barrier.valid;
    wire decode_unlock = decode_sched_if.valid && ~decode_sched_if.is_wstall;

    // ------------------------------------------------------------------
    // branch interfaces, unpacked into plain vectors so that they can be
    // indexed by a procedural loop variable
    // ------------------------------------------------------------------

    wire [`NUM_ALU_BLOCKS-1:0]                  branch_valid;
    wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0]   branch_wid;
    wire [`NUM_ALU_BLOCKS-1:0]                  branch_taken;
    wire [`NUM_ALU_BLOCKS-1:0][`XLEN-1:0]       branch_dest;
    for (genvar b = 0; b < `NUM_ALU_BLOCKS; ++b) begin
        assign branch_valid[b] = branch_ctl_if[b].valid;
        assign branch_wid[b]   = branch_ctl_if[b].wid;
        assign branch_taken[b] = branch_ctl_if[b].taken;
        assign branch_dest[b]  = branch_ctl_if[b].dest;
    end

    // ------------------------------------------------------------------
    // barriers
    // ------------------------------------------------------------------

    reg [`NUM_BARRIERS-1:0][`NUM_WARPS-1:0] barrier_masks, barrier_masks_n; // warps waiting at each barrier
    reg [`NUM_WARPS-1:0] barrier_stalls, barrier_stalls_n;                  // warps blocked on any barrier
    wire [`CLOG2(`NUM_WARPS+1)-1:0] active_barrier_count;
    wire [`NUM_WARPS-1:0] curr_barrier_mask;
`ifdef GBAR_ENABLE
    reg [`NUM_WARPS-1:0] curr_barrier_mask_n;
    reg gbar_req_valid;
    reg [`NB_WIDTH-1:0] gbar_req_id;
    reg [`NC_WIDTH-1:0] gbar_req_size_m1;
`endif

    // mask of the barrier addressed by the current request, and how many
    // warps are already waiting on it
    assign curr_barrier_mask = barrier_masks[warp_ctl_if.barrier.id];
    `POP_COUNT(active_barrier_count, curr_barrier_mask);
    `UNUSED_VAR (active_barrier_count)

    // a local barrier releases when the number of warps already waiting
    // equals size_m1 (the arriving warp is not yet in the mask)
    wire barrier_release = ~warp_ctl_if.barrier.is_global
                        && (active_barrier_count[`NW_WIDTH-1:0] == warp_ctl_if.barrier.size_m1[`NW_WIDTH-1:0]);

    // ------------------------------------------------------------------
    // next-state logic
    //
    // Statement order is significant: when two events touch the same warp
    // in the same cycle, the later assignment wins. In particular the
    // "stall on schedule" and "advance PC" updates at the bottom override
    // any unlock or PC redirect made above for that warp.
    // ------------------------------------------------------------------

    always @(*) begin
        // default: hold current state
        warp_pcs_n       = warp_pcs;
        thread_masks_n   = thread_masks;
        active_warps_n   = active_warps;
        stalled_warps_n  = stalled_warps;
        barrier_masks_n  = barrier_masks;
        barrier_stalls_n = barrier_stalls;

        // wspawn handling: activate every warp in wmask with thread 0
        // enabled and the spawn PC
        if (wspawn_fire) begin
            active_warps_n = active_warps_n | warp_ctl_if.wspawn.wmask;
            for (integer w = 0; w < `NUM_WARPS; ++w) begin
                if (warp_ctl_if.wspawn.wmask[w]) begin
                    thread_masks_n[w][0] = 1;
                    warp_pcs_n[w] = warp_ctl_if.wspawn.pc;
                end
            end
            stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
        end

        // TMC handling: replace the thread mask; an all-zero mask
        // deactivates the warp
        if (tmc_fire) begin
            active_warps_n[warp_ctl_if.wid]  = (warp_ctl_if.tmc.tmask != 0);
            thread_masks_n[warp_ctl_if.wid]  = warp_ctl_if.tmc.tmask;
            stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
        end

        // split handling: on divergence continue with the "then" mask
        if (split_fire) begin
            if (warp_ctl_if.split.is_dvg) begin
                thread_masks_n[warp_ctl_if.wid] = warp_ctl_if.split.then_tmask;
            end
            stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
        end

        // join handling: restore the mask (and redirect the PC for the
        // "else" path) reported by the split/join unit
        if (join_valid) begin
            if (join_is_dvg) begin
                if (join_is_else) begin
                    warp_pcs_n[join_wid] = join_pc;
                end
                thread_masks_n[join_wid] = join_tmask;
            end
            stalled_warps_n[join_wid] = 0; // unlock warp
        end

        // barrier handling
    `ifdef GBAR_ENABLE
        // barrier mask including the requesting warp; compared against
        // active_warps when deciding to issue a global barrier request
        curr_barrier_mask_n = curr_barrier_mask;
        curr_barrier_mask_n[warp_ctl_if.wid] = 1;
    `endif
        if (barrier_fire) begin
            if (barrier_release) begin
                // last warp arrived: clear the barrier and release waiters
                barrier_masks_n[warp_ctl_if.barrier.id] = '0;
                barrier_stalls_n &= ~barrier_masks[warp_ctl_if.barrier.id];
            end else begin
                // park the requesting warp at the barrier
                barrier_masks_n[warp_ctl_if.barrier.id][warp_ctl_if.wid] = 1;
                barrier_stalls_n[warp_ctl_if.wid] = 1;
            end
            stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
        end
    `ifdef GBAR_ENABLE
        if (gbar_bus_if.rsp_valid && (gbar_req_id == gbar_bus_if.rsp_id)) begin
            barrier_masks_n[gbar_bus_if.rsp_id] = '0;
            barrier_stalls_n = '0; // unlock all warps
        end
    `endif

        // Branch handling: redirect the PC of a taken branch
        for (integer b = 0; b < `NUM_ALU_BLOCKS; ++b) begin
            if (branch_valid[b]) begin
                if (branch_taken[b]) begin
                    warp_pcs_n[branch_wid[b]] = branch_dest[b];
                end
                stalled_warps_n[branch_wid[b]] = 0; // unlock warp
            end
        end

        // decode unlock
        if (decode_unlock) begin
            stalled_warps_n[decode_sched_if.wid] = 0;
        end

        // CSR unlock
        if (sched_csr_if.unlock_warp) begin
            stalled_warps_n[sched_csr_if.unlock_wid] = 0;
        end

        // stall the warp until decode stage
        if (schedule_fire) begin
            stalled_warps_n[schedule_wid] = 1;
        end

        // advance PC
        if (schedule_if_fire) begin
            warp_pcs_n[schedule_if.data.wid] = schedule_if.data.PC + 4;
        end
    end

    `UNUSED_VAR (base_dcrs)

    // ------------------------------------------------------------------
    // state registers
    // ------------------------------------------------------------------

    always @(posedge clk) begin
        if (reset) begin
            warp_pcs       <= '0;
            thread_masks   <= '0;
            active_warps   <= '0;
            stalled_warps  <= '0;
            barrier_masks  <= '0;
            barrier_stalls <= '0;

            // activate first warp (these assignments must follow the
            // clears above so that they take effect)
            warp_pcs[0]        <= base_dcrs.startup_addr;
            active_warps[0]    <= 1;
            thread_masks[0][0] <= 1;
        end else begin
            warp_pcs       <= warp_pcs_n;
            thread_masks   <= thread_masks_n;
            active_warps   <= active_warps_n;
            stalled_warps  <= stalled_warps_n;
            barrier_masks  <= barrier_masks_n;
            barrier_stalls <= barrier_stalls_n;
        end
    end

    // global barrier scheduling
`ifdef GBAR_ENABLE
    always @(posedge clk) begin
        if (reset) begin
            gbar_req_valid <= 0;
        end else begin
            // raise a request once the barrier mask, including the
            // requesting warp, matches the set of active warps
            if (barrier_fire
             && warp_ctl_if.barrier.is_global
             && (curr_barrier_mask_n == active_warps)) begin
                gbar_req_valid   <= 1;
                gbar_req_id      <= warp_ctl_if.barrier.id;
                gbar_req_size_m1 <= warp_ctl_if.barrier.size_m1[`NC_WIDTH-1:0];
            end
            // request accepted (takes priority over a same-cycle set)
            if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin
                gbar_req_valid <= 0;
            end
        end
    end
`endif

    // counters: instructions issued per warp and cycles spent busy
    always @(posedge clk) begin
        if (reset) begin
            issued_instrs <= '0;
            cycles        <= '0;
        end else begin
            if (schedule_if_fire) begin
                issued_instrs[schedule_if.data.wid] <= issued_instrs[schedule_if.data.wid] + `UUID_WIDTH'(1);
            end
            if (busy) begin
                cycles <= cycles + 1;
            end
        end
    end

    // barrier handling

`ifdef GBAR_ENABLE
    assign gbar_bus_if.req_valid   = gbar_req_valid;
    assign gbar_bus_if.req_id      = gbar_req_id;
    assign gbar_bus_if.req_size_m1 = gbar_req_size_m1;
    assign gbar_bus_if.req_core_id = `NC_WIDTH'(CORE_ID % `NUM_CORES);
`endif

    // split/join handling

    `RESET_RELAY (split_join_reset, reset);

    VX_split_join #(
        .CORE_ID (CORE_ID)
    ) split_join (
        .clk          (clk),
        .reset        (split_join_reset),
        .valid        (warp_ctl_if.valid),
        .wid          (warp_ctl_if.wid),
        .split        (warp_ctl_if.split),
        .sjoin        (warp_ctl_if.sjoin),
        .join_valid   (join_valid),
        .join_is_dvg  (join_is_dvg),
        .join_is_else (join_is_else),
        .join_wid     (join_wid),
        .join_tmask   (join_tmask),
        .join_pc      (join_pc)
    );

    // schedule the next ready warp: a warp is ready when it is active and
    // neither stalled nor waiting on a barrier

    wire [`NUM_WARPS-1:0] ready_warps;
    assign ready_warps = active_warps & ~(stalled_warps | barrier_stalls);

    // NOTE(review): which ready warp wins depends on VX_lzc with REVERSE=1,
    // which is not visible here - confirm the selection order there
    VX_lzc #(
        .N       (`NUM_WARPS),
        .REVERSE (1)
    ) wid_select (
        .data_in   (ready_warps),
        .data_out  (schedule_wid),
        .valid_out (schedule_valid)
    );

    // {thread mask, PC} of every warp, muxed by the selected warp id
    wire [`NUM_WARPS-1:0][(`NUM_THREADS + `XLEN)-1:0] schedule_data;
    for (genvar w = 0; w < `NUM_WARPS; ++w) begin
        assign schedule_data[w] = {thread_masks[w], warp_pcs[w]};
    end

    // The two slices below concatenate back to the complete selected entry;
    // the split point (4 bits from the top) does not depend on NUM_THREADS.
    // NOTE(review): the reason for slicing instead of using the entry
    // directly is not visible here (possibly a tool workaround) - confirm
    // before simplifying.
    assign {schedule_tmask, schedule_pc} = {
        schedule_data[schedule_wid][(`NUM_THREADS + `XLEN)-1:(`NUM_THREADS + `XLEN)-4],
        schedule_data[schedule_wid][(`NUM_THREADS + `XLEN)-5:0]
    };

    // instruction UUID: produced through DPI in debug builds, tied to zero
    // when NDEBUG is defined
`ifndef NDEBUG
    localparam GNW_WIDTH = `LOG2UP(`NUM_CLUSTERS * `NUM_CORES * `NUM_WARPS);
    reg [`UUID_WIDTH-1:0] instr_uuid;
    wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(schedule_wid);
    always @(posedge clk) begin
        if (reset) begin
            instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(1, 0, 0));
        end else if (schedule_fire) begin
            instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(0, 32'(g_wid), 64'(schedule_pc)));
        end
    end
`else
    wire [`UUID_WIDTH-1:0] instr_uuid = '0;
`endif

    // output buffer between warp selection and schedule_if
    VX_elastic_buffer #(
        .DATAW (`NUM_THREADS + `XLEN + `NW_WIDTH)
    ) out_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (schedule_valid),
        .ready_in  (schedule_ready),
        .data_in   ({schedule_tmask, schedule_pc, schedule_wid}),
        .data_out  ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid}),
        .valid_out (schedule_if.valid),
        .ready_out (schedule_if.ready)
    );

    assign schedule_if.data.uuid = instr_uuid;

    // pending-instruction tracking: incremented when an instruction leaves
    // through schedule_if, decremented by the commit interface

    `RESET_RELAY (pending_instr_reset, reset);

    wire no_pending_instr;
    VX_pending_instr #(
        .CTR_WIDTH  (12),
        .DECR_COUNT (`ISSUE_WIDTH),
        .ALM_EMPTY  (1)
    ) pending_instr(
        .clk           (clk),
        .reset         (pending_instr_reset),
        .incr          (schedule_if_fire),
        .incr_wid      (schedule_if.data.wid),
        .decr          (commit_sched_if.committed),
        .decr_wid      (commit_sched_if.committed_wid),
        .alm_empty_wid (sched_csr_if.alm_empty_wid),
        .alm_empty     (sched_csr_if.alm_empty),
        .empty         (no_pending_instr)
    );

    // busy while any warp is active or instructions are still pending
    `BUFFER_BUSY (busy, (active_warps != 0 || ~no_pending_instr), 1);

    // export CSRs
    assign sched_csr_if.cycles       = cycles;
    assign sched_csr_if.active_warps = active_warps;
    assign sched_csr_if.thread_masks = thread_masks;

    // timeout handling: once the first non-stalling instruction has been
    // seen at decode, count consecutive cycles in which at least one warp
    // is active and the active set equals the stalled set

    reg [31:0] timeout_ctr;
    reg timeout_enable;

    wire all_active_stalled = (active_warps != 0) && (active_warps == stalled_warps);

    always @(posedge clk) begin
        if (reset) begin
            timeout_ctr    <= '0;
            timeout_enable <= 0;
        end else begin
            if (decode_unlock) begin
                timeout_enable <= 1;
            end
            if (timeout_enable && all_active_stalled) begin
                timeout_ctr <= timeout_ctr + 1;
            end else if (!all_active_stalled) begin
                timeout_ctr <= '0;
            end
        end
    end
    `RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps));

endmodule
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user