Merge branch 'tensor_core' into kernels
This commit is contained in:
@@ -10,6 +10,7 @@ ENTRY(_start)
|
||||
|
||||
MEMORY {
|
||||
DRAM0 (rwx): ORIGIN = 0x80000000, LENGTH = 512M
|
||||
DRAMARG (rwx): ORIGIN = 0x9fff0000, LENGTH = 8K
|
||||
DRAM1 (rwx): ORIGIN = 0xa0000000, LENGTH = 16M
|
||||
DRAM2 (rwx): ORIGIN = 0xa1000000, LENGTH = 16M
|
||||
}
|
||||
@@ -275,6 +276,10 @@ SECTIONS
|
||||
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
|
||||
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
|
||||
|
||||
.args : {
|
||||
*(.args)
|
||||
. += 8K;
|
||||
}> DRAMARG
|
||||
.operand.a : {
|
||||
*(.operand.a)
|
||||
. += 32K;
|
||||
|
||||
@@ -170,7 +170,7 @@ static void __attribute__ ((noinline)) spawn_tasks_cluster_all_cb() {
|
||||
vx_tmc_zero();
|
||||
}
|
||||
|
||||
static void __attribute__ ((noinline)) spawn_tasks_all_cb() {
|
||||
static void __attribute__ ((noinline)) spawn_tasks_all_cb() {
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
@@ -254,6 +254,7 @@ void vx_spawn_tasks_cluster(int num_tasks, vx_spawn_tasks_cb callback, void *arg
|
||||
vx_wspawn_wait();
|
||||
}
|
||||
|
||||
// TODO: this is incomplete
|
||||
// TODO: Instead of launching an additional wave just to work on remaining
|
||||
// threads, handle this in the last wave amongst other full warps.
|
||||
if (rem_threads_in_last_warp != 0 && core_id_in_cluster == 0) {
|
||||
|
||||
Reference in New Issue
Block a user