Restructure
This commit is contained in:
BIN
kernel/vx_os/.DS_Store
vendored
Normal file
BIN
kernel/vx_os/.DS_Store
vendored
Normal file
Binary file not shown.
150
kernel/vx_os/vx_back/vx_back.c
Normal file
150
kernel/vx_os/vx_back/vx_back.c
Normal file
@@ -0,0 +1,150 @@
|
||||
|
||||
#include "vx_back.h"
|
||||
#include "../vx_io/vx_io.h"
|
||||
|
||||
|
||||
void vx_before_main()
|
||||
{
|
||||
// unsigned num_available_warps = vx_available_warps();
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
queue_initialize(q + i);
|
||||
}
|
||||
}
|
||||
|
||||
void vx_reschedule_warps()
|
||||
{
|
||||
|
||||
|
||||
register unsigned curr_warp asm("s10");
|
||||
// vx_printf("Reschedule: ", curr_warp);
|
||||
|
||||
if (queue_isEmpty(q+curr_warp))
|
||||
{
|
||||
// vx_printf("Done: ", curr_warp);
|
||||
done[curr_warp] = 1;
|
||||
if (curr_warp == 0)
|
||||
{
|
||||
vx_load_context();
|
||||
return;
|
||||
}
|
||||
ECALL;
|
||||
}
|
||||
|
||||
Job j;
|
||||
queue_dequeue(q+curr_warp,&j);
|
||||
|
||||
// vx_printf("Reschedule -> ", j.wid);
|
||||
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
||||
vx_createThreads(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
|
||||
|
||||
ECALL;
|
||||
|
||||
}
|
||||
|
||||
void vx_schedule_warps()
|
||||
{
|
||||
|
||||
unsigned num_available_warps = vx_available_warps();
|
||||
|
||||
asm __volatile__("mv s3, sp");
|
||||
|
||||
for (int curr_warp = 1; curr_warp < num_available_warps; ++curr_warp)
|
||||
{
|
||||
if (!queue_isEmpty(q+curr_warp))
|
||||
{
|
||||
Job j;
|
||||
queue_dequeue(q+curr_warp,&j);
|
||||
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
||||
vx_wspawn(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
|
||||
}
|
||||
}
|
||||
|
||||
asm __volatile__("mv sp, s3");
|
||||
|
||||
|
||||
vx_save_context();
|
||||
|
||||
// vx_print_str("saved context\n");
|
||||
|
||||
register unsigned val asm("tp");
|
||||
if (val)
|
||||
{
|
||||
if (!queue_isEmpty(q))
|
||||
{
|
||||
// vx_print_str("found something for w0\n");
|
||||
Job j;
|
||||
queue_dequeue(q,&j);
|
||||
// vx_printf("num_threads: ", j.n_threads);
|
||||
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
||||
vx_createThreads(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vx_spawnWarps(unsigned num_Warps, unsigned num_threads, FUNC, void * args)
|
||||
{
|
||||
vx_before_main();
|
||||
|
||||
unsigned num_available_warps = vx_available_warps();
|
||||
// vx_printf("Num available warps: ", num_available_warps);
|
||||
|
||||
asm __volatile__("addi s2, sp, 0");
|
||||
int warp = 0;
|
||||
for (unsigned i = 0; i < num_Warps; i++)
|
||||
{
|
||||
asm __volatile__("lui s3, 0xFFFF0");
|
||||
asm __volatile__("add sp, sp, s3");
|
||||
register unsigned stack_ptr asm("sp");
|
||||
|
||||
Job j;
|
||||
j.wid = i;
|
||||
j.n_threads = num_threads;
|
||||
j.base_sp = stack_ptr;
|
||||
j.func_ptr = (unsigned) func;
|
||||
j.args = args;
|
||||
j.assigned_warp = warp;
|
||||
|
||||
queue_enqueue(q + warp,&j);
|
||||
++warp;
|
||||
if (warp >= num_available_warps) warp = 0;
|
||||
}
|
||||
asm __volatile__("addi sp, s2, 0");
|
||||
|
||||
|
||||
vx_schedule_warps();
|
||||
|
||||
}
|
||||
|
||||
void vx_wait_for_warps(unsigned num_wait)
|
||||
{
|
||||
// vx_printf("wait for: ", num_wait);
|
||||
unsigned num_available_warps = vx_available_warps();
|
||||
unsigned num = 0;
|
||||
while (num != num_wait)
|
||||
{
|
||||
num = 0;
|
||||
for (int i = 0; i < num_available_warps; i++)
|
||||
{
|
||||
if (done[i] == 1)
|
||||
{
|
||||
num += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vx_printf("num found: ", num);
|
||||
for (int i = 0; i < num_available_warps; i++) done[i] = 0;
|
||||
}
|
||||
|
||||
|
||||
void * vx_get_arg_struct(void)
|
||||
{
|
||||
register void *ret asm("s7");
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
55
kernel/vx_os/vx_back/vx_back.h
Normal file
55
kernel/vx_os/vx_back/vx_back.h
Normal file
@@ -0,0 +1,55 @@
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "../vx_util/queue.h"
|
||||
|
||||
#define WSPAWN asm __volatile__(".word 0x3006b"::);
|
||||
#define CLONE asm __volatile__(".word 0x3506b":::);
|
||||
#define JALRS asm __volatile__(".word 0x1bfe0eb":::"s10");
|
||||
#define ECALL asm __volatile__(".word 0x00000073");
|
||||
#define JMPRT asm __volatile__(".word 0x5406b");
|
||||
#define SPLIT asm __volatile__(".word 0xf206b");
|
||||
#define P_JUMP asm __volatile__(".word 0x1ff707b");
|
||||
#define JOIN asm __volatile__(".word 0x306b");
|
||||
|
||||
|
||||
#define __if(val) bool temp = !val; \
|
||||
register unsigned p asm("t5") = temp; \
|
||||
register void * e asm("t6") = &&ELSE; \
|
||||
SPLIT; \
|
||||
P_JUMP; \
|
||||
|
||||
|
||||
#define __else register void * w asm("t3") = &&AFTER; \
|
||||
asm __volatile__("jr t3"); \
|
||||
ELSE: asm __volatile__("nop");
|
||||
|
||||
#define __end_if AFTER:\
|
||||
JOIN;
|
||||
|
||||
static int done[] = {0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
static int main_sp[1];
|
||||
|
||||
unsigned context[32];
|
||||
void vx_save_context(void);
|
||||
void vx_load_context(void);
|
||||
|
||||
|
||||
#define FUNC void (func)(unsigned, unsigned)
|
||||
|
||||
unsigned vx_available_warps(void);
|
||||
unsigned vx_available_threads(void);
|
||||
|
||||
|
||||
void vx_createThreads(unsigned, unsigned, unsigned, void *, unsigned);
|
||||
void vx_wspawn(unsigned, unsigned, unsigned, void *, unsigned);
|
||||
void vx_spawnWarps(unsigned num_Warps, unsigned num_threads, FUNC, void *);
|
||||
void vx_schedule_warps(void);
|
||||
void vx_reschedule_warps(void);
|
||||
void vx_wait_for_warps(unsigned);
|
||||
void * vx_get_arg_struct(void);
|
||||
|
||||
|
||||
151
kernel/vx_os/vx_back/vx_back.s
Normal file
151
kernel/vx_os/vx_back/vx_back.s
Normal file
@@ -0,0 +1,151 @@
|
||||
|
||||
|
||||
|
||||
.section .text
|
||||
|
||||
.type _start, @function
|
||||
.global _start
|
||||
_start:
|
||||
li a0, 4 # Num Warps
|
||||
csrw 0x20, a0 # Setting the number of available warps
|
||||
li a0, 8 # Num Threads
|
||||
csrw 0x21, a0 # Setting the number of available threads
|
||||
csrw mhartid,zero
|
||||
csrw misa,zero
|
||||
lui sp, 0x7ffff
|
||||
jal vx_before_main
|
||||
jal main
|
||||
ecall
|
||||
|
||||
.type vx_createThreads, @function
|
||||
.global vx_createThreads
|
||||
vx_createThreads:
|
||||
mv s7 ,a3 # Moving args to s7
|
||||
mv s10,a4 # Moving assigned_warp to s10
|
||||
mv t5 ,sp # Saving the current stack pointer to t5
|
||||
mv t2 , a0 # t2 = num_threads
|
||||
loop_init:
|
||||
li a0,1 # i = 0
|
||||
loop_cond:
|
||||
bge a0, t2, loop_done # i < num_threads
|
||||
loop_body:
|
||||
addi sp,sp,-2048 # Allocate 2k stack for new thread
|
||||
mv t1, a0 # #lane = i
|
||||
.word 0x3506b # clone register state
|
||||
loop_inc:
|
||||
addi a0, a0, 1
|
||||
j loop_cond
|
||||
loop_done:
|
||||
mv sp,t5 # Restoring the stack
|
||||
li a0,0 # setting tid = 0 for main thread
|
||||
mv t6,a2 # setting func_addr
|
||||
mv s11,t2 # setting num_threads to spawn
|
||||
.word 0x1bfe0eb
|
||||
la a0, vx_reschedule_warps
|
||||
.word 0x5406b
|
||||
|
||||
|
||||
.type vx_wspawn, @function
|
||||
.global vx_wspawn
|
||||
vx_wspawn:
|
||||
la t1, vx_createThreads
|
||||
.word 0x3006b # WSPAWN instruction
|
||||
ret
|
||||
|
||||
.global context
|
||||
|
||||
.type vx_save_context, @function
|
||||
.global vx_save_context
|
||||
vx_save_context:
|
||||
la tp, context
|
||||
sw x0 , 0 (tp)
|
||||
sw x1 , 4 (tp)
|
||||
sw x2 , 8 (tp)
|
||||
sw x3 , 12(tp)
|
||||
sw x4 , 16(tp)
|
||||
sw x5 , 20(tp)
|
||||
sw x6 , 24(tp)
|
||||
sw x7 , 28(tp)
|
||||
sw x8 , 32(tp)
|
||||
sw x9 , 36(tp)
|
||||
sw x10, 40(tp)
|
||||
sw x11, 44(tp)
|
||||
sw x12, 48(tp)
|
||||
sw x13, 52(tp)
|
||||
sw x14, 56(tp)
|
||||
sw x15, 60(tp)
|
||||
sw x16, 64(tp)
|
||||
sw x17, 68(tp)
|
||||
sw x18, 72(tp)
|
||||
sw x19, 76(tp)
|
||||
sw x20, 80(tp)
|
||||
sw x21, 84(tp)
|
||||
sw x22, 88(tp)
|
||||
sw x23, 92(tp)
|
||||
sw x24, 96(tp)
|
||||
sw x25, 100(tp)
|
||||
sw x26, 104(tp)
|
||||
sw x27, 108(tp)
|
||||
sw x28, 112(tp)
|
||||
sw x29, 116(tp)
|
||||
sw x30, 120(tp)
|
||||
sw x31, 124(tp)
|
||||
li tp, 1
|
||||
ret
|
||||
|
||||
|
||||
.type vx_load_context, @function
|
||||
.global vx_load_context
|
||||
vx_load_context:
|
||||
la tp, context
|
||||
lw x0 , 0 (tp)
|
||||
lw x1 , 4 (tp)
|
||||
lw x2 , 8 (tp)
|
||||
lw x3 , 12(tp)
|
||||
lw x4 , 16(tp)
|
||||
lw x5 , 20(tp)
|
||||
lw x6 , 24(tp)
|
||||
lw x7 , 28(tp)
|
||||
lw x8 , 32(tp)
|
||||
lw x9 , 36(tp)
|
||||
lw x10, 40(tp)
|
||||
lw x11, 44(tp)
|
||||
lw x12, 48(tp)
|
||||
lw x13, 52(tp)
|
||||
lw x14, 56(tp)
|
||||
lw x15, 60(tp)
|
||||
lw x16, 64(tp)
|
||||
lw x17, 68(tp)
|
||||
lw x18, 72(tp)
|
||||
lw x19, 76(tp)
|
||||
lw x20, 80(tp)
|
||||
lw x21, 84(tp)
|
||||
lw x22, 88(tp)
|
||||
lw x23, 92(tp)
|
||||
lw x24, 96(tp)
|
||||
lw x25, 100(tp)
|
||||
lw x26, 104(tp)
|
||||
lw x27, 108(tp)
|
||||
lw x28, 112(tp)
|
||||
lw x29, 116(tp)
|
||||
lw x30, 120(tp)
|
||||
lw x31, 124(tp)
|
||||
li tp, 0
|
||||
ret
|
||||
|
||||
.type vx_available_warps, @function
|
||||
.global vx_available_warps
|
||||
vx_available_warps:
|
||||
csrr a0, 0x20
|
||||
ret
|
||||
|
||||
.type vx_available_threads, @function
|
||||
.global vx_available_threads
|
||||
vx_available_threads:
|
||||
csrr a0, 0x21
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
BIN
kernel/vx_os/vx_io/.DS_Store
vendored
Normal file
BIN
kernel/vx_os/vx_io/.DS_Store
vendored
Normal file
Binary file not shown.
29
kernel/vx_os/vx_io/vx_io.c
Normal file
29
kernel/vx_os/vx_io/vx_io.c
Normal file
@@ -0,0 +1,29 @@
|
||||
|
||||
#include "vx_io.h"
|
||||
|
||||
void vx_print_hex(unsigned f)
|
||||
{
|
||||
if (f < 16)
|
||||
{
|
||||
vx_print_str(hextoa[f]);
|
||||
return;
|
||||
}
|
||||
int temp;
|
||||
int sf = 32;
|
||||
bool start = false;
|
||||
do
|
||||
{
|
||||
temp = (f >> (sf - 4)) & 0xf;
|
||||
if (temp != 0) start = true;
|
||||
if (start) vx_print_str(hextoa[temp]);
|
||||
sf -= 4;
|
||||
} while(sf > 0);
|
||||
}
|
||||
|
||||
|
||||
void vx_printf(char * c, unsigned f)
|
||||
{
|
||||
vx_print_str(c);
|
||||
vx_print_hex(f);
|
||||
vx_print_str("\n");
|
||||
}
|
||||
9
kernel/vx_os/vx_io/vx_io.h
Normal file
9
kernel/vx_os/vx_io/vx_io.h
Normal file
@@ -0,0 +1,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
|
||||
void vx_print_hex(unsigned);
|
||||
void vx_print_str(char *);
|
||||
void vx_printf(char *, unsigned);
|
||||
30
kernel/vx_os/vx_io/vx_io.s
Normal file
30
kernel/vx_os/vx_io/vx_io.s
Normal file
@@ -0,0 +1,30 @@
|
||||
|
||||
|
||||
.type vx_print_str, @function
|
||||
.global vx_print_str
|
||||
vx_print_str:
|
||||
addi sp, sp, -12
|
||||
sw ra, 0(sp)
|
||||
sw a1, 4(sp)
|
||||
bl:
|
||||
lbu a1,0(a0)
|
||||
beqz a1,be
|
||||
jal vx_printc
|
||||
addi a0, a0, 1
|
||||
j bl
|
||||
be:
|
||||
lw ra, 0(sp)
|
||||
lw a1, 4(sp)
|
||||
addi sp, sp, 12
|
||||
ret
|
||||
|
||||
|
||||
.type vx_printc, @function
|
||||
.global vx_printc
|
||||
vx_printc:
|
||||
la a7, 0x00010000
|
||||
sw a1, 0(a7)
|
||||
ret
|
||||
|
||||
|
||||
|
||||
BIN
kernel/vx_os/vx_util/.DS_Store
vendored
Normal file
BIN
kernel/vx_os/vx_util/.DS_Store
vendored
Normal file
Binary file not shown.
49
kernel/vx_os/vx_util/queue.h
Normal file
49
kernel/vx_os/vx_util/queue.h
Normal file
@@ -0,0 +1,49 @@
|
||||
|
||||
#ifndef __QUEUE__
|
||||
|
||||
#define __QUEUE__
|
||||
|
||||
|
||||
|
||||
#define SIZE 50
|
||||
#define WARPS 7
|
||||
|
||||
|
||||
typedef struct Job_t
|
||||
{
|
||||
unsigned wid;
|
||||
unsigned n_threads;
|
||||
unsigned base_sp;
|
||||
unsigned func_ptr;
|
||||
void * args;
|
||||
unsigned assigned_warp;
|
||||
|
||||
} Job;
|
||||
|
||||
typedef struct Queue_t
|
||||
{
|
||||
unsigned start_i;
|
||||
unsigned end_i;
|
||||
unsigned num_j;
|
||||
unsigned total_warps;
|
||||
unsigned active_warps;
|
||||
struct Job_t jobs[SIZE];
|
||||
|
||||
} Queue;
|
||||
|
||||
Queue q[8];
|
||||
|
||||
void queue_initialize(Queue *);
|
||||
|
||||
void queue_enqueue(Queue *, Job *);
|
||||
|
||||
void queue_dequeue(Queue *, Job *);
|
||||
|
||||
int queue_isFull(Queue *);
|
||||
int queue_isEmpty(Queue *);
|
||||
int queue_availableWarps(Queue *);
|
||||
|
||||
|
||||
void func();
|
||||
|
||||
#endif
|
||||
123
kernel/vx_os/vx_util/queue.s
Normal file
123
kernel/vx_os/vx_util/queue.s
Normal file
@@ -0,0 +1,123 @@
|
||||
|
||||
.equ A_WARPS, 7
|
||||
.equ SIZE, 50
|
||||
|
||||
.section .text
|
||||
|
||||
.type queue_initialize, @function
|
||||
.global queue_initialize
|
||||
queue_initialize:
|
||||
mv t0, a0 # loading base address of q
|
||||
li t1, 0 # to initialize variables
|
||||
li t2, A_WARPS # Num of available warps
|
||||
sw t1, 0 (t0) # start_i
|
||||
sw t1, 4 (t0) # end_i
|
||||
sw t1, 8 (t0) # num_j
|
||||
sw t2, 12(t0) # total_warps
|
||||
sw t1, 16(t0) # active_warps
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
.type queue_enqueue, @function
|
||||
.global queue_enqueue
|
||||
queue_enqueue:
|
||||
mv t0, a0 # loding base address of q
|
||||
lw t1, 8 (t0) # t1 = num_j
|
||||
addi t1, t1, 1 # ++t1
|
||||
sw t1, 8 (t0) # num_j = t1
|
||||
addi t1, t0, 20 # t1 = jobs_addr
|
||||
lw t4, 4 (t0) # t4 = end_i
|
||||
slli t2, t4, 5 # index * 32 [log(sizeof(job))]
|
||||
add t1, t1, t2 # jobs + index
|
||||
lw t3, 0 (a1) # wid
|
||||
sw t3, 0 (t1) #
|
||||
lw t3, 4 (a1) # n_threads
|
||||
sw t3, 4 (t1) #
|
||||
lw t3, 8 (a1) # base_sp
|
||||
sw t3, 8 (t1) #
|
||||
lw t3, 12(a1) # func_ptr
|
||||
sw t3, 12(t1) #
|
||||
lw t3, 16(a1) # args
|
||||
sw t3, 16(t1) #
|
||||
lw t3, 20(a1) # assigned_warp
|
||||
sw t3, 20(t1) #
|
||||
addi t4, t4, 1 # end_i++
|
||||
li t5, SIZE # size
|
||||
bne t4, t5, ec # if ((q.end_i + 1) == SIZE)
|
||||
mv t4, zero
|
||||
ec:
|
||||
sw t4, 4 (t0) # end_i
|
||||
ret
|
||||
|
||||
|
||||
.type queue_dequeue, @function
|
||||
.global queue_dequeue
|
||||
|
||||
queue_dequeue:
|
||||
mv t0, a0 # loading base address of q
|
||||
lw t1, 8 (t0) # t1 = num_j
|
||||
addi t1, t1, -1 # --t1
|
||||
sw t1, 8 (t0) # num_j = t1
|
||||
addi t1, t0, 20 # t1 = jobs_addr
|
||||
lw t4, 0 (t0) # t4 = start_i
|
||||
li t6, SIZE # size
|
||||
mv t5, t4 # t5 = start_i
|
||||
addi t5, t5, 1 # t5++
|
||||
bne t5, t6, dc # if ((q.start_i + 1) == SIZE)
|
||||
mv t5, zero
|
||||
dc:
|
||||
sw t5, 0(t0) # storing start_i
|
||||
slli t2, t4, 5 # index * 32 [log(sizeof(job))]
|
||||
add t1, t1, t2 # jobs + index
|
||||
lw t3, 0 (t1) # wid
|
||||
sw t3, 0 (a1) #
|
||||
lw t3, 4 (t1) # n_threads
|
||||
sw t3, 4 (a1) #
|
||||
lw t3, 8 (t1) # base_sp
|
||||
sw t3, 8 (a1) #
|
||||
lw t3, 12(t1) # func_ptr
|
||||
sw t3, 12(a1) #
|
||||
lw t3, 16(t1) # args
|
||||
sw t3, 16(a1) #
|
||||
lw t3, 20(t1) # assigned_warp
|
||||
sw t3, 20(a1) #
|
||||
ret
|
||||
|
||||
|
||||
.type queue_isFull, @function
|
||||
.global queue_isFull
|
||||
queue_isFull:
|
||||
mv t0, a0 # loading base address of q
|
||||
lw t1, 8 (t0) # t1 = num_j
|
||||
mv a0, zero # ret_val = 0
|
||||
li t3, SIZE # t3 = SIZE
|
||||
bne t3, t1, qf # if (num_j == 1)
|
||||
addi a0, a0, 1 # ret_val = 1;
|
||||
qf:
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.type queue_isEmpty, @function
|
||||
.global queue_isEmpty
|
||||
queue_isEmpty:
|
||||
mv t0, a0 # loading base address of q
|
||||
lw t1, 8 (t0) # t1 = num_j
|
||||
mv a0, zero # ret_val = 0
|
||||
mv t3, zero # t3 = 0
|
||||
bne t3, t1, qe # if (num_j == 0)
|
||||
addi a0, a0, 1 # ret_val = 1;
|
||||
qe:
|
||||
ret
|
||||
|
||||
|
||||
.type queue_availableWarps, @function
|
||||
.global queue_availableWarps
|
||||
queue_availableWarps:
|
||||
mv t0, a0 # loading base address of q
|
||||
lw t1, 12(t0) # t1 = total_warps
|
||||
lw t2, 16(t0) # t2 = active_warps
|
||||
sltu a0, t2, t1
|
||||
ret
|
||||
Reference in New Issue
Block a user