Restructure

This commit is contained in:
felsabbagh3
2019-03-22 04:14:52 -04:00
parent 097e0217de
commit 6c64fa35f8
239 changed files with 3839 additions and 819 deletions

BIN
kernel/vx_os/.DS_Store vendored Normal file

Binary file not shown.

View File

@@ -0,0 +1,150 @@
#include "vx_back.h"
#include "../vx_io/vx_io.h"
// Resets all eight per-warp job queues in `q` to the empty state.
// _start (vx_back.s) calls this before main, and vx_spawnWarps calls it again
// before enqueuing a new batch of jobs.
void vx_before_main()
{
    int slot = 0;

    while (slot < 8)
    {
        queue_initialize(&q[slot]);
        slot++;
    }
}
// Picks the next job for the calling warp once its previous job has finished.
// vx_createThreads (vx_back.s) loads this function's address and issues the
// JMPRT instruction after the job function has been called.
// The warp id is read from register s10, which vx_createThreads loads with the
// job's assigned_warp. If this warp's queue is empty the warp is flagged in
// done[]; warp 0 then resumes the context saved by vx_save_context, any other
// warp issues ECALL. Otherwise the next job is dequeued and started.
void vx_reschedule_warps()
{
// Bound to s10 so the value left there by vx_createThreads is read directly.
register unsigned curr_warp asm("s10");
// vx_printf("Reschedule: ", curr_warp);
if (queue_isEmpty(q+curr_warp))
{
// vx_printf("Done: ", curr_warp);
// Flag this warp as finished; vx_wait_for_warps polls done[].
done[curr_warp] = 1;
if (curr_warp == 0)
{
// vx_load_context reloads every register stored by vx_save_context,
// including ra and sp, so its `ret` resumes right after the
// vx_save_context call with tp == 0.
vx_load_context();
return;
}
// NOTE(review): presumably ECALL stops this warp; if it returned, the code
// below would dequeue from an empty queue -- confirm.
ECALL;
}
Job j;
queue_dequeue(q+curr_warp,&j);
// vx_printf("Reschedule -> ", j.wid);
// Switch to the stack region that vx_spawnWarps recorded for this job.
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
vx_createThreads(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
ECALL;
}
// Starts the first job of every warp queue.
// Warps 1..N-1 (N = vx_available_warps()) each get the head of their queue
// handed to vx_wspawn; afterwards the caller's registers are saved with
// vx_save_context and, on the first pass only, warp 0 starts the head of its
// own queue on the calling warp.
void vx_schedule_warps()
{
unsigned num_available_warps = vx_available_warps();
// Keep the caller's stack pointer in s3 while sp is redirected below.
asm __volatile__("mv s3, sp");
// Warp 0 is skipped here; it is handled after the context save.
for (int curr_warp = 1; curr_warp < num_available_warps; ++curr_warp)
{
if (!queue_isEmpty(q+curr_warp))
{
Job j;
queue_dequeue(q+curr_warp,&j);
// Point sp at the job's stack region before issuing the spawn.
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
vx_wspawn(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
}
}
// Back to the caller's stack.
asm __volatile__("mv sp, s3");
// vx_save_context leaves tp == 1. When vx_reschedule_warps later calls
// vx_load_context on warp 0, execution resumes here with tp == 0.
vx_save_context();
// vx_print_str("saved context\n");
register unsigned val asm("tp");
// Non-zero only on the first pass (fresh save); zero after a context reload,
// in which case the function simply returns to its caller.
if (val)
{
if (!queue_isEmpty(q))
{
// vx_print_str("found something for w0\n");
Job j;
queue_dequeue(q,&j);
// vx_printf("num_threads: ", j.n_threads);
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
vx_createThreads(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
}
}
}
// Queues `num_Warps` jobs that each run `func` with `num_threads` threads and
// the shared `args` pointer, then starts scheduling them.
//   num_Warps   - number of jobs to create; job i gets wid == i
//   num_threads - thread count stored in every job
//   FUNC        - expands to the parameter `void (func)(unsigned, unsigned)`
//   args        - opaque pointer stored in every job (vx_createThreads moves
//                 it into s7, which vx_get_arg_struct reads)
// Jobs are distributed round-robin over the queues 0..vx_available_warps()-1.
void vx_spawnWarps(unsigned num_Warps, unsigned num_threads, FUNC, void * args)
{
// Re-initialise all queues before enqueuing the new batch.
vx_before_main();
unsigned num_available_warps = vx_available_warps();
// vx_printf("Num available warps: ", num_available_warps);
// Remember the caller's sp in s2; it is restored after the loop.
asm __volatile__("addi s2, sp, 0");
int warp = 0;
for (unsigned i = 0; i < num_Warps; i++)
{
// s3 = 0xFFFF0000, so the add moves sp down by 0x10000 bytes (mod 2^32);
// each job therefore records a distinct base_sp 64 KiB below the previous.
// NOTE(review): sp is changed while this C function is still executing;
// this relies on how the compiler addresses its locals -- confirm.
asm __volatile__("lui s3, 0xFFFF0");
asm __volatile__("add sp, sp, s3");
register unsigned stack_ptr asm("sp");
Job j;
j.wid = i;
j.n_threads = num_threads;
j.base_sp = stack_ptr;
j.func_ptr = (unsigned) func;
j.args = args;
j.assigned_warp = warp;
queue_enqueue(q + warp,&j);
// Round-robin: wrap back to queue 0 after the last available warp.
++warp;
if (warp >= num_available_warps) warp = 0;
}
// Restore the caller's stack pointer.
asm __volatile__("addi sp, s2, 0");
vx_schedule_warps();
}
// Busy-waits until exactly `num_wait` of the first vx_available_warps()
// entries of done[] are set to 1, then clears those entries.
//   num_wait - number of finished warps to wait for; 0 skips the wait and
//              only clears the flags.
// done[] is written by other warps (vx_reschedule_warps), so every access goes
// through a volatile-qualified pointer: without it the compiler may hoist the
// loads out of the polling loop or assume the side-effect-free loop terminates.
void vx_wait_for_warps(unsigned num_wait)
{
    // vx_printf("wait for: ", num_wait);
    unsigned num_available_warps = vx_available_warps();
    volatile int *flags = done;
    unsigned num = 0;

    while (num != num_wait)
    {
        // Recount from scratch on every poll.
        num = 0;
        for (unsigned i = 0; i < num_available_warps; i++)
        {
            if (flags[i] == 1)
            {
                num += 1;
            }
        }
    }
    // vx_printf("num found: ", num);

    // Reset the flags so the next batch starts from a clean state.
    for (unsigned i = 0; i < num_available_warps; i++)
    {
        flags[i] = 0;
    }
}
// Returns the current value of register s7.
// vx_createThreads (vx_back.s) moves the job's `args` pointer into s7 before
// starting the job function, so a job calls this to obtain its argument struct.
void * vx_get_arg_struct(void)
{
// Bound to s7 and deliberately not assigned: the register's value is the result.
register void *ret asm("s7");
return ret;
}

View File

@@ -0,0 +1,55 @@
#pragma once
#include <stdbool.h>
#include "../vx_util/queue.h"
// Raw instruction encodings emitted with `.word`. Except for ECALL these are
// not standard RISC-V instructions; their exact semantics are defined by the
// target hardware/simulator and are not visible in this file.
// Each macro already ends in ';', so `WSPAWN;` expands to an extra empty statement.
// WSPAWN: same word that vx_wspawn (vx_back.s) issues after loading t1.
#define WSPAWN asm __volatile__(".word 0x3006b"::);
// CLONE: same word that vx_createThreads issues once per extra thread
// ("clone register state" in vx_back.s).
#define CLONE asm __volatile__(".word 0x3506b":::);
// JALRS: same word vx_createThreads issues after placing the function address
// in t6 and the thread count in s11; declared here as clobbering s10.
#define JALRS asm __volatile__(".word 0x1bfe0eb":::"s10");
// ECALL: 0x00000073 is the standard RISC-V `ecall` encoding.
#define ECALL asm __volatile__(".word 0x00000073");
// JMPRT: same word vx_createThreads issues after loading a0 with the address
// of vx_reschedule_warps.
#define JMPRT asm __volatile__(".word 0x5406b");
// SPLIT / P_JUMP / JOIN: used by the __if / __else / __end_if macros below.
#define SPLIT asm __volatile__(".word 0xf206b");
#define P_JUMP asm __volatile__(".word 0x1ff707b");
#define JOIN asm __volatile__(".word 0x306b");
// Divergent-branch helpers built on the SPLIT / P_JUMP / JOIN encodings.
// Usage: __if(cond) ...then... __else ...else... __end_if
// The macros declare the locals temp, p, e, w and the labels ELSE and AFTER,
// so at most one __if/__else/__end_if group fits in a scope/function.
//
// __if(val): stores the negated condition in t5 and the address of ELSE in t6,
// then issues SPLIT and P_JUMP. `val` is parenthesised so that an expression
// such as `a == b` is negated as a whole rather than as `!a == b`.
#define __if(val) bool temp = !(val); \
register unsigned p asm("t5") = temp; \
register void * e asm("t6") = &&ELSE; \
SPLIT; \
P_JUMP;
// __else: ends the "then" part by jumping to AFTER through t3 and opens the
// ELSE label.
#define __else register void * w asm("t3") = &&AFTER; \
asm __volatile__("jr t3"); \
ELSE: asm __volatile__("nop");
// __end_if: defines the AFTER label and issues JOIN.
#define __end_if AFTER:\
JOIN;
// Per-warp completion flags: vx_reschedule_warps sets done[w] = 1 when warp w
// finds its queue empty; vx_wait_for_warps counts and then clears them.
// Being `static` in a header, every translation unit that includes this file
// gets its own copy.
// NOTE(review): 7 entries, while queue.h declares Queue q[8] and
// vx_before_main initialises 8 queues -- confirm warp ids never reach 7.
static int done[] = {0, 0, 0, 0, 0, 0, 0};
// Not referenced by the code visible alongside this header.
static int main_sp[1];
// Register save area used by vx_save_context / vx_load_context (vx_back.s):
// one 32-bit word per register x0..x31.
unsigned context[32];
// Stores x0..x31 into `context` and returns with tp == 1 (vx_back.s).
void vx_save_context(void);
// Reloads x0..x31 from `context`, sets tp = 0 and returns through the reloaded
// ra, i.e. to where vx_save_context was last called (vx_back.s).
void vx_load_context(void);
// Parameter declaration for a job entry point; introduces a parameter named
// `func` taking two unsigned arguments.
#define FUNC void (func)(unsigned, unsigned)
// Value of CSR 0x20, which _start sets to the number of warps.
unsigned vx_available_warps(void);
// Value of CSR 0x21, which _start sets to the number of threads.
unsigned vx_available_threads(void);
// Arguments, in order (as passed by vx_back.c): n_threads, wid, func_ptr,
// args, assigned_warp. Implemented in vx_back.s.
void vx_createThreads(unsigned, unsigned, unsigned, void *, unsigned);
// Same argument order as vx_createThreads; issues the WSPAWN instruction.
void vx_wspawn(unsigned, unsigned, unsigned, void *, unsigned);
// Enqueues num_Warps jobs running `func` and starts scheduling them.
void vx_spawnWarps(unsigned num_Warps, unsigned num_threads, FUNC, void *);
// Starts the first queued job of every warp.
void vx_schedule_warps(void);
// Called on a warp after a job finishes to pick up its next job.
void vx_reschedule_warps(void);
// Busy-waits until the given number of warps have set their done[] flag.
void vx_wait_for_warps(unsigned);
// Returns the `args` pointer of the running job (register s7).
void * vx_get_arg_struct(void);

View File

@@ -0,0 +1,151 @@
# Program entry point: publishes the warp/thread counts through CSRs 0x20/0x21,
# sets the initial stack pointer, initialises the job queues and runs main.
.section .text
.type _start, @function
.global _start
_start:
li a0, 4 # Num Warps
csrw 0x20, a0 # Setting the number of available warps (read by vx_available_warps)
li a0, 8 # Num Threads
csrw 0x21, a0 # Setting the number of available threads (read by vx_available_threads)
csrw mhartid,zero # write 0 to mhartid
csrw misa,zero # write 0 to misa
lui sp, 0x7ffff # sp = 0x7ffff000
jal vx_before_main # initialise the queues in q[] before user code runs
jal main
ecall # issued once main returns
# vx_createThreads(n_threads, wid, func_ptr, args, assigned_warp)
# Register arguments as used below: a0 = n_threads, a2 = func_ptr, a3 = args,
# a4 = assigned_warp (a1 is not touched here, so it still holds wid).
# Issues the clone word (n_threads - 1) times, each with its own 2 KiB stack
# slot below the current sp, then issues the JALRS word with the function
# address in t6 and finally the JMPRT word with a0 = vx_reschedule_warps.
# There is no `ret`: control does not return to the caller through this code.
.type vx_createThreads, @function
.global vx_createThreads
vx_createThreads:
mv s7 ,a3 # Moving args to s7 (read back by vx_get_arg_struct)
mv s10,a4 # Moving assigned_warp to s10 (read back by vx_reschedule_warps)
mv t5 ,sp # Saving the current stack pointer to t5
mv t2 , a0 # t2 = num_threads
loop_init:
li a0,1 # i = 1 (thread 0 is the calling thread, set up after the loop)
loop_cond:
bge a0, t2, loop_done # i < num_threads
loop_body:
addi sp,sp,-2048 # Allocate 2k stack for new thread
mv t1, a0 # #lane = i
.word 0x3506b # clone register state
loop_inc:
addi a0, a0, 1
j loop_cond
loop_done:
mv sp,t5 # Restoring the stack
li a0,0 # setting tid = 0 for main thread
mv t6,a2 # setting func_addr
mv s11,t2 # setting num_threads to spawn
.word 0x1bfe0eb # JALRS encoding (see vx_back.h)
la a0, vx_reschedule_warps # address handed to the JMPRT word below
.word 0x5406b # JMPRT encoding (see vx_back.h)
# vx_wspawn(n_threads, wid, func_ptr, args, assigned_warp)
# Loads the address of vx_createThreads into t1 and issues the WSPAWN word; the
# argument registers a0..a4 are left untouched.
# NOTE(review): presumably the spawned warp starts at the address in t1 with a
# copy of the current registers -- confirm against the hardware definition.
.type vx_wspawn, @function
.global vx_wspawn
vx_wspawn:
la t1, vx_createThreads
.word 0x3006b # WSPAWN instruction
ret
# vx_save_context: stores x0..x31 into the 32-word `context` array (word i holds
# register xi) and returns with tp == 1.
# tp (x4) is used as the base pointer, so the slot for x4 receives the address
# of `context`, not the caller's tp. The saved ra (x1) is this call's return
# address, which is where vx_load_context later resumes.
.global context
.type vx_save_context, @function
.global vx_save_context
vx_save_context:
la tp, context
sw x0 , 0 (tp)
sw x1 , 4 (tp)
sw x2 , 8 (tp)
sw x3 , 12(tp)
sw x4 , 16(tp)
sw x5 , 20(tp)
sw x6 , 24(tp)
sw x7 , 28(tp)
sw x8 , 32(tp)
sw x9 , 36(tp)
sw x10, 40(tp)
sw x11, 44(tp)
sw x12, 48(tp)
sw x13, 52(tp)
sw x14, 56(tp)
sw x15, 60(tp)
sw x16, 64(tp)
sw x17, 68(tp)
sw x18, 72(tp)
sw x19, 76(tp)
sw x20, 80(tp)
sw x21, 84(tp)
sw x22, 88(tp)
sw x23, 92(tp)
sw x24, 96(tp)
sw x25, 100(tp)
sw x26, 104(tp)
sw x27, 108(tp)
sw x28, 112(tp)
sw x29, 116(tp)
sw x30, 120(tp)
sw x31, 124(tp)
li tp, 1 # tp == 1 marks "returned from a fresh save"
ret
# vx_load_context: reloads x0..x31 from `context`, sets tp = 0 and executes
# `ret`. Because ra (x1) and sp (x2) are reloaded too, the `ret` resumes at the
# return address recorded by vx_save_context rather than at this function's
# caller; tp == 0 lets that code tell a reload apart from a fresh save.
# The load into x0 has no effect. The load into x4 overwrites the base pointer
# tp itself; the slot holds the address of `context` (stored there by
# vx_save_context), so the following loads still use the right base.
.type vx_load_context, @function
.global vx_load_context
vx_load_context:
la tp, context
lw x0 , 0 (tp)
lw x1 , 4 (tp)
lw x2 , 8 (tp)
lw x3 , 12(tp)
lw x4 , 16(tp)
lw x5 , 20(tp)
lw x6 , 24(tp)
lw x7 , 28(tp)
lw x8 , 32(tp)
lw x9 , 36(tp)
lw x10, 40(tp)
lw x11, 44(tp)
lw x12, 48(tp)
lw x13, 52(tp)
lw x14, 56(tp)
lw x15, 60(tp)
lw x16, 64(tp)
lw x17, 68(tp)
lw x18, 72(tp)
lw x19, 76(tp)
lw x20, 80(tp)
lw x21, 84(tp)
lw x22, 88(tp)
lw x23, 92(tp)
lw x24, 96(tp)
lw x25, 100(tp)
lw x26, 104(tp)
lw x27, 108(tp)
lw x28, 112(tp)
lw x29, 116(tp)
lw x30, 120(tp)
lw x31, 124(tp)
li tp, 0 # tp == 0 marks "resumed from a reload"
ret
# unsigned vx_available_warps(void): returns CSR 0x20 in a0 (_start writes the
# warp count there).
.type vx_available_warps, @function
.global vx_available_warps
vx_available_warps:
csrr a0, 0x20
ret
# unsigned vx_available_threads(void): returns CSR 0x21 in a0 (_start writes the
# thread count there).
.type vx_available_threads, @function
.global vx_available_threads
vx_available_threads:
csrr a0, 0x21
ret

BIN
kernel/vx_os/vx_io/.DS_Store vendored Normal file

Binary file not shown.

View File

@@ -0,0 +1,29 @@
#include "vx_io.h"
// Prints `f` as lowercase hexadecimal digits via vx_print_str, without a
// prefix and without leading zeros.
void vx_print_hex(unsigned f)
{
    // A value below 16 is a single digit; this also covers f == 0, which the
    // leading-zero suppression below would otherwise print as nothing.
    if (f < 16)
    {
        vx_print_str(hextoa[f]);
        return;
    }

    // Walk the eight nibbles of a 32-bit value from most to least significant,
    // emitting digits once the first non-zero nibble has been reached.
    bool emitting = false;
    for (int shift = 28; shift >= 0; shift -= 4)
    {
        int nibble = (f >> shift) & 0xf;

        if (nibble != 0)
        {
            emitting = true;
        }
        if (emitting)
        {
            vx_print_str(hextoa[nibble]);
        }
    }
}
// Prints the string `c`, then `f` in hexadecimal (see vx_print_hex), then a
// newline. Despite the name, `c` is printed verbatim: no format specifiers are
// interpreted.
void vx_printf(char * c, unsigned f)
{
vx_print_str(c);
vx_print_hex(f);
vx_print_str("\n");
}

View File

@@ -0,0 +1,9 @@
#pragma once
#include <stdbool.h>
// Lookup table from a nibble value (0..15) to its one-character lowercase hex
// string. `static` in a header: every including translation unit gets a copy.
static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
// Prints the value in hexadecimal without leading zeros (vx_io.c).
void vx_print_hex(unsigned);
// Prints a NUL-terminated string one byte at a time (vx_io.s).
void vx_print_str(char *);
// Prints a string, a hexadecimal value and a newline (vx_io.c).
void vx_printf(char *, unsigned);

View File

@@ -0,0 +1,30 @@
# void vx_print_str(char *s): a0 = pointer to a NUL-terminated string.
# Outputs every byte up to (not including) the terminating zero through
# vx_printc. ra and a1 are preserved across the call in a 12-byte stack frame
# (the word at 8(sp) is unused); a0 is left pointing at the terminator and a7
# is clobbered by vx_printc.
.type vx_print_str, @function
.global vx_print_str
vx_print_str:
addi sp, sp, -12
sw ra, 0(sp)
sw a1, 4(sp)
bl: # loop head: fetch the next byte
lbu a1,0(a0)
beqz a1,be # stop at the NUL terminator
jal vx_printc # vx_printc takes the character in a1
addi a0, a0, 1
j bl
be: # loop exit: restore and return
lw ra, 0(sp)
lw a1, 4(sp)
addi sp, sp, 12
ret
# vx_printc: stores the word in a1 to address 0x00010000.
# The character is taken from a1 (not a0), matching how vx_print_str calls it.
# Clobbers a7.
# NOTE(review): 0x00010000 is presumably a memory-mapped output port -- confirm
# against the platform memory map.
.type vx_printc, @function
.global vx_printc
vx_printc:
la a7, 0x00010000
sw a1, 0(a7)
ret

BIN
kernel/vx_os/vx_util/.DS_Store vendored Normal file

Binary file not shown.

View File

@@ -0,0 +1,49 @@
#ifndef __QUEUE__
#define __QUEUE__
// Number of Job slots in each Queue's ring buffer (mirrored by `.equ SIZE` in queue.s).
#define SIZE 50
// Not used by the declarations in this header; queue.s has its own A_WARPS
// constant with the same value.
#define WARPS 7
// One unit of work for a warp. queue.s copies this record as six consecutive
// 32-bit words at offsets 0..20, so the field order and sizes must not change.
typedef struct Job_t
{
unsigned wid; // job index assigned by vx_spawnWarps (0..num_Warps-1)
unsigned n_threads; // thread count passed to vx_createThreads / vx_wspawn
unsigned base_sp; // stack pointer value installed before the job starts
unsigned func_ptr; // address of the job function, stored as an integer
void * args; // opaque argument pointer, returned by vx_get_arg_struct
unsigned assigned_warp; // index of the queue/warp the job was enqueued on
} Job;
// Ring buffer of jobs for one warp. queue.s accesses the header fields at byte
// offsets 0, 4, 8, 12, 16 and expects `jobs` at offset 20, so the layout must
// not change.
typedef struct Queue_t
{
unsigned start_i; // index of the next job to dequeue; wraps to 0 at SIZE
unsigned end_i; // index of the next free slot; wraps to 0 at SIZE
unsigned num_j; // number of jobs currently stored
unsigned total_warps; // set to A_WARPS (7) by queue_initialize
unsigned active_warps; // set to 0 by queue_initialize; read by queue_availableWarps
struct Job_t jobs[SIZE]; // job storage
} Queue;
// One job queue per warp; vx_back.c indexes it with the warp id.
Queue q[8];
// Resets the indices and job count and sets total_warps / active_warps.
void queue_initialize(Queue *);
// Copies *job into the slot at end_i and advances end_i; does not check for a
// full queue.
void queue_enqueue(Queue *, Job *);
// Copies the slot at start_i into *job and advances start_i; does not check
// for an empty queue.
void queue_dequeue(Queue *, Job *);
// Returns 1 when num_j == SIZE, otherwise 0.
int queue_isFull(Queue *);
// Returns 1 when num_j == 0, otherwise 0.
int queue_isEmpty(Queue *);
// Returns 1 when active_warps < total_warps, otherwise 0.
int queue_availableWarps(Queue *);
// No definition is visible alongside this header.
void func();
#endif

View File

@@ -0,0 +1,123 @@
# Constants shared by the queue routines below.
.equ A_WARPS, 7 # value stored into total_warps by queue_initialize
.equ SIZE, 50 # ring-buffer capacity; must match SIZE in queue.h
.section .text
# void queue_initialize(Queue *q): a0 = q.
# Zeroes start_i, end_i, num_j and active_warps and sets total_warps = A_WARPS.
# The jobs array itself is not touched. Clobbers t0-t2.
.type queue_initialize, @function
.global queue_initialize
queue_initialize:
mv t0, a0 # loading base address of q
li t1, 0 # to initialize variables
li t2, A_WARPS # Num of available warps
sw t1, 0 (t0) # start_i
sw t1, 4 (t0) # end_i
sw t1, 8 (t0) # num_j
sw t2, 12(t0) # total_warps
sw t1, 16(t0) # active_warps
ret
# Job slots are addressed with a 24-byte stride: a Job is six 32-bit words
# (wid, n_threads, base_sp, func_ptr, args, assigned_warp), which is the element
# size of `jobs[SIZE]` in queue.h. A 32-byte stride would place slots 37 and up
# beyond the end of the array and into the following Queue. Both routines below
# must use the same stride.

# void queue_enqueue(Queue *q, Job *job): a0 = q, a1 = job.
# Increments num_j, copies *job into jobs[end_i] and advances end_i, wrapping to
# 0 at SIZE. No full-queue check is made (see queue_isFull). Clobbers t0-t5.
.type queue_enqueue, @function
.global queue_enqueue
queue_enqueue:
    mv t0, a0 # t0 = base address of q
    lw t1, 8 (t0) # t1 = num_j
    addi t1, t1, 1 # ++t1
    sw t1, 8 (t0) # num_j = t1
    addi t1, t0, 20 # t1 = &q->jobs[0]
    lw t4, 4 (t0) # t4 = end_i
    slli t2, t4, 3 # t2 = end_i * 8
    slli t3, t4, 4 # t3 = end_i * 16
    add t2, t2, t3 # t2 = end_i * 24 = end_i * sizeof(Job)
    add t1, t1, t2 # t1 = &q->jobs[end_i]
    lw t3, 0 (a1) # wid
    sw t3, 0 (t1) #
    lw t3, 4 (a1) # n_threads
    sw t3, 4 (t1) #
    lw t3, 8 (a1) # base_sp
    sw t3, 8 (t1) #
    lw t3, 12(a1) # func_ptr
    sw t3, 12(t1) #
    lw t3, 16(a1) # args
    sw t3, 16(t1) #
    lw t3, 20(a1) # assigned_warp
    sw t3, 20(t1) #
    addi t4, t4, 1 # end_i++
    li t5, SIZE # size
    bne t4, t5, ec # if (end_i == SIZE)
    mv t4, zero #     end_i = 0
ec:
    sw t4, 4 (t0) # store end_i
    ret

# void queue_dequeue(Queue *q, Job *job): a0 = q, a1 = job.
# Decrements num_j, advances start_i (wrapping to 0 at SIZE) and copies the job
# at the previous start_i into *job. No empty-queue check is made (see
# queue_isEmpty). Clobbers t0-t6.
.type queue_dequeue, @function
.global queue_dequeue
queue_dequeue:
    mv t0, a0 # t0 = base address of q
    lw t1, 8 (t0) # t1 = num_j
    addi t1, t1, -1 # --t1
    sw t1, 8 (t0) # num_j = t1
    addi t1, t0, 20 # t1 = &q->jobs[0]
    lw t4, 0 (t0) # t4 = start_i
    li t6, SIZE # size
    mv t5, t4 # t5 = start_i
    addi t5, t5, 1 # t5++
    bne t5, t6, dc # if (start_i + 1 == SIZE)
    mv t5, zero #     next start_i = 0
dc:
    sw t5, 0(t0) # storing start_i
    slli t2, t4, 3 # t2 = old start_i * 8
    slli t3, t4, 4 # t3 = old start_i * 16
    add t2, t2, t3 # t2 = old start_i * 24 = old start_i * sizeof(Job)
    add t1, t1, t2 # t1 = &q->jobs[old start_i]
    lw t3, 0 (t1) # wid
    sw t3, 0 (a1) #
    lw t3, 4 (t1) # n_threads
    sw t3, 4 (a1) #
    lw t3, 8 (t1) # base_sp
    sw t3, 8 (a1) #
    lw t3, 12(t1) # func_ptr
    sw t3, 12(a1) #
    lw t3, 16(t1) # args
    sw t3, 16(a1) #
    lw t3, 20(t1) # assigned_warp
    sw t3, 20(a1) #
    ret
# int queue_isFull(Queue *q): a0 = q.
# Returns 1 in a0 when num_j == SIZE, otherwise 0. Clobbers t0, t1, t3.
.type queue_isFull, @function
.global queue_isFull
queue_isFull:
mv t0, a0 # loading base address of q
lw t1, 8 (t0) # t1 = num_j
mv a0, zero # ret_val = 0
li t3, SIZE # t3 = SIZE
bne t3, t1, qf # if (num_j == SIZE)
addi a0, a0, 1 # ret_val = 1;
qf:
ret
# int queue_isEmpty(Queue *q): a0 = q.
# Returns 1 in a0 when num_j == 0, otherwise 0. Clobbers t0, t1, t3.
.type queue_isEmpty, @function
.global queue_isEmpty
queue_isEmpty:
mv t0, a0 # loading base address of q
lw t1, 8 (t0) # t1 = num_j
mv a0, zero # ret_val = 0
mv t3, zero # t3 = 0
bne t3, t1, qe # if (num_j == 0)
addi a0, a0, 1 # ret_val = 1;
qe:
ret
# int queue_availableWarps(Queue *q): a0 = q.
# Returns 1 in a0 when active_warps < total_warps (unsigned compare), else 0.
# Clobbers t0-t2.
.type queue_availableWarps, @function
.global queue_availableWarps
queue_availableWarps:
mv t0, a0 # loading base address of q
lw t1, 12(t0) # t1 = total_warps
lw t2, 16(t0) # t2 = active_warps
sltu a0, t2, t1 # a0 = (active_warps < total_warps)
ret