First Commit

This commit is contained in:
2025-02-06 22:24:29 +08:00
parent ed7df4c81e
commit 7539e6a53c
18116 changed files with 6181499 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
cube example
============
Simple port of ctrulib's gpu example to nihstro shaders. The C program code is mostly unchanged from the original, however the example shader in the data subdirectory should give you a good idea of the basic nihcode shader syntax.
Before trying to compile, make sure your NIHSTRO environment variable points to the directory nihstro-assemble resides in. Additionally, ctrulib in revision 1f52ac344d or similar is required, plus some patches to implement proper uniform setters.

View File

@@ -0,0 +1,48 @@
// Vertex shader for the cube example.
// Inputs as bound by the host program: v0 = position, v1 = texcoord, v2 = normal.
// setup constants
.alias myconst c32 as (1.0, 0.0, 0.5, 1.0)
// setup output map
.alias outpos o0 as position
.alias outcol o1 as color
.alias outtex0 o2.xyzw as texcoord0 // Would like to use .xy instead, but this is not supported by ctrulib currently
.alias outtex1 o3.xyzw as texcoord1
.alias outtex2 o4.xyzw as texcoord2
// setup uniform map, for use with SHDR_GetUniformRegister
.alias projection c0 // -c3
.alias modelview c4 // -c7
.alias lightDirection c8
.alias lightAmbient c9
main:
// r1 = (in.pos.xyz, 1.0)
mov r1.xyz, v0.xyz
mov r1.w, myconst.w
mdvl: // tempreg = mdlvMtx * in.pos
dp4 r0.x, modelview[0], r1
dp4 r0.y, modelview[1], r1
dp4 r0.z, modelview[2], r1
// w is forced to 1.0 instead of being computed from modelview[3]
mov r0.w, myconst.w
proj: // result.pos = projMtx * tempreg
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
tex: // result.texcoord = in.texcoord
mov outtex0, v1
// texcoord1 and texcoord2 are unused: write the constant (0.0, 0.0, 0.0, 1.0)
mov outtex1, myconst.yyyw
mov outtex2, myconst.yyyw
col: // Hacky lighting: color.xyz = lightAmbient.xyz + max(dot(lightDirection, N), 0.0) * lightAmbient.www
// the dot product is replicated into r0.x, r0.y and r0.z
dp3 r0.xyz, lightDirection.xyz, v2.xyz
// myconst.yyy is (0.0, 0.0, 0.0): discard negative (back-facing) contributions
max r0.xyz, myconst.yyy, r0.xyz
mul r0.xyz, lightAmbient.www, r0.xyz
add outcol.xyz, lightAmbient.xyz, r0.xyz
// alpha is always 1.0
mov outcol.w, myconst.w
nop
end
endmain:

Binary file not shown.

View File

@@ -0,0 +1,16 @@
.section ".text"
.arm
.align 4
.global _vboMemcpy50
@-----------------------------------------------------------------------
@ void _vboMemcpy50(u32* dst, u32* src)
@ Copies exactly 0x50 bytes (20 words) from src to dst.
@ In:    r0 = dst, r1 = src (assumed word-aligned, as ldm/stm transfer words)
@ Out:   nothing
@ Clobb: r0-r3 (r0 and r1 end up advanced by 0x50); r4-r11 are saved/restored
@-----------------------------------------------------------------------
_vboMemcpy50:
	push {r4-r11}
	@ Two bursts of 10 registers: 2 * 10 * 4 = 0x50 bytes.
	@ (r2-r12 is 11 registers, which would copy 0x58 bytes and run 8 bytes
	@ past the end of both buffers.)
	ldmia r1!, {r2-r11}
	stmia r0!, {r2-r11}
	ldmia r1!, {r2-r11}
	stmia r0!, {r2-r11}
	pop {r4-r11}
	bx lr

View File

@@ -0,0 +1,432 @@
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <3ds.h>
#include "gs.h"
#include "math.h"
// Capacity of bufferMatrixList, expressed as a multiple of the matrix stack depth.
#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4)
static void gsInitMatrixStack();
// Mutex taken around linearAlloc/linearFree by gsLinearAlloc/gsLinearFree.
Handle linearAllocMutex;
// Tentative definition; the initialized definition appears with the matrix stack state below.
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES];
// One projection matrix upload recorded while building the command buffer.
typedef struct
{
u32 offset; // command buffer offset at which the upload was emitted
mtx44 data; // copy of the matrix that was uploaded
}bufferMatrix_s;
// Uploads recorded since the last gsStartFrame; consumed by gsAdjustBufferMatrices.
bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE];
// Number of valid entries in bufferMatrixList.
int bufferMatrixListLength;
//----------------------
// GS SYSTEM STUFF
//----------------------
// Empties the list of recorded projection matrix uploads.
void initBufferMatrixList()
{
bufferMatrixListLength=0;
}
// Sets up GS: matrix stacks, the recorded-matrix list and the linear allocator mutex.
// When a shader program is given, the "projection" and "modelview" uniform
// locations are read from its vertex shader and the program is made current.
void gsInit(shaderProgram_s* shader)
{
	gsInitMatrixStack();
	initBufferMatrixList();
	svcCreateMutex(&linearAllocMutex, false);

	if(!shader)return;

	gsMatrixStackRegisters[0]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection");
	gsMatrixStackRegisters[1]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview");

	shaderProgramUse(shader);
}
// Releases the linear allocator mutex created by gsInit.
// gsLinearAlloc/gsLinearFree wait on that handle, so they must not be used afterwards.
void gsExit(void)
{
svcCloseHandle(linearAllocMutex);
}
// Begins a new frame: rewinds the GPU command buffer to offset 0 and
// forgets the projection matrix uploads recorded for the previous frame.
void gsStartFrame(void)
{
GPUCMD_SetBufferOffset(0);
initBufferMatrixList();
}
// linearAlloc wrapped in linearAllocMutex.
// Returns whatever linearAlloc returned for the requested size.
void* gsLinearAlloc(size_t size)
{
	svcWaitSynchronization(linearAllocMutex, U64_MAX);
	void* const block=linearAlloc(size);
	svcReleaseMutex(linearAllocMutex);

	return block;
}
// linearFree wrapped in linearAllocMutex; counterpart of gsLinearAlloc.
void gsLinearFree(void* mem)
{
svcWaitSynchronization(linearAllocMutex, U64_MAX);
linearFree(mem);
svcReleaseMutex(linearAllocMutex);
}
//----------------------
// MATRIX STACK STUFF
//----------------------
// One stack of GS_MATRIXSTACK_SIZE matrices per matrix type.
static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE];
// Uniform register each matrix type is uploaded to; overwritten by gsInit when a shader is given.
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04};
// Index of the current (top) matrix in each stack.
static u8 gsMatrixStackOffsets[GS_MATRIXTYPES];
// True when the current matrix of a type changed since it was last uploaded.
static bool gsMatrixStackUpdated[GS_MATRIXTYPES];
// Matrix type targeted by the push/pop and transform functions.
static GS_MATRIX gsCurrentMatrixType;
// Resets every matrix stack to depth 0 holding the identity, marks each type
// as needing an upload and selects GS_PROJECTION as the current matrix type.
static void gsInitMatrixStack()
{
	int type=0;
	while(type<GS_MATRIXTYPES)
	{
		gsMatrixStackOffsets[type]=0;
		gsMatrixStackUpdated[type]=true;
		loadIdentity44((float*)gsMatrixStacks[type][0]);
		type++;
	}

	gsCurrentMatrixType=GS_PROJECTION;
}
// Returns a pointer to the 16 floats of the current (top of stack) matrix of
// type m, or NULL when m is not a valid matrix type.
float* gsGetMatrix(GS_MATRIX m)
{
	if(m>=0 && m<GS_MATRIXTYPES)
	{
		const u8 top=gsMatrixStackOffsets[m];
		return (float*)gsMatrixStacks[m][top];
	}

	return NULL;
}
// Overwrites the current matrix of type m with the 16 floats at data and
// flags it for upload. Returns 0 on success, -1 on an invalid type or NULL data.
int gsLoadMatrix(GS_MATRIX m, float* data)
{
	const bool validType=(m>=0 && m<GS_MATRIXTYPES);
	if(!validType || !data)return -1;

	float* const target=gsGetMatrix(m);
	memcpy(target, data, sizeof(mtx44));
	gsMatrixStackUpdated[m]=true;

	return 0;
}
int gsPushMatrix()
{
const GS_MATRIX m=gsCurrentMatrixType;
if(m<0 || m>=GS_MATRIXTYPES)return -1;
if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1;
float* cur=gsGetMatrix(m);
gsMatrixStackOffsets[m]++;
memcpy(gsGetMatrix(m), cur, sizeof(mtx44));
return 0;
}
int gsPopMatrix()
{
const GS_MATRIX m=gsCurrentMatrixType;
if(m<0 || m>=GS_MATRIXTYPES)return -1;
if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1;
gsMatrixStackOffsets[m]--;
gsMatrixStackUpdated[m]=true;
return 0;
}
// Selects which matrix stack the push/pop and transform functions operate on.
// Returns 0 on success, -1 when m is not a valid matrix type.
int gsMatrixMode(GS_MATRIX m)
{
	if(m>=0 && m<GS_MATRIXTYPES)
	{
		gsCurrentMatrixType=m;
		return 0;
	}

	return -1;
}
//------------------------
// MATRIX TRANSFORM STUFF
//------------------------
// Replaces the current matrix with (current * data), where data points to
// 16 floats. Returns 0 on success, -1 when data is NULL.
int gsMultMatrix(float* data)
{
	if(!data)return -1;

	float* const current=gsGetMatrix(gsCurrentMatrixType);
	mtx44 product;

	// multiply into a temporary, then copy the result back over the current matrix
	multMatrix44(current, data, (float*)product);
	memcpy(current, (float*)product, sizeof(mtx44));
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;

	return 0;
}
// Resets the current matrix of the active type to the identity.
void gsLoadIdentity()
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);
	loadIdentity44(current);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Overwrites the current matrix of the active type with the perspective
// projection built by initProjectionMatrix from the given parameters.
void gsProjectionMatrix(float fovy, float aspect, float near, float far)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);
	initProjectionMatrix(current, fovy, aspect, near, far);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Post-multiplies the current matrix by a rotation of x about the X axis.
void gsRotateX(float x)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);
	rotateMatrixX(current, x, false);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Post-multiplies the current matrix by a rotation of y about the Y axis.
void gsRotateY(float y)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);
	rotateMatrixY(current, y, false);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Post-multiplies the current matrix by a rotation of z about the Z axis.
void gsRotateZ(float z)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);
	rotateMatrixZ(current, z, false);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Applies a per-axis scale (x, y, z) to the current matrix.
void gsScale(float x, float y, float z)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);
	scaleMatrix(current, x, y, z);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Post-multiplies the current matrix by a translation of (x, y, z).
void gsTranslate(float x, float y, float z)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);
	translateMatrix(current, x, y, z);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//----------------------
// MATRIX RENDER STUFF
//----------------------
// Uploads the 4x4 matrix m to four consecutive vertex shader float uniforms
// starting at startreg. Each row is sent with its components reversed
// (w, z, y, x), which is the order GPU_SetFloatUniform is given here.
static void gsSetUniformMatrix(u32 startreg, float* m)
{
	float param[16];
	int row, col;

	for(row=0; row<4; row++)
	{
		for(col=0; col<4; col++)
		{
			param[row*4+col]=m[row*4+(3-col)];
		}
	}

	GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4);
}
static int gsUpdateTransformation()
{
GS_MATRIX m;
for(m=0; m<GS_MATRIXTYPES; m++)
{
if(gsMatrixStackUpdated[m])
{
if(m==GS_PROJECTION && bufferMatrixListLength<BUFFERMATRIXLIST_SIZE)
{
GPUCMD_GetBuffer(NULL, NULL, &bufferMatrixList[bufferMatrixListLength].offset);
memcpy(bufferMatrixList[bufferMatrixListLength].data, gsGetMatrix(m), sizeof(mtx44));
bufferMatrixListLength++;
}
gsSetUniformMatrix(gsMatrixStackRegisters[m], gsGetMatrix(m));
gsMatrixStackUpdated[m]=false;
}
}
return 0;
}
// Rewrites every recorded projection matrix upload in the current command
// buffer with (recorded matrix * transformation), then restores the buffer
// offset to where it was on entry.
void gsAdjustBufferMatrices(mtx44 transformation)
{
	u32* cmdBuffer;
	u32 endOffset;
	GPUCMD_GetBuffer(&cmdBuffer, NULL, &endOffset);

	int idx=0;
	while(idx<bufferMatrixListLength)
	{
		bufferMatrix_s* const entry=&bufferMatrixList[idx++];

		//TODO : better check, need to account for param size
		if(entry->offset+2>=endOffset)continue;

		mtx44 adjusted;
		// seek back to the recorded upload and emit the adjusted matrix over it
		GPUCMD_SetBufferOffset(entry->offset);
		multMatrix44((float*)entry->data, (float*)transformation, (float*)adjusted);
		gsSetUniformMatrix(gsMatrixStackRegisters[GS_PROJECTION], (float*)adjusted);
	}

	GPUCMD_SetBufferOffset(endOffset);
}
//----------------------
// VBO STUFF
//----------------------
// Puts a VBO into the empty state: no vertex data, no precomputed commands,
// all sizes and counts zero. Does not free anything.
// Returns 0 on success, -1 when vbo is NULL.
int gsVboInit(gsVbo_s* vbo)
{
	if(!vbo)return -1;

	vbo->data=NULL;
	vbo->currentSize=0;
	vbo->maxSize=0;
	// also reset the vertex count so a VBO reused after gsVboDestroy does not keep a stale value
	vbo->numVertices=0;
	vbo->commands=NULL;
	vbo->commandsSize=0;

	return 0;
}
// Allocates size bytes of linear memory for the VBO's vertex data and resets
// its fill level and vertex count.
// Returns 0 on success, -1 when vbo is NULL or the allocation failed; after a
// failed allocation the VBO has data==NULL and maxSize==0, so gsVboAddData and
// gsVboDraw reject it instead of touching a NULL buffer.
int gsVboCreate(gsVbo_s* vbo, u32 size)
{
	if(!vbo)return -1;

	vbo->data=gsLinearAlloc(size);
	vbo->numVertices=0;
	vbo->currentSize=0;

	if(!vbo->data)
	{
		vbo->maxSize=0;
		return -1;
	}

	vbo->maxSize=size;

	return 0;
}
// Returns the address of the first unused byte of the VBO's data buffer
// (data + currentSize), or NULL when vbo is NULL.
void* gsVboGetOffset(gsVbo_s* vbo)
{
	if(!vbo)return NULL;

	u8* const base=(u8*)vbo->data;
	return (void*)(base+vbo->currentSize);
}
// Appends size bytes from data to the VBO and adds units to its vertex count.
// Returns 0 on success, -1 when an argument is NULL/zero, the VBO has no
// buffer, or the data does not fit in the remaining space.
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units)
{
	if(!vbo || !data || !size)return -1;
	// a VBO that was never created (or whose allocation failed) has no buffer to copy into
	if(!vbo->data)return -1;
	// compare in unsigned arithmetic without ever forming a negative difference:
	// a signed difference compared against the unsigned size would be converted
	// to a huge unsigned value and wrongly pass the check
	if(vbo->currentSize>vbo->maxSize || size>vbo->maxSize-vbo->currentSize)return -1;

	memcpy(gsVboGetOffset(vbo), data, size);
	vbo->currentSize+=size;
	vbo->numVertices+=units;

	return 0;
}
// Currently a no-op apart from the NULL check: the data cache flush is
// commented out. Returns 0, or -1 when vbo is NULL.
int gsVboFlushData(gsVbo_s* vbo)
{
if(!vbo)return -1;
//unnecessary if we use flushAndRun
// GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize);
return 0;
}
// Frees the VBO's precomputed command list and vertex data (when present) and
// returns it to the empty state. Returns 0, or -1 when vbo is NULL.
int gsVboDestroy(gsVbo_s* vbo)
{
	if(!vbo)return -1;

	u32* const commandList=vbo->commands;
	void* const vertexData=vbo->data;

	if(commandList)free(commandList);
	if(vertexData)gsLinearFree(vertexData);

	gsVboInit(vbo);

	return 0;
}
extern u32 debugValue[];
// Appends the GPU commands that draw n vertices of the given primitive type
// from the vertex data at data to the current command buffer.
// The register writes below are emitted in a fixed order; keep it as is.
void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n)
{
//set attribute buffer address
GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3);
//set primitive type
GPUCMD_AddSingleParam(0x0002025E, primitive);
GPUCMD_AddSingleParam(0x0002025F, 0x00000001);
//index buffer not used for drawArrays but 0x000F0227 still required
GPUCMD_AddSingleParam(0x000F0227, 0x80000000);
//pass number of vertices
GPUCMD_AddSingleParam(0x000F0228, n);
GPUCMD_AddSingleParam(0x00010253, 0x00000001);
GPUCMD_AddSingleParam(0x00010245, 0x00000000);
GPUCMD_AddSingleParam(0x000F022E, 0x00000001);
GPUCMD_AddSingleParam(0x00010245, 0x00000001);
GPUCMD_AddSingleParam(0x000F0231, 0x00000001);
// GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff
}
//not thread safe
// Records the draw commands for this VBO once into a private, heap-allocated
// list (vbo->commands / vbo->commandsSize, size in words) so later draws can
// copy them instead of regenerating them.
// Returns 0 on success, -1 when vbo is NULL, the commands already exist, or
// the command list could not be allocated (commands stays NULL in that case).
int gsVboPrecomputeCommands(gsVbo_s* vbo)
{
	if(!vbo || vbo->commands)return -1;

	static u32 tmpBuffer[128];
	u32* savedAdr; u32 savedSize, savedOffset;

	// temporarily redirect command generation into tmpBuffer
	GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset);
	GPUCMD_SetBuffer(tmpBuffer, 128, 0);

	GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
	GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize);

	// restore the real command buffer before any return path, otherwise a
	// failed allocation would leave the GPU command stream pointing at tmpBuffer
	GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset);

	vbo->commands=memalign(0x4, vbo->commandsSize*4);
	if(!vbo->commands)
	{
		vbo->commandsSize=0;
		return -1;
	}
	memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4);

	return 0;
}
extern u32* gpuCmdBuf;
extern u32 gpuCmdBufSize;
extern u32 gpuCmdBufOffset;
void _vboMemcpy50(u32* dst, u32* src);
// Appends size words from cmd to the GPU command buffer at the current offset
// and advances the offset. Does nothing when cmd is NULL or size is 0.
// No check is made against the command buffer capacity.
void _GPUCMD_AddRawCommands(u32* cmd, u32 size)
{
	if(!cmd || !size)return;

	u32* const dst=&gpuCmdBuf[gpuCmdBufOffset];
	const u32 byteCount=size*4;

	// 0x50-byte lists take the dedicated fixed-size copy routine
	if(byteCount==0x50)
	{
		_vboMemcpy50(dst, cmd);
	}else{
		memcpy(dst, cmd, byteCount);
	}

	gpuCmdBufOffset+=size;
}
// Draws the VBO as triangles: uploads any changed matrices, then emits the
// draw commands, using the precomputed command list when it is available.
// Returns 0 on success, -1 when vbo is NULL, has no data, or is empty.
int gsVboDraw(gsVbo_s* vbo)
{
	if(!vbo)return -1;
	if(!vbo->data || !vbo->currentSize || !vbo->maxSize)return -1;

	gsUpdateTransformation();

	// builds the command list on first use; its result is reflected in vbo->commands
	gsVboPrecomputeCommands(vbo);

	if(!vbo->commands)
	{
		// no precomputed list: generate the draw commands in place
		GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
		return 0;
	}

	_GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize);

	return 0;
}

View File

@@ -0,0 +1,59 @@
#ifndef GS_H
#define GS_H
#include <3ds.h>
#include "math.h"
// Number of matrices each matrix stack can hold.
#define GS_MATRIXSTACK_SIZE (8)
// Selects one of the matrix stacks.
typedef enum
{
GS_PROJECTION = 0,
GS_MODELVIEW = 1,
GS_MATRIXTYPES // number of matrix types; not a valid selector itself
}GS_MATRIX;
// Vertex buffer object: a block of vertex data plus an optional precomputed
// list of GPU commands that draws it.
typedef struct
{
u8* data; // vertex data, allocated with gsLinearAlloc
u32 currentSize; // in bytes
u32 maxSize; // in bytes
u32 numVertices; // number of vertices added so far
u32* commands; // precomputed draw commands, NULL until first computed
u32 commandsSize; // size of commands, in 32-bit words
}gsVbo_s;
void gsInit(shaderProgram_s* shader);
void gsExit(void);
void gsStartFrame(void);
void gsAdjustBufferMatrices(mtx44 transformation);
void* gsLinearAlloc(size_t size);
void gsLinearFree(void* mem);
float* gsGetMatrix(GS_MATRIX m);
int gsLoadMatrix(GS_MATRIX m, float* data);
int gsPushMatrix();
int gsPopMatrix();
int gsMatrixMode(GS_MATRIX m);
void gsLoadIdentity();
void gsProjectionMatrix(float fovy, float aspect, float near, float far);
void gsRotateX(float x);
void gsRotateY(float y);
void gsRotateZ(float z);
void gsScale(float x, float y, float z);
void gsTranslate(float x, float y, float z);
int gsMultMatrix(float* data);
int gsVboInit(gsVbo_s* vbo);
int gsVboCreate(gsVbo_s* vbo, u32 size);
int gsVboFlushData(gsVbo_s* vbo);
int gsVboDestroy(gsVbo_s* vbo);
int gsVboDraw(gsVbo_s* vbo);
void* gsVboGetOffset(gsVbo_s* vbo);
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units);
#endif

View File

@@ -0,0 +1,354 @@
///////////////////////////////////////
// GPU example //
///////////////////////////////////////
//this example is meant to show how to use the GPU to render a 3D object
//it also shows how to do stereoscopic 3D
//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft
//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <3ds.h>
#include "math.h"
#include "gs.h"
#include "test_vsh_shbin.h"
#include "texture_bin.h"
//will be moved into ctrulib at some point
//current 3D slider state; volatile because the value changes outside the program's control
#define CONFIG_3D_SLIDERSTATE (*(volatile float*)0x1FF81080)
//packs four 8-bit channels as 0xRRGGBBAA; the unsigned masks keep the shift
//into bit 31 well defined when r is 0x80 or larger
#define RGBA8(r,g,b,a) ((((r)&0xFFu)<<24) | (((g)&0xFFu)<<16) | (((b)&0xFFu)<<8) | (((a)&0xFFu)<<0))
//transfer from GPU output buffer to actual framebuffer flags
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X))
//shader structure
//dvlb is the parsed shader binary, shader the program built from its first entry
DVLB_s* dvlb;
shaderProgram_s shader;
//texture data pointer
//linear memory copy of texture_bin, allocated in main
u32* texData;
//vbo structure
gsVbo_s vbo;
//GPU framebuffer address
//fixed address used as the render target
u32* gpuOut=(u32*)0x1F119400;
//GPU depth buffer address
u32* gpuDOut=(u32*)0x1F370800;
//angle for the vertex lighting (cf test.vsh)
float lightAngle;
//object position and rotation angle
vect3Df_s position, angle;
//vertex structure
// One vertex as stored in the VBO: 3 position floats, 2 texcoord floats,
// 3 normal floats, in that order.
typedef struct
{
vect3Df_s position;
float texcoord[2];
vect3Df_s normal;
}vertex_s;
//object data (cube)
//obviously this doesn't have to be defined manually, but we will here for the purposes of the example
//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z}
//we're drawing triangles so three lines = one triangle
// Cube geometry: 6 faces x 2 triangles x 3 vertices.
// Each entry is {position}, {texcoord}, {normal}, written with plain nested
// brace initializers.
const vertex_s modelVboData[]=
{
	//first face (PZ)
		//first triangle
		{{-0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, +1.0f}},
		{{+0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, +1.0f}},
		{{+0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, +1.0f}},
		//second triangle
		{{+0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, +1.0f}},
		{{-0.5f, +0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, +1.0f}},
		{{-0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, +1.0f}},
	//second face (MZ)
		//first triangle
		{{-0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, -1.0f}},
		{{-0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, -1.0f}},
		{{+0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, -1.0f}},
		//second triangle
		{{+0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, -1.0f}},
		{{+0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, -1.0f}},
		{{-0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, -1.0f}},
	//third face (PX)
		//first triangle
		{{+0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {+1.0f, 0.0f, 0.0f}},
		{{+0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {+1.0f, 0.0f, 0.0f}},
		{{+0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {+1.0f, 0.0f, 0.0f}},
		//second triangle
		{{+0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {+1.0f, 0.0f, 0.0f}},
		{{+0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {+1.0f, 0.0f, 0.0f}},
		{{+0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {+1.0f, 0.0f, 0.0f}},
	//fourth face (MX)
		//first triangle
		{{-0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {-1.0f, 0.0f, 0.0f}},
		{{-0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {-1.0f, 0.0f, 0.0f}},
		{{-0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {-1.0f, 0.0f, 0.0f}},
		//second triangle
		{{-0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {-1.0f, 0.0f, 0.0f}},
		{{-0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {-1.0f, 0.0f, 0.0f}},
		{{-0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {-1.0f, 0.0f, 0.0f}},
	//fifth face (PY)
		//first triangle
		{{-0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, +1.0f, 0.0f}},
		{{-0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, +1.0f, 0.0f}},
		{{+0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, +1.0f, 0.0f}},
		//second triangle
		{{+0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, +1.0f, 0.0f}},
		{{+0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, +1.0f, 0.0f}},
		{{-0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, +1.0f, 0.0f}},
	//sixth face (MY)
		//first triangle
		{{-0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, -1.0f, 0.0f}},
		{{+0.5f, -0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, -1.0f, 0.0f}},
		{{+0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, -1.0f, 0.0f}},
		//second triangle
		{{+0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, -1.0f, 0.0f}},
		{{-0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, -1.0f, 0.0f}},
		{{-0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, -1.0f, 0.0f}},
};
//stolen from staplebutt
// Configures texture environment stage num as a pass-through: both the colour
// and alpha combiners take GPU_PREVIOUS as source with GPU_REPLACE.
void GPU_SetDummyTexEnv(u8 num)
{
GPU_SetTexEnv(num,
GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
GPU_TEVOPERANDS(0,0,0),
GPU_TEVOPERANDS(0,0,0),
GPU_REPLACE,
GPU_REPLACE,
0xFFFFFFFF);
}
// topscreen
// Builds the GPU command list for one frame of the top screen: sets the fixed
// render state, binds the texture and vertex attribute layout, uploads the
// lighting uniforms, then sets up the matrices and draws the cube VBO.
void renderFrame()
{
// render into gpuOut/gpuDOut; the viewport is 480x400 (240*2 wide)
GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);
GPU_DepthMap(-1.0f, 0.0f);
GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
GPU_SetBlendingColor(0,0,0,0);
GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
GPUCMD_AddWrite(GPUREG_0118, 0);
GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
GPU_SetTextureEnable(GPU_TEXUNIT0);
// stage 0 modulates texture 0 with the vertex colour; stages 1-5 pass the result through
GPU_SetTexEnv(0,
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVOPERANDS(0,0,0),
GPU_TEVOPERANDS(0,0,0),
GPU_MODULATE, GPU_MODULATE,
0xFFFFFFFF);
GPU_SetDummyTexEnv(1);
GPU_SetDummyTexEnv(2);
GPU_SetDummyTexEnv(3);
GPU_SetDummyTexEnv(4);
GPU_SetDummyTexEnv(5);
//texturing stuff
GPU_SetTexture(
GPU_TEXUNIT0, //texture unit
(u32*)osConvertVirtToPhys((u32)texData), //data buffer
128, //texture width
128, //texture height
GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params
GPU_RGBA8 //texture pixel format
);
// NOTE(review): the base address passed below is texData, not the VBO data;
// GPU_DrawArrayDirectly writes the VBO address to register 0x200 at draw time,
// which looks like what actually selects the mesh - confirm this is intended.
GPU_SetAttributeBuffers(
3, //3 attributes: vertices, texcoords, and normals
(u32*)osConvertVirtToPhys((u32)texData), //mesh buffer
GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position)
GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input attribute register 1 (v1): 2 floats (texcoord)
GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // GPU Input attribute register 2 (v2): 3 floats (normal)
0xFFC,
0x210,
1,
(u32[]){0x00000000},
(u64[]){0x210},
(u8[]){3}
);
//setup lighting (this is specific to our shader)
vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle)));
// uniform components are passed in reverse (w, z, y, x) order; the direction is negated
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightDirection"), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightAmbient"), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1);
//initialize projection matrix to standard perspective stuff
gsMatrixMode(GS_PROJECTION);
gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f);
gsRotateZ(M_PI/2); //because framebuffer is sideways...
//draw object
gsMatrixMode(GS_MODELVIEW);
gsPushMatrix();
gsTranslate(position.x, position.y, position.z);
gsRotateX(angle.x);
gsRotateY(angle.y);
gsVboDraw(&vbo);
gsPopMatrix();
GPU_FinishDrawing();
}
// Entry point: initializes graphics, the GPU, the shader, GS, the texture and
// the cube VBO, then runs the input/render loop until START is pressed.
// With the 3D slider above zero each frame is rendered twice (left and right
// eye) by re-running a copy of the command buffer with shifted projection
// matrices; otherwise it is rendered once.
// Everything allocated here is released again before returning.
int main(int argc, char** argv)
{
	gfxInitDefault();

	//initialize GPU
	GPU_Init(NULL);

	//let GFX know we're ok with doing stereoscopic 3D rendering
	gfxSet3D(true);

	//allocate our GPU command buffers
	//they *have* to be on the linear heap
	u32 gpuCmdSize=0x40000;
	u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4);
	u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4);

	//actually reset the GPU
	GPU_Reset(NULL, gpuCmd, gpuCmdSize);

	//load our vertex shader binary
	dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size);
	shaderProgramInit(&shader);
	shaderProgramSetVsh(&shader, &dvlb->DVLE[0]);

	//initialize GS
	gsInit(&shader);

	// Flush the command buffer so that the shader upload gets executed
	GPUCMD_Finalize();
	GPUCMD_FlushAndRun(NULL);
	gspWaitForP3D();

	//create texture
	texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned
	memcpy(texData, texture_bin, texture_bin_size);

	//create VBO
	gsVboInit(&vbo);
	gsVboCreate(&vbo, sizeof(modelVboData));
	gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s));
	gsVboFlushData(&vbo);

	//initialize object position and angle
	position=vect3Df(0.0f, 0.0f, -2.0f);
	angle=vect3Df(M_PI/4, M_PI/4, 0.0f);

	//background color (blue)
	u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF);

	while(aptMainLoop())
	{
		//get current 3D slider state
		float slider=CONFIG_3D_SLIDERSTATE;

		//controls
		hidScanInput();
		//START to exit to hbmenu
		if(keysDown()&KEY_START)break;

		//A/B to change vertex lighting angle
		if(keysHeld()&KEY_A)lightAngle+=0.1f;
		if(keysHeld()&KEY_B)lightAngle-=0.1f;

		//D-PAD to rotate object
		if(keysHeld()&KEY_DOWN)angle.x+=0.05f;
		if(keysHeld()&KEY_UP)angle.x-=0.05f;
		if(keysHeld()&KEY_LEFT)angle.y+=0.05f;
		if(keysHeld()&KEY_RIGHT)angle.y-=0.05f;

		//R/L to bring object closer to or move it further from the camera
		if(keysHeld()&KEY_R)position.z+=0.1f;
		if(keysHeld()&KEY_L)position.z-=0.1f;

		//generate our GPU command buffer for this frame
		gsStartFrame();
		renderFrame();
		GPUCMD_Finalize();

		if(slider>0.0f)
		{
			//new and exciting 3D !
			//make a copy of left gpu buffer
			u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset);
			memcpy(gpuCmdRight, gpuCmd, offset*4);

			//setup interaxial
			float interaxial=slider*0.12f;

			//adjust left gpu buffer for 3D !
			{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}

			//draw left framebuffer
			GPUCMD_FlushAndRun(NULL);

			//while GPU starts drawing the left buffer, adjust right one for 3D !
			GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset);
			{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}

			//we wait for the left buffer to finish drawing
			gspWaitForP3D();
			//transfer the left image from GPU output buffer to actual framebuffer
			GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
			gspWaitForPPF();

			//clear the screen before drawing the right eye
			GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
			gspWaitForPSC0();

			//we draw the right buffer, wait for it to finish and then switch back to left one
			GPUCMD_FlushAndRun(NULL);
			gspWaitForP3D();

			//transfer from GPU output buffer to actual framebuffer
			GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
			gspWaitForPPF();
			GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
		}else{
			//boring old 2D !

			//draw the frame
			GPUCMD_FlushAndRun(NULL);
			gspWaitForP3D();

			//transfer from GPU output buffer to actual framebuffer
			GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
			gspWaitForPPF();
		}

		//clear the screen
		GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
		gspWaitForPSC0();
		gfxSwapBuffersGpu();

		gspWaitForEvent(GSPEVENT_VBlank0, true);
	}

	//release what we allocated
	//the VBO goes first: gsVboDestroy frees through gsLinearFree, which needs
	//the mutex that gsExit closes
	gsVboDestroy(&vbo);
	if(texData)linearFree(texData);

	gsExit();
	shaderProgramFree(&shader);
	DVLB_Free(dvlb);

	//every submitted command list has been waited on above, so the command
	//buffers are no longer in use
	if(gpuCmdRight)linearFree(gpuCmdRight);
	if(gpuCmd)linearFree(gpuCmd);

	gfxExit();
	return 0;
}

View File

@@ -0,0 +1,148 @@
#include <math.h>
#include <string.h>
#include "math.h"
// Writes the 4x4 identity into the 16 floats at m. Does nothing when m is NULL.
void loadIdentity44(float* m)
{
	if(!m)return;

	int idx;
	for(idx=0; idx<16; idx++)
	{
		// diagonal elements sit at indices 0, 5, 10 and 15
		m[idx]=(idx%5==0)?1.0f:0.0f;
	}
}
// Computes m = m1 * m2 for 4x4 matrices stored as 16 floats, where element
// (row j, column i) lives at index i+j*4.
// The product is built in a local buffer first, so m may be the same array as
// m1 or m2 (in-place multiplication) without corrupting the result.
void multMatrix44(float* m1, float* m2, float* m) //4x4
{
	float result[16];
	int i, j;

	for(i=0;i<4;i++)
	{
		for(j=0;j<4;j++)
		{
			result[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]);
		}
	}

	memcpy(m, result, sizeof(result));
}
// Post-multiplies tm in place by a translation of (x, y, z): tm = tm * T.
void translateMatrix(float* tm, float x, float y, float z)
{
	// identity with the translation in the last column
	float shift[16]={
		1.0f, 0.0f, 0.0f, x,
		0.0f, 1.0f, 0.0f, y,
		0.0f, 0.0f, 1.0f, z,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float product[16];

	multMatrix44(tm,shift,product);
	memcpy(tm,product,sizeof(product));
}
// 00 01 02 03
// 04 05 06 07
// 08 09 10 11
// 12 13 14 15
// Multiplies tm in place by a rotation of x about the X axis.
// r==false computes tm = tm * R, r==true computes tm = R * tm.
void rotateMatrixX(float* tm, float x, bool r)
{
	const float c=cos(x);
	const float s=sin(x);
	float rot[16]={
		1.0f, 0.0f, 0.0f, 0.0f,
		0.0f,    c,    s, 0.0f,
		0.0f,   -s,    c, 0.0f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float product[16];

	if(r)multMatrix44(rot,tm,product);
	else multMatrix44(tm,rot,product);

	memcpy(tm,product,sizeof(product));
}
// Multiplies tm in place by a rotation of x about the Y axis.
// r==false computes tm = tm * R, r==true computes tm = R * tm.
void rotateMatrixY(float* tm, float x, bool r)
{
	const float c=cos(x);
	const float s=sin(x);
	float rot[16]={
		   c, 0.0f,    s, 0.0f,
		0.0f, 1.0f, 0.0f, 0.0f,
		  -s, 0.0f,    c, 0.0f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float product[16];

	if(r)multMatrix44(rot,tm,product);
	else multMatrix44(tm,rot,product);

	memcpy(tm,product,sizeof(product));
}
// Multiplies tm in place by a rotation of x about the Z axis.
// r==false computes tm = tm * R, r==true computes tm = R * tm.
void rotateMatrixZ(float* tm, float x, bool r)
{
	const float c=cos(x);
	const float s=sin(x);
	float rot[16]={
		   c,    s, 0.0f, 0.0f,
		  -s,    c, 0.0f, 0.0f,
		0.0f, 0.0f, 1.0f, 0.0f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float product[16];

	if(r)multMatrix44(rot,tm,product);
	else multMatrix44(tm,rot,product);

	memcpy(tm,product,sizeof(product));
}
// Scales tm in place: in every row the first three elements are multiplied by
// x, y and z respectively; the fourth element of each row is left untouched.
void scaleMatrix(float* tm, float x, float y, float z)
{
	int row;
	for(row=0; row<4; row++)
	{
		float* const r=&tm[row*4];
		r[0]*=x;
		r[1]*=y;
		r[2]*=z;
	}
}
// Writes a perspective projection into the 16 floats at m.
// The matrix is (depth remap) * (perspective), where the perspective part is
// built from the vertical field of view fovy, the aspect ratio and the
// near/far planes, and the remap part rewrites the third row as 0.5*z - 0.5*w.
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far)
{
	float top = near*tan(fovy/2);
	float right = (top*aspect);

	float perspective[4*4]={
		near/right, 0.0f,     0.0f,                   0.0f,
		0.0f,       near/top, 0.0f,                   0.0f,
		0.0f,       0.0f,     -(far+near)/(far-near), -2.0f*(far*near)/(far-near),
		0.0f,       0.0f,     -1.0f,                  0.0f,
	};

	// identity except for the depth row
	float depthRemap[4*4]={
		1.0f, 0.0f, 0.0f, 0.0f,
		0.0f, 1.0f, 0.0f, 0.0f,
		0.0f, 0.0f, 0.5f, -0.5f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};

	multMatrix44(depthRemap, perspective, m);
}
// Returns (m[i*4], m[i*4+1], m[i*4+2]); a zero vector when m is NULL or i>=4.
// NOTE(review): with the row-major index layout documented above the rotate
// functions these three elements are consecutive entries of row i, not of
// column i - confirm against callers before relying on the name.
vect3Df_s getMatrixColumn(float* m, u8 i)
{
if(!m || i>=4)return vect3Df(0,0,0);
return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]);
}
// Returns (m[i], m[i+4], m[i+8]); a zero vector when m is NULL or i>=4.
// NOTE(review): see getMatrixColumn - these elements are 4 apart.
vect3Df_s getMatrixRow(float* m, u8 i)
{
if(!m || i>=4)return vect3Df(0,0,0);
return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]);
}
// Four-component variant of getMatrixColumn: (m[i*4] .. m[i*4+3]).
vect4Df_s getMatrixColumn4(float* m, u8 i)
{
if(!m || i>=4)return vect4Df(0,0,0,0);
return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]);
}
// Four-component variant of getMatrixRow: (m[i], m[i+4], m[i+8], m[i+12]).
vect4Df_s getMatrixRow4(float* m, u8 i)
{
if(!m || i>=4)return vect4Df(0,0,0,0);
return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]);
}

View File

@@ -0,0 +1,144 @@
#ifndef MATH_H
#define MATH_H
#include <3ds/types.h>
#include <math.h>
typedef float mtx44[4][4];
typedef float mtx33[3][3];
// 3-component integer vector and its by-value helpers.
typedef struct
{
s32 x, y, z;
}vect3Di_s;
// Builds a vector from its components.
static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z)
{
return (vect3Di_s){x,y,z};
}
// Component-wise sum u+v.
static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v)
{
return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z};
}
// Component-wise difference u-v.
static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v)
{
return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z};
}
// Multiplies every component of v by the scalar f.
static inline vect3Di_s vmuli(vect3Di_s v, s32 f)
{
return (vect3Di_s){v.x*f,v.y*f,v.z*f};
}
// 3-component float vector and its by-value helpers.
typedef struct
{
float x, y, z;
}vect3Df_s;
// Builds a vector from its components.
static inline vect3Df_s vect3Df(float x, float y, float z)
{
return (vect3Df_s){x,y,z};
}
// Component-wise sum u+v.
static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v)
{
return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z};
}
// Component-wise difference u-v.
static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v)
{
return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z};
}
// Multiplies every component of v by the scalar f.
static inline vect3Df_s vmulf(vect3Df_s v, float f)
{
return (vect3Df_s){v.x*f,v.y*f,v.z*f};
}
// Component-wise product of v1 and v2.
static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2)
{
return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z};
}
// Euclidean length of v.
static inline float vmagf(vect3Df_s v)
{
return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
}
// Euclidean distance between v1 and v2.
static inline float vdistf(vect3Df_s v1, vect3Df_s v2)
{
return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z));
}
// Returns v divided by its length. There is no guard for a zero-length v:
// the division is by zero in that case.
static inline vect3Df_s vnormf(vect3Df_s v)
{
const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
return (vect3Df_s){v.x/l,v.y/l,v.z/l};
}
// 4-component float vector and its by-value helpers.
typedef struct
{
float x, y, z, w;
}vect4Df_s;
// Builds a vector from its components.
static inline vect4Df_s vect4Df(float x, float y, float z, float w)
{
return (vect4Df_s){x,y,z,w};
}
// Component-wise sum u+v.
static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v)
{
return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w};
}
// Component-wise difference u-v.
static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v)
{
return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w};
}
// Multiplies every component of v by the scalar f.
static inline vect4Df_s vmulf4(vect4Df_s v, float f)
{
return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f};
}
// Dot product of v1 and v2 over all four components.
static inline float vdotf4(vect4Df_s v1, vect4Df_s v2)
{
return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w;
}
// Returns v divided by its 4-component length. There is no guard for a
// zero-length v: the division is by zero in that case.
static inline vect4Df_s vnormf4(vect4Df_s v)
{
const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w);
return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l};
}
//interstuff
// Converts a float vector to an integer vector, rounding each component down (floorf).
static inline vect3Di_s vf2i(vect3Df_s v)
{
return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)};
}
// Converts an integer vector to a float vector.
static inline vect3Df_s vi2f(vect3Di_s v)
{
return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z};
}
void loadIdentity44(float* m);
void multMatrix44(float* m1, float* m2, float* m);
void translateMatrix(float* tm, float x, float y, float z);
void rotateMatrixX(float* tm, float x, bool r);
void rotateMatrixY(float* tm, float x, bool r);
void rotateMatrixZ(float* tm, float x, bool r);
void scaleMatrix(float* tm, float x, float y, float z);
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far);
vect3Df_s getMatrixColumn(float* m, u8 i);
vect3Df_s getMatrixRow(float* m, u8 i);
vect4Df_s getMatrixColumn4(float* m, u8 i);
vect4Df_s getMatrixRow4(float* m, u8 i);
#endif

View File

@@ -0,0 +1,6 @@
cube lighting example
=====================
An example similar to cube, but with some rudimentary vertex lighting effects. The shader used is somewhat more complex and involves a LOOP to implement multiple light sources.
Before trying to compile, make sure your NIHSTRO environment variable points to the directory nihstro-assemble resides in. Additionally, ctrulib in revision 1f52ac344d or similar is required, plus some patches to implement proper uniform setters.

View File

@@ -0,0 +1,66 @@
// Vertex shader for the cube lighting example: transforms the position,
// passes the texture coordinate through and accumulates the contribution of
// several lights in a loop.
// setup constants
.alias myconst c32 as (1.0, 0.0, 0.5, 1.0)
// setup output map
.alias outpos o0 as position
.alias outcol o1 as color
.alias outtex0 o2.xyzw as texcoord0 // Would like to use .xy instead, but this is not supported by ctrulib currently
.alias outtex1 o3.xyzw as texcoord1
.alias outtex2 o4.xyzw as texcoord2
// setup uniform map, for use with SHDR_GetUniformRegister
.alias projection c0-c3
.alias modelview c4-c7
// integer uniform controlling the lighting loop below
.alias num_lights i1
// first light: direction, diffuse, ambient in three consecutive registers
.alias light_dir c8
.alias light_diffuse c9
.alias light_ambient c10
// second light, laid out the same way three registers further on
.alias light_dir2 c11
.alias light_diffuse2 c12
.alias light_ambient2 c13
main:
// r1 = (in.pos.xyz, 1.0)
mov r1.xyz, v0.xyz
mov r1.w, myconst.w
mdvl: // tempreg = mdlvMtx * in.pos
dp4 r0.x, modelview[0], r1
dp4 r0.y, modelview[1], r1
dp4 r0.z, modelview[2], r1
// w is forced to 1.0 instead of being computed from modelview[3]
mov r0.w, myconst.w
proj: // result.pos = projMtx * tempreg
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
tex: // result.texcoord = in.texcoord
mov outtex0, v1.xyzw
// texcoord1 and texcoord2 are unused: write the constant (0.0, 0.0, 0.0, 1.0)
mov outtex1, myconst.yyyw
mov outtex2, myconst.yyyw
lighting: // color = sum over all lights(diffuse * clamp(dot(L,N),0) + ambient)
// r0 is the accumulator, starting at (0.0, 0.0, 0.0, 1.0)
mov r0, myconst.yyyw
loop num_lights
// light_dir/light_diffuse/light_ambient are indexed by the loop counter;
// presumably the counter advances by 3 per light to match the register layout above - TODO confirm against the host-side i1 setup
mov r1.xyz, myconst.yyy
dp3 r1.xyz, light_dir[lcnt].xyz, v2.xyz
// discard negative (back-facing) contributions
max r1.xyz, r1.xyz, myconst.yyy
mul r1.xyz, r1.xyz, light_diffuse[lcnt].xyz
add r1.xyz, r1.xyz, light_ambient[lcnt].xyz
add r0.xyz, r1.xyz, r0.xyz
nop
endloop
// clamp the accumulated colour to at most 1.0 per channel
min r0.xyz, r0.xyz, myconst.xxx
mov outcol, r0
nop
end
endmain:

Binary file not shown.

View File

@@ -0,0 +1,16 @@
.section ".text"
.arm
.align 4
.global _vboMemcpy50
# void _vboMemcpy50(u32* dst, u32* src)
# r0 : dst
# r1 : src
# fixed size 0x50
# Copies exactly 0x50 bytes (20 words) as two bursts of 10 registers.
# The previous register list r2-r12 held 11 registers, i.e. 2*0x2C = 0x58 bytes,
# which read and wrote 8 bytes past the advertised size.
# r0/r1 are left pointing just past the copied data, r2-r3 are scratch,
# r4-r11 are saved on entry and restored before returning.
_vboMemcpy50:
	push {r4-r11}
	ldmia r1!, {r2-r11}
	stmia r0!, {r2-r11}
	ldmia r1!, {r2-r11}
	stmia r0!, {r2-r11}
	pop {r4-r11}
	bx lr

View File

@@ -0,0 +1,432 @@
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <3ds.h>
#include "gs.h"
#include "math.h"
#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4)
static void gsInitMatrixStack();
Handle linearAllocMutex;
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES];
// Record of one projection matrix upload: the command buffer offset at which it was
// emitted plus a copy of the matrix, so gsAdjustBufferMatrices can rewrite it later.
typedef struct
{
u32 offset; // command buffer offset reported by GPUCMD_GetBuffer just before the upload
mtx44 data; // copy of the projection matrix that was uploaded
}bufferMatrix_s;
bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE];
int bufferMatrixListLength;
//----------------------
// GS SYSTEM STUFF
//----------------------
//forgets every recorded projection matrix upload by resetting the list length
void initBufferMatrixList()
{
    bufferMatrixListLength = 0;
}
//initializes the GS layer: matrix stacks, recorded-matrix list and the linear heap mutex
//shader : optional shader program; when given, the uniform registers of its
//         "projection" and "modelview" uniforms are looked up and the program is bound
//a uniform that cannot be located (negative lookup result) keeps its default
//register instead of having the error code stored as a register index
void gsInit(shaderProgram_s* shader)
{
    gsInitMatrixStack();
    initBufferMatrixList();
    svcCreateMutex(&linearAllocMutex, false);

    if(shader)
    {
        //indexed by GS_MATRIX : GS_PROJECTION, GS_MODELVIEW
        static const char* const uniformNames[GS_MATRIXTYPES]={"projection", "modelview"};
        int i;
        for(i=0; i<GS_MATRIXTYPES; i++)
        {
            const int location=(int)shaderInstanceGetUniformLocation(shader->vertexShader, uniformNames[i]);
            if(location>=0)gsMatrixStackRegisters[i]=(u32)location;
        }
        shaderProgramUse(shader);
    }
}
//shuts GS down by closing the linear heap mutex handle created in gsInit
void gsExit(void)
{
    svcCloseHandle(linearAllocMutex);
}
void gsStartFrame(void)
{
GPUCMD_SetBufferOffset(0);
initBufferMatrixList();
}
//linearAlloc wrapped in the linear heap mutex
//returns whatever linearAlloc returned for the requested size
void* gsLinearAlloc(size_t size)
{
    void* block;

    svcWaitSynchronization(linearAllocMutex, U64_MAX);
    block=linearAlloc(size);
    svcReleaseMutex(linearAllocMutex);

    return block;
}
//linearFree wrapped in the linear heap mutex
void gsLinearFree(void* mem)
{
    svcWaitSynchronization(linearAllocMutex, U64_MAX);
    {
        linearFree(mem);
    }
    svcReleaseMutex(linearAllocMutex);
}
//----------------------
// MATRIX STACK STUFF
//----------------------
static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE];
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04};
static u8 gsMatrixStackOffsets[GS_MATRIXTYPES];
static bool gsMatrixStackUpdated[GS_MATRIXTYPES];
static GS_MATRIX gsCurrentMatrixType;
//resets every matrix stack to a single identity matrix flagged as needing upload,
//then selects the projection stack as the current one
static void gsInitMatrixStack()
{
    int type=0;
    while(type<GS_MATRIXTYPES)
    {
        gsMatrixStackOffsets[type]=0;
        gsMatrixStackUpdated[type]=true;
        loadIdentity44((float*)gsMatrixStacks[type][0]);
        type++;
    }
    gsCurrentMatrixType=GS_PROJECTION;
}
//returns the matrix on top of stack m as 16 floats, or NULL when m is not a valid matrix type
float* gsGetMatrix(GS_MATRIX m)
{
    if(m>=0 && m<GS_MATRIXTYPES)
    {
        const u8 top=gsMatrixStackOffsets[m];
        return (float*)gsMatrixStacks[m][top];
    }
    return NULL;
}
//overwrites the top of stack m with the 16 floats at data and flags it for upload
//returns 0 on success, -1 when m is invalid or data is NULL
int gsLoadMatrix(GS_MATRIX m, float* data)
{
    const bool validType=(m>=0 && m<GS_MATRIXTYPES);
    if(!validType || data==NULL)return -1;

    memcpy(gsGetMatrix(m), data, sizeof(mtx44));
    gsMatrixStackUpdated[m]=true;

    return 0;
}
//duplicates the top of the current matrix stack into the next slot and makes that slot the top
//returns 0 on success, -1 when the current type is invalid or the stack is already full
int gsPushMatrix()
{
    const GS_MATRIX m=gsCurrentMatrixType;
    if(m<0 || m>=GS_MATRIXTYPES)return -1;

    //the offset is unsigned, so only the upper bound can actually be violated
    if(gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1;

    float* previousTop=gsGetMatrix(m);
    gsMatrixStackOffsets[m]+=1;
    float* newTop=gsGetMatrix(m);
    memcpy(newTop, previousTop, sizeof(mtx44));

    return 0;
}
int gsPopMatrix()
{
const GS_MATRIX m=gsCurrentMatrixType;
if(m<0 || m>=GS_MATRIXTYPES)return -1;
if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1;
gsMatrixStackOffsets[m]--;
gsMatrixStackUpdated[m]=true;
return 0;
}
//selects which matrix stack the transform functions operate on
//returns 0 on success, -1 (selection unchanged) when m is not a valid matrix type
int gsMatrixMode(GS_MATRIX m)
{
    if(m>=0 && m<GS_MATRIXTYPES)
    {
        gsCurrentMatrixType=m;
        return 0;
    }
    return -1;
}
//------------------------
// MATRIX TRANSFORM STUFF
//------------------------
//replaces the current matrix by (current matrix * data) and flags it for upload
//returns 0 on success, -1 when data is NULL
int gsMultMatrix(float* data)
{
    if(data==NULL)return -1;

    float* current=gsGetMatrix(gsCurrentMatrixType);
    mtx44 product;

    //multiply into a temporary so the inputs are not overwritten mid-computation
    multMatrix44(current, data, (float*)product);
    memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)product, sizeof(mtx44));
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;

    return 0;
}
//resets the current matrix to identity and flags it for upload
void gsLoadIdentity()
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    loadIdentity44(current);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//overwrites the current matrix with the result of initProjectionMatrix and flags it for upload
void gsProjectionMatrix(float fovy, float aspect, float near, float far)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    initProjectionMatrix(current, fovy, aspect, near, far);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//applies rotateMatrixX with angle x to the current matrix (rotation on the right) and flags it for upload
void gsRotateX(float x)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    rotateMatrixX(current, x, false);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//applies rotateMatrixY with angle y to the current matrix (rotation on the right) and flags it for upload
void gsRotateY(float y)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    rotateMatrixY(current, y, false);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//applies rotateMatrixZ with angle z to the current matrix (rotation on the right) and flags it for upload
void gsRotateZ(float z)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    rotateMatrixZ(current, z, false);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//scales the current matrix by (x, y, z) through scaleMatrix and flags it for upload
void gsScale(float x, float y, float z)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    scaleMatrix(current, x, y, z);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//translates the current matrix by (x, y, z) through translateMatrix and flags it for upload
void gsTranslate(float x, float y, float z)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    translateMatrix(current, x, y, z);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//----------------------
// MATRIX RENDER STUFF
//----------------------
//uploads the 4x4 matrix m to four consecutive vertex shader float uniforms starting at startreg
//each row of four floats is sent in reversed component order (w, z, y, x)
static void gsSetUniformMatrix(u32 startreg, float* m)
{
    float param[16];
    int row, col;

    for(row=0; row<4; row++)
    {
        for(col=0; col<4; col++)
        {
            param[row*4+col]=m[row*4+(3-col)];
        }
    }

    GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4);
}
static int gsUpdateTransformation()
{
GS_MATRIX m;
for(m=0; m<GS_MATRIXTYPES; m++)
{
if(gsMatrixStackUpdated[m])
{
if(m==GS_PROJECTION && bufferMatrixListLength<BUFFERMATRIXLIST_SIZE)
{
GPUCMD_GetBuffer(NULL, NULL, &bufferMatrixList[bufferMatrixListLength].offset);
memcpy(bufferMatrixList[bufferMatrixListLength].data, gsGetMatrix(m), sizeof(mtx44));
bufferMatrixListLength++;
}
gsSetUniformMatrix(gsMatrixStackRegisters[m], gsGetMatrix(m));
gsMatrixStackUpdated[m]=false;
}
}
return 0;
}
//rewrites every recorded projection upload in the current command buffer with
//(recorded matrix * transformation), then restores the buffer offset
void gsAdjustBufferMatrices(mtx44 transformation)
{
    u32* buffer;
    u32 endOffset;
    int idx;

    GPUCMD_GetBuffer(&buffer, NULL, &endOffset);

    for(idx=0; idx<bufferMatrixListLength; idx++)
    {
        bufferMatrix_s* entry=&bufferMatrixList[idx];

        //TODO : better check, need to account for param size
        if(entry->offset+2>=endOffset)continue;

        mtx44 adjusted;
        GPUCMD_SetBufferOffset(entry->offset);
        multMatrix44((float*)entry->data, (float*)transformation, (float*)adjusted);
        gsSetUniformMatrix(gsMatrixStackRegisters[GS_PROJECTION], (float*)adjusted);
    }

    GPUCMD_SetBufferOffset(endOffset);
}
//----------------------
// VBO STUFF
//----------------------
//puts a VBO into its empty state (no data, no precomputed commands, all counters zero)
//returns 0 on success, -1 when vbo is NULL
//numVertices is cleared too, so a VBO that was only initialized, or that was
//destroyed (gsVboDestroy ends by calling this), does not carry a stale vertex count
int gsVboInit(gsVbo_s* vbo)
{
    if(!vbo)return -1;

    vbo->data=NULL;
    vbo->currentSize=0;
    vbo->maxSize=0;
    vbo->numVertices=0;
    vbo->commands=NULL;
    vbo->commandsSize=0;

    return 0;
}
//allocates size bytes of linear memory for the VBO and resets its fill counters
//returns 0 on success, -1 when vbo is NULL or the allocation failed
//on allocation failure maxSize is left at 0 so the VBO never advertises capacity it does not have
int gsVboCreate(gsVbo_s* vbo, u32 size)
{
    if(!vbo)return -1;

    vbo->data=gsLinearAlloc(size);
    vbo->numVertices=0;
    vbo->currentSize=0;
    vbo->maxSize=vbo->data?size:0;

    return vbo->data?0:-1;
}
//returns the address of the first unused byte of the VBO data, or NULL when vbo is NULL
void* gsVboGetOffset(gsVbo_s* vbo)
{
    if(vbo==NULL)return NULL;

    u8* const base=(u8*)vbo->data;
    return (void*)&base[vbo->currentSize];
}
//appends size bytes from data to the VBO and adds units to its vertex count
//returns 0 on success, -1 on NULL/empty input, on a VBO without storage,
//or when fewer than size bytes remain
//the remaining space is computed in unsigned arithmetic after checking
//currentSize<=maxSize, so it can neither go negative nor wrap
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units)
{
    if(!vbo || !data || !size)return -1;
    if(!vbo->data || vbo->currentSize>vbo->maxSize)return -1;
    if(size>vbo->maxSize-vbo->currentSize)return -1;

    memcpy(gsVboGetOffset(vbo), data, size);
    vbo->currentSize+=size;
    vbo->numVertices+=units;

    return 0;
}
//currently a no-op apart from the NULL check : returns -1 for a NULL vbo, 0 otherwise
int gsVboFlushData(gsVbo_s* vbo)
{
    //unnecessary if we use flushAndRun
    // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize);
    return (vbo!=NULL)?0:-1;
}
//frees the precomputed commands and the vertex data of a VBO, then puts it back in its empty state
//returns 0 on success, -1 when vbo is NULL
int gsVboDestroy(gsVbo_s* vbo)
{
    if(vbo==NULL)return -1;

    if(vbo->commands!=NULL)
    {
        free(vbo->commands);
    }
    if(vbo->data!=NULL)
    {
        gsLinearFree(vbo->data);
    }

    gsVboInit(vbo);
    return 0;
}
extern u32 debugValue[];
// Emits the raw GPU commands for a non-indexed draw of n vertices read from data.
// primitive : primitive type written to register 0x25E
// data      : vertex data (virtual address, converted to physical here)
// n         : number of vertices
// Ten single-parameter commands are appended to the current command buffer.
// NOTE(review): register meanings below come from the existing comments only; the
// unlabelled writes are undocumented here, so keep their order and values as they are.
void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n)
{
//set attribute buffer address
// physical address is written shifted right by 3
GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3);
//set primitive type
GPUCMD_AddSingleParam(0x0002025E, primitive);
GPUCMD_AddSingleParam(0x0002025F, 0x00000001);
//index buffer not used for drawArrays but 0x000F0227 still required
GPUCMD_AddSingleParam(0x000F0227, 0x80000000);
//pass number of vertices
GPUCMD_AddSingleParam(0x000F0228, n);
GPUCMD_AddSingleParam(0x00010253, 0x00000001);
GPUCMD_AddSingleParam(0x00010245, 0x00000000);
GPUCMD_AddSingleParam(0x000F022E, 0x00000001);
GPUCMD_AddSingleParam(0x00010245, 0x00000001);
GPUCMD_AddSingleParam(0x000F0231, 0x00000001);
// GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff
}
//not thread safe
//records the draw commands of a VBO once so later draws can copy them
//the commands are generated into a static scratch buffer (hence not thread safe)
//by temporarily redirecting the GPU command buffer, then copied to the heap
//returns 0 on success, -1 when vbo is NULL, already has commands, or memalign failed
//the real command buffer is restored before anything can fail; the old code
//returned early on allocation failure and left the GPU pointed at the scratch buffer
int gsVboPrecomputeCommands(gsVbo_s* vbo)
{
    if(!vbo || vbo->commands)return -1;

    static u32 tmpBuffer[128];

    u32* savedAdr; u32 savedSize, savedOffset;
    GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset);

    //generate the draw commands into the scratch buffer and note how many words they take
    GPUCMD_SetBuffer(tmpBuffer, 128, 0);
    GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
    GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize);

    //switch back to the caller's buffer on every path
    GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset);

    vbo->commands=memalign(0x4, vbo->commandsSize*4);
    if(!vbo->commands)
    {
        vbo->commandsSize=0; //no stale size next to a NULL commands pointer
        return -1;
    }

    memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4);

    return 0;
}
extern u32* gpuCmdBuf;
extern u32 gpuCmdBufSize;
extern u32 gpuCmdBufOffset;
void _vboMemcpy50(u32* dst, u32* src);
//appends size words from cmd to the GPU command buffer and advances its offset
//a 0x50-byte payload goes through _vboMemcpy50, everything else through memcpy
//does nothing when cmd is NULL or size is 0
void _GPUCMD_AddRawCommands(u32* cmd, u32 size)
{
    if(cmd==NULL || size==0)return;

    u32* const dst=&gpuCmdBuf[gpuCmdBufOffset];
    const u32 byteCount=size*4;

    if(byteCount!=0x50)memcpy(dst, cmd, byteCount);
    else _vboMemcpy50(dst, cmd);

    gpuCmdBufOffset+=size;
}
//draws a VBO as triangles using the current matrices
//returns -1 when vbo is NULL or holds no data, 0 otherwise
int gsVboDraw(gsVbo_s* vbo)
{
    if(vbo==NULL)return -1;
    if(!vbo->data || !vbo->currentSize || !vbo->maxSize)return -1;

    //upload pending matrices, then make sure the draw commands have been recorded
    gsUpdateTransformation();
    gsVboPrecomputeCommands(vbo);

    if(!vbo->commands)
    {
        //no recorded commands available : emit the draw commands directly
        GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
        return 0;
    }

    _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize);
    return 0;
}

View File

@@ -0,0 +1,59 @@
#ifndef GS_H
#define GS_H
#include <3ds.h>
#include "math.h"
#define GS_MATRIXSTACK_SIZE (8)
// Identifies one of the matrix stacks kept by GS; the values are used as array indices.
typedef enum
{
GS_PROJECTION = 0,
GS_MODELVIEW = 1,
GS_MATRIXTYPES // number of matrix types, not a valid stack itself
}GS_MATRIX;
// Vertex buffer object: a linear-heap data buffer plus optional precomputed draw commands.
typedef struct
{
u8* data; // vertex data, allocated by gsVboCreate
u32 currentSize; // in bytes
u32 maxSize; // in bytes
u32 numVertices; // vertex count accumulated by gsVboAddData
u32* commands; // precomputed draw commands, NULL until gsVboPrecomputeCommands succeeds
u32 commandsSize; // size of commands in 32-bit words
}gsVbo_s;
void gsInit(shaderProgram_s* shader);
void gsExit(void);
void gsStartFrame(void);
void gsAdjustBufferMatrices(mtx44 transformation);
void* gsLinearAlloc(size_t size);
void gsLinearFree(void* mem);
float* gsGetMatrix(GS_MATRIX m);
int gsLoadMatrix(GS_MATRIX m, float* data);
int gsPushMatrix();
int gsPopMatrix();
int gsMatrixMode(GS_MATRIX m);
void gsLoadIdentity();
void gsProjectionMatrix(float fovy, float aspect, float near, float far);
void gsRotateX(float x);
void gsRotateY(float y);
void gsRotateZ(float z);
void gsScale(float x, float y, float z);
void gsTranslate(float x, float y, float z);
int gsMultMatrix(float* data);
int gsVboInit(gsVbo_s* vbo);
int gsVboCreate(gsVbo_s* vbo, u32 size);
int gsVboFlushData(gsVbo_s* vbo);
int gsVboDestroy(gsVbo_s* vbo);
int gsVboDraw(gsVbo_s* vbo);
void* gsVboGetOffset(gsVbo_s* vbo);
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units);
#endif

View File

@@ -0,0 +1,379 @@
///////////////////////////////////////
// GPU example //
///////////////////////////////////////
//this example is meant to show how to use the GPU to render a 3D object
//it also shows how to do stereoscopic 3D
//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft
//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <3ds.h>
#include "math.h"
#include "gs.h"
#include "test_vsh_shbin.h"
#include "texture_bin.h"
//will be moved into ctrulib at some point
#define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080)
//packs four 8-bit channels as 0xRRGGBBAA
//each channel is widened to uint32_t before shifting so that a red value >= 0x80
//shifted into bit 31 cannot overflow a signed int
#define RGBA8(r,g,b,a) ((((uint32_t)(r)&0xFF)<<24) | (((uint32_t)(g)&0xFF)<<16) | (((uint32_t)(b)&0xFF)<<8) | (((uint32_t)(a)&0xFF)<<0))
//transfer from GPU output buffer to actual framebuffer flags
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X))
//shader structure
DVLB_s* dvlb;
shaderProgram_s shader;
//texture data pointer
u32* texData;
//vbo structure
gsVbo_s vbo;
//GPU framebuffer address
u32* gpuOut=(u32*)0x1F119400;
//GPU depth buffer address
u32* gpuDOut=(u32*)0x1F370800;
//angle for the vertex lighting (cf test.vsh)
float lightAngle;
//object position and rotation angle
vect3Df_s position, angle;
//vertex structure
// One vertex as stored in the VBO: 3 position floats, 2 texcoord floats, 3 normal floats.
typedef struct
{
vect3Df_s position;
float texcoord[2];
vect3Df_s normal;
}vertex_s;
//object data (cube)
//obviously this doesn't have to be defined manually, but we will here for the purposes of the example
//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z}
//we're drawing triangles so three lines = one triangle
// 36 vertices: 6 faces x 2 triangles x 3 vertices of a unit cube centered on the origin.
// Face tags: P/M = plus/minus, followed by the axis the face normal points along.
const vertex_s modelVboData[]=
{
//first face (PZ)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
//second face (MZ)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
//third face (PX)
//first triangle
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
//fourth face (MX)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
//second triangle
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
//fifth face (PY)
//first triangle
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
//sixth face (MY)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
};
//stolen from staplebutt
//configures texture environment stage num with GPU_PREVIOUS as source and GPU_REPLACE
//as combiner for both color and alpha
void GPU_SetDummyTexEnv(u8 num)
{
    GPU_SetTexEnv(
        num,
        GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), //rgb sources
        GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), //alpha sources
        GPU_TEVOPERANDS(0,0,0),             //rgb operands
        GPU_TEVOPERANDS(0,0,0),             //alpha operands
        GPU_REPLACE,                        //rgb combine
        GPU_REPLACE,                        //alpha combine
        0xFFFFFFFF                          //constant color
    );
}
// topscreen
// Builds the GPU command list for one frame of the top screen: fixed-function state,
// the three light uniforms, texture and attribute setup, matrices, then the cube draw.
// Reads the globals lightAngle, position, angle, texData, shader and vbo.
void renderFrame()
{
GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);
GPU_DepthMap(-1.0f, 0.0f);
GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
GPU_SetBlendingColor(0,0,0,0);
GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
GPUCMD_AddWrite(GPUREG_0118, 0);
//lighting stuff
// lights 2 and 3 advance on their own every frame; light 1 follows the user-controlled lightAngle
static double lightAngle2 = 0;
lightAngle2 += 0.03;
static double lightAngle3 = 0;
lightAngle3 += 0.1;
vect3Df_s lightDir[3] = { vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle))),
vnormf(vect3Df(cos(lightAngle2), -1.0f, sin(lightAngle2))),
vnormf(vect3Df(cos(lightAngle3*2), cos(lightAngle3), sin(lightAngle3))) };
unsigned num_lights = 3;
// number of float uniform registers per light (direction, diffuse, ambient)
unsigned light_size = 3;
// packed loop parameters for the shader's "loop num_lights":
// presumably bits 0-7 = iteration count - 1, bits 8-15 = initial index, bits 16-23 = index step (TODO confirm)
uint32_t val = ((num_lights-1u))|(0<<8)|(light_size<<16u);
// Set int uniforms
GPUCMD_AddWrite(GPUREG_GSH_INTUNIFORM_I1, val);
GPUCMD_AddWrite(GPUREG_VSH_INTUNIFORM_I1, val);
GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
GPU_SetTextureEnable(GPU_TEXUNIT0);
// stage 0 combines texture 0 with the vertex color using GPU_MODULATE
GPU_SetTexEnv(0,
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVOPERANDS(0,0,0),
GPU_TEVOPERANDS(0,0,0),
GPU_MODULATE, GPU_MODULATE,
0xFFFFFFFF);
// remaining stages are set up by GPU_SetDummyTexEnv
GPU_SetDummyTexEnv(1);
GPU_SetDummyTexEnv(2);
GPU_SetDummyTexEnv(3);
GPU_SetDummyTexEnv(4);
GPU_SetDummyTexEnv(5);
//setup lighting (this is specific to our shader)
// light k lives light_size*k registers after the first light's uniforms;
// float arrays are written in (w, z, y, x) order, matching gsSetUniformMatrix in gs.c
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "light_dir"), (u32*)(float[]){0.0f, -lightDir[0].z, -lightDir[0].y, -lightDir[0].x}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "light_diffuse"), (u32*)(float[]){0.2f, 0.2f, 0.2f, 0.2f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "light_ambient"), (u32*)(float[]){0.4f, 0.4f, 0.4f, 0.4f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size + shaderInstanceGetUniformLocation(shader.vertexShader, "light_dir"), (u32*)(float[]){0.0f, -lightDir[1].z, -lightDir[1].y, -lightDir[1].x}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size + shaderInstanceGetUniformLocation(shader.vertexShader, "light_diffuse"), (u32*)(float[]){0.f, 0.f, 0.5f, 0.f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size + shaderInstanceGetUniformLocation(shader.vertexShader, "light_ambient"), (u32*)(float[]){0.f, 0.f, 0.f, 0.f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size*2 + shaderInstanceGetUniformLocation(shader.vertexShader, "light_dir"), (u32*)(float[]){0.0f, -lightDir[2].z, -lightDir[2].y, -lightDir[2].x}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size*2 + shaderInstanceGetUniformLocation(shader.vertexShader, "light_diffuse"), (u32*)(float[]){0.0f, 0.5f, 0.f, 0.f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size*2 + shaderInstanceGetUniformLocation(shader.vertexShader, "light_ambient"), (u32*)(float[]){0.f, 0.f, 0.f, 0.f}, 1);
//texturing stuff
GPU_SetTexture(
GPU_TEXUNIT0, //texture unit
(u32*)osConvertVirtToPhys((u32)texData), //data buffer
128, //texture width
128, //texture height
GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params
GPU_RGBA8 //texture pixel format
);
// NOTE(review): the base address passed below is texData although the comment says "mesh buffer";
// GPU_DrawArrayDirectly (gs.c) later writes register 0x200 with the VBO address - confirm this is intended
GPU_SetAttributeBuffers(
3, //3 attributes: vertices, texcoords, and normals
(u32*)osConvertVirtToPhys((u32)texData), //mesh buffer
GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position)
GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input attribute register 1 (v1): 2 floats (texcoord)
GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // GPU Input attribute register 2 (v2): 3 floats (normal)
0xFFC,
0x210,
1,
(u32[]){0x00000000},
(u64[]){0x210},
(u8[]){3}
);
//initialize projection matrix to standard perspective stuff
gsMatrixMode(GS_PROJECTION);
gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f);
gsRotateZ(M_PI/2); //because framebuffer is sideways...
//draw object
gsMatrixMode(GS_MODELVIEW);
// push/pop keeps the object transform from leaking into the modelview stack
gsPushMatrix();
gsTranslate(position.x, position.y, position.z);
gsRotateX(angle.x);
gsRotateY(angle.y);
gsVboDraw(&vbo);
gsPopMatrix();
GPU_FinishDrawing();
}
//entry point : sets up GFX/GPU, the shader, the texture and the cube VBO, then runs the
//render loop (2D, or stereoscopic when the 3D slider is up) until START is pressed
//returns 0 on normal exit, 1 when a linear heap allocation fails during setup
//every resource acquired during setup is released again before returning
int main(int argc, char** argv)
{
    gfxInitDefault();

    //initialize GPU
    GPU_Init(NULL);

    //let GFX know we're ok with doing stereoscopic 3D rendering
    gfxSet3D(true);

    //allocate our GPU command buffers
    //they *have* to be on the linear heap
    u32 gpuCmdSize=0x40000;
    u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4);
    u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4);
    if(!gpuCmd || !gpuCmdRight)
    {
        //nothing can be rendered without command buffers
        if(gpuCmd)linearFree(gpuCmd);
        if(gpuCmdRight)linearFree(gpuCmdRight);
        gfxExit();
        return 1;
    }

    //actually reset the GPU
    GPU_Reset(NULL, gpuCmd, gpuCmdSize);

    //load our vertex shader binary
    dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size);
    shaderProgramInit(&shader);
    shaderProgramSetVsh(&shader, &dvlb->DVLE[0]);

    //initialize GS
    gsInit(&shader);

    // Flush the command buffer so that the shader upload gets executed
    GPUCMD_Finalize();
    GPUCMD_FlushAndRun(NULL);
    gspWaitForP3D();

    //create texture
    texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned
    if(!texData)
    {
        //undo everything set up so far instead of copying into a NULL buffer
        gsExit();
        shaderProgramFree(&shader);
        DVLB_Free(dvlb);
        linearFree(gpuCmdRight);
        linearFree(gpuCmd);
        gfxExit();
        return 1;
    }
    memcpy(texData, texture_bin, texture_bin_size);

    //create VBO
    gsVboInit(&vbo);
    gsVboCreate(&vbo, sizeof(modelVboData));
    gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s));
    gsVboFlushData(&vbo);

    //initialize object position and angle
    position=vect3Df(0.0f, 0.0f, -2.0f);
    angle=vect3Df(M_PI/4, M_PI/4, 0.0f);

    //background color (blue)
    u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF);

    while(aptMainLoop())
    {
        //get current 3D slider state
        float slider=CONFIG_3D_SLIDERSTATE;

        //controls
        hidScanInput();
        //START to exit to hbmenu
        if(keysDown()&KEY_START)break;

        //A/B to change vertex lighting angle
        if(keysHeld()&KEY_A)lightAngle+=0.1f;
        if(keysHeld()&KEY_B)lightAngle-=0.1f;

        //D-PAD to rotate object
        if(keysHeld()&KEY_DOWN)angle.x+=0.05f;
        if(keysHeld()&KEY_UP)angle.x-=0.05f;
        if(keysHeld()&KEY_LEFT)angle.y+=0.05f;
        if(keysHeld()&KEY_RIGHT)angle.y-=0.05f;

        //R/L to bring object closer to or move it further from the camera
        if(keysHeld()&KEY_R)position.z+=0.1f;
        if(keysHeld()&KEY_L)position.z-=0.1f;

        //generate our GPU command buffer for this frame
        gsStartFrame();
        renderFrame();
        GPUCMD_Finalize();

        if(slider>0.0f)
        {
            //new and exciting 3D !
            //make a copy of left gpu buffer
            u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset);
            memcpy(gpuCmdRight, gpuCmd, offset*4);

            //setup interaxial
            float interaxial=slider*0.12f;

            //adjust left gpu buffer for 3D !
            {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}

            //draw left framebuffer
            GPUCMD_FlushAndRun(NULL);

            //while GPU starts drawing the left buffer, adjust right one for 3D !
            GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset);
            {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}

            //we wait for the left buffer to finish drawing
            gspWaitForP3D();
            //transfer from GPU output buffer to the left framebuffer
            GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
            gspWaitForPPF();

            //we draw the right buffer, wait for it to finish and then switch back to left one
            //clear the screen
            GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
            gspWaitForPSC0();

            //draw the right framebuffer
            GPUCMD_FlushAndRun(NULL);
            gspWaitForP3D();

            //transfer from GPU output buffer to actual framebuffer
            GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
            gspWaitForPPF();
            GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
        }else{
            //boring old 2D !

            //draw the frame
            GPUCMD_FlushAndRun(NULL);
            gspWaitForP3D();

            //transfer from GPU output buffer to actual framebuffer
            GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
            gspWaitForPPF();
        }

        //clear the screen
        GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
        gspWaitForPSC0();

        gfxSwapBuffersGpu();
        gspWaitForEvent(GSPEVENT_VBlank0, true);
    }

    //release what setup allocated; the VBO goes first because gsVboDestroy frees its
    //data under the linear heap mutex, which gsExit closes
    gsVboDestroy(&vbo);
    linearFree(texData);
    texData=NULL;

    gsExit();
    shaderProgramFree(&shader);
    DVLB_Free(dvlb);

    linearFree(gpuCmdRight);
    linearFree(gpuCmd);

    gfxExit();
    return 0;
}

View File

@@ -0,0 +1,148 @@
#include <math.h>
#include <string.h>
#include "math.h"
//writes the 4x4 identity matrix into the 16 floats at m; does nothing when m is NULL
void loadIdentity44(float* m)
{
    int idx;
    if(m==NULL)return;

    //diagonal entries sit at indices 0, 5, 10 and 15, i.e. every multiple of 5
    for(idx=0; idx<16; idx++)
    {
        m[idx]=(idx%5==0)?1.0f:0.0f;
    }
}
//m = m1 * m2 for 4x4 matrices stored as 16 floats with element (row, col) at index col+row*4
//results are written as they are computed, so m should not alias m1 or m2
void multMatrix44(float* m1, float* m2, float* m) //4x4
{
    int col, row, k;
    for(col=0; col<4; col++)
    {
        for(row=0; row<4; row++)
        {
            const float* lhsRow=&m1[row*4];
            float acc=lhsRow[0]*m2[col];
            for(k=1; k<4; k++)acc+=lhsRow[k]*m2[col+k*4];
            m[col+row*4]=acc;
        }
    }
}
//replaces tm by tm * T, where T is the identity with (x, y, z) stored at indices 3, 7 and 11
void translateMatrix(float* tm, float x, float y, float z)
{
    float translation[16], product[16];

    loadIdentity44(translation);
    translation[3]=x;
    translation[7]=y;
    translation[11]=z;

    multMatrix44(tm, translation, product);
    memcpy(tm, product, sizeof(product));
}
// 00 01 02 03
// 04 05 06 07
// 08 09 10 11
// 12 13 14 15
//combines tm with a rotation of angle x (radians, as passed to cos/sin) about the X axis
//r == false : tm = tm * R ; r == true : tm = R * tm
void rotateMatrixX(float* tm, float x, bool r)
{
    const float c=cos(x);
    const float s=sin(x);
    float rm[16]={
        1.0f, 0.0f, 0.0f, 0.0f,
        0.0f,    c,    s, 0.0f,
        0.0f,   -s,    c, 0.0f,
        0.0f, 0.0f, 0.0f, 1.0f
    };
    float m[16];

    if(r)multMatrix44(rm,tm,m);
    else multMatrix44(tm,rm,m);

    memcpy(tm,m,sizeof(m));
}
//combines tm with a rotation of angle x (radians, as passed to cos/sin) about the Y axis
//r == false : tm = tm * R ; r == true : tm = R * tm
void rotateMatrixY(float* tm, float x, bool r)
{
    const float c=cos(x);
    const float s=sin(x);
    float rm[16]={
           c, 0.0f,    s, 0.0f,
        0.0f, 1.0f, 0.0f, 0.0f,
          -s, 0.0f,    c, 0.0f,
        0.0f, 0.0f, 0.0f, 1.0f
    };
    float m[16];

    if(r)multMatrix44(rm,tm,m);
    else multMatrix44(tm,rm,m);

    memcpy(tm,m,sizeof(m));
}
//combines tm with a rotation of angle x (radians, as passed to cos/sin) about the Z axis
//r == false : tm = tm * R ; r == true : tm = R * tm
void rotateMatrixZ(float* tm, float x, bool r)
{
    const float c=cos(x);
    const float s=sin(x);
    float rm[16]={
           c,    s, 0.0f, 0.0f,
          -s,    c, 0.0f, 0.0f,
        0.0f, 0.0f, 1.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 1.0f
    };
    float m[16];

    if(r)multMatrix44(rm,tm,m);
    else multMatrix44(tm,rm,m);

    memcpy(tm,m,sizeof(m));
}
//scales tm in place : in each of the four rows, element 0 is multiplied by x,
//element 1 by y and element 2 by z; element 3 is left untouched
void scaleMatrix(float* tm, float x, float y, float z)
{
    int row;
    for(row=0; row<4; row++)
    {
        float* line=&tm[row*4];
        line[0]*=x;
        line[1]*=y;
        line[2]*=z;
    }
}
//writes a perspective projection into m
//fovy : vertical field of view in radians (it is passed to tan)
//aspect : multiplied with the top extent to get the right extent
//near, far : clip plane distances
//the perspective matrix is left-multiplied by a matrix that rescales depth : z' = 0.5*z - 0.5*w
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far)
{
    float top = near*tan(fovy/2);
    float right = (top*aspect);

    float perspective[4*4]={
        near/right, 0.0f,     0.0f,                   0.0f,
        0.0f,       near/top, 0.0f,                   0.0f,
        0.0f,       0.0f,     -(far+near)/(far-near), -2.0f*(far*near)/(far-near),
        0.0f,       0.0f,     -1.0f,                  0.0f
    };

    float depthRemap[4*4]={
        1.0f, 0.0f, 0.0f,  0.0f,
        0.0f, 1.0f, 0.0f,  0.0f,
        0.0f, 0.0f, 0.5f, -0.5f,
        0.0f, 0.0f, 0.0f,  1.0f
    };

    multMatrix44(depthRemap, perspective, m);
}
//returns (m[i*4], m[i*4+1], m[i*4+2]), or (0,0,0) when m is NULL or i >= 4
//NOTE(review): with the row-by-row index layout documented above rotateMatrixX these
//three elements are consecutive in storage - confirm "column" matches the intended convention
vect3Df_s getMatrixColumn(float* m, u8 i)
{
    if(m && i<4)
    {
        const float* base=&m[i*4];
        return vect3Df(base[0],base[1],base[2]);
    }
    return vect3Df(0,0,0);
}
//returns (m[i], m[i+4], m[i+8]), or (0,0,0) when m is NULL or i >= 4
vect3Df_s getMatrixRow(float* m, u8 i)
{
    if(m && i<4)
    {
        const float* base=&m[i];
        return vect3Df(base[0],base[4],base[8]);
    }
    return vect3Df(0,0,0);
}
//returns (m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]), or (0,0,0,0) when m is NULL or i >= 4
vect4Df_s getMatrixColumn4(float* m, u8 i)
{
    if(m && i<4)
    {
        const float* base=&m[i*4];
        return vect4Df(base[0],base[1],base[2],base[3]);
    }
    return vect4Df(0,0,0,0);
}
//returns (m[i], m[i+4], m[i+8], m[i+12]), or (0,0,0,0) when m is NULL or i >= 4
vect4Df_s getMatrixRow4(float* m, u8 i)
{
    if(m && i<4)
    {
        const float* base=&m[i];
        return vect4Df(base[0],base[4],base[8],base[12]);
    }
    return vect4Df(0,0,0,0);
}

View File

@@ -0,0 +1,144 @@
#ifndef MATH_H
#define MATH_H
#include <3ds/types.h>
#include <math.h>
typedef float mtx44[4][4];
typedef float mtx33[3][3];
// 3-component vector of signed 32-bit integers.
typedef struct
{
s32 x, y, z;
}vect3Di_s;
//builds an integer vector from its three components
static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z)
{
    vect3Di_s v;
    v.x=x;
    v.y=y;
    v.z=z;
    return v;
}
//component-wise sum u+v of two integer vectors
static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v)
{
    vect3Di_s sum;
    sum.x=u.x+v.x;
    sum.y=u.y+v.y;
    sum.z=u.z+v.z;
    return sum;
}
//component-wise difference u-v of two integer vectors
static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v)
{
    vect3Di_s diff;
    diff.x=u.x-v.x;
    diff.y=u.y-v.y;
    diff.z=u.z-v.z;
    return diff;
}
//multiplies each component of the integer vector v by f
static inline vect3Di_s vmuli(vect3Di_s v, s32 f)
{
    vect3Di_s scaled;
    scaled.x=v.x*f;
    scaled.y=v.y*f;
    scaled.z=v.z*f;
    return scaled;
}
// 3-component vector of floats.
typedef struct
{
float x, y, z;
}vect3Df_s;
static inline vect3Df_s vect3Df(float x, float y, float z)
{
return (vect3Df_s){x,y,z};
}
//component-wise sum u+v of two float vectors
static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v)
{
    vect3Df_s sum;
    sum.x=u.x+v.x;
    sum.y=u.y+v.y;
    sum.z=u.z+v.z;
    return sum;
}
//component-wise difference u-v of two float vectors
static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v)
{
    vect3Df_s diff;
    diff.x=u.x-v.x;
    diff.y=u.y-v.y;
    diff.z=u.z-v.z;
    return diff;
}
//multiplies each component of the float vector v by the scalar f
static inline vect3Df_s vmulf(vect3Df_s v, float f)
{
    vect3Df_s scaled;
    scaled.x=v.x*f;
    scaled.y=v.y*f;
    scaled.z=v.z*f;
    return scaled;
}
//component-wise product of two float vectors
static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2)
{
    vect3Df_s product;
    product.x=v1.x*v2.x;
    product.y=v1.y*v2.y;
    product.z=v1.z*v2.z;
    return product;
}
//euclidean length of v
static inline float vmagf(vect3Df_s v)
{
    const float squared=v.x*v.x+v.y*v.y+v.z*v.z;
    return sqrtf(squared);
}
//euclidean distance between the points v1 and v2
static inline float vdistf(vect3Df_s v1, vect3Df_s v2)
{
    const float dx=v1.x-v2.x;
    const float dy=v1.y-v2.y;
    const float dz=v1.z-v2.z;
    return sqrtf(dx*dx+dy*dy+dz*dz);
}
//returns v divided by its euclidean length
//a zero-length input divides by zero, so the result is then not finite
static inline vect3Df_s vnormf(vect3Df_s v)
{
    const float length=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
    vect3Df_s unit;
    unit.x=v.x/length;
    unit.y=v.y/length;
    unit.z=v.z/length;
    return unit;
}
// 4-component vector of floats.
typedef struct
{
float x, y, z, w;
}vect4Df_s;
static inline vect4Df_s vect4Df(float x, float y, float z, float w)
{
return (vect4Df_s){x,y,z,w};
}
//component-wise sum u+v of two 4D float vectors
static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v)
{
    vect4Df_s sum;
    sum.x=u.x+v.x;
    sum.y=u.y+v.y;
    sum.z=u.z+v.z;
    sum.w=u.w+v.w;
    return sum;
}
//component-wise difference u-v of two 4D float vectors
static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v)
{
    vect4Df_s diff;
    diff.x=u.x-v.x;
    diff.y=u.y-v.y;
    diff.z=u.z-v.z;
    diff.w=u.w-v.w;
    return diff;
}
//multiplies each of the four components of v by the scalar f
static inline vect4Df_s vmulf4(vect4Df_s v, float f)
{
    vect4Df_s scaled;
    scaled.x=v.x*f;
    scaled.y=v.y*f;
    scaled.z=v.z*f;
    scaled.w=v.w*f;
    return scaled;
}
//4-component dot product; terms are summed in x, y, z, w order
static inline float vdotf4(vect4Df_s v1, vect4Df_s v2)
{
    float sum=v1.x*v2.x;
    sum=sum+v1.y*v2.y;
    sum=sum+v1.z*v2.z;
    sum=sum+v1.w*v2.w;
    return sum;
}
//returns v divided by its euclidean length
//a zero-length input divides by zero, so the result is then not finite
static inline vect4Df_s vnormf4(vect4Df_s v)
{
    const float length=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w);
    vect4Df_s unit;
    unit.x=v.x/length;
    unit.y=v.y/length;
    unit.z=v.z/length;
    unit.w=v.w/length;
    return unit;
}
//interstuff
//converts a float vector to an integer vector, rounding each component down (floorf)
static inline vect3Di_s vf2i(vect3Df_s v)
{
    vect3Di_s floored;
    floored.x=floorf(v.x);
    floored.y=floorf(v.y);
    floored.z=floorf(v.z);
    return floored;
}
//converts an integer vector to a float vector, component by component
static inline vect3Df_s vi2f(vect3Di_s v)
{
    vect3Df_s converted;
    converted.x=(float)v.x;
    converted.y=(float)v.y;
    converted.z=(float)v.z;
    return converted;
}
void loadIdentity44(float* m);
void multMatrix44(float* m1, float* m2, float* m);
void translateMatrix(float* tm, float x, float y, float z);
void rotateMatrixX(float* tm, float x, bool r);
void rotateMatrixY(float* tm, float x, bool r);
void rotateMatrixZ(float* tm, float x, bool r);
void scaleMatrix(float* tm, float x, float y, float z);
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far);
vect3Df_s getMatrixColumn(float* m, u8 i);
vect3Df_s getMatrixRow(float* m, u8 i);
vect4Df_s getMatrixColumn4(float* m, u8 i);
vect4Df_s getMatrixRow4(float* m, u8 i);
#endif