First Commit

This commit is contained in:
2025-02-06 22:24:29 +08:00
parent ed7df4c81e
commit 7539e6a53c
18116 changed files with 6181499 additions and 0 deletions

25
externals/nihstro/.travis.yml vendored Normal file
View File

@@ -0,0 +1,25 @@
# Travis CI configuration: builds nihstro on Linux with GCC 4.9 and Boost 1.55.
os:
  - linux
language: cpp
sudo: false
addons:
  apt:
    # Extra package sources providing the toolchain, CMake and Boost packages below.
    sources:
      - ubuntu-toolchain-r-test
      - kalakris-cmake
      - boost-latest
    packages:
      - gcc-4.9
      - g++-4.9
      - cmake
      - libboost1.55-all-dev
script:
  # Select the GCC 4.9 toolchain installed above.
  - export CC=gcc-4.9
  - export CXX=g++-4.9
  # Out-of-source CMake build.
  - mkdir build
  - cd build
  - cmake ..
  - make

49
externals/nihstro/CMakeLists.txt vendored Normal file
View File

@@ -0,0 +1,49 @@
# Build script for the nihstro shader tools.
#
# The disassembler is always built. The assembler and its unit tests are only
# built when the required Boost components are available.

# target_compile_definitions() (used for the tests below) requires CMake 2.8.11.
cmake_minimum_required(VERSION 2.8.11)

project(nihstro)

find_package(Boost COMPONENTS program_options unit_test_framework)

add_definitions(-std=c++11)

include_directories(include)

add_executable(nihstro-disassemble src/disassembler.cpp src/parser_shbin.cpp)
install(TARGETS nihstro-disassemble DESTINATION bin)

# TODO: Re-enable
# add_subdirectory(examples/inline_assembler/simple)

# TODO: What if program_options was found but not unit_test_framework?
if(Boost_FOUND)
    # Sources shared between the assembler executable and the parser tests.
    set(PARSER_SRCS src/parser_assembly.cpp
                    src/preprocessor.cpp
                    src/parser_assembly/common.cpp
                    src/parser_assembly/compare.cpp
                    src/parser_assembly/declaration.cpp
                    src/parser_assembly/flowcontrol.cpp
                    src/parser_assembly/floatop.cpp)

    include_directories(${Boost_INCLUDE_DIRS})
    add_executable(nihstro-assemble src/assembler.cpp ${PARSER_SRCS})
    target_link_libraries(nihstro-assemble ${Boost_PROGRAM_OPTIONS_LIBRARY})
    install(TARGETS nihstro-assemble DESTINATION bin)

    # tests
    if(Boost_UNIT_TEST_FRAMEWORK_FOUND)
        enable_testing()

        add_executable(test-parser src/tests/parser.cpp ${PARSER_SRCS})
        target_compile_definitions(test-parser PUBLIC -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN)
        target_link_libraries(test-parser ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY})
        add_test(ParserTests test-parser)

        add_executable(test-source-tree-iterator src/tests/source_tree_iterator.cpp)
        target_compile_definitions(test-source-tree-iterator PUBLIC -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN)
        target_link_libraries(test-source-tree-iterator ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY})
        # Each test needs its own name; this one previously reused "ParserTests",
        # which collided with the registration of test-parser above.
        add_test(SourceTreeIteratorTests test-source-tree-iterator)
    else()
        message(WARNING "Boost testing framework not found => not building assembler tests")
    endif()
else()
    message(WARNING "Boost not found => not building assembler")
endif()

102
externals/nihstro/Readme.md vendored Normal file
View File

@@ -0,0 +1,102 @@
# nihstro - 3DS shader tools
[![Travis CI Build Status](https://travis-ci.org/neobrain/nihstro.svg)](https://travis-ci.org/neobrain/nihstro)
nihstro is a collection of tools for 3DS shaders targeted at homebrew development and/or reverse engineering. Currently, it supports assembling 3DS shader binaries from assembly source code and disassembling shaders from `shbin` files. It also provides C++ interfaces for analyzing and runtime-compiling shaders.
This project is released under a three-clause BSD license. For details see license.txt.
## Components
nihstro is well-modularized to minimize dependencies for any particular use case.
### Shader assembler
nihstro includes a standalone shader assembler for generating [SHBIN](http://3dbrew.org/wiki/SHBIN) files from human-readable shader source code (the syntax of which is called "nihcode"). It is perfectly suitable for homebrew development. Shader source needs to follow the [nihcode specification](docs/nihcode_spec.md).
Usage:
`nihstro-assemble <input_filename.vsh> -o <output_filename.shbin>`
Reads vertex shader source code from the input file and generates a shader binary from it.
Further command line options:
* `-h, --help`: Show command line usage
* `-i, --input`: Explicit switch for specifying the input shader source filename (if omitted, the first switch-less argument is interpreted as the filename)
* `-o, --output`: Output shbin filename (required)
* `-e, --entrypoint`: label name in the input source at which shader execution should start (defaults to "main")
* `-g, --geo_shader`: Compile shader source as a geometry shader
### Shader disassembler
nihstro includes a standalone shader disassembler for disassembling SHBIN files and inspecting meta data (symbol information, constant values, etc).
Usage:
`nihstro-disassemble <filename.shbin>`
Parses the shader binary header and outputs basic information on the DVLE sections.
`nihstro-disassemble <filename.shbin> <DVLE index>`
Parses the shader binary header and outputs basic information, but also disassembles the shader code using the information in the indexed DVLE (main offset, symbols, etc).
### Shader bytecode and SHBIN C++ headers
The header `shader_bytecode.h` defines C++ data structures which describe raw shader binary code, while `shader_binary.h` defines the layout of SHBIN files. This allows for convenient inspection of data in C++ code. Note that these headers are currently not API stable.
### Inline assembler (experimental)
The header `inline_assembly.h` provides an experimental means for runtime generation of PICA200 shaders and SHBIN files within C++ code, so that homebrew authors don't need to ship shaders as precompiled files. While you could use nihstro's actual assembler to allow for runtime shader compilation, the inline assembler may be more convenient and is lighter on dependencies (since it doesn't require Boost to function). However, for obvious reasons it incurs a performance penalty and increased memory usage compared to offline shader compilation.
A simple [example program](examples/inline_assembler/simple) is included to illustrate how to use the inline assembler.
Note that the inline assembler is highly experimental. It may or may not work for you yet, and its API will change a lot in the future.
## Building
All nihstro components require compiler support for C++11 to work.
The C++ headers `shader_bytecode.h` and `shader_binary.h` can easily be included in other projects and hence are easy to integrate into any build system (as long as nihstro's directory structure is preserved).
For the standalone assembler and disassembler, you will also need CMake to generate build files (however it is simple to setup a different build system from scratch if need be), and at least parts of the [Boost libraries](http://www.boost.org/) installed (including Spirit, Fusion, and others).
### Installing dependencies on Windows
You will need to download [CMake](https://cmake.org/download/) and [Boost](http://www.boost.org/users/download/) from their respective download pages. Both projects provide prebuilt binaries. Note that the Boost binaries only work with MSVC, so MinGW users will need to obtain prebuilt binaries from an unofficial source (not recommended) or build Boost from source.
### Installing dependencies on Linux
Chances are your Linux distribution already has CMake and Boost installed. Use your package manager to verify this is the case and to install them if need be. Note that most distributions provide program binaries and development libraries in separate packages; for building nihstro, both are needed.
### Installing dependencies on OS X
On OS X, it is recommended that you use [Homebrew](http://brew.sh/) to install dependencies. You'll need to run the following to build nihstro:
```
brew install cmake boost
```
### Compiling on Linux, OS X, and other Unix-like systems
To compile the standalone assembler and disassembler, run the following commands from within the nihstro root directory:
```
mkdir -p build
cd build
cmake ..
make
```
This will build the `nihstro-assemble` and `nihstro-disassemble` standalone executables inside the `build` directory.
### Compiling on Windows
Start the [CMake GUI](https://cmake.org/runningcmake/). You will have to provide two paths: The source code location and the build directory. Point the former to the nihstro root directory, and the latter to a subdirectory called `build`. You may need to create this directory manually if it doesn't exist.
To make sure CMake finds your Boost installation, press the "Add Entry" button and create a new PATH variable with the name `BOOST_ROOT`. Point it towards the root directory of your boost installation. The correct folder should contain a subdirectory called `boost` with lots of further child directories.
Once you're done, hit the "Configure" button and adjust the compiler settings appropriately (usually, the default settings should be fine). If an error occurs, CMake might have trouble locating your Boost installation, and you should double-check that you installed the correct set of Boost libraries and that you set up the `BOOST_ROOT` variable correctly.
If all went fine, click "Generate" and use the generated build files in the `build` subdirectory to build nihstro. In particular if you're using MSVC, open the file `build/nihstro.sln` in Visual Studio.
## Contributing
I welcome any contributions! Just create a GitHub fork and submit your changes back via pull requests.
## Kudos
A big "thank you!" to everyone who contributed to the information on 3dbrew, which has proven amazingly useful for my 3DS related projects. Another shout-out goes to smealum's aemstro, which served as a great reference when debugging nihstro.

View File

@@ -0,0 +1,315 @@
# Shader Instruction Set
This page gives an overview over the instruction set supported by nihstro. Note that there is a similar reference list on [3dbrew](http://3dbrew.org/wiki/Shader_Instruction_Set), which documents the actual implementation on hardware though. nihstro seeks to abstract away annoying details like the fact that there are 3 different CALL instructions, and instead provides convenience shortcuts where possible without giving up flexibility.
# Table of Contents
- [Shader Instruction Set](#shader-instruction-set)
- [Arithmetic Instructions](#arithmetic-instructions)
- [Flow Control Instructions](#flow-control-instructions)
- [Special Purpose Instructions](#special-purpose-instructions)
## Arithmetic Instructions
Most arithmetic instructions take a destination operand and one or more source operands. Source operands may use any kind of swizzle mask, while destination operands may not use reordering or duplicating swizzle masks. Below you will find a short operation description for each instruction, e.g. `dest[i] = src[i]`, which means that the `i`-th source component (as specified by the swizzle mask) will be assigned to the `i`-th destination component (as specified by the swizzle mask), with `i` ranging from 1 to the number of swizzle mask components. Components not listed in the destination swizzle mask hence will not be written.
Static indexing (i.e. indexing with a constant, not to be confused with the above notation) may be done for both operand types. Source operands additionally support *dynamic indexing*, where the index depends on one of the address registers `a0`/`a1` or on the loop counter `lcnt`. Examples:
* static indexing: `c0[20]`
* dynamic indexing: `c0[2+a0]`
#### mov: Copy floating point value
Syntax: `mov dest_operand, src_operand`
Operation: `dest[i] = src[i]`
Restrictions:
* `src` and `dest` must have the same number of components
#### add: Per-component floating point sum
Syntax: `add dest_operand, src1_operand, src2_operand`
Operation: `dest[i] = src1[i] + src2[i]`
Restrictions:
* `src1`, `src2`, and `dest` must have the same number of components
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing
Notes:
* subtraction can be performed using negation: `add r0, c0, -c1`
* when chaining an addition and a multiplication, consider using `mad` instead
#### mul: Per-component floating point multiplication
Syntax: `mul dest_operand, src1_operand, src2_operand`
Operation: `dest[i] = src1[i] * src2[i]`
Restrictions:
* `src1`, `src2`, and `dest` must have the same number of components
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing
Notes:
* division can be performed by computing the reciprocal of src2 and multiplying the result: `rcp r0, c1; mul r0, c0, r0`
* when chaining an addition and a multiplication, consider using `mad` instead
#### mad: Fused multiply-add of three floating point numbers
Syntax: `mad dest_operand, src1_operand, src2_operand, src3_operand`
Operation: `dest[i] = src1[i] * src2[i] + src3[i]`
Restrictions:
* `src1`, `src2`, `src3`, and `dest` must have the same number of components
* not more than two source operands may be float uniform registers
* no dynamic indexing may be performed on any of the source operands.
Notes:
* when dynamic indexing is not avoidable, use `add` and `mul` instead
* not supported currently
#### max: Copy the greater of two floating point numbers
Syntax: `max dest_operand, src1_operand, src2_operand`
Operation: `dest[i] = max(src1[i], src2[i])`
Restrictions:
* `src1`, `src2`, and `dest` must have the same number of components
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing
#### min: Copy the smaller of two floating point numbers
Syntax: `min dest_operand, src1_operand, src2_operand`
Operation: `dest[i] = min(src1[i], src2[i])`
Restrictions:
* `src1`, `src2`, and `dest` must have the same number of components
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing
#### flr: Floating point floor
Syntax: `flr dest_operand, src_operand`
Operation: `dest[i] = floor(src[i])`
Restrictions:
* `src` and `dest` must have the same number of components
#### rcp: Floating point reciprocal
Syntax: `rcp dest_operand, src_operand`
Operation: `dest[i] = 1 / src[i]`
Restrictions:
* `src` and `dest` must have the same number of components
#### rsq: Floating point reciprocal square root
Syntax: `rsq dest_operand, src_operand`
Operation: `dest[i] = 1 / sqrt(src[i])`
Restrictions:
* `src` and `dest` must have the same number of components
#### exp: Floating point base-2 exponential
Syntax: `exp dest_operand, src_operand`
Operation: `dest[i] = exp2(src[i])`
Restrictions:
* `src` and `dest` must have the same number of components
#### log: Floating point base-2 logarithm
Syntax: `log dest_operand, src_operand`
Operation: `dest[i] = log2(src[i])`
Restrictions:
* `src` and `dest` must have the same number of components
#### dp3: Floating point 3-component dot-product
Syntax: `dp3 dest_operand, src1_operand, src2_operand`
Operation: `dest[i] = src1[0]*src2[0]+src1[1]*src2[1]+src1[2]*src2[2]`
Restrictions:
* `src1`, `src2`, and `dest` must have the same number of components
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing
#### dp4: Floating point 4-component dot-product
Syntax: `dp4 dest_operand, src1_operand, src2_operand`
Operation: `dest[i] = src1[0]*src2[0]+src1[1]*src2[1]+src1[2]*src2[2]+src1[3]*src2[3]`
Restrictions:
* `src1`, `src2`, and `dest` must have the same number of components
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing
#### dph: Floating point homogeneous dot-product
Syntax: `dph dest_operand, src1_operand, src2_operand`
Operation: `dest[i] = src1[0]*src2[0]+src1[1]*src2[1]+src1[2]*src2[2]+src2[3]`
Restrictions:
* `src1`, `src2`, and `dest` must have the same number of components
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing.
#### sge: Set to one if greater or equal
Syntax: `sge dest_operand, src1_operand, src2_operand`
Operation: `dest[i] = (src1[i] >= src2[i]) ? 1.0 : 0.0`
Restrictions:
* `src1`, `src2`, and `dest` must have the same number of components
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing
#### slt: Set to one if (strictly) less
Syntax: `slt dest_operand, src1_operand, src2_operand`
Operation: `dest[i] = (src1[i] < src2[i]) ? 1.0 : 0.0`
Restrictions:
* `src1`, `src2`, and `dest` must have the same number of components
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing
#### mova: Move to address register
Syntax: `mova src_operand`
Operation:
a0 = src.x
a1 = src.y
Restrictions:
* src_operand must be a two-component vector.
Notes:
* not supported currently
## Flow Control Instructions
These allow for non-linear code execution, e.g. by conditionally or repeatedly running code.
Some flow control instructions take a "condition" parameter. A condition is either
* a boolean uniform or
* an expression consisting of one or two conditional code components, combined via `&&` ("and") or `||` ("or"), and optionally negated. Examples: `cc.x`, `cc.y && !cc.x`
#### cmp: Compare two floating point numbers
Syntax: `cmp src1_operand, src2_operand, op1, op2`
`op1` and `op2` may be any of the strings `==` (equal), `!=` (not equal), `<` (less than), `<=` (less than or equal to), `>` (greater than), and `>=` (greater than or equal to).
Operation:
cc.x = (src1[0] op1 src2[0])
cc.y = (src1[1] op2 src2[1])
Restrictions:
* `src1` and `src2` must be two-component vectors
* it is not possible to set `cc.x` without also setting `cc.y`
* not more than one of the source operands may be a float uniform register and/or use dynamic indexing
Notes:
* this instruction is used to set conditional codes, which can be used as conditions for `if`/`jmp`/`call`/`break`.
#### if: Conditional code execution
Syntax: `if condition`
Operation:
If `condition` is true, conditionally executes the code between itself and the corresponding `else` or `endif` pseudo-instruction. Otherwise, executes the code in the `else` branch, if one is given (otherwise, skips the branch body and continues after the `endif` statement).
Restrictions:
* not more than one `else` branch may be specified (`else if` syntax is not supported)
Notes:
* all `if` branches must be closed explicitly using `endif`
* jumping out of a branch body may result in undefined behavior
Example:
if cc.x && !cc.y
// do stuff
else
if b0
// do other stuff
endif
endif
#### loop: Repeat code execution
Syntax: `loop int_uniform`
Operation:
Initialize `lcnt` to `int_uniform.y`, then process code between `loop` and `endloop` for `int_uniform.x+1` iterations in total. After each iteration, `lcnt` is incremented by `int_uniform.z`.
Restrictions:
* no swizzle mask may be applied on the given uniform
* there is no direct way of looping zero times (the easiest way is to use `break` with an extra boolean uniform)
Notes:
* `lcnt` can be used to dynamically index arrays, e.g. to implement vertex lighting with multiple light sources
#### break: Break out of current loop
Syntax: `break condition`
Operation:
If `condition` is true, break out of the current loop.
Restrictions:
* jumping out of a branch body may result in undefined behavior
#### jmp: Jump to code address
Syntax: `jmp target_label if condition`
Restrictions:
* jumping out of or into branch bodies or loops may result in undefined behavior
* there is no way to force a jump without specifying a condition
Notes:
* if you need to automatically return from a function, use `call` instead
Example:
main:
jmp my_helper_code if b0
// if not b0, do other stuff here
nop
end
my_helper_code:
// do stuff
nop
end
#### call: Jump to code address and return to caller
Possible syntaxes:
`call target_label until return_label if condition`
`call target_label until return_label`
Operation:
If `condition` is true (or none is given), jumps to `target_label` and processes shader code there until `return_label` is hit, at which point code execution jumps back to the caller.
Restrictions:
* jumping out of or into branch bodies or loops may result in undefined behavior
Notes:
* if you don't need to automatically return from a function, use `jmp` instead
Example:
main:
call my_helper_code until end_helper_code
nop
end
my_helper_code:
// do stuff here
nop
end_helper_code:
## Special Purpose Instructions
#### nop: No operation
Syntax: `nop`
Notes:
* This may be necessary before using `end` to make sure all pending write operations have been completed
#### end: Finish shader execution
Syntax: `end`
Operation:
Stops shader execution.

130
externals/nihstro/docs/nihcode_spec.md vendored Normal file
View File

@@ -0,0 +1,130 @@
# nihcode Specification
Version 0.1.
This page seeks to be a formal-ish specification of the input assembly language *nihcode* used by the nihstro shader assembler.
## Version information
This document is intended to give developers an idea of how things are expected to work. Please file issue reports for any deviations in nihstro's behavior from this specification that you find. Similarly, any ambiguities in the specification will be corrected if reported, too.
## Structure
nihcode is a sequence of statements, each of which must be put onto a separate line. There are five types of statements:
* version information statements
* include statement
* alias declaration statements,
* label declaration statements, and
* instruction statements,
each of which is described in its own section below. Additionally, C++-like comments may be inserted at any point and are started using the character sequences `//`, `#`, or `;`. Comments span the rest of the line after any of these characters. Any statement must be written on its own line.
A pseudo-code example of nihcode looks like this:
// First example shader
.version 0.1 // version information
.alias inpos v0 // alias declaration
.alias intex v1 // alias declaration
.alias pos o0 as position // alias declaration
.alias tex o1.xy as texcoord0 // alias declaration
.include "utils.h" // include utility functionality
main: // label declaration
mov o0, v0 // instruction
mov o1.xy, v1.xy // instruction
nop // instruction
end // instruction
## Shader Registers, builtin Identifiers, Swizzle Masks
A shader can access a number of different registers with different purposes. *Input registers* expose the raw input vertex attribute data, while the output vertex attributes used for rendering is written to *output registers*. External programs can pass parameters to the shader by setting *uniforms*. Additionally, a number of *temporary registers* are free for any use. There are also special-purpose registers, namely the *address registers* and the *conditional code register*.
Registers are being referred to by using *identifiers*. There is a number of builtin identifiers, each of which refers to one register. Note that most registers are vectors, i.e. they comprise multiple components, which are accessed using swizzle masks.
* `v0`-`v15`: Input registers (read-only), four-component vectors
* `r0`-`r15`: Temporary registers (read-write), four-component vectors
* `c0`-`c95`: Float uniforms (read-only), four-component vectors
* `i0`-`i3`: Integer uniforms (read-only), four-component vectors
* `b0`-`b15`: Boolean uniforms (read-only), scalar
* `o0`-`o15`: Output registers (write-only), four-component vectors
* `a0, a1, aL`: Address registers (used with MOVA and dynamic indexing), scalar
* `cc`: Conditional code register (used with CMP and flow-control instructions), two-component vector
For better readability, one can also define new identifiers, as explained below. Identifiers may only use a restricted set of names including lower- or uppercase letters (a-z, A-Z), underscores, and decimal digits (the latter two of which may not be used as the first character of the name). Additionally, an identifier may be followed by a swizzle mask, separated by the character `.` (e.g. `texcoord.zyx`). Swizzle masks allow for reordering, duplicating, and removing of one or more vector components of the identified register (without actually modifying that register).
When used with certain instructions, identifiers may be mentioned along with a sign, an array index, and/or a swizzle mask. Constructs like this are called *expressions*.
The following names are reserved identifiers, and may not be used during declarations:
* Any names starting with a `gl_` prefix
* Any names starting with a `dmp_` prefix
* Any names starting with an underscore prefix
* Any of the instruction opcodes mentioned below may not be used for the identifier name
## Aliases
### Plain Aliases (any register)
`.alias <new_identifier> <existing_identifier>{.<swizzle_mask>}`
Declares a new identifier called `new_identifier` which will refer to the same register that `existing_identifier` refers to, applying a swizzle_mask if specified. All subsequent uses of `new_identifier` are equivalent to using `existing_identifier.swizzle_mask`. Aliases of any register type may be created, however it should be noted that using output registers requires explicit assignment of an output semantic (see below).
E.g. `.alias input_texture v2.xy`
### Alias with Assignment of a Semantic (output registers)
`.alias <new_identifier> <existing_identifier>{.swizzle_mask} as <semantic>`
Declares an alias of `existing_identifier` with the name `new_identifier` and assigns the given semantic to the corresponding output register. An output semantic needs to be given to describe how the output vertex attribute is intended to be used after shader execution. `semantic` may be any of the strings `position`, `quaternion`, `color`, `texcoord0`, `texcoord1`, `texcoord2`, and `view`. If not all output register components are being written to, a swizzle mask should be used to denote the "active" components. Note that this swizzle mask may not reorder any components.
E.g. `.alias output_texcoord o1.xy as texcoord0`
### Constant Declarations (uniform registers)
scalar constants: `.alias <new_identifier> <existing_identifier> as <value>`
vector constants: `.alias <new_identifier> <existing_identifier> as (<x>, <y>{, <z>{, <w>}})`
Declares an alias of `existing_identifier` with the name `new_identifier` and assigns the given default value to it. Default values are parsed by the ctrulib API and automatically applied when enabling a shader. The number of components in the given constant must match the number of components in the specified register.
E.g. `.alias my_const c4 as (0.1, 3.2, -3.14, 0.0)`
## Label Declarations
`<labelname>:`
Declares a new label with the name `labelname` at the given source line, which can be used in flow control operations. Label names follow the same conventions as identifiers and may not share the same name with an existing identifier.
## Instruction Statements
Writes the given opcode according to the given arguments to the shader binary. There are a lot of instructions, and each of them uses one of the following formats:
Trivial operations:
`<opcode>`
Used by `else`, `emit`, `end`, `endif`, `endloop`, and `nop`.
Arithmetic operations:
`<opcode> <expression1>{, <expression2>{, <expression3>{, <expression4>}}}`
Used by `add`, `dp3`, `dp4`, `dph`, `ex2`, `flr`, `lg2`, `mad`, `max`, `min`, `mov`, `mova`, `mul`, `rcp`, `rsq`, `sge` and `slt`. The number of required expressions as well as their meaning depends on the opcode.
E.g. `mul o3.xyz, c4.xyz, v0.xyz`
Compare operation:
`cmp <expression1>, <expression2>, <op_x>, <op_y>`
Used exclusively by `cmp`. `expression1` and `expression2` must evaluate to two-component float vectors. `op_x` and `op_y` specify comparison operations for the x and y components of the given expressions, respectively. They may be `==`, `!=`, `<`, `<=`, `>` or `>=`.
E.g. `cmp c0.xy, i2.xy, <=, ==`
Flow control operations:
`<opcode> <condition>`
Used by `break`, `if` and `loop`.
`<opcode> {<target_label>} {until <return_label>} {if <condition>}`
Used by `jmp` and `call`.
`condition` may either be an identifier of a boolean uniform or a conditional expression. Examples for conditional expressions are `cc.x`, `!cc.x`, `!cc.xy`, `cc.x && !cc.y`, and `cc.x || cc.y`, where `{!}cc.xy` is equivalent to `{!}cc.x && {!}cc.y`. `target_label` and `return_label` must be label identifiers. Their meaning depends on the given opcode.
For a full instruction set reference, go to [instruction set reference](instruction_set.md). You may also want to refer to [3dbrew](http://3dbrew.org/wiki/Shader_Instruction_Set) for low-level documentation on each opcode. It is suggested that you take a look at the nihstro examples to get a better picture of how to apply that information.
## Include Statements
`.include "filename"`
Replaces the `.include` line with the contents of the given file. The filename is taken to be relative to the file it was included from.
## Version Information
`.version number`
This statement is a hint for the compiler to see which language specification the shader was written against. It may be used to toggle a compatibility assembling mode.
E.g. `.version 0.1`

View File

@@ -0,0 +1,6 @@
cube example
============
Simple port of ctrulib's gpu example to nihstro shaders. The C program code is mostly unchanged from the original, however the example shader in the data subdirectory should give you a good idea of the basic nihcode shader syntax.
Before trying to compile, make sure your NIHSTRO environment variable points to the directory nihstro-assemble resides in. Additionally, ctrulib in revision 1f52ac344d or similar is required, plus some patches to implement proper uniform setters.

View File

@@ -0,0 +1,48 @@
// Vertex shader for the cube example: transforms the input position by the
// modelview and projection matrices, forwards the texture coordinate and
// computes a simple per-vertex lighting term for the output color.
// setup constants
// myconst = (1.0, 0.0, 0.5, 1.0): .y supplies 0.0 and .w supplies 1.0 below
.alias myconst c32 as (1.0, 0.0, 0.5, 1.0)
// setup output map
.alias outpos o0 as position
.alias outcol o1 as color
.alias outtex0 o2.xyzw as texcoord0 // Would like to use .xy instead, but this is not supported by ctrulib currently
.alias outtex1 o3.xyzw as texcoord1
.alias outtex2 o4.xyzw as texcoord2
// setup uniform map, for use with SHDR_GetUniformRegister
// (each matrix alias names the first of four consecutive registers)
.alias projection c0 // -c3
.alias modelview c4 // -c7
.alias lightDirection c8
.alias lightAmbient c9
main:
// r1 = (in.pos.xyz, 1.0)
mov r1.xyz, v0.xyz
mov r1.w, myconst.w
mdvl: // tempreg = mdlvMtx * in.pos
// one dot product per matrix row; only rows 0-2 are used, w is forced to 1.0
dp4 r0.x, modelview[0], r1
dp4 r0.y, modelview[1], r1
dp4 r0.z, modelview[2], r1
mov r0.w, myconst.w
proj: // result.pos = projMtx * tempreg
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
tex: // result.texcoord = in.texcoord
mov outtex0, v1
// texcoord1/texcoord2 are unused by this example; fill them with (0, 0, 0, 1)
mov outtex1, myconst.yyyw
mov outtex2, myconst.yyyw
col: // Hacky lighting: color = ambient.xyz + clamp(dot(L,N), 1.0) * ambient.www
// dot(lightDirection, v2), with v2 serving as N in the formula above
dp3 r0.xyz, lightDirection.xyz, v2.xyz
// clamp the lighting term from below at 0.0 (myconst.y)
max r0.xyz, myconst.yyy, r0.xyz
mul r0.xyz, lightAmbient.www, r0.xyz
add outcol.xyz, lightAmbient.xyz, r0.xyz
// output alpha = 1.0
mov outcol.w, myconst.w
nop
end
endmain:

Binary file not shown.

View File

@@ -0,0 +1,16 @@
.section ".text"
.arm
.align 4
.global _vboMemcpy50
# r0 : dst
# r1 : src
# fixed size 0x50
@ Copies exactly 0x50 (80) bytes from src to dst in two load/store-multiple
@ rounds of ten registers (r2-r11 = 40 bytes) each. r0 and r1 are advanced
@ past the copied data by the writeback (!) addressing.
@ The previous register list r2-r12 moved eleven registers per round, i.e.
@ 0x58 bytes in total, reading and writing 8 bytes past the documented size.
_vboMemcpy50:
	push {r4-r11}           @ preserve the registers the copy uses as scratch
	ldmia r1!, {r2-r11}     @ bytes 0x00-0x27
	stmia r0!, {r2-r11}
	ldmia r1!, {r2-r11}     @ bytes 0x28-0x4F
	stmia r0!, {r2-r11}
	pop {r4-r11}
	bx lr

View File

@@ -0,0 +1,432 @@
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <3ds.h>
#include "gs.h"
#include "math.h"
// Capacity of bufferMatrixList: four entries per matrix-stack slot.
#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4)
// Forward declaration; defined in the matrix stack section further down.
static void gsInitMatrixStack();
// Mutex handle created in gsInit() and held around linearAlloc()/linearFree()
// by gsLinearAlloc()/gsLinearFree().
Handle linearAllocMutex;
// One register index per matrix type. This is a tentative definition; the
// initialized definition appears in the matrix stack section below.
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES];
// A matrix paired with a 32-bit offset.
// NOTE(review): what `offset` is relative to is not visible in this part of the file.
typedef struct
{
u32 offset;
mtx44 data;
}bufferMatrix_s;
// Fixed-capacity list of buffered matrices plus its length counter, which
// initBufferMatrixList() resets to 0 (presumably the number of used entries).
bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE];
int bufferMatrixListLength;
//----------------------
// GS SYSTEM STUFF
//----------------------
// Empties the buffered-matrix list by resetting its length counter to zero.
// The entries themselves are left untouched.
void initBufferMatrixList()
{
	bufferMatrixListLength = 0;
}
// Initializes the GS layer: resets the matrix stacks and the buffered-matrix
// list and creates the mutex guarding the linear allocator wrappers.
//
// shader: optional shader program. When non-NULL, the registers of its
//         "projection" and "modelview" vertex shader uniforms are looked up
//         and the program is made current via shaderProgramUse().
//
// A uniform that cannot be found (negative lookup result) leaves the
// corresponding entry of gsMatrixStackRegisters at its current value instead
// of storing the negative error code into the unsigned register table.
void gsInit(shaderProgram_s* shader)
{
	gsInitMatrixStack();
	initBufferMatrixList();
	svcCreateMutex(&linearAllocMutex, false);

	if(shader)
	{
		const int projectionReg=shaderInstanceGetUniformLocation(shader->vertexShader, "projection");
		const int modelviewReg=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview");

		// Only accept successful lookups; negative values signal "not found".
		if(projectionReg>=0)gsMatrixStackRegisters[0]=(u32)projectionReg;
		if(modelviewReg>=0)gsMatrixStackRegisters[1]=(u32)modelviewReg;

		shaderProgramUse(shader);
	}
}
// Shuts the GS layer down by closing the mutex handle created in gsInit().
void gsExit(void)
{
	svcCloseHandle(linearAllocMutex);
}
void gsStartFrame(void)
{
GPUCMD_SetBufferOffset(0);
initBufferMatrixList();
}
// Allocates `size` bytes with linearAlloc() while holding linearAllocMutex.
// Returns whatever linearAlloc() returned.
void* gsLinearAlloc(size_t size)
{
	// Wait without timeout until the mutex is acquired.
	svcWaitSynchronization(linearAllocMutex, U64_MAX);
	void* const block = linearAlloc(size);
	svcReleaseMutex(linearAllocMutex);

	return block;
}
// Releases `mem` with linearFree() while holding linearAllocMutex.
// Counterpart of gsLinearAlloc().
void gsLinearFree(void* mem)
{
	// Wait without timeout until the mutex is acquired.
	svcWaitSynchronization(linearAllocMutex, U64_MAX);

	linearFree(mem);

	svcReleaseMutex(linearAllocMutex);
}
//----------------------
// MATRIX STACK STUFF
//----------------------
// One stack of GS_MATRIXSTACK_SIZE matrices per matrix type.
static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE];
// Uniform start register per matrix type; these defaults are overwritten by gsInit
// when it is given a shader.
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04};
// Index of the current (top) matrix in each stack.
static u8 gsMatrixStackOffsets[GS_MATRIXTYPES];
// Set when a stack's current matrix changed; cleared by gsUpdateTransformation after upload.
static bool gsMatrixStackUpdated[GS_MATRIXTYPES];
// Matrix type targeted by the push/pop/transform functions (selected with gsMatrixMode).
static GS_MATRIX gsCurrentMatrixType;
// Resets every stack to depth 0 with an identity matrix, marks all of them dirty
// and selects GS_PROJECTION as the current matrix type.
static void gsInitMatrixStack()
{
	int type;

	gsCurrentMatrixType=GS_PROJECTION;
	for(type=0; type<GS_MATRIXTYPES; type++)
	{
		loadIdentity44((float*)gsMatrixStacks[type][0]);
		gsMatrixStackOffsets[type]=0;
		gsMatrixStackUpdated[type]=true;
	}
}
// Returns the current matrix of stack m as 16 floats, or NULL when m is not a
// valid matrix type.
float* gsGetMatrix(GS_MATRIX m)
{
	const bool valid=(m>=0 && m<GS_MATRIXTYPES);

	return valid ? (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]] : NULL;
}
// Overwrites the current matrix of stack m with the 16 floats at data and marks it dirty.
// Returns 0 on success, -1 when m is invalid or data is NULL.
int gsLoadMatrix(GS_MATRIX m, float* data)
{
	if(!data)return -1;

	// gsGetMatrix yields NULL exactly when m is out of range
	float* const target=gsGetMatrix(m);
	if(!target)return -1;

	memcpy(target, data, sizeof(mtx44));
	gsMatrixStackUpdated[m]=true;
	return 0;
}
int gsPushMatrix()
{
const GS_MATRIX m=gsCurrentMatrixType;
if(m<0 || m>=GS_MATRIXTYPES)return -1;
if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1;
float* cur=gsGetMatrix(m);
gsMatrixStackOffsets[m]++;
memcpy(gsGetMatrix(m), cur, sizeof(mtx44));
return 0;
}
int gsPopMatrix()
{
const GS_MATRIX m=gsCurrentMatrixType;
if(m<0 || m>=GS_MATRIXTYPES)return -1;
if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1;
gsMatrixStackOffsets[m]--;
gsMatrixStackUpdated[m]=true;
return 0;
}
// Selects which matrix stack subsequent operations act on.
// Returns 0 on success, -1 (leaving the selection unchanged) for an invalid type.
int gsMatrixMode(GS_MATRIX m)
{
	if(m>=0 && m<GS_MATRIXTYPES)
	{
		gsCurrentMatrixType=m;
		return 0;
	}
	return -1;
}
//------------------------
// MATRIX TRANSFORM STUFF
//------------------------
int gsMultMatrix(float* data)
{
if(!data)return -1;
mtx44 tmp;
multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp);
memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, sizeof(mtx44));
gsMatrixStackUpdated[gsCurrentMatrixType]=true;
return 0;
}
void gsLoadIdentity()
{
loadIdentity44(gsGetMatrix(gsCurrentMatrixType));
gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
void gsProjectionMatrix(float fovy, float aspect, float near, float far)
{
initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far);
gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
void gsRotateX(float x)
{
rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false);
gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
void gsRotateY(float y)
{
rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false);
gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
void gsRotateZ(float z)
{
rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false);
gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
void gsScale(float x, float y, float z)
{
scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
void gsTranslate(float x, float y, float z)
{
translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//----------------------
// MATRIX RENDER STUFF
//----------------------
// Uploads the 16 floats at m as four vertex shader float uniforms starting at startreg.
// Each group of four floats is sent in reverse order (w, z, y, x).
static void gsSetUniformMatrix(u32 startreg, float* m)
{
	float param[16];
	int row, col;

	for(row=0; row<4; row++)
	{
		for(col=0; col<4; col++)
		{
			param[row*4+col]=m[row*4+(3-col)];
		}
	}

	GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4);
}
static int gsUpdateTransformation()
{
GS_MATRIX m;
for(m=0; m<GS_MATRIXTYPES; m++)
{
if(gsMatrixStackUpdated[m])
{
if(m==GS_PROJECTION && bufferMatrixListLength<BUFFERMATRIXLIST_SIZE)
{
GPUCMD_GetBuffer(NULL, NULL, &bufferMatrixList[bufferMatrixListLength].offset);
memcpy(bufferMatrixList[bufferMatrixListLength].data, gsGetMatrix(m), sizeof(mtx44));
bufferMatrixListLength++;
}
gsSetUniformMatrix(gsMatrixStackRegisters[m], gsGetMatrix(m));
gsMatrixStackUpdated[m]=false;
}
}
return 0;
}
// Rewrites every recorded projection upload in the current command buffer with
// multMatrix44(recorded, transformation), then restores the buffer offset.
void gsAdjustBufferMatrices(mtx44 transformation)
{
	u32* cmdBuffer;
	u32 endOffset;
	int idx;

	GPUCMD_GetBuffer(&cmdBuffer, NULL, &endOffset);

	for(idx=0; idx<bufferMatrixListLength; idx++)
	{
		bufferMatrix_s* const entry=&bufferMatrixList[idx];

		//TODO : better check, need to account for param size
		if(entry->offset+2>=endOffset)continue;

		mtx44 adjusted;
		GPUCMD_SetBufferOffset(entry->offset);
		multMatrix44((float*)entry->data, (float*)transformation, (float*)adjusted);
		gsSetUniformMatrix(gsMatrixStackRegisters[GS_PROJECTION], (float*)adjusted);
	}

	GPUCMD_SetBufferOffset(endOffset);
}
//----------------------
// VBO STUFF
//----------------------
// Puts vbo into the empty state: no data, no precomputed commands, all counters zero.
// Returns 0 on success, -1 when vbo is NULL.
int gsVboInit(gsVbo_s* vbo)
{
	if(!vbo)return -1;
	vbo->data=NULL;
	vbo->currentSize=0;
	vbo->maxSize=0;
	// also reset the vertex count so a destroyed/reinitialized VBO carries no stale value
	vbo->numVertices=0;
	vbo->commands=NULL;
	vbo->commandsSize=0;
	return 0;
}
// Allocates size bytes of linear memory for vbo and resets its fill counters.
// Returns 0 on success, -1 when vbo is NULL or the allocation fails; on failure
// maxSize is left at 0 so the VBO never advertises capacity it does not have.
int gsVboCreate(gsVbo_s* vbo, u32 size)
{
	if(!vbo)return -1;
	vbo->data=gsLinearAlloc(size);
	vbo->numVertices=0;
	vbo->currentSize=0;
	if(!vbo->data)
	{
		vbo->maxSize=0;
		return -1;
	}
	vbo->maxSize=size;
	return 0;
}
// Returns the address currentSize bytes into vbo->data (the next write position),
// or NULL when vbo is NULL.
void* gsVboGetOffset(gsVbo_s* vbo)
{
	if(vbo)
	{
		u8* const base=(u8*)vbo->data;
		return (void*)&base[vbo->currentSize];
	}
	return NULL;
}
// Appends size bytes from data to vbo and adds units to its vertex count.
// Returns 0 on success, -1 on NULL/empty arguments, when the VBO has no storage,
// or when the remaining capacity is smaller than size.
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units)
{
	if(!vbo || !data || !size)return -1;
	if(!vbo->data)return -1; // never created, or its allocation failed
	// compare in unsigned space; the first test keeps the subtraction from wrapping
	if(vbo->currentSize>vbo->maxSize || size>vbo->maxSize-vbo->currentSize)return -1;
	memcpy(gsVboGetOffset(vbo), data, size);
	vbo->currentSize+=size;
	vbo->numVertices+=units;
	return 0;
}
// Currently a no-op apart from argument validation: returns -1 for NULL, 0 otherwise.
int gsVboFlushData(gsVbo_s* vbo)
{
	//unnecessary if we use flushAndRun
	// GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize);
	return vbo ? 0 : -1;
}
// Frees the precomputed command list and the vertex storage (when present) and
// returns vbo to the state set by gsVboInit. Returns -1 when vbo is NULL.
int gsVboDestroy(gsVbo_s* vbo)
{
	if(!vbo)return -1;

	if(vbo->commands!=NULL)
	{
		free(vbo->commands);
	}
	if(vbo->data!=NULL)
	{
		gsLinearFree(vbo->data);
	}

	gsVboInit(vbo);
	return 0;
}
extern u32 debugValue[];
// Emits the GPU commands for a non-indexed draw of n vertices read from data.
// Ten single-parameter commands are appended to the current command buffer;
// the order of the writes below is kept exactly as is.
// NOTE(review): the meaning of the undocumented register values is not visible here — confirm against GPU register documentation before changing any of them.
void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n)
{
//set attribute buffer address
GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3);
//set primitive type
GPUCMD_AddSingleParam(0x0002025E, primitive);
GPUCMD_AddSingleParam(0x0002025F, 0x00000001);
//index buffer not used for drawArrays but 0x000F0227 still required
GPUCMD_AddSingleParam(0x000F0227, 0x80000000);
//pass number of vertices
GPUCMD_AddSingleParam(0x000F0228, n);
GPUCMD_AddSingleParam(0x00010253, 0x00000001);
GPUCMD_AddSingleParam(0x00010245, 0x00000000);
GPUCMD_AddSingleParam(0x000F022E, 0x00000001);
GPUCMD_AddSingleParam(0x00010245, 0x00000001);
GPUCMD_AddSingleParam(0x000F0231, 0x00000001);
// GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff
}
//not thread safe
// Records the draw commands for vbo once into a heap buffer (vbo->commands,
// vbo->commandsSize words) by temporarily redirecting the GPU command buffer
// to a static scratch array.
// Returns 0 on success, -1 when vbo is NULL, already has commands, or the
// allocation fails. The caller's command buffer is restored on every path.
int gsVboPrecomputeCommands(gsVbo_s* vbo)
{
	if(!vbo || vbo->commands)return -1;
	static u32 tmpBuffer[128];
	u32* savedAdr; u32 savedSize, savedOffset;
	GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset);
	GPUCMD_SetBuffer(tmpBuffer, 128, 0);
	GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
	GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize);
	// restore before anything can fail, so the scratch buffer is never left installed
	GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset);
	vbo->commands=memalign(0x4, vbo->commandsSize*4);
	if(!vbo->commands)
	{
		vbo->commandsSize=0;
		return -1;
	}
	memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4);
	return 0;
}
extern u32* gpuCmdBuf;
extern u32 gpuCmdBufSize;
extern u32 gpuCmdBufOffset;
void _vboMemcpy50(u32* dst, u32* src);
// Appends size words from cmd at the current command buffer offset and advances
// the offset. Does nothing when cmd is NULL or size is 0.
// A payload of exactly 0x50 bytes goes through _vboMemcpy50; everything else
// through memcpy. No check against gpuCmdBufSize is made here.
void _GPUCMD_AddRawCommands(u32* cmd, u32 size)
{
	if(!cmd || !size)return;

	u32* const dst=&gpuCmdBuf[gpuCmdBufOffset];
	const u32 byteCount=size*4;

	if(byteCount==0x50)
	{
		_vboMemcpy50(dst, cmd);
	}else{
		memcpy(dst, cmd, byteCount);
	}

	gpuCmdBufOffset+=size;
}
// Draws vbo as triangles: uploads dirty matrices, makes sure the draw commands
// are precomputed, then replays them (or emits them directly when no
// precomputed list is available).
// Returns -1 when vbo is NULL or has no data/size, 0 otherwise.
int gsVboDraw(gsVbo_s* vbo)
{
	if(!vbo)return -1;
	if(!vbo->data || !vbo->currentSize || !vbo->maxSize)return -1;

	gsUpdateTransformation();
	gsVboPrecomputeCommands(vbo); // returns immediately when commands already exist

	if(!vbo->commands)
	{
		GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
		return 0;
	}

	_GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize);
	return 0;
}

View File

@@ -0,0 +1,59 @@
#ifndef GS_H
#define GS_H
#include <3ds.h>
#include "math.h"
// Number of matrices each matrix stack can hold.
#define GS_MATRIXSTACK_SIZE (8)
// Matrix stacks managed by GS; GS_MATRIXTYPES is the number of stacks, not a stack itself.
typedef enum
{
GS_PROJECTION = 0,
GS_MODELVIEW = 1,
GS_MATRIXTYPES
}GS_MATRIX;
// A vertex buffer plus an optional precomputed draw command list.
typedef struct
{
u8* data; // vertex storage allocated by gsVboCreate
u32 currentSize; // in bytes
u32 maxSize; // in bytes
u32 numVertices; // sum of the `units` passed to gsVboAddData
u32* commands; // precomputed draw commands, NULL until first built
u32 commandsSize; // length of commands in u32 words
}gsVbo_s;
// --- system ---
void gsInit(shaderProgram_s* shader);
void gsExit(void);
void gsStartFrame(void);
void gsAdjustBufferMatrices(mtx44 transformation);
// --- mutex-guarded linear heap allocation ---
void* gsLinearAlloc(size_t size);
void gsLinearFree(void* mem);
// --- matrix stacks (int-returning functions give 0 on success, -1 on error) ---
float* gsGetMatrix(GS_MATRIX m);
int gsLoadMatrix(GS_MATRIX m, float* data);
int gsPushMatrix();
int gsPopMatrix();
int gsMatrixMode(GS_MATRIX m);
// --- transforms applied to the current matrix of the selected stack ---
void gsLoadIdentity();
void gsProjectionMatrix(float fovy, float aspect, float near, float far);
void gsRotateX(float x);
void gsRotateY(float y);
void gsRotateZ(float z);
void gsScale(float x, float y, float z);
void gsTranslate(float x, float y, float z);
int gsMultMatrix(float* data);
// --- vertex buffers (0 on success, -1 on error) ---
int gsVboInit(gsVbo_s* vbo);
int gsVboCreate(gsVbo_s* vbo, u32 size);
int gsVboFlushData(gsVbo_s* vbo);
int gsVboDestroy(gsVbo_s* vbo);
int gsVboDraw(gsVbo_s* vbo);
void* gsVboGetOffset(gsVbo_s* vbo);
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units);
#endif

View File

@@ -0,0 +1,354 @@
///////////////////////////////////////
// GPU example //
///////////////////////////////////////
//this example is meant to show how to use the GPU to render a 3D object
//it also shows how to do stereoscopic 3D
//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft
//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <3ds.h>
#include "math.h"
#include "gs.h"
#include "test_vsh_shbin.h"
#include "texture_bin.h"
//will be moved into ctrulib at some point
//current 3D slider position, read from a fixed address
//volatile so that every use performs a fresh read
#define CONFIG_3D_SLIDERSTATE (*(volatile float*)0x1FF81080)
//packs four 8-bit channels as 0xRRGGBBAA
//the masks are unsigned so that shifting a red value >= 0x80 by 24 never overflows a signed int
#define RGBA8(r,g,b,a) ((((r)&0xFFu)<<24) | (((g)&0xFFu)<<16) | (((b)&0xFFu)<<8) | (((a)&0xFFu)<<0))
//transfer from GPU output buffer to actual framebuffer flags
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X))
//shader structure
//dvlb is the parsed shader binary; shader is the program built from its first DVLE (see main)
DVLB_s* dvlb;
shaderProgram_s shader;
//texture data pointer (allocated and filled from texture_bin in main)
u32* texData;
//vbo structure holding the cube vertices
gsVbo_s vbo;
//GPU framebuffer address
u32* gpuOut=(u32*)0x1F119400;
//GPU depth buffer address
u32* gpuDOut=(u32*)0x1F370800;
//angle for the vertex lighting (cf test.vsh); fed to cos()/sin() in renderFrame
float lightAngle;
//object position and rotation angle
vect3Df_s position, angle;
//vertex structure
//8 floats per vertex, in the same order as the three attributes declared in
//renderFrame's GPU_SetAttributeBuffers call (3 position, 2 texcoord, 3 normal)
typedef struct
{
vect3Df_s position;
float texcoord[2];
vect3Df_s normal;
}vertex_s;
//object data (cube)
//obviously this doesn't have to be defined manually, but we will here for the purposes of the example
//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z}
//we're drawing triangles so three lines = one triangle
//36 vertices in total: 6 faces x 2 triangles x 3 vertices; every vertex of a face carries that face's normal
//NOTE(review): initializing the float[2] member from a (float[]){...} compound literal looks like a compiler extension - confirm before building with another toolchain
const vertex_s modelVboData[]=
{
//first face (PZ)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
//second face (MZ)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
//third face (PX)
//first triangle
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
//fourth face (MX)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
//second triangle
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
//fifth face (PY)
//first triangle
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
//sixth face (MY)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
};
//stolen from staplebutt
//configures texture environment stage `num` with GPU_PREVIOUS as its first source
//and GPU_REPLACE as the combine function for both color and alpha
void GPU_SetDummyTexEnv(u8 num)
{
	const u32 previousOnly=GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
	const u32 defaultOperands=GPU_TEVOPERANDS(0,0,0);

	GPU_SetTexEnv(num,
		previousOnly, previousOnly,
		defaultOperands, defaultOperands,
		GPU_REPLACE, GPU_REPLACE,
		0xFFFFFFFF);
}
// topscreen
// Builds the GPU command list for one frame: fixed-function state, texture and
// attribute setup, lighting uniforms, projection/modelview matrices, then the
// cube draw. Commands are only queued here; main() submits them.
void renderFrame()
{
GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);
GPU_DepthMap(-1.0f, 0.0f);
GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
GPU_SetBlendingColor(0,0,0,0);
GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
GPUCMD_AddWrite(GPUREG_0118, 0);
GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
GPU_SetTextureEnable(GPU_TEXUNIT0);
// stage 0 modulates texture 0 with the vertex color; stages 1-5 get the dummy setup
GPU_SetTexEnv(0,
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVOPERANDS(0,0,0),
GPU_TEVOPERANDS(0,0,0),
GPU_MODULATE, GPU_MODULATE,
0xFFFFFFFF);
GPU_SetDummyTexEnv(1);
GPU_SetDummyTexEnv(2);
GPU_SetDummyTexEnv(3);
GPU_SetDummyTexEnv(4);
GPU_SetDummyTexEnv(5);
//texturing stuff
GPU_SetTexture(
GPU_TEXUNIT0, //texture unit
(u32*)osConvertVirtToPhys((u32)texData), //data buffer
128, //texture width
128, //texture height
GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params
GPU_RGBA8 //texture pixel format
);
GPU_SetAttributeBuffers(
3, //3 attributes: vertices, texcoords, and normals
// NOTE(review): texData (not vbo.data) is passed here; GPU_DrawArrayDirectly in gs.c writes the VBO address to register 0x0200 at draw time - confirm this base value is intentionally a placeholder
(u32*)osConvertVirtToPhys((u32)texData), //mesh buffer
GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position)
GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input attribute register 1 (v1): 2 floats (texcoord)
GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // GPU Input attribute register 2 (v2): 3 floats (normal)
0xFFC,
0x210,
1,
(u32[]){0x00000000},
(u64[]){0x210},
(u8[]){3}
);
//setup lighting (this is specific to our shader)
// uniform components are written in reverse order: {w, z, y, x}
vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle)));
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightDirection"), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightAmbient"), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1);
//initialize projection matrix to standard perspective stuff
gsMatrixMode(GS_PROJECTION);
gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f);
gsRotateZ(M_PI/2); //because framebuffer is sideways...
//draw object
// push/pop keeps the per-object transform from accumulating in the modelview stack
gsMatrixMode(GS_MODELVIEW);
gsPushMatrix();
gsTranslate(position.x, position.y, position.z);
gsRotateX(angle.x);
gsRotateY(angle.y);
gsVboDraw(&vbo);
gsPopMatrix();
GPU_FinishDrawing();
}
// Example entry point: initializes graphics, the shader, GS, the texture and the
// cube VBO, then renders every frame in 2D or stereoscopic 3D depending on the
// 3D slider until START is pressed.
// Returns 0 on a normal exit and 1 when a required allocation failed.
int main(int argc, char** argv)
{
	gfxInitDefault();
	//initialize GPU
	GPU_Init(NULL);
	//let GFX know we're ok with doing stereoscopic 3D rendering
	gfxSet3D(true);
	//allocate our GPU command buffers
	//they *have* to be on the linear heap
	u32 gpuCmdSize=0x40000;
	u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4);
	u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4);
	if(!gpuCmd || !gpuCmdRight)
	{
		//nothing can be rendered without command buffers
		if(gpuCmd)linearFree(gpuCmd);
		if(gpuCmdRight)linearFree(gpuCmdRight);
		gfxExit();
		return 1;
	}
	//actually reset the GPU
	GPU_Reset(NULL, gpuCmd, gpuCmdSize);
	//load our vertex shader binary
	dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size);
	shaderProgramInit(&shader);
	shaderProgramSetVsh(&shader, &dvlb->DVLE[0]);
	//initialize GS
	gsInit(&shader);
	// Flush the command buffer so that the shader upload gets executed
	GPUCMD_Finalize();
	GPUCMD_FlushAndRun(NULL);
	gspWaitForP3D();
	//create texture
	texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned
	//create VBO
	gsVboInit(&vbo);
	gsVboCreate(&vbo, sizeof(modelVboData));
	//only enter the render loop when both the texture and the VBO storage exist
	int ret=1;
	if(texData && vbo.data)
	{
		ret=0;
		memcpy(texData, texture_bin, texture_bin_size);
		gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s));
		gsVboFlushData(&vbo);
		//initialize object position and angle
		position=vect3Df(0.0f, 0.0f, -2.0f);
		angle=vect3Df(M_PI/4, M_PI/4, 0.0f);
		//background color (blue)
		u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF);
		while(aptMainLoop())
		{
			//get current 3D slider state
			float slider=CONFIG_3D_SLIDERSTATE;
			//controls
			hidScanInput();
			//START to exit to hbmenu
			if(keysDown()&KEY_START)break;
			const u32 held=keysHeld();
			//A/B to change vertex lighting angle
			if(held&KEY_A)lightAngle+=0.1f;
			if(held&KEY_B)lightAngle-=0.1f;
			//D-PAD to rotate object
			if(held&KEY_DOWN)angle.x+=0.05f;
			if(held&KEY_UP)angle.x-=0.05f;
			if(held&KEY_LEFT)angle.y+=0.05f;
			if(held&KEY_RIGHT)angle.y-=0.05f;
			//R/L to bring object closer to or move it further from the camera
			if(held&KEY_R)position.z+=0.1f;
			if(held&KEY_L)position.z-=0.1f;
			//generate our GPU command buffer for this frame
			gsStartFrame();
			renderFrame();
			GPUCMD_Finalize();
			if(slider>0.0f)
			{
				//new and exciting 3D !
				//make a copy of left gpu buffer
				u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset);
				memcpy(gpuCmdRight, gpuCmd, offset*4);
				//setup interaxial
				float interaxial=slider*0.12f;
				//adjust left gpu buffer for 3D !
				{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
				//draw left framebuffer
				GPUCMD_FlushAndRun(NULL);
				//while GPU starts drawing the left buffer, adjust right one for 3D !
				GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset);
				{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
				//we wait for the left buffer to finish drawing
				gspWaitForP3D();
				//transfer from GPU output buffer to the left framebuffer
				GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
				gspWaitForPPF();
				//we draw the right buffer, wait for it to finish and then switch back to left one
				//clear the screen
				GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
				gspWaitForPSC0();
				//draw the right framebuffer
				GPUCMD_FlushAndRun(NULL);
				gspWaitForP3D();
				//transfer from GPU output buffer to actual framebuffer
				GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
				gspWaitForPPF();
				GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
			}else{
				//boring old 2D !
				//draw the frame
				GPUCMD_FlushAndRun(NULL);
				gspWaitForP3D();
				//transfer from GPU output buffer to actual framebuffer
				GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
				gspWaitForPPF();
			}
			//clear the screen
			GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
			gspWaitForPSC0();
			gfxSwapBuffersGpu();
			gspWaitForEvent(GSPEVENT_VBlank0, true);
		}
	}
	//release everything acquired above
	//the VBO goes first because gsVboDestroy uses the mutex that gsExit closes
	gsVboDestroy(&vbo);
	if(texData)linearFree(texData);
	texData=NULL;
	gsExit();
	shaderProgramFree(&shader);
	DVLB_Free(dvlb);
	linearFree(gpuCmdRight);
	linearFree(gpuCmd);
	gfxExit();
	return ret;
}

View File

@@ -0,0 +1,148 @@
#include <math.h>
#include <string.h>
#include "math.h"
void loadIdentity44(float* m)
{
if(!m)return;
memset(m, 0x00, 16*4);
m[0]=m[5]=m[10]=m[15]=1.0f;
}
// m = m1 * m2 for 4x4 matrices stored as 16 floats, element (row, col) at [col+row*4].
// m must not alias m1 or m2, since it is written while they are still being read.
void multMatrix44(float* m1, float* m2, float* m) //4x4
{
	int row, col;

	for(col=0; col<4; col++)
	{
		for(row=0; row<4; row++)
		{
			const float* const lhs=&m1[row*4]; // row of m1
			const float* const rhs=&m2[col];   // column of m2 (stride 4)

			m[col+row*4]=(lhs[0]*rhs[0*4])
			            +(lhs[1]*rhs[1*4])
			            +(lhs[2]*rhs[2*4])
			            +(lhs[3]*rhs[3*4]);
		}
	}
}
// Replaces tm with multMatrix44(tm, T), where T is the identity with (x, y, z)
// stored at indices 3, 7 and 11.
void translateMatrix(float* tm, float x, float y, float z)
{
	float translation[16]={
		1.0f, 0.0f, 0.0f, x,
		0.0f, 1.0f, 0.0f, y,
		0.0f, 0.0f, 1.0f, z,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float product[16];

	multMatrix44(tm, translation, product);
	memcpy(tm, product, sizeof(product));
}
// 00 01 02 03
// 04 05 06 07
// 08 09 10 11
// 12 13 14 15
// Replaces tm with tm*R (r false) or R*tm (r true), where R is built from
// cos(x)/sin(x) in elements 5, 6, 9 and 10 (x in radians).
void rotateMatrixX(float* tm, float x, bool r)
{
	const float c=cos(x);
	const float s=sin(x);
	float rm[16]={
		1.0f, 0.0f, 0.0f, 0.0f,
		0.0f, c,    s,    0.0f,
		0.0f, -s,   c,    0.0f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float m[16];

	if(r)multMatrix44(rm,tm,m);
	else multMatrix44(tm,rm,m);

	memcpy(tm,m,sizeof(m));
}
// Replaces tm with tm*R (r false) or R*tm (r true), where R is built from
// cos(x)/sin(x) in elements 0, 2, 8 and 10 (x in radians).
void rotateMatrixY(float* tm, float x, bool r)
{
	const float c=cos(x);
	const float s=sin(x);
	float rm[16]={
		c,    0.0f, s,    0.0f,
		0.0f, 1.0f, 0.0f, 0.0f,
		-s,   0.0f, c,    0.0f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float m[16];

	if(r)multMatrix44(rm,tm,m);
	else multMatrix44(tm,rm,m);

	memcpy(tm,m,sizeof(m));
}
// Replaces tm with tm*R (r false) or R*tm (r true), where R is built from
// cos(x)/sin(x) in elements 0, 1, 4 and 5 (x in radians).
void rotateMatrixZ(float* tm, float x, bool r)
{
	const float c=cos(x);
	const float s=sin(x);
	float rm[16]={
		c,    s,    0.0f, 0.0f,
		-s,   c,    0.0f, 0.0f,
		0.0f, 0.0f, 1.0f, 0.0f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float m[16];

	if(r)multMatrix44(rm,tm,m);
	else multMatrix44(tm,rm,m);

	memcpy(tm,m,sizeof(m));
}
// Scales tm in place: in each group of four floats the first is multiplied by x,
// the second by y and the third by z; the fourth is left untouched.
void scaleMatrix(float* tm, float x, float y, float z)
{
	int row;

	for(row=0; row<4; row++)
	{
		float* const r=&tm[row*4];
		r[0]*=x;
		r[1]*=y;
		r[2]*=z;
	}
}
// Writes a perspective projection into m (16 floats).
// A perspective matrix is built from fovy (radians, passed to tan), aspect, near
// and far, then pre-multiplied by a matrix that maps z to 0.5*z - 0.5*w.
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far)
{
	const float top=near*tan(fovy/2);
	const float right=(top*aspect);

	float perspective[4*4]={
		near/right, 0.0f,     0.0f,                   0.0f,
		0.0f,       near/top, 0.0f,                   0.0f,
		0.0f,       0.0f,     -(far+near)/(far-near), -2.0f*(far*near)/(far-near),
		0.0f,       0.0f,     -1.0f,                  0.0f,
	};

	// identity except for the z row
	float depthRemap[4*4]={
		1.0f, 0.0f, 0.0f, 0.0f,
		0.0f, 1.0f, 0.0f, 0.0f,
		0.0f, 0.0f, 0.5f, -0.5f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};

	multMatrix44(depthRemap, perspective, m);
}
// Returns the first three of the four consecutive floats starting at m[i*4];
// a zero vector when m is NULL or i >= 4.
vect3Df_s getMatrixColumn(float* m, u8 i)
{
	if(m && i<4)
	{
		const float* const base=&m[i*4];
		return vect3Df(base[0],base[1],base[2]);
	}
	return vect3Df(0,0,0);
}
// Returns (m[i], m[i+4], m[i+8]); a zero vector when m is NULL or i >= 4.
vect3Df_s getMatrixRow(float* m, u8 i)
{
	if(m && i<4)
	{
		const float* const base=&m[i];
		return vect3Df(base[0*4],base[1*4],base[2*4]);
	}
	return vect3Df(0,0,0);
}
// Returns the four consecutive floats starting at m[i*4];
// a zero vector when m is NULL or i >= 4.
vect4Df_s getMatrixColumn4(float* m, u8 i)
{
	if(m && i<4)
	{
		const float* const base=&m[i*4];
		return vect4Df(base[0],base[1],base[2],base[3]);
	}
	return vect4Df(0,0,0,0);
}
// Returns (m[i], m[i+4], m[i+8], m[i+12]); a zero vector when m is NULL or i >= 4.
vect4Df_s getMatrixRow4(float* m, u8 i)
{
	if(m && i<4)
	{
		const float* const base=&m[i];
		return vect4Df(base[0*4],base[1*4],base[2*4],base[3*4]);
	}
	return vect4Df(0,0,0,0);
}

View File

@@ -0,0 +1,144 @@
#ifndef MATH_H
#define MATH_H
#include <3ds/types.h>
#include <math.h>
// 4x4 float matrix; the matrix functions declared in this header take it as a flat float* of 16 elements.
typedef float mtx44[4][4];
// 3x3 float matrix.
typedef float mtx33[3][3];
// 3-component signed 32-bit integer vector.
typedef struct
{
	s32 x, y, z;
}vect3Di_s;
// Builds a vector from its components.
static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z)
{
	vect3Di_s out;
	out.x=x; out.y=y; out.z=z;
	return out;
}
// Component-wise u + v.
static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v)
{
	vect3Di_s out;
	out.x=u.x+v.x;
	out.y=u.y+v.y;
	out.z=u.z+v.z;
	return out;
}
// Component-wise u - v.
static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v)
{
	vect3Di_s out;
	out.x=u.x-v.x;
	out.y=u.y-v.y;
	out.z=u.z-v.z;
	return out;
}
// Every component of v multiplied by f.
static inline vect3Di_s vmuli(vect3Di_s v, s32 f)
{
	vect3Di_s out;
	out.x=v.x*f;
	out.y=v.y*f;
	out.z=v.z*f;
	return out;
}
typedef struct
{
float x, y, z;
}vect3Df_s;
static inline vect3Df_s vect3Df(float x, float y, float z)
{
return (vect3Df_s){x,y,z};
}
static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v)
{
return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z};
}
static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v)
{
return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z};
}
static inline vect3Df_s vmulf(vect3Df_s v, float f)
{
return (vect3Df_s){v.x*f,v.y*f,v.z*f};
}
static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2)
{
return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z};
}
static inline float vmagf(vect3Df_s v)
{
return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
}
static inline float vdistf(vect3Df_s v1, vect3Df_s v2)
{
return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z));
}
static inline vect3Df_s vnormf(vect3Df_s v)
{
const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
return (vect3Df_s){v.x/l,v.y/l,v.z/l};
}
typedef struct
{
float x, y, z, w;
}vect4Df_s;
static inline vect4Df_s vect4Df(float x, float y, float z, float w)
{
return (vect4Df_s){x,y,z,w};
}
static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v)
{
return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w};
}
static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v)
{
return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w};
}
static inline vect4Df_s vmulf4(vect4Df_s v, float f)
{
return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f};
}
static inline float vdotf4(vect4Df_s v1, vect4Df_s v2)
{
return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w;
}
static inline vect4Df_s vnormf4(vect4Df_s v)
{
const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w);
return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l};
}
//interstuff
// Float vector to integer vector; each component is floored before conversion.
static inline vect3Di_s vf2i(vect3Df_s v)
{
	vect3Di_s out;
	out.x=(s32)floorf(v.x);
	out.y=(s32)floorf(v.y);
	out.z=(s32)floorf(v.z);
	return out;
}
// Integer vector to float vector.
static inline vect3Df_s vi2f(vect3Di_s v)
{
	vect3Df_s out;
	out.x=(float)v.x;
	out.y=(float)v.y;
	out.z=(float)v.z;
	return out;
}
// Matrix helpers; every float* parameter below points to 16 floats (one 4x4 matrix).
void loadIdentity44(float* m);
// m = m1 * m2
void multMatrix44(float* m1, float* m2, float* m);
void translateMatrix(float* tm, float x, float y, float z);
// r selects the multiplication order: false -> tm*R, true -> R*tm
void rotateMatrixX(float* tm, float x, bool r);
void rotateMatrixY(float* tm, float x, bool r);
void rotateMatrixZ(float* tm, float x, bool r);
void scaleMatrix(float* tm, float x, float y, float z);
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far);
// Element accessors; i must be < 4, otherwise a zero vector is returned.
vect3Df_s getMatrixColumn(float* m, u8 i);
vect3Df_s getMatrixRow(float* m, u8 i);
vect4Df_s getMatrixColumn4(float* m, u8 i);
vect4Df_s getMatrixRow4(float* m, u8 i);
#endif

View File

@@ -0,0 +1,6 @@
cube lighting example
=====================
An example similar to cube, but with some rudimentary vertex lighting effects. The shader used is somewhat more complex and involves a LOOP to implement multiple light sources.
Before compiling, make sure the NIHSTRO environment variable points to the directory that contains nihstro-assemble. Additionally, ctrulib at revision 1f52ac344d (or similar) is required, plus some patches to implement proper uniform setters.

View File

@@ -0,0 +1,66 @@
// Vertex shader: transforms the position by modelview then projection, passes
// texcoord0 through, and accumulates per-light diffuse + ambient color in a loop.
// setup constants
// myconst components: x = 1.0, y = 0.0, z = 0.5, w = 1.0
.alias myconst c32 as (1.0, 0.0, 0.5, 1.0)
// setup output map
.alias outpos o0 as position
.alias outcol o1 as color
.alias outtex0 o2.xyzw as texcoord0 // Would like to use .xy instead, but this is not supported by ctrulib currently
.alias outtex1 o3.xyzw as texcoord1
.alias outtex2 o4.xyzw as texcoord2
// setup uniform map, for use with SHDR_GetUniformRegister
.alias projection c0-c3
.alias modelview c4-c7
.alias num_lights i1
// per-light uniforms occupy three consecutive registers (dir, diffuse, ambient)
// NOTE(review): light_dir[lcnt] only reaches light_dir2 (c11) if lcnt advances by 3 per iteration - presumably configured through i1 by the host; verify
.alias light_dir c8
.alias light_diffuse c9
.alias light_ambient c10
.alias light_dir2 c11
.alias light_diffuse2 c12
.alias light_ambient2 c13
main:
// r1 = (in.pos.xyz, 1.0)
mov r1.xyz, v0.xyz
mov r1.w, myconst.w
mdvl: // tempreg = mdlvMtx * in.pos
dp4 r0.x, modelview[0], r1
dp4 r0.y, modelview[1], r1
dp4 r0.z, modelview[2], r1
// w is set to 1.0 directly instead of using modelview[3]
mov r0.w, myconst.w
proj: // result.pos = projMtx * tempreg
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
tex: // result.texcoord = in.texcoord
mov outtex0, v1.xyzw
// unused texcoord outputs are set to (0, 0, 0, 1)
mov outtex1, myconst.yyyw
mov outtex2, myconst.yyyw
lighting: // color = sum over all lights(diffuse * clamp(dot(L,N),0) + ambient)
// accumulator starts at (0, 0, 0, 1)
mov r0, myconst.yyyw
loop num_lights
// NOTE(review): r1.xyz is overwritten by the dp3 on the next line, so this mov looks redundant - confirm before removing
mov r1.xyz, myconst.yyy
dp3 r1.xyz, light_dir[lcnt].xyz, v2.xyz
max r1.xyz, r1.xyz, myconst.yyy
mul r1.xyz, r1.xyz, light_diffuse[lcnt].xyz
add r1.xyz, r1.xyz, light_ambient[lcnt].xyz
add r0.xyz, r1.xyz, r0.xyz
nop
endloop
// clamp the accumulated color to 1.0
min r0.xyz, r0.xyz, myconst.xxx
mov outcol, r0
nop
end
endmain:

Binary file not shown.

View File

@@ -0,0 +1,16 @@
.section ".text"
.arm
.align 4
.global _vboMemcpy50
# void _vboMemcpy50(u32* dst, u32* src)
# r0 : dst
# r1 : src
# fixed size 0x50 (20 words)
# The copy is done as two bursts of ten words through r2-r11. Using r2-r12
# (eleven registers) would move 0x58 bytes and run 8 bytes past both buffers.
# r4-r11 are spilled because they are used as scratch here.
_vboMemcpy50:
push {r4-r11}
ldmia r1!, {r2-r11}
stmia r0!, {r2-r11}
ldmia r1!, {r2-r11}
stmia r0!, {r2-r11}
pop {r4-r11}
bx lr

View File

@@ -0,0 +1,432 @@
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <3ds.h>
#include "gs.h"
#include "math.h"
// Capacity of bufferMatrixList.
#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4)
// Forward declaration; the definition lives in the matrix stack section of this file.
static void gsInitMatrixStack();
// Mutex held around linearAlloc/linearFree by gsLinearAlloc/gsLinearFree.
Handle linearAllocMutex;
// Tentative definition; the initialized definition appears with the matrix stack state.
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES];
// A projection matrix together with the command buffer offset at which it was uploaded.
typedef struct
{
u32 offset; // command buffer offset associated with this matrix
mtx44 data; // stored copy of the matrix
}bufferMatrix_s;
// List of recorded projection matrices; emptied by initBufferMatrixList().
bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE];
// Number of valid entries in bufferMatrixList.
int bufferMatrixListLength;
//----------------------
// GS SYSTEM STUFF
//----------------------
// Forgets every recorded projection upload by resetting the list length.
void initBufferMatrixList()
{
	bufferMatrixListLength = 0;
}
// Sets up GS: matrix stacks, the recorded-matrix list and the linear allocator mutex.
// When shader is non-NULL, the "projection" and "modelview" uniform locations are
// looked up in its vertex shader and the program is made current.
void gsInit(shaderProgram_s* shader)
{
	gsInitMatrixStack();
	initBufferMatrixList();
	svcCreateMutex(&linearAllocMutex, false);

	if(!shader)return;

	gsMatrixStackRegisters[GS_PROJECTION]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection");
	gsMatrixStackRegisters[GS_MODELVIEW]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview");
	shaderProgramUse(shader);
}
void gsExit(void)
{
svcCloseHandle(linearAllocMutex);
}
void gsStartFrame(void)
{
GPUCMD_SetBufferOffset(0);
initBufferMatrixList();
}
// linearAlloc() wrapped in linearAllocMutex.
// Returns whatever linearAlloc returns for the requested size.
void* gsLinearAlloc(size_t size)
{
	svcWaitSynchronization(linearAllocMutex, U64_MAX);
	void* const block=linearAlloc(size);
	svcReleaseMutex(linearAllocMutex);

	return block;
}
void gsLinearFree(void* mem)
{
svcWaitSynchronization(linearAllocMutex, U64_MAX);
linearFree(mem);
svcReleaseMutex(linearAllocMutex);
}
//----------------------
// MATRIX STACK STUFF
//----------------------
// One stack of GS_MATRIXSTACK_SIZE matrices per matrix type.
static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE];
// Uniform start register per matrix type; these defaults are overwritten by gsInit
// when it is given a shader.
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04};
// Index of the current (top) matrix in each stack.
static u8 gsMatrixStackOffsets[GS_MATRIXTYPES];
// Set whenever a stack's current matrix is modified.
static bool gsMatrixStackUpdated[GS_MATRIXTYPES];
// Matrix type targeted by the push/pop/transform functions (selected with gsMatrixMode).
static GS_MATRIX gsCurrentMatrixType;
// Resets every stack to depth 0 with an identity matrix, marks all of them dirty
// and selects GS_PROJECTION as the current matrix type.
static void gsInitMatrixStack()
{
	int type;

	gsCurrentMatrixType=GS_PROJECTION;
	for(type=0; type<GS_MATRIXTYPES; type++)
	{
		loadIdentity44((float*)gsMatrixStacks[type][0]);
		gsMatrixStackOffsets[type]=0;
		gsMatrixStackUpdated[type]=true;
	}
}
// Returns the current matrix of stack m as 16 floats, or NULL when m is not a
// valid matrix type.
float* gsGetMatrix(GS_MATRIX m)
{
	const bool valid=(m>=0 && m<GS_MATRIXTYPES);

	return valid ? (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]] : NULL;
}
// Overwrites the current matrix of stack m with the 16 floats at data and marks it dirty.
// Returns 0 on success, -1 when m is invalid or data is NULL.
int gsLoadMatrix(GS_MATRIX m, float* data)
{
	if(!data)return -1;

	// gsGetMatrix yields NULL exactly when m is out of range
	float* const target=gsGetMatrix(m);
	if(!target)return -1;

	memcpy(target, data, sizeof(mtx44));
	gsMatrixStackUpdated[m]=true;
	return 0;
}
//duplicates the current matrix of the current stack onto the next stack slot
//returns 0 on success, -1 when the current matrix type is invalid or the stack is already full
int gsPushMatrix()
{
    const GS_MATRIX type=gsCurrentMatrixType;
    if(type<0 || type>=GS_MATRIXTYPES)return -1;

    //the offsets are stored as u8, so only the upper bound can ever be violated
    if(gsMatrixStackOffsets[type]>=GS_MATRIXSTACK_SIZE-1)return -1;

    float* const previousTop=gsGetMatrix(type);
    gsMatrixStackOffsets[type]++;
    //the new top starts out as a copy, so the uploaded matrix is unchanged and no update flag is needed
    memcpy(gsGetMatrix(type), previousTop, sizeof(mtx44));
    return 0;
}
int gsPopMatrix()
{
const GS_MATRIX m=gsCurrentMatrixType;
if(m<0 || m>=GS_MATRIXTYPES)return -1;
if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1;
gsMatrixStackOffsets[m]--;
gsMatrixStackUpdated[m]=true;
return 0;
}
//selects the matrix stack that subsequent push/pop/transform calls operate on
//returns 0 on success, -1 (leaving the selection unchanged) when m is not a valid matrix type
int gsMatrixMode(GS_MATRIX m)
{
    if(m>=0 && m<GS_MATRIXTYPES)
    {
        gsCurrentMatrixType=m;
        return 0;
    }
    return -1;
}
//------------------------
// MATRIX TRANSFORM STUFF
//------------------------
//replaces the current matrix with multMatrix44(current, data) and flags the stack for upload
//returns 0 on success, -1 when data is NULL
int gsMultMatrix(float* data)
{
    if(!data)return -1;

    float* const current=gsGetMatrix(gsCurrentMatrixType);
    mtx44 product;
    //multiply into a scratch matrix because the destination must not alias an operand
    multMatrix44(current, data, (float*)product);
    memcpy(current, (float*)product, sizeof(mtx44));

    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
    return 0;
}
//resets the current matrix to identity and flags the stack for upload
void gsLoadIdentity()
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    loadIdentity44(current);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//overwrites the current matrix with the result of initProjectionMatrix(fovy, aspect, near, far)
//and flags the stack for upload
void gsProjectionMatrix(float fovy, float aspect, float near, float far)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    initProjectionMatrix(current, fovy, aspect, near, far);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//applies rotateMatrixX with angle x to the current matrix (rotation on the right-hand side) and flags the stack for upload
void gsRotateX(float x)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    rotateMatrixX(current, x, false);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//applies rotateMatrixY with angle y to the current matrix (rotation on the right-hand side) and flags the stack for upload
void gsRotateY(float y)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    rotateMatrixY(current, y, false);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//applies rotateMatrixZ with angle z to the current matrix (rotation on the right-hand side) and flags the stack for upload
void gsRotateZ(float z)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    rotateMatrixZ(current, z, false);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//applies scaleMatrix(x, y, z) to the current matrix and flags the stack for upload
void gsScale(float x, float y, float z)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    scaleMatrix(current, x, y, z);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//applies translateMatrix(x, y, z) to the current matrix and flags the stack for upload
void gsTranslate(float x, float y, float z)
{
    float* const current=gsGetMatrix(gsCurrentMatrixType);
    translateMatrix(current, x, y, z);
    gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//----------------------
// MATRIX RENDER STUFF
//----------------------
//uploads the 4x4 matrix m as 4 consecutive float uniform vectors starting at startreg
//each group of four floats is sent in reversed component order (w, z, y, x)
static void gsSetUniformMatrix(u32 startreg, float* m)
{
    float param[16];
    int vec, comp;
    for(vec=0; vec<4; vec++)
    {
        for(comp=0; comp<4; comp++)
        {
            //component 0 of the upload takes element 3 of the source group, and so on
            param[vec*4+comp]=m[vec*4+(3-comp)];
        }
    }
    GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4);
}
static int gsUpdateTransformation()
{
GS_MATRIX m;
for(m=0; m<GS_MATRIXTYPES; m++)
{
if(gsMatrixStackUpdated[m])
{
if(m==GS_PROJECTION && bufferMatrixListLength<BUFFERMATRIXLIST_SIZE)
{
GPUCMD_GetBuffer(NULL, NULL, &bufferMatrixList[bufferMatrixListLength].offset);
memcpy(bufferMatrixList[bufferMatrixListLength].data, gsGetMatrix(m), sizeof(mtx44));
bufferMatrixListLength++;
}
gsSetUniformMatrix(gsMatrixStackRegisters[m], gsGetMatrix(m));
gsMatrixStackUpdated[m]=false;
}
}
return 0;
}
//rewrites every recorded projection upload in the current command buffer with
//multMatrix44(recorded matrix, transformation), then restores the write offset
void gsAdjustBufferMatrices(mtx44 transformation)
{
    u32* cmdBuffer;
    u32 endOffset;
    GPUCMD_GetBuffer(&cmdBuffer, NULL, &endOffset);

    int idx;
    for(idx=0; idx<bufferMatrixListLength; idx++)
    {
        bufferMatrix_s* const entry=&bufferMatrixList[idx];

        //TODO : better check, need to account for param size
        if(entry->offset+2>=endOffset)continue;

        mtx44 adjusted;
        //move the write position back so the upload lands on top of the recorded one
        GPUCMD_SetBufferOffset(entry->offset);
        multMatrix44((float*)entry->data, (float*)transformation, (float*)adjusted);
        gsSetUniformMatrix(gsMatrixStackRegisters[GS_PROJECTION], (float*)adjusted);
    }

    GPUCMD_SetBufferOffset(endOffset);
}
//----------------------
// VBO STUFF
//----------------------
//puts a VBO structure into its empty state (no vertex data, no precomputed commands)
//nothing is freed here : use gsVboDestroy on a VBO that owns memory
//returns 0 on success, -1 when vbo is NULL
int gsVboInit(gsVbo_s* vbo)
{
    if(!vbo)return -1;
    vbo->data=NULL;
    vbo->currentSize=0;
    vbo->maxSize=0;
    //also reset the vertex count so an emptied VBO never reports stale vertices
    vbo->numVertices=0;
    vbo->commands=NULL;
    vbo->commandsSize=0;
    return 0;
}
//allocates size bytes of linear memory for the VBO and resets its fill state
//returns 0 on success, -1 when vbo is NULL or the allocation failed
//(on allocation failure the VBO is left empty with maxSize 0)
int gsVboCreate(gsVbo_s* vbo, u32 size)
{
    if(!vbo)return -1;
    vbo->data=gsLinearAlloc(size);
    vbo->numVertices=0;
    vbo->currentSize=0;
    if(!vbo->data)
    {
        //do not advertise capacity that does not exist
        vbo->maxSize=0;
        return -1;
    }
    vbo->maxSize=size;
    return 0;
}
//returns the address currentSize bytes past the start of the VBO data, or NULL when vbo is NULL
void* gsVboGetOffset(gsVbo_s* vbo)
{
    if(vbo)
    {
        u8* const base=(u8*)vbo->data;
        return (void*)&base[vbo->currentSize];
    }
    return NULL;
}
//appends size bytes from data to the VBO and adds units to its vertex count
//returns 0 on success, -1 when an argument is NULL/zero, the VBO has no storage,
//or the data does not fit in the remaining space
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units)
{
    if(!vbo || !data || !size)return -1;
    //a VBO that was never created (or whose allocation failed) has nowhere to copy to
    if(!vbo->data)return -1;
    //compare in unsigned space : check the invariant first so the subtraction cannot wrap
    if(vbo->currentSize>vbo->maxSize || size>vbo->maxSize-vbo->currentSize)return -1;
    memcpy(gsVboGetOffset(vbo), data, size);
    vbo->currentSize+=size;
    vbo->numVertices+=units;
    return 0;
}
//currently a no-op kept for API symmetry : returns 0 for a non-NULL vbo and -1 otherwise
int gsVboFlushData(gsVbo_s* vbo)
{
    //an explicit data cache flush is unnecessary if we use flushAndRun
    // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize);
    return vbo ? 0 : -1;
}
//frees the precomputed commands and the vertex data owned by the VBO, then re-initializes it
//returns 0 on success, -1 when vbo is NULL
int gsVboDestroy(gsVbo_s* vbo)
{
    if(vbo==NULL)return -1;

    if(vbo->commands!=NULL)free(vbo->commands);
    if(vbo->data!=NULL)gsLinearFree(vbo->data);

    //leave no dangling pointers behind
    gsVboInit(vbo);
    return 0;
}
extern u32 debugValue[];
//appends the raw GPU commands that draw n vertices of the given primitive type from data
//data is converted with osConvertVirtToPhys, so it has to be memory that function can translate
//NOTE(review): the register numbers below are raw values with no named constants here; their exact
//meaning is taken from the existing comments only - confirm against GPU register documentation
void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n)
{
//set attribute buffer address
GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3);
//set primitive type
GPUCMD_AddSingleParam(0x0002025E, primitive);
GPUCMD_AddSingleParam(0x0002025F, 0x00000001);
//index buffer not used for drawArrays but 0x000F0227 still required
GPUCMD_AddSingleParam(0x000F0227, 0x80000000);
//pass number of vertices
GPUCMD_AddSingleParam(0x000F0228, n);
//remaining writes presumably trigger the draw and finalize it - TODO confirm
GPUCMD_AddSingleParam(0x00010253, 0x00000001);
GPUCMD_AddSingleParam(0x00010245, 0x00000000);
GPUCMD_AddSingleParam(0x000F022E, 0x00000001);
GPUCMD_AddSingleParam(0x00010245, 0x00000001);
GPUCMD_AddSingleParam(0x000F0231, 0x00000001);
// GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff
}
//records the draw commands for the VBO once so that later draws can copy them instead of rebuilding them
//not thread safe : uses a static scratch buffer and temporarily redirects the global GPU command buffer
//returns 0 on success, -1 when vbo is NULL, commands were already computed, or the allocation failed
int gsVboPrecomputeCommands(gsVbo_s* vbo)
{
    if(!vbo || vbo->commands)return -1;

    static u32 tmpBuffer[128];
    u32* savedAdr; u32 savedSize, savedOffset;

    //generate the commands into the scratch buffer instead of the real command buffer
    GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset);
    GPUCMD_SetBuffer(tmpBuffer, 128, 0);
    GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
    GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize);

    //restore the real command buffer before anything can fail, so an allocation
    //failure never leaves the GPU command stream pointing at the scratch buffer
    GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset);

    vbo->commands=memalign(0x4, vbo->commandsSize*4);
    if(!vbo->commands)
    {
        vbo->commandsSize=0;
        return -1;
    }

    memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4);
    return 0;
}
extern u32* gpuCmdBuf;
extern u32 gpuCmdBufSize;
extern u32 gpuCmdBufOffset;
void _vboMemcpy50(u32* dst, u32* src);
//appends size words of prebuilt commands to the global GPU command buffer
//does nothing when cmd is NULL, size is 0, there is no command buffer, or the words would not fit
void _GPUCMD_AddRawCommands(u32* cmd, u32 size)
{
    if(!cmd || !size)return;

    //refuse to write past the end of the command buffer (sizes and offsets are in words)
    if(!gpuCmdBuf || gpuCmdBufOffset>gpuCmdBufSize || size>gpuCmdBufSize-gpuCmdBufOffset)return;

    //0x50 bytes is the size this file's draw sequence produces, so it gets the dedicated copy routine
    if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd);
    else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4);

    gpuCmdBufOffset+=size;
}
//draws the VBO as triangles using the current matrices
//pending matrix changes are uploaded first; the draw commands are precomputed on first use
//and copied on later calls, falling back to building them directly when none are available
//returns 0 on success, -1 when vbo is NULL or holds no data
int gsVboDraw(gsVbo_s* vbo)
{
    if(!vbo)return -1;
    if(!vbo->data || !vbo->currentSize || !vbo->maxSize)return -1;

    gsUpdateTransformation();

    //no-op (returns -1) once the commands exist
    gsVboPrecomputeCommands(vbo);

    if(!vbo->commands)
    {
        GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
        return 0;
    }

    _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize);
    return 0;
}

View File

@@ -0,0 +1,59 @@
#ifndef GS_H
#define GS_H
#include <3ds.h>
#include "math.h"
#define GS_MATRIXSTACK_SIZE (8)
//selects one of the matrix stacks; the values are used directly as array indices in gs.c
typedef enum
{
GS_PROJECTION = 0,
GS_MODELVIEW = 1,
//number of matrix types; not a valid stack selector
GS_MATRIXTYPES
}GS_MATRIX;
//vertex buffer object : a block of vertex data plus an optional cache of its draw commands
typedef struct
{
//vertex data, allocated by gsVboCreate through gsLinearAlloc
u8* data;
u32 currentSize; // in bytes
u32 maxSize; // in bytes
//running total of the units passed to gsVboAddData
u32 numVertices;
//draw commands cached by the first gsVboDraw (NULL until then)
u32* commands;
//number of 32-bit words stored in commands
u32 commandsSize;
}gsVbo_s;
//system : init/exit, per-frame reset and stereo adjustment of recorded projection matrices
void gsInit(shaderProgram_s* shader);
void gsExit(void);
void gsStartFrame(void);
void gsAdjustBufferMatrices(mtx44 transformation);
//linear heap allocation guarded by a mutex
void* gsLinearAlloc(size_t size);
void gsLinearFree(void* mem);
//matrix stacks : int-returning functions give 0 on success and -1 on error
float* gsGetMatrix(GS_MATRIX m);
int gsLoadMatrix(GS_MATRIX m, float* data);
int gsPushMatrix();
int gsPopMatrix();
int gsMatrixMode(GS_MATRIX m);
//transforms applied to the current matrix of the stack selected with gsMatrixMode
void gsLoadIdentity();
void gsProjectionMatrix(float fovy, float aspect, float near, float far);
void gsRotateX(float x);
void gsRotateY(float y);
void gsRotateZ(float z);
void gsScale(float x, float y, float z);
void gsTranslate(float x, float y, float z);
int gsMultMatrix(float* data);
//VBOs : int-returning functions give 0 on success and -1 on error
int gsVboInit(gsVbo_s* vbo);
int gsVboCreate(gsVbo_s* vbo, u32 size);
int gsVboFlushData(gsVbo_s* vbo);
int gsVboDestroy(gsVbo_s* vbo);
int gsVboDraw(gsVbo_s* vbo);
void* gsVboGetOffset(gsVbo_s* vbo);
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units);
#endif

View File

@@ -0,0 +1,379 @@
///////////////////////////////////////
// GPU example //
///////////////////////////////////////
//this example is meant to show how to use the GPU to render a 3D object
//it also shows how to do stereoscopic 3D
//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft
//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <3ds.h>
#include "math.h"
#include "gs.h"
#include "test_vsh_shbin.h"
#include "texture_bin.h"
//will be moved into ctrulib at some point
//reads the 3D slider state as a float from the fixed address 0x1FF81080
#define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080)
//packs four 8-bit channels into one 32-bit value laid out as 0xRRGGBBAA
//the channels are converted to unsigned before shifting so that a red value >= 0x80
//never shifts a bit into the sign bit of a signed int
#define RGBA8(r,g,b,a) ((((uint32_t)(r)&0xFF)<<24) | (((uint32_t)(g)&0xFF)<<16) | (((uint32_t)(b)&0xFF)<<8) | (((uint32_t)(a)&0xFF)<<0))
//transfer from GPU output buffer to actual framebuffer flags
//no vertical flip, no tiled output, no raw copy; RGBA8 input converted to RGB8 output with X scaling
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X))
//shader structure
//dvlb holds the parsed shader binary, shader the program built from its first DVLE (see main)
DVLB_s* dvlb;
shaderProgram_s shader;
//texture data pointer
//allocated in main with linearMemAlign and filled from texture_bin
u32* texData;
//vbo structure
gsVbo_s vbo;
//GPU framebuffer address
//NOTE(review): hard-coded address; presumably a fixed VRAM location - confirm
u32* gpuOut=(u32*)0x1F119400;
//GPU depth buffer address
//NOTE(review): hard-coded address; presumably a fixed VRAM location - confirm
u32* gpuDOut=(u32*)0x1F370800;
//angle for the vertex lighting (cf test.vsh)
//changed with the A/B buttons in main and used for the first light direction in renderFrame
float lightAngle;
//object position and rotation angle
vect3Df_s position, angle;
//vertex structure
//8 floats per vertex : 3 for position, 2 for texture coordinates, 3 for normal
//this order matches the attribute formats passed to GPU_SetAttributeBuffers in renderFrame
typedef struct
{
vect3Df_s position;
float texcoord[2];
vect3Df_s normal;
}vertex_s;
//object data (cube)
//obviously this doesn't have to be defined manually, but we will here for the purposes of the example
//each line is a vertex : {position.x, position.y, position.z}, {texcoord[0], texcoord[1]}, {normal.x, normal.y, normal.z}
//we're drawing triangles so three lines = one triangle
//6 faces * 2 triangles * 3 vertices = 36 vertices; face names : P = plus, M = minus, followed by the axis of the face normal
const vertex_s modelVboData[]=
{
//first face (PZ)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
//second face (MZ)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
//third face (PX)
//first triangle
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
//fourth face (MX)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
//second triangle
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
//fifth face (PY)
//first triangle
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
//sixth face (MY)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
};
//stolen from staplebutt
//configures texture environment stage num with GPU_PREVIOUS as its first source and
//GPU_REPLACE as the combiner for both the color and alpha halves
void GPU_SetDummyTexEnv(u8 num)
{
    const int sources=GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
    const int operands=GPU_TEVOPERANDS(0,0,0);

    //the same settings are used for the rgb and alpha halves of the stage
    GPU_SetTexEnv(num, sources, sources, operands, operands, GPU_REPLACE, GPU_REPLACE, 0xFFFFFFFF);
}
// topscreen
//builds the GPU command list for one frame : fixed-function state, the three lights,
//texture and vertex attribute setup, then the cube drawn through GS
void renderFrame()
{
//render target : depth buffer and color buffer, both passed as physical addresses
GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);
GPU_DepthMap(-1.0f, 0.0f);
GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
GPU_SetBlendingColor(0,0,0,0);
GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
//NOTE(review): raw register writes with no explanation in this file - confirm their purpose
GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
GPUCMD_AddWrite(GPUREG_0118, 0);
//lighting stuff
//lights 2 and 3 animate on their own; the statics persist across frames and advance once per call
static double lightAngle2 = 0;
lightAngle2 += 0.03;
static double lightAngle3 = 0;
lightAngle3 += 0.1;
vect3Df_s lightDir[3] = { vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle))),
vnormf(vect3Df(cos(lightAngle2), -1.0f, sin(lightAngle2))),
vnormf(vect3Df(cos(lightAngle3*2), cos(lightAngle3), sin(lightAngle3))) };
unsigned num_lights = 3;
//number of uniform registers between consecutive lights (dir, diffuse, ambient)
unsigned light_size = 3;
//packs (num_lights-1) in bits 0-7, 0 in bits 8-15 and light_size in bits 16+
//presumably a loop count / start / increment triple consumed by the shader - TODO confirm
uint32_t val = ((num_lights-1u))|(0<<8)|(light_size<<16u);
// Set int uniforms
GPUCMD_AddWrite(GPUREG_GSH_INTUNIFORM_I1, val);
GPUCMD_AddWrite(GPUREG_VSH_INTUNIFORM_I1, val);
GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
GPU_SetTextureEnable(GPU_TEXUNIT0);
//stage 0 modulates texture 0 with the primary color; stages 1-5 get the dummy setup
GPU_SetTexEnv(0,
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVOPERANDS(0,0,0),
GPU_TEVOPERANDS(0,0,0),
GPU_MODULATE, GPU_MODULATE,
0xFFFFFFFF);
GPU_SetDummyTexEnv(1);
GPU_SetDummyTexEnv(2);
GPU_SetDummyTexEnv(3);
GPU_SetDummyTexEnv(4);
GPU_SetDummyTexEnv(5);
//setup lighting (this is specific to our shader)
//each vector is written in reversed component order (w, z, y, x); the directions are negated
//light 0 : at the uniforms' base locations
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "light_dir"), (u32*)(float[]){0.0f, -lightDir[0].z, -lightDir[0].y, -lightDir[0].x}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "light_diffuse"), (u32*)(float[]){0.2f, 0.2f, 0.2f, 0.2f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "light_ambient"), (u32*)(float[]){0.4f, 0.4f, 0.4f, 0.4f}, 1);
//light 1 : base locations + light_size
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size + shaderInstanceGetUniformLocation(shader.vertexShader, "light_dir"), (u32*)(float[]){0.0f, -lightDir[1].z, -lightDir[1].y, -lightDir[1].x}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size + shaderInstanceGetUniformLocation(shader.vertexShader, "light_diffuse"), (u32*)(float[]){0.f, 0.f, 0.5f, 0.f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size + shaderInstanceGetUniformLocation(shader.vertexShader, "light_ambient"), (u32*)(float[]){0.f, 0.f, 0.f, 0.f}, 1);
//light 2 : base locations + 2*light_size
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size*2 + shaderInstanceGetUniformLocation(shader.vertexShader, "light_dir"), (u32*)(float[]){0.0f, -lightDir[2].z, -lightDir[2].y, -lightDir[2].x}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size*2 + shaderInstanceGetUniformLocation(shader.vertexShader, "light_diffuse"), (u32*)(float[]){0.0f, 0.5f, 0.f, 0.f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, light_size*2 + shaderInstanceGetUniformLocation(shader.vertexShader, "light_ambient"), (u32*)(float[]){0.f, 0.f, 0.f, 0.f}, 1);
//texturing stuff
GPU_SetTexture(
GPU_TEXUNIT0, //texture unit
(u32*)osConvertVirtToPhys((u32)texData), //data buffer
128, //texture width
128, //texture height
GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params
GPU_RGBA8 //texture pixel format
);
//NOTE(review): the base address passed here is texData, not the VBO; GPU_DrawArrayDirectly later
//writes the VBO address itself, so this presumably only acts as a placeholder - confirm
GPU_SetAttributeBuffers(
3, //3 attributes: vertices, texcoords, and normals
(u32*)osConvertVirtToPhys((u32)texData), //mesh buffer
GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position)
GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input attribute register 1 (v1): 2 floats (texcoord)
GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // GPU Input attribute register 2 (v2): 3 floats (normal)
0xFFC,
0x210,
1,
(u32[]){0x00000000},
(u64[]){0x210},
(u8[]){3}
);
//initialize projection matrix to standard perspective stuff
gsMatrixMode(GS_PROJECTION);
gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f);
gsRotateZ(M_PI/2); //because framebuffer is sideways...
//draw object
//push/pop keeps the object transform from accumulating across frames
gsMatrixMode(GS_MODELVIEW);
gsPushMatrix();
gsTranslate(position.x, position.y, position.z);
gsRotateX(angle.x);
gsRotateY(angle.y);
gsVboDraw(&vbo);
gsPopMatrix();
GPU_FinishDrawing();
}
//entry point : initializes graphics, GPU, shader, texture and VBO, then runs the render loop until START is pressed
//NOTE(review): the results of linearAlloc/linearMemAlign are used without a NULL check, and gpuCmd,
//gpuCmdRight, texData and the VBO are never released before returning
int main(int argc, char** argv)
{
gfxInitDefault();
//initialize GPU
GPU_Init(NULL);
//let GFX know we're ok with doing stereoscopic 3D rendering
gfxSet3D(true);
//allocate our GPU command buffers
//they *have* to be on the linear heap
//gpuCmdSize is a count of 32-bit words, hence the *4 for the byte size
u32 gpuCmdSize=0x40000;
u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4);
u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4);
//actually reset the GPU
GPU_Reset(NULL, gpuCmd, gpuCmdSize);
//load our vertex shader binary
dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size);
shaderProgramInit(&shader);
shaderProgramSetVsh(&shader, &dvlb->DVLE[0]);
//initialize GS
gsInit(&shader);
// Flush the command buffer so that the shader upload gets executed
GPUCMD_Finalize();
GPUCMD_FlushAndRun(NULL);
gspWaitForP3D();
//create texture
texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned
memcpy(texData, texture_bin, texture_bin_size);
//create VBO
gsVboInit(&vbo);
gsVboCreate(&vbo, sizeof(modelVboData));
gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s));
gsVboFlushData(&vbo);
//initialize object position and angle
position=vect3Df(0.0f, 0.0f, -2.0f);
angle=vect3Df(M_PI/4, M_PI/4, 0.0f);
//background color (blue)
u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF);
while(aptMainLoop())
{
//get current 3D slider state
float slider=CONFIG_3D_SLIDERSTATE;
//controls
hidScanInput();
//START to exit to hbmenu
if(keysDown()&KEY_START)break;
//A/B to change vertex lighting angle
if(keysHeld()&KEY_A)lightAngle+=0.1f;
if(keysHeld()&KEY_B)lightAngle-=0.1f;
//D-PAD to rotate object
if(keysHeld()&KEY_DOWN)angle.x+=0.05f;
if(keysHeld()&KEY_UP)angle.x-=0.05f;
if(keysHeld()&KEY_LEFT)angle.y+=0.05f;
if(keysHeld()&KEY_RIGHT)angle.y-=0.05f;
//R/L to bring object closer to or move it further from the camera
if(keysHeld()&KEY_R)position.z+=0.1f;
if(keysHeld()&KEY_L)position.z-=0.1f;
//generate our GPU command buffer for this frame
gsStartFrame();
renderFrame();
GPUCMD_Finalize();
if(slider>0.0f)
{
//new and exciting 3D !
//make a copy of left gpu buffer
u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset);
memcpy(gpuCmdRight, gpuCmd, offset*4);
//setup interaxial
//eye separation scales with the slider; each eye is shifted by half of it in opposite directions
float interaxial=slider*0.12f;
//adjust left gpu buffer for 3D !
{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
//draw left framebuffer
GPUCMD_FlushAndRun(NULL);
//while GPU starts drawing the left buffer, adjust right one for 3D !
GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset);
{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
//we wait for the left buffer to finish drawing
gspWaitForP3D();
//transfer the left image from GPU output buffer to the left framebuffer
GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
gspWaitForPPF();
//we draw the right buffer, wait for it to finish and then switch back to left one
//clear the screen
GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
gspWaitForPSC0();
//draw the right framebuffer
GPUCMD_FlushAndRun(NULL);
gspWaitForP3D();
//transfer from GPU output buffer to actual framebuffer
GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
gspWaitForPPF();
//switch back to the left command buffer for the next frame
GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
}else{
//boring old 2D !
//draw the frame
GPUCMD_FlushAndRun(NULL);
gspWaitForP3D();
//transfer from GPU output buffer to actual framebuffer
GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
gspWaitForPPF();
}
//clear the screen
//fills the color buffer with the background color and the depth buffer with 0 for the next frame
GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
gspWaitForPSC0();
gfxSwapBuffersGpu();
gspWaitForEvent(GSPEVENT_VBlank0, true);
}
gsExit();
shaderProgramFree(&shader);
DVLB_Free(dvlb);
gfxExit();
return 0;
}

View File

@@ -0,0 +1,148 @@
#include <math.h>
#include <string.h>
#include "math.h"
void loadIdentity44(float* m)
{
if(!m)return;
memset(m, 0x00, 16*4);
m[0]=m[5]=m[10]=m[15]=1.0f;
}
//4x4 matrix product : m = m1 * m2, with element (row, col) stored at index col+row*4
//m must not alias m1 or m2, since the output is written while the inputs are still being read
void multMatrix44(float* m1, float* m2, float* m) //4x4
{
    int col, row, k;
    for(col=0; col<4; col++)
    {
        for(row=0; row<4; row++)
        {
            //dot product of row `row` of m1 with column `col` of m2, summed left to right
            float acc=m1[0+row*4]*m2[col+0*4];
            for(k=1; k<4; k++)acc+=m1[k+row*4]*m2[col+k*4];
            m[col+row*4]=acc;
        }
    }
}
//replaces tm with tm * T, where T is the identity with x, y and z stored at indices 3, 7 and 11
void translateMatrix(float* tm, float x, float y, float z)
{
    float shift[16]=
    {
        1.0f, 0.0f, 0.0f, x,
        0.0f, 1.0f, 0.0f, y,
        0.0f, 0.0f, 1.0f, z,
        0.0f, 0.0f, 0.0f, 1.0f
    };
    float product[16];
    multMatrix44(tm,shift,product);
    memcpy(tm,product,sizeof(product));
}
// 00 01 02 03
// 04 05 06 07
// 08 09 10 11
// 12 13 14 15
//combines tm with a rotation of angle x whose non-trivial entries sit at indices 5, 6, 9 and 10
//r selects the order : false gives tm = tm * R, true gives tm = R * tm
void rotateMatrixX(float* tm, float x, bool r)
{
    const float c=cos(x);
    const float s=sin(x);
    float rot[16]=
    {
        1.0f, 0.0f, 0.0f, 0.0f,
        0.0f,    c,    s, 0.0f,
        0.0f,   -s,    c, 0.0f,
        0.0f, 0.0f, 0.0f, 1.0f
    };
    float product[16];
    if(r)multMatrix44(rot,tm,product);
    else multMatrix44(tm,rot,product);
    memcpy(tm,product,sizeof(product));
}
//combines tm with a rotation of angle x whose non-trivial entries sit at indices 0, 2, 8 and 10
//r selects the order : false gives tm = tm * R, true gives tm = R * tm
void rotateMatrixY(float* tm, float x, bool r)
{
    const float c=cos(x);
    const float s=sin(x);
    float rot[16]=
    {
           c, 0.0f,    s, 0.0f,
        0.0f, 1.0f, 0.0f, 0.0f,
          -s, 0.0f,    c, 0.0f,
        0.0f, 0.0f, 0.0f, 1.0f
    };
    float product[16];
    if(r)multMatrix44(rot,tm,product);
    else multMatrix44(tm,rot,product);
    memcpy(tm,product,sizeof(product));
}
//combines tm with a rotation of angle x whose non-trivial entries sit at indices 0, 1, 4 and 5
//r selects the order : false gives tm = tm * R, true gives tm = R * tm
void rotateMatrixZ(float* tm, float x, bool r)
{
    const float c=cos(x);
    const float s=sin(x);
    float rot[16]=
    {
           c,    s, 0.0f, 0.0f,
          -s,    c, 0.0f, 0.0f,
        0.0f, 0.0f, 1.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 1.0f
    };
    float product[16];
    if(r)multMatrix44(rot,tm,product);
    else multMatrix44(tm,rot,product);
    memcpy(tm,product,sizeof(product));
}
//scales tm in place : in every group of four floats the first is multiplied by x,
//the second by y and the third by z; the fourth is left untouched
void scaleMatrix(float* tm, float x, float y, float z)
{
    int group;
    for(group=0; group<4; group++)
    {
        float* const v=&tm[group*4];
        v[0]*=x;
        v[1]*=y;
        v[2]*=z;
    }
}
//writes a perspective projection into the 16 floats at m
//fovy   : vertical field of view, passed to tan() (so in radians)
//aspect : ratio used to derive the horizontal extent from the vertical one
//near/far : clip plane distances; they must differ, since the matrix divides by (far-near)
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far)
{
    const float top=near*tan(fovy/2);
    const float right=(top*aspect);

    //perspective matrix built from the view volume extents
    float persp[4*4]=
    {
        near/right, 0.0f, 0.0f, 0.0f,
        0.0f, near/top, 0.0f, 0.0f,
        0.0f, 0.0f, -(far+near)/(far-near), -2.0f*(far*near)/(far-near),
        0.0f, 0.0f, -1.0f, 0.0f
    };

    //identity except for its third row (0, 0, 0.5, -0.5), which rescales and offsets the depth row
    float depthFix[4*4]=
    {
        1.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 1.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 0.5f, -0.5f,
        0.0f, 0.0f, 0.0f, 1.0f
    };

    multMatrix44(depthFix, persp, m);
}
//returns the first three floats of the i-th group of four (indices 4*i .. 4*i+2)
//returns the zero vector when m is NULL or i is out of range
vect3Df_s getMatrixColumn(float* m, u8 i)
{
    if(m && i<4)
    {
        const float* const group=&m[i*4];
        return vect3Df(group[0],group[1],group[2]);
    }
    return vect3Df(0,0,0);
}
//returns the floats at indices i, i+4 and i+8 (the i-th element of the first three groups of four)
//returns the zero vector when m is NULL or i is out of range
vect3Df_s getMatrixRow(float* m, u8 i)
{
    if(m && i<4)
    {
        const float* const first=&m[i];
        return vect3Df(first[0*4],first[1*4],first[2*4]);
    }
    return vect3Df(0,0,0);
}
//returns all four floats of the i-th group of four (indices 4*i .. 4*i+3)
//returns the zero vector when m is NULL or i is out of range
vect4Df_s getMatrixColumn4(float* m, u8 i)
{
    if(m && i<4)
    {
        const float* const group=&m[i*4];
        return vect4Df(group[0],group[1],group[2],group[3]);
    }
    return vect4Df(0,0,0,0);
}
//returns the floats at indices i, i+4, i+8 and i+12 (the i-th element of every group of four)
//returns the zero vector when m is NULL or i is out of range
vect4Df_s getMatrixRow4(float* m, u8 i)
{
    if(m && i<4)
    {
        const float* const first=&m[i];
        return vect4Df(first[0*4],first[1*4],first[2*4],first[3*4]);
    }
    return vect4Df(0,0,0,0);
}

View File

@@ -0,0 +1,144 @@
#ifndef MATH_H
#define MATH_H
#include <3ds/types.h>
#include <math.h>
//4x4 and 3x3 float matrices; the matrix functions below take them as flat float* arrays
typedef float mtx44[4][4];
typedef float mtx33[3][3];
//3D vector with signed 32-bit integer components
typedef struct
{
s32 x, y, z;
}vect3Di_s;
//builds a vect3Di_s from its components
static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z)
{
return (vect3Di_s){x,y,z};
}
//component-wise sum u+v
static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v)
{
return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z};
}
//component-wise difference u-v
static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v)
{
return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z};
}
//multiplies every component of v by f
static inline vect3Di_s vmuli(vect3Di_s v, s32 f)
{
return (vect3Di_s){v.x*f,v.y*f,v.z*f};
}
//3D vector with float components
typedef struct
{
float x, y, z;
}vect3Df_s;
//builds a vect3Df_s from its components
static inline vect3Df_s vect3Df(float x, float y, float z)
{
return (vect3Df_s){x,y,z};
}
//component-wise sum u+v
static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v)
{
return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z};
}
//component-wise difference u-v
static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v)
{
return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z};
}
//multiplies every component of v by f
static inline vect3Df_s vmulf(vect3Df_s v, float f)
{
return (vect3Df_s){v.x*f,v.y*f,v.z*f};
}
//component-wise product of v1 and v2
static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2)
{
return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z};
}
//euclidean length of v
static inline float vmagf(vect3Df_s v)
{
return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
}
//euclidean distance between v1 and v2
static inline float vdistf(vect3Df_s v1, vect3Df_s v2)
{
return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z));
}
//returns v divided by its length
//there is no zero-length check : a zero vector divides by zero and yields non-finite components
static inline vect3Df_s vnormf(vect3Df_s v)
{
const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
return (vect3Df_s){v.x/l,v.y/l,v.z/l};
}
//4D vector with float components
typedef struct
{
float x, y, z, w;
}vect4Df_s;
//builds a vect4Df_s from its components
static inline vect4Df_s vect4Df(float x, float y, float z, float w)
{
return (vect4Df_s){x,y,z,w};
}
//component-wise sum u+v
static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v)
{
return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w};
}
//component-wise difference u-v
static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v)
{
return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w};
}
//multiplies every component of v by f
static inline vect4Df_s vmulf4(vect4Df_s v, float f)
{
return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f};
}
//dot product of v1 and v2 over all four components
static inline float vdotf4(vect4Df_s v1, vect4Df_s v2)
{
return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w;
}
//returns v divided by its 4D length
//there is no zero-length check : a zero vector divides by zero and yields non-finite components
static inline vect4Df_s vnormf4(vect4Df_s v)
{
const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w);
return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l};
}
//conversions between the float and integer vector types
//float to integer : every component is rounded down with floorf (toward negative infinity)
static inline vect3Di_s vf2i(vect3Df_s v)
{
return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)};
}
//integer to float : every component is cast to float
static inline vect3Df_s vi2f(vect3Di_s v)
{
return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z};
}
//4x4 matrix helpers operating on flat arrays of 16 floats
//writes the identity matrix into m
void loadIdentity44(float* m);
//m = m1 * m2; m must be distinct from m1 and m2
void multMatrix44(float* m1, float* m2, float* m);
//tm = tm * translation(x, y, z)
void translateMatrix(float* tm, float x, float y, float z);
//rotations by angle x; r=false gives tm = tm * R, r=true gives tm = R * tm
void rotateMatrixX(float* tm, float x, bool r);
void rotateMatrixY(float* tm, float x, bool r);
void rotateMatrixZ(float* tm, float x, bool r);
//in-place scale of the first three elements of every group of four by x, y and z
void scaleMatrix(float* tm, float x, float y, float z);
//writes a perspective projection into m (fovy is passed to tan())
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far);
//element extraction; these return a zero vector when m is NULL or i>=4
vect3Df_s getMatrixColumn(float* m, u8 i);
vect3Df_s getMatrixRow(float* m, u8 i);
vect4Df_s getMatrixColumn4(float* m, u8 i);
vect4Df_s getMatrixRow4(float* m, u8 i);
#endif

View File

@@ -0,0 +1 @@
# Builds the "simple" example executable from simple.cpp
add_executable(simple simple.cpp)

View File

@@ -0,0 +1,113 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <fstream>
#include <iostream>
#include <iterator>
#include "nihstro/inline_assembly.h"
using namespace nihstro;
// Register aliases used by the inline shader below.
// Vertex inputs 0-2: position, texture coordinate and normal.
static const auto in_pos = SourceRegister::MakeInput(0);
static const auto in_tex = SourceRegister::MakeInput(1);
static const auto in_norm = SourceRegister::MakeInput(2);
// Temporaries: backup_pos holds the input position with its w component filled in,
// temp_pos holds intermediate results.
static const auto backup_pos = SourceRegister::MakeTemporary(1);
static const auto temp_pos = SourceRegister::MakeTemporary(0);
// Float register 20 is declared as the constant (1.0, 0.0, 0.5, 1.0) in the shader below.
static const auto constant = SourceRegister::MakeFloat(20);
// Float registers 0-3 and 4-7 hold the four vectors of the projection and modelview matrices.
static const SourceRegister projection[4] = { SourceRegister::MakeFloat(0), SourceRegister::MakeFloat(1), SourceRegister::MakeFloat(2), SourceRegister::MakeFloat(3) };
static const SourceRegister modelview[4] = { SourceRegister::MakeFloat(4), SourceRegister::MakeFloat(5), SourceRegister::MakeFloat(6), SourceRegister::MakeFloat(7) };
// Float registers 8 and 9 hold the lighting uniforms.
static const auto light_direction = SourceRegister::MakeFloat(8);
static const auto light_ambient = SourceRegister::MakeFloat(9);
// Output registers 0-4: position, color and three texture coordinate sets.
static const DestRegister out_pos = DestRegister::MakeOutput(0);
static const DestRegister out_col = DestRegister::MakeOutput(1);
static const DestRegister out_tex0 = DestRegister::MakeOutput(2);
static const DestRegister out_tex1 = DestRegister::MakeOutput(3);
static const DestRegister out_tex2 = DestRegister::MakeOutput(4);
// Vertex shader assembled into a shader binary during static initialization.
// The list holds declarations (outputs, constant, uniforms) followed by the instruction stream.
const auto shbin = InlineAsm::CompileToShbin({
// TODO: Declare output names
// TODO: Declare constant
// TODO: Declare uniform names
// TODO: Explicitly set entry point
// Output register semantics
InlineAsm::DeclareOutput(out_pos, OutputRegisterInfo::POSITION),
InlineAsm::DeclareOutput(out_col, OutputRegisterInfo::COLOR),
InlineAsm::DeclareOutput(out_tex0, OutputRegisterInfo::TEXCOORD0),
InlineAsm::DeclareOutput(out_tex1, OutputRegisterInfo::TEXCOORD1),
InlineAsm::DeclareOutput(out_tex2, OutputRegisterInfo::TEXCOORD2),
// Constant register contents: (1.0, 0.0, 0.5, 1.0)
InlineAsm::DeclareConstant(constant, 1.0, 0.0, 0.5, 1.0),
// Named uniform ranges (first register, last register, name)
InlineAsm::DeclareUniform(projection[0], projection[3], "projection"),
InlineAsm::DeclareUniform(modelview[0], modelview[3], "modelview"),
InlineAsm::DeclareUniform(light_direction, light_direction, "lightDirection"),
InlineAsm::DeclareUniform(light_ambient, light_ambient, "lightAmbient"),
// Copy the input position and take w from the constant register
{ OpCode::Id::MOV, backup_pos, "xyz", in_pos, "xyz" },
{ OpCode::Id::MOV, backup_pos, "w", constant, "xyzw" }, // TODO: Would like to just specify "w" here! // TODO: Somehow, c4 gets written instead...
// Modelview transform, one DP4 per output component (presumably a 4-component dot product - confirm against the ISA)
{ OpCode::Id::DP4, temp_pos, "x", modelview[0], backup_pos },
{ OpCode::Id::DP4, temp_pos, "y", modelview[1], backup_pos },
{ OpCode::Id::DP4, temp_pos, "z", modelview[2], backup_pos },
{ OpCode::Id::MOV, temp_pos, "w", constant, "xyzw" }, // TODO: Would like to just specify "w" here!
// Projection transform into the position output
{ OpCode::Id::DP4, out_pos, "x", projection[0], temp_pos },
{ OpCode::Id::DP4, out_pos, "y", projection[1], temp_pos },
{ OpCode::Id::DP4, out_pos, "z", projection[2], temp_pos },
{ OpCode::Id::DP4, out_pos, "w", projection[3], temp_pos },
// Texture coordinates: set 0 is passed through, sets 1 and 2 are filled from the constant register
{ OpCode::Id::MOV, out_tex0, in_tex },
{ OpCode::Id::MOV, out_tex1, constant, "yyyw" },
{ OpCode::Id::MOV, out_tex2, constant, "yyyw" },
// Lighting: presumably diffuse = max(0, dot(lightDirection, normal)) scaled by lightAmbient.w, plus lightAmbient - confirm
{ OpCode::Id::DP3, temp_pos, "xyz", light_direction, in_norm },
{ OpCode::Id::MAX, temp_pos, "xyz", constant, "yyy", temp_pos },
{ OpCode::Id::MUL, temp_pos, "xyz", light_ambient, "www", temp_pos },
{ OpCode::Id::ADD, out_col, "xyz", light_ambient, temp_pos },
{ OpCode::Id::MOV, out_col, "w", constant, "xyzw" }, // TODO: Would like to just specify "w" here!
{ OpCode::Id::NOP },
{ OpCode::Id::END }
});
// Writes the precompiled shader binary (shbin) to the file named by argv[1].
//
// Returns 0 on success. Returns 1 -- with a diagnostic on stderr -- when no
// filename was given, the file could not be opened, or the write failed, so
// that scripts and build systems can detect the failure.
int main(int argc, char* argv[])
{
    if (argc < 2) {
        std::cerr << "Error: No filename given" << std::endl;
        return 1;
    }

    std::ofstream file(argv[1], std::ios::binary);
    if (!file) {
        std::cerr << "Error: Could not open " << argv[1] << " for writing" << std::endl;
        return 1;
    }

    // Each uint8_t is inserted as a single character, i.e. as one raw byte.
    std::copy(shbin.begin(), shbin.end(), std::ostream_iterator<uint8_t>(file));

    // Flush explicitly so that a failed write is noticed before success is reported.
    file.flush();
    if (!file) {
        std::cerr << "Error: Failed to write shader to " << argv[1] << std::endl;
        return 1;
    }

    std::cout << "Successfully compiled shader to " << argv[1] << "!" << std::endl;
    return 0;
}

View File

@@ -0,0 +1,249 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <limits>
#include <type_traits>
#ifndef __forceinline
#ifndef _WIN32
#define __forceinline inline __attribute__((always_inline))
#endif
#endif
namespace nihstro {
/*
* Abstract bitfield class
*
* Allows endianness-independent access to individual bitfields within some raw
* integer value. The assembly generated by this class is identical to the
* usage of raw bitfields, so it's a perfectly fine replacement.
*
* For BitField<X,Y,Z>, X is the distance of the bitfield to the LSB of the
* raw value, Y is the length in bits of the bitfield. Z is an integer type
* which determines the sign of the bitfield. Z must have the same size as the
* raw integer.
*
*
* General usage:
*
* Create a new union with the raw integer value as a member.
* Then for each bitfield you want to expose, add a BitField member
* in the union. The template parameters are the bit offset and the number
* of desired bits.
*
* Changes in the bitfield members will then get reflected in the raw integer
* value and vice-versa.
*
*
* Sample usage:
*
* union SomeRegister
* {
* u32 hex;
*
* BitField<0,7,u32> first_seven_bits; // unsigned
* BitField<7,8,u32> next_eight_bits; // unsigned
* BitField<3,15,s32> some_signed_fields; // signed
* };
*
* This is equivalent to the little-endian specific code:
*
* union SomeRegister
* {
* u32 hex;
*
* struct
* {
* u32 first_seven_bits : 7;
* u32 next_eight_bits : 8;
* };
* struct
* {
* u32 : 3; // padding
* s32 some_signed_fields : 15;
* };
* };
*
*
* Caveats:
*
* 1)
* BitField provides automatic casting from and to the storage type where
* appropriate. However, when using non-typesafe functions like printf, an
* explicit cast must be performed on the BitField object to make sure it gets
* passed correctly, e.g.:
* printf("Value: %d", (s32)some_register.some_signed_fields);
*
* 2)
* Not really a caveat, but potentially irritating: This class is used in some
* packed structures that do not guarantee proper alignment. Therefore we have
* to use #pragma pack here not to pack the members of the class, but instead
* to break GCC's assumption that the members of the class are aligned on
* sizeof(StorageType).
* TODO(neobrain): Confirm that this is a proper fix and not just masking
* symptoms.
*/
#pragma pack(1)
// Accessor for `bits` bits located `position` bits above the LSB of a raw
// integer of type T (or of T's underlying type if T is an enumeration).
// Intended to be placed in a union next to the raw value; see the usage notes
// in the comment preceding this class.
//
// All mask and shift arithmetic is carried out in the unsigned variant of the
// storage type and converted back explicitly. This matters for storage types
// narrower than int: integral promotion would otherwise turn ~(uint8_t)0 into
// the int value -1 (yielding an all-ones mask) and would make the signed
// extraction shift around bit 31 instead of the storage type's own sign bit.
template<std::size_t position, std::size_t bits, typename T>
struct BitField
{
private:
    // This constructor might be considered ambiguous:
    // Would it initialize the storage or just the bitfield?
    // Hence, delete it. Use the assignment operator to set bitfield values!
    BitField(T val) = delete;

public:
    // Force default constructor to be created
    // so that we can use this within unions
    BitField() = default;

#ifndef _WIN32
    // We explicitly delete the copy assignment operator here, because the
    // default copy assignment would copy the full storage value, rather than
    // just the bits relevant to this particular bit field.
    // Ideally, we would just implement the copy assignment to copy only the
    // relevant bits, but this requires compiler support for unrestricted
    // unions.
    // MSVC 2013 has no support for this, hence we disable this code on
    // Windows (so that the default copy assignment operator will be used).
    // For any C++11 conformant compiler we delete the operator to make sure
    // we never use this inappropriate operator to begin with.
    // TODO: Implement this operator properly once all target compilers
    // support unrestricted unions.
    // TODO: Actually, deleting and overriding this operator both cause more
    // harm than anything. Instead, it's suggested to never use the copy
    // constructor directly but instead invoke Assign() explicitly.
    // BitField& operator=(const BitField&) = delete;
#endif

    // Stores val in the field; bits of val that do not fit are discarded.
    __forceinline BitField& operator=(T val)
    {
        Assign(val);
        return *this;
    }

    // Implicit read access, equivalent to Value().
    __forceinline operator typename std::add_const<T>::type() const
    {
        return Value();
    }

    // Replaces the field's bits with the low `bits` bits of value and leaves
    // every other bit of the storage untouched.
    __forceinline void Assign(const T& value) {
        const StorageTypeU mask = (StorageTypeU)GetMask();
        const StorageTypeU shifted = (StorageTypeU)((StorageTypeU)(StorageType)value << position);
        storage = (StorageType)(((StorageTypeU)storage & (StorageTypeU)~mask) | (shifted & mask));
    }

    // Returns the field's current value. If T is a signed integer type the
    // value is sign-extended from `bits` bits; otherwise (including all
    // enumerations) it is zero-extended.
    __forceinline typename std::add_const<T>::type Value() const
    {
        const StorageTypeU field = (StorageTypeU)((StorageTypeU)storage & (StorageTypeU)GetMask());
        if (std::numeric_limits<T>::is_signed)
        {
            // Move the field's top bit into the storage type's sign bit, then
            // let the arithmetic right shift replicate it.
            const std::size_t shift = 8 * sizeof(T) - bits;
            const StorageType aligned = (StorageType)(field << (shift - position));
            return (T)(aligned >> shift);
        }
        else
        {
            return (T)(field >> position);
        }
    }

    // Width of the field in bits.
    static constexpr std::size_t NumBits() {
        return bits;
    }

private:
    // StorageType is T for non-enum types and the underlying type of T if
    // T is an enumeration. Note that T is wrapped within an enable_if in the
    // former case to workaround compile errors which arise when using
    // std::underlying_type<T>::type directly.
    typedef typename std::conditional < std::is_enum<T>::value,
        std::underlying_type<T>,
        std::enable_if < true, T >> ::type::type StorageType;

    // Unsigned version of StorageType
    typedef typename std::make_unsigned<StorageType>::type StorageTypeU;

    // Mask with exactly the field's bits set, at the field's position.
    __forceinline StorageType GetMask() const
    {
        // Convert back to StorageTypeU after every step: operands narrower
        // than int are promoted to int by ~, >> and <<.
        const StorageTypeU all_ones = (StorageTypeU)~(StorageTypeU)0;
        const StorageTypeU low_bits = (StorageTypeU)(all_ones >> (8 * sizeof(T) - bits));
        return (StorageType)(StorageTypeU)(low_bits << position);
    }

    StorageType storage;

    static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range");
    // And, you know, just in case people specify something stupid like bits=position=0x80000000
    static_assert(position < 8 * sizeof(T), "Invalid position");
    static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
    static_assert(bits > 0, "Invalid number of bits");
    static_assert(std::is_standard_layout<T>::value, "Invalid base type");
};
/**
* Abstract bit flag class. This is basically a specialization of BitField for single-bit fields.
* Instead of being cast to the underlying type, it acts like a boolean.
*/
// Single-bit BitField that reads and writes as a bool rather than as T.
// The BitField interface itself is hidden behind protected inheritance.
template<std::size_t position, typename T>
struct BitFlag : protected BitField<position, 1, T>
{
private:
    using ParentType = BitField<position, 1, T>;

    // Same ambiguity as in BitField: storage or flag? Hence deleted.
    BitFlag(T val) = delete;

public:
    BitFlag() = default;

#ifndef _WIN32
    BitFlag& operator=(const BitFlag&) = delete;
#endif

    // True if the bit is set.
    __forceinline bool Value() const
    {
        return ParentType::Value() != 0;
    }

    // Sets or clears the bit; all other storage bits are left unchanged.
    __forceinline void Assign(bool value) {
        ParentType::Assign(value);
    }

    __forceinline operator bool() const
    {
        return Value();
    }

    __forceinline BitFlag& operator=(bool val)
    {
        Assign(val);
        return *this;
    }
};
#pragma pack()
} // namespace

View File

@@ -0,0 +1,48 @@
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <limits>
#include "bit_field.h"
namespace nihstro {
/**
 * Converts an IEEE 754 single-precision float to the 24-bit float encoding
 * used for shader constants: 1 sign bit (bit 23), 7 exponent bits with a bias
 * of 63 (bits 16-22) and 16 mantissa bits (bits 0-15).
 *
 * - The mantissa is truncated (not rounded) to its 16 most significant bits.
 * - Values whose exponent is below the representable range become a signed zero.
 * - Values whose exponent is above the representable range, including
 *   infinities, saturate to an all-ones exponent with a zero mantissa; NaNs
 *   keep a non-zero mantissa.
 *   NOTE(review): this assumes the 24-bit format encodes inf/NaN the IEEE way
 *   (all-ones exponent) -- confirm against hardware documentation.
 *
 * @param val Value to convert
 * @return The encoding in the low 24 bits; the upper 8 bits are zero
 */
inline uint32_t to_float24(float val) {
    static_assert(std::numeric_limits<float>::is_iec559, "Compiler does not adhere to IEEE 754");
    static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");

    // Field widths and exponent biases of the source and target formats
    const unsigned f32_mant_bits = 23;
    const unsigned f32_exp_bits = 8;
    const int f32_exp_bias = 127;
    const unsigned f24_mant_bits = 16;
    const unsigned f24_exp_bits = 7;
    const int f24_exp_bias = 63;

    // memcpy is the well-defined way to get at the bit pattern; casting the
    // float to an unrelated type and reading through it violates aliasing rules.
    uint32_t f32_hex;
    std::memcpy(&f32_hex, &val, sizeof(f32_hex));

    const uint32_t f32_exp_max = (1u << f32_exp_bits) - 1;
    const uint32_t f32_mant = f32_hex & ((1u << f32_mant_bits) - 1);
    const uint32_t f32_exp = (f32_hex >> f32_mant_bits) & f32_exp_max;
    const uint32_t sign = f32_hex >> (f32_mant_bits + f32_exp_bits);

    // Re-bias the exponent; a negative result means the value is too small.
    int biased_exp = (int)f32_exp - f32_exp_bias + f24_exp_bias;
    uint32_t mant = (biased_exp >= 0) ? (f32_mant >> (f32_mant_bits - f24_mant_bits)) : 0;

    const int f24_exp_max = (1 << f24_exp_bits) - 1;
    if (biased_exp > f24_exp_max) {
        // Exponent does not fit into 7 bits: saturate instead of letting the
        // exponent wrap around into an unrelated finite value.
        const bool is_nan = (f32_exp == f32_exp_max) && (f32_mant != 0);
        biased_exp = f24_exp_max;
        mant = is_nan ? std::max<uint32_t>(mant, 1u) : 0;
    }
    biased_exp = std::max(0, biased_exp);

    return (sign << (f24_mant_bits + f24_exp_bits))
         | ((uint32_t)biased_exp << f24_mant_bits)
         | mant;
}
} // namespace

View File

@@ -0,0 +1,577 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <algorithm>
#include <array>
#include <initializer_list>
#include <vector>
#include "bit_field.h"
#include "float24.h"
#include "shader_binary.h"
#include "shader_bytecode.h"
namespace nihstro {
// Intermediate, not yet serialized form of a compiled shader, as produced by
// InlineAsm::CompileToRawBinary: one vector per table of the output file.
struct ShaderBinary {
// Instruction words in program order
std::vector<Instruction> program;
// Operand descriptors; arithmetic instructions refer to entries by index
std::vector<SwizzlePattern> swizzle_table;
// One entry per declared output register
std::vector<OutputRegisterInfo> output_table;
// One entry per declared constant
std::vector<ConstantInfo> constant_table;
// Concatenated NUL-terminated uniform names
std::vector<char> symbol_table;
// One entry per declared uniform; each refers to its name by offset into symbol_table
std::vector<UniformInfo> uniform_table;
};
// Destination operand that can be implicitly created either from a
// DestRegister or from a SourceRegister naming a temporary register. This lets
// registers obtained via SourceRegister::MakeTemporary be used as instruction
// destinations.
struct DestRegisterOrTemporary : public DestRegister {
DestRegisterOrTemporary(const DestRegister& oth) : DestRegister(oth) {}
// Throws a string literal (const char*) if oth is not a temporary register.
// Note that the conversion in the initializer list runs before this check.
DestRegisterOrTemporary(const SourceRegister& oth) : DestRegister(DestRegister::FromTypeAndIndex(oth.GetRegisterType(), oth.GetIndex())) {
if (oth.GetRegisterType() != RegisterType::Temporary)
throw "Invalid source register used as output";
}
};
struct InlineAsm {
// Relative addressing selector for two-source arithmetic instructions. The
// enumerator's numeric value (None = 0 ... AL = 3) is written as-is into the
// instruction's address_register_index field.
enum RelativeAddress {
None,
A1,
A2,
AL
};
// Destination write mask parsed from text such as "xz". The resulting bit
// layout is x = 8, y = 4, z = 2, w = 1; an empty string selects all four
// components. Components must be listed in xyzw order; any other string makes
// the lookup throw std::out_of_range.
struct DestMask {
    DestMask(const std::string& mask) : dest_mask(KnownMasks().at(mask)) {}
    DestMask(const char* mask) : DestMask(std::string(mask)) {}
    DestMask(const DestMask&) = default;

    uint32_t dest_mask;

private:
    // Table of every accepted mask string, built on first use.
    static const std::map<std::string,uint32_t>& KnownMasks() {
        static const std::map<std::string,uint32_t> table {
            { "", 15 },
            { "x", 8 },
            { "y", 4 },
            { "z", 2 },
            { "w", 1 },
            { "xy", 12 },
            { "xz", 10 },
            { "xw", 9 },
            { "yz", 6 },
            { "yw", 5 },
            { "zw", 3 },
            { "xyz", 14 },
            { "xyw", 13 },
            { "xzw", 11 },
            { "yzw", 7 },
            { "xyzw", 15 }
        };
        return table;
    }
};
// Source operand swizzle parsed from text of the form [+|-][xyzw]{0,4}.
//
// Selectors start out as the identity swizzle (x, y, z, w); the n-th selector
// character of the string replaces the n-th entry, so a string with fewer
// than four selectors leaves the remaining entries at their identity value
// (e.g. "w" yields w, y, z, w). A leading '-' negates the operand, a leading
// '+' is accepted and ignored.
//
// Throws a string literal (const char*) if the string is longer than five
// characters or contains an invalid selector character.
struct SwizzleMask {
    SwizzleMask(const std::string& swizzle) : negate(false) {
        selectors[0] = SwizzlePattern::Selector::x;
        selectors[1] = SwizzlePattern::Selector::y;
        selectors[2] = SwizzlePattern::Selector::z;
        selectors[3] = SwizzlePattern::Selector::w;

        if (swizzle.empty())
            return;

        if (swizzle.length() > 5) {
            throw "Invalid swizzle mask";
        }

        // Consume the optional sign. Both signs must advance past the sign
        // character, otherwise '-' would be parsed as a selector below and
        // rejected.
        std::string::size_type index = 0;
        if (swizzle[0] == '-') {
            negate = true;
            index = 1;
        } else if (swizzle[0] == '+') {
            index = 1;
        }

        for (std::string::size_type i = 0; i < 4; ++i) {
            if (swizzle.length() <= index + i)
                return;

            switch (swizzle[index + i]) {
            case 'x': selectors[i] = SwizzlePattern::Selector::x; break;
            case 'y': selectors[i] = SwizzlePattern::Selector::y; break;
            case 'z': selectors[i] = SwizzlePattern::Selector::z; break;
            case 'w': selectors[i] = SwizzlePattern::Selector::w; break;
            default:
                throw "Invalid swizzle mask";
            }
        }
    }
    SwizzleMask(const char* swizzle) : SwizzleMask(std::string(swizzle)) {}
    SwizzleMask(const SwizzleMask&) = default;

    SwizzlePattern::Selector selectors[4];
    bool negate;
};
// Kind of entry this InlineAsm object represents: either a regular
// instruction or one of the declarations/directives. CompileToRawBinary and
// CompiledShbinSize handle Regular, Output, Constant, Uniform and EndLoop;
// the remaining kinds currently make them throw.
enum Type {
Regular,
Output,
Constant,
Uniform,
Else,
EndIf,
EndLoop,
Label
} type;
// Creates an entry of the given kind without setting any other member. Used
// by the Declare* factories and for directives such as EndLoop.
InlineAsm(Type type) : type(type) {
}
// Creates an instruction that takes no operands (e.g. NOP or END).
// Throws a string literal (const char*) for opcodes that require operands.
InlineAsm(OpCode opcode) : type(Regular) {
    if (opcode.GetInfo().type != OpCode::Type::Trivial) {
        throw "Invalid opcode used with zero arguments";
    }
    // Clear the whole instruction word first, as all other constructors do;
    // otherwise every bit outside the opcode field would be indeterminate and
    // end up in the compiled binary.
    full_instruction.instr.hex = 0;
    full_instruction.instr.opcode = opcode;
}
// Creates an instruction that takes a single integer register index. Only
// LOOP is supported; any other opcode results in a thrown string literal.
InlineAsm(OpCode opcode, int src) : type(Regular) {
    if (opcode.EffectiveOpCode() == OpCode::Id::LOOP) {
        // if (src.GetRegisterType() != RegisterType::IntUniform)
        //     throw "LOOP argument must be an integer register!";

        // The register index is kept aside and encoded into the instruction
        // when the program is compiled.
        reg_id = src;
        full_instruction.instr.hex = 0;
        full_instruction.instr.opcode = opcode;
    } else {
        throw "Unknown opcode argument";
    }
}
// Creates an arithmetic instruction with a single source operand.
// Throws a string literal (const char*) for non-arithmetic opcodes.
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest, const DestMask& dest_mask,
          SourceRegister src1, const SwizzleMask swizzle_src1 = SwizzleMask{""}) : type(Regular) {
    // Start from an all-zero instruction word and operand descriptor
    full_instruction.instr.hex = 0;
    full_instruction.instr.opcode = opcode;
    full_instruction.swizzle.hex = 0;

    if (opcode.GetInfo().type != OpCode::Type::Arithmetic)
        throw "Unknown inline assmembler command";

    // TODO: Assert valid inputs, considering the field width!
    Instruction& word = full_instruction.instr;
    word.common.dest = dest;
    word.common.src1 = src1;

    SwizzlePattern& descriptor = full_instruction.swizzle;
    descriptor.negate_src1 = swizzle_src1.negate;
    descriptor.dest_mask = dest_mask.dest_mask;
    for (int component = 0; component != 4; ++component)
        descriptor.SetSelectorSrc1(component, swizzle_src1.selectors[component]);
}
// Creates an arithmetic instruction with two source operands and an optional
// relative addressing mode.
// Throws a string literal (const char*) for non-arithmetic opcodes.
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest, const DestMask& dest_mask,
          SourceRegister src1, const SwizzleMask& swizzle_src1,
          SourceRegister src2, const SwizzleMask& swizzle_src2 = "", RelativeAddress addr = None) : type(Regular) {
    // Start from an all-zero instruction word and operand descriptor
    full_instruction.instr.hex = 0;
    full_instruction.instr.opcode = opcode;
    full_instruction.swizzle.hex = 0;

    if (opcode.GetInfo().type != OpCode::Type::Arithmetic)
        throw "Unknown inline assembler command";

    // TODO: Assert valid inputs, considering the field width!
    Instruction& word = full_instruction.instr;
    word.common.dest = dest;
    word.common.src1 = src1;
    word.common.src2 = src2;
    word.common.address_register_index = addr;

    SwizzlePattern& descriptor = full_instruction.swizzle;
    descriptor.negate_src1 = swizzle_src1.negate;
    descriptor.negate_src2 = swizzle_src2.negate;
    descriptor.dest_mask = dest_mask.dest_mask;
    for (int component = 0; component != 4; ++component) {
        descriptor.SetSelectorSrc1(component, swizzle_src1.selectors[component]);
        descriptor.SetSelectorSrc2(component, swizzle_src2.selectors[component]);
    }
}
// Creates a multiply-add style instruction with three source operands.
// Throws a string literal (const char*) for opcodes of any other type.
//
// NOTE(review): CompileToRawBinary has no case for OpCode::Type::MultiplyAdd,
// so instructions built here are currently rejected when compiled.
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest, const DestMask& dest_mask,
          SourceRegister src1, const SwizzleMask& swizzle_src1,
          SourceRegister src2, const SwizzleMask& swizzle_src2,
          SourceRegister src3, const SwizzleMask& swizzle_src3 = {""}) : type(Regular) {
    Instruction& instr = full_instruction.instr;
    instr.hex = 0;
    instr.opcode = opcode;

    SwizzlePattern& swizzle = full_instruction.swizzle;
    swizzle.hex = 0;

    switch(opcode.GetInfo().type) {
    case OpCode::Type::MultiplyAdd:
        // TODO: Assert valid inputs, considering the field width!
        instr.mad.dest = dest;
        instr.mad.src1 = src1;
        instr.mad.src2 = src2;
        instr.mad.src3 = src3;

        swizzle.negate_src1 = swizzle_src1.negate;
        swizzle.negate_src2 = swizzle_src2.negate;
        swizzle.negate_src3 = swizzle_src3.negate;

        swizzle.dest_mask = dest_mask.dest_mask;
        for (int i = 0; i < 4; ++i) {
            swizzle.SetSelectorSrc1(i, swizzle_src1.selectors[i]);
            swizzle.SetSelectorSrc2(i, swizzle_src2.selectors[i]);
            // The third operand's selectors have to be written as well;
            // otherwise swizzle_src3 would only contribute its negate flag.
            swizzle.SetSelectorSrc3(i, swizzle_src3.selectors[i]);
        }
        break;

    default:
        throw "Unknown inline assembler command";
    }
}
// Convenience constructors with implicit swizzle mask
//
// Each overload below forwards to one of the full constructors above,
// substituting "" for every destination mask or source swizzle that the
// caller left out. An empty destination mask writes all four components and
// an empty swizzle is the identity swizzle.

// One source operand, no destination mask
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest,
SourceRegister src1, const SwizzleMask& swizzle_src1 = "") : InlineAsm(opcode, dest, "", src1, swizzle_src1) {}
// Two source operands: every combination of omitted destination mask / src1 swizzle
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest,
SourceRegister src1, const SwizzleMask& swizzle_src1,
SourceRegister src2, const SwizzleMask& swizzle_src2 = "") : InlineAsm(opcode, dest, "", src1, swizzle_src1, src2, swizzle_src2) {}
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest, const DestMask& dest_mask,
SourceRegister src1,
SourceRegister src2, const SwizzleMask& swizzle_src2 = "") : InlineAsm(opcode, dest, dest_mask, src1, "", src2, swizzle_src2) {}
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest,
SourceRegister src1,
SourceRegister src2, const SwizzleMask& swizzle_src2 = "") : InlineAsm(opcode, dest, "", src1, "", src2, swizzle_src2) {}
// Three source operands: every combination of omitted destination mask /
// src1 swizzle / src2 swizzle
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest,
SourceRegister src1, const SwizzleMask& swizzle_src1,
SourceRegister src2, const SwizzleMask& swizzle_src2,
SourceRegister src3, const SwizzleMask& swizzle_src3 = "") : InlineAsm(opcode, dest, "", src1, swizzle_src1, src2, swizzle_src2, src3, swizzle_src3) {}
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest, const DestMask& dest_mask,
SourceRegister src1,
SourceRegister src2, const SwizzleMask& swizzle_src2,
SourceRegister src3, const SwizzleMask& swizzle_src3 = "") : InlineAsm(opcode, dest, dest_mask, src1, "", src2, swizzle_src2, src3, swizzle_src3) {}
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest, const DestMask& dest_mask,
SourceRegister src1, const SwizzleMask& swizzle_src1,
SourceRegister src2,
SourceRegister src3, const SwizzleMask& swizzle_src3 = "") : InlineAsm(opcode, dest, dest_mask, src1, swizzle_src1, src2, "", src3, swizzle_src3) {}
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest,
SourceRegister src1,
SourceRegister src2, const SwizzleMask& swizzle_src2,
SourceRegister src3, const SwizzleMask& swizzle_src3 = "") : InlineAsm(opcode, dest, "", src1, "", src2, swizzle_src2, src3, swizzle_src3) {}
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest,
SourceRegister src1, const SwizzleMask& swizzle_src1,
SourceRegister src2,
SourceRegister src3, const SwizzleMask& swizzle_src3 = "") : InlineAsm(opcode, dest, "", src1, swizzle_src1, src2, "", src3, swizzle_src3) {}
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest, const DestMask& dest_mask,
SourceRegister src1,
SourceRegister src2,
SourceRegister src3, const SwizzleMask& swizzle_src3 = "") : InlineAsm(opcode, dest, dest_mask, src1, "", src2, "", src3, swizzle_src3) {}
InlineAsm(OpCode opcode, DestRegisterOrTemporary dest,
SourceRegister src1,
SourceRegister src2,
SourceRegister src3, const SwizzleMask& swizzle_src3 = "") : InlineAsm(opcode, dest, "", src1, "", src2, "", src3, swizzle_src3) {}
// Builds the declaration that assigns a semantic (position, color, ...) to an
// output register. Throws a string literal (const char*) if reg is not an
// output register.
static InlineAsm DeclareOutput(DestRegister reg, OutputRegisterInfo::Type semantic) {
    if (reg.GetRegisterType() != RegisterType::Output)
        throw "Invalid register used to declare output";

    InlineAsm declaration(Output);
    declaration.reg_id = reg.GetIndex();
    declaration.output_semantic = semantic;
    return declaration;
}
// Builds the declaration that preloads a float register with the four-component
// value (x, y, z, w). Throws a string literal (const char*) if reg is not a
// float uniform register.
static InlineAsm DeclareConstant(SourceRegister reg, float x, float y, float z, float w) {
    if (reg.GetRegisterType() != RegisterType::FloatUniform)
        throw "Invalid source register used to declare shader constant";

    InlineAsm declaration(Constant);
    const float components[4] = { x, y, z, w };
    for (int i = 0; i < 4; ++i)
        declaration.value[i] = components[i];
    declaration.constant_type = ConstantInfo::Float;
    declaration.reg_id = reg.GetIndex();
    return declaration;
}
// Builds the declaration that gives the register range [reg_first, reg_last]
// a symbolic name. Neither the register types nor the ordering of the two
// registers is validated here.
static InlineAsm DeclareUniform(SourceRegister reg_first, SourceRegister reg_last, const std::string& name) {
    InlineAsm declaration(Uniform);
    declaration.name = name;
    declaration.reg_id = reg_first.GetIndex();
    declaration.reg_id_last = reg_last.GetIndex();
    return declaration;
}
// Payload of the entry. Which members are meaningful depends on `type`; the
// constructors and Declare* factories only set the ones they need and leave
// the rest uninitialized.
// TODO: Group this into a union once MSVC supports unrestricted unions!
// Regular: the instruction word and the operand descriptor belonging to it
struct {
Instruction instr;
SwizzlePattern swizzle;
} full_instruction;
// Uniform: symbol name
std::string name;
// Output/Constant: register index; Uniform: first register index; LOOP: integer register index
unsigned reg_id;
// Uniform: last register index of the range
unsigned reg_id_last;
// Output: semantic assigned to the register
OutputRegisterInfo::Type output_semantic;
// Constant: data type of the constant (only ConstantInfo::Float is produced)
ConstantInfo::Type constant_type;
// Constant: x, y, z, w components
float value[4];
static size_t FindSwizzlePattern(const SwizzlePattern& pattern, std::vector<SwizzlePattern>& swizzle_table) {
auto it = std::find_if(swizzle_table.begin(), swizzle_table.end(), [&](const SwizzlePattern& candidate) { return candidate.hex == pattern.hex; });
size_t ret = std::distance(swizzle_table.begin(), it);
if (it == swizzle_table.end())
swizzle_table.push_back(pattern);
return ret;
}
// Translates a list of inline assembler entries into the tables of a
// ShaderBinary.
//
// - Regular entries are appended to the program. Arithmetic instructions get
//   their operand descriptor registered in (or looked up from) the swizzle
//   table; LOOP gets its integer register and destination offset filled in.
// - Output, Constant and Uniform declarations are appended to their tables.
// - EndLoop only serves as a marker for the preceding LOOP.
//
// Throws a string literal (const char*) for unsupported entry kinds (Else,
// EndIf, Label), unsupported instruction types and for a LOOP without a
// matching EndLoop.
// NOTE(review): OpCode::Type::MultiplyAdd has no case below, so instructions
// created by the three-source constructor end up in "Unknown instruction".
static const ShaderBinary CompileToRawBinary(std::initializer_list<InlineAsm> code_) {
    ShaderBinary binary;
    // Local copy, since instructions are patched in place before being emitted
    std::vector<InlineAsm> code(code_);

    for (size_t i = 0; i < code.size(); ++i) {
        InlineAsm& command = code[i];

        switch (command.type) {
        case Regular:
        {
            auto& instr = command.full_instruction.instr;

            switch (instr.opcode.Value().GetInfo().type) {
            case OpCode::Type::Trivial:
                break;

            case OpCode::Type::Arithmetic:
                instr.common.operand_desc_id = FindSwizzlePattern(command.full_instruction.swizzle, binary.swizzle_table);
                break;

            case OpCode::Type::UniformFlowControl:
                if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::LOOP) {
                    instr.flow_control.int_uniform_id = command.reg_id;

                    // The destination offset is the program index of this LOOP
                    // plus the number of instructions up to the closing EndLoop.
                    // TODO: Handle nested LOOPs
                    size_t dest_offset = binary.program.size();
                    bool found_end_loop = false;
                    for (size_t i2 = i + 1; i2 < code.size(); ++i2) {
                        if (code[i2].type == Regular) {
                            ++dest_offset;
                        } else if (code[i2].type == EndLoop) {
                            found_end_loop = true;
                            break;
                        }
                    }

                    // Checked after the scan so that a LOOP which is the very
                    // last entry (nothing to scan) is reported as well.
                    if (!found_end_loop) {
                        throw "No closing EndLoop directive found";
                    }

                    instr.flow_control.dest_offset = dest_offset;
                } else {
                    throw "Unknown flow control instruction";
                }
                break;

            default:
                throw "Unknown instruction";
            }

            binary.program.push_back(instr);
            break;
        }

        case Output:
        {
            OutputRegisterInfo output;
            output.type = command.output_semantic;
            output.id = command.reg_id;
            output.component_mask = 0xF; // TODO: Make configurable
            binary.output_table.push_back(output);
            break;
        }

        case Constant:
        {
            ConstantInfo constant;
            constant.type = command.constant_type;
            constant.regid = command.reg_id;

            switch (command.constant_type) {
            case ConstantInfo::Float:
                constant.f.x = to_float24(command.value[0]);
                constant.f.y = to_float24(command.value[1]);
                constant.f.z = to_float24(command.value[2]);
                constant.f.w = to_float24(command.value[3]);
                break;

            default:
                throw "Unknown constant type";
            }
            binary.constant_table.push_back(constant);
            break;
        }

        case Uniform:
        {
            UniformInfo uniform;
            // The name is appended to the symbol table right below, so its
            // offset is the table's current size.
            uniform.basic.symbol_offset = binary.symbol_table.size();
            uniform.basic.reg_start = command.reg_id + 16; // TODO: Hardcoded against float uniforms
            uniform.basic.reg_end = command.reg_id_last + 16;
            binary.uniform_table.push_back(uniform);

            std::copy(command.name.begin(), command.name.end(), std::back_inserter(binary.symbol_table));
            binary.symbol_table.push_back('\0');
            break;
        }

        case EndLoop:
            break;

        default:
            throw "Unknown type";
        }
    }
    return binary;
}
// Overestimates the actual size
static const size_t CompiledShbinSize(std::initializer_list<InlineAsm> code) {
size_t size = 0;
size += sizeof(DVLBHeader);
size += sizeof(DVLPHeader);
size += sizeof(uint32_t) + sizeof(DVLEHeader); // Currently only one DVLE is supported
for (const auto& command : code) {
switch (command.type) {
case Regular:
size += sizeof(Instruction);
size += sizeof(SwizzleInfo);
break;
case Output:
size += sizeof(OutputRegisterInfo);
break;
case Constant:
size += sizeof(ConstantInfo);
break;
case Uniform:
size += command.name.size() + 1;
size += sizeof(UniformInfo);
break;
case EndLoop:
break;
default:
throw "Unknown command type";
}
}
return size;
}
// Compiles `code` and serializes the result into a single byte vector laid
// out as: DVLB header + one DVLE offset, DVLP header, DVLE header, program,
// swizzle table, output table, constant table, uniform table, symbol table.
// Offsets stored in the DVLP/DVLE headers are relative to the start of the
// respective header.
//
// NOTE(review): the tables are written through casted pointers into the byte
// buffer; this relies on the platform tolerating the resulting (possibly
// unaligned) accesses.
static const std::vector<uint8_t> CompileToShbin(std::initializer_list<InlineAsm> code) {
// Allocate the (zero-filled) upper bound up front; the vector is trimmed to
// the number of bytes actually used at the end.
std::vector<uint8_t> ret(CompiledShbinSize(code));
ShaderBinary bin = CompileToRawBinary(code);
// DVLB header, directly followed by the offset of the single DVLE
struct {
DVLBHeader header;
uint32_t dvle_offset;
} *dvlb = (decltype(dvlb))ret.data();
dvlb->header.magic_word = DVLBHeader::MAGIC_WORD;
dvlb->header.num_programs = 1;
// DVLP header
unsigned dvlp_offset = sizeof(*dvlb);
DVLPHeader* dvlp = (DVLPHeader*)&ret.data()[dvlp_offset];
dvlp->magic_word = DVLPHeader::MAGIC_WORD;
// DVLE header
unsigned dvle_offset = dvlb->dvle_offset = dvlp_offset + sizeof(DVLPHeader);
DVLEHeader* dvle = (DVLEHeader*)&ret.data()[dvle_offset];
dvle->magic_word = DVLEHeader::MAGIC_WORD;
dvlb->dvle_offset = dvle_offset;
// Program code
unsigned binary_offset = dvle_offset + sizeof(DVLEHeader);
dvlp->binary_offset = binary_offset - dvlp_offset;
dvlp->binary_size_words = bin.program.size();
std::copy(bin.program.begin(), bin.program.end(), (Instruction*)&ret.data()[binary_offset]);
// Swizzle table: each pattern is stored together with a zeroed "unknown" field
unsigned swizzle_table_offset = binary_offset + bin.program.size() * sizeof(Instruction);
dvlp->swizzle_info_offset = swizzle_table_offset - dvlp_offset;
dvlp->swizzle_info_num_entries = bin.swizzle_table.size();
SwizzleInfo* swizzle_table_ptr = (SwizzleInfo*)&ret.data()[swizzle_table_offset];
for (const auto& swizzle : bin.swizzle_table) {
swizzle_table_ptr->pattern = swizzle;
swizzle_table_ptr->unknown = 0;
swizzle_table_ptr++;
}
// Output register table
unsigned output_table_offset = swizzle_table_offset + bin.swizzle_table.size() * sizeof(SwizzleInfo);
OutputRegisterInfo* output_table_ptr = (OutputRegisterInfo*)&ret.data()[output_table_offset];
for (const auto& output : bin.output_table) {
*output_table_ptr = output;
output_table_ptr++;
}
dvle->output_register_table_offset = output_table_offset - dvle_offset;
dvle->output_register_table_size = bin.output_table.size();
// Constant table
unsigned constant_table_offset = output_table_offset + bin.output_table.size() * sizeof(OutputRegisterInfo);
ConstantInfo* constant_table_ptr = (ConstantInfo*)&ret.data()[constant_table_offset];
for (const auto& constant : bin.constant_table) {
*constant_table_ptr = constant;
constant_table_ptr++;
}
dvle->constant_table_offset = constant_table_offset - dvle_offset;
dvle->constant_table_size = bin.constant_table.size();
// Uniform table: only the `basic` part of each entry is written, as one
// 64-bit value per uniform
// TODO: UniformTable spans more than the written data.. fix this design issue :/
unsigned uniform_table_offset = constant_table_offset + bin.constant_table.size() * sizeof(ConstantInfo);
uint64_t* uniform_table_ptr = (uint64_t*)&ret.data()[uniform_table_offset];
for (const auto& uniform : bin.uniform_table) {
*uniform_table_ptr = reinterpret_cast<const uint64_t&>(uniform.basic);
uniform_table_ptr++;
}
dvle->uniform_table_offset = uniform_table_offset - dvle_offset;
dvle->uniform_table_size = bin.uniform_table.size();
// Symbol table (NUL-terminated uniform names)
unsigned symbol_table_offset = uniform_table_offset + bin.uniform_table.size() * sizeof(uint64_t);
std::copy(bin.symbol_table.begin(), bin.symbol_table.end(), &ret.data()[symbol_table_offset]);
dvle->symbol_table_offset = symbol_table_offset - dvle_offset;
dvle->symbol_table_size = bin.symbol_table.size();
// Drop the unused tail of the overestimated buffer
ret.resize(symbol_table_offset + bin.symbol_table.size());
return ret;
}
};
} // namespace

View File

@@ -0,0 +1,384 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include <vector>
#include <ostream>
#include <boost/optional.hpp>
#include <boost/variant.hpp>
#include "source_tree.h"
#include "shader_binary.h"
#include "shader_bytecode.h"
namespace nihstro {
// Swizzle mask as written in assembly source: the first num_components
// entries of `components` are meaningful.
struct InputSwizzlerMask {
    int num_components;

    enum Component : uint8_t {
        x = 0,
        y = 1,
        z = 2,
        w = 3,
    };
    std::array<Component,4> components;

    // The identity mask "xyzw"
    static InputSwizzlerMask FullMask() {
        InputSwizzlerMask full = { 4, {x,y,z,w} };
        return full;
    }

    // Compares the component count and all four component slots
    bool operator == (const InputSwizzlerMask& oth) const {
        if (num_components != oth.num_components)
            return false;
        return components == oth.components;
    }

    // TODO: Move to implementation?
    // Prints the component's letter, or "?" for a value outside x..w
    friend std::ostream& operator<<(std::ostream& os, const Component& v) {
        const char* letter = "?";
        if (v == x)
            letter = "x";
        else if (v == y)
            letter = "y";
        else if (v == z)
            letter = "z";
        else if (v == w)
            letter = "w";
        return os << letter;
    }

    // Prints the used components back to back, or "(empty_mask)" if there are none
    friend std::ostream& operator<<(std::ostream& os, const InputSwizzlerMask& v) {
        if (v.num_components == 0)
            return os << "(empty_mask)";

        int printed = 0;
        while (printed < v.num_components) {
            os << v.components[printed];
            ++printed;
        }
        return os;
    }

    friend std::string to_string(const Component& v) {
        std::stringstream buffer;
        buffer << v;
        return buffer.str();
    }

    friend std::string to_string(const InputSwizzlerMask& v) {
        std::stringstream buffer;
        buffer << v;
        return buffer.str();
    }
};
using Identifier = std::string;

// A sign, i.e. +1 or -1
using Sign = int;

// An unsigned magnitude together with a separate sign factor.
struct IntegerWithSign {
    int sign;
    unsigned value;

    // Returns sign * value as a signed integer.
    int GetValue() const {
        // `int * unsigned` would be evaluated in unsigned arithmetic (a negative sign
        // wraps around) and only then narrowed back to int. Multiply in a wider signed
        // type so the product itself is always exact.
        const long long product = static_cast<long long>(sign) * static_cast<long long>(value);
        return static_cast<int>(product);
    }
};
// Raw index + address register index
struct IndexExpression : std::vector<boost::variant<IntegerWithSign, Identifier>> {
int GetCount() const {
return this->size();
}
bool IsRawIndex(int arg) const {
return (*this)[arg].which() == 0;
}
int GetRawIndex(int arg) const {
return boost::get<IntegerWithSign>((*this)[arg]).GetValue();
}
bool IsAddressRegisterIdentifier(int arg) const {
return (*this)[arg].which() == 1;
}
Identifier GetAddressRegisterIdentifier(int arg) const {
return boost::get<Identifier>((*this)[arg]);
}
};
struct Expression {
struct SignedIdentifier {
boost::optional<Sign> sign;
Identifier identifier;
} signed_identifier;
boost::optional<IndexExpression> index;
std::vector<InputSwizzlerMask> swizzle_masks;
int GetSign() const {
if (!RawSign())
return +1;
else
return *RawSign();
}
const Identifier& GetIdentifier() const {
return RawIdentifier();
}
bool HasIndexExpression() const {
return static_cast<bool>(RawIndex());
}
const IndexExpression& GetIndexExpression() const {
return *RawIndex();
}
const std::vector<InputSwizzlerMask>& GetSwizzleMasks() const {
return RawSwizzleMasks();
}
private:
const boost::optional<Sign>& RawSign() const {
return signed_identifier.sign;
}
const Identifier& RawIdentifier() const {
return signed_identifier.identifier;
}
const boost::optional<IndexExpression>& RawIndex() const {
return index;
}
const std::vector<InputSwizzlerMask>& RawSwizzleMasks() const {
return swizzle_masks;
}
};
// One side of a flow-control condition: an identifier, an inversion flag and an
// optional swizzle mask.
struct ConditionInput {
    bool invert;
    Identifier identifier;
    boost::optional<InputSwizzlerMask> swizzler_mask;

    bool GetInvertFlag() const {
        return invert;
    }

    const Identifier& GetIdentifier() const {
        return identifier;
    }

    bool HasSwizzleMask() const {
        return !!swizzler_mask;
    }

    // Only valid when HasSwizzleMask() is true (dereferences the optional).
    const InputSwizzlerMask& GetSwizzleMask() const {
        return *swizzler_mask;
    }
};
// A condition made of two inputs combined by a flow-control operator.
struct Condition {
    ConditionInput input1;
    Instruction::FlowControlType::Op op;
    ConditionInput input2;

    // Left-hand input.
    const ConditionInput& GetFirstInput() const {
        return input1;
    }

    // Operator combining both inputs.
    Instruction::FlowControlType::Op GetConditionOp() const {
        return op;
    }

    // Right-hand input.
    const ConditionInput& GetSecondInput() const {
        return input2;
    }
};
using StatementLabel = std::string;

// A parsed instruction: an opcode plus its argument expressions.
struct StatementInstruction {
    OpCode opcode;
    std::vector<Expression> expressions;

    StatementInstruction() = default;

    // TODO: Obsolete constructor?
    // Left implicit on purpose: callers may rely on the OpCode -> StatementInstruction conversion.
    StatementInstruction(const OpCode& opcode) : opcode(opcode) {
    }

    // Takes the arguments by const reference so they are copied exactly once, into the
    // member (a by-value `const` parameter was copied at the call site and again here).
    StatementInstruction(const OpCode& opcode, const std::vector<Expression>& expressions) : opcode(opcode), expressions(expressions) {
    }

    const OpCode& GetOpCode() const {
        return opcode;
    }

    const std::vector<Expression>& GetArguments() const {
        return expressions;
    }
};

// Float operations carry no data beyond a generic instruction statement.
using FloatOpInstruction = StatementInstruction;
// A parsed compare instruction: opcode, source expressions and compare operators.
// The getters read elements 0 and 1 without bounds checks, so both vectors must
// hold at least two entries before they are called.
struct CompareInstruction {
    OpCode opcode;
    std::vector<Expression> arguments;
    std::vector<Instruction::Common::CompareOpType::Op> ops;

    const OpCode& GetOpCode() const {
        return opcode;
    }

    // First source operand.
    const Expression& GetSrc1() const {
        return arguments.front();
    }

    // Second source operand.
    const Expression& GetSrc2() const {
        return arguments[1];
    }

    // First compare operator.
    Instruction::Common::CompareOpType::Op GetOp1() const {
        return ops.front();
    }

    // Second compare operator.
    Instruction::Common::CompareOpType::Op GetOp2() const {
        return ops[1];
    }
};
// A parsed flow-control instruction: opcode, target label, and an optional
// return label and condition.
struct FlowControlInstruction {
    OpCode opcode;
    std::string target_label;
    boost::optional<std::string> return_label;
    boost::optional<Condition> condition;

    const OpCode& GetOpCode() const {
        return opcode;
    }

    const std::string& GetTargetLabel() const {
        return target_label;
    }

    bool HasReturnLabel() const {
        return !!return_label;
    }

    // Only valid when HasReturnLabel() is true (dereferences the optional).
    const std::string& GetReturnLabel() const {
        return *return_label;
    }

    bool HasCondition() const {
        return !!condition;
    }

    // Only valid when HasCondition() is true (dereferences the optional).
    const Condition& GetCondition() const {
        return *condition;
    }
};
struct SetEmitInstruction {
OpCode opcode;
unsigned vertex_id;
struct Flags {
boost::optional<bool> primitive_flag;
boost::optional<bool> invert_flag;
} flags;
bool PrimitiveFlag() const {
return flags.primitive_flag && *flags.primitive_flag;
}
bool InvertFlag() const {
return flags.invert_flag && *flags.invert_flag;
}
};
// A parsed alias declaration.
struct StatementDeclaration {
    std::string alias_name;

    // Aliased identifier (start register).
    Identifier identifier_start;

    // Aliased identifier (end register).
    boost::optional<Identifier> identifier_end;

    // Swizzle mask referring to the aliased identifier.
    boost::optional<InputSwizzlerMask> swizzle_mask;

    // Optional trailing data: constant values and/or an output semantic.
    struct Extra {
        std::vector<float> constant_value;
        boost::optional<OutputRegisterInfo::Type> output_semantic;
    } extra;
};
// Settings handed to the parser constructors.
// Deliberately empty for now: there currently is no context.
struct ParserContext {
};
// Entry point for parsing assembly statements.
// Only the interface is declared here; the state lives in the forward-declared
// ParserImpl, owned through `impl`.
// Every function takes `begin` by reference and `end` by value. The Parse* functions
// write their result through the trailing pointer argument and return bool —
// presumably true on success; confirm against the implementation.
struct Parser {
using Iterator = SourceTreeIterator;
Parser(const ParserContext& context);
// Declared out of line, so ParserImpl need not be a complete type in this header.
~Parser();
// Skip whitespaces, blank lines, and comments; returns number of line breaks skipped.
unsigned Skip(Iterator& begin, Iterator end);
// Skip to the next line
void SkipSingleLine(Iterator& begin, Iterator end);
// Parse alias declaration including line ending
bool ParseDeclaration(Iterator& begin, Iterator end, StatementDeclaration* declaration);
// Parse label declaration including line ending
bool ParseLabel(Iterator& begin, Iterator end, StatementLabel* label);
// Parse nothing but a single opcode
bool ParseOpCode(Iterator& begin, Iterator end, OpCode* opcode);
// Parse trivial instruction including line ending
bool ParseSimpleInstruction(Iterator& begin, Iterator end, OpCode* opcode);
// Parse float instruction including line ending
bool ParseFloatOp(Iterator& begin, Iterator end, FloatOpInstruction* content);
// Parse compare instruction including line ending
bool ParseCompare(Iterator& begin, Iterator end, CompareInstruction* content);
// Parse flow control instruction including line ending
bool ParseFlowControl(Iterator& begin, Iterator end, FlowControlInstruction* content);
// Parse SetEmit instruction including line ending
bool ParseSetEmit(Iterator& begin, Iterator end, SetEmitInstruction* content);
private:
// Opaque implementation type; defined outside this header.
struct ParserImpl;
std::unique_ptr<ParserImpl> impl;
};
} // namespace

View File

@@ -0,0 +1,376 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
// Enable this for detailed XML overview of parser results
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include "nihstro/parser_assembly.h"
#include "nihstro/source_tree.h"
#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"
// Short namespace aliases for the Boost.Spirit/Phoenix components used by the grammars below.
// NOTE(review): these aliases and the using-declarations/directive that follow sit at
// header scope, so they are visible in every file that includes this header.
namespace spirit = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::qi::ascii;
namespace phoenix = boost::phoenix;
// Placeholders brought into scope for use in semantic actions.
using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;
using namespace nihstro;
// Adapt common parser data structures for use with boost::spirit
// Each entry lists a member's type and name; the names must match the struct definitions.
BOOST_FUSION_ADAPT_STRUCT(
Expression::SignedIdentifier,
(boost::optional<Sign>, sign)
(Identifier, identifier)
)
BOOST_FUSION_ADAPT_STRUCT(
Expression,
(Expression::SignedIdentifier, signed_identifier)
(boost::optional<IndexExpression>, index)
(std::vector<InputSwizzlerMask>, swizzle_masks)
)
// Maps rule tags to human-readable diagnostic messages.
// Only the message pointers are stored, so the strings passed to Add() must outlive
// this object.
class Diagnostics
{
public:
    // Add a new diagnostic message corresponding to the specified rule tag
    // (replaces any message previously registered for the same tag)
    void Add(const std::string& tag, const char* diagnostic) {
        entries[tag] = diagnostic;
    }

    // Lookup the diagnostic of the specified rule tag and return it (or nullptr if it can't be found)
    const char* operator [](const char* tag) const {
        const auto found = entries.find(tag);
        return (found != entries.end()) ? found->second : nullptr;
    }

private:
    std::map<std::string, const char*> entries;
};
// Function object that turns a parse failure into a formatted message and throws it.
struct ErrorHandler
{
    // Reports a void result type for any five argument types
    // (presumably consumed by phoenix::function's result protocol — confirm).
    template <class, class, class, class, class>
    struct result { typedef void type; };

    // Builds a three-line message and throws it as a std::string:
    //   1. the diagnostic registered for info.tag, or "Expected <tag>" if there is none,
    //   2. the text from `begin` up to the first '\n' (or `end`), indented by four spaces,
    //   3. a '^' placed under the position `where`.
    // This function never returns normally.
    template <class D, class B, class E, class W, class I>
    void operator ()(const D& diagnostics, B begin, E end, W where, const I& info) const
    {
        const spirit::utf8_string& tag(info.tag);

        // Prefer the registered diagnostic; otherwise fall back to a generic message.
        // `fallback` has to outlive `diagnostic`, which may point into it.
        std::string fallback;
        const char* diagnostic = diagnostics[tag.c_str()];
        if (diagnostic == nullptr) {
            fallback.reserve(25 + tag.length());
            fallback = "Expected ";
            fallback += tag;
            diagnostic = fallback.c_str();
        }

        const auto line_end = std::find(begin, end, '\n');
        const std::string source_line(begin, line_end);
        const std::string caret_padding(4 + std::distance(begin, where), ' ');

        std::stringstream err;
        err << diagnostic << std::endl
            << std::string(4, ' ') << source_line << std::endl
            << caret_padding << '^' << std::endl;
        throw err.str();
    }
};

// Phoenix wrapper around ErrorHandler; declared here, defined elsewhere.
extern phoenix::function<ErrorHandler> error_handler;
// Skipper grammar: consumes blanks and comments between tokens.
template<typename Iterator>
struct AssemblySkipper : public qi::grammar<Iterator> {
    AssemblySkipper() : AssemblySkipper::base_type(skip) {
        // A comment starts with "//", '#' or ';' and runs up to (not including) the end of line.
        comments = (qi::lit("//") | '#' | ';') >> *(qi::char_ - qi::eol);

        // Skip one or more comments or blank characters.
        skip = +(comments | ascii::blank);
    }

    qi::rule<Iterator> comments;
    qi::rule<Iterator> skip;
};
namespace std {
static std::ostream& operator<<(std::ostream& os, const OpCode& opcode) {
// TODO: Should print actual opcode here..
return os << static_cast<uint32_t>(static_cast<OpCode::Id>(opcode));
}
}
template<typename Iterator>
struct CommonRules {
using Skipper = AssemblySkipper<Iterator>;
CommonRules(const ParserContext& context);
// Rule-ified symbols, which can be assigned names
qi::rule<Iterator, Skipper> peek_identifier;
// Building blocks
qi::rule<Iterator, std::string(), Skipper> identifier;
qi::rule<Iterator, Expression(), Skipper> expression;
qi::rule<Iterator, Skipper> end_of_statement;
qi::symbols<char, OpCode> opcodes_trivial;
qi::symbols<char, OpCode> opcodes_compare;
std::array<qi::symbols<char, OpCode>, 4> opcodes_float; // indexed by number of arguments
std::array<qi::symbols<char, OpCode>, 2> opcodes_flowcontrol;
qi::symbols<char, OpCode> opcodes_setemit;
qi::symbols<char, int> signs;
qi::symbols<char, InputSwizzlerMask::Component> swizzlers;
qi::rule<Iterator, InputSwizzlerMask(), Skipper> swizzle_mask;
Diagnostics diagnostics;
private:
qi::rule<Iterator, IndexExpression(), Skipper> index_expression;
qi::rule<Iterator, boost::variant<IntegerWithSign, Identifier>(), Skipper> index_expression_first_term;
qi::rule<Iterator, boost::variant<IntegerWithSign, Identifier>(), Skipper> index_expression_following_terms;
// Empty rule
qi::rule<Iterator, Skipper> opening_bracket;
qi::rule<Iterator, Skipper> closing_bracket;
qi::rule<Iterator, IntegerWithSign(), Skipper> sign_with_uint;
qi::rule<Iterator, unsigned int(), Skipper> uint_after_sign;
};
// Grammar producing a bare OpCode.
// `common` is declared before the reference members, which have the same types and
// names as CommonRules members (presumably bound to them by the constructor — confirm).
template<typename Iterator, bool require_end_of_line>
struct TrivialOpParser : qi::grammar<Iterator, OpCode(), AssemblySkipper<Iterator>> {
    using Skipper = AssemblySkipper<Iterator>;

    TrivialOpParser(const ParserContext& context);

    CommonRules<Iterator> common;

    qi::symbols<char, OpCode>&                opcodes_trivial;
    qi::symbols<char, OpCode>&                opcodes_compare;
    std::array<qi::symbols<char, OpCode>, 4>& opcodes_float; // indexed by number of arguments
    std::array<qi::symbols<char, OpCode>, 2>& opcodes_flowcontrol;

    // Rule-ified symbols, which can be assigned names
    qi::rule<Iterator, OpCode(), Skipper> opcode;

    // Compounds
    qi::rule<Iterator, OpCode(), Skipper> trivial_instruction;
    qi::rule<Iterator, Skipper>&          end_of_statement;

    Diagnostics diagnostics;
};
// Grammar producing a FloatOpInstruction.
// The four-element rule arrays line up with `opcodes_float`, which CommonRules
// documents as indexed by number of arguments.
template<typename Iterator>
struct FloatOpParser : qi::grammar<Iterator, FloatOpInstruction(), AssemblySkipper<Iterator>> {
    using Skipper = AssemblySkipper<Iterator>;

    FloatOpParser(const ParserContext& context);

    CommonRules<Iterator> common;

    std::array<qi::symbols<char, OpCode>, 4>& opcodes_float;

    // Rule-ified symbols, which can be assigned names
    qi::rule<Iterator, OpCode(), Skipper> opcode[4];

    // Building blocks
    qi::rule<Iterator, Expression(), Skipper>&             expression;
    qi::rule<Iterator, std::vector<Expression>(), Skipper> expression_chain[4]; // sequence of instruction arguments
    qi::rule<Iterator, Skipper>&                           end_of_statement;

    // Compounds
    qi::rule<Iterator, FloatOpInstruction(), Skipper> float_instr[4];
    qi::rule<Iterator, FloatOpInstruction(), Skipper> float_instruction;

    // Utility
    qi::rule<Iterator, Skipper> not_comma;

    Diagnostics diagnostics;
};
// Grammar producing a CompareInstruction.
template<typename Iterator>
struct CompareParser : qi::grammar<Iterator, CompareInstruction(), AssemblySkipper<Iterator>> {
    using Skipper       = AssemblySkipper<Iterator>;
    using CompareOp     = Instruction::Common::CompareOpType;
    using CompareOpEnum = CompareOp::Op;

    CompareParser(const ParserContext& context);

    CommonRules<Iterator> common;

    qi::symbols<char, OpCode>&       opcodes_compare;
    qi::symbols<char, CompareOpEnum> compare_ops;

    // Rule-ified symbols, which can be assigned debug names
    qi::rule<Iterator, OpCode(), Skipper>                     opcode;
    qi::rule<Iterator, CompareOpEnum(), Skipper>              compare_op;
    qi::rule<Iterator, std::vector<CompareOpEnum>(), Skipper> two_ops;

    // Building blocks
    qi::rule<Iterator, Expression(), Skipper>&             expression;
    qi::rule<Iterator, std::vector<Expression>(), Skipper> two_expressions;
    qi::rule<Iterator, Skipper>&                           end_of_statement;

    // Compounds
    qi::rule<Iterator, CompareInstruction(), Skipper> instr[1];
    qi::rule<Iterator, CompareInstruction(), Skipper> instruction;

    // Utility
    qi::rule<Iterator, Skipper> not_comma;

    Diagnostics diagnostics;
};
// Grammar producing a FlowControlInstruction.
// The two-element rule arrays line up with the two-element `opcodes_flowcontrol` table.
template<typename Iterator>
struct FlowControlParser : qi::grammar<Iterator, FlowControlInstruction(), AssemblySkipper<Iterator>> {
    using Skipper         = AssemblySkipper<Iterator>;
    using ConditionOp     = Instruction::FlowControlType;
    using ConditionOpEnum = Instruction::FlowControlType::Op;

    FlowControlParser(const ParserContext& context);

    CommonRules<Iterator> common;

    std::array<qi::symbols<char, OpCode>, 2>& opcodes_flowcontrol;
    qi::symbols<char, ConditionOpEnum>        condition_ops;

    // Rule-ified symbols, which can be assigned debug names
    qi::rule<Iterator, OpCode(), Skipper>          opcode[2];
    qi::rule<Iterator, ConditionOpEnum(), Skipper> condition_op;

    // Building blocks
    qi::rule<Iterator, Expression(), Skipper>&        expression;
    qi::rule<Iterator, std::string(), Skipper>&       identifier;
    qi::rule<Iterator, InputSwizzlerMask(), Skipper>& swizzle_mask;
    qi::rule<Iterator, ConditionInput(), Skipper>     condition_input;
    qi::rule<Iterator, Condition(), Skipper>          condition;
    qi::rule<Iterator, Skipper>&                      end_of_statement;

    // Compounds
    qi::rule<Iterator, FlowControlInstruction(), Skipper> instr[2];
    qi::rule<Iterator, FlowControlInstruction(), Skipper> flow_control_instruction;

    // Utility
    qi::rule<Iterator, Skipper>         not_comma;
    qi::rule<Iterator, bool(), Skipper> negation;

    Diagnostics diagnostics;
};
// Grammar producing a SetEmitInstruction.
template<typename Iterator>
struct SetEmitParser : qi::grammar<Iterator, SetEmitInstruction(), AssemblySkipper<Iterator>> {
    using Skipper = AssemblySkipper<Iterator>;

    SetEmitParser(const ParserContext& context);

    CommonRules<Iterator> common;

    qi::symbols<char, OpCode>& opcodes_setemit;

    // Rule-ified symbols, which can be assigned debug names
    qi::rule<Iterator, OpCode(), Skipper>                    opcode;
    qi::rule<Iterator, unsigned int(), Skipper>              vertex_id;
    qi::rule<Iterator, bool(), Skipper>                      prim_flag;
    qi::rule<Iterator, bool(), Skipper>                      inv_flag;
    qi::rule<Iterator, SetEmitInstruction::Flags(), Skipper> flags;

    // Building blocks
    qi::rule<Iterator, Skipper>& end_of_statement;

    // Compounds
    qi::rule<Iterator, SetEmitInstruction(), Skipper> setemit_instruction;

    // Utility
    qi::rule<Iterator, Skipper>         not_comma;
    qi::rule<Iterator, bool(), Skipper> negation;

    Diagnostics diagnostics;
};
// Grammar producing a StatementLabel.
template<typename Iterator>
struct LabelParser : qi::grammar<Iterator, StatementLabel(), AssemblySkipper<Iterator>> {
    using Skipper = AssemblySkipper<Iterator>;

    LabelParser(const ParserContext& context);

    CommonRules<Iterator> common;

    // Shared building blocks (same types and names as the CommonRules members)
    qi::rule<Iterator, Skipper>&                end_of_statement;
    qi::rule<Iterator, std::string(), Skipper>& identifier;

    // The label rule itself
    qi::rule<Iterator, std::string(), Skipper> label;

    Diagnostics diagnostics;
};
// Grammar producing a StatementDeclaration (alias declaration).
template<typename Iterator>
struct DeclarationParser : qi::grammar<Iterator, StatementDeclaration(), AssemblySkipper<Iterator>> {
    using Skipper = AssemblySkipper<Iterator>;

    DeclarationParser(const ParserContext& context);

    CommonRules<Iterator> common;

    qi::rule<Iterator, Skipper>                                              string_as;
    qi::rule<Iterator, std::vector<float>(), Skipper>                        dummy_const;
    qi::rule<Iterator, boost::optional<OutputRegisterInfo::Type>(), Skipper> dummy_semantic;

    qi::symbols<char, OutputRegisterInfo::Type> output_semantics;

    // Rule-ified symbols, which can be assigned names
    qi::rule<Iterator, OutputRegisterInfo::Type(),Skipper> output_semantics_rule;

    // Building blocks
    qi::rule<Iterator, std::string(), Skipper>&                identifier;
    qi::rule<Iterator, InputSwizzlerMask(), Skipper>&          swizzle_mask;
    qi::rule<Iterator, std::vector<float>(), Skipper>          constant;
    qi::rule<Iterator, std::string(), Skipper>                 alias_identifier;
    qi::rule<Iterator, StatementDeclaration::Extra(), Skipper> const_or_semantic;
    qi::rule<Iterator, Skipper>&                               end_of_statement;

    qi::rule<Iterator, StatementDeclaration(), Skipper> declaration;

    Diagnostics diagnostics;
};

// Iterator type the grammars are used with.
using ParserIterator = SourceTreeIterator;

View File

@@ -0,0 +1,207 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <fstream>
#include <map>
#include <string>
#include <sstream>
#include <vector>
#include "nihstro/shader_binary.h"
namespace nihstro {
// Tables describing one shader program (code, swizzles, constants, labels, output
// registers, uniforms) plus helpers that derive printable names for registers.
struct ShaderInfo {
std::vector<Instruction> code;
std::vector<SwizzleInfo> swizzle_info;
std::vector<ConstantInfo> constant_table;
std::vector<LabelInfo> label_table;
// Label names keyed by a 32-bit offset (see HasLabel/GetLabel).
std::map<uint32_t, std::string> labels;
std::vector<OutputRegisterInfo> output_register_info;
std::vector<UniformInfo> uniform_table;
// Empties every table.
void Clear() {
code.clear();
swizzle_info.clear();
constant_table.clear();
label_table.clear();
labels.clear();
output_register_info.clear();
uniform_table.clear();
}
// Returns true if a label is registered for `offset`.
bool HasLabel(uint32_t offset) const {
return labels.find(offset) != labels.end();
}
// Returns the label registered for `offset`, or an empty string if there is none.
std::string GetLabel (uint32_t offset) const {
auto it = labels.find(offset);
if (it != labels.end())
return it->second;
return "";
}
// Returns a printable name for destination register `dest`:
//  - dest < 0x8: the semantic names of all matching output_register_info entries,
//    joined by "|";
//  - otherwise, for temporary registers: "temp_" followed by the index in hex;
//  - "(?)" if neither produced a name. Note that a dest < 0x8 without any matching
//    entry also ends up here, because the temporary branch is an `else if`.
// T must be comparable against integers and output_info.id, and provide
// GetRegisterType() and GetIndex().
template<typename T>
std::string LookupDestName(const T& dest, const SwizzlePattern& swizzle) const {
if (dest < 0x8) {
// TODO: This one still needs some prettification in case
// multiple output_infos describing this output register
// are found.
std::string ret;
for (const auto& output_info : output_register_info) {
if (dest != output_info.id)
continue;
// Only display output register name if the output components it's mapped to are
// actually written to.
// swizzle.dest_mask and output_info.component_mask use different bit order,
// so we can't simply AND them bitwise to check this.
int matching_mask = 0;
for (int i = 0; i < 4; ++i)
matching_mask |= output_info.component_mask & (swizzle.DestComponentEnabled(i) << i);
if (!matching_mask)
continue;
// Add a vertical bar so that we have at least *some*
// indication that we hit multiple matches.
if (!ret.empty())
ret += "|";
ret += output_info.GetSemanticName();
}
if (!ret.empty())
return ret;
} else if (dest.GetRegisterType() == RegisterType::Temporary) {
// TODO: Not sure if uniform_info can assign names to temporary registers.
// If that is the case, we should check the table for better names here.
std::stringstream stream;
stream << "temp_" << std::hex << dest.GetIndex();
return stream.str();
}
return "(?)";
}
// Returns a printable name for source register `source`. Tried in this order:
//  1. for non-temporary registers, the first uniform_table entry whose
//     [reg_start, reg_end] range contains the register; array entries get an
//     "[index]" suffix, and a non-zero addr_reg_index adds "a<addr_reg_index - 1>";
//  2. "const_<regid>" if a constant_table entry matches `source - 0x20`;
//  3. "temp_<hex index>" for temporary registers;
//  4. "(?)" otherwise.
template<class T>
std::string LookupSourceName(const T& source, unsigned addr_reg_index) const {
if (source.GetRegisterType() != RegisterType::Temporary) {
for (const auto& uniform_info : uniform_table) {
// Magic numbers are needed because uniform info registers use the
// range 0..0x10 for input registers and 0x10...0x70 for uniform registers,
// i.e. there is a "gap" at the temporary registers, for which no
// name can be assigned (?).
int off = (source.GetRegisterType() == RegisterType::Input) ? 0 : 0x10;
if (source - off >= uniform_info.basic.reg_start &&
source - off <= uniform_info.basic.reg_end) {
std::string name = uniform_info.name;
std::string index;
// An entry spanning more than one register is printed as an array.
bool is_array = uniform_info.basic.reg_end != uniform_info.basic.reg_start;
if (is_array) {
index += std::to_string(source - off - uniform_info.basic.reg_start);
}
// addr_reg_index == 0 means no address register is printed; N > 0 prints "a<N-1>".
if (addr_reg_index != 0) {
index += (is_array) ? " + " : "";
index += "a" + std::to_string(addr_reg_index - 1);
}
if (!index.empty())
name += "[" + index + "]";
return name;
}
}
}
// Constants and uniforms really are the same internally
for (const auto& constant_info : constant_table) {
if (source - 0x20 == constant_info.regid) {
return "const_" + std::to_string(constant_info.regid.Value());
}
}
// For temporary registers, we at least print "temp_X" if no better name could be found.
if (source.GetRegisterType() == RegisterType::Temporary) {
std::stringstream stream;
stream << "temp_" << std::hex << source.GetIndex();
return stream.str();
}
return "(?)";
}
};
// Reader for shader binary files: exposes the DVLB/DVLP/DVLE headers and the
// ShaderInfo tables.
// The index-taking getters use operator[] without bounds checks.
class ShbinParser {
public:
    void ReadHeaders(const std::string& filename);

    void ReadDVLE(int dvle_index);

    const DVLBHeader& GetDVLBHeader() const {
        return dvlb_header;
    }

    const DVLPHeader& GetDVLPHeader() const {
        return dvlp_header;
    }

    // Header of the DVLE at position `index`.
    const DVLEHeader& GetDVLEHeader(int index) const {
        return dvle_headers[index];
    }

    // Filename stored for the DVLE at position `dvle_index`.
    const std::string& GetFilename(int dvle_index) const {
        return dvle_filenames[dvle_index];
    }

private:
    // Reads a null-terminated string from the given offset
    std::string ReadSymbol(uint32_t offset);

    std::fstream file;

    DVLBHeader dvlb_header;
    DVLPHeader dvlp_header;

    uint32_t dvlp_offset;

public:
    // Per-DVLE data; the three vectors are indexed by the same DVLE index.
    std::vector<uint32_t>    dvle_offsets;
    std::vector<DVLEHeader>  dvle_headers;
    std::vector<std::string> dvle_filenames;

    ShaderInfo shader_info;

    uint32_t main_offset;
};
} // namespace

View File

@@ -0,0 +1,36 @@
// Copyright 2015 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
// NOTE(review): this header uses std::string but includes nothing itself; it relies on
// the including file having pulled in <string> first.
namespace nihstro {
// Forward declaration only. The function below returns a SourceTree by value, so callers
// need the complete type (presumably from source_tree.h — confirm).
struct SourceTree;
// Preprocesses the assembly file `filename` and returns the result as a SourceTree.
// Declared here; defined elsewhere.
SourceTree PreprocessAssemblyFile(const std::string& filename);
} // namespace

View File

@@ -0,0 +1,258 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <cstdint>
#include "shader_bytecode.h"
namespace nihstro {
#pragma pack(1)
// Leading header of a shader binary ("DVLB" container).
struct DVLBHeader {
    enum : uint32_t {
        MAGIC_WORD = 0x424C5644, // "DVLB"
    };

    uint32_t magic_word;
    uint32_t num_programs;

    // DVLE offset table with num_programs entries follows
};
static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size");
// Header of the "DVLP" section: locates the program binary and the swizzle table.
struct DVLPHeader {
    enum : uint32_t {
        MAGIC_WORD = 0x504C5644, // "DVLP"
    };

    uint32_t magic_word;
    uint32_t version;

    uint32_t binary_offset; // relative to DVLP start
    uint32_t binary_size_words;

    uint32_t swizzle_info_offset;
    uint32_t swizzle_info_num_entries;

    uint32_t filename_symbol_offset;
};
static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size");
// Header of a "DVLE" section: entry points plus offset/size pairs for each table.
struct DVLEHeader {
    enum : uint32_t {
        MAGIC_WORD = 0x454c5644, // "DVLE"
    };

    enum class ShaderType : uint8_t {
        VERTEX = 0,
        GEOMETRY = 1,
    };

    uint32_t magic_word;
    uint16_t pad1;
    ShaderType type;
    uint8_t pad2;

    // Offset within binary blob to program entry point
    uint32_t main_offset_words;
    uint32_t endmain_offset_words;

    uint32_t pad3;
    uint32_t pad4;

    // Table of constant values for single registers
    uint32_t constant_table_offset;
    uint32_t constant_table_size; // number of entries

    // Table of program code labels
    uint32_t label_table_offset;
    uint32_t label_table_size;

    // Table of output registers and their semantics
    uint32_t output_register_table_offset;
    uint32_t output_register_table_size;

    // Table of uniforms (which may span multiple registers) and their values
    uint32_t uniform_table_offset;
    uint32_t uniform_table_size;

    // Table of null-terminated strings referenced by the tables above
    uint32_t symbol_table_offset;
    uint32_t symbol_table_size;
};
static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size");
// One swizzle table entry: a swizzle pattern followed by a word of unknown meaning.
struct SwizzleInfo {
    SwizzlePattern pattern;
    uint32_t unknown;
};
// One constant table entry: a descriptor word (type and register id) followed by the value.
struct ConstantInfo {
    enum Type : uint32_t {
        Bool = 0,
        Int = 1,
        Float = 2
    };

    // Descriptor word: bits 0-1 hold the type, bits 16-23 the register id.
    union {
        uint32_t full_first_word;

        BitField<0, 2, Type> type;
        BitField<16, 8, uint32_t> regid;
    };

    // Value storage; the three views overlay the same four words.
    union {
        uint32_t value_hex[4];

        // Boolean view (lowest bit of the first word)
        BitField<0, 1, uint32_t> b;

        // Integer view (four single-byte components)
        struct {
            uint8_t x;
            uint8_t y;
            uint8_t z;
            uint8_t w;
        } i;

        // Float view
        struct {
            // All of these are float24 values!
            uint32_t x;
            uint32_t y;
            uint32_t z;
            uint32_t w;
        } f;
    };
};
// One label table entry.
struct LabelInfo {
    BitField<0, 8, uint32_t> id; // lowest 8 bits of the first word
    uint32_t program_offset;
    uint32_t unk;
    uint32_t name_offset;
};
union OutputRegisterInfo {
enum Type : uint64_t {
POSITION = 0,
QUATERNION = 1,
COLOR = 2,
TEXCOORD0 = 3,
TEXCOORD1 = 5,
TEXCOORD2 = 6,
VIEW = 8,
};
OutputRegisterInfo& operator =(const OutputRegisterInfo& oth) {
hex.Assign(oth.hex);
return *this;
}
BitField< 0, 64, uint64_t> hex;
BitField< 0, 16, Type> type;
BitField<16, 16, uint64_t> id;
BitField<32, 4, uint64_t> component_mask;
BitField<32, 32, uint64_t> descriptor;
const std::string GetMask() const {
std::string ret;
if (component_mask & 1) ret += "x";
if (component_mask & 2) ret += "y";
if (component_mask & 4) ret += "z";
if (component_mask & 8) ret += "w";
return ret;
}
const std::string GetSemanticName() const {
static const std::map<Type, std::string> map = {
{ POSITION, "out.pos" },
{ QUATERNION, "out.quat" },
{ COLOR, "out.col" },
{ TEXCOORD0, "out.tex0" },
{ TEXCOORD1, "out.tex1" },
{ TEXCOORD2, "out.tex2" },
{ VIEW, "out.view" }
};
auto it = map.find(type);
if (it != map.end())
return it->second;
else
return "out.unk";
}
};
// One uniform table entry: the raw record (`basic`) plus its resolved name.
struct UniformInfo {
    struct {
        // Classifies a raw register number by the range it falls into:
        //   [0x00, 0x10) input, [0x10, 0x70) float uniform, [0x70, 0x74) int uniform,
        //   [0x78, 0x88) bool uniform, anything else unknown.
        static RegisterType GetType(uint32_t reg) {
            if (reg < 0x10)
                return RegisterType::Input;
            if (reg < 0x70)
                return RegisterType::FloatUniform;
            if (reg < 0x74)
                return RegisterType::IntUniform;
            if (reg >= 0x78 && reg < 0x88)
                return RegisterType::BoolUniform;
            return RegisterType::Unknown;
        }

        // Index of `reg` within its register class, or -1 for unknown registers.
        static int GetIndex(uint32_t reg) {
            uint32_t base = 0;
            switch (GetType(reg)) {
            case RegisterType::Input:        base = 0x00; break;
            case RegisterType::FloatUniform: base = 0x10; break;
            case RegisterType::IntUniform:   base = 0x70; break;
            case RegisterType::BoolUniform:  base = 0x78; break;
            default: return -1;
            }
            return reg - base;
        }

        RegisterType GetStartType() const {
            return GetType(reg_start);
        }

        RegisterType GetEndType() const {
            return GetType(reg_end);
        }

        int GetStartIndex() const {
            return GetIndex(reg_start);
        }

        int GetEndIndex() const {
            return GetIndex(reg_end);
        }

        uint32_t symbol_offset;

        // First and last register of the uniform, packed into one 32-bit word.
        union {
            BitField< 0, 16, uint32_t> reg_start;
            BitField<16, 16, uint32_t> reg_end; // inclusive
        };
    } basic;

    std::string name;
};
#pragma pack()
} // namespace

View File

@@ -0,0 +1,813 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <cstdint>
#include <map>
#include <stdexcept>
#include <string>
#include <sstream>
#include "bit_field.h"
namespace nihstro {
// Kinds of registers a shader operand can refer to.
enum class RegisterType {
    Input,
    Output,
    Temporary,
    FloatUniform,
    IntUniform,
    BoolUniform,
    Address,
    ConditionalCode,
    Unknown
};

// Returns the textual prefix used when printing a register of the given type
// (e.g. "v" for inputs, "c" for float uniforms). Types without a prefix here
// (Address, or any value outside the enum) yield an empty string.
static std::string GetRegisterName(RegisterType type) {
    const char* prefix = "";
    switch (type) {
    case RegisterType::Input:           prefix = "v";  break;
    case RegisterType::Output:          prefix = "o";  break;
    case RegisterType::Temporary:       prefix = "r";  break;
    case RegisterType::FloatUniform:    prefix = "c";  break;
    case RegisterType::IntUniform:      prefix = "i";  break;
    case RegisterType::BoolUniform:     prefix = "b";  break;
    case RegisterType::ConditionalCode: prefix = "cc"; break;
    case RegisterType::Unknown:         prefix = "u";  break;
    default:                            break;
    }
    return prefix;
}
/**
 * Raw index of a source operand register.
 *
 * The raw value encodes both the register type and its index:
 *   0x00..0x0f  input registers
 *   0x10..0x1f  temporary registers
 *   0x20..      float uniform registers
 *
 * The default constructor is intentionally left defaulted (it does not
 * initialize `value`) so the type stays trivially constructible.
 */
struct SourceRegister {
    SourceRegister() = default;

    SourceRegister(uint32_t value) {
        this->value = value;
    }

    // Register type implied by the range the raw value falls into.
    RegisterType GetRegisterType() const {
        if (value < 0x10)
            return RegisterType::Input;
        else if (value < 0x20)
            return RegisterType::Temporary;
        else
            return RegisterType::FloatUniform;
    }

    // Index of the register relative to the first register of its type.
    int GetIndex() const {
        // GetRegisterType() only ever yields Input, Temporary or FloatUniform;
        // routing the last case through "default" guarantees every path returns.
        switch (GetRegisterType()) {
        case RegisterType::Input:
            return value;
        case RegisterType::Temporary:
            return value - 0x10;
        default:
            return value - 0x20;
        }
    }

    // Builds a register from a type and a type-relative index.
    // Only Input, Temporary and FloatUniform are valid source register types;
    // any other type yields raw value 0 instead of an indeterminate value.
    static const SourceRegister FromTypeAndIndex(RegisterType type, int index) {
        SourceRegister reg(0);
        if (type == RegisterType::Input)
            reg.value = index;
        else if (type == RegisterType::Temporary)
            reg.value = index + 0x10;
        else if (type == RegisterType::FloatUniform)
            reg.value = index + 0x20;
        else {
            // TODO: Should throw an exception or something.
        }
        return reg;
    }

    static const SourceRegister MakeInput(int index) {
        return FromTypeAndIndex(RegisterType::Input, index);
    }

    static const SourceRegister MakeTemporary(int index) {
        return FromTypeAndIndex(RegisterType::Temporary, index);
    }

    static const SourceRegister MakeFloat(int index) {
        return FromTypeAndIndex(RegisterType::FloatUniform, index);
    }

    // Printable name: the type prefix followed by the type-relative index.
    std::string GetName() const {
        return GetRegisterName(GetRegisterType()) + std::to_string(GetIndex());
    }

    operator uint32_t() const {
        return value;
    }

    // The arithmetic/bitwise operators below act on the raw value.
    template<typename T>
    decltype(uint32_t{} - T{}) operator -(const T& oth) const {
        return value - oth;
    }

    template<typename T>
    decltype(uint32_t{} & T{}) operator &(const T& oth) const {
        return value & oth;
    }

    uint32_t operator &(const SourceRegister& oth) const {
        return value & oth.value;
    }

    uint32_t operator ~() const {
        return ~value;
    }

private:
    uint32_t value;
};
/**
 * Raw index of a destination operand register.
 *
 * Raw values below 0x10 denote output registers; all other values are
 * treated as temporary registers (offset by 0x10).
 *
 * The default constructor is intentionally left defaulted (it does not
 * initialize `value`) so the type stays trivially constructible.
 */
struct DestRegister {
    DestRegister() = default;

    DestRegister(uint32_t value) {
        this->value = value;
    }

    // Register type implied by the range the raw value falls into.
    RegisterType GetRegisterType() const {
        if (value < 0x10)
            return RegisterType::Output;
        else
            return RegisterType::Temporary;
    }

    // Index of the register relative to the first register of its type.
    int GetIndex() const {
        // GetRegisterType() only ever yields Output or Temporary, so no
        // further case is reachable.
        if (GetRegisterType() == RegisterType::Output)
            return value;
        return value - 0x10;
    }

    // Builds a register from a type and a type-relative index.
    // Unsupported types yield raw value 0 instead of an indeterminate value.
    static const DestRegister FromTypeAndIndex(RegisterType type, int index) {
        DestRegister reg(0);
        if (type == RegisterType::Output)
            reg.value = index;
        else if (type == RegisterType::Temporary)
            reg.value = index + 0x10;
        else if (type == RegisterType::FloatUniform) // TODO: Wait what? These shouldn't be writable..
            reg.value = index + 0x20;
        else {
            // TODO: Should throw an exception or something.
        }
        return reg;
    }

    static const DestRegister MakeOutput(int index) {
        return FromTypeAndIndex(RegisterType::Output, index);
    }

    static const DestRegister MakeTemporary(int index) {
        return FromTypeAndIndex(RegisterType::Temporary, index);
    }

    // Printable name: the type prefix followed by the type-relative index.
    std::string GetName() const {
        return GetRegisterName(GetRegisterType()) + std::to_string(GetIndex());
    }

    operator uint32_t() const {
        return value;
    }

    // The arithmetic/bitwise operators below act on the raw value.
    template<typename T>
    decltype(uint32_t{} - T{}) operator -(const T& oth) const {
        return value - oth;
    }

    template<typename T>
    decltype(uint32_t{} & T{}) operator &(const T& oth) const {
        return value & oth;
    }

    uint32_t operator &(const DestRegister& oth) const {
        return value & oth.value;
    }

    uint32_t operator ~() const {
        return ~value;
    }

private:
    uint32_t value;
};
/**
 * A shader opcode value together with static metadata about each opcode.
 *
 * The wrapped value is either a hardware opcode (0x00..0x3f) or one of the
 * assembler-internal pseudo-instructions (0x40 and above).
 *
 * The default constructor is intentionally left defaulted (it does not
 * initialize `value`) so the type stays trivially constructible.
 */
struct OpCode {
    enum class Id : uint32_t {
        ADD     = 0x00,
        DP3     = 0x01,
        DP4     = 0x02,
        DPH     = 0x03,   // Dot product of Vec4 and Vec3; the Vec3 is made into
                          // a Vec4 by appending 1.0 as the fourth component
        DST     = 0x04,   // Distance, same as in vs_3_0
        EX2     = 0x05,   // Base-2 exponential
        LG2     = 0x06,   // Base-2 logarithm
        LIT     = 0x07,   // Clamp for lighting
        MUL     = 0x08,
        SGE     = 0x09,   // Set to 1.0 if SRC1 is greater or equal to SRC2
        SLT     = 0x0A,   // Set to 1.0 if SRC1 is less than SRC2
        FLR     = 0x0B,
        MAX     = 0x0C,
        MIN     = 0x0D,
        RCP     = 0x0E,   // Reciprocal
        RSQ     = 0x0F,   // Reciprocal of square root

        MOVA    = 0x12,   // Move to Address Register
        MOV     = 0x13,

        DPHI    = 0x18,
        DSTI    = 0x19,
        SGEI    = 0x1A,
        SLTI    = 0x1B,

        BREAK   = 0x20,
        NOP     = 0x21,
        END     = 0x22,
        BREAKC  = 0x23,
        CALL    = 0x24,
        CALLC   = 0x25,
        CALLU   = 0x26,
        IFU     = 0x27,
        IFC     = 0x28,
        LOOP    = 0x29,
        EMIT    = 0x2A,
        SETEMIT = 0x2B,
        JMPC    = 0x2C,
        JMPU    = 0x2D,
        CMP     = 0x2E,   // LSB opcode bit ignored

        // lower 3 opcode bits ignored for these
        MADI    = 0x30,
        MAD     = 0x38,   // lower 3 opcode bits ignored

        // Pseudo-instructions, used internally by the assembler
        PSEUDO_INSTRUCTION_START = 0x40,

        GEN_IF = PSEUDO_INSTRUCTION_START, // Generic IF (IFC or IFU)
        ELSE,
        ENDIF,
        GEN_CALL,       // Generic CALL (CALL, CALLC, or CALLU)
        GEN_JMP,        // Generic JMP (JMPC or JMPU)
        //RET,          // Return from function (not supported yet)
        ENDLOOP,
    };

    enum class Type {
        Trivial,            // 3dbrew format 0
        Arithmetic,         // 3dbrew format 1
        Conditional,        // 3dbrew format 2
        UniformFlowControl, // 3dbrew format 3
        SetEmit,            // 3dbrew format 4
        MultiplyAdd,        // 3dbrew format 5
        Unknown
    };

    // Static description of one opcode: its encoding format, a bit set of
    // format-specific properties (`subtype`) and its mnemonic.
    struct Info {
        Type type;

        // Arithmetic
        enum : uint32_t {
            OpDesc      = 1,
            Src1        = 2,
            Src2        = 4,
            Idx         = 8,
            Dest        = 16,
            SrcInversed = 32,
            CompareOps  = 64,
            MOVA        = 128 | OpDesc | Src1 | Idx,
            OneArgument = OpDesc | Src1 | Idx | Dest,
            TwoArguments = OneArgument | Src2,
            Compare = OpDesc | Idx | Src1 | Src2 | CompareOps,
        };

        // Flow Control
        enum : uint32_t {
            HasUniformIndex = 1,
            HasCondition    = 2,
            HasExplicitDest = 4,   // target code given explicitly and context-independently (contrary to e.g. BREAKC)
            HasFinishPoint  = 8,   // last instruction until returning to caller
            HasAlternative  = 16,  // has an "else" branch
            LOOP            = 32,

            BREAKC          = HasCondition,

            JMP             = HasExplicitDest,
            JMPC            = JMP | HasCondition,
            JMPU            = JMP | HasUniformIndex,

            CALL            = JMP | HasFinishPoint,
            CALLC           = CALL | HasCondition,
            CALLU           = CALL | HasUniformIndex,
            IFU             = CALLU | HasAlternative,
            IFC             = CALLC | HasAlternative,
        };

        enum : uint32_t {
            FullAndBool,
            SimpleAndInt,
        };

        uint32_t subtype;

        const char* name;

        // Number of operands (destination included) of an arithmetic opcode;
        // 0 for every other opcode type.
        // TODO: Deprecate.
        size_t NumArguments() const {
            if (type == Type::Arithmetic) {
                if (subtype & Src2)
                    return 3;
                else if (subtype & Src1)
                    return 2;
            }

            return 0;
        }
    };

    OpCode() = default;

    OpCode(Id value) {
        this->value = static_cast<uint32_t>(value);
    }

    OpCode(uint32_t value) {
        this->value = value;
    }

    // Canonical id of the opcode: MAD, MADI and CMP ignore some of their
    // low opcode bits, so all of their encodings collapse to a single id.
    Id EffectiveOpCode() const {
        uint32_t op = static_cast<uint32_t>(value);
        if (static_cast<Id>(op & ~0x7) == Id::MAD)
            return Id::MAD;
        else if (static_cast<Id>(op & ~0x7) == Id::MADI)
            return Id::MADI;
        else if (static_cast<Id>(op & ~0x1) == Id::CMP)
            return Id::CMP;
        else
            return static_cast<Id>(value);
    }

    // Metadata for this opcode. Values without a table entry (pseudo-
    // instructions and anything else >= 0x40) map to the "UNK" entry instead
    // of reading past the end of the table.
    const Info& GetInfo() const {
        static const Info unknown_info = { Type::Unknown, 0, "UNK" };

        // Indexed by the raw opcode value (0x00..0x3f).
        static const Info info_table[] = {
            // 0x00
            { Type::Arithmetic,         Info::TwoArguments, "add"     },
            { Type::Arithmetic,         Info::TwoArguments, "dp3"     },
            { Type::Arithmetic,         Info::TwoArguments, "dp4"     },
            { Type::Arithmetic,         Info::TwoArguments, "dph"     },
            { Type::Arithmetic,         Info::TwoArguments, "dst"     },
            { Type::Arithmetic,         Info::OneArgument,  "exp"     },
            { Type::Arithmetic,         Info::OneArgument,  "log"     },
            { Type::Arithmetic,         Info::OneArgument,  "lit"     },
            { Type::Arithmetic,         Info::TwoArguments, "mul"     },
            { Type::Arithmetic,         Info::TwoArguments, "sge"     },
            { Type::Arithmetic,         Info::TwoArguments, "slt"     },
            { Type::Arithmetic,         Info::OneArgument,  "flr"     },
            { Type::Arithmetic,         Info::TwoArguments, "max"     },
            { Type::Arithmetic,         Info::TwoArguments, "min"     },
            { Type::Arithmetic,         Info::OneArgument,  "rcp"     },
            { Type::Arithmetic,         Info::OneArgument,  "rsq"     },
            // 0x10
            unknown_info,
            unknown_info,
            { Type::Arithmetic,         Info::MOVA,         "mova"    },
            { Type::Arithmetic,         Info::OneArgument,  "mov"     },
            unknown_info,
            unknown_info,
            unknown_info,
            unknown_info,
            // 0x18
            { Type::Arithmetic,         Info::TwoArguments | Info::SrcInversed, "dphi" },
            { Type::Arithmetic,         Info::TwoArguments | Info::SrcInversed, "dsti" },
            { Type::Arithmetic,         Info::TwoArguments | Info::SrcInversed, "sgei" },
            { Type::Arithmetic,         Info::TwoArguments | Info::SrcInversed, "slti" },
            unknown_info,
            unknown_info,
            unknown_info,
            unknown_info,
            // 0x20
            { Type::Trivial,            0,                  "break"   },
            { Type::Trivial,            0,                  "nop"     },
            { Type::Trivial,            0,                  "end"     },
            { Type::Conditional,        Info::BREAKC,       "breakc"  },
            { Type::Conditional,        Info::CALL,         "call"    },
            { Type::Conditional,        Info::CALLC,        "callc"   },
            { Type::UniformFlowControl, Info::CALLU,        "callu"   },
            { Type::UniformFlowControl, Info::IFU,          "ifu"     },
            { Type::Conditional,        Info::IFC,          "ifc"     },
            { Type::UniformFlowControl, Info::LOOP,         "loop"    },
            { Type::Trivial,            0,                  "emit"    },
            { Type::SetEmit,            0,                  "setemit" },
            { Type::Conditional,        Info::JMPC,         "jmpc"    },
            { Type::Conditional,        Info::JMPU,         "jmpu"    },
            { Type::Arithmetic,         Info::Compare,      "cmp"     },
            { Type::Arithmetic,         Info::Compare,      "cmp"     },
            // 0x30
            { Type::MultiplyAdd,        Info::SrcInversed,  "madi"    },
            { Type::MultiplyAdd,        Info::SrcInversed,  "madi"    },
            { Type::MultiplyAdd,        Info::SrcInversed,  "madi"    },
            { Type::MultiplyAdd,        Info::SrcInversed,  "madi"    },
            { Type::MultiplyAdd,        Info::SrcInversed,  "madi"    },
            { Type::MultiplyAdd,        Info::SrcInversed,  "madi"    },
            { Type::MultiplyAdd,        Info::SrcInversed,  "madi"    },
            { Type::MultiplyAdd,        Info::SrcInversed,  "madi"    },
            // 0x38
            { Type::MultiplyAdd,        0,                  "mad"     },
            { Type::MultiplyAdd,        0,                  "mad"     },
            { Type::MultiplyAdd,        0,                  "mad"     },
            { Type::MultiplyAdd,        0,                  "mad"     },
            { Type::MultiplyAdd,        0,                  "mad"     },
            { Type::MultiplyAdd,        0,                  "mad"     },
            { Type::MultiplyAdd,        0,                  "mad"     },
            { Type::MultiplyAdd,        0,                  "mad"     }
        };
        static_assert(sizeof(info_table) / sizeof(info_table[0]) == 0x40,
                      "info_table must cover every 6-bit opcode");

        if (value >= sizeof(info_table) / sizeof(info_table[0]))
            return unknown_info;

        return info_table[value];
    }

    operator Id() const {
        return static_cast<Id>(value);
    }

    // Shifts the raw value; the result is wrapped in an OpCode again.
    OpCode operator << (size_t bits) const {
        return value << bits;
    }

    template<typename T>
    decltype(uint32_t{} - T{}) operator -(const T& oth) const {
        return value - oth;
    }

    uint32_t operator &(const OpCode& oth) const {
        return value & oth.value;
    }

    uint32_t operator ~() const {
        return ~value;
    }

private:
    uint32_t value;
};
} // namespace nihstro
// std::make_unsigned specializations for the register and opcode wrapper
// types; each one maps the wrapper type to itself.
// NOTE(review): presumably these exist so that BitField (bit_field.h) can use
// the wrappers as field value types - confirm against bit_field.h. The C++
// standard does not sanction user specializations of <type_traits> templates,
// so this relies on the compilers in use tolerating it.
namespace std {
template<>
struct make_unsigned<nihstro::SourceRegister> {
using type = nihstro::SourceRegister;
};
template<>
struct make_unsigned<nihstro::DestRegister> {
using type = nihstro::DestRegister;
};
template<>
struct make_unsigned<nihstro::OpCode> {
using type = nihstro::OpCode;
};
}
namespace nihstro {
#pragma pack(1)
// One 32-bit shader instruction word.
//
// Every member of this union is a different view of the same 32 bits
// (asserted below to be exactly 4 bytes). `hex` is the raw word, `opcode`
// selects the instruction, and the nested unions (`common`, `flow_control`,
// `mad`, `setemit`) describe the remaining bits for the individual
// instruction formats.
//
// The field declarations are consistent with BitField<first bit, width, type>
// (e.g. a 5-bit field at 0x07 is followed by a field at 0x0c).
// NOTE(review): BitField/BitFlag are declared in bit_field.h - confirm there.
union Instruction {
// Copies the raw instruction word.
Instruction& operator =(const Instruction& instr) {
hex = instr.hex;
return *this;
}
// Raw instruction word
uint32_t hex;
// Opcode field: 6 bits starting at bit 0x1a
BitField<0x1a, 0x6, OpCode> opcode;
// General notes:
//
// When two input registers are used, one of them uses a 5-bit index while the other
// one uses a 7-bit index. This is because at most one floating point uniform may be used
// as an input.
// Format used e.g. by arithmetic instructions and comparisons
union Common { // TODO: Remove name
BitField<0x00, 0x7, uint32_t> operand_desc_id;
// Returns the first source register; when is_inverted is true the
// inverted-layout field (src1i) is read instead of src1.
const SourceRegister GetSrc1(bool is_inverted) const {
if (!is_inverted) {
return src1;
} else {
return src1i;
}
}
// Returns the second source register; when is_inverted is true the
// inverted-layout field (src2i) is read instead of src2.
const SourceRegister GetSrc2(bool is_inverted) const {
if (!is_inverted) {
return src2;
} else {
return src2i;
}
}
/**
* Source inputs may be reordered for certain instructions.
* Use GetSrc1 and GetSrc2 instead to access the input register indices hence.
*/
// Regular layout: 5-bit src2, 7-bit src1
BitField<0x07, 0x5, SourceRegister> src2;
BitField<0x0c, 0x7, SourceRegister> src1;
// Inverted layout: 7-bit src2, 5-bit src1
BitField<0x07, 0x7, SourceRegister> src2i;
BitField<0x0e, 0x5, SourceRegister> src1i;
// Address register value is used for relative addressing of src1 / src2 (inverted)
BitField<0x13, 0x2, uint32_t> address_register_index;
// Comparison operators of a compare instruction, one per component (x and y).
union CompareOpType { // TODO: Make nameless once MSVC supports it
enum Op : uint32_t {
Equal = 0,
NotEqual = 1,
LessThan = 2,
LessEqual = 3,
GreaterThan = 4,
GreaterEqual = 5,
Unk6 = 6,
Unk7 = 7
};
BitField<0x15, 0x3, Op> y;
// Occupies bits 0x18..0x1a and therefore shares bit 0x1a with `opcode`;
// OpCode::Id notes that the lowest opcode bit of CMP is ignored.
BitField<0x18, 0x3, Op> x;
// Returns the operator as it would be written in source ("==", "<", ...);
// the two unknown encodings are printed as "UNK6" / "UNK7".
const std::string ToString(Op op) const {
switch (op) {
case Equal: return "==";
case NotEqual: return "!=";
case LessThan: return "<";
case LessEqual: return "<=";
case GreaterThan: return ">";
case GreaterEqual: return ">=";
case Unk6: return "UNK6";
case Unk7: return "UNK7";
default: return "";
};
}
} compare_op;
// Name of the address register selected by address_register_index;
// empty when the index is 0.
std::string AddressRegisterName() const {
if (address_register_index == 0) return "";
else if (address_register_index == 1) return "a0.x";
else if (address_register_index == 2) return "a0.y";
else /*if (address_register_index == 3)*/ return "aL";
}
BitField<0x15, 0x5, DestRegister> dest;
} common;
// Format used by flow control instructions. Several fields start at the
// same bit (0x16); which one is meaningful depends on the instruction.
union FlowControlType { // TODO: Make nameless once MSVC supports it
enum Op : uint32_t {
Or = 0,
And = 1,
JustX = 2,
JustY = 3
};
BitField<0x00, 0x8, uint32_t> num_instructions;
BitField<0x0a, 0xc, uint32_t> dest_offset;
BitField<0x16, 0x2, Op> op;
BitField<0x16, 0x4, uint32_t> bool_uniform_id;
BitField<0x16, 0x2, uint32_t> int_uniform_id; // TODO: Verify that only this many bits are used...
BitFlag<0x18, uint32_t> refy;
BitFlag<0x19, uint32_t> refx;
} flow_control;
// Format used by the three-source multiply-add instructions.
union {
const SourceRegister GetSrc1(bool is_inverted) const {
// The inverted form for src1 is the same, this function is just here for consistency
return src1;
}
// Returns the second source register; when is_inverted is true the
// inverted-layout field (src2i) is read instead of src2.
const SourceRegister GetSrc2(bool is_inverted) const {
if (!is_inverted) {
return src2;
} else {
return src2i;
}
}
// Returns the third source register; when is_inverted is true the
// inverted-layout field (src3i) is read instead of src3.
const SourceRegister GetSrc3(bool is_inverted) const {
if (!is_inverted) {
return src3;
} else {
return src3i;
}
}
BitField<0x00, 0x5, uint32_t> operand_desc_id;
// Regular layout: 5-bit src3, 7-bit src2
BitField<0x05, 0x5, SourceRegister> src3;
BitField<0x0a, 0x7, SourceRegister> src2;
BitField<0x11, 0x5, SourceRegister> src1;
// Inverted layout: 7-bit src3, 5-bit src2
BitField<0x05, 0x7, SourceRegister> src3i;
BitField<0x0c, 0x5, SourceRegister> src2i;
// Address register value is used for relative addressing of src2 / src3 (inverted)
BitField<0x16, 0x2, uint32_t> address_register_index;
// Name of the address register selected by address_register_index;
// empty when the index is 0.
std::string AddressRegisterName() const {
if (address_register_index == 0) return "";
else if (address_register_index == 1) return "a0.x";
else if (address_register_index == 2) return "a0.y";
else /*if (address_register_index == 3)*/ return "aL";
}
// Occupies bits 0x18..0x1c and therefore shares bits with `opcode`;
// OpCode::Id notes that the lower 3 opcode bits of MAD/MADI are ignored.
BitField<0x18, 0x5, DestRegister> dest;
} mad;
// Format used by the setemit instruction.
union {
BitField<0x16, 1, uint32_t> winding;
BitField<0x17, 1, uint32_t> prim_emit;
BitField<0x18, 2, uint32_t> vertex_id;
} setemit;
};
// An instruction must stay exactly one 32-bit word with standard layout.
static_assert(sizeof(Instruction) == 0x4, "Incorrect structure size");
static_assert(std::is_standard_layout<Instruction>::value, "Structure does not have standard layout");
union SwizzlePattern {
SwizzlePattern& operator =(const SwizzlePattern& instr) {
hex = instr.hex;
return *this;
}
uint32_t hex;
enum class Selector : uint32_t {
x = 0,
y = 1,
z = 2,
w = 3
};
/**
* Gets the raw 8-bit selector for the specified (1-indexed) source register.
*/
unsigned GetRawSelector(unsigned src) const {
if (src == 0 || src > 3)
throw std::out_of_range("src needs to be between 1 and 3");
unsigned selectors[] = {
src1_selector, src2_selector, src3_selector
};
return selectors[src - 1];
}
Selector GetSelectorSrc1(int comp) const {
Selector selectors[] = {
src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3
};
return selectors[comp];
}
Selector GetSelectorSrc2(int comp) const {
Selector selectors[] = {
src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3
};
return selectors[comp];
}
Selector GetSelectorSrc3(int comp) const {
Selector selectors[] = {
src3_selector_0, src3_selector_1, src3_selector_2, src3_selector_3
};
return selectors[comp];
}
void SetSelectorSrc1(int comp, Selector value) {
if (comp == 0)
src1_selector_0 = value;
else if (comp == 1)
src1_selector_1 = value;
else if (comp == 2)
src1_selector_2 = value;
else if (comp == 3)
src1_selector_3 = value;
else
throw std::out_of_range("comp needs to be smaller than 4");
}
void SetSelectorSrc2(int comp, Selector value) {
if (comp == 0)
src2_selector_0 = value;
else if (comp == 1)
src2_selector_1 = value;
else if (comp == 2)
src2_selector_2 = value;
else if (comp == 3)
src2_selector_3 = value;
else
throw std::out_of_range("comp needs to be smaller than 4");
}
void SetSelectorSrc3(int comp, Selector value) {
if (comp == 0)
src3_selector_0 = value;
else if (comp == 1)
src3_selector_1 = value;
else if (comp == 2)
src3_selector_2 = value;
else if (comp == 3)
src3_selector_3 = value;
else
throw std::out_of_range("comp needs to be smaller than 4");
}
std::string SelectorToString(bool src2) const {
std::map<Selector, std::string> map = {
{ Selector::x, "x" },
{ Selector::y, "y" },
{ Selector::z, "z" },
{ Selector::w, "w" }
};
std::string ret;
for (int i = 0; i < 4; ++i) {
ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i));
}
return ret;
}
bool DestComponentEnabled(unsigned int i) const {
return (dest_mask & (0x8 >> i)) != 0;
}
void SetDestComponentEnabled(unsigned int i, bool enabled) {
int mask = 0xffff & (0x8 >> i);
dest_mask = (dest_mask & ~mask) | (enabled * mask);
}
std::string DestMaskToString() const {
std::string ret;
for (int i = 0; i < 4; ++i) {
if (!DestComponentEnabled(i))
ret += "_";
else
ret += "xyzw"[i];
}
return ret;
}
// Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
BitField< 0, 4, uint32_t> dest_mask;
BitFlag < 4, uint32_t> negate_src1;
BitField< 5, 8, uint32_t> src1_selector;
BitField< 5, 2, Selector> src1_selector_3;
BitField< 7, 2, Selector> src1_selector_2;
BitField< 9, 2, Selector> src1_selector_1;
BitField<11, 2, Selector> src1_selector_0;
BitFlag <13, uint32_t> negate_src2;
BitField<14, 8, uint32_t> src2_selector;
BitField<14, 2, Selector> src2_selector_3;
BitField<16, 2, Selector> src2_selector_2;
BitField<18, 2, Selector> src2_selector_1;
BitField<20, 2, Selector> src2_selector_0;
BitFlag <22, uint32_t> negate_src3;
BitField<23, 8, uint32_t> src3_selector;
BitField<23, 2, Selector> src3_selector_3;
BitField<25, 2, Selector> src3_selector_2;
BitField<27, 2, Selector> src3_selector_1;
BitField<29, 2, Selector> src3_selector_0;
};
static_assert(sizeof(SwizzlePattern) == 0x4, "Incorrect structure size");
#pragma pack()
} // namespace

View File

@@ -0,0 +1,341 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <list>
#include <string>
#include <boost/optional.hpp>
#include <boost/variant/recursive_wrapper.hpp>
namespace nihstro {
struct SourceTreeIterator;
struct Node;

// SequenceContainer
/**
 * A piece of source code (`code`) with nested child trees attached at given
 * offsets into it. begin()/end() return SourceTreeIterators over the tree.
 *
 * Copies are deep: every Node (and therefore every child tree) is copied, and
 * the `parent` pointers of the copied direct children are redirected to the
 * copy. The copy's own `parent` is taken over from the source unchanged.
 */
struct SourceTree {
    SourceTree() = default;
    SourceTree(const SourceTree& oth);
    SourceTree& operator=(const SourceTree& oth);

    std::string code;

    struct {
        std::string filename;
    } file_info;

    SourceTree* parent = nullptr;

    // ordered with respect to "position"
    // (Attach() appends without sorting, so callers have to attach in order)
    std::list<Node> children;

    SourceTreeIterator begin();
    SourceTreeIterator end();

    // Attach the given tree, changing the child's owner to *this.
    SourceTree& Attach(SourceTree tree, std::string::difference_type offset);
};

// A child tree together with the position at which it is attached.
struct Node {
    SourceTree tree;

    std::string::difference_type offset_within_parent; // within "code"
};

// Deep copy; the copied direct children are re-parented to the new object.
inline SourceTree::SourceTree(const SourceTree& oth) : code(oth.code), file_info(oth.file_info), parent(oth.parent), children(oth.children) {
    for (auto& child : children)
        child.tree.parent = this;
}

// Deep copy assignment with the same re-parenting as the copy constructor.
// Without it, the implicitly generated assignment would leave the copied
// children pointing at the source tree as their parent.
inline SourceTree& SourceTree::operator=(const SourceTree& oth) {
    if (this != &oth) {
        // Copy first, then swap the pieces in: this stays correct even if
        // `oth` is one of our own descendants, since nothing owned by *this
        // is destroyed before `oth` has been read completely.
        SourceTree copy(oth);
        code.swap(copy.code);
        file_info.filename.swap(copy.file_info.filename);
        parent = copy.parent;
        children.swap(copy.children);
        for (auto& child : children)
            child.tree.parent = this;
    }
    return *this;
}

// Appends a copy of `tree` as the last child, positioned `offset` characters
// into `code`, and returns *this to allow chaining.
inline SourceTree& SourceTree::Attach(SourceTree tree, std::string::difference_type offset) {
    tree.parent = this;
    children.push_back(Node{tree, offset});
    return *this;
}
// RandomAccessIterator
struct SourceTreeIterator {
using difference_type = std::string::iterator::difference_type;
using reference = std::string::iterator::reference;
using value_type = std::string::iterator::value_type;
using pointer = std::string::iterator::pointer;
using iterator_category = std::random_access_iterator_tag;
SourceTreeIterator() {
}
SourceTreeIterator(SourceTree& tree) : tree(&tree), position(tree.code.begin()), node_iterator(tree.children.begin()) {
UpdateChildIterator();
}
SourceTreeIterator(const SourceTreeIterator&) = default;
SourceTreeIterator& operator += (difference_type n) {
if (n > 0) {
while (n) {
if (child_iterator) {
auto remaining_to_child = node_iterator->offset_within_parent - (position - tree->code.begin());
if (remaining_to_child >= n) {
// If the next child is more than n steps away, increase position by n and return
// TODO: Should we make sure that we don't end up out-of-bounds here?
position += n;
UpdateNodeIterator();
break;
} else {
// Otherwise, move current position to the child if it isn't there already
position += remaining_to_child;
n -= remaining_to_child;
UpdateNodeIterator();
}
if (child_iterator->get().StepsRemaining() > n) {
// If child is larger than n, advance child by n and return
child_iterator->get() += n;
break;
} else {
// else step out of the child and increment next child iterator by one
n -= child_iterator->get().StepsRemaining();
if (node_iterator != tree->children.end())
node_iterator++;
UpdateChildIterator();
}
} else {
// TODO: Should we make sure that we don't end up out-of-bounds here?
position += n;
UpdateNodeIterator();
break;
}
}
} else if (n < 0) {
// Reduce to n>0 case by starting from begin()
n = (*this - tree->begin()) + n;
*this = tree->begin() + n;
}
return *this;
}
SourceTreeIterator& operator -= (difference_type n) {
*this += -n;
return *this;
}
difference_type operator -(SourceTreeIterator it) const {
return this->StepsGone() - it.StepsGone();
}
bool operator < (const SourceTreeIterator& it) const {
return std::distance(*this, it) > 0;
}
bool operator <= (const SourceTreeIterator& it) const {
return std::distance(*this, it) >= 0;
}
bool operator > (const SourceTreeIterator& it) const {
return !(*this <= it);
}
bool operator >= (const SourceTreeIterator& it) const {
return !(*this < it);
}
bool operator == (const SourceTreeIterator& it) const {
return (*this <= it) && !(*this < it);
}
bool operator != (const SourceTreeIterator& it) const {
return !(*this == it);
}
reference operator* () {
return (*this)[0];
}
SourceTreeIterator operator++ () {
*this += 1;
return *this;
}
SourceTreeIterator operator++ (int) {
auto it = *this;
*this += 1;
return it;
}
SourceTreeIterator operator +(difference_type n) const {
SourceTreeIterator it2 = *this;
it2 += n;
return it2;
}
SourceTreeIterator operator -(SourceTreeIterator::difference_type n) const {
return *this + (-n);
}
reference operator [] (difference_type n) {
auto it = (*this + n);
if (it.WithinChild())
return it.child_iterator->get()[0];
else return *it.position;
}
// Get line number (one-based) within "tree"
unsigned GetLineNumber() const {
// Adding one for natural (i.e. one-based) line numbers
return std::count(tree->code.begin(), position, '\n') + 1;
}
// Get line number (one-based) within the tree of the current child
unsigned GetCurrentLineNumber() const {
if (WithinChild())
return child_iterator->get().GetCurrentLineNumber();
return GetLineNumber();
}
const std::string GetCurrentFilename() const {
if (WithinChild())
return child_iterator->get().GetCurrentFilename();
return tree->file_info.filename;
}
SourceTreeIterator GetParentIterator(const SourceTree* reference_tree) const {
if (tree == reference_tree) {
return *this;
} else {
return child_iterator->get().GetParentIterator(reference_tree);
}
}
SourceTree* GetCurrentTree() {
if (WithinChild())
return child_iterator->get().GetCurrentTree();
else
return tree;
}
private:
difference_type StepsRemaining() const {
return std::distance(*this, tree->end());
}
difference_type StepsGone() const {
auto it = tree->begin();
difference_type diff = 0;
// Advance reference iterator starting from the beginning until we reach *this,
// making sure that both the main position and the child iterator match.
while (it.position != position ||
((bool)it.child_iterator ^ (bool)child_iterator) ||
(it.child_iterator && child_iterator && it.child_iterator->get() != child_iterator->get())) {
// Move to next child (if there is one), or abort if we reach the reference position
if (it.child_iterator) {
auto distance_to_child = std::min(it.node_iterator->offset_within_parent - (it.position -it.tree->code.begin() ), position - it.position);
// Move to child or this->position
diff += distance_to_child;
it.position += distance_to_child;
if (it.position - it.tree->code.begin() == it.node_iterator->offset_within_parent) {
if (node_iterator != tree->children.end() && it.node_iterator == node_iterator) {
return diff + (child_iterator->get() - it.child_iterator->get());
} else {
// Move out of child
diff += it.child_iterator->get().StepsRemaining();
}
} else {
// We moved to this->position => done
return diff;
}
// Move to next child
if (it.node_iterator != it.tree->children.end()) {
it.node_iterator++;
it.UpdateChildIterator();
}
} else {
// no child remaining, hence just move to the given position
return diff + (position - it.position);
}
}
return diff;
}
bool WithinChild() const {
return child_iterator && position - tree->code.begin() == node_iterator->offset_within_parent;
}
void UpdateChildIterator() {
if (node_iterator != tree->children.end())
child_iterator = boost::recursive_wrapper<SourceTreeIterator>(node_iterator->tree);
else
child_iterator = boost::none;
}
void UpdateNodeIterator() {
// Move to the first node which is at the cursor or behind it
while (node_iterator != tree->children.end() && node_iterator->offset_within_parent < std::distance(tree->code.begin(), position)) {
node_iterator++;
UpdateChildIterator();
}
}
SourceTree* tree;
std::string::iterator position;
boost::optional<boost::recursive_wrapper<SourceTreeIterator>> child_iterator; // points to current or next child
std::list<Node>::iterator node_iterator; // points to current or next node
friend struct SourceTree;
};
// Commuted form of "it + n": returns a copy of `it` advanced by n steps.
inline SourceTreeIterator operator +(SourceTreeIterator::difference_type n, const SourceTreeIterator& it) {
    SourceTreeIterator advanced = it;
    advanced += n;
    return advanced;
}
// Returns a copy of `it` moved back by n steps, i.e. the same as "it - n"
// despite the operand order.
inline SourceTreeIterator operator -(SourceTreeIterator::difference_type n, const SourceTreeIterator& it) {
    SourceTreeIterator moved_back = it;
    moved_back += -n;
    return moved_back;
}
// Iterator positioned at the first character of this tree.
inline SourceTreeIterator SourceTree::begin() {
    SourceTreeIterator first(*this);
    return first;
}
// Past-the-end iterator: positioned behind the last character of `code`,
// behind the last child node, and without a current child.
inline SourceTreeIterator SourceTree::end() {
    SourceTreeIterator last(*this);
    last.child_iterator = boost::none;
    last.node_iterator = children.end();
    last.position = code.end();
    return last;
}
} // namespace

28
externals/nihstro/license.txt vendored Normal file
View File

@@ -0,0 +1,28 @@
Copyright 2014 Tony Wasserka
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the owner nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

1481
externals/nihstro/src/assembler.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

316
externals/nihstro/src/disassembler.cpp vendored Normal file
View File

@@ -0,0 +1,316 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <stdint.h>
#include "nihstro/bit_field.h"
#include "nihstro/shader_bytecode.h"
#include "nihstro/parser_shbin.h"
using namespace nihstro;
/**
 * Helper for decoding 24-bit floating point values.
 *
 * The decoded number is kept in an ordinary 32-bit float, so this type is meant for
 * displaying values rather than for performing arithmetic in the 24-bit format.
 */
struct float24 {
    /// Wraps a regular float without any conversion.
    static float24 FromFloat32(float val) {
        float24 ret;
        ret.value = val;
        return ret;
    }

    /**
     * Decodes a raw 24-bit float stored in the low 24 bits of @p hex.
     *
     * Layout: 16 bit mantissa (bits 0-15), 7 bit exponent (bits 16-22, biased by 63),
     * 1 bit sign (bit 23). Bits 24-31 are ignored. If the low 24 bits are all zero the
     * result is 0.
     *
     * TODO: No idea if this works as intended
     */
    static float24 FromRawFloat24(uint32_t hex) {
        float24 ret;
        if ((hex & 0xFFFFFF) == 0) {
            ret.value = 0;
        } else {
            uint32_t mantissa = hex & 0xFFFF;
            uint32_t exponent = (hex >> 16) & 0x7F;
            // Mask to a single bit: without the mask, any set bit above bit 23 would be
            // mistaken for the sign even though the zero check above ignores those bits.
            uint32_t sign = (hex >> 23) & 1;
            ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f));
            if (sign)
                ret.value = -ret.value;
        }
        return ret;
    }

    // Not recommended for anything but logging
    float ToFloat32() const {
        return value;
    }

private:
    // Stored as a regular float, merely for convenience
    // TODO: Perform proper arithmetic on this!
    float value;
};
int main(int argc, char *argv[])
{
// TODO: Make this check portable!
if (argc < 2) {
std::cout << "Error: No filename given" << std::endl;
return 0;
}
ShbinParser parser;
try {
parser.ReadHeaders(argv[1]);
for (int i = 0; i < parser.GetDVLBHeader().num_programs; ++i) {
auto& filename = parser.GetFilename(i);
std::cout << "DVLE " << std::setw(3) << std::dec << i << ": "
// << "offset: 0x" << std::hex << std::setfill('0') << std::setw(4) << offset << ", "
<< std::hex << std::setfill('0')
<< "byte offset to main: 0x" << std::hex << std::setw(8) << 4 * parser.GetDVLEHeader(i).main_offset_words << "\", "
<< "original filename \"" << filename << "\""
<< std::setfill(' ') << std::dec << std::endl;
}
std::cout << "Got " << parser.GetDVLBHeader().num_programs << " DVLE headers" << std::endl;
if (argc < 3) {
std::cout << "Error: No DVLE index given" << std::endl;
return 0;
}
uint32_t dvle_index = std::stoi(std::string(argv[2]));
parser.ReadDVLE(dvle_index);
auto& dvle_header = parser.dvle_headers[dvle_index];
for (int i = 0; i < parser.shader_info.constant_table.size(); ++i) {
auto& info = parser.shader_info.constant_table[i];
switch (info.type) {
case ConstantInfo::Float:
std::cout << "Constant register info: " << GetRegisterName(RegisterType::FloatUniform) << info.regid.Value()
<< " = (" << float24::FromRawFloat24(info.f.x).ToFloat32() << ", " << float24::FromRawFloat24(info.f.y).ToFloat32()
<< ", " << float24::FromRawFloat24(info.f.z).ToFloat32() << ", " << float24::FromRawFloat24(info.f.w).ToFloat32() << ")"
<< " (raw: 0x" << std::hex << std::setfill('0') << std::setw(8) << info.full_first_word
<< " 0x" << std::setw(8) << info.f.x << " 0x" << std::setw(8) << info.f.y
<< " 0x" << std::setw(8) << info.f.z << " 0x" << std::setw(8) << info.f.w << std::dec << std::setfill( ' ') << ")"
<< std::endl;
break;
case ConstantInfo::Int:
std::cout << "Constant register info: " << GetRegisterName(RegisterType::IntUniform) << info.regid.Value()
<< " = (" << (int)info.i.x << ", " << (int)info.i.y
<< ", " << (int)info.i.z << ", " << (int)info.i.w << ")"
<< " (raw: 0x" << std::hex << std::setfill('0') << std::setw(8) << info.full_first_word
<< std::dec << std::setfill( ' ') << ")"
<< std::endl;
break;
case ConstantInfo::Bool:
std::cout << "Constant register info: " << GetRegisterName(RegisterType::BoolUniform) << info.regid.Value()
<< " = " << std::boolalpha << (bool)info.b
<< " (raw: 0x" << std::hex << std::setfill('0') << std::setw(8) << info.full_first_word
<< std::dec << std::setfill( ' ') << ")"
<< std::endl;
break;
default:
{
std::stringstream str("Unknown constant type: ");
str << std::hex << info.type.Value();
throw str.str();
}
}
}
for (int i = 0; i < parser.shader_info.label_table.size(); ++i) {
const auto& label_info = parser.shader_info.label_table[i];
std::cout << "Found label \"" << parser.shader_info.labels[label_info.program_offset]
<< "\" at program offset 0x" << std::hex << 4 * label_info.program_offset
<< std::endl;
}
for (auto& info : parser.shader_info.output_register_info)
std::cout << "Output register info: " << GetRegisterName(RegisterType::Output) << info.id.Value() << '.' << std::setw(4) << std::left << info.GetMask()
<< " as " << std::setw(8) << info.GetSemanticName()
<< " (" << std::hex << std::setw(16) << std::setfill('0') << (uint64_t)info.hex << std::setfill(' ') << ")" << std::endl;
if (!parser.shader_info.uniform_table.empty()) {
size_t max_uniform_name_length = std::max_element(parser.shader_info.uniform_table.begin(), parser.shader_info.uniform_table.end(),
[](const UniformInfo& i1, UniformInfo& i2) { return i1.name.length() < i2.name.length(); }
)->name.length();
for (auto& uniform_info : parser.shader_info.uniform_table) {
bool is_range = (uniform_info.basic.reg_start != uniform_info.basic.reg_end);
std::cout << "Found uniform symbol \"" << std::setw(max_uniform_name_length) << uniform_info.name
<< "\" for register" << (is_range ? "s " : " ") << std::dec
<< GetRegisterName(uniform_info.basic.GetStartType()) << uniform_info.basic.GetStartIndex();
if (is_range)
std::cout << "-" << GetRegisterName(uniform_info.basic.GetEndType()) << uniform_info.basic.GetEndIndex();
std::cout << std::endl;
}
}
// TODO:
// std::cout << "Disassembling " << parser.GetDVLPHeader().binary_size_words << " bytes from offset "
// << dvlp_offset << " + " << dvlp_header.binary_offset << " = " << main_offset << " (main at byte offset " << "0x" << std::hex << 4 * dvle_header.main_offset_words << ")" << std::endl;
} catch (const std::string& err) {
std::cout << "Exception while reading \"" << argv[1] << "\": " << err << std::endl;
return 1;
} catch (const std::ios_base::failure& except) {
std::cout << "Exception while reading \"" << argv[1] << "\": ios_base::failure \"" << except.what() << "\" (invalid shbin?)" << std::endl;
return 1;
} catch (const std::bad_alloc&) {
std::cout << "Exception while reading \"" << argv[1] << "\": bad_alloc (invalid shbin?)" << std::endl;
return 1;
}
const ShaderInfo& shader_info = parser.shader_info;
for (uint32_t word = 0; word < shader_info.code.size(); ++word) {
std::cout.flags(std::ios::left | std::ios::hex);
if (shader_info.HasLabel(word)) {
std::cout << std::setw(8) << std::right << std::setfill('0') << 4*word
<< " [--------] " << shader_info.GetLabel(word) << ":" << std::endl;
}
Instruction instr = shader_info.code[word];
OpCode opcode = instr.opcode.Value();
std::cout << std::setw(8) << std::right << std::setfill('0') << 4*word << " "
<< "[" << std::setw(8) << std::right << std::setfill('0') << instr.hex << "] "
<< std::setw(7) << std::left << std::setfill(' ') << opcode.GetInfo().name;
const SwizzlePattern& swizzle = shader_info.swizzle_info[instr.common.operand_desc_id].pattern;
// TODO: Not sure if name lookup works properly, yet!
if (opcode.GetInfo().type == OpCode::Type::Arithmetic) {
bool src_reversed = 0 != (opcode.GetInfo().subtype & OpCode::Info::SrcInversed);
auto src1 = instr.common.GetSrc1(src_reversed);
auto src2 = instr.common.GetSrc2(src_reversed);
auto dest = instr.common.dest.Value();
std::string src1_relative_address;
if (!instr.common.AddressRegisterName().empty())
src1_relative_address = "[" + instr.common.AddressRegisterName() + "]";
if (opcode.GetInfo().subtype & OpCode::Info::Dest) {
std::cout << std::setw(4) << std::right << dest.GetName() << "." << swizzle.DestMaskToString() << " ";
} else {
std::cout << " ";
}
if (opcode.GetInfo().subtype & OpCode::Info::Src1) {
std::cout << std::setw(8) << std::right << ((swizzle.negate_src1 ? "-" : "") + src1.GetName()) + src1_relative_address << "." << swizzle.SelectorToString(false) << " ";
} else {
std::cout << " ";
}
if (opcode.GetInfo().subtype & OpCode::Info::CompareOps) {
std::cout << instr.common.compare_op.ToString(instr.common.compare_op.x) << " " << instr.common.compare_op.ToString(instr.common.compare_op.y) << " ";
} else {
}
if (opcode.GetInfo().subtype & OpCode::Info::Src2) {
std::cout << std::setw(4) << std::right << (swizzle.negate_src2 ? "-" : "") + src2.GetName() << "." << swizzle.SelectorToString(true) << " ";
} else {
std::cout << " ";
}
std::cout << std::setw(2) << instr.common.operand_desc_id.Value() << " addr:" << instr.common.address_register_index.Value()
<< "; " << shader_info.LookupDestName(dest, swizzle) << " <- " << (swizzle.negate_src1 ? "-" : "") + shader_info.LookupSourceName(src1, instr.common.address_register_index);
if (opcode.GetInfo().subtype & OpCode::Info::Src2)
std::cout << ", " << (swizzle.negate_src2 ? "-" : "") + shader_info.LookupSourceName(src2, 0);
std::cout << std::endl;
} else if (opcode.GetInfo().type == OpCode::Type::Conditional) {
std::cout << "if ";
if (opcode.GetInfo().subtype & OpCode::Info::HasCondition) {
const char* ops[] = {
" || ", " && ", "", ""
};
if (instr.flow_control.op != instr.flow_control.JustY)
std::cout << ((!instr.flow_control.refx) ? "!" : " ") << "cc.x";
std::cout << ops[instr.flow_control.op];
if (instr.flow_control.op != instr.flow_control.JustX)
std::cout << ((!instr.flow_control.refy) ? "!" : " ") << "cc.y";
std::cout << " ";
} else if (opcode.GetInfo().subtype & OpCode::Info::HasUniformIndex) {
std::cout << "b" << instr.flow_control.bool_uniform_id << " ";
}
uint32_t target_addr = instr.flow_control.dest_offset;
uint32_t target_addr_else = instr.flow_control.dest_offset;
if (opcode.GetInfo().subtype & OpCode::Info::HasAlternative) {
std::cout << "else jump to 0x" << std::setw(4) << std::right << std::setfill('0') << 4 * instr.flow_control.dest_offset
<< " aka \"" << shader_info.GetLabel(instr.flow_control.dest_offset) << "\"";
} else if (opcode.GetInfo().subtype & OpCode::Info::HasExplicitDest) {
std::cout << "jump to 0x" << std::setw(4) << std::right << std::setfill('0') << 4 * instr.flow_control.dest_offset
<< " aka \"" << shader_info.GetLabel(instr.flow_control.dest_offset) << "\"";
} else {
// TODO: Handle other cases
}
if (opcode.GetInfo().subtype & OpCode::Info::HasFinishPoint) {
std::cout << "(return on " << std::setw(4) << std::right << std::setfill('0') << 4 * instr.flow_control.dest_offset + 4 * instr.flow_control.num_instructions << "\")";
}
std::cout << std::endl;
} else {
std::cout << std::endl;
}
}
std::cout << std::endl << "Swizzle patterns:" << std::endl;
for (int i = 0; i < shader_info.swizzle_info.size(); ++i) {
const auto& info = shader_info.swizzle_info[i];
const auto& pattern = info.pattern;
std::cout << "(" << std::setw(3) << std::right << std::hex << i << ") " << std::setw(8) << std::setfill('0') << pattern.hex << ": " << pattern.dest_mask.Value() << " " <<
" " << (int)pattern.negate_src1 << " " <<
" " << (int)pattern.src1_selector_3.Value() << " " << (int)pattern.src1_selector_2.Value() <<
" " << (int)pattern.src1_selector_1.Value() << " " << (int)pattern.src1_selector_0.Value() << " " <<
" " << (int)pattern.negate_src2 << " " <<
" " << (int)pattern.src2_selector_3.Value() << " " << (int)pattern.src2_selector_2.Value() <<
" " << (int)pattern.src2_selector_1.Value() << " " << (int)pattern.src2_selector_0.Value() << " " <<
" " << (int)pattern.negate_src3 << " " <<
" " << (int)pattern.src3_selector_3.Value() << " " << (int)pattern.src3_selector_2.Value() <<
" " << (int)pattern.src3_selector_1.Value() << " " << (int)pattern.src3_selector_0.Value() << " " <<
" " << std::setw(8) << std::setfill('0') << info.unknown << std::setfill(' ') << std::endl;
}
return 0;
}

View File

@@ -0,0 +1,277 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Enable this for detailed XML overview of parser results
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"
#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"
namespace spirit = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::qi::ascii;
namespace phoenix = boost::phoenix;
using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;
using namespace nihstro;
// Adapt parser data structures for use with boost::spirit
// Expose SetEmitInstruction::Flags to Boost.Fusion so that boost::spirit can fill its
// members, in the order listed here, from the attributes produced by the parser.
BOOST_FUSION_ADAPT_STRUCT(
SetEmitInstruction::Flags,
(boost::optional<bool>, primitive_flag)
(boost::optional<bool>, invert_flag)
)
// Same for the complete set-emit instruction: opcode, vertex id, then the flags above.
BOOST_FUSION_ADAPT_STRUCT(
SetEmitInstruction,
(OpCode, opcode)
(unsigned, vertex_id)
(SetEmitInstruction::Flags, flags)
)
// Lazy (phoenix) wrapper around ErrorHandler; the grammars below register it via qi::on_error.
// NOTE(review): defined with external linkage - presumably declared in a shared header for the other parser sources; confirm.
phoenix::function<ErrorHandler> error_handler;
/**
 * Sets up the grammar that recognizes a bare opcode.
 *
 * With require_end_of_line == true only the operand-less opcodes (opcodes_trivial) are
 * accepted and the opcode must be followed by the end of the statement. Otherwise the
 * opcode may come from any of the trivial, compare, float and flow control symbol tables
 * and nothing is required after it.
 *
 * @param context Forwarded to the CommonRules member, which provides the symbol tables,
 *                the end_of_statement rule and the diagnostics object referenced here.
 */
template<typename Iterator, bool require_end_of_line>
TrivialOpParser<Iterator, require_end_of_line>::TrivialOpParser(const ParserContext& context)
: TrivialOpParser::base_type(trivial_instruction),
common(context),
opcodes_trivial(common.opcodes_trivial),
opcodes_compare(common.opcodes_compare),
opcodes_float(common.opcodes_float),
opcodes_flowcontrol(common.opcodes_flowcontrol),
end_of_statement(common.end_of_statement),
diagnostics(common.diagnostics) {
// Setup rules
if (require_end_of_line) {
// Case-insensitive opcode that must be followed by whitespace (checked via lookahead, not consumed)
opcode = qi::no_case[qi::lexeme[opcodes_trivial >> &ascii::space]];
// '>' is an expectation point: once the opcode matched, a missing end of statement is an error
trivial_instruction = opcode > end_of_statement;
} else {
// NOTE(review): '>>' binds tighter than '|', so the "&ascii::space" lookahead below only
// applies to the last alternative (opcodes_flowcontrol[1]) - confirm whether the whole
// list of alternatives was meant to be parenthesized.
opcode = qi::no_case[qi::lexeme[opcodes_trivial | opcodes_compare | opcodes_float[0]
| opcodes_float[1] | opcodes_float[2] | opcodes_float[3]
| opcodes_flowcontrol[0] | opcodes_flowcontrol[1] >> &ascii::space]];
trivial_instruction = opcode;
}
// Error handling
BOOST_SPIRIT_DEBUG_NODE(opcode);
BOOST_SPIRIT_DEBUG_NODE(trivial_instruction);
// On a failed expectation, hand the diagnostics object and the error arguments (_1.._4) to error_handler and fail the rule
qi::on_error<qi::fail>(trivial_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}
/**
 * Sets up the grammar for the set-emit instruction:
 *
 *     <opcode> <vertex id> [, prim] [, inv] <end of statement>
 *
 * @param context Forwarded to the CommonRules member, which provides the opcode symbol
 *                table, the end_of_statement rule and the diagnostics object.
 */
template<typename Iterator>
SetEmitParser<Iterator>::SetEmitParser(const ParserContext& context)
: SetEmitParser::base_type(setemit_instruction),
common(context),
opcodes_setemit(common.opcodes_setemit),
end_of_statement(common.end_of_statement),
diagnostics(common.diagnostics) {
// Setup rules
auto comma_rule = qi::lit(',');
// Case-insensitive opcode that must be followed by whitespace (lookahead only)
opcode = qi::lexeme[qi::no_case[opcodes_setemit] >> &ascii::space];
vertex_id = qi::uint_;
// Each flag keyword must not be directly followed by an alphanumeric character and yields "true"
prim_flag = qi::lit("prim") >> &(!ascii::alnum) >> qi::attr(true);
inv_flag = qi::lit("inv") >> &(!ascii::alnum) >> qi::attr(true);
// Permutation operator: either flag, or both in any order, each introduced by a comma
flags = ((comma_rule >> prim_flag) ^ (comma_rule >> inv_flag));
// If no flag is given, a default-constructed Flags object is used as the attribute
setemit_instruction = ((opcode >> vertex_id) >> (flags | qi::attr(SetEmitInstruction::Flags{}))) > end_of_statement;
// Error handling
BOOST_SPIRIT_DEBUG_NODE(opcode);
BOOST_SPIRIT_DEBUG_NODE(vertex_id);
BOOST_SPIRIT_DEBUG_NODE(prim_flag);
BOOST_SPIRIT_DEBUG_NODE(inv_flag);
BOOST_SPIRIT_DEBUG_NODE(flags);
BOOST_SPIRIT_DEBUG_NODE(setemit_instruction);
// On a failed expectation, hand the diagnostics object and the error arguments (_1.._4) to error_handler and fail the rule
qi::on_error<qi::fail>(setemit_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}
/**
 * Sets up the grammar for a label statement: an identifier followed by ':' and the end of
 * the statement.
 *
 * @param context Forwarded to the CommonRules member, which provides the identifier and
 *                end_of_statement rules and the diagnostics object.
 */
template<typename Iterator>
LabelParser<Iterator>::LabelParser(const ParserContext& context)
: LabelParser::base_type(label), common(context),
end_of_statement(common.end_of_statement),
identifier(common.identifier),
diagnostics(common.diagnostics) {
// '>' is an expectation point: after "identifier:" a missing end of statement is reported as an error
label = identifier >> qi::lit(':') > end_of_statement;
BOOST_SPIRIT_DEBUG_NODE(label);
qi::on_error<qi::fail>(label, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}
// Explicit instantiation for the iterator type used by the parser
template struct LabelParser<ParserIterator>;
/**
 * Private implementation of Parser.
 *
 * Owns the skipper and one grammar object per statement kind. Every Parse* method runs
 * the matching grammar on [begin, end) using the skipper, advances begin past the
 * consumed input and stores the parsed attribute in the object the caller passed in.
 */
struct Parser::ParserImpl {
    using Iterator = SourceTreeIterator;

    /// Constructs every grammar from the same parser context.
    ParserImpl(const ParserContext& context)
        : label(context),
          plain_instruction(context),
          simple_instruction(context),
          instruction(context),
          compare(context),
          flow_control(context),
          setemit(context),
          declaration(context) {
    }

    /**
     * Consumes skippable input and line breaks starting at begin.
     * @return Number of line breaks that were consumed.
     */
    unsigned Skip(Iterator& begin, Iterator end) {
        unsigned line_breaks = 0;
        for (;;) {
            // Consume whatever the skipper accepts, then look for a line break
            parse(begin, end, skipper);
            if (!qi::parse(begin, end, qi::eol))
                break;
            ++line_breaks;
        }
        return line_breaks;
    }

    /// Consumes everything up to and including the next line break (or the end of input).
    void SkipSingleLine(Iterator& begin, Iterator end) {
        qi::parse(begin, end,
                  *(qi::char_ - (qi::eol | qi::eoi))
                      >> (qi::eol | qi::eoi));
    }

    /// Parses a label statement into *content; returns whether parsing succeeded.
    bool ParseLabel(Iterator& begin, Iterator end, StatementLabel* content) {
        assert(content);
        StatementLabel& result = *content;
        return phrase_parse(begin, end, label, skipper, result);
    }

    /// Parses just an opcode (any kind) into *content; returns whether parsing succeeded.
    bool ParseOpCode(Iterator& begin, Iterator end, OpCode* content) {
        assert(content);
        OpCode& result = *content;
        return phrase_parse(begin, end, plain_instruction, skipper, result);
    }

    /// Parses an operand-less instruction into *content; returns whether parsing succeeded.
    bool ParseSimpleInstruction(Iterator& begin, Iterator end, OpCode* content) {
        assert(content);
        OpCode& result = *content;
        return phrase_parse(begin, end, simple_instruction, skipper, result);
    }

    /// Parses a float operation into *content; returns whether parsing succeeded.
    bool ParseFloatOp(Iterator& begin, Iterator end, FloatOpInstruction* content) {
        assert(content);
        FloatOpInstruction& result = *content;
        return phrase_parse(begin, end, instruction, skipper, result);
    }

    /// Parses a compare instruction into *content; returns whether parsing succeeded.
    bool ParseCompare(Iterator& begin, Iterator end, CompareInstruction* content) {
        assert(content);
        CompareInstruction& result = *content;
        return phrase_parse(begin, end, compare, skipper, result);
    }

    /// Parses a flow control instruction into *content; returns whether parsing succeeded.
    bool ParseFlowControl(Iterator& begin, Iterator end, FlowControlInstruction* content) {
        assert(content);
        FlowControlInstruction& result = *content;
        return phrase_parse(begin, end, flow_control, skipper, result);
    }

    /// Parses a set-emit instruction into *content; returns whether parsing succeeded.
    bool ParseSetEmit(Iterator& begin, Iterator end, SetEmitInstruction* content) {
        assert(content);
        SetEmitInstruction& result = *content;
        return phrase_parse(begin, end, setemit, skipper, result);
    }

    /// Parses a declaration statement into *content; returns whether parsing succeeded.
    bool ParseDeclaration(Iterator& begin, Iterator end, StatementDeclaration* content) {
        assert(content);
        StatementDeclaration& result = *content;
        return phrase_parse(begin, end, declaration, skipper, result);
    }

private:
    AssemblySkipper<Iterator> skipper;

    // One grammar per statement kind; the <false>/<true> argument selects whether the
    // opcode has to be followed by the end of the statement.
    LabelParser<Iterator> label;
    TrivialOpParser<Iterator, false> plain_instruction;
    TrivialOpParser<Iterator, true> simple_instruction;
    FloatOpParser<Iterator> instruction;
    CompareParser<Iterator> compare;
    FlowControlParser<Iterator> flow_control;
    SetEmitParser<Iterator> setemit;
    DeclarationParser<Iterator> declaration;
};
/**
 * Creates the parser together with its private implementation object.
 *
 * @param context Parser context that is passed on to every grammar owned by ParserImpl.
 */
Parser::Parser(const ParserContext& context) : impl(new ParserImpl(context)) {
}
// Defined in this file, where ParserImpl is a complete type.
Parser::~Parser() {
}
// The public Parser interface below only forwards to the ParserImpl method of the same
// name; see ParserImpl for what each call consumes and returns.

// Skips ignorable input and line breaks; returns the number of line breaks consumed.
unsigned Parser::Skip(Iterator& begin, Iterator end) {
return impl->Skip(begin, end);
}
// Skips the remainder of the current line, including its line break.
void Parser::SkipSingleLine(Iterator& begin, Iterator end) {
impl->SkipSingleLine(begin, end);
}
// Parses a label statement into *label; returns whether parsing succeeded.
bool Parser::ParseLabel(Iterator& begin, Iterator end, StatementLabel* label) {
return impl->ParseLabel(begin, end, label);
}
// Parses only an opcode into *opcode; returns whether parsing succeeded.
bool Parser::ParseOpCode(Iterator& begin, Iterator end, OpCode* opcode) {
return impl->ParseOpCode(begin, end, opcode);
}
// Parses an operand-less instruction into *opcode; returns whether parsing succeeded.
bool Parser::ParseSimpleInstruction(Iterator& begin, Iterator end, OpCode* opcode) {
return impl->ParseSimpleInstruction(begin, end, opcode);
}
// Parses a float operation into *instruction; returns whether parsing succeeded.
bool Parser::ParseFloatOp(Iterator& begin, Iterator end, FloatOpInstruction* instruction) {
return impl->ParseFloatOp(begin, end, instruction);
}
// Parses a compare instruction into *content; returns whether parsing succeeded.
bool Parser::ParseCompare(Iterator& begin, Iterator end, CompareInstruction* content) {
return impl->ParseCompare(begin, end, content);
}
// Parses a flow control instruction into *content; returns whether parsing succeeded.
bool Parser::ParseFlowControl(Iterator& begin, Iterator end, FlowControlInstruction* content) {
return impl->ParseFlowControl(begin, end, content);
}
// Parses a set-emit instruction into *content; returns whether parsing succeeded.
bool Parser::ParseSetEmit(Iterator& begin, Iterator end, SetEmitInstruction* content) {
return impl->ParseSetEmit(begin, end, content);
}
// Parses a declaration statement into *declaration; returns whether parsing succeeded.
bool Parser::ParseDeclaration(Iterator& begin, Iterator end, StatementDeclaration* declaration) {
return impl->ParseDeclaration(begin, end, declaration);
}

View File

@@ -0,0 +1,185 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Enable this for detailed XML overview of parser results
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"
#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"
namespace spirit = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::qi::ascii;
namespace phoenix = boost::phoenix;
using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;
using namespace nihstro;
// Adapt parser data structures for use with boost::spirit
// Expose IntegerWithSign to Boost.Fusion so that a parsed (sign, value) pair can be
// assigned to its members in this order.
BOOST_FUSION_ADAPT_STRUCT(
IntegerWithSign,
(int, sign)
(unsigned, value)
)
/**
* Implementation of transform_attribute from std::vector<InputSwizzlerMask::Component> to InputSwizzlerMask.
* This eases swizzle mask parsing a lot.
*/
namespace boost { namespace spirit { namespace traits {
template<>
struct transform_attribute<InputSwizzlerMask, std::vector<InputSwizzlerMask::Component>, qi::domain>
{
using Exposed = InputSwizzlerMask;
using type = std::vector<InputSwizzlerMask::Component>;
static void post(Exposed& val, const type& attr) {
val.num_components = attr.size();
for (size_t i = 0; i < attr.size(); ++i)
val.components[i] = attr[i];
}
static type pre(Exposed& val) {
type vec;
for (int i = 0; i < val.num_components; ++i)
vec.push_back(val.components[i]);
return vec;
}
static void fail(Exposed&) { }
};
}}} // namespaces
/**
 * Fills the symbol tables and defines the rules shared by the individual statement
 * grammars: opcode mnemonics, signs, swizzle components, identifiers, index expressions,
 * operand expressions and the end-of-statement rule. Also registers the diagnostic
 * messages that are looked up by rule name.
 *
 * @param context Not referenced inside this constructor body.
 */
template<>
CommonRules<ParserIterator>::CommonRules(const ParserContext& context) {
// Setup symbol table
// Opcodes that take no operands
opcodes_trivial.add
( "nop", OpCode::Id::NOP )
( "end", OpCode::Id::END )
( "emit", OpCode::Id::EMIT )
( "else", OpCode::Id::ELSE )
( "endif", OpCode::Id::ENDIF )
( "endloop", OpCode::Id::ENDLOOP );
// Float operations, split over four tables
// NOTE(review): the grouping presumably reflects the operand layout each group expects - confirm against FloatOpParser
opcodes_float[0].add
( "mova", OpCode::Id::MOVA );
opcodes_float[1].add
( "exp", OpCode::Id::EX2 )
( "log", OpCode::Id::LG2 )
( "lit", OpCode::Id::LIT )
( "flr", OpCode::Id::FLR )
( "rcp", OpCode::Id::RCP )
( "rsq", OpCode::Id::RSQ )
( "mov", OpCode::Id::MOV );
opcodes_float[2].add
( "add", OpCode::Id::ADD )
( "dp3", OpCode::Id::DP3 )
( "dp4", OpCode::Id::DP4 )
( "dph", OpCode::Id::DPH )
( "dst", OpCode::Id::DST )
( "mul", OpCode::Id::MUL )
( "sge", OpCode::Id::SGE )
( "slt", OpCode::Id::SLT )
( "max", OpCode::Id::MAX )
( "min", OpCode::Id::MIN );
opcodes_float[3].add
( "mad", OpCode::Id::MAD );
opcodes_compare.add
( "cmp", OpCode::Id::CMP );
// Flow control mnemonics, split over two tables
opcodes_flowcontrol[0].add
( "break", OpCode::Id::BREAK )
( "breakc", OpCode::Id::BREAKC )
( "if", OpCode::Id::GEN_IF )
( "loop", OpCode::Id::LOOP );
opcodes_flowcontrol[1].add
( "jmp", OpCode::Id::GEN_JMP )
( "call", OpCode::Id::GEN_CALL );
opcodes_setemit.add
( "setemitraw", OpCode::Id::SETEMIT );
// Sign characters map to the factors +1 / -1
signs.add( "+", +1)
( "-", -1);
// TODO: Add rgba/stq masks
swizzlers.add
( "x", InputSwizzlerMask::x )
( "y", InputSwizzlerMask::y )
( "z", InputSwizzlerMask::z )
( "w", InputSwizzlerMask::w );
// TODO: Make sure this is followed by a space or *some* separator
// TODO: Use qi::repeat(1,4)(swizzlers) instead of Kleene [failed to work when I tried, so make this work!]
// TODO: Use qi::lexeme[swizzlers] [crashed when I tried, so make this work!]
// NOTE(review): the Kleene star accepts any number of components (including none); nothing in this rule limits the count
swizzle_mask = qi::attr_cast<InputSwizzlerMask, std::vector<InputSwizzlerMask::Component>>(*swizzlers);
// A letter or underscore followed by any number of letters, digits or underscores
identifier = qi::lexeme[qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_")];
// Lookahead only: checks that an identifier follows without consuming it
peek_identifier = &identifier;
uint_after_sign = qi::uint_; // TODO: NOT dot (or alphanum) after this to prevent floats..., TODO: overflows?
sign_with_uint = signs > uint_after_sign;
// First term: an unsigned number (with an implicit +1 sign) or an identifier
index_expression_first_term = (qi::attr(+1) >> qi::uint_) | (peek_identifier > identifier);
// Following terms: "+ identifier" or a signed number
index_expression_following_terms = (qi::lit('+') >> peek_identifier > identifier) | sign_with_uint;
index_expression = (-index_expression_first_term) // the first element has an optional sign
>> (*index_expression_following_terms); // following elements have a mandatory sign
// Operand: optional sign, identifier, optional "[index expression]", then any number of ".swizzle" suffixes
expression = ((-signs) > peek_identifier > identifier) >> (-(qi::lit('[') > index_expression > qi::lit(']'))) >> *(qi::lit('.') > swizzle_mask);
// A statement ends at a line break or at the end of the input; no attribute is produced
end_of_statement = qi::omit[qi::eol | qi::eoi];
// Error handling
BOOST_SPIRIT_DEBUG_NODE(identifier);
BOOST_SPIRIT_DEBUG_NODE(uint_after_sign);
BOOST_SPIRIT_DEBUG_NODE(index_expression);
BOOST_SPIRIT_DEBUG_NODE(peek_identifier);
BOOST_SPIRIT_DEBUG_NODE(expression);
BOOST_SPIRIT_DEBUG_NODE(swizzle_mask);
BOOST_SPIRIT_DEBUG_NODE(end_of_statement);
// Messages are keyed by rule name
// NOTE(review): presumably BOOST_SPIRIT_DEBUG_NODE above is what assigns each rule its name - confirm
diagnostics.Add(swizzle_mask.name(), "Expected swizzle mask after period");
diagnostics.Add(peek_identifier.name(), "Expected identifier");
diagnostics.Add(uint_after_sign.name(), "Expected integer number after sign");
diagnostics.Add(index_expression.name(), "Expected index expression between '[' and ']'");
diagnostics.Add(expression.name(), "Expected expression of a known identifier");
diagnostics.Add(end_of_statement.name(), "Expected end of statement");
}

View File

@@ -0,0 +1,106 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Enable this for detailed XML overview of parser results
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"
#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"
namespace spirit = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::qi::ascii;
namespace phoenix = boost::phoenix;
using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;
using namespace nihstro;
// Adapt parser data structures for use with boost::spirit
/*BOOST_FUSION_ADAPT_STRUCT(
IntegerWithSign,
(int, sign)
(unsigned, value)
)
*/
// Expose CompareInstruction to Boost.Fusion so that the parsed opcode, operand
// expressions and compare operators can be assigned to its members in this order.
BOOST_FUSION_ADAPT_STRUCT(
CompareInstruction,
(OpCode, opcode)
(std::vector<Expression>, arguments)
(std::vector<Instruction::Common::CompareOpType::Op>, ops)
)
/**
 * Sets up the grammar for the compare instruction:
 *
 *     cmp <expression>, <expression>, <op>, <op> <end of statement>
 *
 * @param context Forwarded to the CommonRules member, which provides the opcode symbol
 *                table, the expression and end_of_statement rules and the diagnostics object.
 */
template<>
CompareParser<ParserIterator>::CompareParser(const ParserContext& context)
: CompareParser::base_type(instruction),
common(context),
opcodes_compare(common.opcodes_compare),
expression(common.expression),
end_of_statement(common.end_of_statement),
diagnostics(common.diagnostics) {
// TODO: Will this properly match >= ?
// NOTE(review): "<"/"<=" and ">"/">=" share a prefix; this relies on the symbol table preferring the longer entry - confirm with a test
compare_ops.add
( "==", CompareOp::Equal )
( "!=", CompareOp::NotEqual )
( "<", CompareOp::LessThan )
( "<=", CompareOp::LessEqual )
( ">", CompareOp::GreaterThan )
( ">=", CompareOp::GreaterEqual );
// Setup rules
auto comma_rule = qi::lit(',');
// Case-insensitive opcode that must be followed by whitespace (lookahead only)
opcode = qi::no_case[qi::lexeme[opcodes_compare >> &ascii::space]];
compare_op = qi::lexeme[compare_ops];
// cmp src1, src2, op1, op2
// TODO: Also allow "cmp src1 op1 src2, src1 op2 src2"
// '>' is an expectation point: once the preceding element matched, the following one is mandatory
two_ops = compare_op > comma_rule > compare_op;
two_expressions = expression > comma_rule > expression;
instr[0] = opcode > two_expressions > comma_rule > two_ops;
instruction = instr[0] > end_of_statement;
// Error handling
BOOST_SPIRIT_DEBUG_NODE(instr[0]);
BOOST_SPIRIT_DEBUG_NODE(instruction);
// On a failed expectation, hand the diagnostics object and the error arguments (_1.._4) to error_handler and fail the rule
qi::on_error<qi::fail>(instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}

View File

@@ -0,0 +1,132 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"
#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"
using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;
using namespace nihstro;
// Adapt parser data structures for use with boost::spirit
// Expose ConditionInput to Boost.Fusion as (invert, identifier, swizzler_mask).
// NOTE(review): none of the declaration rules set up in this file reference ConditionInput
// directly; this adaptation may only be needed by shared headers - confirm before removing.
BOOST_FUSION_ADAPT_STRUCT(
ConditionInput,
(bool, invert)
(Identifier, identifier)
(boost::optional<InputSwizzlerMask>, swizzler_mask)
)
// Optional trailing part of a declaration: a list of constant values and/or an output semantic.
BOOST_FUSION_ADAPT_STRUCT(
StatementDeclaration::Extra,
(std::vector<float>, constant_value)
(boost::optional<OutputRegisterInfo::Type>, output_semantic)
)
// A full declaration: alias name, first identifier, optional last identifier of a range,
// optional swizzle mask, and the Extra part adapted above.
BOOST_FUSION_ADAPT_STRUCT(
StatementDeclaration,
(std::string, alias_name)
(Identifier, identifier_start)
(boost::optional<Identifier>, identifier_end)
(boost::optional<InputSwizzlerMask>, swizzle_mask)
(StatementDeclaration::Extra, extra)
)
// Manually define a swap() overload for qi::hold to work.
/*namespace boost {
namespace spirit {
void swap(nihstro::Condition& a, nihstro::Condition& b) {
boost::fusion::swap(a, b);
}
}
}*/
// Builds the grammar for alias declarations of the form
//   .alias <name> <identifier>[-<identifier>][.<swizzle>] [as <constant | output semantic>]
// terminated by end_of_statement. Identifier, swizzle-mask and end-of-statement rules as well as
// the diagnostics object come from `common`, which is constructed from `context`.
template<>
DeclarationParser<ParserIterator>::DeclarationParser(const ParserContext& context)
: DeclarationParser::base_type(declaration),
common(context),
identifier(common.identifier), swizzle_mask(common.swizzle_mask),
end_of_statement(common.end_of_statement),
diagnostics(common.diagnostics) {
// Setup symbol table
// Maps the semantic names accepted after "as" to OutputRegisterInfo values.
output_semantics.add("position", OutputRegisterInfo::POSITION);
output_semantics.add("quaternion", OutputRegisterInfo::QUATERNION);
output_semantics.add("color", OutputRegisterInfo::COLOR);
output_semantics.add("texcoord0", OutputRegisterInfo::TEXCOORD0);
output_semantics.add("texcoord1", OutputRegisterInfo::TEXCOORD1);
output_semantics.add("texcoord2", OutputRegisterInfo::TEXCOORD2);
output_semantics.add("view", OutputRegisterInfo::VIEW);
output_semantics_rule = qi::lexeme[output_semantics];
// Setup rules
// The keyword "alias" plus one blank is matched but omitted from the attribute; the identifier
// that follows is the alias name.
alias_identifier = qi::omit[qi::lexeme["alias" >> ascii::blank]] > identifier;
// e.g. 5.4 or (1.1, 2, 3)
constant = (qi::repeat(1)[qi::float_]
| (qi::lit('(') > (qi::float_ % qi::lit(',')) > qi::lit(')')));
// Attribute-only parsers that consume no input and yield an empty vector / empty optional.
dummy_const = qi::attr(std::vector<float>());
dummy_semantic = qi::attr(boost::optional<OutputRegisterInfo::Type>());
// match a constant or a semantic, and fill the respective other one with a dummy
const_or_semantic = (dummy_const >> output_semantics_rule) | (constant >> dummy_semantic);
// TODO: Would like to use +ascii::blank instead, but somehow that fails to parse lines like ".alias name o2.xy texcoord0" correctly
string_as = qi::omit[qi::no_skip[*/*+*/ascii::blank >> qi::lit("as") >> +ascii::blank]];
// Either "as" followed by a constant/semantic, or nothing, in which case both extras are dummies.
declaration = ((qi::lit('.') > alias_identifier) >> identifier >> -(qi::lit('-') > identifier) >> -(qi::lit('.') > swizzle_mask))
>> (
(string_as > const_or_semantic)
| (dummy_const >> dummy_semantic)
)
> end_of_statement;
// Error handling
// Human-readable rule names; presumably these are what the error handler reports as the
// expected construct - confirm against error_handler.
output_semantics_rule.name("output semantic after \"as\"");
alias_identifier.name("known preprocessor directive (i.e. alias).");
const_or_semantic.name("constant or semantic after \"as\"");
BOOST_SPIRIT_DEBUG_NODE(output_semantics_rule);
BOOST_SPIRIT_DEBUG_NODE(constant);
BOOST_SPIRIT_DEBUG_NODE(alias_identifier);
BOOST_SPIRIT_DEBUG_NODE(const_or_semantic);
BOOST_SPIRIT_DEBUG_NODE(declaration);
// Parse errors inside `declaration` are forwarded to error_handler together with the diagnostics object.
qi::on_error<qi::fail>(declaration, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}

View File

@@ -0,0 +1,116 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Enable this for detailed XML overview of parser results
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"
#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"
namespace spirit = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::qi::ascii;
namespace phoenix = boost::phoenix;
using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;
using namespace nihstro;
// Adapt parser data structures for use with boost::spirit
// Expose StatementInstruction to Boost.Fusion as (opcode, expressions), i.e. a mnemonic
// followed by its list of argument expressions.
BOOST_FUSION_ADAPT_STRUCT(
StatementInstruction,
(OpCode, opcode)
(std::vector<Expression>, expressions)
)
// Builds the grammar for floating point instructions. Opcodes are split into four groups
// (index 0..3); group i takes i+1 comma-separated argument expressions, e.g. "add o1, t2, t5".
// Shared rules and the diagnostics object come from `common`, which is constructed from `context`.
template<>
FloatOpParser<ParserIterator>::FloatOpParser(const ParserContext& context)
: FloatOpParser::base_type(float_instruction),
common(context),
opcodes_float(common.opcodes_float),
expression(common.expression),
end_of_statement(common.end_of_statement),
diagnostics(common.diagnostics) {
// Setup rules
auto comma_rule = qi::lit(',');
for (int i = 0; i < 4; ++i) {
// Make sure that a mnemonic is always followed by a space (such that e.g. "addbla" fails to match)
opcode[i] = qi::no_case[qi::lexeme[opcodes_float[i] >> &ascii::space]];
}
// chain of arguments for each group of opcodes
// expression_chain[i] matches i+1 expressions: each entry extends the previous one by ", <expression>".
expression_chain[0] = expression;
for (int i = 1; i < 4; ++i) {
expression_chain[i] = expression_chain[i - 1] >> comma_rule > expression;
}
// e.g. "add o1, t2, t5"
float_instr[0] = opcode[0] > expression_chain[0];
float_instr[1] = opcode[1] > expression_chain[1];
float_instr[2] = opcode[2] > expression_chain[2];
float_instr[3] = opcode[3] > expression_chain[3];
float_instruction %= (float_instr[0] | float_instr[1] | float_instr[2] | float_instr[3]) > end_of_statement;
// Error handling
// NOTE(review): BOOST_SPIRIT_DEBUG_NODE presumably names each rule after the literal macro
// argument text (e.g. "expression_chain[0]"). The diagnostics.Add() calls below look those names
// up via name(), so the macro invocations are spelled out one by one rather than issued from a
// loop, and they must stay ahead of the Add() calls - confirm against the Boost.Spirit headers.
BOOST_SPIRIT_DEBUG_NODE(opcode[0]);
BOOST_SPIRIT_DEBUG_NODE(opcode[1]);
BOOST_SPIRIT_DEBUG_NODE(opcode[2]);
BOOST_SPIRIT_DEBUG_NODE(opcode[3]);
BOOST_SPIRIT_DEBUG_NODE(expression_chain[0]);
BOOST_SPIRIT_DEBUG_NODE(expression_chain[1]);
BOOST_SPIRIT_DEBUG_NODE(expression_chain[2]);
BOOST_SPIRIT_DEBUG_NODE(expression_chain[3]);
BOOST_SPIRIT_DEBUG_NODE(float_instr[0]);
BOOST_SPIRIT_DEBUG_NODE(float_instr[1]);
BOOST_SPIRIT_DEBUG_NODE(float_instr[2]);
BOOST_SPIRIT_DEBUG_NODE(float_instr[3]);
BOOST_SPIRIT_DEBUG_NODE(float_instruction);
// Register a readable description for each argument-chain rule, keyed by the rule's name.
diagnostics.Add(expression_chain[0].name(), "one argument");
diagnostics.Add(expression_chain[1].name(), "two arguments");
diagnostics.Add(expression_chain[2].name(), "three arguments");
diagnostics.Add(expression_chain[3].name(), "four arguments");
// Parse errors inside `float_instruction` are forwarded to error_handler together with the diagnostics object.
qi::on_error<qi::fail>(float_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}

View File

@@ -0,0 +1,148 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Enable this for detailed XML overview of parser results
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/swap.hpp>
#include <boost/spirit/include/qi.hpp>
#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"
#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"
namespace spirit = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::qi::ascii;
namespace phoenix = boost::phoenix;
using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;
using namespace nihstro;
// Adapt parser data structures for use with boost::spirit
// One operand of a condition: optional negation, identifier, optional swizzle mask.
BOOST_FUSION_ADAPT_STRUCT(
ConditionInput,
(bool, invert)
(Identifier, identifier)
(boost::optional<InputSwizzlerMask>, swizzler_mask)
)
// A condition: first operand, combining operator, second operand.
BOOST_FUSION_ADAPT_STRUCT(
Condition,
(ConditionInput, input1)
(Instruction::FlowControlType::Op, op)
(ConditionInput, input2)
)
// A flow control instruction: opcode, target label, optional return label, optional condition.
BOOST_FUSION_ADAPT_STRUCT(
FlowControlInstruction,
(OpCode, opcode)
(std::string, target_label)
(boost::optional<std::string>, return_label)
(boost::optional<Condition>, condition)
)
// Manually define a swap() overload for qi::hold to work.
// swap() overload for nihstro::Condition, declared inside boost::spirit and implemented by
// forwarding to boost::fusion::swap on the fusion-adapted struct.
// NOTE(review): placed in this namespace presumably so that qi::hold's unqualified swap() call
// resolves to it - confirm against the Boost.Spirit hold directive documentation.
namespace boost {
namespace spirit {
void swap(nihstro::Condition& a, nihstro::Condition& b) {
boost::fusion::swap(a, b);
}
}
}
// Builds the grammar for flow control instructions. Two instruction shapes are accepted:
//   instr[0]: <opcode> <condition>                                   ("if condition")
//   instr[1]: <opcode> <label> [until <label>] [if <condition>]      ("call ... until ... if ...")
// each terminated by end_of_statement. Shared rules and the diagnostics object come from
// `common`, which is constructed from `context`.
template<>
FlowControlParser<ParserIterator>::FlowControlParser(const ParserContext& context)
: FlowControlParser::base_type(flow_control_instruction),
common(context),
opcodes_flowcontrol(common.opcodes_flowcontrol),
expression(common.expression),
identifier(common.identifier),
swizzle_mask(common.swizzle_mask),
end_of_statement(common.end_of_statement),
diagnostics(common.diagnostics) {
// Symbol table translating the textual condition operators into ConditionOp values.
condition_ops.add
( "&&", ConditionOp::And )
( "||", ConditionOp::Or );
// Setup rules
auto blank_rule = qi::omit[ascii::blank];
auto label_rule = identifier.alias();
// Case-insensitive mnemonics that must be followed by whitespace (checked via lookahead).
opcode[0] = qi::lexeme[qi::no_case[opcodes_flowcontrol[0]] >> &ascii::space];
opcode[1] = qi::lexeme[qi::no_case[opcodes_flowcontrol[1]] >> &ascii::space];
condition_op = qi::lexeme[condition_ops];
// Yields true when a "!" is present and false otherwise.
negation = qi::matches[qi::lit("!")];
condition_input = negation >> identifier >> -(qi::lit('.') > swizzle_mask);
// May be a condition involving the conditional codes, or a reference to a uniform
// TODO: Make sure we use qi::hold wherever necessary
// First alternative: two inputs joined by an operator. Second alternative: a single input, with
// the operator filled in as JustX and the second input as a default-constructed ConditionInput.
condition = qi::hold[condition_input >> condition_op >> condition_input]
| (condition_input >> qi::attr(ConditionOp::JustX) >> qi::attr(ConditionInput{}));
// if condition
instr[0] = opcode[0]
>> qi::attr("__dummy") // Dummy label (set indirectly using else,endif, or endloop pseudo-instructions)
>> qi::attr(boost::optional<std::string>()) // Dummy return label
>> condition;
// call target_label until return_label if condition
// The "until" and "if" keywords are matched without skipping and must have a blank on both sides.
instr[1] = opcode[1]
>> label_rule
>> -(qi::no_skip[(blank_rule >> qi::lit("until")) > blank_rule] >> label_rule)
>> -(qi::no_skip[(blank_rule >> qi::lit("if")) > blank_rule] >> condition);
flow_control_instruction %= (instr[0] | instr[1]) > end_of_statement;
// Error handling
BOOST_SPIRIT_DEBUG_NODE(opcode[0]);
BOOST_SPIRIT_DEBUG_NODE(opcode[1]);
BOOST_SPIRIT_DEBUG_NODE(negation);
BOOST_SPIRIT_DEBUG_NODE(condition_op);
BOOST_SPIRIT_DEBUG_NODE(condition_input);
BOOST_SPIRIT_DEBUG_NODE(condition);
BOOST_SPIRIT_DEBUG_NODE(instr[0]);
BOOST_SPIRIT_DEBUG_NODE(instr[1]);
BOOST_SPIRIT_DEBUG_NODE(flow_control_instruction);
// Parse errors inside `flow_control_instruction` are forwarded to error_handler together with the diagnostics object.
qi::on_error<qi::fail>(flow_control_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}

140
externals/nihstro/src/parser_shbin.cpp vendored Normal file
View File

@@ -0,0 +1,140 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "nihstro/parser_shbin.h"
using namespace nihstro;
// Opens the shader binary `filename` and loads everything shared between its programs: the DVLB
// header, the DVLE offset table, the DVLP header, every DVLE header, one source filename per
// DVLE, the shader code and the swizzle (operand descriptor) table.
//
// The stream is configured to throw on bad/fail/eof conditions. A wrong magic word is reported by
// throwing a std::string that describes the problem.
void ShbinParser::ReadHeaders(const std::string& filename) {
    file.exceptions(std::fstream::badbit | std::fstream::failbit | std::fstream::eofbit);
    file.open(filename, std::fstream::in | std::fstream::binary);

    // The DVLB header sits at the very beginning of the file.
    file.seekg(0);
    file.read(reinterpret_cast<char*>(&dvlb_header), sizeof(dvlb_header));
    if (dvlb_header.magic_word != DVLBHeader::MAGIC_WORD) {
        std::stringstream message;
        message << "Wrong DVLB magic word: Got 0x" << std::hex << dvlb_header.magic_word;
        throw message.str();
    }

    // One offset and one header per program; the offsets follow the DVLB header directly.
    dvle_offsets.resize(dvlb_header.num_programs);
    dvle_headers.resize(dvlb_header.num_programs);
    for (auto& entry_offset : dvle_offsets) {
        file.read(reinterpret_cast<char*>(&entry_offset), sizeof(entry_offset));
    }

    // DVLP comes directly after the DVLE offset table
    dvlp_offset = file.tellg();
    file.seekg(dvlp_offset);
    file.read(reinterpret_cast<char*>(&dvlp_header), sizeof(dvlp_header));
    if (dvlp_header.magic_word != DVLPHeader::MAGIC_WORD) {
        std::stringstream message;
        message << "Wrong DVLP magic word at offset " << std::hex << dvlp_offset << ": Got " << std::hex << dvlp_header.magic_word;
        throw message.str();
    }

    // Load and validate each DVLE header at the position given by the offset table.
    int program_index = 0;
    for (auto& program_header : dvle_headers) {
        file.seekg(dvle_offsets[program_index]);
        file.read(reinterpret_cast<char*>(&program_header), sizeof(program_header));
        if (program_header.magic_word != DVLEHeader::MAGIC_WORD) {
            std::stringstream message;
            message << "Wrong DVLE header in DVLE #" << program_index << ": " << std::hex << program_header.magic_word;
            throw message.str();
        }
        ++program_index;
    }

    // TODO: Is there indeed exactly one filename per DVLE?
    // The filenames are stored back to back as NUL-terminated strings.
    dvle_filenames.resize(dvlb_header.num_programs);
    uint32_t symbol_position = dvlp_offset + dvlp_header.filename_symbol_offset;
    for (auto& program_filename : dvle_filenames) {
        program_filename = ReadSymbol(symbol_position);
        symbol_position += program_filename.length() + 1;
    }

    // Read shader binary code
    shader_info.code.resize(dvlp_header.binary_size_words);
    file.seekg(dvlp_offset + dvlp_header.binary_offset);
    file.read(reinterpret_cast<char*>(shader_info.code.data()), dvlp_header.binary_size_words * sizeof(Instruction));

    // Read operand descriptor table
    shader_info.swizzle_info.resize(dvlp_header.swizzle_info_num_entries);
    file.seekg(dvlp_offset + dvlp_header.swizzle_info_offset);
    file.read(reinterpret_cast<char*>(shader_info.swizzle_info.data()), dvlp_header.swizzle_info_num_entries * sizeof(SwizzleInfo));
}
// Loads the tables of the DVLE (program) with index `dvle_index` into shader_info: constant
// table, label table (plus the program-offset -> label-name map), output register table and
// uniform table including the uniform names. Finally records the file offset of the shader code
// in main_offset.
//
// Throws a std::string describing the problem if `dvle_index` does not refer to a program of the
// file. Stream errors surface through whatever exception mask is set on the file stream.
void ShbinParser::ReadDVLE(int dvle_index) {
    // TODO: Check if we have called ReadHeaders() before!
    // Reject negative indices explicitly rather than relying on implicit conversions.
    if (dvle_index < 0 || dvle_index >= dvlb_header.num_programs) {
        std::stringstream stream;
        stream << "Invalid DVLE index " << dvle_index << " given";
        throw stream.str();
    }

    auto& dvle_header = dvle_headers[dvle_index];
    auto& dvle_offset = dvle_offsets[dvle_index];

    // All table offsets in the DVLE header are relative to the start of the DVLE.
    uint32_t symbol_table_offset = dvle_offset + dvle_header.symbol_table_offset;

    shader_info.constant_table.resize(dvle_header.constant_table_size);
    uint32_t constant_table_offset = dvle_offset + dvle_header.constant_table_offset;
    file.seekg(constant_table_offset);
    for (auto& constant_info : shader_info.constant_table)
        file.read(reinterpret_cast<char*>(&constant_info), sizeof(ConstantInfo));

    shader_info.label_table.resize(dvle_header.label_table_size);
    uint32_t label_table_offset = dvle_offset + dvle_header.label_table_offset;
    file.seekg(label_table_offset);
    for (auto& label_info : shader_info.label_table)
        file.read(reinterpret_cast<char*>(&label_info), sizeof(LabelInfo));

    // Names are resolved in a second pass because ReadSymbol() moves the file position.
    for (const auto& label_info : shader_info.label_table)
        shader_info.labels.insert({label_info.program_offset, ReadSymbol(symbol_table_offset + label_info.name_offset)});

    shader_info.output_register_info.resize(dvle_header.output_register_table_size);
    file.seekg(dvle_offset + dvle_header.output_register_table_offset);
    for (auto& info : shader_info.output_register_info)
        file.read(reinterpret_cast<char*>(&info), sizeof(OutputRegisterInfo));

    shader_info.uniform_table.resize(dvle_header.uniform_table_size);
    uint32_t uniform_table_offset = dvle_offset + dvle_header.uniform_table_offset;
    file.seekg(uniform_table_offset);
    // Only the `basic` part of each entry is stored in the file; the name is looked up afterwards.
    for (auto& uniform_info : shader_info.uniform_table)
        file.read(reinterpret_cast<char*>(&uniform_info.basic), sizeof(uniform_info.basic));

    for (auto& uniform_info : shader_info.uniform_table)
        uniform_info.name = ReadSymbol(symbol_table_offset + uniform_info.basic.symbol_offset);

    main_offset = dvlp_offset + dvlp_header.binary_offset;
}
// Returns the NUL-terminated string that starts at absolute file position `offset`.
// Moves the read position of the file stream as a side effect.
std::string ShbinParser::ReadSymbol(uint32_t offset) {
    file.seekg(offset);

    std::string symbol;
    std::getline(file, symbol, '\0');
    return symbol;
}

100
externals/nihstro/src/preprocessor.cpp vendored Normal file
View File

@@ -0,0 +1,100 @@
// Copyright 2015 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <nihstro/parser_assembly_private.h>
#include <nihstro/preprocessor.h>
#include <nihstro/source_tree.h>
#include <boost/spirit/include/qi.hpp>
#include <fstream>
namespace nihstro {
// Grammar that matches a single `.include "path"` directive and yields the quoted path as a
// std::string. Whitespace between the tokens is handled by AssemblySkipper.
template<typename Iterator>
struct IncludeParser : qi::grammar<Iterator, std::string(), AssemblySkipper<Iterator>> {
using Skipper = AssemblySkipper<Iterator>;
IncludeParser() : IncludeParser::base_type(include) {
// ".include" must be followed by whitespace (lookahead). The path is a non-empty run of
// characters from the set given to qi::char_, enclosed in double quotes. The directive must be
// followed by an end of line or the end of the input, which is matched but not part of the result.
include = qi::lexeme[qi::lit(".include") >> &qi::ascii::space]
> qi::lexeme[qi::lit("\"") > +qi::char_("a-zA-Z0-9./_\\-") > qi::lit("\"")]
> qi::omit[qi::eol | qi::eoi];
}
// Start rule of the grammar.
qi::rule<Iterator, std::string(), Skipper> include;
};
// Loads the assembly file `filename` into a SourceTree and resolves its `.include "path"`
// directives recursively. Each directive is replaced by a single newline in the including file's
// code and the included file is attached to the tree at the position of the directive.
//
// Included paths are interpreted relative to the directory of the including file (everything up
// to and including the last '/' of `filename`).
//
// Throws std::runtime_error if the file cannot be opened, if its size cannot be determined, or if
// an included path starts with '/'.
SourceTree PreprocessAssemblyFile(const std::string& filename) {
    SourceTree tree;
    tree.file_info.filename = filename;

    std::ifstream input_file(filename);
    if (!input_file) {
        throw std::runtime_error("Could not open input file " + filename);
    }

    // Directory part of the filename, used as prefix for included files.
    std::string prefix;
    {
        auto last_slash = filename.find_last_of("/");
        if (last_slash != std::string::npos)
            prefix = filename.substr(0, last_slash + 1);
    }

    // Read the whole file into tree.code.
    input_file.seekg(0, std::ios::end);
    const std::streamoff file_size = input_file.tellg();
    if (file_size < 0) {
        // tellg() reports failure as -1; do not feed that into resize().
        throw std::runtime_error("Could not determine size of input file " + filename);
    }
    tree.code.resize(static_cast<std::size_t>(file_size));
    input_file.seekg(0, std::ios::beg);
    input_file.read(&tree.code[0], tree.code.size());
    // A text-mode stream may deliver fewer characters than the file size (line ending
    // translation); drop the unused tail so no stray NUL characters end up in the code.
    tree.code.resize(static_cast<std::size_t>(input_file.gcount()));
    input_file.close();

    auto cursor = tree.code.begin();

    IncludeParser<decltype(cursor)> include_parser;
    AssemblySkipper<decltype(cursor)> skipper;

    // tree.code is modified inside the loop, so end() is re-evaluated on every use and `cursor` is
    // always refreshed from the iterators returned by erase()/insert().
    while (cursor != tree.code.end()) {
        std::string parsed_filename;
        auto cursor_prev = cursor;

        if (qi::phrase_parse(cursor, tree.code.end(), include_parser, skipper, parsed_filename)) {
            if (parsed_filename[0] == '/')
                throw std::runtime_error("Given filename must be relative to the path of the including file");

            // TODO: Protect against circular inclusions
            auto newtree = PreprocessAssemblyFile(prefix + parsed_filename);
            tree.Attach(newtree, cursor_prev - tree.code.begin());

            // Replace the directive text by a single newline.
            cursor = tree.code.erase(cursor_prev, cursor);
            cursor = tree.code.insert(cursor, '\n');
        } else {
            // Skip this line
            qi::parse(cursor, tree.code.end(), *(qi::char_ - (qi::eol | qi::eoi)) >> (qi::eol | qi::eoi));
        }
    }

    return tree;
}
} // namespace

393
externals/nihstro/src/tests/parser.cpp vendored Normal file
View File

@@ -0,0 +1,393 @@
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"
#include <boost/optional/optional_io.hpp>
#include <boost/spirit/include/qi.hpp>
#define BOOST_TEST_MODULE Parser
#include <boost/test/unit_test.hpp>
// Implement some ostream<< operators for BOOST_CHECK*
namespace std {
// Streams a vector as "{ a, b, c }"; an empty vector is written as "{ }".
template<typename T>
std::ostream& operator << (std::ostream& os, const std::vector<T>& vec) {
    os << "{";
    // The first element is preceded by a single space, every further one by ", ".
    const char* separator = " ";
    for (const auto& element : vec) {
        os << separator << element;
        separator = ", ";
    }
    os << " }";
    return os;
}
// Streams an expression as [sign]identifier[[index]][.mask]...: the sign and the index are only
// written when present, and every swizzle mask is written with a leading ".".
std::ostream& operator << (std::ostream& os, const nihstro::Expression& expr) {
    const auto& signed_id = expr.signed_identifier;
    if (signed_id.sign) {
        os << *signed_id.sign;
    }
    os << signed_id.identifier;

    if (expr.index) {
        os << "[" << *expr.index << "]";
    }

    for (const auto& swizzle : expr.swizzle_masks) {
        os << "." << swizzle;
    }
    return os;
}
// Streams the entries of an index expression separated by ", ". Raw indices and address register
// identifiers are written as such; any other kind of entry is written as "?".
std::ostream& operator << (std::ostream& os, const nihstro::IndexExpression& expr) {
    for (size_t position = 0; position < expr.GetCount(); ++position) {
        if (position > 0) {
            os << ", ";
        }

        if (expr.IsRawIndex(position)) {
            os << expr.GetRawIndex(position);
            continue;
        }
        if (expr.IsAddressRegisterIdentifier(position)) {
            os << expr.GetAddressRegisterIdentifier(position);
            continue;
        }
        os << "?";
    }
    return os;
}
// Streams the result of GetValue() for the given number.
std::ostream& operator << (std::ostream& os, const nihstro::IntegerWithSign& num) {
    const auto printed_value = num.GetValue();
    os << printed_value;
    return os;
}
// Streams a condition operator: "&&" for And, "||" for Or, "??" for anything else.
std::ostream& operator << (std::ostream& os, const nihstro::Instruction::FlowControlType::Op& op) {
    const char* text = "??";
    if (op == Instruction::FlowControlType::And) {
        text = "&&";
    } else if (op == Instruction::FlowControlType::Or) {
        text = "||";
    }
    os << text;
    return os;
}
// Streams a condition operand as [!]identifier[.mask].
std::ostream& operator << (std::ostream& os, const nihstro::ConditionInput& inp) {
    if (inp.invert) {
        os << "!";
    }
    os << inp.identifier;

    if (inp.swizzler_mask) {
        os << "." << *inp.swizzler_mask;
    }
    return os;
}
// Streams a condition as "{ input1 }" when its operator is JustX, and as "{ input1 op input2 }"
// otherwise.
std::ostream& operator << (std::ostream& os, const nihstro::Condition& cond) {
    os << "{ " << cond.input1;

    const bool has_second_operand = (cond.op != Instruction::FlowControlType::JustX);
    if (has_second_operand) {
        os << " " << cond.op;
        os << " " << cond.input2;
    }

    os << " }";
    return os;
}
} // namespace std
// Utility comparison operators
namespace nihstro {
// Two signed integers are equal when both sign and magnitude agree.
bool operator == (const IntegerWithSign& a, const IntegerWithSign& b) {
    if (!(a.sign == b.sign)) {
        return false;
    }
    return a.value == b.value;
}
// Two expressions are equal when sign, identifier, index and swizzle masks all agree.
bool operator == (const Expression& a, const Expression& b) {
    const auto& lhs_id = a.signed_identifier;
    const auto& rhs_id = b.signed_identifier;

    return lhs_id.sign == rhs_id.sign
        && lhs_id.identifier == rhs_id.identifier
        && a.index == b.index
        && a.swizzle_masks == b.swizzle_masks;
}
// Two condition operands are equal when negation flag, identifier and swizzle mask all agree.
bool operator == (const ConditionInput& a, const ConditionInput& b) {
    if (!(a.invert == b.invert)) {
        return false;
    }
    if (!(a.identifier == b.identifier)) {
        return false;
    }
    return a.swizzler_mask == b.swizzler_mask;
}
// Two conditions are equal when both operands and the operator agree.
bool operator == (const Condition& a, const Condition& b) {
    if (!(a.input1 == b.input1)) {
        return false;
    }
    if (!(a.input2 == b.input2)) {
        return false;
    }
    return a.op == b.op;
}
} // namespace nihstro
// Test helper: runs `parser` (with `skipper`) over `input`. Returns the parsed attribute when the
// parser matches and an empty optional when it does not.
template<typename Attr, typename Parser, typename Skipper>
static boost::optional<Attr> parse(const std::string& input, const Parser& parser, const Skipper& skipper) {
    BOOST_TEST_MESSAGE(("Parsing \"" + input + "\"").c_str());

    // The parsers operate on SourceTree iterators, so wrap the raw text first.
    SourceTree source;
    source.code = input;

    Attr parsed;
    const bool matched = boost::spirit::qi::phrase_parse(source.begin(), source.end(), parser, skipper, parsed);
    if (!matched)
        return {};
    return parsed;
}
// Utility function to check that the given vectors are equal.
// `vec` is the value under test and `exp` the expected value.
template<typename T>
static void CheckVector(const std::vector<T>& vec, const std::vector<T>& exp) {
// Direct equality check; BOOST_CHECK_EQUAL needs an ostream<< operator for std::vector<T> to
// report mismatches.
BOOST_CHECK_EQUAL(vec, exp);
}
// Maps the compile-time character `a` ('x', 'y', 'z' or 'w') to the matching
// InputSwizzlerMask component. Any other character is rejected at compile time.
template<char a>
static InputSwizzlerMask::Component MakeSwizzlerMaskComponent() {
    static_assert(a == 'x' || a == 'y' || a == 'z' || a == 'w', "Invalid component");

    switch (a) {
    case 'x':
        return InputSwizzlerMask::x;
    case 'y':
        return InputSwizzlerMask::y;
    case 'z':
        return InputSwizzlerMask::z;
    default:
        return InputSwizzlerMask::w;
    }
}
// Utility function to convert a series of up to four characters to the corresponding swizzle mask
// One overload per component count. Each returns an InputSwizzlerMask initialized from the
// number of components followed by the list of components, in the order given.
// One component, e.g. MakeInputSwizzlerMask<'x'>().
template<char a>
static InputSwizzlerMask MakeInputSwizzlerMask() {
return { 1, { MakeSwizzlerMaskComponent<a>() } };
}
// Two components, e.g. MakeInputSwizzlerMask<'x', 'y'>().
template<char a, char b>
static InputSwizzlerMask MakeInputSwizzlerMask() {
return { 2, { MakeSwizzlerMaskComponent<a>(), MakeSwizzlerMaskComponent<b>() } };
}
// Three components.
template<char a, char b, char c>
static InputSwizzlerMask MakeInputSwizzlerMask() {
return { 3, { MakeSwizzlerMaskComponent<a>(), MakeSwizzlerMaskComponent<b>(), MakeSwizzlerMaskComponent<c>() } };
}
// Four components.
template<char a, char b, char c, char d>
static InputSwizzlerMask MakeInputSwizzlerMask() {
return { 4, { MakeSwizzlerMaskComponent<a>(), MakeSwizzlerMaskComponent<b>(), MakeSwizzlerMaskComponent<c>(), MakeSwizzlerMaskComponent<d>() } };
}
// Checks that parsing produced a declaration at all (`declaration` is non-empty) and that every
// field of the parsed declaration equals the corresponding field of the expected one, `exp`.
static void CheckDeclaration(const boost::optional<StatementDeclaration>& declaration, const StatementDeclaration& exp) {
    // Parsing failed: record a failure and skip the field checks.
    if (!declaration) {
        BOOST_CHECK(false);
        return;
    }

    BOOST_CHECK_EQUAL(declaration->alias_name, exp.alias_name);
    BOOST_CHECK_EQUAL(declaration->identifier_start, exp.identifier_start);
    BOOST_CHECK_EQUAL(declaration->identifier_end, exp.identifier_end);
    BOOST_CHECK_EQUAL(declaration->swizzle_mask, exp.swizzle_mask);
    CheckVector(declaration->extra.constant_value, exp.extra.constant_value);
    BOOST_CHECK_EQUAL(declaration->extra.output_semantic, exp.extra.output_semantic);
}
// Exercises DeclarationParser on the supported forms of ".alias" declarations. Each case parses
// one line and compares the result field by field against a hand-built StatementDeclaration.
BOOST_AUTO_TEST_CASE(declaration) {
ParserContext context;
DeclarationParser<ParserIterator> declaration_parser(context);
AssemblySkipper<ParserIterator> skipper;
{
// Plain alias
// Expected: no range end, swizzle mask "xy", no extra data.
auto declaration = parse<StatementDeclaration>(".alias my_alias r5.xy", declaration_parser, skipper);
CheckDeclaration(declaration, { "my_alias", "r5", {}, MakeInputSwizzlerMask<'x', 'y'>() });
}
{
// Array alias
// Expected: register range r5-r10, no swizzle mask.
auto declaration = parse<StatementDeclaration>(".alias my_alias r5-r10", declaration_parser, skipper);
CheckDeclaration(declaration, { "my_alias", "r5", std::string("r10") });
}
{
// Output alias
// Expected: swizzle mask "xyz", no constant, output semantic TEXCOORD0.
auto declaration = parse<StatementDeclaration>(".alias my_alias o5.xyz as texcoord0", declaration_parser, skipper);
CheckDeclaration(declaration, { "my_alias", "o5", {}, MakeInputSwizzlerMask<'x', 'y', 'z'>(), { {}, OutputRegisterInfo::TEXCOORD0 } });
}
{
// Output alias without output semantic
// Parsing is expected to throw std::runtime_error here.
BOOST_CHECK_THROW(parse<StatementDeclaration>(".alias my_alias o5.xyz", declaration_parser, skipper),
std::runtime_error);
}
{
// Constant alias
// Expected: swizzle mask "xy" and the two constant values, no output semantic.
auto declaration = parse<StatementDeclaration>(".alias my_alias c5.xy as (1.0, -2.4)", declaration_parser, skipper);
CheckDeclaration(declaration, { "my_alias", "c5", {}, MakeInputSwizzlerMask<'x', 'y'>(), { { 1.0, -2.4 } } });
}
}
// Checks that LabelParser turns the statement "my_label:" into the label name "my_label".
BOOST_AUTO_TEST_CASE(label) {
    ParserContext context;
    LabelParser<ParserIterator> label_parser(context);
    AssemblySkipper<ParserIterator> skipper;

    auto label = parse<StatementLabel>("my_label:", label_parser, skipper);

    // A failed parse must abort the test case here: BOOST_CHECK would only record
    // the failure and continue, and the dereference below is not valid on an
    // empty result.
    BOOST_REQUIRE(label);
    BOOST_CHECK_EQUAL(*label, "my_label");
}
// Checks that `result` holds a parsed arithmetic instruction whose opcode and
// operand expressions match those of `exp`. The expression lists are compared
// through CheckVector. An empty `result` (parse failure) is recorded as one
// failed check.
static void CheckParsedArithmeticInstruction(const boost::optional<FloatOpInstruction>& result, const FloatOpInstruction& exp) {
    if (!result) {
        BOOST_CHECK(false);
        return;
    }

    BOOST_CHECK_EQUAL(result->opcode, exp.opcode);
    CheckVector(result->expressions, exp.expressions);
}
// Runs FloatOpParser over instructions with one to four operands, including
// source operands that carry a register index expression, and compares the
// parsed opcode and operand list against hand-built expectations.
BOOST_AUTO_TEST_CASE(arithmetic) {
    ParserContext context;
    FloatOpParser<ParserIterator> arithmetic_parser(context);
    AssemblySkipper<ParserIterator> skipper;

    {
        // Single operand
        const auto parsed = parse<FloatOpInstruction>("mova r2.xy", arithmetic_parser, skipper);
        const Expression operand = { { {}, "r2" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const FloatOpInstruction expected = { nihstro::OpCode::Id::MOVA, { operand } };
        CheckParsedArithmeticInstruction(parsed, expected);
    }

    {
        // Two operands
        const auto parsed = parse<FloatOpInstruction>("mov o0.wz, r2.xy", arithmetic_parser, skipper);
        const Expression dest = { { {}, "o0" }, {}, { MakeInputSwizzlerMask<'w', 'z'>() } };
        const Expression src = { { {}, "r2" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const FloatOpInstruction expected = { nihstro::OpCode::Id::MOV, { dest, src } };
        CheckParsedArithmeticInstruction(parsed, expected);
    }

    {
        // Two operands; the source is indexed by a single integer literal
        const auto parsed = parse<FloatOpInstruction>("mov o0.wz, r2[5].xy", arithmetic_parser, skipper);
        IndexExpression index;
        index.emplace_back(IntegerWithSign{ +1, 5 });
        const Expression dest = { { {}, "o0" }, {}, { MakeInputSwizzlerMask<'w', 'z'>() } };
        const Expression src = { { {}, "r2" }, index, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const FloatOpInstruction expected = { nihstro::OpCode::Id::MOV, { dest, src } };
        CheckParsedArithmeticInstruction(parsed, expected);
    }

    {
        // Two operands; the source index mixes integer terms with the identifier a1
        const auto parsed = parse<FloatOpInstruction>("mov o0.wz, r2[5+a1-4].xy", arithmetic_parser, skipper);
        IndexExpression index;
        index.emplace_back(IntegerWithSign{ +1, 5 });
        index.emplace_back("a1");
        index.emplace_back(IntegerWithSign{ -1, 4 });
        const Expression dest = { { {}, "o0" }, {}, { MakeInputSwizzlerMask<'w', 'z'>() } };
        const Expression src = { { {}, "r2" }, index, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const FloatOpInstruction expected = { nihstro::OpCode::Id::MOV, { dest, src } };
        CheckParsedArithmeticInstruction(parsed, expected);
    }

    {
        // Three operands
        const auto parsed = parse<FloatOpInstruction>("add o0.xy, r2.xy, r3.xy", arithmetic_parser, skipper);
        const Expression dest = { { {}, "o0" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const Expression src1 = { { {}, "r2" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const Expression src2 = { { {}, "r3" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const FloatOpInstruction expected = { nihstro::OpCode::Id::ADD, { dest, src1, src2 } };
        CheckParsedArithmeticInstruction(parsed, expected);
    }

    {
        // Four operands
        const auto parsed = parse<FloatOpInstruction>("mad o0.xy, r2.xy, r3.xy, v4.xy", arithmetic_parser, skipper);
        const Expression dest = { { {}, "o0" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const Expression src1 = { { {}, "r2" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const Expression src2 = { { {}, "r3" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const Expression src3 = { { {}, "v4" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const FloatOpInstruction expected = { nihstro::OpCode::Id::MAD, { dest, src1, src2, src3 } };
        CheckParsedArithmeticInstruction(parsed, expected);
    }
}
// Checks that `result` holds a parsed flow control instruction whose opcode,
// target label, return label and condition all equal those of `exp`.
// An empty `result` (parse failure) is recorded as one failed check.
static void CheckParsedFlowControlInstruction(const boost::optional<FlowControlInstruction>& result, const FlowControlInstruction& exp) {
    if (!result) {
        BOOST_CHECK(false);
        return;
    }

    BOOST_CHECK_EQUAL(result->opcode, exp.opcode);
    BOOST_CHECK_EQUAL(result->target_label, exp.target_label);
    BOOST_CHECK_EQUAL(result->return_label, exp.return_label);
    BOOST_CHECK_EQUAL(result->condition, exp.condition);
}
// Runs FlowControlParser over "if" statements with different condition shapes
// and over a "loop" statement. Every expectation uses "__dummy" as the target
// label and leaves the return label empty.
BOOST_AUTO_TEST_CASE(flowcontrol) {
    ParserContext context;
    FlowControlParser<ParserIterator> parser(context);
    AssemblySkipper<ParserIterator> skipper;

    {
        // "if" combining two condition inputs with &&, the second one negated
        const auto parsed = parse<FlowControlInstruction>("if cc.x && !cc.y", parser, skipper);
        const Condition expected_cond = { { false, "cc", MakeInputSwizzlerMask<'x'>() }, Instruction::FlowControlType::And, { true, "cc", MakeInputSwizzlerMask<'y'>() } };
        const FlowControlInstruction expected = { nihstro::OpCode::Id::GEN_IF, "__dummy", {}, expected_cond };
        CheckParsedFlowControlInstruction(parsed, expected);
    }

    {
        // "if" on a single condition input with one component
        const auto parsed = parse<FlowControlInstruction>("if cc.x", parser, skipper);
        const Condition expected_cond = { { false, "cc", MakeInputSwizzlerMask<'x'>() }, Instruction::FlowControlType::JustX };
        const FlowControlInstruction expected = { nihstro::OpCode::Id::GEN_IF, "__dummy", {}, expected_cond };
        CheckParsedFlowControlInstruction(parsed, expected);
    }

    {
        // "if" on a single, negated condition input with two components
        const auto parsed = parse<FlowControlInstruction>("if !cc.xy", parser, skipper);
        const Condition expected_cond = { { true, "cc", MakeInputSwizzlerMask<'x', 'y'>() }, Instruction::FlowControlType::JustX };
        const FlowControlInstruction expected = { nihstro::OpCode::Id::GEN_IF, "__dummy", {}, expected_cond };
        CheckParsedFlowControlInstruction(parsed, expected);
    }

    {
        // "loop" statement; its register operand is expected in the condition slot
        const auto parsed = parse<FlowControlInstruction>("loop i3.y", parser, skipper);
        const Condition expected_cond = { { false, "i3", MakeInputSwizzlerMask<'y'>() }, Instruction::FlowControlType::JustX };
        const FlowControlInstruction expected = { OpCode::Id::LOOP, "__dummy", {}, expected_cond };
        CheckParsedFlowControlInstruction(parsed, expected);
    }
}
// Checks that `result` holds a parsed compare instruction whose opcode,
// argument list and comparison operator list match those of `exp`. Both lists
// are compared through CheckVector. An empty `result` (parse failure) is
// recorded as one failed check.
static void CheckParsedCompareInstruction(const boost::optional<CompareInstruction>& result, const CompareInstruction& exp) {
    if (!result) {
        BOOST_CHECK(false);
        return;
    }

    BOOST_CHECK_EQUAL(result->opcode, exp.opcode);
    CheckVector(result->arguments, exp.arguments);
    CheckVector(result->ops, exp.ops);
}
// Runs CompareParser over a "cmp" statement that supplies two operands and two
// comparison operators.
BOOST_AUTO_TEST_CASE(compare) {
    ParserContext context;
    CompareParser<ParserIterator> parser(context);
    AssemblySkipper<ParserIterator> skipper;

    {
        // Two operands followed by two separate comparison operators (== and <=)
        const auto parsed = parse<CompareInstruction>("cmp r5.xy, r2.zx, ==, <=", parser, skipper);
        const Expression lhs = { { {}, "r5" }, {}, { MakeInputSwizzlerMask<'x', 'y'>() } };
        const Expression rhs = { { {}, "r2" }, {}, { MakeInputSwizzlerMask<'z', 'x'>() } };
        const CompareInstruction expected = { OpCode::Id::CMP, { lhs, rhs }, { Instruction::Common::CompareOpType::Equal, Instruction::Common::CompareOpType::LessEqual } };
        CheckParsedCompareInstruction(parsed, expected);
    }
}

View File

@@ -0,0 +1,169 @@
// Copyright 2015 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of the owner nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>

#include "nihstro/source_tree.h"

#define BOOST_TEST_MODULE SourceTreeIterator
#include <boost/test/unit_test.hpp>
namespace std {
std::ostream& operator << (std::ostream& os, const nihstro::SourceTree& tree) {
std::string::const_iterator it = tree.code.cbegin();
for (auto& child : tree.children) {
os << "\"";
os << std::string(it, tree.code.cbegin() + child.offset_within_parent);
os << "\"";
os << " { ";
os << child.tree;
os << " } ";
it = tree.code.cbegin() + child.offset_within_parent;
}
os << "\"" << std::string(it, tree.code.end()) << "\"";
return os;
}
}
// Reference implementation used by the tests: builds the flattened text of
// `tree` by hand. The tree's own code is copied piece by piece, and at each
// child's offset_within_parent the child's flattened text is spliced in.
static std::string FlattenTree(const nihstro::SourceTree& tree) {
    const auto code_begin = tree.code.cbegin();

    std::string flattened;
    auto cursor = code_begin; // first character of the own code not copied yet
    for (const auto& child : tree.children) {
        const auto split = code_begin + child.offset_within_parent;
        flattened.append(cursor, split);
        flattened += FlattenTree(child.tree);
        cursor = split;
    }

    // Own code following the last child
    flattened.append(cursor, tree.code.cend());
    return flattened;
}
// Reference implementation used by the tests: the size of `tree` is the length
// of its own code plus the (recursively computed) sizes of all its children.
static std::string::size_type TreeSize(const nihstro::SourceTree& tree) {
    std::string::size_type total = tree.code.length();
    for (const auto& child : tree.children)
        total += TreeSize(child.tree);
    return total;
}
// Checks the iterators of `tree` against the hand-written reference helpers:
//  - end() - begin() and std::distance(begin(), end()) both equal TreeSize(tree)
//  - forward iteration yields the same characters as FlattenTree(tree)
//  - backward iteration (from end() - 1 down to begin()) yields them in reverse
//
// Being a macro, the Boost.Test checks inside report the line of the invocation.
// The argument is expanded several times, so pass a plain variable without
// side effects.
#define CHECK_TREE(tree) do { \
    /* Check length */ \
    BOOST_CHECK_EQUAL((tree).end() - (tree).begin(), TreeSize(tree)); \
    BOOST_CHECK_EQUAL(std::distance((tree).begin(), (tree).end()), TreeSize(tree)); \
    \
    /* Check forward iteration */ \
    std::string flattened_tree; \
    for (auto& val : (tree)) \
        flattened_tree += val; \
    auto reference_flattened_tree = FlattenTree(tree); \
    BOOST_CHECK_EQUAL(flattened_tree, reference_flattened_tree); \
    BOOST_CHECK_EQUAL_COLLECTIONS(flattened_tree.begin(), flattened_tree.end(), \
                                  reference_flattened_tree.begin(), reference_flattened_tree.end()); \
    \
    /* Check reverse iteration. Skipped for an empty tree: there is no element */ \
    /* at end() - 1 to start from, so the loop below must not be entered. */ \
    flattened_tree.clear(); \
    if ((tree).begin() != (tree).end()) { \
        for (auto it = (tree).end() - 1;; it -= 1) { \
            flattened_tree += *it; \
            /* Compare before decrementing so the iterator never moves in front of begin() */ \
            if (it == (tree).begin()) \
                break; \
        } \
    } \
    std::reverse(reference_flattened_tree.begin(), reference_flattened_tree.end()); \
    BOOST_CHECK_EQUAL(flattened_tree, reference_flattened_tree); \
    BOOST_CHECK_EQUAL_COLLECTIONS(flattened_tree.begin(), flattened_tree.end(), \
                                  reference_flattened_tree.begin(), reference_flattened_tree.end()); \
    \
} while (false)
// Smallest case: a tree that consists only of its own code, with no children attached.
BOOST_AUTO_TEST_CASE(simple_tree) {
nihstro::SourceTree tree;
tree.code = "a b c";
// Run the shared size and forward/reverse iteration checks on the childless tree.
CHECK_TREE(tree);
}
// One level of nesting: two children are attached to the root at offsets 1 and 3
// of its code "aXbXc".
BOOST_AUTO_TEST_CASE(nested_tree) {
    nihstro::SourceTree tree;
    tree.code = "aXbXc";

    nihstro::SourceTree first_child;
    first_child.code = "child1";

    nihstro::SourceTree second_child;
    second_child.code = "child2";

    tree.Attach(first_child, 1).Attach(second_child, 3);

    CHECK_TREE(tree);
}
// Several levels of nesting: the root has four children, of which the first has
// three children of its own (one of them with a further child) and the third
// has one child. Each attach offset is the index of an 'X' in the parent's code.
BOOST_AUTO_TEST_CASE(deep_tree) {
    // Root
    nihstro::SourceTree tree;
    tree.code = "aaaXaaaXaaaXaaaXaaa";

    // First child and its subtree
    nihstro::SourceTree child1;
    child1.code = "FirstChild:bbbXbbbXbbbXbbb\n";

    nihstro::SourceTree child1_child1;
    child1_child1.code = "FirstSubchildOfChild1:ccc";

    nihstro::SourceTree child1_child2;
    child1_child2.code = "SecondSubchildOfChild1:dddXddd";

    nihstro::SourceTree child1_child2_child1;
    child1_child2_child1.code = "FirstSubsubchildOfSubchild2OfChild1:eee";

    nihstro::SourceTree child1_child3;
    child1_child3.code = "ThirdSubchildOfChild1:fff";

    // Second child (no children of its own)
    nihstro::SourceTree child2;
    child2.code = "SecondChild:ggg\n";

    // Third child and its single subchild
    nihstro::SourceTree child3;
    child3.code = "ThirdChild:hhhXhhh\n";

    nihstro::SourceTree child3_child1;
    child3_child1.code = "FirstSubchildOfChild3:iii";

    // Fourth child (no children of its own)
    nihstro::SourceTree child4;
    child4.code = "FourthChild:jjj\n";

    // Assemble bottom-up: every subtree is completed before it is attached to its parent.
    child1_child2.Attach(child1_child2_child1, 26);
    child1.Attach(child1_child1, 14).Attach(child1_child2, 18).Attach(child1_child3, 22);
    child3.Attach(child3_child1, 14);
    tree.Attach(child1, 3).Attach(child2, 7).Attach(child3, 11).Attach(child4, 15);

    CHECK_TREE(tree);
}
// Boundary offsets: the same child is attached first at offset 0 and then,
// after the children have been cleared, at the very end of the parent's code.
BOOST_AUTO_TEST_CASE(subtree_at_begin_and_end) {
    nihstro::SourceTree tree;
    tree.code = "aaa";

    nihstro::SourceTree child;
    child.code = "bbb";

    // Child in front of all of the parent's code
    tree.Attach(child, 0);
    CHECK_TREE(tree);

    // Child behind all of the parent's code
    tree.children.clear();
    const auto end_offset = tree.code.length();
    tree.Attach(child, end_offset);
    CHECK_TREE(tree);
}