#include <math.h>
#include <wasm_simd128.h>
typedef float vec4[4];
typedef float quat[4];
// Builds the rotation quaternion for a (yaw, pitch, roll) triple with wasm SIMD.
//
// angles: { yaw, pitch, roll, unused }, in radians. Four floats are required
//         because the array is read with a single 128-bit load; the 4th lane
//         never influences the result.
// dest:   receives the quaternion as { x, y, z, w } through one 128-bit store.
//
// Scalar formula being vectorized (all sin/cos taken of the HALF angles):
//   x = cosYaw * sinPitch * cosRoll + sinYaw * cosPitch * sinRoll
//   y = sinYaw * cosPitch * cosRoll - cosYaw * sinPitch * sinRoll
//   z = cosYaw * cosPitch * sinRoll - sinYaw * sinPitch * cosRoll
//   w = cosYaw * cosPitch * cosRoll + sinYaw * sinPitch * sinRoll
//
// NOTE(review): if this is built as C99+, a plain `inline` definition does not
// emit an external definition; confirm whether `static inline` is wanted.
inline void RotationYawPitchRollToRef(vec4 angles, quat dest) {
    v128_t sin_v;
    v128_t cos_v;
    {
        // half_vec is only needed to feed the scalar sinf/cosf calls.
        vec4 half_vec;
        // halfYaw, halfPitch, halfRoll, (unused)
        wasm_v128_store(half_vec,
                        wasm_f32x4_mul(wasm_v128_load(angles), wasm_f32x4_const_splat(0.5f)));
        // sinYaw, sinPitch, sinRoll, 0
        sin_v = wasm_f32x4_make(sinf(half_vec[0]), sinf(half_vec[1]), sinf(half_vec[2]), 0.0f);
        // cosYaw, cosPitch, cosRoll, 1
        // cos must be computed directly: sqrt(1 - sin^2) is never negative, but
        // cos(half angle) IS negative whenever |angle| exceeds pi, and dropping
        // that sign yields a wrong quaternion.
        cos_v = wasm_f32x4_make(cosf(half_vec[0]), cosf(half_vec[1]), cosf(half_vec[2]), 1.0f);
    }

    // Shuffle indices 0..3 select lanes of sin_v, 4..7 select lanes of cos_v.

    // ---- first product of every component ----
    // cosYaw, sinYaw, cosYaw, cosYaw
    const v128_t yaw_a = wasm_i32x4_shuffle(sin_v, cos_v, 4, 0, 4, 4);
    // sinPitch, cosPitch, cosPitch, cosPitch
    const v128_t pitch_a = wasm_i32x4_shuffle(sin_v, cos_v, 1, 5, 5, 5);
    // cosRoll, cosRoll, sinRoll, cosRoll
    const v128_t roll_a = wasm_i32x4_shuffle(sin_v, cos_v, 6, 6, 2, 6);
    const v128_t term_a = wasm_f32x4_mul(wasm_f32x4_mul(yaw_a, pitch_a), roll_a);

    // ---- second product, with the +,-,-,+ sign pattern folded into yaw ----
    // sinYaw, cosYaw, sinYaw, sinYaw
    const v128_t yaw_pos = wasm_i32x4_shuffle(sin_v, cos_v, 0, 4, 0, 0);
    // -sinYaw, -cosYaw, -sinYaw, -sinYaw
    const v128_t yaw_neg = wasm_f32x4_neg(yaw_pos);
    // sinYaw, -cosYaw, -sinYaw, sinYaw
    const v128_t yaw_b = wasm_i32x4_shuffle(yaw_pos, yaw_neg, 0, 5, 4, 0);
    // cosPitch, sinPitch, sinPitch, sinPitch
    const v128_t pitch_b = wasm_i32x4_shuffle(sin_v, cos_v, 5, 1, 1, 1);
    // sinRoll, sinRoll, cosRoll, sinRoll
    // (roll lives in lane 2 of sin_v and lane 6 of the sin/cos pair; selecting
    // lanes 0/4 here would pick yaw again and corrupt the result)
    const v128_t roll_b = wasm_i32x4_shuffle(sin_v, cos_v, 2, 2, 6, 2);
    const v128_t term_b = wasm_f32x4_mul(wasm_f32x4_mul(yaw_b, pitch_b), roll_b);

    // x, y, z, w
    wasm_v128_store(dest, wasm_f32x4_add(term_a, term_b));
}
// angles: yaw, pitch, roll
typedef float vec3[3];
inline void RotationYawPitchRollToRefUnaligned(vec3 angles, versor dest) {
/*
const halfRoll = roll * 0.5;
const halfPitch = pitch * 0.5;
const halfYaw = yaw * 0.5;
const sinRoll = Math.sin(halfRoll);
const cosRoll = Math.cos(halfRoll);
const sinPitch = Math.sin(halfPitch);
const cosPitch = Math.cos(halfPitch);
const sinYaw = Math.sin(halfYaw);
const cosYaw = Math.cos(halfYaw);
result._x = cosYaw * sinPitch * cosRoll + sinYaw * cosPitch * sinRoll;
result._y = sinYaw * cosPitch * cosRoll - cosYaw * sinPitch * sinRoll;
result._z = cosYaw * cosPitch * sinRoll - sinYaw * sinPitch * cosRoll;
result._w = cosYaw * cosPitch * cosRoll + sinYaw * sinPitch * sinRoll;
*/
v128_t sin, cos, a, b, c, d;
{
// half_vec not needed outside this block
vec4 half_vec;
half_vec[0] = angles[0];
half_vec[1] = angles[1];
half_vec[2] = angles[2];
half_vec[3] = 0; // make sure this is initialized
// halfYaw, halfPitch, halfRoll, 0
wasm_v128_store(half_vec, wasm_f32x4_mul(wasm_v128_load(half_vec), wasm_f32x4_const_splat(0.5)));
// non-simd sinf and cosf
// sinYaw, sinPitch, sinRoll, 0
sin = wasm_f32x4_make(sinf(half_vec[0]), sinf(half_vec[1]), sinf(half_vec[2]), 0);
}
// https://stackoverflow.com/a/2683608
// cos(x)^2 = 1 - sin(x)^2
// cos(x) = sqrt(1 - sin(x)^2)
// cosYaw, cosPitch, cosRoll, 0
cos = wasm_f32x4_sqrt(wasm_f32x4_add(wasm_f32x4_const_splat(1.0f), wasm_f32x4_neg(wasm_f32x4_mul(sin, sin))));
// cosYaw, sinYaw, cosYaw, cosYaw
b = wasm_i32x4_shuffle(sin, cos, 4, 0, 4, 4);
// sinPitch, cosPitch, cosPitch, cosPitch
c = wasm_i32x4_shuffle(sin, cos, 1, 5, 5, 5);
// cosRoll, cosRoll, sinRoll, cosRoll
d = wasm_i32x4_shuffle(sin, cos, 6, 6, 2, 6);
// cosYaw * sinPitch * cosRoll
// sinYaw * cosPitch * cosRoll
// cosYaw * cosPitch * sinRoll
// cosYaw * cosPitch * cosRoll
a = wasm_f32x4_mul(wasm_f32x4_mul(b, c), d);
// sinYaw, cosYaw, sinYaw, sinYaw
b = wasm_i32x4_shuffle(sin, cos, 0, 4, 0, 0);
// -sinYaw, -cosYaw, -sinYaw, -sinYaw
c = wasm_f32x4_neg(b);
// sinYaw, -cosYaw, -sinYaw, sinYaw
b = wasm_i32x4_shuffle(b, c, 0, 5, 4, 0);
// cosPitch, sinPitch, sinPitch, sinPitch
c = wasm_i32x4_shuffle(sin, cos, 5, 1, 1, 1);
// sinRoll, sinRoll, cosRoll, sinRoll
d = wasm_i32x4_shuffle(sin, cos, 4, 0, 4, 4);
// cosYaw * sinPitch * cosRoll + sinYaw * cosPitch * sinRoll;
// sinYaw * cosPitch * cosRoll - cosYaw * sinPitch * sinRoll;
// cosYaw * cosPitch * sinRoll - sinYaw * sinPitch * cosRoll;
// cosYaw * cosPitch * cosRoll + sinYaw * sinPitch * sinRoll;
a = wasm_f32x4_add(a, wasm_f32x4_mul(wasm_f32x4_mul(b, c), d));
wasm_v128_store(dest, a);
}
1 Like
Cool. Did you try without using intrinsics?
Auto-Vectorization or without SIMD?
This function is a port of Quaternion.RotationYawPitchRollToRef to wasm SIMD; the original implementation is quoted in the "The original code" comment at the top of the function body.
This is not benchmarked yet.
Autovectorization
While compilers do excellent work auto-vectorizing for the x86_64 platform, they cannot yet do the same when targeting wasm simd128, as tested below.
code
/* Type your code here, or load an example. */
#include <math.h>
typedef float vec3[3];
typedef float versor[4];
void RotationYawPitchRollToRefScalar(vec3 angles, versor dest) {
float halfRoll = angles[2] * 0.5f;
float halfPitch = angles[1] * 0.5f;
float halfYaw = angles[0] * 0.5f;
float sinRoll = sinf(halfRoll);
float cosRoll = cosf(halfRoll);
float sinPitch = sinf(halfPitch);
float cosPitch = cosf(halfPitch);
float sinYaw = sinf(halfYaw);
float cosYaw = cosf(halfYaw);
dest[0] = cosYaw * sinPitch * cosRoll + sinYaw * cosPitch * sinRoll;
dest[1] = sinYaw * cosPitch * cosRoll - cosYaw * sinPitch * sinRoll;
dest[2] = cosYaw * cosPitch * sinRoll - sinYaw * sinPitch * cosRoll;
dest[3] = cosYaw * cosPitch * cosRoll + sinYaw * sinPitch * sinRoll;
}
Compile options:
clang -O3 -nostartfiles -Wl,--export=RotationYawPitchRollToRefScalar -Wl,--no-entry -Wl,--allow-undefined test.c -o clang_O3.wasm
clang -O3 -nostartfiles -Wl,--export=RotationYawPitchRollToRefScalar -Wl,--no-entry -Wl,--allow-undefined -msimd128 test.c -o clang_O3_msimd128.wasm
compiled_wasm.zip (11.0 KB)
I've had the same problems. If you print the bailout info, clang knows it can auto-vectorize; it just doesn't do it. There must be a way -_-