#include <math.h>
#include <stdint.h>
#include <wasm_simd128.h>
// Four packed floats; used below for inputs that are read with one 128-bit load.
typedef float vec4[4];
// Quaternion stored as four floats; the functions below write it in x, y, z, w order.
typedef float quat[4];
/**
 * Converts yaw/pitch/roll Euler angles into a quaternion with WebAssembly SIMD.
 *
 * Scalar reference this routine reproduces (h* are the half angles):
 *   x = cos(hYaw) * sin(hPitch) * cos(hRoll) + sin(hYaw) * cos(hPitch) * sin(hRoll)
 *   y = sin(hYaw) * cos(hPitch) * cos(hRoll) - cos(hYaw) * sin(hPitch) * sin(hRoll)
 *   z = cos(hYaw) * cos(hPitch) * sin(hRoll) - sin(hYaw) * sin(hPitch) * cos(hRoll)
 *   w = cos(hYaw) * cos(hPitch) * cos(hRoll) + sin(hYaw) * sin(hPitch) * sin(hRoll)
 *
 * @param angles {yaw, pitch, roll, unused}, in radians (the values are fed to
 *               sinf). With USE_SIMD_SINCOS all four floats are read by a
 *               single 128-bit load, so the fourth element must be readable;
 *               its value never reaches the result.
 * @param dest   Receives {x, y, z, w}; written with a single 128-bit store.
 *
 * Without USE_SIMD_SINCOS the cosines are derived as sqrt(1 - sin^2), which is
 * never negative. That matches the true cosine only while each half angle is
 * within [-pi/2, pi/2], i.e. each input angle is within [-pi, pi].
 */
inline void RotationYawPitchRollToRef(vec4 angles, quat dest) {
    // Lane layout of both vectors: {yaw, pitch, roll, unused}.
    v128_t sin_v, cos_v;
#if defined(USE_SIMD_SINCOS)
    {
        const v128_t half_angles =
            wasm_f32x4_mul(wasm_v128_load(angles), wasm_f32x4_const_splat(0.5f));
        // As used here, emu_mm_sincos_ps returns the sines and stores the
        // cosines through its first argument.
        sin_v = (v128_t) emu_mm_sincos_ps((v4f *)&cos_v, half_angles);
    }
#else
    // Scalar sinf is the expensive part. The lanes follow the documented
    // yaw, pitch, roll order, the same as the USE_SIMD_SINCOS path.
    sin_v = wasm_f32x4_make(sinf(angles[0] * 0.5f),
                            sinf(angles[1] * 0.5f),
                            sinf(angles[2] * 0.5f),
                            0.0f);
    // cos(x) = sqrt(1 - sin(x)^2), computed for all lanes at once
    // (https://stackoverflow.com/a/2683608).
    cos_v = wasm_f32x4_sqrt(
        wasm_f32x4_sub(wasm_f32x4_const_splat(1.0f), wasm_f32x4_mul(sin_v, sin_v)));
#endif

    // Shuffle indices 0-3 select from sin_v, 4-7 select from cos_v.

    // First product of every component:
    //   cosYaw * sinPitch * cosRoll
    //   sinYaw * cosPitch * cosRoll
    //   cosYaw * cosPitch * sinRoll
    //   cosYaw * cosPitch * cosRoll
    const v128_t yaw_lhs   = wasm_i32x4_shuffle(sin_v, cos_v, 4, 0, 4, 4);
    const v128_t pitch_lhs = wasm_i32x4_shuffle(sin_v, cos_v, 1, 5, 5, 5);
    const v128_t roll_lhs  = wasm_i32x4_shuffle(sin_v, cos_v, 6, 6, 2, 6);
    const v128_t lhs = wasm_f32x4_mul(wasm_f32x4_mul(yaw_lhs, pitch_lhs), roll_lhs);

    // Second product of every component, with the sign folded into the yaw factor:
    //   +sinYaw * cosPitch * sinRoll
    //   -cosYaw * sinPitch * sinRoll
    //   -sinYaw * sinPitch * cosRoll
    //   +sinYaw * sinPitch * sinRoll
    // The sign mask is spelled as integer bit patterns instead of -0.f because
    // the sign of a -0.f literal may not survive fast-math builds.
    const v128_t sign_mask = wasm_i32x4_const(0, INT32_MIN, INT32_MIN, 0);
    const v128_t yaw_rhs =
        wasm_v128_xor(wasm_i32x4_shuffle(sin_v, cos_v, 0, 4, 0, 0), sign_mask);
    const v128_t pitch_rhs = wasm_i32x4_shuffle(sin_v, cos_v, 5, 1, 1, 1);
    const v128_t roll_rhs  = wasm_i32x4_shuffle(sin_v, cos_v, 2, 2, 6, 2);
    const v128_t rhs = wasm_f32x4_mul(wasm_f32x4_mul(yaw_rhs, pitch_rhs), roll_rhs);

    wasm_v128_store(dest, wasm_f32x4_add(lhs, rhs));
}
// angles: yaw, pitch, roll
typedef float vec3[3];
inline void RotationYawPitchRollToRefUnaligned(vec3 angles, quat dest) {
/*
const halfRoll = roll * 0.5;
const halfPitch = pitch * 0.5;
const halfYaw = yaw * 0.5;
const sinRoll = Math.sin(halfRoll);
const cosRoll = Math.cos(halfRoll);
const sinPitch = Math.sin(halfPitch);
const cosPitch = Math.cos(halfPitch);
const sinYaw = Math.sin(halfYaw);
const cosYaw = Math.cos(halfYaw);
result._x = cosYaw * sinPitch * cosRoll + sinYaw * cosPitch * sinRoll;
result._y = sinYaw * cosPitch * cosRoll - cosYaw * sinPitch * sinRoll;
result._z = cosYaw * cosPitch * sinRoll - sinYaw * sinPitch * cosRoll;
result._w = cosYaw * cosPitch * cosRoll + sinYaw * sinPitch * sinRoll;
*/
v128_t sin, cos, a, b, c, d;
{
// half_vec not needed outside this block
vec4 half_vec;
half_vec[0] = angles[0];
half_vec[1] = angles[1];
half_vec[2] = angles[2];
half_vec[3] = 0; // make sure this is initialized
// halfYaw, halfPitch, halfRoll, 0
wasm_v128_store(half_vec, wasm_f32x4_mul(wasm_v128_load(half_vec), wasm_f32x4_const_splat(0.5)));
// non-simd sinf and cosf
// sinYaw, sinPitch, sinRoll, 0
sin = wasm_f32x4_make(sinf(half_vec[0]), sinf(half_vec[1]), sinf(half_vec[2]), 0);
}
// https://stackoverflow.com/a/2683608
// cos(x)^2 = 1 - sin(x)^2
// cos(x) = sqrt(1 - sin(x)^2)
// cosYaw, cosPitch, cosRoll, 0
cos = wasm_f32x4_sqrt(wasm_f32x4_add(wasm_f32x4_const_splat(1.0f), wasm_f32x4_neg(wasm_f32x4_mul(sin, sin))));
// cosYaw, sinYaw, cosYaw, cosYaw
b = wasm_i32x4_shuffle(sin, cos, 4, 0, 4, 4);
// sinPitch, cosPitch, cosPitch, cosPitch
c = wasm_i32x4_shuffle(sin, cos, 1, 5, 5, 5);
// cosRoll, cosRoll, sinRoll, cosRoll
d = wasm_i32x4_shuffle(sin, cos, 6, 6, 2, 6);
// cosYaw * sinPitch * cosRoll
// sinYaw * cosPitch * cosRoll
// cosYaw * cosPitch * sinRoll
// cosYaw * cosPitch * cosRoll
a = wasm_f32x4_mul(wasm_f32x4_mul(b, c), d);
// sinYaw, cosYaw, sinYaw, sinYaw
b = wasm_i32x4_shuffle(sin, cos, 0, 4, 0, 0);
// sinYaw, -cosYaw, -sinYaw, sinYaw
b = wasm_v128_xor(b, wasm_f32x4_const(0.f, -0.f, -0.f, 0.f));
// cosPitch, sinPitch, sinPitch, sinPitch
c = wasm_i32x4_shuffle(sin, cos, 5, 1, 1, 1);
// sinRoll, sinRoll, cosRoll, sinRoll
d = wasm_f32x4_shuffle(sin, cos, 2, 2, 6, 2);
// cosYaw * sinPitch * cosRoll + sinYaw * cosPitch * sinRoll;
// sinYaw * cosPitch * cosRoll - cosYaw * sinPitch * sinRoll;
// cosYaw * cosPitch * sinRoll - sinYaw * sinPitch * cosRoll;
// cosYaw * cosPitch * cosRoll + sinYaw * sinPitch * sinRoll;
a = wasm_f32x4_add(a, wasm_f32x4_mul(wasm_f32x4_mul(b, c), d));
wasm_v128_store(dest, a);
}
Using emu_mm_sincos_ps instead of scalar sinf can make this faster.
Edit: for anyone who wants to use this, note that a -0.f constant
(used as a sign mask) can be miscompiled by some compilers when fast-math is enabled.