I am working on an efficient DE GPU implementation of the power 8 bulb. My old code was taken from the nvidia formus, and frankly I didn't understand any of it. But now I had some success in implementing a trig free iteration code in quaternion form.
What remains to be done, is to optimize the quaternion multiplications with 0 terms, and then get to work on the derivative. In the quaternion implementation multiplying the angles by 8 is performed by squaring the quaternion 2 times. The initial quaternion can be constructed by picking components from the (normalized) zval. There is no need to halve the angle when raising to the power of 8, but for the derivative, I will need to construct a half angle quaternion without using trigonometric functions.
kernel float4 quaternion_multiply(float4 p, float4 q)
{
float w = p.w*q.w - dot(p.xyz,q.xyz);
float3 v = float3(p.w,p.w,p.w)*q.xyz+
float3(q.w,q.w,q.w)*p.xyz+
cross(p.xyz,q.xyz);
return float4(v.x,v.y,v.z,w);
}
kernel float2 bulb_iterate(float3 cval, int maxiter)
{
float3 zval = cval;
float4 qt2;
float4 qt4;
float4 qt8;
float4 qp2;
float4 qp4;
float4 qp8;
float4 quat_pre;
float4 quat;
float4 quat_post;
float4 quat_half = float4(0.0f, -0.70710678f, 0.0f, 0.7071067811f);
float2 res;
float r2;
float r;
int i;
r = sqrt(dot(zval,zval));
for (i = 0; i < maxiter ; i++) {
qt2.zw = normalize(zval.xy);
qt2.x = 0.0f;
qt2.y = 0.0f;
qt4 = quaternion_multiply( qt2, qt2 );
qt8 = quaternion_multiply( qt4, qt4 );
qp2.x = 0.0f;
qp2.z = 0.0f;
qp2.w = zval.z / r;
qp2.y = sqrt( 1.0f - qp2.w * qp2.w);
qp4 = quaternion_multiply( qp2, qp2 );
qp8 = quaternion_multiply( quat_half,
quaternion_multiply( qp4, qp4 ));
quat = float4(1.0f,0.0f,0.0f,0.0f);
quat_pre = quaternion_multiply(qt8, qp8);
quat_post.xyz = -quat_pre.xyz;
quat_post.w = quat_pre.w;
r = pow(r, 8.0f);
quat = quaternion_multiply(
quaternion_multiply( quat_pre, quat) , quat_post);
zval = quat.xyz * float3(r,r,r) + cval;
r2 = dot(zval,zval);
r = sqrt(r2);
if (r2>2.0f) break;
}
if (r<2.0f) {
r = 0.0f;
}
if (i==0) {
r = 0.0f;
}
res.x = (float)i;
res.y = r;
return res;
}