hi twinbee,
I also kept just the squared values as temporaries; multiplications are done very fast (about 4 clocks) on the CPU.
Going through memory is often slower, and I also wrote the cosine and sine versions in pure x87 asm; doing it with SSE2 was a bit too much work for me.

If you need fast code and can make use of asm, then I can also send you the cosine code; on this computer I have only the sine version.
The pow8 anniversary is upcoming..
asm //Sine pow8 bulb
// One in-place update of three doubles addressed by eax (x), edx (y) and
// ecx (z), written entirely with x87 instructions.
//
// Notation used in the stack comments (leftmost entry is st(0)):
//   xx = x*x, yy = y*y, zz = z*z, r = xx+yy, rr = r*r, zzzz = zz*zz,
//   xxxx = xx*xx, yyyy = yy*yy, rrrr = rr*rr.
// Constants are read from memory relative to edi; the numeric values named
// below (6, 7, 8, 28, 70, 1, dZmul) are the original author's annotations
// and are not visible in this snippet.
//
// Results, in the order they are stored:
//   [ecx] = -( sqrt(r)*(zz-r)*(zzzz-6*r*zz+rr) * z * 8 * dZmul ) + [esi+40]
//   a     = ( (70*rr+zzzz)*zzzz - 28*zz*r*(zzzz+rr) ) / rrrr + 1
//   [edx] = ( yyyy*(7*xx-yy) + xxxx*(xx-7*yy) ) * 8 * x * y * a + [esi+32]
//   [eax] = a * ( xxxx*xxxx + yyyy*(70*xxxx+yyyy) - 28*xx*yy*(yyyy+xxxx) ) + [esi+24]
//
// Notes:
//  - z is overwritten first; afterwards only the cached squares are used.
//    x and y are re-read from [eax]/[edx] before those locations are written.
//  - The x87 stack peaks at 8 entries (all eight registers), so the FPU
//    stack must be empty on entry. It is empty again after the last store.
//  - The division by rrrr is not guarded; rrrr is zero when x = y = 0.
//  - esi and edi are saved and restored; ebp is only read.
push esi
push edi
mov esi, [ebp + 8] //PIteration3D (pointer taken from the caller's frame)
fld qword [eax] //x
mov edi, [esi + 48] //PVars
fmul st(0), st(0) //xx
fld qword [edx] //y,xx
add edi, 88 //bias the constant base; presumably so every displacement below fits in a signed byte
fmul st(0), st(0) //yy,xx
fld qword [ecx] //z,yy,xx
fmul st(0), st(0) //zz,yy,xx
fld st(2) //xx,zz,yy,xx
fadd st(0), st(2) //xx+yy=r,zz,yy,xx
fld st(0) //r,r,zz,yy,xx
fmul st(0), st(1) //rr,r,zz,yy,xx
fld st(2) //zz,rr,r,zz,yy,xx
fmul st(0), st(0) //zzzz,rr,r,zz,yy,xx
fld st(2) //r,zzzz,rr,r,zz,yy,xx -- start of the z calculation
fmul st(0), st(4) //r*zz,zzzz,rr,r,zz,yy,xx
fmul qword [edi + 56] //6*r*zz,zzzz,rr,r,zz,yy,xx
fsubr st(0), st(1) //zzzz-6rzz,zzzz,rr,r,zz,yy,xx (reverse subtract: st(1)-st(0))
fadd st(0), st(2) //zzzz-6rzz+rr,zzzz,rr,r,zz,yy,xx
fld st(4) //zz,zzzz-6rzz+rr,zzzz,rr,r,zz,yy,xx (8 entries: stack full)
fsub st(0), st(4) //zz-r,zzzz-6rzz+rr,zzzz,rr,r,zz,yy,xx
fmulp //(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx
fld st(3) //r,(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx (8 entries: stack full)
fsqrt //sqrt(r),(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx
fmulp //sqrt(r)*(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx
fmul qword [ecx] //*z (z is still the original value in memory)
fmul qword [edi + 72] //*8
fmul qword [edi - 104] //*dZmul
fchs //negate the product
fadd qword [esi + 40] //+J3
fstp qword [ecx] //store new z; stack: zzzz,rr,r,zz,yy,xx
fld st(0) //zzzz,zzzz,rr,r,zz,yy,xx -- start of the a calculation
fadd st(0), st(2) //zzzz+rr,zzzz,rr,r,zz,yy,xx
fmulp st(3), st(0) //zzzz,rr,r*(zzzz+rr),zz,yy,xx
fld st(1) //rr,zzzz,rr,r*(zzzz+rr),zz,yy,xx
fmul qword [edi + 120] //rr*70,zzzz,rr,r*(zzzz+rr),zz,yy,xx
fadd st(0), st(1) //rr*70+zzzz,zzzz,rr,r*(zzzz+rr),zz,yy,xx
fmulp //(rr*70+zzzz)*zzzz,rr,r*(zzzz+rr),zz,yy,xx
fxch st(2) //r*(zzzz+rr),rr,(rr*70+zzzz)*zzzz,zz,yy,xx
fmulp st(3), st(0) //rr,(rr*70+zzzz)*zzzz,zz*r*(zzzz+rr),yy,xx
fxch st(2) //zz*r*(zzzz+rr),(rr*70+zzzz)*zzzz,rr,yy,xx
fmul qword [edi + 104] //28*zz*r*(zzzz+rr),(rr*70+zzzz)*zzzz,rr,yy,xx
fsubp //(rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr),rr,yy,xx
fxch st(1) //rr,(rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr),yy,xx
fmul st(0), st(0) //rrrr,(rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr),yy,xx
fdivp //((rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr))/rrrr,yy,xx (no zero check on rrrr)
fadd qword [edi - 56] //+1 gives a; stack: a,yy,xx
fld st(1) //yy,a,yy,xx -- start of the y calculation
fmul qword [edi + 64] //7*yy,a,yy,xx
fld st(3) //xx,7*yy,a,yy,xx
fmul qword [edi + 64] //7*xx,7*yy,a,yy,xx
fsub st(0), st(3) //7*xx-yy,7*yy,a,yy,xx
fld st(4) //xx,7*xx-yy,7*yy,a,yy,xx
fsubr st(2), st(0) //xx,7*xx-yy,xx-7*yy,a,yy,xx (st(2) = st(0)-st(2))
fmul st(0), st(0) //xxxx,7*xx-yy,xx-7*yy,a,yy,xx
fmul st(2), st(0) //xxxx,7xx-yy,xxxx(xx-7yy),a,yy,xx
fld st(4) //yy,xxxx,7xx-yy,xxxx(xx-7yy),a,yy,xx
fmul st(0), st(0) //yyyy,xxxx,7xx-yy,xxxx(xx-7yy),a,yy,xx
fmul st(2), st(0) //yyyy,xxxx,yyyy(7xx-yy),xxxx(xx-7yy),a,yy,xx
fxch st(2) //yyyy(7xx-yy),xxxx,yyyy,xxxx(xx-7yy),a,yy,xx
faddp st(3), st(0) //xxxx,yyyy,yyyy(7xx-yy)+xxxx(xx-7yy),a,yy,xx
fxch st(2) //yyyy(7xx-yy)+xxxx(xx-7yy),yyyy,xxxx,a,yy,xx
fmul qword [edi + 72] //*8
fmul qword [eax] //*x (x in memory is still the original value)
fmul qword [edx] //*y (y in memory is still the original value)
fmul st(0), st(3) //*a
fadd qword [esi + 32] //+J2
fstp qword [edx] //store new y; stack: yyyy,xxxx,a,yy,xx
fld st(1) //xxxx,yyyy,xxxx,a,yy,xx -- start of the x calculation
fmul qword [edi + 120] //70xxxx,yyyy,xxxx,a,yy,xx
fadd st(0), st(1) //70xxxx+yyyy,yyyy,xxxx,a,yy,xx
fmul st(0), st(1) //yyyy(70xxxx+yyyy),yyyy,xxxx,a,yy,xx
fxch st(1) //yyyy,yyyy(70xxxx+yyyy),xxxx,a,yy,xx
fadd st(0), st(2) //yyyy+xxxx,yyyy(70xxxx+yyyy),xxxx,a,yy,xx
fmulp st(4), st(0) //yyyy(70xxxx+yyyy),xxxx,a,yy(yyyy+xxxx),xx
fxch st(4) //xx,xxxx,a,yy(yyyy+xxxx),yyyy(70xxxx+yyyy)
fmulp st(3), st(0) //xxxx,a,xxyy(yyyy+xxxx),yyyy(70xxxx+yyyy)
fmul st(0), st(0) //xxxx*xxxx,a,xxyy(yyyy+xxxx),yyyy(70xxxx+yyyy)
faddp st(3), st(0) //a,xxyy(yyyy+xxxx),xxxx*xxxx+yyyy(70xxxx+yyyy)
fxch st(1) //xxyy(yyyy+xxxx),a,xxxx*xxxx+yyyy(70xxxx+yyyy)
fmul qword [edi + 104] //28xxyy(yyyy+xxxx),a,xxxx*xxxx+yyyy(70xxxx+yyyy)
fsubp st(2), st(0) //a,xxxx*xxxx+yyyy(70xxxx+yyyy)-28xxyy(yyyy+xxxx)
fmulp //a*(xxxx*xxxx+yyyy(70xxxx+yyyy)-28xxyy(yyyy+xxxx))
fadd qword [esi + 24] //+[esi+24]; presumably J1, by analogy with J2/J3 at +32/+40 -- confirm
fstp qword [eax] //store new x; x87 stack is now empty
pop edi
pop esi
end
I forgot to mention that I don't do a division-by-zero test at all; I masked the exceptions and slightly rotated the bulb on startup, so this is not really a problem for me.
