Jesse
Download Section
Fractal Schemer
Posts: 1013
|
|
« Reply #111 on: August 12, 2010, 06:46:10 PM » |
|
hi twinbee, i also made just the quadratic temporary values, multiplications are done very fast (about 4 clks) on the cpu. Going through memory is often slower, and i did for the cosine and sine versions also pure x87 asm, to do it with sse2 was a bit to much work for me If you need fast code and can get use of asm, then i can send you also the cosine code, on this computer i have only the sine version. The pow8 anniversary is upcoming..

asm                                  //Sine pow8 bulb
  // One iteration step of the power-8 "sine" bulb, pure x87.
  //
  // Inputs as used by the code below:
  //   eax, edx, ecx : pointers to the doubles x, y, z; each is read and then
  //                   overwritten in place with the new value
  //   [ebp + 8]     : pointer loaded into esi (PIteration3D per the author);
  //                   [esi + 24], [esi + 32], [esi + 40] are added to the new
  //                   x, y, z respectively, and [esi + 48] holds a further
  //                   pointer (PVars per the author)
  //   edi           : PVars + 88; every [edi +/- n] operand is a double
  //                   constant relative to that address. The values named in
  //                   the comments (6, 7, 8, 28, 70, 1, dZmul) are the
  //                   author's annotations and cannot be verified from here.
  //
  // Notation: xx = x*x, yy = y*y, zz = z*z, r = xx + yy, rr = r*r.
  // Trailing comments list the x87 stack from st(0) on the left.
  //
  // The x87 stack reaches its full depth of 8 entries during the z
  // calculation, so it must be empty on entry. It is empty again on exit.
  // esi and edi are saved and restored. No zero check is made before fdivp
  // (division by rr*rr).
  push esi
  push edi
  mov esi, [ebp + 8]                 //PIteration3D
  fld qword [eax]                    //x
  mov edi, [esi + 48]                //PVars
  fmul st(0), st(0)                  //xx
  fld qword [edx]                    //y,xx
  add edi, 88                        //edi = PVars + 88, base for the constant operands below
  fmul st(0), st(0)                  //yy,xx
  fld qword [ecx]                    //z,yy,xx
  fmul st(0), st(0)                  //zz,yy,xx
  fld st(2)                          //xx,zz,yy,xx
  fadd st(0), st(2)                  //xx+yy=r,zz,yy,xx
  fld st(0)                          //r,r,zz,yy,xx
  fmul st(0), st(1)                  //rr,r,zz,yy,xx
  fld st(2)                          //zz,rr,r,zz,yy,xx
  fmul st(0), st(0)                  //zzzz(S3*S3),rr,r,zz,yy,xx

  // ---- z calculation ----
  // new z = -(sqrt(r)*(zz-r)*(zzzz-6rzz+rr) * z * 8 * dZmul) + J3
  fld st(2)                          //r,zzzz(S3*S3),rr,r,zz,yy,xx
  fmul st(0), st(4)                  //r*zz,zzzz,rr,r,zz,yy,xx
  fmul qword [edi + 56]              //6*r*zz,zzzz(S3*S3),rr,r,zz,yy,xx
  fsubr st(0), st(1)                 //zzzz-6rzz,zzzz,rr,r,zz,yy,xx
  fadd st(0), st(2)                  //zzzz-6rzz+rr,zzzz,rr,r,zz,yy,xx
  fld st(4)                          //zz,zzzz-6rzz+rr,zzzz,rr,r,zz,yy,xx   (stack full: 8 entries)
  fsub st(0), st(4)                  //zz-r,zzzz-6rzz+rr,zzzz,rr,r,zz,yy,xx
  fmulp                              //(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx
  fld st(3)                          //r,(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx   (stack full again)
  fsqrt                              //sqrt(r),(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx
  fmulp                              //sqrt(r)*(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx
  fmul qword [ecx]                   //*z
  fmul qword [edi + 72]              //*8
  fmul qword [edi - 104]             //*dZmul
  fchs                               //negate
  fadd qword [esi + 40]              //+J3
  fstp qword [ecx]                   //store new z; stack: zzzz,rr,r,zz,yy,xx

  // ---- a calculation ----
  // a = ((rr*70+zzzz)*zzzz - 28*zz*r*(zzzz+rr)) / (rr*rr) + 1
  fld st(0)                          //zzzz,zzzz,rr,r,zz,yy,xx
  fadd st(0), st(2)                  //zzzz+rr,zzzz,rr,r,zz,yy,xx
  fmulp st(3), st(0)                 //zzzz,rr,r*(zzzz+rr),zz,yy,xx
  fld st(1)                          //rr,zzzz,rr,r*(zzzz+rr),zz,yy,xx
  fmul qword [edi + 120]             //rr*70,zzzz,rr,r*(zzzz+rr),zz,yy,xx
  fadd st(0), st(1)                  //rr*70+zzzz,zzzz,rr,r*(zzzz+rr),zz,yy,xx
  fmulp                              //(rr*70+zzzz)*zzzz,rr,r*(zzzz+rr),zz,yy,xx
  fxch st(2)                         //r*(zzzz+rr),rr,(rr*70+zzzz)*zzzz,zz,yy,xx
  fmulp st(3), st(0)                 //rr,(rr*70+zzzz)*zzzz,zz*r*(zzzz+rr),yy,xx
  fxch st(2)                         //zz*r*(zzzz+rr),(rr*70+zzzz)*zzzz,rr,yy,xx
  fmul qword [edi + 104]             //28*zz*r*(zzzz+rr),(rr*70+zzzz)*zzzz,rr,yy,xx
  fsubp                              //(rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr),rr,yy,xx
  fxch st(1)                         //rr,(rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr),yy,xx
  fmul st(0), st(0)                  //rrrr,(rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr),yy,xx
  fdivp                              //((rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr))/rrrr,yy,xx   (no zero check on rrrr)
  fadd qword [edi - 56]              //a,yy,xx   +1

  // ---- y calculation ----
  // new y = 8*x*y*(yyyy*(7xx-yy) + xxxx*(xx-7yy)) * a + J2
  fld st(1)                          //yy,a,yy,xx
  fmul qword [edi + 64]              //7*yy,a,yy,xx
  fld st(3)                          //xx,7*yy,a,yy,xx
  fmul qword [edi + 64]              //7*xx,7*yy,a,yy,xx
  fsub st(0), st(3)                  //7*xx-yy,7*yy,a,yy,xx
  fld st(4)                          //xx,7*xx-yy,7*yy,a,yy,xx
  fsubr st(2), st(0)                 //xx,7*xx-yy,xx-7*yy,a,yy,xx
  fmul st(0), st(0)                  //xxxx,7*xx-yy,xx-7*yy,a,yy,xx
  fmul st(2), st(0)                  //xxxx,7xx-yy,xxxx(xx-7yy),a,yy,xx
  fld st(4)                          //yy,xxxx,7xx-yy,xxxx(xx-7yy),a,yy,xx
  fmul st(0), st(0)                  //yyyy,xxxx,7xx-yy,xxxx(xx-7yy),a,yy,xx
  fmul st(2), st(0)                  //yyyy,xxxx,yyyy(7xx-yy),xxxx(xx-7yy),a,yy,xx
  fxch st(2)                         //yyyy(7xx-yy),xxxx,yyyy,xxxx(xx-7yy),a,yy,xx
  faddp st(3), st(0)                 //xxxx,yyyy,yyyy(7xx-yy)+xxxx(xx-7yy),a,yy,xx
  fxch st(2)                         //yyyy(7xx-yy)+xxxx(xx-7yy),yyyy,xxxx,a,yy,xx
  fmul qword [edi + 72]              //*8
  fmul qword [eax]                   //*x   (x in memory is still the old value)
  fmul qword [edx]                   //*y   (y in memory is still the old value)
  fmul st(0), st(3)                  //*a
  fadd qword [esi + 32]              //+J2
  fstp qword [edx]                   //store new y; stack: yyyy,xxxx,a,yy,xx

  // ---- x calculation ----
  // new x = a*(xxxx*xxxx + yyyy*(70xxxx+yyyy) - 28*xx*yy*(yyyy+xxxx)) + [esi + 24]
  fld st(1)                          //xxxx,yyyy,xxxx,a,yy,xx
  fmul qword [edi + 120]             //70xxxx,yyyy,xxxx,a,yy,xx
  fadd st(0), st(1)                  //70xxxx+yyyy,yyyy,xxxx,a,yy,xx
  fmul st(0), st(1)                  //yyyy(70xxxx+yyyy),yyyy,xxxx,a,yy,xx
  fxch st(1)                         //yyyy,yyyy(70xxxx+yyyy),xxxx,a,yy,xx
  fadd st(0), st(2)                  //yyyy+xxxx,yyyy(70xxxx+yyyy),xxxx,a,yy,xx
  fmulp st(4), st(0)                 //yyyy(70xxxx+yyyy),xxxx,a,yy(yyyy+xxxx),xx
  fxch st(4)                         //xx,xxxx,a,yy(yyyy+xxxx),yyyy(70xxxx+yyyy)
  fmulp st(3), st(0)                 //xxxx,a,xxyy(yyyy+xxxx),yyyy(70xxxx+yyyy)
  fmul st(0), st(0)                  //xxxx*xxxx,a,xxyy(yyyy+xxxx),yyyy(70xxxx+yyyy)
  faddp st(3), st(0)                 //a,xxyy(yyyy+xxxx),xxxx*xxxx+yyyy(70xxxx+yyyy)
  fxch st(1)                         //xxyy(yyyy+xxxx),a,xxxx*xxxx+yyyy(70xxxx+yyyy)
  fmul qword [edi + 104]             //28xxyy(yyyy+xxxx),a,xxxx*xxxx+yyyy(70xxxx+yyyy)
  fsubp st(2), st(0)                 //a,xxxx*xxxx+yyyy(70xxxx+yyyy)-28xxyy(yyyy+xxxx)
  fmulp                              //a*(xxxx*xxxx+yyyy(70xxxx+yyyy)-28xxyy(yyyy+xxxx))
  fadd qword [esi + 24]              //+[esi + 24] (presumably J1, by analogy with J2/J3 above - confirm)
  fstp qword [eax]                   //store new x; x87 stack is empty again
  pop edi
  pop esi
end
I forgot to mention that I don't do a div0 test at all; I masked the exceptions and slightly rotated the bulb on startup, so this is not really a problem for me.
|