Some progress:
It turns out that only ecx has to be saved in dIFS formulas.
Using SSE2 is possible, but with some constraints on the absolute-value and negation operations. Functions using intrinsics and macros are provided in the source code below. It is still not perfect, but one can always edit the assembly code afterwards. Anyway, the FPU is fast enough for me.
/*
Mandelbulb 3d Formula example.
Original code by marius and jesse.
Modified by knighty. Apr 2015.
Work in progress.
No warranty. This is made by a noob ;)
Compilation using mingw and msys (strictly speaking msys is not required... in principle):
-Compile with (replace "theFormulaFilename" by actual c file name):
gcc -c -m32 -O3 -mfpmath=387 -ffast-math -march=pentium4 theFormulaFilename.c
SSE2 can be used by setting: -mfpmath=both and adding the switch: -msse2
If you want to use sse2, better use assembler. c's sse generated code is just annoying! and not necessarily faster. :(
Anyway, it is still possible to edit the assembler code generated by the compiler...
If you use sse2, use the macros defined below. Otherwise the compiler will use variables stored in .rdata, which will make the formula unusable.
-Verify asm code:
objdump -D theFormulaFilename.o
This is necessary to check that there are no library functions call and that there is
only .text "segment" no .rdata or something like that or a call to external function.
-Extract machine code:
objcopy -Obinary -j .text theFormulaFilename.o theFormulaFilename.bin
-Convert machine code from binary to hexadecimal:
bin2hex theFormulaFilename.bin theFormulaFilename.m3f
(bin2hex is not part of msys or mingw. any other binary file editor would do the job)
-edit theFormulaFilename.m3f in a text editor to add MB3D stuff.
*/
/*Defines**************************************************/
//When defined, ABS()/NEG() expand to the Abs()/Neg() helpers, which take their bit masks from the constants block.
//When left undefined, ABS()/NEG() expand to fabs() and unary minus, and the helpers are not compiled.
#define USE_SSE2
/*Includes*************************************************/
#include <emmintrin.h>
#include <math.h>
/*MB3D structures definitions*************************************************/
/*
 * Iteration context handed to the formula. This layout is the dIFS one; it
 * seems to be almost the same as for non-dIFS formulas. The purpose of most
 * fields is unknown and they should not be modified.
 *
 * The offsets in the field comments are relative to the pointer received in
 * ESI, which points 0x88 bytes past the start of this struct.
 *
 * The packed attribute has to follow the `struct` keyword: written in front of
 * it, the compiler ignores the attribute (with a warning).
 */
struct __attribute__((packed)) TIteration3Dext {
	double something;        // -0x88 ; used in sphereheightmap.m3f
	double dum0;             // -0x80 ; unknown
	double x;                // -0x78
	double y;                // -0x70
	double z;                // -0x68
	double dum1[8];          // -0x60 .. -0x21 ; unknown
	double DE2T;             // -0x20 ; Output: distance estimate to current object
	double dum2[17];         // -0x18 .. +0x6F ; unknown
	double accumulatedScale; // +0x70
	double dum3;             // +0x78 ; unknown
	double OTforCol;         // +0x80
	double dum4[16];         // +0x88 .. +0x107 ; unknown
	//void * sphericalMap;   // +0x108; pointer to function
};
/*
 * Constants block, in the same order as the m3f file's [CONSTANTS] section.
 * The packed attribute must follow the `struct` keyword; placed in front of it
 * the compiler ignores it.
 */
struct __attribute__((packed)) Sconsts {
	double Pie;           // just to use a constant
	long long int abscst; // bit mask used by ABS()
	long long int negcst; // bit mask used by NEG()
};
/*
 * User variables, in the reverse order with respect to the m3f file.
 * The formula locates this block immediately below the constants block.
 * The packed attribute must follow the `struct` keyword; placed in front of it
 * the compiler ignores it.
 */
struct __attribute__((packed)) Svars {
	double Zadd;
	double Yadd;
	double Xadd;
	double Scale;
	double Bevel;
	double HalfLengthZ;
	double HalfLengthY;
	double HalfLengthX;
	double Dummy; // not used. What is it good for?
};
/*Macros**************************************************/
//Reads the storage of the lvalue v as if it were of the given type; the bits are not converted.
#define REINTERPRET(v,type) (*((type *) &(v)))
//Smaller / larger of two values. An argument may be evaluated twice, so avoid side effects in it.
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
//Absolute value and negation.
//Always write ABS(bar) and NEG(bar) rather than fabs(bar) and -bar:
//with sse2 the compiler turns fabs() and negation into a logical instruction
//whose constant is stored in .rdata, so the sse2 variants go through Abs()/Neg(),
//which need a variable named pconst to be in scope at the point of use.
#ifndef USE_SSE2
#define ABS(v) (fabs((v)))
#define NEG(v) (-(v))
#else
#undef fabs
#define ABS(v) (Abs(v, pconst))
#define NEG(v) (Neg(v, pconst))
#endif
/*Local functions declaration and/or implementation*************************************************/
//if the function is declared static it will not be exported and it won't be in the obj file if inlined
//Forward declaration only: formula() reaches TheFormula by its decorated symbol name from inline asm,
//and formula()'s code has to be emitted first, so the body is written further down.
//NOTE(review): with C99/gnu11 inline semantics a function whose declarations are all plain `inline`
//gets no out-of-line symbol - confirm which language standard the compiler defaults to.
inline void __attribute__((fastcall)) TheFormula(char* siarg, char* diarg);//implemented after formula because formula code must be placed before this function's code.
#ifdef USE_SSE2
/*
 * Absolute value for sse2 builds: clears the sign bit by AND-ing x with the
 * mask stored in pconst->abscst, so the compiler has no reason to place a
 * mask constant in .rdata.
 *
 * x      : value to process.
 * pconst : constants block holding the mask.
 * returns: x AND-ed bitwise with the mask.
 *
 * The mask is fetched as 64 bits of integer data and only then relabelled as
 * a double lane. Dereferencing the long long member through a double pointer
 * breaks the strict aliasing rules that -O3 relies on. Only the low lane is
 * ever read back, so scalar loads are used instead of broadcasts.
 */
static inline double Abs(double x, struct Sconsts* pconst){
	__m128d value = _mm_set_sd(x);
	__m128d mask = _mm_castsi128_pd(_mm_loadl_epi64((const __m128i *) &pconst->abscst));
	return _mm_cvtsd_f64(_mm_and_pd(value, mask));
}
/*
 * Negation for sse2 builds: flips the sign bit by XOR-ing x with the mask
 * stored in pconst->negcst, so the compiler has no reason to place a mask
 * constant in .rdata.
 *
 * x      : value to process.
 * pconst : constants block holding the mask.
 * returns: x XOR-ed bitwise with the mask.
 *
 * The mask is fetched as 64 bits of integer data and only then relabelled as
 * a double lane. Dereferencing the long long member through a double pointer
 * breaks the strict aliasing rules that -O3 relies on. Only the low lane is
 * ever read back, so scalar loads are used instead of broadcasts.
 */
static inline double Neg(double x, struct Sconsts* pconst){
	__m128d value = _mm_set_sd(x);
	__m128d mask = _mm_castsi128_pd(_mm_loadl_epi64((const __m128i *) &pconst->negcst));
	return _mm_cvtsd_f64(_mm_xor_pd(value, mask));
}
#endif
/*Formula implementation*************************************************/
// Entry point called by MB3D. Not a standard calling convention: it is called
// from asm code in MB3D which assumes that %ecx is not modified, so %ecx has to
// be saved and restored here.
// esi and edi (and ebx?) are not modified (or restored?)!???
//
// The arguments arrive in registers:
//   esi points to the context structure
//   edi points to constants (negative displacements for variables)
// They are copied to ecx/edx, where the fastcall function TheFormula expects
// its two arguments.
//
// The whole trampoline is a single asm statement on purpose: the compiler does
// not promise to keep separate asm statements consecutive, and the push/pop
// pair must stay wrapped around the call exactly as written.
//
// Big overhead. The only solution I could find. Any idea?
// Calling TheFormula through a function pointer does not get it inlined either.
void formula(void) {
	asm(
		"push %ecx\n\t"            // save ecx
		"mov %esi,%ecx\n\t"        // first argument of TheFormula
		"mov %edi,%edx\n\t"        // second argument of TheFormula
		"call @TheFormula@8\n\t"   // compiler generated name for TheFormula. If cdecl the generated name is "_TheFormula"
		"pop %ecx"                 // restore ecx
	);
}
/*The actual formula*************************************************/
/*
 * Box shape with rounded edges, followed by a scale + translation of the point.
 *
 * siarg : context pointer received in ecx; the TIteration3Dext struct starts
 *         0x88 bytes before it.
 * diarg : pointer received in edx; the constants block starts here and the
 *         variables block sits immediately below it.
 *
 * Writes DE2T, x, y, z and accumulatedScale in the context.
 *
 * The definition deliberately carries no `inline`: formula() reaches this
 * function through the symbol @TheFormula@8 from inline asm, so an out-of-line
 * copy must be emitted. With C99/gnu11 inline semantics a function whose
 * declarations are all plain `inline` gets no such copy.
 */
void __attribute__((fastcall)) TheFormula(char* siarg /*ecx*/, char* diarg/*edx*/) {
	//DO NOT MODIFY BEGIN
	// Compute ptr to proper start of TIteration3Dext struct.
	struct TIteration3Dext* pctx = (struct TIteration3Dext*)(siarg-0x88);
	// get pointer to constants. The name pconst is used implicitly by ABS()/NEG().
	struct Sconsts* pconst=(struct Sconsts*) (diarg);
	// get pointer to variables.
	struct Svars* pvar=(struct Svars*) (((char*)pconst)-sizeof(struct Svars));
	//DO NOT MODIFY END

	// Draw a box: per-axis distance to the box shrunk by the bevel radius.
	double x = ABS(pctx->x) - (pvar->HalfLengthX - pvar->Bevel);
	double y = ABS(pctx->y) - (pvar->HalfLengthY - pvar->Bevel);
	double z = ABS(pctx->z) - (pvar->HalfLengthZ - pvar->Bevel);
	double DE = MAX(x, MAX(y, z));
	if (DE > 0.) {
		// Outside the shrunk box: use the length of the positive parts.
		x = MAX(0., x);
		y = MAX(0., y);
		z = MAX(0., z);
		// Computed once up front: MAX() may evaluate its arguments twice.
		const double outside = sqrt(x*x + y*y + z*z);
		DE = MAX(DE, outside);
	}
	// Subtracting the bevel radius rounds the edges.
	pctx->DE2T = DE - pvar->Bevel;

	// Now do translation and scaling
	pctx->x = pctx->x * pvar->Scale + pvar->Xadd;
	pctx->y = pctx->y * pvar->Scale + pvar->Yadd;
	pctx->z = pctx->z * pvar->Scale + pvar->Zadd;
	pctx->accumulatedScale = pctx->accumulatedScale * pvar->Scale;
}
[OPTIONS]
.Version = 6
.DEoption = 20
.SSE2
.Double X halfwidth = 1
.Double Y halfwidth = 1
.Double Z halfwidth = 1
.Double Fillet = 0
.Double Scale = 1
.Double X add = 0
.Double Y add = 0
.Double Z add = 0
[CONSTANTS]
Double = 0
INT64 = $7FFFFFFFFFFFFFFF
INT64 = $8000000000000000
[CODE]
5589E5505351525756E8080000005A5A5A595B585DC35589E583EC088B4D0C8B45082D880000
008D51B8DD4010DD4018DD4020DD55F8DD4220D9C3D9E1DC6238D8C1D9C3D9E1DC6230D8C2D9
CBD9E1DC6228D8C2D9CBDBF1D9C1DBC1DBF4DAC4D9EED9C9DBF1762ED9CBDBF1DAC1D8C8D9CA
DBF1DAC1D8C8D9CDDBF1DAC1DDD9D8C8D9CCDEC1DEC3D9CAD9FAD9CADBF2DAC2DDDAEB08DDDD
DDD8DDD8DDD8DEE9DD5868DD4218DCCAD9CADC4210DD5810D8C9DC4208DD5818DD45F8D8C9DC
41B8DD5820DC88F8000000DD98F8000000C9C3909090
[END]
Box shape dIFS with rounded edge. no OTrap coloring for now.
[/code]