It happens that I just used the diamond-square algorithm myself for a 1024*1024 array of 3 floats but easily adaptable to say a single integer array or whatever, so here you are (for 3 channels @1024*1024):
float vals[1024][1024][3]; // probably needs to be a global or at least malloc'ed
float r,g,b,rx,gx,bx,rn,gn,bn;
rx = gx = bx = -2.0f;
rn = gn = bn = 2.0f;
vals[0][0][0] = vals[0][0][1] = vals[0][0][2] = 0.0f;
UInt32 x, y, dx = 512;
UInt32 sx = 1024;
float o = 0.353553390593274f, f = 0.707106781186548f/65535.0f;
do
{
y = dx;
do
{
x = dx;
do
{
r = vals[x][y][0] = 0.25f*(vals[x-dx][y-dx][0]+vals[(x+dx)&1023][y-dx][0]+vals[x-dx][(y+dx)&1023][0]+vals[(x+dx)&1023][(y+dx)&1023][0]) + (mtr->MTrand()&65535)*f - o;
g = vals[x][y][1] = 0.25f*(vals[x-dx][y-dx][1]+vals[(x+dx)&1023][y-dx][1]+vals[x-dx][(y+dx)&1023][1]+vals[(x+dx)&1023][(y+dx)&1023][1]) + (mtr->MTrand()&65535)*f - o;
b = vals[x][y][2] = 0.25f*(vals[x-dx][y-dx][2]+vals[(x+dx)&1023][y-dx][2]+vals[x-dx][(y+dx)&1023][2]+vals[(x+dx)&1023][(y+dx)&1023][2]) + (mtr->MTrand()&65535)*f - o;
if (r>rx)
{
rx = r;
}
if (r<rn)
{
rn = r;
}
if (g>gx)
{
gx = g;
}
if (g<gn)
{
gn = g;
}
if (b>bx)
{
bx = b;
}
if (b<bn)
{
bn = b;
}
} while ((x+=sx)<1024);
} while ((y+=sx)<1024);
y = 0;
do
{
x = dx;
do
{
r = vals[x][y][0] = 0.25f*(vals[x][(y-dx)&1023][0]+vals[x-dx][y][0]+vals[(x+dx)&1023][y][0]+vals[x][(y+dx)&1023][0]) + (mtr->MTrand()&65535)*f - o;
g = vals[x][y][1] = 0.25f*(vals[x][(y-dx)&1023][1]+vals[x-dx][y][1]+vals[(x+dx)&1023][y][1]+vals[x][(y+dx)&1023][1]) + (mtr->MTrand()&65535)*f - o;
b = vals[x][y][2] = 0.25f*(vals[x][(y-dx)&1023][2]+vals[x-dx][y][2]+vals[(x+dx)&1023][y][2]+vals[x][(y+dx)&1023][2]) + (mtr->MTrand()&65535)*f - o;
if (r>rx)
{
rx = r;
}
if (r<rn)
{
rn = r;
}
if (g>gx)
{
gx = g;
}
if (g<gn)
{
gn = g;
}
if (b>bx)
{
bx = b;
}
if (b<bn)
{
bn = b;
}
} while ((x+=sx)<1024);
if ((y += dx)<1024)
{
x = 0;
do
{
r = vals[x][y][0] = 0.25f*(vals[x][y-dx][0]+vals[(x-dx)&1023][y][0]+vals[(x+dx)&1023][y][0]+vals[x][(y+dx)&1023][0]) + (mtr->MTrand()&65535)*f - o;
g = vals[x][y][1] = 0.25f*(vals[x][y-dx][1]+vals[(x-dx)&1023][y][1]+vals[(x+dx)&1023][y][1]+vals[x][(y+dx)&1023][1]) + (mtr->MTrand()&65535)*f - o;
b = vals[x][y][2] = 0.25f*(vals[x][y-dx][2]+vals[(x-dx)&1023][y][2]+vals[(x+dx)&1023][y][2]+vals[x][(y+dx)&1023][2]) + (mtr->MTrand()&65535)*f - o;
if (r>rx)
{
rx = r;
}
if (r<rn)
{
rn = r;
}
if (g>gx)
{
gx = g;
}
if (g<gn)
{
gn = g;
}
if (b>bx)
{
bx = b;
}
if (b<bn)
{
bn = b;
}
} while ((x+=sx)<1024);
}
} while ((y+=dx)<1024);
dx >>= 1;
sx >>= 1;
f *= 0.707106781186548f;
o *= 0.707106781186548f;
} while (dx);
; Now we normalise the ranges created:
r = 255.0f/(rx-rn);
g = 255.0f/(gx-gn);
b = 255.0f/(bx-bn);
y = 0;
UInt8 *dest=(UInt8*)texMem;
do
{
x = 0;
do
{
*dest++ = (vals[x][y][0]-rn)*r;
*dest++ = (vals[x][y][1]-gn)*g;
*dest++ = (vals[x][y][2]-bn)*b;
*dest++ = 255;
} while (++x<1024);
} while (++y<1024);
Ignoring the /65535.0 in the calculation of "f":
"o" should be half the value of f.
The larger the value of f (and o) then the sharper the "landscape" - in fact anything >1/sqrt(2) for f is effectively introducing true randomness beyond fBm - using 1.0 is really true randomness, obviously smaller values of f (and o) will give smoother landscape/plasma.
Note that MTRand is simply an implementation of the Mersenne Twister returning 32-bit randoms.
If you make "o" larger than 0.5*f then landscape at lower "heights" will be rougher and if you make "o" smaller than 0.5*f then landscape at higher "heights" will be rougher.