| 1 |
module tools.vector; |
|---|
| 2 |
|
|---|
| 3 |
import tools.compat, tools.base, tools.ziggy; |
|---|
| 4 |
const PI180 = PI / 180.0; |
|---|
| 5 |
|
|---|
| 6 |
version(GNU) { |
|---|
| 7 |
import gcc.builtins; |
|---|
| 8 |
alias __builtin_sqrtf fastsqrt; |
|---|
| 9 |
alias __builtin_sqrt fastsqrt; |
|---|
| 10 |
alias __builtin_sqrtl fastsqrt; |
|---|
| 11 |
} else { |
|---|
| 12 |
// TODO: ldc intrinsics here |
|---|
| 13 |
alias sqrt fastsqrt; |
|---|
| 14 |
} |
|---|
| 15 |
|
|---|
| 16 |
// flatten doesn't do what you think it does |
|---|
| 17 |
/*version(GNU) const string prepend = "pragma(GNU_attribute, flatten) "; |
|---|
| 18 |
else */const string prepend = ""; |
|---|
| 19 |
|
|---|
| 20 |
version(GNU) const bool Extend3to4 = true; |
|---|
| 21 |
else const bool Extend3to4 = false; |
|---|
| 22 |
|
|---|
| 23 |
import tools.log: logln; |
|---|
| 24 |
|
|---|
| 25 |
template RelFloatType(T) { |
|---|
| 26 |
static if (is(T == float) || is(T == double) || is(T == real)) |
|---|
| 27 |
alias T RelFloatType; |
|---|
| 28 |
else |
|---|
| 29 |
alias float RelFloatType; |
|---|
| 30 |
} |
|---|
| 31 |
|
|---|
| 32 |
string opGeneric(string specs) { |
|---|
| 33 |
string res; |
|---|
| 34 |
while (specs.length) { |
|---|
| 35 |
auto spec = specs.ctSlice(";").ctStrip(); |
|---|
| 36 |
specs = specs.ctStrip(); |
|---|
| 37 |
auto type = spec.ctSlice("·"); |
|---|
| 38 |
auto name = spec.ctSlice("·").ctStrip(); |
|---|
| 39 |
auto op = spec.ctSlice("·").ctStrip(); |
|---|
| 40 |
auto reverse = spec.ctSlice("·").ctStrip() == "true"; |
|---|
| 41 |
res ~= prepend ~ ` Vector op`~name~`(`~type~` other) { |
|---|
| 42 |
Vector res = void; |
|---|
| 43 |
for (int i = 0; i < (Extend3to4?real_size:size); ++i) |
|---|
| 44 |
static if (is(typeof(other.real_field))) |
|---|
| 45 |
res.real_field[i] = cast(T) (real_field[i] `~op~` other.real_field[i]); |
|---|
| 46 |
else |
|---|
| 47 |
res.real_field[i] = cast(T) (real_field[i] `~op~` other); |
|---|
| 48 |
return res; |
|---|
| 49 |
} |
|---|
| 50 |
` ~ prepend ~ ` void op`~name~`Assign(`~type~` other) { |
|---|
| 51 |
for (int i = 0; i < (Extend3to4?real_size:size); i++) |
|---|
| 52 |
static if (is(typeof(other.real_field))) |
|---|
| 53 |
real_field[i] `~op~`= other.real_field[i]; |
|---|
| 54 |
else |
|---|
| 55 |
real_field[i] `~op~`= other; |
|---|
| 56 |
} |
|---|
| 57 |
`; |
|---|
| 58 |
if (reverse) { |
|---|
| 59 |
res ~= prepend ~ ` Vector op`~name~`_r(`~type~` other) { |
|---|
| 60 |
Vector res = void; |
|---|
| 61 |
for (int i = 0; i < (Extend3to4?real_size:size); ++i) |
|---|
| 62 |
static if (is(typeof(other.real_field))) |
|---|
| 63 |
res.real_field[i] = cast(T) (other.real_field[i] `~op~` real_field[i]); |
|---|
| 64 |
else |
|---|
| 65 |
res.real_field[i] = cast(T) (other `~op~` real_field[i]); |
|---|
| 66 |
return res; |
|---|
| 67 |
} |
|---|
| 68 |
`; |
|---|
| 69 |
} |
|---|
| 70 |
} |
|---|
| 71 |
return res; |
|---|
| 72 |
} |
|---|
| 73 |
|
|---|
| 74 |
struct Vector(T, int size, int _real_size = 0) { |
|---|
| 75 |
version(noSSE) const bool useSSE = false; |
|---|
| 76 |
else /*const bool useSSE = is(typeof({ |
|---|
| 77 |
void* foo; |
|---|
| 78 |
asm { mov EAX, foo; movups XMM0, [EAX]; } |
|---|
| 79 |
}));*/ |
|---|
| 80 |
const bool useSSE = false; |
|---|
| 81 |
version(NoExtend) const ExtendSize = false; |
|---|
| 82 |
else const ExtendSize = size == 3 && is(T == float); |
|---|
| 83 |
const real_size = ExtendSize ? 4 : size; |
|---|
| 84 |
// [ real_field ] |
|---|
| 85 |
// [bogus] |
|---|
| 86 |
// [ field ] |
|---|
| 87 |
// [ tuple ] |
|---|
| 88 |
// [x] [y] [z] |
|---|
| 89 |
// [r] [g] [b] |
|---|
| 90 |
union { |
|---|
| 91 |
static if (is(int: T)) T[real_size] real_field = 0; |
|---|
| 92 |
else T[real_size] real_field; |
|---|
| 93 |
struct { |
|---|
| 94 |
union { |
|---|
| 95 |
T[size] field; |
|---|
| 96 |
Repeat!(T, size) tuple; |
|---|
| 97 |
alias tuple SerField; |
|---|
| 98 |
struct { static if (size>0) T x; static if (size>1) T y; static if (size>2) T z; static if (size>3) T w; } |
|---|
| 99 |
struct { static if (size>0) T r; static if (size>1) T g; static if (size>2) T b; static if (size>3) T a; } |
|---|
| 100 |
} |
|---|
| 101 |
static if (size == 3 && real_size == 4) T bogus; |
|---|
| 102 |
} |
|---|
| 103 |
} |
|---|
| 104 |
static if (is(typeof(-T))) { |
|---|
| 105 |
Vector mirror(int id)() { return Vector(tuple[0..id], -tuple[id], tuple[id+1..$]); } |
|---|
| 106 |
static if (size>0) alias mirror!(0) xmirror; |
|---|
| 107 |
static if (size>1) alias mirror!(1) ymirror; |
|---|
| 108 |
static if (size>2) alias mirror!(2) zmirror; |
|---|
| 109 |
static if (size>3) alias mirror!(3) wmirror; |
|---|
| 110 |
} |
|---|
| 111 |
template _swizzle(string HOW) { |
|---|
| 112 |
// the extra parameters are taken as extensions of the existing vector tuple |
|---|
| 113 |
// vec2f foo; foo.xyz(4) is vec3f |
|---|
| 114 |
// If you can remove this hackaround and not get frontend crashes, PLEASE do so. |
|---|
| 115 |
// I feel dirty just writing it. |
|---|
| 116 |
static if (HOW.length == size) alias Vector Type; |
|---|
| 117 |
else alias Vector!(T, HOW.length) Type; |
|---|
| 118 |
Type swizzle(S...)(S extra) { |
|---|
| 119 |
Vector!(T, size+S.length) newsource = void; |
|---|
| 120 |
newsource.field[0..size][] = field; |
|---|
| 121 |
foreach (id, value; extra) newsource.tuple[size+id] = value; |
|---|
| 122 |
Type res = void; |
|---|
| 123 |
foreach (i, bogus; Repeat!(float, HOW.length)) |
|---|
| 124 |
res.tuple[i] = mixin("newsource."~HOW[i]); |
|---|
| 125 |
return res; |
|---|
| 126 |
} |
|---|
| 127 |
} |
|---|
| 128 |
template swizzle(string HOW) { alias _swizzle!(HOW).swizzle swizzle; } |
|---|
| 129 |
template expr(string EX) { |
|---|
| 130 |
typeof(({ T params; return mixin(EX); })()) expr(T...)(T params) { |
|---|
| 131 |
return mixin(EX); |
|---|
| 132 |
} |
|---|
| 133 |
} |
|---|
| 134 |
alias swizzle!("xy") xy; alias swizzle!("yx") yx; |
|---|
| 135 |
alias swizzle!("xz") xz; alias swizzle!("zx") zx; |
|---|
| 136 |
alias swizzle!("yz") yz; alias swizzle!("zy") zy; |
|---|
| 137 |
Vector!(U, size, _real_size) to(U)() { |
|---|
| 138 |
static assert(is(typeof(cast(U) Init!(T))), "Cannot call to!("~U.stringof~") on Vector!("~T.stringof~", "~size.stringof~")"); |
|---|
| 139 |
Vector!(U, size, _real_size) res = void; |
|---|
| 140 |
foreach (i, entry; tuple) res.tuple[i] = cast(U) entry; |
|---|
| 141 |
return res; |
|---|
| 142 |
} |
|---|
| 143 |
T opIndex(size_t which) { return field[which]; } |
|---|
| 144 |
void opIndexAssign(size_t which, T n) { field[which]=n; } |
|---|
| 145 |
static Vector opCall(T what) { |
|---|
| 146 |
Vector res = void; |
|---|
| 147 |
res.field[] = what; |
|---|
| 148 |
static if (is(T: float)) res.real_field[size .. real_size] = 0; |
|---|
| 149 |
return res; |
|---|
| 150 |
} |
|---|
| 151 |
static if (size > 1) |
|---|
| 152 |
static Vector opCall(Repeat!(T, size) what) { |
|---|
| 153 |
Vector res = void; |
|---|
| 154 |
foreach (i, v; what) res.field[i] = v; |
|---|
| 155 |
static if (is(T: float)) |
|---|
| 156 |
res.real_field[size .. real_size] = 0; |
|---|
| 157 |
return res; |
|---|
| 158 |
} |
|---|
| 159 |
static Vector opCall(T[size] what) { |
|---|
| 160 |
Vector res=void; res.field[]=what; |
|---|
| 161 |
static if (is(typeof(res.filler))) res.filler=0; |
|---|
| 162 |
return res; |
|---|
| 163 |
} |
|---|
| 164 |
static if (is(typeof(abs(field[0])))) Vector abs() { |
|---|
| 165 |
Vector res = void; |
|---|
| 166 |
for (int i = 0; i < size; ++i) |
|---|
| 167 |
res.field[i] = .abs(field[i]); |
|---|
| 168 |
return res; |
|---|
| 169 |
} |
|---|
| 170 |
static if (is(typeof(field[0]>0))) Vector!(byte, size, size) sgn() { |
|---|
| 171 |
Vector!(byte, size, size) res = void; |
|---|
| 172 |
for (int i = 0; i < size; ++i) |
|---|
| 173 |
if (field[i] >= 0) res.field[i] = 1; |
|---|
| 174 |
else res.field[i] = -1; |
|---|
| 175 |
return res; |
|---|
| 176 |
} |
|---|
| 177 |
static if (is(T == float)) Vector sqrt() { |
|---|
| 178 |
Vector res = void; |
|---|
| 179 |
for (int i = 0; i < size; ++i) res.field[i] = .fastsqrt(field[i]); |
|---|
| 180 |
return res; |
|---|
| 181 |
} |
|---|
| 182 |
static if (is(typeof(T*T))) Vector sqr() { |
|---|
| 183 |
Vector res = void; |
|---|
| 184 |
for (int i = 0; i < real_size; ++i) res.field[i] = field[i] * field[i]; |
|---|
| 185 |
return res; |
|---|
| 186 |
} |
|---|
| 187 |
static if (is(typeof(T > T))) { |
|---|
| 188 |
Vector min(Vector other) { |
|---|
| 189 |
Vector res = void; |
|---|
| 190 |
for (int i = 0; i < size; ++i) res.field[i] = tools.base.min(field[i], other.field[i]); |
|---|
| 191 |
return res; |
|---|
| 192 |
} |
|---|
| 193 |
T min() { |
|---|
| 194 |
T res = field[0]; |
|---|
| 195 |
foreach (entry; field[1 .. $]) if (entry < res) res = entry; |
|---|
| 196 |
return res; |
|---|
| 197 |
} |
|---|
| 198 |
Vector max(Vector other) { |
|---|
| 199 |
Vector res = void; |
|---|
| 200 |
for (int i = 0; i < size; ++i) res.field[i] = tools.base.max(field[i], other.field[i]); |
|---|
| 201 |
return res; |
|---|
| 202 |
} |
|---|
| 203 |
T max() { |
|---|
| 204 |
T res = field[0]; |
|---|
| 205 |
foreach (entry; field[1 .. $]) if (entry > res) res = entry; |
|---|
| 206 |
return res; |
|---|
| 207 |
} |
|---|
| 208 |
} else pragma(msg, "Cannot compare ", T.stringof, " for size"); |
|---|
| 209 |
static if (is(typeof(T + T))) mixin(opGeneric("Vector · Add · +; T · Add · + · true; ")); |
|---|
| 210 |
static if (is(typeof(T - T))) mixin(opGeneric("Vector · Sub · -; T · Sub · - · true; ")); |
|---|
| 211 |
static if (is(typeof(T * T))) mixin(opGeneric("Vector · Mul · *; T · Mul · * · true; ")); |
|---|
| 212 |
static if (is(typeof(T / T))) mixin(opGeneric("Vector · Div · /; T · Div · / · true; ")); |
|---|
| 213 |
static if (is(T: real) || is (T: long)) { |
|---|
| 214 |
RelFloatType!(T) length() { return .fastsqrt(lensq); } |
|---|
| 215 |
Vector length(RelFloatType!(T) t) { |
|---|
| 216 |
t /= length(); |
|---|
| 217 |
for (int i = 0; i < real_size; ++i) real_field[i] *= t; |
|---|
| 218 |
return *this; |
|---|
| 219 |
} |
|---|
| 220 |
void normalize() { |
|---|
| 221 |
static if ((size == 3) && is(T == float) && useSSE) { // problems on 64-bit |
|---|
| 222 |
float factor = void; |
|---|
| 223 |
auto p = field.ptr; |
|---|
| 224 |
asm { |
|---|
| 225 |
mov EAX, p; |
|---|
| 226 |
movups XMM0, [EAX]; |
|---|
| 227 |
movaps XMM1, XMM0; // [b] == [a] |
|---|
| 228 |
mulps XMM1, XMM1; // [b: a*a] |
|---|
| 229 |
movaps XMM2, XMM1; // [c: a*a] |
|---|
| 230 |
shufps XMM2, XMM2, 0x1; // [c: a*a shift1] |
|---|
| 231 |
|
|---|
| 232 |
movaps XMM3, XMM1; // [d: a*a] |
|---|
| 233 |
shufps XMM3, XMM3, 0x2; // [d: a*a shift2] |
|---|
| 234 |
|
|---|
| 235 |
addss XMM1, XMM2; addss XMM1, XMM3; // [b: x+y+z ? ? ?] |
|---|
| 236 |
rsqrtss XMM1, XMM1; // [b: sqrt(x+y+z) ? ? ?] |
|---|
| 237 |
shufps XMM1, XMM1, 0x0; // [b: "" same same same] |
|---|
| 238 |
mulps XMM0, XMM1; // [a: res] |
|---|
| 239 |
|
|---|
| 240 |
movups [EAX], XMM0; |
|---|
| 241 |
} |
|---|
| 242 |
} else { |
|---|
| 243 |
auto len = length(); |
|---|
| 244 |
for (int i = 0; i < real_size; ++i) |
|---|
| 245 |
real_field[i] /= len; |
|---|
| 246 |
} |
|---|
| 247 |
} |
|---|
| 248 |
void normalize(out typeof(length()) len) { |
|---|
| 249 |
len = length(); |
|---|
| 250 |
for (int i = 0; i < size; ++i) field[i] /= len; |
|---|
| 251 |
} |
|---|
| 252 |
static if (size == 3) { |
|---|
| 253 |
float angle(Vector v) { // unsigned .. |
|---|
| 254 |
return acos(dot(v) / .fastsqrt(lensq * v.lensq)); |
|---|
| 255 |
} |
|---|
| 256 |
float angle(Vector v, Vector reference) { // signed |
|---|
| 257 |
// yay, http://tomyeah.com/signed-angle-between-two-vectors3d-in-cc/ |
|---|
| 258 |
bool flipped = cross(v).dot(reference) < 0; |
|---|
| 259 |
auto res = acos(dot(v) / .fastsqrt(lensq * v.lensq)); |
|---|
| 260 |
// fudge |
|---|
| 261 |
if (flipped) res = -res; |
|---|
| 262 |
return res; |
|---|
| 263 |
} |
|---|
| 264 |
} else static if (size == 2) { |
|---|
| 265 |
float angle(Vector v) { |
|---|
| 266 |
auto res = atan2(v.y, v.x) - atan2(y, x); |
|---|
| 267 |
while (res < -PI) res += 2*PI; |
|---|
| 268 |
while (res > PI) res -= 2*PI; |
|---|
| 269 |
return res; |
|---|
| 270 |
} |
|---|
| 271 |
} |
|---|
| 272 |
Vector normalized() { Vector res=*this; res.normalize; return res; } |
|---|
| 273 |
static if (is(typeof(cast(float) Init!(T))) && !is(T == float)) |
|---|
| 274 |
Vector!(float, size, _real_size) opCast() { |
|---|
| 275 |
Vector!(float, size, _real_size) res = void; |
|---|
| 276 |
foreach (i, v; tuple) res.tuple[i] = cast(float) v; |
|---|
| 277 |
return res; |
|---|
| 278 |
} |
|---|
| 279 |
static if (size == 3) { |
|---|
| 280 |
Vector cross(ref Vector other) { |
|---|
| 281 |
return Vector(cast(T) (y*other.z-z*other.y), cast(T) (z*other.x-x*other.z), cast(T) (x*other.y-y*other.x)); |
|---|
| 282 |
} |
|---|
| 283 |
Vector opMul_r(T[] mat) { |
|---|
| 284 |
assert(mat.length==16); |
|---|
| 285 |
Vector res=void; |
|---|
| 286 |
res.x=cast(T) (x*mat[0] + y*mat[4] + z*mat[8] + mat[12]); |
|---|
| 287 |
res.y=cast(T) (x*mat[1] + y*mat[5] + z*mat[9] + mat[13]); |
|---|
| 288 |
res.z=cast(T) (x*mat[2] + y*mat[6] + z*mat[10]+ mat[14]); |
|---|
| 289 |
/*auto w = cast(T) (x*mat[3] + y*mat[7] + z*mat[11] + mat[15]); |
|---|
| 290 |
res /= w;*/ |
|---|
| 291 |
return res; |
|---|
| 292 |
} |
|---|
| 293 |
} |
|---|
| 294 |
static if (size == 4) { |
|---|
| 295 |
Vector ham_mult(Vector v) { // yay quaternions |
|---|
| 296 |
Vector res = void; |
|---|
| 297 |
res.x = x*v.x - y*v.y - z*v.z - w*v.w; |
|---|
| 298 |
res.y = x*v.y + y*v.x + z*v.w - w*v.z; |
|---|
| 299 |
res.z = x*v.z - y*v.w + z*v.x + w*v.y; |
|---|
| 300 |
res.w = x*v.w + y*v.z - z*v.y + w*v.x; |
|---|
| 301 |
return res; |
|---|
| 302 |
} |
|---|
| 303 |
Vector ham_sqr() { |
|---|
| 304 |
Vector res = void; |
|---|
| 305 |
res.x = x*x - y*y - z*z - w*w; |
|---|
| 306 |
res.y = 2 * x*y; |
|---|
| 307 |
res.z = 2 * x*z; |
|---|
| 308 |
res.w = 2 * x*w; |
|---|
| 309 |
return res; |
|---|
| 310 |
} |
|---|
| 311 |
} |
|---|
| 312 |
static if (!is(T==byte)) { |
|---|
| 313 |
mixin(opGeneric("Vector!(byte, size) · Mul · *; Vector!(byte, size) · Div · /; ")); |
|---|
| 314 |
} |
|---|
| 315 |
Vector opNeg() { |
|---|
| 316 |
Vector res = *this; foreach (ref v; res.field) v = -v; return res; |
|---|
| 317 |
} |
|---|
| 318 |
float dot(Vector v) { |
|---|
| 319 |
Vector temp = opMul(v); |
|---|
| 320 |
float sum = 0f; |
|---|
| 321 |
foreach (elem; temp.tuple) sum += elem; |
|---|
| 322 |
return sum; |
|---|
| 323 |
} |
|---|
| 324 |
float lensq() { |
|---|
| 325 |
Vector temp = opMul(*this); |
|---|
| 326 |
float sum = 0f; |
|---|
| 327 |
foreach (elem; temp.tuple) sum += elem; |
|---|
| 328 |
return sum; |
|---|
| 329 |
} |
|---|
| 330 |
string toString() { |
|---|
| 331 |
static if (is(T==float)) {// default |
|---|
| 332 |
string res = "<"; |
|---|
| 333 |
foreach (i, entry; field) { |
|---|
| 334 |
if (i) res ~= " "; |
|---|
| 335 |
res ~= Format(entry); |
|---|
| 336 |
} |
|---|
| 337 |
return res ~ ">"; |
|---|
| 338 |
} else return Format("vec(", T.stringof, ") ", field); |
|---|
| 339 |
} |
|---|
| 340 |
static if (is(float: T)) { |
|---|
| 341 |
Vector mix(Vector other, float f) { |
|---|
| 342 |
return opMul(1f - f) + other*f; |
|---|
| 343 |
} |
|---|
| 344 |
Vector mix(Vector other, Vector f) { |
|---|
| 345 |
return opMul(Vector(1f) - f) + other*f; |
|---|
| 346 |
} |
|---|
| 347 |
alias mix blend; |
|---|
| 348 |
static if (size==2) { |
|---|
| 349 |
Vector rotate(float deg) { |
|---|
| 350 |
Vector res = void; |
|---|
| 351 |
res.field[0] = .cos(deg) * field[0] - .sin(deg) * field[1]; |
|---|
| 352 |
res.field[1] = .sin(deg) * field[0] + .cos(deg) * field[1]; |
|---|
| 353 |
field[] = res.field; |
|---|
| 354 |
return res; |
|---|
| 355 |
} |
|---|
| 356 |
Vector rotated(float deg) { Vector res = *this; return res.rotate(deg); } |
|---|
| 357 |
} |
|---|
| 358 |
static if (size==3) { |
|---|
| 359 |
// http://www.mines.edu/~gmurray/ArbitraryAxisRotation/ArbitraryAxisRotation.html |
|---|
| 360 |
Vector rotate(Vector pos, Vector dir, float angle) { |
|---|
| 361 |
Vector res; |
|---|
| 362 |
auto a=pos.tuple[0], b=pos.tuple[1], c=pos.tuple[2]; |
|---|
| 363 |
auto u=dir.tuple[0], v=dir.tuple[1], w=dir.tuple[2]; |
|---|
| 364 |
auto vw=v*v+w*w, uw=u*u+w*w, uv=u*u+v*v, cosa=.cos(angle), sina=.sin(angle); |
|---|
| 365 |
auto lsq=dir.lensq, len=.fastsqrt(lsq), dd=dot(dir); |
|---|
| 366 |
res.x=a*vw+u*(-b*v-c*w+dd)+((x-a)*vw+u*(v*(b-y)+w*(c-z))) * cosa + len*(w*(b-y)+v*(z-c)) * sina; |
|---|
| 367 |
res.y=b*uw+v*(-a*u-c*w+dd)+((y-b)*uw+v*(u*(a-x)+w*(c-z))) * cosa + len*(w*(x-a)+u*(c-z)) * sina; |
|---|
| 368 |
res.z=c*uv+w*(-a*u-b*v+dd)+((z-c)*uv+w*(u*(a-x)+v*(b-y))) * cosa + len*(v*(a-x)+u*(y-b)) * sina; |
|---|
| 369 |
res/=lsq; |
|---|
| 370 |
field[]=res.field; |
|---|
| 371 |
return res; |
|---|
| 372 |
} |
|---|
| 373 |
Vector rotate(Vector dir, float angle) body { |
|---|
| 374 |
debug if (.abs(dir.length - 1.0) > 0.001) { |
|---|
| 375 |
logln("Bad rotation dir: ", dir); |
|---|
| 376 |
asm { int 3; } |
|---|
| 377 |
} |
|---|
| 378 |
Vector res; |
|---|
| 379 |
float dd=dot(dir), cosa=.cos(angle), sina=.sin(angle); |
|---|
| 380 |
|
|---|
| 381 |
static if (is(T==float) && useSSE) { // \TODO: there was some issue here .. investigate. verify. |
|---|
| 382 |
auto dp = dir.real_field.ptr, mp = real_field.ptr, ddp = &dd, sinlp = &sina, cosp = &cosa, tp = res.real_field.ptr; |
|---|
| 383 |
asm { |
|---|
| 384 |
mov EAX, dp; movups XMM4, [EAX]; |
|---|
| 385 |
mov EAX, mp; movups XMM5, [EAX]; |
|---|
| 386 |
// XMM4: uvw_ (dir) |
|---|
| 387 |
// XMM5: xyz_ |
|---|
| 388 |
shufps XMM5, XMM5, 0b_01_10_01_00; // xyzy, a little trick for later |
|---|
| 389 |
|
|---|
| 390 |
movaps XMM6, XMM4; |
|---|
| 391 |
shufps XMM6, XMM6, 0b_00_01_10_10; |
|---|
| 392 |
// XMM6: wwv_ |
|---|
| 393 |
|
|---|
| 394 |
movaps XMM7, XMM4; |
|---|
| 395 |
shufps XMM7, XMM7, 0b_00_00_00_01; |
|---|
| 396 |
// XMM7: vuu_ |
|---|
| 397 |
|
|---|
| 398 |
// start accumulating the result in XMM0 |
|---|
| 399 |
movaps XMM0, XMM4; |
|---|
| 400 |
mov EAX, ddp; movss XMM1, [EAX]; shufps XMM1, XMM1, 0b_00_00_00_00; |
|---|
| 401 |
mulps XMM0, XMM1; // res = dir * dd |
|---|
| 402 |
|
|---|
| 403 |
// start accumulating the cosa part in XMM1 |
|---|
| 404 |
movaps XMM1, XMM6; |
|---|
| 405 |
mulps XMM1, XMM1; |
|---|
| 406 |
movaps XMM2, XMM7; |
|---|
| 407 |
mulps XMM2, XMM2; |
|---|
| 408 |
addps XMM1, XMM2; // (vv_uu_uu * ww_ww_vv |
|---|
| 409 |
mulps XMM1, XMM5; |
|---|
| 410 |
|
|---|
| 411 |
xorps XMM3, XMM3; // zero out |
|---|
| 412 |
subps XMM3, XMM5; // -xyz |
|---|
| 413 |
|
|---|
| 414 |
movaps XMM2, XMM3; // -xyz |
|---|
| 415 |
shufps XMM2, XMM2, 0b_00_00_00_01; // -yxx_ |
|---|
| 416 |
mulps XMM2, XMM7; |
|---|
| 417 |
addps XMM1, XMM2; // + vuu*[-yxx] |
|---|
| 418 |
|
|---|
| 419 |
movaps XMM2, XMM3; // -xyz |
|---|
| 420 |
shufps XMM2, XMM2, 0b_00_01_10_10; // -zzy_ |
|---|
| 421 |
mulps XMM2, XMM6; |
|---|
| 422 |
addps XMM1, XMM2; // + wwv*[-zzy] |
|---|
| 423 |
|
|---|
| 424 |
mov EAX, cosp; movss XMM2, [EAX]; |
|---|
| 425 |
shufps XMM2, XMM2, 0b_00_00_00_00; |
|---|
| 426 |
mulps XMM1, XMM2; // ) * cosa |
|---|
| 427 |
|
|---|
| 428 |
// flush |
|---|
| 429 |
addps XMM0, XMM1; |
|---|
| 430 |
|
|---|
| 431 |
// start accumulating the sina part in XMM1 |
|---|
| 432 |
// remember, XMM3 is still -xyz |
|---|
| 433 |
movaps XMM1, XMM5; |
|---|
| 434 |
unpcklps XMM1, XMM3; // [x, -x, y, -y] |
|---|
| 435 |
shufps XMM1, XMM1, 0b_00_01_00_11; // [-y][x][-x][_] |
|---|
| 436 |
mulps XMM1, XMM6; |
|---|
| 437 |
|
|---|
| 438 |
movaps XMM2, XMM5; // xyzy .. this is what the trick is for: |
|---|
| 439 |
unpckhps XMM2, XMM3; // [z, -z, y, _] ! No shufps needed. :) |
|---|
| 440 |
mulps XMM2, XMM7; // * vuu |
|---|
| 441 |
addps XMM1, XMM2; // almost done |
|---|
| 442 |
|
|---|
| 443 |
mov EAX, sinlp; movss XMM2, [EAX]; |
|---|
| 444 |
shufps XMM2, XMM2, 0b_00_00_00_00; // let it be heard in all your registers |
|---|
| 445 |
mulps XMM1, XMM2; // GO FORTH AND MULTIPLY |
|---|
| 446 |
|
|---|
| 447 |
addps XMM0, XMM1; // .. it is done. |
|---|
| 448 |
mov EAX, tp; |
|---|
| 449 |
movups [EAX], XMM0; // finishing touch |
|---|
| 450 |
|
|---|
| 451 |
// res.? = dir.? * dd + (?*[vw,uw,uv].? + [v,u,u].?*[-y,-x,-x].? + [w,w,v].?*[-z,-z,-y].?) * cosa |
|---|
| 452 |
// + [w,w,v].?*[-y,x,-x].?+[v,u,u].?*[z,-z,y].? * sina |
|---|
| 453 |
// u: -x -y -z |
|---|
| 454 |
// v: x -y z |
|---|
| 455 |
// w: x -y -z |
|---|
| 456 |
} |
|---|
| 457 |
} else { |
|---|
| 458 |
float u=dir.x, v=dir.y, w=dir.z; |
|---|
| 459 |
float uu = u*u, vv = v*v, ww = w*w; |
|---|
| 460 |
float v_w=vv+ww, u_w=uu+ww, u_v=uu+vv; |
|---|
| 461 |
auto dots = Vector(v_w, u_w, u_v); |
|---|
| 462 |
res.x = dir.x*dd+(x*v_w+dir.x*(v*(-y)+w*(-z))) * cosa + (w*(-y)+v*z) * sina; |
|---|
| 463 |
res.y = dir.y*dd+(y*u_w+dir.y*(u*(-x)+w*(-z))) * cosa + (w*x+u*(-z)) * sina; |
|---|
| 464 |
res.z = dir.z*dd+(z*u_v+dir.z*(u*(-x)+v*(-y))) * cosa + (v*(-x)+u*y) * sina; |
|---|
| 465 |
res /= dir.lensq; |
|---|
| 466 |
} |
|---|
| 467 |
field[]=res.field; |
|---|
| 468 |
return res; |
|---|
| 469 |
} |
|---|
| 470 |
Vector rotated(Vector pos, Vector dir, float angle) { |
|---|
| 471 |
Vector res=void; res.field[]=field; |
|---|
| 472 |
return res.rotate(pos, dir, angle); |
|---|
| 473 |
} |
|---|
| 474 |
Vector rotated(Vector dir, float angle) { |
|---|
| 475 |
Vector res=void; res.field[]=field; |
|---|
| 476 |
return res.rotate(dir, angle); |
|---|
| 477 |
} |
|---|
| 478 |
/*static Vector rand_halfsphere_surf(T)(Vector normal, ref float d, ref T rng) { |
|---|
| 479 |
Vector res=void; float lsq = void; |
|---|
| 480 |
Vector!(float, 2) v = void; |
|---|
| 481 |
while (true) { |
|---|
| 482 |
v = typeof(v).rand({ return rng() * 1f / typeof(rng()).max; }); |
|---|
| 483 |
lsq = v.lensq(); |
|---|
| 484 |
if (lsq<1) break; |
|---|
| 485 |
} |
|---|
| 486 |
auto sq = .fastsqrt(1f - lsq); |
|---|
| 487 |
res.x = 2f * v.x * sq; res.y = 2f * v.y * sq; res.z = 1f - 2f * lsq; |
|---|
| 488 |
d = res.dot(normal); |
|---|
| 489 |
if (d>0) return res; else { d = -d; return -res; } |
|---|
| 490 |
}*/ |
|---|
| 491 |
import tools.ziggy; |
|---|
| 492 |
const SQRT2 = 1.414213562373095048801688724; |
|---|
| 493 |
static Vector rand_halfsphere_surf(T)(Vector normal, ref float d, ref T rng) { |
|---|
| 494 |
Vector res = void; |
|---|
| 495 |
res.x = gaussian(rng, SQRT2); |
|---|
| 496 |
res.y = gaussian(rng, SQRT2); |
|---|
| 497 |
res.z = gaussian(rng, SQRT2); |
|---|
| 498 |
res /= res.length; |
|---|
| 499 |
d = res.dot(normal); |
|---|
| 500 |
if (d > 0) return res; else { d = -d; return -res; } |
|---|
| 501 |
} |
|---|
| 502 |
static Vector rand_halfsphere_cosine_weighted(T)(Vector normal, ref float d, ref T rng) { |
|---|
| 503 |
for (int i = 0; i < 256; ++i) { |
|---|
| 504 |
auto res = rand_halfsphere_surf(normal, d, rng); |
|---|
| 505 |
if (d >= (rng() * 1.0 / typeof(rng()).max)) return res; |
|---|
| 506 |
} |
|---|
| 507 |
return normal; // fallback; |
|---|
| 508 |
} |
|---|
| 509 |
Vector mat_mult(float[] mat, float w) in { assert(mat.length == 16 || mat.length == 12); } body { |
|---|
| 510 |
Vector res = void; |
|---|
| 511 |
static if (real_size == 4) real_field[3] = w; |
|---|
| 512 |
auto mat0 = mat[0..4], mat1 = mat[4..8], mat2 = mat[8..12]; |
|---|
| 513 |
float[real_size] sumfield0, sumfield1, sumfield2; |
|---|
| 514 |
sumfield0[] = 0f; sumfield1[] = 0f; sumfield2[] = 0f; |
|---|
| 515 |
for (int k = 0; k < real_size; ++k) { |
|---|
| 516 |
sumfield0[k] = real_field[k] * mat0[k]; |
|---|
| 517 |
sumfield1[k] = real_field[k] * mat1[k]; |
|---|
| 518 |
sumfield2[k] = real_field[k] * mat2[k]; |
|---|
| 519 |
} |
|---|
| 520 |
static if (real_size == 4) { |
|---|
| 521 |
res.x = sumfield0[0] + sumfield0[1] + sumfield0[2] + sumfield0[3]; |
|---|
| 522 |
res.y = sumfield1[0] + sumfield1[1] + sumfield1[2] + sumfield1[3]; |
|---|
| 523 |
res.z = sumfield2[0] + sumfield2[1] + sumfield2[2] + sumfield2[3]; |
|---|
| 524 |
} else { |
|---|
| 525 |
res.x = sumfield0[0] + sumfield0[1] + sumfield0[2] + w * mat0[3]; |
|---|
| 526 |
res.y = sumfield1[0] + sumfield1[1] + sumfield1[2] + w * mat1[3]; |
|---|
| 527 |
res.z = sumfield2[0] + sumfield2[1] + sumfield2[2] + w * mat2[3]; |
|---|
| 528 |
} |
|---|
| 529 |
return res; |
|---|
| 530 |
} |
|---|
| 531 |
} |
|---|
| 532 |
static Vector rand_sphere(float delegate() dg = null) { |
|---|
| 533 |
while (true) { |
|---|
| 534 |
auto test = rand(dg); |
|---|
| 535 |
if (test.lensq < 1) return test; |
|---|
| 536 |
} |
|---|
| 537 |
} |
|---|
| 538 |
static Vector rand(float delegate() dg = null) { |
|---|
| 539 |
const string code = ` |
|---|
| 540 |
Vector res=void; |
|---|
| 541 |
foreach (id, v; res.tuple) res.tuple[id] = #*2f-1f; |
|---|
| 542 |
return res; |
|---|
| 543 |
`; |
|---|
| 544 |
if (dg) { mixin(ctReplace(code, "#", "dg()")); } |
|---|
| 545 |
else { mixin(ctReplace(code, "#", "vec_randf()")); } |
|---|
| 546 |
} |
|---|
| 547 |
Vector sin() { Vector res = void; for (int i = 0; i < size; ++i) res.field[i] = .sin(field[i]); return res; } |
|---|
| 548 |
Vector cos() { Vector res = void; for (int i = 0; i < size; ++i) res.field[i] = .cos(field[i]); return res; } |
|---|
| 549 |
Vector tan() { Vector res = void; for (int i = 0; i < size; ++i) res.field[i] = .tan(field[i]); return res; } |
|---|
| 550 |
Vector exp() { Vector res = void; for (int i = 0; i < size; ++i) res.field[i] = .exp(field[i]); return res; } |
|---|
| 551 |
} |
|---|
| 552 |
} |
|---|
| 553 |
} |
|---|
| 554 |
|
|---|
| 555 |
alias Vector!(float, 2) vec2f; |
|---|
| 556 |
alias Vector!(float, 3) vec3f; |
|---|
| 557 |
alias Vector!(float, 4) vec4f; |
|---|
| 558 |
alias Vector!(int, 2) vec2i; |
|---|
| 559 |
|
|---|
| 560 |
string mad(string s) { |
|---|
| 561 |
string numbuf="", elsebuf=""; bool nummode=false; |
|---|
| 562 |
string res=""; |
|---|
| 563 |
bool blockNum; |
|---|
| 564 |
foreach (ch; s) { |
|---|
| 565 |
if (ch == '[') blockNum = true; |
|---|
| 566 |
if (ch == ']') blockNum = false; |
|---|
| 567 |
if (!blockNum && ch >= '0' && ch <= '9') { |
|---|
| 568 |
if (!nummode) { res ~= elsebuf; numbuf = ""; } |
|---|
| 569 |
nummode = true; |
|---|
| 570 |
} else { |
|---|
| 571 |
if (nummode) { res ~= "mat[" ~ numbuf[0] ~ "*4+" ~ numbuf[1] ~ "]"; elsebuf = ""; } |
|---|
| 572 |
nummode = false; |
|---|
| 573 |
} |
|---|
| 574 |
if (nummode) numbuf ~= ch; |
|---|
| 575 |
else elsebuf ~= ch; |
|---|
| 576 |
} |
|---|
| 577 |
if (nummode) { |
|---|
| 578 |
res ~= "mat[" ~ numbuf[0] ~ "*4+" ~ numbuf[1] ~ "]"; |
|---|
| 579 |
} |
|---|
| 580 |
else res ~= elsebuf; |
|---|
| 581 |
return res; |
|---|
| 582 |
} |
|---|
| 583 |
|
|---|
| 584 |
float[] transpose(float[] mat) in { assert(mat.length == 16); } body { |
|---|
| 585 |
float[16] res = void; |
|---|
| 586 |
res[0 .. 4] = [mat[0], mat[4], mat[ 8], mat[12]]; |
|---|
| 587 |
res[4 .. 8] = [mat[1], mat[5], mat[ 9], mat[13]]; |
|---|
| 588 |
res[8 ..12] = [mat[2], mat[6], mat[10], mat[14]]; |
|---|
| 589 |
res[12..16] = [mat[3], mat[7], mat[11], mat[15]]; |
|---|
| 590 |
mat[] = res; |
|---|
| 591 |
return mat; |
|---|
| 592 |
} |
|---|
| 593 |
|
|---|
| 594 |
// Thanks! http://inside.mines.edu/~gmurray/ArbitraryAxisRotation/ArbitraryAxisRotation.html |
|---|
| 595 |
void genRotation(float[] mat, vec3f base, vec3f dir, float angle) { |
|---|
| 596 |
float cosa=.cos(angle), sina=.sin(angle); |
|---|
| 597 |
float a=base.x, b=base.y, c=base.z; |
|---|
| 598 |
float u=dir.x, v=dir.y, w=dir.z; |
|---|
| 599 |
float uu = u*u, vv = v*v, ww = w*w; |
|---|
| 600 |
float v_w=vv+ww, u_w=uu+ww, u_v=uu+vv; |
|---|
| 601 |
float bvcw = b*v + c*w, aucw = c*w + a*u, aubv = a*u + b*v; |
|---|
| 602 |
float bw_cv = b*w - c*v, cu_aw = c*u - a*w, av_bu = a*v - b*u; |
|---|
| 603 |
// don't forget to divide by dir.lensq, i.e. uu+vv+ww |
|---|
| 604 |
auto dd = dir.lensq, d = fastsqrt(dd); // OWCH! Square root! |
|---|
| 605 |
mat[ 0.. 4] = [uu + v_w*cosa, u*v*(1f - cosa) - w*d*sina, u*w*(1f - cosa) + v*d*sina, a*v_w - u*bvcw + (u*bvcw - a*v_w)*cosa + bw_cv*d*sina]; |
|---|
| 606 |
mat[ 4.. 8] = [u*v*(1f - cosa) + w*d*sina, vv + u_w*cosa, w*v*(1f - cosa) - u*d*sina, b*u_w - v*aucw + (v*aucw - b*u_w)*cosa + cu_aw*d*sina]; |
|---|
| 607 |
mat[ 8..12] = [u*w*(1f - cosa) - v*d*sina, v*w*(1f - cosa) + u*d*sina, ww + u_v*cosa, c*u_v - w*aubv + (w*aubv - c*u_v)*cosa + av_bu*d*sina]; |
|---|
| 608 |
if (mat.length == 16) mat[12..16] = [0f, 0f, 0f, 1f]; |
|---|
| 609 |
foreach (ref value; mat[0..12]) value /= dd; |
|---|
| 610 |
} |
|---|
| 611 |
|
|---|
| 612 |
vec3f bogus; // force template instantiation for unittests |
|---|
| 613 |
|
|---|
| 614 |
import tools.mersenne; |
|---|
| 615 |
alias tools.mersenne.rand rand; |
|---|
| 616 |
float default_randf() { return (1f*rand())/(1f*typeof(rand()).max); } |
|---|
| 617 |
float function() vec_randf; |
|---|
| 618 |
static this() { vec_randf = &default_randf; } |
|---|