| 1 |
/** |
|---|
| 2 |
* The atomic module provides basic support for lock-free |
|---|
| 3 |
* concurrent programming. |
|---|
| 4 |
* |
|---|
| 5 |
* Copyright: Copyright Sean Kelly 2005 - 2010. |
|---|
| 6 |
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) |
|---|
| 7 |
* Authors: Sean Kelly |
|---|
| 8 |
* Source: $(DRUNTIMESRC core/_atomic.d) |
|---|
| 9 |
*/ |
|---|
| 10 |
|
|---|
| 11 |
/* Copyright Sean Kelly 2005 - 2010. |
|---|
| 12 |
* Distributed under the Boost Software License, Version 1.0. |
|---|
| 13 |
* (See accompanying file LICENSE_1_0.txt or copy at |
|---|
| 14 |
* http://www.boost.org/LICENSE_1_0.txt) |
|---|
| 15 |
*/ |
|---|
| 16 |
module core.atomic; |
|---|
| 17 |
|
|---|
| 18 |
version( D_InlineAsm_X86 ) |
|---|
| 19 |
{ |
|---|
| 20 |
version = AsmX86; |
|---|
| 21 |
version = AsmX86_32; |
|---|
| 22 |
enum has64BitCAS = true; |
|---|
| 23 |
} |
|---|
| 24 |
version( D_InlineAsm_X86_64 ) |
|---|
| 25 |
{ |
|---|
| 26 |
version = AsmX86; |
|---|
| 27 |
version = AsmX86_64; |
|---|
| 28 |
enum has64BitCAS = true; |
|---|
| 29 |
} |
|---|
| 30 |
|
|---|
| 31 |
|
|---|
| 32 |
private |
|---|
| 33 |
{ |
|---|
| 34 |
template NakedType(T: shared(T)) { alias T NakedType; } |
|---|
| 35 |
template NakedType(T: shared(T*)) { alias T* NakedType; } |
|---|
| 36 |
template NakedType(T: const(T)) { alias T NakedType; } |
|---|
| 37 |
template NakedType(T: const(T*)) { alias T* NakedType; } |
|---|
| 38 |
template NamedType(T: T*) { alias T NakedType; } |
|---|
| 39 |
template NakedType(T) { alias T NakedType; } |
|---|
| 40 |
} |
|---|
| 41 |
|
|---|
| 42 |
|
|---|
| 43 |
version( AsmX86 ) |
|---|
| 44 |
{ |
|---|
| 45 |
// NOTE: Strictly speaking, the x86 supports atomic operations on |
|---|
| 46 |
// unaligned values. However, this is far slower than the |
|---|
| 47 |
// common case, so such behavior should be prohibited. |
|---|
| 48 |
private bool atomicValueIsProperlyAligned(T)( size_t addr ) |
|---|
| 49 |
{ |
|---|
| 50 |
return addr % T.sizeof == 0; |
|---|
| 51 |
} |
|---|
| 52 |
} |
|---|
| 53 |
|
|---|
| 54 |
|
|---|
| 55 |
version( D_Ddoc ) |
|---|
| 56 |
{ |
|---|
| 57 |
/** |
|---|
| 58 |
* Performs the binary operation 'op' on val using 'mod' as the modifier. |
|---|
| 59 |
* |
|---|
| 60 |
* Params: |
|---|
| 61 |
* val = The target variable. |
|---|
| 62 |
* mod = The modifier to apply. |
|---|
| 63 |
* |
|---|
| 64 |
* Returns: |
|---|
| 65 |
* The result of the operation. |
|---|
| 66 |
*/ |
|---|
| 67 |
T atomicOp(string op, T, V1)( ref shared T val, V1 mod ) |
|---|
| 68 |
{ |
|---|
| 69 |
return val; |
|---|
| 70 |
} |
|---|
| 71 |
|
|---|
| 72 |
|
|---|
| 73 |
/** |
|---|
| 74 |
* Stores 'writeThis' to the memory referenced by 'here' if the value |
|---|
| 75 |
* referenced by 'here' is equal to 'ifThis'. This operation is both |
|---|
| 76 |
* lock-free and atomic. |
|---|
| 77 |
* |
|---|
| 78 |
* Params: |
|---|
| 79 |
* here = The address of the destination variable. |
|---|
| 80 |
* writeThis = The value to store. |
|---|
| 81 |
* ifThis = The comparison value. |
|---|
| 82 |
* |
|---|
| 83 |
* Returns: |
|---|
| 84 |
* true if the store occurred, false if not. |
|---|
| 85 |
*/ |
|---|
| 86 |
bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, const V2 writeThis ) |
|---|
| 87 |
if( is( NakedType!(V1) == NakedType!(T) ) && |
|---|
| 88 |
is( NakedType!(V2) == NakedType!(T) ) ) |
|---|
| 89 |
{ |
|---|
| 90 |
return false; |
|---|
| 91 |
} |
|---|
| 92 |
} |
|---|
| 93 |
else version( AsmX86_32 ) |
|---|
| 94 |
{ |
|---|
| 95 |
T atomicOp(string op, T, V1)( ref shared T val, V1 mod ) |
|---|
| 96 |
if( is( NakedType!(V1) == NakedType!(T) ) ) |
|---|
| 97 |
in |
|---|
| 98 |
{ |
|---|
| 99 |
// NOTE: 32 bit x86 systems support 8 byte CAS, which only requires |
|---|
| 100 |
// 4 byte alignment, so use size_t as the align type here. |
|---|
| 101 |
static if( T.sizeof > size_t.sizeof ) |
|---|
| 102 |
assert( atomicValueIsProperlyAligned!(size_t)( cast(size_t) &val ) ); |
|---|
| 103 |
else |
|---|
| 104 |
assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) ); |
|---|
| 105 |
} |
|---|
| 106 |
body |
|---|
| 107 |
{ |
|---|
| 108 |
// binary operators |
|---|
| 109 |
// |
|---|
| 110 |
// + - * / % ^^ & |
|---|
| 111 |
// | ^ << >> >>> ~ in |
|---|
| 112 |
// == != < <= > >= |
|---|
| 113 |
static if( op == "+" || op == "-" || op == "*" || op == "/" || |
|---|
| 114 |
op == "%" || op == "^^" || op == "&" || op == "|" || |
|---|
| 115 |
op == "^" || op == "<<" || op == ">>" || op == ">>>" || |
|---|
| 116 |
op == "~" || // skip "in" |
|---|
| 117 |
op == "==" || op == "!=" || op == "<" || op == "<=" || |
|---|
| 118 |
op == ">" || op == ">=" ) |
|---|
| 119 |
{ |
|---|
| 120 |
T get = atomicLoad!(msync.raw)( val ); |
|---|
| 121 |
mixin( "return get " ~ op ~ " mod;" ); |
|---|
| 122 |
} |
|---|
| 123 |
else |
|---|
| 124 |
// assignment operators |
|---|
| 125 |
// |
|---|
| 126 |
// += -= *= /= %= ^^= &= |
|---|
| 127 |
// |= ^= <<= >>= >>>= ~= |
|---|
| 128 |
static if( op == "+=" || op == "-=" || op == "*=" || op == "/=" || |
|---|
| 129 |
op == "%=" || op == "^^=" || op == "&=" || op == "|=" || |
|---|
| 130 |
op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~=" |
|---|
| 131 |
{ |
|---|
| 132 |
T get, set; |
|---|
| 133 |
|
|---|
| 134 |
do |
|---|
| 135 |
{ |
|---|
| 136 |
get = set = atomicLoad!(msync.raw)( val ); |
|---|
| 137 |
mixin( "set " ~ op ~ " mod;" ); |
|---|
| 138 |
} while( !cas( &val, get, set ) ); |
|---|
| 139 |
return set; |
|---|
| 140 |
} |
|---|
| 141 |
else |
|---|
| 142 |
{ |
|---|
| 143 |
static assert( false, "Operation not supported." ); |
|---|
| 144 |
} |
|---|
| 145 |
} |
|---|
| 146 |
|
|---|
| 147 |
|
|---|
| 148 |
bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, const V2 writeThis ) |
|---|
| 149 |
if( is( NakedType!(V1) == NakedType!(T) ) && |
|---|
| 150 |
is( NakedType!(V2) == NakedType!(T) ) ) |
|---|
| 151 |
in |
|---|
| 152 |
{ |
|---|
| 153 |
// NOTE: 32 bit x86 systems support 8 byte CAS, which only requires |
|---|
| 154 |
// 4 byte alignment, so use size_t as the align type here. |
|---|
| 155 |
static if( T.sizeof > size_t.sizeof ) |
|---|
| 156 |
assert( atomicValueIsProperlyAligned!(size_t)( cast(size_t) here ) ); |
|---|
| 157 |
else |
|---|
| 158 |
assert( atomicValueIsProperlyAligned!(T)( cast(size_t) here ) ); |
|---|
| 159 |
} |
|---|
| 160 |
body |
|---|
| 161 |
{ |
|---|
| 162 |
static if( T.sizeof == byte.sizeof ) |
|---|
| 163 |
{ |
|---|
| 164 |
////////////////////////////////////////////////////////////////// |
|---|
| 165 |
// 1 Byte CAS |
|---|
| 166 |
////////////////////////////////////////////////////////////////// |
|---|
| 167 |
|
|---|
| 168 |
|
|---|
| 169 |
asm |
|---|
| 170 |
{ |
|---|
| 171 |
mov DL, writeThis; |
|---|
| 172 |
mov AL, ifThis; |
|---|
| 173 |
mov ECX, here; |
|---|
| 174 |
lock; // lock always needed to make this op atomic |
|---|
| 175 |
cmpxchg [ECX], DL; |
|---|
| 176 |
setz AL; |
|---|
| 177 |
} |
|---|
| 178 |
} |
|---|
| 179 |
else static if( T.sizeof == short.sizeof ) |
|---|
| 180 |
{ |
|---|
| 181 |
////////////////////////////////////////////////////////////////// |
|---|
| 182 |
// 2 Byte CAS |
|---|
| 183 |
////////////////////////////////////////////////////////////////// |
|---|
| 184 |
|
|---|
| 185 |
|
|---|
| 186 |
asm |
|---|
| 187 |
{ |
|---|
| 188 |
mov DX, writeThis; |
|---|
| 189 |
mov AX, ifThis; |
|---|
| 190 |
mov ECX, here; |
|---|
| 191 |
lock; // lock always needed to make this op atomic |
|---|
| 192 |
cmpxchg [ECX], DX; |
|---|
| 193 |
setz AL; |
|---|
| 194 |
} |
|---|
| 195 |
} |
|---|
| 196 |
else static if( T.sizeof == int.sizeof ) |
|---|
| 197 |
{ |
|---|
| 198 |
////////////////////////////////////////////////////////////////// |
|---|
| 199 |
// 4 Byte CAS |
|---|
| 200 |
////////////////////////////////////////////////////////////////// |
|---|
| 201 |
|
|---|
| 202 |
|
|---|
| 203 |
asm |
|---|
| 204 |
{ |
|---|
| 205 |
mov EDX, writeThis; |
|---|
| 206 |
mov EAX, ifThis; |
|---|
| 207 |
mov ECX, here; |
|---|
| 208 |
lock; // lock always needed to make this op atomic |
|---|
| 209 |
cmpxchg [ECX], EDX; |
|---|
| 210 |
setz AL; |
|---|
| 211 |
} |
|---|
| 212 |
} |
|---|
| 213 |
else static if( T.sizeof == long.sizeof && has64BitCAS ) |
|---|
| 214 |
{ |
|---|
| 215 |
////////////////////////////////////////////////////////////////// |
|---|
| 216 |
// 8 Byte CAS on a 32-Bit Processor |
|---|
| 217 |
////////////////////////////////////////////////////////////////// |
|---|
| 218 |
|
|---|
| 219 |
|
|---|
| 220 |
asm |
|---|
| 221 |
{ |
|---|
| 222 |
push EDI; |
|---|
| 223 |
push EBX; |
|---|
| 224 |
lea EDI, writeThis; |
|---|
| 225 |
mov EBX, [EDI]; |
|---|
| 226 |
mov ECX, 4[EDI]; |
|---|
| 227 |
lea EDI, ifThis; |
|---|
| 228 |
mov EAX, [EDI]; |
|---|
| 229 |
mov EDX, 4[EDI]; |
|---|
| 230 |
mov EDI, here; |
|---|
| 231 |
lock; // lock always needed to make this op atomic |
|---|
| 232 |
cmpxch8b [EDI]; |
|---|
| 233 |
setz AL; |
|---|
| 234 |
pop EBX; |
|---|
| 235 |
pop EDI; |
|---|
| 236 |
} |
|---|
| 237 |
} |
|---|
| 238 |
else |
|---|
| 239 |
{ |
|---|
| 240 |
static assert( false, "Invalid template type specified." ); |
|---|
| 241 |
} |
|---|
| 242 |
} |
|---|
| 243 |
|
|---|
| 244 |
|
|---|
| 245 |
private |
|---|
| 246 |
{ |
|---|
| 247 |
template isHoistOp(msync ms) |
|---|
| 248 |
{ |
|---|
| 249 |
enum bool isHoistOp = ms == msync.acq || |
|---|
| 250 |
ms == msync.seq; |
|---|
| 251 |
} |
|---|
| 252 |
|
|---|
| 253 |
|
|---|
| 254 |
template isSinkOp(msync ms) |
|---|
| 255 |
{ |
|---|
| 256 |
enum bool isSinkOp = ms == msync.rel || |
|---|
| 257 |
ms == msync.seq; |
|---|
| 258 |
} |
|---|
| 259 |
|
|---|
| 260 |
|
|---|
| 261 |
// NOTE: While x86 loads have acquire semantics for stores, it appears |
|---|
| 262 |
// that independent loads may be reordered by some processors |
|---|
| 263 |
// (notably the AMD64). This implies that the hoist-load barrier |
|---|
| 264 |
// op requires an ordering instruction, which also extends this |
|---|
| 265 |
// requirement to acquire ops (though hoist-store should not need |
|---|
| 266 |
// one if support is added for this later). However, since no |
|---|
| 267 |
// modern architectures will reorder dependent loads to occur |
|---|
| 268 |
// before the load they depend on (except the Alpha), raw loads |
|---|
| 269 |
// are actually a possible means of ordering specific sequences |
|---|
| 270 |
// of loads in some instances. |
|---|
| 271 |
// |
|---|
| 272 |
// For reference, the old behavior (acquire semantics for loads) |
|---|
| 273 |
// required a memory barrier if: ms == msync.seq || isSinkOp!(ms) |
|---|
| 274 |
template needsLoadBarrier( msync ms ) |
|---|
| 275 |
{ |
|---|
| 276 |
const bool needsLoadBarrier = ms != msync.raw; |
|---|
| 277 |
} |
|---|
| 278 |
|
|---|
| 279 |
|
|---|
| 280 |
enum msync |
|---|
| 281 |
{ |
|---|
| 282 |
raw, /// not sequenced |
|---|
| 283 |
acq, /// hoist-load + hoist-store barrier |
|---|
| 284 |
rel, /// sink-load + sink-store barrier |
|---|
| 285 |
seq, /// fully sequenced (acq + rel) |
|---|
| 286 |
} |
|---|
| 287 |
|
|---|
| 288 |
|
|---|
| 289 |
T atomicLoad(msync ms = msync.seq, T)( const ref shared T val ) |
|---|
| 290 |
{ |
|---|
| 291 |
static if( T.sizeof == byte.sizeof ) |
|---|
| 292 |
{ |
|---|
| 293 |
////////////////////////////////////////////////////////////////// |
|---|
| 294 |
// 1 Byte Load |
|---|
| 295 |
////////////////////////////////////////////////////////////////// |
|---|
| 296 |
|
|---|
| 297 |
static if( needsLoadBarrier!(ms) ) |
|---|
| 298 |
{ |
|---|
| 299 |
asm |
|---|
| 300 |
{ |
|---|
| 301 |
mov DL, 0; |
|---|
| 302 |
mov AL, 0; |
|---|
| 303 |
mov ECX, val; |
|---|
| 304 |
lock; // lock always needed to make this op atomic |
|---|
| 305 |
cmpxchg [ECX], DL; |
|---|
| 306 |
} |
|---|
| 307 |
} |
|---|
| 308 |
else |
|---|
| 309 |
{ |
|---|
| 310 |
asm |
|---|
| 311 |
{ |
|---|
| 312 |
mov EAX, val; |
|---|
| 313 |
mov AL, [EAX]; |
|---|
| 314 |
} |
|---|
| 315 |
} |
|---|
| 316 |
} |
|---|
| 317 |
else static if( T.sizeof == short.sizeof ) |
|---|
| 318 |
{ |
|---|
| 319 |
////////////////////////////////////////////////////////////////// |
|---|
| 320 |
// 2 Byte Load |
|---|
| 321 |
////////////////////////////////////////////////////////////////// |
|---|
| 322 |
|
|---|
| 323 |
|
|---|
| 324 |
static if( needsLoadBarrier!(ms) ) |
|---|
| 325 |
{ |
|---|
| 326 |
asm |
|---|
| 327 |
{ |
|---|
| 328 |
mov DX, 0; |
|---|
| 329 |
mov AX, 0; |
|---|
| 330 |
mov ECX, val; |
|---|
| 331 |
lock; // lock always needed to make this op atomic |
|---|
| 332 |
cmpxchg [ECX], DX; |
|---|
| 333 |
} |
|---|
| 334 |
} |
|---|
| 335 |
else |
|---|
| 336 |
{ |
|---|
| 337 |
asm |
|---|
| 338 |
{ |
|---|
| 339 |
mov EAX, val; |
|---|
| 340 |
mov AX, [EAX]; |
|---|
| 341 |
} |
|---|
| 342 |
} |
|---|
| 343 |
} |
|---|
| 344 |
else static if( T.sizeof == int.sizeof ) |
|---|
| 345 |
{ |
|---|
| 346 |
////////////////////////////////////////////////////////////////// |
|---|
| 347 |
// 4 Byte Load |
|---|
| 348 |
////////////////////////////////////////////////////////////////// |
|---|
| 349 |
|
|---|
| 350 |
|
|---|
| 351 |
static if( needsLoadBarrier!(ms) ) |
|---|
| 352 |
{ |
|---|
| 353 |
asm |
|---|
| 354 |
{ |
|---|
| 355 |
mov EDX, 0; |
|---|
| 356 |
mov EAX, 0; |
|---|
| 357 |
mov ECX, val; |
|---|
| 358 |
lock; // lock always needed to make this op atomic |
|---|
| 359 |
cmpxchg [ECX], EDX; |
|---|
| 360 |
} |
|---|
| 361 |
} |
|---|
| 362 |
else |
|---|
| 363 |
{ |
|---|
| 364 |
asm |
|---|
| 365 |
{ |
|---|
| 366 |
mov EAX, val; |
|---|
| 367 |
mov EAX, [EAX]; |
|---|
| 368 |
} |
|---|
| 369 |
} |
|---|
| 370 |
} |
|---|
| 371 |
else static if( T.sizeof == long.sizeof && has64BitCAS ) |
|---|
| 372 |
{ |
|---|
| 373 |
////////////////////////////////////////////////////////////////// |
|---|
| 374 |
// 8 Byte Load on a 32-Bit Processor |
|---|
| 375 |
////////////////////////////////////////////////////////////////// |
|---|
| 376 |
|
|---|
| 377 |
|
|---|
| 378 |
asm |
|---|
| 379 |
{ |
|---|
| 380 |
push EDI; |
|---|
| 381 |
push EBX; |
|---|
| 382 |
mov EBX, 0; |
|---|
| 383 |
mov ECX, 0; |
|---|
| 384 |
mov EAX, 0; |
|---|
| 385 |
mov EDX, 0; |
|---|
| 386 |
mov EDI, val; |
|---|
| 387 |
lock; // lock always needed to make this op atomic |
|---|
| 388 |
cmpxch8b [EDI]; |
|---|
| 389 |
pop EBX; |
|---|
| 390 |
pop EDI; |
|---|
| 391 |
} |
|---|
| 392 |
} |
|---|
| 393 |
else |
|---|
| 394 |
{ |
|---|
| 395 |
static assert( false, "Invalid template type specified." ); |
|---|
| 396 |
} |
|---|
| 397 |
} |
|---|
| 398 |
} |
|---|
| 399 |
} |
|---|
| 400 |
else version( AsmX86_64 ) |
|---|
| 401 |
{ |
|---|
| 402 |
T atomicOp(string op, T, V1)( ref shared T val, V1 mod ) |
|---|
| 403 |
if( is( NakedType!(V1) == NakedType!(T) ) ) |
|---|
| 404 |
in |
|---|
| 405 |
{ |
|---|
| 406 |
// NOTE: 32 bit x86 systems support 8 byte CAS, which only requires |
|---|
| 407 |
// 4 byte alignment, so use size_t as the align type here. |
|---|
| 408 |
static if( T.sizeof > size_t.sizeof ) |
|---|
| 409 |
assert( atomicValueIsProperlyAligned!(size_t)( cast(size_t) &val ) ); |
|---|
| 410 |
else |
|---|
| 411 |
assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) ); |
|---|
| 412 |
} |
|---|
| 413 |
body |
|---|
| 414 |
{ |
|---|
| 415 |
// binary operators |
|---|
| 416 |
// |
|---|
| 417 |
// + - * / % ^^ & |
|---|
| 418 |
// | ^ << >> >>> ~ in |
|---|
| 419 |
// == != < <= > >= |
|---|
| 420 |
static if( op == "+" || op == "-" || op == "*" || op == "/" || |
|---|
| 421 |
op == "%" || op == "^^" || op == "&" || op == "|" || |
|---|
| 422 |
op == "^" || op == "<<" || op == ">>" || op == ">>>" || |
|---|
| 423 |
op == "~" || // skip "in" |
|---|
| 424 |
op == "==" || op == "!=" || op == "<" || op == "<=" || |
|---|
| 425 |
op == ">" || op == ">=" ) |
|---|
| 426 |
{ |
|---|
| 427 |
T get = atomicLoad!(msync.raw)( val ); |
|---|
| 428 |
mixin( "return get " ~ op ~ " mod;" ); |
|---|
| 429 |
} |
|---|
| 430 |
else |
|---|
| 431 |
// assignment operators |
|---|
| 432 |
// |
|---|
| 433 |
// += -= *= /= %= ^^= &= |
|---|
| 434 |
// |= ^= <<= >>= >>>= ~= |
|---|
| 435 |
static if( op == "+=" || op == "-=" || op == "*=" || op == "/=" || |
|---|
| 436 |
op == "%=" || op == "^^=" || op == "&=" || op == "|=" || |
|---|
| 437 |
op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~=" |
|---|
| 438 |
{ |
|---|
| 439 |
T get, set; |
|---|
| 440 |
|
|---|
| 441 |
do |
|---|
| 442 |
{ |
|---|
| 443 |
get = set = atomicLoad!(msync.raw)( val ); |
|---|
| 444 |
mixin( "set " ~ op ~ " mod;" ); |
|---|
| 445 |
} while( !cas( &val, get, set ) ); |
|---|
| 446 |
return set; |
|---|
| 447 |
} |
|---|
| 448 |
else |
|---|
| 449 |
{ |
|---|
| 450 |
static assert( false, "Operation not supported." ); |
|---|
| 451 |
} |
|---|
| 452 |
} |
|---|
| 453 |
|
|---|
| 454 |
|
|---|
| 455 |
bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, const V2 writeThis ) |
|---|
| 456 |
if( is( NakedType!(V1) == NakedType!(T) ) && |
|---|
| 457 |
is( NakedType!(V2) == NakedType!(T) ) ) |
|---|
| 458 |
in |
|---|
| 459 |
{ |
|---|
| 460 |
// NOTE: 32 bit x86 systems support 8 byte CAS, which only requires |
|---|
| 461 |
// 4 byte alignment, so use size_t as the align type here. |
|---|
| 462 |
static if( T.sizeof > size_t.sizeof ) |
|---|
| 463 |
assert( atomicValueIsProperlyAligned!(size_t)( cast(size_t) here ) ); |
|---|
| 464 |
else |
|---|
| 465 |
assert( atomicValueIsProperlyAligned!(T)( cast(size_t) here ) ); |
|---|
| 466 |
} |
|---|
| 467 |
body |
|---|
| 468 |
{ |
|---|
| 469 |
static if( T.sizeof == byte.sizeof ) |
|---|
| 470 |
{ |
|---|
| 471 |
////////////////////////////////////////////////////////////////// |
|---|
| 472 |
// 1 Byte CAS |
|---|
| 473 |
////////////////////////////////////////////////////////////////// |
|---|
| 474 |
|
|---|
| 475 |
|
|---|
| 476 |
asm |
|---|
| 477 |
{ |
|---|
| 478 |
mov DL, writeThis; |
|---|
| 479 |
mov AL, ifThis; |
|---|
| 480 |
mov RCX, here; |
|---|
| 481 |
lock; // lock always needed to make this op atomic |
|---|
| 482 |
cmpxchg [RCX], DL; |
|---|
| 483 |
setz AL; |
|---|
| 484 |
} |
|---|
| 485 |
} |
|---|
| 486 |
else static if( T.sizeof == short.sizeof ) |
|---|
| 487 |
{ |
|---|
| 488 |
////////////////////////////////////////////////////////////////// |
|---|
| 489 |
// 2 Byte CAS |
|---|
| 490 |
////////////////////////////////////////////////////////////////// |
|---|
| 491 |
|
|---|
| 492 |
|
|---|
| 493 |
asm |
|---|
| 494 |
{ |
|---|
| 495 |
mov DX, writeThis; |
|---|
| 496 |
mov AX, ifThis; |
|---|
| 497 |
mov RCX, here; |
|---|
| 498 |
lock; // lock always needed to make this op atomic |
|---|
| 499 |
cmpxchg [RCX], DX; |
|---|
| 500 |
setz AL; |
|---|
| 501 |
} |
|---|
| 502 |
} |
|---|
| 503 |
else static if( T.sizeof == int.sizeof ) |
|---|
| 504 |
{ |
|---|
| 505 |
////////////////////////////////////////////////////////////////// |
|---|
| 506 |
// 4 Byte CAS |
|---|
| 507 |
////////////////////////////////////////////////////////////////// |
|---|
| 508 |
|
|---|
| 509 |
|
|---|
| 510 |
asm |
|---|
| 511 |
{ |
|---|
| 512 |
mov EDX, writeThis; |
|---|
| 513 |
mov EAX, ifThis; |
|---|
| 514 |
mov RCX, here; |
|---|
| 515 |
lock; // lock always needed to make this op atomic |
|---|
| 516 |
cmpxchg [RCX], EDX; |
|---|
| 517 |
setz AL; |
|---|
| 518 |
} |
|---|
| 519 |
} |
|---|
| 520 |
else static if( T.sizeof == long.sizeof ) |
|---|
| 521 |
{ |
|---|
| 522 |
////////////////////////////////////////////////////////////////// |
|---|
| 523 |
// 8 Byte CAS on a 64-Bit Processor |
|---|
| 524 |
////////////////////////////////////////////////////////////////// |
|---|
| 525 |
|
|---|
| 526 |
asm |
|---|
| 527 |
{ |
|---|
| 528 |
mov RDX, writeThis; |
|---|
| 529 |
mov RAX, ifThis; |
|---|
| 530 |
mov RCX, here; |
|---|
| 531 |
lock; // lock always needed to make this op atomic |
|---|
| 532 |
cmpxchg [RCX], RDX; |
|---|
| 533 |
setz AL; |
|---|
| 534 |
} |
|---|
| 535 |
} |
|---|
| 536 |
else |
|---|
| 537 |
{ |
|---|
| 538 |
static assert( false, "Invalid template type specified." ); |
|---|
| 539 |
} |
|---|
| 540 |
} |
|---|
| 541 |
|
|---|
| 542 |
|
|---|
| 543 |
private |
|---|
| 544 |
{ |
|---|
| 545 |
template isHoistOp(msync ms) |
|---|
| 546 |
{ |
|---|
| 547 |
enum bool isHoistOp = ms == msync.acq || |
|---|
| 548 |
ms == msync.seq; |
|---|
| 549 |
} |
|---|
| 550 |
|
|---|
| 551 |
|
|---|
| 552 |
template isSinkOp(msync ms) |
|---|
| 553 |
{ |
|---|
| 554 |
enum bool isSinkOp = ms == msync.rel || |
|---|
| 555 |
ms == msync.seq; |
|---|
| 556 |
} |
|---|
| 557 |
|
|---|
| 558 |
|
|---|
| 559 |
// NOTE: While x86 loads have acquire semantics for stores, it appears |
|---|
| 560 |
// that independent loads may be reordered by some processors |
|---|
| 561 |
// (notably the AMD64). This implies that the hoist-load barrier |
|---|
| 562 |
// op requires an ordering instruction, which also extends this |
|---|
| 563 |
// requirement to acquire ops (though hoist-store should not need |
|---|
| 564 |
// one if support is added for this later). However, since no |
|---|
| 565 |
// modern architectures will reorder dependent loads to occur |
|---|
| 566 |
// before the load they depend on (except the Alpha), raw loads |
|---|
| 567 |
// are actually a possible means of ordering specific sequences |
|---|
| 568 |
// of loads in some instances. |
|---|
| 569 |
// |
|---|
| 570 |
// For reference, the old behavior (acquire semantics for loads) |
|---|
| 571 |
// required a memory barrier if: ms == msync.seq || isSinkOp!(ms) |
|---|
| 572 |
template needsLoadBarrier( msync ms ) |
|---|
| 573 |
{ |
|---|
| 574 |
const bool needsLoadBarrier = ms != msync.raw; |
|---|
| 575 |
} |
|---|
| 576 |
|
|---|
| 577 |
|
|---|
| 578 |
enum msync |
|---|
| 579 |
{ |
|---|
| 580 |
raw, /// not sequenced |
|---|
| 581 |
acq, /// hoist-load + hoist-store barrier |
|---|
| 582 |
rel, /// sink-load + sink-store barrier |
|---|
| 583 |
seq, /// fully sequenced (acq + rel) |
|---|
| 584 |
} |
|---|
| 585 |
|
|---|
| 586 |
|
|---|
| 587 |
T atomicLoad(msync ms = msync.seq, T)( const ref shared T val ) |
|---|
| 588 |
{ |
|---|
| 589 |
static if( T.sizeof == byte.sizeof ) |
|---|
| 590 |
{ |
|---|
| 591 |
////////////////////////////////////////////////////////////////// |
|---|
| 592 |
// 1 Byte Load |
|---|
| 593 |
////////////////////////////////////////////////////////////////// |
|---|
| 594 |
|
|---|
| 595 |
static if( needsLoadBarrier!(ms) ) |
|---|
| 596 |
{ |
|---|
| 597 |
asm |
|---|
| 598 |
{ |
|---|
| 599 |
mov DL, 0; |
|---|
| 600 |
mov AL, 0; |
|---|
| 601 |
mov RCX, val; |
|---|
| 602 |
lock; // lock always needed to make this op atomic |
|---|
| 603 |
cmpxchg [RCX], DL; |
|---|
| 604 |
} |
|---|
| 605 |
} |
|---|
| 606 |
else |
|---|
| 607 |
{ |
|---|
| 608 |
asm |
|---|
| 609 |
{ |
|---|
| 610 |
mov RAX, val; |
|---|
| 611 |
mov AL, [RAX]; |
|---|
| 612 |
} |
|---|
| 613 |
} |
|---|
| 614 |
} |
|---|
| 615 |
else static if( T.sizeof == short.sizeof ) |
|---|
| 616 |
{ |
|---|
| 617 |
////////////////////////////////////////////////////////////////// |
|---|
| 618 |
// 2 Byte Load |
|---|
| 619 |
////////////////////////////////////////////////////////////////// |
|---|
| 620 |
|
|---|
| 621 |
|
|---|
| 622 |
static if( needsLoadBarrier!(ms) ) |
|---|
| 623 |
{ |
|---|
| 624 |
asm |
|---|
| 625 |
{ |
|---|
| 626 |
mov DX, 0; |
|---|
| 627 |
mov AX, 0; |
|---|
| 628 |
mov RCX, val; |
|---|
| 629 |
lock; // lock always needed to make this op atomic |
|---|
| 630 |
cmpxchg [RCX], DX; |
|---|
| 631 |
} |
|---|
| 632 |
} |
|---|
| 633 |
else |
|---|
| 634 |
{ |
|---|
| 635 |
asm |
|---|
| 636 |
{ |
|---|
| 637 |
mov RAX, val; |
|---|
| 638 |
mov AX, [RAX]; |
|---|
| 639 |
} |
|---|
| 640 |
} |
|---|
| 641 |
} |
|---|
| 642 |
else static if( T.sizeof == int.sizeof ) |
|---|
| 643 |
{ |
|---|
| 644 |
////////////////////////////////////////////////////////////////// |
|---|
| 645 |
// 4 Byte Load |
|---|
| 646 |
////////////////////////////////////////////////////////////////// |
|---|
| 647 |
|
|---|
| 648 |
|
|---|
| 649 |
static if( needsLoadBarrier!(ms) ) |
|---|
| 650 |
{ |
|---|
| 651 |
asm |
|---|
| 652 |
{ |
|---|
| 653 |
mov EDX, 0; |
|---|
| 654 |
mov EAX, 0; |
|---|
| 655 |
mov RCX, val; |
|---|
| 656 |
lock; // lock always needed to make this op atomic |
|---|
| 657 |
cmpxchg [RCX], EDX; |
|---|
| 658 |
} |
|---|
| 659 |
} |
|---|
| 660 |
else |
|---|
| 661 |
{ |
|---|
| 662 |
asm |
|---|
| 663 |
{ |
|---|
| 664 |
mov RAX, val; |
|---|
| 665 |
mov EAX, [RAX]; |
|---|
| 666 |
} |
|---|
| 667 |
} |
|---|
| 668 |
} |
|---|
| 669 |
else static if( T.sizeof == long.sizeof ) |
|---|
| 670 |
{ |
|---|
| 671 |
////////////////////////////////////////////////////////////////// |
|---|
| 672 |
// 8 Byte Load |
|---|
| 673 |
////////////////////////////////////////////////////////////////// |
|---|
| 674 |
|
|---|
| 675 |
|
|---|
| 676 |
static if( needsLoadBarrier!(ms) ) |
|---|
| 677 |
{ |
|---|
| 678 |
asm |
|---|
| 679 |
{ |
|---|
| 680 |
mov RDX, 0; |
|---|
| 681 |
mov RAX, 0; |
|---|
| 682 |
mov RCX, val; |
|---|
| 683 |
lock; // lock always needed to make this op atomic |
|---|
| 684 |
cmpxchg [RCX], RDX; |
|---|
| 685 |
} |
|---|
| 686 |
} |
|---|
| 687 |
else |
|---|
| 688 |
{ |
|---|
| 689 |
asm |
|---|
| 690 |
{ |
|---|
| 691 |
mov RAX, val; |
|---|
| 692 |
mov RAX, [RAX]; |
|---|
| 693 |
} |
|---|
| 694 |
} |
|---|
| 695 |
} |
|---|
| 696 |
else |
|---|
| 697 |
{ |
|---|
| 698 |
static assert( false, "Invalid template type specified." ); |
|---|
| 699 |
} |
|---|
| 700 |
} |
|---|
| 701 |
} |
|---|
| 702 |
} |
|---|
| 703 |
|
|---|
| 704 |
|
|---|
| 705 |
//////////////////////////////////////////////////////////////////////////////// |
|---|
| 706 |
// Unit Tests |
|---|
| 707 |
//////////////////////////////////////////////////////////////////////////////// |
|---|
| 708 |
|
|---|
| 709 |
|
|---|
| 710 |
version( unittest ) |
|---|
| 711 |
{ |
|---|
| 712 |
template testCAS( msyT ) |
|---|
| 713 |
{ |
|---|
| 714 |
void testCAS(T)( T val = T.init + 1 ) |
|---|
| 715 |
{ |
|---|
| 716 |
T base; |
|---|
| 717 |
shared(T) atom; |
|---|
| 718 |
|
|---|
| 719 |
assert( base != val ); |
|---|
| 720 |
assert( atom == base ); |
|---|
| 721 |
assert( cas( &atom, base, val ) ); |
|---|
| 722 |
assert( atom == val ); |
|---|
| 723 |
assert( !cas( &atom, base, base ) ); |
|---|
| 724 |
assert( atom == val ); |
|---|
| 725 |
} |
|---|
| 726 |
} |
|---|
| 727 |
|
|---|
| 728 |
|
|---|
| 729 |
void testType(T)( T val = T.init + 1 ) |
|---|
| 730 |
{ |
|---|
| 731 |
testCAS!(T)( val ); |
|---|
| 732 |
} |
|---|
| 733 |
|
|---|
| 734 |
|
|---|
| 735 |
unittest |
|---|
| 736 |
{ |
|---|
| 737 |
testType!(bool)(); |
|---|
| 738 |
|
|---|
| 739 |
testType!(byte)(); |
|---|
| 740 |
testType!(ubyte)(); |
|---|
| 741 |
|
|---|
| 742 |
testType!(short)(); |
|---|
| 743 |
testType!(ushort)(); |
|---|
| 744 |
|
|---|
| 745 |
testType!(int)(); |
|---|
| 746 |
testType!(uint)(); |
|---|
| 747 |
|
|---|
| 748 |
testType!(int*)(); |
|---|
| 749 |
|
|---|
| 750 |
static if( has64BitCAS ) |
|---|
| 751 |
{ |
|---|
| 752 |
testType!(long)(); |
|---|
| 753 |
testType!(ulong)(); |
|---|
| 754 |
} |
|---|
| 755 |
|
|---|
| 756 |
size_t i; |
|---|
| 757 |
|
|---|
| 758 |
atomicOp!"+="(i, cast(size_t)1); |
|---|
| 759 |
assert(i == 1); |
|---|
| 760 |
|
|---|
| 761 |
atomicOp!"-="(i, cast(size_t)1); |
|---|
| 762 |
assert(i == 0); |
|---|
| 763 |
} |
|---|
| 764 |
} |
|---|