tbb_machine.h

00001 /*
00002     Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_machine_H
00022 #define __TBB_machine_H
00023 
00116 #include "tbb_stddef.h"
00117 
00118 namespace tbb {
00119 namespace internal {
00120 
00122 // Overridable helpers declarations
00123 //
00124 // A machine/*.h file may choose to define these templates, otherwise it must
00125 // request default implementation by setting appropriate __TBB_USE_GENERIC_XXX macro(s).
00126 //
00127 template <typename T, std::size_t S>
00128 struct machine_load_store;
00129 
00130 template <typename T, std::size_t S>
00131 struct machine_load_store_relaxed;
00132 
00133 template <typename T, std::size_t S>
00134 struct machine_load_store_seq_cst;
00135 //
00136 // End of overridable helpers declarations
00138 
//! Maps an operand size S (in bytes) to the integer "machine word" type used
//! by the atomic primitives, and declares the fetch-and-store entry point.
/** Definitions of fetch_store are provided later (via
    __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE) when
    __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE is set, or by a machine/ *.h port. */
template<size_t S> struct atomic_selector;

template<> struct atomic_selector<1> {
    typedef int8_t word;
    inline static word fetch_store ( volatile void* location, word value );
};

template<> struct atomic_selector<2> {
    typedef int16_t word;
    inline static word fetch_store ( volatile void* location, word value );
};

template<> struct atomic_selector<4> {
#if _MSC_VER && !_WIN64
    // Work-around that avoids spurious /Wp64 warnings
    typedef intptr_t word;
#else
    typedef int32_t word;
#endif
    inline static word fetch_store ( volatile void* location, word value );
};

template<> struct atomic_selector<8> {
    typedef int64_t word;
    inline static word fetch_store ( volatile void* location, word value );
};
00166 }} // namespaces internal, tbb
00167 
00168 #define __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(M)                                        \
00169     inline void __TBB_machine_generic_store8##M(volatile void *ptr, int64_t value) {         \
00170         for(;;) {                                                                            \
00171             int64_t result = *(int64_t *)ptr;                                                \
00172             if( __TBB_machine_cmpswp8##M(ptr,value,result)==result ) break;                  \
00173         }                                                                                    \
00174     }                                                                                        \
00175 
00176 #define __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(M)                                         \
00177     inline int64_t __TBB_machine_generic_load8##M(const volatile void *ptr) {                \
00178         /* Comparand and new value may be anything, they only must be equal, and      */     \
00179         /* the value should have a low probability to be actually found in 'location'.*/     \
00180         const int64_t anyvalue = 2305843009213693951LL;                                      \
00181         return __TBB_machine_cmpswp8##M(const_cast<volatile void *>(ptr),anyvalue,anyvalue); \
00182     }                                                                                        \
00183 
00184 #if _WIN32||_WIN64
00185 
00186 #ifdef _MANAGED
00187 #pragma managed(push, off)
00188 #endif
00189 
00190     #if __MINGW64__ || __MINGW32__
00191         extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00192         #define __TBB_Yield()  SwitchToThread()
00193         #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00194             #include "machine/gcc_generic.h"
00195         #elif __MINGW64__
00196             #include "machine/linux_intel64.h"
00197         #elif __MINGW32__
00198             #include "machine/linux_ia32.h"
00199         #endif
00200     #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
00201         #include "machine/icc_generic.h"
00202     #elif defined(_M_IX86)
00203         #include "machine/windows_ia32.h"
00204     #elif defined(_M_X64) 
00205         #include "machine/windows_intel64.h"
00206     #elif _XBOX
00207         #include "machine/xbox360_ppc.h"
00208     #endif
00209 
00210 #ifdef _MANAGED
00211 #pragma managed(pop)
00212 #endif
00213 
00214 #elif __TBB_DEFINE_MIC
00215 
00216     #include "machine/mic_common.h"
00217     //TODO: check if ICC atomic intrinsics are available for MIC
00218     #include "machine/linux_intel64.h"
00219 
00220 #elif __linux__ || __FreeBSD__ || __NetBSD__
00221 
00222     #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00223         #include "machine/gcc_generic.h"
00224     #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
00225         #include "machine/icc_generic.h"
00226     #elif __i386__
00227         #include "machine/linux_ia32.h"
00228     #elif __x86_64__
00229         #include "machine/linux_intel64.h"
00230     #elif __ia64__
00231         #include "machine/linux_ia64.h"
00232     #elif __powerpc__
00233         #include "machine/mac_ppc.h"
00234     #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
00235         #include "machine/gcc_generic.h"
00236     #endif
00237     #include "machine/linux_common.h"
00238 
00239 #elif __APPLE__
00240     //TODO:  TBB_USE_GCC_BUILTINS is not used for Mac, Sun, Aix
00241     #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
00242         #include "machine/icc_generic.h"
00243     #elif __i386__
00244         #include "machine/linux_ia32.h"
00245     #elif __x86_64__
00246         #include "machine/linux_intel64.h"
00247     #elif __POWERPC__
00248         #include "machine/mac_ppc.h"
00249     #endif
00250     #include "machine/macos_common.h"
00251 
00252 #elif _AIX
00253 
00254     #include "machine/ibm_aix51.h"
00255 
00256 #elif __sun || __SUNPRO_CC
00257 
00258     #define __asm__ asm
00259     #define __volatile__ volatile
00260 
00261     #if __i386  || __i386__
00262         #include "machine/linux_ia32.h"
00263     #elif __x86_64__
00264         #include "machine/linux_intel64.h"
00265     #elif __sparc
00266         #include "machine/sunos_sparc.h"
00267     #endif
00268     #include <sched.h>
00269 
00270     #define __TBB_Yield() sched_yield()
00271 
00272 #endif /* OS selection */
00273 
00274 #ifndef __TBB_64BIT_ATOMICS
00275     #define __TBB_64BIT_ATOMICS 1
00276 #endif
00277 
00278 //TODO: replace usage of these functions with usage of tbb::atomic, and then remove them
00279 //TODO: map functions with W suffix to use cast to tbb::atomic and according op, i.e. as_atomic().op()
00280 // Special atomic functions
00281 #if __TBB_USE_FENCED_ATOMICS
00282     #define __TBB_machine_cmpswp1   __TBB_machine_cmpswp1full_fence
00283     #define __TBB_machine_cmpswp2   __TBB_machine_cmpswp2full_fence
00284     #define __TBB_machine_cmpswp4   __TBB_machine_cmpswp4full_fence
00285     #define __TBB_machine_cmpswp8   __TBB_machine_cmpswp8full_fence
00286 
00287     #if __TBB_WORDSIZE==8
00288         #define __TBB_machine_fetchadd8             __TBB_machine_fetchadd8full_fence
00289         #define __TBB_machine_fetchstore8           __TBB_machine_fetchstore8full_fence
00290         #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd8release(P,V)
00291         #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd8acquire(P,1)
00292         #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd8release(P,(-1))
00293     #else
00294         #define __TBB_machine_fetchadd4             __TBB_machine_fetchadd4full_fence
00295         #define __TBB_machine_fetchstore4           __TBB_machine_fetchstore4full_fence
00296         #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd4release(P,V)
00297         #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd4acquire(P,1)
00298         #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd4release(P,(-1))
00299     #endif /* __TBB_WORDSIZE==4 */
00300 #else /* !__TBB_USE_FENCED_ATOMICS */
00301     #define __TBB_FetchAndAddWrelease(P,V)      __TBB_FetchAndAddW(P,V)
00302     #define __TBB_FetchAndIncrementWacquire(P)  __TBB_FetchAndAddW(P,1)
00303     #define __TBB_FetchAndDecrementWrelease(P)  __TBB_FetchAndAddW(P,(-1))
00304 #endif /* !__TBB_USE_FENCED_ATOMICS */
00305 
00306 #if __TBB_WORDSIZE==4
00307     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp4(P,V,C)
00308     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd4(P,V)
00309     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore4(P,V)
00310 #elif  __TBB_WORDSIZE==8
00311     #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
00312         #error These macros should only be used on 32-bit platforms.
00313     #endif
00314 
00315     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp8(P,V,C)
00316     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd8(P,V)
00317     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore8(P,V)
00318 #else /* __TBB_WORDSIZE != 8 */
00319     #error Unsupported machine word size.
00320 #endif /* __TBB_WORDSIZE */
00321 
00322 #ifndef __TBB_Pause
    //! Fallback pause for ports that define no cheap spin-wait instruction:
    //! simply yield the processor. The requested spin count is ignored.
    inline void __TBB_Pause(int32_t) {
        __TBB_Yield();
    }
00326 #endif
00327 
00328 namespace tbb {
00329 
00331 inline void atomic_fence () { __TBB_full_memory_fence(); }
00332 
00333 namespace internal {
00334 
00336 
00337 class atomic_backoff : no_copy {
00339 
00341     static const int32_t LOOPS_BEFORE_YIELD = 16;
00342     int32_t count;
00343 public:
00344     atomic_backoff() : count(1) {}
00345 
00347     void pause() {
00348         if( count<=LOOPS_BEFORE_YIELD ) {
00349             __TBB_Pause(count);
00350             // Pause twice as long the next time.
00351             count*=2;
00352         } else {
00353             // Pause is so long that we might as well yield CPU to scheduler.
00354             __TBB_Yield();
00355         }
00356     }
00357 
00358     // pause for a few times and then return false immediately.
00359     bool bounded_pause() {
00360         if( count<=LOOPS_BEFORE_YIELD ) {
00361             __TBB_Pause(count);
00362             // Pause twice as long the next time.
00363             count*=2;
00364             return true;
00365         } else {
00366             return false;
00367         }
00368     }
00369 
00370     void reset() {
00371         count = 1;
00372     }
00373 };
00374 
00376 
00377 template<typename T, typename U>
00378 void spin_wait_while_eq( const volatile T& location, U value ) {
00379     atomic_backoff backoff;
00380     while( location==value ) backoff.pause();
00381 }
00382 
00384 
00385 template<typename T, typename U>
00386 void spin_wait_until_eq( const volatile T& location, const U value ) {
00387     atomic_backoff backoff;
00388     while( location!=value ) backoff.pause();
00389 }
00390 
00391 //TODO: add static_assert for the requirements stated below
00392 //TODO: check if it works with signed types
00393 
00394 // there are following restrictions/limitations for this operation:
00395 //  - T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
00396 //  - T should be integer type of at most 4 bytes, for the casts and calculations to work.
00397 //      (Together, these rules limit applicability of Masked CAS to uint8_t and uint16_t only,
00398 //      as it does nothing useful for 4 bytes).
00399 //  - The operation assumes that the architecture consistently uses either little-endian or big-endian:
00400 //      it does not support mixed-endian or page-specific bi-endian architectures.
00401 // This function is the only use of __TBB_BIG_ENDIAN.
00402 #if (__TBB_BIG_ENDIAN!=-1)
00403     #if ( __TBB_USE_GENERIC_PART_WORD_CAS)
00404         #error generic implementation of part-word CAS was explicitly disabled for this configuration
00405     #endif
//! Emulates compare-and-swap on a 1- or 2-byte field using the native 4-byte
//! CAS applied to the enclosing aligned 32-bit word.
/** Returns the previous value of *ptr (as observed by the successful or
    conclusively-failed CAS). See the restrictions listed in the comment
    above: T must be an unsigned integer type of at most 4 bytes, and the
    architecture must be consistently little- or big-endian. */
template<typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
    struct endianness{ static bool is_big_endian(){
        #ifndef __TBB_BIG_ENDIAN
            // Runtime probe: inspect the lowest-addressed byte of a known pattern.
            const uint32_t probe = 0x03020100;
            return (((const char*)(&probe))[0]==0x03);
        #elif (__TBB_BIG_ENDIAN==0) || (__TBB_BIG_ENDIAN==1)
            return __TBB_BIG_ENDIAN;
        #else
            #error unexpected value of __TBB_BIG_ENDIAN
        #endif
    }};

    // Split ptr into the enclosing 4-byte-aligned word and the byte offset within it.
    const uint32_t byte_offset            = (uint32_t) ((uintptr_t)ptr & 0x3);
    volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );

    // location of T within uint32_t for a C++ shift operation
    const uint32_t bits_to_shift     = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
    const uint32_t mask              = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
    // Position comparand and value at the byte lanes occupied by *ptr.
    const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
    const uint32_t shifted_value     = ((uint32_t)value     << bits_to_shift)&mask;

    for(atomic_backoff b;;b.pause()) {
        const uint32_t surroundings  = *aligned_ptr & ~mask ; // reload the aligned_ptr value which might change during the pause
        const uint32_t big_comparand = surroundings | shifted_comparand ;
        const uint32_t big_value     = surroundings | shifted_value     ;
        // __TBB_machine_cmpswp4 presumed to have full fence.
        // Cast shuts up /Wp64 warning
        const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
        if( big_result == big_comparand                    // CAS succeeded
          || ((big_result ^ big_comparand) & mask) != 0)   // CAS failed and the bits of interest have changed
        {
            return T((big_result & mask) >> bits_to_shift);
        }
        else continue;                                     // CAS failed but the bits of interest left unchanged
    }
}
00443 #endif
//! Size-dispatched compare-and-swap; specializations below map each operand
//! size to the corresponding machine primitive. Returns the previous value.
template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );

//! 1-byte CAS: native byte CAS when available, otherwise emulated via a
//! masked 4-byte CAS on the enclosing word.
template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
    return __TBB_MaskedCompareAndSwap<uint8_t>((volatile uint8_t *)ptr,value,comparand);
#else
    return __TBB_machine_cmpswp1(ptr,value,comparand);
#endif
}

//! 2-byte CAS: native short CAS when available, otherwise emulated via a
//! masked 4-byte CAS on the enclosing word.
template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
    return __TBB_MaskedCompareAndSwap<uint16_t>((volatile uint16_t *)ptr,value,comparand);
#else
    return __TBB_machine_cmpswp2(ptr,value,comparand);
#endif
}

//! 4-byte CAS maps directly onto the machine primitive.
template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    // Cast shuts up /Wp64 warning
    return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
}

#if __TBB_64BIT_ATOMICS
//! 8-byte CAS; compiled only when the platform provides 64-bit atomics.
template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_machine_cmpswp8(ptr,value,comparand);
}
#endif
00477 
00478 template<size_t S, typename T>
00479 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
00480     atomic_backoff b;
00481     T result;
00482     for(;;) {
00483         result = *reinterpret_cast<volatile T *>(ptr);
00484         // __TBB_CompareAndSwapGeneric presumed to have full fence.
00485         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
00486             break;
00487         b.pause();
00488     }
00489     return result;
00490 }
00491 
00492 template<size_t S, typename T>
00493 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
00494     atomic_backoff b;
00495     T result;
00496     for(;;) {
00497         result = *reinterpret_cast<volatile T *>(ptr);
00498         // __TBB_CompareAndSwapGeneric presumed to have full fence.
00499         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
00500             break;
00501         b.pause();
00502     }
00503     return result;
00504 }
00505 
00506 #if __TBB_USE_GENERIC_PART_WORD_CAS
00507 #define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
00508 #define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
00509 #endif
00510 
00511 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
00512 #define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
00513 #define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
00514 #endif
00515 
00516 #if __TBB_USE_GENERIC_FETCH_ADD
00517 #define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
00518 #endif
00519 
00520 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
00521 #define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
00522 #endif
00523 
00524 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
00525 #define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
00526 #define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
00527 #endif
00528 
00529 #if __TBB_USE_GENERIC_FETCH_STORE
00530 #define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
00531 #endif
00532 
00533 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
00534 #define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
00535 #endif
00536 
00537 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
00538 #define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S)                                             \
00539     atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) {  \
00540         return __TBB_machine_fetchstore##S( location, value );                                          \
00541     }
00542 
00543 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
00544 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
00545 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
00546 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)
00547 
00548 #undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
00549 #endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00550 
00551 #if __TBB_USE_GENERIC_DWORD_LOAD_STORE
00552 /*TODO: find a more elegant way to handle function names difference*/
00553 #if ! __TBB_USE_FENCED_ATOMICS
00554     /* This name forwarding is needed for generic implementation of
00555      * load8/store8 defined below (via macro) to pick the right CAS function*/
00556     #define   __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8
00557 #endif
00558 __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
00559 __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
00560 
00561 #if ! __TBB_USE_FENCED_ATOMICS
00562     #undef   __TBB_machine_cmpswp8full_fence
00563 #endif
00564 
00565 #define __TBB_machine_store8 tbb::internal::__TBB_machine_generic_store8full_fence
00566 #define __TBB_machine_load8  tbb::internal::__TBB_machine_generic_load8full_fence
00567 #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
00568 
00569 #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
00570 
//! Generic acquire-load / release-store built from compiler fence helpers.
/** The access itself is an ordinary (volatile) memory access; ordering is
    enforced with the port-supplied consistency helpers. Used only when the
    machine port requests __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE. */
template <typename T, size_t S>
struct machine_load_store {
    static T load_with_acquire ( const volatile T& location ) {
        T to_return = location;
        // Prevent subsequent memory accesses from moving above the load.
        __TBB_acquire_consistency_helper();
        return to_return;
    }
    static void store_with_release ( volatile T &location, T value ) {
        // Prevent prior memory accesses from moving below the store.
        __TBB_release_consistency_helper();
        location = value;
    }
};

//in general, plain load and store of 32bit compiler is not atomic for 64bit types
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
//! 64-bit accesses on a 32-bit platform go through the atomic load8/store8 helpers.
template <typename T>
struct machine_load_store<T,8> {
    static T load_with_acquire ( const volatile T& location ) {
        return (T)__TBB_machine_load8( (const volatile void*)&location );
    }
    static void store_with_release ( volatile T& location, T value ) {
        __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
    }
};
00600 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00601 #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */
00602 
00603 #if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
//! Generic sequentially consistent load/store, built from the full memory fence.
template <typename T, size_t S>
struct machine_load_store_seq_cst {
    static T load ( const volatile T& location ) {
        __TBB_full_memory_fence();
        return machine_load_store<T,S>::load_with_acquire( location );
    }
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
    static void store ( volatile T &location, T value ) {
        // On this platform an atomic exchange acts as a full fence, so a
        // fetch_store (discarding the old value) implements the seq_cst store.
        atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
    }
#else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
    static void store ( volatile T &location, T value ) {
        machine_load_store<T,S>::store_with_release( location, value );
        __TBB_full_memory_fence();
    }
#endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
};

#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS

//! Sequentially consistent 64-bit access on 32-bit platforms via 8-byte CAS.
template <typename T>
struct machine_load_store_seq_cst<T,8> {
    static T load ( const volatile T& location ) {
        // Comparand and new value may be anything, they only must be equal, and
        // the value should have a low probability to be actually found in 'location'.
        const int64_t anyvalue = 2305843009213693951LL;
        return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
    }
    static void store ( volatile T &location, T value ) {
        // CAS loop: retry until 'value' is installed atomically.
        int64_t result = (volatile int64_t&)location;
        while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
            result = (volatile int64_t&)location;
    }
};
00639 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00640 #endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */
00641 
00642 #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
00643 // Relaxed operations add volatile qualifier to prevent compiler from optimizing them out.
// Relaxed operations add volatile qualifier to prevent compiler from optimizing them out.
//! Generic relaxed load/store: no ordering guarantees, but the access is
//! performed exactly once thanks to the volatile qualifier.
template <typename T, size_t S>
struct machine_load_store_relaxed {
    static inline T load ( const volatile T& location ) {
        return location;
    }
    static inline void store ( volatile T& location, T value ) {
        location = value;
    }
};

#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
//! On 32-bit platforms a plain 64-bit access may tear; use atomic load8/store8.
template <typename T>
struct machine_load_store_relaxed<T,8> {
    static inline T load ( const volatile T& location ) {
        return (T)__TBB_machine_load8( (const volatile void*)&location );
    }
    static inline void store ( volatile T& location, T value ) {
        __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
    }
};
00667 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00668 #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */
00669 
00670 #undef __TBB_WORDSIZE //this macro is forbidden to use outside of atomic machinery
00671 
//! Load with acquire semantics, dispatched on sizeof(T).
template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) {
    return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
}
//! Store with release semantics; the value is first converted to T.
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
}
//! Overload that helps to avoid ambiguity for size_t arguments.
inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
    machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value );
}
00684 
//! Sequentially consistent load, dispatched on sizeof(T).
template<typename T>
inline T __TBB_load_full_fence(const volatile T &location) {
    return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
}
//! Sequentially consistent store; the value is first converted to T.
template<typename T, typename V>
inline void __TBB_store_full_fence(volatile T& location, V value) {
    machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
}
//! Overload that helps to avoid ambiguity for size_t arguments.
inline void __TBB_store_full_fence(volatile size_t& location, size_t value) {
    machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value );
}
00697 
//! Relaxed (unordered) load, dispatched on sizeof(T).
template<typename T>
inline T __TBB_load_relaxed (const volatile T& location) {
    return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(location) );
}
//! Relaxed (unordered) store; the value is first converted to T.
template<typename T, typename V>
inline void __TBB_store_relaxed ( volatile T& location, V value ) {
    machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location), T(value) );
}
//! Overload that helps to avoid ambiguity for size_t arguments.
inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) {
    machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<size_t&>(location), value );
}
00710 
00711 // Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as
00712 // strict as type T.  The type should have a trivial default constructor and destructor, so that
00713 // arrays of that type can be declared without initializers.
00714 // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
00715 // to a type bigger than T.
00716 // The default definition here works on machines where integers are naturally aligned and the
00717 // strictest alignment is 64.
00718 #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
00719 
00720 #if __TBB_ATTRIBUTE_ALIGNED_PRESENT
00721 
00722 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
00723 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
00724     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
00725 } __attribute__((aligned(PowerOf2)));
00726 #define __TBB_alignof(T) __alignof__(T)
00727 
00728 #elif __TBB_DECLSPEC_ALIGN_PRESENT
00729 
00730 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
00731 __declspec(align(PowerOf2))                           \
00732 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
00733     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
00734 };
00735 #define __TBB_alignof(T) __alignof(T)
00736 
00737 #else /* A compiler with unknown syntax for data alignment */
00738 #error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
00739 #endif
00740 
00741 /* Now declare types aligned to useful powers of two */
00742 // TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms?
00743 __TBB_DefineTypeWithAlignment(16)
00744 __TBB_DefineTypeWithAlignment(32)
00745 __TBB_DefineTypeWithAlignment(64)
00746 
00747 typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;
00748 
// Primary template is a declaration of incomplete type so that it fails with unknown alignments
//! type_with_alignment<N> has a member whose alignment is at least N.
template<size_t N> struct type_with_alignment;

// Specializations for allowed alignments
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };
template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; };
template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; };
template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; };
00760 
00761 #if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN
00763 
//! Work-around for compilers where __TBB_alignof of an un-instantiated template type is broken.
/** Taking sizeof(T) as a template argument presumably forces T to be
    instantiated before its alignment is queried — TODO confirm against the
    compilers this guard targets. */
template<size_t Size, typename T>
struct work_around_alignment_bug {
    static const size_t alignment = __TBB_alignof(T);
};
00769 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
00770 #else
00771 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
00772 #endif  /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */
00773 
00774 #endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
00775 
// Template class here is to avoid instantiation of the static data for modules that don't use it
//! Holds the 256-entry bit-reversal lookup table (defined below):
//! byte_table[i] is the value of i with its 8 bits in reverse order.
template<typename T>
struct reverse {
    static const T byte_table[256];
};
00781 // An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
00782 // values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
00783 template<typename T>
00784 const T reverse<T>::byte_table[256] = {
00785     0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
00786     0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
00787     0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
00788     0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
00789     0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
00790     0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
00791     0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
00792     0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
00793     0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
00794     0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
00795     0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
00796     0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
00797     0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
00798     0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
00799     0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
00800     0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
00801 };
00802 
00803 } // namespace internal
00804 } // namespace tbb
00805 
00806 // Preserving access to legacy APIs
00807 using tbb::internal::__TBB_load_with_acquire;
00808 using tbb::internal::__TBB_store_with_release;
00809 
00810 // Mapping historically used names to the ones expected by atomic_load_store_traits
00811 #define __TBB_load_acquire  __TBB_load_with_acquire
00812 #define __TBB_store_release __TBB_store_with_release
00813 
00814 #ifndef __TBB_Log2
//! Compute floor(log2(x)) by binary search for the highest set bit.
/** Returns -1 when x is 0. */
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t pos = 0;
    // On 64-bit platforms, first check whether any of the upper 32 bits are set.
    if( sizeof(x)>4 ) {
        const uintptr_t hi = (uintptr_t)(((uint64_t)x)>>32);
        if( hi ) { x = hi; pos += 32; }
    }
    // Narrow down the highest set bit within the remaining (at most 32) bits.
    for( int shift = 16; shift > 0; shift >>= 1 ) {
        const uintptr_t upper = x>>shift;
        if( upper ) { x = upper; pos += shift; }
    }
    return pos;
}
00827 #endif
00828 
00829 #ifndef __TBB_AtomicOR
00830 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
00831     tbb::internal::atomic_backoff b;
00832     for(;;) {
00833         uintptr_t tmp = *(volatile uintptr_t *)operand;
00834         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
00835         if( result==tmp ) break;
00836         b.pause();
00837     }
00838 }
00839 #endif
00840 
00841 #ifndef __TBB_AtomicAND
00842 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
00843     tbb::internal::atomic_backoff b;
00844     for(;;) {
00845         uintptr_t tmp = *(volatile uintptr_t *)operand;
00846         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
00847         if( result==tmp ) break;
00848         b.pause();
00849     }
00850 }
00851 #endif
00852 
00853 #ifndef __TBB_Flag
00854 typedef unsigned char __TBB_Flag;
00855 #endif
00856 typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
00857 
00858 #ifndef __TBB_TryLockByte
//! Try to acquire the byte lock: succeeds iff the flag atomically goes 0 -> 1.
inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
    return __TBB_machine_cmpswp1(&flag,1,0)==0;
}
00862 #endif
00863 
00864 #ifndef __TBB_LockByte
00865 inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
00866     if ( !__TBB_TryLockByte(flag) ) {
00867         tbb::internal::atomic_backoff b;
00868         do {
00869             b.pause();
00870         } while ( !__TBB_TryLockByte(flag) );
00871     }
00872     return 0;
00873 }
00874 #endif
00875 
00876 #ifndef  __TBB_UnlockByte
00877 #define __TBB_UnlockByte __TBB_store_with_release
00878 #endif
00879 
00880 #ifndef __TBB_ReverseByte
//! Reverse the order of the 8 bits in a byte via the precomputed lookup table.
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
00884 #endif
00885 
00886 template<typename T>
00887 T __TBB_ReverseBits(T src) {
00888     T dst;
00889     unsigned char *original = (unsigned char *) &src;
00890     unsigned char *reversed = (unsigned char *) &dst;
00891 
00892     for( int i = sizeof(T)-1; i >= 0; i-- )
00893         reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
00894 
00895     return dst;
00896 }
00897 
00898 #endif /* __TBB_machine_H */

Copyright © 2005-2013 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.