tbb_machine.h

00001 /*
00002     Copyright 2005-2012 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_machine_H
00022 #define __TBB_machine_H
00023 
00116 #include "tbb_stddef.h"
00117 
00118 namespace tbb {
00119 namespace internal {
00120 
00122 // Overridable helpers declarations
00123 //
00124 // A machine/*.h file may choose to define these templates; otherwise it must
00125 // request the default implementation by setting the appropriate __TBB_USE_GENERIC_XXX macro(s).
00126 //
00127 template <typename T, std::size_t S>
00128 struct machine_load_store;
00129 
00130 template <typename T, std::size_t S>
00131 struct machine_load_store_relaxed;
00132 
00133 template <typename T, std::size_t S>
00134 struct machine_load_store_seq_cst;
00135 //
00136 // End of overridable helpers declarations
00138 
00139 template<size_t S> struct atomic_selector;
00140 
00141 template<> struct atomic_selector<1> {
00142     typedef int8_t word;
00143     inline static word fetch_store ( volatile void* location, word value );
00144 };
00145 
00146 template<> struct atomic_selector<2> {
00147     typedef int16_t word;
00148     inline static word fetch_store ( volatile void* location, word value );
00149 };
00150 
00151 template<> struct atomic_selector<4> {
00152 #if _MSC_VER && !_WIN64
00153     // Work-around that avoids spurious /Wp64 warnings
00154     typedef intptr_t word;
00155 #else
00156     typedef int32_t word;
00157 #endif
00158     inline static word fetch_store ( volatile void* location, word value );
00159 };
00160 
00161 template<> struct atomic_selector<8> {
00162     typedef int64_t word;
00163     inline static word fetch_store ( volatile void* location, word value );
00164 };
00165 
00166 }} // namespaces internal, tbb
00167 
00168 #if _WIN32||_WIN64
00169 
00170 #ifdef _MANAGED
00171 #pragma managed(push, off)
00172 #endif
00173 
00174     #if __MINGW64__ || __MINGW32__
00175         extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00176         #define __TBB_Yield()  SwitchToThread()
00177         #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00178             #include "machine/gcc_generic.h"
00179         #elif __MINGW64__
00180             #include "machine/linux_intel64.h"
00181         #elif __MINGW32__
00182             #include "machine/linux_ia32.h"
00183         #endif
00184     #elif defined(_M_IX86)
00185         #include "machine/windows_ia32.h"
00186     #elif defined(_M_X64) 
00187         #include "machine/windows_intel64.h"
00188     #elif _XBOX
00189         #include "machine/xbox360_ppc.h"
00190     #endif
00191 
00192 #ifdef _MANAGED
00193 #pragma managed(pop)
00194 #endif
00195 
00196 #elif __TBB_DEFINE_MIC
00197 
00198     #include "machine/mic_common.h"
00199     #include "machine/linux_intel64.h"
00200 
00201 #elif __linux__ || __FreeBSD__ || __NetBSD__
00202 
00203     #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00204         #include "machine/gcc_generic.h"
00205     #elif __i386__
00206         #include "machine/linux_ia32.h"
00207     #elif __x86_64__
00208         #include "machine/linux_intel64.h"
00209     #elif __ia64__
00210         #include "machine/linux_ia64.h"
00211     #elif __powerpc__
00212         #include "machine/mac_ppc.h"
00213     #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
00214         #include "machine/gcc_generic.h"
00215     #endif
00216     #include "machine/linux_common.h"
00217 
00218 #elif __APPLE__
00219 
00220     #if __i386__
00221         #include "machine/linux_ia32.h"
00222     #elif __x86_64__
00223         #include "machine/linux_intel64.h"
00224     #elif __POWERPC__
00225         #include "machine/mac_ppc.h"
00226     #endif
00227     #include "machine/macos_common.h"
00228 
00229 #elif _AIX
00230 
00231     #include "machine/ibm_aix51.h"
00232 
00233 #elif __sun || __SUNPRO_CC
00234 
00235     #define __asm__ asm
00236     #define __volatile__ volatile
00237 
00238     #if __i386  || __i386__
00239         #include "machine/linux_ia32.h"
00240     #elif __x86_64__
00241         #include "machine/linux_intel64.h"
00242     #elif __sparc
00243         #include "machine/sunos_sparc.h"
00244     #endif
00245     #include <sched.h>
00246 
00247     #define __TBB_Yield() sched_yield()
00248 
00249 #endif /* OS selection */
00250 
00251 #ifndef __TBB_64BIT_ATOMICS
00252     #define __TBB_64BIT_ATOMICS 1
00253 #endif
00254 
00255 // Special atomic functions
00256 #if __TBB_USE_FENCED_ATOMICS
00257     #define __TBB_machine_cmpswp1   __TBB_machine_cmpswp1full_fence
00258     #define __TBB_machine_cmpswp2   __TBB_machine_cmpswp2full_fence
00259     #define __TBB_machine_cmpswp4   __TBB_machine_cmpswp4full_fence
00260     #define __TBB_machine_cmpswp8   __TBB_machine_cmpswp8full_fence
00261 
00262     #if __TBB_WORDSIZE==8
00263         #define __TBB_machine_fetchadd8             __TBB_machine_fetchadd8full_fence
00264         #define __TBB_machine_fetchstore8           __TBB_machine_fetchstore8full_fence
00265         #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd8release(P,V)
00266         #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd8acquire(P,1)
00267         #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd8release(P,(-1))
00268     #else
00269         #error Define macros for the 4-byte word size, similarly to the __TBB_WORDSIZE==8 branch above.
00270     #endif /* __TBB_WORDSIZE==8 */
00271 #else /* !__TBB_USE_FENCED_ATOMICS */
00272     #define __TBB_FetchAndAddWrelease(P,V)      __TBB_FetchAndAddW(P,V)
00273     #define __TBB_FetchAndIncrementWacquire(P)  __TBB_FetchAndAddW(P,1)
00274     #define __TBB_FetchAndDecrementWrelease(P)  __TBB_FetchAndAddW(P,(-1))
00275 #endif /* !__TBB_USE_FENCED_ATOMICS */
00276 
00277 #if __TBB_WORDSIZE==4
00278     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp4(P,V,C)
00279     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd4(P,V)
00280     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore4(P,V)
00281 #elif  __TBB_WORDSIZE==8
00282     #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
00283         #error These macros should only be used on 32-bit platforms.
00284     #endif
00285 
00286     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp8(P,V,C)
00287     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd8(P,V)
00288     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore8(P,V)
00289 #else /* __TBB_WORDSIZE != 8 */
00290     #error Unsupported machine word size.
00291 #endif /* __TBB_WORDSIZE */
00292 
00293 #ifndef __TBB_Pause
00294     inline void __TBB_Pause(int32_t) {
00295         __TBB_Yield();
00296     }
00297 #endif
00298 
00299 namespace tbb {
00300 
00302 inline void atomic_fence () { __TBB_full_memory_fence(); }
00303 
00304 namespace internal {
00305 
00307 
00308 class atomic_backoff : no_copy {
00310 
00312     static const int32_t LOOPS_BEFORE_YIELD = 16;
00313     int32_t count;
00314 public:
00315     atomic_backoff() : count(1) {}
00316 
00318     void pause() {
00319         if( count<=LOOPS_BEFORE_YIELD ) {
00320             __TBB_Pause(count);
00321             // Pause twice as long the next time.
00322             count*=2;
00323         } else {
00324             // Pause is so long that we might as well yield CPU to scheduler.
00325             __TBB_Yield();
00326         }
00327     }
00328 
00329     // Pause a bounded number of times; return false (without pausing) once the limit is reached.
00330     bool bounded_pause() {
00331         if( count<=LOOPS_BEFORE_YIELD ) {
00332             __TBB_Pause(count);
00333             // Pause twice as long the next time.
00334             count*=2;
00335             return true;
00336         } else {
00337             return false;
00338         }
00339     }
00340 
00341     void reset() {
00342         count = 1;
00343     }
00344 };
00345 
00347 
00348 template<typename T, typename U>
00349 void spin_wait_while_eq( const volatile T& location, U value ) {
00350     atomic_backoff backoff;
00351     while( location==value ) backoff.pause();
00352 }
00353 
00355 
00356 template<typename T, typename U>
00357 void spin_wait_until_eq( const volatile T& location, const U value ) {
00358     atomic_backoff backoff;
00359     while( location!=value ) backoff.pause();
00360 }
00361 
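// Usage sketch (illustrative only; the function and flag names below are hypothetical,
// not part of this header): a thread waiting for another thread to publish a value
// typically calls one of the spin_wait helpers, which combine the exponential pausing
// and eventual yielding of atomic_backoff shown above.
//
//     void wait_until_published( const volatile int& flag ) {
//         tbb::internal::spin_wait_until_eq( flag, 1 );   // pauses 1,2,4,... then yields
//     }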
00362 //TODO: add static_assert for the requirements stated below
00363 //TODO: check if it works with signed types
00364 
00365 // The following restrictions/limitations apply to this operation:
00366 //  - T should be unsigned, otherwise sign propagation will break the correctness of the bit manipulations.
00367 //  - T should be an integer type of at most 4 bytes, for the casts and calculations to work.
00368 //      (Together, these rules limit the applicability of masked CAS to uint8_t and uint16_t only,
00369 //      as it does nothing useful for 4-byte types).
00370 //  - The operation assumes that the architecture consistently uses either little-endian or big-endian:
00371 //      it does not support mixed-endian or page-specific bi-endian architectures.
00372 // This function is the only use of __TBB_BIG_ENDIAN.
00373 #if (__TBB_BIG_ENDIAN!=-1)
00374     #if ( __TBB_USE_GENERIC_PART_WORD_CAS)
00375         #error generic implementation of part-word CAS was explicitly disabled for this configuration
00376     #endif
00377 template<typename T>
00378 inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
00379     struct endianness{ static bool is_big_endian(){
00380         #ifndef __TBB_BIG_ENDIAN
00381             const uint32_t probe = 0x03020100;
00382             return (((const char*)(&probe))[0]==0x03);
00383         #elif (__TBB_BIG_ENDIAN==0) || (__TBB_BIG_ENDIAN==1)
00384             return __TBB_BIG_ENDIAN;
00385         #else
00386             #error unexpected value of __TBB_BIG_ENDIAN
00387         #endif
00388     }};
00389 
00390     const uint32_t byte_offset            = (uint32_t) ((uintptr_t)ptr & 0x3);
00391     volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );
00392 
00393     // location of T within uint32_t for a C++ shift operation
00394     const uint32_t bits_to_shift     = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
00395     const uint32_t mask              = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
00396     const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
00397     const uint32_t shifted_value     = ((uint32_t)value     << bits_to_shift)&mask;
00398 
00399     for(atomic_backoff b;;b.pause()) {
00400         const uint32_t surroundings  = *aligned_ptr & ~mask ; // reload the aligned_ptr value which might change during the pause
00401         const uint32_t big_comparand = surroundings | shifted_comparand ;
00402         const uint32_t big_value     = surroundings | shifted_value     ;
00403         // __TBB_machine_cmpswp4 presumed to have full fence.
00404         // Cast shuts up /Wp64 warning
00405         const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
00406         if( big_result == big_comparand                    // CAS succeeded
00407           || ((big_result ^ big_comparand) & mask) != 0)   // CAS failed and the bits of interest have changed
00408         {
00409             return T((big_result & mask) >> bits_to_shift);
00410         }
00411         else continue;                                     // CAS failed but the bits of interest were left unchanged
00412     }
00413 }
00414 #endif
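// Worked example (illustrative): for T==uint16_t located at byte offset 2 within its
// enclosing 4-byte word on a little-endian machine,
//     bits_to_shift = 8*2          = 16
//     mask          = 0xFFFF << 16 = 0xFFFF0000
// so the comparand and value occupy the upper half of the 32-bit CAS operands, while
// `surroundings` carries the untouched lower half unchanged through the CAS.
// On a big-endian machine the same location gives bits_to_shift = 8*(4-2-2) = 0
// and mask = 0x0000FFFF.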
00415 template<size_t S, typename T>
00416 inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );
00417 
00418 template<>
00419 inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
00420 #if __TBB_USE_GENERIC_PART_WORD_CAS
00421     return __TBB_MaskedCompareAndSwap<uint8_t>((volatile uint8_t *)ptr,value,comparand);
00422 #else
00423     return __TBB_machine_cmpswp1(ptr,value,comparand);
00424 #endif
00425 }
00426 
00427 template<>
00428 inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
00429 #if __TBB_USE_GENERIC_PART_WORD_CAS
00430     return __TBB_MaskedCompareAndSwap<uint16_t>((volatile uint16_t *)ptr,value,comparand);
00431 #else
00432     return __TBB_machine_cmpswp2(ptr,value,comparand);
00433 #endif
00434 }
00435 
00436 template<>
00437 inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
00438     // Cast shuts up /Wp64 warning
00439     return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
00440 }
00441 
00442 #if __TBB_64BIT_ATOMICS
00443 template<>
00444 inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
00445     return __TBB_machine_cmpswp8(ptr,value,comparand);
00446 }
00447 #endif
00448 
00449 template<size_t S, typename T>
00450 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
00451     atomic_backoff b;
00452     T result;
00453     for(;;) {
00454         result = *reinterpret_cast<volatile T *>(ptr);
00455         // __TBB_CompareAndSwapGeneric presumed to have full fence.
00456         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
00457             break;
00458         b.pause();
00459     }
00460     return result;
00461 }
00462 
00463 template<size_t S, typename T>
00464 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
00465     atomic_backoff b;
00466     T result;
00467     for(;;) {
00468         result = *reinterpret_cast<volatile T *>(ptr);
00469         // __TBB_CompareAndSwapGeneric presumed to have full fence.
00470         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
00471             break;
00472         b.pause();
00473     }
00474     return result;
00475 }
00476 
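// The same CAS-retry pattern extends to other read-modify-write operations.
// Sketch (hypothetical helper, not part of this header) of an atomic maximum built
// directly on the full-fence 4-byte CAS:
//
//     inline uint32_t atomic_fetch_max4( volatile void *ptr, uint32_t operand ) {
//         tbb::internal::atomic_backoff b;
//         for(;;) {
//             uint32_t snapshot = *reinterpret_cast<volatile uint32_t*>(ptr);
//             uint32_t desired  = snapshot < operand ? operand : snapshot;
//             // Succeeds only if nobody changed the word since the snapshot was taken.
//             if( (uint32_t)__TBB_machine_cmpswp4( ptr, desired, snapshot )==snapshot )
//                 return snapshot;
//             b.pause();
//         }
//     }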
00477 #if __TBB_USE_GENERIC_PART_WORD_CAS
00478 #define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
00479 #define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
00480 #endif
00481 
00482 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
00483 #define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
00484 #define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
00485 #endif
00486 
00487 #if __TBB_USE_GENERIC_FETCH_ADD
00488 #define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
00489 #endif
00490 
00491 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
00492 #define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
00493 #endif
00494 
00495 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
00496 #define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
00497 #define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
00498 #endif
00499 
00500 #if __TBB_USE_GENERIC_FETCH_STORE
00501 #define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
00502 #endif
00503 
00504 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
00505 #define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
00506 #endif
00507 
00508 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
00509 #define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S)                                             \
00510     atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) {  \
00511         return __TBB_machine_fetchstore##S( location, value );                                          \
00512     }
00513 
00514 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
00515 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
00516 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
00517 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)
00518 
00519 #undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
00520 #endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00521 
00522 #if __TBB_USE_GENERIC_DWORD_LOAD_STORE
00523 inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) {
00524     for(;;) {
00525         int64_t result = *(int64_t *)ptr;
00526         if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break;
00527     }
00528 }
00529 
00530 inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
00531     // The comparand and the new value may be anything as long as they are equal; the value
00532     // should be unlikely to actually occur at 'location', so the CAS typically fails and simply returns the current contents.
00533     const int64_t anyvalue = 2305843009213693951LL;
00534     return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
00535 }
00536 #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
00537 
00538 #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
00539 
00545 template <typename T, size_t S>
00546 struct machine_load_store {
00547     static T load_with_acquire ( const volatile T& location ) {
00548         T to_return = location;
00549         __TBB_acquire_consistency_helper();
00550         return to_return;
00551     }
00552     static void store_with_release ( volatile T &location, T value ) {
00553         __TBB_release_consistency_helper();
00554         location = value;
00555     }
00556 };
00557 
00558 // In general, plain loads and stores generated by a 32-bit compiler are not atomic for 64-bit types.
00559 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00560 template <typename T>
00561 struct machine_load_store<T,8> {
00562     static T load_with_acquire ( const volatile T& location ) {
00563         return (T)__TBB_machine_load8( (const volatile void*)&location );
00564     }
00565     static void store_with_release ( volatile T& location, T value ) {
00566         __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
00567     }
00568 };
00569 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00570 #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */
00571 
00572 #if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
00573 template <typename T, size_t S>
00574 struct machine_load_store_seq_cst {
00575     static T load ( const volatile T& location ) {
00576         __TBB_full_memory_fence();
00577         return machine_load_store<T,S>::load_with_acquire( location );
00578     }
00579 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
00580     static void store ( volatile T &location, T value ) {
00581         atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
00582     }
00583 #else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00584     static void store ( volatile T &location, T value ) {
00585         machine_load_store<T,S>::store_with_release( location, value );
00586         __TBB_full_memory_fence();
00587     }
00588 #endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00589 };
00590 
00591 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00592 
00594 template <typename T>
00595 struct machine_load_store_seq_cst<T,8> {
00596     static T load ( const volatile T& location ) {
00597         // The comparand and the new value may be anything as long as they are equal; the value
00598         // should be unlikely to actually occur at 'location', so the CAS typically fails and simply returns the current contents.
00599         const int64_t anyvalue = 2305843009213693951LL;
00600         return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
00601     }
00602     static void store ( volatile T &location, T value ) {
00603         int64_t result = (volatile int64_t&)location;
00604         while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
00605             result = (volatile int64_t&)location;
00606     }
00607 };
00608 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00609 #endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */
00610 
00611 #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
00612 // Relaxed operations add the volatile qualifier to prevent the compiler from optimizing them out.
00616 template <typename T, size_t S>
00617 struct machine_load_store_relaxed {
00618     static inline T load ( const volatile T& location ) {
00619         return location;
00620     }
00621     static inline void store ( volatile T& location, T value ) {
00622         location = value;
00623     }
00624 };
00625 
00626 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00627 template <typename T>
00628 struct machine_load_store_relaxed<T,8> {
00629     static inline T load ( const volatile T& location ) {
00630         return (T)__TBB_machine_load8( (const volatile void*)&location );
00631     }
00632     static inline void store ( volatile T& location, T value ) {
00633         __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
00634     }
00635 };
00636 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00637 #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */
00638 
00639 #undef __TBB_WORDSIZE //this macro is forbidden to use outside of atomic machinery
00640 
00641 template<typename T>
00642 inline T __TBB_load_with_acquire(const volatile T &location) {
00643     return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
00644 }
00645 template<typename T, typename V>
00646 inline void __TBB_store_with_release(volatile T& location, V value) {
00647     machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
00648 }
00650 inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
00651     machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value );
00652 }
00653 
00654 template<typename T>
00655 inline T __TBB_load_full_fence(const volatile T &location) {
00656     return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
00657 }
00658 template<typename T, typename V>
00659 inline void __TBB_store_full_fence(volatile T& location, V value) {
00660     machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
00661 }
00663 inline void __TBB_store_full_fence(volatile size_t& location, size_t value) {
00664     machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value );
00665 }
00666 
00667 template<typename T>
00668 inline T __TBB_load_relaxed (const volatile T& location) {
00669     return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(location) );
00670 }
00671 template<typename T, typename V>
00672 inline void __TBB_store_relaxed ( volatile T& location, V value ) {
00673     machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location), T(value) );
00674 }
00676 inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) {
00677     machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<size_t&>(location), value );
00678 }
00679 
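// Pairing sketch (illustrative; `payload` and `ready` are hypothetical variables, e.g.
// `int payload; volatile int ready = 0;`): a release store on the producer side and an
// acquire load on the consumer side order the payload accesses relative to the flag.
//
//     // producer thread
//     payload = 42;                              // ordinary store
//     __TBB_store_with_release( ready, 1 );      // publish
//
//     // consumer thread
//     while( !__TBB_load_with_acquire(ready) ) __TBB_Yield();
//     int v = payload;                           // guaranteed to observe 42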
00680 // The macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should expand to a type whose alignment is at
00681 // least as strict as that of type T.  The type should have a trivial default constructor and
00682 // destructor, so that arrays of that type can be declared without initializers.
00683 // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
00684 // to a type bigger than T.
00685 // The default definition here works on machines where integers are naturally aligned and the
00686 // strictest required alignment is 64 bytes.
00687 #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
00688 
00689 #if __TBB_ATTRIBUTE_ALIGNED_PRESENT
00690 
00691 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
00692 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
00693     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
00694 } __attribute__((aligned(PowerOf2)));
00695 #define __TBB_alignof(T) __alignof__(T)
00696 
00697 #elif __TBB_DECLSPEC_ALIGN_PRESENT
00698 
00699 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
00700 __declspec(align(PowerOf2))                           \
00701 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
00702     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
00703 };
00704 #define __TBB_alignof(T) __alignof(T)
00705 
00706 #else /* A compiler with unknown syntax for data alignment */
00707 #error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
00708 #endif
00709 
00710 /* Now declare types aligned to useful powers of two */
00711 // TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms?
00712 __TBB_DefineTypeWithAlignment(16)
00713 __TBB_DefineTypeWithAlignment(32)
00714 __TBB_DefineTypeWithAlignment(64)
00715 
00716 typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;
00717 
00718 // The primary template is declared as an incomplete type so that instantiation fails for unsupported alignments.
00719 template<size_t N> struct type_with_alignment;
00720 
00721 // Specializations for allowed alignments
00722 template<> struct type_with_alignment<1> { char member; };
00723 template<> struct type_with_alignment<2> { uint16_t member; };
00724 template<> struct type_with_alignment<4> { uint32_t member; };
00725 template<> struct type_with_alignment<8> { uint64_t member; };
00726 template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; };
00727 template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; };
00728 template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; };
00729 
00730 #if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN
00732 
00734 template<size_t Size, typename T>
00735 struct work_around_alignment_bug {
00736     static const size_t alignment = __TBB_alignof(T);
00737 };
00738 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
00739 #else
00740 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
00741 #endif  /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */
00742 
00743 #endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
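// Usage sketch (illustrative; `node`, `MyType`, and `storage` are hypothetical names):
// the macro is typically used to reserve raw, suitably aligned storage for an object
// without running its constructor, e.g. inside a container node.
//
//     struct node {
//         node* next;
//         union {
//             __TBB_TypeWithAlignmentAtLeastAsStrict(MyType) aligner;  // enforces alignment only
//             char storage[sizeof(MyType)];                            // raw bytes for a MyType
//         };
//     };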
00744 
00745 // A class template is used here so that the static table below is instantiated only in modules that actually use it.
00746 template<typename T>
00747 struct reverse {
00748     static const T byte_table[256];
00749 };
00750 // An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
00751 // values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
00752 template<typename T>
00753 const T reverse<T>::byte_table[256] = {
00754     0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
00755     0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
00756     0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
00757     0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
00758     0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
00759     0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
00760     0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
00761     0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
00762     0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
00763     0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
00764     0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
00765     0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
00766     0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
00767     0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
00768     0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
00769     0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
00770 };
00771 
00772 } // namespace internal
00773 } // namespace tbb
00774 
00775 // Preserving access to legacy APIs
00776 using tbb::internal::__TBB_load_with_acquire;
00777 using tbb::internal::__TBB_store_with_release;
00778 
00779 // Mapping historically used names to the ones expected by atomic_load_store_traits
00780 #define __TBB_load_acquire  __TBB_load_with_acquire
00781 #define __TBB_store_release __TBB_store_with_release
00782 
00783 #ifndef __TBB_Log2
00784 inline intptr_t __TBB_Log2( uintptr_t x ) {
00785     if( x==0 ) return -1;
00786     intptr_t result = 0;
00787     uintptr_t tmp;
00788 
00789     if( sizeof(x)>4 && (tmp = ((uint64_t)x)>>32)) { x=tmp; result += 32; }
00790     if( (tmp = x>>16) ) { x=tmp; result += 16; }
00791     if( (tmp = x>>8) )  { x=tmp; result += 8; }
00792     if( (tmp = x>>4) )  { x=tmp; result += 4; }
00793     if( (tmp = x>>2) )  { x=tmp; result += 2; }
00794     return (x&2)? result+1: result;
00795 }
00796 #endif
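// Worked example (illustrative): __TBB_Log2(40) evaluates to 5, the position of the most
// significant set bit, since 40 = 0b101000 and 2^5 <= 40 < 2^6. The shifts above binary-search
// for that bit 32 positions at a time (on 64-bit), then 16, 8, 4, and 2, with the final
// (x&2) test accounting for the last remaining bit.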
00797 
00798 #ifndef __TBB_AtomicOR
00799 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
00800     tbb::internal::atomic_backoff b;
00801     for(;;) {
00802         uintptr_t tmp = *(volatile uintptr_t *)operand;
00803         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
00804         if( result==tmp ) break;
00805         b.pause();
00806     }
00807 }
00808 #endif
00809 
00810 #ifndef __TBB_AtomicAND
00811 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
00812     tbb::internal::atomic_backoff b;
00813     for(;;) {
00814         uintptr_t tmp = *(volatile uintptr_t *)operand;
00815         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
00816         if( result==tmp ) break;
00817         b.pause();
00818     }
00819 }
00820 #endif
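// Usage sketch (illustrative; `flags` and the bit masks are hypothetical): these helpers
// atomically set or clear bits in a word-sized bit field.
//
//     volatile uintptr_t flags = 0;
//     __TBB_AtomicOR ( &flags, 0x4 );               // set bit 2
//     __TBB_AtomicAND( &flags, ~uintptr_t(0x4) );   // clear bit 2 again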
00821 
00822 #ifndef __TBB_Flag
00823 typedef unsigned char __TBB_Flag;
00824 #endif
00825 typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
00826 
00827 #ifndef __TBB_TryLockByte
00828 inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
00829     return __TBB_machine_cmpswp1(&flag,1,0)==0;
00830 }
00831 #endif
00832 
00833 #ifndef __TBB_LockByte
00834 inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
00835     if ( !__TBB_TryLockByte(flag) ) {
00836         tbb::internal::atomic_backoff b;
00837         do {
00838             b.pause();
00839         } while ( !__TBB_TryLockByte(flag) );
00840     }
00841     return 0;
00842 }
00843 #endif
00844 
00845 #ifndef  __TBB_UnlockByte
00846 #define __TBB_UnlockByte __TBB_store_with_release
00847 #endif
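// Usage sketch (illustrative; `spin_mutex_byte` is a hypothetical wrapper, not a TBB class):
// the three byte-lock primitives compose into a minimal spin mutex.
//
//     struct spin_mutex_byte {
//         __TBB_atomic_flag flag;
//         spin_mutex_byte() : flag(0) {}
//         void lock()     { __TBB_LockByte(flag); }        // spins with exponential backoff
//         bool try_lock() { return __TBB_TryLockByte(flag); }
//         void unlock()   { __TBB_UnlockByte(flag, 0); }   // release-store of 0
//     };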
00848 
00849 #ifndef __TBB_ReverseByte
00850 inline unsigned char __TBB_ReverseByte(unsigned char src) {
00851     return tbb::internal::reverse<unsigned char>::byte_table[src];
00852 }
00853 #endif
00854 
00855 template<typename T>
00856 T __TBB_ReverseBits(T src) {
00857     T dst;
00858     unsigned char *original = (unsigned char *) &src;
00859     unsigned char *reversed = (unsigned char *) &dst;
00860 
00861     for( int i = sizeof(T)-1; i >= 0; i-- )
00862         reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
00863 
00864     return dst;
00865 }
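// Worked example (illustrative): __TBB_ReverseBits<uint16_t>(0x00C1) == 0x8300, since
// 0000 0000 1100 0001 reversed bit-for-bit is 1000 0011 0000 0000. The result is independent
// of endianness because both the byte order and the bit order within each byte are reversed.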
00866 
00867 #endif /* __TBB_machine_H */
