parallel_scan.h

00001 /*
00002     Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_parallel_scan_H
00022 #define __TBB_parallel_scan_H
00023 
00024 #include "task.h"
00025 #include "aligned_space.h"
00026 #include <new>
00027 #include "partitioner.h"
00028 
00029 namespace tbb {
00030 
00032 
00033 struct pre_scan_tag {
          // Tag type for the non-final pass: is_final_scan() always reports false.
          // start_scan::execute() passes a pre_scan_tag() to the user body when it
          // processes a range whose results are not being finalized.
00034     static bool is_final_scan() {return false;}
00035 };
00036 
00038 
00039 struct final_scan_tag {
          // Tag type for the final pass: is_final_scan() always reports true.
          // final_sum::execute() and start_scan::execute() pass a final_scan_tag()
          // to the user body when a range is processed for the last time.
00040     static bool is_final_scan() {return true;}
00041 };
00042 
00044 namespace internal {
00045 
00047 
00048     template<typename Range, typename Body>
00049     class final_sum: public task {
              // Task that owns a Body and applies it to one Range as a final scan.
              // Construction is two-step: the constructor only builds my_body;
              // finish_construction() later supplies the range and my_stuff_last.
00050     public:
              // Body run by execute(); split-constructed from the constructor argument.
00051         Body my_body;
00052     private:
              // Storage for one Range. It holds a live Range only after
              // finish_construction() has placement-new'ed one into it.
00053         aligned_space<Range,1> my_range;
              // If non-null, execute() copies my_body into *my_stuff_last via assign().
00055         Body* my_stuff_last;
00056     public:
              // Builds my_body from body_ using Body's splitting constructor.
              // my_stuff_last is handed to poison_pointer() until
              // finish_construction() gives it a real value.
00057         final_sum( Body& body_ ) :
00058             my_body(body_,split())
00059         {
00060             poison_pointer(my_stuff_last);
00061         }
              // Destroys the Range held in my_range. The call is unconditional, so
              // finish_construction() must have run first (start_scan::run() calls it
              // just before destroying an otherwise unused final_sum).
              // NOTE(review): finish_scan::execute() destroys my_right_zombie, and no
              // finish_construction() call on that object is visible on that path --
              // confirm this is harmless for Range types with non-trivial destructors.
00062         ~final_sum() {
00063             my_range.begin()->~Range();
00064         }     
              // Second construction step: copy-constructs range_ into my_range and
              // records stuff_last_ (may be NULL) as the body to update after the scan.
00065         void finish_construction( const Range& range_, Body* stuff_last_ ) {
00066             new( my_range.begin() ) Range(range_);
00067             my_stuff_last = stuff_last_;
00068         }
00069     private:
              // Runs my_body over the stored range with final_scan_tag, then, if
              // my_stuff_last is non-null, assigns my_body to it. Always returns NULL.
00070         /*override*/ task* execute() {
00071             my_body( *my_range.begin(), final_scan_tag() );
00072             if( my_stuff_last )
00073                 my_stuff_last->assign(my_body);
00074             return NULL;
00075         }
00076     };       
00077 
00079 
00080     template<typename Range, typename Body>
00081     class sum_node: public task {
              // Node of the binary tree that start_scan::execute() builds, one node
              // per range split. When executed with a non-null my_body it splits
              // my_range and hands each half either to a final_sum (no child node)
              // or to the child sum_node for that half.
00082         typedef final_sum<Range,Body> final_sum_type;
00083     public:
              // The three public fields are assigned from outside (start_scan::run()
              // or a parent's create_child()) before the node is executed.
              // If non-null, execute() reverse_join's its body into my_left_sum's body.
00084         final_sum_type *my_incoming; 
              // final_sum offered to the left half; execute() only does work while
              // this is non-null and resets it to NULL afterwards.
00085         final_sum_type *my_body;
              // Forwarded by execute() to the right half.
00086         Body *my_stuff_last;
00087     private:
              // Written by the left-child start_scan through its my_sum slot.
00088         final_sum_type *my_left_sum;
              // Child nodes for the two halves; NULL when that half has no node.
              // They are filled in through the return slots used by start_scan and
              // finish_scan.
00089         sum_node *my_left;
00090         sum_node *my_right;     
              // When true, execute() creates no task for the left half.
              // finish_scan::execute() clears it if a left child node exists.
00091         bool my_left_is_final;
              // Range covered by this node; execute() splits it in two.
00092         Range my_range;
              // Private: instances are created by the friend class start_scan.
              // NOTE(review): range_ is taken by value (const Range), so the range
              // is copied once for the argument and again into my_range.
              // my_stuff_last is neither initialized nor poisoned here.
00093         sum_node( const Range range_, bool left_is_final_ ) : 
00094             my_left_sum(NULL), 
00095             my_left(NULL), 
00096             my_right(NULL), 
00097             my_left_is_final(left_is_final_), 
00098             my_range(range_)
00099         {
00100             // Poison fields that will be set by second pass.
00101             poison_pointer(my_body);
00102             poison_pointer(my_incoming);
00103         }
              // Picks the task that will process one half of this node.
              //   range_      - the half's range (used only when n is NULL)
              //   f           - final_sum to run for, or pass down to, that half
              //   n           - child sum_node for that half, or NULL
              //   incoming_   - value stored in n->my_incoming
              //   stuff_last_ - body to update after the scan, may be NULL
              // Returns &f (recycled as a child of this node and given its range)
              // when there is no child node; otherwise returns n after storing
              // f, incoming_ and stuff_last_ in its public fields.
00104         task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
00105             if( !n ) {
00106                 f.recycle_as_child_of( *this );
00107                 f.finish_construction( range_, stuff_last_ );
00108                 return &f;
00109             } else {
00110                 n->my_body = &f;
00111                 n->my_incoming = incoming_;
00112                 n->my_stuff_last = stuff_last_;
00113                 return n;
00114             }
00115         }
              // First run (my_body non-null): creates the tasks for both halves and
              // recycles this node as their continuation. The later run as that
              // continuation finds my_body == NULL and simply returns NULL.
00116         /*override*/ task* execute() {
00117             if( my_body ) {
                      // Combine the incoming sum into the left-half sum before the
                      // left-half sum is passed on to the right half.
00118                 if( my_incoming )
00119                     my_left_sum->my_body.reverse_join( my_incoming->my_body );
00120                 recycle_as_continuation();
00121                 sum_node& c = *this;
                      // Right half: gets the range split off my_range, with
                      // my_left_sum serving both as its final_sum and as its incoming sum.
                      // b is built before a because the split takes its range out of
                      // my_range. NOTE(review): assumes the TBB splitting-constructor
                      // convention (source keeps one part, new range takes the other) -- confirm.
00122                 task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
                      // Left half: skipped when my_left_is_final; otherwise it gets
                      // what remains in my_range together with my_body and my_incoming.
00123                 task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
                      // Reference count = number of child tasks actually created.
00124                 set_ref_count( (a!=NULL)+(b!=NULL) );
                      // Makes the continuation run take the else branch below.
00125                 my_body = NULL; 
                      // With two tasks, spawn b and return a; with one, return b.
00126                 if( a ) spawn(*b);
00127                 else a = b;
00128                 return a;
00129             } else {
00130                 return NULL;
00131             }
00132         }
              // start_scan constructs nodes and takes the addresses of my_left_sum,
              // my_left and my_right; finish_scan reads and updates the private fields.
00133         template<typename Range_,typename Body_,typename Partitioner_>
00134         friend class start_scan;
00135 
00136         template<typename Range_,typename Body_>
00137         friend class finish_scan;
00138     };
00139 
00141 
00142     template<typename Range, typename Body>
00143     class finish_scan: public task {
              // Continuation task that start_scan::execute() allocates each time it
              // splits a range. Its execute() decides whether the sum_node created
              // for that split is kept (published through my_return_slot) or destroyed.
00144         typedef sum_node<Range,Body> sum_node_type;
00145         typedef final_sum<Range,Body> final_sum_type;
              // Sum slot of the start_scan that split (may be NULL). When it is
              // non-null and a zombie exists, execute() updates the body of **my_sum.
00146         final_sum_type** const my_sum;
              // Pointer variable that receives &my_result when the node is kept.
00147         sum_node_type*& my_return_slot;
00148     public:
              // Set by a right-child start_scan that is treated as stolen: the new
              // final_sum it allocated for itself. NULL otherwise.
00149         final_sum_type* my_right_zombie;
              // The sum_node created for this split.
00150         sum_node_type& my_result;
00151 
              // Runs after the tasks of the split have completed. Always returns NULL.
00152         /*override*/ task* execute() {
                  // my_result's reference count must match its number of child nodes.
00153             __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
                  // A left child node exists, so the left half is not treated as
                  // final by sum_node::execute().
00154             if( my_result.my_left )
00155                 my_result.my_left_is_final = false;
                  // The right half ran on its own body: join the left-half sum into
                  // the body reported through my_sum.
00156             if( my_right_zombie && my_sum ) 
00157                 ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
00158             __TBB_ASSERT( !my_return_slot, NULL );
                  // Keep the node if there is a zombie or a right child node;
                  // otherwise it is not needed and is destroyed.
00159             if( my_right_zombie || my_result.my_right ) {
00160                 my_return_slot = &my_result;
00161             } else {
00162                 destroy( my_result );
00163             }
                  // A zombie with no sum slot to report through and no right child
                  // node is destroyed here.
00164             if( my_right_zombie && !my_sum && !my_result.my_right ) {
00165                 destroy(*my_right_zombie);
00166                 my_right_zombie = NULL;
00167             }
00168             return NULL;
00169         }
00170 
              //   return_slot_ - where to publish the node; must currently be NULL
              //   sum_         - sum slot of the splitting start_scan, may be NULL
              //   result_      - the sum_node created for this split
00171         finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) : 
00172             my_sum(sum_),
00173             my_return_slot(return_slot_), 
00174             my_right_zombie(NULL),
00175             my_result(result_)
00176         {
00177             __TBB_ASSERT( !my_return_slot, NULL );
00178         }
00179     };
00180 
00182 
00183     template<typename Range, typename Body, typename Partitioner=simple_partitioner>
00184     class start_scan: public task {
              // First-pass task: recursively splits its range, runs the body on the
              // leaves, and builds the sum_node tree used by the second pass.
              // run() is the entry point used by the parallel_scan() overloads.
00185         typedef sum_node<Range,Body> sum_node_type;
00186         typedef final_sum<Range,Body> final_sum_type;
              // final_sum whose body this task applies to its range.
00187         final_sum_type* my_body;
              // If non-null, execute() stores my_body in *my_sum after processing a leaf.
00189         final_sum_type** my_sum; 
              // Slot handed to finish_scan, through which a kept sum_node is published.
00190         sum_node_type** my_return_slot;
              // sum_node created by the split this task descends from; NULL for the root task.
00192         sum_node_type* my_parent_sum;
              // True while this task's leaf work may run as a final scan.
00193         bool my_is_final;
              // True for the task split off as the right half (set by execute()).
00194         bool my_is_right_child;
00195         Range my_range;
00196         typename Partitioner::partition_type my_partition;
00197         /*override*/ task* execute();
00198     public:
              // Splitting constructor for a right child: shares the parent's body,
              // sum slot and finality, and takes its range and partition by splitting
              // the parent's. my_is_right_child starts false; execute() sets it after
              // construction. return_slot_ must currently hold NULL.
00199         start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
00200             my_body(parent_.my_body),
00201             my_sum(parent_.my_sum),
00202             my_return_slot(&return_slot_),
00203             my_parent_sum(parent_sum_),
00204             my_is_final(parent_.my_is_final),
00205             my_is_right_child(false),
00206             my_range(parent_.my_range,split()),
00207             my_partition(parent_.my_partition,split())
00208         {
00209             __TBB_ASSERT( !*my_return_slot, NULL );
00210         }
00211 
              // Root constructor: covers the whole range, has no sum slot and no
              // parent node, and starts as a final scan. return_slot_ must hold NULL.
00212         start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
00213             my_body(&body_),
00214             my_sum(NULL),
00215             my_return_slot(&return_slot_),
00216             my_parent_sum(NULL),
00217             my_is_final(true),
00218             my_is_right_child(false),
00219             my_range(range_),
00220             my_partition(partitioner_)
00221         {
00222             __TBB_ASSERT( !*my_return_slot, NULL );
00223         }
00224 
              // Runs the whole scan of range_ with body_ and waits for it to finish.
              // Does nothing when range_ is empty. On return body_ has been updated
              // via assign(), either here or by a final_sum holding &body_.
00225         static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
00226             if( !range_.empty() ) {
00227                 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
                      // Filled in by the first pass if a sum_node tree had to be kept.
00228                 internal::sum_node<Range,Body>* root = NULL;
00229                 typedef internal::final_sum<Range,Body> final_sum_type;
                      // Working body, split-constructed from the caller's body.
00230                 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
00231                 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
00232                     /*my_return_slot=*/root,
00233                     range_,
00234                     *temp_body,
00235                     partitioner_ );
00236                 task::spawn_root_and_wait( pass1 );
00237                 if( root ) {
                          // Second pass: set the fields the sum_node constructor left
                          // poisoned or unset, then run the tree. body_ is passed down
                          // as my_stuff_last.
00238                     root->my_body = temp_body;
00239                     root->my_incoming = NULL;
00240                     root->my_stuff_last = &body_;
00241                     task::spawn_root_and_wait( *root );
00242                 } else {
                          // No tree was kept: copy the working body back to the caller.
00243                     body_.assign(temp_body->my_body);
                          // ~final_sum() destroys its Range unconditionally, so give
                          // it one before destroying the task.
00244                     temp_body->finish_construction( range_, NULL );
00245                     temp_body->destroy(*temp_body);
00246                 }
00247             }
00248         }
00249     };
00250 
00251     template<typename Range, typename Body, typename Partitioner>
00252     task* start_scan<Range,Body,Partitioner>::execute() {
              // One first-pass step: either process my_range as a leaf, or split it,
              // spawn the right half and continue as the left half.
              // Returns this (recycled as the left child) after a split, else NULL.
00253         typedef internal::finish_scan<Range,Body> finish_pass1_type;
              // A task with a parent sum_node was made a child of a finish_scan in
              // the split branch below, so parent() is cast to that type.
00254         finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
00255         // Inspecting p->my_result.my_left_sum would ordinarily be a race condition.
00256         // But we inspect it only if we are not a stolen task, in which case we
00257         // know that the task assigning to p->my_result.my_left_sum has completed.
              // p is dereferenced only when my_is_right_child is true; right children
              // are always constructed with a non-null parent sum_node (see below).
00258         bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
00259         if( treat_as_stolen ) {
00260             // Invocation is for right child that has been really stolen or needs to be virtually stolen
                  // Work on a fresh final_sum split from the current body, record it on
                  // the parent finish_scan, and give up running as a final scan.
00261             p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
00262             my_is_final = false;
00263         }
00264         task* next_task = NULL;
              // Leaf case: a right child that was not treated as stolen, an
              // indivisible range, or a partitioner that says to execute now.
00265         if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
                  // Final scan if still allowed; otherwise a pre-scan, but only when
                  // there is a sum slot to report the result through.
00266             if( my_is_final )
00267                 (my_body->my_body)( my_range, final_scan_tag() );
00268             else if( my_sum )
00269                 (my_body->my_body)( my_range, pre_scan_tag() );
                  // Publish the final_sum that holds this range's result.
00270             if( my_sum ) 
00271                 *my_sum = my_body;
00272             __TBB_ASSERT( !*my_return_slot, NULL );
00273         } else {
                  // Split case: create the sum_node for this split, as an additional
                  // child of the parent sum_node or as a root task when there is none.
00274             sum_node_type* result;
00275             if( my_parent_sum ) 
00276                 result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
00277             else
00278                 result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
                  // c takes over as continuation and receives this task's current
                  // return slot and sum slot.
00279             finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
00280             // Split off right child
                  // The splitting constructor also shrinks this task's my_range and
                  // my_partition; b publishes any node through result->my_right.
00281             start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
00282             b.my_is_right_child = true;    
00283             // Left child is recycling of *this.  Must recycle this before spawning b, 
00284             // otherwise b might complete and decrement c.ref_count() to zero, which
00285             // would cause c.execute() to run prematurely.
00286             recycle_as_child_of(c);
00287             c.set_ref_count(2);
00288             c.spawn(b);
                  // Re-target this task as the left child: it reports its sum into
                  // result->my_left_sum and any node into result->my_left.
00289             my_sum = &result->my_left_sum;
00290             my_return_slot = &result->my_left;
00291             my_is_right_child = false;
00292             next_task = this;
00293             my_parent_sum = result; 
00294             __TBB_ASSERT( !*my_return_slot, NULL );
00295         }
00296         return next_task;
00297     } 
00298 } // namespace internal
00300 
00301 // Requirements on the Range concept are documented in blocked_range.h.
00302 
00320 
00322 
00323 template<typename Range, typename Body>
00324 void parallel_scan( const Range& range, Body& body ) {
          // Scans range with body using a default-constructed
          // __TBB_DEFAULT_PARTITIONER; the work is done by internal::start_scan::run(),
          // which returns immediately for an empty range and otherwise updates body.
00325     internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
00326 }
00327 
00329 
00330 template<typename Range, typename Body>
00331 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
          // Scans range with body, forwarding the caller's simple_partitioner to
          // internal::start_scan::run(), which does the work and updates body.
00332     internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
00333 }
00334 
00336 
00337 template<typename Range, typename Body>
00338 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
          // Scans range with body, forwarding the caller's auto_partitioner to
          // internal::start_scan::run(), which does the work and updates body.
00339     internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
00340 }
00342 
00343 } // namespace tbb
00344 
00345 #endif /* __TBB_parallel_scan_H */
00346 

Copyright © 2005-2013 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.