00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __TBB_parallel_scan_H
00022 #define __TBB_parallel_scan_H
00023
00024 #include "task.h"
00025 #include "aligned_space.h"
00026 #include <new>
00027 #include "partitioner.h"
00028
00029 namespace tbb {
00030
00032
//! Tag type handed to a scan body's operator() when the pre-scan pass is being run.
struct pre_scan_tag {
    //! Reports whether this tag denotes the final scan; for the pre-scan tag it never does.
    static bool is_final_scan() {
        const bool final_pass = false;
        return final_pass;
    }
};
00036
00038
//! Tag type handed to a scan body's operator() when the final scan pass is being run.
struct final_scan_tag {
    //! Reports whether this tag denotes the final scan; for the final-scan tag it always does.
    static bool is_final_scan() {
        const bool final_pass = true;
        return final_pass;
    }
};
00042
00044 namespace internal {
00045
00047
00048 template<typename Range, typename Body>
00049 class final_sum: public task {
00050 public:
00051 Body my_body;
00052 private:
00053 aligned_space<Range,1> my_range;
00055 Body* my_stuff_last;
00056 public:
00057 final_sum( Body& body_ ) :
00058 my_body(body_,split())
00059 {
00060 poison_pointer(my_stuff_last);
00061 }
00062 ~final_sum() {
00063 my_range.begin()->~Range();
00064 }
00065 void finish_construction( const Range& range_, Body* stuff_last_ ) {
00066 new( my_range.begin() ) Range(range_);
00067 my_stuff_last = stuff_last_;
00068 }
00069 private:
00070 task* execute() {
00071 my_body( *my_range.begin(), final_scan_tag() );
00072 if( my_stuff_last )
00073 my_stuff_last->assign(my_body);
00074 return NULL;
00075 }
00076 };
00077
00079
00080 template<typename Range, typename Body>
00081 class sum_node: public task {
00082 typedef final_sum<Range,Body> final_sum_type;
00083 public:
00084 final_sum_type *my_incoming;
00085 final_sum_type *my_body;
00086 Body *my_stuff_last;
00087 private:
00088 final_sum_type *my_left_sum;
00089 sum_node *my_left;
00090 sum_node *my_right;
00091 bool my_left_is_final;
00092 Range my_range;
00093 sum_node( const Range range_, bool left_is_final_ ) :
00094 my_left_sum(NULL),
00095 my_left(NULL),
00096 my_right(NULL),
00097 my_left_is_final(left_is_final_),
00098 my_range(range_)
00099 {
00100
00101 poison_pointer(my_body);
00102 poison_pointer(my_incoming);
00103 }
00104 task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
00105 if( !n ) {
00106 f.recycle_as_child_of( *this );
00107 f.finish_construction( range_, stuff_last_ );
00108 return &f;
00109 } else {
00110 n->my_body = &f;
00111 n->my_incoming = incoming_;
00112 n->my_stuff_last = stuff_last_;
00113 return n;
00114 }
00115 }
00116 task* execute() {
00117 if( my_body ) {
00118 if( my_incoming )
00119 my_left_sum->my_body.reverse_join( my_incoming->my_body );
00120 recycle_as_continuation();
00121 sum_node& c = *this;
00122 task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
00123 task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
00124 set_ref_count( (a!=NULL)+(b!=NULL) );
00125 my_body = NULL;
00126 if( a ) spawn(*b);
00127 else a = b;
00128 return a;
00129 } else {
00130 return NULL;
00131 }
00132 }
00133 template<typename Range_,typename Body_,typename Partitioner_>
00134 friend class start_scan;
00135
00136 template<typename Range_,typename Body_>
00137 friend class finish_scan;
00138 };
00139
00141
00142 template<typename Range, typename Body>
00143 class finish_scan: public task {
00144 typedef sum_node<Range,Body> sum_node_type;
00145 typedef final_sum<Range,Body> final_sum_type;
00146 final_sum_type** const my_sum;
00147 sum_node_type*& my_return_slot;
00148 public:
00149 final_sum_type* my_right_zombie;
00150 sum_node_type& my_result;
00151
00152 task* execute() {
00153 __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
00154 if( my_result.my_left )
00155 my_result.my_left_is_final = false;
00156 if( my_right_zombie && my_sum )
00157 ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
00158 __TBB_ASSERT( !my_return_slot, NULL );
00159 if( my_right_zombie || my_result.my_right ) {
00160 my_return_slot = &my_result;
00161 } else {
00162 destroy( my_result );
00163 }
00164 if( my_right_zombie && !my_sum && !my_result.my_right ) {
00165 destroy(*my_right_zombie);
00166 my_right_zombie = NULL;
00167 }
00168 return NULL;
00169 }
00170
00171 finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
00172 my_sum(sum_),
00173 my_return_slot(return_slot_),
00174 my_right_zombie(NULL),
00175 my_result(result_)
00176 {
00177 __TBB_ASSERT( !my_return_slot, NULL );
00178 }
00179 };
00180
00182
00183 template<typename Range, typename Body, typename Partitioner=simple_partitioner>
00184 class start_scan: public task {
00185 typedef sum_node<Range,Body> sum_node_type;
00186 typedef final_sum<Range,Body> final_sum_type;
00187 final_sum_type* my_body;
00189 final_sum_type** my_sum;
00190 sum_node_type** my_return_slot;
00192 sum_node_type* my_parent_sum;
00193 bool my_is_final;
00194 bool my_is_right_child;
00195 Range my_range;
00196 typename Partitioner::partition_type my_partition;
00197 task* execute();
00198 public:
00199 start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
00200 my_body(parent_.my_body),
00201 my_sum(parent_.my_sum),
00202 my_return_slot(&return_slot_),
00203 my_parent_sum(parent_sum_),
00204 my_is_final(parent_.my_is_final),
00205 my_is_right_child(false),
00206 my_range(parent_.my_range,split()),
00207 my_partition(parent_.my_partition,split())
00208 {
00209 __TBB_ASSERT( !*my_return_slot, NULL );
00210 }
00211
00212 start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
00213 my_body(&body_),
00214 my_sum(NULL),
00215 my_return_slot(&return_slot_),
00216 my_parent_sum(NULL),
00217 my_is_final(true),
00218 my_is_right_child(false),
00219 my_range(range_),
00220 my_partition(partitioner_)
00221 {
00222 __TBB_ASSERT( !*my_return_slot, NULL );
00223 }
00224
00225 static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
00226 if( !range_.empty() ) {
00227 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
00228 internal::sum_node<Range,Body>* root = NULL;
00229 typedef internal::final_sum<Range,Body> final_sum_type;
00230 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
00231 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
00232 root,
00233 range_,
00234 *temp_body,
00235 partitioner_ );
00236 task::spawn_root_and_wait( pass1 );
00237 if( root ) {
00238 root->my_body = temp_body;
00239 root->my_incoming = NULL;
00240 root->my_stuff_last = &body_;
00241 task::spawn_root_and_wait( *root );
00242 } else {
00243 body_.assign(temp_body->my_body);
00244 temp_body->finish_construction( range_, NULL );
00245 temp_body->destroy(*temp_body);
00246 }
00247 }
00248 }
00249 };
00250
00251 template<typename Range, typename Body, typename Partitioner>
00252 task* start_scan<Range,Body,Partitioner>::execute() {
00253 typedef internal::finish_scan<Range,Body> finish_pass1_type;
00254 finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
00255
00256
00257
00258 bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
00259 if( treat_as_stolen ) {
00260
00261 p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
00262 my_is_final = false;
00263 }
00264 task* next_task = NULL;
00265 if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
00266 if( my_is_final )
00267 (my_body->my_body)( my_range, final_scan_tag() );
00268 else if( my_sum )
00269 (my_body->my_body)( my_range, pre_scan_tag() );
00270 if( my_sum )
00271 *my_sum = my_body;
00272 __TBB_ASSERT( !*my_return_slot, NULL );
00273 } else {
00274 sum_node_type* result;
00275 if( my_parent_sum )
00276 result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,my_is_final);
00277 else
00278 result = new(task::allocate_root()) sum_node_type(my_range,my_is_final);
00279 finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
00280
00281 start_scan& b = *new( c.allocate_child() ) start_scan( result->my_right, *this, result );
00282 b.my_is_right_child = true;
00283
00284
00285
00286 recycle_as_child_of(c);
00287 c.set_ref_count(2);
00288 c.spawn(b);
00289 my_sum = &result->my_left_sum;
00290 my_return_slot = &result->my_left;
00291 my_is_right_child = false;
00292 next_task = this;
00293 my_parent_sum = result;
00294 __TBB_ASSERT( !*my_return_slot, NULL );
00295 }
00296 return next_task;
00297 }
00298 }
00300
00301
00302
00320
00322
00323 template<typename Range, typename Body>
00324 void parallel_scan( const Range& range, Body& body ) {
00325 internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
00326 }
00327
00329
00330 template<typename Range, typename Body>
00331 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00332 internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
00333 }
00334
00336
00337 template<typename Range, typename Body>
00338 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00339 internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
00340 }
00342
00343 }
00344
00345 #endif
00346