Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
parallel_scan.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16 
17 
18 
19 */
20 
21 #ifndef __TBB_parallel_scan_H
22 #define __TBB_parallel_scan_H
23 
24 #include "task.h"
25 #include "aligned_space.h"
26 #include <new>
27 #include "partitioner.h"
28 
29 namespace tbb {
30 
32 
33 struct pre_scan_tag {
34  static bool is_final_scan() {return false;}
35  operator bool() {return is_final_scan();}
36 };
37 
39 
41  static bool is_final_scan() {return true;}
42  operator bool() {return is_final_scan();}
43 };
44 
46 namespace internal {
47 
49 
50  template<typename Range, typename Body>
51  class final_sum: public task {
52  public:
53  Body my_body;
54  private:
58  public:
59  final_sum( Body& body_ ) :
60  my_body(body_,split())
61  {
62  poison_pointer(my_stuff_last);
63  }
65  my_range.begin()->~Range();
66  }
67  void finish_construction( const Range& range_, Body* stuff_last_ ) {
68  new( my_range.begin() ) Range(range_);
69  my_stuff_last = stuff_last_;
70  }
71  private:
73  my_body( *my_range.begin(), final_scan_tag() );
74  if( my_stuff_last )
75  my_stuff_last->assign(my_body);
76  return NULL;
77  }
78  };
79 
81 
82  template<typename Range, typename Body>
83  class sum_node: public task {
85  public:
86  final_sum_type *my_incoming;
87  final_sum_type *my_body;
89  private:
90  final_sum_type *my_left_sum;
94  Range my_range;
95  sum_node( const Range range_, bool left_is_final_ ) :
96  my_stuff_last(NULL),
97  my_left_sum(NULL),
98  my_left(NULL),
99  my_right(NULL),
100  my_left_is_final(left_is_final_),
101  my_range(range_)
102  {
103  // Poison fields that will be set by second pass.
104  poison_pointer(my_body);
105  poison_pointer(my_incoming);
106  }
107  task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
108  if( !n ) {
109  f.recycle_as_child_of( *this );
110  f.finish_construction( range_, stuff_last_ );
111  return &f;
112  } else {
113  n->my_body = &f;
114  n->my_incoming = incoming_;
115  n->my_stuff_last = stuff_last_;
116  return n;
117  }
118  }
120  if( my_body ) {
121  if( my_incoming )
122  my_left_sum->my_body.reverse_join( my_incoming->my_body );
123  recycle_as_continuation();
124  sum_node& c = *this;
125  task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
126  task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
127  set_ref_count( (a!=NULL)+(b!=NULL) );
128  my_body = NULL;
129  if( a ) spawn(*b);
130  else a = b;
131  return a;
132  } else {
133  return NULL;
134  }
135  }
136  template<typename Range_,typename Body_,typename Partitioner_>
137  friend class start_scan;
138 
139  template<typename Range_,typename Body_>
140  friend class finish_scan;
141  };
142 
144 
145  template<typename Range, typename Body>
146  class finish_scan: public task {
149  final_sum_type** const my_sum;
150  sum_node_type*& my_return_slot;
151  public:
152  final_sum_type* my_right_zombie;
153  sum_node_type& my_result;
154 
156  __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
157  if( my_result.my_left )
158  my_result.my_left_is_final = false;
159  if( my_right_zombie && my_sum )
160  ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
161  __TBB_ASSERT( !my_return_slot, NULL );
162  if( my_right_zombie || my_result.my_right ) {
163  my_return_slot = &my_result;
164  } else {
165  destroy( my_result );
166  }
167  if( my_right_zombie && !my_sum && !my_result.my_right ) {
168  destroy(*my_right_zombie);
169  my_right_zombie = NULL;
170  }
171  return NULL;
172  }
173 
174  finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
175  my_sum(sum_),
176  my_return_slot(return_slot_),
177  my_right_zombie(NULL),
178  my_result(result_)
179  {
180  __TBB_ASSERT( !my_return_slot, NULL );
181  }
182  };
183 
185 
186  template<typename Range, typename Body, typename Partitioner=simple_partitioner>
187  class start_scan: public task {
190  final_sum_type* my_body;
192  final_sum_type** my_sum;
193  sum_node_type** my_return_slot;
195  sum_node_type* my_parent_sum;
198  Range my_range;
199  typename Partitioner::partition_type my_partition;
200  task* execute() __TBB_override ;
201  public:
202  start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
203  my_body(parent_.my_body),
204  my_sum(parent_.my_sum),
205  my_return_slot(&return_slot_),
206  my_parent_sum(parent_sum_),
207  my_is_final(parent_.my_is_final),
208  my_is_right_child(false),
209  my_range(parent_.my_range,split()),
210  my_partition(parent_.my_partition,split())
211  {
212  __TBB_ASSERT( !*my_return_slot, NULL );
213  }
214 
215  start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
216  my_body(&body_),
217  my_sum(NULL),
218  my_return_slot(&return_slot_),
219  my_parent_sum(NULL),
220  my_is_final(true),
221  my_is_right_child(false),
222  my_range(range_),
223  my_partition(partitioner_)
224  {
225  __TBB_ASSERT( !*my_return_slot, NULL );
226  }
227 
228  static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
229  if( !range_.empty() ) {
230  typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
231  internal::sum_node<Range,Body>* root = NULL;
232  final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
233  start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
234  /*my_return_slot=*/root,
235  range_,
236  *temp_body,
237  partitioner_ );
238  temp_body->my_body.reverse_join(body_);
239  task::spawn_root_and_wait( pass1 );
240  if( root ) {
241  root->my_body = temp_body;
242  root->my_incoming = NULL;
243  root->my_stuff_last = &body_;
244  task::spawn_root_and_wait( *root );
245  } else {
246  body_.assign(temp_body->my_body);
247  temp_body->finish_construction( range_, NULL );
248  temp_body->destroy(*temp_body);
249  }
250  }
251  }
252  };
253 
254  template<typename Range, typename Body, typename Partitioner>
256  typedef internal::finish_scan<Range,Body> finish_pass1_type;
257  finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
258  // Inspecting p->result.left_sum would ordinarily be a race condition.
259  // But we inspect it only if we are not a stolen task, in which case we
260  // know that task assigning to p->result.left_sum has completed.
261  bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
262  if( treat_as_stolen ) {
263  // Invocation is for right child that has been really stolen or needs to be virtually stolen
264  p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
265  my_is_final = false;
266  }
267  task* next_task = NULL;
268  if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
269  if( my_is_final )
270  (my_body->my_body)( my_range, final_scan_tag() );
271  else if( my_sum )
272  (my_body->my_body)( my_range, pre_scan_tag() );
273  if( my_sum )
274  *my_sum = my_body;
275  __TBB_ASSERT( !*my_return_slot, NULL );
276  } else {
277  sum_node_type* result;
278  if( my_parent_sum )
279  result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
280  else
281  result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
282  finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
283  // Split off right child
284  start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
285  b.my_is_right_child = true;
286  // Left child is recycling of *this. Must recycle this before spawning b,
287  // otherwise b might complete and decrement c.ref_count() to zero, which
288  // would cause c.execute() to run prematurely.
289  recycle_as_child_of(c);
290  c.set_ref_count(2);
291  c.spawn(b);
292  my_sum = &result->my_left_sum;
293  my_return_slot = &result->my_left;
294  my_is_right_child = false;
295  next_task = this;
296  my_parent_sum = result;
297  __TBB_ASSERT( !*my_return_slot, NULL );
298  }
299  return next_task;
300  }
301 
302  template<typename Range, typename Value, typename Scan, typename ReverseJoin>
304  Value my_sum;
305  const Value& identity_element;
306  const Scan& my_scan;
307  const ReverseJoin& my_reverse_join;
308  public:
309  lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
310  : my_sum(identity)
311  , identity_element(identity)
312  , my_scan(scan)
313  , my_reverse_join(rev_join) {}
314 
316  : my_sum(b.identity_element)
317  , identity_element(b.identity_element)
318  , my_scan(b.my_scan)
319  , my_reverse_join(b.my_reverse_join) {}
320 
321  template<typename Tag>
322  void operator()( const Range& r, Tag tag ) {
323  my_sum = my_scan(r, my_sum, tag);
324  }
325 
327  my_sum = my_reverse_join(a.my_sum, my_sum);
328  }
329 
331  my_sum = b.my_sum;
332  }
333 
334  Value result() const {
335  return my_sum;
336  }
337  };
338 } // namespace internal
340 
341 // Requirements on Range concept are documented in blocked_range.h
342 
360 
362 
363 template<typename Range, typename Body>
364 void parallel_scan( const Range& range, Body& body ) {
366 }
367 
369 
370 template<typename Range, typename Body>
371 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
373 }
374 
376 
377 template<typename Range, typename Body>
378 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
380 }
381 
383 
384 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
385 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
386  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
388  return body.result();
389 }
390 
392 
393 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
394 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
395  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
396  tbb::parallel_scan(range,body,partitioner);
397  return body.result();
398 }
399 
401 
402 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
403 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
404  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
405  tbb::parallel_scan(range,body,partitioner);
406  return body.result();
407 }
408 
410 
411 } // namespace tbb
412 
413 #endif /* __TBB_parallel_scan_H */
414 
sum_node_type * my_parent_sum
#define __TBB_override
Definition: tbb_stddef.h:244
static bool is_final_scan()
Definition: parallel_scan.h:41
sum_node< Range, Body > sum_node_type
T * begin() const
Pointer to beginning of array.
Definition: aligned_space.h:39
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:169
lambda_scan_body(lambda_scan_body &b, split)
task * execute() __TBB_override
Should be overridden by derived classes.
sum_node_type ** my_return_slot
final_sum_type **const my_sum
Base class for types that should not be assigned.
Definition: tbb_stddef.h:324
sum_node_type *& my_return_slot
final_sum_type * my_incoming
Definition: parallel_scan.h:86
final_sum_type * my_body
Definition: parallel_scan.h:87
Split work to be done in the scan.
Definition: parallel_scan.h:83
Used to indicate that the initial scan is being performed.
Definition: parallel_scan.h:33
final_sum_type * my_right_zombie
void operator()(const Range &r, Tag tag)
sum_node< Range, Body > sum_node_type
start_scan(sum_node_type *&return_slot_, const Range &range_, final_sum_type &body_, const Partitioner &partitioner_)
final_sum< Range, Body > final_sum_type
Initial task to split the work.
void const char const char int ITT_FORMAT __itt_group_sync p
final_sum_type ** my_sum
Performs final scan for a leaf.
Definition: parallel_scan.h:51
void reverse_join(lambda_scan_body &a)
void finish_construction(const Range &range_, Body *stuff_last_)
Definition: parallel_scan.h:67
int ref_count() const
The internal reference count.
Definition: task.h:862
task * create_child(const Range &range_, final_sum_type &f, sum_node *n, final_sum_type *incoming_, Body *stuff_last_)
task * execute() __TBB_override
Should be overridden by derived classes.
Body * my_stuff_last
Where to put result of last subrange, or NULL if not last subrange.
Definition: parallel_scan.h:57
final_sum< Range, Body > final_sum_type
static void run(const Range &range_, Body &body_, const Partitioner &partitioner_)
The graph class.
An auto partitioner.
Definition: partitioner.h:629
task * execute() __TBB_override
Should be overridden by derived classes.
void parallel_scan(const Range &range, Body &body)
Parallel prefix with default partitioner.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
void poison_pointer(T *__TBB_atomic &)
Definition: tbb_stddef.h:309
const ReverseJoin & my_reverse_join
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:602
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:636
Partitioner::partition_type my_partition
final_sum_type * my_body
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:781
lambda_scan_body(const Value &identity, const Scan &scan, const ReverseJoin &rev_join)
final_sum_type * my_left_sum
Definition: parallel_scan.h:90
Base class for user-defined tasks.
Definition: task.h:592
void assign(lambda_scan_body &b)
final_sum< Range, Body > final_sum_type
Definition: parallel_scan.h:84
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:72
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:399
void recycle_as_child_of(task &new_parent)
Change this to be a child of new_parent.
Definition: task.h:698
static bool is_final_scan()
Definition: parallel_scan.h:34
Combine partial results.
A simple partitioner.
Definition: partitioner.h:602
Used to indicate that the final scan is being performed.
Definition: parallel_scan.h:40
aligned_space< Range > my_range
Definition: parallel_scan.h:55
sum_node(const Range range_, bool left_is_final_)
Definition: parallel_scan.h:95
finish_scan(sum_node_type *&return_slot_, final_sum_type **sum_, sum_node_type &result_)

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.