32 #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
33 #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
40 namespace __gnu_parallel
51 template<
typename RandomAccessIterator>
55 typedef typename traits_type::value_type value_type;
56 typedef typename traits_type::difference_type difference_type;
90 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
113 template<
typename RandomNumberGenerator>
116 {
return rng.genrand_bits(logp); }
120 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
123 RandomNumberGenerator>* pus)
126 typedef typename traits_type::value_type value_type;
127 typedef typename traits_type::difference_type difference_type;
134 difference_type length = sd->
starts[iam + 1] - sd->
starts[iam];
136 difference_type* dist =
new difference_type[sd->
num_bins + 1];
138 value_type** temporaries =
new value_type*[d->
num_threads];
148 for (difference_type i = 0; i < length; ++i)
154 ++(dist[oracle + 1]);
158 sd->
dist[b][iam + 1] = dist[b];
167 __gnu_sequential::partial_sum(sd->
dist[s + 1],
175 for (
bin_index s = 0; s < d->bins_begin; ++s)
176 global_offset += sd->dist[s + 1][d->num_threads];
180 for (
bin_index s = d->bins_begin; s < d->bins_end; ++s)
182 for (
int t = 0; t < d->num_threads + 1; ++t)
183 sd->dist[s + 1][t] += offset;
184 offset = sd->dist[s + 1][d->num_threads];
187 sd->temporaries[iam] =
static_cast<value_type*
>(
188 ::operator
new(
sizeof(value_type) * offset));
193 for (
bin_index b = 0; b < sd->num_bins + 1; ++b)
194 dist[b] = sd->dist[b][iam];
195 for (
bin_index b = 0; b < sd->num_bins; ++b)
196 bin_proc[b] = sd->bin_proc[b];
198 temporaries[t] = sd->temporaries[t];
200 RandomAccessIterator source = sd->source;
201 difference_type start = sd->starts[iam];
204 for (difference_type i = 0; i < length; ++i)
210 ::new(&(temporaries[target_p][dist[target_bin + 1]++]))
211 value_type(*(source + i + start));
217 delete[] temporaries;
222 for (
bin_index b = d->bins_begin; b < d->bins_end; ++b)
225 sd->temporaries[iam] +
226 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
228 sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
230 std::copy(begin, end, sd->source + global_offset +
231 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
234 ::operator
delete(sd->temporaries[iam]);
246 return (T)1 << (
__log2(x - 1) + 1);
256 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
259 RandomAccessIterator end,
261 <RandomAccessIterator>::difference_type n,
263 RandomNumberGenerator& rng)
266 typedef typename traits_type::value_type value_type;
267 typedef typename traits_type::difference_type difference_type;
278 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
282 num_bins_cache = std::max<difference_type>(
283 1, n / (__s.L1_cache_size_lb /
sizeof(value_type)));
288 num_bins = std::min<difference_type>(n, num_bins_cache);
290 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
292 num_bins = std::min<difference_type>(__s.
TLB_size / 2, num_bins);
296 if (num_bins < num_bins_cache)
301 num_bins_cache =
static_cast<bin_index>(std::max<difference_type>(
307 std::min(n, static_cast<difference_type>(num_bins_cache)));
309 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
312 static_cast<difference_type>(__s.
TLB_size / 2), num_bins);
315 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
319 num_threads = std::min<bin_index>(num_threads, num_bins);
321 if (num_threads <= 1)
326 difference_type* starts;
328 # pragma omp parallel num_threads(num_threads)
337 sd.
dist =
new difference_type*[num_bins + 1];
339 for (
bin_index b = 0; b < num_bins + 1; ++b)
340 sd.
dist[b] =
new difference_type[num_threads + 1];
341 for (
bin_index b = 0; b < (num_bins + 1); ++b)
346 starts = sd.
starts =
new difference_type[num_threads + 1];
351 difference_type chunk_length = n / num_threads,
352 split = n % num_threads, start = 0;
353 difference_type bin_chunk_length = num_bins / num_threads,
354 bin_split = num_bins % num_threads;
358 start += (i < split) ? (chunk_length + 1) : chunk_length;
362 bin_cursor += (i < bin_split) ?
363 (bin_chunk_length + 1) : bin_chunk_length;
365 for (; j < bin_cursor; ++j)
371 starts[num_threads] = start;
379 for (
int s = 0; s < (num_bins + 1); ++s)
392 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
395 RandomAccessIterator end,
396 RandomNumberGenerator& rng)
399 typedef typename traits_type::value_type value_type;
400 typedef typename traits_type::difference_type difference_type;
402 difference_type n = end - begin;
407 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
410 std::max<difference_type>
411 (1, n / (__s.L1_cache_size_lb /
sizeof(value_type)));
416 num_bins =
std::min(n, (difference_type)num_bins_cache);
417 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
423 if (num_bins < num_bins_cache)
428 static_cast<bin_index>(std::max<difference_type>(
435 (
std::min(n, static_cast<difference_type>(num_bins_cache)));
437 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
440 std::min<difference_type>(__s.
TLB_size / 2, num_bins);
443 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
447 int num_bits =
__log2(num_bins);
451 value_type* target =
static_cast<value_type*
>(
452 ::operator
new(
sizeof(value_type) * n));
454 difference_type* dist0 =
new difference_type[num_bins + 1],
455 * dist1 =
new difference_type[num_bins + 1];
457 for (
int b = 0; b < num_bins + 1; ++b)
462 for (difference_type i = 0; i < n; ++i)
468 ++(dist0[oracle + 1]);
472 __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
474 for (
int b = 0; b < num_bins + 1; ++b)
478 for (difference_type i = 0; i < n; ++i)
479 ::
new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
481 for (
int b = 0; b < num_bins; ++b)
484 target + dist1[b + 1],
489 std::copy(target, target + n, begin);
494 ::operator
delete(target);
497 __gnu_sequential::random_shuffle(begin, end, rng);
505 template<
typename RandomAccessIterator,
typename RandomNumberGenerator>
508 RandomAccessIterator end,
512 typedef typename traits_type::difference_type difference_type;
513 difference_type n = end - begin;