// Copyright 2019-2024, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto.  Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.

#ifndef _NVCOMPILER_NUMERIC_EXECUTION_HEADER_
#define _NVCOMPILER_NUMERIC_EXECUTION_HEADER_

#if !defined(_NVCOMPILER_NUMERIC_HEADER_) || \
    !defined(_NVCOMPILER_EXECUTION_HEADER_)
  #error <nvhpc/numeric_execution.hpp> should not be included directly. Include <numeric> and <execution> instead.
#endif

#include <nvhpc/stdpar_config.hpp>

#if _NVHPC_INCLUDE_THRUST
#define __NVCOMPILER_PROCESSING_THRUST_INCLUDES
#include <thrust/adjacent_difference.h>
#include <thrust/inner_product.h>
#include <thrust/reduce.h>
#include <thrust/scan.h>
#include <thrust/transform_reduce.h>
#include <thrust/transform_scan.h>
#undef __NVCOMPILER_PROCESSING_THRUST_INCLUDES
#endif

namespace std {

namespace __stdpar {

// _EP = execution policy
// _FIt = forward iterator
// _RIt = random access iterator
// _UF = unary operator/function
// _BF = binary operator/function

// Primary template for the per-back-end implementations of the numeric
// algorithms.  It is only declared here; explicit specializations for
// individual __back_end values follow.
template <__back_end _BE> struct __numeric_impl;

// The sequential back end.  Run the algorithm sequentially.

template <> struct __numeric_impl<__back_end::__seq> {

  // Every member takes a __no_policy tag as its first parameter and ignores
  // it; the remaining parameters follow the order of the corresponding
  // std:: algorithm overload.

  //========== adjacent_difference ==========

  // Forwards to the serial std::adjacent_difference.
  template <class _FIt1, class _FIt2>
  static _FIt2 adjacent_difference(__no_policy, _FIt1 __first, _FIt1 __last,
                                   _FIt2 __d_first) {
    return std::adjacent_difference(__first, __last, __d_first);
  }

  // Same as above, with the difference computed by __f.
  template <class _FIt1, class _FIt2, class _BF>
  static _FIt2 adjacent_difference(__no_policy, _FIt1 __first, _FIt1 __last,
                                   _FIt2 __d_first, _BF __f) {
    return std::adjacent_difference(__first, __last, __d_first, __f);
  }

  //========== exclusive_scan ==========

  // Writes, for each input element, the running total of __init and all
  // preceding elements (combined with operator+).  Returns the end of the
  // output range.
  template <class _FIt1, class _FIt2, class _T>
  static _FIt2 exclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _T __init) {
    while (__first != __last) {
      // The next total is computed from the input before the output element
      // is written.
      auto __new_sum = __init + *__first;
      *__d_first = __init;
      __init = std::move(__new_sum);
      ++__first;
      ++__d_first;
    }
    return __d_first;
  }

  // Same as above, with elements combined by __f instead of operator+.
  template <class _FIt1, class _FIt2, class _T, class _BF>
  static _FIt2 exclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _T __init, _BF __f) {
    while (__first != __last) {
      auto __new_sum = __f(__init, *__first);
      *__d_first = __init;
      __init = std::move(__new_sum);
      ++__first;
      ++__d_first;
    }
    return __d_first;
  }

  //========== inclusive_scan ==========

  // Forwards to std::partial_sum.
  template <class _FIt1, class _FIt2>
  static _FIt2 inclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first) {
    return std::partial_sum(__first, __last, __d_first);
  }

  // Forwards to std::partial_sum with __f as the combining operation.
  template <class _FIt1, class _FIt2, class _BF>
  static _FIt2 inclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _BF __f) {
    return std::partial_sum(__first, __last, __d_first, __f);
  }

  // Inclusive scan seeded with __init: each output element is the running
  // total of __init and every input element up to and including the current
  // one.  Returns the end of the output range.
  template <class _FIt1, class _FIt2, class _BF, class _T>
  static _FIt2 inclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _BF __f, _T __init) {
    // There is no C++11 version of this algorithm.
    while (__first != __last) {
      __init = __f(std::move(__init), *__first);
      *__d_first = __init;
      ++__first;
      ++__d_first;
    }
    return __d_first;
  }

  //========== reduce ==========

  // Forwards to std::accumulate.
  template <class _FIt, class _T>
  static _T reduce(__no_policy, _FIt __first, _FIt __last, _T __init) {
    return std::accumulate(__first, __last, __init);
  }

  // Forwards to std::accumulate with __f as the combining operation.
  template <class _FIt, class _T, class _BF>
  static _T reduce(__no_policy, _FIt __first, _FIt __last, _T __init, _BF __f) {
    return std::accumulate(__first, __last, __init, __f);
  }

  //========== transform_exclusive_scan ==========

  // Exclusive scan over __ft(element): each output element is the running
  // total (via __fsum) of __init and the transformed elements that precede
  // the current one.  Returns the end of the output range.
  template <class _FIt1, class _FIt2, class _T, class _BF, class _UF>
  static _FIt2 transform_exclusive_scan(__no_policy, _FIt1 __first,
                                        _FIt1 __last, _FIt2 __d_first,
                                        _T __init, _BF __fsum, _UF __ft) {
    while (__first != __last) {
      auto __new_sum = __fsum(__init, __ft(*__first));
      *__d_first = __init;
      __init = std::move(__new_sum);
      ++__first;
      ++__d_first;
    }
    return __d_first;
  }

  //========== transform_inclusive_scan ==========

  // Inclusive scan over __ft(element) without an initial value.  An empty
  // input range writes nothing and returns __d_first unchanged; otherwise the
  // end of the output range is returned.
  template <class _FIt1, class _FIt2, class _BF, class _UF>
  static _FIt2 transform_inclusive_scan(__no_policy, _FIt1 __first,
                                        _FIt1 __last, _FIt2 __d_first,
                                        _BF __fsum, _UF __ft) {
    if (__first == __last) {
      return __d_first;
    }
    // Seed the accumulator from the first transformed element.  Using "auto"
    // gives a decayed value type, so this works when __ft returns a reference
    // or a const-qualified type and does not require the result type to be
    // default-constructible.
    auto __sum = __ft(*__first);
    *__d_first = __sum;
    ++__first;
    ++__d_first;
    while (__first != __last) {
      __sum = __fsum(std::move(__sum), __ft(*__first));
      *__d_first = __sum;
      ++__first;
      ++__d_first;
    }
    return __d_first;
  }

  // Inclusive scan over __ft(element), seeded with __init.
  template <class _FIt1, class _FIt2, class _BF, class _UF, class _T>
  static _FIt2 transform_inclusive_scan(__no_policy, _FIt1 __first,
                                        _FIt1 __last, _FIt2 __d_first,
                                        _BF __fsum, _UF __ft, _T __init) {
    while (__first != __last) {
      __init = __fsum(std::move(__init), __ft(*__first));
      *__d_first = __init;
      ++__first;
      ++__d_first;
    }
    return __d_first;
  }

  //========== transform_reduce ==========

  // Two-range form with the default operations; forwards to
  // std::inner_product.
  template <class _FIt1, class _FIt2, class _T>
  static _T transform_reduce(__no_policy, _FIt1 __first1, _FIt1 __last1,
                             _FIt2 __first2, _T __init) {
    return std::inner_product(__first1, __last1, __first2, __init);
  }

  // Two-range form: __f2 combines corresponding elements and __f1 reduces the
  // results; forwards to std::inner_product.
  template <class _FIt1, class _FIt2, class _T, class _BF1, class _BF2>
  static _T transform_reduce(__no_policy, _FIt1 __first1, _FIt1 __last1,
                             _FIt2 __first2, _T __init, _BF1 __f1, _BF2 __f2) {
    return std::inner_product(__first1, __last1, __first2, __init, __f1, __f2);
  }

  // Single-range form: __f2 transforms each element and __f1 reduces the
  // results.  Implemented with std::inner_product by passing the same range
  // as both inputs; the adapter lambda ignores its second argument and applies
  // __f2 to the first.
  template <class _FIt, class _T, class _BF, class _UF>
  static _T transform_reduce(__no_policy, _FIt __first, _FIt __last, _T __init,
                             _BF __f1, _UF __f2) {
    return std::inner_product(
      __first, __last, __first, __init, __f1,
      [=](typename std::iterator_traits<_FIt>::value_type __x,
          typename std::iterator_traits<_FIt>::value_type __y) mutable {
        return __f2(__x);
      });
  }
};

// The Thrust parallel back end.  This is used for both CPU and GPU.  The
// correct Thrust execution policy to use to choose between CPU and GPU is a
// template parameter.

template <> struct __numeric_impl<__back_end::__thrust_multicore> {

#if _NVHPC_INCLUDE_THRUST

  // Each member forwards the execution policy __ep to the matching Thrust
  // algorithm and passes the remaining arguments along, reordering them
  // where the Thrust signature differs from the std:: one.

  //========== adjacent_difference ==========

  template <class _EP, class _FIt1, class _FIt2>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 adjacent_difference(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                                   _FIt2 __d_first) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    return thrust::adjacent_difference(static_cast<_EP&&>(__ep), __first,
                                       __last, __d_first);
  }

  template <class _EP, class _FIt1, class _FIt2, class _BF>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 adjacent_difference(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                                   _FIt2 __d_first, _BF __f) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    return thrust::adjacent_difference(static_cast<_EP&&>(__ep), __first,
                                       __last, __d_first, __f);
  }

  //========== exclusive_scan ==========

  template <class _EP, class _FIt1, class _FIt2, class _T>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 exclusive_scan(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    return thrust::exclusive_scan(static_cast<_EP&&>(__ep), __first, __last,
                                  __d_first, __init);
  }

  template <class _EP, class _FIt1, class _FIt2, class _T, class _BF>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 exclusive_scan(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _T __init, _BF __f) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    return thrust::exclusive_scan(static_cast<_EP&&>(__ep), __first, __last,
                                  __d_first, __init, __f);
  }

  //========== inclusive_scan ==========

  template <class _EP, class _FIt1, class _FIt2>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 inclusive_scan(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    return thrust::inclusive_scan(static_cast<_EP&&>(__ep), __first, __last,
                                  __d_first);
  }

  template <class _EP, class _FIt1, class _FIt2, class _BF>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 inclusive_scan(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _BF __f) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    return thrust::inclusive_scan(static_cast<_EP&&>(__ep), __first, __last,
                                  __d_first, __f);
  }

  // This overload is only compiled for THRUST_VERSION >= 200700.
#if THRUST_VERSION >= 200700
  template <class _EP, class _FIt1, class _FIt2, class _BF, class _T>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 inclusive_scan(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _BF __f, _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    // The initial value is passed to Thrust ahead of the binary operation.
    return thrust::inclusive_scan(static_cast<_EP&&>(__ep), __first, __last,
                                  __d_first, __init, __f);
  }
#endif

  //========== reduce ==========

  template <class _EP, class _FIt, class _T>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _T reduce(_EP&& __ep, _FIt __first, _FIt __last, _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt);
    return thrust::reduce(static_cast<_EP&&>(__ep), __first, __last, __init);
  }

  template <class _EP, class _FIt, class _T, class _BF>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _T reduce(_EP&& __ep, _FIt __first, _FIt __last, _T __init, _BF __f) {
    _ASSERT_RANDOM_ACCESS(_FIt);
    _ASSERT_NOT_FUNC_PTR(_BF);
    return thrust::reduce(static_cast<_EP&&>(__ep), __first, __last, __init,
                          __f);
  }

  //========== transform_exclusive_scan ==========

  template <class _EP, class _FIt1, class _FIt2, class _T, class _BF, class _UF>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 transform_exclusive_scan(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                                        _FIt2 __d_first, _T __init, _BF __fsum,
                                        _UF __ft) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    _ASSERT_NOT_FUNC_PTR(_UF);
    // Thrust orders the trailing arguments as (transform, init, reduction)
    // rather than (init, reduction, transform).
    return thrust::transform_exclusive_scan(static_cast<_EP&&>(__ep), __first,
                                            __last, __d_first, __ft, __init,
                                            __fsum);
  }

  //========== transform_inclusive_scan ==========

  template <class _EP, class _FIt1, class _FIt2, class _BF, class _UF>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 transform_inclusive_scan(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                                        _FIt2 __d_first, _BF __fsum, _UF __ft) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    _ASSERT_NOT_FUNC_PTR(_UF);
    // Thrust takes the transform before the reduction operation.
    return thrust::transform_inclusive_scan(static_cast<_EP&&>(__ep), __first,
                                            __last, __d_first, __ft, __fsum);
  }

  // This overload is only compiled for THRUST_VERSION >= 200800.
#if THRUST_VERSION >= 200800
  template <class _EP, class _FIt1, class _FIt2, class _BF, class _UF, class _T>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _FIt2 transform_inclusive_scan(_EP&& __ep, _FIt1 __first, _FIt1 __last,
                                        _FIt2 __d_first, _BF __fsum, _UF __ft,
                                        _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    _ASSERT_NOT_FUNC_PTR(_UF);
    // Passed to Thrust as (transform, init, reduction).
    return thrust::transform_inclusive_scan(static_cast<_EP&&>(__ep), __first,
                                            __last, __d_first, __ft, __init,
                                            __fsum);
  }
#endif

  //========== transform_reduce ==========

  // The two-range overloads map onto thrust::inner_product.
  template <class _EP, class _FIt1, class _FIt2, class _T>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _T transform_reduce(_EP&& __ep, _FIt1 __first1, _FIt1 __last1,
                             _FIt2 __first2, _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    return thrust::inner_product(static_cast<_EP&&>(__ep), __first1, __last1,
                                 __first2, __init);
  }

  template <class _EP, class _FIt1, class _FIt2, class _T, class _BF1,
            class _BF2>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _T transform_reduce(_EP&& __ep, _FIt1 __first1, _FIt1 __last1,
                             _FIt2 __first2, _T __init, _BF1 __f1, _BF2 __f2) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF1);
    _ASSERT_NOT_FUNC_PTR(_BF2);
    return thrust::inner_product(static_cast<_EP&&>(__ep), __first1, __last1,
                                 __first2, __init, __f1, __f2);
  }

  // The single-range overload maps onto thrust::transform_reduce, which takes
  // its trailing arguments as (transform, init, reduction).
  template <class _EP, class _FIt, class _T, class _BF, class _UF>
  _NVHPC_PARALLEL_IMPL_THRUST
  static _T transform_reduce(_EP&& __ep, _FIt __first, _FIt __last, _T __init,
                             _BF __f1, _UF __f2) {
    _ASSERT_RANDOM_ACCESS(_FIt);
    _ASSERT_NOT_FUNC_PTR(_BF);
    _ASSERT_NOT_FUNC_PTR(_UF);
    return thrust::transform_reduce(static_cast<_EP&&>(__ep), __first, __last,
                                    __f2, __init, __f1);
  }

#endif // _NVHPC_INCLUDE_THRUST
};

// The __thrust_gpu back end inherits every algorithm from the
// __thrust_multicore specialization and adds nothing of its own.
template <>
struct __numeric_impl<__back_end::__thrust_gpu>
    : __numeric_impl<__back_end::__thrust_multicore> { };

// The __gpu_multicore back end is empty because it is treated specially by the
// dispatch framework.  It therefore declares no algorithm members.
template <> struct __numeric_impl<__back_end::__gpu_multicore> { };

// Compile-time logical OR over a pack of bool values; an empty pack yields
// false.  Written as a recursive template because C++17 is not guaranteed, so
// a fold expression can't be used here.
template <bool... _Bs> struct __logical_or : std::false_type { };
// A leading true settles the result.
template <bool... _B_rest>
struct __logical_or<true, _B_rest...> : std::true_type { };
// A leading false defers to the remaining values.
template <bool... _B_rest>
struct __logical_or<false, _B_rest...> : __logical_or<_B_rest...> { };

// True when the decayed type of _Op is _OpTmpl<void> or _OpTmpl<A> for some A
// in _ArgTypes.  This is used to check if a reduction operation is one of the
// standard operators, such as std::plus, that can be implemented in OpenACC.
template <class _Op, template <class> class _OpTmpl, class... _ArgTypes>
struct __op_is_instance_of
    : std::integral_constant<
        bool,
        __logical_or<
          std::is_same<typename std::decay<_Op>::type, _OpTmpl<void>>::value,
          std::is_same<typename std::decay<_Op>::type,
                       _OpTmpl<_ArgTypes>>::value...>::value> { };

}}

#if __NVCOMPILER_STDPAR_OPENACC_GPU || _NVHPC_STDPAR_GPU
#include <nvhpc/numeric_openacc.hpp>
#endif

namespace std { namespace __stdpar {

// The OpenACC back end.  Only some algorithms are implemented.

template <> struct __numeric_impl<__back_end::__openacc> {

#if __NVCOMPILER_STDPAR_OPENACC_GPU || _NVHPC_STDPAR_GPU

  // Each member ignores its __no_policy tag and forwards to the function of
  // the same name in __openacc, supplying an explicit operation object where
  // the std:: overload has a default one.  The scan algorithms are only
  // compiled when _NVHPC_STDPAR_ACC_EXPERIMENTAL is set.

  //========== exclusive_scan ==========

#if _NVHPC_STDPAR_ACC_EXPERIMENTAL
  template <class _FIt1, class _FIt2, class _T>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _FIt2 exclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    // The default operation is addition on the type of the initial value.
    return __openacc::exclusive_scan(__first, __last, __d_first, __init,
                                     std::plus<_T>{});
  }

  template <class _FIt1, class _FIt2, class _T, class _BF>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _FIt2 exclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _T __init, _BF __f) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    return __openacc::exclusive_scan(__first, __last, __d_first, __init, __f);
  }
#endif

  //========== inclusive_scan ==========

#if _NVHPC_STDPAR_ACC_EXPERIMENTAL
  template <class _FIt1, class _FIt2>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _FIt2 inclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    // The default operation is addition on the input range's value type.
    using _Val = typename std::iterator_traits<_FIt1>::value_type;
    return __openacc::inclusive_scan(__first, __last, __d_first,
                                     std::plus<_Val>{});
  }

  template <class _FIt1, class _FIt2, class _BF>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _FIt2 inclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _BF __f) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    return __openacc::inclusive_scan(__first, __last, __d_first, __f);
  }

  template <class _FIt1, class _FIt2, class _BF, class _T>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _FIt2 inclusive_scan(__no_policy, _FIt1 __first, _FIt1 __last,
                              _FIt2 __d_first, _BF __f, _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    return __openacc::inclusive_scan(__first, __last, __d_first, __f, __init);
  }
#endif

  //========== reduce ==========

  template <class _FIt, class _T>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _T reduce(__no_policy, _FIt __first, _FIt __last, _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt);
    // The default reduction is addition on the type of the initial value.
    return __openacc::reduce(__first, __last, __init, std::plus<_T>{});
  }

  template <class _FIt, class _T, class _BF>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _T reduce(__no_policy, _FIt __first, _FIt __last, _T __init, _BF __f) {
    _ASSERT_RANDOM_ACCESS(_FIt);
    _ASSERT_NOT_FUNC_PTR(_BF);
    return __openacc::reduce(__first, __last, __init, __f);
  }

  //========== transform_exclusive_scan ==========

#if _NVHPC_STDPAR_ACC_EXPERIMENTAL
  template <class _FIt1, class _FIt2, class _T, class _BF, class _UF>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _FIt2 transform_exclusive_scan(__no_policy, _FIt1 __first,
                                        _FIt1 __last, _FIt2 __d_first,
                                        _T __init, _BF __fsum, _UF __ft) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    _ASSERT_NOT_FUNC_PTR(_UF);
    return __openacc::transform_exclusive_scan(__first, __last, __d_first,
                                               __init, __fsum, __ft);
  }
#endif

  //========== transform_inclusive_scan ==========

#if _NVHPC_STDPAR_ACC_EXPERIMENTAL
  template <class _FIt1, class _FIt2, class _BF, class _UF>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _FIt2 transform_inclusive_scan(__no_policy, _FIt1 __first,
                                        _FIt1 __last, _FIt2 __d_first,
                                        _BF __fsum, _UF __ft) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    _ASSERT_NOT_FUNC_PTR(_UF);
    return __openacc::transform_inclusive_scan(__first, __last, __d_first,
                                               __fsum, __ft);
  }

  template <class _FIt1, class _FIt2, class _BF, class _UF, class _T>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _FIt2 transform_inclusive_scan(__no_policy, _FIt1 __first,
                                        _FIt1 __last, _FIt2 __d_first,
                                        _BF __fsum, _UF __ft, _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF);
    _ASSERT_NOT_FUNC_PTR(_UF);
    return __openacc::transform_inclusive_scan(__first, __last, __d_first,
                                               __fsum, __ft, __init);
  }
#endif

  //========== transform_reduce ==========

  template <class _FIt1, class _FIt2, class _T>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _T transform_reduce(__no_policy, _FIt1 __first1, _FIt1 __last1,
                             _FIt2 __first2, _T __init) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    // Defaults: products are formed in the common type of the two ranges'
    // value types and summed in the type of the initial value.
    using _Val1 = typename std::iterator_traits<_FIt1>::value_type;
    using _Val2 = typename std::iterator_traits<_FIt2>::value_type;
    using _Prod = typename std::common_type<_Val1, _Val2>::type;
    return __openacc::transform_reduce(__first1, __last1, __first2, __init,
                                       std::plus<_T>{},
                                       std::multiplies<_Prod>{});
  }

  template <class _FIt1, class _FIt2, class _T, class _BF1, class _BF2>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _T transform_reduce(__no_policy, _FIt1 __first1, _FIt1 __last1,
                             _FIt2 __first2, _T __init, _BF1 __fsum,
                             _BF2 __ft) {
    _ASSERT_RANDOM_ACCESS(_FIt1);
    _ASSERT_RANDOM_ACCESS(_FIt2);
    _ASSERT_NOT_FUNC_PTR(_BF1);
    _ASSERT_NOT_FUNC_PTR(_BF2);
    return __openacc::transform_reduce(__first1, __last1, __first2, __init,
                                       __fsum, __ft);
  }

  template <class _FIt, class _T, class _BF, class _UF>
  _NVHPC_PARALLEL_IMPL_OPENACC
  static _T transform_reduce(__no_policy, _FIt __first, _FIt __last, _T __init,
                             _BF __fsum, _UF __ft) {
    _ASSERT_RANDOM_ACCESS(_FIt);
    _ASSERT_NOT_FUNC_PTR(_BF);
    _ASSERT_NOT_FUNC_PTR(_UF);
    return __openacc::transform_reduce(__first, __last, __init, __fsum, __ft);
  }

#endif // __NVCOMPILER_STDPAR_OPENACC_GPU || _NVHPC_STDPAR_GPU
};

// The __openacc_errors back end exists to produce better compilation errors
// in some cases when an unsupported algorithm call is directed to the OpenACC
// back end.

template <> struct __numeric_impl<__back_end::__openacc_errors> {

#if __NVCOMPILER_STDPAR_OPENACC_GPU || _NVHPC_STDPAR_GPU

  // Every member below only checks its template arguments with static_assert
  // and then returns __init unchanged; the iterator and functor arguments are
  // never used, so they are left unnamed.

  //========== reduce ==========

  template <class _FIt, class _T>
  static _T reduce(__no_policy, _FIt, _FIt, _T __init) {
    static_assert(std::is_scalar<_T>::value,
                  "std::reduce(nv::execution::openacc_par, ...) is only "
                  "implemented for scalar types");
    return __init;
  }

  template <class _FIt, class _T, class _BF>
  static _T reduce(__no_policy, _FIt, _FIt, _T __init, _BF) {
    static_assert(std::is_scalar<_T>::value,
                  "std::reduce(nv::execution::openacc_par, ...) is only "
                  "implemented for scalar types");
    // Accepts std::plus<void> and std::plus<_T> (after decay of _BF).
    static_assert(__op_is_instance_of<_BF, std::plus, _T>::value,
                  "std::reduce(nv::execution::openacc_par, ...) is only "
                  "implemented when the reduction operation is std::plus");
    return __init;
  }

  //========== transform_reduce ==========

  template <class _FIt1, class _FIt2, class _T>
  static _T transform_reduce(__no_policy, _FIt1, _FIt1, _FIt2, _T __init) {
    static_assert(std::is_scalar<_T>::value,
                  "std::transform_reduce(nv::execution::openacc_par, ...) is "
                  "only implemented for scalar types");
    return __init;
  }

  template <class _FIt1, class _FIt2, class _T, class _BF1, class _BF2>
  static _T transform_reduce(__no_policy, _FIt1, _FIt1, _FIt2, _T __init,
                             _BF1, _BF2) {
    static_assert(std::is_scalar<_T>::value,
                  "std::transform_reduce(nv::execution::openacc_par, ...) is "
                  "only implemented for scalar types");
    // Only the reduction operation (_BF1) is checked here.
    static_assert(__op_is_instance_of<_BF1, std::plus, _T>::value,
                  "std::transform_reduce(nv::execution::openacc_par, ...) is "
                  "only implemented when the reduction operation is std::plus");
    return __init;
  }

  template <class _FIt, class _T, class _BF, class _UF>
  static _T transform_reduce(__no_policy, _FIt, _FIt, _T __init, _BF, _UF) {
    static_assert(std::is_scalar<_T>::value,
                  "std::transform_reduce(nv::execution::openacc_par, ...) is "
                  "only implemented for scalar types");
    // Only the reduction operation (_BF) is checked here.
    static_assert(__op_is_instance_of<_BF, std::plus, _T>::value,
                  "std::transform_reduce(nv::execution::openacc_par, ...) is "
                  "only implemented when the reduction operation is std::plus");
    return __init;
  }

#endif // __NVCOMPILER_STDPAR_OPENACC_GPU || _NVHPC_STDPAR_GPU
};

// Each overload of each parallel algorithm has a helper class that packages up
// the arguments so that they can be passed around within the dispatch framework.

// Numeric-algorithm shorthand for _NVHPC_CALL_IS_VALID (defined elsewhere):
// it supplies __numeric_impl as the first macro argument so that each use
// only has to spell out the member call expression.
#define _NVHPC_NUMERIC_CALL_IS_VALID(call) \
  _NVHPC_CALL_IS_VALID(__numeric_impl, call)

//========== adjacent_difference ==========

// Argument bundle for adjacent_difference(first, last, d_first).
template <class _FIt1, class _FIt2>
struct __call_adjacent_difference {
  // Stored arguments, in the order the algorithm takes them.
  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;

  typedef _FIt2 __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::adjacent_difference(__policy_for<_BE>::__policy(),
                                          __first, __last, __d_first);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      adjacent_difference(__policy_for<_BE>::__policy(), std::declval<_FIt1>(),
                          std::declval<_FIt1>(), std::declval<_FIt2>()));
};

// Argument bundle for adjacent_difference(first, last, d_first, f).
template <class _FIt1, class _FIt2, class _BF>
struct __call_adjacent_difference_pred {
  // Stored arguments, in the order the algorithm takes them.
  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;
  _BF __f;

  typedef _FIt2 __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::adjacent_difference(__policy_for<_BE>::__policy(),
                                          __first, __last, __d_first, __f);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      adjacent_difference(__policy_for<_BE>::__policy(), std::declval<_FIt1>(),
                          std::declval<_FIt1>(), std::declval<_FIt2>(),
                          std::declval<_BF>()));
};

//========== exclusive_scan ==========

// Argument bundle for exclusive_scan(first, last, d_first, init).
template <class _FIt1, class _FIt2, class _T>
struct __call_exclusive_scan {
  // Stored arguments, in the order the algorithm takes them.
  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;
  _T __init;

  typedef _FIt2 __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::exclusive_scan(__policy_for<_BE>::__policy(), __first,
                                     __last, __d_first, __init);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      exclusive_scan(__policy_for<_BE>::__policy(), std::declval<_FIt1>(),
                     std::declval<_FIt1>(), std::declval<_FIt2>(),
                     std::declval<_T>()));
};

// Argument bundle for exclusive_scan(first, last, d_first, init, f).
template <class _FIt1, class _FIt2, class _T, class _BF>
struct __call_exclusive_scan_op {
  // Stored arguments, in the order the algorithm takes them.
  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;
  _T __init;
  _BF __f;

  typedef _FIt2 __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::exclusive_scan(__policy_for<_BE>::__policy(), __first,
                                     __last, __d_first, __init, __f);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      exclusive_scan(__policy_for<_BE>::__policy(), std::declval<_FIt1>(),
                     std::declval<_FIt1>(), std::declval<_FIt2>(),
                     std::declval<_T>(), std::declval<_BF>()));
};

//========== inclusive_scan ==========

// Argument bundle for inclusive_scan(first, last, d_first).
template <class _FIt1, class _FIt2>
struct __call_inclusive_scan {
  // Stored arguments, in the order the algorithm takes them.
  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;

  typedef _FIt2 __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::inclusive_scan(__policy_for<_BE>::__policy(), __first,
                                     __last, __d_first);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      inclusive_scan(__policy_for<_BE>::__policy(), std::declval<_FIt1>(),
                     std::declval<_FIt1>(), std::declval<_FIt2>()));
};

// Argument bundle for inclusive_scan(first, last, d_first, f).
template <class _FIt1, class _FIt2, class _BF>
struct __call_inclusive_scan_op {
  // Stored arguments, in the order the algorithm takes them.
  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;
  _BF __f;

  typedef _FIt2 __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::inclusive_scan(__policy_for<_BE>::__policy(), __first,
                                     __last, __d_first, __f);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      inclusive_scan(__policy_for<_BE>::__policy(), std::declval<_FIt1>(),
                     std::declval<_FIt1>(), std::declval<_FIt2>(),
                     std::declval<_BF>()));
};

// Argument bundle for inclusive_scan(first, last, d_first, f, init).
template <class _FIt1, class _FIt2, class _BF, class _T>
struct __call_inclusive_scan_op_init {
  // Stored arguments, in the order the algorithm takes them.
  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;
  _BF __f;
  _T __init;

  typedef _FIt2 __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::inclusive_scan(__policy_for<_BE>::__policy(), __first,
                                     __last, __d_first, __f, __init);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      inclusive_scan(__policy_for<_BE>::__policy(), std::declval<_FIt1>(),
                     std::declval<_FIt1>(), std::declval<_FIt2>(),
                     std::declval<_BF>(), std::declval<_T>()));
};

//========== reduce ==========

// Argument bundle for reduce(first, last, init).
template <class _FIt, class _T>
struct __call_reduce {
  // Stored arguments, in the order the algorithm takes them.
  _FIt __first;
  _FIt __last;
  _T __init;

  typedef _T __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::reduce(__policy_for<_BE>::__policy(), __first, __last,
                             __init);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      reduce(__policy_for<_BE>::__policy(), std::declval<_FIt>(),
             std::declval<_FIt>(), std::declval<_T>()));
};

// Argument bundle for reduce(first, last, init, f).
template <class _FIt, class _T, class _BF>
struct __call_reduce_op {
  // Stored arguments, in the order the algorithm takes them.
  _FIt __first;
  _FIt __last;
  _T __init;
  _BF __f;

  typedef _T __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::reduce(__policy_for<_BE>::__policy(), __first, __last,
                             __init, __f);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      reduce(__policy_for<_BE>::__policy(), std::declval<_FIt>(),
             std::declval<_FIt>(), std::declval<_T>(), std::declval<_BF>()));
};

//========== transform_exclusive_scan ==========

// Argument bundle for
// transform_exclusive_scan(first, last, d_first, init, fsum, ft).
template <class _FIt1, class _FIt2, class _T, class _BF, class _UF>
struct __call_transform_exclusive_scan {
  // Stored arguments, in the order the algorithm takes them.
  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;
  _T __init;
  _BF __fsum;
  _UF __ft;

  typedef _FIt2 __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::transform_exclusive_scan(__policy_for<_BE>::__policy(),
                                               __first, __last, __d_first,
                                               __init, __fsum, __ft);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      transform_exclusive_scan(__policy_for<_BE>::__policy(),
                               std::declval<_FIt1>(), std::declval<_FIt1>(),
                               std::declval<_FIt2>(), std::declval<_T>(),
                               std::declval<_BF>(), std::declval<_UF>()));
};

//========== transform_inclusive_scan ==========

// Argument bundle for transform_inclusive_scan(first, last, d_first, fsum, ft).
template <class _FIt1, class _FIt2, class _BF, class _UF>
struct __call_transform_inclusive_scan {
  // Stored arguments, in the order the algorithm takes them.
  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;
  _BF __fsum;
  _UF __ft;

  typedef _FIt2 __return_type;

  // Invokes the algorithm on back end _BE, passing the result of
  // __policy_for<_BE>::__policy() followed by the stored arguments.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __be_impl = __numeric_impl<_BE>;
    return __be_impl::transform_inclusive_scan(__policy_for<_BE>::__policy(),
                                               __first, __last, __d_first,
                                               __fsum, __ft);
  }

  // Same call spelled with std::declval arguments; the surrounding code is
  // generated by the macro.
  _NVHPC_NUMERIC_CALL_IS_VALID(
      transform_inclusive_scan(__policy_for<_BE>::__policy(),
                               std::declval<_FIt1>(), std::declval<_FIt1>(),
                               std::declval<_FIt2>(), std::declval<_BF>(),
                               std::declval<_UF>()));
};

// Captured arguments of a transform_inclusive_scan call that carries an
// initial value.  __call<_BE>() replays them against the back end named by
// _BE.
template <class _FIt1, class _FIt2, class _BF, class _UF, class _T>
struct __call_transform_inclusive_scan_init {
  using __return_type = _FIt2;

  _FIt1 __first;
  _FIt1 __last;
  _FIt2 __d_first;
  _BF __fsum;
  _UF __ft;
  _T __init;

  // Run transform_inclusive_scan on back end _BE; arguments are passed in
  // the order (binary op, unary op, init).
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __impl = __numeric_impl<_BE>;
    return __impl::transform_inclusive_scan(__policy_for<_BE>::__policy(),
                                            __first, __last, __d_first,
                                            __fsum, __ft, __init);
  }

  // The same call spelled with declval operands is handed to the macro;
  // presumably it generates a per-back-end validity check (see the macro's
  // definition to confirm).
  _NVHPC_NUMERIC_CALL_IS_VALID(
      transform_inclusive_scan(__policy_for<_BE>::__policy(),
                               std::declval<_FIt1>(), std::declval<_FIt1>(),
                               std::declval<_FIt2>(), std::declval<_BF>(),
                               std::declval<_UF>(), std::declval<_T>()));
};

//========== transform_reduce ==========

// Captured arguments of the two-range transform_reduce call that supplies no
// operations.  __call<_BE>() replays them against the back end named by _BE.
template <class _FIt1, class _FIt2, class _T>
struct __call_transform_reduce {
  using __return_type = _T;

  _FIt1 __first1;
  _FIt1 __last1;
  _FIt2 __first2;
  _T __init;

  // Run the two-range transform_reduce on back end _BE.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __impl = __numeric_impl<_BE>;
    return __impl::transform_reduce(__policy_for<_BE>::__policy(), __first1,
                                    __last1, __first2, __init);
  }

  // The same call spelled with declval operands is handed to the macro;
  // presumably it generates a per-back-end validity check (see the macro's
  // definition to confirm).
  _NVHPC_NUMERIC_CALL_IS_VALID(
      transform_reduce(__policy_for<_BE>::__policy(), std::declval<_FIt1>(),
                       std::declval<_FIt1>(), std::declval<_FIt2>(),
                       std::declval<_T>()));
};

// Captured arguments of the two-range transform_reduce call that supplies two
// binary operations.  __call<_BE>() replays them against the back end named
// by _BE.
template <class _FIt1, class _FIt2, class _T, class _BF1, class _BF2>
struct __call_transform_reduce_ops {
  using __return_type = _T;

  _FIt1 __first1;
  _FIt1 __last1;
  _FIt2 __first2;
  _T __init;
  _BF1 __f1;
  _BF2 __f2;

  // Run the two-range transform_reduce on back end _BE, passing __f1 and
  // __f2 through in that order.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __impl = __numeric_impl<_BE>;
    return __impl::transform_reduce(__policy_for<_BE>::__policy(), __first1,
                                    __last1, __first2, __init, __f1, __f2);
  }

  // The same call spelled with declval operands is handed to the macro;
  // presumably it generates a per-back-end validity check (see the macro's
  // definition to confirm).
  _NVHPC_NUMERIC_CALL_IS_VALID(
      transform_reduce(__policy_for<_BE>::__policy(), std::declval<_FIt1>(),
                       std::declval<_FIt1>(), std::declval<_FIt2>(),
                       std::declval<_T>(), std::declval<_BF1>(),
                       std::declval<_BF2>()));
};

// Captured arguments of the single-range transform_reduce call (binary
// operation plus unary operation).  __call<_BE>() replays them against the
// back end named by _BE.
template <class _FIt, class _T, class _BF, class _UF>
struct __call_transform_reduce_1range {
  using __return_type = _T;

  _FIt __first;
  _FIt __last;
  _T __init;
  _BF __f1;
  _UF __f2;

  // Run the single-range transform_reduce on back end _BE, passing the
  // binary operation __f1 before the unary operation __f2.
  template <__back_end _BE>
  _NVHPC_PARALLEL_FRAMEWORK_IMPL __return_type __call() const {
    using __impl = __numeric_impl<_BE>;
    return __impl::transform_reduce(__policy_for<_BE>::__policy(), __first,
                                    __last, __init, __f1, __f2);
  }

  // The same call spelled with declval operands is handed to the macro;
  // presumably it generates a per-back-end validity check (see the macro's
  // definition to confirm).
  _NVHPC_NUMERIC_CALL_IS_VALID(
      transform_reduce(__policy_for<_BE>::__policy(), std::declval<_FIt>(),
                       std::declval<_FIt>(), std::declval<_T>(),
                       std::declval<_BF>(), std::declval<_UF>()));
};


} // namespace __stdpar

// The definitions of the standard parallel algorithms.

//========== adjacent_difference ==========

// Execution-policy overload of adjacent_difference.  The policy argument is
// unnamed: only its type _EP is used, as the template argument of
// __stdpar::__dispatch.  Any exception leaving the dispatch ends the program
// through std::terminate().
template <class _EP, class _FIt1, class _FIt2>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
adjacent_difference(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first) {
  using __call_t = __stdpar::__call_adjacent_difference<_FIt1, _FIt2>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(__call_t{__first, __last, __d_first});
  } catch (...) {
    std::terminate();
  }
}

// Execution-policy overload of adjacent_difference taking a binary operation.
// The policy argument is unnamed: only its type _EP is used, as the template
// argument of __stdpar::__dispatch.  Any exception leaving the dispatch ends
// the program through std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _BF>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
adjacent_difference(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first,
                    _BF __f) {
  using __call_t =
      __stdpar::__call_adjacent_difference_pred<_FIt1, _FIt2, _BF>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __d_first, __f});
  } catch (...) {
    std::terminate();
  }
}

//========== exclusive_scan ==========

// Execution-policy overload of exclusive_scan with an initial value.  The
// policy argument is unnamed: only its type _EP is used, as the template
// argument of __stdpar::__dispatch.  Any exception leaving the dispatch ends
// the program through std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _T>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
exclusive_scan(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first, _T __init) {
  using __call_t = __stdpar::__call_exclusive_scan<_FIt1, _FIt2, _T>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __d_first, __init});
  } catch (...) {
    std::terminate();
  }
}

// Execution-policy overload of exclusive_scan with an initial value and a
// binary operation.  The policy argument is unnamed: only its type _EP is
// used, as the template argument of __stdpar::__dispatch.  Any exception
// leaving the dispatch ends the program through std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _T, class _BF>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
exclusive_scan(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first, _T __init,
               _BF __f) {
  using __call_t = __stdpar::__call_exclusive_scan_op<_FIt1, _FIt2, _T, _BF>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __d_first, __init, __f});
  } catch (...) {
    std::terminate();
  }
}

//========== inclusive_scan ==========

// Execution-policy overload of inclusive_scan with no operation and no
// initial value.  The policy argument is unnamed: only its type _EP is used,
// as the template argument of __stdpar::__dispatch.  Any exception leaving
// the dispatch ends the program through std::terminate().
template <class _EP, class _FIt1, class _FIt2>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
inclusive_scan(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first) {
  using __call_t = __stdpar::__call_inclusive_scan<_FIt1, _FIt2>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(__call_t{__first, __last, __d_first});
  } catch (...) {
    std::terminate();
  }
}

// Execution-policy overload of inclusive_scan taking a binary operation.  The
// policy argument is unnamed: only its type _EP is used, as the template
// argument of __stdpar::__dispatch.  Any exception leaving the dispatch ends
// the program through std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _BF>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
inclusive_scan(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first, _BF __f) {
  using __call_t = __stdpar::__call_inclusive_scan_op<_FIt1, _FIt2, _BF>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __d_first, __f});
  } catch (...) {
    std::terminate();
  }
}

// Execution-policy overload of inclusive_scan taking a binary operation and
// an initial value.  The policy argument is unnamed: only its type _EP is
// used, as the template argument of __stdpar::__dispatch.  Any exception
// leaving the dispatch ends the program through std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _BF, class _T>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
inclusive_scan(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first, _BF __f,
               _T __init) {
  using __call_t =
      __stdpar::__call_inclusive_scan_op_init<_FIt1, _FIt2, _BF, _T>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __d_first, __f, __init});
  } catch (...) {
    std::terminate();
  }
}

//========== reduce ==========

// Execution-policy overload of reduce with no explicit initial value: a
// value-initialized object of the iterator's value_type is used as the
// initial value.  The policy argument is unnamed: only its type _EP is used,
// as the template argument of __stdpar::__dispatch.  Any exception leaving
// the dispatch ends the program through std::terminate().
template <class _EP, class _FIt>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, typename std::iterator_traits<_FIt>::value_type>
reduce(_EP&&, _FIt __first, _FIt __last) {
  using __value_t = typename std::iterator_traits<_FIt>::value_type;
  using __call_t = __stdpar::__call_reduce<_FIt, __value_t>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __value_t{}});
  } catch (...) {
    std::terminate();
  }
}

// Execution-policy overload of reduce with an explicit initial value.  The
// policy argument is unnamed: only its type _EP is used, as the template
// argument of __stdpar::__dispatch.  Any exception leaving the dispatch ends
// the program through std::terminate().
template <class _EP, class _FIt, class _T>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _T>
reduce(_EP&&, _FIt __first, _FIt __last, _T __init) {
  using __call_t = __stdpar::__call_reduce<_FIt, _T>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(__call_t{__first, __last, __init});
  } catch (...) {
    std::terminate();
  }
}

// Execution-policy overload of reduce with an initial value and a binary
// operation.  The policy argument is unnamed: only its type _EP is used, as
// the template argument of __stdpar::__dispatch.  Any exception leaving the
// dispatch ends the program through std::terminate().
template <class _EP, class _FIt, class _T, class _BF>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _T>
reduce(_EP&&, _FIt __first, _FIt __last, _T __init, _BF __f) {
  using __call_t = __stdpar::__call_reduce_op<_FIt, _T, _BF>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __init, __f});
  } catch (...) {
    std::terminate();
  }
}

//========== transform_exclusive_scan ==========

// Execution-policy overload of transform_exclusive_scan.  The policy argument
// is unnamed: only its type _EP is used, as the template argument of
// __stdpar::__dispatch.  Any exception leaving the dispatch ends the program
// through std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _T, class _BF, class _UF>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
transform_exclusive_scan(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first,
                         _T __init, _BF __fsum, _UF __ft) {
  using __call_t =
      __stdpar::__call_transform_exclusive_scan<_FIt1, _FIt2, _T, _BF, _UF>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __d_first, __init, __fsum, __ft});
  } catch (...) {
    std::terminate();
  }
}

//========== transform_inclusive_scan ==========

// Execution-policy overload of transform_inclusive_scan without an initial
// value.  The policy argument is unnamed: only its type _EP is used, as the
// template argument of __stdpar::__dispatch.  Any exception leaving the
// dispatch ends the program through std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _BF, class _UF>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
transform_inclusive_scan(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first,
                         _BF __fsum, _UF __ft) {
  using __call_t =
      __stdpar::__call_transform_inclusive_scan<_FIt1, _FIt2, _BF, _UF>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __d_first, __fsum, __ft});
  } catch (...) {
    std::terminate();
  }
}

// Execution-policy overload of transform_inclusive_scan with an initial
// value.  The policy argument is unnamed: only its type _EP is used, as the
// template argument of __stdpar::__dispatch.  Any exception leaving the
// dispatch ends the program through std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _BF, class _UF, class _T>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _FIt2>
transform_inclusive_scan(_EP&&, _FIt1 __first, _FIt1 __last, _FIt2 __d_first,
                         _BF __fsum, _UF __ft, _T __init) {
  using __call_t =
      __stdpar::__call_transform_inclusive_scan_init<_FIt1, _FIt2, _BF, _UF,
                                                     _T>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __d_first, __fsum, __ft, __init});
  } catch (...) {
    std::terminate();
  }
}

//========== transform_reduce ==========

// Execution-policy overload of the two-range transform_reduce that supplies
// no operations.  The policy argument is unnamed: only its type _EP is used,
// as the template argument of __stdpar::__dispatch.  Any exception leaving
// the dispatch ends the program through std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _T>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _T>
transform_reduce(_EP&&, _FIt1 __first1, _FIt1 __last1, _FIt2 __first2,
                 _T __init) {
  using __call_t = __stdpar::__call_transform_reduce<_FIt1, _FIt2, _T>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first1, __last1, __first2, __init});
  } catch (...) {
    std::terminate();
  }
}

// Execution-policy overload of the two-range transform_reduce with two
// caller-supplied binary operations.  The policy argument is unnamed: only
// its type _EP is used, as the template argument of __stdpar::__dispatch.
// Any exception leaving the dispatch ends the program through
// std::terminate().
template <class _EP, class _FIt1, class _FIt2, class _T, class _BF1, class _BF2>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _T>
transform_reduce(_EP&&, _FIt1 __first1, _FIt1 __last1, _FIt2 __first2,
                 _T __init, _BF1 __f1, _BF2 __f2) {
  using __call_t =
      __stdpar::__call_transform_reduce_ops<_FIt1, _FIt2, _T, _BF1, _BF2>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first1, __last1, __first2, __init, __f1, __f2});
  } catch (...) {
    std::terminate();
  }
}

// Execution-policy overload of the single-range transform_reduce (binary
// operation __f1, unary operation __f2).  The policy argument is unnamed:
// only its type _EP is used, as the template argument of
// __stdpar::__dispatch.  Any exception leaving the dispatch ends the program
// through std::terminate().
template <class _EP, class _FIt, class _T, class _BF, class _UF>
_NVHPC_PARALLEL_ALGORITHM
__stdpar::__enable_if_EP<_EP, _T>
transform_reduce(_EP&&, _FIt __first, _FIt __last, _T __init, _BF __f1,
                 _UF __f2) {
  using __call_t =
      __stdpar::__call_transform_reduce_1range<_FIt, _T, _BF, _UF>;
  try {
    _NVHPC_STDPAR_NVTX_RANGE
    return __stdpar::__dispatch<_EP>(
        __call_t{__first, __last, __init, __f1, __f2});
  } catch (...) {
    std::terminate();
  }
}

} // namespace std

#endif
