reduce_kernel_impl.h
__host__ __device__ T operator()(const T &x) const
Definition: reduce_kernel_impl.h:42
T reduce_cuda_kernel_impl(ReduceOp &&op, const T *arg, std::size_t n, T init, cudaStream_t stream, int device_id)
Reduces the array pointed to by arg to a single value of type T: T = reduce(T* arg).
Definition: reduce_kernel_impl.h:51
T min_reduce_cuda_kernel_impl(const T *arg, std::size_t n, cudaStream_t stream, int device_id)
Definition: reduce_kernel_impl.h:88
T max_reduce_cuda_kernel_impl(const T *arg, std::size_t n, cudaStream_t stream, int device_id)
Definition: reduce_kernel_impl.h:80
T product_reduce_cuda_kernel_impl(const T *arg, std::size_t n, cudaStream_t stream, int device_id)
Definition: reduce_kernel_impl.h:64
KroneckerDeltaTile< _N >::numeric_type max(const KroneckerDeltaTile< _N > &arg)
T absmax_reduce_cuda_kernel_impl(const T *arg, std::size_t n, cudaStream_t stream, int device_id)
Definition: reduce_kernel_impl.h:96
T sum_reduce_cuda_kernel_impl(const T *arg, std::size_t n, cudaStream_t stream, int device_id)
Definition: reduce_kernel_impl.h:72
Definition: array_impl.cpp:28
T absmin_reduce_cuda_kernel_impl(const T *arg, std::size_t n, cudaStream_t stream, int device_id)
Definition: reduce_kernel_impl.h:113