parallel_gemm.h
TILEDARRAY_FORCE_INLINE void copy_block(Result *const result, const Arg *const arg)
Definition: vector_op.h:219
std::shared_ptr< T > result()
Definition: parallel_gemm.h:114
MatrixBlockTask(const integer rows, const integer cols, const T *const data, const integer ld)
Definition: parallel_gemm.h:74
Definition: array_impl.cpp:28
Definition: parallel_gemm.h:131
std::integral_constant< std::size_t, ~std::size_t(TILEDARRAY_LOOP_UNWIND - 1ul)> index_mask
Definition: vector_op.h:54
GemmTask(blas::Op op_a, blas::Op op_b, const integer m, const integer n, const integer k, const Alpha alpha, const std::shared_ptr< A > &a, const std::shared_ptr< B > &b, const Beta beta, const std::shared_ptr< C > &c, const integer ldc)
Definition: parallel_gemm.h:143
decltype(auto) gemm(const Tile< Left > &left, const Tile< Right > &right, const Scalar factor, const math::GemmHelper &gemm_config)
Contract 2 tensors over head/tail modes and scale the product.
Definition: tile.h:1396