4coder/test_data/lots_of_files/amp.h

7555 lines
298 KiB
C
Raw Normal View History

2018-03-16 18:19:11 +00:00
/***
* ==++==
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
* ==--==
* =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
*
* amp.h
*
* C++ AMP Library
*
* =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
****/
#pragma once
#include <crtdbg.h>
#include <vector>
#include <iterator>
#include <future>
#include <amprt.h>
#include <xxamp.h>
#include <type_traits>
#define _AMP_H
#pragma pack(push,8)
namespace Concurrency
{
/// <summary>
/// Define an N-dimensional index point; which may also be viewed as a vector
/// based at the origin in N-space.
///
/// The index&lt;N&gt; type represents an N-dimensional vector of int which specifies
/// a unique position in an N-dimensional space. The values in the coordinate
/// vector are ordered from most-significant to least-significant. Thus, in
/// 2-dimensional space, the index vector (5,3) represents the position at
/// row 5, column 3.
///
/// The position is relative to the origin in the N-dimensional space, and can
/// contain negative component values.
/// </summary>
///
/// <param name="_Rank">
/// The dimensionality space into which this index applies, can be any integer
/// greater than 0.
/// </param>
template <int _Rank> class index
{
public:
_CPP_AMP_VERIFY_RANK(_Rank, index);
template <typename _Value_type, int _Rank>
friend class array;
template <int _Rank, int _Element_size>
friend class details::_Array_view_shape;
template <int _Rank, int _Element_size>
friend class details::_Array_view_base;
static const int rank = _Rank;
typedef int value_type;
/// <summary>
/// Default constructor, initializes all elements with 0.
/// </summary>
index() __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opAssign>::func(*this, 0);
}
/// <summary>
/// Copy Constructor.
/// </summary>
/// <param name="_Other">
/// The object to copy from
/// </param>
index(const index<_Rank>& _Other) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opAssign>::func(*this, _Other);
}
/// <summary>
/// Constructor for index&lt;1&gt;
/// </summary>
/// <param name="_I">
/// The value for initialization
/// </param>
explicit index(int _I) __GPU
{
static_assert(_Rank == 1, "This constructor can only be used to construct an index<1> object.");
details::_compound_assign_op_loop_helper<index<_Rank>, details::opAssign>::func(*this, _I);
}
/// <summary>
/// Constructor for index&lt;2&gt;
/// </summary>
/// <param name="_I0">
/// The index value for dimension 0
/// </param>
/// <param name="_I1">
/// The index value for dimension 1
/// </param>
index(int _I0, int _I1) __GPU
{
static_assert(_Rank == 2, "This constructor can only be used to construct an index<2> object.");
_M_base[0] = _I0;
_M_base[1] = _I1;
}
/// <summary>
/// Constructor for index&lt;3&gt;
/// </summary>
/// <param name="_I0">
/// The index value for dimension 0
/// </param>
/// <param name="_I1">
/// The index value for dimension 1
/// </param>
/// <param name="_I2">
/// The index value for dimension 2
/// </param>
index(int _I0, int _I1, int _I2) __GPU
{
static_assert(_Rank == 3, "This constructor can only be used to construct an index<3> object.");
_M_base[0] = _I0;
_M_base[1] = _I1;
_M_base[2] = _I2;
}
/// <summary>
/// Constructs an index&lt;N&gt; with the coordinate values provided the array
/// of int component values. If the coordinate array length is not N,
/// the behavior is undefined.
/// </summary>
/// <param name="_Array">
/// A single-dimensional array with _Rank elements.
/// </param>
explicit index(const int _Array[_Rank]) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opAssign>::func(*this, _Array);
}
/// <summary>
/// copy-assignment operators
/// </summary>
index<_Rank>& operator=(const index<_Rank>& _Other) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opAssign>::func(*this, _Other);
return *this;
}
/// <summary>
/// Index operator.
/// </summary>
/// <param name="_Index">
/// An integral value between 0 and _Rank-1.
/// </param>
/// <returns>
/// The corresponding value stored at _Index.
/// </returns>
int operator[] (unsigned _Index) const __GPU
{
return _M_base[_Index];
}
/// <summary>
/// Index operator.
/// </summary>
/// <param name="_Index">
/// An integral value between 0 and _Rank-1.
/// </param>
/// <returns>
/// A reference to the corresponding value stored at _Index.
/// </returns>
int& operator[] (unsigned _Index) __GPU
{
return _M_base[_Index];
}
// Operations
/// <summary>
/// Element-wise addition of this index with another index.
/// </summary>
/// <param name="_Rhs">
/// The index to add
/// </param>
/// <returns>
/// A reference to this index.
/// </returns>
index<_Rank>& operator+=(const index<_Rank>& _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opAddEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Element-wise subtraction of this index with another index.
/// </summary>
/// <param name="_Rhs">
/// The index to subtract
/// </param>
/// <returns>
/// A reference to this index.
/// </returns>
index<_Rank>& operator-=(const index<_Rank>& _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opSubEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Adds an integer value to each element of this index.
/// </summary>
/// <param name="_Rhs">
/// The integer value to add
/// </param>
/// <returns>
/// A reference to this index.
/// </returns>
index<_Rank>& operator+=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opAddEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Subtracts an integer value from each element of this index.
/// </summary>
/// <param name="_Rhs">
/// The integer value to subtract.
/// </param>
/// <returns>
/// A reference to this index.
/// </returns>
index<_Rank>& operator-=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opSubEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Multiplies each element of this index with an integer value.
/// </summary>
/// <param name="_Rhs">
/// The integer value to multiply.
/// </param>
/// <returns>
/// A reference to this index.
/// </returns>
index<_Rank>& operator*=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opMulEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Divides each element of this index by an integer value.
/// </summary>
/// <param name="_Rhs">
/// The integer value to divide by.
/// </param>
/// <returns>
/// A reference to this index.
/// </returns>
index<_Rank>& operator/=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opDivEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Modulus an integer value into each element of this index.
/// </summary>
/// <param name="_Rhs">
/// The integer value to modulus.
/// </param>
/// <returns>
/// A reference to this index.
/// </returns>
index<_Rank>& operator%=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opModEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Pre-increments each element of this index.
/// </summary>
/// <returns>
/// A reference to this index.
/// </returns>
index<_Rank>& operator++() __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opAddEq>::func(*this, 1);
return *this;
}
/// <summary>
/// Post-increments each element of this index.
/// </summary>
/// <returns>
/// The value of the unincremented index.
/// </returns>
index<_Rank> operator++(int) __GPU
{
index<_Rank> old_Index(*this);
details::_compound_assign_op_loop_helper<index<_Rank>, details::opAddEq>::func(*this, 1);
return old_Index;
}
/// <summary>
/// Pre-decrements each element of this index.
/// </summary>
/// <returns>
/// A reference to this index.
/// </returns>
index<_Rank>& operator--() __GPU
{
details::_compound_assign_op_loop_helper<index<_Rank>, details::opSubEq>::func(*this, 1);
return *this;
}
/// <summary>
/// Post-decrements each element of this index.
/// </summary>
/// <returns>
/// The value of the undecremented index.
/// </returns>
index operator--(int) __GPU
{
index<_Rank> old_Index(*this);
details::_compound_assign_op_loop_helper<index<_Rank>, details::opSubEq>::func(*this, 1);
return old_Index;
}
private:
template<class _Tuple_type>
friend
_Tuple_type details::_Create_uninitialized_tuple() __GPU;
/// <summary>
/// Constructor.
/// </summary>
/// <param name="">
/// Indicates that no initialization is necessary.
/// </param>
index(details::_eInitializeState) __GPU {}
//
// implementation details - end
int _M_base[_Rank];
};
/// <summary>
/// The extent&lt;N&gt; type represents an N-dimensional vector of int which specifies
/// the bounds of an N-dimensional space with an origin of 0. The values in the
/// coordinate vector are ordered from most-significant to least-significant.
/// Thus, in 2-dimensional space, the extent vector (5,3) represents a space
/// with 5 rows and 3 columns.
///
/// All components of an extent must be non-negative.
/// E.g.
/// extent&lt;3&gt; domain(2, 3, 4);
/// represents all points
/// index&lt;3&gt; _Index;
/// such that
/// 0 &lt;= _Index[0] &lt; 2;
/// 0 &lt;= _Index[1] &lt; 3;
/// 0 &lt;= _Index[2] &lt; 4;
/// </summary>
/// <param name="_Rank">
/// The _Rank or the dimensionality of the index space.
/// </param>
template <int _Rank> class extent
{
public:
_CPP_AMP_VERIFY_RANK(_Rank, extent);
template <typename _Value_type, int _Rank>
friend class array;
template <int _Rank, int _Element_size>
friend class details::_Array_view_shape;
template <int _Rank, int _Element_size>
friend class details::_Array_view_base;
static const int rank = _Rank;
typedef int value_type;
/// <summary>
/// Default constructor. The value at each dimension is initialized to zero.
/// </summary>
extent() __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opAssign>::func(*this, 0);
}
/// <summary>
/// Copy constructor. Constructs a new extent from the supplied argument _Other.
/// </summary>
/// <param name="_Other">
/// The extent instance to be copied from .
/// </param>
extent(const extent<_Rank>& _Other) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opAssign>::func(*this, _Other);
}
/// <summary>
/// Constructor for extent&lt;1&gt;.
/// </summary>
/// <param name="_I">
/// The value for initialization
/// </param>
explicit extent(int _I) __GPU
{
static_assert(_Rank == 1, "This constructor can only be used to construct an extent<1> object.");
_M_base[0] = _I;
}
/// <summary>
/// Constructor for extent&lt;2&gt;
/// </summary>
/// <param name="_I0">
/// The extent value for dimension 0
/// </param>
/// <param name="_I1">
/// The extent value for dimension 1
/// </param>
extent(int _I0, int _I1) __GPU
{
static_assert(_Rank == 2, "This constructor can only be used to construct an extent<2> object.");
_M_base[0] = _I0;
_M_base[1] = _I1;
}
/// <summary>
/// Constructor for extent&lt;3&gt;
/// </summary>
/// <param name="_I0">
/// The extent value for dimension 0
/// </param>
/// <param name="_I1">
/// The extent value for dimension 1
/// </param>
/// <param name="_I2">
/// The extent value for dimension 2
/// </param>
extent(int _I0, int _I1, int _I2) __GPU
{
static_assert(_Rank == 3, "This constructor can only be used to construct an extent<3> object.");
_M_base[0] = _I0;
_M_base[1] = _I1;
_M_base[2] = _I2;
}
/// <summary>
/// Constructs an extent with the coordinate values provided the array
/// of int component values. If the coordinate array length is not N,
/// the behavior is undefined.
/// </summary>
/// <param name="_Array">
/// A single-dimensional array with _Rank elements.
/// </param>
explicit extent(const int _Array[_Rank]) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opAssign>::func(*this, _Array);
}
/// <summary>
/// copy-assignment operator
/// </summary>
extent<_Rank>& operator=(const extent<_Rank>& _Other) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opAssign>::func(*this, _Other);
return *this;
}
/// <summary>
/// Index operator.
/// </summary>
/// <param name="_Index">
/// An integral value between 0 and _Rank-1.
/// </param>
/// <returns>
/// The corresponding value stored at _Index.
/// </returns>
int operator[] (unsigned int _Index) const __GPU
{
return _M_base[_Index];
}
/// <summary>
/// Index operators.
/// </summary>
/// <param name="_Index">
/// An integral value between 0 and _Rank-1.
/// </param>
/// <returns>
/// A reference to the value stored at _Index.
/// </returns>
int& operator[] (unsigned int _Index) __GPU
{
return _M_base[_Index];
}
/// <summary>
/// Returns the total linear size of this extent (in units of elements).
/// </summary>
unsigned int size() const __GPU
{
return static_cast<unsigned int>(_product_helper<extent<_Rank>>::func(_M_base));
}
/// <summary>
/// Tests whether the index "_Index" is properly contained within this extent.
/// </summary>
bool contains(const index<rank>& _Index) const __GPU
{
return details::_contains<extent<rank>, index<rank>, rank>::func(*this, _Index);
}
/// <summary>
/// Produces a tiled_extent object with the tile extents given by _Dim0.
/// </summary>
template <int _Dim0> tiled_extent<_Dim0> tile() const __GPU
{
static_assert(rank == 1, "One-dimensional tile() method only available on extent<1>");
static_assert(_Dim0>0, "All tile dimensions must be positive");
return tiled_extent<_Dim0>(*this);
}
/// <summary>
/// Produces a tiled_extent object with the tile extents given by _Dim0, _Dim1
/// </summary>
template <int _Dim0, int _Dim1> tiled_extent<_Dim0, _Dim1> tile() const __GPU
{
static_assert(rank == 2, "Two-dimensional tile() method only available on extent<2>");
static_assert(_Dim0>0 && _Dim1>0, "All tile dimensions must be positive");
return tiled_extent<_Dim0, _Dim1>(*this);
}
/// <summary>
/// Produces a tiled_extent object with the tile extents given by _Dim0, _Dim1, _Dim2.
/// </summary>
template <int _Dim0, int _Dim1, int _Dim2> tiled_extent<_Dim0, _Dim1, _Dim2> tile() const __GPU
{
static_assert(rank == 3, "Three-dimensional tile() method only available on extent<3>");
static_assert(_Dim0>0 && _Dim1>0 && _Dim2>0, "All tile dimensions must be positive");
return tiled_extent<_Dim0, _Dim1, _Dim2>(*this);
}
// Operations
/// <summary>
/// Element-wise addition of this extent with an index.
/// </summary>
/// <param name="_Rhs">
/// The index to add to this extent
/// </param>
/// <returns>
/// A new extent with the result of the computation.
/// </returns>
extent<_Rank> operator+(const index<_Rank>& _Rhs) __GPU
{
extent<_Rank> new_extent(details::_do_not_initialize);
details::_arithmetic_op_loop_helper<extent<_Rank>, details::opAdd>::func(new_extent, *this, _Rhs);
return new_extent;
}
/// <summary>
/// Element-wise subtraction of this extent with an index.
/// </summary>
/// <param name="_Rhs">
/// The index to subtract from this extent
/// </param>
/// <returns>
/// A new extent with the result of the computation.
/// </returns>
extent<_Rank> operator-(const index<_Rank>& _Rhs) __GPU
{
extent<_Rank> new_extent(details::_do_not_initialize);
details::_arithmetic_op_loop_helper<extent<_Rank>, details::opSub>::func(new_extent, *this, _Rhs);
return new_extent;
}
/// <summary>
/// Element-wise addition of this extent with another extent.
/// </summary>
/// <param name="_Rhs">
/// The extent to add
/// </param>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator+=(const extent<_Rank>& _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opAddEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Element-wise subtraction of this extent with another extent.
/// </summary>
/// <param name="_Rhs">
/// The extent to subtract
/// </param>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator-=(const extent<_Rank>& _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opSubEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Element-wise addition of this extent with an index.
/// </summary>
/// <param name="_Rhs">
/// The index to add
/// </param>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator+=(const index<_Rank>& _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opAddEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Element-wise subtraction of this extent with an index.
/// </summary>
/// <param name="_Rhs">
/// The index to subtract
/// </param>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator-=(const index<_Rank>& _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opSubEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Adds an integer value to each element of this extent.
/// </summary>
/// <param name="_Rhs">
/// The integer value to add to this extent
/// </param>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator+=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opAddEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Subtracts an integer value from each element of this extent.
/// </summary>
/// <param name="_Rhs">
/// The integer value to subtract from this extent
/// </param>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator-=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opSubEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Multiplies an integer value to each element of this extent.
/// </summary>
/// <param name="_Rhs">
/// The integer value to multiply into this extent
/// </param>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator*=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opMulEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Divides an integer value into each element of this extent.
/// </summary>
/// <param name="_Rhs">
/// The integer value to divide into this extent
/// </param>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator/=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opDivEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Modulus an integer value from each element of this extent.
/// </summary>
/// <param name="_Rhs">
/// The integer value to modulo this extent
/// </param>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator%=(int _Rhs) __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opModEq>::func(*this, _Rhs);
return *this;
}
/// <summary>
/// Pre-increments each element of this extent.
/// </summary>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator++() __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opAddEq>::func(*this, 1);
return *this;
}
/// <summary>
/// Post-increments each element of this extent.
/// </summary>
/// <returns>
/// The value of the unincremented extent.
/// </returns>
extent<_Rank> operator++(int) __GPU
{
extent<_Rank> old_extent(*this);
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opAddEq>::func(*this, 1);
return old_extent;
}
/// <summary>
/// Pre-decrements each element of this extent.
/// </summary>
/// <returns>
/// A reference to this extent.
/// </returns>
extent<_Rank>& operator--() __GPU
{
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opSubEq>::func(*this, 1);
return *this;
}
/// <summary>
/// Post-decrements each element of this extent.
/// </summary>
/// <returns>
/// The value of the undecremented extent.
/// </returns>
extent<_Rank> operator--(int) __GPU
{
extent<_Rank> old_extent(*this);
details::_compound_assign_op_loop_helper<extent<_Rank>, details::opSubEq>::func(*this, 1);
return old_extent;
}
// implementation details (compiler helpers) - begin
// Index mapping for simple zero-based extent domain.
index<_Rank> _map_index(const index<_Rank>& _Index) const __GPU {
return _Index;
}
private:
template<class _Tuple_type>
friend
_Tuple_type details::_Create_uninitialized_tuple() __GPU;
/// <summary>
/// Constructor.
/// </summary>
/// <param name="">
/// Indicates that no initialization is necessary.
/// </param>
extent(details::_eInitializeState) __GPU {}
// the store
int _M_base[_Rank];
};
template <int _Rank, template <int> class _Tuple_type>
bool operator==(const _Tuple_type<_Rank>& _Lhs, const _Tuple_type<_Rank>& _Rhs) __GPU
{
return details::_cmp_op_loop_helper<_Tuple_type<_Rank>, details::opEq>::func(_Lhs, _Rhs);
}
template <int _Rank, template <int> class _Tuple_type>
bool operator!=(const _Tuple_type<_Rank>& _Lhs, const _Tuple_type<_Rank>& _Rhs) __GPU
{
return !details::_cmp_op_loop_helper<_Tuple_type<_Rank>, details::opEq>::func(_Lhs, _Rhs);
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator+(const _Tuple_type<_Rank>& _Lhs, const _Tuple_type<_Rank>& _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opAdd>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator-(const _Tuple_type<_Rank>& _Lhs, const _Tuple_type<_Rank>& _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opSub>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator+(const _Tuple_type<_Rank>& _Lhs, typename _Tuple_type<_Rank>::value_type _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opAdd>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator+(typename _Tuple_type<_Rank>::value_type _Lhs, const _Tuple_type<_Rank>& _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opAdd>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator-(const _Tuple_type<_Rank>& _Lhs, typename _Tuple_type<_Rank>::value_type _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opSub>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator-(typename _Tuple_type<_Rank>::value_type _Lhs, const _Tuple_type<_Rank>& _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opSub>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator*(const _Tuple_type<_Rank>& _Lhs, typename _Tuple_type<_Rank>::value_type _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opMul>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator*(typename _Tuple_type<_Rank>::value_type _Lhs, const _Tuple_type<_Rank>& _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opMul>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator/(const _Tuple_type<_Rank>& _Lhs, typename _Tuple_type<_Rank>::value_type _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opDiv>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator/(typename _Tuple_type<_Rank>::value_type _Lhs, const _Tuple_type<_Rank>& _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opDiv>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator%(const _Tuple_type<_Rank>& _Lhs, typename _Tuple_type<_Rank>::value_type _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opMod>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
template <int _Rank, template <int> class _Tuple_type>
_Tuple_type<_Rank> operator%(typename _Tuple_type<_Rank>::value_type _Lhs, const _Tuple_type<_Rank>& _Rhs) __GPU
{
_Tuple_type<_Rank> new_Tuple = details::_Create_uninitialized_tuple<_Tuple_type<_Rank>>();
details::_arithmetic_op_loop_helper<_Tuple_type<_Rank>, opMod>::func(new_Tuple, _Lhs, _Rhs);
return new_Tuple;
}
/// <summary>
/// The tile_barrier class is a capability class that is only creatable by
/// the system, and passed to a tiled parallel_for_each lambda as part of
/// the tiled_index parameter. It provides wait methods whose purpose is
/// to synchronize execution of threads running within the thread
/// group (tile).
/// </summary>
class tile_barrier
{
public:
/// <summary>
/// Copy Constructor. The tile_barrier class does not have a public
/// default constructor or assignment operator, only copy-constructor.
/// </summary>
/// <param name="_Other">
/// The tile_barrier instance to be copied from.
/// </param>
#pragma warning( suppress : 4100 ) // unreferenced formal parameter
tile_barrier(const tile_barrier& _Other) __GPU {}
/// <summary>
/// Blocks execution of all threads in a tile until all all threads in the tile have reached this call.
/// Ensures that memory accesses are visible to other threads in the thread tile, and are executed according to program order
/// </summary>
void wait() const __GPU_ONLY
{
__dp_d3d_all_memory_fence_with_tile_barrier();
}
/// <summary>
/// Blocks execution of all threads in a tile until all all threads in the tile have reached this call.
/// Ensures that memory accesses are visible to other threads in the thread tile, and are executed according to program order
/// </summary>
void wait_with_all_memory_fence() const __GPU_ONLY
{
__dp_d3d_all_memory_fence_with_tile_barrier();
}
/// <summary>
/// Blocks execution of all threads in a tile until all all threads in the tile have reached this call.
/// Ensures that global memory accesses are visible to other threads in the thread tile, and are executed according to program order
/// </summary>
void wait_with_global_memory_fence() const __GPU_ONLY
{
__dp_d3d_device_memory_fence_with_tile_barrier();
}
/// <summary>
/// Blocks execution of all threads in a tile until all all threads in the tile have reached this call.
/// Ensures that tile_static memory accesses are visible to other threads in the thread tile, and are executed according to program order
/// </summary>
void wait_with_tile_static_memory_fence() const __GPU_ONLY
{
__dp_d3d_tile_static_memory_fence_with_tile_barrier();
}
};
/// <summary>
/// A _Tiled_index_base is the base class of all three kinds of tiled_index to
/// share the common code.
/// </summary>
template <int _Rank> class _Tiled_index_base
{
public:
_CPP_AMP_VERIFY_RANK(_Rank, tiled_index);
static const int rank = _Rank;
/// <summary>
/// An index that represents the global index within an extent.
/// </summary>
const index<rank> global;
/// <summary>
/// An index that represents the relative index within the current tile of a tiled_extent.
/// </summary>
const index<rank> local;
/// <summary>
/// An index that represents the coordinates of the current tile of a tiled_extent.
/// </summary>
const index<rank> tile;
/// <summary>
/// An index that represents the global coordinates of the origin of the current tile within a tiled_extent.
/// </summary>
const index<rank> tile_origin;
/// <summary>
/// An object which represents a barrier within the current tile of threads.
/// </summary>
const tile_barrier barrier;
/// <summary>
/// A Constructor that initializes data members using the given values.
/// </summary>
/// <param name="_Global">
/// The global index to be copied from
/// </param>
/// <param name="_Local">
/// The local index to be copied from
/// </param>
/// <param name="_Tile">
/// The tile index to be copied from
/// </param>
/// <param name="_Tile_origin">
/// The tile origin to be copied from
/// </param>
/// <param name="_Barrier">
/// The barrier to be copied from
/// </param>
_Tiled_index_base(const index<rank>& _Global,
const index<rank>& _Local,
const index<rank>& _Tile,
const index<rank>& _Tile_origin,
const tile_barrier& _Barrier) __GPU
: global(_Global), local(_Local), tile(_Tile), tile_origin(_Tile_origin), barrier(_Barrier)
{}
/// <summary>
/// Copy Constructor.
/// </summary>
/// <param name="_Other">
/// The tile_index instance to be copied from .
/// </param>
_Tiled_index_base(const _Tiled_index_base& _Other) __GPU
: global(_Other.global),
local(_Other.local),
tile(_Other.tile),
tile_origin(_Other.tile_origin),
barrier(_Other.barrier)
{}
/// <summary>
/// Implicit conversion operator that converts a tiled_index into an index.
/// The implicit conversion converts to the .global index member.
/// </summary>
operator const index<rank>() const __GPU
{
return global;
}
private:
_Tiled_index_base& operator=(const _Tiled_index_base&) __GPU;
};
/// <summary>
/// A tiled_index is a set of indices of 1 to 3 dimensions which have been
/// subdivided into 1-, 2-, or 3-dimensional tiles in a tiled_extent. It has
/// three specialized forms: tiled_index&lt;_Dim0&gt;, tiled_index&lt;_Dim0, _Dim1&gt;, and
/// tiled_index&lt;_Dim0, _Dim1, _Dim2&gt;, where _Dim0-2 specify the length of the tile along
/// the each dimension, with _Dim0 being the most-significant dimension and _Dim2
/// being the least-significant.
/// </summary>
template <int _Dim0, int _Dim1 = 0, int _Dim2 = 0> class tiled_index : public _Tiled_index_base<3>
{
public:
/// <summary>
/// A Constructor that initializes data members using the given values.
/// </summary>
/// <param name="_Global">
/// The global index to be copied from
/// </param>
/// <param name="_Local">
/// The local index to be copied from
/// </param>
/// <param name="_Tile">
/// The tile index to be copied from
/// </param>
/// <param name="_Tile_origin">
/// The tile origin to be copied from
/// </param>
/// <param name="_Barrier">
/// The barrier to be copied from
/// </param>
tiled_index(const index<rank>& _Global,
const index<rank>& _Local,
const index<rank>& _Tile,
const index<rank>& _Tile_origin,
const tile_barrier& _Barrier) __GPU
: _Tiled_index_base(_Global, _Local, _Tile, _Tile_origin, _Barrier)
{}
/// <summary>
/// Copy Constructor.
/// </summary>
/// <param name="_Other">
/// The tile_index instance to be copied from .
/// </param>
tiled_index(const tiled_index& _Other) __GPU
: _Tiled_index_base(_Other)
{}
/// <summary>
/// Returns an instance of an extent that captures the values of the tiled_index
/// template arguments _Dim0, _Dim1, _Dim2
/// </summary>
__declspec(property(get=get_tile_extent)) extent<rank> tile_extent;
extent<rank> get_tile_extent() __GPU { return extent<rank>(_Dim0, _Dim1, _Dim2); }
/// <summary>
/// These constants allow access to the template arguments of tiled_index.
/// </summary>
static const int tile_dim0 = _Dim0;
static const int tile_dim1 = _Dim1;
static const int tile_dim2 = _Dim2;
private:
tiled_index& operator=(const tiled_index&) __GPU;
};
template <int _Dim0, int _Dim1>
class tiled_index<_Dim0, _Dim1, 0> : public _Tiled_index_base<2>
{
public:
/// <summary>
/// A Constructor that initializes data members using the given values.
/// </summary>
/// <param name="_Global">
/// The global index to be copied from
/// </param>
/// <param name="_Local">
/// The local index to be copied from
/// </param>
/// <param name="_Tile">
/// The tile index to be copied from
/// </param>
/// <param name="_Tile_origin">
/// The tile origin to be copied from
/// </param>
/// <param name="_Barrier">
/// The barrier to be copied from
/// </param>
tiled_index(const index<rank>& _Global,
const index<rank>& _Local,
const index<rank>& _Tile,
const index<rank>& _Tile_origin,
const tile_barrier& _Barrier) __GPU
: _Tiled_index_base(_Global, _Local, _Tile, _Tile_origin, _Barrier)
{}
/// <summary>
/// Copy Constructor.
/// </summary>
/// <param name="_Other">
/// The tile_index instance to be copied from .
/// </param>
tiled_index(const tiled_index& _Other) __GPU
: _Tiled_index_base(_Other)
{}
/// <summary>
/// Returns an instance of an extent that captures the values of the tiled_index
/// template arguments _Dim0, _Dim1
/// </summary>
__declspec(property(get=get_tile_extent)) extent<rank> tile_extent;
extent<rank> get_tile_extent() __GPU { return extent<rank>(_Dim0, _Dim1); }
/// <summary>
/// These constants allow access to the template arguments of tiled_index.
/// </summary>
static const int tile_dim0 = _Dim0;
static const int tile_dim1 = _Dim1;
private:
tiled_index& operator=(const tiled_index&) __GPU;
};
template <int _Dim0>
class tiled_index<_Dim0, 0, 0> : public _Tiled_index_base<1>
{
public:
/// <summary>
/// A Constructor that initializes data members using the given values.
/// </summary>
/// <param name="_Global">
/// The global index to be copied from
/// </param>
/// <param name="_Local">
/// The local index to be copied from
/// </param>
/// <param name="_Tile">
/// The tile index to be copied from
/// </param>
/// <param name="_Tile_origin">
/// The tile origin to be copied from
/// </param>
/// <param name="_Barrier">
/// The barrier to be copied from
/// </param>
tiled_index(const index<rank>& _Global,
const index<rank>& _Local,
const index<rank>& _Tile,
const index<rank>& _Tile_origin,
const tile_barrier& _Barrier) __GPU
: _Tiled_index_base(_Global, _Local, _Tile, _Tile_origin, _Barrier)
{}
/// <summary>
/// Copy Constructor.
/// </summary>
/// <param name="_Other">
/// The tile_index instance to be copied from .
/// </param>
tiled_index(const tiled_index& _Other) __GPU
: _Tiled_index_base(_Other)
{}
/// <summary>
/// Returns an instance of an extent that captures the values of the tiled_index
/// template argument _Dim0
/// </summary>
__declspec(property(get=get_tile_extent)) extent<rank> tile_extent;
extent<rank> get_tile_extent() __GPU { return extent<rank>(_Dim0); }
/// <summary>
/// These constants allow access to the template arguments of tiled_index.
/// </summary>
static const int tile_dim0 = _Dim0;
private:
tiled_index& operator=(const tiled_index&) __GPU;
};
/// <summary>
/// A tiled_extent is an extent of 1 to 3 dimensions which also subdivides the extent space into
/// 1-, 2-, or 3-dimensional tiles. It has three specialized forms: tiled_extent&lt;_Dim0&gt;,
/// tiled_extent&lt;_Dim0,_Dim1&gt;, and tiled_extent&lt;_Dim0,_Dim1,_Dim2&gt;, where _Dim0-2 specify the length of the tile
/// along each dimension, with _Dim0 being the most-significant dimension and _Dim2 being the
/// least-significant.
/// </summary>
template <int _Dim0, int _Dim1 /*=0*/, int _Dim2 /*=0*/> class tiled_extent : public Concurrency::extent<3>
{
public:
static_assert(_Dim0>0, "_Dim0 must be positive");
static_assert(_Dim1>0, "_Dim1 must be positive");
static_assert(_Dim2>0, "_Dim2 must be positive");
/// <summary>
/// Default constructor.
/// </summary>
tiled_extent() __GPU {}
/// <summary>
/// Constructs a new tiled_extent from the supplied extent.
/// </summary>
tiled_extent(const Concurrency::extent<rank>& _Other) __GPU : Concurrency::extent<rank>(_Other)
{}
/// <summary>
/// Copy constructor. Constructs a new tiled_extent from the supplied argument "_Other".
/// </summary>
tiled_extent(const tiled_extent& _Other) __GPU : Concurrency::extent<rank>(_Other)
{}
/// <summary>
/// copy-assignment operator
/// </summary>
tiled_extent& operator=(const tiled_extent& _Other) __GPU
{
Concurrency::extent<rank>::operator=(_Other);
return *this;
}
/// <summary>
/// Returns an instance of an extent that captures the values of the tiled_extent
/// template arguments _Dim0, _Dim1, _Dim2.
/// </summary>
__declspec(property(get=get_tile_extent)) Concurrency::extent<rank> tile_extent;
Concurrency::extent<rank> get_tile_extent() const __GPU
{
return Concurrency::extent<rank>(_Dim0, _Dim1, _Dim2);
}
/// <summary>
/// Returns a new tiled_extent with extents adjusted up to be evenly divisible by the tile dimensions.
/// </summary>
tiled_extent pad() const __GPU
{
Concurrency::extent<rank> _New_extent(((static_cast<unsigned int>((*this)[0]) + _Dim0 - 1)/_Dim0) * _Dim0,
((static_cast<unsigned int>((*this)[1]) + _Dim1 - 1)/_Dim1) * _Dim1,
((static_cast<unsigned int>((*this)[2]) + _Dim2 - 1)/_Dim2) * _Dim2);
return tiled_extent<_Dim0,_Dim1,_Dim2>(_New_extent);
}
/// <summary>
/// Returns a new tiled_extent with extents adjusted down to be evenly divisible by the tile dimensions.
/// </summary>
tiled_extent truncate() const __GPU
{
Concurrency::extent<rank> _New_extent(((*this)[0]/_Dim0) * _Dim0, ((*this)[1]/_Dim1) * _Dim1, ((*this)[2]/_Dim2) * _Dim2);
return tiled_extent<_Dim0,_Dim1,_Dim2>(_New_extent);
}
/// <summary>
/// These constants allow access to the template arguments of tiled_extent.
/// </summary>
static const int tile_dim0 = _Dim0;
static const int tile_dim1 = _Dim1;
static const int tile_dim2 = _Dim2;
// implementation details (compiler helpers) - begin
// Given the local index, the tile index, the global index, in the 0-based domain that
// has same extents as 'this', and a barrier object, return a tiled_index<_Dim0, _Dim1, _Dim2> into
// the 'this' tiled_extent domain.
tiled_index<_Dim0, _Dim1, _Dim2> _map_index(const index<rank>& _Local, const index<rank>& _Tile, const index<rank>& _Global, tile_barrier& _Barrier) const __GPU
{
index<rank> _Tile_origin = details::_Create_uninitialized_tuple<index<rank>>();
details::_arithmetic_op_loop_helper<index<rank>, details::opMul>::func(_Tile_origin, _Tile, tile_extent);
return tiled_index<_Dim0, _Dim1, _Dim2>(_Global, _Local, _Tile, _Tile_origin, _Barrier);
}
// implementation details (compiler helpers) - end
};
template <int _Dim0, int _Dim1>
class tiled_extent<_Dim0, _Dim1, 0> : public Concurrency::extent<2>
{
public:
static_assert(_Dim0>0, "_Dim0 must be positive");
static_assert(_Dim1>0, "_Dim1 must be positive");
/// <summary>
/// Default constructor.
/// </summary>
tiled_extent() __GPU {}
/// <summary>
/// Constructs a new tiled_extent from the supplied extent.
/// </summary>
tiled_extent(const Concurrency::extent<rank>& _Other) __GPU : Concurrency::extent<rank>(_Other)
{}
/// <summary>
/// Copy constructor. Constructs a new tiled_extent from the supplied argument "_Other".
/// </summary>
tiled_extent(const tiled_extent& _Other) __GPU : Concurrency::extent<rank>(_Other)
{}
/// <summary>
/// copy-assignment operator
/// </summary>
tiled_extent& operator=(const tiled_extent& _Other) __GPU
{
Concurrency::extent<rank>::operator=(_Other);
return *this;
}
/// <summary>
/// Returns an instance of an extent that captures the values of the tiled_extent
/// template arguments _Dim0, _Dim1.
/// </summary>
__declspec(property(get=get_tile_extent)) Concurrency::extent<rank> tile_extent;
Concurrency::extent<rank> get_tile_extent() const __GPU
{
return Concurrency::extent<rank>(_Dim0, _Dim1);
}
/// <summary>
/// Returns a new tiled_extent with extents adjusted up to be evenly divisible by the tile dimensions.
/// </summary>
tiled_extent pad() const __GPU
{
Concurrency::extent<rank> _New_extent(((static_cast<unsigned int>((*this)[0]) + _Dim0 - 1)/_Dim0) * _Dim0,
((static_cast<unsigned int>((*this)[1]) + _Dim1 - 1)/_Dim1) * _Dim1);
return tiled_extent<_Dim0,_Dim1>(_New_extent);
}
/// <summary>
/// Returns a new tiled_extent with extents adjusted down to be evenly divisible by the tile dimensions.
/// </summary>
tiled_extent truncate() const __GPU
{
Concurrency::extent<rank> _New_extent(((*this)[0]/_Dim0) * _Dim0, ((*this)[1]/_Dim1) * _Dim1);
return tiled_extent<_Dim0,_Dim1>(_New_extent);
}
/// <summary>
/// These constants allow access to the template arguments of tiled_extent.
/// </summary>
static const int tile_dim0 = _Dim0;
static const int tile_dim1 = _Dim1;
// implementation details (compiler helpers) - begin
// Given the local index, the tile index, the global index, in the 0-based domain that
// has same extents as 'this', and a barrier object, return a tiled_index<_Dim0, _Dim1> into
// the 'this' tiled_extent domain.
tiled_index<_Dim0, _Dim1> _map_index(const index<rank>& _Local, const index<rank>& _Tile, const index<rank>& _Global, tile_barrier& _Barrier) const __GPU
{
index<rank> _Tile_origin = details::_Create_uninitialized_tuple<index<rank>>();
details::_arithmetic_op_loop_helper<index<rank>, details::opMul>::func(_Tile_origin, _Tile, tile_extent);
return tiled_index<_Dim0, _Dim1>(_Global, _Local, _Tile, _Tile_origin, _Barrier);
}
// implementation details (compiler helpers) - end
};
template <int _Dim0>
class tiled_extent<_Dim0, 0, 0> : public Concurrency::extent<1>
{
public:
static_assert(_Dim0>0, "_Dim0 must be positive");
/// <summary>
/// Default constructor.
/// </summary>
tiled_extent() __GPU {}
/// <summary>
/// Constructs a new tiled_extent from the supplied extent.
/// </summary>
tiled_extent(const Concurrency::extent<rank>& _Other) __GPU : Concurrency::extent<rank>(_Other)
{}
/// <summary>
/// Copy constructor. Constructs a new tiled_extent from the supplied argument "_Other".
/// </summary>
tiled_extent(const tiled_extent& _Other) __GPU : Concurrency::extent<rank>(_Other)
{}
/// <summary>
/// copy-assignment operator
/// </summary>
tiled_extent& operator=(const tiled_extent& _Other) __GPU
{
Concurrency::extent<rank>::operator=(_Other);
return *this;
}
/// <summary>
/// Returns an instance of an extent that captures the values of the tiled_extent
/// template argument _Dim0.
/// </summary>
__declspec(property(get=get_tile_extent)) Concurrency::extent<rank> tile_extent;
Concurrency::extent<rank> get_tile_extent() const __GPU
{
return Concurrency::extent<rank>(_Dim0);
}
/// <summary>
/// Returns a new tiled_extent with extents adjusted up to be evenly divisible by the tile dimensions.
/// </summary>
tiled_extent pad() const __GPU
{
Concurrency::extent<rank> _New_extent(((static_cast<unsigned int>((*this)[0]) + _Dim0 - 1)/_Dim0) * _Dim0);
return tiled_extent<_Dim0>(_New_extent);
}
/// <summary>
/// Returns a new tiled_extent with extents adjusted down to be evenly divisible by the tile dimensions.
/// </summary>
tiled_extent truncate() const __GPU
{
Concurrency::extent<rank> _New_extent(((*this)[0]/_Dim0) * _Dim0);
return tiled_extent<_Dim0>(_New_extent);
}
/// <summary>
/// These constants allow access to the template arguments of tiled_extent.
/// </summary>
static const int tile_dim0 = _Dim0;
// implementation details (compiler helpers) - begin
// Given the local index, the tile index, the global index, in the 0-based domain that
// has same extents as 'this', and a barrier object, return a tiled_index<_Dim0> into
// the 'this' tiled_extent domain.
tiled_index<_Dim0> _map_index(const index<rank>& _Local, const index<rank>& _Tile, const index<rank>& _Global, tile_barrier& _Barrier) const __GPU
{
index<rank> _Tile_origin = details::_Create_uninitialized_tuple<index<rank>>();
details::_arithmetic_op_loop_helper<index<rank>, details::opMul>::func(_Tile_origin, _Tile, tile_extent);
return tiled_index<_Dim0>(_Global, _Local, _Tile, _Tile_origin, _Barrier);
}
};
namespace details
{
template <int _Old_element_size, int _New_element_size>
int _Calculate_reinterpreted_size(int _Old_size) __GPU_ONLY
{
int _Total_size = _Old_element_size * _Old_size;
int _New_size = (_Total_size + _New_element_size - 1)/ _New_element_size;
return _New_size;
}
template <int _Old_element_size, int _New_element_size>
int _Calculate_reinterpreted_size(int _Old_size) __CPU_ONLY
{
int _Total_size = _Old_element_size * _Old_size;
int _New_size = (_Total_size + _New_element_size - 1)/ _New_element_size;
if (_New_size * _New_element_size > _Total_size)
throw runtime_exception("Element type of reinterpret_as does not evenly divide into extent", E_INVALIDARG);
return _New_size;
}
// This class defines the shape of an array view and provides
// the functionality of translating dimensional indices into
// flat offsets into the underlying buffer
template <int _Rank, int _Element_size /* in number of ints */>
class _Array_view_shape
{
typedef _Array_flatten_helper<_Rank, typename Concurrency::extent<_Rank>::value_type, typename Concurrency::index<_Rank>::value_type> _Flatten_helper;
friend class _Array_view_shape<_Rank+1, _Element_size>;
public:
/// <summary>
/// The extent of this array or view.
/// </summary>
__declspec(property(get=get_extent)) Concurrency::extent<_Rank> extent;
Concurrency::extent<_Rank> get_extent() const __GPU
{
return _M_view_extent;
}
~_Array_view_shape() __GPU {}
protected:
int _Base_linear_offset() const __GPU
{
return (_M_total_linear_offset - (_Element_size * _Flatten_helper::func(_M_array_multiplier._M_base, _M_view_offset._M_base)));
}
_Array_view_shape(const _Array_view_shape& _Other) __GPU
:
_M_array_extent(_Other._M_array_extent),
_M_array_multiplier(_Other._M_array_multiplier),
_M_view_offset(_Other._M_view_offset),
_M_total_linear_offset(_Other._M_total_linear_offset),
_M_view_extent(_Other._M_view_extent)
{
}
// For "section"
_Array_view_shape(const _Array_view_shape& _Other, const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) __GPU
:
_M_array_extent(_Other._M_array_extent),
_M_array_multiplier(_Other._M_array_multiplier),
_M_view_offset(_Other._M_view_offset + _Section_origin),
_M_view_extent(_Section_extent)
{
_Is_valid_section(_Other._M_view_extent, _Section_origin, _Section_extent);
_M_total_linear_offset = _Other._Base_linear_offset() + (_Element_size * _Flatten_helper::func(_M_array_multiplier._M_base, _M_view_offset._M_base));
}
_Array_view_shape(int _Base_linear_offset, const Concurrency::extent<_Rank>& _Array_extent) __GPU
:
_M_array_extent(_Array_extent),
_M_view_offset(index<_Rank>()),
_M_total_linear_offset(_Base_linear_offset),
_M_view_extent(_Array_extent)
{
_Initialize_multiplier();
}
_Array_view_shape(int _Base_linear_offset, const Concurrency::extent<_Rank>& _Array_extent,
const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) __GPU
:
_M_array_extent(_Array_extent),
_M_view_offset(_Section_origin),
_M_total_linear_offset(_Base_linear_offset),
_M_view_extent(_Section_extent)
{
_Is_valid_section(_Array_extent, _Section_origin, _Section_extent);
_Initialize_multiplier();
_M_total_linear_offset += (_Element_size * _Flatten_helper::func(_M_array_multiplier._M_base, _M_view_offset._M_base));
}
_Array_view_shape& operator=(const _Array_view_shape &_Other) __GPU
{
_M_array_extent = _Other._M_array_extent;
_M_array_multiplier = _Other._M_array_multiplier;
_M_view_offset = _Other._M_view_offset;
_M_total_linear_offset = _Other._M_total_linear_offset;
_M_view_extent = _Other._M_view_extent;
return *this;
}
void _Project0(int _I, _Array_view_shape<_Rank-1,_Element_size>& _Projected_shape) const __GPU
{
static_assert(_Rank > 1, "Projection is only supported on array_views with a rank of 2 or higher");
_Is_valid_projection(_I, this->_M_view_extent);
typedef Concurrency::extent<_Rank-1> _RES_EXT;
typedef Concurrency::extent<_Rank> _SRC_EXT;
typedef Concurrency::index<_Rank-1> _RES_IDX;
typedef Concurrency::index<_Rank> _SRC_IDX;
details::_project0<_RES_EXT, _SRC_EXT, _RES_IDX, _SRC_IDX, _Rank>::func(
_Projected_shape._M_array_extent, this->_M_array_extent,
_Projected_shape._M_array_multiplier, this->_M_array_multiplier,
_Projected_shape._M_view_offset, this->_M_view_offset,
_Projected_shape._M_view_extent, this->_M_view_extent);
_Projected_shape._M_total_linear_offset = _M_total_linear_offset + (_Element_size * _I * _M_array_multiplier[0]);
}
_Array_view_shape() __GPU
: _M_array_extent(details::_do_not_initialize), _M_array_multiplier(details::_do_not_initialize),
_M_view_offset(details::_do_not_initialize), _M_view_extent(details::_do_not_initialize)
{
}
private:
void _Initialize_multiplier() __GPU
{
details::_Is_valid_extent(_M_array_extent);
unsigned int _Ext = _M_array_extent[_Rank-1];
details::_Array_init_helper<Concurrency::extent<_Rank>, Concurrency::extent<_Rank>>::func(_Ext, _M_array_multiplier, _M_array_extent);
}
protected:
Concurrency::extent<_Rank> _M_array_extent;
Concurrency::extent<_Rank> _M_array_multiplier;
Concurrency::index<_Rank> _M_view_offset;
int _M_total_linear_offset; // in number of units
Concurrency::extent<_Rank> _M_view_extent;
};
template <int _Rank, int _Element_size>
class _Array_view_base : public _Array_view_shape<_Rank,_Element_size /* in number of ints */>
{
template <int _R, int _S>
friend class _Array_view_base;
public:
typedef details::_Buffer_descriptor _Buffer_descriptor;
~_Array_view_base() __GPU
{
// Unregister the view; Do not throw exception
_Unregister(false);
}
protected:
_Array_view_base() __GPU {}
_Array_view_base(const _Buffer_descriptor& _Buffer_desc, const _Array_view_shape& _Shape) __GPU
:
_M_buffer_descriptor(_Buffer_desc),
_Array_view_shape<_Rank, _Element_size>(_Shape)
{
// Register the view
_Register();
}
_Array_view_base(const _Array_view_base& _Other) __GPU
:
_M_buffer_descriptor(_Other._M_buffer_descriptor),
_Array_view_shape<_Rank, _Element_size>(_Other)
{
// Register the view
_Register_copy(_Other);
}
_Array_view_base(const _Array_view_base& _Other, const Concurrency::extent<_Rank>& _Array_extent) __GPU
:
_M_buffer_descriptor(_Other._M_buffer_descriptor),
_Array_view_shape<_Rank, _Element_size>(_Other._Base_linear_offset(), _Array_extent)
{
// Register the view
_Register();
}
_Array_view_base(const _Array_view_base& _Other, const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) __GPU
:
_M_buffer_descriptor(_Other._M_buffer_descriptor),
_Array_view_shape<_Rank, _Element_size>(_Other, _Section_origin, _Section_extent)
{
// Register the view
_Register();
}
_Array_view_base(const _Buffer_descriptor& _Buffer_desc, const Concurrency::extent<_Rank>& _Array_extent) __GPU
:
_M_buffer_descriptor(_Buffer_desc),
_Array_view_shape<_Rank, _Element_size>(0,_Array_extent)
{
// Register the view
_Register();
}
_Array_view_base(const _Buffer_descriptor& _Buffer_desc, int _Base_linear_offset, const Concurrency::extent<_Rank>& _Array_extent) __GPU
:
_M_buffer_descriptor(_Buffer_desc),
_Array_view_shape<_Rank, _Element_size>(_Base_linear_offset,_Array_extent)
{
// Register the view
_Register();
}
_Array_view_base(
const _Buffer_descriptor& _Buffer_desc,
int _Base_linear_offset,
const Concurrency::extent<_Rank>& _Array_extent,
const Concurrency::index<_Rank>& _View_offset,
const Concurrency::extent<_Rank>& _View_extent
) __GPU
:
_M_buffer_descriptor(_Buffer_desc),
_Array_view_shape<_Rank, _Element_size>(_Base_linear_offset,_Array_extent,_View_offset,_View_extent)
{
// Register the view
_Register();
}
_Array_view_base(const _Buffer_descriptor& _Buffer_desc, const Concurrency::extent<_Rank>& _Array_extent,
const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) __GPU
:
_M_buffer_descriptor(_Buffer_desc),
_Array_view_shape<_Rank, _Element_size>(0,_Array_extent,_Section_origin,_Section_extent)
{
// Register the view
_Register();
}
_Array_view_base(const Concurrency::extent<_Rank>& _Array_extent) __CPU_ONLY
:
_Array_view_shape<_Rank, _Element_size>(0,_Array_extent)
{
_Ubiquitous_buffer_ptr _PUBuf = _Ubiquitous_buffer::_Create_ubiquitous_buffer(_Array_extent.size(), _Element_size * sizeof(int));
_M_buffer_descriptor = _Buffer_descriptor(NULL, _PUBuf, _No_access, _No_access);
// Register the view
_Register();
}
_Array_view_base(_In_ void * _Data, const Concurrency::extent<_Rank>& _Array_extent) __CPU_ONLY
:
_Array_view_shape<_Rank, _Element_size>(0,_Array_extent)
{
if (_Data == NULL) {
throw runtime_exception("Invalid pointer argument (NULL) to array_view constructor", E_INVALIDARG);
}
_Buffer_ptr _PBuf = _Buffer::_Create_buffer(_Data, accelerator(accelerator::cpu_accelerator).default_view, _Array_extent.size(), _Element_size * sizeof(int));
_Ubiquitous_buffer_ptr _PUBuf = _Ubiquitous_buffer::_Create_ubiquitous_buffer(_PBuf);
_M_buffer_descriptor = _Buffer_descriptor(_Data, _PUBuf, _Read_write_access, _Read_write_access);
// Register the view
_Register();
}
_Array_view_base(_In_ void * _Data, const Concurrency::extent<_Rank>& _Array_extent) __GPU_ONLY
:
_Array_view_shape<_Rank, _Element_size>(0,_Array_extent), _M_buffer_descriptor(_Data, NULL, _Read_write_access, _Read_write_access)
{
}
_Array_view_base(const void * _Data, const Concurrency::extent<_Rank>& _Array_extent) __CPU_ONLY
:
_Array_view_shape<_Rank, _Element_size>(0,_Array_extent)
{
if (_Data == NULL) {
throw runtime_exception("Invalid pointer argument (NULL) to array_view constructor", E_INVALIDARG);
}
_Buffer_ptr _PBuf = _Buffer::_Create_buffer(const_cast<void*>(_Data), accelerator(accelerator::cpu_accelerator).default_view, _Array_extent.size(), _Element_size * sizeof(int));
_Ubiquitous_buffer_ptr _PUBuf = _Ubiquitous_buffer::_Create_ubiquitous_buffer(_PBuf);
_M_buffer_descriptor = _Buffer_descriptor(const_cast<void*>(_Data), _PUBuf, _Read_access, _Read_access);
// Register the view
_Register();
}
_Array_view_base(const void * _Data, const Concurrency::extent<_Rank>& _Array_extent) __GPU_ONLY
:
#pragma warning( push )
#pragma warning( disable : 4880 )
// Casting away constness in amp restricted scope might result in
// undefined behavior, therefore, the compiler will report a level 1 warning
// for it. But the following const_cast is harmless thus we are suppressing
// this warning just for this line.
_Array_view_shape<_Rank, _Element_size>(0,_Array_extent), _M_buffer_descriptor(const_cast<void*>(_Data), NULL, _Read_access, _Read_access)
#pragma warning( pop )
{
}
_Array_view_base& operator=(const _Array_view_base &_Other) __GPU
{
if (this != &_Other)
{
// Unregister the current view
_Unregister();
_M_buffer_descriptor = _Other._M_buffer_descriptor;
_Array_view_shape<_Rank, _Element_size>::operator=(_Other);
// Register the new view
_Register_copy(_Other);
}
return *this;
}
_Ret_ void * _Access(const index<_Rank>& _Index) const __GPU
{
int * _Ptr = reinterpret_cast<int *>(_M_buffer_descriptor._M_data_ptr);
return &_Ptr[_M_total_linear_offset + (_Element_size * _Flatten_helper::func(_M_array_multiplier._M_base, _Index._M_base))];
}
_Ret_ void * _Access(_Access_mode _Requested_mode, const index<_Rank>& _Index) const __CPU_ONLY
{
// Refresh the data ptr if we do not have requested access
if ((_M_buffer_descriptor._M_curr_cpu_access_mode & _Requested_mode) != _Requested_mode) {
_M_buffer_descriptor._Get_CPU_access(_Requested_mode);
}
return _Access(_Index);
}
_Ret_ void * _Access(_Access_mode _Requested_mode, const index<_Rank>& _Index) const __GPU_ONLY
{
UNREFERENCED_PARAMETER(_Requested_mode);
return _Access(_Index);
}
_Array_view_base _Section(const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) const __GPU
{
auto _View = _Array_view_base(*this, _Section_origin, _Section_extent);
// Register the constructed view with the section buffer view shape
_View._Register(_Array_view_base::_Create_section_buffer_shape(this->_M_buffer_descriptor, _Section_origin, _Section_extent));
return _View;
}
_Array_view_base _Section(const index<_Rank>& _Idx) const __GPU
{
return _Section(_Idx, this->extent - _Idx);
}
void _Project0(int _I, _Array_view_base<_Rank-1,_Element_size>& _Projected_view) const __GPU
{
_Projected_view._M_buffer_descriptor = this->_M_buffer_descriptor;
_Array_view_shape<_Rank, _Element_size>::_Project0(_I, _Projected_view);
// Register the constructed view with the projection buffer view shape
_Projected_view._Register(_Array_view_base::_Create_projection_buffer_shape(this->_M_buffer_descriptor, 0, _I));
}
template <int _New_element_size>
_Array_view_base<_Rank,_New_element_size> _Reinterpret_as() const __GPU
{
static_assert(_Rank==1, "reinterpret_as is only permissible on array views of rank 1");
int _New_size = _Calculate_reinterpreted_size<_Element_size,_New_element_size>(_M_view_extent.size());
return _Array_view_base<_Rank,_New_element_size>(this->_M_buffer_descriptor,
_M_total_linear_offset,
Concurrency::extent<_Rank>(_New_size));
}
template <int _New_rank>
_Array_view_base<_New_rank, _Element_size> _View_as(const Concurrency::extent<_New_rank>& _View_extent) const __GPU
{
static_assert(_Rank==1, "view_as is only permissible on array views of rank 1");
return _Array_view_base<_New_rank, _Element_size>(this->_M_buffer_descriptor,
_M_total_linear_offset,
_View_extent,
index<_New_rank>(),
_View_extent);
}
_Ret_ _View_shape* _Create_buffer_view_shape() const __CPU_ONLY
{
unsigned int bufElemSize = static_cast<unsigned int>(_M_buffer_descriptor._Get_buffer_ptr()->_Get_master_buffer_elem_size());
unsigned int elemSize = _Element_size * sizeof(int);
size_t linearOffsetInBytes = _Base_linear_offset() * sizeof(int);
size_t baseLSDExtentInBytes = _M_array_extent[_Rank - 1];
baseLSDExtentInBytes *= elemSize;
size_t viewLSDOffsetInBytes = _M_view_offset[_Rank - 1];
viewLSDOffsetInBytes *= elemSize;
size_t viewLSDExtentInBytes = _M_view_extent[_Rank - 1];
viewLSDExtentInBytes *= elemSize;
// The base array extent, view extent, and view offset must be compatible with the underlying
// buffer's element size
if (((linearOffsetInBytes % bufElemSize) != 0) ||
((baseLSDExtentInBytes % bufElemSize) != 0) ||
((viewLSDOffsetInBytes % bufElemSize) != 0) ||
((viewLSDExtentInBytes % bufElemSize) != 0))
{
throw runtime_exception("The array_view base extent, view offset and/or view extent is incompatible with the underlying buffer", E_FAIL);
}
// The shape to be passed to the underlying buffer for registration must be in terms of
// the element size of the buffer
_ASSERTE((linearOffsetInBytes / bufElemSize) <= UINT_MAX);
unsigned int linearOffset = static_cast<unsigned int>(linearOffsetInBytes / bufElemSize);
unsigned int baseExtent[_Rank];
unsigned int viewOffset[_Rank];
unsigned int viewExtent[_Rank];
#pragma warning( push )
#pragma warning( disable : 6294 )
#pragma warning( disable : 6201 ) // Index '-1' is out of valid index range '0' to '0' for possibly stack allocated buffer 'baseExtent'.
for (int i = 0; i < _Rank - 1; ++i) {
baseExtent[i] = _M_array_extent[i];
viewOffset[i] = _M_view_offset[i];
viewExtent[i] = _M_view_extent[i];
}
#pragma warning( pop )
// The extent in the least significant dimension needs to be adjusted for
// difference in element size between the buffer and ourselves
_ASSERTE((baseLSDExtentInBytes / bufElemSize) <= UINT_MAX);
baseExtent[_Rank - 1] = static_cast<unsigned int>(baseLSDExtentInBytes / bufElemSize);
_ASSERTE((viewLSDOffsetInBytes / bufElemSize) <= UINT_MAX);
viewOffset[_Rank - 1] = static_cast<unsigned int>(viewLSDOffsetInBytes / bufElemSize);
_ASSERTE((viewLSDExtentInBytes / bufElemSize) <= UINT_MAX);
viewExtent[_Rank - 1] = static_cast<unsigned int>(viewLSDExtentInBytes / bufElemSize);
return _View_shape::_Create_view_shape(_Rank, linearOffset, baseExtent, viewOffset, viewExtent);
}
protected:
// Underlying storage
_Buffer_descriptor _M_buffer_descriptor;
private:
void _Register() __CPU_ONLY
{
_M_buffer_descriptor._Get_buffer_ptr()->_Register_view(_M_buffer_descriptor._Get_view_key(),
accelerator(accelerator::cpu_accelerator).default_view,
_Create_buffer_view_shape());
if (_M_buffer_descriptor._M_curr_cpu_access_mode != _No_access)
{
_Buffer_ptr _PBuf;
_Get_access_async(_M_buffer_descriptor._Get_view_key(),
accelerator(accelerator::cpu_accelerator).default_view,
_M_buffer_descriptor._M_curr_cpu_access_mode,
_PBuf)._Get();
_M_buffer_descriptor._M_data_ptr = _PBuf->_Get_host_ptr();
}
}
void _Register_copy(const _Array_view_base &_Other) __CPU_ONLY
{
_M_buffer_descriptor._Get_buffer_ptr()->_Register_view_copy(_M_buffer_descriptor._Get_view_key(), _Other._M_buffer_descriptor._Get_view_key());
}
void _Register(_In_ void* _Shape) __CPU_ONLY
{
if (_Shape == NULL) {
return;
}
// Unregister and register with the right shape
_Unregister();
_M_buffer_descriptor._Get_buffer_ptr()->_Register_view(_M_buffer_descriptor._Get_view_key(),
accelerator(accelerator::cpu_accelerator).default_view,
reinterpret_cast<_View_shape*>(_Shape));
if (_M_buffer_descriptor._M_curr_cpu_access_mode != _No_access)
{
_Buffer_ptr _PBuf;
_Get_access_async(_M_buffer_descriptor._Get_view_key(),
accelerator(accelerator::cpu_accelerator).default_view,
_M_buffer_descriptor._M_curr_cpu_access_mode,
_PBuf)._Get();
_M_buffer_descriptor._M_data_ptr = _PBuf->_Get_host_ptr();
}
}
void _Unregister(bool _Throw_exception = true) __CPU_ONLY
{
if (!_Throw_exception && (std::current_exception() == nullptr)) {
_Throw_exception = true;
}
try
{
_M_buffer_descriptor._Get_buffer_ptr()->_Unregister_view(_M_buffer_descriptor._Get_view_key());
}
catch(...)
{
if (_Throw_exception) {
throw;
}
}
}
static _Ret_ void* _Create_projection_buffer_shape(const _Buffer_descriptor& _Descriptor, unsigned int _Dim, int _Dim_offset) __CPU_ONLY
{
_View_shape* _Base_shape = _Get_buffer_view_shape(_Descriptor);
std::vector<unsigned int> _New_view_extent(_Base_shape->_Get_rank());
std::vector<unsigned int> _New_view_offset(_Base_shape->_Get_rank());
bool *_New_projection_info = new bool[_Base_shape->_Get_rank()];
for (unsigned int _I = 0; _I < _Base_shape->_Get_rank(); ++_I)
{
_New_view_extent[_I] = _Base_shape->_Get_view_extent()[_I];
_New_view_offset[_I] = _Base_shape->_Get_view_offset()[_I];
_New_projection_info[_I] = _Base_shape->_Get_projection_info()[_I];
}
// The _Dim'th non-projected dimension needs to be found
unsigned int _UnProjectedDimCount = 0;
for (unsigned int _I = 0; _I < _Base_shape->_Get_rank(); ++_I)
{
if (_Base_shape->_Get_projection_info()[_I]) {
continue;
}
if (_UnProjectedDimCount == _Dim) {
_New_view_extent[_I] = 1;
_New_view_offset[_I] += _Dim_offset;
_New_projection_info[_I] = true;
break;
}
else {
_UnProjectedDimCount++;
}
}
auto _PView_shape = _View_shape::_Create_view_shape(_Base_shape->_Get_rank(),
_Base_shape->_Get_linear_offset(),
_Base_shape->_Get_base_extent(),
_New_view_offset.data(),
_New_view_extent.data(),
_New_projection_info);
delete [] _New_projection_info;
return _PView_shape;
}
static _Ret_ void* _Create_section_buffer_shape(const _Buffer_descriptor& _Descriptor,
const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) __CPU_ONLY
{
_View_shape* _Base_shape = _Get_buffer_view_shape(_Descriptor);
if (_Base_shape->_Get_rank() == _Rank) {
return NULL;
}
std::vector<unsigned int> _New_view_extent(_Base_shape->_Get_rank());
std::vector<unsigned int> _New_view_offset(_Base_shape->_Get_rank());
unsigned int _I = 0, _J = 0;
while (_I < _Base_shape->_Get_rank())
{
if (_Base_shape->_Get_projection_info()[_I])
{
_New_view_extent[_I] = _Base_shape->_Get_view_extent()[_I];
_New_view_offset[_I] = _Base_shape->_Get_view_offset()[_I];
}
else
{
// If _J is the least significant dimension, then we need to adjust the
// offset and extent for the underlying buffer's element size
if (_J == (_Rank - 1))
{
unsigned int bufElemSize = static_cast<unsigned int>(_Descriptor._Get_buffer_ptr()->_Get_master_buffer_elem_size());
unsigned int elemSize = _Element_size * sizeof(int);
size_t sectionLSDOriginInBytes = _Section_origin[_J];
sectionLSDOriginInBytes *= elemSize;
size_t sectionLSDExtentInBytes = _Section_extent[_J];
sectionLSDExtentInBytes *= elemSize;
// The section offset and extent must be compatible with the underlying
// buffer's element size
if (((sectionLSDOriginInBytes % bufElemSize) != 0) ||
((sectionLSDExtentInBytes % bufElemSize) != 0))
{
throw runtime_exception("The array_view section origin and/or extent is incompatible with the underlying buffer", E_FAIL);
}
// The extent in the least significant dimension needs to be adjusted for
// difference in element size between the buffer and ourselves
_ASSERTE((sectionLSDOriginInBytes / bufElemSize) <= UINT_MAX);
_New_view_offset[_I] = _Base_shape->_Get_view_offset()[_I] + static_cast<unsigned int>(sectionLSDOriginInBytes / bufElemSize);
_ASSERTE((sectionLSDExtentInBytes / bufElemSize) <= UINT_MAX);
_New_view_extent[_I] = static_cast<unsigned int>(sectionLSDExtentInBytes / bufElemSize);
}
else
{
_New_view_extent[_I] = _Section_extent[_J];
_New_view_offset[_I] = _Base_shape->_Get_view_offset()[_I] + _Section_origin[_J];
}
_J++;
}
_I++;
}
_ASSERTE(_J == _Rank);
return _View_shape::_Create_view_shape(_Base_shape->_Get_rank(),
_Base_shape->_Get_linear_offset(),
_Base_shape->_Get_base_extent(),
_New_view_offset.data(),
_New_view_extent.data(),
_Base_shape->_Get_projection_info());
}
void _Register() __GPU_ONLY {}
void _Register_copy(const _Array_view_base &_Other) __GPU_ONLY
{
UNREFERENCED_PARAMETER(_Other);
}
void _Register(_In_ void* _Shape) __GPU_ONLY
{
UNREFERENCED_PARAMETER(_Shape);
}
void _Unregister(bool _Throw_exception = true) __GPU_ONLY
{
UNREFERENCED_PARAMETER(_Throw_exception);
}
static _Ret_ void* _Create_projection_buffer_shape(const _Buffer_descriptor& _Descriptor, int _Dim, int _I) __GPU_ONLY
{
UNREFERENCED_PARAMETER(_Descriptor);
UNREFERENCED_PARAMETER(_Dim);
UNREFERENCED_PARAMETER(_I);
return NULL;
}
static _Ret_ void* _Create_section_buffer_shape(const _Buffer_descriptor& _Descriptor, const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) __GPU_ONLY
{
UNREFERENCED_PARAMETER(_Descriptor);
UNREFERENCED_PARAMETER(_Section_origin);
UNREFERENCED_PARAMETER(_Section_extent);
return NULL;
}
};
template<typename _Container>
struct _Is_container
{
template<class _Uty> static auto _Fn(_Uty _Val, decltype(_Val.size(), _Val.data(), 0)) -> std::true_type;
template<class _Uty> static auto _Fn(_Uty _Val, ...) -> std::false_type;
typedef decltype(_Fn(std::declval<_Container>(),0)) type;
};
} // namespace details
/// <summary>
/// An array_view is an N-dimensional view over data held in another container (such as array&lt;T,N&gt;
/// or other container. It exposes an indexing interface congruent to that of array&lt;T,N&gt;).
/// </summary>
/// <param name="_Rank">
/// The number of dimensions of this array_view.
/// </param>
/// <param name="_Value_type">
/// The type of the element.
/// </param>
template <typename _Value_type, int _Rank = 1> class array_view : public _Array_view_base<_Rank, sizeof(_Value_type)/sizeof(int)>
{
typedef _Array_view_base<_Rank, sizeof(_Value_type)/sizeof(int)> _Base;
_CPP_AMP_VERIFY_RANK(_Rank, array_view);
static_assert(0 == (sizeof(_Value_type) % sizeof(int)), "only value types whose size is a multiple of the size of an integer are allowed in array views");
friend class details::_Array_view_projection_helper<_Value_type,_Rank>;
friend class details::_Array_view_projection_helper<_Value_type,_Rank+1>;
friend class array_view<_Value_type, _Rank>;
friend class array_view<const _Value_type, _Rank>;
friend class array_view<_Value_type, _Rank+1>;
friend class array_view<const _Value_type, _Rank+1>;
template <typename _T, int _R>
friend class array;
friend const _Buffer_descriptor& details::_Get_buffer_descriptor<array_view<_Value_type, _Rank>>(const array_view<_Value_type, _Rank>& _Array) __GPU;
public:
static const int rank = _Rank;
typedef typename _Value_type value_type;
/// <summary>
/// Destroys this array_view and reclaims resources.
/// </summary>
~array_view() __GPU {}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src array. The extent of the
/// array_view is that of the _Src array, and the origin of the array view is at zero.
/// </summary>
/// <param name="_Src">
/// An array which contains the data that this array_view is bound to.
/// </param>
array_view(array<_Value_type,_Rank>& _Src) __GPU
: _Base(_Get_buffer_descriptor(_Src), _Src.extent)
{
_Initialize();
}
/// <summary>
/// Copy constructor. Shallow copy.
/// </summary>
array_view(const array_view& _Other) __GPU
: _Base(_Other)
{
_Initialize();
}
/// <summary>
/// Construct an array_view which is not bound to a data source.
/// </summary>
/// <param name="_Extent">
/// The extent of this array view.
/// </param>
explicit array_view(const Concurrency::extent<_Rank>& _Extent) __CPU_ONLY
:_Base(_Extent)
{
_Initialize(_Extent.size(), true);
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
/// </summary>
/// <param name="_Extent">
/// The extent of this array view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> array_view(const Concurrency::extent<_Rank>& _Extent, _Container& _Src) __CPU_ONLY
:_Base(_Src.data(),_Extent)
{
static_assert( std::is_same<decltype(_Src.data()), _Value_type*>::value, "container element type and array view element type must match");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_Extent">
/// The extent of this array view.
/// </param>
/// <param name="_Src">
/// A pointer to the source data this array_view will bind to. If the number of elements pointed to
/// by _Src is less than the size of _Extent, undefined behavior results.
/// </param>
array_view(const Concurrency::extent<_Rank>& _Extent, _Value_type * _Src) __GPU
:_Base(_Src,_Extent)
{
_Initialize();
}
/// <summary>
/// Construct an array_view which is not bound to a data source.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array_view.
/// </param>
explicit array_view(int _E0) __CPU_ONLY
:_Base(Concurrency::extent<1>(_E0))
{
static_assert(_Rank == 1, "rank must be 1");
_Initialize(get_extent().size(), true);
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
//// The length of the array_view is the same as the length of the container
/// </summary>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> explicit array_view(_Container& _Src, typename std::enable_if<details::_Is_container<_Container>::type::value,void **>::type = 0) __CPU_ONLY
:_Base(_Src.data(), Concurrency::extent<1>(static_cast<int>(_Src.size())))
{
if (_Src.size() > INT_MAX) {
throw runtime_exception("Invalid _Src container argument - _Src size is greater than INT_MAX", E_INVALIDARG);
}
static_assert( std::is_same<decltype(_Src.data()), _Value_type*>::value, "container element type and array view element type must match");
static_assert(_Rank == 1, "rank must be 1");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> explicit array_view(int _E0, _Container& _Src) __CPU_ONLY
:_Base(_Src.data(), Concurrency::extent<1>(_E0))
{
static_assert( std::is_same<decltype(_Src.data()), _Value_type*>::value, "container element type and array view element type must match");
static_assert(_Rank == 1, "rank must be 1");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is not bound to a data source.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
explicit array_view(int _E0, int _E1) __CPU_ONLY
:_Base(Concurrency::extent<2>(_E0,_E1))
{
static_assert(_Rank == 2, "rank must be 2");
_Initialize(get_extent().size(), true);
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> explicit array_view(int _E0, int _E1, _Container& _Src) __CPU_ONLY
:_Base(_Src.data(), Concurrency::extent<2>(_E0,_E1))
{
static_assert( std::is_same<decltype(_Src.data()), _Value_type*>::value, "container element type and array view element type must match");
static_assert(_Rank == 2, "rank must be 2");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is not bound to a data source.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array_view.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
explicit array_view(int _E0, int _E1, int _E2) __CPU_ONLY
:_Base(Concurrency::extent<3>(_E0,_E1,_E2))
{
static_assert(_Rank == 3, "rank must be 3");
_Initialize(get_extent().size(), true);
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array_view.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> explicit array_view(int _E0, int _E1, int _E2, _Container& _Src) __CPU_ONLY
:_Base(_Src.data(), Concurrency::extent<3>(_E0,_E1,_E2))
{
static_assert( std::is_same<decltype(_Src.data()), _Value_type*>::value, "container element type and array view element type must match");
static_assert(_Rank == 3, "rank must be 3");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to. If the number of elements pointed to
/// by _Src is less than _E0, undefined behavior results.
/// </param>
explicit array_view(int _E0, _In_ _Value_type * _Src) __GPU
:_Base(_Src, Concurrency::extent<1>(_E0))
{
static_assert(_Rank == 1, "rank must be 1");
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the array _Src.
/// </summary>
/// <param name="_Src">
/// An array which contains the data that this array_view is bound to.
/// </param>
template <typename _Arr_type, int _Size> explicit array_view(_In_ _Arr_type (&_Src) [_Size]) __GPU
:_Base(_Src, Concurrency::extent<1>(_Size))
{
static_assert( std::is_same<typename std::remove_reference<decltype(*_Src)>::type, _Value_type>::value, "container element type and array view element type must match");
static_assert(_Rank == 1, "rank must be 1");
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to. If the number of elements pointed to
/// by _Src is less than _E0*_E1, undefined behavior results.
/// </param>
explicit array_view(int _E0, int _E1, _In_ _Value_type * _Src) __GPU
:_Base(_Src, Concurrency::extent<2>(_E0,_E1))
{
static_assert(_Rank == 2, "rank must be 2");
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array_view.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to. If the number of elements pointed to
/// by _Src is less than _E0*_E1*_E2, undefined behavior results.
/// </param>
explicit array_view(int _E0, int _E1, int _E2, _In_ _Value_type * _Src) __GPU
:_Base(_Src, Concurrency::extent<3>(_E0,_E1,_E2))
{
static_assert(_Rank == 3, "rank must be 3");
_Initialize();
}
/// <summary>
/// Copy Assignment operator. Shallow copy.
/// </summary>
array_view& operator=(const array_view& _Other) __GPU
{
_Base::operator=(_Other);
return *this;
}
/// <summary>
/// Copies elements from this array_view to the destination array.
/// </summary>
void copy_to(array<_Value_type,_Rank>& _Dest) const __CPU_ONLY
{
copy(*this,_Dest);
}
/// <summary>
/// Copies elements from this array_view to the destination array_view.
/// </summary>
void copy_to(const array_view<_Value_type,_Rank>& _Dest) const __CPU_ONLY
{
copy(*this,_Dest);
}
/// <summary>
/// Projects the most-significant dimension of this array_view. If the array_view rank is 1, this
/// produces a single element; otherwise it produces an array_view with one fewer dimensions.
/// </summary>
/// <param name="_I">
/// The most-significant index component
/// </param>
/// <returns>
/// The element at index component _I, or an array_view projected on the most-significant dimension.
/// </returns>
typename details::_Projection_result_type<_Value_type,_Rank>::_Result_type operator[] (int _I) const __GPU
{
return details::_Array_view_projection_helper<_Value_type,_Rank>::_Project0(this, _I);
}
/// <summary>
/// Get a reference to the element indexed by _Index. Unlike the other indexing operators for accessing the
/// array_view on the CPU, this method does not implicitly synchronize this array_view's contents to the CPU.
/// After accessing the array_view on a remote location or performing a copy operation involving this array_view
/// users are responsible to explicitly synchronize the array_view to the CPU before calling this method.
/// Failure to do so results in undefined behavior.
/// </summary>
/// <param name="_Index">
/// The index.
/// </param>
/// <returns>
/// Reference to the element indexed by _Index
/// </returns>
value_type& get_ref(const index<_Rank>& _Index) const __GPU
{
void *_Ptr = _Access(_Index);
return *reinterpret_cast<value_type*>(_Ptr);
}
/// <summary>
/// Get the element value indexed by _I
/// </summary>
/// <param name="_I">
/// The index.
/// </param>
/// <returns>
/// The element value indexed by _I
/// </returns>
value_type& operator[] (const index<_Rank>& _Index) const __GPU
{
return this->operator()(_Index);
}
/// <summary>
/// Get the element value indexed by _I
/// </summary>
/// <param name="_I">
/// The index.
/// </param>
/// <returns>
/// The element value indexed by _I
/// </returns>
value_type& operator() (const index<_Rank>& _Index) const __GPU
{
void * _Ptr = _Access(_Read_write_access, _Index);
return *reinterpret_cast<value_type*>(_Ptr);
}
/// <summary>
/// Projects the most-significant dimension of this array_view. If the array_view rank is 1, this
/// produces a single element; otherwise it produces an array_view with one fewer dimensions.
/// </summary>
/// <param name="_I">
/// The most-significant index component
/// </param>
/// <returns>
/// The element at index component _I, or an array_view projected on the most-significant dimension.
/// </returns>
typename details::_Projection_result_type<_Value_type,_Rank>::_Result_type operator() (int _I) const __GPU
{
return details::_Array_view_projection_helper<_Value_type,_Rank>::_Project0(this, _I);
}
/// <summary>
/// Get the element value indexed by (_I0,_I1)
/// </summary>
/// <param name="_I0">
/// The most-significant component of the index
/// </param>
/// <param name="_I1">
/// The least-significant component of the index
/// </param>
/// <returns>
/// The element value indexed by (_I0,_I1)
/// </returns>
value_type& operator() (int _I0, int _I1) const __GPU
{
static_assert(_Rank == 2, "value_type& array_view::operator()(int,int) is only permissible on array_view<T, 2>");
return this->operator()(index<2>(_I0,_I1));
}
/// <summary>
/// Get the element value indexed by (_I0,_I1,_I2)
/// </summary>
/// <param name="_I0">
/// The most-significant component of the index
/// </param>
/// <param name="_I1">
/// The next-to-most-significant component of the index
/// </param>
/// <param name="_I2">
/// The least-significant component of the index
/// </param>
/// <returns>
/// The element value indexed by (_I0,_I1,_I2)
/// </returns>
value_type& operator() (int _I0, int _I1, int _I2) const __GPU
{
static_assert(_Rank == 3, "value_type& array_view::operator()(int,int,int) is only permissible on array_view<T, 3>");
return this->operator()(index<3>(_I0,_I1,_I2));
}
/// <summary>
/// Produces a subsection of the source array_view at the given origin and extent.
/// </summary>
/// <param name="_Section_origin">
/// The origin of the section.
/// </param>
/// <param name="_Section_extent">
/// The extent of the section
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) const __GPU
{
return _Convert<_Value_type>(_Section(_Section_origin, _Section_extent));
}
/// <summary>
/// Produces a subsection of the source array_view with origin specified by an index, with
/// an extent of (this-&gt;exent - _Idx).
/// </summary>
/// <param name="_Idx">
/// The index that specifies the origin of this section.
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(const Concurrency::index<_Rank>& _Idx) const __GPU
{
return section(_Idx, this->extent - _Idx);
}
/// <summary>
/// Produces a subsection of the source array_view with origin of zero, with
/// an extent of _Ext.
/// </summary>
/// <param name="_Ext">
/// The extent of this section
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(const Concurrency::extent<_Rank>& _Ext) const __GPU
{
return section(Concurrency::index<_Rank>(), _Ext);
}
/// <summary>
/// Produces a one-dimensional subsection of the source array_view with origin specified by the index
/// components _I0, with extent _E0.
/// </summary>
/// <param name="_I0">
/// The origin of this section.
/// </param>
/// <param name="_E0">
/// The extent of this section.
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(int _I0, int _E0) const __GPU
{
static_assert(_Rank == 1, "rank must be 1");
return section(Concurrency::index<1>(_I0), Concurrency::extent<1>(_E0));
}
/// <summary>
/// Produces a two-dimensional subsection of the source array_view with origin specified by the index
/// components (_I0,_I1), with extent (_E0,_E1).
/// </summary>
/// <param name="_I0">
/// The most-significant component of the origin of this section.
/// </param>
/// <param name="_I1">
/// The least-significant component of the origin of this section.
/// </param>
/// <param name="_E0">
/// The most-significant component of the extent of this section.
/// </param>
/// <param name="_E1">
/// The least-significant component of the extent of this section.
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(int _I0, int _I1, int _E0, int _E1) const __GPU
{
static_assert(_Rank == 2, "rank must be 2");
return section(Concurrency::index<2>(_I0,_I1), Concurrency::extent<2>(_E0,_E1));
}
/// <summary>
/// Produces a three-dimensional subsection of the source array_view with origin specified by the index
/// components (_I0,_I1,_I2), with extent (_E0,_E1,_E2).
/// </summary>
/// <param name="_I0">
/// The most-significant component of the origin of this section.
/// </param>
/// <param name="_I1">
/// The next-to-most-significant component of the origin of this section.
/// </param>
/// <param name="_I2">
/// The least-significant component of the origin of this section.
/// </param>
/// <param name="_E0">
/// The most-significant component of the extent of this section.
/// </param>
/// <param name="_E1">
/// The next-to-most-significant component of the extent of this section.
/// </param>
/// <param name="_E2">
/// The least-significant component of the extent of this section.
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(int _I0, int _I1, int _I2, int _E0, int _E1, int _E2) const __GPU
{
static_assert(_Rank == 3, "rank must be 3");
return section(Concurrency::index<3>(_I0,_I1,_I2), Concurrency::extent<3>(_E0,_E1,_E2));
}
/// <summary>
/// Produces a (possibly unsafe) reinterpretation of this array_view that is linear and with
/// a different element type. The size of _Value_type2 must evenly divide into the size of
/// this array.
/// </summary>
/// <returns>
/// A linear array_view with a reinterpreted element type.
/// </returns>
template <typename _Value_type2> array_view<_Value_type2, _Rank> reinterpret_as() const __GPU
{
return _Convert<_Value_type2>(this->template _Reinterpret_as<sizeof(_Value_type2)/sizeof(int)>());
}
/// <summary>
/// Produces an array_view of a different rank over this array_view's data.
/// </summary>
/// <param name="_View_extent">
/// The reshaping extent.
/// </param>
/// <returns>
/// A reshaped array_view.
/// </returns>
template <int _New_rank> array_view<_Value_type,_New_rank> view_as(const Concurrency::extent<_New_rank>& _View_extent) const __GPU
{
return _Convert<_Value_type>(_View_as(_View_extent));
}
/// <summary>
/// Returns a pointer to the raw data of this array_view.
/// </summary>
_Ret_ _Value_type* data() const __GPU
{
static_assert(_Rank == 1, "array_view::data() is only permissible on array_view<T, 1>");
return &this->operator[](index<_Rank>());
}
/// <summary>
/// Informs the array_view that its bound memory has been modified outside
/// the array_view interface. This will render all cached information stale.
/// </summary>
void refresh() const __CPU_ONLY
{
// If the array_view corresponds to a ubiquitous buffer with no data source,
// then refresh is a no-op
if (!_M_buffer_descriptor._Get_buffer_ptr()->_Has_data_source()) {
return;
}
_Buffer_ptr _PBuf;
_Get_access_async(_M_buffer_descriptor._Get_view_key(), _M_buffer_descriptor._Get_buffer_ptr()->_Get_master_accelerator_view(), _Write_access, _PBuf)._Get();
}
/// <summary>
/// Asynchronously synchronizes any modifications made to "this" array_view to the specified accelerator_view.
/// </summary>
/// <param name="_Accl_view">
/// The target accelerator_view to synchronize to.
/// </param>
/// <param name="_Access_type">
/// The desired access_type on the target accelerator_view.
/// This parameter has a default value of access_type_read.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
concurrency::completion_future synchronize_to_async(const accelerator_view& _Accl_view, access_type _Access_type = access_type_read) const __CPU_ONLY
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_array_view_synchronize_event_helper(_M_buffer_descriptor);
_Buffer_ptr _PBuf;
_Event _Ev;
if (_Access_type != access_type_none) {
_Ev = _Get_access_async(_M_buffer_descriptor._Get_view_key(), _Accl_view, _Get_synchronize_access_mode(_Access_type), _PBuf);
}
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Asynchronously synchronizes any modifications made to "this" array_view to its source data.
/// </summary>
/// <param name="_Access_type">
/// The desired access_type on the target accelerator_view.
/// This parameter has a default value of access_type_read.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
concurrency::completion_future synchronize_async(access_type _Access_type = access_type_read) const __CPU_ONLY
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_array_view_synchronize_event_helper(_M_buffer_descriptor);
_Buffer_ptr _PBuf;
_Event _Ev;
// If the array_view corresponds to a ubiquitous buffer with no data source, then synchronize is a no-op
if ((_Access_type != access_type_none) && _M_buffer_descriptor._Get_buffer_ptr()->_Has_data_source())
{
_Ev = _Get_access_async(_M_buffer_descriptor._Get_view_key(),
_M_buffer_descriptor._Get_buffer_ptr()->_Get_master_accelerator_view(),
_Get_synchronize_access_mode(_Access_type),
_PBuf);
}
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Synchronizes any modifications made to "this" array_view to the specified accelerator_view.
/// </summary>
/// <param name="_Accl_view">
/// The target accelerator_view to synchronize to.
/// </param>
/// <param name="_Access_type">
/// The desired access_type on the target accelerator_view.
/// This parameter has a default value of access_type_read.
/// </param>
void synchronize_to(const accelerator_view& _Accl_view, access_type _Access_type = access_type_read) const __CPU_ONLY
{
auto _Span_id = details::_Get_amp_trace()->_Start_array_view_synchronize_event_helper(_M_buffer_descriptor);
_Buffer_ptr _PBuf;
if (_Access_type != access_type_none) {
_Get_access_async(_M_buffer_descriptor._Get_view_key(), _Accl_view, _Get_synchronize_access_mode(_Access_type), _PBuf)._Get();
}
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Synchronizes any modifications made to "this" array_view to its source data.
/// </summary>
/// <param name="_Access_type">
/// The desired access_type on the target accelerator_view.
/// This parameter has a default value of access_type_read.
/// </param>
void synchronize(access_type _Access_type = access_type_read) const __CPU_ONLY
{
auto _Span_id = details::_Get_amp_trace()->_Start_array_view_synchronize_event_helper(_M_buffer_descriptor);
_Buffer_ptr _PBuf;
// If the array_view corresponds to a ubiquitous buffer with no data source, then synchronize is a no-op
if ((_Access_type != access_type_none) && _M_buffer_descriptor._Get_buffer_ptr()->_Has_data_source())
{
_Get_access_async(_M_buffer_descriptor._Get_view_key(),
_M_buffer_descriptor._Get_buffer_ptr()->_Get_master_accelerator_view(),
_Get_synchronize_access_mode(_Access_type),
_PBuf)._Get();
}
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Discards the current data underlying this view. This is an optimization
/// hint to the runtime used to avoid copying the current contents of the view to a target
/// accelerator_view that it is accessed on, and its use is recommended if the existing
/// content is not needed. This method is only available in a restrict(cpu) context and
/// cannot be used in a restrict(amp) context.
/// </summary>
void discard_data() const __CPU_ONLY
{
_M_buffer_descriptor._Get_buffer_ptr()->_Discard(_M_buffer_descriptor._Get_view_key());
}
/// <summary>
/// Returns the accelerator_view where the data source of the array_view is located.
/// If the array_view does not have a data source, this API throws a runtime_exception
/// </summary>
accelerator_view get_source_accelerator_view() const
{
if (_M_buffer_descriptor._Get_buffer_ptr()->_Has_data_source()) {
return _M_buffer_descriptor._Get_buffer_ptr()->_Get_master_accelerator_view();
}
else {
throw runtime_exception("Cannot query source accelerator_view for an array_view without a data source.", E_INVALIDARG);
}
}
__declspec(property(get=get_source_accelerator_view)) accelerator_view source_accelerator_view;
private:
template <typename _T, int _R>
static array_view<_T,_R> _Convert(const _Array_view_base<_R,sizeof(_T)/sizeof(int)>& _Other) __GPU
{
static_assert(sizeof(array_view<_T,_R>) == sizeof(_Array_view_base<_R,sizeof(_T)/sizeof(int)>), "ASSERT FAILURE: implementation relies on binary conversion between the two");
return (*reinterpret_cast<const array_view<_T,_R>*>(&_Other));
}
void _Project0(int _I, array_view<_Value_type, _Rank-1> &_Projected_view) const __GPU
{
_Base::_Project0(_I, _Projected_view);
_Projected_view._Initialize();
}
array_view() __GPU {}
array_view(const array_view& _Other, const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) __GPU
:_Base(_Other, _Section_origin, _Section_extent)
{
_Initialize();
}
array_view(_Buffer_descriptor& _Src_buffer, const Concurrency::extent<_Rank>& _Extent) __GPU
:_Base(_Src_buffer,_Extent)
{
_Initialize();
}
void _Initialize() __GPU
{
// Set the type access mode
_M_buffer_descriptor._M_type_access_mode = _Read_write_access;
}
void _Initialize(size_t _Src_data_size, bool _Discard_data = false) __CPU_ONLY
{
// Ensure that the _Src_data_size is at least as big as the size
// of the array_view
if (_Src_data_size < this->extent.size()) {
throw runtime_exception("Invalid _Src container argument - _Src size is less than the size of the array_view.", E_INVALIDARG);
}
_Initialize();
if (_Discard_data) {
discard_data();
}
}
}; // class array_view<T,R>
// array_view<const T,R>
template <typename _Value_type, int _Rank>
class array_view<const _Value_type, _Rank> : public _Array_view_base<_Rank, sizeof(_Value_type)/sizeof(int)>
{
_CPP_AMP_VERIFY_RANK(_Rank, array_view);
static_assert(0 == (sizeof(_Value_type) % sizeof(int)), "only value types whose size is a multiple of the size of an integer are allowed in array views");
typedef _Array_view_base<_Rank, sizeof(_Value_type)/sizeof(int)> _Base;
friend class details::_Const_array_view_projection_helper<_Value_type,_Rank>;
friend class details::_Const_array_view_projection_helper<_Value_type,_Rank+1>;
friend class array_view<_Value_type, _Rank>;
friend class array_view<const _Value_type, _Rank>;
friend class array_view<_Value_type, _Rank+1>;
friend class array_view<const _Value_type, _Rank+1>;
friend const _Buffer_descriptor& details::_Get_buffer_descriptor<array_view<const _Value_type, _Rank>>(const array_view<const _Value_type, _Rank>& _Array) __GPU;
public:
static const int rank = _Rank;
typedef typename const _Value_type value_type;
/// <summary>
/// Destroys this array_view and reclaims resources.
/// </summary>
~array_view() __GPU {}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src array. The extent of the
/// array_view is that of the _Src array, and the origin of the array view is at zero.
/// </summary>
/// <param name="_Src">
/// An array which contains the data that this array_view is bound to.
/// </param>
array_view(const array<_Value_type,_Rank>& _Src) __GPU
:_Base(_Get_buffer_descriptor(_Src), _Src.extent)
{
_Initialize();
}
/// <summary>
/// Copy constructor. Shallow copy.
/// </summary>
array_view(const array_view<_Value_type,_Rank>& _Src) __GPU
:_Base(_Src)
{
_Initialize();
}
/// <summary>
/// Copy constructor. Shallow copy.
/// </summary>
array_view(const array_view<const _Value_type,_Rank>& _Src) __GPU
:_Base(_Src)
{
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
/// </summary>
/// <param name="_Extent">
/// The extent of this array view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> array_view(const Concurrency::extent<_Rank>& _Extent, const _Container& _Src) __CPU_ONLY
:_Base(_Src.data(),_Extent)
{
static_assert( std::is_same<typename std::remove_const<typename std::remove_reference<decltype(*_Src.data())>::type>::type, _Value_type>::value, "container element type and array view element type must match");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container;
//// The length of the array_view is the same as the length of the container
/// </summary>
/// <param name="_Extent">
/// The extent of this array view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> explicit array_view(const _Container& _Src, typename std::enable_if<details::_Is_container<_Container>::type::value,void **>::type = 0) __CPU_ONLY
:_Base(_Src.data(), Concurrency::extent<1>(static_cast<int>(_Src.size())))
{
if (_Src.size() > INT_MAX) {
throw runtime_exception("Invalid _Src container argument - _Src size is greater than INT_MAX", E_INVALIDARG);
}
static_assert( std::is_same<decltype(_Src.data()), const _Value_type*>::value, "container element type and array view element type must match");
static_assert(_Rank == 1, "rank must be 1");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
/// </summary>
/// <param name="_Extent">
/// The extent of this array view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> array_view(const Concurrency::extent<_Rank>& _Extent, _Container& _Src) __CPU_ONLY
:_Base(_Src.data(),_Extent)
{
static_assert( std::is_same<typename std::remove_const<typename std::remove_reference<decltype(*_Src.data())>::type>::type, _Value_type>::value, "container element type and array view element type must match");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_Extent">
/// The extent of this array view.
/// </param>
/// <param name="_Src">
/// A pointer to the source data this array_view will bind to. If the number of elements pointed to
/// by _Src is less than the size of _Extent, undefined behavior results.
/// </param>
array_view(const Concurrency::extent<_Rank>& _Extent, const _Value_type * _Src) __GPU
:_Base(_Src,_Extent)
{
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_Extent">
/// The extent of this array view.
/// </param>
/// <param name="_Src">
/// A pointer to the source data this array_view will bind to. If the number of elements pointed to
/// by _Src is less than the size of _Extent, undefined behavior results.
/// </param>
array_view(const Concurrency::extent<_Rank>& _Extent, _In_ _Value_type * _Src) __GPU
:_Base(_Src,_Extent)
{
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> array_view(int _E0, const _Container& _Src) __CPU_ONLY
:_Base(_Src.data(), Concurrency::extent<1>(_E0))
{
static_assert( std::is_same<typename std::remove_const<typename std::remove_reference<decltype(*_Src.data())>::type>::type, _Value_type>::value, "container element type and array view element type must match");
static_assert(_Rank == 1, "rank must be 1");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container;
//// The length of the array_view is the same as the length of the container
/// </summary>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Arr_type, int _Size> explicit array_view(const _In_ _Arr_type (&_Src) [_Size]) __GPU
:_Base(_Src, Concurrency::extent<1>(_Size))
{
static_assert( std::is_same<typename std::remove_const<typename std::remove_reference<decltype(*_Src)>::type>::type, _Value_type>::value, "container element type and array view element type must match");
static_assert(_Rank == 1, "rank must be 1");
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> array_view(int _E0, int _E1, const _Container& _Src) __CPU_ONLY
:_Base(_Src.data(), Concurrency::extent<2>(_E0,_E1))
{
static_assert( std::is_same<typename std::remove_const<typename std::remove_reference<decltype(*_Src.data())>::type>::type, _Value_type>::value, "container element type and array view element type must match");
static_assert(_Rank == 2, "rank must be 2");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is bound to the data contained in the _Src container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array_view.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to.
/// </param>
template <typename _Container> array_view(int _E0, int _E1, int _E2, const _Container& _Src) __CPU_ONLY
:_Base(_Src.data(), Concurrency::extent<3>(_E0,_E1,_E2))
{
static_assert( std::is_same<typename std::remove_const<typename std::remove_reference<decltype(*_Src.data())>::type>::type, _Value_type>::value, "container element type and array view element type must match");
static_assert(_Rank == 3, "rank must be 3");
_Initialize(_Src.size());
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to. If the number of elements pointed to
/// by _Src is less than _E0, undefined behavior results.
/// </param>
array_view(int _E0, const _Value_type * _Src) __GPU
:_Base(_Src, Concurrency::extent<1>(_E0))
{
static_assert(_Rank == 1, "rank must be 1");
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to. If the number of elements pointed to
/// by _Src is less than _E0*_E1, undefined behavior results.
/// </param>
array_view(int _E0, int _E1, const _Value_type * _Src) __GPU
:_Base(_Src, Concurrency::extent<2>(_E0,_E1))
{
static_assert(_Rank == 2, "rank must be 2");
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array_view.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to. If the number of elements pointed to
/// by _Src is less than _E0*_E1*_E2, undefined behavior results.
/// </param>
array_view(int _E0, int _E1, int _E2, const _Value_type * _Src) __GPU
:_Base(_Src, Concurrency::extent<3>(_E0,_E1,_E2))
{
static_assert(_Rank == 3, "rank must be 3");
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to. If the number of elements pointed to
/// by _Src is less than _E0, undefined behavior results.
/// </param>
array_view(int _E0, _In_ _Value_type * _Src) __GPU
:_Base(_Src, Concurrency::extent<1>(_E0))
{
static_assert(_Rank == 1, "rank must be 1");
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to. If the number of elements pointed to
/// by _Src is less than _E0*_E1, undefined behavior results.
/// </param>
array_view(int _E0, int _E1, _In_ _Value_type * _Src) __GPU
:_Base(_Src, Concurrency::extent<2>(_E0,_E1))
{
static_assert(_Rank == 2, "rank must be 2");
_Initialize();
}
/// <summary>
/// Construct an array_view which is bound to the data pointed to by _Src.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array_view.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array_view.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array_view.
/// </param>
/// <param name="_Src">
/// A container which contains the data that this array_view is bound to. If the number of elements pointed to
/// by _Src is less than _E0*_E1*_E2, undefined behavior results.
/// </param>
array_view(int _E0, int _E1, int _E2, _In_ _Value_type * _Src) __GPU
:_Base(_Src, Concurrency::extent<3>(_E0,_E1,_E2))
{
static_assert(_Rank == 3, "rank must be 3");
_Initialize();
}
/// <summary>
/// Copy Assignment operator. Shallow copy.
/// </summary>
array_view& operator=(const array_view& _Other) __GPU
{
_Base::operator=(_Other);
return *this;
}
/// <summary>
/// Copy Assignment operator. Shallow copy.
/// </summary>
array_view& operator=(const array_view<_Value_type, _Rank>& _Other) __GPU
{
_Base::operator=(_Other);
return *this;
}
/// <summary>
/// Copies elements from this array_view to the destination array.
/// </summary>
void copy_to(array<_Value_type,_Rank>& _Dest) const __CPU_ONLY
{
copy(*this,_Dest);
}
/// <summary>
/// Copies elements from this array_view to the destination array_view.
/// </summary>
void copy_to(const array_view<_Value_type,_Rank>& _Dest) const __CPU_ONLY
{
copy(*this,_Dest);
}
/// <summary>
/// Projects the most-significant dimension of this array_view. If the array_view rank is 1, this
/// produces a single element; otherwise it produces an array_view with one fewer dimensions.
/// </summary>
/// <param name="_I">
/// The most-significant index component
/// </param>
/// <returns>
/// The element at index component _I, or an array_view projected on the most-significant dimension.
/// </returns>
typename details::_Projection_result_type<_Value_type,_Rank>::_Const_result_type operator[] (int _I) const __GPU
{
return details::_Const_array_view_projection_helper<_Value_type,_Rank>::_Project0(this, _I);
}
/// <summary>
/// Get a reference to the element indexed by _Index. Unlike the other indexing operators for accessing the
/// array_view on the CPU, this method does not implicitly synchronize this array_view's contents to the CPU.
/// After accessing the array_view on a remote location or performing a copy operation involving this array_view
/// users are responsible to explicitly synchronize the array_view to the CPU before calling this method.
/// Failure to do so results in undefined behavior.
/// </summary>
/// <param name="_Index">
/// The index.
/// </param>
/// <returns>
/// Reference to the element indexed by _Index
/// </returns>
value_type& get_ref(const index<_Rank>& _Index) const __GPU
{
void *_Ptr = _Access(_Index);
return *reinterpret_cast<value_type*>(_Ptr);
}
/// <summary>
/// Get the element value indexed by _I
/// </summary>
/// <param name="_I">
/// The index.
/// </param>
/// <returns>
/// The element value indexed by _I
/// </returns>
value_type& operator[] (const index<_Rank>& _Index) const __GPU
{
return this->operator()(_Index);
}
/// <summary>
/// Get the element value indexed by _I
/// </summary>
/// <param name="_I">
/// The index.
/// </param>
/// <returns>
/// The element value indexed by _I
/// </returns>
value_type& operator() (const index<_Rank>& _Index) const __GPU
{
void * _Ptr = _Access(_Read_access, _Index);
return *reinterpret_cast<value_type*>(_Ptr);
}
/// <summary>
/// Projects the most-significant dimension of this array_view. If the array_view rank is 1, this
/// produces a single element; otherwise it produces an array_view with one fewer dimensions.
/// </summary>
/// <param name="_I">
/// The most-significant index component
/// </param>
/// <returns>
/// The element at index component _I, or an array_view projected on the most-significant dimension.
/// </returns>
typename details::_Projection_result_type<_Value_type,_Rank>::_Const_result_type operator() (int _I) const __GPU
{
return details::_Const_array_view_projection_helper<_Value_type,_Rank>::_Project0(this, _I);
}
/// <summary>
/// Get the element value indexed by (_I0,_I1)
/// </summary>
/// <param name="_I0">
/// The most-significant component of the index
/// </param>
/// <param name="_I1">
/// The least-significant component of the index
/// </param>
/// <returns>
/// The element value indexed by (_I0,_I1)
/// </returns>
value_type& operator() (int _I0, int _I1) const __GPU
{
static_assert(_Rank == 2, "value_type& array_view::operator()(int,int) is only permissible on array_view<T, 2>");
return this->operator()(index<2>(_I0,_I1));
}
/// <summary>
/// Get the element value indexed by (_I0,_I1,_I2)
/// </summary>
/// <param name="_I0">
/// The most-significant component of the index
/// </param>
/// <param name="_I1">
/// The next-to-most-significant component of the index
/// </param>
/// <param name="_I2">
/// The least-significant component of the index
/// </param>
/// <returns>
/// The element value indexed by (_I0,_I1,_I2)
/// </returns>
value_type& operator() (int _I0, int _I1, int _I2) const __GPU
{
static_assert(_Rank == 3, "value_type& array_view::operator()(int,int,int) is only permissible on array_view<T, 3>");
return this->operator()(index<3>(_I0,_I1,_I2));
}
/// <summary>
/// Produces a subsection of the source array_view at the given origin and extent.
/// </summary>
/// <param name="_Section_origin">
/// The origin of the section.
/// </param>
/// <param name="_Section_extent">
/// The extent of the section
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) const __GPU
{
return _Convert<_Value_type>(_Section(_Section_origin, _Section_extent));
}
/// <summary>
/// Produces a subsection of the source array_view with origin of zero, with
/// an extent of _Ext.
/// </summary>
/// <param name="_Ext">
/// The extent of this section
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(const Concurrency::extent<_Rank>& _Ext) const __GPU
{
return section(Concurrency::index<_Rank>(), _Ext);
}
/// <summary>
/// Produces a subsection of the source array_view with origin specified by an index, with
/// an extent of (this-&gt;exent - _Idx).
/// </summary>
/// <param name="_Idx">
/// The index that specifies the origin of this section.
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(const Concurrency::index<_Rank>& _Idx) const __GPU
{
return section(_Idx, this->extent - _Idx);
}
/// <summary>
/// Produces a one-dimensional subsection of the source array_view with origin specified by the index
/// components _I0, with extent _E0.
/// </summary>
/// <param name="_I0">
/// The origin of this section.
/// </param>
/// <param name="_E0">
/// The extent of this section.
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(int _I0, int _E0) const __GPU
{
static_assert(_Rank == 1, "rank must be 1");
return section(Concurrency::index<1>(_I0), Concurrency::extent<1>(_E0));
}
/// <summary>
/// Produces a two-dimensional subsection of the source array_view with origin specified by the index
/// components (_I0,_I1), with extent (_E0,_E1).
/// </summary>
/// <param name="_I0">
/// The most-significant component of the origin of this section.
/// </param>
/// <param name="_I1">
/// The least-significant component of the origin of this section.
/// </param>
/// <param name="_E0">
/// The most-significant component of the extent of this section.
/// </param>
/// <param name="_E1">
/// The least-significant component of the extent of this section.
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(int _I0, int _I1, int _E0, int _E1) const __GPU
{
static_assert(_Rank == 2, "rank must be 2");
return section(Concurrency::index<2>(_I0,_I1), Concurrency::extent<2>(_E0,_E1));
}
/// <summary>
/// Produces a three-dimensional subsection of the source array_view with origin specified by the index
/// components (_I0,_I1,_I2), with extent (_E0,_E1,_E2).
/// </summary>
/// <param name="_I0">
/// The most-significant component of the origin of this section.
/// </param>
/// <param name="_I1">
/// The next-to-most-significant component of the origin of this section.
/// </param>
/// <param name="_I2">
/// The least-significant component of the origin of this section.
/// </param>
/// <param name="_E0">
/// The most-significant component of the extent of this section.
/// </param>
/// <param name="_E1">
/// The next-to-most-significant component of the extent of this section.
/// </param>
/// <param name="_E2">
/// The least-significant component of the extent of this section.
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view section(int _I0, int _I1, int _I2, int _E0, int _E1, int _E2) const __GPU
{
static_assert(_Rank == 3, "rank must be 3");
return section(Concurrency::index<3>(_I0,_I1,_I2), Concurrency::extent<3>(_E0,_E1,_E2));
}
/// <summary>
/// Produces a (possibly unsafe) reinterpretation of this array_view that is linear and with
/// a different element type. The size of _Value_type2 must evenly divide into the size of
/// this array_view.
/// </summary>
/// <returns>
/// A linear array_view with a reinterpreted element type.
/// </returns>
template <typename _Value_type2> array_view<const _Value_type2, _Rank> reinterpret_as() const __GPU
{
return _Convert<_Value_type2>(this->template _Reinterpret_as<sizeof(_Value_type2)/sizeof(int)>());
}
/// <summary>
/// Produces an array_view of a different rank over this array_view's data.
/// </summary>
/// <param name="_View_extent">
/// The reshaping extent.
/// </param>
/// <returns>
/// A reshaped array_view.
/// </returns>
template <int _New_rank> array_view<const _Value_type,_New_rank> view_as(const Concurrency::extent<_New_rank>& _View_extent) const __GPU
{
return _Convert<_Value_type>(_View_as(_View_extent));
}
/// <summary>
/// Returns a pointer to the raw data of this array_view.
/// </summary>
const _Value_type* data() const __GPU
{
static_assert(_Rank == 1, "array_view::data() is only permissible on array_view<T, 1>");
return &this->operator[](index<_Rank>());
}
/// <summary>
/// Informs the array_view that its bound memory has been modified outside
/// the array_view interface. This will render all cached information stale.
/// </summary>
void refresh() const __CPU_ONLY
{
_Buffer_ptr _PBuf;
_Get_access_async(_M_buffer_descriptor._Get_view_key(), _M_buffer_descriptor._Get_buffer_ptr()->_Get_master_accelerator_view(), _Write_access, _PBuf)._Get();
}
/// <summary>
/// Asynchronously synchronizes any modifications made to "this" array_view to the specified accelerator_view.
/// </summary>
/// <param name="_Accl_view">
/// The target accelerator_view to synchronize to.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
concurrency::completion_future synchronize_to_async(const accelerator_view& _Accl_view) const __CPU_ONLY
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_array_view_synchronize_event_helper(_M_buffer_descriptor);
_Buffer_ptr _PBuf;
_Event _Ev;
_Ev = _Get_access_async(_M_buffer_descriptor._Get_view_key(), _Accl_view, _Read_access, _PBuf);
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Asynchronously synchronizes any modifications made to "this" array_view to its source data.
/// </summary>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
concurrency::completion_future synchronize_async() const __CPU_ONLY
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_array_view_synchronize_event_helper(_M_buffer_descriptor);
_Buffer_ptr _PBuf;
_Event _Ev;
// If the array_view corresponds to a ubiquitous buffer with no data source,
// then synchronize is a no-op
if (_M_buffer_descriptor._Get_buffer_ptr()->_Has_data_source()) {
_Ev = _Get_access_async(_M_buffer_descriptor._Get_view_key(), _M_buffer_descriptor._Get_buffer_ptr()->_Get_master_accelerator_view(), _Read_access, _PBuf);
}
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Synchronizes any modifications made to "this" array_view to the specified accelerator_view.
/// </summary>
/// <param name="_Accl_view">
/// The target accelerator_view to synchronize to.
/// </param>
void synchronize_to(const accelerator_view& _Accl_view) const __CPU_ONLY
{
auto _Span_id = details::_Get_amp_trace()->_Start_array_view_synchronize_event_helper(_M_buffer_descriptor);
_Buffer_ptr _PBuf;
_Get_access_async(_M_buffer_descriptor._Get_view_key(), _Accl_view, _Read_access, _PBuf)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Synchronizes any modifications made to "this" array_view to its source data.
/// </summary>
void synchronize() const __CPU_ONLY
{
auto _Span_id = details::_Get_amp_trace()->_Start_array_view_synchronize_event_helper(_M_buffer_descriptor);
_Buffer_ptr _PBuf;
// If the array_view corresponds to a ubiquitous buffer with no data source,
// then synchronize is a no-op
if (_M_buffer_descriptor._Get_buffer_ptr()->_Has_data_source()) {
_Get_access_async(_M_buffer_descriptor._Get_view_key(), _M_buffer_descriptor._Get_buffer_ptr()->_Get_master_accelerator_view(), _Read_access, _PBuf)._Get();
}
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Returns the accelerator_view where the data source of the array_view is located.
/// If the array_view does not have a data source, this API throws a runtime_exception
/// </summary>
accelerator_view get_source_accelerator_view() const
{
if (_M_buffer_descriptor._Get_buffer_ptr()->_Has_data_source()) {
return _M_buffer_descriptor._Get_buffer_ptr()->_Get_master_accelerator_view();
}
else {
throw runtime_exception("Cannot query source accelerator_view for an array_view without a data source.", E_INVALIDARG);
}
}
__declspec(property(get=get_source_accelerator_view)) accelerator_view source_accelerator_view;
private:
template <typename _T, int _R>
static array_view<const _T,_R> _Convert(const _Array_view_base<_R,sizeof(_T)/sizeof(int)>& _Other) __GPU
{
static_assert(sizeof(array_view<const _T,_R>) == sizeof(_Array_view_base<_R,sizeof(_T)/sizeof(int)>), "ASSERT FAILURE: implementation relies on binary conversion between the two");
return (*reinterpret_cast<const array_view<const _T,_R>*>(&_Other));
}
void _Project0(int _I, array_view<const _Value_type, _Rank-1> &_Projected_view) const __GPU
{
_Base::_Project0(_I, _Projected_view);
_Projected_view._Initialize();
}
array_view() __GPU {}
array_view(const array_view& _Other, const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) __GPU
:
_Base(_Other, _Section_origin, _Section_extent)
{
_Initialize();
}
void _Initialize() __GPU
{
// Set the type access mode
_M_buffer_descriptor._M_type_access_mode = _Read_access;
}
void _Initialize(size_t _Src_data_size) __CPU_ONLY
{
// Ensure that the _Src_data_size is at least as big as the size
// of the array_view
if (_Src_data_size < this->extent.size()) {
throw runtime_exception("Invalid _Src container argument - _Src size is less than the size of the array_view.", E_INVALIDARG);
}
_Initialize();
}
}; // class array_view<const T,R>
// Forward declarations for copy functions
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array<_Value_type,_Rank>& _Src, array<_Value_type,_Rank>& _Dest);
template <typename _Value_type, int _Rank> void copy(const array<_Value_type,_Rank>& _Src, array<_Value_type,_Rank>& _Dest);
template <typename InputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(InputIterator _SrcFirst, InputIterator _SrcLast, array<_Value_type, _Rank> &_Dest);
template <typename InputIterator, typename _Value_type, int _Rank> void copy(InputIterator _SrcFirst, InputIterator _SrcLast, array<_Value_type, _Rank> &_Dest);
template <typename InputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(InputIterator _SrcFirst, array<_Value_type, _Rank> &_Dest);
template <typename InputIterator, typename _Value_type, int _Rank> void copy(InputIterator _SrcFirst, array<_Value_type, _Rank> &_Dest);
template <typename OutputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array<_Value_type, _Rank> &_Src, OutputIterator _DestIter);
template <typename OutputIterator, typename _Value_type, int _Rank> void copy(const array<_Value_type, _Rank> &_Src, OutputIterator _DestIter);
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array<_Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest);
template <typename _Value_type, int _Rank> void copy(const array<_Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest);
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<const _Value_type, _Rank>& _Src, array<_Value_type, _Rank>& _Dest);
template <typename _Value_type, int _Rank> void copy(const array_view<const _Value_type, _Rank>& _Src, array<_Value_type, _Rank>& _Dest);
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<_Value_type, _Rank>& _Src, array<_Value_type, _Rank>& _Dest);
template <typename _Value_type, int _Rank> void copy(const array_view<_Value_type, _Rank>& _Src, array<_Value_type, _Rank>& _Dest);
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<const _Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest);
template <typename _Value_type, int _Rank> void copy(const array_view<const _Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest);
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<_Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest);
template <typename _Value_type, int _Rank> void copy(const array_view<_Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest);
template <typename InputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(InputIterator _SrcFirst, InputIterator _SrcLast, const array_view<_Value_type, _Rank> &_Dest);
template <typename InputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(InputIterator _SrcFirst, const array_view<_Value_type, _Rank> &_Dest);
template <typename InputIterator, typename _Value_type, int _Rank> void copy(InputIterator _SrcFirst, InputIterator _SrcLast, const array_view<_Value_type, _Rank> &_Dest);
template <typename InputIterator, typename _Value_type, int _Rank> void copy(InputIterator _SrcFirst, const array_view<_Value_type, _Rank> &_Dest);
template <typename OutputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<_Value_type, _Rank> &_Src, OutputIterator _DestIter);
template <typename OutputIterator, typename _Value_type, int _Rank> void copy(const array_view<_Value_type, _Rank> &_Src, OutputIterator _DestIter);
namespace direct3d
{
template<typename _Value_type, int _Rank>
array<_Value_type, _Rank> make_array(const Concurrency::extent<_Rank> &_Extent, const Concurrency::accelerator_view &_Av, _In_ IUnknown *_D3D_buffer) __CPU_ONLY;
}
/// <summary>
/// An array is a multi-dimensional data aggregate on a accelerator_view.
/// </summary>
/// <param name="_Rank">
/// The dimensionality of this array.
/// </param>
/// <param name="_Value_type">
/// The type of the elements in the array.
/// </param>
template <typename _Value_type, int _Rank = 1> class array
{
// internal storage abstraction
typedef details::_Buffer_descriptor _Buffer_descriptor;
typedef _Array_flatten_helper<_Rank, typename Concurrency::extent<_Rank>::value_type, typename Concurrency::index<_Rank>::value_type> _Flatten_helper;
_CPP_AMP_VERIFY_RANK(_Rank, array);
static_assert(!std::is_const<_Value_type>::value, "array<const _Value_type> is not supported");
static_assert(0 == (sizeof(_Value_type) % sizeof(int)), "only value types whose size is a multiple of the size of an integer are allowed in array");
// Friends
template<typename _Value_type, int _Rank>
friend array<_Value_type,_Rank> direct3d::make_array(const Concurrency::extent<_Rank> &_Extent, const Concurrency::accelerator_view &_Av, _In_ IUnknown *_D3D_buffer) __CPU_ONLY;
friend const _Buffer_descriptor& details::_Get_buffer_descriptor<array<_Value_type,_Rank>>(const array<_Value_type,_Rank>& _Array) __GPU;
friend _Ret_ _Ubiquitous_buffer* details::_Get_buffer<array<_Value_type,_Rank>>(const array<_Value_type,_Rank>& _Array) __CPU_ONLY;
friend _Event details::_Get_access_async<array<_Value_type,_Rank>>(const array<_Value_type,_Rank>& _Array, _Access_mode _Mode, _Buffer_ptr &_Buf_ptr) __CPU_ONLY;
public:
static const int rank = _Rank;
typedef typename _Value_type value_type;
/// <summary>
/// Construct an array from extents
/// </summary>
/// <param name="_Extent">
/// An extent that describes the shape of the array.
/// </param>
explicit array(const Concurrency::extent<_Rank> & _Extent) __CPU_ONLY
: _M_extent(_Extent)
{
_Initialize(details::_Select_default_accelerator().default_view, access_type_auto);
}
/// <summary>
/// Construct array&lt;T,1&gt; with the extent _E0
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array.
/// </param>
explicit array(int _E0) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0))
{
static_assert(_Rank == 1, "array(int) is only permissible on array<T, 1>");
_Initialize(details::_Select_default_accelerator().default_view, access_type_auto);
}
/// <summary>
/// Construct an array&lt;T,2&gt; from two integer extents.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
explicit array(int _E0, int _E1) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1))
{
static_assert(_Rank == 2, "array(int, int) is only permissible on array<T, 2>");
_Initialize(details::_Select_default_accelerator().default_view, access_type_auto);
}
/// <summary>
/// Construct an array&lt;T,3&gt; from three integer extents.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
explicit array(int _E0, int _E1, int _E2) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1, _E2))
{
static_assert(_Rank == 3, "array(int, int, int) is only permissible on array<T, 3>");
_Initialize(details::_Select_default_accelerator().default_view, access_type_auto);
}
/// <summary>
/// Construct an array from extents, bound to a specific accelerator_view.
/// </summary>
/// <param name="_Extent">
/// An extent that describes the shape of the array.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
array(const Concurrency::extent<_Rank>& _Extent, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(_Extent)
{
_Initialize(_Av, _Cpu_access_type);
}
/// <summary>
/// Construct array&lt;T,1&gt; with the extent _E0, bound to a specific accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
array(int _E0, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0))
{
static_assert(_Rank == 1, "array(int, accelerator_view) is only permissible on array<T, 1>");
_Initialize(_Av, _Cpu_access_type);
}
/// <summary>
/// Construct an array&lt;T,2&gt; from two integer extents, bound to a specific accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
array(int _E0, int _E1, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1))
{
static_assert(_Rank == 2, "array(int, int, accelerator_view) is only permissible on array<T, 2>");
_Initialize(_Av, _Cpu_access_type);
}
/// <summary>
/// Construct an array&lt;T,3&gt; from three integer extents, bound to a specific accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
array(int _E0, int _E1, int _E2, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1, _E2))
{
static_assert(_Rank == 3, "array(int, int, int, accelerator_view) is only permissible on array<T, 3>");
_Initialize(_Av, _Cpu_access_type);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view.
/// </summary>
/// <param name="_Extent">
/// An extent that describes the shape of the array.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
array(const Concurrency::extent<_Rank>& _Extent, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(_Extent)
{
_Initialize(_Av, _Associated_Av);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
array(int _E0, accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0))
{
static_assert(_Rank == 1, "array(int, accelerator_view, accelerator_view) is only permissible on array<T, 1>");
_Initialize(_Av, _Associated_Av);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
array(int _E0, int _E1, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1))
{
static_assert(_Rank == 2, "array(int, int, accelerator_view, accelerator_view) is only permissible on array<T, 2>");
_Initialize(_Av, _Associated_Av);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
array(int _E0, int _E1, int _E2, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1, _E2))
{
static_assert(_Rank == 3, "array(int, int, int, accelerator_view, accelerator_view) is only permissible on array<T, 3>");
_Initialize(_Av, _Associated_Av);
}
/// <summary>
/// Construct an array initialized from a pair of iterators into a container.
/// </summary>
/// <param name="_Extent">
/// An extent that describes the shape of the array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
template <typename _InputIterator> array(const Concurrency::extent<_Rank>& _Extent, _InputIterator _Src_first, _InputIterator _Src_last) __CPU_ONLY
: _M_extent(_Extent)
{
_Initialize(details::_Select_default_accelerator().default_view, _Src_first, _Src_last, access_type_auto);
}
/// <summary>
/// Construct an array initialized from an iterator.
/// </summary>
/// <param name="_Extent">
/// An extent that describes the shape of the array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
template <typename _InputIterator> array(const Concurrency::extent<_Rank>& _Extent, _InputIterator _Src_first) __CPU_ONLY
: _M_extent(_Extent)
{
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(details::_Select_default_accelerator().default_view, _Src_first, _Src_last, access_type_auto);
}
/// <summary>
/// Construct an array initialized from a pair of iterators into a container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
template <typename _InputIterator> array(int _E0, _InputIterator _Src_first, _InputIterator _Src_last) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0))
{
static_assert(_Rank == 1, "array(int, iterator, iterator) is only permissible on array<T, 1>");
_Initialize(details::_Select_default_accelerator().default_view, _Src_first, _Src_last, access_type_auto);
}
/// <summary>
/// Construct an array initialized from an iterator.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
template <typename _InputIterator> array(int _E0, _InputIterator _Src_first) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0))
{
static_assert(_Rank == 1, "array(int, iterator) is only permissible on array<T, 1>");
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(details::_Select_default_accelerator().default_view, _Src_first, _Src_last, access_type_auto);
}
/// <summary>
/// Construct an array initialized from a pair of iterators into a container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, _InputIterator _Src_first, _InputIterator _Src_last) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1))
{
static_assert(_Rank == 2, "array(int, int, iterator, iterator) is only permissible on array<T, 2>");
_Initialize(details::_Select_default_accelerator().default_view, _Src_first, _Src_last, access_type_auto);
}
/// <summary>
/// Construct an array initialized from an iterator.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, _InputIterator _Src_first) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1))
{
static_assert(_Rank == 2, "array(int, int, iterator) is only permissible on array<T, 2>");
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(details::_Select_default_accelerator().default_view, _Src_first, _Src_last, access_type_auto);
}
/// <summary>
/// Construct an array initialized from an iterator.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, int _E2, _InputIterator _Src_first, _InputIterator _Src_last) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1, _E2))
{
static_assert(_Rank == 3, "array(int, int, int, iterator, iterator) is only permissible on array<T, 3>");
_Initialize(details::_Select_default_accelerator().default_view, _Src_first, _Src_last, access_type_auto);
}
/// <summary>
/// Construct an array initialized from an iterator.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, int _E2, _InputIterator _Src_first) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1, _E2))
{
static_assert(_Rank == 3, "array(int, int, int, iterator) is only permissible on array<T, 3>");
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(details::_Select_default_accelerator().default_view, _Src_first, _Src_last, access_type_auto);
}
/// <summary>
/// Construct an array initialized from a pair of iterators into a container, bound to a specific accelerator_view.
/// </summary>
/// <param name="_Extent">
/// An extent that describes the shape of the array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
template <typename _InputIterator> array(const Concurrency::extent<_Rank>& _Extent, _InputIterator _Src_first, _InputIterator _Src_last, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(_Extent)
{
_Initialize(_Av, _Src_first, _Src_last, _Cpu_access_type);
}
/// <summary>
/// Construct an array initialized from an iterator into a container, bound to a specific accelerator_view.
/// </summary>
/// <param name="_Extent">
/// An extent that describes the shape of the array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
template <typename _InputIterator> array(const Concurrency::extent<_Rank>& _Extent, _InputIterator _Src_first, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(_Extent)
{
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(_Av, _Src_first, _Src_last, _Cpu_access_type);
}
/// <summary>
/// Construct an array initialized from a pair of iterators into a container, bound to a specific accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
template <typename _InputIterator> array(int _E0, _InputIterator _Src_first, _InputIterator _Src_last, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0))
{
static_assert(_Rank == 1, "array(int, iterator, iterator) is only permissible on array<T, 1>");
_Initialize(_Av, _Src_first, _Src_last, _Cpu_access_type);
}
/// <summary>
/// Construct an array initialized from an iterator into a container, bound to a specific accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
template <typename _InputIterator> array(int _E0, _InputIterator _Src_first, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0))
{
static_assert(_Rank == 1, "array(int, iterator) is only permissible on array<T, 1>");
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(_Av, _Src_first, _Src_last, _Cpu_access_type);
}
/// <summary>
/// Construct an array initialized from a pair of iterators into a container, bound to a specific accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, _InputIterator _Src_first, _InputIterator _Src_last, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1))
{
static_assert(_Rank == 2, "array(int, int, iterator, iterator) is only permissible on array<T, 2>");
_Initialize(_Av, _Src_first, _Src_last, _Cpu_access_type);
}
/// <summary>
/// Construct an array initialized from an iterator into a container, bound to a specific accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, _InputIterator _Src_first, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1))
{
static_assert(_Rank == 2, "array(int, int, iterator) is only permissible on array<T, 2>");
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(_Av, _Src_first, _Src_last, _Cpu_access_type);
}
/// <summary>
/// Construct an array initialized from a pair of iterators into a container, bound to a specific accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, int _E2, _InputIterator _Src_first, _InputIterator _Src_last, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1, _E2))
{
static_assert(_Rank == 3, "array(int, int, int, iterator, iterator) is only permissible on array<T, 3>");
_Initialize(_Av, _Src_first, _Src_last, _Cpu_access_type);
}
/// <summary>
/// Construct an array initialized from an iterator into a container, bound to a specific accelerator_view.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, int _E2, _InputIterator _Src_first, Concurrency::accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1, _E2))
{
static_assert(_Rank == 3, "array(int, int, int, iterator) is only permissible on array<T, 3>");
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(_Av, _Src_first, _Src_last, _Cpu_access_type);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view, initialized from a pair of iterators into a container.
/// </summary>
/// <param name="_Extent">
/// An extent that describes the shape of the array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
template <typename _InputIterator> array(const Concurrency::extent<_Rank>& _Extent, _InputIterator _Src_first, _InputIterator _Src_last, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(_Extent)
{
_Initialize(_Av, _Associated_Av, _Src_first, _Src_last);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view, initialized from an iterator into a container.
/// </summary>
/// <param name="_Extent">
/// An extent that describes the shape of the array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
template <typename _InputIterator> array(const Concurrency::extent<_Rank>& _Extent, _InputIterator _Src_first, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(_Extent)
{
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(_Av, _Associated_Av, _Src_first, _Src_last);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view, initialized from a pair of iterators into a container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
template <typename _InputIterator> array(int _E0, _InputIterator _Src_first, _InputIterator _Src_last, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0))
{
static_assert(_Rank == 1, "array(int, iterator, iterator, accelerator_view, accelerator_view) is only permissible on array<T, 1>");
_Initialize(_Av, _Associated_Av, _Src_first, _Src_last);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view, initialized from an iterator into a container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
template <typename _InputIterator> array(int _E0, _InputIterator _Src_first, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av)
: _M_extent(Concurrency::extent<_Rank>(_E0))
{
static_assert(_Rank == 1, "array(int, iterator, accelerator_view, accelerator_view) is only permissible on array<T, 1>");
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(_Av, _Associated_Av, _Src_first, _Src_last);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view, initialized from a pair of iterators into a container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, _InputIterator _Src_first, _InputIterator _Src_last, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1))
{
static_assert(_Rank == 2, "array(int, int, iterator, iterator, accelerator_view, accelerator_view) is only permissible on array<T, 2>");
_Initialize(_Av, _Associated_Av, _Src_first, _Src_last);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view, initialized from an iterator into a container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, _InputIterator _Src_first, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1))
{
static_assert(_Rank == 2, "array(int, int, iterator, accelerator_view, accelerator_view) is only permissible on array<T, 2>");
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(_Av, _Associated_Av, _Src_first, _Src_last);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view, initialized from a pair of iterators into a container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_Src_last">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, int _E2, _InputIterator _Src_first, _InputIterator _Src_last, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1, _E2))
{
static_assert(_Rank == 3, "array(int, int, int, iterator, iterator, accelerator_view, accelerator_view) is only permissible on array<T, 3>");
_Initialize(_Av, _Associated_Av, _Src_first, _Src_last);
}
/// <summary>
/// Construct a staging array between two associated accelerator_view, initialized from an iterator into a container.
/// </summary>
/// <param name="_E0">
/// An integer that is the length of the most-significant dimension of this array.
/// </param>
/// <param name="_E1">
/// An integer that is the length of the next-to-most-significant dimension of this array.
/// </param>
/// <param name="_E2">
/// An integer that is the length of the least-significant dimension of this array.
/// </param>
/// <param name="_Src_first">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than this-&gt;extent.size(), undefined behavior results.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// An accelerator_view which specifies the preferred target location of the array.
/// </param>
template <typename _InputIterator> array(int _E0, int _E1, int _E2, _InputIterator _Src_first, Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
: _M_extent(Concurrency::extent<_Rank>(_E0, _E1, _E2))
{
static_assert(_Rank == 3, "array(int, int, int, iterator, accelerator_view, accelerator_view) is only permissible on array<T, 3>");
_InputIterator _Src_last = _Src_first;
std::advance(_Src_last, this->extent.size());
_Initialize(_Av, _Associated_Av, _Src_first, _Src_last);
}
/// <summary>
/// Construct an array initialized from an array_view.
/// </summary>
/// <param name="_Src">
/// An array_view to copy from.
/// </param>
explicit array(const array_view<const _Value_type,_Rank>& _Src) __CPU_ONLY
:_M_extent(_Src.extent)
{
_Initialize(details::_Select_default_accelerator().default_view, access_type_auto);
Concurrency::copy(_Src,*this);
}
/// <summary>
/// Construct an array initialized from an array_view, bound to a specific accelerator_view.
/// </summary>
/// <param name="_Src">
/// An array_view to copy from.
/// </param>
/// <param name="_Av">
/// An accelerator_view where this array resides.
/// </param>
/// <param name="_Cpu_access_type">
/// The desired access_type for the array on the CPU. This
/// parameter has a default value of access_type_auto leaving the
/// CPU access_type determination to the runtime. The actual
/// CPU access_type for the array can be queried using the
/// get_cpu_access_type method.
/// </param>
array(const array_view<const _Value_type,_Rank>& _Src, accelerator_view _Av, access_type _Cpu_access_type = access_type_auto) __CPU_ONLY
:_M_extent(_Src.extent)
{
_Initialize(_Av, _Cpu_access_type);
Concurrency::copy(_Src,*this);
}
/// <summary>
/// Construct a staging array between two associated accelerator_views, initialized from an array_view.
/// </summary>
/// <param name="_Src">
/// An array_view to copy from.
/// </param>
/// <param name="_Av">
/// An accelerator_view which specifies the location of the array.
/// </param>
/// <param name="_Associated_Av">
/// The accelerator_view that is associated with _Av.
/// </param>
array(const array_view<const _Value_type,_Rank>& _Src, accelerator_view _Av, accelerator_view _Associated_Av) __CPU_ONLY
:_M_extent(_Src.extent)
{
_Initialize(_Av, _Associated_Av);
Concurrency::copy(_Src,*this);
}
/// <summary>
/// Copy constructor. Deep copy.
/// </summary>
array(const array& _Other) __CPU_ONLY
: _M_extent(_Other._M_extent)
{
_Initialize(_Other.accelerator_view, _Other.associated_accelerator_view);
Concurrency::copy(_Other, *this);
}
/// <summary>
/// Move constructor.
/// </summary>
array(array && _Other) __CPU_ONLY
: _M_extent(_Other._M_extent), _M_multiplier(_Other._M_multiplier)
, _M_buffer_descriptor(_Other._M_buffer_descriptor)
{
// Register this
this->_Register_copy(_Other);
// Release the _Other array
_Other._Unregister();
_Other._M_buffer_descriptor._M_data_ptr = NULL;
_Other._M_buffer_descriptor._Set_buffer_ptr(NULL);
}
/// <summary>
/// Copy Assignment operator. Deep copy.
/// </summary>
array & operator= (const array & _Other) __CPU_ONLY
{
if (this != &_Other)
{
// First unregister myself from the current buffer
_Unregister();
_M_extent = _Other._M_extent;
_Initialize(_Other.accelerator_view, _Other.associated_accelerator_view);
Concurrency::copy(_Other, *this);
}
return *this;
}
/// <summary>
/// Move Assignment operator.
/// </summary>
array & operator= (array && _Other) __CPU_ONLY
{
if (this != &_Other)
{
// First unregister myself from the current buffer
_Unregister();
_M_extent = _Other._M_extent;
_M_multiplier = _Other._M_multiplier;
_M_buffer_descriptor = _Other._M_buffer_descriptor;
this->_Register_copy(_Other);
// Release the _Other array
_Other._Unregister();
_Other._M_buffer_descriptor._M_data_ptr = NULL;
_Other._M_buffer_descriptor._Set_buffer_ptr(NULL);
}
return *this;
}
/// <summary>
/// Assignment operator from an array_view
/// </summary>
array& operator=(const array_view<const _Value_type,_Rank>& _Src) __CPU_ONLY
{
Concurrency::copy(_Src,*this);
return *this;
}
/// <summary>
/// Copies elements from this array to the destination array.
/// </summary>
void copy_to(array<_Value_type,_Rank>& _Dest) const __CPU_ONLY
{
Concurrency::copy(*this, _Dest);
}
/// <summary>
/// Copies elements from this array to the destination array_view.
/// </summary>
void copy_to(const array_view<_Value_type,_Rank>& _Dest) const __CPU_ONLY
{
Concurrency::copy(*this,_Dest);
}
/// <summary>
/// Returns the extent that defines the shape of this array.
/// </summary>
__declspec(property(get=get_extent)) Concurrency::extent<_Rank> extent;
Concurrency::extent<_Rank> get_extent() const __GPU
{
return _M_extent;
}
/// <summary>
/// Returns the accelerator_view where this array is located.
/// </summary>
__declspec(property(get=get_accelerator_view)) Concurrency::accelerator_view accelerator_view;
Concurrency::accelerator_view get_accelerator_view() const __CPU_ONLY
{
return _Get_buffer()->_Get_master_buffer()->_Get_access_on_accelerator_view();
}
/// <summary>
/// Returns the accelerator_view that is the preferred target where this array can be copied.
/// </summary>
__declspec(property(get=get_associated_accelerator_view)) Concurrency::accelerator_view associated_accelerator_view;
Concurrency::accelerator_view get_associated_accelerator_view() const __CPU_ONLY
{
return _Get_buffer()->_Get_master_buffer()->_Get_accelerator_view();
}
/// <summary>
/// Returns the CPU access_type allowed for this array.
/// </summary>
__declspec(property(get=get_cpu_access_type)) access_type cpu_access_type;
access_type get_cpu_access_type() const __CPU_ONLY
{
return _Get_buffer()->_Get_master_buffer()->_Get_allowed_host_access_type();
}
/// <summary>
/// Get the element value indexed by _I
/// </summary>
/// <param name="_I">
/// The index.
/// </param>
/// <returns>
/// The element value indexed by _I
/// </returns>
value_type& operator[] (const index<_Rank>& _Index) __GPU
{
// Refresh the data ptr if needed
_Refresh_data_ptr(_Read_write_access);
_Value_type * _Ptr = reinterpret_cast<_Value_type *>(_M_buffer_descriptor._M_data_ptr);
return _Ptr[_Flatten_helper::func(_M_multiplier._M_base, _Index._M_base)];
}
/// <summary>
/// Get the element value indexed by _I
/// </summary>
/// <param name="_I">
/// The index.
/// </param>
/// <returns>
/// The element value indexed by _I
/// </returns>
const value_type& operator[] (const index<_Rank>& _Index) const __GPU
{
// Refresh the data ptr if needed
#pragma warning( push )
#pragma warning( disable : 4880 )
// Casting away constness in amp restricted scope might result in
// undefined behavior, therefore, the compiler will report a level 1 warning
// for it. But the following const_cast is harmless thus we are suppressing
// this warning just for this line.
const_cast<array*>(this)->_Refresh_data_ptr(_Read_access);
#pragma warning( pop )
_Value_type * _Ptr = reinterpret_cast<_Value_type *>(_M_buffer_descriptor._M_data_ptr);
return _Ptr[_Flatten_helper::func(_M_multiplier._M_base, _Index._M_base)];
}
/// <summary>
/// Projects the most-significant dimension of this array. If the array rank is 1, this
/// produces a single element; otherwise it produces an array_view with one fewer dimensions.
/// </summary>
/// <param name="_I">
/// The most-significant index component
/// </param>
/// <returns>
/// The element at index component _I, or an array_view projected on the most-significant dimension.
/// </returns>
typename details::_Projection_result_type<_Value_type,_Rank>::_Result_type operator[](int _I) __GPU
{
return details::_Array_projection_helper<_Value_type,_Rank>::_Project0(this,_I);
}
/// <summary>
/// Projects the most-significant dimension of this array. If the array rank is 1, this
/// produces a single element; otherwise it produces an array_view with one fewer dimensions.
/// </summary>
/// <param name="_I">
/// The most-significant index component
/// </param>
/// <returns>
/// The element at index component _I, or an array_view projected on the most-significant dimension.
/// </returns>
typename details::_Projection_result_type<_Value_type,_Rank>::_Const_result_type operator[](int _I) const __GPU
{
return details::_Const_array_projection_helper<_Value_type,_Rank>::_Project0(this,_I);
}
/// <summary>
/// Get the element value indexed by _I
/// </summary>
/// <param name="_I">
/// The index.
/// </param>
/// <returns>
/// The element value indexed by _I
/// </returns>
value_type& operator() (const index<_Rank>& _Index) __GPU
{
return this->operator[](_Index);
}
/// <summary>
/// Get the element value indexed by _Index
/// </summary>
/// <param name="_Index">
/// The index.
/// </param>
/// <returns>
/// The element value indexed by _Index
/// </returns>
const value_type& operator() (const index<_Rank>& _Index) const __GPU
{
return this->operator[](_Index);
}
/// <summary>
/// Get the element value indexed by (_I0,_I1)
/// </summary>
/// <param name="_I0">
/// The most-significant component of the index
/// </param>
/// <param name="_I1">
/// The least-significant component of the index
/// </param>
/// <returns>
/// The element value indexed by (_I0,_I1)
/// </returns>
value_type& operator() (int _I0, int _I1) __GPU
{
static_assert(_Rank == 2, "value_type& array::operator()(int, int) is only permissible on array<T, 2>");
return this->operator[](index<2>(_I0, _I1));
}
/// <summary>
/// Get the element value indexed by (_I0,_I1)
/// </summary>
/// <param name="_I0">
/// The most-significant component of the index
/// </param>
/// <param name="_I1">
/// The least-significant component of the index
/// </param>
/// <returns>
/// The element value indexed by (_I0,_I1)
/// </returns>
const value_type& operator() (int _I0, int _I1) const __GPU
{
static_assert(_Rank == 2, "const value_type& array::operator()(int, int) is only permissible on array<T, 2>");
return this->operator[](index<2>(_I0, _I1));
}
/// <summary>
/// Get the element value indexed by (_I0,_I1,_I2)
/// </summary>
/// <param name="_I0">
/// The most-significant component of the index
/// </param>
/// <param name="_I1">
/// The next-to-most-significant component of the index
/// </param>
/// <param name="_I2">
/// The least-significant component of the index
/// </param>
/// <returns>
/// The element value indexed by (_I0,_I1,_I2)
/// </returns>
value_type& operator() (int _I0, int _I1, int _I2) __GPU
{
static_assert(_Rank == 3, "value_type& array::operator()(int, int, int) is only permissible on array<T, 3>");
return this->operator[](index<3>(_I0, _I1, _I2));
}
/// <summary>
/// Get the element value indexed by (_I0,_I1,_I2)
/// </summary>
/// <param name="_I0">
/// The most-significant component of the index
/// </param>
/// <param name="_I1">
/// The next-to-most-significant component of the index
/// </param>
/// <param name="_I2">
/// The least-significant component of the index
/// </param>
/// <returns>
/// The element value indexed by (_I0,_I1,_I2)
/// </returns>
const value_type& operator() (int _I0, int _I1, int _I2) const __GPU
{
static_assert(_Rank == 3, "const value_type& array::operator()(int, int, int) const is only permissible on array<T, 3>");
return this->operator[](index<3>(_I0, _I1, _I2));
}
/// <summary>
/// Projects the most-significant dimension of this array. If the array rank is 1, this
/// produces a single element; otherwise it produces an array_view with one fewer dimensions.
/// </summary>
/// <param name="_I">
/// The most-significant index component
/// </param>
/// <returns>
/// The element at index component _I, or an array_view projected on the most-significant dimension.
/// </returns>
typename details::_Projection_result_type<_Value_type,_Rank>::_Result_type operator()(int _I) __GPU
{
return details::_Array_projection_helper<_Value_type,_Rank>::_Project0(this,_I);
}
/// <summary>
/// Projects the most-significant dimension of this array. If the array rank is 1, this
/// produces a single element; otherwise it produces an array_view with one fewer dimensions.
/// </summary>
/// <param name="_I">
/// The most-significant index component
/// </param>
/// <returns>
/// The element at index component _I, or an array_view projected on the most-significant dimension.
/// </returns>
typename details::_Projection_result_type<_Value_type,_Rank>::_Const_result_type operator()(int _I) const __GPU
{
return details::_Const_array_projection_helper<_Value_type,_Rank>::_Project0(this,_I);
}
/// <summary>
/// Produces a subsection of the source array at the given origin and extent.
/// </summary>
/// <param name="_Section_origin">
/// The origin of the section.
/// </param>
/// <param name="_Section_extent">
/// The extent of the section
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<_Value_type,_Rank> section(const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) __GPU
{
array_view<_Value_type,_Rank> _T1(*this);
return _T1.section(_Section_origin, _Section_extent);
}
/// <summary>
/// Produces a subsection of the source array at the given origin and extent.
/// </summary>
/// <param name="_Section_origin">
/// The origin of the section.
/// </param>
/// <param name="_Section_extent">
/// The extent of the section
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<const _Value_type,_Rank> section(const Concurrency::index<_Rank>& _Section_origin, const Concurrency::extent<_Rank>& _Section_extent) const __GPU
{
array_view<const _Value_type,_Rank> _T1(*this);
return _T1.section(_Section_origin, _Section_extent);
}
/// <summary>
/// Produces a subsection of the source array_view with origin of zero, with
/// an extent of _Ext.
/// </summary>
/// <param name="_Ext">
/// The extent of this section
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view<_Value_type,_Rank> section(const Concurrency::extent<_Rank>& _Ext) __GPU
{
return section(Concurrency::index<_Rank>(), _Ext);
}
/// <summary>
/// Produces a subsection of the source array_view with origin of zero, with
/// an extent of _Ext.
/// </summary>
/// <param name="_Ext">
/// The extent of this section
/// </param>
/// <returns>
/// A subsection of the array_view.
/// </returns>
array_view<const _Value_type,_Rank> section(const Concurrency::extent<_Rank>& _Ext) const __GPU
{
return section(Concurrency::index<_Rank>(), _Ext);
}
/// <summary>
/// Produces a subsection of the source array with origin specified by an index, with
/// an extent of (this-&gt;exent - _Idx).
/// </summary>
/// <param name="_Idx">
/// The index that specifies the origin of this section.
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<_Value_type,_Rank> section(const index<_Rank>& _Idx) __GPU
{
array_view<_Value_type,_Rank> _T1(*this);
return _T1.section(_Idx);
}
/// <summary>
/// Produces a subsection of the source array with origin specified by an index, with
/// an extent of (this-&gt;exent - _Idx).
/// </summary>
/// <param name="_Idx">
/// The index that specifies the origin of this section.
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<const _Value_type,_Rank> section(const index<_Rank>& _Idx) const __GPU
{
array_view<const _Value_type,_Rank> _T1(*this);
return _T1.section(_Idx);
}
/// <summary>
/// Produces a one-dimensional subsection of the source array with origin specified by the index
/// components _I0, with extent _E0.
/// </summary>
/// <param name="_I0">
/// The origin of this section.
/// </param>
/// <param name="_E0">
/// The extent of this section.
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<_Value_type,1> section(int _I0, int _E0) __GPU
{
array_view<_Value_type,_Rank> _T1(*this);
return _T1.section(_I0,_E0);
}
/// <summary>
/// Produces a one-dimensional subsection of the source array with origin specified by the index
/// components _I0, with extent _E0.
/// </summary>
/// <param name="_I0">
/// The origin of this section.
/// </param>
/// <param name="_E0">
/// The extent of this section.
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<const _Value_type,1> section(int _I0, int _E0) const __GPU
{
array_view<const _Value_type,_Rank> _T1(*this);
return _T1.section(_I0,_E0);
}
/// <summary>
/// Produces a two-dimensional subsection of the source array with origin specified by the index
/// components (_I0,_I1), with extent (_E0,_E1).
/// </summary>
/// <param name="_I0">
/// The most-significant component of the origin of this section.
/// </param>
/// <param name="_I1">
/// The least-significant component of the origin of this section.
/// </param>
/// <param name="_E0">
/// The most-significant component of the extent of this section.
/// </param>
/// <param name="_E1">
/// The least-significant component of the extent of this section.
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<_Value_type,2> section(int _I0, int _I1, int _E0, int _E1) __GPU
{
array_view<_Value_type,_Rank> _T1(*this);
return _T1.section(_I0,_I1,_E0,_E1);
}
/// <summary>
/// Produces a two-dimensional subsection of the source array with origin specified by the index
/// components (_I0,_I1), with extent (_E0,_E1).
/// </summary>
/// <param name="_I0">
/// The most-significant component of the origin of this section.
/// </param>
/// <param name="_I1">
/// The least-significant component of the origin of this section.
/// </param>
/// <param name="_E0">
/// The most-significant component of the extent of this section.
/// </param>
/// <param name="_E1">
/// The least-significant component of the extent of this section.
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<const _Value_type,2> section(int _I0, int _I1, int _E0, int _E1) const __GPU
{
array_view<const _Value_type,_Rank> _T1(*this);
return _T1.section(_I0,_I1,_E0,_E1);
}
/// <summary>
/// Produces a three-dimensional subsection of the source array with origin specified by the index
/// components (_I0,_I1,_I2), with extent (_E0,_E1,_E2).
/// </summary>
/// <param name="_I0">
/// The most-significant component of the origin of this section.
/// </param>
/// <param name="_I1">
/// The next-to-most-significant component of the origin of this section.
/// </param>
/// <param name="_I2">
/// The least-significant component of the origin of this section.
/// </param>
/// <param name="_E0">
/// The most-significant component of the extent of this section.
/// </param>
/// <param name="_E1">
/// The next-to-most-significant component of the extent of this section.
/// </param>
/// <param name="_E2">
/// The least-significant component of the extent of this section.
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<_Value_type,3> section(int _I0, int _I1, int _I2, int _E0, int _E1, int _E2) __GPU
{
array_view<_Value_type,_Rank> _T1(*this);
return _T1.section(_I0,_I1,_I2,_E0,_E1,_E2);
}
/// <summary>
/// Produces a three-dimensional subsection of the source array with origin specified by the index
/// components (_I0,_I1,_I2), with extent (_E0,_E1,_E2).
/// </summary>
/// <param name="_I0">
/// The most-significant component of the origin of this section.
/// </param>
/// <param name="_I1">
/// The next-to-most-significant component of the origin of this section.
/// </param>
/// <param name="_I2">
/// The least-significant component of the origin of this section.
/// </param>
/// <param name="_E0">
/// The most-significant component of the extent of this section.
/// </param>
/// <param name="_E1">
/// The next-to-most-significant component of the extent of this section.
/// </param>
/// <param name="_E2">
/// The least-significant component of the extent of this section.
/// </param>
/// <returns>
/// A subsection of the array.
/// </returns>
array_view<const _Value_type,3> section(int _I0, int _I1, int _I2, int _E0, int _E1, int _E2) const __GPU
{
array_view<const _Value_type,_Rank> _T1(*this);
return _T1.section(_I0,_I1,_I2,_E0,_E1,_E2);
}
/// <summary>
/// Produces a (possibly unsafe) reinterpretation of this array that is linear and with
/// a different element type.
/// </summary>
/// <returns>
/// A linear array_view with a reinterpreted element type.
/// </returns>
template <typename _Value_type2> array_view<_Value_type2,1> reinterpret_as() __GPU
{
return array_view<_Value_type,1>(_M_buffer_descriptor, Concurrency::extent<1>(extent.size())).template reinterpret_as<_Value_type2>();
}
/// <summary>
/// Produces a (possibly unsafe) reinterpretation of this array that is linear and with
/// a different element type.
/// </summary>
/// <returns>
/// A linear array_view with a reinterpreted element type.
/// </returns>
template <typename _Value_type2> array_view<const _Value_type2,1> reinterpret_as() const __GPU
{
#pragma warning( push )
#pragma warning( disable : 4880 )
// Casting away constness in amp restricted scope might result in
// undefined behavior, therefore, the compiler will report a level 1 warning
// for it. But the following const_cast is harmless thus we are suppressing
// this warning just for this line.
return const_cast<array*>(this)->reinterpret_as<_Value_type2>();
#pragma warning( pop )
}
/// <summary>
/// Produces an array_view of a different rank over this array's data.
/// </summary>
/// <param name="_View_extent">
/// The reshaping extent.
/// </param>
/// <returns>
/// A reshaped array_view.
/// </returns>
template <int _New_rank> array_view<_Value_type,_New_rank> view_as(const Concurrency::extent<_New_rank>& _View_extent) __GPU
{
return array_view<_Value_type,_New_rank>(_M_buffer_descriptor, _View_extent);
}
/// <summary>
/// Produces an array_view of a different rank over this array's data.
/// </summary>
/// <param name="_View_extent">
/// The reshaping extent.
/// </param>
/// <returns>
/// A reshaped array_view.
/// </returns>
template <int _New_rank> array_view<const _Value_type,_New_rank> view_as(const Concurrency::extent<_New_rank>& _View_extent) const __GPU
{
#pragma warning( push )
#pragma warning( disable : 4880 )
// Casting away constness in amp restricted scope might result in
// undefined behavior, therefore, the compiler will report a level 1 warning
// for it. But the following const_cast is harmless thus we are suppressing
// this warning just for this line.
return const_cast<array*>(this)->view_as<_New_rank>(_View_extent);
#pragma warning( pop )
}
/// <summary>
/// Implicitly converts this array into a vector by copying.
/// </summary>
operator std::vector<_Value_type>() const __CPU_ONLY
{
std::vector<_Value_type> _return_vector(extent.size());
Concurrency::copy(*this, _return_vector.begin());
return _return_vector;
}
/// <summary>
/// Returns a pointer to the raw data of this array.
/// </summary>
_Ret_ _Value_type* data() __GPU
{
_Refresh_data_ptr(_Read_write_access, false /* _Exception */);
return reinterpret_cast<_Value_type*>(_M_buffer_descriptor._M_data_ptr);
}
/// <summary>
/// Returns a pointer to the raw data of this array.
/// </summary>
const _Value_type* data() const __GPU
{
#pragma warning( push )
#pragma warning( disable : 4880 )
// Casting away constness in amp restricted scope might result in
// undefined behavior, therefore, the compiler will report a level 1 warning
// for it. But the following const_cast is harmless thus we are suppressing
// this warning just for this line.
const_cast<array*>(this)->_Refresh_data_ptr(_Read_access, false /* _Exception */);
#pragma warning( pop )
return reinterpret_cast<const _Value_type*>(_M_buffer_descriptor._M_data_ptr);
}
/// <summary>
/// Destroys this array and reclaims resources.
/// </summary>
~array() __CPU_ONLY
{
bool _Can_throw = (std::current_exception() == nullptr);
// Destructor should not throw if we are already processing
// an exception and another exception will result in termination
try {
_Unregister();
}
catch(...)
{
if (_Can_throw) {
throw;
}
}
}
private:
// No default constructor
array() __CPU_ONLY;
// Private constructor used by direct3d::make_array
array(const Concurrency::extent<_Rank>& _Extent, _Buffer_descriptor _Buffer_descriptor)
: _M_extent(_Extent), _M_buffer_descriptor(_Buffer_descriptor)
{
_Initialize();
// Register this
this->_Register();
}
// Initialize
unsigned int _Initialize() __CPU_ONLY
{
details::_Is_valid_extent(_M_extent);
// Arrays always have a type access mode of '_Is_array_mode'
// This is the mechanism for differentiating between arrays and array_views by the runtime
_M_buffer_descriptor._M_type_access_mode = _Is_array_mode;
unsigned int totalExtent = _M_extent[_Rank-1];
details::_Array_init_helper<Concurrency::extent<_Rank>, Concurrency::extent<_Rank>>::func(totalExtent, _M_multiplier, _M_extent);
return totalExtent;
}
// Initialize and allocate on specified accelerator_view
void _Initialize(Concurrency::accelerator_view _Av, access_type _Cpu_access_type) __CPU_ONLY
{
unsigned int totalExtent = _Initialize();
// release the existing buffer if any before allocation new one
_M_buffer_descriptor._Set_buffer_ptr(NULL);
_Buffer_ptr _PBuf = _Buffer::_Create_buffer(_Av, _Av, totalExtent, sizeof(_Value_type), false /* _Is_temp */, _Cpu_access_type);
_M_buffer_descriptor._Set_buffer_ptr(_Ubiquitous_buffer::_Create_ubiquitous_buffer(_PBuf));
_Register();
}
// Initialize and allocate on specified accelerator_view and copy specified data
template <typename _InputIterator>
void _Initialize(Concurrency::accelerator_view _Av, _InputIterator _Src_first, _InputIterator _Src_last, access_type _Cpu_access_type) __CPU_ONLY
{
_Initialize(_Av, _Cpu_access_type);
copy(_Src_first, _Src_last, *this);
}
// Initialize and allocate on specified accelerator_views
void _Initialize(Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av) __CPU_ONLY
{
unsigned int totalExtent = _Initialize();
// Staging arrays can only be created if the accelerator_view is on the cpu_accelerator
_Buffer_ptr _PBuf = NULL;
// release the existing buffer if any before allocation new one
_M_buffer_descriptor._Set_buffer_ptr(NULL);
if (_Is_cpu_accelerator(_Av.accelerator))
{
// If the accelerator _Associated_Av supports zero-copy and the default cpu access type
// for the accelerator is access_type_read_write, create a zero-copy buffer instead of a
// staging buffer
if (_Associated_Av.accelerator.supports_cpu_shared_memory && (_Get_recommended_buffer_host_access_mode(_Associated_Av) == _Read_write_access)) {
_PBuf = _Buffer::_Create_buffer(_Associated_Av, _Av, totalExtent, sizeof(_Value_type), false /* _Is_temp */, access_type_read_write);
}
else {
_PBuf = _Buffer::_Create_stage_buffer(_Associated_Av, _Av, totalExtent, sizeof(_Value_type));
}
_PBuf->_Map_buffer(_Read_write_access, true /* _Wait */);
}
else
{
_PBuf = _Buffer::_Create_buffer(_Av, _Av, totalExtent, sizeof(_Value_type), false /* _Is_temp */, access_type_auto);
}
_M_buffer_descriptor._Set_buffer_ptr(_Ubiquitous_buffer::_Create_ubiquitous_buffer(_PBuf));
_Register();
}
// Initialize and allocate on specified accelerator_views
template <typename _InputIterator>
void _Initialize(Concurrency::accelerator_view _Av, Concurrency::accelerator_view _Associated_Av, _InputIterator _Src_first, _InputIterator _Src_last) __CPU_ONLY
{
_Initialize(_Av, _Associated_Av);
copy(_Src_first, _Src_last, *this);
}
void _Register() __CPU_ONLY
{
Concurrency::accelerator_view cpuAv = _Is_cpu_accelerator(this->accelerator_view.accelerator) ?
this->accelerator_view : accelerator(accelerator::cpu_accelerator).default_view;
_M_buffer_descriptor._Get_buffer_ptr()->_Register_view(_M_buffer_descriptor._Get_view_key(), cpuAv, _Create_buffer_view_shape());
_M_buffer_descriptor._Get_buffer_ptr()->_Discard(_M_buffer_descriptor._Get_view_key());
// If the array is on the CPU accelerator then we will ensure that the descriptor
// indicates CPU access
if (_Is_cpu_accelerator(this->accelerator_view.accelerator))
{
_Buffer_ptr _PBuf = NULL;
this->_Get_access_async(_Read_write_access, _PBuf, false)._Get();
}
}
void _Register_copy(const array &_Other) __CPU_ONLY
{
_M_buffer_descriptor._Get_buffer_ptr()->_Register_view_copy(_M_buffer_descriptor._Get_view_key(), _Other._M_buffer_descriptor._Get_view_key());
}
void _Unregister() __CPU_ONLY
{
// No need to unregister if the array was moved causing the buffer ptr to be set to NULL
if (_M_buffer_descriptor._Get_buffer_ptr() != NULL) {
_M_buffer_descriptor._Get_buffer_ptr()->_Unregister_view(_M_buffer_descriptor._Get_view_key());
}
}
_Ret_ _Ubiquitous_buffer* _Get_buffer() __CPU_ONLY const
{
return _M_buffer_descriptor._Get_buffer_ptr();
}
_Event _Get_access_async(_Access_mode _Mode, _Buffer_ptr &_Buf_ptr, bool _Zero_copy_cpu_access = false) __CPU_ONLY const
{
_ASSERTE(!_Zero_copy_cpu_access || (_Get_buffer()->_Get_master_buffer()->_Get_allowed_host_access_mode() != _No_access));
_Buffer_ptr _PBuf;
Concurrency::accelerator_view _Access_av = _Zero_copy_cpu_access ? accelerator(accelerator::cpu_accelerator).default_view : this->accelerator_view;
_Event _Ev = details::_Get_access_async(_M_buffer_descriptor._Get_view_key(),
_Access_av,
_Mode, _PBuf);
_Buf_ptr = _PBuf;
if (_Is_cpu_accelerator(_Access_av.accelerator)) {
_Ev = _Ev._Add_continuation(std::function<_Event()>([_PBuf, this]() mutable -> _Event {
const_cast<array*>(this)->_M_buffer_descriptor._M_data_ptr = _PBuf->_Get_host_ptr();
return _Event();
}));
}
return _Ev;
}
_Ret_ _View_shape* _Create_buffer_view_shape() const
{
_ASSERTE(_Get_buffer()->_Get_master_buffer_elem_size() == sizeof(_Value_type));
unsigned int _ZeroOffset[_Rank] = {0};
unsigned int _View_extent[_Rank];
for(int i=0; i<_Rank; ++i)
{
_View_extent[i] = static_cast<unsigned int>(this->_M_extent[i]);
}
return _View_shape::_Create_view_shape(static_cast<unsigned int>(_Rank), 0, &_View_extent[0], &_ZeroOffset[0], &_View_extent[0]);
}
bool _Has_cpu_access() const __CPU_ONLY
{
return (_Get_buffer()->_Get_master_buffer()->_Get_allowed_host_access_mode() != _No_access);
}
void _Refresh_data_ptr(_Access_mode _Requested_mode, bool _Exception = true) __CPU_ONLY
{
_ASSERTE(_Is_valid_access_mode(_Requested_mode));
// For an array that has CPU access, the maximum CPU access allowed is that allowed by
// the underlying _Buffer allocation
_Requested_mode = static_cast<_Access_mode>(_Requested_mode & _Get_buffer()->_Get_master_buffer()->_Get_allowed_host_access_mode());
// Refresh the data ptr if we do not have requested access
if ((_Requested_mode == _No_access) || ((_M_buffer_descriptor._M_curr_cpu_access_mode & _Requested_mode) != _Requested_mode))
{
if (_Has_cpu_access() && (_Requested_mode != _No_access))
{
auto _Span_id = details::_Get_amp_trace()->_Start_array_view_synchronize_event_helper(_M_buffer_descriptor);
_Buffer_ptr _PBuf;
bool _Zero_copy_cpu_access = !_Is_cpu_accelerator(this->accelerator_view.accelerator);
this->_Get_access_async(_Requested_mode, _PBuf, _Zero_copy_cpu_access)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
else
{
if (_Exception)
{
if (!_Has_cpu_access()) {
throw runtime_exception("The array is not accessible on CPU.", E_FAIL);
}
else {
throw runtime_exception("The array is not accessible for reading on CPU.", E_FAIL);
}
}
}
}
}
void _Refresh_data_ptr(_Access_mode _Requested_mode, bool _Exception = true) __GPU_ONLY
{
UNREFERENCED_PARAMETER(_Requested_mode);
UNREFERENCED_PARAMETER(_Exception);
}
private:
// Data members
Concurrency::extent<_Rank> _M_extent;
// Descriptor of the buffer underlying the array
_Buffer_descriptor _M_buffer_descriptor;
// The vector used for index calculation.
Concurrency::extent<_Rank> _M_multiplier;
};
namespace details
{
template <typename _Value_type, int _Rank>
_Event _Copy_async_impl(const array<_Value_type,_Rank>& _Src, array<_Value_type,_Rank>& _Dest)
{
if (_Src.extent.size() > _Dest.extent.size())
{
throw runtime_exception("Invalid _Src argument. _Src size exceeds total size of the _Dest.", E_INVALIDARG);
}
// We can obliterate the exisiting content of dest if it is about to be totally overwritten
_Access_mode _Dest_access_mode = (_Src.extent.size() == _Dest.extent.size()) ? _Write_access : _Read_write_access;
_Buffer_ptr _PBufSrc, _PBufDest;
_Event _Ev = _Get_access_async(_Src, _Read_access, _PBufSrc);
_Ev = _Ev._Add_event(_Get_access_async(_Dest, _Dest_access_mode, _PBufDest));
size_t _NumElemsToCopy = (_Src.extent.size() * sizeof(_Value_type)) / _PBufSrc->_Get_elem_size();
return _Ev._Add_continuation(std::function<_Event()>([_PBufSrc, _PBufDest, _NumElemsToCopy]() mutable -> _Event {
return details::_Copy_impl(_PBufSrc, 0, _PBufDest, 0, _NumElemsToCopy);
}));
}
template <typename InputIterator, typename _Value_type, int _Rank>
_Event _Copy_async_impl(InputIterator _SrcFirst, InputIterator _SrcLast, array<_Value_type, _Rank> &_Dest)
{
size_t _NumElemsToCopy = std::distance(_SrcFirst, _SrcLast);
// We can obliterate the exisiting content of dest if it is about to be totally overwritten
_Access_mode _Dest_access_mode = (_NumElemsToCopy == _Dest.extent.size()) ? _Write_access : _Read_write_access;
_Buffer_ptr _PDestBuf;
_Event _Ev = _Get_access_async(_Dest, _Dest_access_mode, _PDestBuf);
return _Ev._Add_continuation(std::function<_Event()>([_SrcFirst, _SrcLast, _PDestBuf, _NumElemsToCopy]() mutable -> _Event {
return details::_Copy_impl<InputIterator, _Value_type>(_SrcFirst, _SrcLast, _NumElemsToCopy, _PDestBuf, 0);
}));
}
template <typename OutputIterator, typename _Value_type, int _Rank>
_Event _Copy_async_impl(const array<_Value_type, _Rank> &_Src, OutputIterator _DestIter)
{
_Buffer_ptr _PSrcBuf;
_Event _Ev = _Get_access_async(_Src, _Read_access, _PSrcBuf);
size_t _NumElemsToCopy = (_Src.extent.size() * sizeof(_Value_type)) / _PSrcBuf->_Get_elem_size();
return _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _NumElemsToCopy, _DestIter]() mutable -> _Event {
return details::_Copy_impl<OutputIterator, _Value_type>(_PSrcBuf, 0, _NumElemsToCopy, _DestIter);
}));
}
template <typename _Value_type, int _Rank>
_Event _Copy_async_impl(const array<_Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest)
{
const _Buffer_descriptor &_SrcBufDesc = _Get_buffer_descriptor(_Src);
const _Buffer_descriptor &_DestBufDesc = _Get_buffer_descriptor(_Dest);
if (_SrcBufDesc._Get_buffer_ptr() == _DestBufDesc._Get_buffer_ptr()) {
throw runtime_exception("Cannot copy between overlapping regions of the same buffer.", E_INVALIDARG);
}
_Buffer_ptr _PSrcBuf, _PDestBuf;
_Event _Ev = _Get_access_async(_Src, _Read_access, _PSrcBuf);
// The source accelerator_view is driven by array's master location,
// therefore we can pass nullptr to avoid unnecessary computation
auto _AccelInfo = _Get_src_dest_accelerator_view(nullptr, &_DestBufDesc);
_Ev = _Ev._Add_event(_Get_access_async(_DestBufDesc._Get_view_key(), _AccelInfo.second, _Write_access, _PDestBuf));
_View_shape_ptr _PSrcShape = _Get_buffer_view_shape(_SrcBufDesc);
_View_shape_ptr _PDestShape = _Get_buffer_view_shape(_DestBufDesc);
return _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _PSrcShape, _PDestBuf, _PDestShape]() mutable -> _Event {
return details::_Copy_impl(_PSrcBuf, _PSrcShape, _PDestBuf, _PDestShape);
}));
}
template <typename _Value_type, int _Rank>
_Event _Copy_async_impl(const array_view<const _Value_type, _Rank>& _Src, array<_Value_type, _Rank>& _Dest)
{
const _Buffer_descriptor &_SrcBufDesc = _Get_buffer_descriptor(_Src);
const _Buffer_descriptor &_DestBufDesc = _Get_buffer_descriptor(_Dest);
if (_SrcBufDesc._Get_buffer_ptr() == _DestBufDesc._Get_buffer_ptr()) {
throw runtime_exception("Cannot copy between overlapping regions of the same buffer.", E_INVALIDARG);
}
auto _AccelInfo = _Get_src_dest_accelerator_view(&_SrcBufDesc, &_DestBufDesc);
_Buffer_ptr _PSrcBuf, _PDestBuf;
_Event _Ev = _Get_access_async(_SrcBufDesc._Get_view_key(), _AccelInfo.first, _Read_access, _PSrcBuf);
_Ev = _Ev._Add_event(_Get_access_async(_Dest, _Write_access, _PDestBuf));
_View_shape_ptr _PSrcShape = _Get_buffer_view_shape(_SrcBufDesc);
_View_shape_ptr _PDestShape = _Get_buffer_view_shape(_DestBufDesc);
return _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _PSrcShape, _PDestBuf, _PDestShape]() mutable -> _Event {
return details::_Copy_impl(_PSrcBuf, _PSrcShape, _PDestBuf, _PDestShape);
}));
}
template <typename _Value_type, int _Rank>
_Event _Copy_async_impl(const array_view<const _Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest)
{
const _Buffer_descriptor &_SrcBufDesc = _Get_buffer_descriptor(_Src);
const _Buffer_descriptor &_DestBufDesc = _Get_buffer_descriptor(_Dest);
_View_shape_ptr _PSrcShape = _Get_buffer_view_shape(_SrcBufDesc);
_View_shape_ptr _PDestShape = _Get_buffer_view_shape(_DestBufDesc);
if ((_SrcBufDesc._Get_buffer_ptr() == _DestBufDesc._Get_buffer_ptr()) && _PSrcShape->_Overlaps(_PDestShape)) {
throw runtime_exception("Cannot copy between overlapping regions of the same buffer.", E_INVALIDARG);
}
auto _AccelInfo = _Get_src_dest_accelerator_view(&_SrcBufDesc, &_DestBufDesc);
_Buffer_ptr _PSrcBuf, _PDestBuf;
_Event _Ev = _Get_access_async(_SrcBufDesc._Get_view_key(), _AccelInfo.first, _Read_access, _PSrcBuf);
_Ev = _Ev._Add_event(_Get_access_async(_DestBufDesc._Get_view_key(), _AccelInfo.second, _Write_access, _PDestBuf));
return _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _PSrcShape, _PDestBuf, _PDestShape]() mutable -> _Event {
return details::_Copy_impl(_PSrcBuf, _PSrcShape, _PDestBuf, _PDestShape);
}));
}
template <typename InputIterator, typename _Value_type, int _Rank>
_Event _Copy_async_impl(InputIterator _SrcFirst, InputIterator _SrcLast, const array_view<_Value_type, _Rank> &_Dest)
{
static_assert(!std::is_const<_Value_type>::value, "Cannot copy to array_view<const _Value_type, _Rank>.");
size_t _Src_size = std::distance(_SrcFirst, _SrcLast);
// Source cannot be greater than destination
if (_Src_size > _Dest.extent.size())
{
throw runtime_exception("Number of elements in range between [_SrcFirst, _SrcLast) exceeds total size of the _Dest.", E_INVALIDARG);
}
#pragma warning( push )
#pragma warning( disable : 4127 ) // Disable warning about constant conditional expression
// Higher ranks need to have as many elements as in _Dest array_view
if ((_Rank > 1) && (_Src_size != _Dest.extent.size()))
{
throw runtime_exception("For _Rank > 1 the number of elements in range between [_SrcFirst, _SrcLast) has to be equal to total size of the _Dest.", E_INVALIDARG);
}
#pragma warning( pop )
// We can obliterate the exisiting content of dest if it is about to be totally overwritten
_Access_mode _Dest_access_mode = (_Src_size == _Dest.extent.size()) ? _Write_access : _Read_write_access;
// Get read-write access for array_view on cpu_accelerator and take underlying pointer to data
const _Buffer_descriptor &_DestBufDesc = _Get_buffer_descriptor(_Dest);
auto _AccelInfo = _Get_src_dest_accelerator_view(nullptr, &_DestBufDesc);
_Buffer_ptr _PDestBuf;
_Event _Ev = _Get_access_async(_DestBufDesc._Get_view_key(), _AccelInfo.second, _Dest_access_mode, _PDestBuf);
_View_shape_ptr _Dst_shape = _Get_buffer_view_shape(_DestBufDesc);
// If the _Dst shape is linear then perform a linear copy
unsigned int _Dst_linear_offset, _Dst_linear_size;
if (_Dst_shape->_Is_view_linear(_Dst_linear_offset, _Dst_linear_size))
{
_Ev = _Ev._Add_continuation(std::function<_Event()>([_PDestBuf, _SrcFirst, _SrcLast, _Src_size, _Dst_linear_offset]() mutable -> _Event {
return details::_Copy_impl<InputIterator, _Value_type>(_SrcFirst, _SrcLast, _Src_size, _PDestBuf, _Dst_linear_offset);
}));
}
else
{
_View_shape_ptr _Reinterpreted_dst_shape = _Create_reinterpreted_shape(_Dst_shape, _PDestBuf->_Get_elem_size(), sizeof(_Value_type));
// Source has as many elements as in destination, reshape source to match destination shape
std::vector<unsigned int> _Src_offset(_Reinterpreted_dst_shape->_Get_rank(), 0);
_View_shape_ptr _Src_shape = details::_View_shape::_Create_view_shape(_Reinterpreted_dst_shape->_Get_rank(), 0 /* linear offset*/,
_Reinterpreted_dst_shape->_Get_view_extent(), _Src_offset.data(),
_Reinterpreted_dst_shape->_Get_view_extent());
_Ev = _Ev._Add_continuation(std::function<_Event()>([_PDestBuf, _SrcFirst, _Src_shape, _Dst_shape]() mutable -> _Event {
return details::_Copy_impl<InputIterator, _Value_type>(_SrcFirst, _Src_shape, _PDestBuf, _Dst_shape);
}));
}
return _Ev;
}
template <typename OutputIterator, typename _Value_type, int _Rank>
_Event _Copy_async_impl(const array_view<_Value_type, _Rank> &_Src, OutputIterator _DestIter)
{
// Caller is responsible for passing valid _DestIter
// Get read access for array_view on cpu_accelerator and take underlying pointer to data
const _Buffer_descriptor &_SrcBufDesc = _Get_buffer_descriptor(_Src);
auto _AccelInfo = _Get_src_dest_accelerator_view(&_SrcBufDesc, nullptr);
_Buffer_ptr _PSrcBuf;
_Event _Ev = _Get_access_async(_SrcBufDesc._Get_view_key(), _AccelInfo.first, _Read_access, _PSrcBuf);
// Get source shape
_View_shape_ptr _Src_shape = _Get_buffer_view_shape(_SrcBufDesc);
// If the _Src_shape is linear then perform a linear copy
unsigned int _Src_linear_offset, _Src_linear_size;
if (_Src_shape->_Is_view_linear(_Src_linear_offset, _Src_linear_size))
{
_Ev = _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _Src_linear_offset, _Src_linear_size, _DestIter]() mutable -> _Event {
return details::_Copy_impl<OutputIterator, _Value_type>(_PSrcBuf, _Src_linear_offset, _Src_linear_size, _DestIter);
}));
}
else
{
_View_shape_ptr _Reinterpreted_src_shape = _Create_reinterpreted_shape(_Src_shape, _PSrcBuf->_Get_elem_size(), sizeof(_Value_type));
// Valid destination should have space for as many elements as in source array_view, reshape to match source view shape
std::vector<unsigned int> _Dst_offset(_Reinterpreted_src_shape->_Get_rank(), 0);
_View_shape_ptr _Dst_shape = details::_View_shape::_Create_view_shape(_Reinterpreted_src_shape->_Get_rank(), 0 /* linear offset*/,
_Reinterpreted_src_shape->_Get_view_extent(), _Dst_offset.data(),
_Reinterpreted_src_shape->_Get_view_extent());
_Ev = _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _Src_shape, _DestIter, _Dst_shape]() mutable -> _Event {
return details::_Copy_impl<OutputIterator, _Value_type>(_PSrcBuf, _Src_shape, _DestIter, _Dst_shape);
}));
}
return _Ev;
}
}
/// <summary>
/// Asynchronously copies the contents of the source array into the destination array.
/// </summary>
/// <param name="_Src">
/// The source array.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array<_Value_type,_Rank>& _Src, array<_Value_type,_Rank>& _Dest)
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_async_copy_event_helper(details::_Get_buffer_descriptor(_Src),
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * _Src.extent.size());
auto _Ev = _Copy_async_impl(_Src, _Dest);
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Copies the contents of the source array into the destination array.
/// </summary>
/// <param name="_Src">
/// The source array.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
template <typename _Value_type, int _Rank> void copy(const array<_Value_type,_Rank>& _Src, array<_Value_type,_Rank>& _Dest)
{
auto _Span_id = details::_Get_amp_trace()->_Start_copy_event_helper(details::_Get_buffer_descriptor(_Src),
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * _Src.extent.size());
_Copy_async_impl(_Src, _Dest)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Asynchronously copies the elements in the range [_SrcFirst, _SrcLast) into the destination array.
/// </summary>
/// <param name="_SrcFirst">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_SrcLast">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename InputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(InputIterator _SrcFirst, InputIterator _SrcLast, array<_Value_type, _Rank> &_Dest)
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_async_copy_event_helper(nullptr,
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * std::distance(_SrcFirst, _SrcLast));
_Event _Ev = _Copy_async_impl(_SrcFirst, _SrcLast, _Dest);
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Copies the elements in the range [_SrcFirst, _SrcLast) into the destination array.
/// </summary>
/// <param name="_SrcFirst">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_SrcLast">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
template <typename InputIterator, typename _Value_type, int _Rank> void copy(InputIterator _SrcFirst, InputIterator _SrcLast, array<_Value_type, _Rank> &_Dest)
{
auto _Span_id = details::_Get_amp_trace()->_Start_copy_event_helper(nullptr,
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * std::distance(_SrcFirst, _SrcLast));
_Copy_async_impl(_SrcFirst, _SrcLast, _Dest)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Asynchronously copies the elements beginning at _SrcFirst into the destination array.
/// </summary>
/// <param name="_SrcFirst">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than _Dest.extent.size(), undefined behavior results.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename InputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(InputIterator _SrcFirst, array<_Value_type, _Rank> &_Dest)
{
InputIterator _SrcLast = _SrcFirst;
std::advance(_SrcLast, _Dest.extent.size());
return copy_async(_SrcFirst, _SrcLast, _Dest);
}
/// <summary>
/// Copies the elements beginning at _SrcFirst into the destination array.
/// </summary>
/// <param name="_SrcFirst">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than _Dest.extent.size(), undefined behavior results.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
template <typename InputIterator, typename _Value_type, int _Rank> void copy(InputIterator _SrcFirst, array<_Value_type, _Rank> &_Dest)
{
InputIterator _SrcLast = _SrcFirst;
std::advance(_SrcLast, _Dest.extent.size());
copy(_SrcFirst, _SrcLast, _Dest);
}
/// <summary>
/// Asynchronously copies the contents of the array into the destination beginning at _DestIter.
/// </summary>
/// <param name="_Src">
/// The source array.
/// </param>
/// <param name="_DestIter">
/// An output iterator to the beginning position at destination.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename OutputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array<_Value_type, _Rank> &_Src, OutputIterator _DestIter)
{
_CPP_AMP_VERIFY_MUTABLE_ITERATOR(OutputIterator);
auto _Async_op_id = details::_Get_amp_trace()->_Launch_async_copy_event_helper(details::_Get_buffer_descriptor(_Src),
nullptr,
sizeof(_Value_type) * _Src.extent.size());
_Event _Ev = _Copy_async_impl(_Src, _DestIter);
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Copies the contents of the array into the destination beginning at _DestIter.
/// </summary>
/// <param name="_Src">
/// The source array.
/// </param>
/// <param name="_DestIter">
/// An output iterator to the beginning position at destination.
/// </param>
template <typename OutputIterator, typename _Value_type, int _Rank> void copy(const array<_Value_type, _Rank> &_Src, OutputIterator _DestIter)
{
_CPP_AMP_VERIFY_MUTABLE_ITERATOR(OutputIterator);
auto _Span_id = details::_Get_amp_trace()->_Start_copy_event_helper(details::_Get_buffer_descriptor(_Src),
nullptr,
sizeof(_Value_type) * _Src.extent.size());
_Copy_async_impl(_Src, _DestIter)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Asynchronously copies the contents of the source array into the destination array_view.
/// </summary>
/// <param name="_Src">
/// The source array.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array<_Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest)
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_async_copy_event_helper(details::_Get_buffer_descriptor(_Src),
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * _Src.extent.size());
_Event _Ev = _Copy_async_impl(_Src, _Dest);
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Copies the contents of the source array into the destination array_view.
/// </summary>
/// <param name="_Src">
/// The source array.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
template <typename _Value_type, int _Rank> void copy(const array<_Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest)
{
auto _Span_id = details::_Get_amp_trace()->_Start_copy_event_helper(details::_Get_buffer_descriptor(_Src),
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * _Src.extent.size());
_Copy_async_impl(_Src, _Dest)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Asynchronously copies the contents of the source array_view into the destination array.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<const _Value_type, _Rank>& _Src, array<_Value_type, _Rank>& _Dest)
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_async_copy_event_helper(details::_Get_buffer_descriptor(_Src),
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * _Src.extent.size());
_Event _Ev = _Copy_async_impl(_Src, _Dest);
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Copies the contents of the source array_view into the destination array.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
template <typename _Value_type, int _Rank> void copy(const array_view<const _Value_type, _Rank>& _Src, array<_Value_type, _Rank>& _Dest)
{
auto _Span_id = details::_Get_amp_trace()->_Start_copy_event_helper(details::_Get_buffer_descriptor(_Src),
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * _Src.extent.size());
_Copy_async_impl(_Src, _Dest)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Asynchronously copies the contents of the source array_view into the destination array.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<_Value_type, _Rank>& _Src, array<_Value_type, _Rank>& _Dest)
{
return copy_async<_Value_type, _Rank>(array_view<const _Value_type, _Rank>(_Src), _Dest);
}
/// <summary>
/// Copies the contents of the source array_view into the destination array.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_Dest">
/// The destination array.
/// </param>
template <typename _Value_type, int _Rank> void copy(const array_view<_Value_type, _Rank>& _Src, array<_Value_type, _Rank>& _Dest)
{
copy<_Value_type, _Rank>(array_view<const _Value_type, _Rank>(_Src), _Dest);
}
/// <summary>
/// Asynchronously copies the contents of the source array_view into the destination array_view.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<const _Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest)
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_async_copy_event_helper(details::_Get_buffer_descriptor(_Src),
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * _Src.extent.size());
_Event _Ev = _Copy_async_impl(_Src, _Dest);
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Copies the contents of the source array_view into the destination array_view.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
template <typename _Value_type, int _Rank> void copy(const array_view<const _Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest)
{
auto _Span_id = details::_Get_amp_trace()->_Start_copy_event_helper(details::_Get_buffer_descriptor(_Src),
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * _Src.extent.size());
_Copy_async_impl(_Src, _Dest)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Asynchronously copies the contents of the source array_view into the destination array_view.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<_Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest)
{
return copy_async<_Value_type, _Rank>(array_view<const _Value_type, _Rank>(_Src), _Dest);
}
/// <summary>
/// Copies the contents of the source array_view into the destination array_view.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
template <typename _Value_type, int _Rank> void copy(const array_view<_Value_type, _Rank>& _Src, const array_view<_Value_type, _Rank>& _Dest)
{
copy<_Value_type, _Rank>(array_view<const _Value_type, _Rank>(_Src), _Dest);
}
/// <summary>
/// Asynchronously copies the elements in the range [_SrcFirst, _SrcLast) into the destination array_view.
/// </summary>
/// <param name="_SrcFirst">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_SrcLast">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename InputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(InputIterator _SrcFirst, InputIterator _SrcLast, const array_view<_Value_type, _Rank> &_Dest)
{
auto _Async_op_id = details::_Get_amp_trace()->_Launch_async_copy_event_helper(nullptr,
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * std::distance(_SrcFirst, _SrcLast));
_Event _Ev = _Copy_async_impl(_SrcFirst, _SrcLast, _Dest);
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Asynchronously copies the elements beginning at _SrcFirst into the destination array_view.
/// </summary>
/// <param name="_SrcFirst">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than _Dest.extent.size(), undefined behavior results.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename InputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(InputIterator _SrcFirst, const array_view<_Value_type, _Rank> &_Dest)
{
InputIterator _SrcLast = _SrcFirst;
std::advance(_SrcLast, _Dest.extent.size());
return copy_async(_SrcFirst, _SrcLast, _Dest);
}
/// <summary>
/// Copies the elements in the range [_SrcFirst, _SrcLast) into the destination array_view.
/// </summary>
/// <param name="_SrcFirst">
/// A beginning iterator into the source container.
/// </param>
/// <param name="_SrcLast">
/// An ending iterator into the source container.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
template <typename InputIterator, typename _Value_type, int _Rank> void copy(InputIterator _SrcFirst, InputIterator _SrcLast, const array_view<_Value_type, _Rank> &_Dest)
{
auto _Span_id = details::_Get_amp_trace()->_Start_copy_event_helper(nullptr,
details::_Get_buffer_descriptor(_Dest),
sizeof(_Value_type) * std::distance(_SrcFirst, _SrcLast));
_Copy_async_impl(_SrcFirst, _SrcLast, _Dest)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
/// <summary>
/// Copies the contents of an STL container into the destination array_view.
/// </summary>
/// <param name="_SrcFirst">
/// A beginning iterator into the source container; if the number of available container elements starting at this iterator position is less
/// than _Dest.extent.size(), undefined behavior results.
/// </param>
/// <param name="_Dest">
/// The destination array_view.
/// </param>
template <typename InputIterator, typename _Value_type, int _Rank> void copy(InputIterator _SrcFirst, const array_view<_Value_type, _Rank> &_Dest)
{
InputIterator _SrcLast = _SrcFirst;
std::advance(_SrcLast, _Dest.extent.size());
copy(_SrcFirst, _SrcLast, _Dest);
}
/// <summary>
/// Asynchronously copies the contents of the array_view into the destination beginning at _DestIter.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_DestIter">
/// An output iterator to the beginning position at destination.
/// </param>
/// <returns>
/// A future upon which to wait for the operation to complete.
/// </returns>
template <typename OutputIterator, typename _Value_type, int _Rank> concurrency::completion_future copy_async(const array_view<_Value_type, _Rank> &_Src, OutputIterator _DestIter)
{
_CPP_AMP_VERIFY_MUTABLE_ITERATOR(OutputIterator);
// Caller is responsible for passing valid _DestIter
auto _Async_op_id = details::_Get_amp_trace()->_Launch_async_copy_event_helper(details::_Get_buffer_descriptor(_Src),
nullptr,
sizeof(_Value_type) * _Src.extent.size());
_Event _Ev = _Copy_async_impl(_Src, _DestIter);
return details::_Get_amp_trace()->_Start_async_op_wait_event_helper(_Async_op_id, _Ev);
}
/// <summary>
/// Copies the contents of the array_view into the destination beginning at _DestIter.
/// </summary>
/// <param name="_Src">
/// The source array_view.
/// </param>
/// <param name="_DestIter">
/// An output iterator to the beginning position at destination.
/// </param>
template <typename OutputIterator, typename _Value_type, int _Rank> void copy(const array_view<_Value_type, _Rank> &_Src, OutputIterator _DestIter)
{
_CPP_AMP_VERIFY_MUTABLE_ITERATOR(OutputIterator);
auto _Span_id = details::_Get_amp_trace()->_Start_copy_event_helper(details::_Get_buffer_descriptor(_Src),
nullptr,
sizeof(_Value_type) * _Src.extent.size());
_Copy_async_impl(_Src, _DestIter)._Get();
details::_Get_amp_trace()->_Write_end_event(_Span_id);
}
// Namespace for Direct3D specific functionality
namespace direct3d
{
/// <summary>
/// Get the D3D buffer interface underlying an array.
/// </summary>
/// <param name="_Rank">
/// The rank of the array to get underlying D3D buffer of.
/// </param>
/// <param name="_Value_type">
/// The type of the elements in the array to get underlying D3D buffer of.
/// </param>
/// <param name="_Array">
/// A array on a D3D accelerator_view for which the underlying D3D buffer interface is returned.
/// </param>
/// <returns>
/// The IUnknown interface pointer corresponding to the D3D buffer underlying the array.
/// </returns>
template<typename _Value_type, int _Rank> _Ret_ IUnknown *get_buffer(const array<_Value_type, _Rank> &_Array) __CPU_ONLY
{
_Buffer_ptr _PBuf;
_Get_access_async(_Array, _Read_write_access, _PBuf)._Get();
return details::_D3D_interop::_Get_D3D_buffer(_PBuf);
}
/// <summary>
/// Create an array from a D3D buffer interface pointer.
/// </summary>
/// <param name="_Rank">
/// The rank of the array to be created from the D3D buffer.
/// </param>
/// <param name="_Value_type">
/// The type of the elements of the array to be created from the D3D buffer.
/// </param>
/// <param name="_Extent">
/// An extent that describes the shape of the array aggregate.
/// </param>
/// <param name="_Av">
/// A D3D accelerator_view on which the array is to be created.
/// </param>
/// <param name="_D3D_buffer">
/// IUnknown interface pointer of the D3D buffer to create the array from.
/// </param>
/// <returns>
/// A array created using the provided D3D buffer.
/// </returns>
template<typename _Value_type, int _Rank> array<_Value_type, _Rank> make_array(const Concurrency::extent<_Rank> &_Extent, const Concurrency::accelerator_view &_Av, _In_ IUnknown *_D3D_buffer) __CPU_ONLY
{
details::_Is_valid_extent(_Extent);
if (_D3D_buffer == NULL)
{
throw runtime_exception("NULL D3D buffer pointer.", E_INVALIDARG);
}
if (!details::_Is_D3D_accelerator_view(_Av))
{
throw runtime_exception("Cannot create D3D buffer on a non-D3D accelerator_view.", E_INVALIDARG);
}
_Ubiquitous_buffer_ptr _PBuf = _Ubiquitous_buffer::_Create_ubiquitous_buffer(_Buffer::_Create_buffer(_D3D_buffer, _Av, _Extent.size(), sizeof(_Value_type)));
return array<_Value_type, _Rank>(_Extent, _Buffer_descriptor(_PBuf->_Get_master_buffer()->_Get_host_ptr(), _PBuf, _Is_array_mode, _Read_write_access));
}
} // namespace Concurrency::direct3d
//=============================================================================
// Atomic Operation Library
//=============================================================================
#define AS_UINT_PTR(p) reinterpret_cast<unsigned int *>(p)
#define AS_UINT(v) *(reinterpret_cast<unsigned int *>(&(v)))
#define AS_INT(v) *(reinterpret_cast<int *>(&(v)))
#define AS_FLOAT(v) *(reinterpret_cast<float *>(&(v)))
/// <summary>
/// Performs an atomic addition of _Value to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be added to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_fetch_add(_Inout_ int * _Dest, int _Value) __GPU_ONLY
{
unsigned int _Ret;
_Ret = __dp_d3d_interlocked_add(AS_UINT_PTR(_Dest), AS_UINT(_Value));
return AS_INT(_Ret);
}
/// <summary>
/// Performs an atomic addition of _Value to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be added to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_fetch_add(_Inout_ unsigned int * _Dest, unsigned int _Value) __GPU_ONLY
{
return __dp_d3d_interlocked_add(_Dest, _Value);
}
/// <summary>
/// Performs an atomic subtraction of _Value from the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be subtracted from the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_fetch_sub(_Inout_ int * _Dest, int _Value) __GPU_ONLY
{
unsigned int _Ret;
int _Neg = -_Value;
_Ret = __dp_d3d_interlocked_add(AS_UINT_PTR(_Dest), AS_UINT(_Neg));
return AS_INT(_Ret);
}
/// <summary>
/// Performs an atomic subtraction of _Value from the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be subtracted from the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_fetch_sub(_Inout_ unsigned int * _Dest, unsigned int _Value) __GPU_ONLY
{
#pragma warning( push )
#pragma warning( disable : 4146 )
// Warning 4146: unary minus operator applied to unsigned type, result
// still unsigned.
//
// This is what we want here. The resulted unsigned value have the
// right binary representation for achieving subtraction
return __dp_d3d_interlocked_add(_Dest, (-_Value));
#pragma warning( pop )
}
/// <summary>
/// Performs an atomic increment to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_fetch_inc(_Inout_ int * _Dest) __GPU_ONLY
{
unsigned int _Ret;
_Ret = __dp_d3d_interlocked_add(AS_UINT_PTR(_Dest), 1U);
return AS_INT(_Ret);
}
/// <summary>
/// Performs an atomic increment to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_fetch_inc(_Inout_ unsigned int * _Dest) __GPU_ONLY
{
return __dp_d3d_interlocked_add(_Dest, 1U);
}
/// <summary>
/// Performs an atomic decrement to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_fetch_dec(_Inout_ int * _Dest) __GPU_ONLY
{
#pragma warning( push )
#pragma warning( disable : 4146 )
// Warning 4146: unary minus operator applied to unsigned type, result
// still unsigned.
unsigned int _Ret;
_Ret = __dp_d3d_interlocked_add(AS_UINT_PTR(_Dest), (-(1U)));
return AS_INT(_Ret);
#pragma warning( pop )
}
/// <summary>
/// Performs an atomic decrement to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_fetch_dec(_Inout_ unsigned int * _Dest) __GPU_ONLY
{
#pragma warning( push )
#pragma warning( disable : 4146 )
// Warning 4146: unary minus operator applied to unsigned type, result
// still unsigned.
return __dp_d3d_interlocked_add(_Dest, (-(1U)));
#pragma warning( pop )
}
/// <summary>
/// Sets the value of location pointed to by _Dest to _Value as an atomic operation
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be set to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_exchange(_Inout_ int * _Dest, int _Value) __GPU_ONLY
{
unsigned int _Ret = __dp_d3d_interlocked_exchange(AS_UINT_PTR(_Dest), AS_UINT(_Value));
return AS_INT(_Ret);
}
/// <summary>
/// Sets the value of location pointed to by _Dest to _Value as an atomic operation
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be set to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_exchange(_Inout_ unsigned int * _Dest, unsigned int _Value) __GPU_ONLY
{
return __dp_d3d_interlocked_exchange(_Dest, _Value);
}
/// <summary>
/// Sets the value of location pointed to by _Dest to _Value as an atomic operation
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be set to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline float atomic_exchange(_Inout_ float * _Dest, float _Value) __GPU_ONLY
{
unsigned int _Ret = __dp_d3d_interlocked_exchange(AS_UINT_PTR(_Dest), AS_UINT(_Value));
return AS_FLOAT(_Ret);
}
/// <summary>
/// Atomically, compares the value pointed to by _Dest for equality with that pointed to by _Expected_value,
/// and if true, returns true and replaces the value with _Value, and if false, returns false and updates the value
/// pointed to by _Expected_value with the value pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Expected_value">
/// Pointer to the the value being compared to the value pointed to by _Dest. If the comparison is unsuccessful,
/// the value is updated with the value pointed to by _Dest
/// </param>
/// <param name="_Value">
/// The value to be stored to the location pointed to by _Dest if the comparison is successful
/// </param>
/// <returns>
/// If the operation is successful, return true. Otherwise, false
/// </returns>
inline bool atomic_compare_exchange(_Inout_ int * _Dest, _Inout_ int * _Expected_value, int _Value) __GPU_ONLY
{
int _Old = *_Expected_value;
unsigned int _Ret = __dp_d3d_interlocked_compare_exchange(AS_UINT_PTR(_Dest), AS_UINT(_Value), AS_UINT(_Old));
if (_Ret == AS_UINT(_Old))
{
return true;
}
else
{
*_Expected_value = AS_INT(_Ret);
return false;
}
}
/// <summary>
/// Atomically, compares the value pointed to by _Dest for equality with that pointed to by _Expected_value,
/// and if true, returns true and replaces the value with _Value, and if false, returns false and updates the value
/// pointed to by _Expected_value with the value pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Expected_value">
/// Pointer to the the value being compared to the value pointed to by _Dest. If the comparison is unsuccessful,
/// the value is updated with the value pointed to by _Dest
/// </param>
/// <param name="_Value">
/// The value to be stored to the location pointed to by _Dest if the comparison is successful
/// </param>
/// <returns>
/// If the operation is successful, return true. Otherwise, false
/// </returns>
inline bool atomic_compare_exchange(_Inout_ unsigned int * _Dest, _Inout_ unsigned int * _Expected_value, unsigned int _Value) __GPU_ONLY
{
unsigned int _Old = *_Expected_value;
unsigned int _Ret = __dp_d3d_interlocked_compare_exchange(_Dest, _Value, _Old);
if (_Ret == _Old)
{
return true;
}
else
{
*_Expected_value = _Ret;
return false;
}
}
/// <summary>
/// Atomically computes the maximum of _Value and the value of the memory location pointed to
/// by _Dest, and stores the maximum value to the memory location
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be compared to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_fetch_max(_Inout_ int * _Dest, int _Value) __GPU_ONLY
{
return __dp_d3d_interlocked_max_int(_Dest, _Value);
}
/// <summary>
/// Atomically computes the maximum of _Value and the value of the memory location pointed to
/// by _Dest, and stores the maximum value to the memory location
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be compared to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_fetch_max(_Inout_ unsigned int * _Dest, unsigned int _Value) __GPU_ONLY
{
return __dp_d3d_interlocked_max_uint(_Dest, _Value);
}
/// <summary>
/// Atomically computes the minimum of _Value and the value of the memory location pointed to
/// by _Dest, and stores the minimum value to the memory location
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be compared to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_fetch_min(_Inout_ int * _Dest, int _Value) __GPU_ONLY
{
return __dp_d3d_interlocked_min_int(_Dest, _Value);
}
/// <summary>
/// Atomically computes the minimum of _Value and the value of the memory location pointed to
/// by _Dest, and stores the minimum value to the memory location
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to be compared to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_fetch_min(_Inout_ unsigned int * _Dest, unsigned int _Value) __GPU_ONLY
{
return __dp_d3d_interlocked_min_uint(_Dest, _Value);
}
/// <summary>
/// Performs an atomic bitwise and operation of _Value to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to bitwise and to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_fetch_and(_Inout_ int * _Dest, int _Value) __GPU_ONLY
{
unsigned int _Ret;
_Ret = __dp_d3d_interlocked_and(AS_UINT_PTR(_Dest), AS_UINT(_Value));
return AS_INT(_Ret);
}
/// <summary>
/// Performs an atomic bitwise and operation of _Value to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to bitwise and to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_fetch_and(_Inout_ unsigned int * _Dest, unsigned int _Value) __GPU_ONLY
{
return __dp_d3d_interlocked_and(_Dest, _Value);
}
/// <summary>
/// Performs an atomic bitwise or operation of _Value to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to bitwise or to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_fetch_or(_Inout_ int * _Dest, int _Value) __GPU_ONLY
{
unsigned int _Ret;
_Ret = __dp_d3d_interlocked_or(AS_UINT_PTR(_Dest), AS_UINT(_Value));
return AS_INT(_Ret);
}
/// <summary>
/// Performs an atomic bitwise or operation of _Value to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to bitwise or to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_fetch_or(_Inout_ unsigned int * _Dest, unsigned int _Value) __GPU_ONLY
{
return __dp_d3d_interlocked_or(_Dest, _Value);
}
/// <summary>
/// Performs an atomic bitwise xor operation of _Value to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to bitwise xor to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline int atomic_fetch_xor(_Inout_ int * _Dest, int _Value) __GPU_ONLY
{
unsigned int _Ret;
_Ret = __dp_d3d_interlocked_xor(AS_UINT_PTR(_Dest), AS_UINT(_Value));
return AS_INT(_Ret);
}
/// <summary>
/// Performs an atomic bitwise xor operation of _Value to the memory location pointed to by _Dest
/// </summary>
/// <param name="_Dest">
/// Pointer to the destination location
/// </param>
/// <param name="_Value">
/// The value to bitwise xor to the location pointed to by _Dest
/// </param>
/// <returns>
/// The original value of the location pointed to by _Dest
/// </returns>
inline unsigned int atomic_fetch_xor(_Inout_ unsigned int * _Dest, unsigned int _Value) __GPU_ONLY
{
return __dp_d3d_interlocked_xor(_Dest, _Value);
}
//=============================================================================
// parallel_for_each
//=============================================================================
/// <summary>
/// Invokes a parallel computation of a kernel function over a compute domain on an accelerator_view.
/// The accelerator_view is determined from the arrays and/or array_views captured by the kernel function,
/// or if no accelerator_view can be derived, the default is chosen.
/// </summary>
/// <param name="_Compute_domain">
/// An extent which represents the set of indices that form the compute domain.
/// </param>
/// <param name="_Kernel">
/// A function object that takes an argument of type "index&lt;_Rank&gt;" which performs the parallel computation.
/// </param>
template <int _Rank, typename _Kernel_type> void parallel_for_each(const extent<_Rank>& _Compute_domain, const _Kernel_type &_Kernel)
{
_Host_Scheduling_info _SchedulingInfo = {accelerator::get_auto_selection_view()};
details::_Parallel_for_each(&_SchedulingInfo, _Compute_domain, _Kernel);
}
/// <summary>
/// Invokes a parallel computation of a kernel function over a compute domain that has been tiled into 3-dimensional
/// regions. The accelerator is determined from the arrays and/or array_views captured by the kernel function,
/// or if no accelerator can be derived, the default is chosen.
/// </summary>
/// <param name="_Compute_domain">
/// A tiled_extent&lt;_Dim0,_Dim1,_Dim2&gt; which represents the tiled set of indices that form the compute domain.
/// </param>
/// <param name="_Kernel">
/// A function object that takes an argument of type "tiled_index&lt;_Dim0,_Dim1,_Dim2&gt;" which performs the parallel computation.
/// </param>
template <int _Dim0, int _Dim1, int _Dim2, typename _Kernel_type> void parallel_for_each(const tiled_extent<_Dim0, _Dim1, _Dim2>& _Compute_domain, const _Kernel_type& _Kernel)
{
_Host_Scheduling_info _SchedulingInfo = {accelerator::get_auto_selection_view()};
details::_Parallel_for_each(&_SchedulingInfo, _Compute_domain, _Kernel);
}
/// <summary>
/// Invokes a parallel computation of a kernel function over a compute domain that has been tiled into 2-dimensional
/// regions. The accelerator is determined from the arrays and/or array_views captured by the kernel function,
/// or if no accelerator can be derived, the default is chosen.
/// </summary>
/// <param name="_Compute_domain">
/// A tiled_extent&lt;_Dim0,_Dim1&gt; which represents the tiled set of indices that form the compute domain.
/// </param>
/// <param name="_Kernel">
/// A function object that takes an argument of type "tiled_index&lt;_Dim0,_Dim1&gt;" which performs the parallel computation.
/// </param>
template <int _Dim0, int _Dim1, typename _Kernel_type> void parallel_for_each(const tiled_extent<_Dim0, _Dim1>& _Compute_domain, const _Kernel_type& _Kernel)
{
_Host_Scheduling_info _SchedulingInfo = {accelerator::get_auto_selection_view()};
details::_Parallel_for_each(&_SchedulingInfo, _Compute_domain, _Kernel);
}
/// <summary>
/// Invokes a parallel computation of a kernel function over a compute domain that has been tiled into 1-dimensional
/// regions. The accelerator is determined from the arrays and/or array_views captured by the kernel function,
/// or if no accelerator can be derived, the default is chosen.
/// </summary>
/// <param name="_Compute_domain">
/// A tiled_extent&lt;_Dim0&gt; which represents the tiled set of indices that form the compute domain.
/// </param>
/// <param name="_Kernel">
/// A function object that takes an argument of type "tiled_index&lt;_Dim0&gt;" which performs the parallel computation.
/// </param>
template <int _Dim0, typename _Kernel_type> void parallel_for_each(const tiled_extent<_Dim0>& _Compute_domain, const _Kernel_type& _Kernel)
{
_Host_Scheduling_info _SchedulingInfo = {accelerator::get_auto_selection_view()};
details::_Parallel_for_each(&_SchedulingInfo, _Compute_domain, _Kernel);
}
/// <summary>
/// Invokes a parallel computation of a kernel function over a compute domain on an accelerator.
/// </summary>
/// <param name="_Accl_view">
/// The accelerator_view upon which to run this parallel computation.
/// </param>
/// <param name="_Compute_domain">
/// An extent which represents the set of indices that form the compute domain.
/// </param>
/// <param name="_Kernel">
/// A function object that takes an argument of type "index&lt;_Rank&gt;" which performs the parallel computation.
/// </param>
template <int _Rank, typename _Kernel_type> void parallel_for_each(const accelerator_view& _Accl_view, const extent<_Rank>& _Compute_domain, const _Kernel_type& _Kernel)
{
_Host_Scheduling_info _SchedulingInfo = {_Accl_view};
details::_Parallel_for_each(&_SchedulingInfo, _Compute_domain, _Kernel);
}
/// <summary>
/// Invokes a parallel computation of a kernel function over a compute domain that has been tiled into 3-dimensional
/// regions.
/// </summary>
/// <param name="_Accl_view">
/// The accelerator_view upon which to run this parallel computation.
/// </param>
/// <param name="_Compute_domain">
/// A tiled_extent&lt;_Dim0,_Dim1,_Dim2&gt; which represents the tiled set of indices that form the compute domain.
/// </param>
/// <param name="_Kernel">
/// A function object that takes an argument of type "tiled_index&lt;_Dim0,_Dim1,_Dim2&gt;" which performs the parallel computation.
/// </param>
template <int _Dim0, int _Dim1, int _Dim2, typename _Kernel_type> void parallel_for_each(const accelerator_view& _Accl_view, const tiled_extent<_Dim0, _Dim1, _Dim2>& _Compute_domain, const _Kernel_type& _Kernel)
{
_Host_Scheduling_info _SchedulingInfo = {_Accl_view};
details::_Parallel_for_each(&_SchedulingInfo, _Compute_domain, _Kernel);
}
/// <summary>
/// Invokes a parallel computation of a kernel function over a compute domain that has been tiled into 2-dimensional
/// regions.
/// </summary>
/// <param name="_Accl_view">
/// The accelerator_view upon which to run this parallel computation.
/// </param>
/// <param name="_Compute_domain">
/// A tiled_extent&lt;_Dim0,_Dim1&gt; which represents the tiled set of indices that form the compute domain.
/// </param>
/// <param name="_Kernel">
/// A function object that takes an argument of type "tiled_index&lt;_Dim0,_Dim1&gt;" which performs the parallel computation.
/// </param>
template <int _Dim0, int _Dim1, typename _Kernel_type> void parallel_for_each(const accelerator_view& _Accl_view, const tiled_extent<_Dim0, _Dim1>& _Compute_domain, const _Kernel_type& _Kernel)
{
_Host_Scheduling_info _SchedulingInfo = {_Accl_view};
details::_Parallel_for_each(&_SchedulingInfo, _Compute_domain, _Kernel);
}
/// <summary>
/// Invokes a parallel computation of a kernel function over a compute domain that has been tiled into 1-dimensional
/// regions.
/// </summary>
/// <param name="_Accl_view">
/// The accelerator_view upon which to run this parallel computation.
/// </param>
/// <param name="_Compute_domain">
/// A tiled_extent&lt;_Dim0&gt; which represents the tiled set of indices that form the compute domain.
/// </param>
/// <param name="_Kernel">
/// A function object that takes an argument of type "tiled_index&lt;_Dim0&gt;" which performs the parallel computation.
/// </param>
template <int _Dim0, typename _Kernel_type> void parallel_for_each(const accelerator_view& _Accl_view, const tiled_extent<_Dim0>& _Compute_domain, const _Kernel_type& _Kernel)
{
_Host_Scheduling_info _SchedulingInfo = {_Accl_view};
details::_Parallel_for_each(&_SchedulingInfo, _Compute_domain, _Kernel);
}
//=============================================================================
extern "C"
{
// Debugging intrinsics
void direct3d_abort() __GPU_ONLY;
void direct3d_errorf(const char *, ...) __GPU_ONLY;
void direct3d_printf(const char *, ...) __GPU_ONLY;
}
//////////////////////////////////////////////////////////////////////
/// Memory fences and tile barriers
#pragma warning( push )
#pragma warning( disable : 4100 ) // unreferenced formal parameter
/// <summary>
/// Ensures that memory accesses are visible to other threads in the thread tile, and are executed according to program order
/// </summary>
/// <param name="_Barrier">
/// A tile_barrier object
/// </param>
inline void all_memory_fence(const tile_barrier & _Barrier) __GPU_ONLY
{
__dp_d3d_all_memory_fence();
}
/// <summary>
/// Ensures that global memory accesses are visible to other threads in the thread tile, and are executed according to program order
/// </summary>
/// <param name="_Barrier">
/// A tile_barrier object
/// </param>
inline void global_memory_fence(const tile_barrier & _Barrier) __GPU_ONLY
{
__dp_d3d_device_memory_fence();
}
/// <summary>
/// Ensures that tile_static memory accesses are visible to other threads in the thread tile, and are executed according to program order
/// </summary>
/// <param name="_Barrier">
/// A tile_barrier object
/// </param>
inline void tile_static_memory_fence(const tile_barrier & _Barrier) __GPU_ONLY
{
__dp_d3d_tile_static_memory_fence();
}
#pragma warning( pop )
namespace direct3d
{
/// <summary>
/// Returns the absolute value of the argument
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <returns>
/// Returns the absolute value of the argument
/// </returns>
inline int abs(int _X) __GPU_ONLY
{
return __dp_d3d_absi(_X);
}
/// <summary>
/// Clamps _X to the specified _Min and _Max range
/// </summary>
/// <param name="_X">
/// Floating-point value
/// </param>
/// <param name="_Min">
/// Floating-point value
/// </param>
/// <param name="_Max">
/// Floating-point value
/// </param>
/// <returns>
/// Returns the clamped value of _X
/// </returns>
inline float clamp(float _X, float _Min, float _Max) __GPU_ONLY
{
return __dp_d3d_clampf(_X, _Min, _Max);
}
/// <summary>
/// Clamps _X to the specified _Min and _Max range
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <param name="_Min">
/// Integer value
/// </param>
/// <param name="_Max">
/// Integer value
/// </param>
/// <returns>
/// Returns the clamped value of _X
/// </returns>
inline int clamp(int _X, int _Min, int _Max) __GPU_ONLY
{
return __dp_d3d_clampi(_X, _Min, _Max);
}
/// <summary>
/// Counts the number of set bits in _X
/// </summary>
/// <param name="_X">
/// Unsigned integer value
/// </param>
/// <returns>
/// Returns the number of set bits in _X
/// </returns>
inline unsigned int countbits(unsigned int _X) __GPU_ONLY
{
return __dp_d3d_countbitsu(_X);
}
/// <summary>
/// Gets the location of the first set bit in _X, starting from the highest order bit and working downward
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <returns>
/// Returns The location of the first set bit
/// </returns>
inline int firstbithigh(int _X) __GPU_ONLY
{
return __dp_d3d_firstbithighi(_X);
}
/// <summary>
/// Gets the location of the first set bit in _X, starting from the lowest order bit and working upward
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <returns>
/// Returns The location of the first set bit
/// </returns>
inline int firstbitlow(int _X) __GPU_ONLY
{
return __dp_d3d_firstbitlowi(_X);
}
/// <summary>
/// Determine the maximum numeric value of the arguments
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <param name="_Y">
/// Integer value
/// </param>
/// <returns>
/// Return the maximum numeric value of the arguments
/// </returns>
inline int imax(int _X, int _Y) __GPU_ONLY
{
return __dp_d3d_maxi(_X, _Y);
}
/// <summary>
/// Determine the minimum numeric value of the arguments
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <param name="_Y">
/// Integer value
/// </param>
/// <returns>
/// Return the minimum numeric value of the arguments
/// </returns>
inline int imin(int _X, int _Y) __GPU_ONLY
{
return __dp_d3d_mini(_X, _Y);
}
/// <summary>
/// Determine the maximum numeric value of the arguments
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <param name="_Y">
/// Integer value
/// </param>
/// <returns>
/// Return the maximum numeric value of the arguments
/// </returns>
inline unsigned int umax(unsigned int _X, unsigned int _Y) __GPU_ONLY
{
return __dp_d3d_maxu(_X, _Y);
}
/// <summary>
/// Determine the minimum numeric value of the arguments
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <param name="_Y">
/// Integer value
/// </param>
/// <returns>
/// Return the minimum numeric value of the arguments
/// </returns>
inline unsigned int umin(unsigned int _X, unsigned int _Y) __GPU_ONLY
{
return __dp_d3d_minu(_X, _Y);
}
/// <summary>
/// Performs an arithmetic multiply/add operation on three arguments: _X * _Y + _Z
/// </summary>
/// <param name="_X">
/// Floating-point value
/// </param>
/// <param name="_Y">
/// Floating-point value
/// </param>
/// <param name="_Z">
/// Floating-point value
/// </param>
/// <returns>
/// Returns _X * _Y + _Z
/// </returns>
inline float mad(float _X, float _Y, float _Z) __GPU_ONLY
{
return __dp_d3d_madf(_X, _Y, _Z);
}
/// <summary>
/// Performs an arithmetic multiply/add operation on three arguments: _X * _Y + _Z
/// </summary>
/// <param name="_X">
/// Floating-point value
/// </param>
/// <param name="_Y">
/// Floating-point value
/// </param>
/// <param name="_Z">
/// Floating-point value
/// </param>
/// <returns>
/// Returns _X * _Y + _Z
/// </returns>
inline double mad(double _X, double _Y, double _Z) __GPU_ONLY
{
return __dp_d3d_madd(_X, _Y, _Z);
}
/// <summary>
/// Performs an arithmetic multiply/add operation on three arguments: _X * _Y + _Z
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <param name="_Y">
/// Integer value
/// </param>
/// <param name="_Z">
/// Integer value
/// </param>
/// <returns>
/// Returns _X * _Y + _Z
/// </returns>
inline int mad(int _X, int _Y, int _Z) __GPU_ONLY
{
return __dp_d3d_madi(_X, _Y, _Z);
}
/// <summary>
/// Performs an arithmetic multiply/add operation on three arguments: _X * _Y + _Z
/// </summary>
/// <param name="_X">
/// Unsigned integer value
/// </param>
/// <param name="_Y">
/// Unsigned integer value
/// </param>
/// <param name="_Z">
/// Unsigned integer value
/// </param>
/// <returns>
/// Returns _X * _Y + _Z
/// </returns>
inline unsigned int mad(unsigned int _X, unsigned int _Y, unsigned int _Z) __GPU_ONLY
{
return __dp_d3d_madu(_X, _Y, _Z);
}
/// <summary>
/// Generates a random value using the Perlin noise algorithm
/// </summary>
/// <param name="_X">
/// Floating-point value from which to generate Perlin noise
/// </param>
/// <returns>
/// Returns The Perlin noise value within a range between -1 and 1
/// </returns>
inline float noise(float _X) __GPU_ONLY
{
return __dp_d3d_noisef(_X);
}
/// <summary>
/// Converts _X from degrees to radians
/// </summary>
/// <param name="_X">
/// Floating-point value
/// </param>
/// <returns>
/// Returns _X converted from degrees to radians
/// </returns>
inline float radians(float _X) __GPU_ONLY
{
return __dp_d3d_radiansf(_X);
}
/// <summary>
/// Calculates a fast, approximate reciprocal of the argument
/// </summary>
/// <param name="_X">
/// Floating-point value
/// </param>
/// <returns>
/// Returns a fast, approximate reciprocal of the argument
/// </returns>
inline float rcp(float _X) __GPU_ONLY
{
return __dp_d3d_rcpf(_X);
}
/// <summary>
/// Reverses the order of the bits in _X
/// </summary>
/// <param name="_X">
/// Unsigned integer value
/// </param>
/// <returns>
/// Returns the value with the bit order reversed in _X
/// </returns>
inline unsigned int reversebits(unsigned int _X) __GPU_ONLY
{
return __dp_d3d_reversebitsu(_X);
}
/// <summary>
/// Clamps _X within the range of 0 to 1
/// </summary>
/// <param name="_X">
/// Floating-point value
/// </param>
/// <returns>
/// Returns _X clamped within the range of 0 to 1
/// </returns>
inline float saturate(float _X) __GPU_ONLY
{
return __dp_d3d_saturatef(_X);
}
/// <summary>
/// Returns the sign of the argument
/// </summary>
/// <param name="_X">
/// Integer value
/// </param>
/// <returns>
/// Returns the sign of the argument
/// </returns>
inline int sign(int _X) __GPU_ONLY
{
return __dp_d3d_signi(_X);
}
/// <summary>
/// Returns a smooth Hermite interpolation between 0 and 1, if _X is in the range [_Min, _Max].
/// </summary>
/// <param name="_X">
/// Floating-point value
/// </param>
/// <param name="_Min">
/// Floating-point value
/// </param>
/// <param name="_Max">
/// Floating-point value
/// </param>
/// <returns>
/// Returns 0 if _X is less than _Min; 1 if _X is greater than _Max; otherwise, a value between 0 and 1 if _X is in the range [_Min, _Max]
/// </returns>
inline float smoothstep(float _Min, float _Max, float _X) __GPU_ONLY
{
return __dp_d3d_smoothstepf(_Min, _Max, _X);
}
/// <summary>
/// Compares two values, returning 0 or 1 based on which value is greater
/// </summary>
/// <param name="_Y">
/// Floating-point value
/// </param>
/// <param name="_X">
/// Floating-point value
/// </param>
/// <returns>
/// Returns 1 if the _X is greater than or equal to _Y; otherwise, 0
/// </returns>
inline float step(float _Y, float _X) __GPU_ONLY
{
return __dp_d3d_stepf(_Y, _X);
}
} // namespace Concurrency::direct3d
} // namespace Concurrency
#include <xxamp_inl.h>
namespace concurrency = Concurrency;
#pragma pack(pop)
// End of file