From 4b907c5d51f1f657d9debf965abd13190740f038 Mon Sep 17 00:00:00 2001 From: Denis Demidov Date: Tue, 15 Jan 2019 14:07:56 +0300 Subject: [PATCH 1/6] [wip] [skip ci] Test OpenCL backend with amgcl solver Enable vexcl backend for amgcl solver, which makes it possible to use GPGPU (either CUDA or OpenCL) in order to accelerate solution. * CMake option AMGCL_GPGPU (default: off) controls whether to compile GPGPU support. * CMake option AMGCL_GPGPU_BACKEND (default: OpenCL) selects vexcl backend (OpenCL/CUDA) * New setting in linear solver parameters: `use_gpgpu` enables GPGPU at runtime. * Environment variable OCL_DEVICE may be used to select a particular compute device. --- .../trilinos_application/CMakeLists.txt | 1 + .../amgcl_mpi_solver_impl.cpp | 219 +++++++++++++++++ .../amgcl_mpi_solve_functions.h | 77 +----- kratos/CMakeLists.txt | 19 ++ kratos/linear_solvers/amgcl_solver.h | 96 ++------ kratos/linear_solvers/amgcl_solver_impl.cpp | 221 ++++++++++++++++++ 6 files changed, 484 insertions(+), 149 deletions(-) create mode 100644 applications/trilinos_application/amgcl_mpi_solver_impl.cpp create mode 100644 kratos/linear_solvers/amgcl_solver_impl.cpp diff --git a/applications/trilinos_application/CMakeLists.txt b/applications/trilinos_application/CMakeLists.txt index a2ab2522f874..701a83974c60 100644 --- a/applications/trilinos_application/CMakeLists.txt +++ b/applications/trilinos_application/CMakeLists.txt @@ -50,6 +50,7 @@ set( KRATOS_TRILINOS_APPLICATION_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/custom_factories/trilinos_linear_solver_factory.cpp ${CMAKE_SOURCE_DIR}/applications/FluidDynamicsApplication/fluid_dynamics_application_variables.cpp #TODO: this should REALLY NOT BE HERE ${CMAKE_CURRENT_SOURCE_DIR}/custom_utilities/mpi_normal_calculation_utilities.cpp; + ${CMAKE_CURRENT_SOURCE_DIR}/amgcl_mpi_solver_impl.cpp; ) ## Kratos tests sources. Enabled by default diff --git a/applications/trilinos_application/amgcl_mpi_solver_impl.cpp b/applications/trilinos_application/amgcl_mpi_solver_impl.cpp new file mode 100644 index 000000000000..755e88760efd --- /dev/null +++ b/applications/trilinos_application/amgcl_mpi_solver_impl.cpp @@ -0,0 +1,219 @@ +#ifndef KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H +#define KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef AMGCL_GPGPU +# include +# include +#endif + +#include "Epetra_FECrsMatrix.h" +#include "Epetra_FEVector.h" +#include "trilinos_space.h" + +namespace Kratos +{ + +#ifdef AMGCL_GPGPU +vex::Context& vexcl_context(); + +template +void register_vexcl_static_matrix_type(); +#endif + +// Spacialization of AMGCLScalarSolve for distribued systems. +template +typename std::enable_if::type +AMGCLScalarSolve( + typename TSparseSpaceType::MatrixType& rA, + typename TSparseSpaceType::VectorType& rX, + typename TSparseSpaceType::VectorType& rB, + typename TSparseSpaceType::IndexType& rIterationNumber, + double& rResidual, + const boost::property_tree::ptree &amgclParams, + int verbosity_level, + bool use_gpgpu + ) +{ +#ifdef AMGCL_GPGPU + if (use_gpgpu && vexcl_context()) { + auto &ctx = vexcl_context(); + + typedef amgcl::backend::vexcl Backend; + + typedef + amgcl::mpi::make_solver< + amgcl::runtime::mpi::preconditioner, + amgcl::runtime::solver::wrapper + > + Solver; + + Backend::params bprm; + bprm.q = ctx; + + Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams, bprm); + + std::size_t n = rA.NumMyRows(); + + vex::vector b(ctx, n, rB.Values()); + vex::vector x(ctx, n, rX.Values()); + + std::tie(rIterationNumber, rResidual) = solve(b, x); + + vex::copy(x.begin(), x.end(), rX.Values()); + } else +#endif + { + typedef amgcl::backend::builtin Backend; + + typedef + amgcl::mpi::make_solver< + amgcl::runtime::mpi::preconditioner, + amgcl::runtime::solver::wrapper + > + Solver; + + Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams); + + std::size_t n = rA.NumMyRows(); + + auto b_range = boost::make_iterator_range(rB.Values(), rB.Values() + n); + auto x_range = boost::make_iterator_range(rX.Values(), rX.Values() + n); + + std::tie(rIterationNumber, rResidual) = solve(b_range, x_range); + } +} + +// Spacialization of AMGCLBlockSolve for distribued systems. +template +typename std::enable_if::type +AMGCLBlockSolve( + typename TSparseSpaceType::MatrixType & rA, + typename TSparseSpaceType::VectorType& rX, + typename TSparseSpaceType::VectorType& rB, + typename TSparseSpaceType::IndexType& rIterationNumber, + double& rResidual, + boost::property_tree::ptree amgclParams, + int verbosity_level, + bool use_gpgpu + ) +{ + if(amgclParams.get("precond.class") != "amg") + amgclParams.erase("precond.coarsening"); + else + amgclParams.put("precond.coarsening.aggr.block_size",1); + + typedef amgcl::static_matrix val_type; + typedef amgcl::static_matrix rhs_type; + + std::size_t n = rA.RowMap().NumMyElements(); + std::size_t nb = n / TBlockSize; + +#ifdef AMGCL_GPGPU + if (use_gpgpu && vexcl_context()) { + auto &ctx = vexcl_context(); + register_vexcl_static_matrix_type(); + + typedef amgcl::backend::vexcl Backend; + + typedef + amgcl::mpi::make_solver< + amgcl::runtime::mpi::preconditioner, + amgcl::runtime::solver::wrapper + > + Solver; + + typename Backend::params bprm; + bprm.q = ctx; + + Solver solve( + MPI_COMM_WORLD, + amgcl::adapter::block_matrix(amgcl::adapter::map(rA)), + amgclParams, bprm + ); + + auto b_begin = reinterpret_cast(rB.Values()); + auto x_begin = reinterpret_cast(rX.Values()); + + vex::vector x(ctx, nb, x_begin); + vex::vector b(ctx, nb, b_begin); + + std::tie(rIterationNumber, rResidual) = solve(b, x); + + vex::copy(x.begin(), x.end(), x_begin); + } else +#endif + { + typedef amgcl::backend::builtin Backend; + + typedef + amgcl::mpi::make_solver< + amgcl::runtime::mpi::preconditioner, + amgcl::runtime::solver::wrapper + > + Solver; + + Solver solve( + MPI_COMM_WORLD, + amgcl::adapter::block_matrix(amgcl::adapter::map(rA)), + amgclParams + ); + + auto b_begin = reinterpret_cast(rB.Values()); + auto x_begin = reinterpret_cast(rX.Values()); + + auto b_range = boost::make_iterator_range(b_begin, b_begin + nb); + auto x_range = boost::make_iterator_range(x_begin, x_begin + nb); + + std::tie(rIterationNumber, rResidual) = solve(b_range, x_range); + } +} + +// Exlplicit instantiations: +template void AMGCLScalarSolve< TrilinosSpace >( + TrilinosSpace::MatrixType& rA, + TrilinosSpace::VectorType& rX, + TrilinosSpace::VectorType& rB, + TrilinosSpace::IndexType& rIterationNumber, + double& rResidual, + const boost::property_tree::ptree &amgclParams, + int verbosity_level, + bool use_gpgpu + ); + +#define INSTANTIATE_BLOCK_SOLVER(B) \ +template void AMGCLBlockSolve >( \ + TrilinosSpace::MatrixType& rA, \ + TrilinosSpace::VectorType& rX, \ + TrilinosSpace::VectorType& rB, \ + TrilinosSpace::IndexType& rIterationNumber, \ + double& rResidual, \ + boost::property_tree::ptree amgclParams, \ + int verbosity_level, \ + bool use_gpgpu \ + ) + +INSTANTIATE_BLOCK_SOLVER(2); +INSTANTIATE_BLOCK_SOLVER(3); +INSTANTIATE_BLOCK_SOLVER(4); + +#undef INSTANTIATE_BLOCK_SOLVER + +} // namespace Kratos + +#endif diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_solve_functions.h b/applications/trilinos_application/external_includes/amgcl_mpi_solve_functions.h index 786eae4503f2..a810cb145568 100644 --- a/applications/trilinos_application/external_includes/amgcl_mpi_solve_functions.h +++ b/applications/trilinos_application/external_includes/amgcl_mpi_solve_functions.h @@ -1,20 +1,7 @@ #ifndef KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H #define KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include +#include namespace Kratos { @@ -29,27 +16,9 @@ AMGCLScalarSolve( typename TSparseSpaceType::IndexType& rIterationNumber, double& rResidual, const boost::property_tree::ptree &amgclParams, - int verbosity_level - ) -{ - typedef amgcl::backend::builtin Backend; - - typedef - amgcl::mpi::make_solver< - amgcl::runtime::mpi::preconditioner, - amgcl::runtime::solver::wrapper - > - Solver; - - Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams); - - std::size_t n = rA.NumMyRows(); - - auto b_range = boost::make_iterator_range(rB.Values(), rB.Values() + n); - auto x_range = boost::make_iterator_range(rX.Values(), rX.Values() + n); - - std::tie(rIterationNumber, rResidual) = solve(b_range, x_range); -} + int verbosity_level, + bool use_gpgpu + ); // Spacialization of AMGCLBlockSolve for distribued systems. template @@ -61,41 +30,9 @@ AMGCLBlockSolve( typename TSparseSpaceType::IndexType& rIterationNumber, double& rResidual, boost::property_tree::ptree amgclParams, - int verbosity_level - ) -{ - if(amgclParams.get("precond.class") != "amg") - amgclParams.erase("precond.coarsening"); - else - amgclParams.put("precond.coarsening.aggr.block_size",1); - - typedef amgcl::static_matrix val_type; - typedef amgcl::static_matrix rhs_type; - typedef amgcl::backend::builtin Backend; - - std::size_t n = rA.RowMap().NumMyElements(); - - typedef - amgcl::mpi::make_solver< - amgcl::runtime::mpi::preconditioner, - amgcl::runtime::solver::wrapper - > - Solver; - - Solver solve( - MPI_COMM_WORLD, - amgcl::adapter::block_matrix(amgcl::adapter::map(rA)), - amgclParams - ); - - auto b_begin = reinterpret_cast(rB.Values()); - auto x_begin = reinterpret_cast(rX.Values()); - - auto b_range = boost::make_iterator_range(b_begin, b_begin + n / TBlockSize); - auto x_range = boost::make_iterator_range(x_begin, x_begin + n / TBlockSize); - - std::tie(rIterationNumber, rResidual) = solve(b_range, x_range); -} + int verbosity_level, + bool use_gpgpu + ); } // namespace Kratos diff --git a/kratos/CMakeLists.txt b/kratos/CMakeLists.txt index d21ee5cc5c2c..c27e943ae11f 100644 --- a/kratos/CMakeLists.txt +++ b/kratos/CMakeLists.txt @@ -89,6 +89,7 @@ set( KRATOS_CORE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/elements/mesh_element.cpp; ${CMAKE_CURRENT_SOURCE_DIR}/conditions/mesh_condition.cpp; ${CMAKE_CURRENT_SOURCE_DIR}/processes/apply_periodic_boundary_condition_process.cpp; + ${CMAKE_CURRENT_SOURCE_DIR}/linear_solvers/amgcl_solver_impl.cpp; ) ## Kratos python interface code @@ -156,6 +157,24 @@ add_library(KratosCore SHARED ${KRATOS_CORE_SOURCES} ${KRATOS_CORE_TESTING_ENGIN target_link_libraries(KratosCore PUBLIC gidpost) #${Boost_LIBRARIES} ${PYTHON_LIBRARIES} gidpost ) set_target_properties(KratosCore PROPERTIES COMPILE_DEFINITIONS "KRATOS_CORE=IMPORT,API") +option(AMGCL_GPGPU "Enable GPGPU backend for AMGCL linear solver" OFF) +if (AMGCL_GPGPU) + find_package(VexCL) + + set(AMGCL_GPGPU_BACKEND "OpenCL" CACHE STRING "Select AMGCL GPGPU backend (OpenCL/CUDA)") + set_property(CACHE AMGCL_GPGPU_BACKEND PROPERTY STRINGS "OpenCL" "CUDA") + + if ("${AMGCL_GPGPU_BACKEND}" STREQUAL "OpenCL" AND TARGET VexCL::OpenCL) + target_link_libraries(KratosCore PUBLIC VexCL::OpenCL) + target_compile_definitions(KratosCore PUBLIC AMGCL_GPGPU) + elseif ("${AMGCL_GPGPU_BACKEND}" STREQUAL "CUDA" AND TARGET VexCL::CUDA) + target_link_libraries(KratosCore PUBLIC VexCL::CUDA) + target_compile_definitions(KratosCore PUBLIC AMGCL_GPGPU) + else() + message(WARNING "AMGCL GPGPU backend not found") + endif() +endif() + ## Define library Kratos which defines the basic python interface pybind11_add_module(Kratos MODULE THIN_LTO ${KRATOS_PYTHON_SOURCES}) # add_library(Kratos SHARED ${KRATOS_PYTHON_SOURCES}) diff --git a/kratos/linear_solvers/amgcl_solver.h b/kratos/linear_solvers/amgcl_solver.h index 37e7497b066c..7bb3e1f7f2c2 100644 --- a/kratos/linear_solvers/amgcl_solver.h +++ b/kratos/linear_solvers/amgcl_solver.h @@ -30,20 +30,6 @@ #include #include -/* AMGCL */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - // Project includes #include "includes/define.h" #include "includes/kratos_parameters.h" @@ -98,21 +84,9 @@ AMGCLScalarSolve( typename TSparseSpaceType::IndexType& rIterationNumber, double& rResidual, const boost::property_tree::ptree &amgclParams, - int verbosity_level - ) -{ - typedef amgcl::backend::builtin Backend; - - amgcl::make_solver< - amgcl::runtime::preconditioner, - amgcl::runtime::solver::wrapper - > solve(amgcl::adapter::zero_copy(TSparseSpaceType::Size1(rA), rA.index1_data().begin(), rA.index2_data().begin(), rA.value_data().begin()), amgclParams); - - std::tie(rIterationNumber, rResidual) = solve(rB, rX); - - if(verbosity_level > 1 ) - std::cout << "AMGCL Memory Occupation : " << amgcl::human_readable_memory(amgcl::backend::bytes(solve)) << std::endl; -} + int verbosity_level, + bool use_gpgpu + ); /** * @brief This method solves by block a Ax=b system @@ -131,37 +105,9 @@ AMGCLBlockSolve( typename TSparseSpaceType::IndexType& rIterationNumber, double& rResidual, boost::property_tree::ptree amgclParams, - int verbosity_level - ) -{ - if(amgclParams.get("precond.class") != "amg") - amgclParams.erase("precond.coarsening"); - else - amgclParams.put("precond.coarsening.aggr.block_size",1); - - typedef amgcl::static_matrix value_type; - typedef amgcl::static_matrix rhs_type; - typedef amgcl::backend::builtin Backend; - - std::size_t n = TSparseSpaceType::Size1(rA); - - amgcl::make_solver< - amgcl::runtime::preconditioner, - amgcl::runtime::solver::wrapper - > solve( amgcl::adapter::block_matrix(std::tie(n,rA.index1_data(),rA.index2_data(),rA.value_data() )), amgclParams); - - rhs_type* x_begin = reinterpret_cast(&rX[0]); - boost::iterator_range x_range = boost::make_iterator_range(x_begin, x_begin + n / TBlockSize); - - const rhs_type* b_begin = reinterpret_cast(&rB[0]); - boost::iterator_range b_range = boost::make_iterator_range(b_begin, b_begin + n / TBlockSize); - - std::tie(rIterationNumber, rResidual) = solve(b_range, x_range); - - if(verbosity_level > 1 ) - std::cout << "AMGCL Memory Occupation : " << amgcl::human_readable_memory(amgcl::backend::bytes(solve)) << std::endl; - -} + int verbosity_level, + bool use_gpgpu + ); ///@} ///@name Kratos Classes @@ -238,7 +184,8 @@ class AMGCLSolver : public LinearSolver< TSparseSpaceType, "coarse_enough" : 1000, "max_levels" : -1, "pre_sweeps" : 1, - "post_sweeps" : 1 + "post_sweeps" : 1, + "use_gpgpu" : false } )" ); // Now validate agains defaults -- this also ensures no type mismatch @@ -330,8 +277,7 @@ class AMGCLSolver : public LinearSolver< TSparseSpaceType, ThisParameters["use_block_matrices_if_possible"].SetBool(false); } - - + mUseGPGPU = ThisParameters["use_gpgpu"].GetBool(); } /** @@ -516,21 +462,21 @@ class AMGCLSolver : public LinearSolver< TSparseSpaceType, if(mUseBlockMatricesIfPossible) { KRATOS_ERROR_IF(TSparseSpaceType::Size1(rA)%mBlockSize != 0) << "The block size employed " << mBlockSize << " is not an exact multiple of the matrix size " << TSparseSpaceType::Size1(rA) << std::endl; - if(mBlockSize == 1) AMGCLScalarSolve(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity); - else if(mBlockSize == 2) AMGCLBlockSolve<2, TSparseSpaceType>(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity); - else if(mBlockSize == 3) AMGCLBlockSolve<3, TSparseSpaceType>(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity); - else if(mBlockSize == 4) AMGCLBlockSolve<4, TSparseSpaceType>(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity); + if(mBlockSize == 1) AMGCLScalarSolve(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity, mUseGPGPU); + else if(mBlockSize == 2) AMGCLBlockSolve<2, TSparseSpaceType>(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity, mUseGPGPU); + else if(mBlockSize == 3) AMGCLBlockSolve<3, TSparseSpaceType>(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity, mUseGPGPU); + else if(mBlockSize == 4) AMGCLBlockSolve<4, TSparseSpaceType>(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity, mUseGPGPU); else - AMGCLScalarSolve(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity); + AMGCLScalarSolve(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity, mUseGPGPU); } else { - AMGCLScalarSolve(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity); + AMGCLScalarSolve(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity, mUseGPGPU); } } //please do not remove this parenthesis! if(mFallbackToGMRES && resid > mTolerance ) { mAMGCLParameters.put("solver.type", "gmres"); mAMGCLParameters.put("solver.M", mGMRESSize); - AMGCLScalarSolve(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity); + AMGCLScalarSolve(rA,rX,rB, iters, resid, mAMGCLParameters, mVerbosity, mUseGPGPU); } KRATOS_WARNING_IF("AMGCL Linear Solver", mTolerance < resid)<<"Non converged linear solution. ["<< resid << " > "<< mTolerance << "]" << std::endl; @@ -752,12 +698,10 @@ class AMGCLSolver : public LinearSolver< TSparseSpaceType, bool mFallbackToGMRES; /// Of consider GMRES as fallback (TODO: Local flag?) bool mProvideCoordinates; /// If the coordinates are provided (TODO: Local flag?) bool mUseBlockMatricesIfPossible; /// If use the bloack matrices if possible (TODO: Local flag?) + bool mUseGPGPU; /// Use GPGPU if available std::vector > mCoordinates; /// The vector containing the local coordinates - amgcl::runtime::coarsening::type mCoarsening; /// The coarsening type considered - amgcl::runtime::relaxation::type mRelaxation; /// The relaxation type considered - amgcl::runtime::solver::type mIterativeSolver; /// The iterative solver considered boost::property_tree::ptree mAMGCLParameters; /// The configuration parameters of the AMGCl double mResidualNorm = 0.0; /// The current residual norm @@ -779,37 +723,31 @@ class AMGCLSolver : public LinearSolver< TSparseSpaceType, case SPAI0: { mAMGCLParameters.put("precond.relax.type","spai0"); - mRelaxation = amgcl::runtime::relaxation::spai0; break; } case SPAI1: { mAMGCLParameters.put("precond.relax.type","spai1"); - mRelaxation = amgcl::runtime::relaxation::spai1; break; } case ILU0: { mAMGCLParameters.put("precond.relax.type","ilu0"); - mRelaxation = amgcl::runtime::relaxation::ilu0; break; } case DAMPED_JACOBI: { mAMGCLParameters.put("precond.relax.type","damped_jacobi"); - mRelaxation = amgcl::runtime::relaxation::damped_jacobi; break; } case GAUSS_SEIDEL: { mAMGCLParameters.put("precond.relax.type","gauss_seidel"); - mRelaxation = amgcl::runtime::relaxation::gauss_seidel; break; } case CHEBYSHEV: { mAMGCLParameters.put("precond.relax.type","chebyshev"); - mRelaxation = amgcl::runtime::relaxation::chebyshev; break; } }; diff --git a/kratos/linear_solvers/amgcl_solver_impl.cpp b/kratos/linear_solvers/amgcl_solver_impl.cpp new file mode 100644 index 000000000000..4f884439e820 --- /dev/null +++ b/kratos/linear_solvers/amgcl_solver_impl.cpp @@ -0,0 +1,221 @@ +/* AMGCL */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef AMGCL_GPGPU +# include +# include +#endif + +#include "spaces/ublas_space.h" + +namespace Kratos { + +#ifdef AMGCL_GPGPU +vex::Context& vexcl_context() { + static vex::Context ctx(vex::Filter::Env); + static bool run_once = [](){ + std::cout << "VexCL context:\n" << ctx << std::endl; + return true; + }(); + (void)run_once; // suppress "unused variable" warnings + return ctx; +} + +template +void register_vexcl_static_matrix_type() { + static vex::scoped_program_header header(vexcl_context(), + amgcl::backend::vexcl_static_matrix_declaration()); +} +#endif + +template +typename std::enable_if::type +AMGCLScalarSolve( + typename TSparseSpaceType::MatrixType& rA, + typename TSparseSpaceType::VectorType& rX, + typename TSparseSpaceType::VectorType& rB, + typename TSparseSpaceType::IndexType& rIterationNumber, + double& rResidual, + const boost::property_tree::ptree &amgclParams, + int verbosity_level, + bool use_gpgpu + ) +{ +#ifdef AMGCL_GPGPU + if (use_gpgpu && vexcl_context()) { + auto &ctx = vexcl_context(); + + typedef amgcl::backend::vexcl Backend; + typedef amgcl::make_solver< + amgcl::runtime::preconditioner, + amgcl::runtime::solver::wrapper + > Solver; + + Backend::params bprm; + bprm.q = ctx; + + Solver solve(amgcl::adapter::zero_copy( + TSparseSpaceType::Size1(rA), + rA.index1_data().begin(), + rA.index2_data().begin(), + rA.value_data().begin()), + amgclParams, bprm); + + vex::vector b(ctx, rB.size(), &rB[0]); + vex::vector x(ctx, rX.size(), &rX[0]); + + std::tie(rIterationNumber, rResidual) = solve(b, x); + + vex::copy(x.begin(), x.end(), rX.begin()); + + if(verbosity_level > 1 ) + std::cout << "AMGCL Memory Occupation : " << amgcl::human_readable_memory(amgcl::backend::bytes(solve)) << std::endl; + } else +#endif + { + typedef amgcl::backend::builtin Backend; + typedef amgcl::make_solver< + amgcl::runtime::preconditioner, + amgcl::runtime::solver::wrapper + > Solver; + + Solver solve(amgcl::adapter::zero_copy( + TSparseSpaceType::Size1(rA), + rA.index1_data().begin(), + rA.index2_data().begin(), + rA.value_data().begin()), + amgclParams); + + std::tie(rIterationNumber, rResidual) = solve(rB, rX); + + if(verbosity_level > 1 ) + std::cout << "AMGCL Memory Occupation : " << amgcl::human_readable_memory(amgcl::backend::bytes(solve)) << std::endl; + } +} + +template +typename std::enable_if::type +AMGCLBlockSolve( + typename TSparseSpaceType::MatrixType & rA, + typename TSparseSpaceType::VectorType& rX, + typename TSparseSpaceType::VectorType& rB, + typename TSparseSpaceType::IndexType& rIterationNumber, + double& rResidual, + boost::property_tree::ptree amgclParams, + int verbosity_level, + bool use_gpgpu + ) +{ + if(amgclParams.get("precond.class") != "amg") + amgclParams.erase("precond.coarsening"); + else + amgclParams.put("precond.coarsening.aggr.block_size",1); + + typedef amgcl::static_matrix value_type; + typedef amgcl::static_matrix rhs_type; + + std::size_t n = TSparseSpaceType::Size1(rA); + std::size_t nb = n / TBlockSize; + +#ifdef AMGCL_GPGPU + if (use_gpgpu && vexcl_context()) { + auto &ctx = vexcl_context(); + register_vexcl_static_matrix_type(); + + typedef amgcl::backend::vexcl Backend; + + typedef amgcl::make_solver< + amgcl::runtime::preconditioner, + amgcl::runtime::solver::wrapper + > Solver; + + typename Backend::params bprm; + bprm.q = ctx; + + Solver solve( + amgcl::adapter::block_matrix( + std::tie(n,rA.index1_data(),rA.index2_data(),rA.value_data() )), + amgclParams, bprm); + + auto x_begin = reinterpret_cast(&rX[0]); + auto b_begin = reinterpret_cast(&rB[0]); + + vex::vector x(ctx, nb, x_begin); + vex::vector b(ctx, nb, b_begin); + + std::tie(rIterationNumber, rResidual) = solve(b, x); + + vex::copy(x.begin(), x.end(), x_begin); + + if(verbosity_level > 1 ) + std::cout << "AMGCL Memory Occupation : " << amgcl::human_readable_memory(amgcl::backend::bytes(solve)) << std::endl; + } else +#endif + { + typedef amgcl::backend::builtin Backend; + + typedef amgcl::make_solver< + amgcl::runtime::preconditioner, + amgcl::runtime::solver::wrapper + > Solver; + + Solver solve( + amgcl::adapter::block_matrix( + std::tie(n,rA.index1_data(),rA.index2_data(),rA.value_data() )), + amgclParams); + + auto x_begin = reinterpret_cast(&rX[0]); + boost::iterator_range x_range = boost::make_iterator_range(x_begin, x_begin + nb); + + auto b_begin = reinterpret_cast(&rB[0]); + boost::iterator_range b_range = boost::make_iterator_range(b_begin, b_begin + nb); + + std::tie(rIterationNumber, rResidual) = solve(b_range, x_range); + + if(verbosity_level > 1 ) + std::cout << "AMGCL Memory Occupation : " << amgcl::human_readable_memory(amgcl::backend::bytes(solve)) << std::endl; + } +} + +// Exlplicit instantiations: +template void AMGCLScalarSolve< TUblasSparseSpace >( + TUblasSparseSpace::MatrixType& rA, + TUblasSparseSpace::VectorType& rX, + TUblasSparseSpace::VectorType& rB, + TUblasSparseSpace::IndexType& rIterationNumber, + double& rResidual, + const boost::property_tree::ptree &amgclParams, + int verbosity_level, + bool use_gpgpu + ); + +#define INSTANTIATE_BLOCK_SOLVER(B) \ +template void AMGCLBlockSolve >( \ + TUblasSparseSpace::MatrixType& rA, \ + TUblasSparseSpace::VectorType& rX, \ + TUblasSparseSpace::VectorType& rB, \ + TUblasSparseSpace::IndexType& rIterationNumber, \ + double& rResidual, \ + boost::property_tree::ptree amgclParams, \ + int verbosity_level, \ + bool use_gpgpu \ + ) + +INSTANTIATE_BLOCK_SOLVER(2); +INSTANTIATE_BLOCK_SOLVER(3); +INSTANTIATE_BLOCK_SOLVER(4); + +#undef INSTANTIATE_BLOCK_SOLVER + +} From 1a576a96c89f0bafabd148febbf3982436b8f794 Mon Sep 17 00:00:00 2001 From: Denis Demidov Date: Wed, 23 Jan 2019 11:56:15 +0300 Subject: [PATCH 2/6] See if cotire works this way --- kratos/CMakeLists.txt | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/kratos/CMakeLists.txt b/kratos/CMakeLists.txt index c27e943ae11f..a6d6caaa8651 100644 --- a/kratos/CMakeLists.txt +++ b/kratos/CMakeLists.txt @@ -157,19 +157,15 @@ add_library(KratosCore SHARED ${KRATOS_CORE_SOURCES} ${KRATOS_CORE_TESTING_ENGIN target_link_libraries(KratosCore PUBLIC gidpost) #${Boost_LIBRARIES} ${PYTHON_LIBRARIES} gidpost ) set_target_properties(KratosCore PROPERTIES COMPILE_DEFINITIONS "KRATOS_CORE=IMPORT,API") -option(AMGCL_GPGPU "Enable GPGPU backend for AMGCL linear solver" OFF) -if (AMGCL_GPGPU) +if (${AMGCL_GPGPU} MATCHES ON) find_package(VexCL) - set(AMGCL_GPGPU_BACKEND "OpenCL" CACHE STRING "Select AMGCL GPGPU backend (OpenCL/CUDA)") - set_property(CACHE AMGCL_GPGPU_BACKEND PROPERTY STRINGS "OpenCL" "CUDA") - if ("${AMGCL_GPGPU_BACKEND}" STREQUAL "OpenCL" AND TARGET VexCL::OpenCL) - target_link_libraries(KratosCore PUBLIC VexCL::OpenCL) - target_compile_definitions(KratosCore PUBLIC AMGCL_GPGPU) + target_link_libraries(KratosCore VexCL::OpenCL) + add_definitions(-DAMGCL_GPGPU) elseif ("${AMGCL_GPGPU_BACKEND}" STREQUAL "CUDA" AND TARGET VexCL::CUDA) - target_link_libraries(KratosCore PUBLIC VexCL::CUDA) - target_compile_definitions(KratosCore PUBLIC AMGCL_GPGPU) + target_link_libraries(KratosCore VexCL::CUDA) + add_definitions(-DAMGCL_GPGPU) else() message(WARNING "AMGCL GPGPU backend not found") endif() From 9b83d1aa9520d5c6dd87a47a687a1b70b5c8a94b Mon Sep 17 00:00:00 2001 From: Denis Demidov Date: Wed, 23 Jan 2019 13:43:57 +0300 Subject: [PATCH 3/6] Patch COTIRE to support cmake generator expressions See https://github.com/sakra/cotire/pull/155 --- cmake_modules/cotire.cmake | 348 ++++++++++++++++++++++++++----------- 1 file changed, 247 insertions(+), 101 deletions(-) diff --git a/cmake_modules/cotire.cmake b/cmake_modules/cotire.cmake index 62cd23db987b..98c5661b9b12 100644 --- a/cmake_modules/cotire.cmake +++ b/cmake_modules/cotire.cmake @@ -3,7 +3,7 @@ # See the cotire manual for usage hints. # #============================================================================= -# Copyright 2012-2017 Sascha Kratky +# Copyright 2012-2018 Sascha Kratky # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -43,7 +43,7 @@ if (NOT CMAKE_SCRIPT_MODE_FILE) endif() set (COTIRE_CMAKE_MODULE_FILE "${CMAKE_CURRENT_LIST_FILE}") -set (COTIRE_CMAKE_MODULE_VERSION "1.7.10") +set (COTIRE_CMAKE_MODULE_VERSION "1.8.0") # activate select policies if (POLICY CMP0025) @@ -239,7 +239,13 @@ function (cotire_filter_language_source_files _language _target _sourceFilesVar # add to excluded sources, if file has custom compile flags list (APPEND _excludedSourceFiles "${_sourceFile}") else() - list (APPEND _sourceFiles "${_sourceFile}") + get_source_file_property(_sourceCompileOptions "${_sourceFile}" COMPILE_OPTIONS) + if (_sourceCompileOptions) + # add to excluded sources, if file has list of custom compile options + list (APPEND _excludedSourceFiles "${_sourceFile}") + else() + list (APPEND _sourceFiles "${_sourceFile}") + endif() endif() endif() endforeach() @@ -721,7 +727,7 @@ function (cotire_get_target_compile_definitions _config _language _target _defin endif() endforeach() # parse additional compile definitions from target compile flags - # and don't look at directory compile definitions, which we already handled + # and do not look at directory compile definitions, which we already handled set (_targetFlags "") cotire_get_target_compile_flags("${_config}" "${_language}" "${_target}" _targetFlags) cotire_filter_compile_flags("${_language}" "D" _definitions _ignore ${_targetFlags}) @@ -889,6 +895,9 @@ function (cotire_init_compile_cmd _cmdVar _language _compilerLauncher _compilerE if (NOT _compilerArg1) set (_compilerArg1 ${CMAKE_${_language}_COMPILER_ARG1}) endif() + if (WIN32) + file (TO_NATIVE_PATH "${_compilerExe}" _compilerExe) + endif() string (STRIP "${_compilerArg1}" _compilerArg1) if ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja") # compiler launcher is only supported for Makefile and Ninja @@ -1083,12 +1092,11 @@ endmacro() macro (cotire_parse_line _line _headerFileVar _headerDepthVar) if (MSVC) - # cl.exe /showIncludes output looks different depending on the language pack used, e.g.: + # cl.exe /showIncludes produces different output, depending on the language pack used, e.g.: # English: "Note: including file: C:\directory\file" # German: "Hinweis: Einlesen der Datei: C:\directory\file" # We use a very general regular expression, relying on the presence of the : characters if (_line MATCHES "( +)([a-zA-Z]:[^:]+)$") - # Visual Studio compiler output string (LENGTH "${CMAKE_MATCH_1}" ${_headerDepthVar}) get_filename_component(${_headerFileVar} "${CMAKE_MATCH_2}" ABSOLUTE) else() @@ -1246,11 +1254,19 @@ function (cotire_scan_includes _includesVar) set (${_includesVar} "" PARENT_SCOPE) return() endif() - list (APPEND _cmd ${_existingSourceFiles}) + # add source files to be scanned + if (WIN32) + foreach (_sourceFile ${_existingSourceFiles}) + file (TO_NATIVE_PATH "${_sourceFile}" _sourceFileNative) + list (APPEND _cmd "${_sourceFileNative}") + endforeach() + else() + list (APPEND _cmd ${_existingSourceFiles}) + endif() if (COTIRE_VERBOSE) message (STATUS "execute_process: ${_cmd}") endif() - if (_option_COMPILER_ID MATCHES "MSVC") + if (MSVC_IDE OR _option_COMPILER_ID MATCHES "MSVC") # cl.exe messes with the output streams unless the environment variable VS_UNICODE_OUTPUT is cleared unset (ENV{VS_UNICODE_OUTPUT}) endif() @@ -1486,11 +1502,16 @@ function (cotire_generate_prefix_header _prefixFile) if (_unparsedLines) if (COTIRE_VERBOSE OR _scanResult OR NOT _selectedHeaders) list (LENGTH _unparsedLines _skippedLineCount) - message (STATUS "${_skippedLineCount} line(s) skipped, see ${_unparsedLinesFile}") + if (WIN32) + file (TO_NATIVE_PATH "${_unparsedLinesFile}" _unparsedLinesLogPath) + else() + set (_unparsedLinesLogPath "${_unparsedLinesFile}") + endif() + message (STATUS "${_skippedLineCount} line(s) skipped, see ${_unparsedLinesLogPath}") endif() string (REPLACE ";" "\n" _unparsedLines "${_unparsedLines}") endif() - file (WRITE "${_unparsedLinesFile}" "${_unparsedLines}") + file (WRITE "${_unparsedLinesFile}" "${_unparsedLines}\n") endfunction() function (cotire_add_makedep_flags _language _compilerID _compilerVersion _flagsVar) @@ -1520,7 +1541,7 @@ function (cotire_add_makedep_flags _language _compilerID _compilerVersion _flags # append to list list (APPEND _flags -H -E) if (NOT "${_compilerVersion}" VERSION_LESS "4.3.0") - list (APPEND _flags "-fdirectives-only") + list (APPEND _flags -fdirectives-only) endif() else() # return as a flag string @@ -1530,16 +1551,36 @@ function (cotire_add_makedep_flags _language _compilerID _compilerVersion _flags endif() endif() elseif (_compilerID MATCHES "Clang") - # Clang options used - # -H print the name of each header file used - # -E invoke preprocessor - # -fno-color-diagnostics don't prints diagnostics in color - if (_flags) - # append to list - list (APPEND _flags -H -E -fno-color-diagnostics) - else() - # return as a flag string - set (_flags "-H -E -fno-color-diagnostics") + if (UNIX) + # Clang options used + # -H print the name of each header file used + # -E invoke preprocessor + # -fno-color-diagnostics do not print diagnostics in color + # -Eonly just run preprocessor, no output + if (_flags) + # append to list + list (APPEND _flags -H -E -fno-color-diagnostics -Xclang -Eonly) + else() + # return as a flag string + set (_flags "-H -E -fno-color-diagnostics -Xclang -Eonly") + endif() + elseif (WIN32) + # Clang-cl.exe options used + # /TC treat all files named on the command line as C source files + # /TP treat all files named on the command line as C++ source files + # /EP preprocess to stdout without #line directives + # -H print the name of each header file used + # -fno-color-diagnostics do not print diagnostics in color + # -Eonly just run preprocessor, no output + set (_sourceFileTypeC "/TC") + set (_sourceFileTypeCXX "/TP") + if (_flags) + # append to list + list (APPEND _flags "${_sourceFileType${_language}}" /EP -fno-color-diagnostics -Xclang -H -Xclang -Eonly) + else() + # return as a flag string + set (_flags "${_sourceFileType${_language}} /EP -fno-color-diagnostics -Xclang -H -Xclang -Eonly") + endif() endif() elseif (_compilerID MATCHES "Intel") if (WIN32) @@ -1613,8 +1654,8 @@ function (cotire_add_pch_compilation_flags _language _compilerID _compilerVersio set (_flags "${_flags} /Zm${COTIRE_PCH_MEMORY_SCALING_FACTOR}") endif() endif() - elseif (_compilerID MATCHES "GNU|Clang") - # GCC / Clang options used + elseif (_compilerID MATCHES "GNU") + # GCC options used # -x specify the source language # -c compile but do not link # -o place output in file @@ -1624,11 +1665,52 @@ function (cotire_add_pch_compilation_flags _language _compilerID _compilerVersio set (_xLanguage_CXX "c++-header") if (_flags) # append to list - list (APPEND _flags "-x" "${_xLanguage_${_language}}" "-c" "${_prefixFile}" -o "${_pchFile}") + list (APPEND _flags -x "${_xLanguage_${_language}}" -c "${_prefixFile}" -o "${_pchFile}") else() # return as a flag string set (_flags "-x ${_xLanguage_${_language}} -c \"${_prefixFile}\" -o \"${_pchFile}\"") endif() + elseif (_compilerID MATCHES "Clang") + if (UNIX) + # Clang options used + # -x specify the source language + # -c compile but do not link + # -o place output in file + # -fno-pch-timestamp disable inclusion of timestamp in precompiled headers (clang 4.0.0+) + set (_xLanguage_C "c-header") + set (_xLanguage_CXX "c++-header") + if (_flags) + # append to list + list (APPEND _flags -x "${_xLanguage_${_language}}" -c "${_prefixFile}" -o "${_pchFile}") + if (NOT "${_compilerVersion}" VERSION_LESS "4.0.0") + list (APPEND _flags -Xclang -fno-pch-timestamp) + endif() + else() + # return as a flag string + set (_flags "-x ${_xLanguage_${_language}} -c \"${_prefixFile}\" -o \"${_pchFile}\"") + if (NOT "${_compilerVersion}" VERSION_LESS "4.0.0") + set (_flags "${_flags} -Xclang -fno-pch-timestamp") + endif() + endif() + elseif (WIN32) + # Clang-cl.exe options used + # /Yc creates a precompiled header file + # /Fp specifies precompiled header binary file name + # /FI forces inclusion of file + # /Zs syntax check only + # /TC treat all files named on the command line as C source files + # /TP treat all files named on the command line as C++ source files + set (_sourceFileTypeC "/TC") + set (_sourceFileTypeCXX "/TP") + if (_flags) + # append to list + list (APPEND _flags "${_sourceFileType${_language}}" + "/Yc${_prefixFile}" "/Fp${_pchFile}" "/FI${_prefixFile}" /Zs "${_hostFile}") + else() + # return as a flag string + set (_flags "/Yc\"${_prefixFile}\" /Fp\"${_pchFile}\" /FI\"${_prefixFile}\"") + endif() + endif() elseif (_compilerID MATCHES "Intel") if (WIN32) file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileNative) @@ -1679,10 +1761,10 @@ function (cotire_add_pch_compilation_flags _language _compilerID _compilerVersio if ("${_language}" STREQUAL "CXX") list (APPEND _flags -Kc++) endif() - list (APPEND _flags "-include" "${_prefixFile}" "-pch-dir" "${_pchDir}" "-pch-create" "${_pchName}" "-fsyntax-only" "${_hostFile}") + list (APPEND _flags -include "${_prefixFile}" -pch-dir "${_pchDir}" -pch-create "${_pchName}" -fsyntax-only "${_hostFile}") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") if (NOT _pchSuppressMessages) - list (APPEND _flags "-Wpch-messages") + list (APPEND _flags -Wpch-messages) endif() endif() else() @@ -1742,23 +1824,46 @@ function (cotire_add_prefix_pch_inclusion_flags _language _compilerID _compilerV # note: ccache requires the -include flag to be used in order to process precompiled header correctly if (_flags) # append to list - list (APPEND _flags "-Winvalid-pch" "-include" "${_prefixFile}") + list (APPEND _flags -Winvalid-pch -include "${_prefixFile}") else() # return as a flag string set (_flags "-Winvalid-pch -include \"${_prefixFile}\"") endif() elseif (_compilerID MATCHES "Clang") - # Clang options used - # -include process include file as the first line of the primary source file - # -include-pch include precompiled header file - # -Qunused-arguments don't emit warning for unused driver arguments - # note: ccache requires the -include flag to be used in order to process precompiled header correctly - if (_flags) - # append to list - list (APPEND _flags "-Qunused-arguments" "-include" "${_prefixFile}") - else() - # return as a flag string - set (_flags "-Qunused-arguments -include \"${_prefixFile}\"") + if (UNIX) + # Clang options used + # -include process include file as the first line of the primary source file + # note: ccache requires the -include flag to be used in order to process precompiled header correctly + if (_flags) + # append to list + list (APPEND _flags -include "${_prefixFile}") + else() + # return as a flag string + set (_flags "-include \"${_prefixFile}\"") + endif() + elseif (WIN32) + # Clang-cl.exe options used + # /Yu uses a precompiled header file during build + # /Fp specifies precompiled header binary file name + # /FI forces inclusion of file + if (_pchFile) + if (_flags) + # append to list + list (APPEND _flags "/Yu${_prefixFile}" "/Fp${_pchFile}" "/FI${_prefixFile}") + else() + # return as a flag string + set (_flags "/Yu\"${_prefixFile}\" /Fp\"${_pchFile}\" /FI\"${_prefixFile}\"") + endif() + else() + # no precompiled header, force inclusion of prefix header + if (_flags) + # append to list + list (APPEND _flags "/FI${_prefixFile}") + else() + # return as a flag string + set (_flags "/FI\"${_prefixFile}\"") + endif() + endif() endif() elseif (_compilerID MATCHES "Intel") if (WIN32) @@ -1808,10 +1913,10 @@ function (cotire_add_prefix_pch_inclusion_flags _language _compilerID _compilerV endif() if (_flags) # append to list - list (APPEND _flags "-include" "${_prefixFile}" "-pch-dir" "${_pchDir}" "-pch-use" "${_pchName}") + list (APPEND _flags -include "${_prefixFile}" -pch-dir "${_pchDir}" -pch-use "${_pchName}") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") if (NOT _pchSuppressMessages) - list (APPEND _flags "-Wpch-messages") + list (APPEND _flags -Wpch-messages) endif() endif() else() @@ -1827,7 +1932,7 @@ function (cotire_add_prefix_pch_inclusion_flags _language _compilerID _compilerV # no precompiled header, force inclusion of prefix header if (_flags) # append to list - list (APPEND _flags "-include" "${_prefixFile}") + list (APPEND _flags -include "${_prefixFile}") else() # return as a flag string set (_flags "-include \"${_prefixFile}\"") @@ -1865,13 +1970,13 @@ function (cotire_precompile_prefix_header _prefixFile _pchFile _hostFile) if (COTIRE_VERBOSE) message (STATUS "execute_process: ${_cmd}") endif() - if (_option_COMPILER_ID MATCHES "MSVC") + if (MSVC_IDE OR _option_COMPILER_ID MATCHES "MSVC") # cl.exe messes with the output streams unless the environment variable VS_UNICODE_OUTPUT is cleared unset (ENV{VS_UNICODE_OUTPUT}) - elseif (_option_COMPILER_ID MATCHES "GNU|Clang") + elseif (_option_COMPILER_ID MATCHES "Clang" AND _option_COMPILER_VERSION VERSION_LESS "4.0.0") if (_option_COMPILER_LAUNCHER MATCHES "ccache" OR _option_COMPILER_EXECUTABLE MATCHES "ccache") - # Newer versions of Clang and GCC seem to embed a compilation timestamp into the precompiled header binary, + # Newer versions of Clang embed a compilation timestamp into the precompiled header binary, # which results in "file has been modified since the precompiled header was built" errors if ccache is used. # We work around the problem by disabling ccache upon pre-compiling the prefix header. set (ENV{CCACHE_DISABLE} "true") @@ -1890,7 +1995,7 @@ function (cotire_check_precompiled_header_support _language _target _msgVar) set (_unsupportedCompiler "Precompiled headers not supported for ${_language} compiler ${CMAKE_${_language}_COMPILER_ID}") if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC") - # supported since Visual Studio C++ 6.0 + # PCH supported since Visual Studio C++ 6.0 # and CMake does not support an earlier version set (${_msgVar} "" PARENT_SCOPE) elseif (CMAKE_${_language}_COMPILER_ID MATCHES "GNU") @@ -1901,8 +2006,16 @@ function (cotire_check_precompiled_header_support _language _target _msgVar) set (${_msgVar} "" PARENT_SCOPE) endif() elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Clang") - # all Clang versions have PCH support - set (${_msgVar} "" PARENT_SCOPE) + if (UNIX) + # all Unix Clang versions have PCH support + set (${_msgVar} "" PARENT_SCOPE) + elseif (WIN32) + # only clang-cl is supported under Windows + get_filename_component(_compilerName "${CMAKE_${_language}_COMPILER}" NAME_WE) + if (NOT _compilerName MATCHES "cl$") + set (${_msgVar} "${_unsupportedCompiler} version ${CMAKE_${_language}_COMPILER_VERSION}. Use clang-cl instead." PARENT_SCOPE) + endif() + endif() elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Intel") # Intel PCH support requires version >= 8.0.0 if ("${CMAKE_${_language}_COMPILER_VERSION}" VERSION_LESS "8.0.0") @@ -1913,19 +2026,25 @@ function (cotire_check_precompiled_header_support _language _target _msgVar) else() set (${_msgVar} "${_unsupportedCompiler}." PARENT_SCOPE) endif() + # check if ccache is used as a compiler launcher get_target_property(_launcher ${_target} ${_language}_COMPILER_LAUNCHER) - if (CMAKE_${_language}_COMPILER MATCHES "ccache" OR _launcher MATCHES "ccache") + get_filename_component(_realCompilerExe "${CMAKE_${_language}_COMPILER}" REALPATH) + if (_realCompilerExe MATCHES "ccache" OR _launcher MATCHES "ccache") + # verify that ccache configuration is compatible with precompiled headers + # always check environment variable CCACHE_SLOPPINESS, because earlier versions of ccache + # do not report the "sloppiness" setting correctly upon printing ccache configuration if (DEFINED ENV{CCACHE_SLOPPINESS}) - if (NOT "$ENV{CCACHE_SLOPPINESS}" MATCHES "pch_defines" OR NOT "$ENV{CCACHE_SLOPPINESS}" MATCHES "time_macros") + if (NOT "$ENV{CCACHE_SLOPPINESS}" MATCHES "pch_defines" OR + NOT "$ENV{CCACHE_SLOPPINESS}" MATCHES "time_macros") set (${_msgVar} "ccache requires the environment variable CCACHE_SLOPPINESS to be set to \"pch_defines,time_macros\"." PARENT_SCOPE) endif() else() - if (_launcher MATCHES "ccache") - get_filename_component(_ccacheExe "${_launcher}" REALPATH) + if (_realCompilerExe MATCHES "ccache") + set (_ccacheExe "${_realCompilerExe}") else() - get_filename_component(_ccacheExe "${CMAKE_${_language}_COMPILER}" REALPATH) + set (_ccacheExe "${_launcher}") endif() execute_process( COMMAND "${_ccacheExe}" "--print-config" @@ -1933,9 +2052,10 @@ function (cotire_check_precompiled_header_support _language _target _msgVar) RESULT_VARIABLE _result OUTPUT_VARIABLE _ccacheConfig OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET) - if (_result OR NOT - _ccacheConfig MATCHES "sloppiness.*=.*time_macros" OR NOT - _ccacheConfig MATCHES "sloppiness.*=.*pch_defines") + if (_result) + set (${_msgVar} "ccache configuration cannot be determined." PARENT_SCOPE) + elseif (NOT _ccacheConfig MATCHES "sloppiness.*=.*time_macros" OR + NOT _ccacheConfig MATCHES "sloppiness.*=.*pch_defines") set (${_msgVar} "ccache requires configuration setting \"sloppiness\" to be set to \"pch_defines,time_macros\"." PARENT_SCOPE) @@ -2235,12 +2355,22 @@ function (cotire_generate_target_script _language _configurations _target _targe CMAKE_${_language}_SOURCE_FILE_EXTENSIONS) if (DEFINED ${_var}) string (REPLACE "\"" "\\\"" _value "${${_var}}") - set (_contents "${_contents}set (${_var} \"${_value}\")\n") - if (NOT _contentsHasGeneratorExpressions) - if ("${_value}" MATCHES "\\$<.*>") + + if ("${_value}" MATCHES "\\$<.*>") + # We have to evaluate generator expressions + if (NOT _contentsHasGeneratorExpressions) set (_contentsHasGeneratorExpressions TRUE) endif() + + # Expand various generator expressions which can only be evaluated on binary targets manually + foreach(_currentReplacedGeneratorExpression "C_COMPILER_ID" "CXX_COMPILER_ID") + set(_currentReplacement ${CMAKE_${_currentReplacedGeneratorExpression}}) + + string (REGEX REPLACE "\\$<${_currentReplacedGeneratorExpression}:([a-zA-Z0-9]*)>" "$" _value "${_value}") + endforeach(_currentReplacedGeneratorExpression) endif() + + set (_contents "${_contents}set (${_var} \"${_value}\")\n") endif() endforeach() # generate target script file @@ -2261,8 +2391,9 @@ endfunction() function (cotire_setup_pch_file_compilation _language _target _targetScript _prefixFile _pchFile _hostFile) set (_sourceFiles ${ARGN}) - if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel") - # for Visual Studio and Intel, we attach the precompiled header compilation to the host file + if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" OR + (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) + # for MSVC, Intel and Clang-cl, we attach the precompiled header compilation to the host file # the remaining files include the precompiled header, see cotire_setup_pch_file_inclusion if (_sourceFiles) set (_flags "") @@ -2307,8 +2438,9 @@ function (cotire_setup_pch_file_compilation _language _target _targetScript _pre endfunction() function (cotire_setup_pch_file_inclusion _language _target _wholeTarget _prefixFile _pchFile _hostFile) - if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel") - # for Visual Studio and Intel, we include the precompiled header in all but the host file + if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" OR + (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) + # for MSVC, Intel and clang-cl, we include the precompiled header in all but the host file # the host file does the precompiled header compilation, see cotire_setup_pch_file_compilation set (_sourceFiles ${ARGN}) list (LENGTH _sourceFiles _numberOfSourceFiles) @@ -2454,9 +2586,10 @@ function (cotire_setup_target_pch_usage _languages _target _wholeTarget) # if this is a single-language target without any excluded files if (_wholeTarget) set (_language "${_languages}") - # for Visual Studio and Intel, precompiled header inclusion is always done on the source file level + # for MSVC, Intel and clang-cl, precompiled header inclusion is always done on the source file level # see cotire_setup_pch_file_inclusion - if (NOT CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel") + if (NOT CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" AND NOT + (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) get_property(_prefixFile TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER) if (_prefixFile) get_property(_pchFile TARGET ${_target} PROPERTY COTIRE_${_language}_PRECOMPILED_HEADER) @@ -2491,7 +2624,8 @@ function (cotire_setup_unity_generation_commands _language _target _targetScript set_property (SOURCE "${_unityFile}" PROPERTY OBJECT_DEPENDS ${_objectDependsPaths}) endif() if (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel") - # unity file compilation results in potentially huge object file, thus use /bigobj by default unter MSVC and Windows Intel + # unity file compilation results in potentially huge object file, + # thus use /bigobj by default unter cl.exe and Windows Intel set_property (SOURCE "${_unityFile}" APPEND_STRING PROPERTY COMPILE_FLAGS "/bigobj") endif() cotire_set_cmd_to_prologue(_unityCmd) @@ -2697,6 +2831,9 @@ function (cotire_make_target_message _target _languages _disableMsg _targetMsgVa else() set (_targetMsg "${_languagesStr} target ${_target} cotired without unity build.") endif() + if (_disableMsg) + set (_targetMsg "${_targetMsg} ${_disableMsg}") + endif() else() if (_excludedStr) set (_targetMsg "${_languagesStr} target ${_target} cotired ${_excludedStr}.") @@ -2786,6 +2923,20 @@ function (cotire_choose_target_languages _target _targetLanguagesVar _wholeTarge set (_targetUsePCH FALSE) endif() endif() + if (_targetAddSCU) + # disable unity builds if automatic Qt processing is used + get_target_property(_targetAutoMoc ${_target} AUTOMOC) + get_target_property(_targetAutoUic ${_target} AUTOUIC) + get_target_property(_targetAutoRcc ${_target} AUTORCC) + if (_targetAutoMoc OR _targetAutoUic OR _targetAutoRcc) + if (_disableMsg) + set (_disableMsg "${_disableMsg} Target uses automatic CMake Qt processing.") + else() + set (_disableMsg "Target uses automatic CMake Qt processing.") + endif() + set (_targetAddSCU FALSE) + endif() + endif() set_property(TARGET ${_target} PROPERTY COTIRE_ENABLE_PRECOMPILED_HEADER ${_targetUsePCH}) set_property(TARGET ${_target} PROPERTY COTIRE_ADD_UNITY_BUILD ${_targetAddSCU}) cotire_make_target_message(${_target} "${_targetLanguages}" "${_disableMsg}" _targetMsg ${_allExcludedSourceFiles}) @@ -2813,7 +2964,11 @@ function (cotire_compute_unity_max_number_of_includes _target _maxIncludesVar) set (_sourceFiles ${ARGN}) get_target_property(_maxIncludes ${_target} COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES) if (_maxIncludes MATCHES "(-j|--parallel|--jobs) ?([0-9]*)") - set (_numberOfThreads "${CMAKE_MATCH_2}") + if (DEFINED CMAKE_MATCH_2) + set (_numberOfThreads "${CMAKE_MATCH_2}") + else() + set (_numberOfThreads "") + endif() if (NOT _numberOfThreads) # use all available cores ProcessorCount(_numberOfThreads) @@ -2926,8 +3081,9 @@ function (cotire_setup_pch_target _languages _configurations _target) set (_dependsFiles "") foreach (_language ${_languages}) set (_props COTIRE_${_language}_PREFIX_HEADER COTIRE_${_language}_UNITY_SOURCE) - if (NOT CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel") - # Visual Studio and Intel only create precompiled header as a side effect + if (NOT CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" AND NOT + (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) + # MSVC, Intel and clang-cl only create precompiled header as a side effect list (INSERT _props 0 COTIRE_${_language}_PRECOMPILED_HEADER) endif() cotire_get_first_set_property_value(_dependsFile TARGET ${_target} ${_props}) @@ -2975,6 +3131,7 @@ function (cotire_collect_unity_target_sources _target _languages _unityTargetSou list (APPEND _unityTargetSources ${_unityFiles}) endif() endforeach() + # handle object libraries which are part of the target's sources get_target_property(_linkLibrariesStrategy ${_target} COTIRE_UNITY_LINK_LIBRARIES_INIT) if ("${_linkLibrariesStrategy}" MATCHES "^COPY_UNITY$") cotire_filter_object_libraries(${_target} _objectLibraries ${_targetSourceFiles}) @@ -3019,21 +3176,6 @@ function (cotire_setup_unity_build_target _languages _configurations _target) # determine unity target sources set (_unityTargetSources "") cotire_collect_unity_target_sources(${_target} "${_languages}" _unityTargetSources) - # handle automatic Qt processing - get_target_property(_targetAutoMoc ${_target} AUTOMOC) - get_target_property(_targetAutoUic ${_target} AUTOUIC) - get_target_property(_targetAutoRcc ${_target} AUTORCC) - if (_targetAutoMoc OR _targetAutoUic OR _targetAutoRcc) - # if the original target sources are subject to CMake's automatic Qt processing, - # also include implicitly generated _automoc.cpp file - if (CMAKE_VERSION VERSION_LESS "3.8.0") - list (APPEND _unityTargetSources "${_target}_automoc.cpp") - set_property (SOURCE "${_target}_automoc.cpp" PROPERTY GENERATED TRUE) - else() - list (APPEND _unityTargetSources "${_target}_autogen/moc_compilation.cpp") - set_property (SOURCE "${_target}_autogen/moc_compilation.cpp" PROPERTY GENERATED TRUE) - endif() - endif() # prevent AUTOMOC, AUTOUIC and AUTORCC properties from being set when the unity target is created set (CMAKE_AUTOMOC OFF) set (CMAKE_AUTOUIC OFF) @@ -3047,21 +3189,6 @@ function (cotire_setup_unity_build_target _languages _configurations _target) else() add_library(${_unityTargetName} ${_unityTargetSubType} EXCLUDE_FROM_ALL ${_unityTargetSources}) endif() - if ("${CMAKE_GENERATOR}" MATCHES "Visual Studio") - # depend on original target's automoc target, if it exists - if (TARGET ${_target}_automoc) - add_dependencies(${_unityTargetName} ${_target}_automoc) - endif() - else() - if (_targetAutoMoc OR _targetAutoUic OR _targetAutoRcc) - # depend on the original target's implicity generated _automoc target - if (CMAKE_VERSION VERSION_LESS "3.8.0") - add_dependencies(${_unityTargetName} ${_target}_automoc) - else() - add_dependencies(${_unityTargetName} ${_target}_autogen) - endif() - endif() - endif() # copy output location properties set (_outputDirProperties ARCHIVE_OUTPUT_DIRECTORY ARCHIVE_OUTPUT_DIRECTORY_ @@ -3132,7 +3259,8 @@ function (cotire_setup_unity_build_target _languages _configurations _target) INTERFACE_AUTOUIC_OPTIONS NO_SYSTEM_FROM_IMPORTED) # copy link stuff cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} - BUILD_WITH_INSTALL_RPATH INSTALL_RPATH INSTALL_RPATH_USE_LINK_PATH SKIP_BUILD_RPATH + BUILD_WITH_INSTALL_RPATH BUILD_WITH_INSTALL_NAME_DIR + INSTALL_RPATH INSTALL_RPATH_USE_LINK_PATH SKIP_BUILD_RPATH LINKER_LANGUAGE LINK_DEPENDS LINK_DEPENDS_NO_SHARED LINK_FLAGS LINK_FLAGS_ LINK_INTERFACE_LIBRARIES LINK_INTERFACE_LIBRARIES_ @@ -3174,6 +3302,10 @@ function (cotire_setup_unity_build_target _languages _configurations _target) ANDROID_NATIVE_LIB_DEPENDENCIES ANDROID_NATIVE_LIB_DIRECTORIES ANDROID_PROCESS_MAX ANDROID_PROGUARD ANDROID_PROGUARD_CONFIG_PATH ANDROID_SECURE_PROPS_PATH ANDROID_SKIP_ANT_STEP ANDROID_STL_TYPE) + # copy CUDA platform specific stuff + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + CUDA_PTX_COMPILATION CUDA_SEPARABLE_COMPILATION CUDA_RESOLVE_DEVICE_SYMBOLS + CUDA_EXTENSIONS CUDA_STANDARD CUDA_STANDARD_REQUIRED) # use output name from original target get_target_property(_targetOutputName ${_unityTargetName} OUTPUT_NAME) if (NOT _targetOutputName) @@ -3187,6 +3319,13 @@ function (cotire_setup_unity_build_target _languages _configurations _target) set_property(TARGET ${_unityTargetName} PROPERTY ENABLE_EXPORTS TRUE) endif() endif() + # enable parallel compilation for MSVC + if (MSVC AND "${CMAKE_GENERATOR}" MATCHES "Visual Studio") + list (LENGTH _unityTargetSources _numberOfUnityTargetSources) + if (_numberOfUnityTargetSources GREATER 1) + set_property(TARGET ${_unityTargetName} APPEND PROPERTY COMPILE_OPTIONS "/MP") + endif() + endif() cotire_init_target(${_unityTargetName}) cotire_add_to_unity_all_target(${_unityTargetName}) set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_TARGET_NAME "${_unityTargetName}") @@ -3345,6 +3484,13 @@ function (cotire_target_link_libraries _target) message (STATUS "unity target ${_unityTargetName} interface link libraries: ${_unityLinkInterfaceLibraries}") endif() endif() + get_target_property(_manualDependencies ${_target} MANUALLY_ADDED_DEPENDENCIES) + if (_manualDependencies) + cotire_map_libraries("${_linkLibrariesStrategy}" _unityManualDependencies ${_manualDependencies}) + if (_unityManualDependencies) + add_dependencies("${_unityTargetName}" ${_unityManualDependencies}) + endif() + endif() endif() endif() endfunction(cotire_target_link_libraries) @@ -3641,7 +3787,7 @@ else() set (COTIRE_UNITY_SOURCE_EXCLUDE_EXTENSIONS "m;mm" CACHE STRING "Ignore sources with the listed file extensions from the generated unity source.") - set (COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES "3" CACHE STRING + set (COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES "2" CACHE STRING "Minimum number of sources in target required to enable use of precompiled header.") if (NOT DEFINED COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT) @@ -3740,7 +3886,7 @@ else() FULL_DOCS "The variable can be set to an integer > 0." "If a target contains less than that number of source files, cotire will not enable the use of the precompiled header for the target." - "If not defined, defaults to 3." + "If not defined, defaults to 2." ) define_property( From 271886c6999ad5c938bcb03e4f0fe41347899701 Mon Sep 17 00:00:00 2001 From: Denis Demidov Date: Fri, 8 Feb 2019 16:35:24 +0300 Subject: [PATCH 4/6] Add vexcl source to external_libraries --- external_libraries/vexcl/.appveyor.yml | 47 + external_libraries/vexcl/.gitignore | 3 + external_libraries/vexcl/.travis.yml | 73 + .../vexcl/.travis/install_boost.sh | 15 + .../vexcl/.travis/opencl_install.sh | 14 + external_libraries/vexcl/CL/cl.hpp | 12934 ++++++++++++++++ external_libraries/vexcl/CMakeLists.txt | 359 + external_libraries/vexcl/DEPENDS.md | 10 + external_libraries/vexcl/INSTALL.md | 54 + external_libraries/vexcl/LICENSE.md | 22 + external_libraries/vexcl/README.md | 19 + external_libraries/vexcl/cmake/FindFFTW.cmake | 113 + .../vexcl/cmake/VexCLBoostTargets.cmake | 46 + .../vexcl/cmake/VexCLConfig.cmake.in | 75 + .../vexcl/cmake/VexCLOpenCLTarget.cmake | 16 + .../vexcl/cmake/VexCLTools.cmake | 58 + external_libraries/vexcl/docs/.gitignore | 3 + external_libraries/vexcl/docs/CMakeLists.txt | 25 + external_libraries/vexcl/docs/Doxyfile | 2407 +++ .../vexcl/docs/_static/custom.css | 31 + .../vexcl/docs/_templates/page.html | 5 + external_libraries/vexcl/docs/cmake.rst | 46 + external_libraries/vexcl/docs/conf.py | 306 + .../vexcl/docs/customkernel.rst | 53 + .../vexcl/docs/doxygen_objdb_8335.tmp | 0 external_libraries/vexcl/docs/expressions.rst | 819 + external_libraries/vexcl/docs/git_version.py | 45 + external_libraries/vexcl/docs/index.rst | 31 + external_libraries/vexcl/docs/indices.rst | 5 + external_libraries/vexcl/docs/initialize.rst | 200 + external_libraries/vexcl/docs/interop.rst | 50 + external_libraries/vexcl/docs/memory.rst | 124 + external_libraries/vexcl/docs/multiexpr.rst | 80 + .../vexcl/docs/partitioning.png | Bin 0 -> 65346 bytes external_libraries/vexcl/docs/primitives.rst | 194 + external_libraries/vexcl/docs/symbolic.rst | 170 + external_libraries/vexcl/docs/talks.rst | 49 + .../vexcl/examples/CMakeLists.txt | 98 + .../vexcl/examples/benchmark.cpp | 1013 ++ .../vexcl/examples/complex_simple.cpp | 60 + .../vexcl/examples/complex_spmv.cpp | 93 + external_libraries/vexcl/examples/devlist.cpp | 105 + .../vexcl/examples/exclusive.cpp | 19 + .../vexcl/examples/fft_benchmark.cpp | 231 + .../vexcl/examples/fft_profile.cpp | 43 + .../vexcl/examples/mba_benchmark.cpp | 75 + .../vexcl/examples/show_profiling_data.m | 96 + .../vexcl/examples/simple/CMakeLists.txt | 6 + .../vexcl/examples/simple/hello.cpp | 6 + .../vexcl/examples/symbolic.cpp | 119 + .../vexcl/examples/thrust-sort.cpp | 53 + .../vexcl/examples/thrust-sort.cu | 25 + .../vexcl/examples/thrust-sort.hpp | 7 + .../vexcl/examples/viennacl/CMakeLists.txt | 7 + .../examples/viennacl/viennacl_solvers.cpp | 93 + external_libraries/vexcl/tests/CMakeLists.txt | 169 + .../vexcl/tests/boost_compute_scan.cpp | 48 + .../vexcl/tests/boost_compute_sort.cpp | 26 + .../vexcl/tests/boost_version.cpp | 6 + external_libraries/vexcl/tests/cast.cpp | 50 + external_libraries/vexcl/tests/clogs_scan.cpp | 63 + external_libraries/vexcl/tests/clogs_sort.cpp | 57 + external_libraries/vexcl/tests/constants.cpp | 22 + external_libraries/vexcl/tests/context.cpp | 30 + .../vexcl/tests/context_setup.hpp | 82 + external_libraries/vexcl/tests/cusparse.cpp | 90 + .../vexcl/tests/custom_kernel.cpp | 93 + external_libraries/vexcl/tests/deduce.cpp | 156 + external_libraries/vexcl/tests/dummy1.cpp | 14 + external_libraries/vexcl/tests/dummy2.cpp | 6 + external_libraries/vexcl/tests/eval.cpp | 30 + external_libraries/vexcl/tests/events.cpp | 106 + external_libraries/vexcl/tests/fft.cpp | 155 + external_libraries/vexcl/tests/generator.cpp | 242 + external_libraries/vexcl/tests/image.cpp | 155 + external_libraries/vexcl/tests/logical.cpp | 28 + external_libraries/vexcl/tests/mba.cpp | 65 + .../vexcl/tests/multi_array.cpp | 84 + .../vexcl/tests/multivector_arithmetics.cpp | 243 + .../vexcl/tests/multivector_create.cpp | 92 + external_libraries/vexcl/tests/random.cpp | 79 + .../vexcl/tests/random_matrix.hpp | 42 + .../vexcl/tests/random_vector.hpp | 51 + .../vexcl/tests/reduce_by_key.cpp | 131 + .../vexcl/tests/reinterpret.cpp | 20 + external_libraries/vexcl/tests/scan.cpp | 43 + .../vexcl/tests/scan_by_key.cpp | 126 + external_libraries/vexcl/tests/sort.cpp | 173 + .../vexcl/tests/sparse_matrices.cpp | 285 + external_libraries/vexcl/tests/spmv.cpp | 549 + external_libraries/vexcl/tests/stencil.cpp | 220 + external_libraries/vexcl/tests/svm.cpp | 70 + .../vexcl/tests/tagged_terminal.cpp | 108 + external_libraries/vexcl/tests/temporary.cpp | 121 + external_libraries/vexcl/tests/tensordot.cpp | 137 + external_libraries/vexcl/tests/threads.cpp | 37 + external_libraries/vexcl/tests/types.cpp | 36 + .../vexcl/tests/vector_arithmetics.cpp | 329 + .../vexcl/tests/vector_copy.cpp | 112 + .../vexcl/tests/vector_create.cpp | 213 + external_libraries/vexcl/tests/vector_io.cpp | 26 + .../vexcl/tests/vector_pointer.cpp | 126 + .../vexcl/tests/vector_view.cpp | 447 + external_libraries/vexcl/vexcl/backend.hpp | 98 + .../vexcl/vexcl/backend/common.hpp | 263 + .../vexcl/vexcl/backend/compute.hpp | 60 + .../vexcl/vexcl/backend/compute/compiler.hpp | 74 + .../vexcl/vexcl/backend/compute/context.hpp | 192 + .../vexcl/backend/compute/device_vector.hpp | 166 + .../vexcl/vexcl/backend/compute/error.hpp | 56 + .../vexcl/vexcl/backend/compute/event.hpp | 106 + .../vexcl/vexcl/backend/compute/filter.hpp | 303 + .../vexcl/vexcl/backend/compute/image.hpp | 58 + .../vexcl/vexcl/backend/compute/kernel.hpp | 238 + .../vexcl/backend/compute/svm_vector.hpp | 107 + .../vexcl/vexcl/backend/cuda.hpp | 49 + .../vexcl/vexcl/backend/cuda/compiler.hpp | 121 + .../vexcl/vexcl/backend/cuda/context.hpp | 432 + .../vexcl/vexcl/backend/cuda/csr.inl | 156 + .../vexcl/vexcl/backend/cuda/cusparse.hpp | 310 + .../vexcl/backend/cuda/device_vector.hpp | 218 + .../vexcl/vexcl/backend/cuda/error.hpp | 160 + .../vexcl/vexcl/backend/cuda/event.hpp | 130 + .../vexcl/vexcl/backend/cuda/filter.hpp | 110 + .../vexcl/vexcl/backend/cuda/hybrid_ell.inl | 156 + .../vexcl/vexcl/backend/cuda/kernel.hpp | 246 + .../vexcl/vexcl/backend/cuda/source.hpp | 298 + .../vexcl/vexcl/backend/cuda/svm_vector.hpp | 104 + .../vexcl/backend/cuda/texture_object.hpp | 52 + .../vexcl/vexcl/backend/jit.hpp | 45 + .../vexcl/vexcl/backend/jit/compiler.hpp | 144 + .../vexcl/vexcl/backend/jit/context.hpp | 181 + .../vexcl/vexcl/backend/jit/device_vector.hpp | 137 + .../vexcl/vexcl/backend/jit/event.hpp | 82 + .../vexcl/vexcl/backend/jit/filter.hpp | 79 + .../vexcl/vexcl/backend/jit/kernel.hpp | 195 + .../vexcl/vexcl/backend/jit/source.hpp | 421 + .../vexcl/vexcl/backend/opencl.hpp | 50 + .../vexcl/vexcl/backend/opencl/compiler.hpp | 186 + .../vexcl/vexcl/backend/opencl/context.hpp | 205 + .../vexcl/vexcl/backend/opencl/defines.hpp | 12 + .../vexcl/backend/opencl/device_vector.hpp | 151 + .../vexcl/vexcl/backend/opencl/error.hpp | 121 + .../vexcl/vexcl/backend/opencl/event.hpp | 115 + .../vexcl/vexcl/backend/opencl/filter.hpp | 320 + .../vexcl/vexcl/backend/opencl/image.hpp | 57 + .../vexcl/vexcl/backend/opencl/kernel.hpp | 266 + .../vexcl/vexcl/backend/opencl/source.hpp | 350 + .../vexcl/vexcl/backend/opencl/svm_vector.hpp | 141 + external_libraries/vexcl/vexcl/cache.hpp | 188 + external_libraries/vexcl/vexcl/cast.hpp | 211 + .../vexcl/vexcl/constant_address_space.hpp | 135 + external_libraries/vexcl/vexcl/constants.hpp | 240 + .../vexcl/vexcl/detail/backtrace.hpp | 67 + .../vexcl/vexcl/detail/fusion.hpp | 254 + external_libraries/vexcl/vexcl/devlist.hpp | 416 + .../vexcl/vexcl/element_index.hpp | 118 + external_libraries/vexcl/vexcl/enqueue.hpp | 149 + external_libraries/vexcl/vexcl/eval.hpp | 126 + .../vexcl/vexcl/external/boost_compute.hpp | 194 + .../vexcl/vexcl/external/clogs.hpp | 292 + .../vexcl/vexcl/external/viennacl.hpp | 68 + external_libraries/vexcl/vexcl/fft.hpp | 151 + .../vexcl/vexcl/fft/kernels.hpp | 664 + external_libraries/vexcl/vexcl/fft/plan.hpp | 412 + .../vexcl/vexcl/fft/unrolled_dft.hpp | 4636 ++++++ external_libraries/vexcl/vexcl/function.hpp | 511 + external_libraries/vexcl/vexcl/gather.hpp | 174 + external_libraries/vexcl/vexcl/generator.hpp | 854 + external_libraries/vexcl/vexcl/image.hpp | 44 + external_libraries/vexcl/vexcl/logical.hpp | 181 + external_libraries/vexcl/vexcl/mba.hpp | 800 + .../vexcl/vexcl/multi_array.hpp | 147 + .../vexcl/vexcl/multivector.hpp | 565 + external_libraries/vexcl/vexcl/operations.hpp | 2326 +++ external_libraries/vexcl/vexcl/profiler.hpp | 368 + external_libraries/vexcl/vexcl/random.hpp | 281 + .../vexcl/vexcl/random/philox.hpp | 187 + .../vexcl/vexcl/random/threefry.hpp | 230 + .../vexcl/vexcl/reduce_by_key.hpp | 585 + external_libraries/vexcl/vexcl/reductor.hpp | 579 + external_libraries/vexcl/vexcl/scan.hpp | 522 + .../vexcl/vexcl/scan_by_key.hpp | 762 + external_libraries/vexcl/vexcl/sort.hpp | 2186 +++ external_libraries/vexcl/vexcl/sparse/csr.hpp | 201 + .../vexcl/vexcl/sparse/distributed.hpp | 432 + external_libraries/vexcl/vexcl/sparse/ell.hpp | 513 + .../vexcl/vexcl/sparse/matrix.hpp | 155 + .../vexcl/vexcl/sparse/product.hpp | 136 + .../vexcl/vexcl/sparse/spmv_ops.hpp | 64 + external_libraries/vexcl/vexcl/spmat.hpp | 487 + external_libraries/vexcl/vexcl/spmat/ccsr.hpp | 280 + external_libraries/vexcl/vexcl/spmat/csr.inl | 258 + .../vexcl/vexcl/spmat/hybrid_ell.inl | 403 + .../vexcl/vexcl/spmat/inline_spmv.hpp | 210 + external_libraries/vexcl/vexcl/stencil.hpp | 680 + external_libraries/vexcl/vexcl/svm_vector.hpp | 221 + .../vexcl/vexcl/tagged_terminal.hpp | 269 + external_libraries/vexcl/vexcl/temporary.hpp | 337 + external_libraries/vexcl/vexcl/tensordot.hpp | 380 + external_libraries/vexcl/vexcl/types.hpp | 348 + external_libraries/vexcl/vexcl/util.hpp | 171 + external_libraries/vexcl/vexcl/vector.hpp | 1293 ++ .../vexcl/vexcl/vector_pointer.hpp | 185 + .../vexcl/vexcl/vector_view.hpp | 1129 ++ external_libraries/vexcl/vexcl/vexcl.hpp | 73 + kratos/CMakeLists.txt | 8 +- 207 files changed, 61617 insertions(+), 3 deletions(-) create mode 100644 external_libraries/vexcl/.appveyor.yml create mode 100644 external_libraries/vexcl/.gitignore create mode 100644 external_libraries/vexcl/.travis.yml create mode 100644 external_libraries/vexcl/.travis/install_boost.sh create mode 100644 external_libraries/vexcl/.travis/opencl_install.sh create mode 100644 external_libraries/vexcl/CL/cl.hpp create mode 100644 external_libraries/vexcl/CMakeLists.txt create mode 100644 external_libraries/vexcl/DEPENDS.md create mode 100644 external_libraries/vexcl/INSTALL.md create mode 100644 external_libraries/vexcl/LICENSE.md create mode 100644 external_libraries/vexcl/README.md create mode 100644 external_libraries/vexcl/cmake/FindFFTW.cmake create mode 100644 external_libraries/vexcl/cmake/VexCLBoostTargets.cmake create mode 100644 external_libraries/vexcl/cmake/VexCLConfig.cmake.in create mode 100644 external_libraries/vexcl/cmake/VexCLOpenCLTarget.cmake create mode 100644 external_libraries/vexcl/cmake/VexCLTools.cmake create mode 100644 external_libraries/vexcl/docs/.gitignore create mode 100644 external_libraries/vexcl/docs/CMakeLists.txt create mode 100644 external_libraries/vexcl/docs/Doxyfile create mode 100644 external_libraries/vexcl/docs/_static/custom.css create mode 100644 external_libraries/vexcl/docs/_templates/page.html create mode 100644 external_libraries/vexcl/docs/cmake.rst create mode 100644 external_libraries/vexcl/docs/conf.py create mode 100644 external_libraries/vexcl/docs/customkernel.rst create mode 100644 external_libraries/vexcl/docs/doxygen_objdb_8335.tmp create mode 100644 external_libraries/vexcl/docs/expressions.rst create mode 100644 external_libraries/vexcl/docs/git_version.py create mode 100644 external_libraries/vexcl/docs/index.rst create mode 100644 external_libraries/vexcl/docs/indices.rst create mode 100644 external_libraries/vexcl/docs/initialize.rst create mode 100644 external_libraries/vexcl/docs/interop.rst create mode 100644 external_libraries/vexcl/docs/memory.rst create mode 100644 external_libraries/vexcl/docs/multiexpr.rst create mode 100644 external_libraries/vexcl/docs/partitioning.png create mode 100644 external_libraries/vexcl/docs/primitives.rst create mode 100644 external_libraries/vexcl/docs/symbolic.rst create mode 100644 external_libraries/vexcl/docs/talks.rst create mode 100644 external_libraries/vexcl/examples/CMakeLists.txt create mode 100644 external_libraries/vexcl/examples/benchmark.cpp create mode 100644 external_libraries/vexcl/examples/complex_simple.cpp create mode 100644 external_libraries/vexcl/examples/complex_spmv.cpp create mode 100644 external_libraries/vexcl/examples/devlist.cpp create mode 100644 external_libraries/vexcl/examples/exclusive.cpp create mode 100644 external_libraries/vexcl/examples/fft_benchmark.cpp create mode 100644 external_libraries/vexcl/examples/fft_profile.cpp create mode 100644 external_libraries/vexcl/examples/mba_benchmark.cpp create mode 100644 external_libraries/vexcl/examples/show_profiling_data.m create mode 100644 external_libraries/vexcl/examples/simple/CMakeLists.txt create mode 100644 external_libraries/vexcl/examples/simple/hello.cpp create mode 100644 external_libraries/vexcl/examples/symbolic.cpp create mode 100644 external_libraries/vexcl/examples/thrust-sort.cpp create mode 100644 external_libraries/vexcl/examples/thrust-sort.cu create mode 100644 external_libraries/vexcl/examples/thrust-sort.hpp create mode 100644 external_libraries/vexcl/examples/viennacl/CMakeLists.txt create mode 100644 external_libraries/vexcl/examples/viennacl/viennacl_solvers.cpp create mode 100644 external_libraries/vexcl/tests/CMakeLists.txt create mode 100644 external_libraries/vexcl/tests/boost_compute_scan.cpp create mode 100644 external_libraries/vexcl/tests/boost_compute_sort.cpp create mode 100644 external_libraries/vexcl/tests/boost_version.cpp create mode 100644 external_libraries/vexcl/tests/cast.cpp create mode 100644 external_libraries/vexcl/tests/clogs_scan.cpp create mode 100644 external_libraries/vexcl/tests/clogs_sort.cpp create mode 100644 external_libraries/vexcl/tests/constants.cpp create mode 100644 external_libraries/vexcl/tests/context.cpp create mode 100644 external_libraries/vexcl/tests/context_setup.hpp create mode 100644 external_libraries/vexcl/tests/cusparse.cpp create mode 100644 external_libraries/vexcl/tests/custom_kernel.cpp create mode 100644 external_libraries/vexcl/tests/deduce.cpp create mode 100644 external_libraries/vexcl/tests/dummy1.cpp create mode 100644 external_libraries/vexcl/tests/dummy2.cpp create mode 100644 external_libraries/vexcl/tests/eval.cpp create mode 100644 external_libraries/vexcl/tests/events.cpp create mode 100644 external_libraries/vexcl/tests/fft.cpp create mode 100644 external_libraries/vexcl/tests/generator.cpp create mode 100644 external_libraries/vexcl/tests/image.cpp create mode 100644 external_libraries/vexcl/tests/logical.cpp create mode 100644 external_libraries/vexcl/tests/mba.cpp create mode 100644 external_libraries/vexcl/tests/multi_array.cpp create mode 100644 external_libraries/vexcl/tests/multivector_arithmetics.cpp create mode 100644 external_libraries/vexcl/tests/multivector_create.cpp create mode 100644 external_libraries/vexcl/tests/random.cpp create mode 100644 external_libraries/vexcl/tests/random_matrix.hpp create mode 100644 external_libraries/vexcl/tests/random_vector.hpp create mode 100644 external_libraries/vexcl/tests/reduce_by_key.cpp create mode 100644 external_libraries/vexcl/tests/reinterpret.cpp create mode 100644 external_libraries/vexcl/tests/scan.cpp create mode 100644 external_libraries/vexcl/tests/scan_by_key.cpp create mode 100644 external_libraries/vexcl/tests/sort.cpp create mode 100644 external_libraries/vexcl/tests/sparse_matrices.cpp create mode 100644 external_libraries/vexcl/tests/spmv.cpp create mode 100644 external_libraries/vexcl/tests/stencil.cpp create mode 100644 external_libraries/vexcl/tests/svm.cpp create mode 100644 external_libraries/vexcl/tests/tagged_terminal.cpp create mode 100644 external_libraries/vexcl/tests/temporary.cpp create mode 100644 external_libraries/vexcl/tests/tensordot.cpp create mode 100644 external_libraries/vexcl/tests/threads.cpp create mode 100644 external_libraries/vexcl/tests/types.cpp create mode 100644 external_libraries/vexcl/tests/vector_arithmetics.cpp create mode 100644 external_libraries/vexcl/tests/vector_copy.cpp create mode 100644 external_libraries/vexcl/tests/vector_create.cpp create mode 100644 external_libraries/vexcl/tests/vector_io.cpp create mode 100644 external_libraries/vexcl/tests/vector_pointer.cpp create mode 100644 external_libraries/vexcl/tests/vector_view.cpp create mode 100644 external_libraries/vexcl/vexcl/backend.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/common.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute/compiler.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute/context.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute/device_vector.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute/error.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute/event.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute/filter.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute/image.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute/kernel.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/compute/svm_vector.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/compiler.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/context.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/csr.inl create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/cusparse.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/device_vector.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/error.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/event.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/filter.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/hybrid_ell.inl create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/kernel.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/source.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/svm_vector.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/cuda/texture_object.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/jit.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/jit/compiler.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/jit/context.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/jit/device_vector.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/jit/event.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/jit/filter.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/jit/kernel.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/jit/source.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/compiler.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/context.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/defines.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/device_vector.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/error.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/event.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/filter.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/image.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/kernel.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/source.hpp create mode 100644 external_libraries/vexcl/vexcl/backend/opencl/svm_vector.hpp create mode 100644 external_libraries/vexcl/vexcl/cache.hpp create mode 100644 external_libraries/vexcl/vexcl/cast.hpp create mode 100644 external_libraries/vexcl/vexcl/constant_address_space.hpp create mode 100644 external_libraries/vexcl/vexcl/constants.hpp create mode 100644 external_libraries/vexcl/vexcl/detail/backtrace.hpp create mode 100644 external_libraries/vexcl/vexcl/detail/fusion.hpp create mode 100644 external_libraries/vexcl/vexcl/devlist.hpp create mode 100644 external_libraries/vexcl/vexcl/element_index.hpp create mode 100644 external_libraries/vexcl/vexcl/enqueue.hpp create mode 100644 external_libraries/vexcl/vexcl/eval.hpp create mode 100644 external_libraries/vexcl/vexcl/external/boost_compute.hpp create mode 100644 external_libraries/vexcl/vexcl/external/clogs.hpp create mode 100644 external_libraries/vexcl/vexcl/external/viennacl.hpp create mode 100644 external_libraries/vexcl/vexcl/fft.hpp create mode 100644 external_libraries/vexcl/vexcl/fft/kernels.hpp create mode 100644 external_libraries/vexcl/vexcl/fft/plan.hpp create mode 100644 external_libraries/vexcl/vexcl/fft/unrolled_dft.hpp create mode 100644 external_libraries/vexcl/vexcl/function.hpp create mode 100644 external_libraries/vexcl/vexcl/gather.hpp create mode 100644 external_libraries/vexcl/vexcl/generator.hpp create mode 100644 external_libraries/vexcl/vexcl/image.hpp create mode 100644 external_libraries/vexcl/vexcl/logical.hpp create mode 100644 external_libraries/vexcl/vexcl/mba.hpp create mode 100644 external_libraries/vexcl/vexcl/multi_array.hpp create mode 100644 external_libraries/vexcl/vexcl/multivector.hpp create mode 100644 external_libraries/vexcl/vexcl/operations.hpp create mode 100644 external_libraries/vexcl/vexcl/profiler.hpp create mode 100644 external_libraries/vexcl/vexcl/random.hpp create mode 100644 external_libraries/vexcl/vexcl/random/philox.hpp create mode 100644 external_libraries/vexcl/vexcl/random/threefry.hpp create mode 100644 external_libraries/vexcl/vexcl/reduce_by_key.hpp create mode 100644 external_libraries/vexcl/vexcl/reductor.hpp create mode 100644 external_libraries/vexcl/vexcl/scan.hpp create mode 100644 external_libraries/vexcl/vexcl/scan_by_key.hpp create mode 100644 external_libraries/vexcl/vexcl/sort.hpp create mode 100644 external_libraries/vexcl/vexcl/sparse/csr.hpp create mode 100644 external_libraries/vexcl/vexcl/sparse/distributed.hpp create mode 100644 external_libraries/vexcl/vexcl/sparse/ell.hpp create mode 100644 external_libraries/vexcl/vexcl/sparse/matrix.hpp create mode 100644 external_libraries/vexcl/vexcl/sparse/product.hpp create mode 100644 external_libraries/vexcl/vexcl/sparse/spmv_ops.hpp create mode 100644 external_libraries/vexcl/vexcl/spmat.hpp create mode 100644 external_libraries/vexcl/vexcl/spmat/ccsr.hpp create mode 100644 external_libraries/vexcl/vexcl/spmat/csr.inl create mode 100644 external_libraries/vexcl/vexcl/spmat/hybrid_ell.inl create mode 100644 external_libraries/vexcl/vexcl/spmat/inline_spmv.hpp create mode 100644 external_libraries/vexcl/vexcl/stencil.hpp create mode 100644 external_libraries/vexcl/vexcl/svm_vector.hpp create mode 100644 external_libraries/vexcl/vexcl/tagged_terminal.hpp create mode 100644 external_libraries/vexcl/vexcl/temporary.hpp create mode 100644 external_libraries/vexcl/vexcl/tensordot.hpp create mode 100644 external_libraries/vexcl/vexcl/types.hpp create mode 100644 external_libraries/vexcl/vexcl/util.hpp create mode 100644 external_libraries/vexcl/vexcl/vector.hpp create mode 100644 external_libraries/vexcl/vexcl/vector_pointer.hpp create mode 100644 external_libraries/vexcl/vexcl/vector_view.hpp create mode 100644 external_libraries/vexcl/vexcl/vexcl.hpp diff --git a/external_libraries/vexcl/.appveyor.yml b/external_libraries/vexcl/.appveyor.yml new file mode 100644 index 000000000000..6b2f7c7c84da --- /dev/null +++ b/external_libraries/vexcl/.appveyor.yml @@ -0,0 +1,47 @@ +environment: + matrix: + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + GENERATOR: Visual Studio 15 2017 Win64 + BOOST_ROOT: C:\Libraries\boost_1_67_0 + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 + GENERATOR: Visual Studio 14 2015 Win64 + BOOST_ROOT: C:\Libraries\boost_1_63_0 + - APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu1804 + GENERATOR: Unix Makefiles + - APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu1604 + GENERATOR: Unix Makefiles + +before_build: + # Install OpenCL headers and libraries + - set NUGETDIR=C:\NUGET + - nuget install opencl-nug -Version 0.777.77 -OutputDirectory %NUGETDIR% + - dir %NUGETDIR%\opencl-nug.0.777.77\build\native\ + - set OCL_ROOT=%NUGETDIR%\opencl-nug.0.777.77\build\native + # Install OpenCL Runtime + - choco install opencl-intel-cpu-runtime + # Check if it's working + - ps: appveyor DownloadFile "https://ci.appveyor.com/api/projects/oblomov/clinfo/artifacts/clinfo.exe?job=platform:+x64" -FileName clinfo.exe + +build_script: + - cmd: .\clinfo + - sh: clinfo + - mkdir build && cd build + - cmd: cmake .. -G"%GENERATOR%" -DVEXCL_BUILD_EXAMPLES=ON -DVEXCL_BUILD_TESTS=ON + - sh: cmake .. -G"$GENERATOR" -DVEXCL_BUILD_EXAMPLES=ON -DVEXCL_BUILD_TESTS=ON + - cmake --build . --config Release + +test_script: + - ctest -C Release --output-on-failure + + +for: +- + matrix: + only: + - GENERATOR: Unix Makefiles + + before_build: + - sudo apt-get update + - sudo apt-get install libboost-all-dev -y + - sudo apt-get install ocl-icd-opencl-dev opencl-headers clinfo -y + - source .travis/opencl_install.sh diff --git a/external_libraries/vexcl/.gitignore b/external_libraries/vexcl/.gitignore new file mode 100644 index 000000000000..3899b067aba3 --- /dev/null +++ b/external_libraries/vexcl/.gitignore @@ -0,0 +1,3 @@ +build* +.*.swp +tags diff --git a/external_libraries/vexcl/.travis.yml b/external_libraries/vexcl/.travis.yml new file mode 100644 index 000000000000..89df72727a30 --- /dev/null +++ b/external_libraries/vexcl/.travis.yml @@ -0,0 +1,73 @@ +language: cpp +dist: xenial + +env: + global: + - OMP_NUM_THREADS=4 + - BOOST_BASENAME=boost_1_67_0 + - AMDAPPSDKROOT=/opt/intel/opencl/ + +matrix: + include: + - compiler: gcc + env: VEXCL_BACKEND=OpenCL + before_install: &opencl_install + - source .travis/opencl_install.sh + - source .travis/install_boost.sh + - compiler: gcc + env: VEXCL_BACKEND=Compute + before_install: *opencl_install + - compiler: gcc + env: VEXCL_BACKEND=JIT + before_install: *opencl_install + - compiler: clang + env: VEXCL_BACKEND=OpenCL + before_install: *opencl_install + - compiler: clang + env: VEXCL_BACKEND=Compute + before_install: *opencl_install + - compiler: clang + env: + - VEXCL_BACKEND=JIT + - VEXCL_JIT_COMPILER_FLAGS=-stdlib=libc++ + before_install: *opencl_install + - os: osx + env: VEXCL_BACKEND=OpenCL + - os: osx + env: VEXCL_BACKEND=Compute + - os: osx + env: VEXCL_BACKEND=JIT + +addons: + apt: + packages: + - opencl-headers + - lcov + - clinfo + homebrew: + packages: + - boost + - libomp + - cmake + - lcov + - clinfo + +cache: + directories: + - ${HOME}/${BOOST_BASENAME}/ + +script: + - cmake --version + - clinfo + - mkdir -p build && cd build + - cmake -DVEXCL_TEST_COVERAGE=ON -DVEXCL_BUILD_TESTS=ON -DVEXCL_BUILD_EXAMPLES=ON -DVEXCL_BACKEND=${VEXCL_BACKEND} -DVEXCL_JIT_COMPILER_FLAGS=${VEXCL_JIT_COMPILER_FLAGS} .. + - make -j2 + - export CPATH=${CPATH}:${BOOST_ROOT} + - if [ "$VEXCL_BACKEND" = "JIT" ] && [ "$CC" = "clang" ] ; then export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(dirname $(which clang++))/../lib ; fi + - ./examples/devlist + - ctest --output-on-failure + +after_success: + - lcov --directory tests --base-directory ../vexcl --capture --output-file coverage.info + - lcov --remove coverage.info '/usr*' '*/cl.hpp' -o coverage.info + - bash <(curl -s https://codecov.io/bash) diff --git a/external_libraries/vexcl/.travis/install_boost.sh b/external_libraries/vexcl/.travis/install_boost.sh new file mode 100644 index 000000000000..215b861d8cd8 --- /dev/null +++ b/external_libraries/vexcl/.travis/install_boost.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +export BOOST_ROOT=${HOME}/${BOOST_BASENAME} + +if [ ! -e ${BOOST_ROOT}/boost/config.hpp ]; then + pushd ${HOME} + BOOST_VERSION=$(echo $BOOST_BASENAME | awk -F '_' '{print $2 "." $3 "." $4 }') + wget https://dl.bintray.com/boostorg/release/$BOOST_VERSION/source/$BOOST_BASENAME.tar.bz2 + rm -rf $BOOST_BASENAME + tar xf ${BOOST_BASENAME}.tar.bz2 + cd ${BOOST_BASENAME} + ./bootstrap.sh --with-libraries=atomic,chrono,date_time,filesystem,program_options,system,test,thread + ./b2 + popd +fi diff --git a/external_libraries/vexcl/.travis/opencl_install.sh b/external_libraries/vexcl/.travis/opencl_install.sh new file mode 100644 index 000000000000..ed466fb07d85 --- /dev/null +++ b/external_libraries/vexcl/.travis/opencl_install.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +########################### +# Get Intel OpenCL Runtime +########################### + +# https://software.intel.com/en-us/articles/opencl-drivers#latest_CPU_runtime +PACKAGE_URL=http://registrationcenter-download.intel.com/akdlm/irc_nas/12556/opencl_runtime_16.1.2_x64_rh_6.4.0.37.tgz +PACKAGE_NAME=opencl_runtime_16.1.2_x64_rh_6.4.0.37 + +wget -q ${PACKAGE_URL} -O /tmp/opencl_runtime.tgz +tar -xzf /tmp/opencl_runtime.tgz -C /tmp +sed 's/decline/accept/g' -i /tmp/${PACKAGE_NAME}/silent.cfg +sudo /tmp/${PACKAGE_NAME}/install.sh -s /tmp/${PACKAGE_NAME}/silent.cfg diff --git a/external_libraries/vexcl/CL/cl.hpp b/external_libraries/vexcl/CL/cl.hpp new file mode 100644 index 000000000000..f80dde80dc59 --- /dev/null +++ b/external_libraries/vexcl/CL/cl.hpp @@ -0,0 +1,12934 @@ +/******************************************************************************* + * Copyright (c) 2008-2015 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/*! \file + * + * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and + * OpenCL 1.2 (rev 15) + * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes + * + * Additions and fixes from: + * Brian Cole, March 3rd 2010 and April 2012 + * Matt Gruenke, April 2012. + * Bruce Merry, February 2013. + * Tom Deakin and Simon McIntosh-Smith, July 2013 + * + * \version 1.2.8 + * \date October 2015 + * + * Optional extension support + * + * cl + * cl_ext_device_fission + * #define USE_CL_DEVICE_FISSION + */ + +/*! \mainpage + * \section intro Introduction + * For many large applications C++ is the language of choice and so it seems + * reasonable to define C++ bindings for OpenCL. + * + * + * The interface is contained with a single C++ header file \em cl.hpp and all + * definitions are contained within the namespace \em cl. There is no additional + * requirement to include \em cl.h and to use either the C++ or original C + * bindings it is enough to simply include \em cl.hpp. + * + * The bindings themselves are lightweight and correspond closely to the + * underlying C API. Using the C++ bindings introduces no additional execution + * overhead. + * + * For detail documentation on the bindings see: + * + * The OpenCL C++ Wrapper API 1.2 (revision 09) + * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf + * + * \section example Example + * + * The following example shows a general use case for the C++ + * bindings, including support for the optional exception feature and + * also the supplied vector and string classes, see following sections for + * decriptions of these features. + * + * \code + * #define __CL_ENABLE_EXCEPTIONS + * + * #if defined(__APPLE__) || defined(__MACOSX) + * #include + * #else + * #include + * #endif + * #include + * #include + * #include + * + * const char * helloStr = "__kernel void " + * "hello(void) " + * "{ " + * " " + * "} "; + * + * int + * main(void) + * { + * cl_int err = CL_SUCCESS; + * try { + * + * std::vector platforms; + * cl::Platform::get(&platforms); + * if (platforms.size() == 0) { + * std::cout << "Platform size 0\n"; + * return -1; + * } + * + * cl_context_properties properties[] = + * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; + * cl::Context context(CL_DEVICE_TYPE_CPU, properties); + * + * std::vector devices = context.getInfo(); + * + * cl::Program::Sources source(1, + * std::make_pair(helloStr,strlen(helloStr))); + * cl::Program program_ = cl::Program(context, source); + * program_.build(devices); + * + * cl::Kernel kernel(program_, "hello", &err); + * + * cl::Event event; + * cl::CommandQueue queue(context, devices[0], 0, &err); + * queue.enqueueNDRangeKernel( + * kernel, + * cl::NullRange, + * cl::NDRange(4,4), + * cl::NullRange, + * NULL, + * &event); + * + * event.wait(); + * } + * catch (cl::Error err) { + * std::cerr + * << "ERROR: " + * << err.what() + * << "(" + * << err.err() + * << ")" + * << std::endl; + * } + * + * return EXIT_SUCCESS; + * } + * + * \endcode + * + */ +#ifndef CL_HPP_ +#define CL_HPP_ + +#ifdef _WIN32 + +#include + +#if defined(USE_DX_INTEROP) +#include +#include +#endif +#endif // _WIN32 + +#if defined(_MSC_VER) +#include +#endif // _MSC_VER + +// +#if defined(USE_CL_DEVICE_FISSION) +#include +#endif + +#if defined(__APPLE__) || defined(__MACOSX) +#include +#else +#include +#endif // !__APPLE__ + +#if (_MSC_VER >= 1700) || (__cplusplus >= 201103L) +#define CL_HPP_RVALUE_REFERENCES_SUPPORTED +#define CL_HPP_CPP11_ATOMICS_SUPPORTED +#include +#endif + +#if (__cplusplus >= 201103L) +#define CL_HPP_NOEXCEPT noexcept +#else +#define CL_HPP_NOEXCEPT +#endif + + +// To avoid accidentally taking ownership of core OpenCL types +// such as cl_kernel constructors are made explicit +// under OpenCL 1.2 +#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS explicit +#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +// Define deprecated prefixes and suffixes to ensure compilation +// in case they are not pre-defined +#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) + +#if !defined(CL_CALLBACK) +#define CL_CALLBACK +#endif //CL_CALLBACK + +#include +#include +#include + +#if defined(__CL_ENABLE_EXCEPTIONS) +#include +#endif // #if defined(__CL_ENABLE_EXCEPTIONS) + +#if !defined(__NO_STD_VECTOR) +#include +#endif + +#if !defined(__NO_STD_STRING) +#include +#endif + +#if defined(__ANDROID__) || defined(linux) || defined(__APPLE__) || defined(__MACOSX) +#include +#endif // linux + +#include + + +/*! \namespace cl + * + * \brief The OpenCL C++ bindings are defined within this namespace. + * + */ +namespace cl { + +class Memory; + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#define __INIT_CL_EXT_FCN_PTR(name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddress(#name); \ + if(!pfn_##name) { \ + } \ + } +#endif // #if defined(CL_VERSION_1_1) + +#if defined(CL_VERSION_1_2) +#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddressForPlatform(platform, #name); \ + if(!pfn_##name) { \ + } \ + } +#endif // #if defined(CL_VERSION_1_1) + +class Program; +class Device; +class Context; +class CommandQueue; +class Memory; +class Buffer; + +#if defined(__CL_ENABLE_EXCEPTIONS) +/*! \brief Exception class + * + * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. + */ +class Error : public std::exception +{ +private: + cl_int err_; + const char * errStr_; +public: + /*! \brief Create a new CL error exception for a given error code + * and corresponding message. + * + * \param err error code value. + * + * \param errStr a descriptive string that must remain in scope until + * handling of the exception has concluded. If set, it + * will be returned by what(). + */ + Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) + {} + + ~Error() throw() {} + + /*! \brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + virtual const char * what() const throw () override + { + if (errStr_ == NULL) { + return "empty"; + } + else { + return errStr_; + } + } + + /*! \brief Get error code associated with exception + * + * \return The error code. + */ + cl_int err(void) const { return err_; } +}; + +#define __ERR_STR(x) #x +#else +#define __ERR_STR(x) NULL +#endif // __CL_ENABLE_EXCEPTIONS + + +namespace detail +{ +#if defined(__CL_ENABLE_EXCEPTIONS) +static inline cl_int errHandler ( + cl_int err, + const char * errStr = NULL) +{ + if (err != CL_SUCCESS) { + throw Error(err, errStr); + } + return err; +} +#else +static inline cl_int errHandler (cl_int err, const char * errStr = NULL) +{ + (void) errStr; // suppress unused variable warning + return err; +} +#endif // __CL_ENABLE_EXCEPTIONS +} + + + +//! \cond DOXYGEN_DETAIL +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) +#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) +#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) +#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) +#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) +#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) +#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) +#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) +#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) +#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) +#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) +#if defined(CL_VERSION_1_2) +#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) +#endif // #if defined(CL_VERSION_1_2) +#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) +#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) +#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) +#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) + +#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) +#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) +#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) + +#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) +#define __COPY_ERR __ERR_STR(cl::copy) +#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) +#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) +#if defined(CL_VERSION_1_2) +#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) +#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) +#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) +#endif // #if defined(CL_VERSION_1_2) +#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) +#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) + +#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) +#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) +#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) +#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) + +#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) +#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) +#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) +#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) +#if defined(CL_VERSION_1_2) +#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) +#endif // #if defined(CL_VERSION_1_2) +#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) +#if defined(CL_VERSION_1_2) +#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) +#define __LINK_PROGRAM_ERR __ERR_STR(clLinkProgram) +#endif // #if defined(CL_VERSION_1_2) +#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) + +#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) +#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) +#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) +#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) +#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) +#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) +#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) +#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) +#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) +#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) +#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) +#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) +#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) +#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) +#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) +#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) +#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) +#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) +#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) +#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) +#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) +#endif // #if defined(CL_VERSION_1_2) + +#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) +#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) + + +#define __RETAIN_ERR __ERR_STR(Retain Object) +#define __RELEASE_ERR __ERR_STR(Release Object) +#define __FLUSH_ERR __ERR_STR(clFlush) +#define __FINISH_ERR __ERR_STR(clFinish) +#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) + +/** + * CL 1.2 version that uses device fission. + */ +#if defined(CL_VERSION_1_2) +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) +#else +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) +#endif // #if defined(CL_VERSION_1_2) + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) +#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) +#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) +#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) +#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) +#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) +#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) +#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) +#endif // #if defined(CL_VERSION_1_1) + +#endif // __CL_USER_OVERRIDE_ERROR_STRINGS +//! \endcond + +/** + * CL 1.2 marker and barrier commands + */ +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) +#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) +#endif // #if defined(CL_VERSION_1_2) + +#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) +typedef std::string STRING_CLASS; +#elif !defined(__USE_DEV_STRING) + +/*! \class string + * \brief Simple string class, that provides a limited subset of std::string + * functionality but avoids many of the issues that come with that class. + + * \note Deprecated. Please use std::string as default or + * re-define the string class to match the std::string + * interface by defining STRING_CLASS + */ +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +{ +private: + ::size_t size_; + char * str_; +public: + //! \brief Constructs an empty string, allocating no memory. + string(void) : size_(0), str_(NULL) + { + } + + /*! \brief Constructs a string populated from an arbitrary value of + * specified size. + * + * An extra '\0' is added, in case none was contained in str. + * + * \param str the initial value of the string instance. Note that '\0' + * characters receive no special treatment. If NULL, + * the string is left empty, with a size of 0. + * + * \param size the number of characters to copy from str. + */ + string(const char * str, ::size_t size) : + size_(size), + str_(NULL) + { + if( size > 0 ) { + str_ = new char[size_+1]; + if (str_ != NULL) { + memcpy(str_, str, size_ * sizeof(char)); + str_[size_] = '\0'; + } + else { + size_ = 0; + } + } + } + + /*! \brief Constructs a string populated from a null-terminated value. + * + * \param str the null-terminated initial value of the string instance. + * If NULL, the string is left empty, with a size of 0. + */ + string(const char * str) : + size_(0), + str_(NULL) + { + if( str ) { + size_= ::strlen(str); + } + if( size_ > 0 ) { + str_ = new char[size_ + 1]; + if (str_ != NULL) { + memcpy(str_, str, (size_ + 1) * sizeof(char)); + } + } + } + + void resize( ::size_t n ) + { + if( size_ == n ) { + return; + } + if (n == 0) { + if( str_ ) { + delete [] str_; + } + str_ = NULL; + size_ = 0; + } + else { + char *newString = new char[n + 1]; + ::size_t copySize = n; + if( size_ < n ) { + copySize = size_; + } + size_ = n; + + if(str_) { + memcpy(newString, str_, (copySize + 1) * sizeof(char)); + } + if( copySize < size_ ) { + memset(newString + copySize, 0, size_ - copySize); + } + newString[size_] = '\0'; + + delete [] str_; + str_ = newString; + } + } + + const char& operator[] ( ::size_t pos ) const + { + return str_[pos]; + } + + char& operator[] ( ::size_t pos ) + { + return str_[pos]; + } + + /*! \brief Copies the value of another string to this one. + * + * \param rhs the string to copy. + * + * \returns a reference to the modified instance. + */ + string& operator=(const string& rhs) + { + if (this == &rhs) { + return *this; + } + + if( str_ != NULL ) { + delete [] str_; + str_ = NULL; + size_ = 0; + } + + if (rhs.size_ == 0 || rhs.str_ == NULL) { + str_ = NULL; + size_ = 0; + } + else { + str_ = new char[rhs.size_ + 1]; + size_ = rhs.size_; + + if (str_ != NULL) { + memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); + } + else { + size_ = 0; + } + } + + return *this; + } + + /*! \brief Constructs a string by copying the value of another instance. + * + * \param rhs the string to copy. + */ + string(const string& rhs) : + size_(0), + str_(NULL) + { + *this = rhs; + } + + //! \brief Destructor - frees memory used to hold the current value. + ~string() + { + delete[] str_; + str_ = NULL; + } + + //! \brief Queries the length of the string, excluding any added '\0's. + ::size_t size(void) const { return size_; } + + //! \brief Queries the length of the string, excluding any added '\0's. + ::size_t length(void) const { return size(); } + + /*! \brief Returns a pointer to the private copy held by this instance, + * or "" if empty/unset. + */ + const char * c_str(void) const { return (str_) ? str_ : "";} +}; +typedef cl::string STRING_CLASS; +#endif // #elif !defined(__USE_DEV_STRING) + +#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) +#define VECTOR_CLASS std::vector +#elif !defined(__USE_DEV_VECTOR) +#define VECTOR_CLASS cl::vector + +#if !defined(__MAX_DEFAULT_VECTOR_SIZE) +#define __MAX_DEFAULT_VECTOR_SIZE 10 +#endif + +/*! \class vector + * \brief Fixed sized vector implementation that mirroring + * + * \note Deprecated. Please use std::vector as default or + * re-define the vector class to match the std::vector + * interface by defining VECTOR_CLASS + + * \note Not recommended for use with custom objects as + * current implementation will construct N elements + * + * std::vector functionality. + * \brief Fixed sized vector compatible with std::vector. + * + * \note + * This differs from std::vector<> not just in memory allocation, + * but also in terms of when members are constructed, destroyed, + * and assigned instead of being copy constructed. + * + * \param T type of element contained in the vector. + * + * \param N maximum size of the vector. + */ +template +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector +{ +private: + T data_[N]; + unsigned int size_; + +public: + //! \brief Constructs an empty vector with no memory allocated. + vector() : + size_(static_cast(0)) + {} + + //! \brief Deallocates the vector's memory and destroys all of its elements. + ~vector() + { + clear(); + } + + //! \brief Returns the number of elements currently contained. + unsigned int size(void) const + { + return size_; + } + + /*! \brief Empties the vector of all elements. + * \note + * This does not deallocate memory but will invoke destructors + * on contained elements. + */ + void clear() + { + while(!empty()) { + pop_back(); + } + } + + /*! \brief Appends an element after the last valid element. + * Calling this on a vector that has reached capacity will throw an + * exception if exceptions are enabled. + */ + void push_back (const T& x) + { + if (size() < N) { + new (&data_[size_]) T(x); + size_++; + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + } + + /*! \brief Removes the last valid element from the vector. + * Calling this on an empty vector will throw an exception + * if exceptions are enabled. + */ + void pop_back(void) + { + if (size_ != 0) { + --size_; + data_[size_].~T(); + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + } + + /*! \brief Constructs with a value copied from another. + * + * \param vec the vector to copy. + */ + vector(const vector& vec) : + size_(vec.size_) + { + if (size_ != 0) { + assign(vec.begin(), vec.end()); + } + } + + /*! \brief Constructs with a specified number of initial elements. + * + * \param size number of initial elements. + * + * \param val value of initial elements. + */ + vector(unsigned int size, const T& val = T()) : + size_(0) + { + for (unsigned int i = 0; i < size; i++) { + push_back(val); + } + } + + /*! \brief Overwrites the current content with that copied from another + * instance. + * + * \param rhs vector to copy. + * + * \returns a reference to this. + */ + vector& operator=(const vector& rhs) + { + if (this == &rhs) { + return *this; + } + + if (rhs.size_ != 0) { + assign(rhs.begin(), rhs.end()); + } else { + clear(); + } + + return *this; + } + + /*! \brief Tests equality against another instance. + * + * \param vec the vector against which to compare. + */ + bool operator==(vector &vec) + { + if (size() != vec.size()) { + return false; + } + + for( unsigned int i = 0; i < size(); ++i ) { + if( operator[](i) != vec[i] ) { + return false; + } + } + return true; + } + + //! \brief Conversion operator to T*. + operator T* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const T* () const { return data_; } + + //! \brief Tests whether this instance has any elements. + bool empty (void) const + { + return size_==0; + } + + //! \brief Returns the maximum number of elements this instance can hold. + unsigned int max_size (void) const + { + return N; + } + + //! \brief Returns the maximum number of elements this instance can hold. + unsigned int capacity () const + { + return N; + } + + //! \brief Resizes the vector to the given size + void resize(unsigned int newSize, T fill = T()) + { + if (newSize > N) + { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + else + { + while (size_ < newSize) + { + new (&data_[size_]) T(fill); + size_++; + } + while (size_ > newSize) + { + --size_; + data_[size_].~T(); + } + } + } + + /*! \brief Returns a reference to a given element. + * + * \param index which element to access. * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). + */ + T& operator[](int index) + { + return data_[index]; + } + + /*! \brief Returns a const reference to a given element. + * + * \param index which element to access. + * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). + */ + const T& operator[](int index) const + { + return data_[index]; + } + + /*! \brief Assigns elements of the vector based on a source iterator range. + * + * \param start Beginning iterator of source range + * \param end Enditerator of source range + * + * \note + * Will throw an exception if exceptions are enabled and size exceeded. + */ + template + void assign(I start, I end) + { + clear(); + while(start != end) { + push_back(*start); + start++; + } + } + + /*! \class iterator + * \brief Const iterator class for vectors + */ + class iterator + { + private: + const vector *vec_; + int index_; + + /** + * Internal iterator constructor to capture reference + * to the vector it iterates over rather than taking + * the vector by copy. + */ + iterator (const vector &vec, int index) : + vec_(&vec) + { + if( !vec.empty() ) { + index_ = index; + } else { + index_ = -1; + } + } + + public: + iterator(void) : + index_(-1), + vec_(NULL) + { + } + + iterator(const iterator& rhs) : + vec_(rhs.vec_), + index_(rhs.index_) + { + } + + ~iterator(void) {} + + static iterator begin(const cl::vector &vec) + { + iterator i(vec, 0); + + return i; + } + + static iterator end(const cl::vector &vec) + { + iterator i(vec, vec.size()); + + return i; + } + + bool operator==(iterator i) + { + return ((vec_ == i.vec_) && + (index_ == i.index_)); + } + + bool operator!=(iterator i) + { + return (!(*this==i)); + } + + iterator& operator++() + { + ++index_; + return *this; + } + + iterator operator++(int) + { + iterator retVal(*this); + ++index_; + return retVal; + } + + iterator& operator--() + { + --index_; + return *this; + } + + iterator operator--(int) + { + iterator retVal(*this); + --index_; + return retVal; + } + + const T& operator *() const + { + return (*vec_)[index_]; + } + }; + + iterator begin(void) + { + return iterator::begin(*this); + } + + iterator begin(void) const + { + return iterator::begin(*this); + } + + iterator end(void) + { + return iterator::end(*this); + } + + iterator end(void) const + { + return iterator::end(*this); + } + + T& front(void) + { + return data_[0]; + } + + T& back(void) + { + return data_[size_]; + } + + const T& front(void) const + { + return data_[0]; + } + + const T& back(void) const + { + return data_[size_-1]; + } +} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) + + + + + +namespace detail { +#define __DEFAULT_NOT_INITIALIZED 1 +#define __DEFAULT_BEING_INITIALIZED 2 +#define __DEFAULT_INITIALIZED 4 + + /* + * Compare and exchange primitives are needed for handling of defaults + */ + +#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED + inline int compare_exchange(std::atomic * dest, int exchange, int comparand) +#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED + inline int compare_exchange(volatile int * dest, int exchange, int comparand) +#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED + { +#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED + std::atomic_compare_exchange_strong(dest, &comparand, exchange); + return comparand; +#elif _MSC_VER + return (int)(_InterlockedCompareExchange( + (volatile long*)dest, + (long)exchange, + (long)comparand)); +#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED + return (__sync_val_compare_and_swap( + dest, + comparand, + exchange)); +#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED + } + + inline void fence() { +#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED + std::atomic_thread_fence(std::memory_order_seq_cst); +#elif _MSC_VER // !CL_HPP_CPP11_ATOMICS_SUPPORTED + _ReadWriteBarrier(); +#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED + __sync_synchronize(); +#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED + } +} // namespace detail + + +/*! \brief class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, whose + * size is known statically. + */ +template +class size_t +{ +private: + ::size_t data_[N]; + +public: + //! \brief Initialize size_t to all 0s + size_t() + { + for( int i = 0; i < N; ++i ) { + data_[i] = 0; + } + } + + ::size_t& operator[](int index) + { + return data_[index]; + } + + const ::size_t& operator[](int index) const + { + return data_[index]; + } + + //! \brief Conversion operator to T*. + operator ::size_t* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const ::size_t* () const { return data_; } +}; + +namespace detail { + +// Generic getInfoHelper. The final parameter is used to guide overload +// resolution: the actual parameter passed is an int, which makes this +// a worse conversion sequence than a specialization that declares the +// parameter as an int. +template +inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) +{ + return f(name, sizeof(T), param, NULL); +} + +// Specialized getInfoHelper for VECTOR_CLASS params +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + T* value = (T*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + param->assign(&value[0], &value[required/sizeof(T)]); + return CL_SUCCESS; +} + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + typename T::cl_type * value = (typename T::cl_type *) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t elements = required / sizeof(typename T::cl_type); + param->assign(&value[0], &value[elements]); + for (::size_t i = 0; i < elements; i++) + { + if (value[i] != NULL) + { + err = (*param)[i].retain(); + if (err != CL_SUCCESS) { + return err; + } + } + } + return CL_SUCCESS; +} + +// Specialized for getInfo +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) +{ + cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); + + if (err != CL_SUCCESS) { + return err; + } + + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for STRING_CLASS params +template +inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long) +{ +#if defined(__NO_STD_VECTOR) || defined(__NO_STD_STRING) + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + char* value = (char*)alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + *param = value; + return CL_SUCCESS; +#else + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + // std::string has a constant data member + // a char vector does not + VECTOR_CLASS value(required); + err = f(name, required, value.data(), NULL); + if (err != CL_SUCCESS) { + return err; + } + if (param) { + param->assign(value.begin(), value.end()); + } +#endif + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for cl::size_t params +template +inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t* value = (::size_t*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + for(int i = 0; i < N; ++i) { + (*param)[i] = value[i]; + } + + return CL_SUCCESS; +} + +template struct ReferenceHandler; + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) +{ + typename T::cl_type value; + cl_int err = f(name, sizeof(value), &value, NULL); + if (err != CL_SUCCESS) { + return err; + } + *param = value; + if (value != NULL) + { + err = param->retain(); + if (err != CL_SUCCESS) { + return err; + } + } + return CL_SUCCESS; +} + +#define __PARAM_NAME_INFO_1_0(F) \ + F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ + F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ + F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ + F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ + F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ + F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ + F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ + F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ + \ + F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ + F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ + F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ + F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \ + \ + F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ + \ + F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ + F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ + F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ + F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ + F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ + \ + F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ + F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ + F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ + \ + F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ + F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ + \ + F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ + F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ + F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ + F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ + F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ + \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ + F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ + F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ + F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ + \ + F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ + F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) + +#if defined(CL_VERSION_1_1) +#define __PARAM_NAME_INFO_1_1(F) \ + F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ + \ + F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ + F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ + \ + F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) +#endif // CL_VERSION_1_1 + + +#if defined(CL_VERSION_1_2) +#define __PARAM_NAME_INFO_1_2(F) \ + F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ + \ + F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ + F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ + \ + F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ + \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ + \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \ + F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) +#endif // #if defined(CL_VERSION_1_2) + +#if defined(USE_CL_DEVICE_FISSION) +#define __PARAM_NAME_DEVICE_FISSION(F) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) +#endif // USE_CL_DEVICE_FISSION + +template +struct param_traits {}; + +#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ +struct token; \ +template<> \ +struct param_traits \ +{ \ + enum { value = param_name }; \ + typedef T param_type; \ +}; + +__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) +#if defined(CL_VERSION_1_1) +__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 +#if defined(CL_VERSION_1_2) +__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 + +#if defined(USE_CL_DEVICE_FISSION) +__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); +#endif // USE_CL_DEVICE_FISSION + +#ifdef CL_PLATFORM_ICD_SUFFIX_KHR +__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) +#endif + +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) +#endif + +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) +#endif + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) +#endif +#ifdef CL_DEVICE_WARP_SIZE_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) +#endif +#ifdef CL_DEVICE_GPU_OVERLAP_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) +#endif +#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) +#endif +#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) +#endif + +// Convenience functions + +template +inline cl_int +getInfo(Func f, cl_uint name, T* param) +{ + return getInfoHelper(f, name, param, 0); +} + +template +struct GetInfoFunctor0 +{ + Func f_; const Arg0& arg0_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, param, size, value, size_ret); } +}; + +template +struct GetInfoFunctor1 +{ + Func f_; const Arg0& arg0_; const Arg1& arg1_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, arg1_, param, size, value, size_ret); } +}; + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) +{ + GetInfoFunctor0 f0 = { f, arg0 }; + return getInfoHelper(f0, name, param, 0); +} + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) +{ + GetInfoFunctor1 f0 = { f, arg0, arg1 }; + return getInfoHelper(f0, name, param, 0); +} + +template +struct ReferenceHandler +{ }; + +#if defined(CL_VERSION_1_2) +/** + * OpenCL 1.2 devices do have retain/release. + */ +template <> +struct ReferenceHandler +{ + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int retain(cl_device_id device) + { return ::clRetainDevice(device); } + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int release(cl_device_id device) + { return ::clReleaseDevice(device); } +}; +#else // #if defined(CL_VERSION_1_2) +/** + * OpenCL 1.1 devices do not have retain/release. + */ +template <> +struct ReferenceHandler +{ + // cl_device_id does not have retain(). + static cl_int retain(cl_device_id) + { return CL_SUCCESS; } + // cl_device_id does not have release(). + static cl_int release(cl_device_id) + { return CL_SUCCESS; } +}; +#endif // #if defined(CL_VERSION_1_2) + +template <> +struct ReferenceHandler +{ + // cl_platform_id does not have retain(). + static cl_int retain(cl_platform_id) + { return CL_SUCCESS; } + // cl_platform_id does not have release(). + static cl_int release(cl_platform_id) + { return CL_SUCCESS; } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_context context) + { return ::clRetainContext(context); } + static cl_int release(cl_context context) + { return ::clReleaseContext(context); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_command_queue queue) + { return ::clRetainCommandQueue(queue); } + static cl_int release(cl_command_queue queue) + { return ::clReleaseCommandQueue(queue); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_mem memory) + { return ::clRetainMemObject(memory); } + static cl_int release(cl_mem memory) + { return ::clReleaseMemObject(memory); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_sampler sampler) + { return ::clRetainSampler(sampler); } + static cl_int release(cl_sampler sampler) + { return ::clReleaseSampler(sampler); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_program program) + { return ::clRetainProgram(program); } + static cl_int release(cl_program program) + { return ::clReleaseProgram(program); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_kernel kernel) + { return ::clRetainKernel(kernel); } + static cl_int release(cl_kernel kernel) + { return ::clReleaseKernel(kernel); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_event event) + { return ::clRetainEvent(event); } + static cl_int release(cl_event event) + { return ::clReleaseEvent(event); } +}; + + +// Extracts version number with major in the upper 16 bits, minor in the lower 16 +static cl_uint getVersion(const char *versionInfo) +{ + int highVersion = 0; + int lowVersion = 0; + int index = 7; + while(versionInfo[index] != '.' ) { + highVersion *= 10; + highVersion += versionInfo[index]-'0'; + ++index; + } + ++index; + while(versionInfo[index] != ' ' && versionInfo[index] != '\0') { + lowVersion *= 10; + lowVersion += versionInfo[index]-'0'; + ++index; + } + return (highVersion << 16) | lowVersion; +} + +static cl_uint getPlatformVersion(cl_platform_id platform) +{ + ::size_t size = 0; + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); + char *versionInfo = (char *) alloca(size); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); + return getVersion(versionInfo); +} + +static cl_uint getDevicePlatformVersion(cl_device_id device) +{ + cl_platform_id platform; + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + return getPlatformVersion(platform); +} + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +static cl_uint getContextPlatformVersion(cl_context context) +{ + // The platform cannot be queried directly, so we first have to grab a + // device and obtain its context + ::size_t size = 0; + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + if (size == 0) + return 0; + cl_device_id *devices = (cl_device_id *) alloca(size); + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); + return getDevicePlatformVersion(devices[0]); +} +#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +template +class Wrapper +{ +public: + typedef T cl_type; + +protected: + cl_type object_; + +public: + Wrapper() : object_(NULL) { } + + Wrapper(const cl_type &obj) : object_(obj) { } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT + { + object_ = rhs.object_; + rhs.object_ = NULL; + } +#endif + + Wrapper& operator = (const Wrapper& rhs) + { + if (this != &rhs) { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + Wrapper& operator = (Wrapper&& rhs) + { + if (this != &rhs) { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + rhs.object_ = NULL; + } + return *this; + } +#endif + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + cl_int retain() const + { + return ReferenceHandler::retain(object_); + } + + cl_int release() const + { + return ReferenceHandler::release(object_); + } +}; + +template <> +class Wrapper +{ +public: + typedef cl_device_id cl_type; + +protected: + cl_type object_; + bool referenceCountable_; + + static bool isReferenceCountable(cl_device_id device) + { + bool retVal = false; + if (device != NULL) { + int version = getDevicePlatformVersion(device); + if(version > ((1 << 16) + 1)) { + retVal = true; + } + } + return retVal; + } + +public: + Wrapper() : object_(NULL), referenceCountable_(false) + { + } + + Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) + { + referenceCountable_ = isReferenceCountable(obj); + } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + referenceCountable_ = isReferenceCountable(object_); + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT + { + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + rhs.object_ = NULL; + rhs.referenceCountable_ = false; + } +#endif + + Wrapper& operator = (const Wrapper& rhs) + { + if (this != &rhs) { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + Wrapper& operator = (Wrapper&& rhs) + { + if (this != &rhs) { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + rhs.object_ = NULL; + rhs.referenceCountable_ = false; + } + return *this; + } +#endif + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; + referenceCountable_ = isReferenceCountable(object_); + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + template + friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); + + cl_int retain() const + { + if( referenceCountable_ ) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } + } + + cl_int release() const + { + if( referenceCountable_ ) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } + } +}; + +} // namespace detail +//! \endcond + +/*! \stuct ImageFormat + * \brief Adds constructors and member functions for cl_image_format. + * + * \see cl_image_format + */ +struct ImageFormat : public cl_image_format +{ + //! \brief Default constructor - performs no initialization. + ImageFormat(){} + + //! \brief Initializing constructor. + ImageFormat(cl_channel_order order, cl_channel_type type) + { + image_channel_order = order; + image_channel_data_type = type; + } + + //! \brief Assignment operator. + ImageFormat& operator = (const ImageFormat& rhs) + { + if (this != &rhs) { + this->image_channel_data_type = rhs.image_channel_data_type; + this->image_channel_order = rhs.image_channel_order; + } + return *this; + } +}; + +/*! \brief Class interface for cl_device_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_device_id + */ +class Device : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Device() : detail::Wrapper() { } + + /*! \brief Constructor from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + __CL_EXPLICIT_CONSTRUCTORS Device(const cl_device_id &device) : detail::Wrapper(device) { } + + /*! \brief Returns the first device on the default context. + * + * \see Context::getDefault() + */ + static Device getDefault(cl_int * err = NULL); + + /*! \brief Assignment operator from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const cl_device_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Device(const Device& dev) : detail::Wrapper(dev) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Device& operator = (const Device &dev) + { + detail::Wrapper::operator=(dev); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Device(Device&& dev) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(dev)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Device& operator = (Device &&dev) + { + detail::Wrapper::operator=(std::move(dev)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + + //! \brief Wrapper for clGetDeviceInfo(). + template + cl_int getInfo(cl_device_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetDeviceInfo, object_, name, param), + __GET_DEVICE_INFO_ERR); + } + + //! \brief Wrapper for clGetDeviceInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_device_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /** + * CL 1.2 version + */ +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clCreateSubDevicesEXT(). + cl_int createSubDevices( + const cl_device_partition_property * properties, + VECTOR_CLASS* devices) + { + cl_uint n = 0; + cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = clCreateSubDevices(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif // #if defined(CL_VERSION_1_2) + +/** + * CL 1.1 version that uses device fission. + */ +#if defined(CL_VERSION_1_1) +#if defined(USE_CL_DEVICE_FISSION) + cl_int createSubDevices( + const cl_device_partition_property_ext * properties, + VECTOR_CLASS* devices) + { + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; + __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); + + cl_uint n = 0; + cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif // #if defined(USE_CL_DEVICE_FISSION) +#endif // #if defined(CL_VERSION_1_1) +}; + +/*! \brief Class interface for cl_platform_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_platform_id + */ +class Platform : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Platform() : detail::Wrapper() { } + + /*! \brief Constructor from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + __CL_EXPLICIT_CONSTRUCTORS Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } + + /*! \brief Assignment operator from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const cl_platform_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetPlatformInfo(). + cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPlatformInfo, object_, name, param), + __GET_PLATFORM_INFO_ERR); + } + + //! \brief Wrapper for clGetPlatformInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_platform_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of devices for this platform. + * + * Wraps clGetDeviceIDs(). + */ + cl_int getDevices( + cl_device_type type, + VECTOR_CLASS* devices) const + { + cl_uint n = 0; + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = ::clGetDeviceIDs(object_, type, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + +#if defined(USE_DX_INTEROP) + /*! \brief Get the list of available D3D10 devices. + * + * \param d3d_device_source. + * + * \param d3d_object. + * + * \param d3d_device_set. + * + * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device + * values returned in devices can be used to identify a specific OpenCL + * device. If \a devices argument is NULL, this argument is ignored. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully. + * + * The application can query specific capabilities of the OpenCL device(s) + * returned by cl::getDevices. This can be used by the application to + * determine which device(s) to use. + * + * \note In the case that exceptions are enabled and a return value + * other than CL_SUCCESS is generated, then cl::Error exception is + * generated. + */ + cl_int getDevices( + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + VECTOR_CLASS* devices) const + { + typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint* num_devices); + + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; + __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); + + cl_uint n = 0; + cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + 0, + NULL, + &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + n, + ids, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif + + /*! \brief Gets a list of available platforms. + * + * Wraps clGetPlatformIDs(). + */ + static cl_int get( + VECTOR_CLASS* platforms) + { + cl_uint n = 0; + + if( platforms == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + platforms->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static cl_int get( + Platform * platform) + { + cl_uint n = 0; + + if( platform == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + *platform = ids[0]; + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform, returning it by value. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static Platform get( + cl_int * errResult = NULL) + { + Platform platform; + cl_uint n = 0; + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + if (errResult != NULL) { + *errResult = err; + } + return Platform(); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + if (errResult != NULL) { + *errResult = err; + } + return Platform(); + } + + + return Platform(ids[0]); + } + + static Platform getDefault( + cl_int *errResult = NULL ) + { + return get(errResult); + } + + +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clUnloadCompiler(). + cl_int + unloadCompiler() + { + return ::clUnloadPlatformCompiler(object_); + } +#endif // #if defined(CL_VERSION_1_2) +}; // class Platform + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +/** + * Unload the OpenCL compiler. + * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int +UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline cl_int +UnloadCompiler() +{ + return ::clUnloadCompiler(); +} +#endif // #if defined(CL_VERSION_1_1) + +/*! \brief Class interface for cl_context. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_context as the original. For details, see + * clRetainContext() and clReleaseContext(). + * + * \see cl_context + */ +class Context + : public detail::Wrapper +{ +private: + +#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED + static std::atomic default_initialized_; +#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED + static volatile int default_initialized_; +#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED + static Context default_; + static volatile cl_int default_error_; +public: + /*! \brief Constructs a context including a list of specified devices. + * + * Wraps clCreateContext(). + */ + Context( + const VECTOR_CLASS& devices, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateContext( + properties, (cl_uint) numDevices, + deviceIDs, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + Context( + const Device& device, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_device_id deviceID = device(); + + object_ = ::clCreateContext( + properties, 1, + &deviceID, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a context including all or a subset of devices of a specified type. + * + * Wraps clCreateContextFromType(). + */ + Context( + cl_device_type type, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + +#if !defined(__APPLE__) && !defined(__MACOS) + cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; + + if (properties == NULL) { + // Get a valid platform ID as we cannot send in a blank one + VECTOR_CLASS platforms; + error = Platform::get(&platforms); + if (error != CL_SUCCESS) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + // Check the platforms we found for a device of our specified type + cl_context_properties platform_id = 0; + for (unsigned int i = 0; i < platforms.size(); i++) { + + VECTOR_CLASS devices; + +#if defined(__CL_ENABLE_EXCEPTIONS) + try { +#endif + + error = platforms[i].getDevices(type, &devices); + +#if defined(__CL_ENABLE_EXCEPTIONS) + } catch (Error) {} + // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type + // We do error checking next anyway, and can throw there if needed +#endif + + // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + if (devices.size() > 0) { + platform_id = (cl_context_properties)platforms[i](); + break; + } + } + + if (platform_id == 0) { + detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = CL_DEVICE_NOT_FOUND; + } + return; + } + + prop[1] = platform_id; + properties = &prop[0]; + } +#endif + object_ = ::clCreateContextFromType( + properties, type, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Context(const Context& ctx) : detail::Wrapper(ctx) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Context& operator = (const Context &ctx) + { + detail::Wrapper::operator=(ctx); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Context(Context&& ctx) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(ctx)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Context& operator = (Context &&ctx) + { + detail::Wrapper::operator=(std::move(ctx)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + + /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. + * + * \note All calls to this function return the same cl_context as the first. + */ + static Context getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + default_ = Context( + CL_DEVICE_TYPE_DEFAULT, + NULL, + NULL, + NULL, + &error); + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + //! \brief Default constructor - initializes to NULL. + Context() : detail::Wrapper() { } + + /*! \brief Constructor from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_context + * into the new Context object. + */ + __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } + + /*! \brief Assignment operator from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseContext() on the value previously held by this instance. + */ + Context& operator = (const cl_context& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetContextInfo(). + template + cl_int getInfo(cl_context_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetContextInfo, object_, name, param), + __GET_CONTEXT_INFO_ERR); + } + + //! \brief Wrapper for clGetContextInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_context_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of supported image formats. + * + * Wraps clGetSupportedImageFormats(). + */ + cl_int getSupportedImageFormats( + cl_mem_flags flags, + cl_mem_object_type type, + VECTOR_CLASS* formats) const + { + cl_uint numEntries; + + if (!formats) { + return CL_SUCCESS; + } + + cl_int err = ::clGetSupportedImageFormats( + object_, + flags, + type, + 0, + NULL, + &numEntries); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + if (numEntries > 0) { + ImageFormat* value = (ImageFormat*) + alloca(numEntries * sizeof(ImageFormat)); + err = ::clGetSupportedImageFormats( + object_, + flags, + type, + numEntries, + (cl_image_format*)value, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(&value[0], &value[numEntries]); + } + else { + formats->clear(); + } + return CL_SUCCESS; + } +}; + +inline Device Device::getDefault(cl_int * err) +{ + cl_int error; + Device device; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + device = context.getInfo()[0]; + if (err != NULL) { + *err = CL_SUCCESS; + } + } + + return device; +} + + +#ifdef _WIN32 +#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED +__declspec(selectany) std::atomic Context::default_initialized_; +#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED +__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED +__declspec(selectany) Context Context::default_; +__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; +#else // !_WIN32 +#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED +__attribute__((weak)) std::atomic Context::default_initialized_; +#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED +__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED +__attribute__((weak)) Context Context::default_; +__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; +#endif // !_WIN32 + +/*! \brief Class interface for cl_event. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_event as the original. For details, see + * clRetainEvent() and clReleaseEvent(). + * + * \see cl_event + */ +class Event : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Event() : detail::Wrapper() { } + + /*! \brief Constructor from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_event + * into the new Event object. + */ + __CL_EXPLICIT_CONSTRUCTORS Event(const cl_event& event) : detail::Wrapper(event) { } + + /*! \brief Assignment operator from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseEvent() on the value previously held by this instance. + */ + Event& operator = (const cl_event& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetEventInfo(). + template + cl_int getInfo(cl_event_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetEventInfo, object_, name, param), + __GET_EVENT_INFO_ERR); + } + + //! \brief Wrapper for clGetEventInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_event_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + //! \brief Wrapper for clGetEventProfilingInfo(). + template + cl_int getProfilingInfo(cl_profiling_info name, T* param) const + { + return detail::errHandler(detail::getInfo( + &::clGetEventProfilingInfo, object_, name, param), + __GET_EVENT_PROFILE_INFO_ERR); + } + + //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. + template typename + detail::param_traits::param_type + getProfilingInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_profiling_info, name>::param_type param; + cl_int result = getProfilingInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Blocks the calling thread until this event completes. + * + * Wraps clWaitForEvents(). + */ + cl_int wait() const + { + return detail::errHandler( + ::clWaitForEvents(1, &object_), + __WAIT_FOR_EVENTS_ERR); + } + +#if defined(CL_VERSION_1_1) + /*! \brief Registers a user callback function for a specific command execution status. + * + * Wraps clSetEventCallback(). + */ + cl_int setCallback( + cl_int type, + void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetEventCallback( + object_, + type, + pfn_notify, + user_data), + __SET_EVENT_CALLBACK_ERR); + } +#endif + + /*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ + static cl_int + waitForEvents(const VECTOR_CLASS& events) + { + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), + __WAIT_FOR_EVENTS_ERR); + } +}; + +#if defined(CL_VERSION_1_1) +/*! \brief Class interface for user events (a subset of cl_event's). + * + * See Event for details about copy semantics, etc. + */ +class UserEvent : public Event +{ +public: + /*! \brief Constructs a user event on a given context. + * + * Wraps clCreateUserEvent(). + */ + UserEvent( + const Context& context, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateUserEvent( + context(), + &error); + + detail::errHandler(error, __CREATE_USER_EVENT_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + UserEvent() : Event() { } + + /*! \brief Sets the execution status of a user event object. + * + * Wraps clSetUserEventStatus(). + */ + cl_int setStatus(cl_int status) + { + return detail::errHandler( + ::clSetUserEventStatus(object_,status), + __SET_USER_EVENT_STATUS_ERR); + } +}; +#endif + +/*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ +inline static cl_int +WaitForEvents(const VECTOR_CLASS& events) +{ + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), + __WAIT_FOR_EVENTS_ERR); +} + +/*! \brief Class interface for cl_mem. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_mem as the original. For details, see + * clRetainMemObject() and clReleaseMemObject(). + * + * \see cl_mem + */ +class Memory : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Memory() : detail::Wrapper() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_mem + * into the new Memory object. + */ + __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } + + /*! \brief Assignment operator from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseMemObject() on the value previously held by this instance. + */ + Memory& operator = (const cl_mem& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Memory(const Memory& mem) : detail::Wrapper(mem) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Memory& operator = (const Memory &mem) + { + detail::Wrapper::operator=(mem); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Memory(Memory&& mem) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(mem)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Memory& operator = (Memory &&mem) + { + detail::Wrapper::operator=(std::move(mem)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + + //! \brief Wrapper for clGetMemObjectInfo(). + template + cl_int getInfo(cl_mem_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetMemObjectInfo, object_, name, param), + __GET_MEM_OBJECT_INFO_ERR); + } + + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_mem_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(CL_VERSION_1_1) + /*! \brief Registers a callback function to be called when the memory object + * is no longer needed. + * + * Wraps clSetMemObjectDestructorCallback(). + * + * Repeated calls to this function, for a given cl_mem value, will append + * to the list of functions called (in reverse order) when memory object's + * resources are freed and the memory object is deleted. + * + * \note + * The registered callbacks are associated with the underlying cl_mem + * value - not the Memory class instance. + */ + cl_int setDestructorCallback( + void (CL_CALLBACK * pfn_notify)(cl_mem, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetMemObjectDestructorCallback( + object_, + pfn_notify, + user_data), + __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); + } +#endif + +}; + +// Pre-declare copy functions +class Buffer; +template< typename IteratorType > +cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); + + +/*! \brief Class interface for Buffer Memory Objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Buffer : public Memory +{ +public: + + /*! \brief Constructs a Buffer in a specified context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + */ + Buffer( + const Context& context, + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Buffer in the default context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + * + * \see Context::getDefault() + */ + Buffer( + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer( + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr = false, + cl_int* err = NULL) + { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + Context context = Context::getDefault(err); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + error = cl::copy(startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified context. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified queue. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. + Buffer() : Memory() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Buffer& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Buffer(const Buffer& buf) : Memory(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Buffer& operator = (const Buffer &buf) + { + Memory::operator=(buf); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Buffer(Buffer&& buf) CL_HPP_NOEXCEPT : Memory(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Buffer& operator = (Buffer &&buf) + { + Memory::operator=(std::move(buf)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + +#if defined(CL_VERSION_1_1) + /*! \brief Creates a new buffer object from this. + * + * Wraps clCreateSubBuffer(). + */ + Buffer createSubBuffer( + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * err = NULL) + { + Buffer result; + cl_int error; + result.object_ = ::clCreateSubBuffer( + object_, + flags, + buffer_create_type, + buffer_create_info, + &error); + + detail::errHandler(error, __CREATE_SUBBUFFER_ERR); + if (err != NULL) { + *err = error; + } + + return result; + } +#endif +}; + +#if defined (USE_DX_INTEROP) +/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. + * + * This is provided to facilitate interoperability with Direct3D. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferD3D10 : public Buffer +{ +public: + typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, + cl_int* errcode_ret); + + /*! \brief Constructs a BufferD3D10, in a specified context, from a + * given ID3D10Buffer. + * + * Wraps clCreateFromD3D10BufferKHR(). + */ + BufferD3D10( + const Context& context, + cl_mem_flags flags, + ID3D10Buffer* bufobj, + cl_int * err = NULL) + { + static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; + +#if defined(CL_VERSION_1_2) + vector props = context.getInfo(); + cl_platform platform = -1; + for( int i = 0; i < props.size(); ++i ) { + if( props[i] == CL_CONTEXT_PLATFORM ) { + platform = props[i+1]; + } + } + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); +#endif +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); +#endif + + cl_int error; + object_ = pfn_clCreateFromD3D10BufferKHR( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferD3D10() : Buffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10(const BufferD3D10& buf) : Buffer(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10& operator = (const BufferD3D10 &buf) + { + Buffer::operator=(buf); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10(BufferD3D10&& buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10& operator = (BufferD3D10 &&buf) + { + Buffer::operator=(std::move(buf)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; +#endif + +/*! \brief Class interface for GL Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferGL : public Buffer +{ +public: + /*! \brief Constructs a BufferGL in a specified context, from a given + * GL buffer. + * + * Wraps clCreateFromGLBuffer(). + */ + BufferGL( + const Context& context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLBuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferGL() : Buffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferGL(const BufferGL& buf) : Buffer(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferGL& operator = (const BufferGL &buf) + { + Buffer::operator=(buf); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferGL(BufferGL&& buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferGL& operator = (BufferGL &&buf) + { + Buffer::operator=(std::move(buf)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + cl_GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief C++ base class for Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image : public Memory +{ +protected: + //! \brief Default constructor - initializes to NULL. + Image() : Memory() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image(const Image& img) : Memory(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image& operator = (const Image &img) + { + Memory::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image(Image&& img) CL_HPP_NOEXCEPT : Memory(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image& operator = (Image &&img) + { + Memory::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + +public: + //! \brief Wrapper for clGetImageInfo(). + template + cl_int getImageInfo(cl_image_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetImageInfo, object_, name, param), + __GET_IMAGE_INFO_ERR); + } + + //! \brief Wrapper for clGetImageInfo() that returns by value. + template typename + detail::param_traits::param_type + getImageInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_image_info, name>::param_type param; + cl_int result = getImageInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +#if defined(CL_VERSION_1_2) +/*! \brief Class interface for 1D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image1D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image1D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D, + width, + 0, 0, 0, 0, 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image1D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1D(const Image1D& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1D& operator = (const Image1D &img) + { + Image::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1D(Image1D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1D& operator = (Image1D &&img) + { + Image::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; + +/*! \class Image1DBuffer + * \brief Image interface for 1D buffer images. + */ +class Image1DBuffer : public Image +{ +public: + Image1DBuffer( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + const Buffer &buffer, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_BUFFER, + width, + 0, 0, 0, 0, 0, 0, 0, + buffer() + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + NULL, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DBuffer() { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } + + Image1DBuffer& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer(const Image1DBuffer& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer& operator = (const Image1DBuffer &img) + { + Image::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer(Image1DBuffer&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer& operator = (Image1DBuffer &&img) + { + Image::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; + +/*! \class Image1DArray + * \brief Image interface for arrays of 1D images. + */ +class Image1DArray : public Image +{ +public: + Image1DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t rowPitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_ARRAY, + width, + 0, 0, // height, depth (unused) + arraySize, + rowPitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DArray() { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image1DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DArray(const Image1DArray& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DArray& operator = (const Image1DArray &img) + { + Image::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DArray(Image1DArray&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DArray& operator = (Image1DArray &&img) + { + Image::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; +#endif // #if defined(CL_VERSION_1_2) + + +/*! \brief Class interface for 2D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image2D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image2D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t row_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + width, + height, + 0, 0, // depth, array size (unused) + row_pitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage2D( + context(), flags,&format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + } + + //! \brief Default constructor - initializes to NULL. + Image2D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2D(const Image2D& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2D& operator = (const Image2D &img) + { + Image::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2D(Image2D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2D& operator = (Image2D &&img) + { + Image::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; + + +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 2D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. + */ +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D +{ +public: + /*! \brief Constructs an Image2DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture2D(). + */ + Image2DGL( + const Context& context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture2D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); + if (err != NULL) { + *err = error; + } + + } + + //! \brief Default constructor - initializes to NULL. + Image2DGL() : Image2D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL& operator = (const cl_mem& rhs) + { + Image2D::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DGL(const Image2DGL& img) : Image2D(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DGL& operator = (const Image2DGL &img) + { + Image2D::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DGL(Image2DGL&& img) CL_HPP_NOEXCEPT : Image2D(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DGL& operator = (Image2DGL &&img) + { + Image2D::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class Image2DArray + * \brief Image interface for arrays of 2D images. + */ +class Image2DArray : public Image +{ +public: + Image2DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t height, + ::size_t rowPitch, + ::size_t slicePitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D_ARRAY, + width, + height, + 0, // depth (unused) + arraySize, + rowPitch, + slicePitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2DArray() { } + + __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image2DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DArray(const Image2DArray& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DArray& operator = (const Image2DArray &img) + { + Image::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DArray(Image2DArray&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DArray& operator = (Image2DArray &&img) + { + Image::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for 3D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3D : public Image +{ +public: + /*! \brief Constructs a 3D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image3D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t depth, + ::size_t row_pitch = 0, + ::size_t slice_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE3D, + width, + height, + depth, + 0, // array size (unused) + row_pitch, + slice_pitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage3D( + context(), flags, &format, width, height, depth, row_pitch, + slice_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + } + + //! \brief Default constructor - initializes to NULL. + Image3D() : Image() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3D(const Image3D& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3D& operator = (const Image3D &img) + { + Image::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3D(Image3D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3D& operator = (Image3D &&img) + { + Image::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; + +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 3D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3DGL : public Image3D +{ +public: + /*! \brief Constructs an Image3DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture3D(). + */ + Image3DGL( + const Context& context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture3D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image3DGL() : Image3D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL& operator = (const cl_mem& rhs) + { + Image3D::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3DGL(const Image3DGL& img) : Image3D(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3DGL& operator = (const Image3DGL &img) + { + Image3D::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3DGL(Image3DGL&& img) CL_HPP_NOEXCEPT : Image3D(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3DGL& operator = (Image3DGL &&img) + { + Image3D::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class ImageGL + * \brief general image interface for GL interop. + * We abstract the 2D and 3D GL images into a single instance here + * that wraps all GL sourced images on the grounds that setup information + * was performed by OpenCL anyway. + */ +class ImageGL : public Image +{ +public: + ImageGL( + const Context& context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); + if (err != NULL) { + *err = error; + } + } + + ImageGL() : Image() { } + + __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } + + ImageGL& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + ImageGL(const ImageGL& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + ImageGL& operator = (const ImageGL &img) + { + Image::operator=(img); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + ImageGL(ImageGL&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + ImageGL& operator = (ImageGL &&img) + { + Image::operator=(std::move(img)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for GL Render Buffer Memory Objects. +* +* This is provided to facilitate interoperability with OpenGL. +* +* See Memory for details about copy semantics, etc. +* +* \see Memory +*/ +class BufferRenderGL : +#if defined(CL_VERSION_1_2) + public ImageGL +#else // #if defined(CL_VERSION_1_2) + public Image2DGL +#endif //#if defined(CL_VERSION_1_2) +{ +public: + /*! \brief Constructs a BufferRenderGL in a specified context, from a given + * GL Renderbuffer. + * + * Wraps clCreateFromGLRenderbuffer(). + */ + BufferRenderGL( + const Context& context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. +#if defined(CL_VERSION_1_2) + BufferRenderGL() : ImageGL() {}; +#else // #if defined(CL_VERSION_1_2) + BufferRenderGL() : Image2DGL() {}; +#endif //#if defined(CL_VERSION_1_2) + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ +#if defined(CL_VERSION_1_2) + __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : ImageGL(buffer) { } +#else // #if defined(CL_VERSION_1_2) + __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Image2DGL(buffer) { } +#endif //#if defined(CL_VERSION_1_2) + + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const cl_mem& rhs) + { +#if defined(CL_VERSION_1_2) + ImageGL::operator=(rhs); +#else // #if defined(CL_VERSION_1_2) + Image2DGL::operator=(rhs); +#endif //#if defined(CL_VERSION_1_2) + + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ +#if defined(CL_VERSION_1_2) + BufferRenderGL(const BufferRenderGL& buf) : ImageGL(buf) {} +#else // #if defined(CL_VERSION_1_2) + BufferRenderGL(const BufferRenderGL& buf) : Image2DGL(buf) {} +#endif //#if defined(CL_VERSION_1_2) + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL& operator = (const BufferRenderGL &rhs) + { +#if defined(CL_VERSION_1_2) + ImageGL::operator=(rhs); +#else // #if defined(CL_VERSION_1_2) + Image2DGL::operator=(rhs); +#endif //#if defined(CL_VERSION_1_2) + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ +#if defined(CL_VERSION_1_2) + BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT : ImageGL(std::move(buf)) {} +#else // #if defined(CL_VERSION_1_2) + BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT : Image2DGL(std::move(buf)) {} +#endif //#if defined(CL_VERSION_1_2) + + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL& operator = (BufferRenderGL &&buf) + { +#if defined(CL_VERSION_1_2) + ImageGL::operator=(std::move(buf)); +#else // #if defined(CL_VERSION_1_2) + Image2DGL::operator=(std::move(buf)); +#endif //#if defined(CL_VERSION_1_2) + + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + cl_GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_, type, gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief Class interface for cl_sampler. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_sampler as the original. For details, see + * clRetainSampler() and clReleaseSampler(). + * + * \see cl_sampler + */ +class Sampler : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Sampler() { } + + /*! \brief Constructs a Sampler in a specified context. + * + * Wraps clCreateSampler(). + */ + Sampler( + const Context& context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateSampler( + context(), + normalized_coords, + addressing_mode, + filter_mode, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructor from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_sampler + * into the new Sampler object. + */ + __CL_EXPLICIT_CONSTRUCTORS Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } + + /*! \brief Assignment operator from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseSampler() on the value previously held by this instance. + */ + Sampler& operator = (const cl_sampler& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Sampler(const Sampler& sam) : detail::Wrapper(sam) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Sampler& operator = (const Sampler &sam) + { + detail::Wrapper::operator=(sam); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Sampler(Sampler&& sam) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(sam)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Sampler& operator = (Sampler &&sam) + { + detail::Wrapper::operator=(std::move(sam)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + + //! \brief Wrapper for clGetSamplerInfo(). + template + cl_int getInfo(cl_sampler_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetSamplerInfo, object_, name, param), + __GET_SAMPLER_INFO_ERR); + } + + //! \brief Wrapper for clGetSamplerInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_sampler_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +class Program; +class CommandQueue; +class Kernel; + +//! \brief Class interface for specifying NDRange values. +class NDRange +{ +private: + size_t<3> sizes_; + cl_uint dimensions_; + +public: + //! \brief Default constructor - resulting range has zero dimensions. + NDRange() + : dimensions_(0) + { } + + //! \brief Constructs one-dimensional range. + NDRange(::size_t size0) + : dimensions_(1) + { + sizes_[0] = size0; + } + + //! \brief Constructs two-dimensional range. + NDRange(::size_t size0, ::size_t size1) + : dimensions_(2) + { + sizes_[0] = size0; + sizes_[1] = size1; + } + + //! \brief Constructs three-dimensional range. + NDRange(::size_t size0, ::size_t size1, ::size_t size2) + : dimensions_(3) + { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = size2; + } + + /*! \brief Conversion operator to const ::size_t *. + * + * \returns a pointer to the size of the first dimension. + */ + operator const ::size_t*() const { + return (const ::size_t*) sizes_; + } + + //! \brief Queries the number of dimensions in the range. + ::size_t dimensions() const { return dimensions_; } +}; + +//! \brief A zero-dimensional range. +static const NDRange NullRange; + +//! \brief Local address wrapper for use with Kernel::setArg +struct LocalSpaceArg +{ + ::size_t size_; +}; + +namespace detail { + +template +struct KernelArgumentHandler +{ + static ::size_t size(const T&) { return sizeof(T); } + static const T* ptr(const T& value) { return &value; } +}; + +template <> +struct KernelArgumentHandler +{ + static ::size_t size(const LocalSpaceArg& value) { return value.size_; } + static const void* ptr(const LocalSpaceArg&) { return NULL; } +}; + +} +//! \endcond + +/*! __local + * \brief Helper function for generating LocalSpaceArg objects. + * Deprecated. Replaced with Local. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg +__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline LocalSpaceArg +__local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +/*! Local + * \brief Helper function for generating LocalSpaceArg objects. + */ +inline LocalSpaceArg +Local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +//class KernelFunctor; + +/*! \brief Class interface for cl_kernel. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_kernel as the original. For details, see + * clRetainKernel() and clReleaseKernel(). + * + * \see cl_kernel + */ +class Kernel : public detail::Wrapper +{ +public: + inline Kernel(const Program& program, const char* name, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. + Kernel() { } + + /*! \brief Constructor from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_kernel + * into the new Kernel object. + */ + __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } + + /*! \brief Assignment operator from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseKernel() on the value previously held by this instance. + */ + Kernel& operator = (const cl_kernel& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Kernel(const Kernel& kernel) : detail::Wrapper(kernel) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Kernel& operator = (const Kernel &kernel) + { + detail::Wrapper::operator=(kernel); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Kernel(Kernel&& kernel) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(kernel)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Kernel& operator = (Kernel &&kernel) + { + detail::Wrapper::operator=(std::move(kernel)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + + template + cl_int getInfo(cl_kernel_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelInfo, object_, name, param), + __GET_KERNEL_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(CL_VERSION_1_2) + template + cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), + __GET_KERNEL_ARG_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getArgInfo(cl_uint argIndex, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_arg_info, name>::param_type param; + cl_int result = getArgInfo(argIndex, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // #if defined(CL_VERSION_1_2) + + template + cl_int getWorkGroupInfo( + const Device& device, cl_kernel_work_group_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetKernelWorkGroupInfo, object_, device(), name, param), + __GET_KERNEL_WORK_GROUP_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getWorkGroupInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_work_group_info, name>::param_type param; + cl_int result = getWorkGroupInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int setArg(cl_uint index, const T &value) + { + return detail::errHandler( + ::clSetKernelArg( + object_, + index, + detail::KernelArgumentHandler::size(value), + detail::KernelArgumentHandler::ptr(value)), + __SET_KERNEL_ARGS_ERR); + } + + cl_int setArg(cl_uint index, ::size_t size, const void* argPtr) + { + return detail::errHandler( + ::clSetKernelArg(object_, index, size, argPtr), + __SET_KERNEL_ARGS_ERR); + } +}; + +/*! \class Program + * \brief Program interface that implements cl_program. + */ +class Program : public detail::Wrapper +{ +public: + typedef VECTOR_CLASS > Binaries; + typedef VECTOR_CLASS > Sources; + + Program( + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + Context context = Context::getDefault(err); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t n = (::size_t)sources.size(); + ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); + const char** strings = (const char**) alloca(n * sizeof(const char*)); + + for (::size_t i = 0; i < n; ++i) { + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings, lengths, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + /** + * Construct a program object from a list of devices and a per-device list of binaries. + * \param context A valid OpenCL context in which to construct the program. + * \param devices A vector of OpenCL device objects for which the program will be created. + * \param binaries A vector of pairs of a pointer to a binary object and its length. + * \param binaryStatus An optional vector that on completion will be resized to + * match the size of binaries and filled with values to specify if each binary + * was successfully loaded. + * Set to CL_SUCCESS if the binary was successfully loaded. + * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. + * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. + * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: + * CL_INVALID_CONTEXT if context is not a valid context. + * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; + * or if any entry in binaries is NULL or has length 0. + * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. + * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. + * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. + */ + Program( + const Context& context, + const VECTOR_CLASS& devices, + const Binaries& binaries, + VECTOR_CLASS* binaryStatus = NULL, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t numDevices = devices.size(); + + // Catch size mismatch early and return + if(binaries.size() != numDevices) { + error = CL_INVALID_VALUE; + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); + const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); + + for (::size_t i = 0; i < numDevices; ++i) { + images[i] = (const unsigned char*)binaries[i].first; + lengths[i] = binaries[(int)i].second; + } + + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + if(binaryStatus) { + binaryStatus->resize(numDevices); + } + + object_ = ::clCreateProgramWithBinary( + context(), (cl_uint) devices.size(), + deviceIDs, + lengths, images, (binaryStatus != NULL && numDevices > 0) + ? &binaryStatus->front() + : NULL, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + } + + +#if defined(CL_VERSION_1_2) + /** + * Create program using builtin kernels. + * \param kernelNames Semi-colon separated list of builtin kernel names + */ + Program( + const Context& context, + const VECTOR_CLASS& devices, + const STRING_CLASS& kernelNames, + cl_int* err = NULL) + { + cl_int error; + + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateProgramWithBuiltInKernels( + context(), + (cl_uint) devices.size(), + deviceIDs, + kernelNames.c_str(), + &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) + + Program() { } + + __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } + + Program& operator = (const cl_program& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Program(const Program& program) : detail::Wrapper(program) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Program& operator = (const Program &program) + { + detail::Wrapper::operator=(program); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Program(Program&& program) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(program)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Program& operator = (Program &&program) + { + detail::Wrapper::operator=(std::move(program)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + + cl_int build( + const VECTOR_CLASS& devices, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + return detail::errHandler( + ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + deviceIDs, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + + cl_int build( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clBuildProgram( + object_, + 0, + NULL, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + +#if defined(CL_VERSION_1_2) + cl_int compile( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clCompileProgram( + object_, + 0, + NULL, + options, + 0, + NULL, + NULL, + notifyFptr, + data), + __COMPILE_PROGRAM_ERR); + } +#endif + + template + cl_int getInfo(cl_program_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int getBuildInfo( + const Device& device, cl_program_build_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetProgramBuildInfo, object_, device(), name, param), + __GET_PROGRAM_BUILD_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getBuildInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + cl_int result = getBuildInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int createKernels(VECTOR_CLASS* kernels) + { + cl_uint numKernels; + cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); + err = ::clCreateKernelsInProgram( + object_, numKernels, (cl_kernel*) value, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + kernels->assign(&value[0], &value[numKernels]); + return CL_SUCCESS; + } +}; + +#if defined(CL_VERSION_1_2) +inline Program linkProgram( + Program input1, + Program input2, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int error_local = CL_SUCCESS; + + cl_program programs[2] = { input1(), input2() }; + + Context ctx = input1.getInfo(&error_local); + if(error_local!=CL_SUCCESS) { + detail::errHandler(error_local, __LINK_PROGRAM_ERR); + } + + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + 2, + programs, + notifyFptr, + data, + &error_local); + + detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = error_local; + } + + return Program(prog); +} + +inline Program linkProgram( + VECTOR_CLASS inputPrograms, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int error_local = CL_SUCCESS; + + cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); + + if (programs != NULL) { + for (unsigned int i = 0; i < inputPrograms.size(); i++) { + programs[i] = inputPrograms[i](); + } + } + + Context ctx; + if(inputPrograms.size() > 0) { + ctx = inputPrograms[0].getInfo(&error_local); + if(error_local!=CL_SUCCESS) { + detail::errHandler(error_local, __LINK_PROGRAM_ERR); + } + } + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + (cl_uint)inputPrograms.size(), + programs, + notifyFptr, + data, + &error_local); + + detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = error_local; + } + + return Program(prog); +} +#endif + +template<> +inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const +{ + VECTOR_CLASS< ::size_t> sizes = getInfo(); + VECTOR_CLASS binaries; + for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) + { + char *ptr = NULL; + if (*s != 0) + ptr = new char[*s]; + binaries.push_back(ptr); + } + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); + if (err != NULL) { + *err = result; + } + return binaries; +} + +inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) +{ + cl_int error; + + object_ = ::clCreateKernel(program(), name, &error); + detail::errHandler(error, __CREATE_KERNEL_ERR); + + if (err != NULL) { + *err = error; + } + +} + +/*! \class CommandQueue + * \brief CommandQueue interface for cl_command_queue. + */ +class CommandQueue : public detail::Wrapper +{ +private: +#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED + static std::atomic default_initialized_; +#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED + static volatile int default_initialized_; +#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED + static CommandQueue default_; + static volatile cl_int default_error_; +public: + CommandQueue( + cl_command_queue_properties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + } + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + */ + explicit CommandQueue( + const Context& context, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + VECTOR_CLASS devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + + object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (err != NULL) { + *err = error; + } + + } + + CommandQueue( + const Context& context, + const Device& device, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + CommandQueue(const CommandQueue& queue) : detail::Wrapper(queue) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + CommandQueue& operator = (const CommandQueue &queue) + { + detail::Wrapper::operator=(queue); + return *this; + } + +#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + CommandQueue(CommandQueue&& queue) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(queue)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + CommandQueue& operator = (CommandQueue &&queue) + { + detail::Wrapper::operator=(std::move(queue)); + return *this; + } +#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + + static CommandQueue getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + default_ = CommandQueue(context, device, 0, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + CommandQueue() { } + + __CL_EXPLICIT_CONSTRUCTORS CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } + + CommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBuffer( + object_, src(), dst(), src_offset, dst_offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferRect( + object_, + src(), + dst(), + (const ::size_t *)src_origin, + (const ::size_t *)dst_origin, + (const ::size_t *)region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill a buffer object with a pattern + * of a given size. The pattern is specified a as vector. + * \tparam PatternType The datatype of the pattern field. + * The pattern type must be an accepted OpenCL data type. + */ + template + cl_int enqueueFillBuffer( + const Buffer& buffer, + PatternType pattern, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillBuffer( + object_, + buffer(), + static_cast(&pattern), + sizeof(PatternType), + offset, + size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImage( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *)dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA floating-point color value if + * the image channel data type is not an unnormalized signed or + * unsigned data type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_float4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA signed integer color value if + * the image channel data type is an unnormalized signed integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_int4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA unsigned integer color value if + * the image channel data type is an unnormalized unsigned integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_uint4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImageToBuffer( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *) region, dst_offset, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferToImage( + object_, src(), dst(), src_offset, + (const ::size_t *) dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_event tmp; + cl_int error; + void * result = ::clEnqueueMapBuffer( + object_, buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + if (event != NULL && error == CL_SUCCESS) + *event = tmp; + + return result; + } + + void* enqueueMapImage( + const Image& buffer, + cl_bool blocking, + cl_map_flags flags, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t * row_pitch, + ::size_t * slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_event tmp; + cl_int error; + void * result = ::clEnqueueMapImage( + object_, buffer(), blocking, flags, + (const ::size_t *) origin, (const ::size_t *) region, + row_pitch, slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + if (event != NULL && error == CL_SUCCESS) + *event = tmp; + return result; + } + + cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueues a marker command which waits for either a list of events to complete, + * or all previously enqueued commands to complete. + * + * Enqueues a marker command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command returns an event which can be waited on, + * i.e. this event can be waited on to insure that all events either in the event_wait_list + * or all previously enqueued commands, queued before this command to command_queue, + * have completed. + */ + cl_int enqueueMarkerWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarkerWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * A synchronization point that enqueues a barrier operation. + * + * Enqueues a barrier command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command blocks command execution, that is, any + * following commands enqueued after it do not execute until it completes. This command + * returns an event which can be waited on, i.e. this event can be waited on to insure that + * all events either in the event_wait_list or all previously enqueued commands, queued + * before this command to command_queue, have completed. + */ + cl_int enqueueBarrierWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueBarrierWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_BARRIER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command to indicate with which device a set of memory objects + * should be associated. + */ + cl_int enqueueMigrateMemObjects( + const VECTOR_CLASS &memObjects, + cl_mem_migration_flags flags, + const VECTOR_CLASS* events = NULL, + Event* event = NULL + ) + { + cl_event tmp; + + cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); + for( int i = 0; i < (int)memObjects.size(); ++i ) { + localMemObjects[i] = memObjects[i](); + } + + + cl_int err = detail::errHandler( + ::clEnqueueMigrateMemObjects( + object_, + (cl_uint)memObjects.size(), + static_cast(localMemObjects), + flags, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueNDRangeKernel( + const Kernel& kernel, + const NDRange& offset, + const NDRange& global, + const NDRange& local = NullRange, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNDRangeKernel( + object_, kernel(), (cl_uint) global.dimensions(), + offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, + (const ::size_t*) global, + local.dimensions() != 0 ? (const ::size_t*) local : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NDRANGE_KERNEL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueTask( + const Kernel& kernel, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueTask( + object_, kernel(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_TASK_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueNativeKernel( + void (CL_CALLBACK *userFptr)(void *), + std::pair args, + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* mem_locs = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) + ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) + : NULL; + + if (mems != NULL) { + for (unsigned int i = 0; i < mem_objects->size(); i++) { + mems[i] = ((*mem_objects)[i])(); + } + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNativeKernel( + object_, userFptr, args.first, args.second, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + mems, + (mem_locs != NULL && mem_locs->size() > 0) ? (const void **) &mem_locs->front() : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NATIVE_KERNEL); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarker( + object_, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueWaitForEvents( + object_, + (cl_uint) events.size(), + events.size() > 0 ? (const cl_event*) &events.front() : NULL), + __ENQUEUE_WAIT_FOR_EVENTS_ERR); + } +#endif // #if defined(CL_VERSION_1_1) + + cl_int enqueueAcquireGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueAcquireGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReleaseGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined (USE_DX_INTEROP) +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); + + cl_int enqueueAcquireD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); +#endif +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); +#endif + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueAcquireD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_2) +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_1) + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueReleaseD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueBarrier(object_), + __ENQUEUE_BARRIER_ERR); + } +#endif // #if defined(CL_VERSION_1_1) + + cl_int flush() const + { + return detail::errHandler(::clFlush(object_), __FLUSH_ERR); + } + + cl_int finish() const + { + return detail::errHandler(::clFinish(object_), __FINISH_ERR); + } +}; + +#ifdef _WIN32 +#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED +__declspec(selectany) std::atomic CommandQueue::default_initialized_; +#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED +__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED +__declspec(selectany) CommandQueue CommandQueue::default_; +__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#else // !_WIN32 +#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED +__attribute__((weak)) std::atomic CommandQueue::default_initialized_; +#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED +__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED +__attribute__((weak)) CommandQueue CommandQueue::default_; +__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#endif // !_WIN32 + +template< typename IteratorType > +Buffer::Buffer( + const Context &context, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + CommandQueue queue(context, 0, &error); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +template< typename IteratorType > +Buffer::Buffer( + const CommandQueue &queue, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if (readOnly) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if (useHostPtr) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + Context context = queue.getInfo(); + + if (useHostPtr) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } + else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if (!useHostPtr) { + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +inline cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + void * result = ::clEnqueueMapBuffer( + queue(), buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; +} + +inline cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (error != CL_SUCCESS) { + return error; + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + queue(), memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; +} + +inline cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, startIterator, endIterator, buffer); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, buffer, startIterator, endIterator); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } +#if defined(_MSC_VER) + std::copy( + startIterator, + endIterator, + stdext::checked_array_iterator( + pointer, length)); +#else + std::copy(startIterator, endIterator, pointer); +#endif + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } + std::copy(pointer, pointer + length, startIterator); + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +#if defined(CL_VERSION_1_1) +inline cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferRect( + src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); +} +#endif + +inline cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImage( + src, + dst, + src_origin, + dst_origin, + region, + events, + event); +} + +inline cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImageToBuffer( + src, + dst, + src_origin, + region, + dst_offset, + events, + event); +} + +inline cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferToImage( + src, + dst, + src_offset, + dst_origin, + region, + events, + event); +} + + +inline cl_int flush(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.flush(); +} + +inline cl_int finish(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + + return queue.finish(); +} + +// Kernel Functor support +// New interface as of September 2011 +// Requires the C++11 std::tr1::function (note do not support TR1) +// Visual Studio 2010 and GCC 4.2 + +struct EnqueueArgs +{ + CommandQueue queue_; + const NDRange offset_; + const NDRange global_; + const NDRange local_; + VECTOR_CLASS events_; + + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(Event e, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } +}; + +namespace detail { + +class NullType {}; + +template +struct SetArg +{ + static void set (Kernel kernel, T0 arg) + { + kernel.setArg(index, arg); + } +}; + +template +struct SetArg +{ + static void set (Kernel, NullType) + { + } +}; + +template < + typename T0, typename T1, typename T2, typename T3, + typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, + typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, + typename T20, typename T21, typename T22, typename T23, + typename T24, typename T25, typename T26, typename T27, + typename T28, typename T29, typename T30, typename T31 +> +class KernelFunctorGlobal +{ +private: + Kernel kernel_; + +public: + KernelFunctorGlobal( + Kernel kernel) : + kernel_(kernel) + {} + + KernelFunctorGlobal( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + kernel_(program, name.c_str(), err) + {} + + Event operator() ( + const EnqueueArgs& args, + T0 t0, + T1 t1 = NullType(), + T2 t2 = NullType(), + T3 t3 = NullType(), + T4 t4 = NullType(), + T5 t5 = NullType(), + T6 t6 = NullType(), + T7 t7 = NullType(), + T8 t8 = NullType(), + T9 t9 = NullType(), + T10 t10 = NullType(), + T11 t11 = NullType(), + T12 t12 = NullType(), + T13 t13 = NullType(), + T14 t14 = NullType(), + T15 t15 = NullType(), + T16 t16 = NullType(), + T17 t17 = NullType(), + T18 t18 = NullType(), + T19 t19 = NullType(), + T20 t20 = NullType(), + T21 t21 = NullType(), + T22 t22 = NullType(), + T23 t23 = NullType(), + T24 t24 = NullType(), + T25 t25 = NullType(), + T26 t26 = NullType(), + T27 t27 = NullType(), + T28 t28 = NullType(), + T29 t29 = NullType(), + T30 t30 = NullType(), + T31 t31 = NullType() + ) + { + Event event; + SetArg<0, T0>::set(kernel_, t0); + SetArg<1, T1>::set(kernel_, t1); + SetArg<2, T2>::set(kernel_, t2); + SetArg<3, T3>::set(kernel_, t3); + SetArg<4, T4>::set(kernel_, t4); + SetArg<5, T5>::set(kernel_, t5); + SetArg<6, T6>::set(kernel_, t6); + SetArg<7, T7>::set(kernel_, t7); + SetArg<8, T8>::set(kernel_, t8); + SetArg<9, T9>::set(kernel_, t9); + SetArg<10, T10>::set(kernel_, t10); + SetArg<11, T11>::set(kernel_, t11); + SetArg<12, T12>::set(kernel_, t12); + SetArg<13, T13>::set(kernel_, t13); + SetArg<14, T14>::set(kernel_, t14); + SetArg<15, T15>::set(kernel_, t15); + SetArg<16, T16>::set(kernel_, t16); + SetArg<17, T17>::set(kernel_, t17); + SetArg<18, T18>::set(kernel_, t18); + SetArg<19, T19>::set(kernel_, t19); + SetArg<20, T20>::set(kernel_, t20); + SetArg<21, T21>::set(kernel_, t21); + SetArg<22, T22>::set(kernel_, t22); + SetArg<23, T23>::set(kernel_, t23); + SetArg<24, T24>::set(kernel_, t24); + SetArg<25, T25>::set(kernel_, t25); + SetArg<26, T26>::set(kernel_, t26); + SetArg<27, T27>::set(kernel_, t27); + SetArg<28, T28>::set(kernel_, t28); + SetArg<29, T29>::set(kernel_, t29); + SetArg<30, T30>::set(kernel_, t30); + SetArg<31, T31>::set(kernel_, t31); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } + +}; + +//------------------------------------------------------------------------------------------------------ + + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31> +struct functionImplementation_ +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3); + } + + +}; + +template< + typename T0, + typename T1, + typename T2> +struct functionImplementation_ +< T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2); + } + + +}; + +template< + typename T0, + typename T1> +struct functionImplementation_ +< T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1) + { + return functor_( + enqueueArgs, + arg0, + arg1); + } + + +}; + +template< + typename T0> +struct functionImplementation_ +< T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0) + { + return functor_( + enqueueArgs, + arg0); + } + + +}; + + + + + +} // namespace detail + +//---------------------------------------------------------------------------------------------- + +template < + typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, + typename T3 = detail::NullType, typename T4 = detail::NullType, + typename T5 = detail::NullType, typename T6 = detail::NullType, + typename T7 = detail::NullType, typename T8 = detail::NullType, + typename T9 = detail::NullType, typename T10 = detail::NullType, + typename T11 = detail::NullType, typename T12 = detail::NullType, + typename T13 = detail::NullType, typename T14 = detail::NullType, + typename T15 = detail::NullType, typename T16 = detail::NullType, + typename T17 = detail::NullType, typename T18 = detail::NullType, + typename T19 = detail::NullType, typename T20 = detail::NullType, + typename T21 = detail::NullType, typename T22 = detail::NullType, + typename T23 = detail::NullType, typename T24 = detail::NullType, + typename T25 = detail::NullType, typename T26 = detail::NullType, + typename T27 = detail::NullType, typename T28 = detail::NullType, + typename T29 = detail::NullType, typename T30 = detail::NullType, + typename T31 = detail::NullType +> +struct make_kernel : + public detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > +{ +public: + typedef detail::KernelFunctorGlobal< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > FunctorType; + + make_kernel( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(program, name, err)) + {} + + make_kernel( + const Kernel kernel) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(kernel)) + {} +}; + + +//---------------------------------------------------------------------------------------------------------------------- + +#undef __ERR_STR +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#undef __GET_DEVICE_INFO_ERR +#undef __GET_PLATFORM_INFO_ERR +#undef __GET_DEVICE_IDS_ERR +#undef __GET_CONTEXT_INFO_ERR +#undef __GET_EVENT_INFO_ERR +#undef __GET_EVENT_PROFILE_INFO_ERR +#undef __GET_MEM_OBJECT_INFO_ERR +#undef __GET_IMAGE_INFO_ERR +#undef __GET_SAMPLER_INFO_ERR +#undef __GET_KERNEL_INFO_ERR +#undef __GET_KERNEL_ARG_INFO_ERR +#undef __GET_KERNEL_WORK_GROUP_INFO_ERR +#undef __GET_PROGRAM_INFO_ERR +#undef __GET_PROGRAM_BUILD_INFO_ERR +#undef __GET_COMMAND_QUEUE_INFO_ERR + +#undef __CREATE_CONTEXT_ERR +#undef __CREATE_CONTEXT_FROM_TYPE_ERR +#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR + +#undef __CREATE_BUFFER_ERR +#undef __CREATE_SUBBUFFER_ERR +#undef __CREATE_IMAGE2D_ERR +#undef __CREATE_IMAGE3D_ERR +#undef __CREATE_SAMPLER_ERR +#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR + +#undef __CREATE_USER_EVENT_ERR +#undef __SET_USER_EVENT_STATUS_ERR +#undef __SET_EVENT_CALLBACK_ERR +#undef __SET_PRINTF_CALLBACK_ERR + +#undef __WAIT_FOR_EVENTS_ERR + +#undef __CREATE_KERNEL_ERR +#undef __SET_KERNEL_ARGS_ERR +#undef __CREATE_PROGRAM_WITH_SOURCE_ERR +#undef __CREATE_PROGRAM_WITH_BINARY_ERR +#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR +#undef __BUILD_PROGRAM_ERR +#undef __CREATE_KERNELS_IN_PROGRAM_ERR + +#undef __CREATE_COMMAND_QUEUE_ERR +#undef __SET_COMMAND_QUEUE_PROPERTY_ERR +#undef __ENQUEUE_READ_BUFFER_ERR +#undef __ENQUEUE_WRITE_BUFFER_ERR +#undef __ENQUEUE_READ_BUFFER_RECT_ERR +#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR +#undef __ENQEUE_COPY_BUFFER_ERR +#undef __ENQEUE_COPY_BUFFER_RECT_ERR +#undef __ENQUEUE_READ_IMAGE_ERR +#undef __ENQUEUE_WRITE_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR +#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR +#undef __ENQUEUE_MAP_BUFFER_ERR +#undef __ENQUEUE_MAP_IMAGE_ERR +#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR +#undef __ENQUEUE_NDRANGE_KERNEL_ERR +#undef __ENQUEUE_TASK_ERR +#undef __ENQUEUE_NATIVE_KERNEL + +#undef __CL_EXPLICIT_CONSTRUCTORS + +#undef __UNLOAD_COMPILER_ERR +#endif //__CL_USER_OVERRIDE_ERROR_STRINGS + +#undef __CL_FUNCTION_TYPE + +// Extensions +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_VERSION_1_1) +#undef __INIT_CL_EXT_FCN_PTR +#endif // #if defined(CL_VERSION_1_1) +#undef __CREATE_SUB_DEVICES + +#if defined(USE_CL_DEVICE_FISSION) +#undef __PARAM_NAME_DEVICE_FISSION +#endif // USE_CL_DEVICE_FISSION + +#undef __DEFAULT_NOT_INITIALIZED +#undef __DEFAULT_BEING_INITIALIZED +#undef __DEFAULT_INITIALIZED + +#undef CL_HPP_RVALUE_REFERENCES_SUPPORTED +#undef CL_HPP_NOEXCEPT + +} // namespace cl + +#endif // CL_HPP_ diff --git a/external_libraries/vexcl/CMakeLists.txt b/external_libraries/vexcl/CMakeLists.txt new file mode 100644 index 000000000000..b500521006c3 --- /dev/null +++ b/external_libraries/vexcl/CMakeLists.txt @@ -0,0 +1,359 @@ +cmake_minimum_required(VERSION 3.1) + +if(${CMAKE_VERSION} VERSION_LESS 3.12) + cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}) +else() + cmake_policy(VERSION 3.12) +endif() + +if (NOT CMAKE_BUILD_TYPE) + message(STATUS "No build type selected, default to RelWithDebInfo") + set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Build type") +endif() + +project(VexCL) + +set(VEXCL_MASTER_PROJECT OFF) +if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + set(VEXCL_MASTER_PROJECT ON) +endif() + +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) + +#---------------------------------------------------------------------------- +# Compile-time options +#---------------------------------------------------------------------------- +option(VEXCL_SHOW_KERNELS "Show generated kernels in tests and examples" OFF) +option(VEXCL_CACHE_KERNELS "Cache compiled kernels offline" ON) +option(VEXCL_SHOW_COPIES "Log vector copies to stdout for debugging purposes" OFF) +option(VEXCL_AMD_SI_WORKAROUND "Implement workaround for AMD SI GPUs" OFF) +set(VEXCL_CHECK_SIZES 0 CACHE STRING "Check that expressions have correct sizes") + +#---------------------------------------------------------------------------- +# Installation options +#---------------------------------------------------------------------------- +option(VEXCL_INSTALL_CL_HPP "Install the OpenCL C++ header provided by VexCL" OFF) + +#---------------------------------------------------------------------------- +# Find Boost +#---------------------------------------------------------------------------- +option(Boost_USE_STATIC_LIBS "Use static versions of Boost libraries" OFF) +if (WIN32) + set(Boost_USE_STATIC_LIBS ON) +endif () + +find_package(Boost REQUIRED COMPONENTS + chrono + date_time + filesystem + program_options + system + thread + unit_test_framework + ) + +# Ensure all targets are available +include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLBoostTargets.cmake") + +#---------------------------------------------------------------------------- +# Generic target +#---------------------------------------------------------------------------- +add_library(Common INTERFACE) +add_library(VexCL::Common ALIAS Common) + +target_compile_features(Common INTERFACE + cxx_auto_type + cxx_nullptr + cxx_rvalue_references + cxx_right_angle_brackets + cxx_static_assert + cxx_variadic_templates + cxx_decltype +) + +if (VEXCL_AMD_SI_WORKAROUND) + target_compile_definitions(Common INTERFACE VEXCL_AMD_SI_WORKAROUND) +endif() + +target_include_directories(Common INTERFACE + $ + $ + ) + + +target_link_libraries(Common INTERFACE + Boost::filesystem + Boost::system + Boost::thread + ) + +if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + target_link_libraries(Common INTERFACE + Boost::chrono + Boost::date_time + ) +endif() + +target_compile_options(Common INTERFACE + # g++ + $<$:$> + $<$:-Wno-missing-braces> + $<$:-Wno-deprecated-declarations> + $<$:-Wno-ignored-attributes> + $<$:-Wno-unused-local-typedefs> + # Clang + $<$:$> + $<$:-Wno-missing-braces> + $<$:-Wno-deprecated-declarations> + $<$:-Wno-ignored-attributes> + # MSVC + $<$:/bigobj> + $<$:/wd4003> + $<$:/wd4996> + ) + +target_compile_definitions(Common INTERFACE + # MSVC + $<$:NOMINMAX> + $<$:_VARIADIC_MAX=10> + ) + +#---------------------------------------------------------------------------- +# Find VexCL backends +#---------------------------------------------------------------------------- +find_package(OpenCL) + +# Ensure all targets are available +include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLOpenCLTarget.cmake") + +if(OpenCL_FOUND) + add_library(OpenCL INTERFACE) + add_library(VexCL::OpenCL ALIAS OpenCL) + + target_link_libraries(OpenCL INTERFACE Common OpenCL::OpenCL) + target_compile_definitions(OpenCL INTERFACE VEXCL_BACKEND_OPENCL) + + target_compile_options(Common INTERFACE + $<$:-Wno-catch-value> + ) + + message(STATUS "Found VexCL::OpenCL") + + if (NOT "${Boost_VERSION}" STRLESS "106100") + add_library(Compute INTERFACE) + add_library(VexCL::Compute ALIAS Compute) + + target_link_libraries(Compute INTERFACE Common OpenCL::OpenCL) + target_compile_definitions(Compute INTERFACE VEXCL_BACKEND_COMPUTE) + + message(STATUS "Found VexCL::Compute") + endif() +endif() + +find_package(CUDA) +if(CUDA_FOUND) + add_library(CUDA INTERFACE) + add_library(VexCL::CUDA ALIAS CUDA) + + target_include_directories(CUDA INTERFACE "${CUDA_INCLUDE_DIRS}") + target_link_libraries(CUDA INTERFACE Common "${CUDA_CUDA_LIBRARY}") + target_compile_definitions(CUDA INTERFACE VEXCL_BACKEND_CUDA) + + message(STATUS "Found VexCL::CUDA") +endif() + +if (NOT "${Boost_VERSION}" STRLESS "106100") + + if(OpenCL_INCLUDE_DIR) + add_library(JIT INTERFACE) + target_include_directories(JIT INTERFACE "${OpenCL_INCLUDE_DIR}") + else() + include(CheckIncludeFile) + check_include_file("CL/cl_platform.h" HAVE_OpenCL_PLATFORM_H) + if(HAVE_OpenCL_PLATFORM_H) + add_library(JIT INTERFACE) + endif() + endif() + + if(NOT TARGET JIT) + message(WARNING "The JIT interface requires OpenCL headers to be available." + "You can download them from https://github.com/KhronosGroup/OpenCL-Headers" + "Set OpenCL_INCLUDE_DIR to the location of the headers." + "For now, disabling the JIT target.") + endif() +endif() + + +if(TARGET JIT) + add_library(VexCL::JIT ALIAS JIT) + + set(VEXCL_JIT_COMPILER_FLAGS "" CACHE STRING "VexCL JIT compiler flags") + target_compile_definitions(JIT INTERFACE VEXCL_JIT_COMPILER_FLAGS=${VEXCL_JIT_COMPILER_FLAGS}) + + find_package(OpenMP) + + # Have to check several OPENMP_FOUND due to bug in + # one version of CMake and the docs (fixed in patch release) + # OpenMP is missing on macOS llvm default, for example + if(OpenMP_FOUND OR OPENMP_FOUND OR OpenMP_CXX_FOUND) + + # CMake 3.9 FindOpenMP allows correct linking with Clang in more cases + if(TARGET OpenMP::OpenMP_CXX) + target_link_libraries(JIT INTERFACE OpenMP::OpenMP_CXX Common) + else() + # Clang may need -fopenmp=libiomp5 instead, can't be detected here without CMake 3.9 + target_link_libraries(JIT INTERFACE + $<$:${OpenMP_CXX_FLAGS}> + $<$:${OpenMP_CXX_FLAGS}> + $<$:${OpenMP_CXX_FLAGS}> + ) + target_compile_options(JIT INTERFACE ${OpenMP_CXX_FLAGS}) + endif() + + set(VEXCL_OMP_FLAGS "${OpenMP_CXX_FLAGS}") + + # We only need to add libraries to link to if this is using a preprocessor only OpenMP flag + if("${OpenMP_CXX_FLAGS}" MATCHES ".*X(clang|preprocessor).*") + foreach(item ${OpenMP_CXX_LIBRARIES}) + set(VEXCL_OMP_FLAGS "${VEXCL_OMP_FLAGS} ${item}") + endforeach() + endif() + + # Pass the required flags to code + target_compile_definitions(JIT INTERFACE VEXCL_OMP_FLAGS=${VEXCL_OMP_FLAGS}) + endif() + + target_link_libraries(JIT INTERFACE Common ${CMAKE_DL_LIBS}) + target_compile_definitions(JIT INTERFACE VEXCL_BACKEND_JIT) + + message(STATUS "Found VexCL::JIT") +endif() + +include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLTools.cmake") + +#---------------------------------------------------------------------------- +if (VEXCL_MASTER_PROJECT) + option(VEXCL_BUILD_TESTS OFF) + option(VEXCL_BUILD_EXAMPLES OFF) + + set(VEXCL_BACKEND "OpenCL" CACHE STRING "Select VexCL backend (OpenCL/CUDA/Compute/JIT/All)") + set_property(CACHE VEXCL_BACKEND PROPERTY STRINGS "All" "OpenCL" "CUDA" "Compute" "JIT") + + if("${VEXCL_BACKEND}" STREQUAL "OpenCL") + add_library(VexCL::Backend ALIAS OpenCL) + elseif("${VEXCL_BACKEND}" STREQUAL "Compute") + add_library(VexCL::Backend ALIAS Compute) + elseif("${VEXCL_BACKEND}" STREQUAL "CUDA") + add_library(VexCL::Backend ALIAS CUDA) + elseif("${VEXCL_BACKEND}" STREQUAL "JIT") + add_library(VexCL::Backend ALIAS JIT) + endif() + + #------------------------------------------------------------------------ + # Interoperation with Boost.compute + #------------------------------------------------------------------------ + option(VEXCL_HAVE_BOOST_COMPUTE "Use Boost.Compute algorithms" OFF) + if (VEXCL_HAVE_BOOST_COMPUTE) + find_path(BOOST_COMPUTE_INCLUDE boost/compute.hpp) + + add_library(compute_target INTERFACE) + target_include_directories(compute_target INTERFACE ${BOOST_COMPUTE_INCLUDE}) + target_compile_definitions(compute_target INTERFACE VEXCL_HAVE_BOOST_COMPUTE) + endif () + + #------------------------------------------------------------------------ + # Interoperation with clogs + #------------------------------------------------------------------------ + option(VEXCL_CLOGS "Use clogs algorithms" OFF) + if (VEXCL_CLOGS) + find_path(CLOGS_INCLUDE clogs/clogs.h) + find_library(CLOGS_LIBRARY clogs) + + add_library(clogs_target INTERFACE) + target_include_directories(clogs_target INTERFACE ${CLOGS_INCLUDE}) + target_compile_definitions(clogs_target INTERFACE VEXCL_HAVE_CLOGS) + target_link_libraries(clogs_target INTERFACE ${CLOGS_LIBRARY}) + endif () + + if (VEXCL_BUILD_TESTS) + enable_testing() + add_subdirectory(tests) + endif() + + if (VEXCL_BUILD_EXAMPLES) + add_subdirectory(examples) + endif() + + add_subdirectory(docs) + + install(DIRECTORY vexcl DESTINATION include) + + install(TARGETS Common EXPORT VexCLTargets) + + if (TARGET VexCL::OpenCL) + if(VEXCL_INSTALL_CL_HPP) + install(DIRECTORY CL DESTINATION include) + endif() + install(TARGETS OpenCL EXPORT VexCLTargets) + endif() + + if (TARGET VexCL::Compute) + install(TARGETS Compute EXPORT VexCLTargets) + endif() + + if (TARGET VexCL::CUDA) + install(TARGETS CUDA EXPORT VexCLTargets) + endif() + + if (TARGET VexCL::JIT) + install(TARGETS JIT EXPORT VexCLTargets) + endif() + + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLConfig.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/cmake/VexCLConfig.cmake" + @ONLY + ) + + # Copies needed so that VexCLConfig can find these files + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLTools.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/cmake/VexCLTools.cmake" + COPYONLY + ) + + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLOpenCLTarget.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/cmake/VexCLOpenCLTarget.cmake" + COPYONLY + ) + + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLBoostTargets.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/cmake/VexCLBoostTargets.cmake" + COPYONLY + ) + + export(EXPORT VexCLTargets + FILE "${CMAKE_CURRENT_BINARY_DIR}/cmake/VexCLTargets.cmake" + NAMESPACE VexCL:: + ) + + export(PACKAGE VexCL) + + install(EXPORT VexCLTargets + FILE VexCLTargets.cmake + NAMESPACE VexCL:: + DESTINATION share/vexcl/cmake + ) + + install( + FILES + ${CMAKE_CURRENT_BINARY_DIR}/cmake/VexCLConfig.cmake + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLTools.cmake + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLOpenCLTarget.cmake + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VexCLBoostTargets.cmake + DESTINATION + share/vexcl/cmake + ) +endif() diff --git a/external_libraries/vexcl/DEPENDS.md b/external_libraries/vexcl/DEPENDS.md new file mode 100644 index 000000000000..f220bf1337a3 --- /dev/null +++ b/external_libraries/vexcl/DEPENDS.md @@ -0,0 +1,10 @@ +Dependencies +------------ + +VexCL depends on [Boost](http://www.boost.org). In particular, you may need to +link to Boost.chrono, Boost.date_time, Boost.filesystem, Boost.system, +Boost.thread. + +Boost versions 1.48 and above should work. However, there is a known issue with +Boost v1.49 through 1.51 resulting in a segmentation fault in code that uses +sparse matrix vector products or stencil convolutions. diff --git a/external_libraries/vexcl/INSTALL.md b/external_libraries/vexcl/INSTALL.md new file mode 100644 index 000000000000..e932d6fcb45a --- /dev/null +++ b/external_libraries/vexcl/INSTALL.md @@ -0,0 +1,54 @@ +# Installation + +Since VexCL is header-only library, installation is straightforward: you just +need to copy vexcl folder somewhere and tell your compiler to scan it for +include files. + +* [Gentoo Linux ebuild](https://github.com/ddemidov/ebuilds/blob/master/dev-util/vexcl) +* [Arch Linux PKGBUILD](https://aur.archlinux.org/packages/vexcl-git) + + +## Building boost on OSX + +_Here are instructions that worked for at least one of VexCL users on a MacOSX +system (thanks, @d-meiser!):_ + +Because VexCL requires c++11 features I had to tweak the boost libraries on my +system a bit. The following instructions worked for me. I build boost version +1.53. Download boost sources from sourceforge +(http://sourceforge.net/projects/boost/files/boost/1.53.0/). In the boost +source directory do: + + ./bootstrap.sh --with-toolset=clang --prefix=${BOOST_DIR} + ./b2 link=shared threading=multi variant=release -s NO_COMPRESSION=1 \ + --layout=system --without-mpi --prefix=${BOOST_DIR} \ + toolset=clang \ + cxxflags="-std=c++ -stdlib=libc++" \ + linkflags="-stdlib=libc++" \ + install + +`${BOOST_DIR}` is the boost installation directory. I install in my home +directory just so it's easier for me to uninstall and so there are no +unintended interactions with the system boost libraries. The `cxxflags` and +`linkflags` were necessary to run the tests. Without them the boost unit test +framework segfaults + + +## Making cmake find boost headers and libraries + +It took me a while to figure out how to make cmake find the right boost +libraries instead of the system libraries. The following command line worked +for me + + cmake \ + -DBoost_NO_BOOST_CMAKE:BOOL=TRUE \ + -DBoost_NO_SYSTEM_PATHS:BOOL=TRUE \ + -DBOOST_ROOT=${BOOST_DIR} \ + -DUSE_LIBCPP:BOOL=TRUE \ + ${VEXCL_SOURCE_DIR} + +`${BOOST_DIR}` is the location of your boost library, e.g. the one installed +here. `${VEXCL_SOURCE_DIR}` is the location of the VexCL sources. Note that +without the `-DBoost_NO_BOOST_CMAKE:BOOL=TRUE` flag the `-DBOOST_ROOT` option +was being ignored. Without `-DUSE_LIBCPP:BOOL=TRUE` the `tuple` header was not +found. diff --git a/external_libraries/vexcl/LICENSE.md b/external_libraries/vexcl/LICENSE.md new file mode 100644 index 000000000000..1a077d0439b6 --- /dev/null +++ b/external_libraries/vexcl/LICENSE.md @@ -0,0 +1,22 @@ +The MIT License +--------------- + +Copyright (c) 2012-2018 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/external_libraries/vexcl/README.md b/external_libraries/vexcl/README.md new file mode 100644 index 000000000000..b7d0ade21d82 --- /dev/null +++ b/external_libraries/vexcl/README.md @@ -0,0 +1,19 @@ +# VexCL + +[![Documentation Status](https://readthedocs.org/projects/vexcl/badge/?version=latest)](http://vexcl.readthedocs.io/en/latest/?badge=latest) +[![DOI](https://zenodo.org/badge/4479068.svg)](https://zenodo.org/badge/latestdoi/4479068) +[![Build Status](https://travis-ci.org/ddemidov/vexcl.svg?branch=master)](https://travis-ci.org/ddemidov/vexcl) +[![Build status](https://ci.appveyor.com/api/projects/status/nmky93cldeywgru7?svg=true)](https://ci.appveyor.com/project/ddemidov/vexcl) +[![codecov](https://codecov.io/gh/ddemidov/vexcl/branch/master/graph/badge.svg)](https://codecov.io/gh/ddemidov/vexcl) +[![Coverity Scan Build Status](https://scan.coverity.com/projects/5306/badge.svg)](https://scan.coverity.com/projects/5306) + + +VexCL is a vector expression template library for OpenCL/CUDA. It has been +created for ease of GPGPU development with C++. VexCL strives to reduce amount +of boilerplate code needed to develop GPGPU applications. The library provides +convenient and intuitive notation for vector arithmetic, reduction, sparse +matrix-vector products, etc. Multi-device and even multi-platform computations +are supported. The source code of the library is distributed under very +permissive MIT license. + +See VexCL documentation at http://vexcl.readthedocs.io/ diff --git a/external_libraries/vexcl/cmake/FindFFTW.cmake b/external_libraries/vexcl/cmake/FindFFTW.cmake new file mode 100644 index 000000000000..1d633c399e71 --- /dev/null +++ b/external_libraries/vexcl/cmake/FindFFTW.cmake @@ -0,0 +1,113 @@ +# - Find the FFTW library +# +# Usage: +# find_package(FFTW [REQUIRED] [QUIET] ) +# +# It sets the following variables: +# FFTW_FOUND ... true if fftw is found on the system +# FFTW_LIBRARIES ... full path to fftw library +# FFTW_INCLUDES ... fftw include directory +# +# The following variables will be checked by the function +# FFTW_USE_STATIC_LIBS ... if true, only static libraries are found +# FFTW_ROOT ... if set, the libraries are exclusively searched +# under this path +# FFTW_LIBRARY ... fftw library to use +# FFTW_INCLUDE_DIR ... fftw include directory +# + +#If environment variable FFTWDIR is specified, it has same effect as FFTW_ROOT +if( NOT FFTW_ROOT AND ENV{FFTWDIR} ) + set( FFTW_ROOT $ENV{FFTWDIR} ) +endif() + +# Check if we can use PkgConfig +find_package(PkgConfig QUIET) + +#Determine from PKG +if( PKG_CONFIG_FOUND AND NOT FFTW_ROOT ) + pkg_check_modules( PKG_FFTW QUIET "fftw3" ) +endif() + +#Check whether to search static or dynamic libs +set( CMAKE_FIND_LIBRARY_SUFFIXES_SAV ${CMAKE_FIND_LIBRARY_SUFFIXES} ) + +if( ${FFTW_USE_STATIC_LIBS} ) + set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ) +else() + set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_SHARED_LIBRARY_SUFFIX} ) +endif() + +if( FFTW_ROOT ) + + find_library( + FFTW_LIB + NAMES "fftw3" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTWF_LIB + NAMES "fftw3f" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTWF_OMP_LIB + NAMES "fftw3f_omp" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_path( + FFTW_INCLUDES + NAMES "fftw3.h" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "include" + NO_DEFAULT_PATH + ) + +else() + + find_library( + FFTW_LIB + NAMES "fftw3" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library( + FFTWF_LIB + NAMES "fftw3f" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + + find_library( + FFTWF_OMP_LIB + NAMES "fftw3f_omp" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_path( + FFTW_INCLUDES + NAMES "fftw3.h" + PATHS ${PKG_FFTW_INCLUDE_DIRS} ${INCLUDE_INSTALL_DIR} + ) + +endif( FFTW_ROOT ) + +set(FFTW_LIBRARIES ${FFTW_LIB} ${FFTWF_LIB} ${FFTWF_OMP_LIB}) + +set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_SAV} ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(FFTW DEFAULT_MSG + FFTW_INCLUDES FFTW_LIBRARIES) + +mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES) + diff --git a/external_libraries/vexcl/cmake/VexCLBoostTargets.cmake b/external_libraries/vexcl/cmake/VexCLBoostTargets.cmake new file mode 100644 index 000000000000..83207503e083 --- /dev/null +++ b/external_libraries/vexcl/cmake/VexCLBoostTargets.cmake @@ -0,0 +1,46 @@ + +# Support for CMake < 3.6 or so +# Unlike the real targets in CMake 3.6+, this does not resolve all +# dependencies based on Boost version. Use with care. +# This also may be used when CMake < 3.11 (3.10?) and CMake is older than Boost + +if(NOT TARGET Boost::system) + add_library(Boost::system IMPORTED INTERFACE) + set_target_properties(Boost::system PROPERTIES + INTERFACE_LINK_LIBRARIES "${Boost_SYSTEM_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${Boost_INCLUDE_DIRS}" + ) +endif() + +if(NOT TARGET Boost::filesystem) + add_library(Boost::filesystem IMPORTED INTERFACE) + set_target_properties(Boost::filesystem PROPERTIES + INTERFACE_LINK_LIBRARIES "${Boost_FILESYSTEM_LIBRARY};Boost::system" + INTERFACE_INCLUDE_DIRECTORIES "${Boost_INCLUDE_DIRS}" + ) +endif() + +if(NOT TARGET Boost::thread) + find_package(Threads) + add_library(Boost::thread IMPORTED INTERFACE) + set_target_properties(Boost::thread PROPERTIES + INTERFACE_LINK_LIBRARIES "${Boost_THREAD_LIBRARY};Threads::Threads" + INTERFACE_INCLUDE_DIRECTORIES "${Boost_INCLUDE_DIRS}" + ) +endif() + +if(NOT TARGET Boost::date_time) + add_library(Boost::date_time IMPORTED INTERFACE) + set_target_properties(Boost::date_time PROPERTIES + INTERFACE_LINK_LIBRARIES "${Boost_DATE_TIME_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${Boost_INCLUDE_DIRS}" + ) +endif() + +if(NOT TARGET Boost::chrono) + add_library(Boost::chrono IMPORTED INTERFACE) + set_target_properties(Boost::chrono PROPERTIES + INTERFACE_LINK_LIBRARIES "${Boost_CHRONO_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${Boost_INCLUDE_DIRS}" + ) +endif() diff --git a/external_libraries/vexcl/cmake/VexCLConfig.cmake.in b/external_libraries/vexcl/cmake/VexCLConfig.cmake.in new file mode 100644 index 000000000000..e719db40c60e --- /dev/null +++ b/external_libraries/vexcl/cmake/VexCLConfig.cmake.in @@ -0,0 +1,75 @@ +# Distributed under the MIT License. +# Copyright (c) 2012-2018 Denis Demidov + +#.rst +# FindVexCL +# --------- +# +# Try to find VexCL include dirs and libraries. Use this module by invoking +# find_package with the form:: +# +# find_package(VexCL +# [REQUIRED] # Fail with error if VexCL is not found +# [COMPONENTS ...] # Find the specified VexCL backends +# ) +# +# The module defines the following :prop_tgt:`IMPORTED` targets:: +# +# VexCL::OpenCL +# VexCL::Compute +# VexCL::CUDA +# VexCL::JIT + +include(CMakeFindDependencyMacro) + +# Capture the OpenCL copy used in original VexCL build +if(@OpenCL_FOUND@) + set(OpenCL_INCLUDE_DIR "@OpenCL_INCLUDE_DIR@") + set(OpenCL_LIBRARY "@OpenCL_LIBRARY@") + find_package(OpenCL) + + # Support for CMake < 3.7 + include("${CMAKE_CURRENT_LIST_DIR}/VexCLOpenCLTarget.cmake") +endif() + + +find_package(OpenMP) + + +# Capture the Boost copy used in original VexCL build (and main setting) +set(Boost_USE_STATIC_LIBS @Boost_USE_STATIC_LIBS@) +if(NOT BOOST_ROOT) + set(BOOST_ROOT "@BOOST_ROOT@") +endif() + +find_package(Boost REQUIRED COMPONENTS + chrono + date_time + filesystem + program_options + system + thread + unit_test_framework + ) + +include("${CMAKE_CURRENT_LIST_DIR}/VexCLBoostTargets.cmake") + +include("${CMAKE_CURRENT_LIST_DIR}/VexCLTargets.cmake") + +if(TARGET VexCL::OpenCL) + message(STATUS "Found VexCL::OpenCL") +endif() + +if(TARGET VexCL::Compute) + message(STATUS "Found VexCL::Compute") +endif() + +if(TARGET VexCL::CUDA) + message(STATUS "Found VexCL::CUDA") +endif() + +if(TARGET VexCL::JIT) + message(STATUS "Found VexCL::JIT") +endif() + +include("${CMAKE_CURRENT_LIST_DIR}/VexCLTools.cmake") diff --git a/external_libraries/vexcl/cmake/VexCLOpenCLTarget.cmake b/external_libraries/vexcl/cmake/VexCLOpenCLTarget.cmake new file mode 100644 index 000000000000..0d020e45b349 --- /dev/null +++ b/external_libraries/vexcl/cmake/VexCLOpenCLTarget.cmake @@ -0,0 +1,16 @@ +# Copied from CMake FindOpenCL +# Can be removed if CMake >= 3.7 becomes the minimum required CMake. + +if(OpenCL_FOUND AND NOT TARGET OpenCL::OpenCL) + if(OpenCL_LIBRARY MATCHES "/([^/]+)\\.framework$") + add_library(OpenCL::OpenCL INTERFACE IMPORTED) + set_target_properties(OpenCL::OpenCL PROPERTIES + INTERFACE_LINK_LIBRARIES "${OpenCL_LIBRARY}") + else() + add_library(OpenCL::OpenCL UNKNOWN IMPORTED) + set_target_properties(OpenCL::OpenCL PROPERTIES + IMPORTED_LOCATION "${OpenCL_LIBRARY}") + endif() + set_target_properties(OpenCL::OpenCL PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${OpenCL_INCLUDE_DIRS}") +endif() diff --git a/external_libraries/vexcl/cmake/VexCLTools.cmake b/external_libraries/vexcl/cmake/VexCLTools.cmake new file mode 100644 index 000000000000..8cd1e9ebbc12 --- /dev/null +++ b/external_libraries/vexcl/cmake/VexCLTools.cmake @@ -0,0 +1,58 @@ +# Distributed under the MIT License. +# Copyright (c) 2012-2018 Denis Demidov + +function(vexcl_add_executables targetname) + add_library(${targetname} INTERFACE) + + if(TARGET VexCL::OpenCL) + add_executable(${targetname}_cl ${ARGN}) + target_link_libraries(${targetname}_cl PUBLIC VexCL::OpenCL ${targetname}) + set_target_properties(${targetname}_cl PROPERTIES CXX_EXTENSIONS OFF) + endif() + if(TARGET VexCL::Compute) + add_executable(${targetname}_comp ${ARGN}) + target_link_libraries(${targetname}_comp PUBLIC VexCL::Compute ${targetname}) + set_target_properties(${targetname}_comp PROPERTIES CXX_EXTENSIONS OFF) + endif() + if(TARGET VexCL::CUDA) + add_executable(${targetname}_cuda ${ARGN}) + target_link_libraries(${targetname}_cuda PUBLIC VexCL::CUDA ${targetname}) + set_target_properties(${targetname}_cuda PROPERTIES CXX_EXTENSIONS OFF) + endif() + if(TARGET VexCL::JIT) + add_executable(${targetname}_jit ${ARGN}) + target_link_libraries(${targetname}_jit PUBLIC VexCL::JIT ${targetname}) + set_target_properties(${targetname}_jit PROPERTIES CXX_EXTENSIONS OFF) + endif() +endfunction() + +function(vexcl_add_libraries targetname typeoflib) + add_library(${targetname} INTERFACE) + add_library(${targetname}::Common ALIAS ${targetname}) + + if(TARGET VexCL::OpenCL) + add_library(${targetname}_cl ${TYPEOFLIB} ${ARGN}) + target_link_libraries(${targetname}_cl PUBLIC VexCL::OpenCL ${targetname}) + set_target_properties(${targetname}_cl PROPERTIES CXX_EXTENSIONS OFF) + add_library(${targetname}::OpenCl ALIAS ${targetname}_cl) + endif() + if(TARGET VexCL::Compute) + add_library(${targetname}_comp ${TYPEOFLIB} ${ARGN}) + target_link_libraries(${targetname}_comp PUBLIC VexCL::Compute ${targetname}) + set_target_properties(${targetname}_comp PROPERTIES CXX_EXTENSIONS OFF) + add_library(${targetname}::Compute ALIAS ${targetname}_comp) + endif() + if(TARGET VexCL::CUDA) + add_library(${targetname}_cuda ${TYPEOFLIB} ${ARGN}) + target_link_libraries(${targetname}_cuda PUBLIC VexCL::CUDA ${targetname}) + set_target_properties(${targetname}_cuda PROPERTIES CXX_EXTENSIONS OFF) + add_library(${targetname}::CUDA ALIAS ${targetname}_cuda) + endif() + if(TARGET VexCL::JIT) + add_library(${targetname}_jit ${TYPEOFLIB} ${ARGN}) + target_link_libraries(${targetname}_jit PUBLIC VexCL::JIT ${targetname}) + set_target_properties(${targetname}_jit PROPERTIES CXX_EXTENSIONS OFF) + add_library(${targetname}::JIT ALIAS ${targetname}_jit) + endif() +endfunction() + diff --git a/external_libraries/vexcl/docs/.gitignore b/external_libraries/vexcl/docs/.gitignore new file mode 100644 index 000000000000..5cc4098ec6e0 --- /dev/null +++ b/external_libraries/vexcl/docs/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +_build +xml diff --git a/external_libraries/vexcl/docs/CMakeLists.txt b/external_libraries/vexcl/docs/CMakeLists.txt new file mode 100644 index 000000000000..5bac857ac90d --- /dev/null +++ b/external_libraries/vexcl/docs/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 2.6) +project(docs) + +find_package(Doxygen QUIET) + +if (DOXYGEN_FOUND) + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/conf.py + ${CMAKE_CURRENT_BINARY_DIR}/conf.py + COPYONLY + ) + + add_custom_target(doxygen + COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Build Doxygen-generated documentation" + ) + + add_custom_target(doc + COMMAND sphinx-build -b html -c ${CMAKE_CURRENT_SOURCE_DIR} . ${CMAKE_CURRENT_BINARY_DIR}/html + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS doxygen + COMMENT "Generate HTML documentation" + ) +endif() diff --git a/external_libraries/vexcl/docs/Doxyfile b/external_libraries/vexcl/docs/Doxyfile new file mode 100644 index 000000000000..5605ea676057 --- /dev/null +++ b/external_libraries/vexcl/docs/Doxyfile @@ -0,0 +1,2407 @@ +# Doxyfile 1.8.11 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = VexCL + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "C++ vector expression template library for OpenCL/CUDA" + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful is your file systems doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = "rst=\verbatim embed:rst:leading-asterisks" +ALIASES += "endrst=\endverbatim" + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. In the later case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = YES + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = YES + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO, these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES, upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = YES + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = NO + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = NO + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = NO + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text " + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING +# Note: If this tag is empty the current directory is searched. + +INPUT = ../vexcl + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, +# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f, *.for, *.tcl, +# *.vhd, *.vhdl, *.ucf, *.qsf, *.as and *.js. + +FILE_PATTERNS = *.hpp + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# +# +# where is the value of the INPUT_FILTER tag, and is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = NO + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = NO + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 10 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = YES + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = hhc + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the master .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = CP1251 + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 12 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want to formulas look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use + S +# (what the is depends on the OS and browser, but it is typically +# , /