Skip to content

Commit

Permalink
Refining defines for CUDA compilation mode
Browse files Browse the repository at this point in the history
  • Loading branch information
sithhell committed Sep 5, 2017
1 parent 75c9475 commit 96f5de4
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 12 deletions.
26 changes: 22 additions & 4 deletions hpx/compute/cuda/allocator.hpp
Expand Up @@ -168,8 +168,14 @@ namespace hpx { namespace compute { namespace cuda
// Constructs count objects of type T in allocated uninitialized
// storage pointed to by p, using placement-new
template <typename ... Args>
void bulk_construct(pointer p, std::size_t count, Args &&... args)
HPX_HOST_DEVICE void bulk_construct(pointer p, std::size_t count, Args &&... args)
{
#if defined(HPX_COMPUTE_DEVICE_CODE)
for (std::size_t idx = 0; idx < count; ++idx)
{
::new (p + idx) T (std::forward<Args>(args)...);
}
#else
int threads_per_block = (std::min)(1024, int(count));
int num_blocks =
int((count + threads_per_block - 1) / threads_per_block);
Expand All @@ -186,13 +192,17 @@ namespace hpx { namespace compute { namespace cuda
},
p.device_ptr(), count, std::forward<Args>(args)...);
target_.synchronize();
#endif
}

// Constructs an object of type T in allocated uninitialized storage
// pointed to by p, using placement-new
template <typename ... Args>
void construct(pointer p, Args &&... args)
HPX_HOST_DEVICE void construct(pointer p, Args &&... args)
{
#if defined(HPX_COMPUTE_DEVICE_CODE)
::new (p) T (std::forward<Args>(args)...);
#else
detail::launch(
target_, 1, 1,
[] HPX_DEVICE (T* p, Args const&... args)
Expand All @@ -201,11 +211,18 @@ namespace hpx { namespace compute { namespace cuda
},
p.device_ptr(), std::forward<Args>(args)...);
target_.synchronize();
#endif
}

// Calls the destructor of count objects pointed to by p
void bulk_destroy(pointer p, std::size_t count)
HPX_HOST_DEVICE void bulk_destroy(pointer p, std::size_t count)
{
#if defined(HPX_COMPUTE_DEVICE_CODE)
for (std::size_t idx = 0; idx < count; ++idx)
{
(p + idx)->~T();
}
#else
int threads_per_block = (std::min)(1024, int(count));
// Round the block count up, exactly as bulk_construct does. The former
// "(count + threads_per_block) / threads_per_block - 1" is
// floor(count / threads_per_block), which launches fewer threads than
// there are elements whenever count is not a multiple of the block size
// (e.g. count == 1025 yielded one block of 1024 threads).
int num_blocks =
int((count + threads_per_block - 1) / threads_per_block);
Expand All @@ -222,10 +239,11 @@ namespace hpx { namespace compute { namespace cuda
},
p.device_ptr(), count);
target_.synchronize();
#endif
}

// Calls the destructor of the object pointed to by p
void destroy(pointer p)
HPX_HOST_DEVICE void destroy(pointer p)
{
bulk_destroy(p, 1);
}
Expand Down
2 changes: 1 addition & 1 deletion hpx/compute/cuda/detail/launch.hpp
Expand Up @@ -97,7 +97,7 @@ namespace hpx { namespace compute { namespace cuda { namespace detail
static_assert(sizeof(Closure) < 256,
"We currently require the closure to be less than 256 bytes");

#if !defined(HPX_COMPUTE_DEVICE_CODE)
#if defined(HPX_COMPUTE_HOST_CODE)
detail::scoped_active_target active(tgt);

launch_function<<<gridDim, blockDim, 0, active.stream()>>>(
Expand Down
31 changes: 24 additions & 7 deletions hpx/compute/cuda/target.hpp
Expand Up @@ -92,34 +92,49 @@ namespace hpx { namespace compute { namespace cuda

// Constructs default target
HPX_HOST_DEVICE target()
: handle_(), locality_(hpx::find_here())
: handle_()
#if !defined(HPX_COMPUTE_DEVICE_CODE)
, locality_(hpx::find_here())
#endif
{}

// Constructs target from a given device ID
explicit HPX_HOST_DEVICE target(int device)
: handle_(device), locality_(hpx::find_here())
: handle_(device)
#if !defined(HPX_COMPUTE_DEVICE_CODE)
, locality_(hpx::find_here())
#endif
{}

HPX_HOST_DEVICE target(hpx::id_type const& locality, int device)
: handle_(device), locality_(locality)
: handle_(device)
#if !defined(HPX_COMPUTE_DEVICE_CODE)
, locality_(locality)
#endif
{}

HPX_HOST_DEVICE target(target const& rhs) noexcept
: handle_(rhs.handle_),
locality_(rhs.locality_)
: handle_(rhs.handle_)
#if !defined(HPX_COMPUTE_DEVICE_CODE)
, locality_(rhs.locality_)
#endif
{}

HPX_HOST_DEVICE target(target && rhs) noexcept
: handle_(std::move(rhs.handle_)),
locality_(std::move(rhs.locality_))
: handle_(std::move(rhs.handle_))
#if !defined(HPX_COMPUTE_DEVICE_CODE)
, locality_(std::move(rhs.locality_))
#endif
{}

// Copy assignment: takes over the handle of rhs and, outside of device
// code, its locality as well. Assigning an object to itself is a no-op.
HPX_HOST_DEVICE target& operator=(target const& rhs) noexcept
{
    // Self-assignment: nothing to copy.
    if (this == &rhs)
    {
        return *this;
    }

    handle_ = rhs.handle_;
#if !defined(HPX_COMPUTE_DEVICE_CODE)
    // locality_ is only copied when HPX_COMPUTE_DEVICE_CODE is not defined.
    locality_ = rhs.locality_;
#endif
    return *this;
}
Expand All @@ -129,7 +144,9 @@ namespace hpx { namespace compute { namespace cuda
if (&rhs != this)
{
handle_ = std::move(rhs.handle_);
#if !defined(HPX_COMPUTE_DEVICE_CODE)
locality_ = std::move(rhs.locality_);
#endif
}
return *this;
}
Expand Down

0 comments on commit 96f5de4

Please sign in to comment.