17ec681f3Smrg//
27ec681f3Smrg// Copyright 2012 Francisco Jerez
37ec681f3Smrg//
47ec681f3Smrg// Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg// copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg// to deal in the Software without restriction, including without limitation
77ec681f3Smrg// the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg// and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg// Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg//
117ec681f3Smrg// The above copyright notice and this permission notice shall be included in
127ec681f3Smrg// all copies or substantial portions of the Software.
137ec681f3Smrg//
147ec681f3Smrg// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
157ec681f3Smrg// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
167ec681f3Smrg// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
177ec681f3Smrg// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
187ec681f3Smrg// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
197ec681f3Smrg// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
207ec681f3Smrg// OTHER DEALINGS IN THE SOFTWARE.
217ec681f3Smrg//
227ec681f3Smrg
237ec681f3Smrg#include <cstring>
247ec681f3Smrg
257ec681f3Smrg#include "util/bitscan.h"
267ec681f3Smrg
277ec681f3Smrg#include "api/dispatch.hpp"
287ec681f3Smrg#include "api/util.hpp"
297ec681f3Smrg#include "core/event.hpp"
307ec681f3Smrg#include "core/memory.hpp"
317ec681f3Smrg
327ec681f3Smrgusing namespace clover;
337ec681f3Smrg
347ec681f3Smrgnamespace {
357ec681f3Smrg   typedef resource::vector vector_t;
367ec681f3Smrg
377ec681f3Smrg   vector_t
387ec681f3Smrg   vector(const size_t *p) {
397ec681f3Smrg      if (!p)
407ec681f3Smrg         throw error(CL_INVALID_VALUE);
417ec681f3Smrg      return range(p, 3);
427ec681f3Smrg   }
437ec681f3Smrg
447ec681f3Smrg   vector_t
457ec681f3Smrg   pitch(const vector_t &region, vector_t pitch) {
467ec681f3Smrg      for (auto x : zip(tail(pitch),
477ec681f3Smrg                        map(multiplies(), region, pitch))) {
487ec681f3Smrg         // The spec defines a value of zero as the natural pitch,
497ec681f3Smrg         // i.e. the unaligned size of the previous dimension.
507ec681f3Smrg         if (std::get<0>(x) == 0)
517ec681f3Smrg            std::get<0>(x) = std::get<1>(x);
527ec681f3Smrg      }
537ec681f3Smrg
547ec681f3Smrg      return pitch;
557ec681f3Smrg   }
567ec681f3Smrg
577ec681f3Smrg   ///
587ec681f3Smrg   /// Size of a region in bytes.
597ec681f3Smrg   ///
607ec681f3Smrg   size_t
617ec681f3Smrg   size(const vector_t &pitch, const vector_t &region) {
627ec681f3Smrg      if (any_of(is_zero(), region))
637ec681f3Smrg         return 0;
647ec681f3Smrg      else
657ec681f3Smrg         return dot(pitch, region - vector_t{ 0, 1, 1 });
667ec681f3Smrg   }
677ec681f3Smrg
687ec681f3Smrg   ///
697ec681f3Smrg   /// Common argument checking shared by memory transfer commands.
707ec681f3Smrg   ///
717ec681f3Smrg   void
727ec681f3Smrg   validate_common(command_queue &q,
737ec681f3Smrg                   const ref_vector<event> &deps) {
747ec681f3Smrg      if (any_of([&](const event &ev) {
757ec681f3Smrg               return ev.context() != q.context();
767ec681f3Smrg            }, deps))
777ec681f3Smrg         throw error(CL_INVALID_CONTEXT);
787ec681f3Smrg   }
797ec681f3Smrg
807ec681f3Smrg   ///
817ec681f3Smrg   /// Common error checking for a buffer object argument.
827ec681f3Smrg   ///
837ec681f3Smrg   void
847ec681f3Smrg   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
857ec681f3Smrg                   const vector_t &pitch, const vector_t &region) {
867ec681f3Smrg      if (mem.context() != q.context())
877ec681f3Smrg         throw error(CL_INVALID_CONTEXT);
887ec681f3Smrg
897ec681f3Smrg      // The region must fit within the specified pitch,
907ec681f3Smrg      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
917ec681f3Smrg         throw error(CL_INVALID_VALUE);
927ec681f3Smrg
937ec681f3Smrg      // ...and within the specified object.
947ec681f3Smrg      if (dot(pitch, origin) + size(pitch, region) > mem.size())
957ec681f3Smrg         throw error(CL_INVALID_VALUE);
967ec681f3Smrg
977ec681f3Smrg      if (any_of(is_zero(), region))
987ec681f3Smrg         throw error(CL_INVALID_VALUE);
997ec681f3Smrg   }
1007ec681f3Smrg
1017ec681f3Smrg   ///
1027ec681f3Smrg   /// Common error checking for an image argument.
1037ec681f3Smrg   ///
1047ec681f3Smrg   void
1057ec681f3Smrg   validate_object(command_queue &q, image &img,
1067ec681f3Smrg                   const vector_t &orig, const vector_t &region) {
1077ec681f3Smrg      vector_t size = { img.width(), img.height(), img.depth() };
1087ec681f3Smrg      const auto &dev = q.device();
1097ec681f3Smrg
1107ec681f3Smrg      if (!dev.image_support())
1117ec681f3Smrg         throw error(CL_INVALID_OPERATION);
1127ec681f3Smrg
1137ec681f3Smrg      if (img.context() != q.context())
1147ec681f3Smrg         throw error(CL_INVALID_CONTEXT);
1157ec681f3Smrg
1167ec681f3Smrg      if (any_of(greater(), orig + region, size))
1177ec681f3Smrg         throw error(CL_INVALID_VALUE);
1187ec681f3Smrg
1197ec681f3Smrg      if (any_of(is_zero(), region))
1207ec681f3Smrg         throw error(CL_INVALID_VALUE);
1217ec681f3Smrg
1227ec681f3Smrg      switch (img.type()) {
1237ec681f3Smrg      case CL_MEM_OBJECT_IMAGE1D: {
1247ec681f3Smrg         const size_t max = dev.max_image_size();
1257ec681f3Smrg         if (img.width() > max)
1267ec681f3Smrg            throw error(CL_INVALID_IMAGE_SIZE);
1277ec681f3Smrg         break;
1287ec681f3Smrg      }
1297ec681f3Smrg      case CL_MEM_OBJECT_IMAGE2D: {
1307ec681f3Smrg         const size_t max = dev.max_image_size();
1317ec681f3Smrg         if (img.width() > max || img.height() > max)
1327ec681f3Smrg            throw error(CL_INVALID_IMAGE_SIZE);
1337ec681f3Smrg         break;
1347ec681f3Smrg      }
1357ec681f3Smrg      case CL_MEM_OBJECT_IMAGE3D: {
1367ec681f3Smrg         const size_t max = dev.max_image_size_3d();
1377ec681f3Smrg         if (img.width() > max || img.height() > max || img.depth() > max)
1387ec681f3Smrg            throw error(CL_INVALID_IMAGE_SIZE);
1397ec681f3Smrg         break;
1407ec681f3Smrg      }
1417ec681f3Smrg      // XXX: Implement missing checks once Clover supports more image types.
1427ec681f3Smrg      default:
1437ec681f3Smrg         throw error(CL_INVALID_IMAGE_SIZE);
1447ec681f3Smrg      }
1457ec681f3Smrg   }
1467ec681f3Smrg
1477ec681f3Smrg   ///
1487ec681f3Smrg   /// Common error checking for a host pointer argument.
1497ec681f3Smrg   ///
1507ec681f3Smrg   void
1517ec681f3Smrg   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
1527ec681f3Smrg                   const vector_t &pitch, const vector_t &region) {
1537ec681f3Smrg      if (!ptr)
1547ec681f3Smrg         throw error(CL_INVALID_VALUE);
1557ec681f3Smrg
1567ec681f3Smrg      // The region must fit within the specified pitch.
1577ec681f3Smrg      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
1587ec681f3Smrg         throw error(CL_INVALID_VALUE);
1597ec681f3Smrg   }
1607ec681f3Smrg
1617ec681f3Smrg   ///
1627ec681f3Smrg   /// Common argument checking for a copy between two buffer objects.
1637ec681f3Smrg   ///
1647ec681f3Smrg   void
1657ec681f3Smrg   validate_copy(command_queue &q, buffer &dst_mem,
1667ec681f3Smrg                 const vector_t &dst_orig, const vector_t &dst_pitch,
1677ec681f3Smrg                 buffer &src_mem,
1687ec681f3Smrg                 const vector_t &src_orig, const vector_t &src_pitch,
1697ec681f3Smrg                 const vector_t &region) {
1707ec681f3Smrg      if (dst_mem == src_mem) {
1717ec681f3Smrg         auto dst_offset = dot(dst_pitch, dst_orig);
1727ec681f3Smrg         auto src_offset = dot(src_pitch, src_orig);
1737ec681f3Smrg
1747ec681f3Smrg         if (interval_overlaps()(
1757ec681f3Smrg                dst_offset, dst_offset + size(dst_pitch, region),
1767ec681f3Smrg                src_offset, src_offset + size(src_pitch, region)))
1777ec681f3Smrg            throw error(CL_MEM_COPY_OVERLAP);
1787ec681f3Smrg      }
1797ec681f3Smrg   }
1807ec681f3Smrg
1817ec681f3Smrg   ///
1827ec681f3Smrg   /// Common argument checking for a copy between two image objects.
1837ec681f3Smrg   ///
1847ec681f3Smrg   void
1857ec681f3Smrg   validate_copy(command_queue &q,
1867ec681f3Smrg                 image &dst_img, const vector_t &dst_orig,
1877ec681f3Smrg                 image &src_img, const vector_t &src_orig,
1887ec681f3Smrg                 const vector_t &region) {
1897ec681f3Smrg      if (dst_img.format() != src_img.format())
1907ec681f3Smrg         throw error(CL_IMAGE_FORMAT_MISMATCH);
1917ec681f3Smrg
1927ec681f3Smrg      if (dst_img == src_img) {
1937ec681f3Smrg         if (all_of(interval_overlaps(),
1947ec681f3Smrg                    dst_orig, dst_orig + region,
1957ec681f3Smrg                    src_orig, src_orig + region))
1967ec681f3Smrg            throw error(CL_MEM_COPY_OVERLAP);
1977ec681f3Smrg      }
1987ec681f3Smrg   }
1997ec681f3Smrg
2007ec681f3Smrg   ///
2017ec681f3Smrg   /// Checks that the host access flags of the memory object are
2027ec681f3Smrg   /// within the allowed set \a flags.
2037ec681f3Smrg   ///
2047ec681f3Smrg   void
2057ec681f3Smrg   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
2067ec681f3Smrg      if (mem.flags() & ~flags &
2077ec681f3Smrg          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
2087ec681f3Smrg           CL_MEM_HOST_NO_ACCESS))
2097ec681f3Smrg         throw error(CL_INVALID_OPERATION);
2107ec681f3Smrg   }
2117ec681f3Smrg
2127ec681f3Smrg   ///
2137ec681f3Smrg   /// Checks that the mapping flags are correct.
2147ec681f3Smrg   ///
2157ec681f3Smrg   void
2167ec681f3Smrg   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
2177ec681f3Smrg      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
2187ec681f3Smrg          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
2197ec681f3Smrg         throw error(CL_INVALID_VALUE);
2207ec681f3Smrg
2217ec681f3Smrg      if (flags & CL_MAP_READ)
2227ec681f3Smrg         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
2237ec681f3Smrg
2247ec681f3Smrg      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
2257ec681f3Smrg         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
2267ec681f3Smrg   }
2277ec681f3Smrg
2287ec681f3Smrg   ///
2297ec681f3Smrg   /// Checks that the memory migration flags are correct.
2307ec681f3Smrg   ///
2317ec681f3Smrg   void
2327ec681f3Smrg   validate_mem_migration_flags(const cl_mem_migration_flags flags) {
2337ec681f3Smrg      const cl_mem_migration_flags valid =
2347ec681f3Smrg         CL_MIGRATE_MEM_OBJECT_HOST |
2357ec681f3Smrg         CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;
2367ec681f3Smrg
2377ec681f3Smrg      if (flags & ~valid)
2387ec681f3Smrg         throw error(CL_INVALID_VALUE);
2397ec681f3Smrg   }
2407ec681f3Smrg
2417ec681f3Smrg   ///
2427ec681f3Smrg   /// Class that encapsulates the task of mapping an object of type
2437ec681f3Smrg   /// \a T.  The return value of get() should be implicitly
2447ec681f3Smrg   /// convertible to \a void *.
2457ec681f3Smrg   ///
2467ec681f3Smrg   template<typename T>
2477ec681f3Smrg   struct _map;
2487ec681f3Smrg
2497ec681f3Smrg   template<>
2507ec681f3Smrg   struct _map<image*> {
2517ec681f3Smrg      _map(command_queue &q, image *img, cl_map_flags flags,
2527ec681f3Smrg           vector_t offset, vector_t pitch, vector_t region) :
2537ec681f3Smrg         map(q, img->resource_in(q), flags, true, offset, region),
2547ec681f3Smrg         pitch(map.pitch())
2557ec681f3Smrg      { }
2567ec681f3Smrg
2577ec681f3Smrg      template<typename T>
2587ec681f3Smrg      operator T *() const {
2597ec681f3Smrg         return static_cast<T *>(map);
2607ec681f3Smrg      }
2617ec681f3Smrg
2627ec681f3Smrg      mapping map;
2637ec681f3Smrg      vector_t pitch;
2647ec681f3Smrg   };
2657ec681f3Smrg
2667ec681f3Smrg   template<>
2677ec681f3Smrg   struct _map<buffer*> {
2687ec681f3Smrg      _map(command_queue &q, buffer *mem, cl_map_flags flags,
2697ec681f3Smrg           vector_t offset, vector_t pitch, vector_t region) :
2707ec681f3Smrg         map(q, mem->resource_in(q), flags, true,
2717ec681f3Smrg             {{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
2727ec681f3Smrg         pitch(pitch)
2737ec681f3Smrg      { }
2747ec681f3Smrg
2757ec681f3Smrg      template<typename T>
2767ec681f3Smrg      operator T *() const {
2777ec681f3Smrg         return static_cast<T *>(map);
2787ec681f3Smrg      }
2797ec681f3Smrg
2807ec681f3Smrg      mapping map;
2817ec681f3Smrg      vector_t pitch;
2827ec681f3Smrg   };
2837ec681f3Smrg
2847ec681f3Smrg   template<typename P>
2857ec681f3Smrg   struct _map<P *> {
2867ec681f3Smrg      _map(command_queue &q, P *ptr, cl_map_flags flags,
2877ec681f3Smrg           vector_t offset, vector_t pitch, vector_t region) :
2887ec681f3Smrg         ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
2897ec681f3Smrg      { }
2907ec681f3Smrg
2917ec681f3Smrg      template<typename T>
2927ec681f3Smrg      operator T *() const {
2937ec681f3Smrg         return static_cast<T *>(ptr);
2947ec681f3Smrg      }
2957ec681f3Smrg
2967ec681f3Smrg      P *ptr;
2977ec681f3Smrg      vector_t pitch;
2987ec681f3Smrg   };
2997ec681f3Smrg
3007ec681f3Smrg   ///
3017ec681f3Smrg   /// Software copy from \a src_obj to \a dst_obj.  They can be
3027ec681f3Smrg   /// either pointers or memory objects.
3037ec681f3Smrg   ///
3047ec681f3Smrg   template<typename T, typename S>
3057ec681f3Smrg   std::function<void (event &)>
3067ec681f3Smrg   soft_copy_op(command_queue &q,
3077ec681f3Smrg                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
3087ec681f3Smrg                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
3097ec681f3Smrg                const vector_t &region) {
3107ec681f3Smrg      return [=, &q](event &) {
3117ec681f3Smrg         _map<T> dst = { q, dst_obj, CL_MAP_WRITE,
3127ec681f3Smrg                         dst_orig, dst_pitch, region };
3137ec681f3Smrg         _map<S> src = { q, src_obj, CL_MAP_READ,
3147ec681f3Smrg                         src_orig, src_pitch, region };
3157ec681f3Smrg         assert(src.pitch[0] == dst.pitch[0]);
3167ec681f3Smrg         vector_t v = {};
3177ec681f3Smrg
3187ec681f3Smrg         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
3197ec681f3Smrg            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
3207ec681f3Smrg               std::memcpy(
3217ec681f3Smrg                  static_cast<char *>(dst) + dot(dst.pitch, v),
3227ec681f3Smrg                  static_cast<const char *>(src) + dot(src.pitch, v),
3237ec681f3Smrg                  src.pitch[0] * region[0]);
3247ec681f3Smrg            }
3257ec681f3Smrg         }
3267ec681f3Smrg      };
3277ec681f3Smrg   }
3287ec681f3Smrg
3297ec681f3Smrg   ///
3307ec681f3Smrg   /// Hardware copy from \a src_obj to \a dst_obj.
3317ec681f3Smrg   ///
3327ec681f3Smrg   template<typename T, typename S>
3337ec681f3Smrg   std::function<void (event &)>
3347ec681f3Smrg   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
3357ec681f3Smrg                S src_obj, const vector_t &src_orig, const vector_t &region) {
3367ec681f3Smrg      return [=, &q](event &) {
3377ec681f3Smrg         dst_obj->resource_in(q).copy(q, dst_orig, region,
3387ec681f3Smrg                                      src_obj->resource_in(q), src_orig);
3397ec681f3Smrg      };
3407ec681f3Smrg   }
3417ec681f3Smrg}
3427ec681f3Smrg
3437ec681f3SmrgCLOVER_API cl_int
3447ec681f3SmrgclEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
3457ec681f3Smrg                    size_t offset, size_t size, void *ptr,
3467ec681f3Smrg                    cl_uint num_deps, const cl_event *d_deps,
3477ec681f3Smrg                    cl_event *rd_ev) try {
3487ec681f3Smrg   auto &q = obj(d_q);
3497ec681f3Smrg   auto &mem = obj<buffer>(d_mem);
3507ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
3517ec681f3Smrg   vector_t region = { size, 1, 1 };
3527ec681f3Smrg   vector_t obj_origin = { offset };
3537ec681f3Smrg   auto obj_pitch = pitch(region, {{ 1 }});
3547ec681f3Smrg
3557ec681f3Smrg   validate_common(q, deps);
3567ec681f3Smrg   validate_object(q, ptr, {}, obj_pitch, region);
3577ec681f3Smrg   validate_object(q, mem, obj_origin, obj_pitch, region);
3587ec681f3Smrg   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
3597ec681f3Smrg
3607ec681f3Smrg   auto hev = create<hard_event>(
3617ec681f3Smrg      q, CL_COMMAND_READ_BUFFER, deps,
3627ec681f3Smrg      soft_copy_op(q, ptr, {}, obj_pitch,
3637ec681f3Smrg                   &mem, obj_origin, obj_pitch,
3647ec681f3Smrg                   region));
3657ec681f3Smrg
3667ec681f3Smrg   if (blocking)
3677ec681f3Smrg       hev().wait_signalled();
3687ec681f3Smrg
3697ec681f3Smrg   ret_object(rd_ev, hev);
3707ec681f3Smrg   return CL_SUCCESS;
3717ec681f3Smrg
3727ec681f3Smrg} catch (error &e) {
3737ec681f3Smrg   return e.get();
3747ec681f3Smrg}
3757ec681f3Smrg
3767ec681f3SmrgCLOVER_API cl_int
3777ec681f3SmrgclEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
3787ec681f3Smrg                     size_t offset, size_t size, const void *ptr,
3797ec681f3Smrg                     cl_uint num_deps, const cl_event *d_deps,
3807ec681f3Smrg                     cl_event *rd_ev) try {
3817ec681f3Smrg   auto &q = obj(d_q);
3827ec681f3Smrg   auto &mem = obj<buffer>(d_mem);
3837ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
3847ec681f3Smrg   vector_t region = { size, 1, 1 };
3857ec681f3Smrg   vector_t obj_origin = { offset };
3867ec681f3Smrg   auto obj_pitch = pitch(region, {{ 1 }});
3877ec681f3Smrg
3887ec681f3Smrg   validate_common(q, deps);
3897ec681f3Smrg   validate_object(q, mem, obj_origin, obj_pitch, region);
3907ec681f3Smrg   validate_object(q, ptr, {}, obj_pitch, region);
3917ec681f3Smrg   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
3927ec681f3Smrg
3937ec681f3Smrg   auto hev = create<hard_event>(
3947ec681f3Smrg      q, CL_COMMAND_WRITE_BUFFER, deps,
3957ec681f3Smrg      soft_copy_op(q, &mem, obj_origin, obj_pitch,
3967ec681f3Smrg                   ptr, {}, obj_pitch,
3977ec681f3Smrg                   region));
3987ec681f3Smrg
3997ec681f3Smrg   if (blocking)
4007ec681f3Smrg       hev().wait_signalled();
4017ec681f3Smrg
4027ec681f3Smrg   ret_object(rd_ev, hev);
4037ec681f3Smrg   return CL_SUCCESS;
4047ec681f3Smrg
4057ec681f3Smrg} catch (error &e) {
4067ec681f3Smrg   return e.get();
4077ec681f3Smrg}
4087ec681f3Smrg
4097ec681f3SmrgCLOVER_API cl_int
4107ec681f3SmrgclEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
4117ec681f3Smrg                        const size_t *p_obj_origin,
4127ec681f3Smrg                        const size_t *p_host_origin,
4137ec681f3Smrg                        const size_t *p_region,
4147ec681f3Smrg                        size_t obj_row_pitch, size_t obj_slice_pitch,
4157ec681f3Smrg                        size_t host_row_pitch, size_t host_slice_pitch,
4167ec681f3Smrg                        void *ptr,
4177ec681f3Smrg                        cl_uint num_deps, const cl_event *d_deps,
4187ec681f3Smrg                        cl_event *rd_ev) try {
4197ec681f3Smrg   auto &q = obj(d_q);
4207ec681f3Smrg   auto &mem = obj<buffer>(d_mem);
4217ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
4227ec681f3Smrg   auto region = vector(p_region);
4237ec681f3Smrg   auto obj_origin = vector(p_obj_origin);
4247ec681f3Smrg   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
4257ec681f3Smrg   auto host_origin = vector(p_host_origin);
4267ec681f3Smrg   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
4277ec681f3Smrg
4287ec681f3Smrg   validate_common(q, deps);
4297ec681f3Smrg   validate_object(q, ptr, host_origin, host_pitch, region);
4307ec681f3Smrg   validate_object(q, mem, obj_origin, obj_pitch, region);
4317ec681f3Smrg   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
4327ec681f3Smrg
4337ec681f3Smrg   auto hev = create<hard_event>(
4347ec681f3Smrg      q, CL_COMMAND_READ_BUFFER_RECT, deps,
4357ec681f3Smrg      soft_copy_op(q, ptr, host_origin, host_pitch,
4367ec681f3Smrg                   &mem, obj_origin, obj_pitch,
4377ec681f3Smrg                   region));
4387ec681f3Smrg
4397ec681f3Smrg   if (blocking)
4407ec681f3Smrg       hev().wait_signalled();
4417ec681f3Smrg
4427ec681f3Smrg   ret_object(rd_ev, hev);
4437ec681f3Smrg   return CL_SUCCESS;
4447ec681f3Smrg
4457ec681f3Smrg} catch (error &e) {
4467ec681f3Smrg   return e.get();
4477ec681f3Smrg}
4487ec681f3Smrg
4497ec681f3SmrgCLOVER_API cl_int
4507ec681f3SmrgclEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
4517ec681f3Smrg                         const size_t *p_obj_origin,
4527ec681f3Smrg                         const size_t *p_host_origin,
4537ec681f3Smrg                         const size_t *p_region,
4547ec681f3Smrg                         size_t obj_row_pitch, size_t obj_slice_pitch,
4557ec681f3Smrg                         size_t host_row_pitch, size_t host_slice_pitch,
4567ec681f3Smrg                         const void *ptr,
4577ec681f3Smrg                         cl_uint num_deps, const cl_event *d_deps,
4587ec681f3Smrg                         cl_event *rd_ev) try {
4597ec681f3Smrg   auto &q = obj(d_q);
4607ec681f3Smrg   auto &mem = obj<buffer>(d_mem);
4617ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
4627ec681f3Smrg   auto region = vector(p_region);
4637ec681f3Smrg   auto obj_origin = vector(p_obj_origin);
4647ec681f3Smrg   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
4657ec681f3Smrg   auto host_origin = vector(p_host_origin);
4667ec681f3Smrg   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
4677ec681f3Smrg
4687ec681f3Smrg   validate_common(q, deps);
4697ec681f3Smrg   validate_object(q, mem, obj_origin, obj_pitch, region);
4707ec681f3Smrg   validate_object(q, ptr, host_origin, host_pitch, region);
4717ec681f3Smrg   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
4727ec681f3Smrg
4737ec681f3Smrg   auto hev = create<hard_event>(
4747ec681f3Smrg      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
4757ec681f3Smrg      soft_copy_op(q, &mem, obj_origin, obj_pitch,
4767ec681f3Smrg                   ptr, host_origin, host_pitch,
4777ec681f3Smrg                   region));
4787ec681f3Smrg
4797ec681f3Smrg   if (blocking)
4807ec681f3Smrg       hev().wait_signalled();
4817ec681f3Smrg
4827ec681f3Smrg   ret_object(rd_ev, hev);
4837ec681f3Smrg   return CL_SUCCESS;
4847ec681f3Smrg
4857ec681f3Smrg} catch (error &e) {
4867ec681f3Smrg   return e.get();
4877ec681f3Smrg}
4887ec681f3Smrg
4897ec681f3SmrgCLOVER_API cl_int
4907ec681f3SmrgclEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
4917ec681f3Smrg                    const void *pattern, size_t pattern_size,
4927ec681f3Smrg                    size_t offset, size_t size,
4937ec681f3Smrg                    cl_uint num_deps, const cl_event *d_deps,
4947ec681f3Smrg                    cl_event *rd_ev) try {
4957ec681f3Smrg   auto &q = obj(d_queue);
4967ec681f3Smrg   auto &mem = obj<buffer>(d_mem);
4977ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
4987ec681f3Smrg   vector_t region = { size, 1, 1 };
4997ec681f3Smrg   vector_t origin = { offset };
5007ec681f3Smrg   auto dst_pitch = pitch(region, {{ 1 }});
5017ec681f3Smrg
5027ec681f3Smrg   validate_common(q, deps);
5037ec681f3Smrg   validate_object(q, mem, origin, dst_pitch, region);
5047ec681f3Smrg
5057ec681f3Smrg   if (!pattern)
5067ec681f3Smrg      return CL_INVALID_VALUE;
5077ec681f3Smrg
5087ec681f3Smrg   if (!util_is_power_of_two_nonzero(pattern_size) ||
5097ec681f3Smrg      pattern_size > 128 || size % pattern_size
5107ec681f3Smrg      || offset % pattern_size) {
5117ec681f3Smrg      return CL_INVALID_VALUE;
5127ec681f3Smrg   }
5137ec681f3Smrg
5147ec681f3Smrg   auto sub = dynamic_cast<sub_buffer *>(&mem);
5157ec681f3Smrg   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
5167ec681f3Smrg      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
5177ec681f3Smrg   }
5187ec681f3Smrg
5197ec681f3Smrg   std::string data = std::string((char *)pattern, pattern_size);
5207ec681f3Smrg   auto hev = create<hard_event>(
5217ec681f3Smrg      q, CL_COMMAND_FILL_BUFFER, deps,
5227ec681f3Smrg      [=, &q, &mem](event &) {
5237ec681f3Smrg         mem.resource_in(q).clear(q, origin, region, data);
5247ec681f3Smrg      });
5257ec681f3Smrg
5267ec681f3Smrg   ret_object(rd_ev, hev);
5277ec681f3Smrg   return CL_SUCCESS;
5287ec681f3Smrg
5297ec681f3Smrg} catch (error &e) {
5307ec681f3Smrg   return e.get();
5317ec681f3Smrg}
5327ec681f3Smrg
5337ec681f3SmrgCLOVER_API cl_int
5347ec681f3SmrgclEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
5357ec681f3Smrg                    size_t src_offset, size_t dst_offset, size_t size,
5367ec681f3Smrg                    cl_uint num_deps, const cl_event *d_deps,
5377ec681f3Smrg                    cl_event *rd_ev) try {
5387ec681f3Smrg   auto &q = obj(d_q);
5397ec681f3Smrg   auto &src_mem = obj<buffer>(d_src_mem);
5407ec681f3Smrg   auto &dst_mem = obj<buffer>(d_dst_mem);
5417ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
5427ec681f3Smrg   vector_t region = { size, 1, 1 };
5437ec681f3Smrg   vector_t dst_origin = { dst_offset };
5447ec681f3Smrg   auto dst_pitch = pitch(region, {{ 1 }});
5457ec681f3Smrg   vector_t src_origin = { src_offset };
5467ec681f3Smrg   auto src_pitch = pitch(region, {{ 1 }});
5477ec681f3Smrg
5487ec681f3Smrg   validate_common(q, deps);
5497ec681f3Smrg   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
5507ec681f3Smrg   validate_object(q, src_mem, src_origin, src_pitch, region);
5517ec681f3Smrg   validate_copy(q, dst_mem, dst_origin, dst_pitch,
5527ec681f3Smrg                 src_mem, src_origin, src_pitch, region);
5537ec681f3Smrg
5547ec681f3Smrg   auto hev = create<hard_event>(
5557ec681f3Smrg      q, CL_COMMAND_COPY_BUFFER, deps,
5567ec681f3Smrg      hard_copy_op(q, &dst_mem, dst_origin,
5577ec681f3Smrg                   &src_mem, src_origin, region));
5587ec681f3Smrg
5597ec681f3Smrg   ret_object(rd_ev, hev);
5607ec681f3Smrg   return CL_SUCCESS;
5617ec681f3Smrg
5627ec681f3Smrg} catch (error &e) {
5637ec681f3Smrg   return e.get();
5647ec681f3Smrg}
5657ec681f3Smrg
5667ec681f3SmrgCLOVER_API cl_int
5677ec681f3SmrgclEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
5687ec681f3Smrg                        cl_mem d_dst_mem,
5697ec681f3Smrg                        const size_t *p_src_origin, const size_t *p_dst_origin,
5707ec681f3Smrg                        const size_t *p_region,
5717ec681f3Smrg                        size_t src_row_pitch, size_t src_slice_pitch,
5727ec681f3Smrg                        size_t dst_row_pitch, size_t dst_slice_pitch,
5737ec681f3Smrg                        cl_uint num_deps, const cl_event *d_deps,
5747ec681f3Smrg                        cl_event *rd_ev) try {
5757ec681f3Smrg   auto &q = obj(d_q);
5767ec681f3Smrg   auto &src_mem = obj<buffer>(d_src_mem);
5777ec681f3Smrg   auto &dst_mem = obj<buffer>(d_dst_mem);
5787ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
5797ec681f3Smrg   auto region = vector(p_region);
5807ec681f3Smrg   auto dst_origin = vector(p_dst_origin);
5817ec681f3Smrg   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
5827ec681f3Smrg   auto src_origin = vector(p_src_origin);
5837ec681f3Smrg   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});
5847ec681f3Smrg
5857ec681f3Smrg   validate_common(q, deps);
5867ec681f3Smrg   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
5877ec681f3Smrg   validate_object(q, src_mem, src_origin, src_pitch, region);
5887ec681f3Smrg   validate_copy(q, dst_mem, dst_origin, dst_pitch,
5897ec681f3Smrg                 src_mem, src_origin, src_pitch, region);
5907ec681f3Smrg
5917ec681f3Smrg   auto hev = create<hard_event>(
5927ec681f3Smrg      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
5937ec681f3Smrg      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
5947ec681f3Smrg                   &src_mem, src_origin, src_pitch,
5957ec681f3Smrg                   region));
5967ec681f3Smrg
5977ec681f3Smrg   ret_object(rd_ev, hev);
5987ec681f3Smrg   return CL_SUCCESS;
5997ec681f3Smrg
6007ec681f3Smrg} catch (error &e) {
6017ec681f3Smrg   return e.get();
6027ec681f3Smrg}
6037ec681f3Smrg
6047ec681f3SmrgCLOVER_API cl_int
6057ec681f3SmrgclEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
6067ec681f3Smrg                   const size_t *p_origin, const size_t *p_region,
6077ec681f3Smrg                   size_t row_pitch, size_t slice_pitch, void *ptr,
6087ec681f3Smrg                   cl_uint num_deps, const cl_event *d_deps,
6097ec681f3Smrg                   cl_event *rd_ev) try {
6107ec681f3Smrg   auto &q = obj(d_q);
6117ec681f3Smrg   auto &img = obj<image>(d_mem);
6127ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
6137ec681f3Smrg   auto region = vector(p_region);
6147ec681f3Smrg   auto dst_pitch = pitch(region, {{ img.pixel_size(),
6157ec681f3Smrg                                     row_pitch, slice_pitch }});
6167ec681f3Smrg   auto src_origin = vector(p_origin);
6177ec681f3Smrg   auto src_pitch = pitch(region, {{ img.pixel_size(),
6187ec681f3Smrg                                     img.row_pitch(), img.slice_pitch() }});
6197ec681f3Smrg
6207ec681f3Smrg   validate_common(q, deps);
6217ec681f3Smrg   validate_object(q, ptr, {}, dst_pitch, region);
6227ec681f3Smrg   validate_object(q, img, src_origin, region);
6237ec681f3Smrg   validate_object_access(img, CL_MEM_HOST_READ_ONLY);
6247ec681f3Smrg
6257ec681f3Smrg   auto hev = create<hard_event>(
6267ec681f3Smrg      q, CL_COMMAND_READ_IMAGE, deps,
6277ec681f3Smrg      soft_copy_op(q, ptr, {}, dst_pitch,
6287ec681f3Smrg                   &img, src_origin, src_pitch,
6297ec681f3Smrg                   region));
6307ec681f3Smrg
6317ec681f3Smrg   if (blocking)
6327ec681f3Smrg       hev().wait_signalled();
6337ec681f3Smrg
6347ec681f3Smrg   ret_object(rd_ev, hev);
6357ec681f3Smrg   return CL_SUCCESS;
6367ec681f3Smrg
6377ec681f3Smrg} catch (error &e) {
6387ec681f3Smrg   return e.get();
6397ec681f3Smrg}
6407ec681f3Smrg
6417ec681f3SmrgCLOVER_API cl_int
6427ec681f3SmrgclEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
6437ec681f3Smrg                    const size_t *p_origin, const size_t *p_region,
6447ec681f3Smrg                    size_t row_pitch, size_t slice_pitch, const void *ptr,
6457ec681f3Smrg                    cl_uint num_deps, const cl_event *d_deps,
6467ec681f3Smrg                    cl_event *rd_ev) try {
6477ec681f3Smrg   auto &q = obj(d_q);
6487ec681f3Smrg   auto &img = obj<image>(d_mem);
6497ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
6507ec681f3Smrg   auto region = vector(p_region);
6517ec681f3Smrg   auto dst_origin = vector(p_origin);
6527ec681f3Smrg   auto dst_pitch = pitch(region, {{ img.pixel_size(),
6537ec681f3Smrg                                     img.row_pitch(), img.slice_pitch() }});
6547ec681f3Smrg   auto src_pitch = pitch(region, {{ img.pixel_size(),
6557ec681f3Smrg                                     row_pitch, slice_pitch }});
6567ec681f3Smrg
6577ec681f3Smrg   validate_common(q, deps);
6587ec681f3Smrg   validate_object(q, img, dst_origin, region);
6597ec681f3Smrg   validate_object(q, ptr, {}, src_pitch, region);
6607ec681f3Smrg   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);
6617ec681f3Smrg
6627ec681f3Smrg   auto hev = create<hard_event>(
6637ec681f3Smrg      q, CL_COMMAND_WRITE_IMAGE, deps,
6647ec681f3Smrg      soft_copy_op(q, &img, dst_origin, dst_pitch,
6657ec681f3Smrg                   ptr, {}, src_pitch,
6667ec681f3Smrg                   region));
6677ec681f3Smrg
6687ec681f3Smrg   if (blocking)
6697ec681f3Smrg       hev().wait_signalled();
6707ec681f3Smrg
6717ec681f3Smrg   ret_object(rd_ev, hev);
6727ec681f3Smrg   return CL_SUCCESS;
6737ec681f3Smrg
6747ec681f3Smrg} catch (error &e) {
6757ec681f3Smrg   return e.get();
6767ec681f3Smrg}
6777ec681f3Smrg
6787ec681f3SmrgCLOVER_API cl_int
6797ec681f3SmrgclEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
6807ec681f3Smrg                   const void *fill_color,
6817ec681f3Smrg                   const size_t *p_origin, const size_t *p_region,
6827ec681f3Smrg                   cl_uint num_deps, const cl_event *d_deps,
6837ec681f3Smrg                   cl_event *rd_ev) try {
6847ec681f3Smrg   auto &q = obj(d_queue);
6857ec681f3Smrg   auto &img = obj<image>(d_mem);
6867ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
6877ec681f3Smrg   auto origin = vector(p_origin);
6887ec681f3Smrg   auto region = vector(p_region);
6897ec681f3Smrg
6907ec681f3Smrg   validate_common(q, deps);
6917ec681f3Smrg   validate_object(q, img, origin, region);
6927ec681f3Smrg
6937ec681f3Smrg   if (!fill_color)
6947ec681f3Smrg      return CL_INVALID_VALUE;
6957ec681f3Smrg
6967ec681f3Smrg   std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
6977ec681f3Smrg   auto hev = create<hard_event>(
6987ec681f3Smrg      q, CL_COMMAND_FILL_IMAGE, deps,
6997ec681f3Smrg      [=, &q, &img](event &) {
7007ec681f3Smrg         img.resource_in(q).clear(q, origin, region, data);
7017ec681f3Smrg      });
7027ec681f3Smrg
7037ec681f3Smrg   ret_object(rd_ev, hev);
7047ec681f3Smrg   return CL_SUCCESS;
7057ec681f3Smrg
7067ec681f3Smrg} catch (error &e) {
7077ec681f3Smrg   return e.get();
7087ec681f3Smrg}
7097ec681f3Smrg
7107ec681f3SmrgCLOVER_API cl_int
7117ec681f3SmrgclEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
7127ec681f3Smrg                   const size_t *p_src_origin, const size_t *p_dst_origin,
7137ec681f3Smrg                   const size_t *p_region,
7147ec681f3Smrg                   cl_uint num_deps, const cl_event *d_deps,
7157ec681f3Smrg                   cl_event *rd_ev) try {
7167ec681f3Smrg   auto &q = obj(d_q);
7177ec681f3Smrg   auto &src_img = obj<image>(d_src_mem);
7187ec681f3Smrg   auto &dst_img = obj<image>(d_dst_mem);
7197ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
7207ec681f3Smrg   auto region = vector(p_region);
7217ec681f3Smrg   auto dst_origin = vector(p_dst_origin);
7227ec681f3Smrg   auto src_origin = vector(p_src_origin);
7237ec681f3Smrg
7247ec681f3Smrg   validate_common(q, deps);
7257ec681f3Smrg   validate_object(q, dst_img, dst_origin, region);
7267ec681f3Smrg   validate_object(q, src_img, src_origin, region);
7277ec681f3Smrg   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);
7287ec681f3Smrg
7297ec681f3Smrg   auto hev = create<hard_event>(
7307ec681f3Smrg      q, CL_COMMAND_COPY_IMAGE, deps,
7317ec681f3Smrg      hard_copy_op(q, &dst_img, dst_origin,
7327ec681f3Smrg                   &src_img, src_origin,
7337ec681f3Smrg                   region));
7347ec681f3Smrg
7357ec681f3Smrg   ret_object(rd_ev, hev);
7367ec681f3Smrg   return CL_SUCCESS;
7377ec681f3Smrg
7387ec681f3Smrg} catch (error &e) {
7397ec681f3Smrg   return e.get();
7407ec681f3Smrg}
7417ec681f3Smrg
7427ec681f3SmrgCLOVER_API cl_int
7437ec681f3SmrgclEnqueueCopyImageToBuffer(cl_command_queue d_q,
7447ec681f3Smrg                           cl_mem d_src_mem, cl_mem d_dst_mem,
7457ec681f3Smrg                           const size_t *p_src_origin, const size_t *p_region,
7467ec681f3Smrg                           size_t dst_offset,
7477ec681f3Smrg                           cl_uint num_deps, const cl_event *d_deps,
7487ec681f3Smrg                           cl_event *rd_ev) try {
7497ec681f3Smrg   auto &q = obj(d_q);
7507ec681f3Smrg   auto &src_img = obj<image>(d_src_mem);
7517ec681f3Smrg   auto &dst_mem = obj<buffer>(d_dst_mem);
7527ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
7537ec681f3Smrg   auto region = vector(p_region);
7547ec681f3Smrg   vector_t dst_origin = { dst_offset };
7557ec681f3Smrg   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
7567ec681f3Smrg   auto src_origin = vector(p_src_origin);
7577ec681f3Smrg   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
7587ec681f3Smrg                                     src_img.row_pitch(),
7597ec681f3Smrg                                     src_img.slice_pitch() }});
7607ec681f3Smrg
7617ec681f3Smrg   validate_common(q, deps);
7627ec681f3Smrg   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
7637ec681f3Smrg   validate_object(q, src_img, src_origin, region);
7647ec681f3Smrg
7657ec681f3Smrg   auto hev = create<hard_event>(
7667ec681f3Smrg      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
7677ec681f3Smrg      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
7687ec681f3Smrg                   &src_img, src_origin, src_pitch,
7697ec681f3Smrg                   region));
7707ec681f3Smrg
7717ec681f3Smrg   ret_object(rd_ev, hev);
7727ec681f3Smrg   return CL_SUCCESS;
7737ec681f3Smrg
7747ec681f3Smrg} catch (error &e) {
7757ec681f3Smrg   return e.get();
7767ec681f3Smrg}
7777ec681f3Smrg
7787ec681f3SmrgCLOVER_API cl_int
7797ec681f3SmrgclEnqueueCopyBufferToImage(cl_command_queue d_q,
7807ec681f3Smrg                           cl_mem d_src_mem, cl_mem d_dst_mem,
7817ec681f3Smrg                           size_t src_offset,
7827ec681f3Smrg                           const size_t *p_dst_origin, const size_t *p_region,
7837ec681f3Smrg                           cl_uint num_deps, const cl_event *d_deps,
7847ec681f3Smrg                           cl_event *rd_ev) try {
7857ec681f3Smrg   auto &q = obj(d_q);
7867ec681f3Smrg   auto &src_mem = obj<buffer>(d_src_mem);
7877ec681f3Smrg   auto &dst_img = obj<image>(d_dst_mem);
7887ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
7897ec681f3Smrg   auto region = vector(p_region);
7907ec681f3Smrg   auto dst_origin = vector(p_dst_origin);
7917ec681f3Smrg   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
7927ec681f3Smrg                                     dst_img.row_pitch(),
7937ec681f3Smrg                                     dst_img.slice_pitch() }});
7947ec681f3Smrg   vector_t src_origin = { src_offset };
7957ec681f3Smrg   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});
7967ec681f3Smrg
7977ec681f3Smrg   validate_common(q, deps);
7987ec681f3Smrg   validate_object(q, dst_img, dst_origin, region);
7997ec681f3Smrg   validate_object(q, src_mem, src_origin, src_pitch, region);
8007ec681f3Smrg
8017ec681f3Smrg   auto hev = create<hard_event>(
8027ec681f3Smrg      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
8037ec681f3Smrg      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
8047ec681f3Smrg                   &src_mem, src_origin, src_pitch,
8057ec681f3Smrg                   region));
8067ec681f3Smrg
8077ec681f3Smrg   ret_object(rd_ev, hev);
8087ec681f3Smrg   return CL_SUCCESS;
8097ec681f3Smrg
8107ec681f3Smrg} catch (error &e) {
8117ec681f3Smrg   return e.get();
8127ec681f3Smrg}
8137ec681f3Smrg
8147ec681f3SmrgCLOVER_API void *
8157ec681f3SmrgclEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
8167ec681f3Smrg                   cl_map_flags flags, size_t offset, size_t size,
8177ec681f3Smrg                   cl_uint num_deps, const cl_event *d_deps,
8187ec681f3Smrg                   cl_event *rd_ev, cl_int *r_errcode) try {
8197ec681f3Smrg   auto &q = obj(d_q);
8207ec681f3Smrg   auto &mem = obj<buffer>(d_mem);
8217ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
8227ec681f3Smrg   vector_t region = { size, 1, 1 };
8237ec681f3Smrg   vector_t obj_origin = { offset };
8247ec681f3Smrg   auto obj_pitch = pitch(region, {{ 1 }});
8257ec681f3Smrg
8267ec681f3Smrg   validate_common(q, deps);
8277ec681f3Smrg   validate_object(q, mem, obj_origin, obj_pitch, region);
8287ec681f3Smrg   validate_map_flags(mem, flags);
8297ec681f3Smrg
8307ec681f3Smrg   auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region);
8317ec681f3Smrg
8327ec681f3Smrg   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
8337ec681f3Smrg   if (blocking)
8347ec681f3Smrg       hev().wait_signalled();
8357ec681f3Smrg
8367ec681f3Smrg   ret_object(rd_ev, hev);
8377ec681f3Smrg   ret_error(r_errcode, CL_SUCCESS);
8387ec681f3Smrg   return *map;
8397ec681f3Smrg
8407ec681f3Smrg} catch (error &e) {
8417ec681f3Smrg   ret_error(r_errcode, e);
8427ec681f3Smrg   return NULL;
8437ec681f3Smrg}
8447ec681f3Smrg
8457ec681f3SmrgCLOVER_API void *
8467ec681f3SmrgclEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
8477ec681f3Smrg                  cl_map_flags flags,
8487ec681f3Smrg                  const size_t *p_origin, const size_t *p_region,
8497ec681f3Smrg                  size_t *row_pitch, size_t *slice_pitch,
8507ec681f3Smrg                  cl_uint num_deps, const cl_event *d_deps,
8517ec681f3Smrg                  cl_event *rd_ev, cl_int *r_errcode) try {
8527ec681f3Smrg   auto &q = obj(d_q);
8537ec681f3Smrg   auto &img = obj<image>(d_mem);
8547ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
8557ec681f3Smrg   auto region = vector(p_region);
8567ec681f3Smrg   auto origin = vector(p_origin);
8577ec681f3Smrg
8587ec681f3Smrg   validate_common(q, deps);
8597ec681f3Smrg   validate_object(q, img, origin, region);
8607ec681f3Smrg   validate_map_flags(img, flags);
8617ec681f3Smrg
8627ec681f3Smrg   if (!row_pitch)
8637ec681f3Smrg      throw error(CL_INVALID_VALUE);
8647ec681f3Smrg
8657ec681f3Smrg   if (img.slice_pitch() && !slice_pitch)
8667ec681f3Smrg      throw error(CL_INVALID_VALUE);
8677ec681f3Smrg
8687ec681f3Smrg   auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
8697ec681f3Smrg   *row_pitch = map->pitch()[1];
8707ec681f3Smrg   if (slice_pitch)
8717ec681f3Smrg      *slice_pitch = map->pitch()[2];
8727ec681f3Smrg
8737ec681f3Smrg   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
8747ec681f3Smrg   if (blocking)
8757ec681f3Smrg       hev().wait_signalled();
8767ec681f3Smrg
8777ec681f3Smrg   ret_object(rd_ev, hev);
8787ec681f3Smrg   ret_error(r_errcode, CL_SUCCESS);
8797ec681f3Smrg   return *map;
8807ec681f3Smrg
8817ec681f3Smrg} catch (error &e) {
8827ec681f3Smrg   ret_error(r_errcode, e);
8837ec681f3Smrg   return NULL;
8847ec681f3Smrg}
8857ec681f3Smrg
8867ec681f3SmrgCLOVER_API cl_int
8877ec681f3SmrgclEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
8887ec681f3Smrg                        cl_uint num_deps, const cl_event *d_deps,
8897ec681f3Smrg                        cl_event *rd_ev) try {
8907ec681f3Smrg   auto &q = obj(d_q);
8917ec681f3Smrg   auto &mem = obj(d_mem);
8927ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
8937ec681f3Smrg
8947ec681f3Smrg   validate_common(q, deps);
8957ec681f3Smrg
8967ec681f3Smrg   auto hev = create<hard_event>(
8977ec681f3Smrg      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
8987ec681f3Smrg      [=, &q, &mem](event &) {
8997ec681f3Smrg         mem.resource_in(q).del_map(ptr);
9007ec681f3Smrg      });
9017ec681f3Smrg
9027ec681f3Smrg   ret_object(rd_ev, hev);
9037ec681f3Smrg   return CL_SUCCESS;
9047ec681f3Smrg
9057ec681f3Smrg} catch (error &e) {
9067ec681f3Smrg   return e.get();
9077ec681f3Smrg}
9087ec681f3Smrg
9097ec681f3SmrgCLOVER_API cl_int
9107ec681f3SmrgclEnqueueMigrateMemObjects(cl_command_queue d_q,
9117ec681f3Smrg                           cl_uint num_mems,
9127ec681f3Smrg                           const cl_mem *d_mems,
9137ec681f3Smrg                           cl_mem_migration_flags flags,
9147ec681f3Smrg                           cl_uint num_deps,
9157ec681f3Smrg                           const cl_event *d_deps,
9167ec681f3Smrg                           cl_event *rd_ev) try {
9177ec681f3Smrg   auto &q = obj(d_q);
9187ec681f3Smrg   auto mems = objs<memory_obj>(d_mems, num_mems);
9197ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
9207ec681f3Smrg
9217ec681f3Smrg   validate_common(q, deps);
9227ec681f3Smrg   validate_mem_migration_flags(flags);
9237ec681f3Smrg
9247ec681f3Smrg   if (any_of([&](const memory_obj &m) {
9257ec681f3Smrg         return m.context() != q.context();
9267ec681f3Smrg         }, mems))
9277ec681f3Smrg      throw error(CL_INVALID_CONTEXT);
9287ec681f3Smrg
9297ec681f3Smrg   auto hev = create<hard_event>(
9307ec681f3Smrg      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
9317ec681f3Smrg      [=, &q](event &) {
9327ec681f3Smrg         for (auto &mem: mems) {
9337ec681f3Smrg            if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
9347ec681f3Smrg               if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
9357ec681f3Smrg                  mem.resource_out(q);
9367ec681f3Smrg
9377ec681f3Smrg               // For flags == CL_MIGRATE_MEM_OBJECT_HOST only to be
9387ec681f3Smrg               // efficient we would need cl*ReadBuffer* to implement
9397ec681f3Smrg               // reading from host memory.
9407ec681f3Smrg
9417ec681f3Smrg            } else {
9427ec681f3Smrg               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
9437ec681f3Smrg                  mem.resource_undef(q);
9447ec681f3Smrg               else
9457ec681f3Smrg                  mem.resource_in(q);
9467ec681f3Smrg            }
9477ec681f3Smrg         }
9487ec681f3Smrg      });
9497ec681f3Smrg
9507ec681f3Smrg   ret_object(rd_ev, hev);
9517ec681f3Smrg   return CL_SUCCESS;;
9527ec681f3Smrg
9537ec681f3Smrg} catch (error &e) {
9547ec681f3Smrg   return e.get();
9557ec681f3Smrg}
9567ec681f3Smrg
9577ec681f3Smrgcl_int
9587ec681f3Smrgclover::EnqueueSVMFree(cl_command_queue d_q,
9597ec681f3Smrg                       cl_uint num_svm_pointers,
9607ec681f3Smrg                       void *svm_pointers[],
9617ec681f3Smrg                       void (CL_CALLBACK *pfn_free_func) (
9627ec681f3Smrg                           cl_command_queue queue, cl_uint num_svm_pointers,
9637ec681f3Smrg                           void *svm_pointers[], void *user_data),
9647ec681f3Smrg                       void *user_data,
9657ec681f3Smrg                       cl_uint num_events_in_wait_list,
9667ec681f3Smrg                       const cl_event *event_wait_list,
9677ec681f3Smrg                       cl_event *event,
9687ec681f3Smrg                       cl_int cmd) try {
9697ec681f3Smrg
9707ec681f3Smrg   if (bool(num_svm_pointers) != bool(svm_pointers))
9717ec681f3Smrg      return CL_INVALID_VALUE;
9727ec681f3Smrg
9737ec681f3Smrg   auto &q = obj(d_q);
9747ec681f3Smrg
9757ec681f3Smrg   if (!q.device().svm_support())
9767ec681f3Smrg      return CL_INVALID_OPERATION;
9777ec681f3Smrg
9787ec681f3Smrg   bool can_emulate = q.device().has_system_svm();
9797ec681f3Smrg   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
9807ec681f3Smrg
9817ec681f3Smrg   validate_common(q, deps);
9827ec681f3Smrg
9837ec681f3Smrg   std::vector<void *> svm_pointers_cpy(svm_pointers,
9847ec681f3Smrg                                        svm_pointers + num_svm_pointers);
9857ec681f3Smrg   if (!pfn_free_func) {
9867ec681f3Smrg      if (!can_emulate) {
9877ec681f3Smrg         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
9887ec681f3Smrg         return CL_INVALID_VALUE;
9897ec681f3Smrg      }
9907ec681f3Smrg      pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers,
9917ec681f3Smrg                         void *svm_pointers[], void *) {
9927ec681f3Smrg         clover::context &ctx = obj(d_q).context();
9937ec681f3Smrg         for (void *p : range(svm_pointers, num_svm_pointers)) {
9947ec681f3Smrg            ctx.remove_svm_allocation(p);
9957ec681f3Smrg            free(p);
9967ec681f3Smrg         }
9977ec681f3Smrg      };
9987ec681f3Smrg   }
9997ec681f3Smrg
10007ec681f3Smrg   auto hev = create<hard_event>(q, cmd, deps,
10017ec681f3Smrg      [=](clover::event &) mutable {
10027ec681f3Smrg         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
10037ec681f3Smrg                       user_data);
10047ec681f3Smrg      });
10057ec681f3Smrg
10067ec681f3Smrg   ret_object(event, hev);
10077ec681f3Smrg   return CL_SUCCESS;
10087ec681f3Smrg
10097ec681f3Smrg} catch (error &e) {
10107ec681f3Smrg   return e.get();
10117ec681f3Smrg}
10127ec681f3Smrg
10137ec681f3SmrgCLOVER_API cl_int
10147ec681f3SmrgclEnqueueSVMFree(cl_command_queue d_q,
10157ec681f3Smrg                 cl_uint num_svm_pointers,
10167ec681f3Smrg                 void *svm_pointers[],
10177ec681f3Smrg                 void (CL_CALLBACK *pfn_free_func) (
10187ec681f3Smrg                    cl_command_queue queue, cl_uint num_svm_pointers,
10197ec681f3Smrg                    void *svm_pointers[], void *user_data),
10207ec681f3Smrg                 void *user_data,
10217ec681f3Smrg                 cl_uint num_events_in_wait_list,
10227ec681f3Smrg                 const cl_event *event_wait_list,
10237ec681f3Smrg                 cl_event *event) {
10247ec681f3Smrg
10257ec681f3Smrg   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
10267ec681f3Smrg                         pfn_free_func, user_data, num_events_in_wait_list,
10277ec681f3Smrg                         event_wait_list, event, CL_COMMAND_SVM_FREE);
10287ec681f3Smrg}
10297ec681f3Smrg
10307ec681f3Smrgcl_int
10317ec681f3Smrgclover::EnqueueSVMMemcpy(cl_command_queue d_q,
10327ec681f3Smrg                         cl_bool blocking_copy,
10337ec681f3Smrg                         void *dst_ptr,
10347ec681f3Smrg                         const void *src_ptr,
10357ec681f3Smrg                         size_t size,
10367ec681f3Smrg                         cl_uint num_events_in_wait_list,
10377ec681f3Smrg                         const cl_event *event_wait_list,
10387ec681f3Smrg                         cl_event *event,
10397ec681f3Smrg                         cl_int cmd) try {
10407ec681f3Smrg   auto &q = obj(d_q);
10417ec681f3Smrg
10427ec681f3Smrg   if (!q.device().svm_support())
10437ec681f3Smrg      return CL_INVALID_OPERATION;
10447ec681f3Smrg
10457ec681f3Smrg   if (dst_ptr == nullptr || src_ptr == nullptr)
10467ec681f3Smrg      return CL_INVALID_VALUE;
10477ec681f3Smrg
10487ec681f3Smrg   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
10497ec681f3Smrg                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
10507ec681f3Smrg      return CL_MEM_COPY_OVERLAP;
10517ec681f3Smrg
10527ec681f3Smrg
10537ec681f3Smrg   bool can_emulate = q.device().has_system_svm();
10547ec681f3Smrg   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
10557ec681f3Smrg
10567ec681f3Smrg   validate_common(q, deps);
10577ec681f3Smrg
10587ec681f3Smrg   if (can_emulate) {
10597ec681f3Smrg      auto hev = create<hard_event>(q, cmd, deps,
10607ec681f3Smrg         [=](clover::event &) {
10617ec681f3Smrg            memcpy(dst_ptr, src_ptr, size);
10627ec681f3Smrg         });
10637ec681f3Smrg
10647ec681f3Smrg      if (blocking_copy)
10657ec681f3Smrg         hev().wait();
10667ec681f3Smrg      ret_object(event, hev);
10677ec681f3Smrg      return CL_SUCCESS;
10687ec681f3Smrg   }
10697ec681f3Smrg
10707ec681f3Smrg   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
10717ec681f3Smrg   return CL_INVALID_VALUE;
10727ec681f3Smrg
10737ec681f3Smrg} catch (error &e) {
10747ec681f3Smrg   return e.get();
10757ec681f3Smrg}
10767ec681f3Smrg
10777ec681f3SmrgCLOVER_API cl_int
10787ec681f3SmrgclEnqueueSVMMemcpy(cl_command_queue d_q,
10797ec681f3Smrg                   cl_bool blocking_copy,
10807ec681f3Smrg                   void *dst_ptr,
10817ec681f3Smrg                   const void *src_ptr,
10827ec681f3Smrg                   size_t size,
10837ec681f3Smrg                   cl_uint num_events_in_wait_list,
10847ec681f3Smrg                   const cl_event *event_wait_list,
10857ec681f3Smrg                   cl_event *event) {
10867ec681f3Smrg
10877ec681f3Smrg   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
10887ec681f3Smrg                           size, num_events_in_wait_list, event_wait_list,
10897ec681f3Smrg                           event, CL_COMMAND_SVM_MEMCPY);
10907ec681f3Smrg}
10917ec681f3Smrg
10927ec681f3Smrgcl_int
10937ec681f3Smrgclover::EnqueueSVMMemFill(cl_command_queue d_q,
10947ec681f3Smrg                          void *svm_ptr,
10957ec681f3Smrg                          const void *pattern,
10967ec681f3Smrg                          size_t pattern_size,
10977ec681f3Smrg                          size_t size,
10987ec681f3Smrg                          cl_uint num_events_in_wait_list,
10997ec681f3Smrg                          const cl_event *event_wait_list,
11007ec681f3Smrg                          cl_event *event,
11017ec681f3Smrg                          cl_int cmd) try {
11027ec681f3Smrg   auto &q = obj(d_q);
11037ec681f3Smrg
11047ec681f3Smrg   if (!q.device().svm_support())
11057ec681f3Smrg      return CL_INVALID_OPERATION;
11067ec681f3Smrg
11077ec681f3Smrg   if (svm_ptr == nullptr || pattern == nullptr ||
11087ec681f3Smrg       !util_is_power_of_two_nonzero(pattern_size) ||
11097ec681f3Smrg       pattern_size > 128 ||
11107ec681f3Smrg       !ptr_is_aligned(svm_ptr, pattern_size) ||
11117ec681f3Smrg       size % pattern_size)
11127ec681f3Smrg      return CL_INVALID_VALUE;
11137ec681f3Smrg
11147ec681f3Smrg   bool can_emulate = q.device().has_system_svm();
11157ec681f3Smrg   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
11167ec681f3Smrg
11177ec681f3Smrg   validate_common(q, deps);
11187ec681f3Smrg
11197ec681f3Smrg   if (can_emulate) {
11207ec681f3Smrg      auto hev = create<hard_event>(q, cmd, deps,
11217ec681f3Smrg         [=](clover::event &) {
11227ec681f3Smrg            void *ptr = svm_ptr;
11237ec681f3Smrg            for (size_t s = size; s; s -= pattern_size) {
11247ec681f3Smrg               memcpy(ptr, pattern, pattern_size);
11257ec681f3Smrg               ptr = static_cast<uint8_t*>(ptr) + pattern_size;
11267ec681f3Smrg            }
11277ec681f3Smrg         });
11287ec681f3Smrg
11297ec681f3Smrg      ret_object(event, hev);
11307ec681f3Smrg      return CL_SUCCESS;
11317ec681f3Smrg   }
11327ec681f3Smrg
11337ec681f3Smrg   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
11347ec681f3Smrg   return CL_INVALID_VALUE;
11357ec681f3Smrg
11367ec681f3Smrg} catch (error &e) {
11377ec681f3Smrg   return e.get();
11387ec681f3Smrg}
11397ec681f3Smrg
11407ec681f3SmrgCLOVER_API cl_int
11417ec681f3SmrgclEnqueueSVMMemFill(cl_command_queue d_q,
11427ec681f3Smrg                    void *svm_ptr,
11437ec681f3Smrg                    const void *pattern,
11447ec681f3Smrg                    size_t pattern_size,
11457ec681f3Smrg                    size_t size,
11467ec681f3Smrg                    cl_uint num_events_in_wait_list,
11477ec681f3Smrg                    const cl_event *event_wait_list,
11487ec681f3Smrg                    cl_event *event) {
11497ec681f3Smrg
11507ec681f3Smrg   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
11517ec681f3Smrg                            size, num_events_in_wait_list, event_wait_list,
11527ec681f3Smrg                            event, CL_COMMAND_SVM_MEMFILL);
11537ec681f3Smrg}
11547ec681f3Smrg
11557ec681f3Smrgcl_int
11567ec681f3Smrgclover::EnqueueSVMMap(cl_command_queue d_q,
11577ec681f3Smrg                      cl_bool blocking_map,
11587ec681f3Smrg                      cl_map_flags map_flags,
11597ec681f3Smrg                      void *svm_ptr,
11607ec681f3Smrg                      size_t size,
11617ec681f3Smrg                      cl_uint num_events_in_wait_list,
11627ec681f3Smrg                      const cl_event *event_wait_list,
11637ec681f3Smrg                      cl_event *event,
11647ec681f3Smrg                      cl_int cmd) try {
11657ec681f3Smrg   auto &q = obj(d_q);
11667ec681f3Smrg
11677ec681f3Smrg   if (!q.device().svm_support())
11687ec681f3Smrg      return CL_INVALID_OPERATION;
11697ec681f3Smrg
11707ec681f3Smrg   if (svm_ptr == nullptr || size == 0)
11717ec681f3Smrg      return CL_INVALID_VALUE;
11727ec681f3Smrg
11737ec681f3Smrg   bool can_emulate = q.device().has_system_svm();
11747ec681f3Smrg   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
11757ec681f3Smrg
11767ec681f3Smrg   validate_common(q, deps);
11777ec681f3Smrg
11787ec681f3Smrg   if (can_emulate) {
11797ec681f3Smrg      auto hev = create<hard_event>(q, cmd, deps,
11807ec681f3Smrg         [](clover::event &) { });
11817ec681f3Smrg
11827ec681f3Smrg      ret_object(event, hev);
11837ec681f3Smrg      return CL_SUCCESS;
11847ec681f3Smrg   }
11857ec681f3Smrg
11867ec681f3Smrg   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
11877ec681f3Smrg   return CL_INVALID_VALUE;
11887ec681f3Smrg
11897ec681f3Smrg} catch (error &e) {
11907ec681f3Smrg   return e.get();
11917ec681f3Smrg}
11927ec681f3Smrg
11937ec681f3SmrgCLOVER_API cl_int
11947ec681f3SmrgclEnqueueSVMMap(cl_command_queue d_q,
11957ec681f3Smrg                cl_bool blocking_map,
11967ec681f3Smrg                cl_map_flags map_flags,
11977ec681f3Smrg                void *svm_ptr,
11987ec681f3Smrg                size_t size,
11997ec681f3Smrg                cl_uint num_events_in_wait_list,
12007ec681f3Smrg                const cl_event *event_wait_list,
12017ec681f3Smrg                cl_event *event) {
12027ec681f3Smrg
12037ec681f3Smrg   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
12047ec681f3Smrg                        num_events_in_wait_list, event_wait_list, event,
12057ec681f3Smrg                        CL_COMMAND_SVM_MAP);
12067ec681f3Smrg}
12077ec681f3Smrg
12087ec681f3Smrgcl_int
12097ec681f3Smrgclover::EnqueueSVMUnmap(cl_command_queue d_q,
12107ec681f3Smrg                        void *svm_ptr,
12117ec681f3Smrg                        cl_uint num_events_in_wait_list,
12127ec681f3Smrg                        const cl_event *event_wait_list,
12137ec681f3Smrg                        cl_event *event,
12147ec681f3Smrg                        cl_int cmd) try {
12157ec681f3Smrg   auto &q = obj(d_q);
12167ec681f3Smrg
12177ec681f3Smrg   if (!q.device().svm_support())
12187ec681f3Smrg      return CL_INVALID_OPERATION;
12197ec681f3Smrg
12207ec681f3Smrg   if (svm_ptr == nullptr)
12217ec681f3Smrg      return CL_INVALID_VALUE;
12227ec681f3Smrg
12237ec681f3Smrg   bool can_emulate = q.device().has_system_svm();
12247ec681f3Smrg   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
12257ec681f3Smrg
12267ec681f3Smrg   validate_common(q, deps);
12277ec681f3Smrg
12287ec681f3Smrg   if (can_emulate) {
12297ec681f3Smrg      auto hev = create<hard_event>(q, cmd, deps,
12307ec681f3Smrg         [](clover::event &) { });
12317ec681f3Smrg
12327ec681f3Smrg      ret_object(event, hev);
12337ec681f3Smrg      return CL_SUCCESS;
12347ec681f3Smrg   }
12357ec681f3Smrg
12367ec681f3Smrg   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
12377ec681f3Smrg   return CL_INVALID_VALUE;
12387ec681f3Smrg
12397ec681f3Smrg} catch (error &e) {
12407ec681f3Smrg   return e.get();
12417ec681f3Smrg}
12427ec681f3Smrg
12437ec681f3SmrgCLOVER_API cl_int
12447ec681f3SmrgclEnqueueSVMUnmap(cl_command_queue d_q,
12457ec681f3Smrg                  void *svm_ptr,
12467ec681f3Smrg                  cl_uint num_events_in_wait_list,
12477ec681f3Smrg                  const cl_event *event_wait_list,
12487ec681f3Smrg                  cl_event *event) {
12497ec681f3Smrg
12507ec681f3Smrg   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
12517ec681f3Smrg                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
12527ec681f3Smrg}
12537ec681f3Smrg
12547ec681f3SmrgCLOVER_API cl_int
12557ec681f3SmrgclEnqueueSVMMigrateMem(cl_command_queue d_q,
12567ec681f3Smrg                       cl_uint num_svm_pointers,
12577ec681f3Smrg                       const void **svm_pointers,
12587ec681f3Smrg                       const size_t *sizes,
12597ec681f3Smrg                       const cl_mem_migration_flags flags,
12607ec681f3Smrg                       cl_uint num_deps,
12617ec681f3Smrg                       const cl_event *d_deps,
12627ec681f3Smrg                       cl_event *rd_ev) try {
12637ec681f3Smrg   auto &q = obj(d_q);
12647ec681f3Smrg   auto deps = objs<wait_list_tag>(d_deps, num_deps);
12657ec681f3Smrg
12667ec681f3Smrg   validate_common(q, deps);
12677ec681f3Smrg   validate_mem_migration_flags(flags);
12687ec681f3Smrg
12697ec681f3Smrg   if (!q.device().svm_support())
12707ec681f3Smrg      return CL_INVALID_OPERATION;
12717ec681f3Smrg
12727ec681f3Smrg   if (!num_svm_pointers || !svm_pointers)
12737ec681f3Smrg      return CL_INVALID_VALUE;
12747ec681f3Smrg
12757ec681f3Smrg   std::vector<size_t> sizes_copy(num_svm_pointers);
12767ec681f3Smrg   std::vector<const void*>  ptrs(num_svm_pointers);
12777ec681f3Smrg
12787ec681f3Smrg   for (unsigned i = 0; i < num_svm_pointers; ++i) {
12797ec681f3Smrg      const void *ptr = svm_pointers[i];
12807ec681f3Smrg      size_t size = sizes ? sizes[i] : 0;
12817ec681f3Smrg      if (!ptr)
12827ec681f3Smrg         return CL_INVALID_VALUE;
12837ec681f3Smrg
12847ec681f3Smrg      auto p = q.context().find_svm_allocation(ptr);
12857ec681f3Smrg      if (!p.first)
12867ec681f3Smrg         return CL_INVALID_VALUE;
12877ec681f3Smrg
12887ec681f3Smrg      std::ptrdiff_t pdiff = (uint8_t*)ptr - (uint8_t*)p.first;
12897ec681f3Smrg      if (size && size + pdiff > p.second)
12907ec681f3Smrg         return CL_INVALID_VALUE;
12917ec681f3Smrg
12927ec681f3Smrg      sizes_copy[i] = size ? size : p.second;
12937ec681f3Smrg      ptrs[i] = size ? svm_pointers[i] : p.first;
12947ec681f3Smrg   }
12957ec681f3Smrg
12967ec681f3Smrg   auto hev = create<hard_event>(
12977ec681f3Smrg      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
12987ec681f3Smrg      [=, &q](event &) {
12997ec681f3Smrg         q.svm_migrate(ptrs, sizes_copy, flags);
13007ec681f3Smrg      });
13017ec681f3Smrg
13027ec681f3Smrg   ret_object(rd_ev, hev);
13037ec681f3Smrg   return CL_SUCCESS;
13047ec681f3Smrg
13057ec681f3Smrg} catch (error &e) {
13067ec681f3Smrg   return e.get();
13077ec681f3Smrg}
1308