//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
217ec681f3Smrg// 227ec681f3Smrg 237ec681f3Smrg#include <cstring> 247ec681f3Smrg 257ec681f3Smrg#include "util/bitscan.h" 267ec681f3Smrg 277ec681f3Smrg#include "api/dispatch.hpp" 287ec681f3Smrg#include "api/util.hpp" 297ec681f3Smrg#include "core/event.hpp" 307ec681f3Smrg#include "core/memory.hpp" 317ec681f3Smrg 327ec681f3Smrgusing namespace clover; 337ec681f3Smrg 347ec681f3Smrgnamespace { 357ec681f3Smrg typedef resource::vector vector_t; 367ec681f3Smrg 377ec681f3Smrg vector_t 387ec681f3Smrg vector(const size_t *p) { 397ec681f3Smrg if (!p) 407ec681f3Smrg throw error(CL_INVALID_VALUE); 417ec681f3Smrg return range(p, 3); 427ec681f3Smrg } 437ec681f3Smrg 447ec681f3Smrg vector_t 457ec681f3Smrg pitch(const vector_t ®ion, vector_t pitch) { 467ec681f3Smrg for (auto x : zip(tail(pitch), 477ec681f3Smrg map(multiplies(), region, pitch))) { 487ec681f3Smrg // The spec defines a value of zero as the natural pitch, 497ec681f3Smrg // i.e. the unaligned size of the previous dimension. 507ec681f3Smrg if (std::get<0>(x) == 0) 517ec681f3Smrg std::get<0>(x) = std::get<1>(x); 527ec681f3Smrg } 537ec681f3Smrg 547ec681f3Smrg return pitch; 557ec681f3Smrg } 567ec681f3Smrg 577ec681f3Smrg /// 587ec681f3Smrg /// Size of a region in bytes. 597ec681f3Smrg /// 607ec681f3Smrg size_t 617ec681f3Smrg size(const vector_t &pitch, const vector_t ®ion) { 627ec681f3Smrg if (any_of(is_zero(), region)) 637ec681f3Smrg return 0; 647ec681f3Smrg else 657ec681f3Smrg return dot(pitch, region - vector_t{ 0, 1, 1 }); 667ec681f3Smrg } 677ec681f3Smrg 687ec681f3Smrg /// 697ec681f3Smrg /// Common argument checking shared by memory transfer commands. 
707ec681f3Smrg /// 717ec681f3Smrg void 727ec681f3Smrg validate_common(command_queue &q, 737ec681f3Smrg const ref_vector<event> &deps) { 747ec681f3Smrg if (any_of([&](const event &ev) { 757ec681f3Smrg return ev.context() != q.context(); 767ec681f3Smrg }, deps)) 777ec681f3Smrg throw error(CL_INVALID_CONTEXT); 787ec681f3Smrg } 797ec681f3Smrg 807ec681f3Smrg /// 817ec681f3Smrg /// Common error checking for a buffer object argument. 827ec681f3Smrg /// 837ec681f3Smrg void 847ec681f3Smrg validate_object(command_queue &q, buffer &mem, const vector_t &origin, 857ec681f3Smrg const vector_t &pitch, const vector_t ®ion) { 867ec681f3Smrg if (mem.context() != q.context()) 877ec681f3Smrg throw error(CL_INVALID_CONTEXT); 887ec681f3Smrg 897ec681f3Smrg // The region must fit within the specified pitch, 907ec681f3Smrg if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch))) 917ec681f3Smrg throw error(CL_INVALID_VALUE); 927ec681f3Smrg 937ec681f3Smrg // ...and within the specified object. 947ec681f3Smrg if (dot(pitch, origin) + size(pitch, region) > mem.size()) 957ec681f3Smrg throw error(CL_INVALID_VALUE); 967ec681f3Smrg 977ec681f3Smrg if (any_of(is_zero(), region)) 987ec681f3Smrg throw error(CL_INVALID_VALUE); 997ec681f3Smrg } 1007ec681f3Smrg 1017ec681f3Smrg /// 1027ec681f3Smrg /// Common error checking for an image argument. 
1037ec681f3Smrg /// 1047ec681f3Smrg void 1057ec681f3Smrg validate_object(command_queue &q, image &img, 1067ec681f3Smrg const vector_t &orig, const vector_t ®ion) { 1077ec681f3Smrg vector_t size = { img.width(), img.height(), img.depth() }; 1087ec681f3Smrg const auto &dev = q.device(); 1097ec681f3Smrg 1107ec681f3Smrg if (!dev.image_support()) 1117ec681f3Smrg throw error(CL_INVALID_OPERATION); 1127ec681f3Smrg 1137ec681f3Smrg if (img.context() != q.context()) 1147ec681f3Smrg throw error(CL_INVALID_CONTEXT); 1157ec681f3Smrg 1167ec681f3Smrg if (any_of(greater(), orig + region, size)) 1177ec681f3Smrg throw error(CL_INVALID_VALUE); 1187ec681f3Smrg 1197ec681f3Smrg if (any_of(is_zero(), region)) 1207ec681f3Smrg throw error(CL_INVALID_VALUE); 1217ec681f3Smrg 1227ec681f3Smrg switch (img.type()) { 1237ec681f3Smrg case CL_MEM_OBJECT_IMAGE1D: { 1247ec681f3Smrg const size_t max = dev.max_image_size(); 1257ec681f3Smrg if (img.width() > max) 1267ec681f3Smrg throw error(CL_INVALID_IMAGE_SIZE); 1277ec681f3Smrg break; 1287ec681f3Smrg } 1297ec681f3Smrg case CL_MEM_OBJECT_IMAGE2D: { 1307ec681f3Smrg const size_t max = dev.max_image_size(); 1317ec681f3Smrg if (img.width() > max || img.height() > max) 1327ec681f3Smrg throw error(CL_INVALID_IMAGE_SIZE); 1337ec681f3Smrg break; 1347ec681f3Smrg } 1357ec681f3Smrg case CL_MEM_OBJECT_IMAGE3D: { 1367ec681f3Smrg const size_t max = dev.max_image_size_3d(); 1377ec681f3Smrg if (img.width() > max || img.height() > max || img.depth() > max) 1387ec681f3Smrg throw error(CL_INVALID_IMAGE_SIZE); 1397ec681f3Smrg break; 1407ec681f3Smrg } 1417ec681f3Smrg // XXX: Implement missing checks once Clover supports more image types. 1427ec681f3Smrg default: 1437ec681f3Smrg throw error(CL_INVALID_IMAGE_SIZE); 1447ec681f3Smrg } 1457ec681f3Smrg } 1467ec681f3Smrg 1477ec681f3Smrg /// 1487ec681f3Smrg /// Common error checking for a host pointer argument. 
1497ec681f3Smrg /// 1507ec681f3Smrg void 1517ec681f3Smrg validate_object(command_queue &q, const void *ptr, const vector_t &orig, 1527ec681f3Smrg const vector_t &pitch, const vector_t ®ion) { 1537ec681f3Smrg if (!ptr) 1547ec681f3Smrg throw error(CL_INVALID_VALUE); 1557ec681f3Smrg 1567ec681f3Smrg // The region must fit within the specified pitch. 1577ec681f3Smrg if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch))) 1587ec681f3Smrg throw error(CL_INVALID_VALUE); 1597ec681f3Smrg } 1607ec681f3Smrg 1617ec681f3Smrg /// 1627ec681f3Smrg /// Common argument checking for a copy between two buffer objects. 1637ec681f3Smrg /// 1647ec681f3Smrg void 1657ec681f3Smrg validate_copy(command_queue &q, buffer &dst_mem, 1667ec681f3Smrg const vector_t &dst_orig, const vector_t &dst_pitch, 1677ec681f3Smrg buffer &src_mem, 1687ec681f3Smrg const vector_t &src_orig, const vector_t &src_pitch, 1697ec681f3Smrg const vector_t ®ion) { 1707ec681f3Smrg if (dst_mem == src_mem) { 1717ec681f3Smrg auto dst_offset = dot(dst_pitch, dst_orig); 1727ec681f3Smrg auto src_offset = dot(src_pitch, src_orig); 1737ec681f3Smrg 1747ec681f3Smrg if (interval_overlaps()( 1757ec681f3Smrg dst_offset, dst_offset + size(dst_pitch, region), 1767ec681f3Smrg src_offset, src_offset + size(src_pitch, region))) 1777ec681f3Smrg throw error(CL_MEM_COPY_OVERLAP); 1787ec681f3Smrg } 1797ec681f3Smrg } 1807ec681f3Smrg 1817ec681f3Smrg /// 1827ec681f3Smrg /// Common argument checking for a copy between two image objects. 
1837ec681f3Smrg /// 1847ec681f3Smrg void 1857ec681f3Smrg validate_copy(command_queue &q, 1867ec681f3Smrg image &dst_img, const vector_t &dst_orig, 1877ec681f3Smrg image &src_img, const vector_t &src_orig, 1887ec681f3Smrg const vector_t ®ion) { 1897ec681f3Smrg if (dst_img.format() != src_img.format()) 1907ec681f3Smrg throw error(CL_IMAGE_FORMAT_MISMATCH); 1917ec681f3Smrg 1927ec681f3Smrg if (dst_img == src_img) { 1937ec681f3Smrg if (all_of(interval_overlaps(), 1947ec681f3Smrg dst_orig, dst_orig + region, 1957ec681f3Smrg src_orig, src_orig + region)) 1967ec681f3Smrg throw error(CL_MEM_COPY_OVERLAP); 1977ec681f3Smrg } 1987ec681f3Smrg } 1997ec681f3Smrg 2007ec681f3Smrg /// 2017ec681f3Smrg /// Checks that the host access flags of the memory object are 2027ec681f3Smrg /// within the allowed set \a flags. 2037ec681f3Smrg /// 2047ec681f3Smrg void 2057ec681f3Smrg validate_object_access(const memory_obj &mem, const cl_mem_flags flags) { 2067ec681f3Smrg if (mem.flags() & ~flags & 2077ec681f3Smrg (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | 2087ec681f3Smrg CL_MEM_HOST_NO_ACCESS)) 2097ec681f3Smrg throw error(CL_INVALID_OPERATION); 2107ec681f3Smrg } 2117ec681f3Smrg 2127ec681f3Smrg /// 2137ec681f3Smrg /// Checks that the mapping flags are correct. 2147ec681f3Smrg /// 2157ec681f3Smrg void 2167ec681f3Smrg validate_map_flags(const memory_obj &mem, const cl_map_flags flags) { 2177ec681f3Smrg if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) && 2187ec681f3Smrg (flags & CL_MAP_WRITE_INVALIDATE_REGION)) 2197ec681f3Smrg throw error(CL_INVALID_VALUE); 2207ec681f3Smrg 2217ec681f3Smrg if (flags & CL_MAP_READ) 2227ec681f3Smrg validate_object_access(mem, CL_MEM_HOST_READ_ONLY); 2237ec681f3Smrg 2247ec681f3Smrg if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) 2257ec681f3Smrg validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY); 2267ec681f3Smrg } 2277ec681f3Smrg 2287ec681f3Smrg /// 2297ec681f3Smrg /// Checks that the memory migration flags are correct. 
2307ec681f3Smrg /// 2317ec681f3Smrg void 2327ec681f3Smrg validate_mem_migration_flags(const cl_mem_migration_flags flags) { 2337ec681f3Smrg const cl_mem_migration_flags valid = 2347ec681f3Smrg CL_MIGRATE_MEM_OBJECT_HOST | 2357ec681f3Smrg CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED; 2367ec681f3Smrg 2377ec681f3Smrg if (flags & ~valid) 2387ec681f3Smrg throw error(CL_INVALID_VALUE); 2397ec681f3Smrg } 2407ec681f3Smrg 2417ec681f3Smrg /// 2427ec681f3Smrg /// Class that encapsulates the task of mapping an object of type 2437ec681f3Smrg /// \a T. The return value of get() should be implicitly 2447ec681f3Smrg /// convertible to \a void *. 2457ec681f3Smrg /// 2467ec681f3Smrg template<typename T> 2477ec681f3Smrg struct _map; 2487ec681f3Smrg 2497ec681f3Smrg template<> 2507ec681f3Smrg struct _map<image*> { 2517ec681f3Smrg _map(command_queue &q, image *img, cl_map_flags flags, 2527ec681f3Smrg vector_t offset, vector_t pitch, vector_t region) : 2537ec681f3Smrg map(q, img->resource_in(q), flags, true, offset, region), 2547ec681f3Smrg pitch(map.pitch()) 2557ec681f3Smrg { } 2567ec681f3Smrg 2577ec681f3Smrg template<typename T> 2587ec681f3Smrg operator T *() const { 2597ec681f3Smrg return static_cast<T *>(map); 2607ec681f3Smrg } 2617ec681f3Smrg 2627ec681f3Smrg mapping map; 2637ec681f3Smrg vector_t pitch; 2647ec681f3Smrg }; 2657ec681f3Smrg 2667ec681f3Smrg template<> 2677ec681f3Smrg struct _map<buffer*> { 2687ec681f3Smrg _map(command_queue &q, buffer *mem, cl_map_flags flags, 2697ec681f3Smrg vector_t offset, vector_t pitch, vector_t region) : 2707ec681f3Smrg map(q, mem->resource_in(q), flags, true, 2717ec681f3Smrg {{ dot(pitch, offset) }}, {{ size(pitch, region) }}), 2727ec681f3Smrg pitch(pitch) 2737ec681f3Smrg { } 2747ec681f3Smrg 2757ec681f3Smrg template<typename T> 2767ec681f3Smrg operator T *() const { 2777ec681f3Smrg return static_cast<T *>(map); 2787ec681f3Smrg } 2797ec681f3Smrg 2807ec681f3Smrg mapping map; 2817ec681f3Smrg vector_t pitch; 2827ec681f3Smrg }; 2837ec681f3Smrg 
2847ec681f3Smrg template<typename P> 2857ec681f3Smrg struct _map<P *> { 2867ec681f3Smrg _map(command_queue &q, P *ptr, cl_map_flags flags, 2877ec681f3Smrg vector_t offset, vector_t pitch, vector_t region) : 2887ec681f3Smrg ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch) 2897ec681f3Smrg { } 2907ec681f3Smrg 2917ec681f3Smrg template<typename T> 2927ec681f3Smrg operator T *() const { 2937ec681f3Smrg return static_cast<T *>(ptr); 2947ec681f3Smrg } 2957ec681f3Smrg 2967ec681f3Smrg P *ptr; 2977ec681f3Smrg vector_t pitch; 2987ec681f3Smrg }; 2997ec681f3Smrg 3007ec681f3Smrg /// 3017ec681f3Smrg /// Software copy from \a src_obj to \a dst_obj. They can be 3027ec681f3Smrg /// either pointers or memory objects. 3037ec681f3Smrg /// 3047ec681f3Smrg template<typename T, typename S> 3057ec681f3Smrg std::function<void (event &)> 3067ec681f3Smrg soft_copy_op(command_queue &q, 3077ec681f3Smrg T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch, 3087ec681f3Smrg S src_obj, const vector_t &src_orig, const vector_t &src_pitch, 3097ec681f3Smrg const vector_t ®ion) { 3107ec681f3Smrg return [=, &q](event &) { 3117ec681f3Smrg _map<T> dst = { q, dst_obj, CL_MAP_WRITE, 3127ec681f3Smrg dst_orig, dst_pitch, region }; 3137ec681f3Smrg _map<S> src = { q, src_obj, CL_MAP_READ, 3147ec681f3Smrg src_orig, src_pitch, region }; 3157ec681f3Smrg assert(src.pitch[0] == dst.pitch[0]); 3167ec681f3Smrg vector_t v = {}; 3177ec681f3Smrg 3187ec681f3Smrg for (v[2] = 0; v[2] < region[2]; ++v[2]) { 3197ec681f3Smrg for (v[1] = 0; v[1] < region[1]; ++v[1]) { 3207ec681f3Smrg std::memcpy( 3217ec681f3Smrg static_cast<char *>(dst) + dot(dst.pitch, v), 3227ec681f3Smrg static_cast<const char *>(src) + dot(src.pitch, v), 3237ec681f3Smrg src.pitch[0] * region[0]); 3247ec681f3Smrg } 3257ec681f3Smrg } 3267ec681f3Smrg }; 3277ec681f3Smrg } 3287ec681f3Smrg 3297ec681f3Smrg /// 3307ec681f3Smrg /// Hardware copy from \a src_obj to \a dst_obj. 
3317ec681f3Smrg /// 3327ec681f3Smrg template<typename T, typename S> 3337ec681f3Smrg std::function<void (event &)> 3347ec681f3Smrg hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig, 3357ec681f3Smrg S src_obj, const vector_t &src_orig, const vector_t ®ion) { 3367ec681f3Smrg return [=, &q](event &) { 3377ec681f3Smrg dst_obj->resource_in(q).copy(q, dst_orig, region, 3387ec681f3Smrg src_obj->resource_in(q), src_orig); 3397ec681f3Smrg }; 3407ec681f3Smrg } 3417ec681f3Smrg} 3427ec681f3Smrg 3437ec681f3SmrgCLOVER_API cl_int 3447ec681f3SmrgclEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 3457ec681f3Smrg size_t offset, size_t size, void *ptr, 3467ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 3477ec681f3Smrg cl_event *rd_ev) try { 3487ec681f3Smrg auto &q = obj(d_q); 3497ec681f3Smrg auto &mem = obj<buffer>(d_mem); 3507ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 3517ec681f3Smrg vector_t region = { size, 1, 1 }; 3527ec681f3Smrg vector_t obj_origin = { offset }; 3537ec681f3Smrg auto obj_pitch = pitch(region, {{ 1 }}); 3547ec681f3Smrg 3557ec681f3Smrg validate_common(q, deps); 3567ec681f3Smrg validate_object(q, ptr, {}, obj_pitch, region); 3577ec681f3Smrg validate_object(q, mem, obj_origin, obj_pitch, region); 3587ec681f3Smrg validate_object_access(mem, CL_MEM_HOST_READ_ONLY); 3597ec681f3Smrg 3607ec681f3Smrg auto hev = create<hard_event>( 3617ec681f3Smrg q, CL_COMMAND_READ_BUFFER, deps, 3627ec681f3Smrg soft_copy_op(q, ptr, {}, obj_pitch, 3637ec681f3Smrg &mem, obj_origin, obj_pitch, 3647ec681f3Smrg region)); 3657ec681f3Smrg 3667ec681f3Smrg if (blocking) 3677ec681f3Smrg hev().wait_signalled(); 3687ec681f3Smrg 3697ec681f3Smrg ret_object(rd_ev, hev); 3707ec681f3Smrg return CL_SUCCESS; 3717ec681f3Smrg 3727ec681f3Smrg} catch (error &e) { 3737ec681f3Smrg return e.get(); 3747ec681f3Smrg} 3757ec681f3Smrg 3767ec681f3SmrgCLOVER_API cl_int 3777ec681f3SmrgclEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool 
blocking, 3787ec681f3Smrg size_t offset, size_t size, const void *ptr, 3797ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 3807ec681f3Smrg cl_event *rd_ev) try { 3817ec681f3Smrg auto &q = obj(d_q); 3827ec681f3Smrg auto &mem = obj<buffer>(d_mem); 3837ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 3847ec681f3Smrg vector_t region = { size, 1, 1 }; 3857ec681f3Smrg vector_t obj_origin = { offset }; 3867ec681f3Smrg auto obj_pitch = pitch(region, {{ 1 }}); 3877ec681f3Smrg 3887ec681f3Smrg validate_common(q, deps); 3897ec681f3Smrg validate_object(q, mem, obj_origin, obj_pitch, region); 3907ec681f3Smrg validate_object(q, ptr, {}, obj_pitch, region); 3917ec681f3Smrg validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY); 3927ec681f3Smrg 3937ec681f3Smrg auto hev = create<hard_event>( 3947ec681f3Smrg q, CL_COMMAND_WRITE_BUFFER, deps, 3957ec681f3Smrg soft_copy_op(q, &mem, obj_origin, obj_pitch, 3967ec681f3Smrg ptr, {}, obj_pitch, 3977ec681f3Smrg region)); 3987ec681f3Smrg 3997ec681f3Smrg if (blocking) 4007ec681f3Smrg hev().wait_signalled(); 4017ec681f3Smrg 4027ec681f3Smrg ret_object(rd_ev, hev); 4037ec681f3Smrg return CL_SUCCESS; 4047ec681f3Smrg 4057ec681f3Smrg} catch (error &e) { 4067ec681f3Smrg return e.get(); 4077ec681f3Smrg} 4087ec681f3Smrg 4097ec681f3SmrgCLOVER_API cl_int 4107ec681f3SmrgclEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 4117ec681f3Smrg const size_t *p_obj_origin, 4127ec681f3Smrg const size_t *p_host_origin, 4137ec681f3Smrg const size_t *p_region, 4147ec681f3Smrg size_t obj_row_pitch, size_t obj_slice_pitch, 4157ec681f3Smrg size_t host_row_pitch, size_t host_slice_pitch, 4167ec681f3Smrg void *ptr, 4177ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 4187ec681f3Smrg cl_event *rd_ev) try { 4197ec681f3Smrg auto &q = obj(d_q); 4207ec681f3Smrg auto &mem = obj<buffer>(d_mem); 4217ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 4227ec681f3Smrg auto region = vector(p_region); 4237ec681f3Smrg auto 
obj_origin = vector(p_obj_origin); 4247ec681f3Smrg auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }}); 4257ec681f3Smrg auto host_origin = vector(p_host_origin); 4267ec681f3Smrg auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }}); 4277ec681f3Smrg 4287ec681f3Smrg validate_common(q, deps); 4297ec681f3Smrg validate_object(q, ptr, host_origin, host_pitch, region); 4307ec681f3Smrg validate_object(q, mem, obj_origin, obj_pitch, region); 4317ec681f3Smrg validate_object_access(mem, CL_MEM_HOST_READ_ONLY); 4327ec681f3Smrg 4337ec681f3Smrg auto hev = create<hard_event>( 4347ec681f3Smrg q, CL_COMMAND_READ_BUFFER_RECT, deps, 4357ec681f3Smrg soft_copy_op(q, ptr, host_origin, host_pitch, 4367ec681f3Smrg &mem, obj_origin, obj_pitch, 4377ec681f3Smrg region)); 4387ec681f3Smrg 4397ec681f3Smrg if (blocking) 4407ec681f3Smrg hev().wait_signalled(); 4417ec681f3Smrg 4427ec681f3Smrg ret_object(rd_ev, hev); 4437ec681f3Smrg return CL_SUCCESS; 4447ec681f3Smrg 4457ec681f3Smrg} catch (error &e) { 4467ec681f3Smrg return e.get(); 4477ec681f3Smrg} 4487ec681f3Smrg 4497ec681f3SmrgCLOVER_API cl_int 4507ec681f3SmrgclEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 4517ec681f3Smrg const size_t *p_obj_origin, 4527ec681f3Smrg const size_t *p_host_origin, 4537ec681f3Smrg const size_t *p_region, 4547ec681f3Smrg size_t obj_row_pitch, size_t obj_slice_pitch, 4557ec681f3Smrg size_t host_row_pitch, size_t host_slice_pitch, 4567ec681f3Smrg const void *ptr, 4577ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 4587ec681f3Smrg cl_event *rd_ev) try { 4597ec681f3Smrg auto &q = obj(d_q); 4607ec681f3Smrg auto &mem = obj<buffer>(d_mem); 4617ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 4627ec681f3Smrg auto region = vector(p_region); 4637ec681f3Smrg auto obj_origin = vector(p_obj_origin); 4647ec681f3Smrg auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }}); 4657ec681f3Smrg auto host_origin = 
vector(p_host_origin); 4667ec681f3Smrg auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }}); 4677ec681f3Smrg 4687ec681f3Smrg validate_common(q, deps); 4697ec681f3Smrg validate_object(q, mem, obj_origin, obj_pitch, region); 4707ec681f3Smrg validate_object(q, ptr, host_origin, host_pitch, region); 4717ec681f3Smrg validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY); 4727ec681f3Smrg 4737ec681f3Smrg auto hev = create<hard_event>( 4747ec681f3Smrg q, CL_COMMAND_WRITE_BUFFER_RECT, deps, 4757ec681f3Smrg soft_copy_op(q, &mem, obj_origin, obj_pitch, 4767ec681f3Smrg ptr, host_origin, host_pitch, 4777ec681f3Smrg region)); 4787ec681f3Smrg 4797ec681f3Smrg if (blocking) 4807ec681f3Smrg hev().wait_signalled(); 4817ec681f3Smrg 4827ec681f3Smrg ret_object(rd_ev, hev); 4837ec681f3Smrg return CL_SUCCESS; 4847ec681f3Smrg 4857ec681f3Smrg} catch (error &e) { 4867ec681f3Smrg return e.get(); 4877ec681f3Smrg} 4887ec681f3Smrg 4897ec681f3SmrgCLOVER_API cl_int 4907ec681f3SmrgclEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem, 4917ec681f3Smrg const void *pattern, size_t pattern_size, 4927ec681f3Smrg size_t offset, size_t size, 4937ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 4947ec681f3Smrg cl_event *rd_ev) try { 4957ec681f3Smrg auto &q = obj(d_queue); 4967ec681f3Smrg auto &mem = obj<buffer>(d_mem); 4977ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 4987ec681f3Smrg vector_t region = { size, 1, 1 }; 4997ec681f3Smrg vector_t origin = { offset }; 5007ec681f3Smrg auto dst_pitch = pitch(region, {{ 1 }}); 5017ec681f3Smrg 5027ec681f3Smrg validate_common(q, deps); 5037ec681f3Smrg validate_object(q, mem, origin, dst_pitch, region); 5047ec681f3Smrg 5057ec681f3Smrg if (!pattern) 5067ec681f3Smrg return CL_INVALID_VALUE; 5077ec681f3Smrg 5087ec681f3Smrg if (!util_is_power_of_two_nonzero(pattern_size) || 5097ec681f3Smrg pattern_size > 128 || size % pattern_size 5107ec681f3Smrg || offset % pattern_size) { 5117ec681f3Smrg return CL_INVALID_VALUE; 
5127ec681f3Smrg } 5137ec681f3Smrg 5147ec681f3Smrg auto sub = dynamic_cast<sub_buffer *>(&mem); 5157ec681f3Smrg if (sub && sub->offset() % q.device().mem_base_addr_align()) { 5167ec681f3Smrg return CL_MISALIGNED_SUB_BUFFER_OFFSET; 5177ec681f3Smrg } 5187ec681f3Smrg 5197ec681f3Smrg std::string data = std::string((char *)pattern, pattern_size); 5207ec681f3Smrg auto hev = create<hard_event>( 5217ec681f3Smrg q, CL_COMMAND_FILL_BUFFER, deps, 5227ec681f3Smrg [=, &q, &mem](event &) { 5237ec681f3Smrg mem.resource_in(q).clear(q, origin, region, data); 5247ec681f3Smrg }); 5257ec681f3Smrg 5267ec681f3Smrg ret_object(rd_ev, hev); 5277ec681f3Smrg return CL_SUCCESS; 5287ec681f3Smrg 5297ec681f3Smrg} catch (error &e) { 5307ec681f3Smrg return e.get(); 5317ec681f3Smrg} 5327ec681f3Smrg 5337ec681f3SmrgCLOVER_API cl_int 5347ec681f3SmrgclEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem, 5357ec681f3Smrg size_t src_offset, size_t dst_offset, size_t size, 5367ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 5377ec681f3Smrg cl_event *rd_ev) try { 5387ec681f3Smrg auto &q = obj(d_q); 5397ec681f3Smrg auto &src_mem = obj<buffer>(d_src_mem); 5407ec681f3Smrg auto &dst_mem = obj<buffer>(d_dst_mem); 5417ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 5427ec681f3Smrg vector_t region = { size, 1, 1 }; 5437ec681f3Smrg vector_t dst_origin = { dst_offset }; 5447ec681f3Smrg auto dst_pitch = pitch(region, {{ 1 }}); 5457ec681f3Smrg vector_t src_origin = { src_offset }; 5467ec681f3Smrg auto src_pitch = pitch(region, {{ 1 }}); 5477ec681f3Smrg 5487ec681f3Smrg validate_common(q, deps); 5497ec681f3Smrg validate_object(q, dst_mem, dst_origin, dst_pitch, region); 5507ec681f3Smrg validate_object(q, src_mem, src_origin, src_pitch, region); 5517ec681f3Smrg validate_copy(q, dst_mem, dst_origin, dst_pitch, 5527ec681f3Smrg src_mem, src_origin, src_pitch, region); 5537ec681f3Smrg 5547ec681f3Smrg auto hev = create<hard_event>( 5557ec681f3Smrg q, CL_COMMAND_COPY_BUFFER, deps, 
5567ec681f3Smrg hard_copy_op(q, &dst_mem, dst_origin, 5577ec681f3Smrg &src_mem, src_origin, region)); 5587ec681f3Smrg 5597ec681f3Smrg ret_object(rd_ev, hev); 5607ec681f3Smrg return CL_SUCCESS; 5617ec681f3Smrg 5627ec681f3Smrg} catch (error &e) { 5637ec681f3Smrg return e.get(); 5647ec681f3Smrg} 5657ec681f3Smrg 5667ec681f3SmrgCLOVER_API cl_int 5677ec681f3SmrgclEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem, 5687ec681f3Smrg cl_mem d_dst_mem, 5697ec681f3Smrg const size_t *p_src_origin, const size_t *p_dst_origin, 5707ec681f3Smrg const size_t *p_region, 5717ec681f3Smrg size_t src_row_pitch, size_t src_slice_pitch, 5727ec681f3Smrg size_t dst_row_pitch, size_t dst_slice_pitch, 5737ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 5747ec681f3Smrg cl_event *rd_ev) try { 5757ec681f3Smrg auto &q = obj(d_q); 5767ec681f3Smrg auto &src_mem = obj<buffer>(d_src_mem); 5777ec681f3Smrg auto &dst_mem = obj<buffer>(d_dst_mem); 5787ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 5797ec681f3Smrg auto region = vector(p_region); 5807ec681f3Smrg auto dst_origin = vector(p_dst_origin); 5817ec681f3Smrg auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }}); 5827ec681f3Smrg auto src_origin = vector(p_src_origin); 5837ec681f3Smrg auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }}); 5847ec681f3Smrg 5857ec681f3Smrg validate_common(q, deps); 5867ec681f3Smrg validate_object(q, dst_mem, dst_origin, dst_pitch, region); 5877ec681f3Smrg validate_object(q, src_mem, src_origin, src_pitch, region); 5887ec681f3Smrg validate_copy(q, dst_mem, dst_origin, dst_pitch, 5897ec681f3Smrg src_mem, src_origin, src_pitch, region); 5907ec681f3Smrg 5917ec681f3Smrg auto hev = create<hard_event>( 5927ec681f3Smrg q, CL_COMMAND_COPY_BUFFER_RECT, deps, 5937ec681f3Smrg soft_copy_op(q, &dst_mem, dst_origin, dst_pitch, 5947ec681f3Smrg &src_mem, src_origin, src_pitch, 5957ec681f3Smrg region)); 5967ec681f3Smrg 5977ec681f3Smrg ret_object(rd_ev, hev); 
5987ec681f3Smrg return CL_SUCCESS; 5997ec681f3Smrg 6007ec681f3Smrg} catch (error &e) { 6017ec681f3Smrg return e.get(); 6027ec681f3Smrg} 6037ec681f3Smrg 6047ec681f3SmrgCLOVER_API cl_int 6057ec681f3SmrgclEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 6067ec681f3Smrg const size_t *p_origin, const size_t *p_region, 6077ec681f3Smrg size_t row_pitch, size_t slice_pitch, void *ptr, 6087ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 6097ec681f3Smrg cl_event *rd_ev) try { 6107ec681f3Smrg auto &q = obj(d_q); 6117ec681f3Smrg auto &img = obj<image>(d_mem); 6127ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 6137ec681f3Smrg auto region = vector(p_region); 6147ec681f3Smrg auto dst_pitch = pitch(region, {{ img.pixel_size(), 6157ec681f3Smrg row_pitch, slice_pitch }}); 6167ec681f3Smrg auto src_origin = vector(p_origin); 6177ec681f3Smrg auto src_pitch = pitch(region, {{ img.pixel_size(), 6187ec681f3Smrg img.row_pitch(), img.slice_pitch() }}); 6197ec681f3Smrg 6207ec681f3Smrg validate_common(q, deps); 6217ec681f3Smrg validate_object(q, ptr, {}, dst_pitch, region); 6227ec681f3Smrg validate_object(q, img, src_origin, region); 6237ec681f3Smrg validate_object_access(img, CL_MEM_HOST_READ_ONLY); 6247ec681f3Smrg 6257ec681f3Smrg auto hev = create<hard_event>( 6267ec681f3Smrg q, CL_COMMAND_READ_IMAGE, deps, 6277ec681f3Smrg soft_copy_op(q, ptr, {}, dst_pitch, 6287ec681f3Smrg &img, src_origin, src_pitch, 6297ec681f3Smrg region)); 6307ec681f3Smrg 6317ec681f3Smrg if (blocking) 6327ec681f3Smrg hev().wait_signalled(); 6337ec681f3Smrg 6347ec681f3Smrg ret_object(rd_ev, hev); 6357ec681f3Smrg return CL_SUCCESS; 6367ec681f3Smrg 6377ec681f3Smrg} catch (error &e) { 6387ec681f3Smrg return e.get(); 6397ec681f3Smrg} 6407ec681f3Smrg 6417ec681f3SmrgCLOVER_API cl_int 6427ec681f3SmrgclEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 6437ec681f3Smrg const size_t *p_origin, const size_t *p_region, 6447ec681f3Smrg size_t row_pitch, size_t 
slice_pitch, const void *ptr, 6457ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 6467ec681f3Smrg cl_event *rd_ev) try { 6477ec681f3Smrg auto &q = obj(d_q); 6487ec681f3Smrg auto &img = obj<image>(d_mem); 6497ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 6507ec681f3Smrg auto region = vector(p_region); 6517ec681f3Smrg auto dst_origin = vector(p_origin); 6527ec681f3Smrg auto dst_pitch = pitch(region, {{ img.pixel_size(), 6537ec681f3Smrg img.row_pitch(), img.slice_pitch() }}); 6547ec681f3Smrg auto src_pitch = pitch(region, {{ img.pixel_size(), 6557ec681f3Smrg row_pitch, slice_pitch }}); 6567ec681f3Smrg 6577ec681f3Smrg validate_common(q, deps); 6587ec681f3Smrg validate_object(q, img, dst_origin, region); 6597ec681f3Smrg validate_object(q, ptr, {}, src_pitch, region); 6607ec681f3Smrg validate_object_access(img, CL_MEM_HOST_WRITE_ONLY); 6617ec681f3Smrg 6627ec681f3Smrg auto hev = create<hard_event>( 6637ec681f3Smrg q, CL_COMMAND_WRITE_IMAGE, deps, 6647ec681f3Smrg soft_copy_op(q, &img, dst_origin, dst_pitch, 6657ec681f3Smrg ptr, {}, src_pitch, 6667ec681f3Smrg region)); 6677ec681f3Smrg 6687ec681f3Smrg if (blocking) 6697ec681f3Smrg hev().wait_signalled(); 6707ec681f3Smrg 6717ec681f3Smrg ret_object(rd_ev, hev); 6727ec681f3Smrg return CL_SUCCESS; 6737ec681f3Smrg 6747ec681f3Smrg} catch (error &e) { 6757ec681f3Smrg return e.get(); 6767ec681f3Smrg} 6777ec681f3Smrg 6787ec681f3SmrgCLOVER_API cl_int 6797ec681f3SmrgclEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem, 6807ec681f3Smrg const void *fill_color, 6817ec681f3Smrg const size_t *p_origin, const size_t *p_region, 6827ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 6837ec681f3Smrg cl_event *rd_ev) try { 6847ec681f3Smrg auto &q = obj(d_queue); 6857ec681f3Smrg auto &img = obj<image>(d_mem); 6867ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 6877ec681f3Smrg auto origin = vector(p_origin); 6887ec681f3Smrg auto region = vector(p_region); 6897ec681f3Smrg 6907ec681f3Smrg 
validate_common(q, deps); 6917ec681f3Smrg validate_object(q, img, origin, region); 6927ec681f3Smrg 6937ec681f3Smrg if (!fill_color) 6947ec681f3Smrg return CL_INVALID_VALUE; 6957ec681f3Smrg 6967ec681f3Smrg std::string data = std::string((char *)fill_color, sizeof(cl_uint4)); 6977ec681f3Smrg auto hev = create<hard_event>( 6987ec681f3Smrg q, CL_COMMAND_FILL_IMAGE, deps, 6997ec681f3Smrg [=, &q, &img](event &) { 7007ec681f3Smrg img.resource_in(q).clear(q, origin, region, data); 7017ec681f3Smrg }); 7027ec681f3Smrg 7037ec681f3Smrg ret_object(rd_ev, hev); 7047ec681f3Smrg return CL_SUCCESS; 7057ec681f3Smrg 7067ec681f3Smrg} catch (error &e) { 7077ec681f3Smrg return e.get(); 7087ec681f3Smrg} 7097ec681f3Smrg 7107ec681f3SmrgCLOVER_API cl_int 7117ec681f3SmrgclEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem, 7127ec681f3Smrg const size_t *p_src_origin, const size_t *p_dst_origin, 7137ec681f3Smrg const size_t *p_region, 7147ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 7157ec681f3Smrg cl_event *rd_ev) try { 7167ec681f3Smrg auto &q = obj(d_q); 7177ec681f3Smrg auto &src_img = obj<image>(d_src_mem); 7187ec681f3Smrg auto &dst_img = obj<image>(d_dst_mem); 7197ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 7207ec681f3Smrg auto region = vector(p_region); 7217ec681f3Smrg auto dst_origin = vector(p_dst_origin); 7227ec681f3Smrg auto src_origin = vector(p_src_origin); 7237ec681f3Smrg 7247ec681f3Smrg validate_common(q, deps); 7257ec681f3Smrg validate_object(q, dst_img, dst_origin, region); 7267ec681f3Smrg validate_object(q, src_img, src_origin, region); 7277ec681f3Smrg validate_copy(q, dst_img, dst_origin, src_img, src_origin, region); 7287ec681f3Smrg 7297ec681f3Smrg auto hev = create<hard_event>( 7307ec681f3Smrg q, CL_COMMAND_COPY_IMAGE, deps, 7317ec681f3Smrg hard_copy_op(q, &dst_img, dst_origin, 7327ec681f3Smrg &src_img, src_origin, 7337ec681f3Smrg region)); 7347ec681f3Smrg 7357ec681f3Smrg ret_object(rd_ev, hev); 7367ec681f3Smrg return 
CL_SUCCESS; 7377ec681f3Smrg 7387ec681f3Smrg} catch (error &e) { 7397ec681f3Smrg return e.get(); 7407ec681f3Smrg} 7417ec681f3Smrg 7427ec681f3SmrgCLOVER_API cl_int 7437ec681f3SmrgclEnqueueCopyImageToBuffer(cl_command_queue d_q, 7447ec681f3Smrg cl_mem d_src_mem, cl_mem d_dst_mem, 7457ec681f3Smrg const size_t *p_src_origin, const size_t *p_region, 7467ec681f3Smrg size_t dst_offset, 7477ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 7487ec681f3Smrg cl_event *rd_ev) try { 7497ec681f3Smrg auto &q = obj(d_q); 7507ec681f3Smrg auto &src_img = obj<image>(d_src_mem); 7517ec681f3Smrg auto &dst_mem = obj<buffer>(d_dst_mem); 7527ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 7537ec681f3Smrg auto region = vector(p_region); 7547ec681f3Smrg vector_t dst_origin = { dst_offset }; 7557ec681f3Smrg auto dst_pitch = pitch(region, {{ src_img.pixel_size() }}); 7567ec681f3Smrg auto src_origin = vector(p_src_origin); 7577ec681f3Smrg auto src_pitch = pitch(region, {{ src_img.pixel_size(), 7587ec681f3Smrg src_img.row_pitch(), 7597ec681f3Smrg src_img.slice_pitch() }}); 7607ec681f3Smrg 7617ec681f3Smrg validate_common(q, deps); 7627ec681f3Smrg validate_object(q, dst_mem, dst_origin, dst_pitch, region); 7637ec681f3Smrg validate_object(q, src_img, src_origin, region); 7647ec681f3Smrg 7657ec681f3Smrg auto hev = create<hard_event>( 7667ec681f3Smrg q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps, 7677ec681f3Smrg soft_copy_op(q, &dst_mem, dst_origin, dst_pitch, 7687ec681f3Smrg &src_img, src_origin, src_pitch, 7697ec681f3Smrg region)); 7707ec681f3Smrg 7717ec681f3Smrg ret_object(rd_ev, hev); 7727ec681f3Smrg return CL_SUCCESS; 7737ec681f3Smrg 7747ec681f3Smrg} catch (error &e) { 7757ec681f3Smrg return e.get(); 7767ec681f3Smrg} 7777ec681f3Smrg 7787ec681f3SmrgCLOVER_API cl_int 7797ec681f3SmrgclEnqueueCopyBufferToImage(cl_command_queue d_q, 7807ec681f3Smrg cl_mem d_src_mem, cl_mem d_dst_mem, 7817ec681f3Smrg size_t src_offset, 7827ec681f3Smrg const size_t *p_dst_origin, const size_t *p_region, 
7837ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 7847ec681f3Smrg cl_event *rd_ev) try { 7857ec681f3Smrg auto &q = obj(d_q); 7867ec681f3Smrg auto &src_mem = obj<buffer>(d_src_mem); 7877ec681f3Smrg auto &dst_img = obj<image>(d_dst_mem); 7887ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 7897ec681f3Smrg auto region = vector(p_region); 7907ec681f3Smrg auto dst_origin = vector(p_dst_origin); 7917ec681f3Smrg auto dst_pitch = pitch(region, {{ dst_img.pixel_size(), 7927ec681f3Smrg dst_img.row_pitch(), 7937ec681f3Smrg dst_img.slice_pitch() }}); 7947ec681f3Smrg vector_t src_origin = { src_offset }; 7957ec681f3Smrg auto src_pitch = pitch(region, {{ dst_img.pixel_size() }}); 7967ec681f3Smrg 7977ec681f3Smrg validate_common(q, deps); 7987ec681f3Smrg validate_object(q, dst_img, dst_origin, region); 7997ec681f3Smrg validate_object(q, src_mem, src_origin, src_pitch, region); 8007ec681f3Smrg 8017ec681f3Smrg auto hev = create<hard_event>( 8027ec681f3Smrg q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps, 8037ec681f3Smrg soft_copy_op(q, &dst_img, dst_origin, dst_pitch, 8047ec681f3Smrg &src_mem, src_origin, src_pitch, 8057ec681f3Smrg region)); 8067ec681f3Smrg 8077ec681f3Smrg ret_object(rd_ev, hev); 8087ec681f3Smrg return CL_SUCCESS; 8097ec681f3Smrg 8107ec681f3Smrg} catch (error &e) { 8117ec681f3Smrg return e.get(); 8127ec681f3Smrg} 8137ec681f3Smrg 8147ec681f3SmrgCLOVER_API void * 8157ec681f3SmrgclEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 8167ec681f3Smrg cl_map_flags flags, size_t offset, size_t size, 8177ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 8187ec681f3Smrg cl_event *rd_ev, cl_int *r_errcode) try { 8197ec681f3Smrg auto &q = obj(d_q); 8207ec681f3Smrg auto &mem = obj<buffer>(d_mem); 8217ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 8227ec681f3Smrg vector_t region = { size, 1, 1 }; 8237ec681f3Smrg vector_t obj_origin = { offset }; 8247ec681f3Smrg auto obj_pitch = pitch(region, {{ 1 }}); 8257ec681f3Smrg 
8267ec681f3Smrg validate_common(q, deps); 8277ec681f3Smrg validate_object(q, mem, obj_origin, obj_pitch, region); 8287ec681f3Smrg validate_map_flags(mem, flags); 8297ec681f3Smrg 8307ec681f3Smrg auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region); 8317ec681f3Smrg 8327ec681f3Smrg auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps); 8337ec681f3Smrg if (blocking) 8347ec681f3Smrg hev().wait_signalled(); 8357ec681f3Smrg 8367ec681f3Smrg ret_object(rd_ev, hev); 8377ec681f3Smrg ret_error(r_errcode, CL_SUCCESS); 8387ec681f3Smrg return *map; 8397ec681f3Smrg 8407ec681f3Smrg} catch (error &e) { 8417ec681f3Smrg ret_error(r_errcode, e); 8427ec681f3Smrg return NULL; 8437ec681f3Smrg} 8447ec681f3Smrg 8457ec681f3SmrgCLOVER_API void * 8467ec681f3SmrgclEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 8477ec681f3Smrg cl_map_flags flags, 8487ec681f3Smrg const size_t *p_origin, const size_t *p_region, 8497ec681f3Smrg size_t *row_pitch, size_t *slice_pitch, 8507ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 8517ec681f3Smrg cl_event *rd_ev, cl_int *r_errcode) try { 8527ec681f3Smrg auto &q = obj(d_q); 8537ec681f3Smrg auto &img = obj<image>(d_mem); 8547ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 8557ec681f3Smrg auto region = vector(p_region); 8567ec681f3Smrg auto origin = vector(p_origin); 8577ec681f3Smrg 8587ec681f3Smrg validate_common(q, deps); 8597ec681f3Smrg validate_object(q, img, origin, region); 8607ec681f3Smrg validate_map_flags(img, flags); 8617ec681f3Smrg 8627ec681f3Smrg if (!row_pitch) 8637ec681f3Smrg throw error(CL_INVALID_VALUE); 8647ec681f3Smrg 8657ec681f3Smrg if (img.slice_pitch() && !slice_pitch) 8667ec681f3Smrg throw error(CL_INVALID_VALUE); 8677ec681f3Smrg 8687ec681f3Smrg auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region); 8697ec681f3Smrg *row_pitch = map->pitch()[1]; 8707ec681f3Smrg if (slice_pitch) 8717ec681f3Smrg *slice_pitch = map->pitch()[2]; 8727ec681f3Smrg 
8737ec681f3Smrg auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps); 8747ec681f3Smrg if (blocking) 8757ec681f3Smrg hev().wait_signalled(); 8767ec681f3Smrg 8777ec681f3Smrg ret_object(rd_ev, hev); 8787ec681f3Smrg ret_error(r_errcode, CL_SUCCESS); 8797ec681f3Smrg return *map; 8807ec681f3Smrg 8817ec681f3Smrg} catch (error &e) { 8827ec681f3Smrg ret_error(r_errcode, e); 8837ec681f3Smrg return NULL; 8847ec681f3Smrg} 8857ec681f3Smrg 8867ec681f3SmrgCLOVER_API cl_int 8877ec681f3SmrgclEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr, 8887ec681f3Smrg cl_uint num_deps, const cl_event *d_deps, 8897ec681f3Smrg cl_event *rd_ev) try { 8907ec681f3Smrg auto &q = obj(d_q); 8917ec681f3Smrg auto &mem = obj(d_mem); 8927ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 8937ec681f3Smrg 8947ec681f3Smrg validate_common(q, deps); 8957ec681f3Smrg 8967ec681f3Smrg auto hev = create<hard_event>( 8977ec681f3Smrg q, CL_COMMAND_UNMAP_MEM_OBJECT, deps, 8987ec681f3Smrg [=, &q, &mem](event &) { 8997ec681f3Smrg mem.resource_in(q).del_map(ptr); 9007ec681f3Smrg }); 9017ec681f3Smrg 9027ec681f3Smrg ret_object(rd_ev, hev); 9037ec681f3Smrg return CL_SUCCESS; 9047ec681f3Smrg 9057ec681f3Smrg} catch (error &e) { 9067ec681f3Smrg return e.get(); 9077ec681f3Smrg} 9087ec681f3Smrg 9097ec681f3SmrgCLOVER_API cl_int 9107ec681f3SmrgclEnqueueMigrateMemObjects(cl_command_queue d_q, 9117ec681f3Smrg cl_uint num_mems, 9127ec681f3Smrg const cl_mem *d_mems, 9137ec681f3Smrg cl_mem_migration_flags flags, 9147ec681f3Smrg cl_uint num_deps, 9157ec681f3Smrg const cl_event *d_deps, 9167ec681f3Smrg cl_event *rd_ev) try { 9177ec681f3Smrg auto &q = obj(d_q); 9187ec681f3Smrg auto mems = objs<memory_obj>(d_mems, num_mems); 9197ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 9207ec681f3Smrg 9217ec681f3Smrg validate_common(q, deps); 9227ec681f3Smrg validate_mem_migration_flags(flags); 9237ec681f3Smrg 9247ec681f3Smrg if (any_of([&](const memory_obj &m) { 9257ec681f3Smrg return 
m.context() != q.context(); 9267ec681f3Smrg }, mems)) 9277ec681f3Smrg throw error(CL_INVALID_CONTEXT); 9287ec681f3Smrg 9297ec681f3Smrg auto hev = create<hard_event>( 9307ec681f3Smrg q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps, 9317ec681f3Smrg [=, &q](event &) { 9327ec681f3Smrg for (auto &mem: mems) { 9337ec681f3Smrg if (flags & CL_MIGRATE_MEM_OBJECT_HOST) { 9347ec681f3Smrg if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)) 9357ec681f3Smrg mem.resource_out(q); 9367ec681f3Smrg 9377ec681f3Smrg // For flags == CL_MIGRATE_MEM_OBJECT_HOST only to be 9387ec681f3Smrg // efficient we would need cl*ReadBuffer* to implement 9397ec681f3Smrg // reading from host memory. 9407ec681f3Smrg 9417ec681f3Smrg } else { 9427ec681f3Smrg if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) 9437ec681f3Smrg mem.resource_undef(q); 9447ec681f3Smrg else 9457ec681f3Smrg mem.resource_in(q); 9467ec681f3Smrg } 9477ec681f3Smrg } 9487ec681f3Smrg }); 9497ec681f3Smrg 9507ec681f3Smrg ret_object(rd_ev, hev); 9517ec681f3Smrg return CL_SUCCESS;; 9527ec681f3Smrg 9537ec681f3Smrg} catch (error &e) { 9547ec681f3Smrg return e.get(); 9557ec681f3Smrg} 9567ec681f3Smrg 9577ec681f3Smrgcl_int 9587ec681f3Smrgclover::EnqueueSVMFree(cl_command_queue d_q, 9597ec681f3Smrg cl_uint num_svm_pointers, 9607ec681f3Smrg void *svm_pointers[], 9617ec681f3Smrg void (CL_CALLBACK *pfn_free_func) ( 9627ec681f3Smrg cl_command_queue queue, cl_uint num_svm_pointers, 9637ec681f3Smrg void *svm_pointers[], void *user_data), 9647ec681f3Smrg void *user_data, 9657ec681f3Smrg cl_uint num_events_in_wait_list, 9667ec681f3Smrg const cl_event *event_wait_list, 9677ec681f3Smrg cl_event *event, 9687ec681f3Smrg cl_int cmd) try { 9697ec681f3Smrg 9707ec681f3Smrg if (bool(num_svm_pointers) != bool(svm_pointers)) 9717ec681f3Smrg return CL_INVALID_VALUE; 9727ec681f3Smrg 9737ec681f3Smrg auto &q = obj(d_q); 9747ec681f3Smrg 9757ec681f3Smrg if (!q.device().svm_support()) 9767ec681f3Smrg return CL_INVALID_OPERATION; 9777ec681f3Smrg 9787ec681f3Smrg bool 
can_emulate = q.device().has_system_svm(); 9797ec681f3Smrg auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list); 9807ec681f3Smrg 9817ec681f3Smrg validate_common(q, deps); 9827ec681f3Smrg 9837ec681f3Smrg std::vector<void *> svm_pointers_cpy(svm_pointers, 9847ec681f3Smrg svm_pointers + num_svm_pointers); 9857ec681f3Smrg if (!pfn_free_func) { 9867ec681f3Smrg if (!can_emulate) { 9877ec681f3Smrg CLOVER_NOT_SUPPORTED_UNTIL("2.0"); 9887ec681f3Smrg return CL_INVALID_VALUE; 9897ec681f3Smrg } 9907ec681f3Smrg pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers, 9917ec681f3Smrg void *svm_pointers[], void *) { 9927ec681f3Smrg clover::context &ctx = obj(d_q).context(); 9937ec681f3Smrg for (void *p : range(svm_pointers, num_svm_pointers)) { 9947ec681f3Smrg ctx.remove_svm_allocation(p); 9957ec681f3Smrg free(p); 9967ec681f3Smrg } 9977ec681f3Smrg }; 9987ec681f3Smrg } 9997ec681f3Smrg 10007ec681f3Smrg auto hev = create<hard_event>(q, cmd, deps, 10017ec681f3Smrg [=](clover::event &) mutable { 10027ec681f3Smrg pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(), 10037ec681f3Smrg user_data); 10047ec681f3Smrg }); 10057ec681f3Smrg 10067ec681f3Smrg ret_object(event, hev); 10077ec681f3Smrg return CL_SUCCESS; 10087ec681f3Smrg 10097ec681f3Smrg} catch (error &e) { 10107ec681f3Smrg return e.get(); 10117ec681f3Smrg} 10127ec681f3Smrg 10137ec681f3SmrgCLOVER_API cl_int 10147ec681f3SmrgclEnqueueSVMFree(cl_command_queue d_q, 10157ec681f3Smrg cl_uint num_svm_pointers, 10167ec681f3Smrg void *svm_pointers[], 10177ec681f3Smrg void (CL_CALLBACK *pfn_free_func) ( 10187ec681f3Smrg cl_command_queue queue, cl_uint num_svm_pointers, 10197ec681f3Smrg void *svm_pointers[], void *user_data), 10207ec681f3Smrg void *user_data, 10217ec681f3Smrg cl_uint num_events_in_wait_list, 10227ec681f3Smrg const cl_event *event_wait_list, 10237ec681f3Smrg cl_event *event) { 10247ec681f3Smrg 10257ec681f3Smrg return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers, 10267ec681f3Smrg 
pfn_free_func, user_data, num_events_in_wait_list, 10277ec681f3Smrg event_wait_list, event, CL_COMMAND_SVM_FREE); 10287ec681f3Smrg} 10297ec681f3Smrg 10307ec681f3Smrgcl_int 10317ec681f3Smrgclover::EnqueueSVMMemcpy(cl_command_queue d_q, 10327ec681f3Smrg cl_bool blocking_copy, 10337ec681f3Smrg void *dst_ptr, 10347ec681f3Smrg const void *src_ptr, 10357ec681f3Smrg size_t size, 10367ec681f3Smrg cl_uint num_events_in_wait_list, 10377ec681f3Smrg const cl_event *event_wait_list, 10387ec681f3Smrg cl_event *event, 10397ec681f3Smrg cl_int cmd) try { 10407ec681f3Smrg auto &q = obj(d_q); 10417ec681f3Smrg 10427ec681f3Smrg if (!q.device().svm_support()) 10437ec681f3Smrg return CL_INVALID_OPERATION; 10447ec681f3Smrg 10457ec681f3Smrg if (dst_ptr == nullptr || src_ptr == nullptr) 10467ec681f3Smrg return CL_INVALID_VALUE; 10477ec681f3Smrg 10487ec681f3Smrg if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) - 10497ec681f3Smrg reinterpret_cast<ptrdiff_t>(src_ptr))) < size) 10507ec681f3Smrg return CL_MEM_COPY_OVERLAP; 10517ec681f3Smrg 10527ec681f3Smrg 10537ec681f3Smrg bool can_emulate = q.device().has_system_svm(); 10547ec681f3Smrg auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list); 10557ec681f3Smrg 10567ec681f3Smrg validate_common(q, deps); 10577ec681f3Smrg 10587ec681f3Smrg if (can_emulate) { 10597ec681f3Smrg auto hev = create<hard_event>(q, cmd, deps, 10607ec681f3Smrg [=](clover::event &) { 10617ec681f3Smrg memcpy(dst_ptr, src_ptr, size); 10627ec681f3Smrg }); 10637ec681f3Smrg 10647ec681f3Smrg if (blocking_copy) 10657ec681f3Smrg hev().wait(); 10667ec681f3Smrg ret_object(event, hev); 10677ec681f3Smrg return CL_SUCCESS; 10687ec681f3Smrg } 10697ec681f3Smrg 10707ec681f3Smrg CLOVER_NOT_SUPPORTED_UNTIL("2.0"); 10717ec681f3Smrg return CL_INVALID_VALUE; 10727ec681f3Smrg 10737ec681f3Smrg} catch (error &e) { 10747ec681f3Smrg return e.get(); 10757ec681f3Smrg} 10767ec681f3Smrg 10777ec681f3SmrgCLOVER_API cl_int 10787ec681f3SmrgclEnqueueSVMMemcpy(cl_command_queue 
d_q, 10797ec681f3Smrg cl_bool blocking_copy, 10807ec681f3Smrg void *dst_ptr, 10817ec681f3Smrg const void *src_ptr, 10827ec681f3Smrg size_t size, 10837ec681f3Smrg cl_uint num_events_in_wait_list, 10847ec681f3Smrg const cl_event *event_wait_list, 10857ec681f3Smrg cl_event *event) { 10867ec681f3Smrg 10877ec681f3Smrg return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr, 10887ec681f3Smrg size, num_events_in_wait_list, event_wait_list, 10897ec681f3Smrg event, CL_COMMAND_SVM_MEMCPY); 10907ec681f3Smrg} 10917ec681f3Smrg 10927ec681f3Smrgcl_int 10937ec681f3Smrgclover::EnqueueSVMMemFill(cl_command_queue d_q, 10947ec681f3Smrg void *svm_ptr, 10957ec681f3Smrg const void *pattern, 10967ec681f3Smrg size_t pattern_size, 10977ec681f3Smrg size_t size, 10987ec681f3Smrg cl_uint num_events_in_wait_list, 10997ec681f3Smrg const cl_event *event_wait_list, 11007ec681f3Smrg cl_event *event, 11017ec681f3Smrg cl_int cmd) try { 11027ec681f3Smrg auto &q = obj(d_q); 11037ec681f3Smrg 11047ec681f3Smrg if (!q.device().svm_support()) 11057ec681f3Smrg return CL_INVALID_OPERATION; 11067ec681f3Smrg 11077ec681f3Smrg if (svm_ptr == nullptr || pattern == nullptr || 11087ec681f3Smrg !util_is_power_of_two_nonzero(pattern_size) || 11097ec681f3Smrg pattern_size > 128 || 11107ec681f3Smrg !ptr_is_aligned(svm_ptr, pattern_size) || 11117ec681f3Smrg size % pattern_size) 11127ec681f3Smrg return CL_INVALID_VALUE; 11137ec681f3Smrg 11147ec681f3Smrg bool can_emulate = q.device().has_system_svm(); 11157ec681f3Smrg auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list); 11167ec681f3Smrg 11177ec681f3Smrg validate_common(q, deps); 11187ec681f3Smrg 11197ec681f3Smrg if (can_emulate) { 11207ec681f3Smrg auto hev = create<hard_event>(q, cmd, deps, 11217ec681f3Smrg [=](clover::event &) { 11227ec681f3Smrg void *ptr = svm_ptr; 11237ec681f3Smrg for (size_t s = size; s; s -= pattern_size) { 11247ec681f3Smrg memcpy(ptr, pattern, pattern_size); 11257ec681f3Smrg ptr = static_cast<uint8_t*>(ptr) + pattern_size; 
11267ec681f3Smrg } 11277ec681f3Smrg }); 11287ec681f3Smrg 11297ec681f3Smrg ret_object(event, hev); 11307ec681f3Smrg return CL_SUCCESS; 11317ec681f3Smrg } 11327ec681f3Smrg 11337ec681f3Smrg CLOVER_NOT_SUPPORTED_UNTIL("2.0"); 11347ec681f3Smrg return CL_INVALID_VALUE; 11357ec681f3Smrg 11367ec681f3Smrg} catch (error &e) { 11377ec681f3Smrg return e.get(); 11387ec681f3Smrg} 11397ec681f3Smrg 11407ec681f3SmrgCLOVER_API cl_int 11417ec681f3SmrgclEnqueueSVMMemFill(cl_command_queue d_q, 11427ec681f3Smrg void *svm_ptr, 11437ec681f3Smrg const void *pattern, 11447ec681f3Smrg size_t pattern_size, 11457ec681f3Smrg size_t size, 11467ec681f3Smrg cl_uint num_events_in_wait_list, 11477ec681f3Smrg const cl_event *event_wait_list, 11487ec681f3Smrg cl_event *event) { 11497ec681f3Smrg 11507ec681f3Smrg return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size, 11517ec681f3Smrg size, num_events_in_wait_list, event_wait_list, 11527ec681f3Smrg event, CL_COMMAND_SVM_MEMFILL); 11537ec681f3Smrg} 11547ec681f3Smrg 11557ec681f3Smrgcl_int 11567ec681f3Smrgclover::EnqueueSVMMap(cl_command_queue d_q, 11577ec681f3Smrg cl_bool blocking_map, 11587ec681f3Smrg cl_map_flags map_flags, 11597ec681f3Smrg void *svm_ptr, 11607ec681f3Smrg size_t size, 11617ec681f3Smrg cl_uint num_events_in_wait_list, 11627ec681f3Smrg const cl_event *event_wait_list, 11637ec681f3Smrg cl_event *event, 11647ec681f3Smrg cl_int cmd) try { 11657ec681f3Smrg auto &q = obj(d_q); 11667ec681f3Smrg 11677ec681f3Smrg if (!q.device().svm_support()) 11687ec681f3Smrg return CL_INVALID_OPERATION; 11697ec681f3Smrg 11707ec681f3Smrg if (svm_ptr == nullptr || size == 0) 11717ec681f3Smrg return CL_INVALID_VALUE; 11727ec681f3Smrg 11737ec681f3Smrg bool can_emulate = q.device().has_system_svm(); 11747ec681f3Smrg auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list); 11757ec681f3Smrg 11767ec681f3Smrg validate_common(q, deps); 11777ec681f3Smrg 11787ec681f3Smrg if (can_emulate) { 11797ec681f3Smrg auto hev = create<hard_event>(q, cmd, deps, 
11807ec681f3Smrg [](clover::event &) { }); 11817ec681f3Smrg 11827ec681f3Smrg ret_object(event, hev); 11837ec681f3Smrg return CL_SUCCESS; 11847ec681f3Smrg } 11857ec681f3Smrg 11867ec681f3Smrg CLOVER_NOT_SUPPORTED_UNTIL("2.0"); 11877ec681f3Smrg return CL_INVALID_VALUE; 11887ec681f3Smrg 11897ec681f3Smrg} catch (error &e) { 11907ec681f3Smrg return e.get(); 11917ec681f3Smrg} 11927ec681f3Smrg 11937ec681f3SmrgCLOVER_API cl_int 11947ec681f3SmrgclEnqueueSVMMap(cl_command_queue d_q, 11957ec681f3Smrg cl_bool blocking_map, 11967ec681f3Smrg cl_map_flags map_flags, 11977ec681f3Smrg void *svm_ptr, 11987ec681f3Smrg size_t size, 11997ec681f3Smrg cl_uint num_events_in_wait_list, 12007ec681f3Smrg const cl_event *event_wait_list, 12017ec681f3Smrg cl_event *event) { 12027ec681f3Smrg 12037ec681f3Smrg return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size, 12047ec681f3Smrg num_events_in_wait_list, event_wait_list, event, 12057ec681f3Smrg CL_COMMAND_SVM_MAP); 12067ec681f3Smrg} 12077ec681f3Smrg 12087ec681f3Smrgcl_int 12097ec681f3Smrgclover::EnqueueSVMUnmap(cl_command_queue d_q, 12107ec681f3Smrg void *svm_ptr, 12117ec681f3Smrg cl_uint num_events_in_wait_list, 12127ec681f3Smrg const cl_event *event_wait_list, 12137ec681f3Smrg cl_event *event, 12147ec681f3Smrg cl_int cmd) try { 12157ec681f3Smrg auto &q = obj(d_q); 12167ec681f3Smrg 12177ec681f3Smrg if (!q.device().svm_support()) 12187ec681f3Smrg return CL_INVALID_OPERATION; 12197ec681f3Smrg 12207ec681f3Smrg if (svm_ptr == nullptr) 12217ec681f3Smrg return CL_INVALID_VALUE; 12227ec681f3Smrg 12237ec681f3Smrg bool can_emulate = q.device().has_system_svm(); 12247ec681f3Smrg auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list); 12257ec681f3Smrg 12267ec681f3Smrg validate_common(q, deps); 12277ec681f3Smrg 12287ec681f3Smrg if (can_emulate) { 12297ec681f3Smrg auto hev = create<hard_event>(q, cmd, deps, 12307ec681f3Smrg [](clover::event &) { }); 12317ec681f3Smrg 12327ec681f3Smrg ret_object(event, hev); 12337ec681f3Smrg 
return CL_SUCCESS; 12347ec681f3Smrg } 12357ec681f3Smrg 12367ec681f3Smrg CLOVER_NOT_SUPPORTED_UNTIL("2.0"); 12377ec681f3Smrg return CL_INVALID_VALUE; 12387ec681f3Smrg 12397ec681f3Smrg} catch (error &e) { 12407ec681f3Smrg return e.get(); 12417ec681f3Smrg} 12427ec681f3Smrg 12437ec681f3SmrgCLOVER_API cl_int 12447ec681f3SmrgclEnqueueSVMUnmap(cl_command_queue d_q, 12457ec681f3Smrg void *svm_ptr, 12467ec681f3Smrg cl_uint num_events_in_wait_list, 12477ec681f3Smrg const cl_event *event_wait_list, 12487ec681f3Smrg cl_event *event) { 12497ec681f3Smrg 12507ec681f3Smrg return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list, 12517ec681f3Smrg event_wait_list, event, CL_COMMAND_SVM_UNMAP); 12527ec681f3Smrg} 12537ec681f3Smrg 12547ec681f3SmrgCLOVER_API cl_int 12557ec681f3SmrgclEnqueueSVMMigrateMem(cl_command_queue d_q, 12567ec681f3Smrg cl_uint num_svm_pointers, 12577ec681f3Smrg const void **svm_pointers, 12587ec681f3Smrg const size_t *sizes, 12597ec681f3Smrg const cl_mem_migration_flags flags, 12607ec681f3Smrg cl_uint num_deps, 12617ec681f3Smrg const cl_event *d_deps, 12627ec681f3Smrg cl_event *rd_ev) try { 12637ec681f3Smrg auto &q = obj(d_q); 12647ec681f3Smrg auto deps = objs<wait_list_tag>(d_deps, num_deps); 12657ec681f3Smrg 12667ec681f3Smrg validate_common(q, deps); 12677ec681f3Smrg validate_mem_migration_flags(flags); 12687ec681f3Smrg 12697ec681f3Smrg if (!q.device().svm_support()) 12707ec681f3Smrg return CL_INVALID_OPERATION; 12717ec681f3Smrg 12727ec681f3Smrg if (!num_svm_pointers || !svm_pointers) 12737ec681f3Smrg return CL_INVALID_VALUE; 12747ec681f3Smrg 12757ec681f3Smrg std::vector<size_t> sizes_copy(num_svm_pointers); 12767ec681f3Smrg std::vector<const void*> ptrs(num_svm_pointers); 12777ec681f3Smrg 12787ec681f3Smrg for (unsigned i = 0; i < num_svm_pointers; ++i) { 12797ec681f3Smrg const void *ptr = svm_pointers[i]; 12807ec681f3Smrg size_t size = sizes ? 
sizes[i] : 0; 12817ec681f3Smrg if (!ptr) 12827ec681f3Smrg return CL_INVALID_VALUE; 12837ec681f3Smrg 12847ec681f3Smrg auto p = q.context().find_svm_allocation(ptr); 12857ec681f3Smrg if (!p.first) 12867ec681f3Smrg return CL_INVALID_VALUE; 12877ec681f3Smrg 12887ec681f3Smrg std::ptrdiff_t pdiff = (uint8_t*)ptr - (uint8_t*)p.first; 12897ec681f3Smrg if (size && size + pdiff > p.second) 12907ec681f3Smrg return CL_INVALID_VALUE; 12917ec681f3Smrg 12927ec681f3Smrg sizes_copy[i] = size ? size : p.second; 12937ec681f3Smrg ptrs[i] = size ? svm_pointers[i] : p.first; 12947ec681f3Smrg } 12957ec681f3Smrg 12967ec681f3Smrg auto hev = create<hard_event>( 12977ec681f3Smrg q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps, 12987ec681f3Smrg [=, &q](event &) { 12997ec681f3Smrg q.svm_migrate(ptrs, sizes_copy, flags); 13007ec681f3Smrg }); 13017ec681f3Smrg 13027ec681f3Smrg ret_object(rd_ev, hev); 13037ec681f3Smrg return CL_SUCCESS; 13047ec681f3Smrg 13057ec681f3Smrg} catch (error &e) { 13067ec681f3Smrg return e.get(); 13077ec681f3Smrg} 1308