transfer.cpp revision 7ec681f3
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "util/bitscan.h"

#include "api/dispatch.hpp"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;

namespace {
   typedef resource::vector vector_t;

   vector_t
   vector(const size_t *p) {
      if (!p)
         throw error(CL_INVALID_VALUE);
      return range(p, 3);
   }

   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }

   ///
   /// Size of a region in bytes.
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

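   //
   // Worked example (illustrative, not a normative statement of the spec):
   // for a region of { w, h, d } elements and an expanded pitch of
   // { 1, row, slice } bytes, size() above evaluates to
   //
   //    1 * w + row * (h - 1) + slice * (d - 1)
   //
   // i.e. the byte span from the first to the last addressed element.
   // validate_object() adds that to dot(pitch, origin) and compares the
   // result against mem.size().
   //
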
   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      vector_t size = { img.width(), img.height(), img.depth() };
      const auto &dev = q.device();

      if (!dev.image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);

      switch (img.type()) {
      case CL_MEM_OBJECT_IMAGE1D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE2D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max || img.height() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE3D: {
         const size_t max = dev.max_image_size_3d();
         if (img.width() > max || img.height() > max || img.depth() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      // XXX: Implement missing checks once Clover supports more image types.
      default:
         throw error(CL_INVALID_IMAGE_SIZE);
      }
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

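   //
   // Illustrative example: a buffer created with CL_MEM_HOST_WRITE_ONLY
   // fails validate_object_access(mem, CL_MEM_HOST_READ_ONLY), and
   // therefore clEnqueueReadBuffer(), with CL_INVALID_OPERATION, because
   // CL_MEM_HOST_WRITE_ONLY is outside the allowed set of host access
   // flags for a host read.
   //
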
   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }

   ///
   /// Checks that the memory migration flags are correct.
   ///
   void
   validate_mem_migration_flags(const cl_mem_migration_flags flags) {
      const cl_mem_migration_flags valid =
         CL_MIGRATE_MEM_OBJECT_HOST |
         CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;

      if (flags & ~valid)
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  Instances are implicitly convertible to \a void *.
   ///
   template<typename T>
   struct _map;

   template<>
   struct _map<image*> {
      _map(command_queue &q, image *img, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, img->resource_in(q), flags, true, offset, region),
         pitch(map.pitch())
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<>
   struct _map<buffer*> {
      _map(command_queue &q, buffer *mem, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, mem->resource_in(q), flags, true,
             {{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
         pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<typename P>
   struct _map<P *> {
      _map(command_queue &q, P *ptr, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(ptr);
      }

      P *ptr;
      vector_t pitch;
   };

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         _map<T> dst = { q, dst_obj, CL_MAP_WRITE,
                         dst_orig, dst_pitch, region };
         _map<S> src = { q, src_obj, CL_MAP_READ,
                         src_orig, src_pitch, region };
         assert(src.pitch[0] == dst.pitch[0]);
         vector_t v = {};

         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst.pitch, v),
                  static_cast<const char *>(src) + dot(src.pitch, v),
                  src.pitch[0] * region[0]);
            }
         }
      };
   }

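   //
   // Rough usage sketch: because _map<> is specialized for buffer*, image*
   // and plain host pointers, soft_copy_op() can service host <-> buffer,
   // host <-> image and buffer <-> image transfers alike.  For instance,
   //
   //    soft_copy_op(q, ptr, host_origin, host_pitch,
   //                 &mem, obj_origin, obj_pitch, region)
   //
   // maps `mem` for reading and copies it row by row into the host
   // pointer `ptr`, which is how the read entry points below use it.
   //
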
   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource_in(q).copy(q, dst_orig, region,
                                      src_obj->resource_in(q), src_orig);
      };
   }
}

CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

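//
// Minimal host-side sketch for the two entry points above (the handle
// names `queue`, `buf`, `host_ptr` and `nbytes` are hypothetical):
//
//    cl_int err = clEnqueueReadBuffer(queue, buf, CL_TRUE /* blocking */,
//                                     0, nbytes, host_ptr, 0, NULL, NULL);
//
// With a blocking read or write, the call returns only after the soft
// copy has run and hev().wait_signalled() above has returned.
//
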
CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
                    const void *pattern, size_t pattern_size,
                    size_t offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t origin = { offset };
   auto dst_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, origin, dst_pitch, region);

   if (!pattern)
      return CL_INVALID_VALUE;

   if (!util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 || size % pattern_size ||
       offset % pattern_size) {
      return CL_INVALID_VALUE;
   }

   auto sub = dynamic_cast<sub_buffer *>(&mem);
   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
   }

   std::string data = std::string((char *)pattern, pattern_size);
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_BUFFER, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

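//
// Note on clEnqueueFillBuffer() above: pattern_size must be a non-zero
// power of two no larger than 128 bytes, and both offset and size must be
// multiples of it.  A hypothetical host-side call clearing a buffer to
// zero could look like:
//
//    cl_uint zero = 0;
//    clEnqueueFillBuffer(queue, buf, &zero, sizeof(zero),
//                        0, nbytes, 0, NULL, NULL);
//
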
CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

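//
// Note on the image read path above: passing row_pitch == 0 and
// slice_pitch == 0 makes pitch() substitute the natural pitch for each,
// i.e. the unaligned size of the previous dimension, so a tightly packed
// host buffer needs no explicit pitch arguments.
//
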
CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
                   const void *fill_color,
                   const size_t *p_origin, const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto origin = vector(p_origin);
   auto region = vector(p_region);

   validate_common(q, deps);
   validate_object(q, img, origin, region);

   if (!fill_color)
      return CL_INVALID_VALUE;

   std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_IMAGE, deps,
      [=, &q, &img](event &) {
         img.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

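//
// Design note: image to image copies above go through hard_copy_op(),
// i.e. a device-side resource copy, while the image/buffer variants below
// fall back to soft_copy_op(), which maps both objects and copies the
// region through the host row by row.
//
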
CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

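//
// Minimal host-side sketch for the mapping API above (hypothetical
// handles, error handling omitted):
//
//    cl_int err;
//    void *p = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_WRITE,
//                                 0, nbytes, 0, NULL, NULL, &err);
//    memcpy(p, host_data, nbytes);
//    clEnqueueUnmapMemObject(queue, buf, p, 0, NULL, NULL);
//
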
CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   if (!row_pitch)
      throw error(CL_INVALID_VALUE);

   if (img.slice_pitch() && !slice_pitch)
      throw error(CL_INVALID_VALUE);

   auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
   *row_pitch = map->pitch()[1];
   if (slice_pitch)
      *slice_pitch = map->pitch()[2];

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue d_q,
                           cl_uint num_mems,
                           const cl_mem *d_mems,
                           cl_mem_migration_flags flags,
                           cl_uint num_deps,
                           const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto mems = objs<memory_obj>(d_mems, num_mems);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (any_of([&](const memory_obj &m) {
            return m.context() != q.context();
         }, mems))
      throw error(CL_INVALID_CONTEXT);

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         for (auto &mem: mems) {
            if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_out(q);

               // For flags == CL_MIGRATE_MEM_OBJECT_HOST alone to be
               // efficient, cl*ReadBuffer* would need to implement
               // reading directly from host memory.

            } else {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_undef(q);
               else
                  mem.resource_in(q);
            }
         }
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

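//
// Hypothetical host-side use of the entry point above, prefetching two
// memory objects to the device ahead of a kernel launch (handle names
// are illustrative):
//
//    cl_mem objs[] = { buf_a, buf_b };
//    clEnqueueMigrateMemObjects(queue, 2, objs, 0 /* to device */,
//                               0, NULL, NULL);
//
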
cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                          cl_command_queue queue, cl_uint num_svm_pointers,
                          void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
      pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         clover::context &ctx = obj(d_q).context();
         for (void *p : range(svm_pointers, num_svm_pointers)) {
            ctx.remove_svm_allocation(p);
            free(p);
         }
      };
   }

   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
                 cl_uint num_svm_pointers,
                 void *svm_pointers[],
                 void (CL_CALLBACK *pfn_free_func) (
                    cl_command_queue queue, cl_uint num_svm_pointers,
                    void *svm_pointers[], void *user_data),
                 void *user_data,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
                 cl_event *event) {

   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
                         pfn_free_func, user_data, num_events_in_wait_list,
                         event_wait_list, event, CL_COMMAND_SVM_FREE);
}

cl_int
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
                         cl_bool blocking_copy,
                         void *dst_ptr,
                         const void *src_ptr,
                         size_t size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_event *event,
                         cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (dst_ptr == nullptr || src_ptr == nullptr)
      return CL_INVALID_VALUE;

   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
      return CL_MEM_COPY_OVERLAP;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            memcpy(dst_ptr, src_ptr, size);
         });

      if (blocking_copy)
         hev().wait();
      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemcpy(cl_command_queue d_q,
                   cl_bool blocking_copy,
                   void *dst_ptr,
                   const void *src_ptr,
                   size_t size,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
                   cl_event *event) {

   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
                           size, num_events_in_wait_list, event_wait_list,
                           event, CL_COMMAND_SVM_MEMCPY);
}

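//
// Note on the overlap test in EnqueueSVMMemcpy() above: the copy is
// rejected with CL_MEM_COPY_OVERLAP whenever the distance between the two
// pointers is smaller than the number of bytes copied, e.g. copying 64
// bytes between pointers that are only 32 bytes apart.
//
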
cl_int
clover::EnqueueSVMMemFill(cl_command_queue d_q,
                          void *svm_ptr,
                          const void *pattern,
                          size_t pattern_size,
                          size_t size,
                          cl_uint num_events_in_wait_list,
                          const cl_event *event_wait_list,
                          cl_event *event,
                          cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || pattern == nullptr ||
       !util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 ||
       !ptr_is_aligned(svm_ptr, pattern_size) ||
       size % pattern_size)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            void *ptr = svm_ptr;
            for (size_t s = size; s; s -= pattern_size) {
               memcpy(ptr, pattern, pattern_size);
               ptr = static_cast<uint8_t*>(ptr) + pattern_size;
            }
         });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemFill(cl_command_queue d_q,
                    void *svm_ptr,
                    const void *pattern,
                    size_t pattern_size,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event) {

   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
                            size, num_events_in_wait_list, event_wait_list,
                            event, CL_COMMAND_SVM_MEMFILL);
}

cl_int
clover::EnqueueSVMMap(cl_command_queue d_q,
                      cl_bool blocking_map,
                      cl_map_flags map_flags,
                      void *svm_ptr,
                      size_t size,
                      cl_uint num_events_in_wait_list,
                      const cl_event *event_wait_list,
                      cl_event *event,
                      cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || size == 0)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMap(cl_command_queue d_q,
                cl_bool blocking_map,
                cl_map_flags map_flags,
                void *svm_ptr,
                size_t size,
                cl_uint num_events_in_wait_list,
                const cl_event *event_wait_list,
                cl_event *event) {

   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event,
                        CL_COMMAND_SVM_MAP);
}

cl_int
clover::EnqueueSVMUnmap(cl_command_queue d_q,
                        void *svm_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event *event_wait_list,
                        cl_event *event,
                        cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMUnmap(cl_command_queue d_q,
                  void *svm_ptr,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
                  cl_event *event) {

   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
}

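//
// Note on the SVM map/unmap paths above: on devices with system SVM
// (can_emulate) the pointer is already host-accessible, so both calls
// just enqueue an event that does no actual work; otherwise they bail
// out with CLOVER_NOT_SUPPORTED_UNTIL("2.0").
//
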
CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint num_deps,
                       const cl_event *d_deps,
                       cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (!num_svm_pointers || !svm_pointers)
      return CL_INVALID_VALUE;

   std::vector<size_t> sizes_copy(num_svm_pointers);
   std::vector<const void*> ptrs(num_svm_pointers);

   for (unsigned i = 0; i < num_svm_pointers; ++i) {
      const void *ptr = svm_pointers[i];
      size_t size = sizes ? sizes[i] : 0;
      if (!ptr)
         return CL_INVALID_VALUE;

      auto p = q.context().find_svm_allocation(ptr);
      if (!p.first)
         return CL_INVALID_VALUE;

      std::ptrdiff_t pdiff = (uint8_t*)ptr - (uint8_t*)p.first;
      if (size && size + pdiff > p.second)
         return CL_INVALID_VALUE;

      sizes_copy[i] = size ? size : p.second;
      ptrs[i] = size ? svm_pointers[i] : p.first;
   }

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         q.svm_migrate(ptrs, sizes_copy, flags);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}