17ec681f3Smrg/* 27ec681f3Smrg * Copyright 2011 Joakim Sindholt <opensource@zhasha.com> 37ec681f3Smrg * Copyright 2015 Patrick Rudolph <siro@das-labor.org> 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 97ec681f3Smrg * license, and/or sell copies of the Software, and to permit persons to whom 107ec681f3Smrg * the Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 207ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 217ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 227ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 237ec681f3Smrg 247ec681f3Smrg#include "buffer9.h" 257ec681f3Smrg#include "device9.h" 267ec681f3Smrg#include "indexbuffer9.h" 277ec681f3Smrg#include "nine_buffer_upload.h" 287ec681f3Smrg#include "nine_helpers.h" 297ec681f3Smrg#include "nine_pipe.h" 307ec681f3Smrg 317ec681f3Smrg#include "pipe/p_screen.h" 327ec681f3Smrg#include "pipe/p_context.h" 337ec681f3Smrg#include "pipe/p_state.h" 347ec681f3Smrg#include "pipe/p_defines.h" 357ec681f3Smrg#include "pipe/p_format.h" 367ec681f3Smrg#include "util/u_box.h" 377ec681f3Smrg#include "util/u_inlines.h" 387ec681f3Smrg 397ec681f3Smrg#define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER) 407ec681f3Smrg 417ec681f3SmrgHRESULT 427ec681f3SmrgNineBuffer9_ctor( struct NineBuffer9 *This, 437ec681f3Smrg struct NineUnknownParams *pParams, 447ec681f3Smrg D3DRESOURCETYPE Type, 457ec681f3Smrg DWORD Usage, 467ec681f3Smrg UINT Size, 477ec681f3Smrg D3DPOOL Pool ) 487ec681f3Smrg{ 497ec681f3Smrg struct pipe_resource *info = &This->base.info; 507ec681f3Smrg HRESULT hr; 517ec681f3Smrg 527ec681f3Smrg DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, Size, Usage, Pool); 537ec681f3Smrg 547ec681f3Smrg user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL); 557ec681f3Smrg 567ec681f3Smrg This->maps = MALLOC(sizeof(struct NineTransfer)); 577ec681f3Smrg if (!This->maps) 587ec681f3Smrg return E_OUTOFMEMORY; 597ec681f3Smrg This->nlocks = 0; 607ec681f3Smrg This->nmaps = 0; 617ec681f3Smrg This->maxmaps = 1; 627ec681f3Smrg This->size = Size; 637ec681f3Smrg 647ec681f3Smrg info->screen = pParams->device->screen; 657ec681f3Smrg info->target = PIPE_BUFFER; 667ec681f3Smrg info->format = PIPE_FORMAT_R8_UNORM; 677ec681f3Smrg info->width0 = Size; 687ec681f3Smrg info->flags = 0; 697ec681f3Smrg 707ec681f3Smrg /* Note: WRITEONLY is just tip for resource placement, the resource 717ec681f3Smrg * can still be read (but slower). */ 727ec681f3Smrg info->bind = (Type == D3DRTYPE_INDEXBUFFER) ? PIPE_BIND_INDEX_BUFFER : PIPE_BIND_VERTEX_BUFFER; 737ec681f3Smrg 747ec681f3Smrg /* Software vertex processing: 757ec681f3Smrg * If the device is full software vertex processing, 767ec681f3Smrg * then the buffer is supposed to be used only for sw processing. 777ec681f3Smrg * For mixed vertex processing, buffers with D3DUSAGE_SOFTWAREPROCESSING 787ec681f3Smrg * can be used for both sw and hw processing. 797ec681f3Smrg * These buffers are expected to be stored in RAM. 807ec681f3Smrg * Apps expect locking the full buffer with no flags, then 817ec681f3Smrg * render a a few primitive, then locking again, etc 827ec681f3Smrg * to be a fast pattern. Only the SYSTEMMEM DYNAMIC path 837ec681f3Smrg * will give that pattern ok performance in our case. 847ec681f3Smrg * An alternative would be when sw processing is detected to 857ec681f3Smrg * convert Draw* calls to Draw*Up calls. */ 867ec681f3Smrg if (Usage & D3DUSAGE_SOFTWAREPROCESSING || 877ec681f3Smrg pParams->device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) { 887ec681f3Smrg Pool = D3DPOOL_SYSTEMMEM; 897ec681f3Smrg Usage |= D3DUSAGE_DYNAMIC; 907ec681f3Smrg /* Note: the application cannot retrieve Pool and Usage */ 917ec681f3Smrg } 927ec681f3Smrg 937ec681f3Smrg /* Always use the DYNAMIC path for SYSTEMMEM. 947ec681f3Smrg * If the app uses the vertex buffer is a dynamic fashion, 957ec681f3Smrg * this is going to be very significantly faster that way. 967ec681f3Smrg * If the app uses the vertex buffer in a static fashion, 977ec681f3Smrg * instead of being filled all at once, the buffer will be filled 987ec681f3Smrg * little per little, until it is fully filled, thus the perf hit 997ec681f3Smrg * will be very small. */ 1007ec681f3Smrg if (Pool == D3DPOOL_SYSTEMMEM) 1017ec681f3Smrg Usage |= D3DUSAGE_DYNAMIC; 1027ec681f3Smrg 1037ec681f3Smrg /* It is hard to find clear information on where to place the buffer in 1047ec681f3Smrg * memory depending on the flag. 1057ec681f3Smrg * MSDN: resources are static, except for those with DYNAMIC, thus why you 1067ec681f3Smrg * can only use DISCARD on them. 1077ec681f3Smrg * ATI doc: The driver has the liberty it wants for having things static 1087ec681f3Smrg * or not. 1097ec681f3Smrg * MANAGED: Ram + uploads to Vram copy at unlock (msdn and nvidia doc say 1107ec681f3Smrg * at first draw call using the buffer) 1117ec681f3Smrg * DEFAULT + Usage = 0 => System memory backing for easy read access 1127ec681f3Smrg * (That doc is very unclear on the details, like whether some copies to 1137ec681f3Smrg * vram copy are involved or not). 1147ec681f3Smrg * DEFAULT + WRITEONLY => Vram 1157ec681f3Smrg * DEFAULT + WRITEONLY + DYNAMIC => Either Vram buffer or GTT_WC, depending on what the driver wants. 1167ec681f3Smrg * SYSTEMMEM: Same as MANAGED, but handled by the driver instead of the runtime (which means 1177ec681f3Smrg * some small behavior differences between vendors). Implementing exactly as MANAGED should 1187ec681f3Smrg * be fine. 1197ec681f3Smrg */ 1207ec681f3Smrg if (Pool == D3DPOOL_SYSTEMMEM && Usage & D3DUSAGE_DYNAMIC) 1217ec681f3Smrg info->usage = PIPE_USAGE_STREAM; 1227ec681f3Smrg else if (Pool != D3DPOOL_DEFAULT) 1237ec681f3Smrg info->usage = PIPE_USAGE_DEFAULT; 1247ec681f3Smrg else if (Usage & D3DUSAGE_DYNAMIC && Usage & D3DUSAGE_WRITEONLY) 1257ec681f3Smrg info->usage = PIPE_USAGE_STREAM; 1267ec681f3Smrg else if (Usage & D3DUSAGE_WRITEONLY) 1277ec681f3Smrg info->usage = PIPE_USAGE_DEFAULT; 1287ec681f3Smrg /* For the remaining two, PIPE_USAGE_STAGING would probably be 1297ec681f3Smrg * a good fit according to the doc. However it seems rather a mistake 1307ec681f3Smrg * from apps to use these (mistakes that do really happen). Try 1317ec681f3Smrg * to put the flags that are the best compromise between the real 1327ec681f3Smrg * behaviour and what buggy apps should get for better performance. */ 1337ec681f3Smrg else if (Usage & D3DUSAGE_DYNAMIC) 1347ec681f3Smrg info->usage = PIPE_USAGE_STREAM; 1357ec681f3Smrg else 1367ec681f3Smrg info->usage = PIPE_USAGE_DYNAMIC; 1377ec681f3Smrg 1387ec681f3Smrg /* When Writeonly is not set, we don't want to enable the 1397ec681f3Smrg * optimizations */ 1407ec681f3Smrg This->discard_nooverwrite_only = !!(Usage & D3DUSAGE_WRITEONLY) && 1417ec681f3Smrg pParams->device->buffer_upload; 1427ec681f3Smrg /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */ 1437ec681f3Smrg /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */ 1447ec681f3Smrg /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */ 1457ec681f3Smrg /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */ 1467ec681f3Smrg /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */ 1477ec681f3Smrg /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */ 1487ec681f3Smrg 1497ec681f3Smrg info->height0 = 1; 1507ec681f3Smrg info->depth0 = 1; 1517ec681f3Smrg info->array_size = 1; 1527ec681f3Smrg info->last_level = 0; 1537ec681f3Smrg info->nr_samples = 0; 1547ec681f3Smrg info->nr_storage_samples = 0; 1557ec681f3Smrg 1567ec681f3Smrg hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE, 1577ec681f3Smrg Type, Pool, Usage); 1587ec681f3Smrg 1597ec681f3Smrg if (FAILED(hr)) 1607ec681f3Smrg return hr; 1617ec681f3Smrg 1627ec681f3Smrg if (Pool != D3DPOOL_DEFAULT) { 1637ec681f3Smrg This->managed.data = align_calloc( 1647ec681f3Smrg nine_format_get_level_alloc_size(This->base.info.format, 1657ec681f3Smrg Size, 1, 0), 32); 1667ec681f3Smrg if (!This->managed.data) 1677ec681f3Smrg return E_OUTOFMEMORY; 1687ec681f3Smrg This->managed.dirty = TRUE; 1697ec681f3Smrg u_box_1d(0, Size, &This->managed.dirty_box); 1707ec681f3Smrg u_box_1d(0, 0, &This->managed.valid_region); 1717ec681f3Smrg u_box_1d(0, 0, &This->managed.required_valid_region); 1727ec681f3Smrg u_box_1d(0, 0, &This->managed.filled_region); 1737ec681f3Smrg This->managed.can_unsynchronized = true; 1747ec681f3Smrg This->managed.num_worker_thread_syncs = 0; 1757ec681f3Smrg list_inithead(&This->managed.list); 1767ec681f3Smrg list_inithead(&This->managed.list2); 1777ec681f3Smrg list_add(&This->managed.list2, &pParams->device->managed_buffers); 1787ec681f3Smrg } 1797ec681f3Smrg 1807ec681f3Smrg return D3D_OK; 1817ec681f3Smrg} 1827ec681f3Smrg 1837ec681f3Smrgvoid 1847ec681f3SmrgNineBuffer9_dtor( struct NineBuffer9 *This ) 1857ec681f3Smrg{ 1867ec681f3Smrg DBG("This=%p\n", This); 1877ec681f3Smrg 1887ec681f3Smrg if (This->maps) { 1897ec681f3Smrg while (This->nlocks) { 1907ec681f3Smrg NineBuffer9_Unlock(This); 1917ec681f3Smrg } 1927ec681f3Smrg assert(!This->nmaps); 1937ec681f3Smrg FREE(This->maps); 1947ec681f3Smrg } 1957ec681f3Smrg 1967ec681f3Smrg if (This->base.pool != D3DPOOL_DEFAULT) { 1977ec681f3Smrg if (This->managed.data) 1987ec681f3Smrg align_free(This->managed.data); 1997ec681f3Smrg if (list_is_linked(&This->managed.list)) 2007ec681f3Smrg list_del(&This->managed.list); 2017ec681f3Smrg if (list_is_linked(&This->managed.list2)) 2027ec681f3Smrg list_del(&This->managed.list2); 2037ec681f3Smrg } 2047ec681f3Smrg 2057ec681f3Smrg if (This->buf) 2067ec681f3Smrg nine_upload_release_buffer(This->base.base.device->buffer_upload, This->buf); 2077ec681f3Smrg 2087ec681f3Smrg NineResource9_dtor(&This->base); 2097ec681f3Smrg} 2107ec681f3Smrg 2117ec681f3Smrgstruct pipe_resource * 2127ec681f3SmrgNineBuffer9_GetResource( struct NineBuffer9 *This, unsigned *offset ) 2137ec681f3Smrg{ 2147ec681f3Smrg if (This->buf) 2157ec681f3Smrg return nine_upload_buffer_resource_and_offset(This->buf, offset); 2167ec681f3Smrg *offset = 0; 2177ec681f3Smrg return NineResource9_GetResource(&This->base); 2187ec681f3Smrg} 2197ec681f3Smrg 2207ec681f3Smrgstatic void 2217ec681f3SmrgNineBuffer9_RebindIfRequired( struct NineBuffer9 *This, 2227ec681f3Smrg struct NineDevice9 *device, 2237ec681f3Smrg struct pipe_resource *resource, 2247ec681f3Smrg unsigned offset ) 2257ec681f3Smrg{ 2267ec681f3Smrg int i; 2277ec681f3Smrg 2287ec681f3Smrg if (!This->bind_count) 2297ec681f3Smrg return; 2307ec681f3Smrg for (i = 0; i < device->caps.MaxStreams; i++) { 2317ec681f3Smrg if (device->state.stream[i] == (struct NineVertexBuffer9 *)This) 2327ec681f3Smrg nine_context_set_stream_source_apply(device, i, 2337ec681f3Smrg resource, 2347ec681f3Smrg device->state.vtxbuf[i].buffer_offset + offset, 2357ec681f3Smrg device->state.vtxbuf[i].stride); 2367ec681f3Smrg } 2377ec681f3Smrg if (device->state.idxbuf == (struct NineIndexBuffer9 *)This) 2387ec681f3Smrg nine_context_set_indices_apply(device, resource, 2397ec681f3Smrg ((struct NineIndexBuffer9 *)This)->index_size, 2407ec681f3Smrg offset); 2417ec681f3Smrg} 2427ec681f3Smrg 2437ec681f3SmrgHRESULT NINE_WINAPI 2447ec681f3SmrgNineBuffer9_Lock( struct NineBuffer9 *This, 2457ec681f3Smrg UINT OffsetToLock, 2467ec681f3Smrg UINT SizeToLock, 2477ec681f3Smrg void **ppbData, 2487ec681f3Smrg DWORD Flags ) 2497ec681f3Smrg{ 2507ec681f3Smrg struct NineDevice9 *device = This->base.base.device; 2517ec681f3Smrg struct pipe_box box; 2527ec681f3Smrg struct pipe_context *pipe; 2537ec681f3Smrg void *data; 2547ec681f3Smrg unsigned usage; 2557ec681f3Smrg 2567ec681f3Smrg DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n", 2577ec681f3Smrg This, This->base.resource, 2587ec681f3Smrg OffsetToLock, SizeToLock, Flags); 2597ec681f3Smrg 2607ec681f3Smrg user_assert(ppbData, E_POINTER); 2617ec681f3Smrg 2627ec681f3Smrg if (SizeToLock == 0) { 2637ec681f3Smrg SizeToLock = This->size - OffsetToLock; 2647ec681f3Smrg user_warn(OffsetToLock != 0); 2657ec681f3Smrg } 2667ec681f3Smrg 2677ec681f3Smrg /* Write out of bound seems to have to be taken into account for these. 2687ec681f3Smrg * TODO: Do more tests (is it only at buffer first lock ? etc). 2697ec681f3Smrg * Since these buffers are supposed to be locked once and never 2707ec681f3Smrg * writen again (MANAGED or DYNAMIC is used for the other uses cases), 2717ec681f3Smrg * performance should be unaffected. */ 2727ec681f3Smrg if (!(This->base.usage & D3DUSAGE_DYNAMIC) && This->base.pool == D3DPOOL_DEFAULT) 2737ec681f3Smrg SizeToLock = This->size - OffsetToLock; 2747ec681f3Smrg 2757ec681f3Smrg u_box_1d(OffsetToLock, SizeToLock, &box); 2767ec681f3Smrg 2777ec681f3Smrg if (This->base.pool != D3DPOOL_DEFAULT) { 2787ec681f3Smrg /* MANAGED: READONLY doesn't dirty the buffer, nor 2797ec681f3Smrg * wait the upload in the worker thread 2807ec681f3Smrg * SYSTEMMEM: AMD/NVidia: All locks dirty the full buffer. Not on Intel 2817ec681f3Smrg * For Nvidia, SYSTEMMEM behaves are if there is no worker thread. 2827ec681f3Smrg * On AMD, READONLY and NOOVERWRITE do dirty the buffer, but do not sync the previous uploads 2837ec681f3Smrg * in the worker thread. On Intel only NOOVERWRITE has that effect. 2847ec681f3Smrg * We implement the AMD behaviour. */ 2857ec681f3Smrg if (This->base.pool == D3DPOOL_MANAGED) { 2867ec681f3Smrg if (!(Flags & D3DLOCK_READONLY)) { 2877ec681f3Smrg if (!This->managed.dirty) { 2887ec681f3Smrg assert(list_is_empty(&This->managed.list)); 2897ec681f3Smrg This->managed.dirty = TRUE; 2907ec681f3Smrg This->managed.dirty_box = box; 2917ec681f3Smrg /* Flush if regions pending to be uploaded would be dirtied */ 2927ec681f3Smrg if (p_atomic_read(&This->managed.pending_upload)) { 2937ec681f3Smrg u_box_intersect_1d(&box, &box, &This->managed.upload_pending_regions); 2947ec681f3Smrg if (box.width != 0) 2957ec681f3Smrg nine_csmt_process(This->base.base.device); 2967ec681f3Smrg } 2977ec681f3Smrg } else 2987ec681f3Smrg u_box_union_1d(&This->managed.dirty_box, &This->managed.dirty_box, &box); 2997ec681f3Smrg /* Tests trying to draw while the buffer is locked show that 3007ec681f3Smrg * SYSTEMMEM/MANAGED buffers are made dirty at Lock time */ 3017ec681f3Smrg BASEBUF_REGISTER_UPDATE(This); 3027ec681f3Smrg } 3037ec681f3Smrg } else { 3047ec681f3Smrg if (!(Flags & (D3DLOCK_READONLY|D3DLOCK_NOOVERWRITE)) && 3057ec681f3Smrg p_atomic_read(&This->managed.pending_upload)) { 3067ec681f3Smrg This->managed.num_worker_thread_syncs++; 3077ec681f3Smrg /* If we sync too often, pick the vertex_uploader path */ 3087ec681f3Smrg if (This->managed.num_worker_thread_syncs >= 3) 3097ec681f3Smrg This->managed.can_unsynchronized = false; 3107ec681f3Smrg nine_csmt_process(This->base.base.device); 3117ec681f3Smrg /* Note: AS DISCARD is not relevant for SYSTEMMEM, 3127ec681f3Smrg * NOOVERWRITE might have a similar meaning as what is 3137ec681f3Smrg * in D3D7 doc. Basically that data from previous draws 3147ec681f3Smrg * OF THIS FRAME are unaffected. As we flush csmt in Present(), 3157ec681f3Smrg * we should be correct. In some parts of the doc, the notion 3167ec681f3Smrg * of frame is implied to be related to Begin/EndScene(), 3177ec681f3Smrg * but tests show NOOVERWRITE after EndScene() doesn't flush 3187ec681f3Smrg * the csmt thread. */ 3197ec681f3Smrg } 3207ec681f3Smrg This->managed.dirty = true; 3217ec681f3Smrg u_box_1d(0, This->size, &This->managed.dirty_box); /* systemmem non-dynamic */ 3227ec681f3Smrg u_box_1d(0, 0, &This->managed.valid_region); /* systemmem dynamic */ 3237ec681f3Smrg BASEBUF_REGISTER_UPDATE(This); 3247ec681f3Smrg } 3257ec681f3Smrg 3267ec681f3Smrg *ppbData = (char *)This->managed.data + OffsetToLock; 3277ec681f3Smrg DBG("returning pointer %p\n", *ppbData); 3287ec681f3Smrg This->nlocks++; 3297ec681f3Smrg return D3D_OK; 3307ec681f3Smrg } 3317ec681f3Smrg 3327ec681f3Smrg /* Driver ddi doc: READONLY is never passed to the device. So it can only 3337ec681f3Smrg * have effect on things handled by the driver (MANAGED pool for example). 3347ec681f3Smrg * Msdn doc: DISCARD and NOOVERWRITE are only for DYNAMIC. 3357ec681f3Smrg * ATI doc: You can use DISCARD and NOOVERWRITE without DYNAMIC. 3367ec681f3Smrg * Msdn doc: D3DLOCK_DONOTWAIT is not among the valid flags for buffers. 3377ec681f3Smrg * Our tests: On win 7 nvidia, D3DLOCK_DONOTWAIT does return 3387ec681f3Smrg * D3DERR_WASSTILLDRAWING if the resource is in use, except for DYNAMIC. 3397ec681f3Smrg * Our tests: some apps do use both DISCARD and NOOVERWRITE at the same 3407ec681f3Smrg * time. On windows it seems to return different pointer in some conditions, 3417ec681f3Smrg * creation flags and drivers. However these tests indicate having 3427ec681f3Smrg * NOOVERWRITE win is a valid behaviour (NVidia). 3437ec681f3Smrg */ 3447ec681f3Smrg 3457ec681f3Smrg /* Have NOOVERWRITE win over DISCARD. This is allowed (see above) and 3467ec681f3Smrg * it prevents overconsuming buffers if apps do use both at the same time. */ 3477ec681f3Smrg if ((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) 3487ec681f3Smrg Flags &= ~D3DLOCK_DISCARD; 3497ec681f3Smrg 3507ec681f3Smrg if (Flags & D3DLOCK_DISCARD) 3517ec681f3Smrg usage = PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE; 3527ec681f3Smrg else if (Flags & D3DLOCK_NOOVERWRITE) 3537ec681f3Smrg usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED; 3547ec681f3Smrg else 3557ec681f3Smrg /* Do not ask for READ if writeonly and default pool (should be safe enough, 3567ec681f3Smrg * as the doc says app shouldn't expect reading to work with writeonly). */ 3577ec681f3Smrg usage = (This->base.usage & D3DUSAGE_WRITEONLY) ? 3587ec681f3Smrg PIPE_MAP_WRITE : 3597ec681f3Smrg PIPE_MAP_READ_WRITE; 3607ec681f3Smrg if (Flags & D3DLOCK_DONOTWAIT && !(This->base.usage & D3DUSAGE_DYNAMIC)) 3617ec681f3Smrg usage |= PIPE_MAP_DONTBLOCK; 3627ec681f3Smrg 3637ec681f3Smrg This->discard_nooverwrite_only &= !!(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)); 3647ec681f3Smrg 3657ec681f3Smrg if (This->nmaps == This->maxmaps) { 3667ec681f3Smrg struct NineTransfer *newmaps = 3677ec681f3Smrg REALLOC(This->maps, sizeof(struct NineTransfer)*This->maxmaps, 3687ec681f3Smrg sizeof(struct NineTransfer)*(This->maxmaps << 1)); 3697ec681f3Smrg if (newmaps == NULL) 3707ec681f3Smrg return E_OUTOFMEMORY; 3717ec681f3Smrg 3727ec681f3Smrg This->maxmaps <<= 1; 3737ec681f3Smrg This->maps = newmaps; 3747ec681f3Smrg } 3757ec681f3Smrg 3767ec681f3Smrg if (This->buf && !This->discard_nooverwrite_only) { 3777ec681f3Smrg struct pipe_box src_box; 3787ec681f3Smrg unsigned offset; 3797ec681f3Smrg struct pipe_resource *src_res; 3807ec681f3Smrg DBG("Disabling nine_subbuffer for a buffer having" 3817ec681f3Smrg "used a nine_subbuffer buffer\n"); 3827ec681f3Smrg /* Copy buffer content to the buffer resource, which 3837ec681f3Smrg * we will now use. 3847ec681f3Smrg * Note: The behaviour may be different from what is expected 3857ec681f3Smrg * with double lock. However applications can't really make expectations 3867ec681f3Smrg * about double locks, and don't really use them, so that's ok. */ 3877ec681f3Smrg src_res = nine_upload_buffer_resource_and_offset(This->buf, &offset); 3887ec681f3Smrg u_box_1d(offset, This->size, &src_box); 3897ec681f3Smrg 3907ec681f3Smrg pipe = NineDevice9_GetPipe(device); 3917ec681f3Smrg pipe->resource_copy_region(pipe, This->base.resource, 0, 0, 0, 0, 3927ec681f3Smrg src_res, 0, &src_box); 3937ec681f3Smrg /* Release previous resource */ 3947ec681f3Smrg if (This->nmaps >= 1) 3957ec681f3Smrg This->maps[This->nmaps-1].should_destroy_buf = true; 3967ec681f3Smrg else 3977ec681f3Smrg nine_upload_release_buffer(device->buffer_upload, This->buf); 3987ec681f3Smrg This->buf = NULL; 3997ec681f3Smrg /* Rebind buffer */ 4007ec681f3Smrg NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0); 4017ec681f3Smrg } 4027ec681f3Smrg 4037ec681f3Smrg This->maps[This->nmaps].transfer = NULL; 4047ec681f3Smrg This->maps[This->nmaps].is_pipe_secondary = false; 4057ec681f3Smrg This->maps[This->nmaps].buf = NULL; 4067ec681f3Smrg This->maps[This->nmaps].should_destroy_buf = false; 4077ec681f3Smrg 4087ec681f3Smrg if (This->discard_nooverwrite_only) { 4097ec681f3Smrg if (This->buf && (Flags & D3DLOCK_DISCARD)) { 4107ec681f3Smrg /* Release previous buffer */ 4117ec681f3Smrg if (This->nmaps >= 1) 4127ec681f3Smrg This->maps[This->nmaps-1].should_destroy_buf = true; 4137ec681f3Smrg else 4147ec681f3Smrg nine_upload_release_buffer(device->buffer_upload, This->buf); 4157ec681f3Smrg This->buf = NULL; 4167ec681f3Smrg } 4177ec681f3Smrg 4187ec681f3Smrg if (!This->buf) { 4197ec681f3Smrg unsigned offset; 4207ec681f3Smrg struct pipe_resource *res; 4217ec681f3Smrg This->buf = nine_upload_create_buffer(device->buffer_upload, This->base.info.width0); 4227ec681f3Smrg res = nine_upload_buffer_resource_and_offset(This->buf, &offset); 4237ec681f3Smrg NineBuffer9_RebindIfRequired(This, device, res, offset); 4247ec681f3Smrg } 4257ec681f3Smrg 4267ec681f3Smrg if (This->buf) { 4277ec681f3Smrg This->maps[This->nmaps].buf = This->buf; 4287ec681f3Smrg This->nmaps++; 4297ec681f3Smrg This->nlocks++; 4307ec681f3Smrg DBG("Returning %p\n", nine_upload_buffer_get_map(This->buf) + OffsetToLock); 4317ec681f3Smrg *ppbData = nine_upload_buffer_get_map(This->buf) + OffsetToLock; 4327ec681f3Smrg return D3D_OK; 4337ec681f3Smrg } else { 4347ec681f3Smrg /* Fallback to normal path, and don't try again */ 4357ec681f3Smrg This->discard_nooverwrite_only = false; 4367ec681f3Smrg } 4377ec681f3Smrg } 4387ec681f3Smrg 4397ec681f3Smrg /* Previous mappings may need pending commands to write to the 4407ec681f3Smrg * buffer (staging buffer for example). Before a NOOVERWRITE, 4417ec681f3Smrg * we thus need a finish, to guarantee any upload is finished. 4427ec681f3Smrg * Note for discard_nooverwrite_only we don't need to do this 4437ec681f3Smrg * check as neither discard nor nooverwrite have issues there */ 4447ec681f3Smrg if (This->need_sync_if_nooverwrite && !(Flags & D3DLOCK_DISCARD) && 4457ec681f3Smrg (Flags & D3DLOCK_NOOVERWRITE)) { 4467ec681f3Smrg struct pipe_screen *screen = NineDevice9_GetScreen(device); 4477ec681f3Smrg struct pipe_fence_handle *fence = NULL; 4487ec681f3Smrg 4497ec681f3Smrg pipe = NineDevice9_GetPipe(device); 4507ec681f3Smrg pipe->flush(pipe, &fence, 0); 4517ec681f3Smrg (void) screen->fence_finish(screen, NULL, fence, PIPE_TIMEOUT_INFINITE); 4527ec681f3Smrg screen->fence_reference(screen, &fence, NULL); 4537ec681f3Smrg } 4547ec681f3Smrg This->need_sync_if_nooverwrite = !(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)); 4557ec681f3Smrg 4567ec681f3Smrg /* When csmt is active, we want to avoid stalls as much as possible, 4577ec681f3Smrg * and thus we want to create a new resource on discard and map it 4587ec681f3Smrg * with the secondary pipe, instead of waiting on the main pipe. */ 4597ec681f3Smrg if (Flags & D3DLOCK_DISCARD && device->csmt_active) { 4607ec681f3Smrg struct pipe_screen *screen = NineDevice9_GetScreen(device); 4617ec681f3Smrg struct pipe_resource *new_res = nine_resource_create_with_retry(device, screen, &This->base.info); 4627ec681f3Smrg if (new_res) { 4637ec681f3Smrg /* Use the new resource */ 4647ec681f3Smrg pipe_resource_reference(&This->base.resource, new_res); 4657ec681f3Smrg pipe_resource_reference(&new_res, NULL); 4667ec681f3Smrg usage = PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED; 4677ec681f3Smrg NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0); 4687ec681f3Smrg This->maps[This->nmaps].is_pipe_secondary = TRUE; 4697ec681f3Smrg } 4707ec681f3Smrg } else if (Flags & D3DLOCK_NOOVERWRITE && device->csmt_active) 4717ec681f3Smrg This->maps[This->nmaps].is_pipe_secondary = TRUE; 4727ec681f3Smrg 4737ec681f3Smrg if (This->maps[This->nmaps].is_pipe_secondary) 4747ec681f3Smrg pipe = device->pipe_secondary; 4757ec681f3Smrg else 4767ec681f3Smrg pipe = NineDevice9_GetPipe(device); 4777ec681f3Smrg 4787ec681f3Smrg data = pipe->buffer_map(pipe, This->base.resource, 0, 4797ec681f3Smrg usage, &box, &This->maps[This->nmaps].transfer); 4807ec681f3Smrg 4817ec681f3Smrg if (!data) { 4827ec681f3Smrg DBG("pipe::buffer_map failed\n" 4837ec681f3Smrg " usage = %x\n" 4847ec681f3Smrg " box.x = %u\n" 4857ec681f3Smrg " box.width = %u\n", 4867ec681f3Smrg usage, box.x, box.width); 4877ec681f3Smrg 4887ec681f3Smrg if (Flags & D3DLOCK_DONOTWAIT) 4897ec681f3Smrg return D3DERR_WASSTILLDRAWING; 4907ec681f3Smrg return D3DERR_INVALIDCALL; 4917ec681f3Smrg } 4927ec681f3Smrg 4937ec681f3Smrg DBG("returning pointer %p\n", data); 4947ec681f3Smrg This->nmaps++; 4957ec681f3Smrg This->nlocks++; 4967ec681f3Smrg *ppbData = data; 4977ec681f3Smrg 4987ec681f3Smrg return D3D_OK; 4997ec681f3Smrg} 5007ec681f3Smrg 5017ec681f3SmrgHRESULT NINE_WINAPI 5027ec681f3SmrgNineBuffer9_Unlock( struct NineBuffer9 *This ) 5037ec681f3Smrg{ 5047ec681f3Smrg struct NineDevice9 *device = This->base.base.device; 5057ec681f3Smrg struct pipe_context *pipe; 5067ec681f3Smrg int i; 5077ec681f3Smrg DBG("This=%p\n", This); 5087ec681f3Smrg 5097ec681f3Smrg user_assert(This->nlocks > 0, D3DERR_INVALIDCALL); 5107ec681f3Smrg This->nlocks--; 5117ec681f3Smrg if (This->nlocks > 0) 5127ec681f3Smrg return D3D_OK; /* Pending unlocks. Wait all unlocks before unmapping */ 5137ec681f3Smrg 5147ec681f3Smrg if (This->base.pool == D3DPOOL_DEFAULT) { 5157ec681f3Smrg for (i = 0; i < This->nmaps; i++) { 5167ec681f3Smrg if (!This->maps[i].buf) { 5177ec681f3Smrg pipe = This->maps[i].is_pipe_secondary ? 5187ec681f3Smrg device->pipe_secondary : 5197ec681f3Smrg nine_context_get_pipe_acquire(device); 5207ec681f3Smrg pipe->buffer_unmap(pipe, This->maps[i].transfer); 5217ec681f3Smrg /* We need to flush in case the driver does implicit copies */ 5227ec681f3Smrg if (This->maps[i].is_pipe_secondary) 5237ec681f3Smrg pipe->flush(pipe, NULL, 0); 5247ec681f3Smrg else 5257ec681f3Smrg nine_context_get_pipe_release(device); 5267ec681f3Smrg } else if (This->maps[i].should_destroy_buf) 5277ec681f3Smrg nine_upload_release_buffer(device->buffer_upload, This->maps[i].buf); 5287ec681f3Smrg } 5297ec681f3Smrg This->nmaps = 0; 5307ec681f3Smrg } 5317ec681f3Smrg return D3D_OK; 5327ec681f3Smrg} 5337ec681f3Smrg 5347ec681f3Smrgvoid 5357ec681f3SmrgNineBuffer9_SetDirty( struct NineBuffer9 *This ) 5367ec681f3Smrg{ 5377ec681f3Smrg assert(This->base.pool != D3DPOOL_DEFAULT); 5387ec681f3Smrg 5397ec681f3Smrg This->managed.dirty = TRUE; 5407ec681f3Smrg u_box_1d(0, This->size, &This->managed.dirty_box); 5417ec681f3Smrg BASEBUF_REGISTER_UPDATE(This); 5427ec681f3Smrg} 5437ec681f3Smrg 5447ec681f3Smrg/* Try to remove b from a, supposed to include b */ 5457ec681f3Smrgstatic void u_box_try_remove_region_1d(struct pipe_box *dst, 5467ec681f3Smrg const struct pipe_box *a, 5477ec681f3Smrg const struct pipe_box *b) 5487ec681f3Smrg{ 5497ec681f3Smrg int x, width; 5507ec681f3Smrg if (a->x == b->x) { 5517ec681f3Smrg x = a->x + b->width; 5527ec681f3Smrg width = a->width - b->width; 5537ec681f3Smrg } else if ((a->x + a->width) == (b->x + b->width)) { 5547ec681f3Smrg x = a->x; 5557ec681f3Smrg width = a->width - b->width; 5567ec681f3Smrg } else { 5577ec681f3Smrg x = a->x; 5587ec681f3Smrg width = a->width; 5597ec681f3Smrg } 5607ec681f3Smrg dst->x = x; 5617ec681f3Smrg dst->width = width; 5627ec681f3Smrg} 5637ec681f3Smrg 5647ec681f3Smrgvoid 5657ec681f3SmrgNineBuffer9_Upload( struct NineBuffer9 *This ) 5667ec681f3Smrg{ 5677ec681f3Smrg struct NineDevice9 *device = This->base.base.device; 5687ec681f3Smrg unsigned upload_flags = 0; 5697ec681f3Smrg struct pipe_box box_upload; 5707ec681f3Smrg 5717ec681f3Smrg assert(This->base.pool != D3DPOOL_DEFAULT && This->managed.dirty); 5727ec681f3Smrg 5737ec681f3Smrg if (This->base.pool == D3DPOOL_SYSTEMMEM && This->base.usage & D3DUSAGE_DYNAMIC) { 5747ec681f3Smrg struct pipe_box region_already_valid; 5757ec681f3Smrg struct pipe_box conflicting_region; 5767ec681f3Smrg struct pipe_box *valid_region = &This->managed.valid_region; 5777ec681f3Smrg struct pipe_box *required_valid_region = &This->managed.required_valid_region; 5787ec681f3Smrg struct pipe_box *filled_region = &This->managed.filled_region; 5797ec681f3Smrg /* Try to upload SYSTEMMEM DYNAMIC in an efficient fashion. 5807ec681f3Smrg * Unlike non-dynamic for which we upload the whole dirty region, try to 5817ec681f3Smrg * only upload the data needed for the draw. The draw call preparation 5827ec681f3Smrg * fills This->managed.required_valid_region for that */ 5837ec681f3Smrg u_box_intersect_1d(®ion_already_valid, 5847ec681f3Smrg valid_region, 5857ec681f3Smrg required_valid_region); 5867ec681f3Smrg /* If the required valid region is already valid, nothing to do */ 5877ec681f3Smrg if (region_already_valid.x == required_valid_region->x && 5887ec681f3Smrg region_already_valid.width == required_valid_region->width) { 5897ec681f3Smrg /* Rebind if the region happens to be valid in the original buffer 5907ec681f3Smrg * but we have since used vertex_uploader */ 5917ec681f3Smrg if (!This->managed.can_unsynchronized) 5927ec681f3Smrg NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0); 5937ec681f3Smrg u_box_1d(0, 0, required_valid_region); 5947ec681f3Smrg return; 5957ec681f3Smrg } 5967ec681f3Smrg /* (Try to) Remove valid areas from the region to upload */ 5977ec681f3Smrg u_box_try_remove_region_1d(&box_upload, 5987ec681f3Smrg required_valid_region, 5997ec681f3Smrg ®ion_already_valid); 6007ec681f3Smrg assert(box_upload.width > 0); 6017ec681f3Smrg /* To maintain correctly the valid region, as we will do union later with 6027ec681f3Smrg * box_upload, we must ensure box_upload is consecutive with valid_region */ 6037ec681f3Smrg if (box_upload.x > valid_region->x + valid_region->width && valid_region->width > 0) { 6047ec681f3Smrg box_upload.width = box_upload.x + box_upload.width - (valid_region->x + valid_region->width); 6057ec681f3Smrg box_upload.x = valid_region->x + valid_region->width; 6067ec681f3Smrg } else if (box_upload.x + box_upload.width < valid_region->x && valid_region->width > 0) { 6077ec681f3Smrg box_upload.width = valid_region->x - box_upload.x; 6087ec681f3Smrg } 6097ec681f3Smrg /* There is conflict if some areas, that are not valid but are filled for previous draw calls, 6107ec681f3Smrg * intersect with the region we plan to upload. Note by construction valid_region IS 6117ec681f3Smrg * included in filled_region, thus so is region_already_valid. */ 6127ec681f3Smrg u_box_intersect_1d(&conflicting_region, &box_upload, filled_region); 6137ec681f3Smrg /* As box_upload could still contain region_already_valid, check the intersection 6147ec681f3Smrg * doesn't happen to be exactly region_already_valid (it cannot be smaller, see above) */ 6157ec681f3Smrg if (This->managed.can_unsynchronized && (conflicting_region.width == 0 || 6167ec681f3Smrg (conflicting_region.x == region_already_valid.x && 6177ec681f3Smrg conflicting_region.width == region_already_valid.width))) { 6187ec681f3Smrg /* No conflicts. */ 6197ec681f3Smrg upload_flags |= PIPE_MAP_UNSYNCHRONIZED; 6207ec681f3Smrg } else { 6217ec681f3Smrg /* We cannot use PIPE_MAP_UNSYNCHRONIZED. We must choose between no flag and DISCARD. 6227ec681f3Smrg * Criterias to discard: 6237ec681f3Smrg * . Most of the resource was filled (but some apps do allocate a big buffer 6247ec681f3Smrg * to only use a small part in a round fashion) 6257ec681f3Smrg * . The region to upload is very small compared to the filled region and 6267ec681f3Smrg * at the start of the buffer (hints at round usage starting again) 6277ec681f3Smrg * . The region to upload is very big compared to the required region 6287ec681f3Smrg * . We have not discarded yet this frame 6297ec681f3Smrg * If the buffer use pattern seems to sync the worker thread too often, 6307ec681f3Smrg * revert to the vertex_uploader */ 6317ec681f3Smrg if (This->managed.num_worker_thread_syncs < 3 && 6327ec681f3Smrg (filled_region->width > (This->size / 2) || 6337ec681f3Smrg (10 * box_upload.width < filled_region->width && 6347ec681f3Smrg box_upload.x < (filled_region->x + filled_region->width)/2) || 6357ec681f3Smrg box_upload.width > 2 * required_valid_region->width || 6367ec681f3Smrg This->managed.frame_count_last_discard != device->frame_count)) { 6377ec681f3Smrg /* Avoid DISCARDING too much by discarding only if most of the buffer 6387ec681f3Smrg * has been used */ 6397ec681f3Smrg DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER, 6407ec681f3Smrg "Uploading %p DISCARD: valid %d %d, filled %d %d, required %d %d, box_upload %d %d, required already_valid %d %d, conficting %d %d\n", 6417ec681f3Smrg This, valid_region->x, valid_region->width, filled_region->x, filled_region->width, 6427ec681f3Smrg required_valid_region->x, required_valid_region->width, box_upload.x, box_upload.width, 6437ec681f3Smrg region_already_valid.x, region_already_valid.width, conflicting_region.x, conflicting_region.width 6447ec681f3Smrg ); 6457ec681f3Smrg upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; 6467ec681f3Smrg u_box_1d(0, 0, filled_region); 6477ec681f3Smrg u_box_1d(0, 0, valid_region); 6487ec681f3Smrg box_upload = This->managed.required_valid_region; 6497ec681f3Smrg /* Rebind the buffer if we used intermediate alternative buffer */ 6507ec681f3Smrg if (!This->managed.can_unsynchronized) 6517ec681f3Smrg NineBuffer9_RebindIfRequired(This, device, This->base.resource, 0); 6527ec681f3Smrg This->managed.can_unsynchronized = true; 6537ec681f3Smrg This->managed.frame_count_last_discard = device->frame_count; 6547ec681f3Smrg } else { 6557ec681f3Smrg /* Once we use without UNSYNCHRONIZED, we cannot use it anymore. 6567ec681f3Smrg * Use a different buffer. */ 6577ec681f3Smrg unsigned buffer_offset = 0; 6587ec681f3Smrg struct pipe_resource *resource = NULL; 6597ec681f3Smrg This->managed.can_unsynchronized = false; 6607ec681f3Smrg u_upload_data(device->vertex_uploader, 6617ec681f3Smrg required_valid_region->x, 6627ec681f3Smrg required_valid_region->width, 6637ec681f3Smrg 64, 6647ec681f3Smrg This->managed.data + required_valid_region->x, 6657ec681f3Smrg &buffer_offset, 6667ec681f3Smrg &resource); 6677ec681f3Smrg buffer_offset -= required_valid_region->x; 6687ec681f3Smrg u_upload_unmap(device->vertex_uploader); 6697ec681f3Smrg if (resource) { 6707ec681f3Smrg NineBuffer9_RebindIfRequired(This, device, resource, buffer_offset); 6717ec681f3Smrg /* Note: This only works because for these types of buffers this function 6727ec681f3Smrg * is called before every draw call. Else it wouldn't work when the app 6737ec681f3Smrg * rebinds buffers. In addition it needs this function to be called only 6747ec681f3Smrg * once per buffers even if bound several times, which we do. */ 6757ec681f3Smrg u_box_1d(0, 0, required_valid_region); 6767ec681f3Smrg pipe_resource_reference(&resource, NULL); 6777ec681f3Smrg return; 6787ec681f3Smrg } 6797ec681f3Smrg } 6807ec681f3Smrg } 6817ec681f3Smrg 6827ec681f3Smrg u_box_union_1d(filled_region, 6837ec681f3Smrg filled_region, 6847ec681f3Smrg &box_upload); 6857ec681f3Smrg u_box_union_1d(valid_region, 6867ec681f3Smrg valid_region, 6877ec681f3Smrg &box_upload); 6887ec681f3Smrg u_box_1d(0, 0, required_valid_region); 6897ec681f3Smrg } else 6907ec681f3Smrg box_upload = This->managed.dirty_box; 6917ec681f3Smrg 6927ec681f3Smrg if (box_upload.x == 0 && box_upload.width == This->size) { 6937ec681f3Smrg upload_flags |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; 6947ec681f3Smrg } 6957ec681f3Smrg 6967ec681f3Smrg if (This->managed.pending_upload) { 6977ec681f3Smrg u_box_union_1d(&This->managed.upload_pending_regions, 6987ec681f3Smrg &This->managed.upload_pending_regions, 6997ec681f3Smrg &box_upload); 7007ec681f3Smrg } else { 7017ec681f3Smrg This->managed.upload_pending_regions = box_upload; 7027ec681f3Smrg } 7037ec681f3Smrg 7047ec681f3Smrg DBG_FLAG(DBG_INDEXBUFFER|DBG_VERTEXBUFFER, 7057ec681f3Smrg "Uploading %p, offset=%d, size=%d, Flags=0x%x\n", 7067ec681f3Smrg This, box_upload.x, box_upload.width, upload_flags); 7077ec681f3Smrg nine_context_range_upload(device, &This->managed.pending_upload, 7087ec681f3Smrg (struct NineUnknown *)This, 7097ec681f3Smrg This->base.resource, 7107ec681f3Smrg box_upload.x, 7117ec681f3Smrg box_upload.width, 7127ec681f3Smrg upload_flags, 7137ec681f3Smrg (char *)This->managed.data + box_upload.x); 7147ec681f3Smrg This->managed.dirty = FALSE; 7157ec681f3Smrg} 716