1/*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24#define NINE_STATE
25
26#include "device9.h"
27#include "swapchain9.h"
28#include "basetexture9.h"
29#include "buffer9.h"
30#include "indexbuffer9.h"
31#include "surface9.h"
32#include "vertexbuffer9.h"
33#include "vertexdeclaration9.h"
34#include "vertexshader9.h"
35#include "pixelshader9.h"
36#include "nine_pipe.h"
37#include "nine_ff.h"
38#include "nine_limits.h"
39#include "pipe/p_context.h"
40#include "pipe/p_state.h"
41#include "cso_cache/cso_context.h"
42#include "util/u_atomic.h"
43#include "util/u_upload_mgr.h"
44#include "util/u_math.h"
45#include "util/u_box.h"
46#include "util/u_simple_shaders.h"
47#include "util/u_gen_mipmap.h"
48
49/* CSMT headers */
50#include "nine_queue.h"
51#include "nine_csmt_helper.h"
52#include "os/os_thread.h"
53
54#define DBG_CHANNEL DBG_DEVICE
55
56/* Nine CSMT */
57
/* Common header of every instruction stored in the CSMT queue.
 *
 * func is invoked by the worker thread with the device and the instruction
 * itself. A non-zero return value makes the worker flag the context as
 * "processed" and wake up a thread waiting for it. */
struct csmt_instruction {
    int (*func)(struct NineDevice9 *This, struct csmt_instruction *instr);
};
61
62struct csmt_context {
63    thrd_t worker;
64    struct nine_queue_pool* pool;
65    BOOL terminate;
66    cnd_t event_processed;
67    mtx_t mutex_processed;
68    struct NineDevice9 *device;
69    BOOL processed;
70    BOOL toPause;
71    BOOL hasPaused;
72    mtx_t thread_running;
73    mtx_t thread_resume;
74};
75
76/* Wait for instruction to be processed.
77 * Caller has to ensure that only one thread waits at time.
78 */
79static void
80nine_csmt_wait_processed(struct csmt_context *ctx)
81{
82    mtx_lock(&ctx->mutex_processed);
83    while (!p_atomic_read(&ctx->processed)) {
84        cnd_wait(&ctx->event_processed, &ctx->mutex_processed);
85    }
86    mtx_unlock(&ctx->mutex_processed);
87}
88
89/* CSMT worker thread */
90static
91int
92nine_csmt_worker(void *arg)
93{
94    struct csmt_context *ctx = arg;
95    struct csmt_instruction *instr;
96    DBG("CSMT worker spawned\n");
97
98    u_thread_setname("CSMT-Worker");
99
100    while (1) {
101        nine_queue_wait_flush(ctx->pool);
102        mtx_lock(&ctx->thread_running);
103
104        /* Get instruction. NULL on empty cmdbuf. */
105        while (!p_atomic_read(&ctx->terminate) &&
106               (instr = (struct csmt_instruction *)nine_queue_get(ctx->pool))) {
107
108            /* decode */
109            if (instr->func(ctx->device, instr)) {
110                mtx_lock(&ctx->mutex_processed);
111                p_atomic_set(&ctx->processed, TRUE);
112                cnd_signal(&ctx->event_processed);
113                mtx_unlock(&ctx->mutex_processed);
114            }
115            if (p_atomic_read(&ctx->toPause)) {
116                mtx_unlock(&ctx->thread_running);
117                /* will wait here the thread can be resumed */
118                mtx_lock(&ctx->thread_resume);
119                mtx_lock(&ctx->thread_running);
120                mtx_unlock(&ctx->thread_resume);
121            }
122        }
123
124        mtx_unlock(&ctx->thread_running);
125        if (p_atomic_read(&ctx->terminate)) {
126            mtx_lock(&ctx->mutex_processed);
127            p_atomic_set(&ctx->processed, TRUE);
128            cnd_signal(&ctx->event_processed);
129            mtx_unlock(&ctx->mutex_processed);
130            break;
131        }
132    }
133
134    DBG("CSMT worker destroyed\n");
135    return 0;
136}
137
138/* Create a CSMT context.
139 * Spawns a worker thread.
140 */
141struct csmt_context *
142nine_csmt_create( struct NineDevice9 *This )
143{
144    struct csmt_context *ctx;
145
146    ctx = CALLOC_STRUCT(csmt_context);
147    if (!ctx)
148        return NULL;
149
150    ctx->pool = nine_queue_create();
151    if (!ctx->pool) {
152        FREE(ctx);
153        return NULL;
154    }
155    cnd_init(&ctx->event_processed);
156    (void) mtx_init(&ctx->mutex_processed, mtx_plain);
157    (void) mtx_init(&ctx->thread_running, mtx_plain);
158    (void) mtx_init(&ctx->thread_resume, mtx_plain);
159
160#if defined(DEBUG) || !defined(NDEBUG)
161    u_thread_setname("Main thread");
162#endif
163
164    ctx->device = This;
165
166    ctx->worker = u_thread_create(nine_csmt_worker, ctx);
167    if (!ctx->worker) {
168        nine_queue_delete(ctx->pool);
169        FREE(ctx);
170        return NULL;
171    }
172
173    DBG("Returning context %p\n", ctx);
174
175    return ctx;
176}
177
/* Instruction callback that does nothing.
 * Both arguments are ignored. Always returns 1, which makes the worker
 * signal the "processed" event. */
static int
nop_func( struct NineDevice9 *This, struct csmt_instruction *instr )
{
    (void) instr;
    (void) This;
    return 1;
}
186
187/* Push nop instruction and flush the queue.
188 * Waits for the worker to complete. */
189void
190nine_csmt_process( struct NineDevice9 *device )
191{
192    struct csmt_instruction* instr;
193    struct csmt_context *ctx = device->csmt_ctx;
194
195    if (!device->csmt_active)
196        return;
197
198    if (nine_queue_isempty(ctx->pool))
199        return;
200
201    DBG("device=%p\n", device);
202
203    /* NOP */
204    instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
205    assert(instr);
206    instr->func = nop_func;
207
208    p_atomic_set(&ctx->processed, FALSE);
209    nine_queue_flush(ctx->pool);
210
211    nine_csmt_wait_processed(ctx);
212}
213
214/* Destroys a CSMT context.
215 * Waits for the worker thread to terminate.
216 */
217void
218nine_csmt_destroy( struct NineDevice9 *device, struct csmt_context *ctx )
219{
220    struct csmt_instruction* instr;
221    thrd_t render_thread = ctx->worker;
222
223    DBG("device=%p ctx=%p\n", device, ctx);
224
225    /* Push nop and flush the queue. */
226    instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
227    assert(instr);
228    instr->func = nop_func;
229
230    p_atomic_set(&ctx->processed, FALSE);
231    /* Signal worker to terminate. */
232    p_atomic_set(&ctx->terminate, TRUE);
233    nine_queue_flush(ctx->pool);
234
235    nine_csmt_wait_processed(ctx);
236    nine_queue_delete(ctx->pool);
237
238    mtx_destroy(&ctx->thread_resume);
239    mtx_destroy(&ctx->thread_running);
240
241    mtx_destroy(&ctx->mutex_processed);
242    cnd_destroy(&ctx->event_processed);
243
244    FREE(ctx);
245
246    thrd_join(render_thread, NULL);
247}
248
249static void
250nine_csmt_pause( struct NineDevice9 *device )
251{
252    struct csmt_context *ctx = device->csmt_ctx;
253
254    if (!device->csmt_active)
255        return;
256
257    /* No need to pause the thread */
258    if (nine_queue_no_flushed_work(ctx->pool))
259        return;
260
261    mtx_lock(&ctx->thread_resume);
262    p_atomic_set(&ctx->toPause, TRUE);
263
264    /* Wait the thread is paused */
265    mtx_lock(&ctx->thread_running);
266    ctx->hasPaused = TRUE;
267    p_atomic_set(&ctx->toPause, FALSE);
268}
269
270static void
271nine_csmt_resume( struct NineDevice9 *device )
272{
273    struct csmt_context *ctx = device->csmt_ctx;
274
275    if (!device->csmt_active)
276        return;
277
278    if (!ctx->hasPaused)
279        return;
280
281    ctx->hasPaused = FALSE;
282    mtx_unlock(&ctx->thread_running);
283    mtx_unlock(&ctx->thread_resume);
284}
285
286struct pipe_context *
287nine_context_get_pipe( struct NineDevice9 *device )
288{
289    nine_csmt_process(device);
290    return device->context.pipe;
291}
292
293struct pipe_context *
294nine_context_get_pipe_multithread( struct NineDevice9 *device )
295{
296    struct csmt_context *ctx = device->csmt_ctx;
297
298    if (!device->csmt_active)
299        return device->context.pipe;
300
301    if (!u_thread_is_self(ctx->worker))
302        nine_csmt_process(device);
303
304    return device->context.pipe;
305}
306
307struct pipe_context *
308nine_context_get_pipe_acquire( struct NineDevice9 *device )
309{
310    nine_csmt_pause(device);
311    return device->context.pipe;
312}
313
/* Counterpart of nine_context_get_pipe_acquire(): lets a paused CSMT
 * worker continue. */
void
nine_context_get_pipe_release( struct NineDevice9 *device )
{
    nine_csmt_resume( device );
}
319
320/* Nine state functions */
321
322/* Check if some states need to be set dirty */
323
324static inline DWORD
325check_multisample(struct NineDevice9 *device)
326{
327    DWORD *rs = device->context.rs;
328    DWORD new_value = (rs[D3DRS_ZENABLE] || rs[D3DRS_STENCILENABLE]) &&
329                      device->context.rt[0]->desc.MultiSampleType >= 1 &&
330                      rs[D3DRS_MULTISAMPLEANTIALIAS];
331    if (rs[NINED3DRS_MULTISAMPLE] != new_value) {
332        rs[NINED3DRS_MULTISAMPLE] = new_value;
333        return NINE_STATE_RASTERIZER;
334    }
335    return 0;
336}
337
338/* State preparation only */
339
340static inline void
341prepare_blend(struct NineDevice9 *device)
342{
343    nine_convert_blend_state(&device->context.pipe_data.blend, device->context.rs);
344    device->context.commit |= NINE_STATE_COMMIT_BLEND;
345}
346
347static inline void
348prepare_dsa(struct NineDevice9 *device)
349{
350    nine_convert_dsa_state(&device->context.pipe_data.dsa, device->context.rs);
351    device->context.commit |= NINE_STATE_COMMIT_DSA;
352}
353
354static inline void
355prepare_rasterizer(struct NineDevice9 *device)
356{
357    nine_convert_rasterizer_state(device, &device->context.pipe_data.rast, device->context.rs);
358    device->context.commit |= NINE_STATE_COMMIT_RASTERIZER;
359}
360
361static void
362prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
363{
364    struct nine_context *context = &device->context;
365
366    if (context->changed.vs_const_f || context->changed.group & NINE_STATE_SWVP) {
367        struct pipe_constant_buffer cb;
368
369        cb.buffer_offset = 0;
370        cb.buffer_size = 4096 * sizeof(float[4]);
371        cb.user_buffer = context->vs_const_f_swvp;
372
373        if (context->vs->lconstf.ranges) {
374            const struct nine_lconstf *lconstf = &(context->vs->lconstf);
375            const struct nine_range *r = lconstf->ranges;
376            unsigned n = 0;
377            float *dst = context->vs_lconstf_temp;
378            float *src = (float *)cb.user_buffer;
379            memcpy(dst, src, cb.buffer_size);
380            while (r) {
381                unsigned p = r->bgn;
382                unsigned c = r->end - r->bgn;
383                memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
384                n += c;
385                r = r->next;
386            }
387            cb.user_buffer = dst;
388        }
389
390        context->pipe_data.cb0_swvp.buffer_offset = cb.buffer_offset;
391        context->pipe_data.cb0_swvp.buffer_size = cb.buffer_size;
392        context->pipe_data.cb0_swvp.user_buffer = cb.user_buffer;
393
394        cb.user_buffer = (char *)cb.user_buffer + 4096 * sizeof(float[4]);
395        context->pipe_data.cb1_swvp.buffer_offset = cb.buffer_offset;
396        context->pipe_data.cb1_swvp.buffer_size = cb.buffer_size;
397        context->pipe_data.cb1_swvp.user_buffer = cb.user_buffer;
398
399        context->changed.vs_const_f = 0;
400    }
401
402    if (context->changed.vs_const_i || context->changed.group & NINE_STATE_SWVP) {
403        struct pipe_constant_buffer cb;
404
405        cb.buffer_offset = 0;
406        cb.buffer_size = 2048 * sizeof(float[4]);
407        cb.user_buffer = context->vs_const_i;
408
409        context->pipe_data.cb2_swvp.buffer_offset = cb.buffer_offset;
410        context->pipe_data.cb2_swvp.buffer_size = cb.buffer_size;
411        context->pipe_data.cb2_swvp.user_buffer = cb.user_buffer;
412        context->changed.vs_const_i = 0;
413    }
414
415    if (context->changed.vs_const_b || context->changed.group & NINE_STATE_SWVP) {
416        struct pipe_constant_buffer cb;
417
418        cb.buffer_offset = 0;
419        cb.buffer_size = 512 * sizeof(float[4]);
420        cb.user_buffer = context->vs_const_b;
421
422        context->pipe_data.cb3_swvp.buffer_offset = cb.buffer_offset;
423        context->pipe_data.cb3_swvp.buffer_size = cb.buffer_size;
424        context->pipe_data.cb3_swvp.user_buffer = cb.user_buffer;
425        context->changed.vs_const_b = 0;
426    }
427
428    context->changed.group &= ~NINE_STATE_VS_CONST;
429    context->commit |= NINE_STATE_COMMIT_CONST_VS;
430}
431
432static void
433prepare_vs_constants_userbuf(struct NineDevice9 *device)
434{
435    struct nine_context *context = &device->context;
436    uint8_t *upload_ptr = NULL;
437    struct pipe_constant_buffer cb;
438    cb.buffer = NULL;
439    cb.buffer_offset = 0;
440    cb.buffer_size = context->cso_shader.vs_const_used_size;
441    cb.user_buffer = context->vs_const_f;
442
443    if (context->swvp) {
444        prepare_vs_constants_userbuf_swvp(device);
445        return;
446    }
447
448    if (context->changed.vs_const_i || context->changed.group & NINE_STATE_SWVP) {
449        int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
450        memcpy(idst, context->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4]));
451        context->changed.vs_const_i = 0;
452    }
453
454    if (context->changed.vs_const_b || context->changed.group & NINE_STATE_SWVP) {
455        int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
456        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
457        memcpy(bdst, context->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL));
458        context->changed.vs_const_b = 0;
459    }
460
461    if (!cb.buffer_size)
462        return;
463
464    if (context->vs->lconstf.ranges) {
465        /* TODO: Can we make it so that we don't have to copy everything ? */
466        const struct nine_lconstf *lconstf =  &(context->vs->lconstf);
467        const struct nine_range *r = lconstf->ranges;
468        unsigned n = 0;
469        float *dst = context->vs_lconstf_temp;
470        float *src = (float *)cb.user_buffer;
471        memcpy(dst, src, cb.buffer_size);
472        while (r) {
473            unsigned p = r->bgn;
474            unsigned c = r->end - r->bgn;
475            memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
476            n += c;
477            r = r->next;
478        }
479        cb.user_buffer = dst;
480    }
481
482    /* Note: We probably don't want to do separate memcpy to
483     * upload_ptr directly, if we have to copy some constants
484     * at random locations (context->vs->lconstf.ranges),
485     * to have efficient WC. Thus for this case we really want
486     * that intermediate buffer. */
487
488    u_upload_alloc(context->pipe->const_uploader,
489                  0,
490                  cb.buffer_size,
491                  256, /* Be conservative about alignment */
492                  &(cb.buffer_offset),
493                  &(cb.buffer),
494                  (void**)&upload_ptr);
495
496    assert(cb.buffer && upload_ptr);
497
498    if (!context->cso_shader.vs_const_ranges) {
499        memcpy(upload_ptr, cb.user_buffer, cb.buffer_size);
500    } else {
501        unsigned i = 0;
502        unsigned offset = 0;
503        while (context->cso_shader.vs_const_ranges[i*2+1] != 0) {
504            memcpy(upload_ptr+offset,
505                   &((float*)cb.user_buffer)[4*context->cso_shader.vs_const_ranges[i*2]],
506                   context->cso_shader.vs_const_ranges[i*2+1] * sizeof(float[4]));
507            offset += context->cso_shader.vs_const_ranges[i*2+1] * sizeof(float[4]);
508            i++;
509        }
510    }
511
512    u_upload_unmap(context->pipe->const_uploader);
513    cb.user_buffer = NULL;
514
515    /* Free previous resource */
516    pipe_resource_reference(&context->pipe_data.cb_vs.buffer, NULL);
517
518    context->pipe_data.cb_vs = cb;
519    context->changed.vs_const_f = 0;
520
521    context->changed.group &= ~NINE_STATE_VS_CONST;
522    context->commit |= NINE_STATE_COMMIT_CONST_VS;
523}
524
525static void
526prepare_ps_constants_userbuf(struct NineDevice9 *device)
527{
528    struct nine_context *context = &device->context;
529    uint8_t *upload_ptr = NULL;
530    struct pipe_constant_buffer cb;
531    cb.buffer = NULL;
532    cb.buffer_offset = 0;
533    cb.buffer_size = context->cso_shader.ps_const_used_size;
534    cb.user_buffer = context->ps_const_f;
535
536    if (context->changed.ps_const_i) {
537        int *idst = (int *)&context->ps_const_f[4 * device->max_ps_const_f];
538        memcpy(idst, context->ps_const_i, sizeof(context->ps_const_i));
539        context->changed.ps_const_i = 0;
540    }
541    if (context->changed.ps_const_b) {
542        int *idst = (int *)&context->ps_const_f[4 * device->max_ps_const_f];
543        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
544        memcpy(bdst, context->ps_const_b, sizeof(context->ps_const_b));
545        context->changed.ps_const_b = 0;
546    }
547
548    /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */
549    if (context->ps->bumpenvmat_needed) {
550        memcpy(context->ps_lconstf_temp, cb.user_buffer, 8 * sizeof(float[4]));
551        memcpy(&context->ps_lconstf_temp[4 * 8], &device->context.bumpmap_vars, sizeof(device->context.bumpmap_vars));
552
553        cb.user_buffer = context->ps_lconstf_temp;
554    }
555
556    if (context->ps->byte_code.version < 0x30 &&
557        context->rs[D3DRS_FOGENABLE]) {
558        float *dst = &context->ps_lconstf_temp[4 * 32];
559        if (cb.user_buffer != context->ps_lconstf_temp) {
560            memcpy(context->ps_lconstf_temp, cb.user_buffer, 32 * sizeof(float[4]));
561            cb.user_buffer = context->ps_lconstf_temp;
562        }
563
564        d3dcolor_to_rgba(dst, context->rs[D3DRS_FOGCOLOR]);
565        if (context->rs[D3DRS_FOGTABLEMODE] == D3DFOG_LINEAR) {
566            dst[4] = asfloat(context->rs[D3DRS_FOGEND]);
567            dst[5] = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
568        } else if (context->rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE) {
569            dst[4] = asfloat(context->rs[D3DRS_FOGDENSITY]);
570        }
571    }
572
573    if (!cb.buffer_size)
574        return;
575
576    u_upload_alloc(context->pipe->const_uploader,
577                  0,
578                  cb.buffer_size,
579                  256, /* Be conservative about alignment */
580                  &(cb.buffer_offset),
581                  &(cb.buffer),
582                  (void**)&upload_ptr);
583
584    assert(cb.buffer && upload_ptr);
585
586    if (!context->cso_shader.ps_const_ranges) {
587        memcpy(upload_ptr, cb.user_buffer, cb.buffer_size);
588    } else {
589        unsigned i = 0;
590        unsigned offset = 0;
591        while (context->cso_shader.ps_const_ranges[i*2+1] != 0) {
592            memcpy(upload_ptr+offset,
593                   &((float*)cb.user_buffer)[4*context->cso_shader.ps_const_ranges[i*2]],
594                   context->cso_shader.ps_const_ranges[i*2+1] * sizeof(float[4]));
595            offset += context->cso_shader.ps_const_ranges[i*2+1] * sizeof(float[4]);
596            i++;
597        }
598    }
599
600    u_upload_unmap(context->pipe->const_uploader);
601    cb.user_buffer = NULL;
602
603    /* Free previous resource */
604    pipe_resource_reference(&context->pipe_data.cb_ps.buffer, NULL);
605
606    context->pipe_data.cb_ps = cb;
607    context->changed.ps_const_f = 0;
608
609    context->changed.group &= ~NINE_STATE_PS_CONST;
610    context->commit |= NINE_STATE_COMMIT_CONST_PS;
611}
612
613static inline uint32_t
614prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
615{
616    struct nine_context *context = &device->context;
617    struct NineVertexShader9 *vs = context->vs;
618    uint32_t changed_group = 0;
619    int has_key_changed = 0;
620
621    if (likely(context->programmable_vs))
622        has_key_changed = NineVertexShader9_UpdateKey(vs, device);
623
624    if (!shader_changed && !has_key_changed)
625        return 0;
626
627    /* likely because we dislike FF */
628    if (likely(context->programmable_vs)) {
629        context->cso_shader.vs = NineVertexShader9_GetVariant(vs,
630                                                              &context->cso_shader.vs_const_ranges,
631                                                              &context->cso_shader.vs_const_used_size);
632    } else {
633        vs = device->ff.vs;
634        context->cso_shader.vs = vs->ff_cso;
635    }
636
637    if (context->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
638        context->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
639        changed_group |= NINE_STATE_RASTERIZER;
640    }
641
642    if ((context->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
643        /* Bound dummy sampler. */
644        changed_group |= NINE_STATE_SAMPLER;
645
646    context->commit |= NINE_STATE_COMMIT_VS;
647    return changed_group;
648}
649
650static inline uint32_t
651prepare_ps(struct NineDevice9 *device, uint8_t shader_changed)
652{
653    struct nine_context *context = &device->context;
654    struct NinePixelShader9 *ps = context->ps;
655    uint32_t changed_group = 0;
656    int has_key_changed = 0;
657
658    if (likely(ps))
659        has_key_changed = NinePixelShader9_UpdateKey(ps, context);
660
661    if (!shader_changed && !has_key_changed)
662        return 0;
663
664    if (likely(ps)) {
665        context->cso_shader.ps = NinePixelShader9_GetVariant(ps,
666                                                             &context->cso_shader.ps_const_ranges,
667                                                             &context->cso_shader.ps_const_used_size);
668    } else {
669        ps = device->ff.ps;
670        context->cso_shader.ps = ps->ff_cso;
671    }
672
673    if ((context->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
674        /* Bound dummy sampler. */
675        changed_group |= NINE_STATE_SAMPLER;
676
677    context->commit |= NINE_STATE_COMMIT_PS;
678    return changed_group;
679}
680
681/* State preparation incremental */
682
683/* State preparation + State commit */
684
685static void
686update_framebuffer(struct NineDevice9 *device, bool is_clear)
687{
688    struct nine_context *context = &device->context;
689    struct pipe_context *pipe = context->pipe;
690    struct pipe_framebuffer_state *fb = &context->pipe_data.fb;
691    unsigned i;
692    struct NineSurface9 *rt0 = context->rt[0];
693    unsigned w = rt0->desc.Width;
694    unsigned h = rt0->desc.Height;
695    unsigned nr_samples = rt0->base.info.nr_samples;
696    unsigned ps_mask = context->ps ? context->ps->rt_mask : 1;
697    unsigned mask = is_clear ? 0xf : ps_mask;
698    const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
699
700    DBG("\n");
701
702    context->rt_mask = 0x0;
703    fb->nr_cbufs = 0;
704
705    /* all render targets must have the same size and the depth buffer must be
706     * bigger. Multisample has to match, according to spec. But some apps do
707     * things wrong there, and no error is returned. The behaviour they get
708     * apparently is that depth buffer is disabled if it doesn't match.
709     * Surely the same for render targets. */
710
711    /* Special case: D3DFMT_NULL is used to bound no real render target,
712     * but render to depth buffer. We have to not take into account the render
713     * target info. TODO: know what should happen when there are several render targers
714     * and the first one is D3DFMT_NULL */
715    if (rt0->desc.Format == D3DFMT_NULL && context->ds) {
716        w = context->ds->desc.Width;
717        h = context->ds->desc.Height;
718        nr_samples = context->ds->base.info.nr_samples;
719    }
720
721    for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
722        struct NineSurface9 *rt = context->rt[i];
723
724        if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) &&
725            rt->desc.Width == w && rt->desc.Height == h &&
726            rt->base.info.nr_samples == nr_samples) {
727            fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB);
728            context->rt_mask |= 1 << i;
729            fb->nr_cbufs = i + 1;
730        } else {
731            /* Color outputs must match RT slot,
732             * drivers will have to handle NULL entries for GL, too.
733             */
734            fb->cbufs[i] = NULL;
735        }
736    }
737
738    if (context->ds && context->ds->desc.Width >= w &&
739        context->ds->desc.Height >= h &&
740        context->ds->base.info.nr_samples == nr_samples) {
741        fb->zsbuf = NineSurface9_GetSurface(context->ds, 0);
742    } else {
743        fb->zsbuf = NULL;
744    }
745
746    fb->width = w;
747    fb->height = h;
748
749    pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
750
751    if (is_clear && context->rt_mask == ps_mask)
752        context->changed.group &= ~NINE_STATE_FB;
753}
754
755static void
756update_viewport(struct NineDevice9 *device)
757{
758    struct nine_context *context = &device->context;
759    const D3DVIEWPORT9 *vport = &context->viewport;
760    struct pipe_viewport_state pvport;
761
762    /* D3D coordinates are:
763     * -1 .. +1 for X,Y and
764     *  0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
765     */
766    pvport.scale[0] = (float)vport->Width * 0.5f;
767    pvport.scale[1] = (float)vport->Height * -0.5f;
768    pvport.scale[2] = vport->MaxZ - vport->MinZ;
769    pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X;
770    pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
771    pvport.translate[2] = vport->MinZ;
772
773    /* We found R600 and SI cards have some imprecision
774     * on the barycentric coordinates used for interpolation.
775     * Some shaders rely on having something precise.
776     * We found that the proprietary driver has the imprecision issue,
777     * except when the render target width and height are powers of two.
778     * It is using some sort of workaround for these cases
779     * which covers likely all the cases the applications rely
780     * on something precise.
781     * We haven't found the workaround, but it seems like it's better
782     * for applications if the imprecision is biased towards infinity
783     * instead of -infinity (which is what measured). So shift slightly
784     * the viewport: not enough to change rasterization result (in particular
785     * for multisampling), but enough to make the imprecision biased
786     * towards infinity. We do this shift only if render target width and
787     * height are powers of two.
788     * Solves 'red shadows' bug on UE3 games.
789     */
790    if (device->driver_bugs.buggy_barycentrics &&
791        ((vport->Width & (vport->Width-1)) == 0) &&
792        ((vport->Height & (vport->Height-1)) == 0)) {
793        pvport.translate[0] -= 1.0f / 128.0f;
794        pvport.translate[1] -= 1.0f / 128.0f;
795    }
796
797    cso_set_viewport(context->cso, &pvport);
798}
799
800/* Loop through VS inputs and pick the vertex elements with the declared
801 * usage from the vertex declaration, then insert the instance divisor from
802 * the stream source frequency setting.
803 */
804static void
805update_vertex_elements(struct NineDevice9 *device)
806{
807    struct nine_context *context = &device->context;
808    const struct NineVertexDeclaration9 *vdecl = device->context.vdecl;
809    const struct NineVertexShader9 *vs;
810    unsigned n, b, i;
811    int index;
812    char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
813    char used_streams[device->caps.MaxStreams];
814    int dummy_vbo_stream = -1;
815    BOOL need_dummy_vbo = FALSE;
816    struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
817
818    context->stream_usage_mask = 0;
819    memset(vdecl_index_map, -1, 16);
820    memset(used_streams, 0, device->caps.MaxStreams);
821    vs = context->programmable_vs ? context->vs : device->ff.vs;
822
823    if (vdecl) {
824        for (n = 0; n < vs->num_inputs; ++n) {
825            DBG("looking up input %u (usage %u) from vdecl(%p)\n",
826                n, vs->input_map[n].ndecl, vdecl);
827
828            for (i = 0; i < vdecl->nelems; i++) {
829                if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
830                    vdecl_index_map[n] = i;
831                    used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
832                    break;
833                }
834            }
835            if (vdecl_index_map[n] < 0)
836                need_dummy_vbo = TRUE;
837        }
838    } else {
839        /* No vertex declaration. Likely will never happen in practice,
840         * but we need not crash on this */
841        need_dummy_vbo = TRUE;
842    }
843
844    if (need_dummy_vbo) {
845        for (i = 0; i < device->caps.MaxStreams; i++ ) {
846            if (!used_streams[i]) {
847                dummy_vbo_stream = i;
848                break;
849            }
850        }
851    }
852    /* there are less vertex shader inputs than stream slots,
853     * so if we need a slot for the dummy vbo, we should have found one */
854    assert (!need_dummy_vbo || dummy_vbo_stream != -1);
855
856    for (n = 0; n < vs->num_inputs; ++n) {
857        index = vdecl_index_map[n];
858        if (index >= 0) {
859            ve[n] = vdecl->elems[index];
860            b = ve[n].vertex_buffer_index;
861            context->stream_usage_mask |= 1 << b;
862            /* XXX wine just uses 1 here: */
863            if (context->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
864                ve[n].instance_divisor = context->stream_freq[b] & 0x7FFFFF;
865        } else {
866            /* if the vertex declaration is incomplete compared to what the
867             * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
868             * This is not precised by the spec, but is the behaviour
869             * tested on win */
870            ve[n].vertex_buffer_index = dummy_vbo_stream;
871            ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
872            ve[n].src_offset = 0;
873            ve[n].instance_divisor = 0;
874        }
875    }
876
877    if (context->dummy_vbo_bound_at != dummy_vbo_stream) {
878        if (context->dummy_vbo_bound_at >= 0)
879            context->changed.vtxbuf |= 1 << context->dummy_vbo_bound_at;
880        if (dummy_vbo_stream >= 0) {
881            context->changed.vtxbuf |= 1 << dummy_vbo_stream;
882            context->vbo_bound_done = FALSE;
883        }
884        context->dummy_vbo_bound_at = dummy_vbo_stream;
885    }
886
887    cso_set_vertex_elements(context->cso, vs->num_inputs, ve);
888}
889
890static void
891update_vertex_buffers(struct NineDevice9 *device)
892{
893    struct nine_context *context = &device->context;
894    struct pipe_context *pipe = context->pipe;
895    struct pipe_vertex_buffer dummy_vtxbuf;
896    uint32_t mask = context->changed.vtxbuf;
897    unsigned i;
898
899    DBG("mask=%x\n", mask);
900
901    if (context->dummy_vbo_bound_at >= 0) {
902        if (!context->vbo_bound_done) {
903            dummy_vtxbuf.buffer.resource = device->dummy_vbo;
904            dummy_vtxbuf.stride = 0;
905            dummy_vtxbuf.is_user_buffer = false;
906            dummy_vtxbuf.buffer_offset = 0;
907            pipe->set_vertex_buffers(pipe, context->dummy_vbo_bound_at,
908                                     1, &dummy_vtxbuf);
909            context->vbo_bound_done = TRUE;
910        }
911        mask &= ~(1 << context->dummy_vbo_bound_at);
912    }
913
914    for (i = 0; mask; mask >>= 1, ++i) {
915        if (mask & 1) {
916            if (context->vtxbuf[i].buffer.resource)
917                pipe->set_vertex_buffers(pipe, i, 1, &context->vtxbuf[i]);
918            else
919                pipe->set_vertex_buffers(pipe, i, 1, NULL);
920        }
921    }
922
923    context->changed.vtxbuf = 0;
924}
925
926static inline boolean
927update_sampler_derived(struct nine_context *context, unsigned s)
928{
929    boolean changed = FALSE;
930
931    if (context->samp[s][NINED3DSAMP_SHADOW] != context->texture[s].shadow) {
932        changed = TRUE;
933        context->samp[s][NINED3DSAMP_SHADOW] = context->texture[s].shadow;
934    }
935
936    if (context->samp[s][NINED3DSAMP_CUBETEX] !=
937        (context->texture[s].type == D3DRTYPE_CUBETEXTURE)) {
938        changed = TRUE;
939        context->samp[s][NINED3DSAMP_CUBETEX] =
940                context->texture[s].type == D3DRTYPE_CUBETEXTURE;
941    }
942
943    if (context->samp[s][D3DSAMP_MIPFILTER] != D3DTEXF_NONE) {
944        int lod = context->samp[s][D3DSAMP_MAXMIPLEVEL] - context->texture[s].lod;
945        if (lod < 0)
946            lod = 0;
947        if (context->samp[s][NINED3DSAMP_MINLOD] != lod) {
948            changed = TRUE;
949            context->samp[s][NINED3DSAMP_MINLOD] = lod;
950        }
951    } else {
952        context->changed.sampler[s] &= ~0x300; /* lod changes irrelevant */
953    }
954
955    return changed;
956}
957
/* TODO: add sRGB override to pipe_sampler_state ? */
/* Rebuild the sampler view arrays of the fragment and vertex stages and
 * (re)convert the sampler states that need it.
 *
 * Both stages follow the same scheme for every sampler slot:
 *  - no texture bound and the shader does not sample the slot: NULL view;
 *  - texture bound: its linear or sRGB view is used, and the sampler state
 *    is converted again when a derived state or a relevant state changed;
 *  - no texture bound but the shader samples the slot: the device's dummy
 *    view and dummy sampler state are bound instead.
 * The bound_samplers_mask_{ps,vs} fields are recomputed from scratch. */
static void
update_textures_and_samplers(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    struct pipe_sampler_view *view[NINE_MAX_SAMPLERS];
    unsigned num_textures;
    unsigned i;
    boolean commit_samplers;
    /* slots sampled by the pixel shader, or by the fixed function one
     * when no pixel shader is bound */
    uint16_t sampler_mask = context->ps ? context->ps->sampler_mask :
                            device->ff.ps->sampler_mask;

    /* TODO: Can we reduce iterations here ? */

    /* Fragment stage */
    commit_samplers = FALSE;
    context->bound_samplers_mask_ps = 0;
    for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) {
        const unsigned s = NINE_SAMPLER_PS(i);
        int sRGB;

        if (!context->texture[s].enabled && !(sampler_mask & (1 << i))) {
            view[i] = NULL;
            continue;
        }

        if (context->texture[s].enabled) {
            sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;

            view[i] = context->texture[s].view[sRGB];
            num_textures = i + 1;

            /* 0x05fe selects bits of changed.sampler[s] (set as 1 << Type
             * by nine_context_set_sampler_state) that require a new
             * conversion. NOTE(review): which D3DSAMP_* types the mask
             * covers cannot be told from here. */
            if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) {
                context->changed.sampler[s] = 0;
                commit_samplers = TRUE;
                nine_convert_sampler_state(context->cso, s, context->samp[s]);
            }
        } else {
            /* Bind dummy sampler. We do not bind dummy sampler when
             * it is not needed because it could add overhead. The
             * dummy sampler should have r=g=b=0 and a=1. We do not
             * unbind dummy sampler directly when they are not needed
             * anymore, but they're going to be removed as long as texture
             * or sampler states are changed. */
            view[i] = device->dummy_sampler_view;
            num_textures = i + 1;

            cso_single_sampler(context->cso, PIPE_SHADER_FRAGMENT,
                               s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state);

            commit_samplers = TRUE;
            /* all bits set: the check against 0x05fe above succeeds the
             * next time a texture is enabled on this slot */
            context->changed.sampler[s] = ~0;
        }

        context->bound_samplers_mask_ps |= (1 << s);
    }

    /* num_textures is the index of the last non-NULL view plus one */
    cso_set_sampler_views(context->cso, PIPE_SHADER_FRAGMENT, num_textures, view);

    if (commit_samplers)
        cso_single_sampler_done(context->cso, PIPE_SHADER_FRAGMENT);

    /* Vertex stage: only a programmable vertex shader can sample */
    commit_samplers = FALSE;
    sampler_mask = context->programmable_vs ? context->vs->sampler_mask : 0;
    context->bound_samplers_mask_vs = 0;
    for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) {
        const unsigned s = NINE_SAMPLER_VS(i);
        int sRGB;

        if (!context->texture[s].enabled && !(sampler_mask & (1 << i))) {
            view[i] = NULL;
            continue;
        }

        if (context->texture[s].enabled) {
            sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;

            view[i] = context->texture[s].view[sRGB];
            num_textures = i + 1;

            /* same dirty test as for the fragment stage */
            if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) {
                context->changed.sampler[s] = 0;
                commit_samplers = TRUE;
                nine_convert_sampler_state(context->cso, s, context->samp[s]);
            }
        } else {
            /* Bind dummy sampler. We do not bind dummy sampler when
             * it is not needed because it could add overhead. The
             * dummy sampler should have r=g=b=0 and a=1. We do not
             * unbind dummy sampler directly when they are not needed
             * anymore, but they're going to be removed as long as texture
             * or sampler states are changed. */
            view[i] = device->dummy_sampler_view;
            num_textures = i + 1;

            cso_single_sampler(context->cso, PIPE_SHADER_VERTEX,
                               s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state);

            commit_samplers = TRUE;
            context->changed.sampler[s] = ~0;
        }

        /* the vertex mask is indexed by stage-relative slot i, not by s */
        context->bound_samplers_mask_vs |= (1 << i);
    }

    cso_set_sampler_views(context->cso, PIPE_SHADER_VERTEX, num_textures, view);

    if (commit_samplers)
        cso_single_sampler_done(context->cso, PIPE_SHADER_VERTEX);
}
1067
1068/* State commit only */
1069
1070static inline void
1071commit_blend(struct NineDevice9 *device)
1072{
1073    struct nine_context *context = &device->context;
1074
1075    cso_set_blend(context->cso, &context->pipe_data.blend);
1076}
1077
1078static inline void
1079commit_dsa(struct NineDevice9 *device)
1080{
1081    struct nine_context *context = &device->context;
1082
1083    cso_set_depth_stencil_alpha(context->cso, &context->pipe_data.dsa);
1084}
1085
1086static inline void
1087commit_scissor(struct NineDevice9 *device)
1088{
1089    struct nine_context *context = &device->context;
1090    struct pipe_context *pipe = context->pipe;
1091
1092    pipe->set_scissor_states(pipe, 0, 1, &context->scissor);
1093}
1094
1095static inline void
1096commit_rasterizer(struct NineDevice9 *device)
1097{
1098    struct nine_context *context = &device->context;
1099
1100    cso_set_rasterizer(context->cso, &context->pipe_data.rast);
1101}
1102
1103static inline void
1104commit_vs_constants(struct NineDevice9 *device)
1105{
1106    struct nine_context *context = &device->context;
1107    struct pipe_context *pipe = context->pipe;
1108
1109    if (unlikely(!context->programmable_vs))
1110        pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->pipe_data.cb_vs_ff);
1111    else {
1112        if (context->swvp) {
1113            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->pipe_data.cb0_swvp);
1114            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 1, &context->pipe_data.cb1_swvp);
1115            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 2, &context->pipe_data.cb2_swvp);
1116            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 3, &context->pipe_data.cb3_swvp);
1117        } else {
1118            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->pipe_data.cb_vs);
1119        }
1120    }
1121}
1122
1123static inline void
1124commit_ps_constants(struct NineDevice9 *device)
1125{
1126    struct nine_context *context = &device->context;
1127    struct pipe_context *pipe = context->pipe;
1128
1129    if (unlikely(!context->ps))
1130        pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->pipe_data.cb_ps_ff);
1131    else
1132        pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->pipe_data.cb_ps);
1133}
1134
1135static inline void
1136commit_vs(struct NineDevice9 *device)
1137{
1138    struct nine_context *context = &device->context;
1139
1140    context->pipe->bind_vs_state(context->pipe, context->cso_shader.vs);
1141}
1142
1143
1144static inline void
1145commit_ps(struct NineDevice9 *device)
1146{
1147    struct nine_context *context = &device->context;
1148
1149    context->pipe->bind_fs_state(context->pipe, context->cso_shader.ps);
1150}
1151/* State Update */
1152
/* Dirty groups for which nine_update_state calls prepare_vs. */
#define NINE_STATE_SHADER_CHANGE_VS \
   (NINE_STATE_VS |         \
    NINE_STATE_TEXTURE |    \
    NINE_STATE_VS_PARAMS_MISC | \
    NINE_STATE_SWVP)

/* Dirty groups for which nine_update_state calls prepare_ps. */
#define NINE_STATE_SHADER_CHANGE_PS \
   (NINE_STATE_PS |         \
    NINE_STATE_TEXTURE |    \
    NINE_STATE_PS_PARAMS_MISC)

/* Groups gating the section of nine_update_state that handles multisample,
 * rasterizer, textures/samplers and shader constants. */
#define NINE_STATE_FREQUENT \
   (NINE_STATE_RASTERIZER | \
    NINE_STATE_TEXTURE |    \
    NINE_STATE_SAMPLER |    \
    NINE_STATE_VS_CONST |   \
    NINE_STATE_PS_CONST |   \
    NINE_STATE_MULTISAMPLE)

/* Groups gating the section of nine_update_state that handles framebuffer,
 * blend, dsa, viewport and vertex elements.
 * NOTE(review): NINE_STATE_IDXBUF is listed but that section contains no
 * handler for it. */
#define NINE_STATE_COMMON \
   (NINE_STATE_FB |       \
    NINE_STATE_BLEND |    \
    NINE_STATE_DSA |      \
    NINE_STATE_VIEWPORT | \
    NINE_STATE_VDECL |    \
    NINE_STATE_IDXBUF |   \
    NINE_STATE_STREAMFREQ)

/* Groups gating the last section of nine_update_state (scissor, blend
 * color, stencil ref, sample mask), which is marked unlikely there. */
#define NINE_STATE_RARE      \
   (NINE_STATE_SCISSOR |     \
    NINE_STATE_BLEND_COLOR | \
    NINE_STATE_STENCIL_REF | \
    NINE_STATE_SAMPLE_MASK)
1186
/* Validate the dirty state groups recorded in context->changed.group and
 * apply them to the pipe/cso context.
 *
 * The work is done in this order:
 *  1. fixed function update when no programmable vs or no ps is in use,
 *  2. prepare_* / update_* helpers selected by the dirty group bits,
 *  3. vertex buffer rebinding,
 *  4. commit_* helpers selected by context->commit,
 *  5. user clip planes and the rarely changing states.
 * On exit every group bit is cleared except NINE_STATE_FF,
 * NINE_STATE_VS_CONST and NINE_STATE_PS_CONST. */
static void
nine_update_state(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    struct pipe_context *pipe = context->pipe;
    uint32_t group;

    DBG("changed state groups: %x\n", context->changed.group);

    /* NOTE: We may want to use the cso cache for everything, or let
     * NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't
     * have to care about state being clobbered here and could merge this back
     * into update_textures. Except, we also need to re-validate textures that
     * may be dirty anyway, even if no texture bindings changed.
     */

    /* ff_update may change VS/PS dirty bits */
    if (unlikely(!context->programmable_vs || !context->ps))
        nine_ff_update(device);
    /* local copy, read after nine_ff_update; the helpers below may add
     * further bits to it through their return value */
    group = context->changed.group;

    if (group & (NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) {
        /* second argument: whether the shader binding itself changed */
        if (group & NINE_STATE_SHADER_CHANGE_VS)
            group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER*/
        if (group & NINE_STATE_SHADER_CHANGE_PS)
            group |= prepare_ps(device, (group & NINE_STATE_PS) != 0);
    }

    if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) {
        if (group & NINE_STATE_FB)
            update_framebuffer(device, FALSE);
        if (group & NINE_STATE_BLEND)
            prepare_blend(device);
        if (group & NINE_STATE_DSA)
            prepare_dsa(device);
        if (group & NINE_STATE_VIEWPORT)
            update_viewport(device);
        /* vertex elements depend on the declaration, the vertex shader
         * inputs and the stream frequencies */
        if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ))
            update_vertex_elements(device);
    }

    if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS | NINE_STATE_SWVP))) {
        /* check_multisample may add group bits; it runs before the
         * rasterizer test so that an added bit is seen below */
        if (group & NINE_STATE_MULTISAMPLE)
            group |= check_multisample(device);
        if (group & NINE_STATE_RASTERIZER)
            prepare_rasterizer(device);
        if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
            update_textures_and_samplers(device);
        /* user constants are only prepared for programmable shaders */
        if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS | NINE_STATE_SWVP)) && context->programmable_vs)
            prepare_vs_constants_userbuf(device);
        if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && context->ps)
            prepare_ps_constants_userbuf(device);
    }

    /* vertex buffers are tracked by their own per-slot dirty mask */
    if (context->changed.vtxbuf)
        update_vertex_buffers(device);

    /* apply whatever has been flagged for commit in context->commit */
    if (context->commit & NINE_STATE_COMMIT_BLEND)
        commit_blend(device);
    if (context->commit & NINE_STATE_COMMIT_DSA)
        commit_dsa(device);
    if (context->commit & NINE_STATE_COMMIT_RASTERIZER)
        commit_rasterizer(device);
    if (context->commit & NINE_STATE_COMMIT_CONST_VS)
        commit_vs_constants(device);
    if (context->commit & NINE_STATE_COMMIT_CONST_PS)
        commit_ps_constants(device);
    if (context->commit & NINE_STATE_COMMIT_VS)
        commit_vs(device);
    if (context->commit & NINE_STATE_COMMIT_PS)
        commit_ps(device);

    context->commit = 0;

    /* user clip planes */
    if (unlikely(context->changed.ucp)) {
        pipe->set_clip_state(pipe, &context->clip);
        context->changed.ucp = FALSE;
    }

    if (unlikely(group & NINE_STATE_RARE)) {
        if (group & NINE_STATE_SCISSOR)
            commit_scissor(device);
        if (group & NINE_STATE_BLEND_COLOR) {
            struct pipe_blend_color color;
            d3dcolor_to_rgba(&color.color[0], context->rs[D3DRS_BLENDFACTOR]);
            pipe->set_blend_color(pipe, &color);
        }
        if (group & NINE_STATE_SAMPLE_MASK) {
            /* D3DRS_MULTISAMPLEMASK is only applied when render target 0
             * uses a maskable multisample type; otherwise all samples
             * are enabled */
            if (context->rt[0]->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE) {
                pipe->set_sample_mask(pipe, ~0);
            } else {
                pipe->set_sample_mask(pipe, context->rs[D3DRS_MULTISAMPLEMASK]);
            }
        }
        if (group & NINE_STATE_STENCIL_REF) {
            /* the same reference value is used for both ref_value entries */
            struct pipe_stencil_ref ref;
            ref.ref_value[0] = context->rs[D3DRS_STENCILREF];
            ref.ref_value[1] = ref.ref_value[0];
            pipe->set_stencil_ref(pipe, &ref);
        }
    }

    /* Everything is considered handled except these three groups.
     * NOTE(review): presumably they are consumed and cleared elsewhere;
     * confirm against nine_ff_update and the constant upload helpers. */
    context->changed.group &=
        (NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST);

    DBG("finished\n");
}
1294
1295#define RESZ_CODE 0x7fa05000
1296
1297static void
1298NineDevice9_ResolveZ( struct NineDevice9 *device )
1299{
1300    struct nine_context *context = &device->context;
1301    const struct util_format_description *desc;
1302    struct NineSurface9 *source = context->ds;
1303    struct pipe_resource *src, *dst;
1304    struct pipe_blit_info blit;
1305
1306    DBG("RESZ resolve\n");
1307
1308    if (!source || !context->texture[0].enabled ||
1309        context->texture[0].type != D3DRTYPE_TEXTURE)
1310        return;
1311
1312    src = source->base.resource;
1313    dst = context->texture[0].resource;
1314
1315    if (!src || !dst)
1316        return;
1317
1318    /* check dst is depth format. we know already for src */
1319    desc = util_format_description(dst->format);
1320    if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1321        return;
1322
1323    memset(&blit, 0, sizeof(blit));
1324    blit.src.resource = src;
1325    blit.src.level = 0;
1326    blit.src.format = src->format;
1327    blit.src.box.z = 0;
1328    blit.src.box.depth = 1;
1329    blit.src.box.x = 0;
1330    blit.src.box.y = 0;
1331    blit.src.box.width = src->width0;
1332    blit.src.box.height = src->height0;
1333
1334    blit.dst.resource = dst;
1335    blit.dst.level = 0;
1336    blit.dst.format = dst->format;
1337    blit.dst.box.z = 0;
1338    blit.dst.box.depth = 1;
1339    blit.dst.box.x = 0;
1340    blit.dst.box.y = 0;
1341    blit.dst.box.width = dst->width0;
1342    blit.dst.box.height = dst->height0;
1343
1344    blit.mask = PIPE_MASK_ZS;
1345    blit.filter = PIPE_TEX_FILTER_NEAREST;
1346    blit.scissor_enable = FALSE;
1347
1348    context->pipe->blit(context->pipe, &blit);
1349}
1350
/* FOURCC values which, when written to D3DRS_POINTSIZE, make
 * nine_context_set_render_state switch NINED3DRS_ALPHACOVERAGE on or off
 * instead of storing the value. */
#define ALPHA_TO_COVERAGE_ENABLE   MAKEFOURCC('A', '2', 'M', '1')
#define ALPHA_TO_COVERAGE_DISABLE  MAKEFOURCC('A', '2', 'M', '0')
1353
1354/* Nine_context functions.
1355 * Serialized through CSMT macros.
1356 */
1357
/* Forward declarations of static functions whose bodies are generated
 * further down in this file by CSMT_ITEM_NO_WAIT. */

/* Stores the texture binding of one stage in the context. */
static void
nine_context_set_texture_apply(struct NineDevice9 *device,
                               DWORD stage,
                               BOOL enabled,
                               BOOL shadow,
                               DWORD lod,
                               D3DRESOURCETYPE type,
                               uint8_t pstype,
                               struct pipe_resource *res,
                               struct pipe_sampler_view *view0,
                               struct pipe_sampler_view *view1);
/* Stores the vertex buffer binding of one stream in the context. */
static void
nine_context_set_stream_source_apply(struct NineDevice9 *device,
                                    UINT StreamNumber,
                                    struct pipe_resource *res,
                                    UINT OffsetInBytes,
                                    UINT Stride);

/* Stores the index buffer binding in the context. */
static void
nine_context_set_indices_apply(struct NineDevice9 *device,
                               struct pipe_resource *res,
                               UINT IndexSize,
                               UINT OffsetInBytes);

/* Copies integer pixel shader constants verbatim (no conversion). */
static void
nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device,
                                                     UINT StartRegister,
                                                     const int *pConstantData,
                                                     unsigned pConstantData_size,
                                                     UINT Vector4iCount);
1388
1389CSMT_ITEM_NO_WAIT(nine_context_set_render_state,
1390                  ARG_VAL(D3DRENDERSTATETYPE, State),
1391                  ARG_VAL(DWORD, Value))
1392{
1393    struct nine_context *context = &device->context;
1394
1395    /* Amd hacks (equivalent to GL extensions) */
1396    if (unlikely(State == D3DRS_POINTSIZE)) {
1397        if (Value == RESZ_CODE) {
1398            NineDevice9_ResolveZ(device);
1399            return;
1400        }
1401
1402        if (Value == ALPHA_TO_COVERAGE_ENABLE ||
1403            Value == ALPHA_TO_COVERAGE_DISABLE) {
1404            context->rs[NINED3DRS_ALPHACOVERAGE] = (Value == ALPHA_TO_COVERAGE_ENABLE);
1405            context->changed.group |= NINE_STATE_BLEND;
1406            return;
1407        }
1408    }
1409
1410    /* NV hack */
1411    if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
1412        if (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && context->rs[NINED3DRS_ALPHACOVERAGE])) {
1413            context->rs[NINED3DRS_ALPHACOVERAGE] = (Value == D3DFMT_ATOC) ? 3 : 0;
1414            context->rs[NINED3DRS_ALPHACOVERAGE] &= context->rs[D3DRS_ALPHATESTENABLE] ? 3 : 2;
1415            context->changed.group |= NINE_STATE_BLEND;
1416            return;
1417        }
1418    }
1419    if (unlikely(State == D3DRS_ALPHATESTENABLE && (context->rs[NINED3DRS_ALPHACOVERAGE] & 2))) {
1420        DWORD alphacoverage_prev = context->rs[NINED3DRS_ALPHACOVERAGE];
1421        context->rs[NINED3DRS_ALPHACOVERAGE] = (Value ? 3 : 2);
1422        if (context->rs[NINED3DRS_ALPHACOVERAGE] != alphacoverage_prev)
1423            context->changed.group |= NINE_STATE_BLEND;
1424    }
1425
1426    context->rs[State] = nine_fix_render_state_value(State, Value);
1427    context->changed.group |= nine_render_state_group[State];
1428}
1429
1430CSMT_ITEM_NO_WAIT(nine_context_set_texture_apply,
1431                  ARG_VAL(DWORD, stage),
1432                  ARG_VAL(BOOL, enabled),
1433                  ARG_VAL(BOOL, shadow),
1434                  ARG_VAL(DWORD, lod),
1435                  ARG_VAL(D3DRESOURCETYPE, type),
1436                  ARG_VAL(uint8_t, pstype),
1437                  ARG_BIND_RES(struct pipe_resource, res),
1438                  ARG_BIND_VIEW(struct pipe_sampler_view, view0),
1439                  ARG_BIND_VIEW(struct pipe_sampler_view, view1))
1440{
1441    struct nine_context *context = &device->context;
1442
1443    context->texture[stage].enabled = enabled;
1444    context->samplers_shadow &= ~(1 << stage);
1445    context->samplers_shadow |= shadow << stage;
1446    context->texture[stage].shadow = shadow;
1447    context->texture[stage].lod = lod;
1448    context->texture[stage].type = type;
1449    context->texture[stage].pstype = pstype;
1450    pipe_resource_reference(&context->texture[stage].resource, res);
1451    pipe_sampler_view_reference(&context->texture[stage].view[0], view0);
1452    pipe_sampler_view_reference(&context->texture[stage].view[1], view1);
1453
1454    context->changed.group |= NINE_STATE_TEXTURE;
1455}
1456
1457void
1458nine_context_set_texture(struct NineDevice9 *device,
1459                         DWORD Stage,
1460                         struct NineBaseTexture9 *tex)
1461{
1462    BOOL enabled = FALSE;
1463    BOOL shadow = FALSE;
1464    DWORD lod = 0;
1465    D3DRESOURCETYPE type = D3DRTYPE_TEXTURE;
1466    uint8_t pstype = 0;
1467    struct pipe_resource *res = NULL;
1468    struct pipe_sampler_view *view0 = NULL, *view1 = NULL;
1469
1470    /* For managed pool, the data can be initially incomplete.
1471     * In that case, the texture is rebound later
1472     * (in NineBaseTexture9_Validate/NineBaseTexture9_UploadSelf). */
1473    if (tex && tex->base.resource) {
1474        enabled = TRUE;
1475        shadow = tex->shadow;
1476        lod = tex->managed.lod;
1477        type = tex->base.type;
1478        pstype = tex->pstype;
1479        res = tex->base.resource;
1480        view0 = NineBaseTexture9_GetSamplerView(tex, 0);
1481        view1 = NineBaseTexture9_GetSamplerView(tex, 1);
1482    }
1483
1484    nine_context_set_texture_apply(device, Stage, enabled,
1485                                   shadow, lod, type, pstype,
1486                                   res, view0, view1);
1487}
1488
1489CSMT_ITEM_NO_WAIT(nine_context_set_sampler_state,
1490                  ARG_VAL(DWORD, Sampler),
1491                  ARG_VAL(D3DSAMPLERSTATETYPE, Type),
1492                  ARG_VAL(DWORD, Value))
1493{
1494    struct nine_context *context = &device->context;
1495
1496    if (unlikely(!nine_check_sampler_state_value(Type, Value)))
1497        return;
1498
1499    context->samp[Sampler][Type] = Value;
1500    context->changed.group |= NINE_STATE_SAMPLER;
1501    context->changed.sampler[Sampler] |= 1 << Type;
1502}
1503
1504CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_apply,
1505                  ARG_VAL(UINT, StreamNumber),
1506                  ARG_BIND_RES(struct pipe_resource, res),
1507                  ARG_VAL(UINT, OffsetInBytes),
1508                  ARG_VAL(UINT, Stride))
1509{
1510    struct nine_context *context = &device->context;
1511    const unsigned i = StreamNumber;
1512
1513    context->vtxbuf[i].stride = Stride;
1514    context->vtxbuf[i].buffer_offset = OffsetInBytes;
1515    pipe_resource_reference(&context->vtxbuf[i].buffer.resource, res);
1516
1517    context->changed.vtxbuf |= 1 << StreamNumber;
1518}
1519
1520void
1521nine_context_set_stream_source(struct NineDevice9 *device,
1522                               UINT StreamNumber,
1523                               struct NineVertexBuffer9 *pVBuf9,
1524                               UINT OffsetInBytes,
1525                               UINT Stride)
1526{
1527    struct pipe_resource *res = NULL;
1528    unsigned offset = 0;
1529
1530    if (pVBuf9)
1531        res = NineVertexBuffer9_GetResource(pVBuf9, &offset);
1532    /* in the future when there is internal offset, add it
1533     * to OffsetInBytes */
1534
1535    nine_context_set_stream_source_apply(device, StreamNumber,
1536                                         res, offset + OffsetInBytes,
1537                                         Stride);
1538}
1539
1540CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_freq,
1541                  ARG_VAL(UINT, StreamNumber),
1542                  ARG_VAL(UINT, Setting))
1543{
1544    struct nine_context *context = &device->context;
1545
1546    context->stream_freq[StreamNumber] = Setting;
1547
1548    if (Setting & D3DSTREAMSOURCE_INSTANCEDATA)
1549        context->stream_instancedata_mask |= 1 << StreamNumber;
1550    else
1551        context->stream_instancedata_mask &= ~(1 << StreamNumber);
1552
1553    if (StreamNumber != 0)
1554        context->changed.group |= NINE_STATE_STREAMFREQ;
1555}
1556
1557CSMT_ITEM_NO_WAIT(nine_context_set_indices_apply,
1558                  ARG_BIND_RES(struct pipe_resource, res),
1559                  ARG_VAL(UINT, IndexSize),
1560                  ARG_VAL(UINT, OffsetInBytes))
1561{
1562    struct nine_context *context = &device->context;
1563
1564    context->index_size = IndexSize;
1565    context->index_offset = OffsetInBytes;
1566    pipe_resource_reference(&context->idxbuf, res);
1567
1568    context->changed.group |= NINE_STATE_IDXBUF;
1569}
1570
1571void
1572nine_context_set_indices(struct NineDevice9 *device,
1573                         struct NineIndexBuffer9 *idxbuf)
1574{
1575    struct pipe_resource *res = NULL;
1576    UINT IndexSize = 0;
1577    unsigned OffsetInBytes = 0;
1578
1579    if (idxbuf) {
1580        res = NineIndexBuffer9_GetBuffer(idxbuf, &OffsetInBytes);
1581        IndexSize = idxbuf->index_size;
1582    }
1583
1584    nine_context_set_indices_apply(device, res, IndexSize, OffsetInBytes);
1585}
1586
1587CSMT_ITEM_NO_WAIT(nine_context_set_vertex_declaration,
1588                  ARG_BIND_REF(struct NineVertexDeclaration9, vdecl))
1589{
1590    struct nine_context *context = &device->context;
1591    BOOL was_programmable_vs = context->programmable_vs;
1592
1593    nine_bind(&context->vdecl, vdecl);
1594
1595    context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);
1596    if (was_programmable_vs != context->programmable_vs) {
1597        context->commit |= NINE_STATE_COMMIT_CONST_VS;
1598        context->changed.group |= NINE_STATE_VS;
1599    }
1600
1601    context->changed.group |= NINE_STATE_VDECL;
1602}
1603
1604CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader,
1605                  ARG_BIND_REF(struct NineVertexShader9, pShader))
1606{
1607    struct nine_context *context = &device->context;
1608    BOOL was_programmable_vs = context->programmable_vs;
1609
1610    nine_bind(&context->vs, pShader);
1611
1612    context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);
1613
1614    /* ff -> non-ff: commit back non-ff constants */
1615    if (!was_programmable_vs && context->programmable_vs)
1616        context->commit |= NINE_STATE_COMMIT_CONST_VS;
1617
1618    context->changed.group |= NINE_STATE_VS;
1619}
1620
1621CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_f,
1622                  ARG_VAL(UINT, StartRegister),
1623                  ARG_MEM(float, pConstantData),
1624                  ARG_MEM_SIZE(unsigned, pConstantData_size),
1625                  ARG_VAL(UINT, Vector4fCount))
1626{
1627    struct nine_context *context = &device->context;
1628    float *vs_const_f = device->may_swvp ? context->vs_const_f_swvp : context->vs_const_f;
1629
1630    memcpy(&vs_const_f[StartRegister * 4],
1631           pConstantData,
1632           pConstantData_size);
1633
1634    if (device->may_swvp) {
1635        Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister;
1636        if (StartRegister < NINE_MAX_CONST_F)
1637            memcpy(&context->vs_const_f[StartRegister * 4],
1638                   pConstantData,
1639                   Vector4fCount * 4 * sizeof(context->vs_const_f[0]));
1640    }
1641
1642    context->changed.vs_const_f = TRUE;
1643    context->changed.group |= NINE_STATE_VS_CONST;
1644}
1645
1646CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_i,
1647                  ARG_VAL(UINT, StartRegister),
1648                  ARG_MEM(int, pConstantData),
1649                  ARG_MEM_SIZE(unsigned, pConstantData_size),
1650                  ARG_VAL(UINT, Vector4iCount))
1651{
1652    struct nine_context *context = &device->context;
1653    int i;
1654
1655    if (device->driver_caps.vs_integer) {
1656        memcpy(&context->vs_const_i[4 * StartRegister],
1657               pConstantData,
1658               pConstantData_size);
1659    } else {
1660        for (i = 0; i < Vector4iCount; i++) {
1661            context->vs_const_i[4 * (StartRegister + i)] = fui((float)(pConstantData[4 * i]));
1662            context->vs_const_i[4 * (StartRegister + i) + 1] = fui((float)(pConstantData[4 * i + 1]));
1663            context->vs_const_i[4 * (StartRegister + i) + 2] = fui((float)(pConstantData[4 * i + 2]));
1664            context->vs_const_i[4 * (StartRegister + i) + 3] = fui((float)(pConstantData[4 * i + 3]));
1665        }
1666    }
1667
1668    context->changed.vs_const_i = TRUE;
1669    context->changed.group |= NINE_STATE_VS_CONST | NINE_STATE_VS_PARAMS_MISC;
1670}
1671
1672CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_b,
1673                  ARG_VAL(UINT, StartRegister),
1674                  ARG_MEM(BOOL, pConstantData),
1675                  ARG_MEM_SIZE(unsigned, pConstantData_size),
1676                  ARG_VAL(UINT, BoolCount))
1677{
1678    struct nine_context *context = &device->context;
1679    int i;
1680    uint32_t bool_true = device->driver_caps.vs_integer ? 0xFFFFFFFF : fui(1.0f);
1681
1682    (void) pConstantData_size;
1683
1684    for (i = 0; i < BoolCount; i++)
1685        context->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
1686
1687    context->changed.vs_const_b = TRUE;
1688    context->changed.group |= NINE_STATE_VS_CONST | NINE_STATE_VS_PARAMS_MISC;
1689}
1690
1691CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader,
1692                  ARG_BIND_REF(struct NinePixelShader9, ps))
1693{
1694    struct nine_context *context = &device->context;
1695    unsigned old_mask = context->ps ? context->ps->rt_mask : 1;
1696    unsigned mask;
1697
1698    /* ff -> non-ff: commit back non-ff constants */
1699    if (!context->ps && ps)
1700        context->commit |= NINE_STATE_COMMIT_CONST_PS;
1701
1702    nine_bind(&context->ps, ps);
1703
1704    context->changed.group |= NINE_STATE_PS;
1705
1706    mask = context->ps ? context->ps->rt_mask : 1;
1707    /* We need to update cbufs if the pixel shader would
1708     * write to different render targets */
1709    if (mask != old_mask)
1710        context->changed.group |= NINE_STATE_FB;
1711}
1712
1713CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_f,
1714                  ARG_VAL(UINT, StartRegister),
1715                  ARG_MEM(float, pConstantData),
1716                  ARG_MEM_SIZE(unsigned, pConstantData_size),
1717                  ARG_VAL(UINT, Vector4fCount))
1718{
1719    struct nine_context *context = &device->context;
1720
1721    memcpy(&context->ps_const_f[StartRegister * 4],
1722           pConstantData,
1723           pConstantData_size);
1724
1725    context->changed.ps_const_f = TRUE;
1726    context->changed.group |= NINE_STATE_PS_CONST;
1727}
1728
1729/* For stateblocks */
1730CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i_transformed,
1731                  ARG_VAL(UINT, StartRegister),
1732                  ARG_MEM(int, pConstantData),
1733                  ARG_MEM_SIZE(unsigned, pConstantData_size),
1734                  ARG_VAL(UINT, Vector4iCount))
1735{
1736    struct nine_context *context = &device->context;
1737
1738    memcpy(&context->ps_const_i[StartRegister][0],
1739           pConstantData,
1740           Vector4iCount * sizeof(context->ps_const_i[0]));
1741
1742    context->changed.ps_const_i = TRUE;
1743    context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_PS_PARAMS_MISC;
1744}
1745
1746CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i,
1747                  ARG_VAL(UINT, StartRegister),
1748                  ARG_MEM(int, pConstantData),
1749                  ARG_MEM_SIZE(unsigned, pConstantData_size),
1750                  ARG_VAL(UINT, Vector4iCount))
1751{
1752    struct nine_context *context = &device->context;
1753    int i;
1754
1755    if (device->driver_caps.ps_integer) {
1756        memcpy(&context->ps_const_i[StartRegister][0],
1757               pConstantData,
1758               pConstantData_size);
1759    } else {
1760        for (i = 0; i < Vector4iCount; i++) {
1761            context->ps_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i]));
1762            context->ps_const_i[StartRegister+i][1] = fui((float)(pConstantData[4*i+1]));
1763            context->ps_const_i[StartRegister+i][2] = fui((float)(pConstantData[4*i+2]));
1764            context->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
1765        }
1766    }
1767    context->changed.ps_const_i = TRUE;
1768    context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_PS_PARAMS_MISC;
1769}
1770
1771CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_b,
1772                  ARG_VAL(UINT, StartRegister),
1773                  ARG_MEM(BOOL, pConstantData),
1774                  ARG_MEM_SIZE(unsigned, pConstantData_size),
1775                  ARG_VAL(UINT, BoolCount))
1776{
1777    struct nine_context *context = &device->context;
1778    int i;
1779    uint32_t bool_true = device->driver_caps.ps_integer ? 0xFFFFFFFF : fui(1.0f);
1780
1781    (void) pConstantData_size;
1782
1783    for (i = 0; i < BoolCount; i++)
1784        context->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
1785
1786    context->changed.ps_const_b = TRUE;
1787    context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_PS_PARAMS_MISC;
1788}
1789
1790/* XXX: use resource, as resource might change */
1791CSMT_ITEM_NO_WAIT(nine_context_set_render_target,
1792                  ARG_VAL(DWORD, RenderTargetIndex),
1793                  ARG_BIND_REF(struct NineSurface9, rt))
1794{
1795    struct nine_context *context = &device->context;
1796    const unsigned i = RenderTargetIndex;
1797
1798    if (i == 0) {
1799        context->viewport.X = 0;
1800        context->viewport.Y = 0;
1801        context->viewport.Width = rt->desc.Width;
1802        context->viewport.Height = rt->desc.Height;
1803        context->viewport.MinZ = 0.0f;
1804        context->viewport.MaxZ = 1.0f;
1805
1806        context->scissor.minx = 0;
1807        context->scissor.miny = 0;
1808        context->scissor.maxx = rt->desc.Width;
1809        context->scissor.maxy = rt->desc.Height;
1810
1811        context->changed.group |= NINE_STATE_VIEWPORT | NINE_STATE_SCISSOR | NINE_STATE_MULTISAMPLE;
1812
1813        if (context->rt[0] &&
1814            (context->rt[0]->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE) !=
1815            (rt->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE))
1816            context->changed.group |= NINE_STATE_SAMPLE_MASK;
1817    }
1818
1819    if (context->rt[i] != rt) {
1820       nine_bind(&context->rt[i], rt);
1821       context->changed.group |= NINE_STATE_FB;
1822    }
1823}
1824
1825/* XXX: use resource instead of ds, as resource might change */
1826CSMT_ITEM_NO_WAIT(nine_context_set_depth_stencil,
1827                  ARG_BIND_REF(struct NineSurface9, ds))
1828{
1829    struct nine_context *context = &device->context;
1830
1831    nine_bind(&context->ds, ds);
1832    context->changed.group |= NINE_STATE_FB;
1833}
1834
1835CSMT_ITEM_NO_WAIT(nine_context_set_viewport,
1836                  ARG_COPY_REF(D3DVIEWPORT9, viewport))
1837{
1838    struct nine_context *context = &device->context;
1839
1840    context->viewport = *viewport;
1841    context->changed.group |= NINE_STATE_VIEWPORT;
1842}
1843
1844CSMT_ITEM_NO_WAIT(nine_context_set_scissor,
1845                  ARG_COPY_REF(struct pipe_scissor_state, scissor))
1846{
1847    struct nine_context *context = &device->context;
1848
1849    context->scissor = *scissor;
1850    context->changed.group |= NINE_STATE_SCISSOR;
1851}
1852
1853CSMT_ITEM_NO_WAIT(nine_context_set_transform,
1854                  ARG_VAL(D3DTRANSFORMSTATETYPE, State),
1855                  ARG_COPY_REF(D3DMATRIX, pMatrix))
1856{
1857    struct nine_context *context = &device->context;
1858    D3DMATRIX *M = nine_state_access_transform(&context->ff, State, TRUE);
1859
1860    *M = *pMatrix;
1861    context->ff.changed.transform[State / 32] |= 1 << (State % 32);
1862    context->changed.group |= NINE_STATE_FF;
1863}
1864
1865CSMT_ITEM_NO_WAIT(nine_context_set_material,
1866                  ARG_COPY_REF(D3DMATERIAL9, pMaterial))
1867{
1868    struct nine_context *context = &device->context;
1869
1870    context->ff.material = *pMaterial;
1871    context->changed.group |= NINE_STATE_FF_MATERIAL;
1872}
1873
1874CSMT_ITEM_NO_WAIT(nine_context_set_light,
1875                  ARG_VAL(DWORD, Index),
1876                  ARG_COPY_REF(D3DLIGHT9, pLight))
1877{
1878    struct nine_context *context = &device->context;
1879
1880    (void)nine_state_set_light(&context->ff, Index, pLight);
1881    context->changed.group |= NINE_STATE_FF_LIGHTING;
1882}
1883
1884
1885/* For stateblocks */
1886static void
1887nine_context_light_enable_stateblock(struct NineDevice9 *device,
1888                                     const uint16_t active_light[NINE_MAX_LIGHTS_ACTIVE], /* TODO: use pointer that convey size for csmt */
1889                                     unsigned int num_lights_active)
1890{
1891    struct nine_context *context = &device->context;
1892
1893    /* TODO: Use CSMT_* to avoid calling nine_csmt_process */
1894    nine_csmt_process(device);
1895    memcpy(context->ff.active_light, active_light, NINE_MAX_LIGHTS_ACTIVE * sizeof(context->ff.active_light[0]));
1896    context->ff.num_lights_active = num_lights_active;
1897    context->changed.group |= NINE_STATE_FF_LIGHTING;
1898}
1899
1900CSMT_ITEM_NO_WAIT(nine_context_light_enable,
1901                  ARG_VAL(DWORD, Index),
1902                  ARG_VAL(BOOL, Enable))
1903{
1904    struct nine_context *context = &device->context;
1905
1906    nine_state_light_enable(&context->ff, Index, Enable);
1907    context->changed.group |= NINE_STATE_FF_LIGHTING;
1908}
1909
1910CSMT_ITEM_NO_WAIT(nine_context_set_texture_stage_state,
1911                  ARG_VAL(DWORD, Stage),
1912                  ARG_VAL(D3DTEXTURESTAGESTATETYPE, Type),
1913                  ARG_VAL(DWORD, Value))
1914{
1915    struct nine_context *context = &device->context;
1916    int bumpmap_index = -1;
1917
1918    context->ff.tex_stage[Stage][Type] = Value;
1919    switch (Type) {
1920    case D3DTSS_BUMPENVMAT00:
1921        bumpmap_index = 4 * Stage;
1922        break;
1923    case D3DTSS_BUMPENVMAT01:
1924        bumpmap_index = 4 * Stage + 1;
1925        break;
1926    case D3DTSS_BUMPENVMAT10:
1927        bumpmap_index = 4 * Stage + 2;
1928        break;
1929    case D3DTSS_BUMPENVMAT11:
1930        bumpmap_index = 4 * Stage + 3;
1931        break;
1932    case D3DTSS_BUMPENVLSCALE:
1933        bumpmap_index = 4 * 8 + 2 * Stage;
1934        break;
1935    case D3DTSS_BUMPENVLOFFSET:
1936        bumpmap_index = 4 * 8 + 2 * Stage + 1;
1937        break;
1938    case D3DTSS_TEXTURETRANSFORMFLAGS:
1939        context->changed.group |= NINE_STATE_PS_PARAMS_MISC;
1940        break;
1941    default:
1942        break;
1943    }
1944
1945    if (bumpmap_index >= 0) {
1946        context->bumpmap_vars[bumpmap_index] = Value;
1947        context->changed.group |= NINE_STATE_PS_CONST;
1948    }
1949
1950    context->changed.group |= NINE_STATE_FF_PS_CONSTS;
1951    context->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
1952}
1953
1954CSMT_ITEM_NO_WAIT(nine_context_set_clip_plane,
1955                  ARG_VAL(DWORD, Index),
1956                  ARG_COPY_REF(struct nine_clipplane, pPlane))
1957{
1958    struct nine_context *context = &device->context;
1959
1960    memcpy(&context->clip.ucp[Index][0], pPlane, sizeof(context->clip.ucp[0]));
1961    context->changed.ucp = TRUE;
1962}
1963
1964CSMT_ITEM_NO_WAIT(nine_context_set_swvp,
1965                  ARG_VAL(boolean, swvp))
1966{
1967    struct nine_context *context = &device->context;
1968
1969    context->swvp = swvp;
1970    context->changed.group |= NINE_STATE_SWVP;
1971}
1972
1973/* Do not write to nine_context directly. Slower,
1974 * but works with csmt. TODO: write a special csmt version that
1975 * would record the list of commands as much as possible,
1976 * and use the version above else.
1977 */
1978void
1979nine_context_apply_stateblock(struct NineDevice9 *device,
1980                              const struct nine_state *src)
1981{
1982    int i;
1983
1984    /* No need to apply src->changed.group, since all calls do
1985    * set context->changed.group */
1986
1987    for (i = 0; i < ARRAY_SIZE(src->changed.rs); ++i) {
1988        uint32_t m = src->changed.rs[i];
1989        while (m) {
1990            const int r = ffs(m) - 1;
1991            m &= ~(1 << r);
1992            nine_context_set_render_state(device, i * 32 + r, src->rs_advertised[i * 32 + r]);
1993        }
1994    }
1995
1996    /* Textures */
1997    if (src->changed.texture) {
1998        uint32_t m = src->changed.texture;
1999        unsigned s;
2000
2001        for (s = 0; m; ++s, m >>= 1) {
2002            struct NineBaseTexture9 *tex = src->texture[s];
2003            if (!(m & 1))
2004                continue;
2005            nine_context_set_texture(device, s, tex);
2006        }
2007    }
2008
2009    /* Sampler state */
2010    if (src->changed.group & NINE_STATE_SAMPLER) {
2011        unsigned s;
2012
2013        for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
2014            uint32_t m = src->changed.sampler[s];
2015            while (m) {
2016                const int i = ffs(m) - 1;
2017                m &= ~(1 << i);
2018                nine_context_set_sampler_state(device, s, i, src->samp_advertised[s][i]);
2019            }
2020        }
2021    }
2022
2023    /* Vertex buffers */
2024    if (src->changed.vtxbuf | src->changed.stream_freq) {
2025        uint32_t m = src->changed.vtxbuf | src->changed.stream_freq;
2026        for (i = 0; m; ++i, m >>= 1) {
2027            if (src->changed.vtxbuf & (1 << i))
2028                nine_context_set_stream_source(device, i, src->stream[i], src->vtxbuf[i].buffer_offset, src->vtxbuf[i].stride);
2029            if (src->changed.stream_freq & (1 << i))
2030                nine_context_set_stream_source_freq(device, i, src->stream_freq[i]);
2031        }
2032    }
2033
2034    /* Index buffer */
2035    if (src->changed.group & NINE_STATE_IDXBUF)
2036        nine_context_set_indices(device, src->idxbuf);
2037
2038    /* Vertex declaration */
2039    if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
2040        nine_context_set_vertex_declaration(device, src->vdecl);
2041
2042    /* Vertex shader */
2043    if (src->changed.group & NINE_STATE_VS)
2044        nine_context_set_vertex_shader(device, src->vs);
2045
2046    /* Pixel shader */
2047    if (src->changed.group & NINE_STATE_PS)
2048        nine_context_set_pixel_shader(device, src->ps);
2049
2050    /* Vertex constants */
2051    if (src->changed.group & NINE_STATE_VS_CONST) {
2052        struct nine_range *r;
2053        for (r = src->changed.vs_const_f; r; r = r->next)
2054            nine_context_set_vertex_shader_constant_f(device, r->bgn,
2055                                                      &src->vs_const_f[r->bgn * 4],
2056                                                      sizeof(float[4]) * (r->end - r->bgn),
2057                                                      r->end - r->bgn);
2058        for (r = src->changed.vs_const_i; r; r = r->next)
2059            nine_context_set_vertex_shader_constant_i(device, r->bgn,
2060                                                      &src->vs_const_i[r->bgn * 4],
2061                                                      sizeof(int[4]) * (r->end - r->bgn),
2062                                                      r->end - r->bgn);
2063        for (r = src->changed.vs_const_b; r; r = r->next)
2064            nine_context_set_vertex_shader_constant_b(device, r->bgn,
2065                                                      &src->vs_const_b[r->bgn * 4],
2066                                                      sizeof(BOOL) * (r->end - r->bgn),
2067                                                      r->end - r->bgn);
2068    }
2069
2070    /* Pixel constants */
2071    if (src->changed.group & NINE_STATE_PS_CONST) {
2072        struct nine_range *r;
2073        for (r = src->changed.ps_const_f; r; r = r->next)
2074            nine_context_set_pixel_shader_constant_f(device, r->bgn,
2075                                                     &src->ps_const_f[r->bgn * 4],
2076                                                     sizeof(float[4]) * (r->end - r->bgn),
2077                                                     r->end - r->bgn);
2078        if (src->changed.ps_const_i) {
2079            uint16_t m = src->changed.ps_const_i;
2080            for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
2081                if (m & 1)
2082                    nine_context_set_pixel_shader_constant_i_transformed(device, i,
2083                                                                         src->ps_const_i[i], sizeof(int[4]), 1);
2084        }
2085        if (src->changed.ps_const_b) {
2086            uint16_t m = src->changed.ps_const_b;
2087            for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
2088                if (m & 1)
2089                    nine_context_set_pixel_shader_constant_b(device, i,
2090                                                             &src->ps_const_b[i], sizeof(BOOL), 1);
2091        }
2092    }
2093
2094    /* Viewport */
2095    if (src->changed.group & NINE_STATE_VIEWPORT)
2096        nine_context_set_viewport(device, &src->viewport);
2097
2098    /* Scissor */
2099    if (src->changed.group & NINE_STATE_SCISSOR)
2100        nine_context_set_scissor(device, &src->scissor);
2101
2102    /* User Clip Planes */
2103    if (src->changed.ucp)
2104        for (i = 0; i < PIPE_MAX_CLIP_PLANES; ++i)
2105            if (src->changed.ucp & (1 << i))
2106                nine_context_set_clip_plane(device, i, (struct nine_clipplane*)&src->clip.ucp[i][0]);
2107
2108    if (!(src->changed.group & NINE_STATE_FF))
2109        return;
2110
2111    /* Fixed function state. */
2112
2113    if (src->changed.group & NINE_STATE_FF_MATERIAL)
2114        nine_context_set_material(device, &src->ff.material);
2115
2116    if (src->changed.group & NINE_STATE_FF_PS_CONSTS) {
2117        unsigned s;
2118        for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) {
2119            for (i = 0; i < NINED3DTSS_COUNT; ++i)
2120                if (src->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32)))
2121                   nine_context_set_texture_stage_state(device, s, i, src->ff.tex_stage[s][i]);
2122        }
2123    }
2124    if (src->changed.group & NINE_STATE_FF_LIGHTING) {
2125        for (i = 0; i < src->ff.num_lights; ++i)
2126            if (src->ff.light[i].Type != NINED3DLIGHT_INVALID)
2127                nine_context_set_light(device, i, &src->ff.light[i]);
2128
2129        nine_context_light_enable_stateblock(device, src->ff.active_light, src->ff.num_lights_active);
2130    }
2131    if (src->changed.group & NINE_STATE_FF_VSTRANSF) {
2132        for (i = 0; i < ARRAY_SIZE(src->ff.changed.transform); ++i) {
2133            unsigned s;
2134            if (!src->ff.changed.transform[i])
2135                continue;
2136            for (s = i * 32; s < (i * 32 + 32); ++s) {
2137                if (!(src->ff.changed.transform[i] & (1 << (s % 32))))
2138                    continue;
2139                /* MaxVertexBlendMatrixIndex is 8, which means
2140                 * we don't read past index D3DTS_WORLDMATRIX(8).
2141                 * swvp is supposed to allow all 256, but we don't
2142                 * implement it for now. */
2143                if (s > D3DTS_WORLDMATRIX(8))
2144                    break;
2145                nine_context_set_transform(device, s,
2146                                           nine_state_access_transform(
2147                                               (struct nine_ff_state *)&src->ff,
2148                                                                       s, FALSE));
2149            }
2150        }
2151    }
2152}
2153
2154static void
2155nine_update_state_framebuffer_clear(struct NineDevice9 *device)
2156{
2157    struct nine_context *context = &device->context;
2158
2159    if (context->changed.group & NINE_STATE_FB)
2160        update_framebuffer(device, TRUE);
2161}
2162
2163CSMT_ITEM_NO_WAIT(nine_context_clear_fb,
2164                  ARG_VAL(DWORD, Count),
2165                  ARG_COPY_REF(D3DRECT, pRects),
2166                  ARG_VAL(DWORD, Flags),
2167                  ARG_VAL(D3DCOLOR, Color),
2168                  ARG_VAL(float, Z),
2169                  ARG_VAL(DWORD, Stencil))
2170{
2171    struct nine_context *context = &device->context;
2172    const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
2173    struct pipe_surface *cbuf, *zsbuf;
2174    struct pipe_context *pipe = context->pipe;
2175    struct NineSurface9 *zsbuf_surf = context->ds;
2176    struct NineSurface9 *rt;
2177    unsigned bufs = 0;
2178    unsigned r, i;
2179    union pipe_color_union rgba;
2180    unsigned rt_mask = 0;
2181    D3DRECT rect;
2182
2183    nine_update_state_framebuffer_clear(device);
2184
2185    if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR;
2186    /* Ignore Z buffer if not bound */
2187    if (context->pipe_data.fb.zsbuf != NULL) {
2188        if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
2189        if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
2190    }
2191    if (!bufs)
2192        return;
2193    d3dcolor_to_pipe_color_union(&rgba, Color);
2194
2195    rect.x1 = context->viewport.X;
2196    rect.y1 = context->viewport.Y;
2197    rect.x2 = context->viewport.Width + rect.x1;
2198    rect.y2 = context->viewport.Height + rect.y1;
2199
2200    /* Both rectangles apply, which is weird, but that's D3D9. */
2201    if (context->rs[D3DRS_SCISSORTESTENABLE]) {
2202        rect.x1 = MAX2(rect.x1, context->scissor.minx);
2203        rect.y1 = MAX2(rect.y1, context->scissor.miny);
2204        rect.x2 = MIN2(rect.x2, context->scissor.maxx);
2205        rect.y2 = MIN2(rect.y2, context->scissor.maxy);
2206    }
2207
2208    if (Count) {
2209        /* Maybe apps like to specify a large rect ? */
2210        if (pRects[0].x1 <= rect.x1 && pRects[0].x2 >= rect.x2 &&
2211            pRects[0].y1 <= rect.y1 && pRects[0].y2 >= rect.y2) {
2212            DBG("First rect covers viewport.\n");
2213            Count = 0;
2214            pRects = NULL;
2215        }
2216    }
2217
2218    if (rect.x1 >= context->pipe_data.fb.width || rect.y1 >= context->pipe_data.fb.height)
2219        return;
2220
2221    for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
2222        if (context->rt[i] && context->rt[i]->desc.Format != D3DFMT_NULL)
2223            rt_mask |= 1 << i;
2224    }
2225
2226    /* fast path, clears everything at once */
2227    if (!Count &&
2228        (!(bufs & PIPE_CLEAR_COLOR) || (rt_mask == context->rt_mask)) &&
2229        rect.x1 == 0 && rect.y1 == 0 &&
2230        /* Case we clear only render target. Check clear region vs rt. */
2231        ((!(bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
2232         rect.x2 >= context->pipe_data.fb.width &&
2233         rect.y2 >= context->pipe_data.fb.height) ||
2234        /* Case we clear depth buffer (and eventually rt too).
2235         * depth buffer size is always >= rt size. Compare to clear region */
2236        ((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
2237         rect.x2 >= zsbuf_surf->desc.Width &&
2238         rect.y2 >= zsbuf_surf->desc.Height))) {
2239        DBG("Clear fast path\n");
2240        pipe->clear(pipe, bufs, &rgba, Z, Stencil);
2241        return;
2242    }
2243
2244    if (!Count) {
2245        Count = 1;
2246        pRects = &rect;
2247    }
2248
2249    for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
2250        rt = context->rt[i];
2251        if (!rt || rt->desc.Format == D3DFMT_NULL ||
2252            !(bufs & PIPE_CLEAR_COLOR))
2253            continue; /* save space, compiler should hoist this */
2254        cbuf = NineSurface9_GetSurface(rt, sRGB);
2255        for (r = 0; r < Count; ++r) {
2256            /* Don't trust users to pass these in the right order. */
2257            unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
2258            unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
2259            unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
2260            unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
2261#ifndef NINE_LAX
2262            /* Drop negative rectangles (like wine expects). */
2263            if (pRects[r].x1 > pRects[r].x2) continue;
2264            if (pRects[r].y1 > pRects[r].y2) continue;
2265#endif
2266
2267            x1 = MAX2(x1, rect.x1);
2268            y1 = MAX2(y1, rect.y1);
2269            x2 = MIN3(x2, rect.x2, rt->desc.Width);
2270            y2 = MIN3(y2, rect.y2, rt->desc.Height);
2271
2272            DBG("Clearing (%u..%u)x(%u..%u)\n", x1, x2, y1, y2);
2273            pipe->clear_render_target(pipe, cbuf, &rgba,
2274                                      x1, y1, x2 - x1, y2 - y1, false);
2275        }
2276    }
2277    if (!(bufs & PIPE_CLEAR_DEPTHSTENCIL))
2278        return;
2279
2280    bufs &= PIPE_CLEAR_DEPTHSTENCIL;
2281
2282    for (r = 0; r < Count; ++r) {
2283        unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
2284        unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
2285        unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
2286        unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
2287#ifndef NINE_LAX
2288        /* Drop negative rectangles. */
2289        if (pRects[r].x1 > pRects[r].x2) continue;
2290        if (pRects[r].y1 > pRects[r].y2) continue;
2291#endif
2292
2293        x1 = MIN2(x1, rect.x1);
2294        y1 = MIN2(y1, rect.y1);
2295        x2 = MIN3(x2, rect.x2, zsbuf_surf->desc.Width);
2296        y2 = MIN3(y2, rect.y2, zsbuf_surf->desc.Height);
2297
2298        zsbuf = NineSurface9_GetSurface(zsbuf_surf, 0);
2299        assert(zsbuf);
2300        pipe->clear_depth_stencil(pipe, zsbuf, bufs, Z, Stencil,
2301                                  x1, y1, x2 - x1, y2 - y1, false);
2302    }
2303    return;
2304}
2305
2306
2307static inline void
2308init_draw_info(struct pipe_draw_info *info,
2309               struct NineDevice9 *dev, D3DPRIMITIVETYPE type, UINT count)
2310{
2311    info->mode = d3dprimitivetype_to_pipe_prim(type);
2312    info->count = prim_count_to_vertex_count(type, count);
2313    info->start_instance = 0;
2314    info->instance_count = 1;
2315    if (dev->context.stream_instancedata_mask & dev->context.stream_usage_mask)
2316        info->instance_count = MAX2(dev->context.stream_freq[0] & 0x7FFFFF, 1);
2317    info->primitive_restart = FALSE;
2318    info->has_user_indices = FALSE;
2319    info->restart_index = 0;
2320    info->count_from_stream_output = NULL;
2321    info->indirect = NULL;
2322}
2323
2324CSMT_ITEM_NO_WAIT(nine_context_draw_primitive,
2325                  ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2326                  ARG_VAL(UINT, StartVertex),
2327                  ARG_VAL(UINT, PrimitiveCount))
2328{
2329    struct nine_context *context = &device->context;
2330    struct pipe_draw_info info;
2331
2332    nine_update_state(device);
2333
2334    init_draw_info(&info, device, PrimitiveType, PrimitiveCount);
2335    info.index_size = 0;
2336    info.start = StartVertex;
2337    info.index_bias = 0;
2338    info.min_index = info.start;
2339    info.max_index = info.count - 1;
2340    info.index.resource = NULL;
2341
2342    context->pipe->draw_vbo(context->pipe, &info);
2343}
2344
2345CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive,
2346                  ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2347                  ARG_VAL(INT, BaseVertexIndex),
2348                  ARG_VAL(UINT, MinVertexIndex),
2349                  ARG_VAL(UINT, NumVertices),
2350                  ARG_VAL(UINT, StartIndex),
2351                  ARG_VAL(UINT, PrimitiveCount))
2352{
2353    struct nine_context *context = &device->context;
2354    struct pipe_draw_info info;
2355
2356    nine_update_state(device);
2357
2358    init_draw_info(&info, device, PrimitiveType, PrimitiveCount);
2359    info.index_size = context->index_size;
2360    info.start = context->index_offset / context->index_size + StartIndex;
2361    info.index_bias = BaseVertexIndex;
2362    /* These don't include index bias: */
2363    info.min_index = MinVertexIndex;
2364    info.max_index = MinVertexIndex + NumVertices - 1;
2365    info.index.resource = context->idxbuf;
2366
2367    context->pipe->draw_vbo(context->pipe, &info);
2368}
2369
2370CSMT_ITEM_NO_WAIT(nine_context_draw_primitive_from_vtxbuf,
2371                  ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2372                  ARG_VAL(UINT, PrimitiveCount),
2373                  ARG_BIND_VBUF(struct pipe_vertex_buffer, vtxbuf))
2374{
2375    struct nine_context *context = &device->context;
2376    struct pipe_draw_info info;
2377
2378    nine_update_state(device);
2379
2380    init_draw_info(&info, device, PrimitiveType, PrimitiveCount);
2381    info.index_size = 0;
2382    info.start = 0;
2383    info.index_bias = 0;
2384    info.min_index = 0;
2385    info.max_index = info.count - 1;
2386    info.index.resource = NULL;
2387
2388    context->pipe->set_vertex_buffers(context->pipe, 0, 1, vtxbuf);
2389
2390    context->pipe->draw_vbo(context->pipe, &info);
2391}
2392
2393CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf,
2394                  ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2395                  ARG_VAL(UINT, MinVertexIndex),
2396                  ARG_VAL(UINT, NumVertices),
2397                  ARG_VAL(UINT, PrimitiveCount),
2398                  ARG_BIND_VBUF(struct pipe_vertex_buffer, vbuf),
2399                  ARG_BIND_RES(struct pipe_resource, ibuf),
2400                  ARG_VAL(void *, user_ibuf),
2401                  ARG_VAL(UINT, index_offset),
2402                  ARG_VAL(UINT, index_size))
2403{
2404    struct nine_context *context = &device->context;
2405    struct pipe_draw_info info;
2406
2407    nine_update_state(device);
2408
2409    init_draw_info(&info, device, PrimitiveType, PrimitiveCount);
2410    info.index_size = index_size;
2411    info.start = index_offset / info.index_size;
2412    info.index_bias = 0;
2413    info.min_index = MinVertexIndex;
2414    info.max_index = MinVertexIndex + NumVertices - 1;
2415    info.has_user_indices = ibuf == NULL;
2416    if (ibuf)
2417        info.index.resource = ibuf;
2418    else
2419        info.index.user = user_ibuf;
2420
2421    context->pipe->set_vertex_buffers(context->pipe, 0, 1, vbuf);
2422
2423    context->pipe->draw_vbo(context->pipe, &info);
2424}
2425
2426CSMT_ITEM_NO_WAIT(nine_context_resource_copy_region,
2427                  ARG_BIND_REF(struct NineUnknown, dst),
2428                  ARG_BIND_REF(struct NineUnknown, src),
2429                  ARG_BIND_RES(struct pipe_resource, dst_res),
2430                  ARG_VAL(unsigned, dst_level),
2431                  ARG_COPY_REF(struct pipe_box, dst_box),
2432                  ARG_BIND_RES(struct pipe_resource, src_res),
2433                  ARG_VAL(unsigned, src_level),
2434                  ARG_COPY_REF(struct pipe_box, src_box))
2435{
2436    struct nine_context *context = &device->context;
2437
2438    (void) dst;
2439    (void) src;
2440
2441    context->pipe->resource_copy_region(context->pipe,
2442            dst_res, dst_level,
2443            dst_box->x, dst_box->y, dst_box->z,
2444            src_res, src_level,
2445            src_box);
2446}
2447
2448CSMT_ITEM_NO_WAIT(nine_context_blit,
2449                  ARG_BIND_REF(struct NineUnknown, dst),
2450                  ARG_BIND_REF(struct NineUnknown, src),
2451                  ARG_BIND_BLIT(struct pipe_blit_info, blit))
2452{
2453    struct nine_context *context = &device->context;
2454
2455    (void) dst;
2456    (void) src;
2457
2458    context->pipe->blit(context->pipe, blit);
2459}
2460
2461CSMT_ITEM_NO_WAIT(nine_context_clear_render_target,
2462                  ARG_BIND_REF(struct NineSurface9, surface),
2463                  ARG_VAL(D3DCOLOR, color),
2464                  ARG_VAL(UINT, x),
2465                  ARG_VAL(UINT, y),
2466                  ARG_VAL(UINT, width),
2467                  ARG_VAL(UINT, height))
2468{
2469    struct nine_context *context = &device->context;
2470    struct pipe_surface *surf;
2471    union pipe_color_union rgba;
2472
2473    d3dcolor_to_pipe_color_union(&rgba, color);
2474    surf = NineSurface9_GetSurface(surface, 0);
2475    context->pipe->clear_render_target(context->pipe, surf, &rgba, x, y, width, height, false);
2476}
2477
2478CSMT_ITEM_NO_WAIT(nine_context_gen_mipmap,
2479                  ARG_BIND_REF(struct NineUnknown, dst),
2480                  ARG_BIND_RES(struct pipe_resource, res),
2481                  ARG_VAL(UINT, base_level),
2482                  ARG_VAL(UINT, last_level),
2483                  ARG_VAL(UINT, first_layer),
2484                  ARG_VAL(UINT, last_layer),
2485                  ARG_VAL(UINT, filter))
2486{
2487    struct nine_context *context = &device->context;
2488
2489    /* We just bind dst for the bind count */
2490    (void)dst;
2491
2492    util_gen_mipmap(context->pipe, res, res->format, base_level,
2493                    last_level, first_layer, last_layer, filter);
2494}
2495
2496CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload,
2497                               ARG_BIND_REF(struct NineUnknown, src_ref),
2498                               ARG_BIND_RES(struct pipe_resource, res),
2499                               ARG_VAL(unsigned, offset),
2500                               ARG_VAL(unsigned, size),
2501                               ARG_VAL(const void *, data))
2502{
2503    struct nine_context *context = &device->context;
2504
2505    /* Binding src_ref avoids release before upload */
2506    (void)src_ref;
2507
2508    context->pipe->buffer_subdata(context->pipe, res, 0, offset, size, data);
2509}
2510
2511CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload,
2512                               ARG_BIND_REF(struct NineUnknown, src_ref),
2513                               ARG_BIND_RES(struct pipe_resource, res),
2514                               ARG_VAL(unsigned, level),
2515                               ARG_COPY_REF(struct pipe_box, dst_box),
2516                               ARG_VAL(enum pipe_format, src_format),
2517                               ARG_VAL(const void *, src),
2518                               ARG_VAL(unsigned, src_stride),
2519                               ARG_VAL(unsigned, src_layer_stride),
2520                               ARG_COPY_REF(struct pipe_box, src_box))
2521{
2522    struct nine_context *context = &device->context;
2523    struct pipe_context *pipe = context->pipe;
2524    struct pipe_transfer *transfer = NULL;
2525    uint8_t *map;
2526
2527    /* Binding src_ref avoids release before upload */
2528    (void)src_ref;
2529
2530    map = pipe->transfer_map(pipe,
2531                             res,
2532                             level,
2533                             PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
2534                             dst_box, &transfer);
2535    if (!map)
2536        return;
2537
2538    /* Note: if formats are the sames, it will revert
2539     * to normal memcpy */
2540    (void) util_format_translate_3d(res->format,
2541                                    map, transfer->stride,
2542                                    transfer->layer_stride,
2543                                    0, 0, 0,
2544                                    src_format,
2545                                    src, src_stride,
2546                                    src_layer_stride,
2547                                    src_box->x, src_box->y, src_box->z,
2548                                    dst_box->width, dst_box->height,
2549                                    dst_box->depth);
2550
2551    pipe_transfer_unmap(pipe, transfer);
2552}
2553
2554struct pipe_query *
2555nine_context_create_query(struct NineDevice9 *device, unsigned query_type)
2556{
2557    struct pipe_context *pipe;
2558    struct pipe_query *res;
2559
2560    pipe = nine_context_get_pipe_acquire(device);
2561    res = pipe->create_query(pipe, query_type, 0);
2562    nine_context_get_pipe_release(device);
2563    return res;
2564}
2565
2566CSMT_ITEM_DO_WAIT(nine_context_destroy_query,
2567                  ARG_REF(struct pipe_query, query))
2568{
2569    struct nine_context *context = &device->context;
2570
2571    context->pipe->destroy_query(context->pipe, query);
2572}
2573
2574CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_begin_query,
2575                               ARG_REF(struct pipe_query, query))
2576{
2577    struct nine_context *context = &device->context;
2578
2579    (void) context->pipe->begin_query(context->pipe, query);
2580}
2581
2582CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_end_query,
2583                               ARG_REF(struct pipe_query, query))
2584{
2585    struct nine_context *context = &device->context;
2586
2587    (void) context->pipe->end_query(context->pipe, query);
2588}
2589
2590boolean
2591nine_context_get_query_result(struct NineDevice9 *device, struct pipe_query *query,
2592                              unsigned *counter, boolean flush, boolean wait,
2593                              union pipe_query_result *result)
2594{
2595    struct pipe_context *pipe;
2596    boolean ret;
2597
2598    if (wait)
2599        nine_csmt_process(device);
2600    else if (p_atomic_read(counter) > 0) {
2601        if (flush && device->csmt_active)
2602            nine_queue_flush(device->csmt_ctx->pool);
2603        DBG("Pending begin/end. Returning\n");
2604        return false;
2605    }
2606
2607    pipe = nine_context_get_pipe_acquire(device);
2608    ret = pipe->get_query_result(pipe, query, wait, result);
2609    nine_context_get_pipe_release(device);
2610
2611    DBG("Query result %s\n", ret ? "found" : "not yet available");
2612    return ret;
2613}
2614
2615/* State defaults */
2616
/* Initial value of every render state, indexed by D3DRS_* / NINED3DRS_*.
 * nine_state_set_defaults() copies this table into context->rs.
 * Entries that are commented out or not listed are zero-initialized.
 * 0x3F800000 is the IEEE-754 bit pattern of 1.0f (nine_state_set_defaults()
 * likewise stores float bits via fui() into D3DRS_POINTSIZE_MAX). */
static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] =
{
 /* [D3DRS_ZENABLE] = D3DZB_TRUE; wine: auto_depth_stencil */
    [D3DRS_ZENABLE] = D3DZB_FALSE,
    [D3DRS_FILLMODE] = D3DFILL_SOLID,
    [D3DRS_SHADEMODE] = D3DSHADE_GOURAUD,
/*  [D3DRS_LINEPATTERN] = 0x00000000, */
    [D3DRS_ZWRITEENABLE] = TRUE,
    [D3DRS_ALPHATESTENABLE] = FALSE,
    [D3DRS_LASTPIXEL] = TRUE,
    [D3DRS_SRCBLEND] = D3DBLEND_ONE,
    [D3DRS_DESTBLEND] = D3DBLEND_ZERO,
    [D3DRS_CULLMODE] = D3DCULL_CCW,
    [D3DRS_ZFUNC] = D3DCMP_LESSEQUAL,
    [D3DRS_ALPHAFUNC] = D3DCMP_ALWAYS,
    [D3DRS_ALPHAREF] = 0,
    [D3DRS_DITHERENABLE] = FALSE,
    [D3DRS_ALPHABLENDENABLE] = FALSE,
    [D3DRS_FOGENABLE] = FALSE,
    [D3DRS_SPECULARENABLE] = FALSE,
/*  [D3DRS_ZVISIBLE] = 0, */
    [D3DRS_FOGCOLOR] = 0,
    [D3DRS_FOGTABLEMODE] = D3DFOG_NONE,
    [D3DRS_FOGSTART] = 0x00000000,
    [D3DRS_FOGEND] = 0x3F800000, /* 1.0f */
    [D3DRS_FOGDENSITY] = 0x3F800000, /* 1.0f */
/*  [D3DRS_EDGEANTIALIAS] = FALSE, */
    [D3DRS_RANGEFOGENABLE] = FALSE,
    [D3DRS_STENCILENABLE] = FALSE,
    [D3DRS_STENCILFAIL] = D3DSTENCILOP_KEEP,
    [D3DRS_STENCILZFAIL] = D3DSTENCILOP_KEEP,
    [D3DRS_STENCILPASS] = D3DSTENCILOP_KEEP,
    [D3DRS_STENCILREF] = 0,
    [D3DRS_STENCILMASK] = 0xFFFFFFFF,
    [D3DRS_STENCILFUNC] = D3DCMP_ALWAYS,
    [D3DRS_STENCILWRITEMASK] = 0xFFFFFFFF,
    [D3DRS_TEXTUREFACTOR] = 0xFFFFFFFF,
    [D3DRS_WRAP0] = 0,
    [D3DRS_WRAP1] = 0,
    [D3DRS_WRAP2] = 0,
    [D3DRS_WRAP3] = 0,
    [D3DRS_WRAP4] = 0,
    [D3DRS_WRAP5] = 0,
    [D3DRS_WRAP6] = 0,
    [D3DRS_WRAP7] = 0,
    [D3DRS_CLIPPING] = TRUE,
    [D3DRS_LIGHTING] = TRUE,
    [D3DRS_AMBIENT] = 0,
    [D3DRS_FOGVERTEXMODE] = D3DFOG_NONE,
    [D3DRS_COLORVERTEX] = TRUE,
    [D3DRS_LOCALVIEWER] = TRUE,
    [D3DRS_NORMALIZENORMALS] = FALSE,
    [D3DRS_DIFFUSEMATERIALSOURCE] = D3DMCS_COLOR1,
    [D3DRS_SPECULARMATERIALSOURCE] = D3DMCS_COLOR2,
    [D3DRS_AMBIENTMATERIALSOURCE] = D3DMCS_MATERIAL,
    [D3DRS_EMISSIVEMATERIALSOURCE] = D3DMCS_MATERIAL,
    [D3DRS_VERTEXBLEND] = D3DVBF_DISABLE,
    [D3DRS_CLIPPLANEENABLE] = 0,
/*  [D3DRS_SOFTWAREVERTEXPROCESSING] = FALSE, */
    [D3DRS_POINTSIZE] = 0x3F800000, /* 1.0f */
    [D3DRS_POINTSIZE_MIN] = 0x3F800000, /* 1.0f */
    [D3DRS_POINTSPRITEENABLE] = FALSE,
    [D3DRS_POINTSCALEENABLE] = FALSE,
    [D3DRS_POINTSCALE_A] = 0x3F800000, /* 1.0f */
    [D3DRS_POINTSCALE_B] = 0x00000000,
    [D3DRS_POINTSCALE_C] = 0x00000000,
    [D3DRS_MULTISAMPLEANTIALIAS] = TRUE,
    [D3DRS_MULTISAMPLEMASK] = 0xFFFFFFFF,
    [D3DRS_PATCHEDGESTYLE] = D3DPATCHEDGE_DISCRETE,
/*  [D3DRS_PATCHSEGMENTS] = 0x3F800000, */
    [D3DRS_DEBUGMONITORTOKEN] = 0xDEADCAFE,
    /* Placeholder (1.0f): nine_state_set_defaults() overwrites this with
     * caps->MaxPointSize. */
    [D3DRS_POINTSIZE_MAX] = 0x3F800000, /* depends on cap */
    [D3DRS_INDEXEDVERTEXBLENDENABLE] = FALSE,
    [D3DRS_COLORWRITEENABLE] = 0x0000000f,
    [D3DRS_TWEENFACTOR] = 0x00000000,
    [D3DRS_BLENDOP] = D3DBLENDOP_ADD,
    [D3DRS_POSITIONDEGREE] = D3DDEGREE_CUBIC,
    [D3DRS_NORMALDEGREE] = D3DDEGREE_LINEAR,
    [D3DRS_SCISSORTESTENABLE] = FALSE,
    [D3DRS_SLOPESCALEDEPTHBIAS] = 0,
    [D3DRS_MINTESSELLATIONLEVEL] = 0x3F800000, /* 1.0f */
    [D3DRS_MAXTESSELLATIONLEVEL] = 0x3F800000, /* 1.0f */
    [D3DRS_ANTIALIASEDLINEENABLE] = FALSE,
    [D3DRS_ADAPTIVETESS_X] = 0x00000000,
    [D3DRS_ADAPTIVETESS_Y] = 0x00000000,
    [D3DRS_ADAPTIVETESS_Z] = 0x3F800000, /* 1.0f */
    [D3DRS_ADAPTIVETESS_W] = 0x00000000,
    [D3DRS_ENABLEADAPTIVETESSELLATION] = FALSE,
    [D3DRS_TWOSIDEDSTENCILMODE] = FALSE,
    [D3DRS_CCW_STENCILFAIL] = D3DSTENCILOP_KEEP,
    [D3DRS_CCW_STENCILZFAIL] = D3DSTENCILOP_KEEP,
    [D3DRS_CCW_STENCILPASS] = D3DSTENCILOP_KEEP,
    [D3DRS_CCW_STENCILFUNC] = D3DCMP_ALWAYS,
    [D3DRS_COLORWRITEENABLE1] = 0x0000000F,
    [D3DRS_COLORWRITEENABLE2] = 0x0000000F,
    [D3DRS_COLORWRITEENABLE3] = 0x0000000F,
    [D3DRS_BLENDFACTOR] = 0xFFFFFFFF,
    [D3DRS_SRGBWRITEENABLE] = 0,
    [D3DRS_DEPTHBIAS] = 0,
    [D3DRS_WRAP8] = 0,
    [D3DRS_WRAP9] = 0,
    [D3DRS_WRAP10] = 0,
    [D3DRS_WRAP11] = 0,
    [D3DRS_WRAP12] = 0,
    [D3DRS_WRAP13] = 0,
    [D3DRS_WRAP14] = 0,
    [D3DRS_WRAP15] = 0,
    [D3DRS_SEPARATEALPHABLENDENABLE] = FALSE,
    [D3DRS_SRCBLENDALPHA] = D3DBLEND_ONE,
    [D3DRS_DESTBLENDALPHA] = D3DBLEND_ZERO,
    [D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD,
    /* Nine-internal states (NINED3DRS_*), stored in the same table. */
    [NINED3DRS_VSPOINTSIZE] = FALSE,
    [NINED3DRS_RTMASK] = 0xf,
    [NINED3DRS_ALPHACOVERAGE] = FALSE,
    [NINED3DRS_MULTISAMPLE] = FALSE
};
/* Initial texture stage states, indexed by D3DTSS_*.
 * nine_state_set_defaults() copies this table into every texture stage and
 * then overrides D3DTSS_TEXCOORDINDEX with the stage number, and the
 * color/alpha ops of stage 0. Entries not listed are zero-initialized. */
static const DWORD nine_tex_stage_state_defaults[NINED3DTSS_LAST + 1] =
{
    [D3DTSS_COLOROP] = D3DTOP_DISABLE,
    [D3DTSS_ALPHAOP] = D3DTOP_DISABLE,
    [D3DTSS_COLORARG1] = D3DTA_TEXTURE,
    [D3DTSS_COLORARG2] = D3DTA_CURRENT,
    [D3DTSS_COLORARG0] = D3DTA_CURRENT,
    [D3DTSS_ALPHAARG1] = D3DTA_TEXTURE,
    [D3DTSS_ALPHAARG2] = D3DTA_CURRENT,
    [D3DTSS_ALPHAARG0] = D3DTA_CURRENT,
    [D3DTSS_RESULTARG] = D3DTA_CURRENT,
    [D3DTSS_BUMPENVMAT00] = 0,
    [D3DTSS_BUMPENVMAT01] = 0,
    [D3DTSS_BUMPENVMAT10] = 0,
    [D3DTSS_BUMPENVMAT11] = 0,
    [D3DTSS_BUMPENVLSCALE] = 0,
    [D3DTSS_BUMPENVLOFFSET] = 0,
    [D3DTSS_TEXCOORDINDEX] = 0, /* replaced per stage by the stage index */
    [D3DTSS_TEXTURETRANSFORMFLAGS] = D3DTTFF_DISABLE,
};
/* Initial sampler states, indexed by D3DSAMP_* / NINED3DSAMP_*.
 * nine_state_set_defaults() copies this table into context->samp[s] and
 * state->samp_advertised[s] for each of the NINE_MAX_SAMPLERS samplers.
 * Entries not listed are zero-initialized. */
static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] =
{
    [D3DSAMP_ADDRESSU] = D3DTADDRESS_WRAP,
    [D3DSAMP_ADDRESSV] = D3DTADDRESS_WRAP,
    [D3DSAMP_ADDRESSW] = D3DTADDRESS_WRAP,
    [D3DSAMP_BORDERCOLOR] = 0,
    [D3DSAMP_MAGFILTER] = D3DTEXF_POINT,
    [D3DSAMP_MINFILTER] = D3DTEXF_POINT,
    [D3DSAMP_MIPFILTER] = D3DTEXF_NONE,
    [D3DSAMP_MIPMAPLODBIAS] = 0,
    [D3DSAMP_MAXMIPLEVEL] = 0,
    [D3DSAMP_MAXANISOTROPY] = 1,
    [D3DSAMP_SRGBTEXTURE] = 0,
    [D3DSAMP_ELEMENTINDEX] = 0,
    [D3DSAMP_DMAPOFFSET] = 0,
    /* Nine-internal sampler states (NINED3DSAMP_*). */
    [NINED3DSAMP_MINLOD] = 0,
    [NINED3DSAMP_SHADOW] = 0,
    [NINED3DSAMP_CUBETEX] = 0
};
2772
/* Note: The following 4 functions assume that there are no
 * pending commands. */
2775
2776void nine_state_restore_non_cso(struct NineDevice9 *device)
2777{
2778    struct nine_context *context = &device->context;
2779
2780    context->changed.group = NINE_STATE_ALL;
2781    context->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
2782    context->changed.ucp = TRUE;
2783    context->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS;
2784}
2785
2786void
2787nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
2788                        boolean is_reset)
2789{
2790    struct nine_state *state = &device->state;
2791    struct nine_context *context = &device->context;
2792    unsigned s;
2793
2794    /* Initialize defaults.
2795     */
2796    memcpy(context->rs, nine_render_state_defaults, sizeof(context->rs));
2797
2798    for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s) {
2799        memcpy(&state->ff.tex_stage[s], nine_tex_stage_state_defaults,
2800               sizeof(state->ff.tex_stage[s]));
2801        state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] = s;
2802    }
2803    state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE;
2804    state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1;
2805
2806    for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s)
2807        memcpy(&context->ff.tex_stage[s], state->ff.tex_stage[s],
2808               sizeof(state->ff.tex_stage[s]));
2809
2810    memset(&context->bumpmap_vars, 0, sizeof(context->bumpmap_vars));
2811
2812    for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
2813        memcpy(&context->samp[s], nine_samp_state_defaults,
2814               sizeof(context->samp[s]));
2815        memcpy(&state->samp_advertised[s], nine_samp_state_defaults,
2816               sizeof(state->samp_advertised[s]));
2817    }
2818
2819    memset(state->vs_const_f, 0, VS_CONST_F_SIZE(device));
2820    memset(context->vs_const_f, 0, device->vs_const_size);
2821    if (context->vs_const_f_swvp)
2822        memset(context->vs_const_f_swvp, 0, NINE_MAX_CONST_F_SWVP * sizeof(float[4]));
2823    memset(state->vs_const_i, 0, VS_CONST_I_SIZE(device));
2824    memset(context->vs_const_i, 0, VS_CONST_I_SIZE(device));
2825    memset(state->vs_const_b, 0, VS_CONST_B_SIZE(device));
2826    memset(context->vs_const_b, 0, VS_CONST_B_SIZE(device));
2827    memset(state->ps_const_f, 0, device->ps_const_size);
2828    memset(context->ps_const_f, 0, device->ps_const_size);
2829    memset(state->ps_const_i, 0, sizeof(state->ps_const_i));
2830    memset(context->ps_const_i, 0, sizeof(context->ps_const_i));
2831    memset(state->ps_const_b, 0, sizeof(state->ps_const_b));
2832    memset(context->ps_const_b, 0, sizeof(context->ps_const_b));
2833
2834    /* Cap dependent initial state:
2835     */
2836    context->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize);
2837
2838    memcpy(state->rs_advertised, context->rs, sizeof(context->rs));
2839
2840    /* Set changed flags to initialize driver.
2841     */
2842    context->changed.group = NINE_STATE_ALL;
2843    context->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
2844    context->changed.ucp = TRUE;
2845
2846    context->ff.changed.transform[0] = ~0;
2847    context->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32);
2848
2849    if (!is_reset) {
2850        state->viewport.MinZ = context->viewport.MinZ = 0.0f;
2851        state->viewport.MaxZ = context->viewport.MaxZ = 1.0f;
2852    }
2853
2854    for (s = 0; s < NINE_MAX_SAMPLERS; ++s)
2855        context->changed.sampler[s] = ~0;
2856
2857    if (!is_reset) {
2858        context->dummy_vbo_bound_at = -1;
2859        context->vbo_bound_done = FALSE;
2860    }
2861}
2862
2863void
2864nine_device_state_clear(struct NineDevice9 *device)
2865{
2866    struct nine_state *state = &device->state;
2867    unsigned i;
2868
2869    for (i = 0; i < ARRAY_SIZE(state->rt); ++i)
2870       nine_bind(&state->rt[i], NULL);
2871    nine_bind(&state->ds, NULL);
2872    nine_bind(&state->vs, NULL);
2873    nine_bind(&state->ps, NULL);
2874    nine_bind(&state->vdecl, NULL);
2875    for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
2876        NineBindBufferToDevice(device,
2877                               (struct NineBuffer9 **)&state->stream[i],
2878                               NULL);
2879    NineBindBufferToDevice(device,
2880                           (struct NineBuffer9 **)&state->idxbuf,
2881                           NULL);
2882
2883    for (i = 0; i < NINE_MAX_SAMPLERS; ++i)
2884        NineBindTextureToDevice(device, &state->texture[i], NULL);
2885}
2886
2887void
2888nine_context_clear(struct NineDevice9 *device)
2889{
2890    struct nine_context *context = &device->context;
2891    struct pipe_context *pipe = context->pipe;
2892    struct cso_context *cso = context->cso;
2893    unsigned i;
2894
2895    /* Early device ctor failure. Nothing to do */
2896    if (!pipe || !cso)
2897        return;
2898
2899    pipe->bind_vs_state(pipe, NULL);
2900    pipe->bind_fs_state(pipe, NULL);
2901
2902    /* Don't unbind constant buffers, they're device-private and
2903     * do not change on Reset.
2904     */
2905
2906    cso_set_samplers(cso, PIPE_SHADER_VERTEX, 0, NULL);
2907    cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
2908
2909    cso_set_sampler_views(cso, PIPE_SHADER_VERTEX, 0, NULL);
2910    cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
2911
2912    pipe->set_vertex_buffers(pipe, 0, device->caps.MaxStreams, NULL);
2913
2914    for (i = 0; i < ARRAY_SIZE(context->rt); ++i)
2915       nine_bind(&context->rt[i], NULL);
2916    nine_bind(&context->ds, NULL);
2917    nine_bind(&context->vs, NULL);
2918    nine_bind(&context->ps, NULL);
2919    nine_bind(&context->vdecl, NULL);
2920    for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
2921        pipe_vertex_buffer_unreference(&context->vtxbuf[i]);
2922    pipe_resource_reference(&context->idxbuf, NULL);
2923    pipe_resource_reference(&context->pipe_data.cb_vs.buffer, NULL);
2924    pipe_resource_reference(&context->pipe_data.cb_ps.buffer, NULL);
2925
2926    for (i = 0; i < NINE_MAX_SAMPLERS; ++i) {
2927        context->texture[i].enabled = FALSE;
2928        pipe_resource_reference(&context->texture[i].resource,
2929                                NULL);
2930        pipe_sampler_view_reference(&context->texture[i].view[0],
2931                                    NULL);
2932        pipe_sampler_view_reference(&context->texture[i].view[1],
2933                                    NULL);
2934    }
2935}
2936
2937void
2938nine_state_init_sw(struct NineDevice9 *device)
2939{
2940    struct pipe_context *pipe_sw = device->pipe_sw;
2941    struct pipe_rasterizer_state rast;
2942    struct pipe_blend_state blend;
2943    struct pipe_depth_stencil_alpha_state dsa;
2944    struct pipe_framebuffer_state fb;
2945
2946    /* Only used with Streamout */
2947    memset(&rast, 0, sizeof(rast));
2948    rast.rasterizer_discard = true;
2949    rast.point_quad_rasterization = 1; /* to make llvmpipe happy */
2950    cso_set_rasterizer(device->cso_sw, &rast);
2951
2952    /* dummy settings */
2953    memset(&blend, 0, sizeof(blend));
2954    memset(&dsa, 0, sizeof(dsa));
2955    memset(&fb, 0, sizeof(fb));
2956    cso_set_blend(device->cso_sw, &blend);
2957    cso_set_depth_stencil_alpha(device->cso_sw, &dsa);
2958    cso_set_framebuffer(device->cso_sw, &fb);
2959    cso_set_viewport_dims(device->cso_sw, 1.0, 1.0, false);
2960    cso_set_fragment_shader_handle(device->cso_sw, util_make_empty_fragment_shader(pipe_sw));
2961}
2962
2963/* There is duplication with update_vertex_elements.
2964 * TODO: Share the code */
2965
2966static void
2967update_vertex_elements_sw(struct NineDevice9 *device)
2968{
2969    struct nine_state *state = &device->state;
2970    const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
2971    const struct NineVertexShader9 *vs;
2972    unsigned n, b, i;
2973    int index;
2974    char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
2975    char used_streams[device->caps.MaxStreams];
2976    int dummy_vbo_stream = -1;
2977    BOOL need_dummy_vbo = FALSE;
2978    struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
2979    bool programmable_vs = state->vs && !(state->vdecl && state->vdecl->position_t);
2980
2981    memset(vdecl_index_map, -1, 16);
2982    memset(used_streams, 0, device->caps.MaxStreams);
2983    vs = programmable_vs ? device->state.vs : device->ff.vs;
2984
2985    if (vdecl) {
2986        for (n = 0; n < vs->num_inputs; ++n) {
2987            DBG("looking up input %u (usage %u) from vdecl(%p)\n",
2988                n, vs->input_map[n].ndecl, vdecl);
2989
2990            for (i = 0; i < vdecl->nelems; i++) {
2991                if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
2992                    vdecl_index_map[n] = i;
2993                    used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
2994                    break;
2995                }
2996            }
2997            if (vdecl_index_map[n] < 0)
2998                need_dummy_vbo = TRUE;
2999        }
3000    } else {
3001        /* No vertex declaration. Likely will never happen in practice,
3002         * but we need not crash on this */
3003        need_dummy_vbo = TRUE;
3004    }
3005
3006    if (need_dummy_vbo) {
3007        for (i = 0; i < device->caps.MaxStreams; i++ ) {
3008            if (!used_streams[i]) {
3009                dummy_vbo_stream = i;
3010                break;
3011            }
3012        }
3013    }
3014    /* TODO handle dummy_vbo */
3015    assert (!need_dummy_vbo);
3016
3017    for (n = 0; n < vs->num_inputs; ++n) {
3018        index = vdecl_index_map[n];
3019        if (index >= 0) {
3020            ve[n] = vdecl->elems[index];
3021            b = ve[n].vertex_buffer_index;
3022            /* XXX wine just uses 1 here: */
3023            if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
3024                ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
3025        } else {
3026            /* if the vertex declaration is incomplete compared to what the
3027             * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
3028             * This is not precised by the spec, but is the behaviour
3029             * tested on win */
3030            ve[n].vertex_buffer_index = dummy_vbo_stream;
3031            ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
3032            ve[n].src_offset = 0;
3033            ve[n].instance_divisor = 0;
3034        }
3035    }
3036
3037    cso_set_vertex_elements(device->cso_sw, vs->num_inputs, ve);
3038}
3039
3040static void
3041update_vertex_buffers_sw(struct NineDevice9 *device, int start_vertice, int num_vertices)
3042{
3043    struct pipe_context *pipe = nine_context_get_pipe_acquire(device);
3044    struct pipe_context *pipe_sw = device->pipe_sw;
3045    struct nine_state *state = &device->state;
3046    struct nine_state_sw_internal *sw_internal = &device->state_sw_internal;
3047    struct pipe_vertex_buffer vtxbuf;
3048    uint32_t mask = 0xf;
3049    unsigned i;
3050
3051    DBG("mask=%x\n", mask);
3052
3053    /* TODO: handle dummy_vbo_bound_at */
3054
3055    for (i = 0; mask; mask >>= 1, ++i) {
3056        if (mask & 1) {
3057            if (state->stream[i]) {
3058                unsigned offset;
3059                struct pipe_resource *buf;
3060                struct pipe_box box;
3061                void *userbuf;
3062
3063                vtxbuf = state->vtxbuf[i];
3064                buf = NineVertexBuffer9_GetResource(state->stream[i], &offset);
3065
3066                DBG("Locking %p (offset %d, length %d)\n", buf,
3067                    vtxbuf.buffer_offset, num_vertices * vtxbuf.stride);
3068
3069                u_box_1d(vtxbuf.buffer_offset + offset + start_vertice * vtxbuf.stride,
3070                         num_vertices * vtxbuf.stride, &box);
3071
3072                userbuf = pipe->transfer_map(pipe, buf, 0, PIPE_TRANSFER_READ, &box,
3073                                             &(sw_internal->transfers_so[i]));
3074                vtxbuf.is_user_buffer = true;
3075                vtxbuf.buffer.user = userbuf;
3076
3077                if (!device->driver_caps.user_sw_vbufs) {
3078                    vtxbuf.buffer.resource = NULL;
3079                    vtxbuf.is_user_buffer = false;
3080                    u_upload_data(device->pipe_sw->stream_uploader,
3081                                  0,
3082                                  box.width,
3083                                  16,
3084                                  userbuf,
3085                                  &(vtxbuf.buffer_offset),
3086                                  &(vtxbuf.buffer.resource));
3087                    u_upload_unmap(device->pipe_sw->stream_uploader);
3088                }
3089                pipe_sw->set_vertex_buffers(pipe_sw, i, 1, &vtxbuf);
3090                pipe_vertex_buffer_unreference(&vtxbuf);
3091            } else
3092                pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL);
3093        }
3094    }
3095    nine_context_get_pipe_release(device);
3096}
3097
3098static void
3099update_vs_constants_sw(struct NineDevice9 *device)
3100{
3101    struct nine_state *state = &device->state;
3102    struct pipe_context *pipe_sw = device->pipe_sw;
3103
3104    DBG("updating\n");
3105
3106    {
3107        struct pipe_constant_buffer cb;
3108        const void *buf;
3109
3110        cb.buffer = NULL;
3111        cb.buffer_offset = 0;
3112        cb.buffer_size = 4096 * sizeof(float[4]);
3113        cb.user_buffer = state->vs_const_f;
3114
3115        if (state->vs->lconstf.ranges) {
3116            const struct nine_lconstf *lconstf =  &device->state.vs->lconstf;
3117            const struct nine_range *r = lconstf->ranges;
3118            unsigned n = 0;
3119            float *dst = device->state.vs_lconstf_temp;
3120            float *src = (float *)cb.user_buffer;
3121            memcpy(dst, src, 8192 * sizeof(float[4]));
3122            while (r) {
3123                unsigned p = r->bgn;
3124                unsigned c = r->end - r->bgn;
3125                memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
3126                n += c;
3127                r = r->next;
3128            }
3129            cb.user_buffer = dst;
3130        }
3131
3132        buf = cb.user_buffer;
3133
3134        pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 0, &cb);
3135        if (cb.buffer)
3136            pipe_resource_reference(&cb.buffer, NULL);
3137
3138        cb.user_buffer = (char *)buf + 4096 * sizeof(float[4]);
3139
3140        pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 1, &cb);
3141        if (cb.buffer)
3142            pipe_resource_reference(&cb.buffer, NULL);
3143    }
3144
3145    {
3146        struct pipe_constant_buffer cb;
3147
3148        cb.buffer = NULL;
3149        cb.buffer_offset = 0;
3150        cb.buffer_size = 2048 * sizeof(float[4]);
3151        cb.user_buffer = state->vs_const_i;
3152
3153        pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 2, &cb);
3154        if (cb.buffer)
3155            pipe_resource_reference(&cb.buffer, NULL);
3156    }
3157
3158    {
3159        struct pipe_constant_buffer cb;
3160
3161        cb.buffer = NULL;
3162        cb.buffer_offset = 0;
3163        cb.buffer_size = 512 * sizeof(float[4]);
3164        cb.user_buffer = state->vs_const_b;
3165
3166        pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 3, &cb);
3167        if (cb.buffer)
3168            pipe_resource_reference(&cb.buffer, NULL);
3169    }
3170
3171    {
3172        struct pipe_constant_buffer cb;
3173        const D3DVIEWPORT9 *vport = &device->state.viewport;
3174        float viewport_data[8] = {(float)vport->Width * 0.5f,
3175            (float)vport->Height * -0.5f, vport->MaxZ - vport->MinZ, 0.f,
3176            (float)vport->Width * 0.5f + (float)vport->X,
3177            (float)vport->Height * 0.5f + (float)vport->Y,
3178            vport->MinZ, 0.f};
3179
3180        cb.buffer = NULL;
3181        cb.buffer_offset = 0;
3182        cb.buffer_size = 2 * sizeof(float[4]);
3183        cb.user_buffer = viewport_data;
3184
3185        {
3186            u_upload_data(device->pipe_sw->const_uploader,
3187                          0,
3188                          cb.buffer_size,
3189                          16,
3190                          cb.user_buffer,
3191                          &(cb.buffer_offset),
3192                          &(cb.buffer));
3193            u_upload_unmap(device->pipe_sw->const_uploader);
3194            cb.user_buffer = NULL;
3195        }
3196
3197        pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 4, &cb);
3198        if (cb.buffer)
3199            pipe_resource_reference(&cb.buffer, NULL);
3200    }
3201
3202}
3203
3204void
3205nine_state_prepare_draw_sw(struct NineDevice9 *device, struct NineVertexDeclaration9 *vdecl_out,
3206                           int start_vertice, int num_vertices, struct pipe_stream_output_info *so)
3207{
3208    struct nine_state *state = &device->state;
3209    bool programmable_vs = state->vs && !(state->vdecl && state->vdecl->position_t);
3210    struct NineVertexShader9 *vs = programmable_vs ? device->state.vs : device->ff.vs;
3211
3212    assert(programmable_vs);
3213
3214    DBG("Preparing draw\n");
3215    cso_set_vertex_shader_handle(device->cso_sw,
3216                                 NineVertexShader9_GetVariantProcessVertices(vs, vdecl_out, so));
3217    update_vertex_elements_sw(device);
3218    update_vertex_buffers_sw(device, start_vertice, num_vertices);
3219    update_vs_constants_sw(device);
3220    DBG("Preparation succeeded\n");
3221}
3222
3223void
3224nine_state_after_draw_sw(struct NineDevice9 *device)
3225{
3226    struct nine_state_sw_internal *sw_internal = &device->state_sw_internal;
3227    struct pipe_context *pipe = nine_context_get_pipe_acquire(device);
3228    struct pipe_context *pipe_sw = device->pipe_sw;
3229    int i;
3230
3231    for (i = 0; i < 4; i++) {
3232        pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL);
3233        if (sw_internal->transfers_so[i])
3234            pipe->transfer_unmap(pipe, sw_internal->transfers_so[i]);
3235        sw_internal->transfers_so[i] = NULL;
3236    }
3237    nine_context_get_pipe_release(device);
3238}
3239
/* Nothing to release here: everything is destroyed together with the cso. */
void
nine_state_destroy_sw(struct NineDevice9 *device)
{
    /* Everything destroyed with cso */
    (void) device;
}
3246
3247/*
3248static const DWORD nine_render_states_pixel[] =
3249{
3250    D3DRS_ALPHABLENDENABLE,
3251    D3DRS_ALPHAFUNC,
3252    D3DRS_ALPHAREF,
3253    D3DRS_ALPHATESTENABLE,
3254    D3DRS_ANTIALIASEDLINEENABLE,
3255    D3DRS_BLENDFACTOR,
3256    D3DRS_BLENDOP,
3257    D3DRS_BLENDOPALPHA,
3258    D3DRS_CCW_STENCILFAIL,
3259    D3DRS_CCW_STENCILPASS,
3260    D3DRS_CCW_STENCILZFAIL,
3261    D3DRS_COLORWRITEENABLE,
3262    D3DRS_COLORWRITEENABLE1,
3263    D3DRS_COLORWRITEENABLE2,
3264    D3DRS_COLORWRITEENABLE3,
3265    D3DRS_DEPTHBIAS,
3266    D3DRS_DESTBLEND,
3267    D3DRS_DESTBLENDALPHA,
3268    D3DRS_DITHERENABLE,
3269    D3DRS_FILLMODE,
3270    D3DRS_FOGDENSITY,
3271    D3DRS_FOGEND,
3272    D3DRS_FOGSTART,
3273    D3DRS_LASTPIXEL,
3274    D3DRS_SCISSORTESTENABLE,
3275    D3DRS_SEPARATEALPHABLENDENABLE,
3276    D3DRS_SHADEMODE,
3277    D3DRS_SLOPESCALEDEPTHBIAS,
3278    D3DRS_SRCBLEND,
3279    D3DRS_SRCBLENDALPHA,
3280    D3DRS_SRGBWRITEENABLE,
3281    D3DRS_STENCILENABLE,
3282    D3DRS_STENCILFAIL,
3283    D3DRS_STENCILFUNC,
3284    D3DRS_STENCILMASK,
3285    D3DRS_STENCILPASS,
3286    D3DRS_STENCILREF,
3287    D3DRS_STENCILWRITEMASK,
3288    D3DRS_STENCILZFAIL,
3289    D3DRS_TEXTUREFACTOR,
3290    D3DRS_TWOSIDEDSTENCILMODE,
3291    D3DRS_WRAP0,
3292    D3DRS_WRAP1,
3293    D3DRS_WRAP10,
3294    D3DRS_WRAP11,
3295    D3DRS_WRAP12,
3296    D3DRS_WRAP13,
3297    D3DRS_WRAP14,
3298    D3DRS_WRAP15,
3299    D3DRS_WRAP2,
3300    D3DRS_WRAP3,
3301    D3DRS_WRAP4,
3302    D3DRS_WRAP5,
3303    D3DRS_WRAP6,
3304    D3DRS_WRAP7,
3305    D3DRS_WRAP8,
3306    D3DRS_WRAP9,
3307    D3DRS_ZENABLE,
3308    D3DRS_ZFUNC,
3309    D3DRS_ZWRITEENABLE
3310};
3311*/
/* Bitmask form of the commented-out nine_render_states_pixel[] list above:
 * one bit per render state, 32 states per word, i.e. render state rs maps
 * to bit (rs % 32) of word (rs / 32). */
const uint32_t nine_render_states_pixel[(NINED3DRS_LAST + 31) / 32] =
{
    0x0f99c380, 0x1ff00070, 0x00000000, 0x00000000,
    0x000000ff, 0xde01c900, 0x0003ffcf
};
3317
3318/*
3319static const DWORD nine_render_states_vertex[] =
3320{
3321    D3DRS_ADAPTIVETESS_W,
3322    D3DRS_ADAPTIVETESS_X,
3323    D3DRS_ADAPTIVETESS_Y,
3324    D3DRS_ADAPTIVETESS_Z,
3325    D3DRS_AMBIENT,
3326    D3DRS_AMBIENTMATERIALSOURCE,
3327    D3DRS_CLIPPING,
3328    D3DRS_CLIPPLANEENABLE,
3329    D3DRS_COLORVERTEX,
3330    D3DRS_CULLMODE,
3331    D3DRS_DIFFUSEMATERIALSOURCE,
3332    D3DRS_EMISSIVEMATERIALSOURCE,
3333    D3DRS_ENABLEADAPTIVETESSELLATION,
3334    D3DRS_FOGCOLOR,
3335    D3DRS_FOGDENSITY,
3336    D3DRS_FOGENABLE,
3337    D3DRS_FOGEND,
3338    D3DRS_FOGSTART,
3339    D3DRS_FOGTABLEMODE,
3340    D3DRS_FOGVERTEXMODE,
3341    D3DRS_INDEXEDVERTEXBLENDENABLE,
3342    D3DRS_LIGHTING,
3343    D3DRS_LOCALVIEWER,
3344    D3DRS_MAXTESSELLATIONLEVEL,
3345    D3DRS_MINTESSELLATIONLEVEL,
3346    D3DRS_MULTISAMPLEANTIALIAS,
3347    D3DRS_MULTISAMPLEMASK,
3348    D3DRS_NORMALDEGREE,
3349    D3DRS_NORMALIZENORMALS,
3350    D3DRS_PATCHEDGESTYLE,
3351    D3DRS_POINTSCALE_A,
3352    D3DRS_POINTSCALE_B,
3353    D3DRS_POINTSCALE_C,
3354    D3DRS_POINTSCALEENABLE,
3355    D3DRS_POINTSIZE,
3356    D3DRS_POINTSIZE_MAX,
3357    D3DRS_POINTSIZE_MIN,
3358    D3DRS_POINTSPRITEENABLE,
3359    D3DRS_POSITIONDEGREE,
3360    D3DRS_RANGEFOGENABLE,
3361    D3DRS_SHADEMODE,
3362    D3DRS_SPECULARENABLE,
3363    D3DRS_SPECULARMATERIALSOURCE,
3364    D3DRS_TWEENFACTOR,
3365    D3DRS_VERTEXBLEND
3366};
3367*/
/* Bitmask form of the commented-out nine_render_states_vertex[] list above:
 * one bit per render state, 32 states per word, i.e. render state rs maps
 * to bit (rs % 32) of word (rs / 32). */
const uint32_t nine_render_states_vertex[(NINED3DRS_LAST + 31) / 32] =
{
    0x30400200, 0x0001007c, 0x00000000, 0x00000000,
    0xfd9efb00, 0x01fc34cf, 0x00000000
};
3373
/* TODO: put in the right values */
/*
 * Per-render-state table of NINE_STATE_* flag masks, indexed directly by the
 * D3DRS_* render state value (the array has NINED3DRS_LAST + 1 entries).
 * Values without an explicit initializer below are zero, as is the
 * explicitly listed D3DRS_CLIPPING.
 * NOTE(review): presumably each mask names the state groups that must be
 * revalidated when that render state changes -- confirm at the use sites.
 * Entries set to NINE_STATE_UNHANDLED are render states this table does not
 * map to any other group.
 */
const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
{
    [D3DRS_ZENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE,
    [D3DRS_FILLMODE] = NINE_STATE_RASTERIZER,
    [D3DRS_SHADEMODE] = NINE_STATE_RASTERIZER,
    [D3DRS_ZWRITEENABLE] = NINE_STATE_DSA,
    [D3DRS_ALPHATESTENABLE] = NINE_STATE_DSA,
    [D3DRS_LASTPIXEL] = NINE_STATE_RASTERIZER,
    [D3DRS_SRCBLEND] = NINE_STATE_BLEND,
    [D3DRS_DESTBLEND] = NINE_STATE_BLEND,
    [D3DRS_CULLMODE] = NINE_STATE_RASTERIZER,
    [D3DRS_ZFUNC] = NINE_STATE_DSA,
    [D3DRS_ALPHAREF] = NINE_STATE_DSA,
    [D3DRS_ALPHAFUNC] = NINE_STATE_DSA,
    [D3DRS_DITHERENABLE] = NINE_STATE_BLEND,
    [D3DRS_ALPHABLENDENABLE] = NINE_STATE_BLEND,
    [D3DRS_FOGENABLE] = NINE_STATE_FF_SHADER | NINE_STATE_VS_PARAMS_MISC | NINE_STATE_PS_PARAMS_MISC | NINE_STATE_PS_CONST,
    [D3DRS_SPECULARENABLE] = NINE_STATE_FF_LIGHTING,
    [D3DRS_FOGCOLOR] = NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
    [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_SHADER | NINE_STATE_PS_PARAMS_MISC | NINE_STATE_PS_CONST,
    [D3DRS_FOGSTART] = NINE_STATE_FF_VS_OTHER | NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
    [D3DRS_FOGEND] = NINE_STATE_FF_VS_OTHER | NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
    [D3DRS_FOGDENSITY] = NINE_STATE_FF_VS_OTHER | NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
    [D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_SHADER,
    [D3DRS_STENCILENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE,
    [D3DRS_STENCILFAIL] = NINE_STATE_DSA,
    [D3DRS_STENCILZFAIL] = NINE_STATE_DSA,
    [D3DRS_STENCILPASS] = NINE_STATE_DSA,
    [D3DRS_STENCILFUNC] = NINE_STATE_DSA,
    [D3DRS_STENCILREF] = NINE_STATE_STENCIL_REF,
    [D3DRS_STENCILMASK] = NINE_STATE_DSA,
    [D3DRS_STENCILWRITEMASK] = NINE_STATE_DSA,
    [D3DRS_TEXTUREFACTOR] = NINE_STATE_FF_PS_CONSTS,
    [D3DRS_WRAP0] = NINE_STATE_UNHANDLED, /* cylindrical wrap is crazy */
    [D3DRS_WRAP1] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP2] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP3] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP4] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP5] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP6] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP7] = NINE_STATE_UNHANDLED,
    [D3DRS_CLIPPING] = 0, /* software vertex processing only */
    [D3DRS_LIGHTING] = NINE_STATE_FF_LIGHTING,
    [D3DRS_AMBIENT] = NINE_STATE_FF_LIGHTING | NINE_STATE_FF_MATERIAL,
    [D3DRS_FOGVERTEXMODE] = NINE_STATE_FF_SHADER,
    [D3DRS_COLORVERTEX] = NINE_STATE_FF_LIGHTING,
    [D3DRS_LOCALVIEWER] = NINE_STATE_FF_LIGHTING,
    [D3DRS_NORMALIZENORMALS] = NINE_STATE_FF_SHADER,
    [D3DRS_DIFFUSEMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
    [D3DRS_SPECULARMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
    [D3DRS_AMBIENTMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
    [D3DRS_EMISSIVEMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
    [D3DRS_VERTEXBLEND] = NINE_STATE_FF_SHADER,
    [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER,
    [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER | NINE_STATE_FF_VS_OTHER,
    [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER | NINE_STATE_FF_VS_OTHER | NINE_STATE_VS_PARAMS_MISC,
    [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER,
    [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_SHADER,
    [D3DRS_POINTSCALE_A] = NINE_STATE_FF_VS_OTHER,
    [D3DRS_POINTSCALE_B] = NINE_STATE_FF_VS_OTHER,
    [D3DRS_POINTSCALE_C] = NINE_STATE_FF_VS_OTHER,
    [D3DRS_MULTISAMPLEANTIALIAS] = NINE_STATE_MULTISAMPLE,
    [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK,
    [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED,
    [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED,
    [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER | NINE_STATE_FF_VS_OTHER | NINE_STATE_VS_PARAMS_MISC,
    [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_SHADER,
    [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND,
    [D3DRS_TWEENFACTOR] = NINE_STATE_FF_VS_OTHER,
    [D3DRS_BLENDOP] = NINE_STATE_BLEND,
    [D3DRS_POSITIONDEGREE] = NINE_STATE_UNHANDLED,
    [D3DRS_NORMALDEGREE] = NINE_STATE_UNHANDLED,
    [D3DRS_SCISSORTESTENABLE] = NINE_STATE_RASTERIZER,
    [D3DRS_SLOPESCALEDEPTHBIAS] = NINE_STATE_RASTERIZER,
    [D3DRS_ANTIALIASEDLINEENABLE] = NINE_STATE_RASTERIZER,
    [D3DRS_MINTESSELLATIONLEVEL] = NINE_STATE_UNHANDLED,
    [D3DRS_MAXTESSELLATIONLEVEL] = NINE_STATE_UNHANDLED,
    [D3DRS_ADAPTIVETESS_X] = NINE_STATE_UNHANDLED,
    [D3DRS_ADAPTIVETESS_Y] = NINE_STATE_UNHANDLED,
    [D3DRS_ADAPTIVETESS_Z] = NINE_STATE_UNHANDLED,
    [D3DRS_ADAPTIVETESS_W] = NINE_STATE_UNHANDLED,
    [D3DRS_ENABLEADAPTIVETESSELLATION] = NINE_STATE_UNHANDLED,
    [D3DRS_TWOSIDEDSTENCILMODE] = NINE_STATE_DSA,
    [D3DRS_CCW_STENCILFAIL] = NINE_STATE_DSA,
    [D3DRS_CCW_STENCILZFAIL] = NINE_STATE_DSA,
    [D3DRS_CCW_STENCILPASS] = NINE_STATE_DSA,
    [D3DRS_CCW_STENCILFUNC] = NINE_STATE_DSA,
    [D3DRS_COLORWRITEENABLE1] = NINE_STATE_BLEND,
    [D3DRS_COLORWRITEENABLE2] = NINE_STATE_BLEND,
    [D3DRS_COLORWRITEENABLE3] = NINE_STATE_BLEND,
    [D3DRS_BLENDFACTOR] = NINE_STATE_BLEND_COLOR,
    [D3DRS_SRGBWRITEENABLE] = NINE_STATE_FB,
    [D3DRS_DEPTHBIAS] = NINE_STATE_RASTERIZER,
    [D3DRS_WRAP8] = NINE_STATE_UNHANDLED, /* cylwrap has to be done via GP */
    [D3DRS_WRAP9] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP10] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP11] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP12] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP13] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP14] = NINE_STATE_UNHANDLED,
    [D3DRS_WRAP15] = NINE_STATE_UNHANDLED,
    [D3DRS_SEPARATEALPHABLENDENABLE] = NINE_STATE_BLEND,
    [D3DRS_SRCBLENDALPHA] = NINE_STATE_BLEND,
    [D3DRS_DESTBLENDALPHA] = NINE_STATE_BLEND,
    [D3DRS_BLENDOPALPHA] = NINE_STATE_BLEND
};
3481
3482/* Misc */
3483
3484static D3DMATRIX nine_state_identity = { .m[0] = { 1, 0, 0, 0 },
3485                                         .m[1] = { 0, 1, 0, 0 },
3486                                         .m[2] = { 0, 0, 1, 0 },
3487                                         .m[3] = { 0, 0, 0, 1 } };
3488
3489void
3490nine_state_resize_transform(struct nine_ff_state *ff_state, unsigned N)
3491{
3492    unsigned n = ff_state->num_transforms;
3493
3494    if (N <= n)
3495        return;
3496
3497    ff_state->transform = REALLOC(ff_state->transform,
3498                                  n * sizeof(D3DMATRIX),
3499                                  N * sizeof(D3DMATRIX));
3500    for (; n < N; ++n)
3501        ff_state->transform[n] = nine_state_identity;
3502    ff_state->num_transforms = N;
3503}
3504
3505D3DMATRIX *
3506nine_state_access_transform(struct nine_ff_state *ff_state, D3DTRANSFORMSTATETYPE t,
3507                            boolean alloc)
3508{
3509    unsigned index;
3510
3511    switch (t) {
3512    case D3DTS_VIEW: index = 0; break;
3513    case D3DTS_PROJECTION: index = 1; break;
3514    case D3DTS_TEXTURE0: index = 2; break;
3515    case D3DTS_TEXTURE1: index = 3; break;
3516    case D3DTS_TEXTURE2: index = 4; break;
3517    case D3DTS_TEXTURE3: index = 5; break;
3518    case D3DTS_TEXTURE4: index = 6; break;
3519    case D3DTS_TEXTURE5: index = 7; break;
3520    case D3DTS_TEXTURE6: index = 8; break;
3521    case D3DTS_TEXTURE7: index = 9; break;
3522    default:
3523        if (!(t >= D3DTS_WORLDMATRIX(0) && t <= D3DTS_WORLDMATRIX(255)))
3524            return NULL;
3525        index = 10 + (t - D3DTS_WORLDMATRIX(0));
3526        break;
3527    }
3528
3529    if (index >= ff_state->num_transforms) {
3530        if (!alloc)
3531            return &nine_state_identity;
3532        nine_state_resize_transform(ff_state, index + 1);
3533    }
3534    return &ff_state->transform[index];
3535}
3536
3537HRESULT
3538nine_state_set_light(struct nine_ff_state *ff_state, DWORD Index,
3539                     const D3DLIGHT9 *pLight)
3540{
3541    if (Index >= ff_state->num_lights) {
3542        unsigned n = ff_state->num_lights;
3543        unsigned N = Index + 1;
3544
3545        ff_state->light = REALLOC(ff_state->light, n * sizeof(D3DLIGHT9),
3546                                                   N * sizeof(D3DLIGHT9));
3547        if (!ff_state->light)
3548            return E_OUTOFMEMORY;
3549        ff_state->num_lights = N;
3550
3551        for (; n < Index; ++n) {
3552            memset(&ff_state->light[n], 0, sizeof(D3DLIGHT9));
3553            ff_state->light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
3554        }
3555    }
3556    ff_state->light[Index] = *pLight;
3557
3558    if (pLight->Type == D3DLIGHT_SPOT && pLight->Theta >= pLight->Phi) {
3559        DBG("Warning: clamping D3DLIGHT9.Theta\n");
3560        ff_state->light[Index].Theta = ff_state->light[Index].Phi;
3561    }
3562    return D3D_OK;
3563}
3564
3565HRESULT
3566nine_state_light_enable(struct nine_ff_state *ff_state,
3567                        DWORD Index, BOOL Enable)
3568{
3569    unsigned i;
3570
3571    user_assert(Index < ff_state->num_lights, D3DERR_INVALIDCALL);
3572
3573    for (i = 0; i < ff_state->num_lights_active; ++i) {
3574        if (ff_state->active_light[i] == Index)
3575            break;
3576    }
3577
3578    if (Enable) {
3579        if (i < ff_state->num_lights_active)
3580            return D3D_OK;
3581        /* XXX wine thinks this should still succeed:
3582         */
3583        user_assert(i < NINE_MAX_LIGHTS_ACTIVE, D3DERR_INVALIDCALL);
3584
3585        ff_state->active_light[i] = Index;
3586        ff_state->num_lights_active++;
3587    } else {
3588        if (i == ff_state->num_lights_active)
3589            return D3D_OK;
3590        --ff_state->num_lights_active;
3591        for (; i < ff_state->num_lights_active; ++i)
3592            ff_state->active_light[i] = ff_state->active_light[i + 1];
3593    }
3594
3595    return D3D_OK;
3596}
3597
3598#define D3DRS_TO_STRING_CASE(n) case D3DRS_##n: return "D3DRS_"#n
3599const char *nine_d3drs_to_string(DWORD State)
3600{
3601    switch (State) {
3602    D3DRS_TO_STRING_CASE(ZENABLE);
3603    D3DRS_TO_STRING_CASE(FILLMODE);
3604    D3DRS_TO_STRING_CASE(SHADEMODE);
3605    D3DRS_TO_STRING_CASE(ZWRITEENABLE);
3606    D3DRS_TO_STRING_CASE(ALPHATESTENABLE);
3607    D3DRS_TO_STRING_CASE(LASTPIXEL);
3608    D3DRS_TO_STRING_CASE(SRCBLEND);
3609    D3DRS_TO_STRING_CASE(DESTBLEND);
3610    D3DRS_TO_STRING_CASE(CULLMODE);
3611    D3DRS_TO_STRING_CASE(ZFUNC);
3612    D3DRS_TO_STRING_CASE(ALPHAREF);
3613    D3DRS_TO_STRING_CASE(ALPHAFUNC);
3614    D3DRS_TO_STRING_CASE(DITHERENABLE);
3615    D3DRS_TO_STRING_CASE(ALPHABLENDENABLE);
3616    D3DRS_TO_STRING_CASE(FOGENABLE);
3617    D3DRS_TO_STRING_CASE(SPECULARENABLE);
3618    D3DRS_TO_STRING_CASE(FOGCOLOR);
3619    D3DRS_TO_STRING_CASE(FOGTABLEMODE);
3620    D3DRS_TO_STRING_CASE(FOGSTART);
3621    D3DRS_TO_STRING_CASE(FOGEND);
3622    D3DRS_TO_STRING_CASE(FOGDENSITY);
3623    D3DRS_TO_STRING_CASE(RANGEFOGENABLE);
3624    D3DRS_TO_STRING_CASE(STENCILENABLE);
3625    D3DRS_TO_STRING_CASE(STENCILFAIL);
3626    D3DRS_TO_STRING_CASE(STENCILZFAIL);
3627    D3DRS_TO_STRING_CASE(STENCILPASS);
3628    D3DRS_TO_STRING_CASE(STENCILFUNC);
3629    D3DRS_TO_STRING_CASE(STENCILREF);
3630    D3DRS_TO_STRING_CASE(STENCILMASK);
3631    D3DRS_TO_STRING_CASE(STENCILWRITEMASK);
3632    D3DRS_TO_STRING_CASE(TEXTUREFACTOR);
3633    D3DRS_TO_STRING_CASE(WRAP0);
3634    D3DRS_TO_STRING_CASE(WRAP1);
3635    D3DRS_TO_STRING_CASE(WRAP2);
3636    D3DRS_TO_STRING_CASE(WRAP3);
3637    D3DRS_TO_STRING_CASE(WRAP4);
3638    D3DRS_TO_STRING_CASE(WRAP5);
3639    D3DRS_TO_STRING_CASE(WRAP6);
3640    D3DRS_TO_STRING_CASE(WRAP7);
3641    D3DRS_TO_STRING_CASE(CLIPPING);
3642    D3DRS_TO_STRING_CASE(LIGHTING);
3643    D3DRS_TO_STRING_CASE(AMBIENT);
3644    D3DRS_TO_STRING_CASE(FOGVERTEXMODE);
3645    D3DRS_TO_STRING_CASE(COLORVERTEX);
3646    D3DRS_TO_STRING_CASE(LOCALVIEWER);
3647    D3DRS_TO_STRING_CASE(NORMALIZENORMALS);
3648    D3DRS_TO_STRING_CASE(DIFFUSEMATERIALSOURCE);
3649    D3DRS_TO_STRING_CASE(SPECULARMATERIALSOURCE);
3650    D3DRS_TO_STRING_CASE(AMBIENTMATERIALSOURCE);
3651    D3DRS_TO_STRING_CASE(EMISSIVEMATERIALSOURCE);
3652    D3DRS_TO_STRING_CASE(VERTEXBLEND);
3653    D3DRS_TO_STRING_CASE(CLIPPLANEENABLE);
3654    D3DRS_TO_STRING_CASE(POINTSIZE);
3655    D3DRS_TO_STRING_CASE(POINTSIZE_MIN);
3656    D3DRS_TO_STRING_CASE(POINTSPRITEENABLE);
3657    D3DRS_TO_STRING_CASE(POINTSCALEENABLE);
3658    D3DRS_TO_STRING_CASE(POINTSCALE_A);
3659    D3DRS_TO_STRING_CASE(POINTSCALE_B);
3660    D3DRS_TO_STRING_CASE(POINTSCALE_C);
3661    D3DRS_TO_STRING_CASE(MULTISAMPLEANTIALIAS);
3662    D3DRS_TO_STRING_CASE(MULTISAMPLEMASK);
3663    D3DRS_TO_STRING_CASE(PATCHEDGESTYLE);
3664    D3DRS_TO_STRING_CASE(DEBUGMONITORTOKEN);
3665    D3DRS_TO_STRING_CASE(POINTSIZE_MAX);
3666    D3DRS_TO_STRING_CASE(INDEXEDVERTEXBLENDENABLE);
3667    D3DRS_TO_STRING_CASE(COLORWRITEENABLE);
3668    D3DRS_TO_STRING_CASE(TWEENFACTOR);
3669    D3DRS_TO_STRING_CASE(BLENDOP);
3670    D3DRS_TO_STRING_CASE(POSITIONDEGREE);
3671    D3DRS_TO_STRING_CASE(NORMALDEGREE);
3672    D3DRS_TO_STRING_CASE(SCISSORTESTENABLE);
3673    D3DRS_TO_STRING_CASE(SLOPESCALEDEPTHBIAS);
3674    D3DRS_TO_STRING_CASE(ANTIALIASEDLINEENABLE);
3675    D3DRS_TO_STRING_CASE(MINTESSELLATIONLEVEL);
3676    D3DRS_TO_STRING_CASE(MAXTESSELLATIONLEVEL);
3677    D3DRS_TO_STRING_CASE(ADAPTIVETESS_X);
3678    D3DRS_TO_STRING_CASE(ADAPTIVETESS_Y);
3679    D3DRS_TO_STRING_CASE(ADAPTIVETESS_Z);
3680    D3DRS_TO_STRING_CASE(ADAPTIVETESS_W);
3681    D3DRS_TO_STRING_CASE(ENABLEADAPTIVETESSELLATION);
3682    D3DRS_TO_STRING_CASE(TWOSIDEDSTENCILMODE);
3683    D3DRS_TO_STRING_CASE(CCW_STENCILFAIL);
3684    D3DRS_TO_STRING_CASE(CCW_STENCILZFAIL);
3685    D3DRS_TO_STRING_CASE(CCW_STENCILPASS);
3686    D3DRS_TO_STRING_CASE(CCW_STENCILFUNC);
3687    D3DRS_TO_STRING_CASE(COLORWRITEENABLE1);
3688    D3DRS_TO_STRING_CASE(COLORWRITEENABLE2);
3689    D3DRS_TO_STRING_CASE(COLORWRITEENABLE3);
3690    D3DRS_TO_STRING_CASE(BLENDFACTOR);
3691    D3DRS_TO_STRING_CASE(SRGBWRITEENABLE);
3692    D3DRS_TO_STRING_CASE(DEPTHBIAS);
3693    D3DRS_TO_STRING_CASE(WRAP8);
3694    D3DRS_TO_STRING_CASE(WRAP9);
3695    D3DRS_TO_STRING_CASE(WRAP10);
3696    D3DRS_TO_STRING_CASE(WRAP11);
3697    D3DRS_TO_STRING_CASE(WRAP12);
3698    D3DRS_TO_STRING_CASE(WRAP13);
3699    D3DRS_TO_STRING_CASE(WRAP14);
3700    D3DRS_TO_STRING_CASE(WRAP15);
3701    D3DRS_TO_STRING_CASE(SEPARATEALPHABLENDENABLE);
3702    D3DRS_TO_STRING_CASE(SRCBLENDALPHA);
3703    D3DRS_TO_STRING_CASE(DESTBLENDALPHA);
3704    D3DRS_TO_STRING_CASE(BLENDOPALPHA);
3705    default:
3706        return "(invalid)";
3707    }
3708}
3709