r300_fs.c 17.9 KB
Newer Older
1
2
3
/*
 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
 *                Joakim Sindholt <opensource@zhasha.com>
 * Copyright 2009 Marek Olšák <maraeo@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

25
26
27
#include "util/u_math.h"
#include "util/u_memory.h"

28
#include "tgsi/tgsi_dump.h"
29
#include "tgsi/tgsi_ureg.h"
30

31
#include "r300_cb.h"
32
#include "r300_context.h"
33
#include "r300_emit.h"
34
35
#include "r300_screen.h"
#include "r300_fs.h"
36
#include "r300_reg.h"
37
#include "r300_tgsi_to_rc.h"
38

39
#include "radeon_code.h"
40
#include "radeon_compiler.h"
41

Marek Olšák's avatar
Marek Olšák committed
42
/* Convert info about FS input semantics to r300_shader_semantics. */
43
44
void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
                                struct r300_shader_semantics* fs_inputs)
Marek Olšák's avatar
Marek Olšák committed
45
46
47
48
49
50
51
52
53
54
55
{
    int i;
    unsigned index;

    r300_shader_semantics_reset(fs_inputs);

    for (i = 0; i < info->num_inputs; i++) {
        index = info->input_semantic_index[i];

        switch (info->input_semantic_name[i]) {
            case TGSI_SEMANTIC_COLOR:
56
                assert(index < ATTR_COLOR_COUNT);
Marek Olšák's avatar
Marek Olšák committed
57
58
59
60
                fs_inputs->color[index] = i;
                break;

            case TGSI_SEMANTIC_GENERIC:
61
                assert(index < ATTR_GENERIC_COUNT);
Marek Olšák's avatar
Marek Olšák committed
62
63
64
65
66
67
68
69
                fs_inputs->generic[index] = i;
                break;

            case TGSI_SEMANTIC_FOG:
                assert(index == 0);
                fs_inputs->fog = i;
                break;

Marek Olšák's avatar
Marek Olšák committed
70
71
72
73
74
            case TGSI_SEMANTIC_POSITION:
                assert(index == 0);
                fs_inputs->wpos = i;
                break;

75
76
77
78
79
            case TGSI_SEMANTIC_FACE:
                assert(index == 0);
                fs_inputs->face = i;
                break;

Marek Olšák's avatar
Marek Olšák committed
80
            default:
81
82
                fprintf(stderr, "r300: FP: Unknown input semantic: %i\n",
                        info->input_semantic_name[i]);
Marek Olšák's avatar
Marek Olšák committed
83
84
85
86
        }
    }
}

87
static void find_output_registers(struct r300_fragment_program_compiler * compiler,
88
                                  struct r300_fragment_shader_code *shader)
89
{
90
    unsigned i, colorbuf_count = 0;
91

92
    /* Mark the outputs as not present initially */
93
94
95
96
97
    compiler->OutputColor[0] = shader->info.num_outputs;
    compiler->OutputColor[1] = shader->info.num_outputs;
    compiler->OutputColor[2] = shader->info.num_outputs;
    compiler->OutputColor[3] = shader->info.num_outputs;
    compiler->OutputDepth = shader->info.num_outputs;
98

99
    /* Now see where they really are. */
100
101
    for(i = 0; i < shader->info.num_outputs; ++i) {
        switch(shader->info.output_semantic_name[i]) {
102
            case TGSI_SEMANTIC_COLOR:
103
104
                compiler->OutputColor[colorbuf_count] = i;
                colorbuf_count++;
105
106
107
                break;
            case TGSI_SEMANTIC_POSITION:
                compiler->OutputDepth = i;
108
                break;
109
110
111
112
113
114
115
116
117
        }
    }
}

static void allocate_hardware_inputs(
    struct r300_fragment_program_compiler * c,
    void (*allocate)(void * data, unsigned input, unsigned hwreg),
    void * mydata)
{
118
    struct r300_shader_semantics* inputs =
119
        (struct r300_shader_semantics*)c->UserData;
120
121
122
123
124
125
    int i, reg = 0;

    /* Allocate input registers. */
    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
        if (inputs->color[i] != ATTR_UNUSED) {
            allocate(mydata, inputs->color[i], reg++);
126
127
        }
    }
128
129
130
    if (inputs->face != ATTR_UNUSED) {
        allocate(mydata, inputs->face, reg++);
    }
131
132
133
    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
        if (inputs->generic[i] != ATTR_UNUSED) {
            allocate(mydata, inputs->generic[i], reg++);
134
        }
135
    }
136
137
138
    if (inputs->fog != ATTR_UNUSED) {
        allocate(mydata, inputs->fog, reg++);
    }
Marek Olšák's avatar
Marek Olšák committed
139
140
141
    if (inputs->wpos != ATTR_UNUSED) {
        allocate(mydata, inputs->wpos, reg++);
    }
142
143
}

144
static void get_external_state(
145
    struct r300_context* r300,
146
    struct r300_fragment_program_external_state* state)
147
{
148
149
    struct r300_textures_state *texstate = r300->textures_state.state;
    unsigned i;
150
    unsigned char *swizzle;
151

152
    for (i = 0; i < texstate->sampler_state_count; i++) {
153
        struct r300_sampler_state* s = texstate->sampler_states[i];
154

155
        if (!s) {
156
157
158
159
            continue;
        }

        if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
160
161
            state->unit[i].compare_mode_enabled = 1;

162
163
164
165
166
167
168
169
170
171
            /* Pass depth texture swizzling to the compiler. */
            if (texstate->sampler_views[i]) {
                swizzle = texstate->sampler_views[i]->swizzle;

                state->unit[i].depth_texture_swizzle =
                    RC_MAKE_SWIZZLE(swizzle[0], swizzle[1],
                                    swizzle[2], swizzle[3]);
            } else {
                state->unit[i].depth_texture_swizzle = RC_SWIZZLE_XYZW;
            }
172
173
174
175

            /* Fortunately, no need to translate this. */
            state->unit[i].texture_compare_func = s->state.compare_func;
        }
176

177
178
        state->unit[i].non_normalized_coords = !s->state.normalized_coords;

179
180
181
182
        if (texstate->sampler_views[i]) {
            struct r300_texture *t;
            t = (struct r300_texture*)texstate->sampler_views[i]->base.texture;

183
            /* XXX this should probably take into account STR, not just S. */
184
            if (t->desc.is_npot) {
185
186
187
                switch (s->state.wrap_s) {
                    case PIPE_TEX_WRAP_REPEAT:
                        state->unit[i].wrap_mode = RC_WRAP_REPEAT;
188
                        state->unit[i].fake_npot = TRUE;
189
190
191
                        break;

                    case PIPE_TEX_WRAP_MIRROR_REPEAT:
192
193
194
195
                        state->unit[i].wrap_mode = RC_WRAP_MIRRORED_REPEAT;
                        state->unit[i].fake_npot = TRUE;
                        break;

196
197
198
                    case PIPE_TEX_WRAP_MIRROR_CLAMP:
                    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
                    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
199
                        state->unit[i].wrap_mode = RC_WRAP_MIRRORED_CLAMP;
200
201
202
203
204
205
206
207
                        state->unit[i].fake_npot = TRUE;
                        break;

                    default:
                        state->unit[i].wrap_mode = RC_WRAP_NONE;
                        break;
                }
            }
208
        }
209
210
211
212
    }
}

static void r300_translate_fragment_shader(
213
214
215
216
217
    struct r300_context* r300,
    struct r300_fragment_shader_code* shader,
    const struct tgsi_token *tokens);

static void r300_dummy_fragment_shader(
218
219
    struct r300_context* r300,
    struct r300_fragment_shader_code* shader)
220
{
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
    struct pipe_shader_state state;
    struct ureg_program *ureg;
    struct ureg_dst out;
    struct ureg_src imm;

    /* Make a simple fragment shader which outputs (0, 0, 0, 1) */
    ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
    imm = ureg_imm4f(ureg, 0, 0, 0, 1);

    ureg_MOV(ureg, out, imm);
    ureg_END(ureg);

    state.tokens = ureg_finalize(ureg);

    shader->dummy = TRUE;
    r300_translate_fragment_shader(r300, shader, state.tokens);

    ureg_destroy(ureg);
}

242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
static void r300_emit_fs_code_to_buffer(
    struct r300_context *r300,
    struct r300_fragment_shader_code *shader)
{
    struct rX00_fragment_program_code *generic_code = &shader->code;
    unsigned imm_count = shader->immediates_count;
    unsigned imm_first = shader->externals_count;
    unsigned imm_end = generic_code->constants.Count;
    struct rc_constant *constants = generic_code->constants.Constants;
    unsigned i;
    CB_LOCALS;

    if (r300->screen->caps.is_r500) {
        struct r500_fragment_program_code *code = &generic_code->code.r500;

257
        shader->cb_code_size = 19 +
258
                               ((code->inst_end + 1) * 6) +
259
                               imm_count * 7 +
Marek Olšák's avatar
Marek Olšák committed
260
                               code->int_constant_count * 2;
261
262
263
264

        NEW_CB(shader->cb_code, shader->cb_code_size);
        OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
        OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx);
265
        OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl);
266
        for(i = 0; i < code->int_constant_count; i++){
Marek Olšák's avatar
Marek Olšák committed
267
268
269
270
                OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4),
                                                code->int_constants[i]);
        }
        OUT_CB_REG(R500_US_CODE_RANGE,
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
                   R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
        OUT_CB_REG(R500_US_CODE_OFFSET, 0);
        OUT_CB_REG(R500_US_CODE_ADDR,
                   R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end));

        OUT_CB_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR);
        OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6);
        for (i = 0; i <= code->inst_end; i++) {
            OUT_CB(code->inst[i].inst0);
            OUT_CB(code->inst[i].inst1);
            OUT_CB(code->inst[i].inst2);
            OUT_CB(code->inst[i].inst3);
            OUT_CB(code->inst[i].inst4);
            OUT_CB(code->inst[i].inst5);
        }

        /* Emit immediates. */
        if (imm_count) {
            for(i = imm_first; i < imm_end; ++i) {
                if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
                    const float *data = constants[i].u.Immediate;

                    OUT_CB_REG(R500_GA_US_VECTOR_INDEX,
                               R500_GA_US_VECTOR_INDEX_TYPE_CONST |
                               (i & R500_GA_US_VECTOR_INDEX_MASK));
                    OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
                    OUT_CB_TABLE(data, 4);
                }
            }
        }
    } else { /* r300 */
        struct r300_fragment_program_code *code = &generic_code->code.r300;

        shader->cb_code_size = 19 +
305
                               (r300->screen->caps.is_r400 ? 2 : 0) +
306
307
308
309
310
                               code->alu.length * 4 +
                               (code->tex.length ? (1 + code->tex.length) : 0) +
                               imm_count * 5;

        NEW_CB(shader->cb_code, shader->cb_code_size);
311
312
313
314

        if (r300->screen->caps.is_r400)
            OUT_CB_REG(R400_US_CODE_BANK, 0);

315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
        OUT_CB_REG(R300_US_CONFIG, code->config);
        OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
        OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);

        OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4);
        OUT_CB_TABLE(code->code_addr, 4);

        OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
        for (i = 0; i < code->alu.length; i++)
            OUT_CB(code->alu.inst[i].rgb_inst);

        OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
        for (i = 0; i < code->alu.length; i++)
            OUT_CB(code->alu.inst[i].rgb_addr);

        OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
        for (i = 0; i < code->alu.length; i++)
            OUT_CB(code->alu.inst[i].alpha_inst);

        OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
        for (i = 0; i < code->alu.length; i++)
            OUT_CB(code->alu.inst[i].alpha_addr);

        if (code->tex.length) {
            OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
            OUT_CB_TABLE(code->tex.inst, code->tex.length);
        }

        /* Emit immediates. */
        if (imm_count) {
            for(i = imm_first; i < imm_end; ++i) {
                if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
                    const float *data = constants[i].u.Immediate;

                    OUT_CB_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4);
                    OUT_CB(pack_float24(data[0]));
                    OUT_CB(pack_float24(data[1]));
                    OUT_CB(pack_float24(data[2]));
                    OUT_CB(pack_float24(data[3]));
                }
            }
        }
    }

    OUT_CB_REG(R300_FG_DEPTH_SRC, shader->fg_depth_src);
    OUT_CB_REG(R300_US_W_FMT, shader->us_out_w);
    END_CB;
}

364
365
366
367
368
static void r300_translate_fragment_shader(
    struct r300_context* r300,
    struct r300_fragment_shader_code* shader,
    const struct tgsi_token *tokens)
{
369
370
    struct r300_fragment_program_compiler compiler;
    struct tgsi_to_rc ttr;
371
    int wpos, face;
372
    unsigned i;
373
374
375
376
377

    tgsi_scan_shader(tokens, &shader->info);
    r300_shader_read_fs_inputs(&shader->info, &shader->inputs);

    wpos = shader->inputs.wpos;
378
    face = shader->inputs.face;
379

Marek Olšák's avatar
Marek Olšák committed
380
    /* Setup the compiler. */
381
382
    memset(&compiler, 0, sizeof(compiler));
    rc_init(&compiler.Base);
383
    compiler.Base.Debug = DBG_ON(r300, DBG_FP);
384

385
386
    compiler.code = &shader->code;
    compiler.state = shader->compare_state;
387
388
    compiler.Base.is_r500 = r300->screen->caps.is_r500;
    compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
389
    compiler.AllocateHwInputs = &allocate_hardware_inputs;
390
    compiler.UserData = &shader->inputs;
391

392
    find_output_registers(&compiler, shader);
393
394

    if (compiler.Base.Debug) {
395
        DBG(r300, DBG_FP, "r300: Initial fragment program\n");
396
        tgsi_dump(tokens, 0);
397
398
    }

399
400
    /* Translate TGSI to our internal representation */
    ttr.compiler = &compiler.Base;
401
    ttr.info = &shader->info;
402
    ttr.use_half_swizzles = TRUE;
403

404
    r300_tgsi_to_rc(&ttr, tokens);
405

Marek Olšák's avatar
Marek Olšák committed
406
407
408
409
410
411
412
413
414
415
416
417
    /**
     * Transform the program to support WPOS.
     *
     * Introduce a small fragment at the start of the program that will be
     * the only code that directly reads the WPOS input.
     * All other code pieces that reference that input will be rewritten
     * to read from a newly allocated temporary. */
    if (wpos != ATTR_UNUSED) {
        /* Moving the input to some other reg is not really necessary. */
        rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE);
    }

418
419
420
421
    if (face != ATTR_UNUSED) {
        rc_transform_fragment_face(&compiler.Base, face);
    }

422
423
    /* Invoke the compiler */
    r3xx_compile_fragment_program(&compiler);
424

425
426
427
428
429
430
431
432
    /* Shaders with zero instructions are invalid,
     * use the dummy shader instead. */
    if (shader->code.code.r500.inst_end == -1) {
        rc_destroy(&compiler.Base);
        r300_dummy_fragment_shader(r300, shader);
        return;
    }

433
    if (compiler.Base.Error) {
434
435
        fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
                " instead.\n", compiler.Base.ErrorMsg);
436
437
438
439
440
441

        if (shader->dummy) {
            fprintf(stderr, "r300 FP: Cannot compile the dummy shader! "
                    "Giving up...\n");
            abort();
        }
442
443

        rc_destroy(&compiler.Base);
444
        r300_dummy_fragment_shader(r300, shader);
445
        return;
446
    }
447

448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
    /* Initialize numbers of constants for each type. */
    shader->externals_count = ttr.immediate_offset;
    shader->immediates_count = 0;
    shader->rc_state_count = 0;

    for (i = shader->externals_count; i < shader->code.constants.Count; i++) {
        switch (shader->code.constants.Constants[i].Type) {
            case RC_CONSTANT_IMMEDIATE:
                ++shader->immediates_count;
                break;
            case RC_CONSTANT_STATE:
                ++shader->rc_state_count;
                break;
            default:
                assert(0);
        }
    }

466
467
468
469
470
471
472
473
474
    /* Setup shader depth output. */
    if (shader->code.writes_depth) {
        shader->fg_depth_src = R300_FG_DEPTH_SRC_SHADER;
        shader->us_out_w = R300_W_FMT_W24 | R300_W_SRC_US;
    } else {
        shader->fg_depth_src = R300_FG_DEPTH_SRC_SCAN;
        shader->us_out_w = R300_W_FMT_W0 | R300_W_SRC_US;
    }

475
    /* And, finally... */
476
    rc_destroy(&compiler.Base);
477
478
479

    /* Build the command buffer. */
    r300_emit_fs_code_to_buffer(r300, shader);
480
481
482
483
}

boolean r300_pick_fragment_shader(struct r300_context* r300)
{
Marek Olšák's avatar
Marek Olšák committed
484
    struct r300_fragment_shader* fs = r300_fs(r300);
485
    struct r300_fragment_program_external_state state = {{{ 0 }}};
486
487
    struct r300_fragment_shader_code* ptr;

488
489
    get_external_state(r300, &state);

490
491
492
493
    if (!fs->first) {
        /* Build the fragment shader for the first time. */
        fs->first = fs->shader = CALLOC_STRUCT(r300_fragment_shader_code);

494
495
        memcpy(&fs->shader->compare_state, &state,
            sizeof(struct r300_fragment_program_external_state));
496
        r300_translate_fragment_shader(r300, fs->shader, fs->state.tokens);
497
498
        return TRUE;

499
    } else {
500
501
502
503
504
505
506
        /* Check if the currently-bound shader has been compiled
         * with the texture-compare state we need. */
        if (memcmp(&fs->shader->compare_state, &state, sizeof(state)) != 0) {
            /* Search for the right shader. */
            ptr = fs->first;
            while (ptr) {
                if (memcmp(&ptr->compare_state, &state, sizeof(state)) == 0) {
507
508
509
510
511
512
                    if (fs->shader != ptr) {
                        fs->shader = ptr;
                        return TRUE;
                    }
                    /* The currently-bound one is OK. */
                    return FALSE;
513
514
515
516
517
518
519
520
521
522
                }
                ptr = ptr->next;
            }

            /* Not found, gotta compile a new one. */
            ptr = CALLOC_STRUCT(r300_fragment_shader_code);
            ptr->next = fs->first;
            fs->first = fs->shader = ptr;

            ptr->compare_state = state;
523
            r300_translate_fragment_shader(r300, ptr, fs->state.tokens);
524
525
526
527
528
            return TRUE;
        }
    }

    return FALSE;
529
}