r3xx_vertprog.c 33.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/*
 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

#include "radeon_compiler.h"

25 26
#include <stdio.h>

27 28
#include "../r300_reg.h"

29
#include "radeon_dataflow.h"
30
#include "radeon_program_alu.h"
31
#include "radeon_swizzle.h"
32
#include "radeon_emulate_branches.h"
33
#include "radeon_emulate_loops.h"
34
#include "radeon_remove_constants.h"
35

36 37 38 39 40
/* Per-loop bookkeeping kept while hardware code is being emitted. */
struct loop {
	/* Recorded when the loop is opened; translate_vertex_program()
	 * stores code->length / 4 here at the BGNLOOP. */
	int BgnLoop;
};

41 42 43 44 45
/*
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 *
 * x is the index into vpi->SrcReg[] of the source to borrow the register
 * index, register class and relative-addressing bit from; y is the swizzle
 * selector (e.g. RC_SWIZZLE_ZERO) replicated into all four components.
 * The last PVS_SRC_OPERAND argument is RC_MASK_NONE.
 *
 * The expansion refers to `vp` and `vpi`, so both must be in scope at the
 * point of use.
 */
#define __CONST(x, y)	\
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_src_class(vpi->SrcReg[x].File), \
			   RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
53 54


55
static unsigned long t_dst_mask(unsigned int mask)
56
{
57 58
	/* RC_MASK_* is equivalent to VSF_FLAG_* */
	return mask & RC_MASK_XYZW;
59 60
}

61
/**
 * Map a compiler register file to the PVS destination register class.
 *
 * \param file  register file of the destination operand
 * \return PVS_DST_REG_TEMPORARY, PVS_DST_REG_OUT or PVS_DST_REG_A0; any
 *         other file is reported on stderr and treated as a temporary
 */
static unsigned long t_dst_class(rc_register_file file)
{
	switch (file) {
	default:
		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
		/* fall-through */
	case RC_FILE_TEMPORARY:
		return PVS_DST_REG_TEMPORARY;
	case RC_FILE_OUTPUT:
		return PVS_DST_REG_OUT;
	case RC_FILE_ADDRESS:
		return PVS_DST_REG_A0;
	}
}

static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
77
				 struct rc_dst_register *dst)
78
{
79
	if (dst->File == RC_FILE_OUTPUT)
80 81 82 83 84
		return vp->outputs[dst->Index];

	return dst->Index;
}

85
/**
 * Map a compiler register file to the PVS source register class.
 *
 * \param file  register file of the source operand
 * \return PVS_SRC_REG_TEMPORARY (also used for RC_FILE_NONE),
 *         PVS_SRC_REG_INPUT or PVS_SRC_REG_CONSTANT; any other file is
 *         reported on stderr and treated as a temporary
 */
static unsigned long t_src_class(rc_register_file file)
{
	switch (file) {
	default:
		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
		/* fall-through */
	case RC_FILE_NONE:
	case RC_FILE_TEMPORARY:
		return PVS_SRC_REG_TEMPORARY;
	case RC_FILE_INPUT:
		return PVS_SRC_REG_INPUT;
	case RC_FILE_CONSTANT:
		return PVS_SRC_REG_CONSTANT;
	}
}

101
static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
102 103 104 105 106
{
	unsigned long aclass = t_src_class(a.File);
	unsigned long bclass = t_src_class(b.File);

	if (aclass != bclass)
107
		return 0;
108
	if (aclass == PVS_SRC_REG_TEMPORARY)
109
		return 0;
110 111

	if (a.RelAddr || b.RelAddr)
112
		return 1;
113
	if (a.Index != b.Index)
114
		return 1;
115

116
	return 0;
117 118
}

119
/**
 * Translate a swizzle selector for the hardware.
 *
 * \param swizzle  RC_SWIZZLE_* selector
 * \return the same value, widened to unsigned long
 */
static inline unsigned long t_swizzle(unsigned int swizzle)
{
	/* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
	return swizzle;
}

static unsigned long t_src_index(struct r300_vertex_program_code *vp,
126
				 struct rc_src_register *src)
127
{
128
	if (src->File == RC_FILE_INPUT) {
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
		assert(vp->inputs[src->Index] != -1);
		return vp->inputs[src->Index];
	} else {
		if (src->Index < 0) {
			fprintf(stderr,
				"negative offsets for indirect addressing do not work.\n");
			return 0;
		}
		return src->Index;
	}
}

/* these two functions should probably be merged... */

static unsigned long t_src(struct r300_vertex_program_code *vp,
144
			   struct rc_src_register *src)
145
{
146
	/* src->Negate uses the RC_MASK_ flags from program_instruction.h,
147 148 149 150 151 152 153 154
	 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
	 */
	return PVS_SRC_OPERAND(t_src_index(vp, src),
			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
			       t_swizzle(GET_SWZ(src->Swizzle, 1)),
			       t_swizzle(GET_SWZ(src->Swizzle, 2)),
			       t_swizzle(GET_SWZ(src->Swizzle, 3)),
			       t_src_class(src->File),
155 156
			       src->Negate) |
	       (src->RelAddr << 4) | (src->Abs << 3);
157 158 159
}

static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
160
				  struct rc_src_register *src)
161
{
162
	/* src->Negate uses the RC_MASK_ flags from program_instruction.h,
163 164 165 166 167 168 169 170
	 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
	 */
	return PVS_SRC_OPERAND(t_src_index(vp, src),
			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
			       t_src_class(src->File),
171
			       src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
172
	       (src->RelAddr << 4) | (src->Abs << 3);
173 174
}

175 176
static int valid_dst(struct r300_vertex_program_code *vp,
			   struct rc_dst_register *dst)
177
{
178 179 180
	if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
		return 0;
	} else if (dst->File == RC_FILE_ADDRESS) {
181 182 183
		assert(dst->Index == 0);
	}

184
	return 1;
185 186
}

187
static void ei_vector1(struct r300_vertex_program_code *vp,
188 189 190
				unsigned int hw_opcode,
				struct rc_sub_instruction *vpi,
				unsigned int * inst)
191
{
192
	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
193 194
				     0,
				     0,
195 196 197
				     t_dst_index(vp, &vpi->DstReg),
				     t_dst_mask(vpi->DstReg.WriteMask),
				     t_dst_class(vpi->DstReg.File));
198
	inst[1] = t_src(vp, &vpi->SrcReg[0]);
199 200
	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
	inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
201 202
}

203
static void ei_vector2(struct r300_vertex_program_code *vp,
204 205 206
				unsigned int hw_opcode,
				struct rc_sub_instruction *vpi,
				unsigned int * inst)
207
{
208
	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
209 210
				     0,
				     0,
211 212 213
				     t_dst_index(vp, &vpi->DstReg),
				     t_dst_mask(vpi->DstReg.WriteMask),
				     t_dst_class(vpi->DstReg.File));
214 215
	inst[1] = t_src(vp, &vpi->SrcReg[0]);
	inst[2] = t_src(vp, &vpi->SrcReg[1]);
216
	inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
217 218
}

219
static void ei_math1(struct r300_vertex_program_code *vp,
220 221 222
				unsigned int hw_opcode,
				struct rc_sub_instruction *vpi,
				unsigned int * inst)
223
{
224
	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
225 226
				     1,
				     0,
227 228 229
				     t_dst_index(vp, &vpi->DstReg),
				     t_dst_mask(vpi->DstReg.WriteMask),
				     t_dst_class(vpi->DstReg.File));
230
	inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
231 232
	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
	inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
233 234
}

235
static void ei_lit(struct r300_vertex_program_code *vp,
236 237
				      struct rc_sub_instruction *vpi,
				      unsigned int * inst)
238 239 240 241
{
	//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}

	inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
242 243
				     1,
				     0,
244 245 246 247
				     t_dst_index(vp, &vpi->DstReg),
				     t_dst_mask(vpi->DstReg.WriteMask),
				     t_dst_class(vpi->DstReg.File));
	/* NOTE: Users swizzling might not work. */
248 249
	inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
250
				  PVS_SRC_SELECT_FORCE_0,	// Z
251 252
				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
				  t_src_class(vpi->SrcReg[0].File),
253
				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
254 255 256
	    (vpi->SrcReg[0].RelAddr << 4);
	inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
257
				  PVS_SRC_SELECT_FORCE_0,	// Z
258 259
				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
				  t_src_class(vpi->SrcReg[0].File),
260
				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
261 262 263
	    (vpi->SrcReg[0].RelAddr << 4);
	inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
264
				  PVS_SRC_SELECT_FORCE_0,	// Z
265 266
				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
				  t_src_class(vpi->SrcReg[0].File),
267
				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
268
	    (vpi->SrcReg[0].RelAddr << 4);
269 270
}

271
static void ei_mad(struct r300_vertex_program_code *vp,
272 273
				      struct rc_sub_instruction *vpi,
				      unsigned int * inst)
274
{
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
	/* Remarks about hardware limitations of MAD
	 * (please preserve this comment, as this information is _NOT_
	 * in the documentation provided by AMD).
	 *
	 * As described in the documentation, MAD with three unique temporary
	 * source registers requires the use of the macro version.
	 *
	 * However (and this is not mentioned in the documentation), apparently
	 * the macro version is _NOT_ a full superset of the normal version.
	 * In particular, the macro version does not always work when relative
	 * addressing is used in the source operands.
	 *
	 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
	 * assembly shader path when using medium quality animations
	 * (i.e. animations with matrix blending instead of quaternion blending).
	 *
	 * Unfortunately, I (nha) have been unable to extract a Piglit regression
	 * test for this issue - for some reason, it is possible to have vertex
	 * programs whose prefix is *exactly* the same as the prefix of the
	 * offending program in Sauerbraten up to the offending instruction
	 * without causing any trouble.
	 *
	 * Bottom line: Only use the macro version only when really necessary;
	 * according to AMD docs, this should improve performance by one clock
	 * as a nice side bonus.
	 */
301 302 303
	if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
	    vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
	    vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
304 305 306 307
	    vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
	    vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
	    vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
		inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
308 309
				0,
				1,
310 311 312 313 314
				t_dst_index(vp, &vpi->DstReg),
				t_dst_mask(vpi->DstReg.WriteMask),
				t_dst_class(vpi->DstReg.File));
	} else {
		inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
315 316
				0,
				0,
317 318 319 320
				t_dst_index(vp, &vpi->DstReg),
				t_dst_mask(vpi->DstReg.WriteMask),
				t_dst_class(vpi->DstReg.File));
	}
321 322 323
	inst[1] = t_src(vp, &vpi->SrcReg[0]);
	inst[2] = t_src(vp, &vpi->SrcReg[1]);
	inst[3] = t_src(vp, &vpi->SrcReg[2]);
324 325
}

326
static void ei_pow(struct r300_vertex_program_code *vp,
327 328
				      struct rc_sub_instruction *vpi,
				      unsigned int * inst)
329 330
{
	inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
331 332
				     1,
				     0,
333 334 335
				     t_dst_index(vp, &vpi->DstReg),
				     t_dst_mask(vpi->DstReg.WriteMask),
				     t_dst_class(vpi->DstReg.File));
336
	inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
337
	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
338
	inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
339 340
}

341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463
/**
 * Write callback that accumulates per-temporary write masks.
 *
 * \param userdata  array of R300_VS_MAX_TEMPS unsigned ints, indexed by
 *                  temporary register index
 * \param inst      instruction performing the write (not used)
 * \param file      register file being written
 * \param index     register index being written
 * \param mask      components being written
 *
 * Writes to non-temporary files or to indices outside the array are ignored.
 */
static void mark_write(void * userdata,	struct rc_instruction * inst,
		rc_register_file file,	unsigned int index, unsigned int mask)
{
	unsigned int * masks = userdata;

	if (file == RC_FILE_TEMPORARY && index < R300_VS_MAX_TEMPS)
		masks[index] |= mask;
}

/**
 * Encode a source operand that reads the predicate register.
 *
 * The operand addresses temporary compiler->PredicateIndex with the
 * swizzle (ZERO, ZERO, ZERO, W) and a negate mask of 0.
 */
static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
{
	const unsigned long sel_zero = t_swizzle(RC_SWIZZLE_ZERO);
	const unsigned long sel_w = t_swizzle(RC_SWIZZLE_W);
	const unsigned long reg_class = t_src_class(RC_FILE_TEMPORARY);

	return PVS_SRC_OPERAND(compiler->PredicateIndex,
		sel_zero, sel_zero, sel_zero, sel_w,
		reg_class,
		0);
}

/**
 * Encode a destination dword that writes the predicate register.
 *
 * \param compiler   supplies PredicateIndex, the temporary holding the predicate
 * \param hw_opcode  hardware opcode to encode
 * \param is_math    passed through as the second PVS_OP_DST_OPERAND argument
 * \return destination dword targeting the W component of that temporary
 */
static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
					unsigned int hw_opcode, int is_math)
{
	const unsigned long reg_class = t_dst_class(RC_FILE_TEMPORARY);

	return PVS_OP_DST_OPERAND(hw_opcode, is_math, 0,
				  compiler->PredicateIndex, RC_MASK_W,
				  reg_class);
}

/**
 * Emit the predicate-setting instruction for an IF.
 *
 * On first use this reserves a temporary whose W component is not written
 * anywhere in the program and records it in compiler->PredicateIndex /
 * PredicateMask.  The condition's swizzle is rewritten in place so that its
 * first selector is smeared across all components.
 *
 * \param compiler      compiler state; an error is raised when the chip is
 *                      not r500 or no temporary has a free W component
 * \param rci           the IF instruction (its SrcReg[0].Swizzle is modified)
 * \param inst          output: four dwords of hardware instruction
 * \param branch_depth  number of enclosing IFs; 0 selects ME_PRED_SET_NEQ,
 *                      anything else VE_PRED_SET_NEQ_PUSH
 */
static void ei_if(struct r300_vertex_program_compiler * compiler,
					struct rc_instruction *rci,
					unsigned int * inst,
					unsigned int branch_depth)
{
	unsigned int predicate_opcode;
	int is_math = 0;

	if (!compiler->Base.is_r500) {
		rc_error(&compiler->Base,"Opcode IF not supported\n");
		return;
	}

	/* Reserve a temporary to use as our predicate stack counter, if we
	 * don't already have one. */
	if (!compiler->PredicateMask) {
		unsigned int writemasks[R300_VS_MAX_TEMPS];
		struct rc_instruction * scan;
		unsigned int i;

		memset(writemasks, 0, sizeof(writemasks));
		for(scan = compiler->Base.Program.Instructions.Next;
				scan != &compiler->Base.Program.Instructions;
							scan = scan->Next) {
			rc_for_all_writes_mask(scan, mark_write, writemasks);
		}
		for(i = 0; i < R300_VS_MAX_TEMPS; i++) {
			unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
			/* Only the W component can be used for the predicate
			 * stack counter. */
			if (mask & RC_MASK_W) {
				compiler->PredicateMask = RC_MASK_W;
				compiler->PredicateIndex = i;
				break;
			}
		}
		if (i == R300_VS_MAX_TEMPS) {
			rc_error(&compiler->Base, "No free temporary to use for"
					" predicate stack counter.\n");
			return;
		}
	}

	rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
	if (branch_depth == 0) {
		/* Outermost IF: math opcode with the condition as only source. */
		is_math = 1;
		predicate_opcode = ME_PRED_SET_NEQ;
		inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
		inst[2] = 0;
	} else {
		/* Nested IF: vector opcode reading the predicate register and
		 * the condition. */
		predicate_opcode = VE_PRED_SET_NEQ_PUSH;
		inst[1] = t_pred_src(compiler);
		inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
	}

	inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
	inst[3] = 0;

}

/**
 * Emit the predicate inversion for an ELSE (ME_PRED_SET_INV).
 *
 * Raises a compiler error and leaves \p inst untouched when the chip is
 * not r500.
 */
static void ei_else(struct r300_vertex_program_compiler * compiler,
							unsigned int * inst)
{
	if (compiler->Base.is_r500) {
		inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
		inst[1] = t_pred_src(compiler);
		inst[2] = 0;
		inst[3] = 0;
	} else {
		rc_error(&compiler->Base,"Opcode ELSE not supported\n");
	}
}

/**
 * Emit the predicate pop for an ENDIF (ME_PRED_SET_POP).
 *
 * Raises a compiler error and leaves \p inst untouched when the chip is
 * not r500.
 */
static void ei_endif(struct r300_vertex_program_compiler *compiler,
							unsigned int * inst)
{
	if (compiler->Base.is_r500) {
		inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
		inst[1] = t_pred_src(compiler);
		inst[2] = 0;
		inst[3] = 0;
	} else {
		rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
	}
}
464

465
static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
466
{
467
	struct rc_instruction *rci;
468

469
	struct loop * loops = NULL;
470 471 472
	int current_loop_depth = 0;
	int loops_reserved = 0;

473 474
	unsigned int branch_depth = 0;

475 476 477
	compiler->code->pos_end = 0;	/* Not supported yet */
	compiler->code->length = 0;

478
	compiler->SetHwInputOutput(compiler);
479

480
	for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
481
		struct rc_sub_instruction *vpi = &rci->U.I;
482
		unsigned int *inst = compiler->code->body.d + compiler->code->length;
483

484
		/* Skip instructions writing to non-existing destination */
485
		if (!valid_dst(compiler->code, &vpi->DstReg))
486
			continue;
487

488 489 490 491 492 493 494 495 496 497 498 499 500 501 502
		if (rc_get_opcode_info(vpi->Opcode)->HasDstReg) {
			/* Relative addressing of destination operands is not supported yet. */
			if (vpi->DstReg.RelAddr) {
				rc_error(&compiler->Base, "Vertex program does not support relative "
					 "addressing of destination operands (yet).\n");
				return;
			}

			/* Neither is Saturate. */
			if (vpi->SaturateMode != RC_SATURATE_NONE) {
				rc_error(&compiler->Base, "Vertex program does not support the Saturate "
					 "modifier (yet).\n");
			}
		}

503 504
		if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS ||
		    (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) {
505 506
			rc_error(&compiler->Base, "Vertex program has too many instructions\n");
			return;
507 508
		}

509 510 511
		assert(compiler->Base.is_r500 ||
		       (vpi->Opcode != RC_OPCODE_SEQ &&
			vpi->Opcode != RC_OPCODE_SNE));
512

513
		switch (vpi->Opcode) {
514 515
		case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
516
		case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
517 518
		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
519 520
		case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
		case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
521 522 523
		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
524
		case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
525 526 527 528 529 530 531 532 533 534 535
		case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
		case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
		case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
		case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
		case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
		case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
		case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
		case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
536
		case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
537
		case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
538
		case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
539
		case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
540
		case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561
		case RC_OPCODE_BGNLOOP:
		{
			struct loop * l;

			if ((!compiler->Base.is_r500
				&& loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
				|| loops_reserved >= R500_VS_MAX_FC_DEPTH) {
				rc_error(&compiler->Base,
						"Loops are nested too deep.");
				return;
			}
			memory_pool_array_reserve(&compiler->Base.Pool,
					struct loop, loops, current_loop_depth,
					loops_reserved, 1);
			l = &loops[current_loop_depth++];
			memset(l , 0, sizeof(struct loop));
			l->BgnLoop = (compiler->code->length / 4);
			continue;
		}
		case RC_OPCODE_ENDLOOP:
		{
562 563 564 565 566 567 568 569 570 571
			struct loop * l;
			unsigned int act_addr;
			unsigned int last_addr;
			unsigned int ret_addr;

			assert(loops);
			l = &loops[current_loop_depth - 1];
			act_addr = l->BgnLoop - 1;
			last_addr = (compiler->code->length / 4) - 1;
			ret_addr = l->BgnLoop;
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608

			if (loops_reserved >= R300_VS_MAX_FC_OPS) {
				rc_error(&compiler->Base,
					"Too many flow control instructions.");
				return;
			}
			if (compiler->Base.is_r500) {
				compiler->code->fc_op_addrs.r500
					[compiler->code->num_fc_ops].lw =
					R500_PVS_FC_ACT_ADRS(act_addr)
					| R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
					;
				compiler->code->fc_op_addrs.r500
					[compiler->code->num_fc_ops].uw =
					R500_PVS_FC_LAST_INST(last_addr)
					| R500_PVS_FC_RTN_INST(ret_addr)
					;
			} else {
				compiler->code->fc_op_addrs.r300
					[compiler->code->num_fc_ops] =
					R300_PVS_FC_ACT_ADRS(act_addr)
					| R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
					| R300_PVS_FC_LAST_INST(last_addr)
					| R300_PVS_FC_RTN_INST(ret_addr)
					;
			}
			compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
				R300_PVS_FC_LOOP_INIT_VAL(0x0)
				| R300_PVS_FC_LOOP_STEP_VAL(0x1)
				;
			compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
						compiler->code->num_fc_ops);
			compiler->code->num_fc_ops++;
			current_loop_depth--;
			continue;
		}

609
		default:
610
			rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
611
			return;
612 613
		}

614 615 616 617 618 619 620 621 622 623 624 625 626
		/* Non-flow control instructions that are inside an if statement
		 * need to pay attention to the predicate bit. */
		if (branch_depth
			&& vpi->Opcode != RC_OPCODE_IF
			&& vpi->Opcode != RC_OPCODE_ELSE
			&& vpi->Opcode != RC_OPCODE_ENDIF) {

			inst[0] |= (PVS_DST_PRED_ENABLE_MASK
						<< PVS_DST_PRED_ENABLE_SHIFT);
			inst[0] |= (PVS_DST_PRED_SENSE_MASK
						<< PVS_DST_PRED_SENSE_SHIFT);
		}

627
		compiler->code->length += 4;
628

629 630 631
		if (compiler->Base.Error)
			return;
	}
632 633
}

634
/* Allocation state of one original temporary register. */
struct temporary_allocation {
	/* Set once a hardware temporary has been assigned. */
	unsigned int Allocated:1;
	/* Index of the assigned hardware temporary. */
	unsigned int HwTemp:15;
	/* Instruction recorded as the last one reading this temporary. */
	struct rc_instruction * LastRead;
};

static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
642
	struct rc_instruction *inst;
643
	struct rc_instruction *end_loop = NULL;
644
	unsigned int num_orig_temps = 0;
645
	char hwtemps[R300_VS_MAX_TEMPS];
646
	struct temporary_allocation * ta;
647
	unsigned int i, j;
648 649 650

	memset(hwtemps, 0, sizeof(hwtemps));

651
	/* Pass 1: Count original temporaries. */
652
	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
653
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
654

655
		for (i = 0; i < opcode->NumSrcRegs; ++i) {
656 657 658
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
				if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
					num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
659 660 661
			}
		}

662
		if (opcode->HasDstReg) {
663 664 665
			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
				if (inst->U.I.DstReg.Index >= num_orig_temps)
					num_orig_temps = inst->U.I.DstReg.Index + 1;
666 667 668
			}
		}
	}
669 670 671 672 673 674 675 676 677
	compiler->code->num_temporaries = num_orig_temps;

	/* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. */
	for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);

		if (opcode->HasDstReg)
			if (inst->U.I.DstReg.RelAddr)
				return;
678

679 680 681 682 683 684 685 686 687
		for (i = 0; i < opcode->NumSrcRegs; ++i) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
			    inst->U.I.SrcReg[i].RelAddr) {
				return;
			}
		}
	}

	compiler->code->num_temporaries = 0;
688 689 690 691
	ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
			sizeof(struct temporary_allocation) * num_orig_temps);
	memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);

692
	/* Pass 3: Determine original temporary lifetimes */
693
	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
694
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718
		/* Instructions inside of loops need to use the ENDLOOP
		 * instruction as their LastRead. */
		if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
			int endloops = 1;
			struct rc_instruction * ptr;
			for(ptr = inst->Next;
				ptr != &compiler->Base.Program.Instructions;
							ptr = ptr->Next){
				if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
					endloops++;
				} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
					endloops--;
					if (endloops <= 0) {
						end_loop = ptr;
						break;
					}
				}
			}
		}

		if (inst == end_loop) {
			end_loop = NULL;
			continue;
		}
719

720
		for (i = 0; i < opcode->NumSrcRegs; ++i) {
721
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
722 723
				ta[inst->U.I.SrcReg[i].Index].LastRead =
						end_loop ? end_loop : inst;
724 725 726
		}
	}

727
	/* Pass 4: Register allocation */
728
	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
729
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
730

731
		for (i = 0; i < opcode->NumSrcRegs; ++i) {
732 733 734
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
				unsigned int orig = inst->U.I.SrcReg[i].Index;
				inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
735 736

				if (ta[orig].Allocated && inst == ta[orig].LastRead)
737
					hwtemps[ta[orig].HwTemp] = 0;
738 739 740
			}
		}

741
		if (opcode->HasDstReg) {
742 743
			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
				unsigned int orig = inst->U.I.DstReg.Index;
744 745

				if (!ta[orig].Allocated) {
746
					for(j = 0; j < R300_VS_MAX_TEMPS; ++j) {
747 748 749
						if (!hwtemps[j])
							break;
					}
750
					if (j >= R300_VS_MAX_TEMPS) {
751 752
						fprintf(stderr, "Out of hw temporaries\n");
					} else {
753
						ta[orig].Allocated = 1;
754
						ta[orig].HwTemp = j;
755
						hwtemps[j] = 1;
756 757 758 759 760 761

						if (j >= compiler->code->num_temporaries)
							compiler->code->num_temporaries = j + 1;
					}
				}

762
				inst->U.I.DstReg.Index = ta[orig].HwTemp;
763 764 765 766 767
			}
		}
	}
}

768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797
/**
 * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
 * and the Saturate opcode modifier. Only Absolute is currently transformed.
 */
static int transform_nonnative_modifiers(
	struct radeon_compiler *c,
	struct rc_instruction *inst,
	void* unused)
{
	const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
	unsigned i;

	/* Transform ABS(a) to MAX(a, -a). */
	for (i = 0; i < opcode->NumSrcRegs; i++) {
		if (inst->U.I.SrcReg[i].Abs) {
			struct rc_instruction *new_inst;
			unsigned temp;

			inst->U.I.SrcReg[i].Abs = 0;

			temp = rc_find_free_temporary(c);

			new_inst = rc_insert_new_instruction(c, inst->Prev);
			new_inst->U.I.Opcode = RC_OPCODE_MAX;
			new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
			new_inst->U.I.DstReg.Index = temp;
			new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
			new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
			new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;

798
			memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
799 800
			inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
			inst->U.I.SrcReg[i].Index = temp;
801
			inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
802 803 804 805
		}
	}
	return 1;
}
806

807 808 809 810
/**
 * Vertex engine cannot read two inputs or two constants at the same time.
 * Introduce intermediate MOVs to temporary registers to account for this.
 */
811
static int transform_source_conflicts(
812 813
	struct radeon_compiler *c,
	struct rc_instruction* inst,
814 815
	void* unused)
{
816
	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
817

818
	if (opcode->NumSrcRegs == 3) {
819 820
		if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
		    || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
821 822
			int tmpreg = rc_find_free_temporary(c);
			struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
823 824 825 826 827 828 829 830
			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mov->U.I.DstReg.Index = tmpreg;
			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];

			reset_srcreg(&inst->U.I.SrcReg[2]);
			inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
			inst->U.I.SrcReg[2].Index = tmpreg;
831 832 833
		}
	}

834
	if (opcode->NumSrcRegs >= 2) {
835
		if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
836 837
			int tmpreg = rc_find_free_temporary(c);
			struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
838 839 840 841 842 843 844 845
			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mov->U.I.DstReg.Index = tmpreg;
			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];

			reset_srcreg(&inst->U.I.SrcReg[1]);
			inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
			inst->U.I.SrcReg[1].Index = tmpreg;
846 847 848
		}
	}

849
	return 1;
850 851
}

852 853
static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
{
854
	int i;
855

856 857 858 859
	for(i = 0; i < 32; ++i) {
		if ((compiler->RequiredOutputs & (1 << i)) &&
		    !(compiler->Base.Program.OutputsWritten & (1 << i))) {
			struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
860
			inst->U.I.Opcode = RC_OPCODE_MOV;
861

862 863 864
			inst->U.I.DstReg.File = RC_FILE_OUTPUT;
			inst->U.I.DstReg.Index = i;
			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
865

866 867 868
			inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
			inst->U.I.SrcReg[0].Index = 0;
			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
869

870
			compiler->Base.Program.OutputsWritten |= 1 << i;
871 872 873 874
		}
	}
}

875 876
static void dataflow_outputs_mark_used(void * userdata, void * data,
		void (*callback)(void *, unsigned int, unsigned int))
877
{
878
	struct r300_vertex_program_compiler * c = userdata;
879
	int i;
880

881
	for(i = 0; i < 32; ++i) {
882 883
		if (c->RequiredOutputs & (1 << i))
			callback(data, i, RC_MASK_XYZW);
884 885 886
	}
}

887
/**
 * Swizzle capability query: every swizzle is reported as native.
 *
 * \param opcode  ignored
 * \param reg     ignored
 * \return always 1
 */
static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
	(void) opcode;
	(void) reg;

	return 1;
}

895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964
/**
 * Rebase one ARL so the relative offsets that follow it are not negative.
 *
 * An "ADD tmp.x, <ARL source>, min_offset" is created at arl->Prev and the
 * ARL is redirected to read tmp.xxxx. Every relatively addressed source of
 * the instructions after \p arl, up to but excluding \p end, then has
 * \p min_offset subtracted from its Index, so the sum of address and offset
 * is unchanged.
 *
 * \param c           vertex program compiler owning the instruction list
 * \param arl         the ARL instruction to rebase
 * \param end         first instruction that must NOT be rewritten
 * \param min_offset  smallest (negative) relative offset used after \p arl
 */
static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
					  struct rc_instruction *arl,
					  struct rc_instruction *end,
					  int min_offset)
{
	struct rc_instruction *bias;
	struct rc_instruction *cur;
	unsigned imm_swizzle;
	unsigned src;

	/* Build the ADD that folds min_offset into the address value. */
	bias = rc_insert_new_instruction(&c->Base, arl->Prev);
	bias->U.I.Opcode = RC_OPCODE_ADD;

	bias->U.I.DstReg.File = RC_FILE_TEMPORARY;
	bias->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
	bias->U.I.DstReg.WriteMask = RC_MASK_X;

	bias->U.I.SrcReg[0] = arl->U.I.SrcReg[0];

	bias->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
	bias->U.I.SrcReg[1].Index =
		rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
						  min_offset, &imm_swizzle);
	bias->U.I.SrcReg[1].Swizzle = imm_swizzle;

	/* Point the ARL at the biased value; its other source fields are kept. */
	arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	arl->U.I.SrcReg[0].Index = bias->U.I.DstReg.Index;
	arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;

	/* Compensate every relative offset in (arl, end). */
	cur = arl->Next;
	while (cur != end) {
		const struct rc_opcode_info *info = rc_get_opcode_info(cur->U.I.Opcode);

		for (src = 0; src < info->NumSrcRegs; src++) {
			if (!cur->U.I.SrcReg[src].RelAddr)
				continue;

			cur->U.I.SrcReg[src].Index -= min_offset;
		}

		cur = cur->Next;
	}
}

/**
 * Remove negative relative-addressing offsets from the whole program.
 *
 * Walks the instruction list once. For the span of instructions governed by
 * each ARL (from that ARL up to the next ARL or the end of the list) it
 * tracks the most negative Index used by a relatively addressed source and,
 * if one was found, calls transform_negative_addressing() for that span.
 *
 * A negative relative offset seen before any ARL is reported with rc_error()
 * and the walk is abandoned.
 *
 * \param c  vertex program compiler whose program is rewritten in place
 */
static void rc_emulate_negative_addressing(struct r300_vertex_program_compiler *c)
{
	struct rc_instruction * const list_head = &c->Base.Program.Instructions;
	struct rc_instruction *cur;
	struct rc_instruction *pending_arl = NULL;
	int lowest = 0;
	unsigned src;

	for (cur = list_head->Next; cur != list_head; cur = cur->Next) {
		const struct rc_opcode_info *info = rc_get_opcode_info(cur->U.I.Opcode);

		if (cur->U.I.Opcode == RC_OPCODE_ARL) {
			/* A new ARL closes the span of the previous one. */
			if (pending_arl != NULL && lowest < 0)
				transform_negative_addressing(c, pending_arl, cur, lowest);

			pending_arl = cur;
			lowest = 0;
		} else {
			for (src = 0; src < info->NumSrcRegs; src++) {
				if (!cur->U.I.SrcReg[src].RelAddr ||
				    cur->U.I.SrcReg[src].Index >= 0)
					continue;

				/* ARL must precede any indirect addressing. */
				if (pending_arl == NULL) {
					rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL.");
					return;
				}

				if (cur->U.I.SrcReg[src].Index < lowest)
					lowest = cur->U.I.SrcReg[src].Index;
			}
		}
	}

	/* The span of the last ARL runs to the end of the list (cur == list_head). */
	if (pending_arl != NULL && lowest < 0)
		transform_negative_addressing(c, pending_arl, cur, lowest);
}

965 966 967 968 969 970 971 972
/**
 * Dump the current program to stderr when debugging is enabled.
 *
 * \param c      vertex program compiler; nothing is printed unless
 *               c->Base.Debug is set
 * \param where  label printed in the header line before the program dump
 */
static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
{
	if (!c->Base.Debug)
		return;

	fprintf(stderr, "Vertex Program: %s\n", where);
	rc_print_program(&c->Base.Program);
}

973

974 975 976 977 978
/* Swizzle capabilities installed for vertex programs: IsNative accepts
 * everything, so no Split handler is provided.
 */
static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
	.IsNative = swizzle_is_native,
	.Split = NULL /* should never be called */
};

979

980
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
981
{
982
	struct emulate_loop_state loop_state;
983

984
	c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
985

986
	addArtificialOutputs(c);
987

988
	debug_program_log(c, "before compilation");
989

990 991
	if (c->Base.is_r500)
		rc_transform_loops(&c->Base, &loop_state, R500_VS_MAX_ALU);
992
	else
993 994 995
		rc_transform_loops(&c->Base, &loop_state, R300_VS_MAX_ALU);
	if (c->Base.Error)
		return;
996

997
	debug_program_log(c, "after emulate loops");
998

999 1000 1001 1002 1003
	if (!c->Base.is_r500) {
		rc_emulate_branches(&c->Base);
		if (c->Base.Error)
			return;
		debug_program_log(c, "after emulate branches");
1004
	}
1005

1006 1007 1008 1009
	rc_emulate_negative_addressing(c);

	debug_program_log(c, "after negative addressing emulation");

1010
	if (c->Base.is_r500) {
1011 1012
		struct radeon_program_transformation transformations[] = {
			{ &r300_transform_vertex_alu, 0 },
1013
			{ &r300_transform_trig_scale_vertex, 0 }
1014
		};
1015 1016 1017
		radeonLocalTransform(&c->Base, 2, transformations);
		if (c->Base.Error)
			return;
1018

1019
		debug_program_log(c, "after native rewrite");
1020 1021 1022 1023 1024
	} else {
		struct radeon_program_transformation transformations[] = {
			{ &r300_transform_vertex_alu, 0 },
			{ &radeonTransformTrigSimple, 0 }
		};
1025 1026 1027
		radeonLocalTransform(&c->Base, 2, transformations);
		if (c->Base.Error)
			return;
1028

1029
		debug_program_log(c, "after native rewrite");
1030 1031 1032 1033 1034 1035 1036

		/* Note: This pass has to be done seperately from ALU rewrite,
		 * because it needs to check every instruction.
		 */
		struct radeon_program_transformation transformations2[] = {
			{ &transform_nonnative_modifiers, 0 },
		};
1037 1038 1039
		radeonLocalTransform(&c->Base, 1, transformations2);
		if (c->Base.Error)
			return;
1040

1041
		debug_program_log(c, "after emulate modifiers");
1042
	}
1043

1044 1045 1046 1047 1048 1049 1050 1051
	{
		/* Note: This pass has to be done seperately from ALU rewrite,
		 * otherwise non-native ALU instructions with source conflits
		 * will not be treated properly.
		 */
		struct radeon_program_transformation transformations[] = {
			{ &transform_source_conflicts, 0 },
		};
1052 1053 1054
		radeonLocalTransform(&c->Base, 1, transformations);
		if (c->Base.Error)
			return;
1055 1056
	}

1057
	debug_program_log(c, "after source conflict resolve");
1058

1059 1060 1061
	rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_used, c);
	if (c->Base.Error)
		return;
1062

1063
	debug_program_log(c, "after deadcode");
1064

1065 1066 1067
	rc_dataflow_swizzles(&c->Base);
	if (c->Base.Error)
		return;
1068

1069
	debug_program_log(c, "after dataflow");
1070

1071 1072 1073
	allocate_temporary_registers(c);
	if (c->Base.Error)
		return;
1074

1075
	debug_program_log(c, "after register allocation");
1076

1077 1078 1079 1080 1081 1082 1083 1084
	if (c->Base.remove_unused_constants) {
		rc_remove_unused_constants(&c->Base,
					   &c->code->constants_remap_table);
		if (c->Base.Error)
			return;

		debug_program_log(c, "after constants cleanup");
	}
1085

1086 1087 1088
	translate_vertex_program(c);
	if (c->Base.Error)
		return;
1089

1090
	rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
1091

1092 1093
	c->code->InputsRead = c->Base.Program.InputsRead;
	c->code->OutputsWritten = c->Base.Program.OutputsWritten;
1094

1095
	if (c->Base.Debug) {
1096
		fprintf(stderr, "Final vertex program code:\n");
1097
		r300_vertex_program_dump(c);
1098
	}
1099 1100

	/* Check the number of constants. */
1101 1102 1103 1104
	if (!c->Base.Error &&
	    c->Base.Program.Constants.Count > 256) {
		rc_error(&c->Base, "Too many constants. Max: 256, Got: %i\n",
			 c->Base.Program.Constants.Count);
1105
	}
1106
}