Commit 2c17f97f authored by Paul Berry's avatar Paul Berry

glsl/loops: consolidate bounded loop handling into a lowering pass.

Previously, all of the back-ends (ir_to_mesa, st_glsl_to_tgsi, and the
i965 fs and vec4 visitors) had nearly identical logic for handling
bounded loops.  This replaces the duplicate logic with an equivalent
lowering pass that is used by all the back-ends.

Note: on i965, there is a slight increase in instruction count.  For
example, a loop like this:

    for (int i = 0; i < 100; i++) {
      total += i;
    }

would previously compile down to this (vec4) native code:

          mov(8)       g4<1>.xD 0D
          mov(8)       g8<1>.xD 0D
    loop:
          cmp.ge.f0(8) null     g8<4;4,1>.xD 100D
    (+f0) break(8)
          add(8)       g5<1>.xD g5<4;4,1>.xD g4<4;4,1>.xD
          add(8)       g8<1>.xD g8<4;4,1>.xD 1D
          add(8)       g4<1>.xD g4<4;4,1>.xD 1D
          while(8) loop

After this patch, the "(+f0) break(8)" turns into:

    (+f0) if(8)
          break(8)
          endif(8)

because the back-end isn't smart enough to recognize that "if
(condition) break;" can be done using a conditional break instruction.
However, it should be relatively easy for a future peephole
optimization to properly optimize this.
Reviewed-by: default avatarJordan Justen <jordan.l.justen@intel.com>
Reviewed-by: default avatarIan Romanick <ian.d.romanick@intel.com>
parent 97d8b770
......@@ -59,6 +59,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/loop_analysis.cpp \
$(GLSL_SRCDIR)/loop_controls.cpp \
$(GLSL_SRCDIR)/loop_unroll.cpp \
$(GLSL_SRCDIR)/lower_bounded_loops.cpp \
$(GLSL_SRCDIR)/lower_clip_distance.cpp \
$(GLSL_SRCDIR)/lower_discard.cpp \
$(GLSL_SRCDIR)/lower_discard_flow.cpp \
......
......@@ -101,6 +101,7 @@ bool do_swizzle_swizzle(exec_list *instructions);
bool do_tree_grafting(exec_list *instructions);
bool do_vec_index_to_cond_assign(exec_list *instructions);
bool do_vec_index_to_swizzle(exec_list *instructions);
bool lower_bounded_loops(exec_list *instructions);
bool lower_discard(exec_list *instructions);
void lower_discard_flow(exec_list *instructions);
bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
......
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/** \file
*
* This pass converts bounded loops (those whose ir_loop contains non-null
* values for \c from, \c to, \c increment, and \c counter) into unbounded
* loops.
*
* For instance:
*
* (loop (declare () uint i) (constant uint 0) (constant uint 4)
* (constant uint 1)
* ...loop body...)
*
* Is transformed into:
*
* (declare () uint i)
* (assign (x) (var_ref i) (constant uint 0))
* (loop
* (if (expression bool >= (var_ref i) (constant uint 4))
* (break)
* ())
* ...loop body...
* (assign (x) (var_ref i)
* (expression uint + (var_ref i) (constant uint 1))))
*/
#include "ir_hierarchical_visitor.h"
#include "ir.h"
#include "ir_builder.h"
using namespace ir_builder;
namespace {
class lower_bounded_loops_visitor : public ir_hierarchical_visitor {
public:
lower_bounded_loops_visitor()
: progress(false)
{
}
virtual ir_visitor_status visit_leave(ir_loop *ir);
bool progress;
};
} /* anonymous namespace */
ir_visitor_status
lower_bounded_loops_visitor::visit_leave(ir_loop *ir)
{
if (ir->counter == NULL)
return visit_continue;
exec_list new_instructions;
ir_factory f(&new_instructions, ralloc_parent(ir));
/* Before the loop, declare the counter and initialize it to "from". */
f.emit(ir->counter);
f.emit(assign(ir->counter, ir->from));
ir->insert_before(&new_instructions);
/* At the top of the loop, compare the counter to "to", and break if the
* comparison succeeds.
*/
ir_loop_jump *brk = new(f.mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
ir_expression_operation cmp = (ir_expression_operation) ir->cmp;
ir->body_instructions.push_head(if_tree(expr(cmp, ir->counter, ir->to),
brk));
/* At the bottom of the loop, increment the counter. */
ir->body_instructions.push_tail(assign(ir->counter,
add(ir->counter, ir->increment)));
/* NULL out the counter, from, to, and increment variables. */
ir->counter = NULL;
ir->from = NULL;
ir->to = NULL;
ir->increment = NULL;
this->progress = true;
return visit_continue;
}
bool
lower_bounded_loops(exec_list *instructions)
{
lower_bounded_loops_visitor v;
visit_list_elements(&v, instructions);
return v.progress;
}
......@@ -2181,39 +2181,16 @@ fs_visitor::visit(ir_if *ir)
void
fs_visitor::visit(ir_loop *ir)
{
fs_reg counter = reg_undef;
/* Any bounded loops should have been lowered by lower_bounded_loops(). */
assert(ir->counter == NULL);
if (brw->gen < 6 && dispatch_width == 16) {
fail("Can't support (non-uniform) control flow on 16-wide\n");
}
if (ir->counter) {
this->base_ir = ir->counter;
ir->counter->accept(this);
counter = *(variable_storage(ir->counter));
if (ir->from) {
this->base_ir = ir->from;
ir->from->accept(this);
emit(MOV(counter, this->result));
}
}
this->base_ir = NULL;
emit(BRW_OPCODE_DO);
if (ir->to) {
this->base_ir = ir->to;
ir->to->accept(this);
emit(CMP(reg_null_d, counter, this->result,
brw_conditional_for_comparison(ir->cmp)));
fs_inst *inst = emit(BRW_OPCODE_BREAK);
inst->predicate = BRW_PREDICATE_NORMAL;
}
foreach_list(node, &ir->body_instructions) {
ir_instruction *ir = (ir_instruction *)node;
......@@ -2221,12 +2198,6 @@ fs_visitor::visit(ir_loop *ir)
ir->accept(this);
}
if (ir->increment) {
this->base_ir = ir->increment;
ir->increment->accept(this);
emit(ADD(counter, counter, this->result));
}
this->base_ir = NULL;
emit(BRW_OPCODE_WHILE);
}
......
......@@ -213,6 +213,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
|| progress;
} while (progress);
lower_bounded_loops(shader->ir);
/* Make a pass over the IR to add state references for any built-in
* uniforms that are used. This has to be done now (during linking).
* Code generation doesn't happen until the first time this shader is
......
......@@ -1007,48 +1007,18 @@ vec4_visitor::visit(ir_variable *ir)
void
vec4_visitor::visit(ir_loop *ir)
{
dst_reg counter;
/* Any bounded loops should have been lowered by lower_bounded_loops(). */
assert(ir->counter == NULL);
/* We don't want debugging output to print the whole body of the
* loop as the annotation.
*/
this->base_ir = NULL;
if (ir->counter != NULL) {
this->base_ir = ir->counter;
ir->counter->accept(this);
counter = *(variable_storage(ir->counter));
if (ir->from != NULL) {
this->base_ir = ir->from;
ir->from->accept(this);
emit(MOV(counter, this->result));
}
}
emit(BRW_OPCODE_DO);
if (ir->to) {
this->base_ir = ir->to;
ir->to->accept(this);
emit(CMP(dst_null_d(), src_reg(counter), this->result,
brw_conditional_for_comparison(ir->cmp)));
vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
inst->predicate = BRW_PREDICATE_NORMAL;
}
visit_instructions(&ir->body_instructions);
if (ir->increment) {
this->base_ir = ir->increment;
ir->increment->accept(this);
emit(ADD(counter, src_reg(counter), this->result));
}
emit(BRW_OPCODE_WHILE);
}
......
......@@ -759,49 +759,13 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
void
ir_to_mesa_visitor::visit(ir_loop *ir)
{
ir_dereference_variable *counter = NULL;
if (ir->counter != NULL)
counter = new(mem_ctx) ir_dereference_variable(ir->counter);
if (ir->from != NULL) {
assert(ir->counter != NULL);
ir_assignment *a =
new(mem_ctx) ir_assignment(counter, ir->from, NULL);
a->accept(this);
}
/* Any bounded loops should have been lowered by lower_bounded_loops(). */
assert(ir->counter == NULL);
emit(NULL, OPCODE_BGNLOOP);
if (ir->to) {
ir_expression *e =
new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type,
counter, ir->to);
ir_if *if_stmt = new(mem_ctx) ir_if(e);
ir_loop_jump *brk =
new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
if_stmt->then_instructions.push_tail(brk);
if_stmt->accept(this);
}
visit_exec_list(&ir->body_instructions, this);
if (ir->increment) {
ir_expression *e =
new(mem_ctx) ir_expression(ir_binop_add, counter->type,
counter, ir->increment);
ir_assignment *a =
new(mem_ctx) ir_assignment(counter, e, NULL);
a->accept(this);
}
emit(NULL, OPCODE_ENDLOOP);
}
......@@ -3091,6 +3055,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
progress = lower_vector_insert(ir, true) || progress;
} while (progress);
lower_bounded_loops(ir);
validate_ir_tree(ir);
}
......
......@@ -1137,53 +1137,13 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
void
glsl_to_tgsi_visitor::visit(ir_loop *ir)
{
ir_dereference_variable *counter = NULL;
if (ir->counter != NULL)
counter = new(ir) ir_dereference_variable(ir->counter);
if (ir->from != NULL) {
assert(ir->counter != NULL);
ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
a->accept(this);
delete a;
}
/* Any bounded loops should have been lowered by lower_bounded_loops(). */
assert(ir->counter == NULL);
emit(NULL, TGSI_OPCODE_BGNLOOP);
if (ir->to) {
ir_expression *e =
new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
counter, ir->to);
ir_if *if_stmt = new(ir) ir_if(e);
ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
if_stmt->then_instructions.push_tail(brk);
if_stmt->accept(this);
delete if_stmt;
delete e;
delete brk;
}
visit_exec_list(&ir->body_instructions, this);
if (ir->increment) {
ir_expression *e =
new(ir) ir_expression(ir_binop_add, counter->type,
counter, ir->increment);
ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
a->accept(this);
delete a;
delete e;
}
emit(NULL, TGSI_OPCODE_ENDLOOP);
}
......@@ -5345,6 +5305,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
} while (progress);
lower_bounded_loops(ir);
validate_ir_tree(ir);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment