Commit 018c69ac authored by Zack Rusin's avatar Zack Rusin
Browse files

gallivm: export unordered/ordered cmp to a common function



Only the floating point operarators change everything else
is the same so it makes sense to share the code.
Signed-off-by: default avatarZack Rusin <zackr@vmware.com>
Reviewed-by: default avatarJose Fonseca <jfonseca@vmware.com>
Reviewed-by: default avatarRoland Scheidegger <sroland@vmware.com>
parent 192c68b8
......@@ -68,14 +68,17 @@
/**
* Build code to compare two values 'a' and 'b' of 'type' using the given func.
* \param func one of PIPE_FUNC_x
* If the ordered argument is true the function will use LLVM's ordered
* comparisons, otherwise unordered comparisons will be used.
* The result values will be 0 for false or ~0 for true.
*/
LLVMValueRef
lp_build_compare(struct gallivm_state *gallivm,
const struct lp_type type,
unsigned func,
LLVMValueRef a,
LLVMValueRef b)
static LLVMValueRef
lp_build_compare_ext(struct gallivm_state *gallivm,
const struct lp_type type,
unsigned func,
LLVMValueRef a,
LLVMValueRef b,
boolean ordered)
{
LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
......@@ -94,153 +97,6 @@ lp_build_compare(struct gallivm_state *gallivm,
if(func == PIPE_FUNC_ALWAYS)
return ones;
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/*
* There are no unsigned integer comparison instructions in SSE.
*/
if (!type.floating && !type.sign &&
type.width * type.length == 128 &&
util_cpu_caps.has_sse2 &&
(func == PIPE_FUNC_LESS ||
func == PIPE_FUNC_LEQUAL ||
func == PIPE_FUNC_GREATER ||
func == PIPE_FUNC_GEQUAL) &&
(gallivm_debug & GALLIVM_DEBUG_PERF)) {
debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
__FUNCTION__, type.length, type.width);
}
#endif
#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
if(type.floating && util_cpu_caps.has_sse) {
/* float[4] comparison */
LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
LLVMValueRef args[3];
unsigned cc;
boolean swap;
swap = FALSE;
switch(func) {
case PIPE_FUNC_EQUAL:
cc = 0;
break;
case PIPE_FUNC_NOTEQUAL:
cc = 4;
break;
case PIPE_FUNC_LESS:
cc = 1;
break;
case PIPE_FUNC_LEQUAL:
cc = 2;
break;
case PIPE_FUNC_GREATER:
cc = 1;
swap = TRUE;
break;
case PIPE_FUNC_GEQUAL:
cc = 2;
swap = TRUE;
break;
default:
assert(0);
return lp_build_undef(gallivm, type);
}
if(swap) {
args[0] = b;
args[1] = a;
}
else {
args[0] = a;
args[1] = b;
}
args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
res = lp_build_intrinsic(builder,
"llvm.x86.sse.cmp.ps",
vec_type,
args, 3);
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
return res;
}
else if(util_cpu_caps.has_sse2) {
/* int[4] comparison */
static const struct {
unsigned swap:1;
unsigned eq:1;
unsigned gt:1;
unsigned not:1;
} table[] = {
{0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
{1, 0, 1, 0}, /* PIPE_FUNC_LESS */
{0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
{0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
{0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
{0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
{1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
{0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */
};
const char *pcmpeq;
const char *pcmpgt;
LLVMValueRef args[2];
LLVMValueRef res;
LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
switch (type.width) {
case 8:
pcmpeq = "llvm.x86.sse2.pcmpeq.b";
pcmpgt = "llvm.x86.sse2.pcmpgt.b";
break;
case 16:
pcmpeq = "llvm.x86.sse2.pcmpeq.w";
pcmpgt = "llvm.x86.sse2.pcmpgt.w";
break;
case 32:
pcmpeq = "llvm.x86.sse2.pcmpeq.d";
pcmpgt = "llvm.x86.sse2.pcmpgt.d";
break;
default:
assert(0);
return lp_build_undef(gallivm, type);
}
/* There are no unsigned comparison instructions. So flip the sign bit
* so that the results match.
*/
if (table[func].gt && !type.sign) {
LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
a = LLVMBuildXor(builder, a, msb, "");
b = LLVMBuildXor(builder, b, msb, "");
}
if(table[func].swap) {
args[0] = b;
args[1] = a;
}
else {
args[0] = a;
args[1] = b;
}
if(table[func].eq)
res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
else if (table[func].gt)
res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
else
res = LLVMConstNull(vec_type);
if(table[func].not)
res = LLVMBuildNot(builder, res, "");
return res;
}
} /* if (type.width * type.length == 128) */
#endif
#endif /* HAVE_LLVM < 0x0207 */
if(type.floating) {
LLVMRealPredicate op;
switch(func) {
......@@ -251,22 +107,22 @@ lp_build_compare(struct gallivm_state *gallivm,
op = LLVMRealPredicateTrue;
break;
case PIPE_FUNC_EQUAL:
op = LLVMRealUEQ;
op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
break;
case PIPE_FUNC_NOTEQUAL:
op = LLVMRealUNE;
op = ordered ? LLVMRealONE : LLVMRealUNE;
break;
case PIPE_FUNC_LESS:
op = LLVMRealULT;
op = ordered ? LLVMRealOLT : LLVMRealULT;
break;
case PIPE_FUNC_LEQUAL:
op = LLVMRealULE;
op = ordered ? LLVMRealOLE : LLVMRealULE;
break;
case PIPE_FUNC_GREATER:
op = LLVMRealUGT;
op = ordered ? LLVMRealOGT : LLVMRealUGT;
break;
case PIPE_FUNC_GEQUAL:
op = LLVMRealUGE;
op = ordered ? LLVMRealOGE : LLVMRealUGE;
break;
default:
assert(0);
......@@ -367,29 +223,20 @@ lp_build_compare(struct gallivm_state *gallivm,
}
/**
* Build code to compare two values 'a' and 'b' using the given func.
* Build code to compare two values 'a' and 'b' of 'type' using the given func.
* \param func one of PIPE_FUNC_x
* If the operands are floating point numbers, the function will use
* ordered comparison which means that it will return true if both
* operands are not a NaN and the specified condition evaluates to true.
* The result values will be 0 for false or ~0 for true.
*/
LLVMValueRef
lp_build_cmp_ordered(struct lp_build_context *bld,
unsigned func,
LLVMValueRef a,
LLVMValueRef b)
lp_build_compare(struct gallivm_state *gallivm,
const struct lp_type type,
unsigned func,
LLVMValueRef a,
LLVMValueRef b)
{
struct gallivm_state *gallivm = bld->gallivm;
const struct lp_type type = bld->type;
LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
LLVMValueRef cond;
LLVMValueRef res;
assert(func >= PIPE_FUNC_NEVER);
assert(func <= PIPE_FUNC_ALWAYS);
......@@ -418,129 +265,157 @@ lp_build_cmp_ordered(struct lp_build_context *bld,
__FUNCTION__, type.length, type.width);
}
#endif
if(type.floating) {
LLVMRealPredicate op;
switch(func) {
case PIPE_FUNC_NEVER:
op = LLVMRealPredicateFalse;
break;
case PIPE_FUNC_ALWAYS:
op = LLVMRealPredicateTrue;
break;
case PIPE_FUNC_EQUAL:
op = LLVMRealOEQ;
break;
case PIPE_FUNC_NOTEQUAL:
op = LLVMRealONE;
break;
case PIPE_FUNC_LESS:
op = LLVMRealOLT;
break;
case PIPE_FUNC_LEQUAL:
op = LLVMRealOLE;
break;
case PIPE_FUNC_GREATER:
op = LLVMRealOGT;
break;
case PIPE_FUNC_GEQUAL:
op = LLVMRealOGE;
break;
default:
assert(0);
return lp_build_undef(gallivm, type);
}
#if HAVE_LLVM >= 0x0207
cond = LLVMBuildFCmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
if (type.length == 1) {
cond = LLVMBuildFCmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
}
else {
unsigned i;
#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef cond;
LLVMValueRef res;
if(type.floating && util_cpu_caps.has_sse) {
/* float[4] comparison */
LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
LLVMValueRef args[3];
unsigned cc;
boolean swap;
res = LLVMGetUndef(int_vec_type);
swap = FALSE;
switch(func) {
case PIPE_FUNC_EQUAL:
cc = 0;
break;
case PIPE_FUNC_NOTEQUAL:
cc = 4;
break;
case PIPE_FUNC_LESS:
cc = 1;
break;
case PIPE_FUNC_LEQUAL:
cc = 2;
break;
case PIPE_FUNC_GREATER:
cc = 1;
swap = TRUE;
break;
case PIPE_FUNC_GEQUAL:
cc = 2;
swap = TRUE;
break;
default:
assert(0);
return lp_build_undef(gallivm, type);
}
debug_printf("%s: warning: using slow element-wise float"
" vector comparison\n", __FUNCTION__);
for (i = 0; i < type.length; ++i) {
LLVMValueRef index = lp_build_const_int32(gallivm, i);
cond = LLVMBuildFCmp(builder, op,
LLVMBuildExtractElement(builder, a, index, ""),
LLVMBuildExtractElement(builder, b, index, ""),
"");
cond = LLVMBuildSelect(builder, cond,
LLVMConstExtractElement(ones, index),
LLVMConstExtractElement(zeros, index),
"");
res = LLVMBuildInsertElement(builder, res, cond, index, "");
if(swap) {
args[0] = b;
args[1] = a;
}
else {
args[0] = a;
args[1] = b;
}
}
#endif
}
else {
LLVMIntPredicate op;
switch(func) {
case PIPE_FUNC_EQUAL:
op = LLVMIntEQ;
break;
case PIPE_FUNC_NOTEQUAL:
op = LLVMIntNE;
break;
case PIPE_FUNC_LESS:
op = type.sign ? LLVMIntSLT : LLVMIntULT;
break;
case PIPE_FUNC_LEQUAL:
op = type.sign ? LLVMIntSLE : LLVMIntULE;
break;
case PIPE_FUNC_GREATER:
op = type.sign ? LLVMIntSGT : LLVMIntUGT;
break;
case PIPE_FUNC_GEQUAL:
op = type.sign ? LLVMIntSGE : LLVMIntUGE;
break;
default:
assert(0);
return lp_build_undef(gallivm, type);
}
#if HAVE_LLVM >= 0x0207
cond = LLVMBuildICmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
if (type.length == 1) {
cond = LLVMBuildICmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
res = lp_build_intrinsic(builder,
"llvm.x86.sse.cmp.ps",
vec_type,
args, 3);
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
return res;
}
else {
unsigned i;
else if(util_cpu_caps.has_sse2) {
/* int[4] comparison */
static const struct {
unsigned swap:1;
unsigned eq:1;
unsigned gt:1;
unsigned not:1;
} table[] = {
{0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
{1, 0, 1, 0}, /* PIPE_FUNC_LESS */
{0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
{0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
{0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
{0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
{1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
{0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */
};
const char *pcmpeq;
const char *pcmpgt;
LLVMValueRef args[2];
LLVMValueRef res;
LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
res = LLVMGetUndef(int_vec_type);
switch (type.width) {
case 8:
pcmpeq = "llvm.x86.sse2.pcmpeq.b";
pcmpgt = "llvm.x86.sse2.pcmpgt.b";
break;
case 16:
pcmpeq = "llvm.x86.sse2.pcmpeq.w";
pcmpgt = "llvm.x86.sse2.pcmpgt.w";
break;
case 32:
pcmpeq = "llvm.x86.sse2.pcmpeq.d";
pcmpgt = "llvm.x86.sse2.pcmpgt.d";
break;
default:
assert(0);
return lp_build_undef(gallivm, type);
}
if (gallivm_debug & GALLIVM_DEBUG_PERF) {
debug_printf("%s: using slow element-wise int"
" vector comparison\n", __FUNCTION__);
/* There are no unsigned comparison instructions. So flip the sign bit
* so that the results match.
*/
if (table[func].gt && !type.sign) {
LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
a = LLVMBuildXor(builder, a, msb, "");
b = LLVMBuildXor(builder, b, msb, "");
}
for(i = 0; i < type.length; ++i) {
LLVMValueRef index = lp_build_const_int32(gallivm, i);
cond = LLVMBuildICmp(builder, op,
LLVMBuildExtractElement(builder, a, index, ""),
LLVMBuildExtractElement(builder, b, index, ""),
"");
cond = LLVMBuildSelect(builder, cond,
LLVMConstExtractElement(ones, index),
LLVMConstExtractElement(zeros, index),
"");
res = LLVMBuildInsertElement(builder, res, cond, index, "");
if(table[func].swap) {
args[0] = b;
args[1] = a;
}
else {
args[0] = a;
args[1] = b;
}
if(table[func].eq)
res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
else if (table[func].gt)
res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
else
res = LLVMConstNull(vec_type);
if(table[func].not)
res = LLVMBuildNot(builder, res, "");
return res;
}
} /* if (type.width * type.length == 128) */
#endif
}
#endif /* HAVE_LLVM < 0x0207 */
return res;
return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
}
/**
* Build code to compare two values 'a' and 'b' using the given func.
* \param func one of PIPE_FUNC_x
* If the operands are floating point numbers, the function will use
* ordered comparison which means that it will return true if both
* operands are not a NaN and the specified condition evaluates to true.
* The result values will be 0 for false or ~0 for true.
*/
LLVMValueRef
lp_build_cmp_ordered(struct lp_build_context *bld,
unsigned func,
LLVMValueRef a,
LLVMValueRef b)
{
return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
}
/**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment