Skip to content
Snippets Groups Projects
Commit 739c7c01 authored by fbarchard@chromium.org's avatar fbarchard@chromium.org
Browse files

Mac YUV assembly

BUG=20749
TEST=play mac video... it should be 3 times faster rendering and visibly smoother 
Review URL: http://codereview.chromium.org/208030

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@26614 0039d316-1c4b-4281-b951-d872f2087c98
parent d7e80562
No related merge requests found
......@@ -63,9 +63,8 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
} // extern "C"
#if !defined(USE_MMX)
#if defined(_MSC_VER)
#define USE_MMX 1
#elif defined(OS_LINUX) && defined(ARCH_CPU_X86)
// Windows, Mac and Linux x86 use MMX; x64 and other CPUs do not.
#if defined(OS_WIN) || defined(ARCH_CPU_X86)
#define USE_MMX 1
#else
#define USE_MMX 0
......
......@@ -4,17 +4,9 @@
#include "media/base/yuv_row.h"
#ifdef _DEBUG
#include "base/logging.h"
#else
#define DCHECK(a)
#endif
// TODO(fbarchard): Make MMX work in DLLs. Currently only works in unittests.
// TODO(fbarchard): Do 64 bit version.
extern "C" {
#if USE_MMX
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
......@@ -37,7 +29,8 @@ extern "C" {
0 \
}
#define MMX_ALIGNED(var) var __attribute__((aligned(16)))
#define MMX_ALIGNED(var) \
var __attribute__ ((section ("__TEXT,__text"))) __attribute__ ((aligned(16)))
MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = {
RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
......@@ -245,75 +238,36 @@ MMX_ALIGNED(int16 kCoefficientsRgbV[256][4]) = {
#undef RGBV
#undef MMX_ALIGNED
// TODO(fbarchard): Use the following function instead of
// pure assembly to help make code more portable to 64 bit
// and Mac, which has different labels.
// no-gcse eliminates the frame pointer, freeing up ebp.
#if defined(FUTURE_64BIT_VERSION)
// Converts one row of YUV samples to 32-bit output pixels with MMX, using the
// kCoefficientsRgbY/U/V lookup tables (indexed by sample value, 8 bytes per
// entry). Each loop iteration reads one U byte, one V byte and two Y bytes,
// and stores 8 bytes (two pixels) to rgb_buf.
//
// NOTE(review): operands %0..%4 are declared as inputs only, yet the assembly
// advances/decrements all of them; they likely need to be read-write ("+r")
// operands — confirm against the GCC extended-asm rules.
// NOTE(review): the clobber list names only eax and ebx; the MMX registers
// written here and the stores to rgb_buf are not declared — confirm.
// NOTE(review): width is halved up front and there is no tail code, so an odd
// trailing pixel is not written. The counter is only tested after the body,
// so a width below 2 (count 0) looks like it would wrap instead of skipping
// the loop — confirm callers never pass such widths.
void __attribute__((optimize("O2", "no-gcse")))
NewFastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
asm(
// Loop count = width / 2 (two pixels per iteration).
"shr %4\n"
"1:\n"
// Fetch one U and one V sample and advance both chroma pointers.
"movzb (%1),%%eax\n"
"add $0x1,%1\n"
"movzb (%2),%%ebx\n"
"add $0x1,%2\n"
// mm0 = U table entry + V table entry (saturating add of 16-bit lanes).
"movq kCoefficientsRgbU(,%%eax,8),%%mm0\n"
"movzb (%0),%%eax\n"
"paddsw kCoefficientsRgbV(,%%ebx,8),%%mm0\n"
"movzb 0x1(%0),%%ebx\n"
// mm1 / mm2 = Y table entries for the two luma samples.
"movq kCoefficientsRgbY(,%%eax,8),%%mm1\n"
"add $0x2,%0\n"
"movq kCoefficientsRgbY(,%%ebx,8),%%mm2\n"
// Add the shared chroma contribution to both pixels.
"paddsw %%mm0,%%mm1\n"
"paddsw %%mm0,%%mm2\n"
// Arithmetic shift right by 6 on each 16-bit lane.
"psraw $0x6,%%mm1\n"
"psraw $0x6,%%mm2\n"
// Pack both pixels to unsigned bytes with saturation and store 8 bytes.
"packuswb %%mm2,%%mm1\n"
"movntq %%mm1,0x0(%3)\n"
"add $0x8,%3\n"
"sub $0x1,%4\n"
"jne 1b\n"
: : "r"(y_buf),"r"(u_buf),"r"(v_buf),"r"(rgb_buf),"r"(width)
: "eax","ebx");
}
#endif
extern void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
extern void MacConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int16 *kCoefficientsRgbY);
__asm__(
" .globl _FastConvertYUVToRGB32Row\n"
"_FastConvertYUVToRGB32Row:\n"
" .globl _MacConvertYUVToRGB32Row\n"
"_MacConvertYUVToRGB32Row:\n"
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"jmp convertend\n"
"mov 0x38(%esp),%ecx\n"
"jmp Lconvertend\n"
"convertloop:"
"Lconvertloop:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
"add $0x1,%esi\n"
"movq _kCoefficientsRgbU(,%eax,8),%mm0\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw _kCoefficientsRgbV(,%ebx,8),%mm0\n"
"paddsw 4096(%ecx,%ebx,8),%mm0\n"
"movzbl 0x1(%edx),%ebx\n"
"movq _kCoefficientsRgbY(,%eax,8),%mm1\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"add $0x2,%edx\n"
"movq _kCoefficientsRgbY(,%ebx,8),%mm2\n"
"movq 0(%ecx,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
......@@ -321,67 +275,76 @@ extern void FastConvertYUVToRGB32Row(const uint8* y_buf,
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
"convertend:"
"sub $0x2,%ecx\n"
"jns convertloop\n"
"Lconvertend:"
"sub $0x2,0x34(%esp)\n"
"jns Lconvertloop\n"
"and $0x1,%ecx\n"
"je convertdone\n"
"and $0x1,0x34(%esp)\n"
"je Lconvertdone\n"
"movzbl (%edi),%eax\n"
"movq _kCoefficientsRgbU(,%eax,8),%mm0\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"movzbl (%esi),%eax\n"
"paddsw _kCoefficientsRgbV(,%eax,8),%mm0\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"movq _kCoefficientsRgbY(,%eax,8),%mm1\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"convertdone:"
"Lconvertdone:\n"
"popa\n"
"ret\n"
);
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
&kCoefficientsRgbY[0][0]);
}
extern void ScaleYUVToRGB32Row(const uint8* y_buf,
extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx);
int scaled_dx,
int16 *kCoefficientsRgbY);
__asm__(
" .globl _ScaleYUVToRGB32Row\n"
"_ScaleYUVToRGB32Row:\n"
" .globl _MacScaleYUVToRGB32Row\n"
"_MacScaleYUVToRGB32Row:\n"
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"mov 0x3c(%esp),%ecx\n"
"xor %ebx,%ebx\n"
"jmp scaleend\n"
"jmp Lscaleend\n"
"scaleloop:"
"Lscaleloop:"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq kCoefficientsRgbU(,%eax,8),%mm0\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw kCoefficientsRgbV(,%eax,8),%mm0\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm2\n"
"movq 0(%ecx,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
......@@ -389,29 +352,29 @@ extern void ScaleYUVToRGB32Row(const uint8* y_buf,
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
"scaleend:"
"sub $0x2,%ecx\n"
"jns scaleloop\n"
"Lscaleend:"
"sub $0x2,0x34(%esp)\n"
"jns Lscaleloop\n"
"and $0x1,%ecx\n"
"je scaledone\n"
"and $0x1,0x34(%esp)\n"
"je Lscaledone\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq kCoefficientsRgbU(,%eax,8),%mm0\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw kCoefficientsRgbV(,%eax,8),%mm0\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm2\n"
"movq 0(%ecx,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
......@@ -419,194 +382,22 @@ extern void ScaleYUVToRGB32Row(const uint8* y_buf,
"packuswb %mm2,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"scaledone:"
"Lscaledone:"
"popa\n"
"ret\n"
);
#else // USE_MMX
// Reference version of YUV converter.
// Number of table entries that map a value to itself (0..255).
static const int kClipTableSize = 256;
// Number of padding entries on each side of the unclipped range.
static const int kClipOverflow = 288;  // Cb max is 535.

// Saturation lookup table used by clip(): index (v + kClipOverflow) yields v
// clamped to 0..255. Layout: kClipOverflow zeros, then the identity values
// 0x00..0xFF, then kClipOverflow entries of 0xFF.
// The table is only ever read, so it is declared const.
static const uint8 kRgbClipTable[kClipOverflow +
                                 kClipTableSize +
                                 kClipOverflow] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 288 underflow values
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // clipped to 0.
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  // Unclipped values.
  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
  0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
  0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
  0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
  0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
  0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
  0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
  0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
  0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
  0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
  0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // 288 overflow values
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // clipped to 255.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
// Clamps one colour channel to the 0..255 range with a table lookup.
// |value| carries 8 fractional bits, which the right shift discards; the
// remaining integer part is biased by kClipOverflow to index kRgbClipTable.
// The table holds kClipOverflow (288) padding entries on each side of the 256
// unclipped ones, so the integer part must lie in -288..543. The DCHECKs
// verify that range only in builds where DCHECK is active.
static inline uint32 clip(int32 value) {
  const int32 table_index = (value >> 8) + kClipOverflow;
  DCHECK(table_index >= 0);
  DCHECK(table_index < (kClipOverflow + kClipTableSize + kClipOverflow));
  return static_cast<uint32>(kRgbClipTable[table_index]);
}
// Converts a single (y, u, v) sample to one 32-bit pixel and stores it at
// |rgb_buf| through a uint32 pointer. The stored value packs blue in bits
// 0-7, green in bits 8-15, red in bits 16-23 and 0xff in bits 24-31.
// All arithmetic uses integers with 8 fractional bits; clip() drops the
// fraction and clamps each channel to 0..255.
// NOTE(review): the uint32 store presumably expects |rgb_buf| to be suitably
// aligned for a 4-byte write — confirm with callers.
static inline void YuvPixel(uint8 y,
                            uint8 u,
                            uint8 v,
                            uint8* rgb_buf) {
  // Chroma samples re-centred around zero.
  const int32 u_offset = static_cast<int32>(u) - 128;
  const int32 v_offset = static_cast<int32>(v) - 128;

  // Luma contribution shared by all three channels (includes a +128 bias).
  const int32 luma_term = (static_cast<int32>(y) - 16) * 298 + 128;

  // Per-channel chroma contributions (each includes its own +128 bias).
  const int32 blue_term = 516 * u_offset + 128;
  const int32 green_term = -100 * u_offset - 208 * v_offset + 128;
  const int32 red_term = 409 * v_offset + 128;

  const uint32 blue = clip(luma_term + blue_term);
  const uint32 green = clip(luma_term + green_term);
  const uint32 red = clip(luma_term + red_term);

  uint32* const pixel = reinterpret_cast<uint32*>(rgb_buf);
  *pixel = blue | (green << 8) | (red << 16) | 0xff000000;
}
// Reference (non-MMX) row converter. Walks the row two pixels at a time: each
// pair shares one U and one V sample (index x / 2) and uses two consecutive Y
// samples. Every pixel occupies 4 bytes in |rgb_buf|. When |width| is odd the
// final pair writes only its first pixel.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  int x = 0;
  while (x < width) {
    const int chroma_index = x >> 1;
    const uint8 u = u_buf[chroma_index];
    const uint8 v = v_buf[chroma_index];

    // First pixel of the pair is always present.
    YuvPixel(y_buf[x], u, v, rgb_buf);

    // Second pixel only if it is still inside the row.
    const int second = x + 1;
    if (second < width) {
      YuvPixel(y_buf[second], u, v, rgb_buf + 4);
    }

    rgb_buf += 8;  // Advance 2 pixels.
    x += 2;
  }
}
// Reference (non-MMX) scaling row converter.
// Writes |width| 32-bit pixels to |rgb_buf|, picking the nearest source sample
// for each output pixel (no interpolation is performed).
// The source position is kept in 28.4 fixed point and advances by |scaled_dx|
// per output pixel. A right shift by 4 isolates the integer luma index; a
// right shift by 5 additionally halves it to index the subsampled chrominance
// rows.
// The loop converts the whole row by itself. This is the !USE_MMX path, so no
// MMX helper is called afterwards: those helpers and their coefficient tables
// are only declared when USE_MMX is set, and |rgb_buf| already points past the
// end of the row once the loop finishes.
void ScaleYUVToRGB32Row(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width,
                        int scaled_dx) {
  int scaled_x = 0;
  for (int x = 0; x < width; ++x) {
    const uint8 u = u_buf[scaled_x >> 5];
    const uint8 v = v_buf[scaled_x >> 5];
    const uint8 y0 = y_buf[scaled_x >> 4];
    YuvPixel(y0, u, v, rgb_buf);
    rgb_buf += 4;  // Advance 1 pixel.
    scaled_x += scaled_dx;
  }
}
#endif // USE_MMX
} // extern "C"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment