/* GStreamer
 * Copyright (C) <2014> Wim Taymans <wim.taymans@gmail.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <string.h>
#include <stdio.h>
#include <math.h>

28 29 30 31 32 33 34 35 36
/**
 * SECTION:gstvideoscaler
 * @short_description: Utility object for rescaling video frames
 *
 * #GstVideoScaler is a utility object for rescaling and resampling
 * video frames using various interpolation / sampling methods.
 *
 */

37
#ifndef DISABLE_ORC
38
#include <orc/orcfunctions.h>
39 40 41 42
#else
#define orc_memcpy memcpy
#endif

43
#include "video-orc.h"
44 45
#include "video-scaler.h"

Wim Taymans's avatar
Wim Taymans committed
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
#ifndef GST_DISABLE_GST_DEBUG
#define GST_CAT_DEFAULT ensure_debug_category()
static GstDebugCategory *
ensure_debug_category (void)
{
  static gsize cat_gonce = 0;

  if (g_once_init_enter (&cat_gonce)) {
    gsize cat_done;

    cat_done = (gsize) _gst_debug_category_new ("video-scaler", 0,
        "video-scaler object");

    g_once_init_leave (&cat_gonce, cat_done);
  }

  return (GstDebugCategory *) cat_gonce;
}

#else
#define ensure_debug_category() /* NOOP */
#endif /* GST_DISABLE_GST_DEBUG */

69 70 71 72 73 74
/* Fixed-point precision (in bits) handed to make_s16_taps() when the
 * floating point filter taps are converted to gint16:
 *   SCALE_U8     - 8-bit kernels when LQ is not defined
 *   SCALE_U8_LQ  - 8-bit kernels when LQ is defined
 *   SCALE_U16    - 16-bit kernels
 * Each *_ROUND value is half of one unit at that precision
 * (1 << (bits - 1)). */
#define SCALE_U8          12
#define SCALE_U8_ROUND    (1 << (SCALE_U8 -1))
#define SCALE_U8_LQ       6
#define SCALE_U8_LQ_ROUND (1 << (SCALE_U8_LQ -1))
#define SCALE_U16         12
#define SCALE_U16_ROUND   (1 << (SCALE_U16 -1))
75

76 77
/* When defined, the 8-bit kernels in this file call the "_lq" ORC variants
 * and convert their taps with SCALE_U8_LQ instead of SCALE_U8. */
#define LQ

78
/* Horizontal kernel: scales one line @src into @dest. @dest_offset and
 * @width are expressed in output elements, @n_elems is the number of
 * components per element. */
typedef void (*GstVideoScalerHFunc) (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems);

/* Vertical kernel: combines the lines in @srcs into the single output line
 * @dest. @dest_offset selects the output line (and thereby the taps). */
typedef void (*GstVideoScalerVFunc) (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width,
    guint n_elems);
83 84 85

struct _GstVideoScaler
{
86
  GstVideoResamplerMethod method;
87 88
  GstVideoScalerFlags flags;

89
  GstVideoResampler resampler;
90

91 92 93 94
  gboolean merged;
  gint in_y_offset;
  gint out_y_offset;

95 96
  /* cached integer coefficients */
  gint16 *taps_s16;
97 98
  gint16 *taps_s16_4;
  guint32 *offset_n;
99 100
  /* for ORC */
  gint inc;
101

102
  gint tmpwidth;
Wim Taymans's avatar
Wim Taymans committed
103 104
  gpointer tmpline1;
  gpointer tmpline2;
105 106 107
};

static void
108 109
resampler_zip (GstVideoResampler * resampler, const GstVideoResampler * r1,
    const GstVideoResampler * r2)
110
{
111
  guint i, out_size, max_taps, n_phases;
112
  gdouble *taps;
113
  guint32 *offset, *phase;
114 115 116 117 118

  g_return_if_fail (r1->max_taps == r2->max_taps);

  out_size = r1->out_size + r2->out_size;
  max_taps = r1->max_taps;
119
  n_phases = out_size;
120
  offset = g_malloc (sizeof (guint32) * out_size);
121 122
  phase = g_malloc (sizeof (guint32) * n_phases);
  taps = g_malloc (sizeof (gdouble) * max_taps * n_phases);
123 124 125 126

  resampler->in_size = r1->in_size + r2->in_size;
  resampler->out_size = out_size;
  resampler->max_taps = max_taps;
127
  resampler->n_phases = n_phases;
128 129
  resampler->offset = offset;
  resampler->phase = phase;
130
  resampler->n_taps = g_malloc (sizeof (guint32) * out_size);
131 132 133 134
  resampler->taps = taps;

  for (i = 0; i < out_size; i++) {
    guint idx = i / 2;
135
    const GstVideoResampler *r;
136 137 138 139 140 141

    r = (i & 1) ? r2 : r1;

    offset[i] = r->offset[idx] * 2 + (i & 1);
    phase[i] = i;

142
    memcpy (taps + i * max_taps, r->taps + r->phase[idx] * max_taps,
143 144 145 146
        max_taps * sizeof (gdouble));
  }
}

147
static void
148
realloc_tmplines (GstVideoScaler * scale, gint n_elems, gint width)
149 150 151
{
  scale->tmpline1 =
      g_realloc (scale->tmpline1,
152 153 154
      sizeof (gint32) * width * n_elems * scale->resampler.max_taps);
  scale->tmpline2 =
      g_realloc (scale->tmpline2, sizeof (gint32) * width * n_elems);
155 156 157
  scale->tmpwidth = width;
}

Wim Taymans's avatar
Wim Taymans committed
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
/* Debug helper printing the offset, phase and taps of every output entry.
 * The body is compiled out with #if 0, so calling it does nothing. */
static void
scaler_dump (GstVideoScaler * scale)
{
#if 0
  GstVideoResampler *r = &scale->resampler;
  gint in_size = r->in_size;
  gint out_size = r->out_size;
  gint max_taps = r->max_taps;
  guint32 *offset = r->offset;
  guint32 *phase = r->phase;
  gdouble *taps = r->taps;
  gint row, col;

  g_print ("in %d, out %d, max_taps %d, n_phases %d\n", in_size, out_size,
      max_taps, r->n_phases);

  for (row = 0; row < out_size; row++) {
    g_print ("%d: \t%d \t%d:", row, offset[row], phase[row]);

    for (col = 0; col < max_taps; col++)
      g_print ("\t%f", taps[row * max_taps + col]);

    g_print ("\n");
  }
#endif
}

188 189
#define INTERLACE_SHIFT 0.5

190
/**
191
 * gst_video_scaler_new: (skip)
192
 * @method: a #GstVideoResamplerMethod
193 194 195 196
 * @flags: #GstVideoScalerFlags
 * @n_taps: number of taps to use
 * @in_size: number of source elements
 * @out_size: number of destination elements
197
 * @options: (allow-none): extra options
198 199 200 201 202 203 204 205 206 207 208
 *
 * Make a new @method video scaler. @in_size source lines/pixels will
 * be scaled to @out_size destination lines/pixels.
 *
 * @n_taps specifies the amount of pixels to use from the source for one output
 * pixel. If n_taps is 0, this function chooses a good value automatically based
 * on the @method and @in_size/@out_size.
 *
 * Returns: a #GstVideoResample
 */
GstVideoScaler *
209
gst_video_scaler_new (GstVideoResamplerMethod method, GstVideoScalerFlags flags,
210
    guint n_taps, guint in_size, guint out_size, GstStructure * options)
211 212 213 214 215 216 217 218 219 220 221 222 223 224
{
  GstVideoScaler *scale;

  g_return_val_if_fail (in_size != 0, NULL);
  g_return_val_if_fail (out_size != 0, NULL);

  scale = g_slice_new0 (GstVideoScaler);

  GST_DEBUG ("%d %u  %u->%u", method, n_taps, in_size, out_size);

  scale->method = method;
  scale->flags = flags;

  if (flags & GST_VIDEO_SCALER_FLAG_INTERLACED) {
225
    GstVideoResampler tresamp, bresamp;
226 227 228
    gdouble shift;

    shift = (INTERLACE_SHIFT * out_size) / in_size;
229

230 231 232
    gst_video_resampler_init (&tresamp, method,
        GST_VIDEO_RESAMPLER_FLAG_HALF_TAPS, (out_size + 1) / 2, n_taps, shift,
        (in_size + 1) / 2, (out_size + 1) / 2, options);
233

234 235
    n_taps = tresamp.max_taps;

236
    gst_video_resampler_init (&bresamp, method, 0, out_size - tresamp.out_size,
237
        n_taps, -shift, in_size - tresamp.in_size,
238
        out_size - tresamp.out_size, options);
239 240

    resampler_zip (&scale->resampler, &tresamp, &bresamp);
241 242
    gst_video_resampler_clear (&tresamp);
    gst_video_resampler_clear (&bresamp);
243
  } else {
244 245 246
    gst_video_resampler_init (&scale->resampler, method,
        GST_VIDEO_RESAMPLER_FLAG_NONE, out_size, n_taps, 0.0, in_size, out_size,
        options);
247
  }
248 249 250 251 252 253

  if (out_size == 1)
    scale->inc = 0;
  else
    scale->inc = ((in_size - 1) << 16) / (out_size - 1) - 1;

Wim Taymans's avatar
Wim Taymans committed
254
  scaler_dump (scale);
255
  GST_DEBUG ("max_taps %d", scale->resampler.max_taps);
Wim Taymans's avatar
Wim Taymans committed
256

257 258 259 260 261 262 263 264 265 266 267 268 269 270
  return scale;
}

/**
 * gst_video_scaler_free:
 * @scale: a #GstVideoScaler
 *
 * Release @scale together with its resampler, its cached integer taps and
 * its temporary lines.
 */
void
gst_video_scaler_free (GstVideoScaler * scale)
{
  g_return_if_fail (scale != NULL);

  /* scratch lines */
  g_free (scale->tmpline2);
  g_free (scale->tmpline1);

  /* cached integer coefficients */
  g_free (scale->offset_n);
  g_free (scale->taps_s16_4);
  g_free (scale->taps_s16);

  gst_video_resampler_clear (&scale->resampler);

  g_slice_free (GstVideoScaler, scale);
}

280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
/**
 * gst_video_scaler_get_max_taps:
 * @scale: a #GstVideoScaler
 *
 * Query how many taps the resampler of @scale uses at most.
 *
 * Returns: the maximum number of taps, or 0 when @scale is %NULL
 */
guint
gst_video_scaler_get_max_taps (GstVideoScaler * scale)
{
  const GstVideoResampler *resampler;

  g_return_val_if_fail (scale != NULL, 0);

  resampler = &scale->resampler;

  return resampler->max_taps;
}

296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385
/**
 * gst_video_scaler_get_coeff:
 * @scale: a #GstVideoScaler
 * @out_offset: an output offset
 * @in_offset: result input offset
 * @n_taps: result n_taps
 *
 * For a given pixel at @out_offset, get the first required input pixel at
 * @in_offset and the @n_taps filter coefficients.
 *
 * Note that for interlaced content, @in_offset needs to be incremented with
 * 2 to get the next input line, and the value stored in @n_taps is twice the
 * maximum number of taps of the scaler.
 *
 * Returns: an array of gdouble filter coefficients for @out_offset, or %NULL
 * when @scale is %NULL or @out_offset is out of range.
 */
const gdouble *
gst_video_scaler_get_coeff (GstVideoScaler * scale,
    guint out_offset, guint * in_offset, guint * n_taps)
{
  const GstVideoResampler *r;
  guint row;

  g_return_val_if_fail (scale != NULL, NULL);
  g_return_val_if_fail (out_offset < scale->resampler.out_size, NULL);

  r = &scale->resampler;
  row = r->phase[out_offset];

  if (in_offset != NULL)
    *in_offset = r->offset[out_offset];

  if (n_taps != NULL) {
    guint reported = r->max_taps;

    if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
      reported *= 2;
    *n_taps = reported;
  }

  return r->taps + row * r->max_taps;
}

/* Convert @n floating point taps in @src to gint16 fixed point values with
 * @precision fractional bits, stored in @dest. The rounding bias is bisected
 * (at most 64 rounds) until the integer taps add up to exactly
 * 1 << @precision, which removes the DC error. @bits is not used.
 *
 * Returns TRUE when an exact set was found. Otherwise a warning is logged,
 * FALSE is returned and @dest holds the last attempt. */
static gboolean
resampler_convert_coeff (const gdouble * src,
    gpointer dest, guint n, guint bits, guint precision)
{
  const gint unity = 1 << precision;
  const gdouble factor = unity;
  gint16 *out = (gint16 *) dest;
  gdouble bias = 0.5, low = 0.0, high = 1.0;
  gint round;

  for (round = 0; round < 64; round++) {
    gint total = 0;
    guint k;

    for (k = 0; k < n; k++) {
      gint16 tap = floor (bias + src[k] * factor);

      out[k] = tap;
      total += tap;
    }

    if (total == unity)
      return TRUE;

    /* the search interval collapsed, no exact solution can be reached */
    if (low == high)
      break;

    if (total < unity) {
      /* sum too small: raise the lower bound and move the bias up */
      if (bias > low)
        low = bias;
      bias += (high - low) / 2;
    } else {
      /* sum too large: lower the upper bound and move the bias down */
      if (bias < high)
        high = bias;
      bias -= (high - low) / 2;
    }
  }

  GST_WARNING ("can't find exact taps");

  return FALSE;
}

static void
386
make_s16_taps (GstVideoScaler * scale, gint n_elems, gint precision)
387
{
388
  gint i, j, max_taps, n_phases, out_size, src_inc;
389
  gint16 *taps_s16, *taps_s16_4;
390
  gdouble *taps;
391
  guint32 *phase, *offset, *offset_n;
392 393 394 395 396 397 398 399

  n_phases = scale->resampler.n_phases;
  max_taps = scale->resampler.max_taps;

  taps = scale->resampler.taps;
  taps_s16 = scale->taps_s16 = g_malloc (sizeof (gint16) * n_phases * max_taps);

  for (i = 0; i < n_phases; i++) {
400
    resampler_convert_coeff (taps, taps_s16, max_taps, 16, precision);
401 402 403 404

    taps += max_taps;
    taps_s16 += max_taps;
  }
405 406 407 408 409 410 411 412 413 414 415 416

  out_size = scale->resampler.out_size;

  taps_s16 = scale->taps_s16;
  phase = scale->resampler.phase;
  offset = scale->resampler.offset;

  taps_s16_4 = scale->taps_s16_4 =
      g_malloc (sizeof (gint16) * out_size * max_taps * 4);
  offset_n = scale->offset_n =
      g_malloc (sizeof (guint32) * out_size * max_taps);

417 418 419 420
  if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
    src_inc = 2;
  else
    src_inc = 1;
421

422 423 424 425
  for (j = 0; j < max_taps; j++) {
    for (i = 0; i < out_size; i++) {
      gint16 tap;

426 427 428 429 430 431 432 433
      if (scale->merged) {
        if ((i & 1) == scale->out_y_offset)
          offset_n[j * out_size + i] = offset[i] + (2 * j);
        else
          offset_n[j * out_size + i] = offset[i] + (4 * j);
      } else {
        offset_n[j * out_size + i] = offset[i] + j * src_inc;
      }
434
      tap = taps_s16[phase[i] * max_taps + j];
435 436 437 438 439 440 441
      taps_s16_4[(j * out_size + i) * n_elems + 0] = tap;
      if (n_elems > 1)
        taps_s16_4[(j * out_size + i) * n_elems + 1] = tap;
      if (n_elems > 2)
        taps_s16_4[(j * out_size + i) * n_elems + 2] = tap;
      if (n_elems > 3)
        taps_s16_4[(j * out_size + i) * n_elems + 3] = tap;
442 443
    }
  }
444 445
}

446 447
#undef ACC_SCALE

448 449
/* Nearest-neighbour horizontal scaling of single-byte elements: output
 * element x (starting at @dest_offset) is a copy of the source element
 * named by the resampler offset table. @n_elems is not used. */
static void
video_scale_h_near_u8 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
  guint8 *out = (guint8 *) dest + dest_offset;
  guint8 *in = (guint8 *) src;
  guint x;

#ifndef ACC_SCALE
  {
    guint32 *lookup = scale->resampler.offset + dest_offset;

    for (x = 0; x < width; x++)
      out[x] = in[lookup[x]];
  }
#else
  {
    gint acc = 0;

    for (x = 0; x < width; x++) {
      gint pos = (acc + 0x8000) >> 16;

      out[x] = in[pos];
      acc += scale->inc;
    }
  }
#endif
}

476 477 478 479 480 481 482 483 484 485
/* Nearest-neighbour horizontal scaling of 3-byte elements: output element i
 * (starting at @dest_offset) is a copy of the three bytes of the source
 * element named by the resampler offset table. @n_elems is not used.
 *
 * @dest_offset counts elements, like the index into the offset table, so the
 * destination pointer advances by 3 bytes per element (as the 3-component
 * case of video_scale_h_ntap_u8() does). */
static void
video_scale_h_near_3u8 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
  guint8 *s, *d;
  gint i;

  d = (guint8 *) dest + dest_offset * 3;
  s = (guint8 *) src;

  {
#ifndef ACC_SCALE
    guint32 *offset = scale->resampler.offset + dest_offset;

    for (i = 0; i < width; i++) {
      gint j = offset[i] * 3;

      d[i * 3 + 0] = s[j + 0];
      d[i * 3 + 1] = s[j + 1];
      d[i * 3 + 2] = s[j + 2];
    }
#else
    gint acc = 0;

    for (i = 0; i < width; i++) {
      gint j = ((acc + 0x8000) >> 16) * 3;

      d[i * 3 + 0] = s[j + 0];
      d[i * 3 + 1] = s[j + 1];
      d[i * 3 + 2] = s[j + 2];
      acc += scale->inc;
    }
#endif
  }
}

/* Nearest-neighbour horizontal scaling of 16-bit elements: output element x
 * (starting at @dest_offset) is a copy of the source element named by the
 * resampler offset table. @n_elems is not used. */
static void
video_scale_h_near_u16 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
  guint16 *out = (guint16 *) dest + dest_offset;
  guint16 *in = (guint16 *) src;
  guint x;

#ifndef ACC_SCALE
  {
    guint32 *lookup = scale->resampler.offset + dest_offset;

    for (x = 0; x < width; x++)
      out[x] = in[lookup[x]];
  }
#else
  {
    gint acc = 0;

    for (x = 0; x < width; x++) {
      gint pos = (acc + 0x8000) >> 16;

      out[x] = in[pos];
      acc += scale->inc;
    }
  }
#endif
}

540
static void
541
video_scale_h_near_u32 (GstVideoScaler * scale,
542
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
543
{
544
  guint32 *s, *d;
545 546 547 548

  d = (guint32 *) dest + dest_offset;
  s = (guint32 *) src;

549 550
#if 0
  /* ORC is slower on this */
551
  video_orc_resample_h_near_u32_lq (d, s, 0, scale->inc, width);
552 553
#elif 0
  video_orc_resample_h_near_u32 (d, s, offset, width);
554 555 556
#else
  {
    gint i;
557 558 559
#ifndef ACC_SCALE
    guint32 *offset = scale->resampler.offset + dest_offset;

560 561
    for (i = 0; i < width; i++)
      d[i] = s[offset[i]];
562 563 564 565 566 567 568 569 570
#else
    gint acc = 0;

    for (i = 0; i < width; i++) {
      gint j = (acc + 0x8000) >> 16;
      d[i] = s[j];
      acc += scale->inc;
    }
#endif
571 572
  }
#endif
573 574
}

575
static void
576
video_scale_h_near_u64 (GstVideoScaler * scale,
577
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
578 579 580 581 582 583 584 585 586 587 588 589 590
{
  guint64 *s, *d;
  gint i;
  guint32 *offset;

  d = (guint64 *) dest + dest_offset;
  s = (guint64 *) src;

  offset = scale->resampler.offset + dest_offset;
  for (i = 0; i < width; i++)
    d[i] = s[offset[i]];
}

591
static void
592 593
video_scale_h_2tap_1u8 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
594 595 596 597 598 599
{
  guint8 *s, *d;

  d = (guint8 *) dest + dest_offset;
  s = (guint8 *) src;

600
  video_orc_resample_h_2tap_1u8_lq (d, s, 0, scale->inc, width);
601
}
602 603 604

/* 2-tap horizontal scaling of 4-byte elements, delegated to the ORC "_lq"
 * kernel, which is given a start value of 0 and the precomputed increment
 * scale->inc. @n_elems is not used. */
static void
video_scale_h_2tap_4u8 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
  guint32 *in = (guint32 *) src;
  guint32 *out = (guint32 *) dest + dest_offset;

  video_orc_resample_h_2tap_4u8_lq (out, in, 0, scale->inc, width);
}
614 615

static void
616
video_scale_h_ntap_u8 (GstVideoScaler * scale,
617
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
618
{
619
  gint16 *taps;
620 621
  gint i, max_taps, count;
  gpointer d;
622
  guint32 *offset_n;
623 624
  guint8 *pixels;
  gint16 *temp;
625 626

  if (scale->taps_s16 == NULL)
627
#ifdef LQ
628
    make_s16_taps (scale, n_elems, SCALE_U8_LQ);
629
#else
630
    make_s16_taps (scale, n_elems, SCALE_U8);
631
#endif
632 633

  max_taps = scale->resampler.max_taps;
634 635
  offset_n = scale->offset_n;

636
  pixels = (guint8 *) scale->tmpline1;
637

638
  /* prepare the arrays */
639
  count = width * max_taps;
640
  switch (n_elems) {
641 642 643 644 645 646 647 648 649 650
    case 1:
    {
      guint8 *s = (guint8 *) src;

      for (i = 0; i < count; i++)
        pixels[i] = s[offset_n[i]];

      d = (guint8 *) dest + dest_offset;
      break;
    }
651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
    case 2:
    {
      guint16 *p16 = (guint16 *) pixels;
      guint16 *s = (guint16 *) src;

      for (i = 0; i < count; i++)
        p16[i] = s[offset_n[i]];

      d = (guint16 *) dest + dest_offset;
      break;
    }
    case 3:
    {
      guint8 *s = (guint8 *) src;

      for (i = 0; i < count; i++) {
667 668 669 670
        gint j = offset_n[i] * 3;
        pixels[i * 3 + 0] = s[j + 0];
        pixels[i * 3 + 1] = s[j + 1];
        pixels[i * 3 + 2] = s[j + 2];
671 672 673 674
      }
      d = (guint8 *) dest + dest_offset * 3;
      break;
    }
675 676 677 678
    case 4:
    {
      guint32 *p32 = (guint32 *) pixels;
      guint32 *s = (guint32 *) src;
679
#if 0
680
      video_orc_resample_h_near_u32 (p32, s, offset_n, count);
681
#else
682 683
      for (i = 0; i < count; i++)
        p32[i] = s[offset_n[i]];
684
#endif
685 686 687 688 689 690 691
      d = (guint32 *) dest + dest_offset;
      break;
    }
    default:
      return;
  }
  temp = (gint16 *) scale->tmpline2;
692
  taps = scale->taps_s16_4;
693
  count = width * n_elems;
694

695
#ifdef LQ
696 697 698
  if (max_taps == 2) {
    video_orc_resample_h_2tap_u8_lq (d, pixels, pixels + count, taps,
        taps + count, count);
699
  } else {
700 701 702
    /* first pixels with first tap to temp */
    if (max_taps >= 3) {
      video_orc_resample_h_multaps3_u8_lq (temp, pixels, pixels + count,
703
          pixels + count * 2, taps, taps + count, taps + count * 2, count);
704
      max_taps -= 3;
705
      pixels += count * 3;
706 707
      taps += count * 3;
    } else {
708 709 710 711 712 713 714 715
      gint first = max_taps % 3;

      video_orc_resample_h_multaps_u8_lq (temp, pixels, taps, count);
      video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels + count, count,
          taps + count, count * 2, count, first - 1);
      max_taps -= first;
      pixels += count * first;
      taps += count * first;
716
    }
717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743
    while (max_taps > 3) {
      if (max_taps >= 6) {
        video_orc_resample_h_muladdtaps3_u8_lq (temp, pixels, pixels + count,
            pixels + count * 2, taps, taps + count, taps + count * 2, count);
        max_taps -= 3;
        pixels += count * 3;
        taps += count * 3;
      } else {
        video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels, count,
            taps, count * 2, count, max_taps - 3);
        pixels += count * (max_taps - 3);
        taps += count * (max_taps - 3);
        max_taps = 3;
      }
    }
    if (max_taps == 3) {
      video_orc_resample_h_muladdscaletaps3_u8_lq (d, pixels, pixels + count,
          pixels + count * 2, taps, taps + count, taps + count * 2, temp,
          count);
    } else {
      if (max_taps) {
        /* add other pixels with other taps to t4 */
        video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels, count,
            taps, count * 2, count, max_taps);
      }
      /* scale and write final result */
      video_orc_resample_scaletaps_u8_lq (d, temp, count);
744 745
    }
  }
746 747
#else
  /* first pixels with first tap to t4 */
748
  video_orc_resample_h_multaps_u8 (temp, pixels, taps, count);
749
  /* add other pixels with other taps to t4 */
750
  video_orc_resample_h_muladdtaps_u8 (temp, 0, pixels + count, count,
751 752
      taps + count, count * 2, count, max_taps - 1);
  /* scale and write final result */
753
  video_orc_resample_scaletaps_u8 (d, temp, count);
754
#endif
755 756
}

757
static void
758
video_scale_h_ntap_u16 (GstVideoScaler * scale,
759
    gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
760 761 762
{
  gint16 *taps;
  gint i, max_taps, count;
763
  gpointer d;
764
  guint32 *offset_n;
765
  guint16 *pixels;
766 767 768
  gint32 *temp;

  if (scale->taps_s16 == NULL)
769
    make_s16_taps (scale, n_elems, SCALE_U16);
770 771 772 773

  max_taps = scale->resampler.max_taps;
  offset_n = scale->offset_n;

774
  pixels = (guint16 *) scale->tmpline1;
775 776
  /* prepare the arrays FIXME, we can add this into ORC */
  count = width * max_taps;
777
  switch (n_elems) {
778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
    case 1:
    {
      guint16 *s = (guint16 *) src;

      for (i = 0; i < count; i++)
        pixels[i] = s[offset_n[i]];

      d = (guint16 *) dest + dest_offset;
      break;
    }
    case 4:
    {
      guint64 *p64 = (guint64 *) pixels;
      guint64 *s = (guint64 *) src;
#if 0
      video_orc_resample_h_near_u32 (p32, s, offset_n, count);
#else
      for (i = 0; i < count; i++)
        p64[i] = s[offset_n[i]];
#endif
      d = (guint64 *) dest + dest_offset;
      break;
    }
    default:
      return;
  }
804 805 806

  temp = (gint32 *) scale->tmpline2;
  taps = scale->taps_s16_4;
807
  count = width * n_elems;
808

809 810 811 812 813 814 815 816 817 818 819 820
  if (max_taps == 2) {
    video_orc_resample_h_2tap_u16 (d, pixels, pixels + count, taps,
        taps + count, count);
  } else {
    /* first pixels with first tap to t4 */
    video_orc_resample_h_multaps_u16 (temp, pixels, taps, count);
    /* add other pixels with other taps to t4 */
    video_orc_resample_h_muladdtaps_u16 (temp, 0, pixels + count, count * 2,
        taps + count, count * 2, count, max_taps - 1);
    /* scale and write final result */
    video_orc_resample_scaletaps_u16 (d, temp, count);
  }
821 822 823
}

static void
824 825 826 827 828
video_scale_v_near_u8 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width,
    guint n_elems)
{
  if (dest != srcs[0])
829
    memcpy (dest, srcs[0], n_elems * width);
830 831 832 833
}

/* Nearest-neighbour vertical scaling for 16-bit components: the output line
 * is a copy of the first source line (@n_elems * 2 * @width bytes). Nothing
 * is copied when @dest already is that line. */
static void
video_scale_v_near_u16 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width,
    guint n_elems)
{
  gpointer line = srcs[0];

  if (dest == line)
    return;

  memcpy (dest, line, n_elems * 2 * width);
}

static void
842 843
video_scale_v_2tap_u8 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width,
844
    guint n_elems)
845
{
846
  gint max_taps, src_inc;
847
  guint8 *s1, *s2, *d;
848
  gint16 p1;
849 850 851

  if (scale->taps_s16 == NULL)
#ifdef LQ
852
    make_s16_taps (scale, n_elems, SCALE_U8_LQ + 2);
853
#else
854
    make_s16_taps (scale, n_elems, SCALE_U8);
855 856 857 858
#endif

  max_taps = scale->resampler.max_taps;

859 860 861 862 863
  if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
    src_inc = 2;
  else
    src_inc = 1;

864 865 866
  d = (guint8 *) dest;
  s1 = (guint8 *) srcs[0 * src_inc];
  s2 = (guint8 *) srcs[1 * src_inc];
867 868 869
  p1 = scale->taps_s16[dest_offset * max_taps + 1];

#ifdef LQ
870
  video_orc_resample_v_2tap_u8_lq (d, s1, s2, p1, width * n_elems);
871
#else
872
  video_orc_resample_v_2tap_u8 (d, s1, s2, p1, width * n_elems);
873 874 875
#endif
}

876
static void
877 878
video_scale_v_2tap_u16 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width,
879
    guint n_elems)
880
{
881
  gint max_taps, src_inc;
882
  guint16 *s1, *s2, *d;
883 884 885
  gint16 p1;

  if (scale->taps_s16 == NULL)
886
    make_s16_taps (scale, n_elems, SCALE_U16);
887

888 889
  max_taps = scale->resampler.max_taps;

890 891 892 893 894
  if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
    src_inc = 2;
  else
    src_inc = 1;

895 896 897
  d = (guint16 *) dest;
  s1 = (guint16 *) srcs[0 * src_inc];
  s2 = (guint16 *) srcs[1 * src_inc];
898 899
  p1 = scale->taps_s16[dest_offset * max_taps + 1];

900
  video_orc_resample_v_2tap_u16 (d, s1, s2, p1, width * n_elems);
901 902
}

903 904 905 906 907 908 909 910 911 912 913 914
#if 0
/* Disabled experiment: 4-tap horizontal scaling of 4-byte pixels through a
 * single ORC kernel. Kept for reference only; it is never compiled. */
static void
video_scale_h_4tap_8888 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width)
{
  gint16 *taps;
  gint i, max_taps, count;
  guint8 *d;
  guint32 *offset_n;
  guint32 *pixels;

  if (scale->taps_s16 == NULL)
    make_s16_taps (scale, n_elems, S16_SCALE);

  max_taps = scale->resampler.max_taps;
  offset_n = scale->offset_n;

  d = (guint8 *) dest + 4 * dest_offset;

  /* prepare the arrays FIXME, we can add this into ORC */
  count = width * max_taps;
  pixels = (guint32 *) scale->tmpline1;
  for (i = 0; i < count; i++)
    pixels[i] = ((guint32 *) src)[offset_n[i]];

  taps = scale->taps_s16_4;
  count = width * 4;

  video_orc_resample_h_4tap_8 (d, pixels, pixels + width, pixels + 2 * width,
      pixels + 3 * width, taps, taps + count, taps + 2 * count,
      taps + 3 * count, count);
}
#endif

937
static void
938 939
video_scale_v_4tap_u8 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width,
940
    guint n_elems)
941 942
{
  gint max_taps;
943
  guint8 *s1, *s2, *s3, *s4, *d;
944
  gint p1, p2, p3, p4, src_inc;
945 946 947 948
  gint16 *taps;

  if (scale->taps_s16 == NULL)
#ifdef LQ
949
    make_s16_taps (scale, n_elems, SCALE_U8_LQ);
950
#else
951
    make_s16_taps (scale, n_elems, SCALE_U8);
952 953 954 955 956
#endif

  max_taps = scale->resampler.max_taps;
  taps = scale->taps_s16 + dest_offset * max_taps;

957 958 959 960 961
  if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
    src_inc = 2;
  else
    src_inc = 1;

962 963 964 965 966
  d = (guint8 *) dest;
  s1 = (guint8 *) srcs[0 * src_inc];
  s2 = (guint8 *) srcs[1 * src_inc];
  s3 = (guint8 *) srcs[2 * src_inc];
  s4 = (guint8 *) srcs[3 * src_inc];
967 968 969 970 971 972
  p1 = taps[0];
  p2 = taps[1];
  p3 = taps[2];
  p4 = taps[3];

#ifdef LQ
973
  video_orc_resample_v_4tap_u8_lq (d, s1, s2, s3, s4, p1, p2, p3, p4,
974
      width * n_elems);
975
#else
976
  video_orc_resample_v_4tap_u8 (d, s1, s2, s3, s4, p1, p2, p3, p4,
977
      width * n_elems);
978 979 980
#endif
}

981
static void
982 983
video_scale_v_ntap_u8 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width,
984
    guint n_elems)
985
{
986 987
  gint16 *taps;
  gint i, max_taps, count, src_inc;
988 989
  gpointer d;
  gint16 *temp;
990 991

  if (scale->taps_s16 == NULL)
992
#ifdef LQ
993
    make_s16_taps (scale, n_elems, SCALE_U8_LQ);
994
#else
995
    make_s16_taps (scale, n_elems, SCALE_U8);
996
#endif
997 998

  max_taps = scale->resampler.max_taps;
999 1000
  taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps);

1001
  d = (guint32 *) dest;
1002 1003 1004 1005 1006 1007

  if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
    src_inc = 2;
  else
    src_inc = 1;

1008
  temp = (gint16 *) scale->tmpline2;
1009
  count = width * n_elems;
1010

1011
#ifdef LQ
1012
  if (max_taps >= 4) {
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056
    video_orc_resample_v_multaps4_u8_lq (temp, srcs[0], srcs[1 * src_inc],
        srcs[2 * src_inc], srcs[3 * src_inc], taps[0], taps[1], taps[2],
        taps[3], count);
    max_taps -= 4;
    srcs += 4 * src_inc;
    taps += 4;
  } else {
    gint first = (max_taps % 4);

    video_orc_resample_v_multaps_u8_lq (temp, srcs[0], taps[0], count);
    for (i = 1; i < first; i++) {
      video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
          count);
    }
    max_taps -= first;
    srcs += first * src_inc;
    taps += first;
  }
  while (max_taps > 4) {
    if (max_taps >= 8) {
      video_orc_resample_v_muladdtaps4_u8_lq (temp, srcs[0], srcs[1 * src_inc],
          srcs[2 * src_inc], srcs[3 * src_inc], taps[0], taps[1], taps[2],
          taps[3], count);
      max_taps -= 4;
      srcs += 4 * src_inc;
      taps += 4;
    } else {
      for (i = 0; i < max_taps - 4; i++)
        video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
            count);
      srcs += (max_taps - 4) * src_inc;
      taps += (max_taps - 4);
      max_taps = 4;
    }
  }
  if (max_taps == 4) {
    video_orc_resample_v_muladdscaletaps4_u8_lq (d, srcs[0], srcs[1 * src_inc],
        srcs[2 * src_inc], srcs[3 * src_inc], temp, taps[0], taps[1], taps[2],
        taps[3], count);
  } else {
    for (i = 0; i < max_taps; i++)
      video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
          count);
    video_orc_resample_scaletaps_u8_lq (d, temp, count);
1057
  }
1058

1059
#else
1060 1061 1062 1063
  video_orc_resample_v_multaps_u8 (temp, srcs[0], taps[0], count);
  for (i = 1; i < max_taps; i++) {
    video_orc_resample_v_muladdtaps_u8 (temp, srcs[i * src_inc], taps[i],
        count);
1064
  }
1065
  video_orc_resample_scaletaps_u8 (d, temp, count);
1066
#endif
1067 1068
}

1069
static void
1070 1071
video_scale_v_ntap_u16 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width,
1072
    guint n_elems)
1073 1074 1075
{
  gint16 *taps;
  gint i, max_taps, count, src_inc;
1076
  gpointer d;
1077 1078 1079
  gint32 *temp;

  if (scale->taps_s16 == NULL)
1080
    make_s16_taps (scale, n_elems, SCALE_U16);
1081

1082 1083 1084
  max_taps = scale->resampler.max_taps;
  taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps);

1085
  d = (guint16 *) dest;
1086 1087 1088 1089 1090 1091

  if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
    src_inc = 2;
  else
    src_inc = 1;

Wim Taymans's avatar
Wim Taymans committed
1092
  temp = (gint32 *) scale->tmpline2;
1093
  count = width * n_elems;
1094 1095 1096 1097 1098 1099 1100 1101 1102

  video_orc_resample_v_multaps_u16 (temp, srcs[0], taps[0], count);
  for (i = 1; i < max_taps; i++) {
    video_orc_resample_v_muladdtaps_u16 (temp, srcs[i * src_inc], taps[i],
        count);
  }
  video_orc_resample_scaletaps_u16 (d, temp, count);
}

1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116
/* Return the index (0 or 1) of the Y component inside each pair of elements
 * of a packed YUV line: 0 for YUY2 and YVYU, 1 for UYVY and for any other
 * format passed in. */
static gint
get_y_offset (GstVideoFormat format)
{
  if (format == GST_VIDEO_FORMAT_YUY2 || format == GST_VIDEO_FORMAT_YVYU)
    return 0;

  /* GST_VIDEO_FORMAT_UYVY and everything else */
  return 1;
}

/**
 * gst_video_scaler_combine_packed_YUV: (skip)
 * @y_scale: a scaler for the Y component
 * @uv_scale: a scaler for the U and V components
 * @in_format: the input video format
 * @out_format: the output video format
 *
 * Combine a scaler for Y and UV into one scaler that reads packed
 * @in_format and produces packed @out_format. @uv_scale must have the
 * same maximum number of taps as @y_scale.
 *
 * Returns: a new horizontal videoscaler for @out_format, or %NULL when
 * one of the preconditions on @y_scale and @uv_scale is not met.
 *
 * Since: 1.6
 */
GstVideoScaler *
gst_video_scaler_combine_packed_YUV (GstVideoScaler * y_scale,
    GstVideoScaler * uv_scale, GstVideoFormat in_format,
    GstVideoFormat out_format)
{
  GstVideoScaler *scale;
  GstVideoResampler *resampler;
  guint i, out_size, max_taps, n_phases;
  gdouble *taps;
  guint32 *offset, *phase, *n_taps;

  g_return_val_if_fail (y_scale != NULL, NULL);
  g_return_val_if_fail (uv_scale != NULL, NULL);
  g_return_val_if_fail (uv_scale->resampler.max_taps ==
      y_scale->resampler.max_taps, NULL);

  scale = g_slice_new0 (GstVideoScaler);

  scale->method = y_scale->method;
  scale->flags = y_scale->flags;
  scale->merged = TRUE;

  resampler = &scale->resampler;

  /* two packed elements per Y output sample, rounded up to a multiple of 4 */
  out_size = GST_ROUND_UP_4 (y_scale->resampler.out_size * 2);
  max_taps = y_scale->resampler.max_taps;
  /* every output element gets its own phase, see phase[i] = i below */
  n_phases = out_size;
  offset = g_malloc (sizeof (guint32) * out_size);
  phase = g_malloc (sizeof (guint32) * n_phases);
  n_taps = g_malloc (sizeof (guint32) * n_phases);
  taps = g_malloc (sizeof (gdouble) * max_taps * n_phases);

  resampler->in_size = y_scale->resampler.in_size * 2;
  resampler->out_size = out_size;
  resampler->max_taps = max_taps;
  resampler->n_phases = n_phases;
  resampler->offset = offset;
  resampler->phase = phase;
  resampler->n_taps = n_taps;
  resampler->taps = taps;

  scale->in_y_offset = get_y_offset (in_format);
  scale->out_y_offset = get_y_offset (out_format);
  scale->inc = y_scale->inc;

  for (i = 0; i < out_size; i++) {
    gint ic;

    if ((i & 1) == scale->out_y_offset) {
      /* Y element: take offset and taps from the Y scaler; the index is
       * clamped because out_size was rounded up */
      ic = MIN (i / 2, y_scale->resampler.out_size - 1);
      offset[i] = y_scale->resampler.offset[ic] * 2 + scale->in_y_offset;
      memcpy (taps + i * max_taps, y_scale->resampler.taps +
          y_scale->resampler.phase[ic] * max_taps, max_taps * sizeof (gdouble));
    } else {
      /* U/V element: take offset and taps from the UV scaler, keeping the
       * position of the element inside its group of 4 */
      ic = MIN (i / 4, uv_scale->resampler.out_size - 1);
      offset[i] = uv_scale->resampler.offset[ic] * 4 + (i & 3);
      memcpy (taps + i * max_taps, uv_scale->resampler.taps +
          uv_scale->resampler.phase[ic] * max_taps,
          max_taps * sizeof (gdouble));
    }
    phase[i] = i;
    /* each combined phase holds a full row of max_taps coefficients; do not
     * leave the freshly allocated array uninitialised */
    n_taps[i] = max_taps;
  }

  scaler_dump (scale);

  return scale;
}

Wim Taymans's avatar
Wim Taymans committed
1196 1197 1198 1199
static gboolean
get_functions (GstVideoScaler * hscale, GstVideoScaler * vscale,
    GstVideoFormat format,
    GstVideoScalerHFunc * hfunc, GstVideoScalerVFunc * vfunc,
1200
    gint * n_elems, guint * width, gint * bits)
1201
{
1202
  gboolean mono = FALSE;
1203

1204 1205
  switch (format) {
    case GST_VIDEO_FORMAT_GRAY8:
1206
      *bits = 8;
Wim Taymans's avatar
Wim Taymans committed
1207
      *n_elems = 1;
1208
      mono = TRUE;
1209
      break;
1210 1211 1212
    case GST_VIDEO_FORMAT_YUY2:
    case GST_VIDEO_FORMAT_YVYU:
    case GST_VIDEO_FORMAT_UYVY:
1213
      *bits = 8;
Wim Taymans's avatar
Wim Taymans committed
1214
      *n_elems = 1;
1215
      *width = GST_ROUND_UP_4 (*width * 2);
1216
      break;
1217 1218
    case GST_VIDEO_FORMAT_RGB:
    case GST_VIDEO_FORMAT_BGR:
1219
    case GST_VIDEO_FORMAT_v308:
Joan Pau Beltran's avatar
Joan Pau Beltran committed
1220
    case GST_VIDEO_FORMAT_IYU2:
1221
      *bits = 8;
Wim Taymans's avatar
Wim Taymans committed
1222
      *n_elems = 3;
1223
      break;
1224 1225 1226 1227 1228 1229 1230 1231 1232
    case GST_VIDEO_FORMAT_AYUV:
    case GST_VIDEO_FORMAT_RGBx:
    case GST_VIDEO_FORMAT_BGRx:
    case GST_VIDEO_FORMAT_xRGB:
    case GST_VIDEO_FORMAT_xBGR:
    case GST_VIDEO_FORMAT_RGBA:
    case GST_VIDEO_FORMAT_BGRA:
    case GST_VIDEO_FORMAT_ARGB:
    case GST_VIDEO_FORMAT_ABGR:
1233
      *bits = 8;
Wim Taymans's avatar
Wim Taymans committed
1234
      *n_elems = 4;
1235
      break;
1236 1237
    case GST_VIDEO_FORMAT_ARGB64:
    case GST_VIDEO_FORMAT_AYUV64:
1238
      *bits = 16;
Wim Taymans's avatar
Wim Taymans committed
1239 1240 1241 1242
      *n_elems = 4;
      break;
    case GST_VIDEO_FORMAT_GRAY16_LE:
    case GST_VIDEO_FORMAT_GRAY16_BE:
1243
      *bits = 16;
Wim Taymans's avatar
Wim Taymans committed
1244
      *n_elems = 1;
1245
      mono = TRUE;
1246
      break;
1247 1248 1249 1250
    case GST_VIDEO_FORMAT_NV12:
    case GST_VIDEO_FORMAT_NV16:
    case GST_VIDEO_FORMAT_NV21:
    case GST_VIDEO_FORMAT_NV24:
1251
    case GST_VIDEO_FORMAT_NV61:
1252
      *bits = 8;
Wim Taymans's avatar
Wim Taymans committed
1253
      *n_elems = 2;
1254
      break;
1255
    default:
Wim Taymans's avatar
Wim Taymans committed
1256
      return FALSE;
1257
  }
1258
  if (*bits == 8) {
Wim Taymans's avatar
Wim Taymans committed
1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272
    switch (hscale ? hscale->resampler.max_taps : 0) {
      case 0:
        break;
      case 1:
        if (*n_elems == 1)
          *hfunc = video_scale_h_near_u8;
        else if (*n_elems == 2)
          *hfunc = video_scale_h_near_u16;
        else if (*n_elems == 3)
          *hfunc = video_scale_h_near_3u8;
        else if (*n_elems == 4)
          *hfunc = video_scale_h_near_u32;
        break;
      case 2:
1273
        if (*n_elems == 1 && mono)
Wim Taymans's avatar
Wim Taymans committed
1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299
          *hfunc = video_scale_h_2tap_1u8;
        else if (*n_elems == 4)
          *hfunc = video_scale_h_2tap_4u8;
        else
          *hfunc = video_scale_h_ntap_u8;
        break;
      default:
        *hfunc = video_scale_h_ntap_u8;
        break;
    }
    switch (vscale ? vscale->resampler.max_taps : 0) {
      case 0:
        break;
      case 1:
        *vfunc = video_scale_v_near_u8;
        break;
      case 2:
        *vfunc = video_scale_v_2tap_u8;
        break;
      case 4:
        *vfunc = video_scale_v_4tap_u8;
        break;
      default:
        *vfunc = video_scale_v_ntap_u8;
        break;
    }