/* GStreamer
 * Copyright (C) 2003 Benjamin Otte <in7y118@public.uni-hamburg.de>
 * Copyright (C) 2005-2009 Tim-Philipp Müller <tim centricular net>
 * Copyright (C) 2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
 *
 * gsttypefindfunctions.c: collection of various typefind functions
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

28
#include <glib.h>
29
#include <glib/gprintf.h>
30

31 32 33 34 35 36
/* don't want to add gio xdgmime typefinder if gio was disabled via configure */
#ifdef HAVE_GIO
#include <gio/gio.h>
#define USE_GIO
#endif

37
#include <gst/gst.h>
38

39
#include <stdio.h>
40 41
#include <string.h>
#include <ctype.h>
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
42

43
#include <gst/pbutils/pbutils.h>
44

45 46 47
GST_DEBUG_CATEGORY_STATIC (type_find_debug);
#define GST_CAT_DEFAULT type_find_debug

48 49 50 51 52 53 54 55
/* DataScanCtx: helper for typefind functions that scan through data
 * step-by-step, to avoid doing a peek at each and every offset */

#define DATA_SCAN_CTX_CHUNK_SIZE 4096

typedef struct
{
  guint64 offset;
56
  const guint8 *data;
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
  gint size;
} DataScanCtx;

/* Move the scan position forward by @bytes_to_skip bytes.
 *
 * The stream offset always grows by the full amount.  The window itself is
 * shrunk by the same amount, or emptied (size 0) when the skip reaches or
 * passes its end.  @tf is not used. */
static inline void
data_scan_ctx_advance (GstTypeFind * tf, DataScanCtx * c, guint bytes_to_skip)
{
  c->offset += bytes_to_skip;

  if (G_UNLIKELY (c->size <= bytes_to_skip)) {
    /* skipping at least everything we hold: the window is now empty */
    c->data += c->size;
    c->size = 0;
    return;
  }

  c->data += bytes_to_skip;
  c->size -= bytes_to_skip;
}

/* Make sure at least @min_len bytes are available at c->data.
 *
 * Returns TRUE immediately if the window is already large enough.  Otherwise
 * a fresh window is peeked at c->offset: first a full chunk, and if that
 * fails a smaller request derived from the stream length.  Returns FALSE
 * when neither peek succeeds; the context is left untouched in that case. */
static inline gboolean
data_scan_ctx_ensure_data (GstTypeFind * tf, DataScanCtx * c, gint min_len)
{
  const guint8 *peeked;
  guint64 fallback_len;
  guint chunk_len;

  if (G_LIKELY (c->size >= min_len))
    return TRUE;

  chunk_len = MAX (DATA_SCAN_CTX_CHUNK_SIZE, min_len);

  peeked = gst_type_find_peek (tf, c->offset, chunk_len);
  if (G_LIKELY (peeked != NULL)) {
    c->data = peeked;
    c->size = chunk_len;
    return TRUE;
  }

  /* if there's less than our chunk size, try to get as much as we can, but
   * always at least min_len bytes (we might be typefinding the first buffer
   * of the stream and not have as much data available as we'd like) */
  fallback_len = gst_type_find_get_length (tf);
  fallback_len = (fallback_len > 0) ?
      CLAMP (fallback_len - c->offset, min_len, chunk_len) : min_len;

  peeked = gst_type_find_peek (tf, c->offset, fallback_len);
  if (peeked == NULL)
    return FALSE;

  c->data = peeked;
  c->size = fallback_len;
  return TRUE;
}

110 111 112 113 114 115 116 117 118 119
/* Compare @len bytes at @offset inside the scan window with @data.
 *
 * Returns TRUE only if offset + len bytes could be made available and the
 * bytes are equal; FALSE if the data is missing or differs. */
static inline gboolean
data_scan_ctx_memcmp (GstTypeFind * tf, DataScanCtx * c, guint offset,
    const gchar * data, guint len)
{
  gboolean have_bytes = data_scan_ctx_ensure_data (tf, c, offset + len);

  return have_bytes && memcmp (c->data + offset, data, len) == 0;
}

120
/*** text/plain ***/
121
static gboolean xml_check_first_element (GstTypeFind * tf,
122
    const gchar * element, guint elen, gboolean strict);
123
static gboolean sdp_check_header (GstTypeFind * tf);
124

David Schleef's avatar
David Schleef committed
125
static GstStaticCaps utf8_caps = GST_STATIC_CAPS ("text/plain");
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
126

127
#define UTF8_CAPS gst_static_caps_get(&utf8_caps)
128 129 130 131

/* Check whether the data at @offset looks like valid UTF-8.
 *
 * Tries to peek 32 kB and keeps halving the request (down to, but excluding,
 * 16 bytes) until a peek succeeds; each halving lowers the reported
 * probability by 10, starting from 95.  The first chunk that can be peeked
 * decides the outcome.
 *
 * Returns TRUE and stores the probability in @prob if the chunk validates
 * (a character cut off at the very end of the chunk is tolerated);
 * otherwise returns FALSE with *@prob set to 0. */
static gboolean
utf8_type_find_have_valid_utf8_at_offset (GstTypeFind * tf, guint64 offset,
    GstTypeFindProbability * prob)
{
  /* randomly decided values */
  const guint min_size = 16;    /* minimum size */
  const guint step = 10;        /* probability lost per halving */
  guint size, probability;

  *prob = 0;

  for (size = 32 * 1024, probability = 95;
      probability > step && size > min_size;
      size /= 2, probability -= step) {
    const gchar *start, *end;

    start = (const gchar *) gst_type_find_peek (tf, offset, size);
    if (start == NULL)
      continue;                 /* not that much data, retry with half */

    /* allow last char to be cut off */
    if (g_utf8_validate (start, size, &end) || (end - start + 4 > size)) {
      *prob = probability;
      return TRUE;
    }

    return FALSE;
  }

  return FALSE;
}

/* Typefinder for text/plain.
 *
 * Steps aside for data that starts with an XML declaration or an SDP header.
 * Otherwise validates the start of the stream as UTF-8 and, for streams of
 * at least 64 kB with a known length, also the middle; the suggested
 * probability is derived from those checks. */
static void
utf8_type_find (GstTypeFind * tf, gpointer unused)
{
  GstTypeFindProbability head_prob, middle_prob;
  guint64 total;
  guint result;

  /* leave xml to the xml typefinders, and sdp to the sdp typefinders */
  if (xml_check_first_element (tf, "", 0, TRUE) || sdp_check_header (tf))
    return;

  /* check beginning of stream */
  if (!utf8_type_find_have_valid_utf8_at_offset (tf, 0, &head_prob))
    return;

  GST_LOG ("start is plain text with probability of %u", head_prob);

  total = gst_type_find_get_length (tf);

  if (total == 0 || total == (guint64) - 1) {
    /* POSSIBLE is the highest probability we ever return if we can't
     * probe into the middle of the file and don't know its length */
    result = MIN (head_prob, GST_TYPE_FIND_POSSIBLE);
  } else if (total < 64 * 1024) {
    result = head_prob;
  } else {
    /* check middle of stream */
    if (!utf8_type_find_have_valid_utf8_at_offset (tf, total / 2,
            &middle_prob))
      return;

    GST_LOG ("middle is plain text with probability of %u", middle_prob);
    result = (head_prob + middle_prob) / 2;
  }

  gst_type_find_suggest (tf, result, UTF8_CAPS);
}

205
/*** text/uri-list ***/
206

David Schleef's avatar
David Schleef committed
207
static GstStaticCaps uri_caps = GST_STATIC_CAPS ("text/uri-list");

#define URI_CAPS (gst_static_caps_get(&uri_caps))
#define BUFFER_SIZE 16          /* If the string is < 16 bytes we're screwed */

/* Step the scan one byte forward.  Expects `tf', `data', `pos' and `offset'
 * in the expanding scope.  When the current BUFFER_SIZE window is used up,
 * the next window is peeked; if that peek fails the enclosing function
 * returns (so it must return void). */
#define INC_BUFFER {                                                    \
  if (++pos != BUFFER_SIZE) {                                           \
    data++;                                                             \
  } else {                                                              \
    pos = 0;                                                            \
    offset += BUFFER_SIZE;                                              \
    data = gst_type_find_peek (tf, offset, BUFFER_SIZE);                \
    if (data == NULL) return;                                           \
  }                                                                     \
}
static void
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
223
uri_type_find (GstTypeFind * tf, gpointer unused)
224 225 226 227
{
  guint8 *data = gst_type_find_peek (tf, 0, BUFFER_SIZE);
  guint pos = 0;
  guint offset = 0;
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
228

229 230 231 232 233
  if (data) {
    /* Search for # comment lines */
    while (*data == '#') {
      /* Goto end of line */
      while (*data != '\n') {
234
        INC_BUFFER;
235 236 237 238 239 240 241 242 243 244 245
      }

      INC_BUFFER;
    }

    if (!g_ascii_isalpha (*data)) {
      /* Had a non alpha char - can't be uri-list */
      return;
    }

    INC_BUFFER;
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
246

247 248 249 250 251 252 253 254 255 256 257
    while (g_ascii_isalnum (*data)) {
      INC_BUFFER;
    }

    if (*data != ':') {
      /* First non alpha char is not a : */
      return;
    }

    /* Get the next 2 bytes as well */
    data = gst_type_find_peek (tf, offset + pos, 3);
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
258 259 260
    if (data == NULL)
      return;

261 262 263 264 265 266 267 268
    if (data[1] != '/' && data[2] != '/') {
      return;
    }

    gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, URI_CAPS);
  }
}

269 270 271

/*** application/xml **********************************************************/

272
#define XML_BUFFER_SIZE 16

/* Step the XML scan one byte forward.  Expects `tf', `data', `pos' and
 * `offset' in the expanding scope.  When the current XML_BUFFER_SIZE window
 * is used up, the next one is peeked; if that peek fails the enclosing
 * function returns FALSE. */
#define XML_INC_BUFFER {                                                \
  if (++pos != XML_BUFFER_SIZE) {                                       \
    data++;                                                             \
  } else {                                                              \
    pos = 0;                                                            \
    offset += XML_BUFFER_SIZE;                                          \
    data = gst_type_find_peek (tf, offset, XML_BUFFER_SIZE);            \
    if (data == NULL) return FALSE;                                     \
  }                                                                     \
}

static gboolean
286 287
xml_check_first_element (GstTypeFind * tf, const gchar * element, guint elen,
    gboolean strict)
288
{
289 290
  gboolean got_xmldec;
  guint8 *data;
291 292 293
  guint offset = 0;
  guint pos = 0;

294 295 296 297 298
  data = gst_type_find_peek (tf, 0, XML_BUFFER_SIZE);
  if (!data)
    return FALSE;

  /* look for the XMLDec
299 300
   * see XML spec 2.8, Prolog and Document Type Declaration
   * http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */
301 302 303
  got_xmldec = (memcmp (data, "<?xml", 5) == 0);

  if (strict && !got_xmldec)
304 305
    return FALSE;

306 307 308 309 310
  /* skip XMLDec in any case if we've got one */
  if (got_xmldec) {
    pos += 5;
    data += 5;
  }
311

312 313 314 315
  /* look for the first element, it has to be the requested element. Bail
   * out if it is not within the first 4kB. */
  while (data && (offset + pos) < 4096) {
    while (*data != '<' && (offset + pos) < 4096) {
316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
      XML_INC_BUFFER;
    }

    XML_INC_BUFFER;
    if (!g_ascii_isalpha (*data)) {
      /* if not alphabetic, it's a PI or an element / attribute declaration
       * like <?xxx or <!xxx */
      XML_INC_BUFFER;
      continue;
    }

    /* the first normal element, check if it's the one asked for */
    data = gst_type_find_peek (tf, offset + pos, elen + 1);
    return (data && element && strncmp ((char *) data, element, elen) == 0);
  }

  return FALSE;
}

static GstStaticCaps generic_xml_caps = GST_STATIC_CAPS ("application/xml");

#define GENERIC_XML_CAPS (gst_static_caps_get(&generic_xml_caps))

/* Typefinder for generic XML: anything that starts with an XML declaration
 * and contains some element is suggested with minimum probability. */
static void
xml_type_find (GstTypeFind * tf, gpointer unused)
{
  if (!xml_check_first_element (tf, "", 0, TRUE))
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_MINIMUM, GENERIC_XML_CAPS);
}

346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
/*** application/sdp *********************************************************/

static GstStaticCaps sdp_caps = GST_STATIC_CAPS ("application/sdp");

#define SDP_CAPS (gst_static_caps_get(&sdp_caps))

/* Returns TRUE if the first five bytes can be peeked and the stream begins
 * with an SDP version line. */
static gboolean
sdp_check_header (GstTypeFind * tf)
{
  const guint8 *head = gst_type_find_peek (tf, 0, 5);

  /* sdp must start with v=0[\r]\n */
  if (head == NULL || memcmp (head, "v=0", 3) != 0)
    return FALSE;

  return (head[3] == '\n') || (head[3] == '\r' && head[4] == '\n');
}

/* Typefinder for application/sdp: a valid SDP version line at the start of
 * the stream yields maximum probability. */
static void
sdp_type_find (GstTypeFind * tf, gpointer unused)
{
  if (!sdp_check_header (tf))
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SDP_CAPS);
}

379 380 381 382 383 384 385 386
/*** application/smil *********************************************************/

static GstStaticCaps smil_caps = GST_STATIC_CAPS ("application/smil");

#define SMIL_CAPS (gst_static_caps_get(&smil_caps))

/* Typefinder for SMIL: the first XML element has to start with "smil"; an
 * XML declaration is not required. */
static void
smil_type_find (GstTypeFind * tf, gpointer unused)
{
  if (!xml_check_first_element (tf, "smil", 4, FALSE))
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SMIL_CAPS);
}

392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
/*** text/html ***/

static GstStaticCaps html_caps = GST_STATIC_CAPS ("text/html");

#define HTML_CAPS gst_static_caps_get (&html_caps)

/* Typefinder for text/html.  Suggests maximum probability if any of these
 * hold, tried in this order:
 *  - the stream starts with "<!DOCTYPE HTML" (case-insensitive)
 *  - the first XML element starts with "html"
 *  - the first '<' within the first 16 bytes opens "<html>"
 *    (case-insensitive)
 * Needs at least 16 bytes of data. */
static void
html_type_find (GstTypeFind * tf, gpointer unused)
{
  gchar *head, *tag;
  gboolean is_html = FALSE;

  head = (gchar *) gst_type_find_peek (tf, 0, 16);
  if (head == NULL)
    return;

  if (g_ascii_strncasecmp (head, "<!DOCTYPE HTML", 14) == 0) {
    is_html = TRUE;
  } else if (xml_check_first_element (tf, "html", 4, FALSE)) {
    is_html = TRUE;
  } else {
    tag = memchr (head, '<', 16);
    if (tag != NULL) {
      tag = (gchar *) gst_type_find_peek (tf, tag - head, 6);
      is_html = (tag != NULL && g_ascii_strncasecmp (tag, "<html>", 6) == 0);
    }
  }

  if (is_html)
    gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HTML_CAPS);
}

419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
/*** audio/midi ***/

static GstStaticCaps mid_caps = GST_STATIC_CAPS ("audio/midi");

#define MID_CAPS gst_static_caps_get(&mid_caps)

/* Typefinder for MIDI files: the stream has to start with the "MThd" chunk
 * id, see http://jedi.ks.uiuc.edu/~johns/links/music/midifile.html */
static void
mid_type_find (GstTypeFind * tf, gpointer unused)
{
  const guint8 *head = gst_type_find_peek (tf, 0, 4);

  if (head == NULL || memcmp (head, "MThd", 4) != 0)
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MID_CAPS);
}
434

435 436 437 438 439 440 441 442 443 444
/*** audio/mobile-xmf ***/

static GstStaticCaps mxmf_caps = GST_STATIC_CAPS ("audio/mobile-xmf");

#define MXMF_CAPS gst_static_caps_get(&mxmf_caps)

/* Typefinder for Mobile XMF.  Three fields are checked one after the other,
 * each with its own peek; the first mismatch or failed peek ends the probe. */
static void
mxmf_type_find (GstTypeFind * tf, gpointer unused)
{
  const guint8 *field;

  /* Search FileId "XMF_" 4 bytes */
  field = gst_type_find_peek (tf, 0, 4);
  if (field == NULL || memcmp (field, "XMF_", 4) != 0)
    return;

  /* Search Format version "2.00" 4 bytes */
  field = gst_type_find_peek (tf, 4, 4);
  if (field == NULL || memcmp (field, "2.00", 4) != 0)
    return;

  /* Search TypeId 2     1 byte */
  field = gst_type_find_peek (tf, 11, 1);
  if (field == NULL || field[0] != 2)
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MXMF_CAPS);
}

462

463
/*** video/x-fli ***/
464

David Schleef's avatar
David Schleef committed
465
static GstStaticCaps flx_caps = GST_STATIC_CAPS ("video/x-fli");
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
466

467
#define FLX_CAPS gst_static_caps_get(&flx_caps)
468
static void
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
469
flx_type_find (GstTypeFind * tf, gpointer unused)
470
{
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
471
  guint8 *data = gst_type_find_peek (tf, 0, 134);
472 473 474 475

  if (data) {
    /* check magic and the frame type of the first frame */
    if ((data[4] == 0x11 || data[4] == 0x12 ||
476 477 478
            data[4] == 0x30 || data[4] == 0x44) &&
        data[5] == 0xaf &&
        ((data[132] == 0x00 || data[132] == 0xfa) && data[133] == 0xf1)) {
479 480 481 482 483 484 485 486
      gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FLX_CAPS);
    }
    return;
  }
  data = gst_type_find_peek (tf, 0, 6);
  if (data) {
    /* check magic only */
    if ((data[4] == 0x11 || data[4] == 0x12 ||
487
            data[4] == 0x30 || data[4] == 0x44) && data[5] == 0xaf) {
488 489 490 491 492 493
      gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, FLX_CAPS);
    }
    return;
  }
}

494
/*** application/x-id3 ***/
495

David Schleef's avatar
David Schleef committed
496
static GstStaticCaps id3_caps = GST_STATIC_CAPS ("application/x-id3");
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
497

498
#define ID3_CAPS gst_static_caps_get(&id3_caps)
499
static void
500
id3v2_type_find (GstTypeFind * tf, gpointer unused)
501
{
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
502 503
  guint8 *data = gst_type_find_peek (tf, 0, 10);

504 505 506 507 508
  if (data && memcmp (data, "ID3", 3) == 0 &&
      data[3] != 0xFF && data[4] != 0xFF &&
      (data[6] & 0x80) == 0 && (data[7] & 0x80) == 0 &&
      (data[8] & 0x80) == 0 && (data[9] & 0x80) == 0) {
    gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, ID3_CAPS);
509
  }
510 511 512 513 514 515 516
}

/* Typefinder for ID3v1 tags: looks for the "TAG" marker in the 3 bytes
 * peeked at offset -128. */
static void
id3v1_type_find (GstTypeFind * tf, gpointer unused)
{
  const guint8 *marker = gst_type_find_peek (tf, -128, 3);

  if (marker == NULL || memcmp (marker, "TAG", 3) != 0)
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, ID3_CAPS);
}

522
/*** application/x-ape ***/
523 524 525 526 527 528 529 530 531 532 533 534

static GstStaticCaps apetag_caps = GST_STATIC_CAPS ("application/x-apetag");

#define APETAG_CAPS gst_static_caps_get(&apetag_caps)

/* Typefinder for APE tags: looks for the "APETAGEX" marker first at offset
 * 0 and then at offset -32, suggesting maximum probability on the first
 * hit. */
static void
apetag_type_find (GstTypeFind * tf, gpointer unused)
{
  /* APEv1/2 at start of file, then APEv1/2 at end of file */
  static const gint64 marker_offsets[] = { 0, -32 };
  guint i;

  for (i = 0; i < G_N_ELEMENTS (marker_offsets); i++) {
    const guint8 *marker = gst_type_find_peek (tf, marker_offsets[i], 8);

    if (marker != NULL && memcmp (marker, "APETAGEX", 8) == 0) {
      gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, APETAG_CAPS);
      return;
    }
  }
}

547
/*** audio/x-ttafile ***/
548

549
static GstStaticCaps tta_caps = GST_STATIC_CAPS ("audio/x-ttafile");
550 551 552 553 554 555 556 557 558 559 560 561 562 563 564

#define TTA_CAPS gst_static_caps_get(&tta_caps)
static void
tta_type_find (GstTypeFind * tf, gpointer unused)
{
  guint8 *data = gst_type_find_peek (tf, 0, 3);

  if (data) {
    if (memcmp (data, "TTA", 3) == 0) {
      gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TTA_CAPS);
      return;
    }
  }
}

565 566 567 568 569 570 571 572
/*** audio/x-flac ***/
static GstStaticCaps flac_caps = GST_STATIC_CAPS ("audio/x-flac");

#define FLAC_CAPS (gst_static_caps_get(&flac_caps))

/* Typefinder for FLAC.  Recognises the "fLaC" stream marker and the
 * flac-in-ogg "\177FLAC\001" packet header at the start of the stream, both
 * with maximum probability.  Scanning for header-less FLAC frames is
 * compiled out (see below). */
static void
flac_type_find (GstTypeFind * tf, gpointer unused)
{
  DataScanCtx c = { 0, NULL, 0 };
  gboolean is_flac;

  if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
    return;

  /* standard flac (also old/broken flac-in-ogg with an initial 4-byte marker
   * packet and without the usual packet framing) */
  is_flac = (memcmp (c.data, "fLaC", 4) == 0);

  if (!is_flac) {
    if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 6)))
      return;

    /* flac-in-ogg, see http://flac.sourceforge.net/ogg_mapping.html */
    is_flac = (memcmp (c.data, "\177FLAC\001", 6) == 0);
  }

  if (is_flac) {
    gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FLAC_CAPS);
    return;
  }

/* disabled because it happily typefinds /dev/urandom as audio/x-flac, and
 * because I yet have to see header-less flac in the wild */
#if 0
  /* flac without headers (subset format) */
  /* 64K should be enough */
  while (c.offset < (64 * 1024)) {
    if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
      break;

    /* look for frame header,
     * http://flac.sourceforge.net/format.html#frame_header
     */
    if (c.data[0] == 0xff && (c.data[1] >> 2) == 0x3e) {
      /* bit 15 in the header must be 0 */
      if (((c.data[1] >> 1) & 0x01) == 0x01)
        goto advance;

      /* blocksize must be != 0x00 */
      if ((c.data[2] >> 4) == 0x00)
        goto advance;

      /* samplerate must be != 0x0f */
      if ((c.data[2] & 0x0f) == 0x0f)
        goto advance;
      /* also 0 is invalid, as it means get the info from the header and we
       * don't have headers if we are here */
      if ((c.data[2] & 0x0f) == 0x00)
        goto advance;

      /* channel assignment must be < 11 */
      if ((c.data[3] >> 4) >= 11)
        goto advance;

      /* sample size must be != 0x07 and != 0x05 */
      if (((c.data[3] >> 1) & 0x07) == 0x07)
        goto advance;
      if (((c.data[3] >> 1) & 0x07) == 0x05)
        goto advance;
      /* also 0 is invalid, as it means get the info from the header and we
       * don't have headers if we are here */
      if (((c.data[3] >> 1) & 0x07) == 0x00)
        goto advance;

      /* next bit must be 0 */
      if ((c.data[3] & 0x01) == 0x01)
        goto advance;

      /* FIXME: shouldn't we include the crc check ? */

      GST_DEBUG ("Found flac without headers at %d", (gint) c.offset);
      gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, FLAC_CAPS);
      return;
    }
  advance:
    data_scan_ctx_advance (tf, &c, 1);
  }
#endif
}

653
/*** audio/mpeg version 2, 4 ***/
654 655 656 657

static GstStaticCaps aac_caps = GST_STATIC_CAPS ("audio/mpeg, "
    "mpegversion = (int) { 2, 4 }, framed = (bool) false");
#define AAC_CAPS (gst_static_caps_get(&aac_caps))
658
#define AAC_AMOUNT (4096)
659 660 661
static void
aac_type_find (GstTypeFind * tf, gpointer unused)
{
662
  /* LUT to convert the AudioObjectType from the ADTS header to a string */
663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680
  DataScanCtx c = { 0, NULL, 0 };

  while (c.offset < AAC_AMOUNT) {
    guint snc, len;

    /* detect adts header or adif header.
     * The ADIF header is 4 bytes, that should be OK. The ADTS header, on
     * the other hand, is 14 bits only, so we require one valid frame with
     * again a valid syncpoint on the next one (28 bits) for certainty. We
     * require 4 kB, which is quite a lot, since frames are generally 200-400
     * bytes.
     */
    if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 6)))
      break;

    snc = GST_READ_UINT16_BE (c.data);
    if (G_UNLIKELY ((snc & 0xfff6) == 0xfff0)) {
      /* ADTS header - find frame length */
681 682
      GST_DEBUG ("Found one ADTS syncpoint at offset 0x%" G_GINT64_MODIFIER
          "x, tracing next...", c.offset);
683 684 685 686 687 688 689
      len = ((c.data[3] & 0x03) << 11) |
          (c.data[4] << 3) | ((c.data[5] & 0xe0) >> 5);

      if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len + 2)) {
        GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len);
        goto next;
      }
690

691
      /* check if there's a second ADTS frame */
692
      snc = GST_READ_UINT16_BE (c.data + len);
693
      if ((snc & 0xfff6) == 0xfff0) {
694
        GstCaps *caps;
695
        guint mpegversion, sample_freq_idx, channel_config, profile_idx, rate;
696
        guint8 audio_config[2];
697

698
        mpegversion = (c.data[1] & 0x08) ? 2 : 4;
699
        profile_idx = c.data[2] >> 6;
700 701 702
        sample_freq_idx = ((c.data[2] & 0x3c) >> 2);
        channel_config = ((c.data[2] & 0x01) << 2) + (c.data[3] >> 6);

703 704
        GST_DEBUG ("Found second ADTS-%d syncpoint at offset 0x%"
            G_GINT64_MODIFIER "x, framelen %u", mpegversion, c.offset, len);
705 706 707 708 709 710 711 712 713

        /* 0xd and 0xe are reserved. 0xf means the sample frequency is directly
         * specified in the header, but that's not allowed for ADTS */
        if (sample_freq_idx > 0xc) {
          GST_DEBUG ("Unexpected sample frequency index %d or wrong sync",
              sample_freq_idx);
          goto next;
        }

714
        rate = gst_codec_utils_aac_get_sample_rate_from_index (sample_freq_idx);
715
        GST_LOG ("ADTS: profile=%u, rate=%u", profile_idx, rate);
716

717 718 719
        /* The ADTS frame header is slightly different from the
         * AudioSpecificConfig defined for the MPEG-4 container, so we just
         * construct enough of it for getting the level here. */
720
        /* ADTS counts profiles from 0 instead of 1 to save bits */
721
        audio_config[0] = (profile_idx + 1) << 3;
722 723 724
        audio_config[0] |= (sample_freq_idx >> 1) & 0x7;
        audio_config[1] = (sample_freq_idx & 0x1) << 7;
        audio_config[1] |= (channel_config & 0xf) << 3;
725

726 727 728
        caps = gst_caps_new_simple ("audio/mpeg",
            "framed", G_TYPE_BOOLEAN, FALSE,
            "mpegversion", G_TYPE_INT, mpegversion,
729
            "stream-type", G_TYPE_STRING, "adts", NULL);
730

731
        gst_codec_utils_aac_caps_set_level_and_profile (caps, audio_config, 2);
732 733 734 735 736 737 738 739 740 741 742

        /* add rate and number of channels if we can */
        if (channel_config != 0 && channel_config <= 7) {
          const guint channels_map[] = { 0, 1, 2, 3, 4, 5, 6, 8 };

          gst_caps_set_simple (caps, "channels", G_TYPE_INT,
              channels_map[channel_config], "rate", G_TYPE_INT, rate, NULL);
        }

        gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, caps);
        gst_caps_unref (caps);
743
        break;
744
      }
745 746 747 748

      GST_DEBUG ("No next frame found... (should have been at 0x%x)", len);
    } else if (!memcmp (c.data, "ADIF", 4)) {
      /* ADIF header */
749
      gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY, "audio/mpeg",
750 751
          "framed", G_TYPE_BOOLEAN, FALSE, "mpegversion", G_TYPE_INT, 4,
          "stream-format", G_TYPE_STRING, "adif", NULL);
752
      break;
753
    }
754 755 756 757

  next:

    data_scan_ctx_advance (tf, &c, 1);
758 759 760
  }
}

761
/*** audio/mpeg version 1 ***/
762

763
/*
764 765 766 767
 * The chance that random data is identified as a valid mp3 header is 63 / 2^18
 * (0.024%) per try. This makes the function for calculating false positives
 *   1 - (1 - ((63 / 2 ^18) ^ GST_MP3_TYPEFIND_MIN_HEADERS)) ^ buffersize)
 * This has the following probabilities of false positives:
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
768 769 770 771 772 773
 * datasize               MIN_HEADERS
 * (bytes)      1       2       3       4
 * 4096         62.6%    0.02%   0%      0%
 * 16384        98%      0.09%   0%      0%
 * 1 MiB       100%      5.88%   0%      0%
 * 1 GiB       100%    100%      1.44%   0%
774 775 776 777 778 779 780 781 782 783 784
 * 1 TiB       100%    100%    100%      0.35%
 * This means that the current choice (3 headers with, most of the time,
 * 4096 byte buffers) is pretty safe for now.
 *
 * The max. size of each frame is 1440 bytes, which means that for N frames to
 * be detected, we need 1440 * GST_MP3_TYPEFIND_MIN_HEADERS + 3 bytes of data.
 * Assuming we step into the stream right after the frame header, this
 * means we need 1440 * (GST_MP3_TYPEFIND_MIN_HEADERS + 1) - 1 + 3 bytes
 * of data (5762) to always detect any mp3.
 */

Stefan Kost's avatar
Stefan Kost committed
785
/* Bitrates in kbit/s, indexed as [version == 3 ? 0 : 1][layer - 1][index],
 * where index is the 4-bit bitrate field of the frame header.  Index 0 is
 * handled separately (free format); the unlisted 16th slot of each row is
 * implicitly 0. */
static const guint mp3types_bitrates[2][3][16] = {
  /* header version field == 3 */
  {
        {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448},
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384},
        {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320}
      },
  /* any other version */
  {
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256},
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160},
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}
      }
};

/* Sample rates in Hz, indexed as [version > 0 ? version - 1 : 0][index],
 * where index is the 2-bit sampling frequency field of the frame header. */
static const guint mp3types_freqs[3][3] = {
  {11025, 12000, 8000},
  {22050, 24000, 16000},
  {44100, 48000, 32000}
};
798 799

static inline guint
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
800
mp3_type_frame_length_from_header (guint32 header, guint * put_layer,
801 802
    guint * put_channels, guint * put_bitrate, guint * put_samplerate,
    gboolean * may_be_free_format, gint possible_free_framelen)
803
{
804
  guint bitrate, layer, length, mode, samplerate, version, channels;
805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828

  if ((header & 0xffe00000) != 0xffe00000)
    return 0;

  /* we don't need extension, copyright, original or
   * emphasis for the frame length */
  header >>= 6;

  /* mode */
  mode = header & 0x3;
  header >>= 3;

  /* padding */
  length = header & 0x1;
  header >>= 1;

  /* sampling frequency */
  samplerate = header & 0x3;
  if (samplerate == 3)
    return 0;
  header >>= 2;

  /* bitrate index */
  bitrate = header & 0xF;
829 830 831 832 833
  if (bitrate == 0 && possible_free_framelen == -1) {
    GST_LOG ("Possibly a free format mp3 - signalling");
    *may_be_free_format = TRUE;
  }
  if (bitrate == 15 || (bitrate == 0 && possible_free_framelen == -1))
834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852
    return 0;

  /* ignore error correction, too */
  header >>= 5;

  /* layer */
  layer = 4 - (header & 0x3);
  if (layer == 4)
    return 0;
  header >>= 2;

  /* version 0=MPEG2.5; 2=MPEG2; 3=MPEG1 */
  version = header & 0x3;
  if (version == 1)
    return 0;

  /* lookup */
  channels = (mode == 3) ? 1 : 2;
  samplerate = mp3types_freqs[version > 0 ? version - 1 : 0][samplerate];
853 854 855 856 857 858 859 860 861 862
  if (bitrate == 0) {
    if (layer == 1) {
      length *= 4;
      length += possible_free_framelen;
      bitrate = length * samplerate / 48000;
    } else {
      length += possible_free_framelen;
      bitrate = length * samplerate /
          ((layer == 3 && version != 3) ? 72000 : 144000);
    }
863
  } else {
864 865 866 867 868 869 870 871
    /* calculating */
    bitrate = mp3types_bitrates[version == 3 ? 0 : 1][layer - 1][bitrate];
    if (layer == 1) {
      length = ((12000 * bitrate / samplerate) + length) * 4;
    } else {
      length += ((layer == 3
              && version != 3) ? 72000 : 144000) * bitrate / samplerate;
    }
872
  }
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
873

874
  GST_LOG ("mp3typefind: calculated mp3 frame length of %u bytes", length);
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
875 876 877 878
  GST_LOG
      ("mp3typefind: samplerate = %u - bitrate = %u - layer = %u - version = %u"
      " - channels = %u", samplerate, bitrate, layer, version, channels);

879 880 881 882 883 884 885 886 887 888 889 890 891
  if (put_layer)
    *put_layer = layer;
  if (put_channels)
    *put_channels = channels;
  if (put_bitrate)
    *put_bitrate = bitrate;
  if (put_samplerate)
    *put_samplerate = samplerate;

  return length;
}


David Schleef's avatar
David Schleef committed
892 893
/* Static caps description for audio/mpeg with mpegversion 1 and a layer in
 * the range [1, 3]. */
static GstStaticCaps mp3_caps = GST_STATIC_CAPS ("audio/mpeg, "
    "mpegversion = (int) 1, layer = (int) [ 1, 3 ]");

/* Expands to gst_static_caps_get() applied to mp3_caps. */
#define MP3_CAPS (gst_static_caps_get(&mp3_caps))
895 896 897 898
/*
 * Arbitrarily chosen tuning values for mp3 typefinding.
 * If no more data is available, we will return a probability of
 *   (found_headers / TRY_HEADERS) *
 *       (MAXIMUM * (TRY_SYNC - bytes_skipped) / TRY_SYNC)
 * if found_headers >= MIN_HEADERS.
 */
902 903
/* Minimum number of valid headers that must have been found for
 * mp3_type_find_at_offset() to still make a guess when peeking the next
 * header fails before TRY_HEADERS headers were seen. */
#define GST_MP3_TYPEFIND_MIN_HEADERS (2)
/* Number of valid headers mp3_type_find_at_offset() tries to collect from a
 * candidate start position before it stops looking for more. */
#define GST_MP3_TYPEFIND_TRY_HEADERS (5)
/* Upper bound on the `skipped` counter in mp3_type_find_at_offset(); the
 * scan loop ends once this many bytes have been skipped. */
#define GST_MP3_TYPEFIND_TRY_SYNC (GST_TYPE_FIND_MAXIMUM * 100) /* 10kB */
/* Size in bytes of the first (largest) chunk peeked while scanning; the
 * request is halved repeatedly when the peek fails. */
#define GST_MP3_TYPEFIND_SYNC_SIZE (2048)
/* Amount subtracted from the computed probability when the first candidate
 * header examined was not a valid mp3 header. */
#define GST_MP3_WRONG_HEADER (10)
907 908

static void
909 910
mp3_type_find_at_offset (GstTypeFind * tf, guint64 start_off,
    guint * found_layer, GstTypeFindProbability * found_prob)
911 912
{
  guint8 *data = NULL;
913
  guint8 *data_end = NULL;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
914 915
  guint size;
  guint64 skipped;
916 917
  gint last_free_offset = -1;
  gint last_free_framelen = -1;
918
  gboolean headerstart = TRUE;
919 920 921 922 923 924 925 926 927 928 929 930 931 932 933

  *found_layer = 0;
  *found_prob = 0;

  size = 0;
  skipped = 0;
  while (skipped < GST_MP3_TYPEFIND_TRY_SYNC) {
    if (size <= 0) {
      size = GST_MP3_TYPEFIND_SYNC_SIZE * 2;
      do {
        size /= 2;
        data = gst_type_find_peek (tf, skipped + start_off, size);
      } while (size > 10 && !data);
      if (!data)
        break;
934
      data_end = data + size;
935 936 937 938 939 940 941 942 943 944 945 946 947 948
    }
    if (*data == 0xFF) {
      guint8 *head_data = NULL;
      guint layer = 0, bitrate, samplerate, channels;
      guint found = 0;          /* number of valid headers found */
      guint64 offset = skipped;

      while (found < GST_MP3_TYPEFIND_TRY_HEADERS) {
        guint32 head;
        guint length;
        guint prev_layer = 0, prev_bitrate = 0;
        guint prev_channels = 0, prev_samplerate = 0;
        gboolean free = FALSE;

949 950
        if ((gint64) (offset - skipped + 4) >= 0 &&
            data + offset - skipped + 4 < data_end) {
951 952 953 954 955
          head_data = data + offset - skipped;
        } else {
          head_data = gst_type_find_peek (tf, offset + start_off, 4);
        }
        if (!head_data)
956
          break;
957 958 959 960 961 962 963 964 965 966 967
        head = GST_READ_UINT32_BE (head_data);
        if (!(length = mp3_type_frame_length_from_header (head, &layer,
                    &channels, &bitrate, &samplerate, &free,
                    last_free_framelen))) {
          if (free) {
            if (last_free_offset == -1)
              last_free_offset = offset;
            else {
              last_free_framelen = offset - last_free_offset;
              offset = last_free_offset;
              continue;
968
            }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
969
          } else {
970
            last_free_framelen = -1;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
971
          }
972

973 974 975 976
          /* Mark the fact that we didn't find a valid header at the beginning */
          if (found == 0)
            headerstart = FALSE;

977 978 979 980 981
          GST_LOG ("%d. header at offset %" G_GUINT64_FORMAT
              " (0x%" G_GINT64_MODIFIER "x) was not an mp3 header "
              "(possibly-free: %s)", found + 1, start_off + offset,
              start_off + offset, free ? "yes" : "no");
          break;
982
        }
983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003
        if ((prev_layer && prev_layer != layer) ||
            /* (prev_bitrate && prev_bitrate != bitrate) || <-- VBR */
            (prev_samplerate && prev_samplerate != samplerate) ||
            (prev_channels && prev_channels != channels)) {
          /* this means an invalid property, or a change, which might mean
           * that this is not a mp3 but just a random bytestream. It could
           * be a freaking funky encoded mp3 though. We'll just not count
           * this header*/
          prev_layer = layer;
          prev_bitrate = bitrate;
          prev_channels = channels;
          prev_samplerate = samplerate;
        } else {
          found++;
          GST_LOG ("found %d. header at offset %" G_GUINT64_FORMAT " (0x%"
              G_GINT64_MODIFIER "X)", found, start_off + offset,
              start_off + offset);
        }
        offset += length;
      }
      g_assert (found <= GST_MP3_TYPEFIND_TRY_HEADERS);
1004 1005 1006 1007
      if (head_data == NULL &&
          gst_type_find_peek (tf, offset + start_off - 1, 1) == NULL)
        /* Incomplete last frame - don't count it. */
        found--;
1008 1009 1010 1011 1012 1013 1014
      if (found == GST_MP3_TYPEFIND_TRY_HEADERS ||
          (found >= GST_MP3_TYPEFIND_MIN_HEADERS && head_data == NULL)) {
        /* we can make a valid guess */
        guint probability = found * GST_TYPE_FIND_MAXIMUM *
            (GST_MP3_TYPEFIND_TRY_SYNC - skipped) /
            GST_MP3_TYPEFIND_TRY_HEADERS / GST_MP3_TYPEFIND_TRY_SYNC;

1015
        if (!headerstart
1016
            && probability > (GST_TYPE_FIND_MINIMUM + GST_MP3_WRONG_HEADER))
1017
          probability -= GST_MP3_WRONG_HEADER;
1018 1019 1020 1021 1022 1023
        if (probability < GST_TYPE_FIND_MINIMUM)
          probability = GST_TYPE_FIND_MINIMUM;
        if (start_off > 0)
          probability /= 2;

        GST_INFO
1024 1025 1026
            ("audio/mpeg calculated %u  =  %u  *  %u / %u  *  (%u - %"
            G_GUINT64_FORMAT ") / %u", probability, GST_TYPE_FIND_MAXIMUM,
            found, GST_MP3_TYPEFIND_TRY_HEADERS, GST_MP3_TYPEFIND_TRY_SYNC,
1027
            (guint64) skipped, GST_MP3_TYPEFIND_TRY_SYNC);
1028 1029
        /* make sure we're not id3 tagged */
        head_data = gst_type_find_peek (tf, -128, 3);
1030
        if (head_data && (memcmp (head_data, "TAG", 3) == 0)) {
1031
          probability = 0;
1032
        }