/* GStreamer
 * Copyright (C) 2003 Benjamin Otte <in7y118@public.uni-hamburg.de>
 *
 * gsttypefindfunctions.c: collection of various typefind functions
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <glib/gstrfuncs.h>

#include <gst/gsttypefind.h>
#include <gst/gstelement.h>
#include <gst/gstversion.h>
#include <gst/gstinfo.h>
32
#include <gst/gstutils.h>
33
34
35

#include <string.h>
#include <ctype.h>
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
36

37
38
39
/* Debug category for this file; GST_CAT_DEFAULT is pointed at it so the
 * GST_LOG / GST_DEBUG / GST_INFO / GST_WARNING calls below log under it. */
GST_DEBUG_CATEGORY_STATIC (type_find_debug);
#define GST_CAT_DEFAULT type_find_debug

40
/*** text/plain ***/
41
static gboolean xml_check_first_element (GstTypeFind * tf,
42
    const gchar * element, guint elen, gboolean strict);
43

44

David Schleef's avatar
David Schleef committed
45
static GstStaticCaps utf8_caps = GST_STATIC_CAPS ("text/plain");
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
46

47
#define UTF8_CAPS gst_static_caps_get(&utf8_caps)
48
49
50
51

/* Checks whether the stream holds valid UTF-8 at @offset.
 *
 * Tries to peek a 32 kB chunk first; every time the peek fails the chunk size
 * is halved and the reported probability drops by 10, until either value gets
 * too small. The first chunk that can be peeked decides the outcome.
 *
 * Returns TRUE and stores the probability in @prob when the chunk validates
 * (a sequence cut off within the last bytes of the chunk is tolerated);
 * otherwise returns FALSE and stores 0 in @prob. */
static gboolean
utf8_type_find_have_valid_utf8_at_offset (GstTypeFind * tf, guint64 offset,
    GstTypeFindProbability * prob)
{
  /* randomly decided values */
  const guint smallest_chunk = 16;
  const guint penalty = 10;     /* probability lost per halving */
  guint chunk;
  guint confidence;

  for (chunk = 32 * 1024, confidence = 95;
      confidence > penalty && chunk > smallest_chunk;
      chunk /= 2, confidence -= penalty) {
    guint8 *buf = gst_type_find_peek (tf, offset, chunk);
    gchar *text;
    gchar *bad;
    gboolean valid;

    if (buf == NULL)
      continue;                 /* not that much data, try a smaller chunk */

    text = (gchar *) buf;
    /* allow last char to be cut off */
    valid = g_utf8_validate (text, chunk, (const gchar **) &bad)
        || (bad - text + 4 > chunk);

    *prob = valid ? confidence : 0;
    return valid;
  }

  *prob = 0;
  return FALSE;
}

/* Typefinder for text/plain: suggests UTF8_CAPS when the start of the stream
 * (and, for streams of 64 kB or more with known length, also the middle)
 * validates as UTF-8. Streams carrying an XML declaration are left alone. */
static void
utf8_type_find (GstTypeFind * tf, gpointer unused)
{
  GstTypeFindProbability head_prob, centre_prob;
  guint64 stream_len;

  /* leave xml to the xml typefinders */
  if (xml_check_first_element (tf, "", 0, TRUE))
    return;

  /* check beginning of stream */
  if (!utf8_type_find_have_valid_utf8_at_offset (tf, 0, &head_prob))
    return;

  GST_LOG ("start is plain text with probability of %u", head_prob);

  stream_len = gst_type_find_get_length (tf);

  if (stream_len == 0 || stream_len == (guint64) - 1) {
    /* POSSIBLE is the highest probability we ever return if we can't
     * probe into the middle of the file and don't know its length */
    GstTypeFindProbability capped = MIN (head_prob, GST_TYPE_FIND_POSSIBLE);

    gst_type_find_suggest (tf, capped, UTF8_CAPS);
  } else if (stream_len < 64 * 1024) {
    /* short stream: the start check is all we do */
    gst_type_find_suggest (tf, head_prob, UTF8_CAPS);
  } else if (utf8_type_find_have_valid_utf8_at_offset (tf, stream_len / 2,
          &centre_prob)) {
    /* middle of stream is text as well: average both results */
    GST_LOG ("middle is plain text with probability of %u", centre_prob);
    gst_type_find_suggest (tf, (head_prob + centre_prob) / 2, UTF8_CAPS);
  }
}

121
/*** text/uri-list ***/
122

David Schleef's avatar
David Schleef committed
123
static GstStaticCaps uri_caps = GST_STATIC_CAPS ("text/uri-list");
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
124

125
#define URI_CAPS (gst_static_caps_get(&uri_caps))
126
#define BUFFER_SIZE 16          /* If the string is < 16 bytes we're screwed */
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
127
128
129
130
131
132
133
134
135
136
#define INC_BUFFER {                                                    \
  pos++;                                                                \
  if (pos == BUFFER_SIZE) {                                             \
    pos = 0;                                                            \
    offset += BUFFER_SIZE;                                              \
    data = gst_type_find_peek (tf, offset, BUFFER_SIZE);                \
    if (data == NULL) return;                                           \
  } else {                                                              \
    data++;                                                             \
  }                                                                     \
137
138
}
static void
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
139
uri_type_find (GstTypeFind * tf, gpointer unused)
140
141
142
143
{
  guint8 *data = gst_type_find_peek (tf, 0, BUFFER_SIZE);
  guint pos = 0;
  guint offset = 0;
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
144

145
146
147
148
149
  if (data) {
    /* Search for # comment lines */
    while (*data == '#') {
      /* Goto end of line */
      while (*data != '\n') {
150
        INC_BUFFER;
151
152
153
154
155
156
157
158
159
160
161
      }

      INC_BUFFER;
    }

    if (!g_ascii_isalpha (*data)) {
      /* Had a non alpha char - can't be uri-list */
      return;
    }

    INC_BUFFER;
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
162

163
164
165
166
167
168
169
170
171
172
173
    while (g_ascii_isalnum (*data)) {
      INC_BUFFER;
    }

    if (*data != ':') {
      /* First non alpha char is not a : */
      return;
    }

    /* Get the next 2 bytes as well */
    data = gst_type_find_peek (tf, offset + pos, 3);
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
174
175
176
    if (data == NULL)
      return;

177
178
179
180
181
182
183
184
    if (data[1] != '/' && data[2] != '/') {
      return;
    }

    gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, URI_CAPS);
  }
}

185
186
187

/*** application/xml **********************************************************/

188
#define XML_BUFFER_SIZE 16
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
189
190
191
192
193
194
195
196
197
198
#define XML_INC_BUFFER {                                                \
  pos++;                                                                \
  if (pos == XML_BUFFER_SIZE) {                                         \
    pos = 0;                                                            \
    offset += XML_BUFFER_SIZE;                                          \
    data = gst_type_find_peek (tf, offset, XML_BUFFER_SIZE);            \
    if (data == NULL) return FALSE;                                     \
  } else {                                                              \
    data++;                                                             \
  }                                                                     \
199
200
201
}

static gboolean
202
203
xml_check_first_element (GstTypeFind * tf, const gchar * element, guint elen,
    gboolean strict)
204
{
205
206
  gboolean got_xmldec;
  guint8 *data;
207
208
209
  guint offset = 0;
  guint pos = 0;

210
211
212
213
214
  data = gst_type_find_peek (tf, 0, XML_BUFFER_SIZE);
  if (!data)
    return FALSE;

  /* look for the XMLDec
215
216
   * see XML spec 2.8, Prolog and Document Type Declaration
   * http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */
217
218
219
  got_xmldec = (memcmp (data, "<?xml", 5) == 0);

  if (strict && !got_xmldec)
220
221
    return FALSE;

222
223
224
225
226
  /* skip XMLDec in any case if we've got one */
  if (got_xmldec) {
    pos += 5;
    data += 5;
  }
227

228
229
230
231
  /* look for the first element, it has to be the requested element. Bail
   * out if it is not within the first 4kB. */
  while (data && (offset + pos) < 4096) {
    while (*data != '<' && (offset + pos) < 4096) {
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
      XML_INC_BUFFER;
    }

    XML_INC_BUFFER;
    if (!g_ascii_isalpha (*data)) {
      /* if not alphabetic, it's a PI or an element / attribute declaration
       * like <?xxx or <!xxx */
      XML_INC_BUFFER;
      continue;
    }

    /* the first normal element, check if it's the one asked for */
    data = gst_type_find_peek (tf, offset + pos, elen + 1);
    return (data && element && strncmp ((char *) data, element, elen) == 0);
  }

  return FALSE;
}

static GstStaticCaps generic_xml_caps = GST_STATIC_CAPS ("application/xml");

#define GENERIC_XML_CAPS (gst_static_caps_get(&generic_xml_caps))

/* Typefinder for generic XML: any stream that starts with an XML declaration
 * and has a regular element within reach is suggested with MINIMUM
 * probability. */
static void
xml_type_find (GstTypeFind * tf, gpointer unused)
{
  if (!xml_check_first_element (tf, "", 0, TRUE))
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_MINIMUM, GENERIC_XML_CAPS);
}

/*** application/smil *********************************************************/

static GstStaticCaps smil_caps = GST_STATIC_CAPS ("application/smil");

#define SMIL_CAPS (gst_static_caps_get(&smil_caps))

/* Typefinder for SMIL: the first regular element has to start with "smil";
 * an XML declaration is not required. */
static void
smil_type_find (GstTypeFind * tf, gpointer unused)
{
  if (!xml_check_first_element (tf, "smil", 4, FALSE))
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SMIL_CAPS);
}

275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
/*** text/html ***/

static GstStaticCaps html_caps = GST_STATIC_CAPS ("text/html");

#define HTML_CAPS gst_static_caps_get (&html_caps)

/* Typefinder for HTML. Accepts, in this order: a leading "<!DOCTYPE HTML"
 * (case-insensitive), a first XML element starting with "html", or a
 * case-insensitive "<html>" at the first '<' within the first 16 bytes. */
static void
html_type_find (GstTypeFind * tf, gpointer unused)
{
  gchar *head = (gchar *) gst_type_find_peek (tf, 0, 16);

  if (head == NULL)
    return;

  if (g_ascii_strncasecmp (head, "<!DOCTYPE HTML", 14) != 0
      && !xml_check_first_element (tf, "html", 4, FALSE)) {
    gchar *open_tag;
    gchar *lt = memchr (head, '<', 16);

    if (lt == NULL)
      return;

    /* re-peek at the position of the '<' so that 6 bytes are available */
    open_tag = (gchar *) gst_type_find_peek (tf, lt - head, 6);
    if (open_tag == NULL || g_ascii_strncasecmp (open_tag, "<html>", 6) != 0)
      return;
  }

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HTML_CAPS);
}

302
/*** video/x-fli ***/
303

David Schleef's avatar
David Schleef committed
304
static GstStaticCaps flx_caps = GST_STATIC_CAPS ("video/x-fli");
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
305

306
#define FLX_CAPS gst_static_caps_get(&flx_caps)
307
static void
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
308
flx_type_find (GstTypeFind * tf, gpointer unused)
309
{
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
310
  guint8 *data = gst_type_find_peek (tf, 0, 134);
311
312
313
314

  if (data) {
    /* check magic and the frame type of the first frame */
    if ((data[4] == 0x11 || data[4] == 0x12 ||
315
316
317
            data[4] == 0x30 || data[4] == 0x44) &&
        data[5] == 0xaf &&
        ((data[132] == 0x00 || data[132] == 0xfa) && data[133] == 0xf1)) {
318
319
320
321
322
323
324
325
      gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FLX_CAPS);
    }
    return;
  }
  data = gst_type_find_peek (tf, 0, 6);
  if (data) {
    /* check magic only */
    if ((data[4] == 0x11 || data[4] == 0x12 ||
326
            data[4] == 0x30 || data[4] == 0x44) && data[5] == 0xaf) {
327
328
329
330
331
332
      gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, FLX_CAPS);
    }
    return;
  }
}

333
/*** application/x-id3 ***/
334

David Schleef's avatar
David Schleef committed
335
static GstStaticCaps id3_caps = GST_STATIC_CAPS ("application/x-id3");
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
336

337
#define ID3_CAPS gst_static_caps_get(&id3_caps)
338
static void
339
id3v2_type_find (GstTypeFind * tf, gpointer unused)
340
{
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
341
342
  guint8 *data = gst_type_find_peek (tf, 0, 10);

343
344
345
346
347
  if (data && memcmp (data, "ID3", 3) == 0 &&
      data[3] != 0xFF && data[4] != 0xFF &&
      (data[6] & 0x80) == 0 && (data[7] & 0x80) == 0 &&
      (data[8] & 0x80) == 0 && (data[9] & 0x80) == 0) {
    gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, ID3_CAPS);
348
  }
349
350
351
352
353
354
355
}

/* Typefinder for ID3v1 tags: looks for the "TAG" marker at offset -128
 * (presumably counted from the end of the stream - confirm against the
 * gst_type_find_peek documentation). */
static void
id3v1_type_find (GstTypeFind * tf, gpointer unused)
{
  guint8 *tag = gst_type_find_peek (tf, -128, 3);

  if (tag == NULL)
    return;

  if (memcmp (tag, "TAG", 3) != 0)
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, ID3_CAPS);
}

361
/*** application/x-ape ***/
362
363
364
365
366
367
368
369
370
371
372
373

static GstStaticCaps apetag_caps = GST_STATIC_CAPS ("application/x-apetag");

#define APETAG_CAPS gst_static_caps_get(&apetag_caps)

/* Typefinder for APE tags: looks for the "APETAGEX" marker first at the start
 * of the file and, failing that, at offset -32 (end of file). */
static void
apetag_type_find (GstTypeFind * tf, gpointer unused)
{
  guint8 *tag;

  /* APEv1/2 at start of file */
  tag = gst_type_find_peek (tf, 0, 8);
  if (tag == NULL || memcmp (tag, "APETAGEX", 8) != 0) {
    /* APEv1/2 at end of file */
    tag = gst_type_find_peek (tf, -32, 8);
    if (tag == NULL || memcmp (tag, "APETAGEX", 8) != 0)
      return;
  }

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, APETAG_CAPS);
}

386
/*** audio/x-ttafile ***/
387

388
static GstStaticCaps tta_caps = GST_STATIC_CAPS ("audio/x-ttafile");
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403

#define TTA_CAPS gst_static_caps_get(&tta_caps)
static void
tta_type_find (GstTypeFind * tf, gpointer unused)
{
  guint8 *data = gst_type_find_peek (tf, 0, 3);

  if (data) {
    if (memcmp (data, "TTA", 3) == 0) {
      gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TTA_CAPS);
      return;
    }
  }
}

404
/*** audio/mpeg version 2, 4 ***/
405
406
407
408

static GstStaticCaps aac_caps = GST_STATIC_CAPS ("audio/mpeg, "
    "mpegversion = (int) { 2, 4 }, framed = (bool) false");
#define AAC_CAPS (gst_static_caps_get(&aac_caps))
409
#define AAC_AMOUNT (4096)
410
411
412
static void
aac_type_find (GstTypeFind * tf, gpointer unused)
{
413
414
415
416
417
418
419
420
421
  guint8 *data = gst_type_find_peek (tf, 0, AAC_AMOUNT);
  gint snc;

  /* detect adts header or adif header.
   * The ADIF header is 4 bytes, that should be OK. The ADTS header, on
   * the other hand, is 14 bits only, so we require one valid frame with
   * again a valid syncpoint on the next one (28 bits) for certainty. We
   * require 4 kB, which is quite a lot, since frames are generally 200-400
   * bytes.
422
   */
423
  if (data) {
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
    gint n;

    for (n = 0; n < AAC_AMOUNT - 3; n++) {
      snc = GST_READ_UINT16_BE (&data[n]);
      if ((snc & 0xfff6) == 0xfff0) {
        /* ADTS header - find frame length */
        gint len;

        GST_DEBUG ("Found one ADTS syncpoint at offset 0x%x, tracing next...",
            n);
        if (AAC_AMOUNT - n < 5) {
          GST_DEBUG ("Not enough data to parse ADTS header");
          break;
        }
        len = ((data[n + 3] & 0x03) << 11) |
            (data[n + 4] << 3) | ((data[n + 5] & 0xe0) >> 5);
        if (n + len + 2 >= AAC_AMOUNT) {
          GST_DEBUG ("Next frame is not within reach");
          break;
        } else if (len == 0) {
          continue;
        }
446

447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
        snc = GST_READ_UINT16_BE (&data[n + len]);
        if ((snc & 0xfff6) == 0xfff0) {
          gint mpegversion = (data[n + 1] & 0x08) ? 2 : 4;
          GstCaps *caps = gst_caps_new_simple ("audio/mpeg",
              "framed", G_TYPE_BOOLEAN, FALSE,
              "mpegversion", G_TYPE_INT, mpegversion,
              NULL);

          gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, caps);
          gst_caps_unref (caps);

          GST_DEBUG ("Found ADTS-%d syncpoint at offset 0x%x (framelen %u)",
              mpegversion, n, len);
          break;
        }

        GST_DEBUG ("No next frame found... (should be at 0x%x)", n + len);
      } else if (!memcmp (&data[n], "ADIF", 4)) {
        /* ADIF header */
        GstCaps *caps = gst_caps_new_simple ("audio/mpeg",
            "framed", G_TYPE_BOOLEAN, FALSE,
            "mpegversion", G_TYPE_INT, 4,
            NULL);

        gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, caps);
        gst_caps_unref (caps);
      }
474
    }
475
476
477
  }
}

478
/*** audio/mpeg version 1 ***/
479

480
/*
 * The chance that random data is identified as a valid mp3 header is 63 / 2^18
 * (0.024%) per try. This makes the function for calculating false positives
 *   1 - (1 - ((63 / 2^18) ^ GST_MP3_TYPEFIND_MIN_HEADERS)) ^ buffersize
 * This has the following probabilities of false positives:
 * datasize               MIN_HEADERS
 * (bytes)      1       2       3       4
 * 4096         62.6%    0.02%   0%      0%
 * 16384        98%      0.09%   0%      0%
 * 1 MiB       100%      5.88%   0%      0%
 * 1 GiB       100%    100%      1.44%   0%
 * 1 TiB       100%    100%    100%      0.35%
 * This means that the current choice (3 headers by most of the time 4096 byte
 * buffers) is pretty safe for now.
 *
 * The max. size of each frame is 1440 bytes, which means that for N frames to
 * be detected, we need 1440 * GST_MP3_TYPEFIND_MIN_HEADERS + 3 bytes of data.
 * Assuming we step into the stream right after the frame header, this
 * means we need 1440 * (GST_MP3_TYPEFIND_MIN_HEADERS + 1) - 1 + 3 bytes
 * of data (5762) to always detect any mp3.
 */

Stefan Kost's avatar
Stefan Kost committed
502
/* Bitrate lookup table, indexed as [0 or 1][layer - 1][bitrate index from the
 * frame header]. Each row lists 15 values; the 16th slot is left
 * zero-initialised. */
static const guint mp3types_bitrates[2][3][16] = {
  {
        {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448},
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384},
        {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320}
      },
  {
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256},
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160},
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}
      }
};

/* Sample rate lookup table, indexed as [row][sampling frequency index from
 * the frame header]. */
static const guint mp3types_freqs[3][3] = {
  {11025, 12000, 8000},
  {22050, 24000, 16000},
  {44100, 48000, 32000}
};
515
516

static inline guint
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
517
mp3_type_frame_length_from_header (guint32 header, guint * put_layer,
518
519
    guint * put_channels, guint * put_bitrate, guint * put_samplerate,
    gboolean * may_be_free_format, gint possible_free_framelen)
520
{
521
  guint bitrate, layer, length, mode, samplerate, version, channels;
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545

  if ((header & 0xffe00000) != 0xffe00000)
    return 0;

  /* we don't need extension, copyright, original or
   * emphasis for the frame length */
  header >>= 6;

  /* mode */
  mode = header & 0x3;
  header >>= 3;

  /* padding */
  length = header & 0x1;
  header >>= 1;

  /* sampling frequency */
  samplerate = header & 0x3;
  if (samplerate == 3)
    return 0;
  header >>= 2;

  /* bitrate index */
  bitrate = header & 0xF;
546
547
548
549
550
  if (bitrate == 0 && possible_free_framelen == -1) {
    GST_LOG ("Possibly a free format mp3 - signalling");
    *may_be_free_format = TRUE;
  }
  if (bitrate == 15 || (bitrate == 0 && possible_free_framelen == -1))
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
    return 0;

  /* ignore error correction, too */
  header >>= 5;

  /* layer */
  layer = 4 - (header & 0x3);
  if (layer == 4)
    return 0;
  header >>= 2;

  /* version 0=MPEG2.5; 2=MPEG2; 3=MPEG1 */
  version = header & 0x3;
  if (version == 1)
    return 0;

  /* lookup */
  channels = (mode == 3) ? 1 : 2;
  samplerate = mp3types_freqs[version > 0 ? version - 1 : 0][samplerate];
570
571
572
573
574
575
576
577
578
579
  if (bitrate == 0) {
    if (layer == 1) {
      length *= 4;
      length += possible_free_framelen;
      bitrate = length * samplerate / 48000;
    } else {
      length += possible_free_framelen;
      bitrate = length * samplerate /
          ((layer == 3 && version != 3) ? 72000 : 144000);
    }
580
  } else {
581
582
583
584
585
586
587
588
    /* calculating */
    bitrate = mp3types_bitrates[version == 3 ? 0 : 1][layer - 1][bitrate];
    if (layer == 1) {
      length = ((12000 * bitrate / samplerate) + length) * 4;
    } else {
      length += ((layer == 3
              && version != 3) ? 72000 : 144000) * bitrate / samplerate;
    }
589
  }
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
590

591
  GST_LOG ("mp3typefind: calculated mp3 frame length of %u bytes", length);
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
592
593
594
595
  GST_LOG
      ("mp3typefind: samplerate = %u - bitrate = %u - layer = %u - version = %u"
      " - channels = %u", samplerate, bitrate, layer, version, channels);

596
597
598
599
600
601
602
603
604
605
606
607
608
  if (put_layer)
    *put_layer = layer;
  if (put_channels)
    *put_channels = channels;
  if (put_bitrate)
    *put_bitrate = bitrate;
  if (put_samplerate)
    *put_samplerate = samplerate;

  return length;
}


David Schleef's avatar
David Schleef committed
609
610
static GstStaticCaps mp3_caps = GST_STATIC_CAPS ("audio/mpeg, "
    "mpegversion = (int) 1, layer = (int) [ 1, 3 ]");
#define MP3_CAPS (gst_static_caps_get(&mp3_caps))

/*
 * random values for typefinding
 * if no more data is available, we will return a probability of
 * (found_headers/TRY_HEADERS) * (MAXIMUM * (TRY_SYNC - bytes_skipped)
 *        / TRY_SYNC)
 * if found_headers >= MIN_HEADERS
 */
#define GST_MP3_TYPEFIND_MIN_HEADERS (2)
#define GST_MP3_TYPEFIND_TRY_HEADERS (5)
#define GST_MP3_TYPEFIND_TRY_SYNC (GST_TYPE_FIND_MAXIMUM * 100) /* 10kB */
#define GST_MP3_TYPEFIND_SYNC_SIZE (2048)
#define GST_MP3_WRONG_HEADER (10)
624
625

static void
626
627
mp3_type_find_at_offset (GstTypeFind * tf, guint64 start_off,
    guint * found_layer, GstTypeFindProbability * found_prob)
628
629
{
  guint8 *data = NULL;
630
  guint8 *data_end = NULL;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
631
632
  guint size;
  guint64 skipped;
633
634
  gint last_free_offset = -1;
  gint last_free_framelen = -1;
635
  gboolean headerstart = TRUE;
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650

  *found_layer = 0;
  *found_prob = 0;

  size = 0;
  skipped = 0;
  while (skipped < GST_MP3_TYPEFIND_TRY_SYNC) {
    if (size <= 0) {
      size = GST_MP3_TYPEFIND_SYNC_SIZE * 2;
      do {
        size /= 2;
        data = gst_type_find_peek (tf, skipped + start_off, size);
      } while (size > 10 && !data);
      if (!data)
        break;
651
      data_end = data + size;
652
653
654
655
656
657
658
659
660
661
662
663
664
665
    }
    if (*data == 0xFF) {
      guint8 *head_data = NULL;
      guint layer = 0, bitrate, samplerate, channels;
      guint found = 0;          /* number of valid headers found */
      guint64 offset = skipped;

      while (found < GST_MP3_TYPEFIND_TRY_HEADERS) {
        guint32 head;
        guint length;
        guint prev_layer = 0, prev_bitrate = 0;
        guint prev_channels = 0, prev_samplerate = 0;
        gboolean free = FALSE;

666
667
        if ((gint64) (offset - skipped + 4) >= 0 &&
            data + offset - skipped + 4 < data_end) {
668
669
670
671
672
          head_data = data + offset - skipped;
        } else {
          head_data = gst_type_find_peek (tf, offset + start_off, 4);
        }
        if (!head_data)
673
          break;
674
675
676
677
678
679
680
681
682
683
684
        head = GST_READ_UINT32_BE (head_data);
        if (!(length = mp3_type_frame_length_from_header (head, &layer,
                    &channels, &bitrate, &samplerate, &free,
                    last_free_framelen))) {
          if (free) {
            if (last_free_offset == -1)
              last_free_offset = offset;
            else {
              last_free_framelen = offset - last_free_offset;
              offset = last_free_offset;
              continue;
685
            }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
686
          } else {
687
            last_free_framelen = -1;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
688
          }
689

690
691
692
693
          /* Mark the fact that we didn't find a valid header at the beginning */
          if (found == 0)
            headerstart = FALSE;

694
695
696
697
698
          GST_LOG ("%d. header at offset %" G_GUINT64_FORMAT
              " (0x%" G_GINT64_MODIFIER "x) was not an mp3 header "
              "(possibly-free: %s)", found + 1, start_off + offset,
              start_off + offset, free ? "yes" : "no");
          break;
699
        }
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
        if ((prev_layer && prev_layer != layer) ||
            /* (prev_bitrate && prev_bitrate != bitrate) || <-- VBR */
            (prev_samplerate && prev_samplerate != samplerate) ||
            (prev_channels && prev_channels != channels)) {
          /* this means an invalid property, or a change, which might mean
           * that this is not a mp3 but just a random bytestream. It could
           * be a freaking funky encoded mp3 though. We'll just not count
           * this header*/
          prev_layer = layer;
          prev_bitrate = bitrate;
          prev_channels = channels;
          prev_samplerate = samplerate;
        } else {
          found++;
          GST_LOG ("found %d. header at offset %" G_GUINT64_FORMAT " (0x%"
              G_GINT64_MODIFIER "X)", found, start_off + offset,
              start_off + offset);
        }
        offset += length;
      }
      g_assert (found <= GST_MP3_TYPEFIND_TRY_HEADERS);
      if (found == GST_MP3_TYPEFIND_TRY_HEADERS ||
          (found >= GST_MP3_TYPEFIND_MIN_HEADERS && head_data == NULL)) {
        /* we can make a valid guess */
        guint probability = found * GST_TYPE_FIND_MAXIMUM *
            (GST_MP3_TYPEFIND_TRY_SYNC - skipped) /
            GST_MP3_TYPEFIND_TRY_HEADERS / GST_MP3_TYPEFIND_TRY_SYNC;

728
729
730
        if (!headerstart
            && ((probability - GST_MP3_WRONG_HEADER) > GST_TYPE_FIND_MINIMUM))
          probability -= GST_MP3_WRONG_HEADER;
731
732
733
734
735
736
        if (probability < GST_TYPE_FIND_MINIMUM)
          probability = GST_TYPE_FIND_MINIMUM;
        if (start_off > 0)
          probability /= 2;

        GST_INFO
737
738
739
            ("audio/mpeg calculated %u  =  %u  *  %u / %u  *  (%u - %"
            G_GUINT64_FORMAT ") / %u", probability, GST_TYPE_FIND_MAXIMUM,
            found, GST_MP3_TYPEFIND_TRY_HEADERS, GST_MP3_TYPEFIND_TRY_SYNC,
740
            (guint64) skipped, GST_MP3_TYPEFIND_TRY_SYNC);
741
742
        /* make sure we're not id3 tagged */
        head_data = gst_type_find_peek (tf, -128, 3);
743
        if (head_data && (memcmp (head_data, "TAG", 3) == 0)) {
744
          probability = 0;
745
        }
746
747
748
749
750
751
        g_assert (probability <= GST_TYPE_FIND_MAXIMUM);

        *found_prob = probability;
        if (probability > 0)
          *found_layer = layer;
        return;
752
753
      }
    }
754
755
756
    data++;
    skipped++;
    size--;
757
  }
758
}
759

760
761
762
763
764
765
766
/* Typefinder for MPEG audio. Probes the start of the stream and, when the
 * length is known and the first result is below LIKELY, also the middle.
 * If the middle yields nothing, a valid frame header right at offset 0 can
 * still raise the probability. On success MP3_CAPS with the detected layer
 * is suggested. */
static void
mp3_type_find (GstTypeFind * tf, gpointer unused)
{
  GstTypeFindProbability prob, mid_prob;
  guint8 *data;
  guint layer, mid_layer;
  guint64 length;
  GstCaps *caps;

  mp3_type_find_at_offset (tf, 0, &layer, &prob);
  length = gst_type_find_get_length (tf);

  if (length == 0 || length == (guint64) - 1) {
    /* can't probe the middle: go with whatever the start gave us */
    if (prob == 0)
      return;
  } else if (prob < GST_TYPE_FIND_LIKELY) {
    /* not certain enough yet, do the additional check in the middle */
    mp3_type_find_at_offset (tf, length / 2, &mid_layer, &mid_prob);

    if (mid_prob > 0) {
      if (prob == 0) {
        GST_LOG ("detected audio/mpeg only in the middle (p=%u)", mid_prob);
        layer = mid_layer;
        prob = mid_prob;
      } else if (layer != mid_layer) {
        GST_WARNING ("audio/mpeg layer discrepancy: %u vs. %u", layer,
            mid_layer);
        return;                 /* FIXME: or should we just go with the one in the middle? */
      } else {
        /* detected mpeg audio both in middle of the file and at the start */
        prob = (prob + mid_prob) / 2;
      }
    } else {
      /* let's see if there's a valid header right at the start */
      data = gst_type_find_peek (tf, 0, 4);     /* use min. frame size? */
      if (data && mp3_type_frame_length_from_header (GST_READ_UINT32_BE (data),
              &layer, NULL, NULL, NULL, NULL, 0) != 0) {
        prob = (prob == 0) ? GST_TYPE_FIND_POSSIBLE - 10 :
            MAX (GST_TYPE_FIND_POSSIBLE - 10, prob + 10);
      }

      if (prob <= 0)
        return;
    }
  }

  /* suggest */
  g_assert (layer > 0);

  caps = gst_caps_make_writable (MP3_CAPS);
  gst_structure_set (gst_caps_get_structure (caps, 0), "layer",
      G_TYPE_INT, layer, NULL);
  gst_type_find_suggest (tf, prob, caps);
  gst_caps_unref (caps);
}

831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
/*** audio/x-musepack ***/

static GstStaticCaps musepack_caps = GST_STATIC_CAPS ("audio/x-musepack");

#define MUSEPACK_CAPS (gst_static_caps_get(&musepack_caps))

/* Typefinder for Musepack: "MP+" marker at the start. MAXIMUM is suggested
 * when the low seven bits of the fourth byte equal 7, LIKELY + 10 otherwise. */
static void
musepack_type_find (GstTypeFind * tf, gpointer unused)
{
  guint8 *hdr = gst_type_find_peek (tf, 0, 4);

  if (hdr == NULL || memcmp (hdr, "MP+", 3) != 0)
    return;

  if ((hdr[3] & 0x7f) != 7) {
    gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY + 10, MUSEPACK_CAPS);
    return;
  }

  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MUSEPACK_CAPS);
}

850
/*** audio/x-ac3 ***/
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
static GstStaticCaps ac3_caps = GST_STATIC_CAPS ("audio/x-ac3");

#define AC3_CAPS (gst_static_caps_get(&ac3_caps))

/* Typefinder for AC-3: only checks that the first two bytes are 0x0b 0x77,
 * hence no more than POSSIBLE is suggested. */
static void
ac3_type_find (GstTypeFind * tf, gpointer unused)
{
  guint8 *sync = gst_type_find_peek (tf, 0, 2);

  if (sync == NULL)
    return;

  /* pretty lame method... */
  if (sync[0] != 0x0b || sync[1] != 0x77)
    return;

  gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, AC3_CAPS);
}

869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
/*** wavpack ***/

static GstStaticCaps wavpack_caps =
GST_STATIC_CAPS ("audio/x-wavpack, framed = (boolean) false");

#define WAVPACK_CAPS (gst_static_caps_get(&wavpack_caps))

static GstStaticCaps wavpack_correction_caps =
GST_STATIC_CAPS ("audio/x-wavpack-correction, framed = (boolean) false");

#define WAVPACK_CORRECTION_CAPS (gst_static_caps_get(&wavpack_correction_caps))

/* Typefinder for WavPack streams and WavPack correction streams.
 *
 * Requires the "wvpk" magic at offset 0, reads the little-endian 32-bit
 * block size stored at offset 4 and then walks the chunks that follow the
 * 32 byte block header. The first chunk whose id identifies a bitstream
 * decides which caps are suggested (both with GST_TYPE_FIND_LIKELY):
 *   0xa / 0xc -> audio/x-wavpack
 *   0xb       -> audio/x-wavpack-correction
 *
 * Nothing is suggested if the magic is missing, if a chunk header cannot be
 * peeked, if a chunk claims to be longer than what is left of the block, or
 * if the block ends without a bitstream chunk.
 */
static void
wavpack_type_find (GstTypeFind * tf, gpointer unused)
{
  guint64 offset;
  guint32 blocksize;
  guint8 *data;

  data = gst_type_find_peek (tf, 0, 32);
  if (!data)
    return;

  if (data[0] != 'w' || data[1] != 'v' || data[2] != 'p' || data[3] != 'k')
    return;

  /* Note: wavpack blocks can be fairly large (easily 60-110k), possibly
   * larger than the max. limits imposed by certain typefinding elements
   * like id3demux or apedemux, so typefinding is most likely only going to
   * work in pull-mode */
  blocksize = GST_READ_UINT32_LE (data + 4);
  GST_LOG ("wavpack header, blocksize=0x%04x", blocksize);
  offset = 32;
  /* NOTE(review): "32 + blocksize" is evaluated in 32 bits, so a block size
   * within 32 of the 32-bit maximum wraps around and no chunk is scanned */
  while (offset < 32 + blocksize) {
    guint32 sublen;
    guint64 remaining;

    /* get chunk header */
    GST_LOG ("peeking at chunk at offset 0x%04x", (guint) offset);
    data = gst_type_find_peek (tf, offset, 4);
    if (data == NULL)
      break;

    /* chunk length: byte 1 holds the length in units of two bytes; when
     * bit 0x80 of the id byte is set, bytes 2 and 3 extend the length */
    sublen = ((guint32) data[1]) << 1;
    if (data[0] & 0x80) {
      sublen |= (((guint32) data[2]) << 9) | (((guint32) data[3]) << 17);
      sublen += 1 + 3;          /* id + length */
    } else {
      sublen += 1 + 1;          /* id + length */
    }

    /* bytes left between this chunk header and the end of the scanned range;
     * positive because of the loop condition. Use the same value for the
     * check and for the log message so that the two cannot disagree. */
    remaining = blocksize - offset + 32;
    if (sublen > remaining) {
      GST_LOG ("chunk length too big (%u > %" G_GUINT64_FORMAT ")", sublen,
          remaining);
      break;
    }

    /* only chunks without bit 0x20 set in the id byte are considered */
    if ((data[0] & 0x20) == 0) {
      switch (data[0] & 0x0f) {
        case 0xa:              /* ID_WV_BITSTREAM  */
        case 0xc:              /* ID_WVX_BITSTREAM */
          gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, WAVPACK_CAPS);
          return;
        case 0xb:              /* ID_WVC_BITSTREAM */
          gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY,
              WAVPACK_CORRECTION_CAPS);
          return;
        default:
          break;
      }
    }
    offset += sublen;
  }
}

940
/*** multipart/x-mixed-replace mimestream ***/
941

942
943
944
945
946
static GstStaticCaps multipart_caps =
GST_STATIC_CAPS ("multipart/x-mixed-replace");
#define MULTIPART_CAPS gst_static_caps_get(&multipart_caps)

/* multipart/x-mixed replace is: 
947
 *   <maybe some whitespace>--<some ascii chars>[\r]\n
948
949
950
951
952
953
 *   <more ascii chars>[\r]\nContent-type:<more ascii>[\r]\n */
static void
multipart_type_find (GstTypeFind * tf, gpointer unused)
{
  guint8 *data;
  guint8 *x;
954

955
956
957
958
959
960
961
962
963
#define MULTIPART_MAX_BOUNDARY_OFFSET 16
  data = gst_type_find_peek (tf, 0, MULTIPART_MAX_BOUNDARY_OFFSET);
  if (!data)
    return;

  for (x = data;
      x - data < MULTIPART_MAX_BOUNDARY_OFFSET - 2 && g_ascii_isspace (*x);
      x++);
  if (x[0] != '-' || x[1] != '-')
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
    return;

  /* Could be okay, peek what should be enough for a complete header */
#define MULTIPART_MAX_HEADER_SIZE 256
  data = gst_type_find_peek (tf, 0, MULTIPART_MAX_HEADER_SIZE);
  if (!data)
    return;

  for (x = data; x - data < MULTIPART_MAX_HEADER_SIZE - 14; x++) {
    if (!isascii (*x)) {
      return;
    }
    if (*x == '\n' &&
        !g_ascii_strncasecmp ("content-type:", (gchar *) x + 1, 13)) {
      GstCaps *caps = gst_caps_copy (MULTIPART_CAPS);

      gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, caps);
      gst_caps_unref (caps);
      return;
    }
  }
}

/*** video/mpeg systemstream ***/
David Schleef's avatar
David Schleef committed
988
989
static GstStaticCaps mpeg_sys_caps = GST_STATIC_CAPS ("video/mpeg, "
    "systemstream = (boolean) true, mpegversion = (int) [ 1, 2 ]");
990

991
#define MPEG_SYS_CAPS gst_static_caps_get(&mpeg_sys_caps)
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
#define IS_MPEG_HEADER(data)            ((((guint8 *)(data))[0] == 0x00) &&  \
                                         (((guint8 *)(data))[1] == 0x00) &&  \
                                         (((guint8 *)(data))[2] == 0x01))

#define IS_MPEG_PACK_HEADER(data)       (IS_MPEG_HEADER (data) &&            \
                                         (((guint8 *)(data))[3] == 0xBA))

#define IS_MPEG_SYSTEM_HEADER(data)     (IS_MPEG_HEADER (data) &&            \
                                         (((guint8 *)(data))[3] == 0xBB))
#define IS_MPEG_PACKET_HEADER(data)     (IS_MPEG_HEADER (data) &&            \
                                         ((((guint8 *)(data))[3] & 0x80) == 0x80))

#define IS_MPEG_PES_HEADER(data)        (IS_MPEG_HEADER (data) &&            \
                                         ((((guint8 *)(data))[3] == 0xE0) || \
                                          (((guint8 *)(data))[3] == 0xC0) || \
                                          (((guint8 *)(data))[3] == 0xBD)))
1008

1009
static void
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1010
mpeg2_sys_type_find (GstTypeFind * tf, gpointer unused)
1011
1012
{
  guint8 *data = gst_type_find_peek (tf, 0, 5);
1013
  gint mpegversion;
1014

1015
  if (data && IS_MPEG_PACK_HEADER (data)) {
1016
1017
    if ((data[4] & 0xC0) == 0x40) {
      /* type 2 */
1018
1019
      mpegversion = 2;
      goto suggest;
1020
    } else if ((data[4] & 0xF0) == 0x20) {
1021
1022
      mpegversion = 1;
      goto suggest;
1023
    }
1024
1025
  } else if (data && IS_MPEG_PES_HEADER (data)) {
    /* PES stream */
1026
1027
1028
1029
1030
1031
1032
    mpegversion = 2;
    goto suggest;
  }

  return;
suggest:
  {
1033
1034
1035
    GstCaps *caps = gst_caps_copy (MPEG_SYS_CAPS);

    gst_structure_set (gst_caps_get_structure (caps, 0), "mpegversion",
1036
        G_TYPE_INT, mpegversion, NULL);
1037
    gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, caps);
1038
    gst_caps_unref (caps);
1039
1040
  }
};
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1041

1042
/* ATTENTION: ugly return value:
1043
1044
1045
1046
1047
 * 0 -  invalid data
 * 1 - not enough data
 * anything else - size until next package
 */
static guint
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1048
mpeg1_parse_header (GstTypeFind * tf, guint64 offset)
1049
{
1050
  guint8 *data = gst_type_find_peek (tf, offset, 4);
1051
  guint size;
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1052

1053
  if (!data) {
1054
    GST_LOG ("couldn't get MPEG header bytes");
1055
1056
1057
    return 1;
  }

1058
  if (data[0] != 0 || data[1] != 0 || data[2] != 1) {
1059
    GST_LOG ("no sync");
1060
1061
    return 0;
  }
1062
1063
  offset += 4;

1064
1065
  GST_LOG ("sync %02x", data[3]);

1066
  switch (data[3]) {
1067
    case 0xBA:                 /* pack header */
1068
1069
      data = gst_type_find_peek (tf, offset, 8);
      if (!data) {
1070
1071
        GST_LOG ("couldn't get MPEG pack header bytes");
        return 1;
1072
1073
1074
1075
      }
      size = 12;
      /* check marker bits */
      if ((data[0] & 0xF1) != 0x21 ||
1076
1077
          (data[2] & 0x01) != 0x01 ||
          (data[4] & 0x01) != 0x01 ||
1078
1079
          (data[5] & 0x80) != 0x80 || (data[7] & 0x01) != 0x01) {
        GST_LOG ("wrong marker bits");
1080
        return 0;
1081
      }
1082
1083
      break;

1084
    case 0xB9:                 /* ISO end code */
1085
1086
1087
      size = 4;
      break;

1088
    case 0xBB:                 /* system header */
1089
1090
      data = gst_type_find_peek (tf, offset, 2);
      if (!data) {
1091
1092
        GST_LOG ("couldn't get MPEG pack header bytes");
        return 1;
1093
      }
1094
      size = GST_READ_UINT16_BE (data) + 6;
1095
1096
1097
      offset += 2;
      data = gst_type_find_peek (tf, offset, size - 6);
      if (!data) {
1098
1099
        GST_LOG ("couldn't get MPEG pack header bytes");
        return 1;
1100
1101
1102
      }
      /* check marker bits */
      if ((data[0] & 0x80) != 0x80 ||
1103
1104
          (data[2] & 0x01) != 0x01 || (data[4] & 0x20) != 0x20) {
        GST_LOG ("wrong marker bits");
1105
        return 0;
1106
      }
1107
1108
      /* check stream marker bits */
      for (offset = 6; offset < (size - 6); offset += 3) {
1109
1110
        if (data[offset] <= 0xBB || (data[offset + 1] & 0xC0) != 0xC0) {
          GST_LOG ("wrong marker bits");
1111
          return 0;
1112
        }
1113
1114
1115
1116
1117
      }
      break;

    default:
      if (data[3] < 0xB9)
1118
        return 0;
1119
1120
      data = gst_type_find_peek (tf, offset, 2);
      if (!data) {
1121
1122
        GST_LOG ("couldn't get MPEG pack header bytes");
        return 1;
1123
      }
1124
      size = GST_READ_UINT16_BE (data) + 6;
1125
1126
      /* FIXME: we could check PTS/DTS marker bits here... (bit overkill) */
      break;
1127
1128
1129
1130
  }

  return size;
}
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1131

1132
/* calculation of possibility to identify random data as mpeg systemstream:
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1133
1134
 * bits that must match in header detection:            32 (or more)
 * chance that random data is identifed:                1/2^32
1135
 * chance that GST_MPEG_TYPEFIND_TRY_HEADERS headers are identified:
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1136
 *                                      1/2^(32*GST_MPEG_TYPEFIND_TRY_HEADERS)
1137
 * chance that this happens in GST_MPEG_TYPEFIND_TRY_SYNC bytes:
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1138
 *                                      1-(1+1/2^(32*GST_MPEG_TYPEFIND_TRY_HEADERS)^GST_MPEG_TYPEFIND_TRY_SYNC)
1139
 * for current values:
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1140
1141
 *                                      1-(1+1/2^(32*4)^101024)
 *                                    = <some_number>
1142
 */
1143
#define GST_MPEG_TYPEFIND_TRY_HEADERS 4
1144
#define GST_MPEG_TYPEFIND_TRY_SYNC (100 * 1024) /* 100kB */
1145
1146
#define GST_MPEG_TYPEFIND_SYNC_SIZE 2048
static void
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1147
mpeg1_sys_type_find (GstTypeFind * tf, gpointer unused)
1148
{
David Schleef's avatar
David Schleef committed
1149
  guint8 *data = NULL;
1150
1151
  guint size = 0;
  guint64 skipped = 0;
David Schleef's avatar
David Schleef committed
1152
  GstCaps *caps;
1153
1154
1155
1156
1157

  while (skipped < GST_MPEG_TYPEFIND_TRY_SYNC) {
    if (size < 4) {
      data = gst_type_find_peek (tf, skipped, GST_MPEG_TYPEFIND_SYNC_SIZE);
      if (!data)
1158
        break;
1159
1160
      size = GST_MPEG_TYPEFIND_SYNC_SIZE;
    }
1161
    if (IS_MPEG_PACK_HEADER (data)) {
1162
1163
      /* found packet start code */
      guint found = 0;
1164
      guint packet_size = 0;
1165
      guint64 offset = skipped;
1166

1167
      while (found < GST_MPEG_TYPEFIND_TRY_HEADERS) {
1168
1169
1170
1171
1172
        packet_size = mpeg1_parse_header (tf, offset);
        if (packet_size <= 1)
          break;
        offset += packet_size;
        found++;
1173
1174
      }
      g_assert (found <= GST_MPEG_TYPEFIND_TRY_HEADERS);
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1175
      if (found == GST_MPEG_TYPEFIND_TRY_HEADERS || packet_size == 1) {
1176
        GST_LOG ("suggesting mpeg1 system steeam");
1177
1178
        caps = gst_caps_copy (MPEG_SYS_CAPS);
        gst_structure_set (gst_caps_get_structure (caps, 0), "mpegversion",
1179
            G_TYPE_INT, 1, NULL);
1180
        gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM - 1, caps);
1181
        gst_caps_unref (caps);
1182
        return;
1183
1184
1185
1186
1187
1188
1189
1190
      }
    }
    data++;
    skipped++;
    size--;
  }
}

1191
1192
1193
1194
1195
/** video/mpegts Transport Stream **/
static GstStaticCaps mpegts_caps = GST_STATIC_CAPS ("video/mpegts, "
    "systemstream = (boolean) true, packetsize = (int) [ 188, 208 ]");
#define MPEGTS_CAPS gst_static_caps_get(&mpegts_caps)

1196
1197
#define GST_MPEGTS_TYPEFIND_MIN_HEADERS 4
#define GST_MPEGTS_TYPEFIND_MAX_HEADERS 10
1198
1199
#define GST_MPEGTS_MAX_PACKET_SIZE 204
#define GST_MPEGTS_TYPEFIND_SYNC_SIZE \
1200
1201
1202
            (GST_MPEGTS_TYPEFIND_MIN_HEADERS * GST_MPEGTS_MAX_PACKET_SIZE)
#define GST_MPEGTS_TYPEFIND_MAX_SYNC \
            (GST_MPEGTS_TYPEFIND_MAX_HEADERS * GST_MPEGTS_MAX_PACKET_SIZE)
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217

#define MPEGTS_HDR_SIZE 4
#define IS_MPEGTS_HEADER(data) (((data)[0] == 0x47) && \
                                (((data)[1] & 0x80) == 0x00) && \
                                (((data)[3] & 0x10) == 0x10))

/* Helper function to search ahead at intervals of packet_size for mpegts
 * headers */
gint
mpeg_ts_probe_headers (GstTypeFind * tf, guint64 offset, gint packet_size)
{
  /* We always enter this function having found at least one header already */
  gint found = 1;
  guint8 *data = NULL;

1218
  while (found < GST_MPEGTS_TYPEFIND_MAX_HEADERS) {
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
    offset += packet_size;

    data = gst_type_find_peek (tf, offset, MPEGTS_HDR_SIZE);
    if (data == NULL || !IS_MPEGTS_HEADER (data))
      return found;

    found++;
  }

  return found;
}

/* Typefinder for MPEG transport streams.
 *
 * Tries to detect at least GST_MPEGTS_TYPEFIND_MIN_HEADERS packet headers
 * within at most GST_MPEGTS_TYPEFIND_MAX_HEADERS packets worth of data.
 * Since the packet size is not known up front, every candidate header is
 * probed with each of the supported packet sizes in turn; the first size
 * that yields enough consecutive headers wins and is stored in the
 * "packetsize" field of the suggested caps.
 */
static void
mpeg_ts_type_find (GstTypeFind * tf, gpointer unused)
{
  /* TS packet sizes to test: normal, DVHS packet size and
   * FEC with 16 or 20 byte codes packet size. */
  static const gint pack_sizes[] = { 188, 192, 204, 208 };
  const gint n_pack_sizes = sizeof (pack_sizes) / sizeof (gint);

  guint8 *cursor = NULL;
  guint avail = 0;
  guint64 pos;

  for (pos = 0; pos < GST_MPEGTS_TYPEFIND_MAX_SYNC; pos++, cursor++, avail--) {
    gint idx;

    if (avail < MPEGTS_HDR_SIZE) {
      /* too few bytes left of the previous peek to test a header */
      cursor = gst_type_find_peek (tf, pos, GST_MPEGTS_TYPEFIND_SYNC_SIZE);
      if (!cursor)
        return;
      avail = GST_MPEGTS_TYPEFIND_SYNC_SIZE;
    }

    /* Have at least MPEGTS_HDR_SIZE bytes at this point */
    if (!IS_MPEGTS_HEADER (cursor))
      continue;

    for (idx = 0; idx < n_pack_sizes; idx++) {
      GstCaps *caps;
      gint probability;
      /* Probe ahead at size pack_sizes[idx] */
      gint found = mpeg_ts_probe_headers (tf, pos, pack_sizes[idx]);

      if (found < GST_MPEGTS_TYPEFIND_MIN_HEADERS)
        continue;

      caps = gst_caps_copy (MPEGTS_CAPS);
      gst_structure_set (gst_caps_get_structure (caps, 0), "packetsize",
          G_TYPE_INT, pack_sizes[idx], NULL);

      /* found at least 4 headers. 10 headers = MAXIMUM probability.
       * Arbitrarily, each header found is worth 10% probability,
       * giving 40% -> 100% */
      probability = 10 * MIN (found, 10);

      gst_type_find_suggest (tf, probability, caps);
      gst_caps_unref (caps);
      return;
    }
  }
}

1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
/*** video/mpeg MPEG-4 elementary video stream ***/

static GstStaticCaps mpeg4_video_caps = GST_STATIC_CAPS ("video/mpeg, "
    "systemstream = (boolean) false, mpegversion = 4");
#define MPEG4_VIDEO_CAPS gst_static_caps_get(&mpeg4_video_caps)

/* Typefinder for MPEG-4 elementary video streams.
 *
 * The stream has to start with an 8-byte header: a video object start code
 * followed by a video object layer start code. The first seven bytes are
 * fixed, the last one may be anything from 0x20 to 0x2F. On a match the
 * caps are suggested with GST_TYPE_FIND_MAXIMUM - 1.
 */
static void
mpeg4_video_type_find (GstTypeFind * tf, gpointer unused)
{
  static const guint8 fixed_part[] =
      { 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 };
  guint8 *start = gst_type_find_peek (tf, 0, 8);
  GstCaps *caps;

  if (!start)
    return;

  if (memcmp (start, fixed_part, 7) != 0)
    return;

  /* 0x20..0x2F is exactly the set of bytes whose high nibble is 2 */
  if ((start[7] & 0xF0) != 0x20)
    return;

  caps = gst_caps_copy (MPEG4_VIDEO_CAPS);
  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM - 1, caps);
  gst_caps_unref (caps);
}

1311
/*** video/mpeg video stream ***/
1312

David Schleef's avatar
David Schleef committed
1313
1314
static GstStaticCaps mpeg_video_caps = GST_STATIC_CAPS ("video/mpeg, "
    "systemstream = (boolean) false");
1315
#define MPEG_VIDEO_CAPS gst_static_caps_get(&mpeg_video_caps)
1316
static void
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1317
mpeg_video_type_find (GstTypeFind * tf, gpointer unused)
1318
1319
1320
1321
1322
1323
{
  static const guint8 sequence_header[] = { 0x00, 0x00, 0x01, 0xb3 };
  guint8 *data = NULL;

  data = gst_type_find_peek (tf, 0, 8);

Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1324
  if (data && memcmp (data, sequence_header, 4) == 0) {
1325
    GstCaps *caps = gst_caps_copy (MPEG_VIDEO_CAPS);
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
1326

1327
    gst_structure_set (gst_caps_get_structure (caps, 0), "mpegversion",
1328
        G_TYPE_INT, 1, NULL);