ebml-read.c 14.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* GStreamer EBML I/O
 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
 *
 * ebml-read.c: read EBML data from file/stream
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <string.h>

#include "ebml-read.h"
#include "ebml-ids.h"

31
32
#include <math.h>

33
34
35
36
37
38
39
40
41
42
/* NAN is supposed to be in math.h, Microsoft defines it in xmath.h */
#ifdef _MSC_VER
#include <xmath.h>
#endif

/* If everything goes wrong try 0.0/0.0 which should be NAN */
#ifndef NAN
#define NAN (0.0 / 0.0)
#endif

43
GST_DEBUG_CATEGORY (ebmlread_debug);
44
45
#define GST_CAT_DEFAULT ebmlread_debug

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/* Peeks the id and length of the element that follows in the datastream
 * provided by @peek, with @ctx passed to it as user data.
 *
 * @_id receives the element id (1 to 4 bytes, size marker bits included).
 * @_length receives the element length, or G_MAXUINT64 when every value bit
 *   of the length field is set (the "unknown size" encoding).
 * @_needed receives the prefix length (id + length field) on success, or
 *   the number of bytes that was being peeked when data ran out.
 * @el and @offset are only used for informative messaging/debug purposes.
 *
 * Returns GST_FLOW_OK on success, GST_FLOW_UNEXPECTED if there is not enough
 * data to read id and length, and GST_FLOW_ERROR if the id or length size
 * tag is invalid.
 */
GstFlowReturn
gst_ebml_peek_id_length (guint32 * _id, guint64 * _length, guint * _needed,
    GstPeekData peek, gpointer * ctx, GstElement * el, guint64 offset)
{
  guint needed;
  const guint8 *buf;
  gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
  guint64 total;
  guint8 b;

  g_return_val_if_fail (_id != NULL, GST_FLOW_ERROR);
  g_return_val_if_fail (_length != NULL, GST_FLOW_ERROR);
  g_return_val_if_fail (_needed != NULL, GST_FLOW_ERROR);

  /* well ... */
  *_id = (guint32) GST_EBML_SIZE_UNKNOWN;
  *_length = GST_EBML_SIZE_UNKNOWN;

  /* read element id */
  needed = 2;
  buf = peek (ctx, needed);
  if (!buf)
    goto not_enough_data;

  /* the position of the first set bit in the first byte gives the number
   * of bytes making up the id */
  b = GST_READ_UINT8 (buf);
  total = (guint64) b;
  while (read <= 4 && !(total & len_mask)) {
    read++;
    len_mask >>= 1;
  }
  if (G_UNLIKELY (read > 4))
    goto invalid_id;

  /* need id and at least something for subsequent length */
  needed = read + 1;
  buf = peek (ctx, needed);
  if (!buf)
    goto not_enough_data;

  while (n < read) {
    b = GST_READ_UINT8 (buf + n);
    total = (total << 8) | b;
    ++n;
  }
  *_id = (guint32) total;

  /* read element length; n now indexes the first byte of the length field */
  b = GST_READ_UINT8 (buf + n);
  total = (guint64) b;
  len_mask = 0x80;
  read = 1;
  while (read <= 8 && !(total & len_mask)) {
    read++;
    len_mask >>= 1;
  }
  if (G_UNLIKELY (read > 8))
    goto invalid_length;
  /* strip the size marker; count the byte if all remaining bits are set */
  if ((total &= (len_mask - 1)) == len_mask - 1)
    num_ffs++;

  needed += read - 1;
  buf = peek (ctx, needed);
  if (!buf)
    goto not_enough_data;

  /* move to the first byte of the length field */
  buf += (needed - read);
  n = 1;
  while (n < read) {
    b = GST_READ_UINT8 (buf + n);

    if (G_UNLIKELY (b == 0xff))
      num_ffs++;
    total = (total << 8) | b;
    ++n;
  }

  /* all value bits set means the size is unknown; this result must not be
   * overwritten by the plain value afterwards */
  if (G_UNLIKELY (read == num_ffs))
    *_length = G_MAXUINT64;
  else
    *_length = total;

  *_needed = needed;

  return GST_FLOW_OK;

  /* ERRORS */
not_enough_data:
  {
    *_needed = needed;
    return GST_FLOW_UNEXPECTED;
  }
invalid_id:
  {
    GST_ERROR_OBJECT (el,
        "Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT " (0x%"
        G_GINT64_MODIFIER "x)", (guint) b, offset, offset);
    return GST_FLOW_ERROR;
  }
invalid_length:
  {
    GST_ERROR_OBJECT (el,
        "Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
        " (0x%" G_GINT64_MODIFIER "x)", (guint) b, offset, offset);
    return GST_FLOW_ERROR;
  }
}

162
163
/* setup for parsing @buf at position @offset on behalf of @el.
 * Takes ownership of @buf. */
164
void
165
gst_ebml_read_init (GstEbmlRead * ebml, GstElement * el, GstBuffer * buf,
166
167
    guint64 offset)
{
168
  GstEbmlMaster m;
169

170
171
  g_return_if_fail (el);
  g_return_if_fail (buf);
172

173
174
175
176
177
178
179
  ebml->el = el;
  ebml->offset = offset;
  ebml->buf = buf;
  ebml->readers = g_array_sized_new (FALSE, FALSE, sizeof (GstEbmlMaster), 10);
  m.offset = ebml->offset;
  gst_byte_reader_init (&m.br, GST_BUFFER_DATA (buf), GST_BUFFER_SIZE (buf));
  g_array_append_val (ebml->readers, m);
180
181
}

182
183
/* Releases what gst_ebml_read_init() set up: frees the reader stack, drops
 * the reference on the buffer and forgets the element.  Fields that are
 * already NULL are left alone. */
void
gst_ebml_read_clear (GstEbmlRead * ebml)
{
  if (ebml->readers != NULL) {
    g_array_free (ebml->readers, TRUE);
    ebml->readers = NULL;
  }

  if (ebml->buf != NULL) {
    gst_buffer_unref (ebml->buf);
    ebml->buf = NULL;
  }

  ebml->el = NULL;
}

194
195
/* Peek callback handed to gst_ebml_peek_id_length(): returns a pointer to
 * the next @peek bytes of @br without advancing it, or NULL if the
 * bytereader cannot provide that many bytes. */
static const guint8 *
gst_ebml_read_peek (GstByteReader * br, guint peek)
{
  const guint8 *bytes = NULL;

  if (G_UNLIKELY (!gst_byte_reader_peek_data (br, peek, &bytes)))
    return NULL;

  return bytes;
}

205
static GstFlowReturn
206
207
gst_ebml_peek_id_full (GstEbmlRead * ebml, guint32 * id, guint64 * length,
    guint * prefix)
208
{
209
  GstFlowReturn ret;
210

211
212
213
  ret = gst_ebml_peek_id_length (id, length, prefix,
      (GstPeekData) gst_ebml_read_peek, (gpointer) gst_ebml_read_br (ebml),
      ebml->el, gst_ebml_read_get_pos (ebml));
214
215
  if (ret != GST_FLOW_OK)
    return ret;
216

217
218
219
  GST_LOG_OBJECT (ebml->el, "id 0x%x at offset 0x%" G_GINT64_MODIFIER "x"
      " of length %" G_GUINT64_FORMAT ", prefix %d", *id,
      gst_ebml_read_get_pos (ebml), *length, *prefix);
220

221
222
#ifndef GST_DISABLE_GST_DEBUG
  {
223
    const guint8 *data = NULL;
224
225
    GstByteReader *br = gst_ebml_read_br (ebml);
    guint size = gst_byte_reader_get_remaining (br);
226

227
    gst_byte_reader_peek_data (br, size, &data);
228

229
230
231
    GST_LOG_OBJECT (ebml->el, "current br %p; remaining %d", br, size);
    if (data)
      GST_MEMDUMP_OBJECT (ebml->el, "element", data, MIN (size, *length));
232
  }
233
#endif
234

235
236
  return ret;
}
237

238
239
240
241
242
/* Peeks the id of the next element into @id without consuming anything.
 * The element length and prefix size are determined as well but dropped. */
GstFlowReturn
gst_ebml_peek_id (GstEbmlRead * ebml, guint32 * id)
{
  guint64 unused_length;
  guint unused_prefix;
  GstFlowReturn res;

  res = gst_ebml_peek_id_full (ebml, id, &unused_length, &unused_prefix);

  return res;
}

/*
248
249
 * Read the next element, the contents are supposed to be sub-elements which
 * can be read separately.  A new bytereader is setup for doing so.
250
 */
251
252
GstFlowReturn
gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
253
{
254
255
  guint64 length;
  guint prefix;
256
  const guint8 *data = NULL;
257
  GstFlowReturn ret;
258
  GstEbmlMaster m;
259

260
  ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
261
262
  if (ret != GST_FLOW_OK)
    return ret;
263

264
265
  /* we just at least peeked the id */
  g_assert (gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix));
266

267
268
269
  m.offset = gst_ebml_read_get_pos (ebml);
  if (!gst_byte_reader_get_data (gst_ebml_read_br (ebml), length, &data))
    return GST_FLOW_PARSE;
270

271
272
273
274
  GST_LOG_OBJECT (ebml->el, "pushing level %d at offset %" G_GUINT64_FORMAT,
      ebml->readers->len, m.offset);
  gst_byte_reader_init (&m.br, data, length);
  g_array_append_val (ebml->readers, m);
275

276
  return GST_FLOW_OK;
277
278
}

279
/* explicitly pop a bytereader from stack.  Usually invoked automagically. */
280
GstFlowReturn
281
gst_ebml_read_pop_master (GstEbmlRead * ebml)
282
{
283
  g_return_val_if_fail (ebml->readers, GST_FLOW_ERROR);
284

285
286
287
288
  /* never remove initial bytereader */
  if (ebml->readers->len > 1) {
    GST_LOG_OBJECT (ebml->el, "popping level %d", ebml->readers->len - 1);
    g_array_remove_index (ebml->readers, ebml->readers->len - 1);
289
290
  }

291
  return GST_FLOW_OK;
292
293
294
}

/*
295
 * Skip the next element.
296
297
 */

298
GstFlowReturn
299
gst_ebml_read_skip (GstEbmlRead * ebml)
300
301
302
{
  guint64 length;
  guint32 id;
303
  guint prefix;
304
  GstFlowReturn ret;
305

306
  ret = gst_ebml_peek_id_full (ebml, &id, &length, &prefix);
307
308
  if (ret != GST_FLOW_OK)
    return ret;
309

310
311
  if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), length + prefix))
    return GST_FLOW_PARSE;
312

313
  return ret;
314
315
316
317
318
319
}

/*
 * Read the next element as a GstBuffer (binary).
 */

320
GstFlowReturn
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
321
gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
322
323
{
  guint64 length;
324
  guint prefix;
325
  GstFlowReturn ret;
326

327
  ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
328
329
  if (ret != GST_FLOW_OK)
    return ret;
330

331
332
333
334
335
  /* we just at least peeked the id */
  g_assert (gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix));

  if (G_LIKELY (length > 0)) {
    guint offset;
336

337
338
339
340
341
342
343
344
    offset = gst_ebml_read_get_pos (ebml) - ebml->offset;
    if (G_LIKELY (gst_byte_reader_skip (gst_ebml_read_br (ebml), length))) {
      *buf = gst_buffer_create_sub (ebml->buf, offset, length);
    } else {
      *buf = NULL;
      return GST_FLOW_PARSE;
    }
  } else {
345
346
    *buf = gst_buffer_new ();
  }
347

348
  return ret;
349
350
351
352
353
354
}

/*
 * Read the next element, return a pointer to it and its size.
 */

355
static GstFlowReturn
356
gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, const guint8 ** data,
357
358
359
    guint * size)
{
  guint64 length;
360
  guint prefix;
361
  GstFlowReturn ret;
362
363
364

  *size = 0;

365
  ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
366
367
  if (ret != GST_FLOW_OK)
    return ret;
368

369
370
  /* we just at least peeked the id */
  g_assert (gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix));
371
372

  *data = NULL;
373
374
375
376
  if (G_LIKELY (length >= 0)) {
    if (!gst_byte_reader_get_data (gst_ebml_read_br (ebml), length, data))
      return GST_FLOW_PARSE;
  }
377

378
  *size = length;
379

380
  return ret;
381
382
383
384
385
386
}

/*
 * Read the next element as an unsigned int.
 */

387
GstFlowReturn
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
388
gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
389
{
390
  const guint8 *data;
391
  guint size;
392
  GstFlowReturn ret;
393

394
395
396
  ret = gst_ebml_read_bytes (ebml, id, &data, &size);
  if (ret != GST_FLOW_OK)
    return ret;
397

398
  if (size > 8) {
399
    GST_ERROR_OBJECT (ebml->el,
400
        "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
401
402
        G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
        gst_ebml_read_get_pos (ebml) - size);
403
    return GST_FLOW_ERROR;
404
  }
405
406
407
408
409
410

  if (size == 0) {
    *num = 0;
    return ret;
  }

411
412
  *num = 0;
  while (size > 0) {
413
    *num = (*num << 8) | *data;
414
    size--;
415
    data++;
416
417
  }

418
  return ret;
419
420
421
422
423
424
}

/*
 * Read the next element as a signed int.
 */

425
GstFlowReturn
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
426
gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
427
{
428
  const guint8 *data;
429
430
  guint size;
  gboolean negative = 0;
431
  GstFlowReturn ret;
432

433
434
435
  ret = gst_ebml_read_bytes (ebml, id, &data, &size);
  if (ret != GST_FLOW_OK)
    return ret;
436

437
  if (size > 8) {
438
    GST_ERROR_OBJECT (ebml->el,
439
        "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
440
441
        G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
        gst_ebml_read_get_pos (ebml) - size);
442
    return GST_FLOW_ERROR;
443
  }
444

445
446
447
448
449
  if (size == 0) {
    *num = 0;
    return ret;
  }

450
451
  *num = 0;
  if (*data & 0x80) {
452
    negative = 1;
453
454
455
    *num = *data & ~0x80;
    size--;
    data++;
456
  }
457

458
459
460
461
  while (size > 0) {
    *num = (*num << 8) | *data;
    size--;
    data++;
462
463
464
  }

  /* make signed */
465
  if (negative) {
466
    *num = 0 - *num;
467
  }
468

469
  return ret;
470
471
}

472
473
474
475
476
477
478
479
480
481
482
/* Convert 80 bit extended precision float in big endian format to double.
 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
 * licensed under LGPL */

struct _ext_float
{
  guint8 exponent[2];
  guint8 mantissa[8];
};

static gdouble
483
_ext2dbl (const guint8 * data)
484
{
485
  struct _ext_float ext;
486
487
488
  guint64 m = 0;
  gint e, i;

489
490
491
  memcpy (&ext.exponent, data, 2);
  memcpy (&ext.mantissa, data + 2, 8);

492
  for (i = 0; i < 8; i++)
493
494
    m = (m << 8) + ext.mantissa[i];
  e = (((gint) ext.exponent[0] & 0x7f) << 8) | ext.exponent[1];
495
  if (e == 0x7fff && m)
496
    return NAN;
497
498
499
  e -= 16383 + 63;              /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
                                 * mantissa bit is written as opposed to the
                                 * single and double precision formats */
500
  if (ext.exponent[0] & 0x80)
501
502
503
504
    m = -m;
  return ldexp (m, e);
}

505
506
507
508
/*
 * Read the next element as a float.
 */

509
GstFlowReturn
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
510
gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
511
{
512
  const guint8 *data;
513
  guint size;
514
  GstFlowReturn ret;
515

516
517
518
  ret = gst_ebml_read_bytes (ebml, id, &data, &size);
  if (ret != GST_FLOW_OK)
    return ret;
519

520
  if (size != 0 && size != 4 && size != 8 && size != 10) {
521
    GST_ERROR_OBJECT (ebml->el,
522
        "Invalid float element size %d at position %" G_GUINT64_FORMAT " (0x%"
523
524
        G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
        gst_ebml_read_get_pos (ebml) - size);
525
    return GST_FLOW_ERROR;
526
527
528
529
530
  }

  if (size == 4) {
    gfloat f;

531
532
    memcpy (&f, data, 4);
    f = GFLOAT_FROM_BE (f);
533
534

    *num = f;
535
  } else if (size == 8) {
536
537
    gdouble d;

538
539
    memcpy (&d, data, 8);
    d = GDOUBLE_FROM_BE (d);
540
541

    *num = d;
542
  } else if (size == 10) {
543
    *num = _ext2dbl (data);
544
545
546
  } else {
    /* size == 0 means a value of 0.0 */
    *num = 0.0;
547
548
  }

549
  return ret;
550
551
552
}

/*
553
 * Read the next element as a C string.
554
555
 */

556
557
static GstFlowReturn
gst_ebml_read_string (GstEbmlRead * ebml, guint32 * id, gchar ** str)
558
{
559
  const guint8 *data;
560
  guint size;
561
  GstFlowReturn ret;
562

563
564
565
  ret = gst_ebml_read_bytes (ebml, id, &data, &size);
  if (ret != GST_FLOW_OK)
    return ret;
566

567
568
569
  *str = g_malloc (size + 1);
  memcpy (*str, data, size);
  (*str)[size] = '\0';
570

571
  return ret;
572
573
}

574
575
576
577
578
/*
 * Read the next element as an ASCII string.
 */

GstFlowReturn
579
gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str_out)
580
581
{
  GstFlowReturn ret;
582
  gchar *str;
583
584
585
586
587
588
  gchar *iter;

#ifndef GST_DISABLE_GST_DEBUG
  guint64 oldoff = ebml->offset;
#endif

589
  ret = gst_ebml_read_string (ebml, id, &str);
590
591
592
  if (ret != GST_FLOW_OK)
    return ret;

593
  for (iter = str; *iter != '\0'; iter++) {
594
595
596
    if (G_UNLIKELY (*iter & 0x80)) {
      GST_ERROR_OBJECT (ebml,
          "Invalid ASCII string at offset %" G_GUINT64_FORMAT, oldoff);
597
      g_free (str);
598
599
600
601
      return GST_FLOW_ERROR;
    }
  }

602
  *str_out = str;
603
604
605
  return ret;
}

606
607
608
609
/*
 * Read the next element as a UTF-8 string.
 */

610
GstFlowReturn
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
611
gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
612
{
613
  GstFlowReturn ret;
614
615

#ifndef GST_DISABLE_GST_DEBUG
616
  guint64 oldoff = gst_ebml_read_get_pos (ebml);
617
#endif
618

619
  ret = gst_ebml_read_string (ebml, id, str);
620
621
  if (ret != GST_FLOW_OK)
    return ret;
622
623
624

  if (str != NULL && *str != NULL && **str != '\0' &&
      !g_utf8_validate (*str, -1, NULL)) {
625
    GST_WARNING_OBJECT (ebml->el,
626
        "Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
627
628
629
  }

  return ret;
630
631
632
}

/*
633
634
 * Read the next element as a date.
 * Returns the seconds since the unix epoch.
635
636
 */

637
GstFlowReturn
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
638
gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
639
{
640
  gint64 ebml_date;
641
642
643
644
645
  GstFlowReturn ret;

  ret = gst_ebml_read_sint (ebml, id, &ebml_date);
  if (ret != GST_FLOW_OK)
    return ret;
646
647

  *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
648
649

  return ret;
650
651
652
653
654
655
}

/*
 * Read the next element as binary data.
 */

656
GstFlowReturn
Thomas Vander Stichele's avatar
Thomas Vander Stichele committed
657
658
gst_ebml_read_binary (GstEbmlRead * ebml,
    guint32 * id, guint8 ** binary, guint64 * length)
659
{
660
  const guint8 *data;
661
  guint size;
662
  GstFlowReturn ret;
663

664
665
666
  ret = gst_ebml_read_bytes (ebml, id, &data, &size);
  if (ret != GST_FLOW_OK)
    return ret;
667

668
669
  *length = size;
  *binary = g_memdup (data, size);
670

671
  return GST_FLOW_OK;
672
}