Tag Parser 12.3.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
Loading...
Searching...
No Matches
ebmlelement.cpp
Go to the documentation of this file.
1#include "./ebmlelement.h"
2#include "./ebmlid.h"
4#include "./matroskaid.h"
5
6#include "../exceptions.h"
7#include "../mediafileinfo.h"
8
9#include <c++utilities/conversion/binaryconversion.h>
10#include <c++utilities/io/binaryreader.h>
11#include <c++utilities/io/binarywriter.h>
12
13#include <cstdint>
14#include <cstring>
15#include <memory>
16#include <sstream>
17#include <string>
18
19using namespace std;
20using namespace CppUtilities;
21
22namespace TagParser {
23
32std::uint64_t EbmlElement::bytesToBeSkipped = 0x4000;
33
37EbmlElement::EbmlElement(MatroskaContainer &container, std::uint64_t startOffset)
38 : GenericFileElement<EbmlElement>(container, startOffset)
39{
40}
41
45EbmlElement::EbmlElement(MatroskaContainer &container, std::uint64_t startOffset, std::uint64_t maxSize)
46 : GenericFileElement<EbmlElement>(container, startOffset, maxSize)
47{
48}
49
53EbmlElement::EbmlElement(EbmlElement &parent, std::uint64_t startOffset)
54 : GenericFileElement<EbmlElement>(parent, startOffset)
55{
56}
57
61string EbmlElement::parsingContext() const
62{
63 return ("parsing header of EBML element " % idToString() % " at ") + startOffset();
64}
65
70{
71 static const string context("parsing EBML element header");
72
73 for (std::uint64_t skipped = 0; skipped < bytesToBeSkipped; ++m_startOffset, --m_maxSize, ++skipped) {
74 // check whether max size is valid
75 if (maxTotalSize() < 2) {
76 diag.emplace_back(DiagLevel::Critical, argsToString("The EBML element at ", startOffset(), " is truncated or does not exist."), context);
78 }
79 stream().seekg(static_cast<streamoff>(startOffset()));
80
81 // read ID
83 std::uint8_t beg = static_cast<std::uint8_t>(stream().peek()), mask = 0x80;
84 m_idLength = 1;
85 while (m_idLength <= maximumIdLengthSupported() && (beg & mask) == 0) {
86 ++m_idLength;
87 mask >>= 1;
88 }
90 if (!skipped) {
91 diag.emplace_back(
92 DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is not supported, trying to skip."), context);
93 }
94 continue; // try again
95 }
96 if (m_idLength > container().maxIdLength()) {
97 if (!skipped) {
98 diag.emplace_back(DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is invalid, trying to skip."), context);
99 }
100 continue; // try again
101 }
103 m_id = BE::toInt<std::uint32_t>(buf);
104
105 // check whether this element is actually a sibling of one of its parents rather than a child
106 // (might be the case if the parent's size is unknown and hence assumed to be the max file size)
108 // check at which level in the hierarchy the element is supposed to occur using its ID
109 // (the only chance to find out whether the element belongs higher up in the hierarchy)
110 const MatroskaElementLevel supposedLevel = matroskaIdLevel(m_id);
111 const std::uint8_t actualLevel = level();
112 if (actualLevel > supposedLevel) {
113 // the element belongs higher up in the hierarchy so find a better parent
114 if (EbmlElement *betterParent = m_parent->parent(actualLevel - static_cast<std::uint8_t>(supposedLevel))) {
115 // recompute the parent size (assumption - which was rest of the available space - was wrong)
117 m_parent->m_sizeUnknown = false;
118 // detach from ...
119 if (m_parent->firstChild() == this) {
120 // ... parent
121 m_parent->m_firstChild.release();
122 m_parent->m_firstChild = std::move(m_nextSibling);
123 } else {
124 // ... previous sibling
125 for (EbmlElement *sibling = m_parent->firstChild(); sibling; sibling = sibling->nextSibling()) {
126 if (sibling->nextSibling() == this) {
127 sibling->m_nextSibling.release();
128 sibling->m_nextSibling = std::move(m_nextSibling);
129 break;
130 }
131 }
132 }
133 // insert as child of better parent
134 if (EbmlElement *previousSibling = betterParent->lastChild()) {
135 previousSibling->m_nextSibling.reset(this);
136 } else {
137 betterParent->m_firstChild.reset(this);
138 }
139 // update own reference to parent
140 m_parent = betterParent;
141 }
142 }
143 }
144
145 // read size
146 beg = static_cast<std::uint8_t>(stream().peek());
147 mask = 0x80;
148 m_sizeLength = 1;
149 if ((m_sizeUnknown = (beg == 0xFF))) {
150 // this indicates that the element size is unknown
151 // -> just assume the element takes the maximum available size
153 } else {
154 while (m_sizeLength <= maximumSizeLengthSupported() && (beg & mask) == 0) {
155 ++m_sizeLength;
156 mask >>= 1;
157 }
159 if (!skipped) {
160 diag.emplace_back(DiagLevel::Critical, "EBML size length is not supported.", parsingContext());
161 }
162 continue; // try again
163 }
164 if (m_sizeLength > container().maxSizeLength()) {
165 if (!skipped) {
166 diag.emplace_back(DiagLevel::Critical, "EBML size length is invalid.", parsingContext());
167 }
168 continue; // try again
169 }
170 // read size into buffer
171 memset(buf, 0, sizeof(DataSizeType)); // reset buffer
173 // xor the first byte in buffer which has been read from the file with mask
174 *(buf + (maximumSizeLengthSupported() - m_sizeLength)) ^= static_cast<char>(mask);
175 m_dataSize = BE::toInt<std::uint64_t>(buf);
176 // check if element is truncated
177 if (totalSize() > maxTotalSize()) {
178 if (m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
179 if (!skipped) {
180 diag.emplace_back(DiagLevel::Critical, "EBML header seems to be truncated.", parsingContext());
181 }
182 continue; // try again
183 } else { // data truncated
184 diag.emplace_back(DiagLevel::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.",
185 parsingContext());
186 m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
187 }
188 }
189 }
190
191 // check if there's a first child
192 const std::uint64_t firstChildOffset = this->firstChildOffset();
193 if (firstChildOffset && firstChildOffset < totalSize()) {
194 m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
195 } else {
196 m_firstChild.reset();
197 }
198
199 // check if there's a sibling
200 if (totalSize() < maxTotalSize()) {
201 if (parent()) {
202 m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
203 } else {
205 }
206 } else {
207 m_nextSibling.reset();
208 }
209
210 // no critical errors occurred
211 // -> add a warning if bytes have been skipped
212 if (skipped) {
213 diag.emplace_back(DiagLevel::Warning, argsToString(skipped, " bytes have been skipped"), parsingContext());
214 }
215 // -> don't need another try, return here
216 return;
217 }
218
219 // critical errors occurred and skipping some bytes wasn't successful
220 throw InvalidDataException();
221}
222
227{
228 stream().seekg(static_cast<streamoff>(dataOffset()));
229 return reader().readString(dataSize());
230}
231
239{
240 constexpr DataSizeType maxBytesToRead = 8;
241 char buff[maxBytesToRead] = { 0 };
242 const auto bytesToSkip = maxBytesToRead - min(dataSize(), maxBytesToRead);
243 stream().seekg(static_cast<streamoff>(dataOffset()), ios_base::beg);
244 stream().read(buff + bytesToSkip, static_cast<streamoff>(sizeof(buff) - bytesToSkip));
245 return BE::toInt<std::uint64_t>(buff);
246}
247
253{
254 stream().seekg(static_cast<streamoff>(dataOffset()));
255 switch (dataSize()) {
256 case sizeof(float):
257 return static_cast<double>(reader().readFloat32BE());
258 case sizeof(double):
259 return reader().readFloat64BE();
260 default:
261 return 0.0;
262 }
263}
264
270{
271 if (id <= 0xFF) {
272 return 1;
273 } else if (id <= 0x7FFF) {
274 return 2;
275 } else if (id <= 0x3FFFFF) {
276 return 3;
277 } else if (id <= 0x1FFFFFFF) {
278 return 4;
279 } else {
280 throw InvalidDataException();
281 }
282}
283
288std::uint8_t EbmlElement::calculateSizeDenotationLength(std::uint64_t size)
289{
290 if (size < 126) {
291 return 1;
292 } else if (size <= 16382ul) {
293 return 2;
294 } else if (size <= 2097150ul) {
295 return 3;
296 } else if (size <= 268435454ul) {
297 return 4;
298 } else if (size <= 34359738366ul) {
299 return 5;
300 } else if (size <= 4398046511102ul) {
301 return 6;
302 } else if (size <= 562949953421310ul) {
303 return 7;
304 } else if (size <= 72057594037927934ul) {
305 return 8;
306 } else {
307 throw InvalidDataException();
308 }
309}
310
318{
319 if (id <= 0xFF) {
320 *buff = static_cast<char>(id);
321 return 1;
322 } else if (id <= 0x7FFF) {
323 BE::getBytes(static_cast<std::uint16_t>(id), buff);
324 return 2;
325 } else if (id <= 0x3FFFFF) {
326 BE::getBytes(static_cast<std::uint32_t>(id << 0x8), buff);
327 return 3;
328 } else if (id <= 0x1FFFFFFF) {
329 BE::getBytes(static_cast<std::uint32_t>(id), buff);
330 return 4;
331 } else {
332 throw InvalidDataException();
333 }
334}
335
343std::uint8_t EbmlElement::makeSizeDenotation(std::uint64_t size, char *buff)
344{
345 if (size < 126) {
346 *buff = static_cast<char>(size | 0x80);
347 return 1;
348 } else if (size <= 16382ul) {
349 BE::getBytes(static_cast<std::uint16_t>(size | 0x4000), buff);
350 return 2;
351 } else if (size <= 2097150ul) {
352 BE::getBytes(static_cast<std::uint32_t>((size | 0x200000) << 0x08), buff);
353 return 3;
354 } else if (size <= 268435454ul) {
355 BE::getBytes(static_cast<std::uint32_t>(size | 0x10000000), buff);
356 return 4;
357 } else if (size <= 34359738366ul) {
358 BE::getBytes(static_cast<std::uint64_t>((size | 0x800000000) << 0x18), buff);
359 return 5;
360 } else if (size <= 4398046511102ul) {
361 BE::getBytes(static_cast<std::uint64_t>((size | 0x40000000000) << 0x10), buff);
362 return 6;
363 } else if (size <= 562949953421310ul) {
364 BE::getBytes(static_cast<std::uint64_t>((size | 0x2000000000000) << 0x08), buff);
365 return 7;
366 } else if (size <= 72057594037927934ul) {
367 BE::getBytes(static_cast<std::uint64_t>(size | 0x100000000000000), buff);
368 return 8;
369 }
370 throw InvalidDataException();
371}
372
381std::uint8_t EbmlElement::makeSizeDenotation(std::uint64_t size, char *buff, std::uint8_t minBytes)
382{
383 if (minBytes <= 1 && size < 126) {
384 *buff = static_cast<char>(size | 0x80);
385 return 1;
386 } else if (minBytes <= 2 && size <= 16382ul) {
387 BE::getBytes(static_cast<std::uint16_t>(size | 0x4000), buff);
388 return 2;
389 } else if (minBytes <= 3 && size <= 2097150ul) {
390 BE::getBytes(static_cast<std::uint32_t>((size | 0x200000) << 0x08), buff);
391 return 3;
392 } else if (minBytes <= 4 && size <= 268435454ul) {
393 BE::getBytes(static_cast<std::uint32_t>(size | 0x10000000), buff);
394 return 4;
395 } else if (minBytes <= 5 && size <= 34359738366ul) {
396 BE::getBytes(static_cast<std::uint64_t>((size | 0x800000000) << 0x18), buff);
397 return 5;
398 } else if (minBytes <= 6 && size <= 4398046511102ul) {
399 BE::getBytes(static_cast<std::uint64_t>((size | 0x40000000000) << 0x10), buff);
400 return 6;
401 } else if (minBytes <= 7 && size <= 562949953421310ul) {
402 BE::getBytes(static_cast<std::uint64_t>((size | 0x2000000000000) << 0x08), buff);
403 return 7;
404 } else if (minBytes <= 8 && size <= 72057594037927934ul) {
405 BE::getBytes(static_cast<std::uint64_t>(size | 0x100000000000000), buff);
406 return 8;
407 }
408 throw InvalidDataException();
409}
410
415std::uint8_t EbmlElement::calculateUIntegerLength(std::uint64_t integer)
416{
417 if (integer <= 0xFFul) {
418 return 1;
419 } else if (integer <= 0xFFFFul) {
420 return 2;
421 } else if (integer <= 0xFFFFFFul) {
422 return 3;
423 } else if (integer <= 0xFFFFFFFFul) {
424 return 4;
425 } else if (integer <= 0xFFFFFFFFFFul) {
426 return 5;
427 } else if (integer <= 0xFFFFFFFFFFFFul) {
428 return 6;
429 } else if (integer <= 0xFFFFFFFFFFFFFFul) {
430 return 7;
431 } else {
432 return 8;
433 }
434}
435
440std::uint8_t EbmlElement::makeUInteger(std::uint64_t value, char *buff)
441{
442 if (value <= 0xFFul) {
443 *buff = static_cast<char>(value);
444 return 1;
445 } else if (value <= 0xFFFFul) {
446 BE::getBytes(static_cast<std::uint16_t>(value), buff);
447 return 2;
448 } else if (value <= 0xFFFFFFul) {
449 BE::getBytes(static_cast<std::uint32_t>(value << 0x08), buff);
450 return 3;
451 } else if (value <= 0xFFFFFFFFul) {
452 BE::getBytes(static_cast<std::uint32_t>(value), buff);
453 return 4;
454 } else if (value <= 0xFFFFFFFFFFul) {
455 BE::getBytes(static_cast<std::uint64_t>(value << 0x18), buff);
456 return 5;
457 } else if (value <= 0xFFFFFFFFFFFFul) {
458 BE::getBytes(static_cast<std::uint64_t>(value << 0x10), buff);
459 return 6;
460 } else if (value <= 0xFFFFFFFFFFFFFFul) {
461 BE::getBytes(static_cast<std::uint64_t>(value << 0x08), buff);
462 return 7;
463 } else {
464 BE::getBytes(static_cast<std::uint64_t>(value), buff);
465 return 8;
466 }
467}
468
478std::uint8_t EbmlElement::makeUInteger(std::uint64_t value, char *buff, std::uint8_t minBytes)
479{
480 if (minBytes <= 1 && value <= 0xFFul) {
481 *buff = static_cast<char>(value);
482 return 1;
483 } else if (minBytes <= 2 && value <= 0xFFFFul) {
484 BE::getBytes(static_cast<std::uint16_t>(value), buff);
485 return 2;
486 } else if (minBytes <= 3 && value <= 0xFFFFFFul) {
487 BE::getBytes(static_cast<std::uint32_t>(value << 0x08), buff);
488 return 3;
489 } else if (minBytes <= 4 && value <= 0xFFFFFFFFul) {
490 BE::getBytes(static_cast<std::uint32_t>(value), buff);
491 return 4;
492 } else if (minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
493 BE::getBytes(static_cast<std::uint64_t>(value << 0x18), buff);
494 return 5;
495 } else if (minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
496 BE::getBytes(static_cast<std::uint64_t>(value << 0x10), buff);
497 return 6;
498 } else if (minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
499 BE::getBytes(static_cast<std::uint64_t>(value << 0x08), buff);
500 return 7;
501 } else {
502 BE::getBytes(static_cast<std::uint64_t>(value), buff);
503 return 8;
504 }
505}
506
513void EbmlElement::makeSimpleElement(ostream &stream, IdentifierType id, std::uint64_t content)
514{
515 char buff1[8];
516 char buff2[8];
517 std::uint8_t sizeLength = EbmlElement::makeId(id, buff1);
518 stream.write(buff1, sizeLength);
519 std::uint8_t elementSize = EbmlElement::makeUInteger(content, buff2);
520 sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
521 stream.write(buff1, sizeLength);
522 stream.write(buff2, elementSize);
523}
524
531void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::IdentifierType id, string_view content)
532{
533 char buff1[8];
534 std::uint8_t sizeLength = EbmlElement::makeId(id, buff1);
535 stream.write(buff1, sizeLength);
536 sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
537 stream.write(buff1, sizeLength);
538 stream.write(content.data(), static_cast<std::streamsize>(content.size()));
539}
540
541} // namespace TagParser
The Diagnostics class is a container for DiagMessage.
The EbmlElement class helps to parse EBML files such as Matroska files.
std::uint64_t firstChildOffset() const
Returns the offset of the first child of the element.
static std::uint8_t makeUInteger(std::uint64_t value, char *buff)
Writes value to buff.
EbmlElement(MatroskaContainer &container, std::uint64_t startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
static std::uint8_t calculateIdLength(IdentifierType id)
Returns the length of the specified id in byte.
static void makeSimpleElement(std::ostream &stream, IdentifierType id, std::uint64_t content)
Makes a simple EBML element.
static std::uint8_t calculateSizeDenotationLength(std::uint64_t size)
Returns the length of the size denotation for the specified size in byte.
static std::uint8_t makeId(IdentifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
double readFloat()
Reads the content of the element as float.
static std::uint64_t bytesToBeSkipped
Specifies the number of bytes to be skipped till a valid EBML element is found in the stream.
Definition ebmlelement.h:56
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition ebmlelement.h:71
std::string readString()
Reads the content of the element as string.
std::uint64_t readUInteger()
Reads the content of the element as unsigned integer.
static std::uint8_t makeSizeDenotation(std::uint64_t size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
void internalParse(Diagnostics &diag)
Parses the EBML element.
static std::uint8_t calculateUIntegerLength(std::uint64_t integer)
Returns the length of the specified unsigned integer in byte.
The GenericFileElement class helps to parse binary files which consist of an arboreal element structu...
ImplementationType * lastChild()
Returns the last child of the element.
std::uint32_t headerSize() const
Returns the header size of the element in byte.
typename FileElementTraits< EbmlElement >::DataSizeType DataSizeType
ImplementationType * nextSibling()
Returns the next sibling of the element.
ImplementationType * parent()
Returns the parent of the element.
ImplementationType * firstChild()
Returns the first child of the element.
std::unique_ptr< ImplementationType > m_firstChild
static constexpr std::uint32_t maximumIdLengthSupported()
typename FileElementTraits< ImplementationType >::IdentifierType IdentifierType
Specifies the type used to store identifiers.
static constexpr std::uint32_t maximumSizeLengthSupported()
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Implementation of GenericContainer<MediaFileInfo, MatroskaTag, MatroskaTrack, EbmlElement>.
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
Contains all classes and functions of the TagInfo library.
Definition aaccodebook.h:10
TAG_PARSER_EXPORT MatroskaElementLevel matroskaIdLevel(std::uint32_t matroskaId)
Returns the level at which elements with the specified matroskaId are supposed to occur in a Matroska...