Tag Parser 12.3.1
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
Loading...
Searching...
No Matches
ebmlelement.cpp
Go to the documentation of this file.
1#include "./ebmlelement.h"
2#include "./ebmlid.h"
4#include "./matroskaid.h"
5
6#include "../exceptions.h"
7#include "../mediafileinfo.h"
8
9#include <c++utilities/conversion/binaryconversion.h>
10#include <c++utilities/io/binaryreader.h>
11#include <c++utilities/io/binarywriter.h>
12
13#include <cstdint>
14#include <cstring>
15#include <memory>
16#include <sstream>
17#include <string>
18
19using namespace std;
20using namespace CppUtilities;
21
22namespace TagParser {
23
// Upper bound for the number of bytes internalParse() skips (one byte per attempt) while looking
// for a valid EBML element header before it gives up; defaults to 0x4000 (16 KiB).
32std::uint64_t EbmlElement::bytesToBeSkipped = 0x4000;
33
37EbmlElement::EbmlElement(MatroskaContainer &container, std::uint64_t startOffset)
38 : GenericFileElement<EbmlElement>(container, startOffset)
39{
40}
41
45EbmlElement::EbmlElement(MatroskaContainer &container, std::uint64_t startOffset, std::uint64_t maxSize)
46 : GenericFileElement<EbmlElement>(container, startOffset, maxSize)
47{
48}
49
53EbmlElement::EbmlElement(EbmlElement &parent, std::uint64_t startOffset)
54 : GenericFileElement<EbmlElement>(parent, startOffset)
55{
56}
57
61string EbmlElement::parsingContext() const
62{
63 return ("parsing header of EBML element " % idToString() % " at ") + startOffset();
64}
65
// Parses the header of this EBML element: determines ID length and ID, size length and data size
// at the current start offset and creates the first child and the next sibling if there is room
// for them. Problems are reported via `diag`. If the header is not plausible the start offset is
// advanced by one byte per attempt (at most bytesToBeSkipped attempts); if no attempt succeeds an
// InvalidDataException is thrown.
// NOTE(review): the signature line and a few statements of this definition are not visible in
// this listing (e.g. the declaration of `buf`, the reads into it and some `if` headers whose
// closing braces are still present). Judging by the member index this is presumably
// `void EbmlElement::internalParse(Diagnostics &diag)` - confirm against the real source file.
70{
71 static const string context("parsing EBML element header");
72
// each iteration is one parsing attempt; a `continue` moves the start offset one byte forward
// and shrinks the maximum size accordingly (see the increment expression of the loop)
73 for (std::uint64_t skipped = 0; skipped < bytesToBeSkipped; ++m_startOffset, --m_maxSize, ++skipped) {
74 // check whether max size is valid
75 if (maxTotalSize() < 2) {
76 diag.emplace_back(DiagLevel::Critical, argsToString("The EBML element at ", startOffset(), " is truncated or does not exist."), context);
78 }
79 stream().seekg(static_cast<streamoff>(startOffset()));
80
81 // read ID
// the ID length is derived from the position of the first set bit in the first byte
83 std::uint8_t beg = static_cast<std::uint8_t>(stream().peek()), mask = 0x80;
84 m_idLength = 1;
85 while (m_idLength <= maximumIdLengthSupported() && (beg & mask) == 0) {
86 ++m_idLength;
87 mask >>= 1;
88 }
// only the first failed attempt is reported to avoid flooding the diagnostics while skipping
90 if (!skipped) {
91 diag.emplace_back(
92 DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is not supported, trying to skip."), context);
93 }
94 continue; // try again
95 }
96 if (m_idLength > container().maxIdLength()) {
97 if (!skipped) {
98 diag.emplace_back(DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is invalid, trying to skip."), context);
99 }
100 continue; // try again
101 }
103 m_id = BE::toInt<std::uint32_t>(buf);
104
105 // check whether this element is actually a sibling of one of its parents rather than a child
106 // (might be the case if the parent's size is unknown and hence assumed to be the max file size)
108 // check at which level in the hierarchy the element is supposed to occur using its ID
109 // (the only chance to find out whether the element belongs higher up in the hierarchy)
110 const MatroskaElementLevel supposedLevel = matroskaIdLevel(m_id);
111 const std::uint8_t actualLevel = level();
112 if (actualLevel > supposedLevel) {
113 // the element belongs higher up in the hierarchy so find a better parent
114 if (EbmlElement *betterParent = m_parent->parent(actualLevel - static_cast<std::uint8_t>(supposedLevel))) {
115 // recompute the parent size (assumption - which was rest of the available space - was wrong)
117 m_parent->m_sizeUnknown = false;
118 // detach from ...
// release() is called before the move so the owning pointer does not delete this element
119 if (m_parent->firstChild() == this) {
120 // ... parent
121 m_parent->m_firstChild.release();
122 m_parent->m_firstChild = std::move(m_nextSibling);
123 } else {
124 // ... previous sibling
125 for (EbmlElement *sibling = m_parent->firstChild(); sibling; sibling = sibling->nextSibling()) {
126 if (sibling->nextSibling() == this) {
127 sibling->m_nextSibling.release();
128 sibling->m_nextSibling = std::move(m_nextSibling);
129 break;
130 }
131 }
132 }
133 // insert as child of better parent
134 if (EbmlElement *previousSibling = betterParent->lastChild()) {
135 previousSibling->m_nextSibling.reset(this);
136 } else {
137 betterParent->m_firstChild.reset(this);
138 }
139 // update own reference to parent
140 m_parent = betterParent;
141 }
142 }
143 }
144
145 // read size
146 beg = static_cast<std::uint8_t>(stream().peek());
147 mask = 0x80;
148 m_sizeLength = 1;
// a first size byte of 0xFF is treated as "size unknown"
149 if ((m_sizeUnknown = (beg == 0xFF))) {
150 // this indicates that the element size is unknown
151 // -> just assume the element takes the maximum available size
153 } else {
// the size length is derived from the position of the first set bit, like the ID length above
154 while (m_sizeLength <= maximumSizeLengthSupported() && (beg & mask) == 0) {
155 ++m_sizeLength;
156 mask >>= 1;
157 }
159 if (!skipped) {
160 diag.emplace_back(DiagLevel::Critical, "EBML size length is not supported.", parsingContext());
161 }
162 continue; // try again
163 }
164 if (m_sizeLength > container().maxSizeLength()) {
165 if (!skipped) {
166 diag.emplace_back(DiagLevel::Critical, "EBML size length is invalid.", parsingContext());
167 }
168 continue; // try again
169 }
170 // read size into buffer
171 memset(buf, 0, sizeof(DataSizeType)); // reset buffer
173 // xor the first byte in buffer which has been read from the file with mask
// (this clears the length marker bit so only the actual size value remains)
174 *(buf + (maximumSizeLengthSupported() - m_sizeLength)) ^= static_cast<char>(mask);
175 m_dataSize = BE::toInt<std::uint64_t>(buf);
176 // check if element is truncated
177 if (totalSize() > maxTotalSize()) {
178 if (m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
179 if (!skipped) {
180 diag.emplace_back(DiagLevel::Critical, "EBML header seems to be truncated.", parsingContext());
181 }
182 continue; // try again
183 } else { // data truncated
184 diag.emplace_back(DiagLevel::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.",
185 parsingContext());
186 m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
187 }
188 }
189 }
190
191 // check if there's a first child
// a first child offset of zero or one not smaller than the total size means: no child
192 const std::uint64_t firstChildOffset = this->firstChildOffset();
193 if (firstChildOffset && firstChildOffset < totalSize()) {
194 m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
195 } else {
196 m_firstChild.reset();
197 }
198
199 // check if there's a sibling
200 if (totalSize() < maxTotalSize()) {
201 if (parent()) {
202 m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
203 } else {
205 }
206 } else {
207 m_nextSibling.reset();
208 }
209
210 // no critical errors occurred
211 // -> add a warning if bytes have been skipped
212 if (skipped) {
213 diag.emplace_back(DiagLevel::Warning, argsToString(skipped, " bytes have been skipped"), parsingContext());
214 }
215 // -> don't need another try, return here
216 return;
217 }
218
219 // critical errors occurred and skipping some bytes wasn't successful
220 throw InvalidDataException();
221}
222
// Reads the content of the element as string: seeks to the element's data offset and reads
// dataSize() bytes via the reader.
// NOTE(review): the signature line is not visible in this listing; judging by the member index
// it is presumably `std::string EbmlElement::readString()` - confirm against the real source.
227{
228 stream().seekg(static_cast<streamoff>(dataOffset()));
229 return reader().readString(dataSize());
230}
231
// Reads the content of the element as unsigned integer.
// At most 8 bytes are read. They are stored right-aligned in a zero-initialized 8-byte buffer so
// that converting the whole buffer from big endian yields the value regardless of the data size.
// NOTE(review): the signature line is not visible in this listing; judging by the member index
// it is presumably `std::uint64_t EbmlElement::readUInteger()` - confirm against the real source.
239{
240 constexpr DataSizeType maxBytesToRead = 8;
241 char buff[maxBytesToRead] = { 0 };
// number of leading buffer bytes left at zero (0 if the data size is 8 or more)
242 const auto bytesToSkip = maxBytesToRead - min(dataSize(), maxBytesToRead);
243 stream().seekg(static_cast<streamoff>(dataOffset()), ios_base::beg);
244 stream().read(buff + bytesToSkip, static_cast<streamoff>(sizeof(buff) - bytesToSkip));
245 return BE::toInt<std::uint64_t>(buff);
246}
247
// Reads the content of the element as float.
// A data size of sizeof(float) is read as 32-bit big endian float, a data size of sizeof(double)
// as 64-bit big endian float; for any other data size 0.0 is returned without reading anything.
// NOTE(review): the signature line is not visible in this listing; judging by the member index
// it is presumably `double EbmlElement::readFloat()` - confirm against the real source.
253{
254 stream().seekg(static_cast<streamoff>(dataOffset()));
255 switch (dataSize()) {
256 case sizeof(float):
257 return static_cast<double>(reader().readFloat32BE());
258 case sizeof(double):
259 return reader().readFloat64BE();
260 default:
261 return 0.0;
262 }
263}
264
// Returns the length of the specified id in byte (1 to 4).
// Throws InvalidDataException if the id is greater than 0x1FFFFFFF.
// NOTE(review): the signature line is not visible in this listing; judging by the member index
// it is presumably `std::uint8_t EbmlElement::calculateIdLength(IdentifierType id)` - confirm
// against the real source.
270{
271 if (id <= 0xFF) {
272 return 1;
273 } else if (id <= 0x7FFF) {
274 return 2;
275 } else if (id <= 0x3FFFFF) {
276 return 3;
277 } else if (id <= 0x1FFFFFFF) {
278 return 4;
279 } else {
280 throw InvalidDataException();
281 }
282}
283
288std::uint8_t EbmlElement::calculateSizeDenotationLength(std::uint64_t size)
289{
290 if (size < 126) {
291 return 1;
292 } else if (size <= 16382ul) {
293 return 2;
294 } else if (size <= 2097150ul) {
295 return 3;
296 } else if (size <= 268435454ul) {
297 return 4;
298 } else if (size <= 34359738366ul) {
299 return 5;
300 } else if (size <= 4398046511102ul) {
301 return 6;
302 } else if (size <= 562949953421310ul) {
303 return 7;
304 } else if (size <= 72057594037927934ul) {
305 return 8;
306 } else {
307 throw InvalidDataException();
308 }
309}
310
// Stores the specified id in the specified buffer and returns the number of bytes the id
// occupies (1 to 4). Throws InvalidDataException if the id is greater than 0x1FFFFFFF.
// According to the member index the buffer must be at least 8 bytes long.
// NOTE(review): the signature line is not visible in this listing; judging by the member index
// it is presumably `std::uint8_t EbmlElement::makeId(IdentifierType id, char *buff)` - confirm
// against the real source.
318{
319 if (id <= 0xFF) {
320 *buff = static_cast<char>(id);
321 return 1;
322 } else if (id <= 0x7FFF) {
323 BE::getBytes(static_cast<std::uint16_t>(id), buff);
324 return 2;
325 } else if (id <= 0x3FFFFF) {
// the id is shifted to the upper three bytes of a 32-bit value; presumably getBytes() writes all
// four bytes so the buffer receives one byte more than the returned length
326 BE::getBytes(static_cast<std::uint32_t>(id << 0x8), buff);
327 return 3;
328 } else if (id <= 0x1FFFFFFF) {
329 BE::getBytes(static_cast<std::uint32_t>(id), buff);
330 return 4;
331 } else {
332 throw InvalidDataException();
333 }
334}
335
343std::uint8_t EbmlElement::makeSizeDenotation(std::uint64_t size, char *buff)
344{
345 if (size < 126) {
346 *buff = static_cast<char>(size | 0x80);
347 return 1;
348 } else if (size <= 16382ul) {
349 BE::getBytes(static_cast<std::uint16_t>(size | 0x4000), buff);
350 return 2;
351 } else if (size <= 2097150ul) {
352 BE::getBytes(static_cast<std::uint32_t>((size | 0x200000) << 0x08), buff);
353 return 3;
354 } else if (size <= 268435454ul) {
355 BE::getBytes(static_cast<std::uint32_t>(size | 0x10000000), buff);
356 return 4;
357 } else if (size <= 34359738366ul) {
358 BE::getBytes(static_cast<std::uint64_t>((size | 0x800000000) << 0x18), buff);
359 return 5;
360 } else if (size <= 4398046511102ul) {
361 BE::getBytes(static_cast<std::uint64_t>((size | 0x40000000000) << 0x10), buff);
362 return 6;
363 } else if (size <= 562949953421310ul) {
364 BE::getBytes(static_cast<std::uint64_t>((size | 0x2000000000000) << 0x08), buff);
365 return 7;
366 } else if (size <= 72057594037927934ul) {
367 BE::getBytes(static_cast<std::uint64_t>(size | 0x100000000000000), buff);
368 return 8;
369 }
370 throw InvalidDataException();
371}
372
382std::uint8_t EbmlElement::makeSizeDenotation(std::uint64_t size, char *buff, std::uint8_t minBytes)
383{
384 if (minBytes <= 1 && size < 126) {
385 *buff = static_cast<char>(size | 0x80);
386 return 1;
387 } else if (minBytes <= 2 && size <= 16382ul) {
388 BE::getBytes(static_cast<std::uint16_t>(size | 0x4000), buff);
389 return 2;
390 } else if (minBytes <= 3 && size <= 2097150ul) {
391 BE::getBytes(static_cast<std::uint32_t>((size | 0x200000) << 0x08), buff);
392 return 3;
393 } else if (minBytes <= 4 && size <= 268435454ul) {
394 BE::getBytes(static_cast<std::uint32_t>(size | 0x10000000), buff);
395 return 4;
396 } else if (minBytes <= 5 && size <= 34359738366ul) {
397 BE::getBytes(static_cast<std::uint64_t>((size | 0x800000000) << 0x18), buff);
398 return 5;
399 } else if (minBytes <= 6 && size <= 4398046511102ul) {
400 BE::getBytes(static_cast<std::uint64_t>((size | 0x40000000000) << 0x10), buff);
401 return 6;
402 } else if (minBytes <= 7 && size <= 562949953421310ul) {
403 BE::getBytes(static_cast<std::uint64_t>((size | 0x2000000000000) << 0x08), buff);
404 return 7;
405 } else if (minBytes <= 8 && size <= 72057594037927934ul) {
406 BE::getBytes(static_cast<std::uint64_t>(size | 0x100000000000000), buff);
407 return 8;
408 }
409 throw InvalidDataException();
410}
411
416std::uint8_t EbmlElement::calculateUIntegerLength(std::uint64_t integer)
417{
418 if (integer <= 0xFFul) {
419 return 1;
420 } else if (integer <= 0xFFFFul) {
421 return 2;
422 } else if (integer <= 0xFFFFFFul) {
423 return 3;
424 } else if (integer <= 0xFFFFFFFFul) {
425 return 4;
426 } else if (integer <= 0xFFFFFFFFFFul) {
427 return 5;
428 } else if (integer <= 0xFFFFFFFFFFFFul) {
429 return 6;
430 } else if (integer <= 0xFFFFFFFFFFFFFFul) {
431 return 7;
432 } else {
433 return 8;
434 }
435}
436
441std::uint8_t EbmlElement::makeUInteger(std::uint64_t value, char *buff)
442{
443 if (value <= 0xFFul) {
444 *buff = static_cast<char>(value);
445 return 1;
446 } else if (value <= 0xFFFFul) {
447 BE::getBytes(static_cast<std::uint16_t>(value), buff);
448 return 2;
449 } else if (value <= 0xFFFFFFul) {
450 BE::getBytes(static_cast<std::uint32_t>(value << 0x08), buff);
451 return 3;
452 } else if (value <= 0xFFFFFFFFul) {
453 BE::getBytes(static_cast<std::uint32_t>(value), buff);
454 return 4;
455 } else if (value <= 0xFFFFFFFFFFul) {
456 BE::getBytes(static_cast<std::uint64_t>(value << 0x18), buff);
457 return 5;
458 } else if (value <= 0xFFFFFFFFFFFFul) {
459 BE::getBytes(static_cast<std::uint64_t>(value << 0x10), buff);
460 return 6;
461 } else if (value <= 0xFFFFFFFFFFFFFFul) {
462 BE::getBytes(static_cast<std::uint64_t>(value << 0x08), buff);
463 return 7;
464 } else {
465 BE::getBytes(static_cast<std::uint64_t>(value), buff);
466 return 8;
467 }
468}
469
479std::uint8_t EbmlElement::makeUInteger(std::uint64_t value, char *buff, std::uint8_t minBytes)
480{
481 if (minBytes <= 1 && value <= 0xFFul) {
482 *buff = static_cast<char>(value);
483 return 1;
484 } else if (minBytes <= 2 && value <= 0xFFFFul) {
485 BE::getBytes(static_cast<std::uint16_t>(value), buff);
486 return 2;
487 } else if (minBytes <= 3 && value <= 0xFFFFFFul) {
488 BE::getBytes(static_cast<std::uint32_t>(value << 0x08), buff);
489 return 3;
490 } else if (minBytes <= 4 && value <= 0xFFFFFFFFul) {
491 BE::getBytes(static_cast<std::uint32_t>(value), buff);
492 return 4;
493 } else if (minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
494 BE::getBytes(static_cast<std::uint64_t>(value << 0x18), buff);
495 return 5;
496 } else if (minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
497 BE::getBytes(static_cast<std::uint64_t>(value << 0x10), buff);
498 return 6;
499 } else if (minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
500 BE::getBytes(static_cast<std::uint64_t>(value << 0x08), buff);
501 return 7;
502 } else {
503 BE::getBytes(static_cast<std::uint64_t>(value), buff);
504 return 8;
505 }
506}
507
514void EbmlElement::makeSimpleElement(ostream &stream, IdentifierType id, std::uint64_t content)
515{
516 char buff1[8];
517 char buff2[8];
518 std::uint8_t sizeLength = EbmlElement::makeId(id, buff1);
519 stream.write(buff1, sizeLength);
520 std::uint8_t elementSize = EbmlElement::makeUInteger(content, buff2);
521 sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
522 stream.write(buff1, sizeLength);
523 stream.write(buff2, elementSize);
524}
525
532void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::IdentifierType id, string_view content)
533{
534 char buff1[8];
535 std::uint8_t sizeLength = EbmlElement::makeId(id, buff1);
536 stream.write(buff1, sizeLength);
537 sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
538 stream.write(buff1, sizeLength);
539 stream.write(content.data(), static_cast<std::streamsize>(content.size()));
540}
541
542} // namespace TagParser
The Diagnostics class is a container for DiagMessage.
The EbmlElement class helps to parse EBML files such as Matroska files.
std::uint64_t firstChildOffset() const
Returns the offset of the first child of the element.
static std::uint8_t makeUInteger(std::uint64_t value, char *buff)
Writes value to buff.
EbmlElement(MatroskaContainer &container, std::uint64_t startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
static std::uint8_t calculateIdLength(IdentifierType id)
Returns the length of the specified id in byte.
static void makeSimpleElement(std::ostream &stream, IdentifierType id, std::uint64_t content)
Makes a simple EBML element.
static std::uint8_t calculateSizeDenotationLength(std::uint64_t size)
Returns the length of the size denotation for the specified size in byte.
static std::uint8_t makeId(IdentifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
double readFloat()
Reads the content of the element as float.
static std::uint64_t bytesToBeSkipped
Specifies the number of bytes to be skipped till a valid EBML element is found in the stream.
Definition ebmlelement.h:56
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition ebmlelement.h:71
std::string readString()
Reads the content of the element as string.
std::uint64_t readUInteger()
Reads the content of the element as unsigned integer.
static std::uint8_t makeSizeDenotation(std::uint64_t size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
void internalParse(Diagnostics &diag)
Parses the EBML element.
static std::uint8_t calculateUIntegerLength(std::uint64_t integer)
Returns the length of the specified unsigned integer in byte.
The GenericFileElement class helps to parse binary files which consist of an arboreal element structu...
ImplementationType * lastChild()
Returns the last child of the element.
std::uint32_t headerSize() const
Returns the header size of the element in byte.
typename FileElementTraits< EbmlElement >::DataSizeType DataSizeType
ImplementationType * nextSibling()
Returns the next sibling of the element.
ImplementationType * parent()
Returns the parent of the element.
ImplementationType * firstChild()
Returns the first child of the element.
std::unique_ptr< ImplementationType > m_firstChild
static constexpr std::uint32_t maximumIdLengthSupported()
typename FileElementTraits< ImplementationType >::IdentifierType IdentifierType
Specifies the type used to store identifiers.
static constexpr std::uint32_t maximumSizeLengthSupported()
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Implementation of GenericContainer<MediaFileInfo, MatroskaTag, MatroskaTrack, EbmlElement>.
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
Contains all classes and functions of the TagInfo library.
Definition aaccodebook.h:10
TAG_PARSER_EXPORT MatroskaElementLevel matroskaIdLevel(std::uint32_t matroskaId)
Returns the level at which elements with the specified matroskaId are supposed to occur in a Matroska...