Tag Parser 12.5.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
Loading...
Searching...
No Matches
ebmlelement.cpp
Go to the documentation of this file.
1#include "./ebmlelement.h"
2#include "./ebmlid.h"
4#include "./matroskaid.h"
5
6#include "../exceptions.h"
7#include "../mediafileinfo.h"
8
9#include <c++utilities/conversion/binaryconversion.h>
10#include <c++utilities/io/binaryreader.h>
11#include <c++utilities/io/binarywriter.h>
12
13#include <cstdint>
14#include <cstring>
15#include <memory>
16#include <sstream>
17#include <string>
18
19using namespace std;
20using namespace CppUtilities;
21
22namespace TagParser {
23
28
32std::uint64_t EbmlElement::bytesToBeSkipped = 0x4000;
33
41
49
57
61string EbmlElement::parsingContext() const
62{
63 return ("parsing header of EBML element " % idToString() % " at ") + startOffset();
64}
65
// Parses the EBML element (header, first child and next sibling).
//
// The loop below tries to parse a header at the current start offset. When a header
// looks unsupported, invalid or truncated it advances the start offset by one byte
// (shrinking the maximum size accordingly) and tries again, for at most
// bytesToBeSkipped attempts. Critical diagnostics for those failures are only added
// on the first attempt. If no attempt succeeds, InvalidDataException is thrown.
//
// NOTE(review): the declarator line (listing line 69) is missing from this listing;
// the member index gives it as "void internalParse(Diagnostics &diag)" - confirm.
// NOTE(review): several interior listing lines are missing as well; each gap is
// marked with a NOTE(review) comment below.
70{
71 static const string context("parsing EBML element header");
72
73 for (std::uint64_t skipped = 0; skipped < bytesToBeSkipped; ++m_startOffset, --m_maxSize, ++skipped) {
74 // check whether max size is valid
75 if (maxTotalSize() < 2) {
76 diag.emplace_back(DiagLevel::Critical, argsToString("The EBML element at ", startOffset(), " is truncated or does not exist."), context);
// NOTE(review): listing line 77 is missing here; presumably an exception for truncated
// data is thrown at this point - confirm against the original file.
78 }
79 stream().seekg(static_cast<streamoff>(startOffset()));
80
81 // read ID
// NOTE(review): listing line 82 is missing; presumably the declaration of "buf"
// (used further below) lives there - confirm.
// The ID length is derived from the first byte: one plus the number of zero bits
// preceding the first set bit (the mask starts at 0x80 and is shifted right).
83 std::uint8_t beg = static_cast<std::uint8_t>(stream().peek()), mask = 0x80;
84 m_idLength = 1;
85 while (m_idLength <= maximumIdLengthSupported() && (beg & mask) == 0) {
86 ++m_idLength;
87 mask >>= 1;
88 }
// NOTE(review): listing line 89 is missing; presumably the check
// "m_idLength > maximumIdLengthSupported()" opening the block closed below - confirm.
90 if (!skipped) {
91 diag.emplace_back(
92 DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is not supported, trying to skip."), context);
93 }
94 continue; // try again
95 }
96 if (m_idLength > container().maxIdLength()) {
97 if (!skipped) {
98 diag.emplace_back(DiagLevel::Critical, argsToString("EBML ID length at ", startOffset(), " is invalid, trying to skip."), context);
99 }
100 continue; // try again
101 }
// NOTE(review): listing line 102 is missing; presumably the ID bytes are read into
// "buf" there before being converted below - confirm.
103 m_id = BE::toInt<std::uint32_t>(buf);
104
105 // check whether this element is actually a sibling of one of its parents rather than a child
106 // (might be the case if the parent's size is unknown and hence assumed to be the max file size)
107 if (m_parent && m_parent->m_sizeUnknown) {
108 // check at which level in the hierarchy the element is supposed to occur using its ID
109 // (the only chance to find out whether the element belongs higher up in the hierarchy)
110 const MatroskaElementLevel supposedLevel = matroskaIdLevel(m_id);
111 const std::uint8_t actualLevel = level();
112 if (actualLevel > supposedLevel) {
113 // the element belongs higher up in the hierarchy so find a better parent
114 if (EbmlElement *betterParent = m_parent->parent(actualLevel - static_cast<std::uint8_t>(supposedLevel))) {
115 // recompute the parent size (assumption - which was rest of the available space - was wrong)
116 m_parent->m_dataSize = m_startOffset - m_parent->m_startOffset - m_parent->headerSize();
117 m_parent->m_sizeUnknown = false;
118 // detach from ...
// (release() is called first so that the owning pointer gives up "this" without
// deleting it; the old next sibling then takes its place)
119 if (m_parent->firstChild() == this) {
120 // ... parent
121 m_parent->m_firstChild.release();
122 m_parent->m_firstChild = std::move(m_nextSibling);
123 } else {
124 // ... previous sibling
125 for (EbmlElement *sibling = m_parent->firstChild(); sibling; sibling = sibling->nextSibling()) {
126 if (sibling->nextSibling() == this) {
127 sibling->m_nextSibling.release();
128 sibling->m_nextSibling = std::move(m_nextSibling);
129 break;
130 }
131 }
132 }
133 // insert as child of better parent
134 if (EbmlElement *previousSibling = betterParent->lastChild()) {
135 previousSibling->m_nextSibling.reset(this);
136 } else {
137 betterParent->m_firstChild.reset(this);
138 }
139 // update own reference to parent
140 m_parent = betterParent;
141 }
142 }
143 }
144
145 // read size
146 beg = static_cast<std::uint8_t>(stream().peek());
147 mask = 0x80;
148 m_sizeLength = 1;
149 if ((m_sizeUnknown = (beg == 0xFF))) {
150 // this indicates that the element size is unknown
151 // -> just assume the element takes the maximum available size
// NOTE(review): listing line 152 is missing; presumably m_dataSize is set to the
// remaining available size there - confirm.
153 } else {
154 while (m_sizeLength <= maximumSizeLengthSupported() && (beg & mask) == 0) {
155 ++m_sizeLength;
156 mask >>= 1;
157 }
// NOTE(review): listing line 158 is missing; presumably the check
// "m_sizeLength > maximumSizeLengthSupported()" opening the block closed below - confirm.
159 if (!skipped) {
160 diag.emplace_back(DiagLevel::Critical, "EBML size length is not supported.", parsingContext());
161 }
162 continue; // try again
163 }
164 if (m_sizeLength > container().maxSizeLength()) {
165 if (!skipped) {
166 diag.emplace_back(DiagLevel::Critical, "EBML size length is invalid.", parsingContext());
167 }
168 continue; // try again
169 }
170 // read size into buffer
171 memset(buf, 0, sizeof(DataSizeType)); // reset buffer
// NOTE(review): listing line 172 is missing; presumably the size bytes are read into
// the tail of "buf" there - confirm.
173 // xor the first byte in buffer which has been read from the file with mask
// (this clears the marker bit found by the loop above so only the size value remains)
174 *(buf + (maximumSizeLengthSupported() - m_sizeLength)) ^= static_cast<char>(mask);
175 m_dataSize = BE::toInt<std::uint64_t>(buf);
176 // check if element is truncated
177 if (totalSize() > maxTotalSize()) {
178 if (m_idLength + m_sizeLength > maxTotalSize()) { // header truncated
179 if (!skipped) {
180 diag.emplace_back(DiagLevel::Critical, "EBML header seems to be truncated.", parsingContext());
181 }
182 continue; // try again
183 } else { // data truncated
184 diag.emplace_back(DiagLevel::Warning, "Data of EBML element seems to be truncated; unable to parse siblings of that element.",
185 parsingContext());
186 m_dataSize = maxTotalSize() - m_idLength - m_sizeLength; // using max size instead
187 }
188 }
189 }
190
191 // check if there's a first child
// (a first child is only created when firstChildOffset() is non-zero and lies within this element)
192 const std::uint64_t firstChildOffset = this->firstChildOffset();
193 if (firstChildOffset && firstChildOffset < totalSize()) {
194 m_firstChild.reset(new EbmlElement(static_cast<EbmlElement &>(*this), startOffset() + firstChildOffset));
195 } else {
196 m_firstChild.reset();
197 }
198
199 // check if there's a sibling
// (a sibling starts right after this element if space is left within the maximum total size)
200 if (totalSize() < maxTotalSize()) {
201 if (parent()) {
202 m_nextSibling.reset(new EbmlElement(*(parent()), startOffset() + totalSize()));
203 } else {
// NOTE(review): listing line 204 is missing; presumably a top-level sibling is created
// via the container-taking constructor there - confirm.
205 }
206 } else {
207 m_nextSibling.reset();
208 }
209
210 // no critical errors occurred
211 // -> add a warning if bytes have been skipped
212 if (skipped) {
213 diag.emplace_back(DiagLevel::Warning, argsToString(skipped, " bytes have been skipped"), parsingContext());
214 }
215 // -> don't need another try, return here
216 return;
217 }
218
219 // critical errors occurred and skipping some bytes wasn't successful
220 throw InvalidDataException();
221}
222
227{
228 stream().seekg(static_cast<streamoff>(dataOffset()));
229 return reader().readString(dataSize());
230}
231
239{
240 constexpr DataSizeType maxBytesToRead = 8;
241 char buff[maxBytesToRead] = { 0 };
242 const auto bytesToSkip = maxBytesToRead - min(dataSize(), maxBytesToRead);
243 stream().seekg(static_cast<streamoff>(dataOffset()), ios_base::beg);
244 stream().read(buff + bytesToSkip, static_cast<streamoff>(sizeof(buff) - bytesToSkip));
245 return BE::toInt<std::uint64_t>(buff);
246}
247
253{
254 stream().seekg(static_cast<streamoff>(dataOffset()));
255 switch (dataSize()) {
256 case sizeof(float):
257 return static_cast<double>(reader().readFloat32BE());
258 case sizeof(double):
259 return reader().readFloat64BE();
260 default:
261 return 0.0;
262 }
263}
264
270{
271 if (id <= 0xFF) {
272 return 1;
273 } else if (id <= 0x7FFF) {
274 return 2;
275 } else if (id <= 0x3FFFFF) {
276 return 3;
277 } else if (id <= 0x1FFFFFFF) {
278 return 4;
279 } else {
280 throw InvalidDataException();
281 }
282}
283
288std::uint8_t EbmlElement::calculateSizeDenotationLength(std::uint64_t size)
289{
290 if (size < 126) {
291 return 1;
292 } else if (size <= 16382ul) {
293 return 2;
294 } else if (size <= 2097150ul) {
295 return 3;
296 } else if (size <= 268435454ul) {
297 return 4;
298 } else if (size <= 34359738366ul) {
299 return 5;
300 } else if (size <= 4398046511102ul) {
301 return 6;
302 } else if (size <= 562949953421310ul) {
303 return 7;
304 } else if (size <= 72057594037927934ul) {
305 return 8;
306 } else {
307 throw InvalidDataException();
308 }
309}
310
318{
319 if (id <= 0xFF) {
320 *buff = static_cast<char>(id);
321 return 1;
322 } else if (id <= 0x7FFF) {
323 BE::getBytes(static_cast<std::uint16_t>(id), buff);
324 return 2;
325 } else if (id <= 0x3FFFFF) {
326 BE::getBytes(static_cast<std::uint32_t>(id << 0x8), buff);
327 return 3;
328 } else if (id <= 0x1FFFFFFF) {
329 BE::getBytes(static_cast<std::uint32_t>(id), buff);
330 return 4;
331 } else {
332 throw InvalidDataException();
333 }
334}
335
343std::uint8_t EbmlElement::makeSizeDenotation(std::uint64_t size, char *buff)
344{
345 if (size < 126) {
346 *buff = static_cast<char>(size | 0x80);
347 return 1;
348 } else if (size <= 16382ul) {
349 BE::getBytes(static_cast<std::uint16_t>(size | 0x4000), buff);
350 return 2;
351 } else if (size <= 2097150ul) {
352 BE::getBytes(static_cast<std::uint32_t>((size | 0x200000) << 0x08), buff);
353 return 3;
354 } else if (size <= 268435454ul) {
355 BE::getBytes(static_cast<std::uint32_t>(size | 0x10000000), buff);
356 return 4;
357 } else if (size <= 34359738366ul) {
358 BE::getBytes(static_cast<std::uint64_t>((size | 0x800000000) << 0x18), buff);
359 return 5;
360 } else if (size <= 4398046511102ul) {
361 BE::getBytes(static_cast<std::uint64_t>((size | 0x40000000000) << 0x10), buff);
362 return 6;
363 } else if (size <= 562949953421310ul) {
364 BE::getBytes(static_cast<std::uint64_t>((size | 0x2000000000000) << 0x08), buff);
365 return 7;
366 } else if (size <= 72057594037927934ul) {
367 BE::getBytes(static_cast<std::uint64_t>(size | 0x100000000000000), buff);
368 return 8;
369 }
370 throw InvalidDataException();
371}
372
382std::uint8_t EbmlElement::makeSizeDenotation(std::uint64_t size, char *buff, std::uint8_t minBytes)
383{
384 if (minBytes <= 1 && size < 126) {
385 *buff = static_cast<char>(size | 0x80);
386 return 1;
387 } else if (minBytes <= 2 && size <= 16382ul) {
388 BE::getBytes(static_cast<std::uint16_t>(size | 0x4000), buff);
389 return 2;
390 } else if (minBytes <= 3 && size <= 2097150ul) {
391 BE::getBytes(static_cast<std::uint32_t>((size | 0x200000) << 0x08), buff);
392 return 3;
393 } else if (minBytes <= 4 && size <= 268435454ul) {
394 BE::getBytes(static_cast<std::uint32_t>(size | 0x10000000), buff);
395 return 4;
396 } else if (minBytes <= 5 && size <= 34359738366ul) {
397 BE::getBytes(static_cast<std::uint64_t>((size | 0x800000000) << 0x18), buff);
398 return 5;
399 } else if (minBytes <= 6 && size <= 4398046511102ul) {
400 BE::getBytes(static_cast<std::uint64_t>((size | 0x40000000000) << 0x10), buff);
401 return 6;
402 } else if (minBytes <= 7 && size <= 562949953421310ul) {
403 BE::getBytes(static_cast<std::uint64_t>((size | 0x2000000000000) << 0x08), buff);
404 return 7;
405 } else if (minBytes <= 8 && size <= 72057594037927934ul) {
406 BE::getBytes(static_cast<std::uint64_t>(size | 0x100000000000000), buff);
407 return 8;
408 }
409 throw InvalidDataException();
410}
411
416std::uint8_t EbmlElement::calculateUIntegerLength(std::uint64_t integer)
417{
418 if (integer <= 0xFFul) {
419 return 1;
420 } else if (integer <= 0xFFFFul) {
421 return 2;
422 } else if (integer <= 0xFFFFFFul) {
423 return 3;
424 } else if (integer <= 0xFFFFFFFFul) {
425 return 4;
426 } else if (integer <= 0xFFFFFFFFFFul) {
427 return 5;
428 } else if (integer <= 0xFFFFFFFFFFFFul) {
429 return 6;
430 } else if (integer <= 0xFFFFFFFFFFFFFFul) {
431 return 7;
432 } else {
433 return 8;
434 }
435}
436
441std::uint8_t EbmlElement::makeUInteger(std::uint64_t value, char *buff)
442{
443 if (value <= 0xFFul) {
444 *buff = static_cast<char>(value);
445 return 1;
446 } else if (value <= 0xFFFFul) {
447 BE::getBytes(static_cast<std::uint16_t>(value), buff);
448 return 2;
449 } else if (value <= 0xFFFFFFul) {
450 BE::getBytes(static_cast<std::uint32_t>(value << 0x08), buff);
451 return 3;
452 } else if (value <= 0xFFFFFFFFul) {
453 BE::getBytes(static_cast<std::uint32_t>(value), buff);
454 return 4;
455 } else if (value <= 0xFFFFFFFFFFul) {
456 BE::getBytes(static_cast<std::uint64_t>(value << 0x18), buff);
457 return 5;
458 } else if (value <= 0xFFFFFFFFFFFFul) {
459 BE::getBytes(static_cast<std::uint64_t>(value << 0x10), buff);
460 return 6;
461 } else if (value <= 0xFFFFFFFFFFFFFFul) {
462 BE::getBytes(static_cast<std::uint64_t>(value << 0x08), buff);
463 return 7;
464 } else {
465 BE::getBytes(static_cast<std::uint64_t>(value), buff);
466 return 8;
467 }
468}
469
479std::uint8_t EbmlElement::makeUInteger(std::uint64_t value, char *buff, std::uint8_t minBytes)
480{
481 if (minBytes <= 1 && value <= 0xFFul) {
482 *buff = static_cast<char>(value);
483 return 1;
484 } else if (minBytes <= 2 && value <= 0xFFFFul) {
485 BE::getBytes(static_cast<std::uint16_t>(value), buff);
486 return 2;
487 } else if (minBytes <= 3 && value <= 0xFFFFFFul) {
488 BE::getBytes(static_cast<std::uint32_t>(value << 0x08), buff);
489 return 3;
490 } else if (minBytes <= 4 && value <= 0xFFFFFFFFul) {
491 BE::getBytes(static_cast<std::uint32_t>(value), buff);
492 return 4;
493 } else if (minBytes <= 5 && value <= 0xFFFFFFFFFFul) {
494 BE::getBytes(static_cast<std::uint64_t>(value << 0x18), buff);
495 return 5;
496 } else if (minBytes <= 6 && value <= 0xFFFFFFFFFFFFul) {
497 BE::getBytes(static_cast<std::uint64_t>(value << 0x10), buff);
498 return 6;
499 } else if (minBytes <= 7 && value <= 0xFFFFFFFFFFFFFFul) {
500 BE::getBytes(static_cast<std::uint64_t>(value << 0x08), buff);
501 return 7;
502 } else {
503 BE::getBytes(static_cast<std::uint64_t>(value), buff);
504 return 8;
505 }
506}
507
514void EbmlElement::makeSimpleElement(ostream &stream, IdentifierType id, std::uint64_t content)
515{
516 char buff1[8];
517 char buff2[8];
518 std::uint8_t sizeLength = EbmlElement::makeId(id, buff1);
519 stream.write(buff1, sizeLength);
520 std::uint8_t elementSize = EbmlElement::makeUInteger(content, buff2);
521 sizeLength = EbmlElement::makeSizeDenotation(elementSize, buff1);
522 stream.write(buff1, sizeLength);
523 stream.write(buff2, elementSize);
524}
525
532void EbmlElement::makeSimpleElement(std::ostream &stream, GenericFileElement::IdentifierType id, string_view content)
533{
534 char buff1[8];
535 std::uint8_t sizeLength = EbmlElement::makeId(id, buff1);
536 stream.write(buff1, sizeLength);
537 sizeLength = EbmlElement::makeSizeDenotation(content.size(), buff1);
538 stream.write(buff1, sizeLength);
539 stream.write(content.data(), static_cast<std::streamsize>(content.size()));
540}
541
542} // namespace TagParser
The Diagnostics class is a container for DiagMessage.
std::uint64_t firstChildOffset() const
Returns the offset of the first child of the element.
static std::uint8_t makeUInteger(std::uint64_t value, char *buff)
Writes value to buff.
EbmlElement(MatroskaContainer &container, std::uint64_t startOffset)
Constructs a new top level element with the specified container at the specified startOffset.
static std::uint8_t calculateIdLength(IdentifierType id)
Returns the length of the specified id in bytes.
static void makeSimpleElement(std::ostream &stream, IdentifierType id, std::uint64_t content)
Makes a simple EBML element.
static std::uint8_t calculateSizeDenotationLength(std::uint64_t size)
Returns the length of the size denotation for the specified size in bytes.
static std::uint8_t makeId(IdentifierType id, char *buff)
Stores the specified id in the specified buffer which must be at least 8 bytes long.
double readFloat()
Reads the content of the element as float.
static std::uint64_t bytesToBeSkipped
Specifies the number of bytes to be skipped till a valid EBML element is found in the stream.
Definition ebmlelement.h:56
std::string idToString() const
Converts the specified EBML ID to a printable string.
Definition ebmlelement.h:71
std::string readString()
Reads the content of the element as string.
std::uint64_t readUInteger()
Reads the content of the element as unsigned integer.
static std::uint8_t makeSizeDenotation(std::uint64_t size, char *buff)
Makes the size denotation for the specified size and stores it to buff.
void internalParse(Diagnostics &diag)
Parses the EBML element.
static std::uint8_t calculateUIntegerLength(std::uint64_t integer)
Returns the length of the specified unsigned integer in bytes.
GenericFileElement(ContainerType &container, std::uint64_t startOffset)
typename FileElementTraits< EbmlElement >::DataSizeType DataSizeType
static constexpr std::uint32_t maximumIdLengthSupported()
typename FileElementTraits< ImplementationType >::IdentifierType IdentifierType
Specifies the type used to store identifiers.
static constexpr std::uint32_t maximumSizeLengthSupported()
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
Definition exceptions.h:25
Implementation of GenericContainer<MediaFileInfo, MatroskaTag, MatroskaTrack, EbmlElement>.
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
Definition exceptions.h:39
Contains all classes and functions of the TagInfo library.
Definition aaccodebook.h:10
TAG_PARSER_EXPORT MatroskaElementLevel matroskaIdLevel(std::uint32_t matroskaId)
Returns the level at which elements with the specified matroskaId are supposed to occur in a Matroska...