Tag Parser 12.4.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
Loading...
Searching...
No Matches
mp4track.cpp
Go to the documentation of this file.
1#include "./mp4track.h"
2#include "./mp4atom.h"
3#include "./mp4container.h"
4#include "./mp4ids.h"
5#include "./mpeg4descriptor.h"
6
8
10
13
14#include "../exceptions.h"
15#include "../mediafileinfo.h"
16#include "../mediaformat.h"
17
18#include <c++utilities/conversion/stringbuilder.h>
19#include <c++utilities/io/binaryreader.h>
20#include <c++utilities/io/binarywriter.h>
21#include <c++utilities/io/bitreader.h>
22
23#include <cmath>
24#include <locale>
25
26using namespace std;
27using namespace CppUtilities;
28
29namespace TagParser {
30
// Holds timing values for the tkhd and mdhd atoms and tells which atom version is
// required to store them.
// NOTE(review): this listing skips source lines 35-36; the creation/modification time
// members read by requiredTkhdVersion()/requiredMdhdVersion() are presumably declared there.
34struct Mp4Timings {
// NOTE(review): in this declaration only mdhdDuration carries the "= 0" initializer;
// tkhdDuration has none and is only zeroed when the object is value-initialized.
37 std::uint64_t tkhdDuration, mdhdDuration = 0;
// Returns 1 if any tkhd timing value exceeds the 32-bit range, otherwise 0.
38 constexpr std::uint8_t requiredTkhdVersion() const;
// Returns 1 if any mdhd timing value exceeds the 32-bit range, otherwise 0.
39 constexpr std::uint8_t requiredMdhdVersion() const;
40};
41
// Body of the type that Mp4Track::verifyPresentTrackHeader() fills in (TrackHeaderInfo).
// NOTE(review): the line opening this class/struct is not part of the listing.
49 friend class Mp4Track;
50
51private:
// Number of bytes required for the tkhd atom; computed from the present atom's data size.
53 std::uint64_t requiredSize = 100;
// Whether data of the existing tkhd atom extends beyond additionalDataOffset and can be used.
55 bool canUseExisting = false;
// Set when the existing tkhd atom is shorter than additionalDataOffset + 48 bytes.
57 bool truncated = false;
// Version byte read from the existing tkhd atom.
59 std::uint8_t version = 0;
// Version chosen for writing: timingsVersion if the present version is 0, otherwise the present version.
61 std::uint8_t writeVersion = 0;
// Set when the present version is neither 0 nor 1.
63 bool versionUnknown = false;
// Timing values as returned by Mp4Track::computeTimings().
65 Mp4Timings timings;
// Result of timings.requiredTkhdVersion().
67 std::uint8_t timingsVersion = 0;
// Offset of the additional data within the tkhd atom: 32 for version 0, otherwise 44.
69 std::uint8_t additionalDataOffset = 0;
// Set when the tkhd atom had no buffer before verification, so the buffer made for it is to be discarded again.
71 bool discardBuffer = false;
72};
73
74constexpr std::uint8_t Mp4Timings::requiredTkhdVersion() const
75{
76 return (tkhdCreationTime > std::numeric_limits<std::uint32_t>::max() || tkhdModificationTime > std::numeric_limits<std::uint32_t>::max()
77 || tkhdDuration > std::numeric_limits<std::uint32_t>::max())
78 ? 1
79 : 0;
80}
81
82constexpr std::uint8_t Mp4Timings::requiredMdhdVersion() const
83{
84 return (mdhdCreationTime > std::numeric_limits<std::uint32_t>::max() || mdhdModificationTime > std::numeric_limits<std::uint32_t>::max()
85 || mdhdDuration > std::numeric_limits<std::uint32_t>::max())
86 ? 1
87 : 0;
88}
89
// Member-initializer list and empty body of a constructor that resets all fields of the
// audio specific config to their defaults.
// NOTE(review): the constructor's signature line is not part of this listing; the fields
// match those filled by Mp4Track::parseAudioSpecificConfig().
97 : audioObjectType(0)
// 0xF is the index value after which the parser reads an explicit 24-bit frequency
98 , sampleFrequencyIndex(0xF)
99 , sampleFrequency(0)
100 , channelConfiguration(0)
101 , extensionAudioObjectType(0)
102 , sbrPresent(false)
103 , psPresent(false)
104 , extensionSampleFrequencyIndex(0xF)
105 , extensionSampleFrequency(0)
106 , extensionChannelConfiguration(0)
107 , frameLengthFlag(false)
108 , dependsOnCoreCoder(false)
109 , coreCoderDelay(0)
110 , extensionFlag(0)
111 , layerNr(0)
112 , numOfSubFrame(0)
113 , layerLength(0)
114 , resilienceFlags(0)
115 , epConfig(0)
116{
117}
118
131
// Member-initializer list and empty body of a constructor taking a trak atom: the base class
// is initialized with the atom's stream and start offset, the trak atom itself is stored and
// all other atom pointers start out as nullptr (nothing is parsed here).
// NOTE(review): the constructor's signature line is not part of this listing.
154 : AbstractTrack(trakAtom.stream(), trakAtom.startOffset())
155 , m_trakAtom(&trakAtom)
156 , m_tkhdAtom(nullptr)
157 , m_mdiaAtom(nullptr)
158 , m_mdhdAtom(nullptr)
159 , m_hdlrAtom(nullptr)
160 , m_minfAtom(nullptr)
161 , m_stblAtom(nullptr)
162 , m_stsdAtom(nullptr)
163 , m_stscAtom(nullptr)
164 , m_stcoAtom(nullptr)
165 , m_stszAtom(nullptr)
166 , m_rawMediaType(0)
167 , m_framesPerSample(1)
// default size of a chunk offset in bytes
168 , m_chunkOffsetSize(4)
169 , m_chunkCount(0)
170 , m_sampleToChunkEntryCount(0)
// raw timing values; used by computeTimings() when raw timing values are to be preserved
171 , m_rawTkhdCreationTime(0)
172 , m_rawMdhdCreationTime(0)
173 , m_rawTkhdModificationTime(0)
174 , m_rawMdhdModificationTime(0)
175 , m_rawTkhdDuration(0)
176 , m_rawMdhdDuration(0)
177{
178}
179
186
// Body of a function that always returns TrackType::Mp4Track.
// NOTE(review): the signature line is not part of this listing (presumably the track type getter).
188{
189 return TrackType::Mp4Track;
190}
191
202std::vector<std::uint64_t> Mp4Track::readChunkOffsets(bool parseFragments, Diagnostics &diag)
203{
204 static const auto context = std::string("reading chunk offset table of MP4 track");
205 if (!isHeaderValid() || !m_istream) {
206 diag.emplace_back(DiagLevel::Critical, "Track has not been parsed.", context);
207 throw InvalidDataException();
208 }
209 auto offsets = std::vector<std::uint64_t>();
210 if (m_stcoAtom) {
211 // verify integrity of the chunk offset table
212 auto offsetSize = m_stcoAtom->id() == Mp4AtomIds::ChunkOffset64 ? 8u : 4u;
213 auto actualTableSize = m_stcoAtom->dataSize();
214 if (actualTableSize < (8 + offsetSize)) {
215 diag.emplace_back(DiagLevel::Critical, "The stco/co64 atom is truncated. There are no chunk offsets present.", context);
216 throw InvalidDataException();
217 } else {
218 actualTableSize -= 8;
219 }
220 std::uint32_t actualChunkCount = chunkCount();
221 std::uint64_t calculatedTableSize = chunkCount() * offsetSize;
222 if (calculatedTableSize < actualTableSize) {
223 diag.emplace_back(DiagLevel::Critical,
224 "The stco/co64 atom stores more chunk offsets as denoted. The additional chunk offsets will be ignored.", context);
225 } else if (calculatedTableSize > actualTableSize) {
226 diag.emplace_back(DiagLevel::Critical, "The stco/co64 atom is truncated. It stores less chunk offsets as denoted.", context);
227 actualChunkCount = static_cast<std::uint32_t>(std::floor(static_cast<double>(actualTableSize) / static_cast<double>(offsetSize)));
228 }
229 // read the table
230 offsets.reserve(actualChunkCount);
231 m_istream->seekg(static_cast<streamoff>(m_stcoAtom->dataOffset() + 8));
232 switch (offsetSize) {
233 case 4:
234 for (std::uint32_t i = 0; i < actualChunkCount; ++i) {
235 offsets.push_back(reader().readUInt32BE());
236 }
237 break;
238 case 8:
239 for (std::uint32_t i = 0; i < actualChunkCount; ++i) {
240 offsets.push_back(reader().readUInt64BE());
241 }
242 break;
243 }
244 }
245 // read sample offsets of fragments
246 if (parseFragments) {
247 //std::uint64_t totalDuration = 0;
248 for (Mp4Atom *moofAtom = m_trakAtom->container().firstElement()->siblingByIdIncludingThis(Mp4AtomIds::MovieFragment, diag); moofAtom;
249 moofAtom = moofAtom->siblingById(Mp4AtomIds::MovieFragment, diag)) {
250 moofAtom->parse(diag);
251 for (Mp4Atom *trafAtom = moofAtom->childById(Mp4AtomIds::TrackFragment, diag); trafAtom;
252 trafAtom = trafAtom->siblingById(Mp4AtomIds::TrackFragment, diag)) {
253 trafAtom->parse(diag);
254 for (Mp4Atom *tfhdAtom = trafAtom->childById(Mp4AtomIds::TrackFragmentHeader, diag); tfhdAtom;
255 tfhdAtom = tfhdAtom->siblingById(Mp4AtomIds::TrackFragmentHeader, diag)) {
256 tfhdAtom->parse(diag);
257 std::uint32_t calculatedDataSize = 0;
258 if (tfhdAtom->dataSize() < calculatedDataSize) {
259 diag.emplace_back(DiagLevel::Critical, "tfhd atom is truncated.", context);
260 } else {
261 inputStream().seekg(static_cast<streamoff>(tfhdAtom->dataOffset() + 1));
262 const std::uint32_t flags = reader().readUInt24BE();
263 if (m_id == reader().readUInt32BE()) { // check track ID
264 if (flags & 0x000001) { // base-data-offset present
265 calculatedDataSize += 8;
266 }
267 if (flags & 0x000002) { // sample-description-index present
268 calculatedDataSize += 4;
269 }
270 if (flags & 0x000008) { // default-sample-duration present
271 calculatedDataSize += 4;
272 }
273 if (flags & 0x000010) { // default-sample-size present
274 calculatedDataSize += 4;
275 }
276 if (flags & 0x000020) { // default-sample-flags present
277 calculatedDataSize += 4;
278 }
279 // some variables are currently skipped because they are currently not interesting
280 //std::uint64_t baseDataOffset = moofAtom->startOffset();
281 //std::uint32_t defaultSampleDescriptionIndex = 0;
282 //std::uint32_t defaultSampleDuration = 0;
283 std::uint32_t defaultSampleSize = 0;
284 //std::uint32_t defaultSampleFlags = 0;
285 if (tfhdAtom->dataSize() < calculatedDataSize) {
286 diag.emplace_back(DiagLevel::Critical, "tfhd atom is truncated (presence of fields denoted).", context);
287 } else {
288 if (flags & 0x000001) { // base-data-offset present
289 //baseDataOffset = reader.readUInt64();
290 inputStream().seekg(8, ios_base::cur);
291 }
292 if (flags & 0x000002) { // sample-description-index present
293 //defaultSampleDescriptionIndex = reader.readUInt32();
294 inputStream().seekg(4, ios_base::cur);
295 }
296 if (flags & 0x000008) { // default-sample-duration present
297 //defaultSampleDuration = reader().readUInt32BE();
298 inputStream().seekg(4, ios_base::cur);
299 }
300 if (flags & 0x000010) { // default-sample-size present
301 defaultSampleSize = reader().readUInt32BE();
302 }
303 if (flags & 0x000020) { // default-sample-flags present
304 //defaultSampleFlags = reader().readUInt32BE();
305 inputStream().seekg(4, ios_base::cur);
306 }
307 }
308 for (Mp4Atom *trunAtom = trafAtom->childById(Mp4AtomIds::TrackFragmentRun, diag); trunAtom;
309 trunAtom = trunAtom->siblingById(Mp4AtomIds::TrackFragmentRun, diag)) {
310 std::uint32_t trunCalculatedDataSize = 8;
311 if (trunAtom->dataSize() < trunCalculatedDataSize) {
312 diag.emplace_back(DiagLevel::Critical, "trun atom is truncated.", context);
313 } else {
314 inputStream().seekg(static_cast<streamoff>(trunAtom->dataOffset() + 1));
315 std::uint32_t trunFlags = reader().readUInt24BE();
316 std::uint32_t sampleCount = reader().readUInt32BE();
318 if (trunFlags & 0x000001) { // data offset present
319 trunCalculatedDataSize += 4;
320 }
321 if (trunFlags & 0x000004) { // first-sample-flags present
322 trunCalculatedDataSize += 4;
323 }
324 std::uint32_t entrySize = 0;
325 if (trunFlags & 0x000100) { // sample-duration present
326 entrySize += 4;
327 }
328 if (trunFlags & 0x000200) { // sample-size present
329 entrySize += 4;
330 }
331 if (trunFlags & 0x000400) { // sample-flags present
332 entrySize += 4;
333 }
334 if (trunFlags & 0x000800) { // sample-composition-time-offsets present
335 entrySize += 4;
336 }
337 trunCalculatedDataSize += entrySize * sampleCount;
338 if (trunAtom->dataSize() < trunCalculatedDataSize) {
339 diag.emplace_back(DiagLevel::Critical, "trun atom is truncated (presence of fields denoted).", context);
340 } else {
341 if (trunFlags & 0x000001) { // data offset present
342 inputStream().seekg(4, ios_base::cur);
343 //int32 dataOffset = reader().readInt32BE();
344 }
345 if (trunFlags & 0x000004) { // first-sample-flags present
346 inputStream().seekg(4, ios_base::cur);
347 }
348 for (std::uint32_t i = 0; i < sampleCount; ++i) {
349 if (trunFlags & 0x000100) { // sample-duration present
350 //totalDuration += reader().readUInt32BE();
351 inputStream().seekg(4, ios_base::cur);
352 } else {
353 //totalDuration += defaultSampleDuration;
354 }
355 if (trunFlags & 0x000200) { // sample-size present
356 m_sampleSizes.push_back(reader().readUInt32BE());
357 m_size += m_sampleSizes.back();
358 } else {
359 m_size += defaultSampleSize;
360 }
361 if (trunFlags & 0x000400) { // sample-flags present
362 inputStream().seekg(4, ios_base::cur);
363 }
364 if (trunFlags & 0x000800) { // sample-composition-time-offsets present
365 inputStream().seekg(4, ios_base::cur);
366 }
367 }
368 }
369 }
370 }
371 if (m_sampleSizes.empty() && defaultSampleSize) {
372 m_sampleSizes.push_back(defaultSampleSize);
373 }
374 }
375 }
376 }
377 }
378 }
379 }
380 return offsets;
381}
382
387std::uint64_t Mp4Track::accumulateSampleSizes(size_t &sampleIndex, size_t count, Diagnostics &diag)
388{
389 if (sampleIndex + count <= m_sampleSizes.size()) {
390 std::uint64_t sum = 0;
391 for (size_t end = sampleIndex + count; sampleIndex < end; ++sampleIndex) {
392 sum += m_sampleSizes[sampleIndex];
393 }
394 return sum;
395 } else if (m_sampleSizes.size() == 1) {
396 sampleIndex += count;
397 return static_cast<std::uint64_t>(m_sampleSizes.front()) * count;
398 } else {
399 diag.emplace_back(DiagLevel::Critical, "There are not as many sample size entries as samples.", "reading chunk sizes of MP4 track");
400 throw InvalidDataException();
401 }
402}
403
412void Mp4Track::addChunkSizeEntries(
413 std::vector<std::uint64_t> &chunkSizeTable, size_t count, size_t &sampleIndex, std::uint32_t sampleCount, Diagnostics &diag)
414{
415 for (size_t i = 0; i < count; ++i) {
416 chunkSizeTable.push_back(accumulateSampleSizes(sampleIndex, sampleCount, diag));
417 }
418}
419
424const TrackHeaderInfo &Mp4Track::verifyPresentTrackHeader() const
425{
426 if (m_trackHeaderInfo) {
427 return *m_trackHeaderInfo;
428 }
429
430 // return the default TrackHeaderInfo in case there is no track header prsent
431 auto &info = *(m_trackHeaderInfo = std::make_unique<TrackHeaderInfo>());
432 if (!m_tkhdAtom) {
433 return info;
434 }
435
436 // ensure the tkhd atom is buffered but mark the buffer to be discarded again if it has not been present
437 info.discardBuffer = m_tkhdAtom->buffer() == nullptr;
438 if (info.discardBuffer) {
439 m_tkhdAtom->makeBuffer();
440 }
441
442 // check the version of the existing tkhd atom to determine where additional data starts
443 switch (info.version = static_cast<std::uint8_t>(m_tkhdAtom->buffer()[m_tkhdAtom->headerSize()])) {
444 case 0:
445 info.additionalDataOffset = 32;
446 break;
447 case 1:
448 info.additionalDataOffset = 44;
449 break;
450 default:
451 info.additionalDataOffset = 44;
452 info.versionUnknown = true;
453 }
454
455 // check whether the existing tkhd atom is not truncated
456 if (info.additionalDataOffset + 48u <= m_tkhdAtom->dataSize()) {
457 info.canUseExisting = true;
458 } else {
459 info.truncated = true;
460 info.canUseExisting = info.additionalDataOffset < m_tkhdAtom->dataSize();
461 if (!info.canUseExisting && info.discardBuffer) {
462 m_tkhdAtom->discardBuffer();
463 }
464 }
465
466 // determine required size
467 info.requiredSize = m_tkhdAtom->dataSize() + 8;
468 info.timings = computeTimings();
469 info.timingsVersion = info.timings.requiredTkhdVersion();
470 if (info.version == 0) {
471 info.writeVersion = info.timingsVersion;
472 // add 12 byte to size if update from version 0 to version 1 is required (which needs 12 byte more)
473 if (info.writeVersion != 0) {
474 info.requiredSize += 12;
475 }
476 } else {
477 info.writeVersion = info.version;
478 }
479 // -> add 8 byte to the size because it must be denoted using a 64-bit integer
480 if (info.requiredSize > numeric_limits<std::uint32_t>::max()) {
481 info.requiredSize += 8;
482 }
483 return info;
484}
485
489Mp4Timings Mp4Track::computeTimings() const
490{
491 auto timings = Mp4Timings();
492 if (m_trakAtom && (m_trakAtom->container().fileInfo().fileHandlingFlags() & MediaFileHandlingFlags::PreserveRawTimingValues)) {
493 timings.tkhdCreationTime = m_rawTkhdCreationTime;
494 timings.tkhdModificationTime = m_rawTkhdModificationTime;
495 timings.tkhdDuration = m_rawTkhdDuration;
496 timings.mdhdCreationTime = m_rawMdhdCreationTime;
497 timings.mdhdModificationTime = m_rawMdhdModificationTime;
498 timings.mdhdDuration = m_rawMdhdDuration;
499 } else {
500 timings.tkhdCreationTime = timings.mdhdCreationTime = static_cast<std::uint64_t>((m_creationTime - Mp4Container::epoch).totalSeconds());
501 timings.tkhdModificationTime = timings.mdhdModificationTime
502 = static_cast<std::uint64_t>((m_modificationTime - Mp4Container::epoch).totalSeconds());
503 timings.tkhdDuration = timings.mdhdDuration = static_cast<std::uint64_t>(m_duration.totalTicks() * m_timeScale / TimeSpan::ticksPerSecond);
504 }
505 return timings;
506}
507
515vector<tuple<std::uint32_t, std::uint32_t, std::uint32_t>> Mp4Track::readSampleToChunkTable(Diagnostics &diag)
516{
517 static const string context("reading sample to chunk table of MP4 track");
518 if (!isHeaderValid() || !m_istream || !m_stscAtom) {
519 diag.emplace_back(DiagLevel::Critical, "Track has not been parsed or is invalid.", context);
520 throw InvalidDataException();
521 }
522 // verify integrity of the sample to chunk table
523 std::uint64_t actualTableSize = m_stscAtom->dataSize();
524 if (actualTableSize < 20) {
525 diag.emplace_back(DiagLevel::Critical, "The stsc atom is truncated. There are no \"sample to chunk\" entries present.", context);
526 throw InvalidDataException();
527 } else {
528 actualTableSize -= 8;
529 }
530 std::uint64_t actualSampleToChunkEntryCount = sampleToChunkEntryCount();
531 std::uint64_t calculatedTableSize = actualSampleToChunkEntryCount * 12;
532 if (calculatedTableSize < actualTableSize) {
533 diag.emplace_back(DiagLevel::Critical, "The stsc atom stores more entries as denoted. The additional entries will be ignored.", context);
534 } else if (calculatedTableSize > actualTableSize) {
535 diag.emplace_back(DiagLevel::Critical, "The stsc atom is truncated. It stores less entries as denoted.", context);
536 actualSampleToChunkEntryCount = actualTableSize / 12;
537 }
538 // prepare reading
539 vector<tuple<std::uint32_t, std::uint32_t, std::uint32_t>> sampleToChunkTable;
540 sampleToChunkTable.reserve(actualSampleToChunkEntryCount);
541 m_istream->seekg(static_cast<streamoff>(m_stscAtom->dataOffset() + 8));
542 for (std::uint32_t i = 0; i < actualSampleToChunkEntryCount; ++i) {
543 // read entry
544 std::uint32_t firstChunk = reader().readUInt32BE();
545 std::uint32_t samplesPerChunk = reader().readUInt32BE();
546 std::uint32_t sampleDescriptionIndex = reader().readUInt32BE();
547 sampleToChunkTable.emplace_back(firstChunk, samplesPerChunk, sampleDescriptionIndex);
548 }
549 return sampleToChunkTable;
550}
551
564vector<std::uint64_t> Mp4Track::readChunkSizes(Diagnostics &diag)
565{
566 static const string context("reading chunk sizes of MP4 track");
567 if (!isHeaderValid() || !m_istream || !m_stcoAtom) {
568 diag.emplace_back(DiagLevel::Critical, "Track has not been parsed or is invalid.", context);
569 throw InvalidDataException();
570 }
571 // read sample to chunk table
572 const auto sampleToChunkTable = readSampleToChunkTable(diag);
573 // accumulate chunk sizes from the table
574 vector<std::uint64_t> chunkSizes;
575 if (!sampleToChunkTable.empty()) {
576 // prepare reading
577 auto tableIterator = sampleToChunkTable.cbegin();
578 chunkSizes.reserve(m_chunkCount);
579 // read first entry
580 size_t sampleIndex = 0;
581 std::uint32_t previousChunkIndex = get<0>(*tableIterator); // the first chunk has the index 1 and not zero!
582 if (previousChunkIndex != 1) {
583 diag.emplace_back(DiagLevel::Critical, "The first chunk of the first \"sample to chunk\" entry must be 1.", context);
584 previousChunkIndex = 1; // try to read the entry anyway
585 }
586 std::uint32_t samplesPerChunk = get<1>(*tableIterator);
587 // read the following entries
588 ++tableIterator;
589 for (const auto tableEnd = sampleToChunkTable.cend(); tableIterator != tableEnd; ++tableIterator) {
590 std::uint32_t firstChunkIndex = get<0>(*tableIterator);
591 if (firstChunkIndex > previousChunkIndex && firstChunkIndex <= m_chunkCount) {
592 addChunkSizeEntries(chunkSizes, firstChunkIndex - previousChunkIndex, sampleIndex, samplesPerChunk, diag);
593 } else {
594 diag.emplace_back(DiagLevel::Critical,
595 "The first chunk index of a \"sample to chunk\" entry must be greater than the first chunk of the previous entry and not "
596 "greater than the chunk count.",
597 context);
598 throw InvalidDataException();
599 }
600 previousChunkIndex = firstChunkIndex;
601 samplesPerChunk = get<1>(*tableIterator);
602 }
603 if (m_chunkCount >= previousChunkIndex) {
604 addChunkSizeEntries(chunkSizes, m_chunkCount + 1 - previousChunkIndex, sampleIndex, samplesPerChunk, diag);
605 }
606 }
607 return chunkSizes;
608}
609
// Parses the MPEG-4 elementary stream descriptor stored in the specified esds atom.
// - reader: used to read from the stream; it is repositioned by this function
// - esDescAtom: the atom holding the descriptor; must not be null as it is dereferenced unchecked
// - diag: receives warnings/critical messages about unknown flags, truncation and invalid structure
// Returns the parsed info. The result is a null pointer if the atom holds less than 12 bytes or
// parsing failed before the info object was created; if a Failure occurs later, the partially
// filled info is returned.
// NOTE(review): this listing skips source lines 631, 654, 666 and 688; the condition guarding
// "Invalid descriptor found." and the case labels of both switch statements are therefore
// not visible here.
614std::unique_ptr<Mpeg4ElementaryStreamInfo> Mp4Track::parseMpeg4ElementaryStreamInfo(
615 CppUtilities::BinaryReader &reader, Mp4Atom *esDescAtom, Diagnostics &diag)
616{
617 static const string context("parsing MPEG-4 elementary stream descriptor");
618 using namespace Mpeg4ElementaryStreamObjectIds;
619 unique_ptr<Mpeg4ElementaryStreamInfo> esInfo;
620 if (esDescAtom->dataSize() >= 12) {
621 reader.stream()->seekg(static_cast<streamoff>(esDescAtom->dataOffset()));
622 // read version/flags
623 if (reader.readUInt32BE() != 0) {
624 diag.emplace_back(DiagLevel::Warning, "Unknown version/flags.", context);
625 }
626 // read extended descriptor
// the descriptor starts right after the 4 bytes of version/flags which are hence subtracted from the size
627 Mpeg4Descriptor esDesc(esDescAtom->container(), static_cast<std::uint64_t>(reader.stream()->tellg()), esDescAtom->dataSize() - 4);
628 try {
629 esDesc.parse(diag);
630 // check ID
// NOTE(review): the "if" checking the descriptor ID (source line 631) is missing from the listing
632 diag.emplace_back(DiagLevel::Critical, "Invalid descriptor found.", context);
633 throw Failure();
634 }
635 // read stream info
636 reader.stream()->seekg(static_cast<streamoff>(esDesc.dataOffset()));
637 esInfo = make_unique<Mpeg4ElementaryStreamInfo>();
638 esInfo->id = reader.readUInt16BE();
639 esInfo->esDescFlags = reader.readByte();
// the following fields are only read if the corresponding flag is set
640 if (esInfo->dependencyFlag()) {
641 esInfo->dependsOnId = reader.readUInt16BE();
642 }
643 if (esInfo->urlFlag()) {
// the URL is prefixed with its length (one byte)
644 esInfo->url = reader.readString(reader.readByte());
645 }
646 if (esInfo->ocrFlag()) {
647 esInfo->ocrId = reader.readUInt16BE();
648 }
// the child descriptors start at the current stream position (expressed relative to the descriptor's start)
649 for (Mpeg4Descriptor *esDescChild
650 = esDesc.denoteFirstChild(static_cast<std::uint32_t>(static_cast<std::uint64_t>(reader.stream()->tellg()) - esDesc.startOffset()));
651 esDescChild; esDescChild = esDescChild->nextSibling()) {
652 esDescChild->parse(diag);
653 switch (esDescChild->id()) {
// NOTE(review): the case label for the decoder config descriptor (source line 654) is missing from the listing
655 // read decoder config descriptor
656 reader.stream()->seekg(static_cast<streamoff>(esDescChild->dataOffset()));
657 esInfo->objectTypeId = reader.readByte();
658 esInfo->decCfgDescFlags = reader.readByte();
659 esInfo->bufferSize = reader.readUInt24BE();
660 esInfo->maxBitrate = reader.readUInt32BE();
661 esInfo->averageBitrate = reader.readUInt32BE();
// 13 = number of bytes read above (1 + 1 + 3 + 4 + 4); the children follow these fields
662 for (Mpeg4Descriptor *decCfgDescChild = esDescChild->denoteFirstChild(esDescChild->headerSize() + 13); decCfgDescChild;
663 decCfgDescChild = decCfgDescChild->nextSibling()) {
664 decCfgDescChild->parse(diag);
665 switch (decCfgDescChild->id()) {
// NOTE(review): the case label for the decoder specific info (source line 666) is missing from the listing
667 // read decoder specific info
// the object type decides whether the info is parsed as audio or as video specific config
668 switch (esInfo->objectTypeId) {
669 case Aac:
670 case Mpeg2AacMainProfile:
671 case Mpeg2AacLowComplexityProfile:
672 case Mpeg2AacScaleableSamplingRateProfile:
673 case Mpeg2Audio:
674 case Mpeg1Audio:
675 esInfo->audioSpecificConfig
676 = parseAudioSpecificConfig(*reader.stream(), decCfgDescChild->dataOffset(), decCfgDescChild->dataSize(), diag);
677 break;
678 case Mpeg4Visual:
679 esInfo->videoSpecificConfig
680 = parseVideoSpecificConfig(reader, decCfgDescChild->dataOffset(), decCfgDescChild->dataSize(), diag);
681 break;
682 default:; // TODO: cover more object types
683 }
684 break;
685 }
686 }
687 break;
// NOTE(review): another case label (source line 688) is missing from the listing
689 // uninteresting
690 break;
691 }
692 }
693 } catch (const Failure &) {
// a Failure is not passed on to the caller; it is only reported via diag
694 diag.emplace_back(DiagLevel::Critical, "The MPEG-4 descriptor element structure is invalid.", context);
695 }
696 } else {
697 diag.emplace_back(DiagLevel::Warning, "Elementary stream descriptor atom (esds) is truncated.", context);
698 }
699 return esInfo;
700}
701
706unique_ptr<Mpeg4AudioSpecificConfig> Mp4Track::parseAudioSpecificConfig(
707 istream &stream, std::uint64_t startOffset, std::uint64_t size, Diagnostics &diag)
708{
709 static const string context("parsing MPEG-4 audio specific config from elementary stream descriptor");
710 using namespace Mpeg4AudioObjectIds;
711 // read config into buffer and construct BitReader for bitwise reading
712 stream.seekg(static_cast<streamoff>(startOffset));
713 auto buff = make_unique<char[]>(size);
714 stream.read(buff.get(), static_cast<streamoff>(size));
715 BitReader bitReader(buff.get(), size);
716 auto audioCfg = make_unique<Mpeg4AudioSpecificConfig>();
717 try {
718 // read audio object type
719 auto getAudioObjectType = [&bitReader] {
720 std::uint8_t objType = bitReader.readBits<std::uint8_t>(5);
721 if (objType == 31) {
722 objType = 32 + bitReader.readBits<std::uint8_t>(6);
723 }
724 return objType;
725 };
726 audioCfg->audioObjectType = getAudioObjectType();
727 // read sampling frequency
728 if ((audioCfg->sampleFrequencyIndex = bitReader.readBits<std::uint8_t>(4)) == 0xF) {
729 audioCfg->sampleFrequency = bitReader.readBits<std::uint32_t>(24);
730 }
731 // read channel config
732 audioCfg->channelConfiguration = bitReader.readBits<std::uint8_t>(4);
733 // read extension header
734 switch (audioCfg->audioObjectType) {
735 case Sbr:
736 case Ps:
737 audioCfg->extensionAudioObjectType = audioCfg->audioObjectType;
738 audioCfg->sbrPresent = true;
739 if ((audioCfg->extensionSampleFrequencyIndex = bitReader.readBits<std::uint8_t>(4)) == 0xF) {
740 audioCfg->extensionSampleFrequency = bitReader.readBits<std::uint32_t>(24);
741 }
742 if ((audioCfg->audioObjectType = getAudioObjectType()) == ErBsac) {
743 audioCfg->extensionChannelConfiguration = bitReader.readBits<std::uint8_t>(4);
744 }
745 break;
746 }
747 switch (audioCfg->extensionAudioObjectType) {
748 case Ps:
749 audioCfg->psPresent = true;
750 audioCfg->extensionChannelConfiguration = Mpeg4ChannelConfigs::FrontLeftFrontRight;
751 break;
752 }
753 // read GA specific config
754 switch (audioCfg->audioObjectType) {
755 case AacMain:
756 case AacLc:
757 case AacLtp:
758 case AacScalable:
759 case TwinVq:
760 case ErAacLc:
761 case ErAacLtp:
762 case ErAacScalable:
763 case ErTwinVq:
764 case ErBsac:
765 case ErAacLd:
766 audioCfg->frameLengthFlag = bitReader.readBits<std::uint8_t>(1);
767 if ((audioCfg->dependsOnCoreCoder = bitReader.readBit())) {
768 audioCfg->coreCoderDelay = bitReader.readBits<std::uint8_t>(14);
769 }
770 audioCfg->extensionFlag = bitReader.readBit();
771 if (audioCfg->channelConfiguration == 0) {
772 throw NotImplementedException(); // TODO: parse program_config_element
773 }
774 switch (audioCfg->audioObjectType) {
775 case AacScalable:
776 case ErAacScalable:
777 audioCfg->layerNr = bitReader.readBits<std::uint8_t>(3);
778 break;
779 default:;
780 }
781 if (audioCfg->extensionFlag == 1) {
782 switch (audioCfg->audioObjectType) {
783 case ErBsac:
784 audioCfg->numOfSubFrame = bitReader.readBits<std::uint8_t>(5);
785 audioCfg->layerLength = bitReader.readBits<std::uint16_t>(11);
786 break;
787 case ErAacLc:
788 case ErAacLtp:
789 case ErAacScalable:
790 case ErAacLd:
791 audioCfg->resilienceFlags = bitReader.readBits<std::uint8_t>(3);
792 break;
793 default:;
794 }
795 if (bitReader.readBit() == 1) { // extension flag 3
796 throw NotImplementedException(); // TODO
797 }
798 }
799 break;
800 default:
801 throw NotImplementedException(); // TODO: cover remaining object types
802 }
803 // read error specific config
804 switch (audioCfg->audioObjectType) {
805 case ErAacLc:
806 case ErAacLtp:
807 case ErAacScalable:
808 case ErTwinVq:
809 case ErBsac:
810 case ErAacLd:
811 case ErCelp:
812 case ErHvxc:
813 case ErHiln:
814 case ErParametric:
815 case ErAacEld:
816 switch (audioCfg->epConfig = bitReader.readBits<std::uint8_t>(2)) {
817 case 2:
818 break;
819 case 3:
820 bitReader.skipBits(1);
821 break;
822 default:
823 throw NotImplementedException(); // TODO
824 }
825 break;
826 }
827 if (audioCfg->extensionAudioObjectType != Sbr && audioCfg->extensionAudioObjectType != Ps && bitReader.bitsAvailable() >= 16) {
828 std::uint16_t syncExtensionType = bitReader.readBits<std::uint16_t>(11);
829 if (syncExtensionType == 0x2B7) {
830 if ((audioCfg->extensionAudioObjectType = getAudioObjectType()) == Sbr) {
831 if ((audioCfg->sbrPresent = bitReader.readBit())) {
832 if ((audioCfg->extensionSampleFrequencyIndex = bitReader.readBits<std::uint8_t>(4)) == 0xF) {
833 audioCfg->extensionSampleFrequency = bitReader.readBits<std::uint32_t>(24);
834 }
835 if (bitReader.bitsAvailable() >= 12) {
836 if ((syncExtensionType = bitReader.readBits<std::uint16_t>(11)) == 0x548) {
837 audioCfg->psPresent = bitReader.readBits<std::uint8_t>(1);
838 }
839 }
840 }
841 } else if (audioCfg->extensionAudioObjectType == ErBsac) {
842 if ((audioCfg->sbrPresent = bitReader.readBit())) {
843 if ((audioCfg->extensionSampleFrequencyIndex = bitReader.readBits<std::uint8_t>(4)) == 0xF) {
844 audioCfg->extensionSampleFrequency = bitReader.readBits<std::uint32_t>(24);
845 }
846 }
847 audioCfg->extensionChannelConfiguration = bitReader.readBits<std::uint8_t>(4);
848 }
849 } else if (syncExtensionType == 0x548) {
850 audioCfg->psPresent = bitReader.readBit();
851 }
852 }
853 } catch (const NotImplementedException &) {
854 diag.emplace_back(DiagLevel::Information, "Not implemented for the format of audio track.", context);
855 } catch (const std::ios_base::failure &) {
856 if (stream.fail()) {
857 // IO error caused by input stream
858 throw;
859 } else {
860 // IO error caused by bitReader
861 diag.emplace_back(DiagLevel::Critical, "Audio specific configuration is truncated.", context);
862 }
863 }
864 return audioCfg;
865}
866
// Parses the video specific configuration for the track.
// - reader: used to read from the stream; it is positioned at startOffset first
// - startOffset/size: location and size (in bytes) of the configuration
// - diag: receives a critical message if the data does not begin with the 24-bit value 1
// Returns the configuration which is always allocated, even if nothing could be parsed.
// NOTE(review): this listing skips source lines 885, 891 and 894; the case labels of the
// switch on the start code are therefore not visible here.
871std::unique_ptr<Mpeg4VideoSpecificConfig> Mp4Track::parseVideoSpecificConfig(
872 BinaryReader &reader, std::uint64_t startOffset, std::uint64_t size, Diagnostics &diag)
873{
874 static const string context("parsing MPEG-4 video specific config from elementary stream descriptor");
875 using namespace Mpeg4AudioObjectIds;
876 auto videoCfg = make_unique<Mpeg4VideoSpecificConfig>();
877 // seek to start
878 reader.stream()->seekg(static_cast<streamoff>(startOffset));
// "size" counts the bytes which are left to read; it is decremented along with every read
879 if (size > 3 && (reader.readUInt24BE() == 1)) {
880 size -= 3;
881 std::uint32_t buff1;
882 while (size) {
883 --size;
884 switch (reader.readByte()) { // read start code
// NOTE(review): case label missing from the listing; this branch reads the profile byte
886 if (size) {
887 videoCfg->profile = reader.readByte();
888 --size;
889 }
890 break;
892
893 break;
// NOTE(review): case label missing from the listing; this branch collects user data
895 buff1 = 0;
// read 3 bytes at once to detect the next 24-bit value 1; if it is not found, seek back 2 bytes
// so only the first of the 3 bytes is consumed and appended to the user data
896 while (size >= 3) {
897 if ((buff1 = reader.readUInt24BE()) != 1) {
898 reader.stream()->seekg(-2, ios_base::cur);
899 videoCfg->userData.push_back(static_cast<char>(buff1 >> 16));
900 --size;
901 } else {
902 size -= 3;
903 break;
904 }
905 }
// less than 3 bytes left and no further value 1 found: the rest belongs to the user data
906 if (buff1 != 1 && size > 0) {
907 videoCfg->userData += reader.readString(size);
908 size = 0;
909 }
910 break;
911 default:;
912 }
913 // skip remaining values to get the start of the next video object
914 while (size >= 3) {
915 if (reader.readUInt24BE() != 1) {
916 reader.stream()->seekg(-2, ios_base::cur);
917 --size;
918 } else {
919 size -= 3;
920 break;
921 }
922 }
923 }
924 } else {
925 diag.emplace_back(DiagLevel::Critical, "\"Visual Object Sequence Header\" not found.", context);
926 }
927 return videoCfg;
928}
929
// Rewrites the chunk offsets stored in the stco/co64 atom in place after mdat atoms have moved.
// - oldMdatOffsets/newMdatOffsets: old and new offsets; both must have the same, non-zero size
// Every stored offset is compared against the old offsets in order; for the first old offset it
// exceeds, the difference between the corresponding new and old offset is added.
// Throws InvalidDataException if the track is not valid, a stream or the stco atom is missing,
// the offset lists do not match or the atom ID is not handled by the switch.
// NOTE(review): this listing skips source lines 964 and 980; the case labels opening the
// 32-bit and 64-bit branches are therefore not visible here.
947void Mp4Track::updateChunkOffsets(const vector<std::int64_t> &oldMdatOffsets, const vector<std::int64_t> &newMdatOffsets)
948{
949 if (!isHeaderValid() || !m_ostream || !m_istream || !m_stcoAtom) {
950 throw InvalidDataException();
951 }
952 if (oldMdatOffsets.size() == 0 || oldMdatOffsets.size() != newMdatOffsets.size()) {
953 throw InvalidDataException();
954 }
// the offset table starts 8 bytes into the atom's data
955 static const unsigned int stcoDataBegin = 8;
956 std::uint64_t startPos = m_stcoAtom->dataOffset() + stcoDataBegin;
957 std::uint64_t endPos = startPos + m_stcoAtom->dataSize() - stcoDataBegin;
958 m_istream->seekg(static_cast<streamoff>(startPos));
959 m_ostream->seekp(static_cast<streamoff>(startPos));
960 vector<std::int64_t>::size_type i;
961 vector<std::int64_t>::size_type size;
962 auto currentPos = static_cast<std::uint64_t>(m_istream->tellg());
963 switch (m_stcoAtom->id()) {
// NOTE(review): case label missing from the listing; this branch handles 32-bit offsets
965 std::uint32_t off;
966 while ((currentPos + 4) <= endPos) {
967 off = m_reader.readUInt32BE();
968 for (i = 0, size = oldMdatOffsets.size(); i < size; ++i) {
969 if (off > static_cast<std::uint64_t>(oldMdatOffsets[i])) {
970 off += static_cast<std::uint32_t>(newMdatOffsets[i] - oldMdatOffsets[i]);
971 break;
972 }
973 }
// write the (possibly adjusted) offset back to the position it was read from
974 m_ostream->seekp(static_cast<streamoff>(currentPos));
975 m_writer.writeUInt32BE(off);
// advance by the number of bytes the input stream reports for its last read
976 currentPos += static_cast<std::uint64_t>(m_istream->gcount());
977 }
978 break;
979 }
// NOTE(review): case label missing from the listing; this branch handles 64-bit offsets
981 std::uint64_t off;
982 while ((currentPos + 8) <= endPos) {
983 off = m_reader.readUInt64BE();
984 for (i = 0, size = oldMdatOffsets.size(); i < size; ++i) {
985 if (off > static_cast<std::uint64_t>(oldMdatOffsets[i])) {
986 off += static_cast<std::uint64_t>(newMdatOffsets[i] - oldMdatOffsets[i]);
987 break;
988 }
989 }
990 m_ostream->seekp(static_cast<streamoff>(currentPos));
991 m_writer.writeUInt64BE(off);
992 currentPos += static_cast<std::uint64_t>(m_istream->gcount());
993 }
994 break;
995 }
996 default:
997 throw InvalidDataException();
998 }
999}
1000
// Overwrites the offset table of the stco/co64 atom with the specified chunk offsets.
// - chunkOffsets: the new offsets; the number of offsets must equal chunkCount()
// Throws InvalidDataException if the track is not valid, a stream or the stco atom is missing,
// the number of offsets does not match or the atom ID is not handled by the switch.
// NOTE(review): this listing skips source lines 1024 and 1029; the case labels selecting
// between 32-bit and 64-bit offsets are therefore not visible here.
1014void Mp4Track::updateChunkOffsets(const std::vector<std::uint64_t> &chunkOffsets)
1015{
1016 if (!isHeaderValid() || !m_ostream || !m_istream || !m_stcoAtom) {
1017 throw InvalidDataException();
1018 }
1019 if (chunkOffsets.size() != chunkCount()) {
1020 throw InvalidDataException();
1021 }
// the offset table starts 8 bytes into the atom's data
1022 m_ostream->seekp(static_cast<streamoff>(m_stcoAtom->dataOffset() + 8));
1023 switch (m_stcoAtom->id()) {
// 32-bit branch: every offset is narrowed to 32 bits before it is written
1025 for (auto offset : chunkOffsets) {
1026 m_writer.writeUInt32BE(static_cast<std::uint32_t>(offset));
1027 }
1028 break;
// 64-bit branch
1030 for (auto offset : chunkOffsets) {
1031 m_writer.writeUInt64BE(offset);
1032 }
1033 break;
1034 default:
1035 throw InvalidDataException();
1036 }
1037}
1038
1052void Mp4Track::updateChunkOffset(std::uint32_t chunkIndex, std::uint64_t offset)
1053{
1054 if (!isHeaderValid() || !m_istream || !m_stcoAtom || chunkIndex >= m_chunkCount) {
1055 throw InvalidDataException();
1056 }
1057 const auto offsetSize = m_stcoAtom->id() == Mp4AtomIds::ChunkOffset64 ? 8u : 4u;
1058 m_ostream->seekp(static_cast<streamoff>(m_stcoAtom->dataOffset() + 8 + offsetSize * chunkIndex));
1059 switch (m_stcoAtom->id()) {
1061 writer().writeUInt32BE(static_cast<std::uint32_t>(offset));
1062 break;
1064 writer().writeUInt64BE(offset);
1065 break;
1066 default:
1067 throw InvalidDataException();
1068 }
1069}
1070
1075{
// Copies information from \a avcConfig to \a track.
// NOTE(review): the signature of this function is not visible in this listing.
1076 if (!avcConfig.spsInfos.empty()) {
// SPS infos are present: use the last one
1077 const SpsInfo &spsInfo = avcConfig.spsInfos.back();
1078 track.m_format.sub = spsInfo.profileIndication;
// the level indication is divided by ten to obtain the version (e.g. 41 becomes 4.1)
1079 track.m_version = static_cast<double>(spsInfo.levelIndication) / 10;
1080 track.m_cropping = spsInfo.cropping;
1081 track.m_pixelSize = spsInfo.pictureSize;
// map the chroma format indication to a human-readable string; other values leave the field untouched
1082 switch (spsInfo.chromaFormatIndication) {
1083 case 0:
1084 track.m_chromaFormat = "monochrome";
1085 break;
1086 case 1:
1087 track.m_chromaFormat = "YUV 4:2:0";
1088 break;
1089 case 2:
1090 track.m_chromaFormat = "YUV 4:2:2";
1091 break;
1092 case 3:
1093 track.m_chromaFormat = "YUV 4:4:4";
1094 break;
1095 default:;
1096 }
1097 track.m_pixelAspectRatio = spsInfo.pixelAspectRatio;
1098 } else {
// no SPS info available: fall back to profile and level stored directly in the configuration
1099 track.m_format.sub = avcConfig.profileIndication;
1100 track.m_version = static_cast<double>(avcConfig.levelIndication) / 10;
1101 }
1102}
1103
1109{
// Neither \a av1Config nor \a track is used by the visible code.
// NOTE(review): the signature and one body line (between the second macro and the closing brace)
// are not visible in this listing - confirm what the function actually does.
1110 CPP_UTILITIES_UNUSED(av1Config)
1111 CPP_UTILITIES_UNUSED(track)
1113}
1114
1122{
// Calls makeBuffer() on the atoms of the track which are copied as-is when the track is written.
// NOTE(review): the signature of this function is not visible in this listing; \a diag is unused.
1123 CPP_UTILITIES_UNUSED(diag)
1124
1125 if (m_tkhdAtom) {
1126 m_tkhdAtom->makeBuffer();
1127 }
// buffer all children of the trak atom except the media atom
1128 for (Mp4Atom *trakChild = m_trakAtom->firstChild(); trakChild; trakChild = trakChild->nextSibling()) {
1129 if (trakChild->id() == Mp4AtomIds::Media) {
1130 continue;
1131 }
1132 trakChild->makeBuffer();
1133 }
// buffer all children of the minf atom except the sample table whose children are buffered below
1134 if (m_minfAtom) {
1135 for (Mp4Atom *childAtom = m_minfAtom->firstChild(); childAtom; childAtom = childAtom->nextSibling()) {
1136 if (childAtom->id() == Mp4AtomIds::SampleTable) {
1137 continue;
1138 }
1139 childAtom->makeBuffer();
1140 }
1141 }
// buffer every child of the sample table
1142 if (m_stblAtom) {
1143 for (Mp4Atom *childAtom = m_stblAtom->firstChild(); childAtom; childAtom = childAtom->nextSibling()) {
1144 childAtom->makeBuffer();
1145 }
1146 }
1147}
1148
1153{
// Computes the size of a chunk offset atom holding chunkCount() entries of chunkOffsetSize() bytes each.
// NOTE(review): the signature and one statement before the return are not visible in this listing;
// presumably that statement accounts for the atom header - confirm.
1154 CPP_UTILITIES_UNUSED(diag)
1155 // version + flags + entry count (also 32-bit in case of co64) + entry size
// both factors are widened to 64 bits before multiplying
1156 auto size = 1u + 3u + 4ul + static_cast<std::uint64_t>(chunkOffsetSize()) * static_cast<std::uint64_t>(chunkCount());
1158 return size;
1159}
1160
// Returns a tuple of (total size of the sample table including its header, size of the chunk offset
// atom that needs to be written manually). The second value is zero if the existing chunk offset
// atom already uses the entry width given by m_chunkOffsetSize.
1165std::tuple<std::uint64_t, std::uint64_t> Mp4Track::calculateSampleTableSize(Diagnostics &diag) const
1166{
1167 auto stblSize = std::uint64_t();
1168 auto stcoSize = std::uint64_t();
1169 auto writeChunkOffsetTableManually = false;
1170 if (m_stblAtom) {
// sum up the total size of all children, leaving out a chunk offset atom of the wrong entry width
1171 for (Mp4Atom *stblChildAtom = m_stblAtom->firstChild(); stblChildAtom; stblChildAtom = stblChildAtom->nextSibling()) {
1172 switch (stblChildAtom->id()) {
// NOTE(review): the case label for this branch is not visible in this listing; presumably the 32-bit chunk offset atom - confirm
1174 if (m_chunkOffsetSize != 4) {
1175 writeChunkOffsetTableManually = true;
// "continue" skips adding the size of the existing atom
1176 continue;
1177 }
1178 break;
// NOTE(review): the case label for this branch is not visible in this listing; presumably the 64-bit chunk offset atom - confirm
1180 if (m_chunkOffsetSize != 8) {
1181 writeChunkOffsetTableManually = true;
1182 continue;
1183 }
1184 break;
1185 }
1186 stblSize += stblChildAtom->totalSize();
1187 }
1188 }
// account for the chunk offset atom which replaces the skipped one
1189 if (writeChunkOffsetTableManually) {
1190 stblSize += (stcoSize = chunkOffsetAtomSize(diag));
1191 }
1192 Mp4Atom::addHeaderSize(stblSize);
1193 return std::make_tuple(stblSize, stcoSize);
1194}
1195
1199std::uint64_t Mp4Track::requiredSize(Diagnostics &diag) const
1200{
1201 CPP_UTILITIES_UNUSED(diag)
1202
1203 const auto &info = verifyPresentTrackHeader();
1204 // add size of
1205 // ... trak header
1206 std::uint64_t size = 8;
1207 // ... tkhd atom
1208 size += info.requiredSize;
1209 // ... children beside tkhd and mdia
1210 for (Mp4Atom *trakChild = m_trakAtom->firstChild(); trakChild; trakChild = trakChild->nextSibling()) {
1211 if (trakChild->id() == Mp4AtomIds::Media || trakChild->id() == Mp4AtomIds::TrackHeader) {
1212 continue;
1213 }
1214 size += trakChild->totalSize();
1215 }
1216 // ... mdhd total size
1217 if (info.timingsVersion == 0) {
1218 // write version 0 where timing fields are 32-bit
1219 size += 32;
1220 } else {
1221 // write version 1 where timing fields are 64-bit
1222 size += 44;
1223 }
1224 // ... mdia header + hdlr total size + minf header
1225 size += 8 + (33 + m_name.size()) + 8;
1226 // ... minf children
1227 auto dinfAtomWritten = false;
1228 if (m_minfAtom) {
1229 for (Mp4Atom *childAtom = m_minfAtom->firstChild(); childAtom; childAtom = childAtom->nextSibling()) {
1230 switch (childAtom->id()) {
1232 size += std::get<0>(calculateSampleTableSize(diag));
1233 continue;
1234 }
1236 dinfAtomWritten = true;
1237 break;
1238 }
1239 size += childAtom->totalSize();
1240 }
1241 }
1242 if (!dinfAtomWritten) {
1243 // take 36 bytes for a self-made dinf atom into account if the file lacks one
1244 size += 36;
1245 }
1246 return size;
1247}
1248
1258{
// Writes the whole trak atom to the output stream: header, tkhd atom, remaining children and mdia atom.
// NOTE(review): the signature of this function is not visible in this listing.
1259 // write header
// remember where the atom starts so the size can be filled in once all children are written
1260 ostream::pos_type trakStartOffset = outputStream().tellp();
1261 m_writer.writeUInt32BE(0); // write size later
1262 m_writer.writeUInt32BE(Mp4AtomIds::Track);
1263
1264 // write tkhd atom
1265 makeTrackHeader(diag);
1266
1267 // write children of trak atom except mdia
// the tkhd atom is skipped as well because it has just been written by makeTrackHeader()
1268 for (Mp4Atom *trakChild = trakAtom().firstChild(); trakChild; trakChild = trakChild->nextSibling()) {
1269 if (trakChild->id() == Mp4AtomIds::Media || trakChild->id() == Mp4AtomIds::TrackHeader) {
1270 continue;
1271 }
1272 trakChild->copyPreferablyFromBuffer(outputStream(), diag, nullptr);
1273 }
1274
1275 // write mdia atom
1276 makeMedia(diag);
1277
1278 // write size (of trak atom)
1279 Mp4Atom::seekBackAndWriteAtomSize(outputStream(), trakStartOffset, diag);
1280}
1281
1287{
// Writes the tkhd atom to the output stream, based on the information returned by
// verifyPresentTrackHeader(). Fields following the duration and the two reserved words are
// copied from the existing atom if possible; otherwise default values are written.
// NOTE(review): the signature of this function is not visible in this listing.
1288 // verify the existing track header to make the new one based on it (if possible)
1289 const auto &info = verifyPresentTrackHeader();
1290
1291 // add notifications in case the present track header could not be parsed
1292 if (info.versionUnknown) {
1293 diag.emplace_back(DiagLevel::Critical,
1294 argsToString("The version of the present \"tkhd\"-atom (", info.version, ") is unknown. Assuming version 1."),
1295 argsToString("making \"tkhd\"-atom of track ", m_id));
1296 }
1297 if (info.truncated) {
1298 diag.emplace_back(
1299 DiagLevel::Critical, argsToString("The present \"tkhd\"-atom is truncated."), argsToString("making \"tkhd\"-atom of track ", m_id));
1300 }
1301
1302 // make size and element ID
// a 32-bit size field of 1 is followed by the atom ID and the actual size as 64-bit integer
1303 if (info.requiredSize > numeric_limits<std::uint32_t>::max()) {
1304 writer().writeUInt32BE(1);
1305 writer().writeUInt32BE(Mp4AtomIds::TrackHeader);
1306 writer().writeUInt64BE(info.requiredSize);
1307 } else {
1308 writer().writeUInt32BE(static_cast<std::uint32_t>(info.requiredSize));
1309 writer().writeUInt32BE(Mp4AtomIds::TrackHeader);
1310 }
1311
1312 // make version and flags
1313 writer().writeByte(info.writeVersion);
1314 std::uint32_t flags = 0;
1315 if (isEnabled()) {
1316 flags |= 0x000001;
1317 }
// NOTE(review): the conditions guarding the next two flag bits are not visible in this listing
1319 flags |= 0x000002;
1320 }
1322 flags |= 0x000004;
1323 }
1324 writer().writeUInt24BE(flags);
1325
1326 // make creation and modification time
// a non-zero version stores the timing fields as 64-bit integers, version 0 as 32-bit integers
1327 if (info.writeVersion != 0) {
1328 writer().writeUInt64BE(info.timings.tkhdCreationTime);
1329 writer().writeUInt64BE(info.timings.tkhdModificationTime);
1330 } else {
1331 writer().writeUInt32BE(static_cast<std::uint32_t>(info.timings.tkhdCreationTime));
1332 writer().writeUInt32BE(static_cast<std::uint32_t>(info.timings.tkhdModificationTime));
1333 }
1334
1335 // make track ID and duration
1336 writer().writeUInt32BE(static_cast<std::uint32_t>(m_id));
1337 writer().writeUInt32BE(0); // reserved
1338 if (info.writeVersion != 0) {
1339 writer().writeUInt64BE(info.timings.tkhdDuration);
1340 } else {
1341 writer().writeUInt32BE(static_cast<std::uint32_t>(info.timings.tkhdDuration));
1342 }
1343 writer().writeUInt32BE(0); // reserved
1344 writer().writeUInt32BE(0); // reserved
1345
1346 // make further values, either from existing tkhd atom or just some defaults
1347 if (info.canUseExisting) {
1348 // write all bytes after the previously determined additionalDataOffset
// the source is the buffer of the existing atom; the atom header is skipped as well
1349 m_ostream->write(m_tkhdAtom->buffer().get() + m_tkhdAtom->headerSize() + info.additionalDataOffset,
1350 static_cast<streamoff>(m_tkhdAtom->dataSize() - info.additionalDataOffset));
1351 // discard the buffer again if it wasn't present before
1352 if (info.discardBuffer) {
1353 m_tkhdAtom->discardBuffer();
1354 }
1355 } else {
1356 // write default values
1357 diag.emplace_back(DiagLevel::Warning, "Writing some default values because the existing tkhd atom is truncated.", "making tkhd atom");
1358 writer().writeInt16BE(0); // layer
1359 writer().writeInt16BE(0); // alternate group
1360 writer().writeFixed8BE(1.0); // volume (fixed 8.8 - 2 byte)
1361 writer().writeUInt16BE(0); // reserved
1362 for (const std::int32_t value : { 0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000 }) { // unity matrix
1363 writer().writeInt32BE(value);
1364 }
1365 writer().writeFixed16BE(1.0); // width
1366 writer().writeFixed16BE(1.0); // height
1367 }
1368}
1369
1375{
// Writes the mdia atom to the output stream: the mdhd and hdlr atoms are made from scratch,
// the minf atom is written via makeMediaInfo().
// NOTE(review): the signature of this function is not visible in this listing.
// remember where the atom starts so the size can be filled in at the end
1376 ostream::pos_type mdiaStartOffset = outputStream().tellp();
1377 writer().writeUInt32BE(0); // write size later
1378 writer().writeUInt32BE(Mp4AtomIds::Media);
1379 // write mdhd atom
1380 const auto &info = verifyPresentTrackHeader();
1381 const auto &timings = info.timings;
// version 1 is only required if one of the mdhd timing values exceeds 32 bits
1382 const auto timingsVersion = timings.requiredMdhdVersion();
// the three timing fields take 12 more bytes in total when stored as 64-bit integers
1383 writer().writeUInt32BE(timingsVersion != 0 ? 44 : 32); // size
1384 writer().writeUInt32BE(Mp4AtomIds::MediaHeader);
1385 writer().writeByte(timingsVersion); // version
1386 writer().writeUInt24BE(0); // flags
1387 if (timingsVersion != 0) {
1388 writer().writeUInt64BE(timings.mdhdCreationTime);
1389 writer().writeUInt64BE(timings.mdhdModificationTime);
1390 } else {
1391 writer().writeUInt32BE(static_cast<std::uint32_t>(timings.mdhdCreationTime));
1392 writer().writeUInt32BE(static_cast<std::uint32_t>(timings.mdhdModificationTime));
1393 }
1394 writer().writeUInt32BE(m_timeScale);
1395 if (timingsVersion != 0) {
1396 writer().writeUInt64BE(timings.mdhdDuration);
1397 } else {
1398 writer().writeUInt32BE(static_cast<std::uint32_t>(timings.mdhdDuration));
1399 }
1400 // convert and write language
1401 // note: Not using m_locale.abbreviatedName() here to preserve "und" (explicitly undefined).
// pick the first non-empty locale detail which is an ISO-639-2/T code or of unknown format
1402 const auto *language = static_cast<const std::string *>(&LocaleDetail::getEmpty());
1403 for (const auto &detail : m_locale) {
1404 if (!detail.empty() && (detail.format == LocaleFormat::ISO_639_2_T || detail.format == LocaleFormat::Unknown)) {
1405 language = &detail;
1406 break;
1407 }
1408 }
// pack three lowercase letters into 16 bits: each letter is stored as (letter - 0x60) in 5 bits,
// shifted by 10, 5 and 0 bits respectively
1409 auto codedLanguage = static_cast<std::uint16_t>(0u);
1410 for (auto charIndex = static_cast<std::size_t>(0); charIndex != 3; ++charIndex) {
// a missing character (language shorter than 3 characters) is treated like an invalid one
1411 const char langChar = charIndex < language->size() ? (*language)[charIndex] : 0;
1412 if (langChar >= 'a' && langChar <= 'z') {
1413 codedLanguage |= static_cast<std::uint16_t>((langChar - 0x60) << (0xA - charIndex * 0x5));
1414 continue;
1415 }
1416
1417 // handle invalid characters
1418 if (language->empty()) {
1419 // preserve null value (empty language field) which is not the same as "und" (explicitly undefined)
1420 codedLanguage = 0;
1421 break;
1422 }
1423 diag.emplace_back(DiagLevel::Warning, "Assigned language \"" % *language + "\" is of an invalid format. Setting language to undefined.",
1424 "making mdhd atom");
1425 codedLanguage = 0x55C4; // und(efined)
1426 break;
1427 }
// only the first three characters are encoded by the loop above
1428 if (language->size() > 3) {
1429 diag.emplace_back(
1430 DiagLevel::Warning, "Assigned language \"" % *language + "\" is longer than 3 byte and hence will be truncated.", "making mdhd atom");
1431 }
1432 writer().writeUInt16BE(codedLanguage);
1433 writer().writeUInt16BE(0); // pre defined
1434 // write hdlr atom
// 33 = 8 (header) + 8 (version, flags, pre defined) + 4 (handler type) + 12 (reserved) + 1 (string terminator)
1435 writer().writeUInt32BE(33 + static_cast<std::uint32_t>(m_name.size())); // size
1436 writer().writeUInt32BE(Mp4AtomIds::HandlerReference);
1437 writer().writeUInt64BE(0); // version, flags, pre defined
// write the handler type as 4-character code according to the media type
1438 switch (m_mediaType) {
1439 case MediaType::Video:
1440 outputStream().write("vide", 4);
1441 break;
1442 case MediaType::Audio:
1443 outputStream().write("soun", 4);
1444 break;
1445 case MediaType::Hint:
1446 outputStream().write("hint", 4);
1447 break;
1448 case MediaType::Text:
1449 outputStream().write("text", 4);
1450 break;
1451 case MediaType::Meta:
1452 outputStream().write("meta", 4);
1453 break;
1454 default:
// NOTE(review): the condition guarding the following diagnostic is not visible in this listing
1456 diag.emplace_back(DiagLevel::Critical, "Media type is invalid; keeping media type as-is.", "making hdlr atom");
1457 }
// fall back to writing the raw media type value unchanged
1458 writer().writeUInt32BE(m_rawMediaType);
1459 break;
1460 }
1461 for (int i = 0; i < 3; ++i)
1462 writer().writeUInt32BE(0); // reserved
1463 writer().writeTerminatedString(m_name);
1464 // write minf atom
1465 makeMediaInfo(diag);
1466 // write size (of mdia atom)
1467 Mp4Atom::seekBackAndWriteAtomSize(outputStream(), mdiaStartOffset, diag);
1468}
1469
1475{
// Writes the minf atom to the output stream: existing children are copied, the sample table is
// written via makeSampleTable() and a minimal dinf atom is added if no existing one was copied.
// NOTE(review): the signature of this function is not visible in this listing.
1476 ostream::pos_type minfStartOffset = outputStream().tellp();
1477 writer().writeUInt32BE(0); // write size later
1478 writer().writeUInt32BE(Mp4AtomIds::MediaInformation);
1479 auto dinfAtomWritten = false;
1480 if (m_minfAtom) {
1481 // copy existing atoms as-is except sample table which is handled by makeSampleTable()
1482 for (Mp4Atom *childAtom = m_minfAtom->firstChild(); childAtom; childAtom = childAtom->nextSibling()) {
1483 switch (childAtom->id()) {
// NOTE(review): the case labels of this switch are not visible in this listing; presumably the
// first branch handles the sample table and the second one the data information atom - confirm
1485 makeSampleTable(diag);
1486 continue;
1488 dinfAtomWritten = true;
1489 break;
1490 }
1491 childAtom->copyPreferablyFromBuffer(outputStream(), diag, nullptr);
1492 }
1493 }
1494 // write dinf atom if not written yet
// sizes: url atom = 12 bytes, dref atom = 8 + 4 + 4 + 12 = 28 bytes, dinf atom = 8 + 28 = 36 bytes
1495 if (!dinfAtomWritten) {
1496 writer().writeUInt32BE(36); // size
1497 writer().writeUInt32BE(Mp4AtomIds::DataInformation);
1498 // write dref atom
1499 writer().writeUInt32BE(28); // size
1500 writer().writeUInt32BE(Mp4AtomIds::DataReference);
1501 writer().writeUInt32BE(0); // version and flags
1502 writer().writeUInt32BE(1); // entry count
1503 // write url atom
1504 writer().writeUInt32BE(12); // size
1505 writer().writeUInt32BE(Mp4AtomIds::DataEntryUrl);
1506 writer().writeByte(0); // version
1507 writer().writeUInt24BE(0x000001); // flags (media data is in the same file as the movie box)
1508 }
1509 // write size (of minf atom)
1510 Mp4Atom::seekBackAndWriteAtomSize(outputStream(), minfStartOffset, diag);
1511}
1512
1518{
// Writes the children of the stbl atom to the output stream. If calculateSampleTableSize() reports
// a non-zero chunk offset atom size, the chunk offset table is written from scratch afterwards.
// NOTE(review): the signature of this function is not visible in this listing.
1519 // find existing stbl atom
1520 if (!m_stblAtom) {
1521 diag.emplace_back(DiagLevel::Critical,
1522 "Source track does not contain mandatory stbl atom and the tagparser lib is unable to make one from scratch.", "making stbl atom");
1523 return;
1524 }
1525
1526 // compute size and write header
1527 auto [stblSize, stcoSize] = calculateSampleTableSize(diag);
// NOTE(review): the statement writing the header is not visible in this listing
1529
1530 // write children
1531 for (auto *stblChildAtom = m_stblAtom->firstChild(); stblChildAtom; stblChildAtom = stblChildAtom->nextSibling()) {
1532 switch (stblChildAtom->id()) {
// NOTE(review): the case labels are not visible in this listing; presumably they select the
// existing chunk offset atom, which is skipped if the table is re-written below - confirm
1535 if (stcoSize) {
1536 continue;
1537 }
1538 }
1539 stblChildAtom->copyPreferablyFromBuffer(outputStream(), diag, nullptr);
1540 }
1541
1542 // write chunk offset table
// nothing to do if the existing chunk offset atom has been copied in the loop above
1543 if (!stcoSize) {
1544 return;
1545 }
1546 const auto chunkOffsets = readChunkOffsets(false, diag);
// NOTE(review): the definition of atomId is not visible in this listing
1548 Mp4Atom::makeHeader(stcoSize, atomId, writer());
1549 writer().writeUInt32BE(0); // version + flags
1550 writer().writeUInt32BE(static_cast<std::uint32_t>(chunkOffsets.size()));
// write each offset with the entry width given by chunkOffsetSize(); other widths write no entries
1551 switch (chunkOffsetSize()) {
1552 case 4:
1553 for (const auto chunk : chunkOffsets) {
1554 writer().writeUInt32BE(static_cast<std::uint32_t>(chunk));
1555 }
1556 break;
1557 case 8:
1558 for (const auto chunk : chunkOffsets) {
1559 writer().writeUInt64BE(chunk);
1560 }
1561 break;
1562 }
1563}
1564
1566{
1567 CPP_UTILITIES_UNUSED(progress)
1568
1569 static const string context("parsing MP4 track");
1570 using namespace Mp4AtomIds;
1571 if (!m_trakAtom) {
1572 diag.emplace_back(DiagLevel::Critical, "\"trak\"-atom is null.", context);
1573 throw InvalidDataException();
1574 }
1575
1576 // get atoms
1577 try {
1578 if (!(m_tkhdAtom = m_trakAtom->childById(TrackHeader, diag))) {
1579 diag.emplace_back(DiagLevel::Critical, "No \"tkhd\"-atom found.", context);
1580 throw InvalidDataException();
1581 }
1582 if (!(m_mdiaAtom = m_trakAtom->childById(Media, diag))) {
1583 diag.emplace_back(DiagLevel::Critical, "No \"mdia\"-atom found.", context);
1584 throw InvalidDataException();
1585 }
1586 if (!(m_mdhdAtom = m_mdiaAtom->childById(MediaHeader, diag))) {
1587 diag.emplace_back(DiagLevel::Critical, "No \"mdhd\"-atom found.", context);
1588 throw InvalidDataException();
1589 }
1590 if (!(m_hdlrAtom = m_mdiaAtom->childById(HandlerReference, diag))) {
1591 diag.emplace_back(DiagLevel::Critical, "No \"hdlr\"-atom found.", context);
1592 throw InvalidDataException();
1593 }
1594 if (!(m_minfAtom = m_mdiaAtom->childById(MediaInformation, diag))) {
1595 diag.emplace_back(DiagLevel::Critical, "No \"minf\"-atom found.", context);
1596 throw InvalidDataException();
1597 }
1598 if (!(m_stblAtom = m_minfAtom->childById(SampleTable, diag))) {
1599 diag.emplace_back(DiagLevel::Critical, "No \"stbl\"-atom found.", context);
1600 throw InvalidDataException();
1601 }
1602 if (!(m_stsdAtom = m_stblAtom->childById(SampleDescription, diag))) {
1603 diag.emplace_back(DiagLevel::Critical, "No \"stsd\"-atom found.", context);
1604 throw InvalidDataException();
1605 }
1606 if (!(m_stcoAtom = m_stblAtom->childById(ChunkOffset, diag)) && !(m_stcoAtom = m_stblAtom->childById(ChunkOffset64, diag))) {
1607 diag.emplace_back(DiagLevel::Critical, "No \"stco\"/\"co64\"-atom found.", context);
1608 throw InvalidDataException();
1609 }
1610 if (!(m_stscAtom = m_stblAtom->childById(SampleToChunk, diag))) {
1611 diag.emplace_back(DiagLevel::Critical, "No \"stsc\"-atom found.", context);
1612 throw InvalidDataException();
1613 }
1614 if (!(m_stszAtom = m_stblAtom->childById(SampleSize, diag)) && !(m_stszAtom = m_stblAtom->childById(CompactSampleSize, diag))) {
1615 diag.emplace_back(DiagLevel::Critical, "No \"stsz\"/\"stz2\"-atom found.", context);
1616 throw InvalidDataException();
1617 }
1618 } catch (const Failure &) {
1619 diag.emplace_back(DiagLevel::Critical, "Unable to parse relevant atoms.", context);
1620 throw InvalidDataException();
1621 }
1622
1623 BinaryReader &reader = m_trakAtom->reader();
1624
1625 // read tkhd atom
1626 m_istream->seekg(static_cast<streamoff>(m_tkhdAtom->startOffset() + 8)); // seek to beg, skip size and name
1627 auto atomVersion = reader.readByte(); // read version
1628 const auto flags = reader.readUInt24BE();
1629 modFlagEnum(m_flags, TrackFlags::Enabled, flags & 0x000001);
1630 modFlagEnum(m_flags, TrackFlags::UsedInPresentation, flags & 0x000002);
1631 modFlagEnum(m_flags, TrackFlags::UsedWhenPreviewing, flags & 0x000004);
1632 switch (atomVersion) {
1633 case 0:
1634 m_rawTkhdCreationTime = reader.readUInt32BE();
1635 m_rawTkhdModificationTime = reader.readUInt32BE();
1636 m_id = reader.readUInt32BE();
1637 m_istream->seekg(4, std::ios_base::cur);
1638 m_rawTkhdDuration = reader.readUInt32BE();
1639 break;
1640 case 1:
1641 m_rawTkhdCreationTime = reader.readUInt64BE();
1642 m_rawTkhdModificationTime = reader.readUInt64BE();
1643 m_id = reader.readUInt32BE();
1644 m_istream->seekg(4, std::ios_base::cur);
1645 m_rawTkhdDuration = reader.readUInt64BE();
1646 break;
1647 default:
1648 diag.emplace_back(DiagLevel::Critical,
1649 "Version of \"tkhd\"-atom not supported. It will be ignored. Track ID, creation time and modification time might not be be determined.",
1650 context);
1651 m_rawTkhdCreationTime = m_rawTkhdModificationTime = m_rawTkhdDuration = 0;
1654 m_id = 0;
1655 }
1656
1657 // read mdhd atom
1658 m_istream->seekg(static_cast<streamoff>(m_mdhdAtom->dataOffset())); // seek to beg, skip size and name
1659 atomVersion = reader.readByte(); // read version
1660 m_istream->seekg(3, ios_base::cur); // skip flags
1661 switch (atomVersion) {
1662 case 0:
1663 m_rawMdhdCreationTime = reader.readUInt32BE();
1664 m_rawMdhdModificationTime = reader.readUInt32BE();
1665 m_timeScale = reader.readUInt32BE();
1666 m_rawMdhdDuration = reader.readUInt32BE();
1667 break;
1668 case 1:
1669 m_rawMdhdCreationTime = reader.readUInt64BE();
1670 m_rawMdhdModificationTime = reader.readUInt64BE();
1671 m_timeScale = reader.readUInt32BE();
1672 m_rawMdhdDuration = reader.readUInt64BE();
1673 break;
1674 default:
1675 diag.emplace_back(DiagLevel::Warning,
1676 "Version of \"mdhd\"-atom not supported. It will be ignored. Creation time, modification time, time scale and duration might not be "
1677 "determined.",
1678 context);
1679 m_rawMdhdCreationTime = m_rawMdhdModificationTime = m_rawMdhdDuration = 0;
1680 m_timeScale = 0;
1681 m_duration = TimeSpan();
1682 }
1683 m_creationTime = Mp4Container::epoch + TimeSpan::fromSeconds(static_cast<TimeSpan::TickType>(m_rawMdhdCreationTime));
1684 m_modificationTime = Mp4Container::epoch + TimeSpan::fromSeconds(static_cast<TimeSpan::TickType>(m_rawMdhdModificationTime));
1685 m_duration = TimeSpan::fromSeconds(static_cast<TimeSpan::TickType>(m_rawMdhdDuration)) / static_cast<TimeSpan::TickType>(m_timeScale);
1686
1687 std::uint16_t tmp = reader.readUInt16BE();
1688 if (tmp) {
1689 const char buff[] = {
1690 static_cast<char>(((tmp & 0x7C00) >> 0xA) + 0x60),
1691 static_cast<char>(((tmp & 0x03E0) >> 0x5) + 0x60),
1692 static_cast<char>(((tmp & 0x001F) >> 0x0) + 0x60),
1693 };
1694 m_locale.emplace_back(std::string(buff, 3), LocaleFormat::ISO_639_2_T);
1695 } else {
1696 m_locale.clear();
1697 }
1698
1699 // read hdlr atom
1700 // -> seek to begin skipping size, name, version, flags and reserved bytes
1701 m_istream->seekg(static_cast<streamoff>(m_hdlrAtom->dataOffset() + 8));
1702 // -> track type
1703 switch (m_rawMediaType = reader.readUInt32BE()) {
1704 case 0x76696465:
1706 break;
1707 case 0x736F756E:
1709 break;
1710 case 0x68696E74:
1712 break;
1713 case 0x6D657461:
1715 break;
1716 case 0x74657874:
1718 break;
1719 default:
1721 }
1722 // -> name
1723 m_istream->seekg(12, ios_base::cur); // skip reserved bytes
1724 if (static_cast<std::uint64_t>(tmp = static_cast<std::uint8_t>(m_istream->peek())) == m_hdlrAtom->dataSize() - 12 - 4 - 8 - 1) {
1725 // assume size prefixed string (seems to appear in QuickTime files)
1726 m_istream->seekg(1, ios_base::cur);
1727 m_name = reader.readString(tmp);
1728 } else {
1729 // assume null terminated string (appears in MP4 files)
1730 m_name = reader.readTerminatedString(m_hdlrAtom->dataSize() - 12 - 4 - 8, 0);
1731 }
1732
1733 // read stco atom (only chunk count)
1734 m_chunkOffsetSize = (m_stcoAtom->id() == Mp4AtomIds::ChunkOffset64) ? 8 : 4;
1735 m_istream->seekg(static_cast<streamoff>(m_stcoAtom->dataOffset() + 4));
1736 m_chunkCount = reader.readUInt32BE();
1737
1738 // read stsd atom
1739 m_istream->seekg(static_cast<streamoff>(m_stsdAtom->dataOffset() + 4)); // seek to beg, skip size, name, version and flags
1740 const auto entryCount = reader.readUInt32BE();
1741 Mp4Atom *esDescParentAtom = nullptr;
1742 if (entryCount) {
1743 try {
1744 for (Mp4Atom *codecConfigContainerAtom = m_stsdAtom->firstChild(); codecConfigContainerAtom;
1745 codecConfigContainerAtom = codecConfigContainerAtom->nextSibling()) {
1746 codecConfigContainerAtom->parse(diag);
1747
1748 // parse FOURCC
1749 m_formatId = interpretIntegerAsString<std::uint32_t>(codecConfigContainerAtom->id());
1750 m_format = FourccIds::fourccToMediaFormat(codecConfigContainerAtom->id());
1751
1752 // parse codecConfigContainerAtom
1753 m_istream->seekg(static_cast<streamoff>(codecConfigContainerAtom->dataOffset()));
1754 switch (codecConfigContainerAtom->id()) {
1757 case FourccIds::Amr:
1758 case FourccIds::Drms:
1759 case FourccIds::Alac:
1761 case FourccIds::Ac3:
1762 case FourccIds::EAc3:
1764 case FourccIds::Dts:
1765 case FourccIds::DtsH:
1766 case FourccIds::DtsE:
1767 case FourccIds::Flac:
1768 case FourccIds::Opus:
1769 m_istream->seekg(6 + 2, ios_base::cur); // skip reserved bytes, data reference index
1770 tmp = reader.readUInt16BE(); // read sound version
1771 m_istream->seekg(6, ios_base::cur);
1772 m_channelCount = reader.readUInt16BE();
1773 m_bitsPerSample = reader.readUInt16BE();
1774 m_istream->seekg(4, ios_base::cur); // skip reserved bytes (again)
1775 if (!m_samplingFrequency) {
1776 m_samplingFrequency = reader.readUInt32BE() >> 16;
1777 if (codecConfigContainerAtom->id() != FourccIds::DolbyMpl) {
1778 m_samplingFrequency >>= 16;
1779 }
1780 } else {
1781 m_istream->seekg(4, ios_base::cur);
1782 }
1783 if (codecConfigContainerAtom->id() != FourccIds::WindowsMediaAudio) {
1784 switch (tmp) {
1785 case 1:
1786 codecConfigContainerAtom->denoteFirstChild(codecConfigContainerAtom->headerSize() + 28 + 16);
1787 break;
1788 case 2:
1789 codecConfigContainerAtom->denoteFirstChild(codecConfigContainerAtom->headerSize() + 28 + 32);
1790 break;
1791 default:
1792 codecConfigContainerAtom->denoteFirstChild(codecConfigContainerAtom->headerSize() + 28);
1793 }
1794 if (!esDescParentAtom) {
1795 esDescParentAtom = codecConfigContainerAtom;
1796 }
1797 }
1798 break;
1802 case FourccIds::Avc1:
1803 case FourccIds::Avc2:
1804 case FourccIds::Avc3:
1805 case FourccIds::Avc4:
1806 case FourccIds::Drmi:
1807 case FourccIds::Hevc1:
1808 case FourccIds::Hevc2:
1809 case FourccIds::Av1_IVF:
1811 case FourccIds::Vp9_2:
1812 m_istream->seekg(6 + 2 + 16, ios_base::cur); // skip reserved bytes, data reference index, and reserved bytes (again)
1813 m_pixelSize.setWidth(reader.readUInt16BE());
1814 m_pixelSize.setHeight(reader.readUInt16BE());
1815 m_resolution.setWidth(static_cast<std::uint32_t>(reader.readFixed16BE()));
1816 m_resolution.setHeight(static_cast<std::uint32_t>(reader.readFixed16BE()));
1817 m_istream->seekg(4, ios_base::cur); // skip reserved bytes
1818 m_framesPerSample = reader.readUInt16BE();
1819 tmp = reader.readByte();
1820 m_compressorName = reader.readString(31);
1821 if (tmp == 0) {
1822 m_compressorName.clear();
1823 } else if (tmp < 32) {
1824 m_compressorName.resize(tmp);
1825 }
1826 m_depth = reader.readUInt16BE(); // 24: color without alpha
1827 codecConfigContainerAtom->denoteFirstChild(codecConfigContainerAtom->headerSize() + 78);
1828 if (!esDescParentAtom) {
1829 esDescParentAtom = codecConfigContainerAtom;
1830 }
1831 break;
1833 // skip reserved bytes and data reference index
1834 codecConfigContainerAtom->denoteFirstChild(codecConfigContainerAtom->headerSize() + 8);
1835 if (!esDescParentAtom) {
1836 esDescParentAtom = codecConfigContainerAtom;
1837 }
1838 break;
1840 break; // TODO
1842 break; // TODO
1843 default:;
1844 }
1845 }
1846
1847 if (esDescParentAtom) {
1848 // parse AVC configuration
1849 if (auto *const avcConfigAtom = esDescParentAtom->childById(Mp4AtomIds::AvcConfiguration, diag)) {
1850 m_istream->seekg(static_cast<streamoff>(avcConfigAtom->dataOffset()));
1851 m_avcConfig = make_unique<TagParser::AvcConfiguration>();
1852 try {
1853 m_avcConfig->parse(reader, avcConfigAtom->dataSize(), diag);
1854 addInfo(*m_avcConfig, *this);
1855 } catch (const TruncatedDataException &) {
1856 diag.emplace_back(DiagLevel::Critical, "AVC configuration is truncated.", context);
1857 } catch (const Failure &) {
1858 diag.emplace_back(DiagLevel::Critical, "AVC configuration is invalid.", context);
1859 }
1860 }
1861
1862 // parse AV1 configuration
1863 if (auto *const av1ConfigAtom = esDescParentAtom->childById(Mp4AtomIds::Av1Configuration, diag)) {
1864 m_istream->seekg(static_cast<streamoff>(av1ConfigAtom->dataOffset()));
1865 m_av1Config = make_unique<TagParser::Av1Configuration>();
1866 try {
1867 m_av1Config->parse(reader, av1ConfigAtom->dataSize(), diag);
1868 addInfo(*m_av1Config, *this);
1869 } catch (const NotImplementedException &) {
1870 diag.emplace_back(DiagLevel::Information, "Parsing AV1 configuration is not supported yet.", context);
1871 } catch (const TruncatedDataException &) {
1872 diag.emplace_back(DiagLevel::Critical, "AV1 configuration is truncated.", context);
1873 } catch (const Failure &) {
1874 diag.emplace_back(DiagLevel::Critical, "AV1 configuration is invalid.", context);
1875 }
1876 }
1877
1878 // parse MPEG-4 elementary stream descriptor
1879 auto *esDescAtom = esDescParentAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor, diag);
1880 if (!esDescAtom) {
1881 esDescAtom = esDescParentAtom->childById(Mp4FormatExtensionIds::Mpeg4ElementaryStreamDescriptor2, diag);
1882 }
1883 if (esDescAtom) {
1884 try {
1885 if ((m_esInfo = parseMpeg4ElementaryStreamInfo(m_reader, esDescAtom, diag))) {
1887 m_bitrate = static_cast<double>(m_esInfo->averageBitrate) / 1000;
1888 m_maxBitrate = static_cast<double>(m_esInfo->maxBitrate) / 1000;
1889 if (m_esInfo->audioSpecificConfig) {
1890 // check the audio specific config for useful information
1891 m_format += Mpeg4AudioObjectIds::idToMediaFormat(m_esInfo->audioSpecificConfig->audioObjectType,
1892 m_esInfo->audioSpecificConfig->sbrPresent, m_esInfo->audioSpecificConfig->psPresent);
1893 if (m_esInfo->audioSpecificConfig->sampleFrequencyIndex == 0xF) {
1894 m_samplingFrequency = m_esInfo->audioSpecificConfig->sampleFrequency;
1895 } else if (m_esInfo->audioSpecificConfig->sampleFrequencyIndex < sizeof(mpeg4SamplingFrequencyTable)) {
1896 m_samplingFrequency = mpeg4SamplingFrequencyTable[m_esInfo->audioSpecificConfig->sampleFrequencyIndex];
1897 } else {
1898 diag.emplace_back(DiagLevel::Warning, "Audio specific config has invalid sample frequency index.", context);
1899 }
1900 if (m_esInfo->audioSpecificConfig->extensionSampleFrequencyIndex == 0xF) {
1901 m_extensionSamplingFrequency = m_esInfo->audioSpecificConfig->extensionSampleFrequency;
1902 } else if (m_esInfo->audioSpecificConfig->extensionSampleFrequencyIndex < sizeof(mpeg4SamplingFrequencyTable)) {
1904 = mpeg4SamplingFrequencyTable[m_esInfo->audioSpecificConfig->extensionSampleFrequencyIndex];
1905 } else {
1906 diag.emplace_back(
1907 DiagLevel::Warning, "Audio specific config has invalid extension sample frequency index.", context);
1908 }
1909 m_channelConfig = m_esInfo->audioSpecificConfig->channelConfiguration;
1910 m_extensionChannelConfig = m_esInfo->audioSpecificConfig->extensionChannelConfiguration;
1911 }
1912 if (m_esInfo->videoSpecificConfig) {
1913 // check the video specific config for useful information
1914 if (m_format.general == GeneralMediaFormat::Mpeg4Video && m_esInfo->videoSpecificConfig->profile) {
1915 m_format.sub = m_esInfo->videoSpecificConfig->profile;
1916 if (!m_esInfo->videoSpecificConfig->userData.empty()) {
1917 m_formatId += " / ";
1918 m_formatId += m_esInfo->videoSpecificConfig->userData;
1919 }
1920 }
1921 }
1922 // check the stream data for missing information
1923 switch (m_format.general) {
1926 MpegAudioFrame frame;
1927 m_istream->seekg(static_cast<streamoff>(m_stcoAtom->dataOffset() + 8));
1928 m_istream->seekg(static_cast<streamoff>(m_chunkOffsetSize == 8 ? reader.readUInt64BE() : reader.readUInt32BE()));
1929 frame.parseHeader(reader, diag);
1930 MpegAudioFrameStream::addInfo(frame, *this);
1931 break;
1932 }
1933 default:;
1934 }
1935 }
1936 } catch (const Failure &) {
1937 }
1938 }
1939 }
1940 } catch (const Failure &) {
1941 diag.emplace_back(DiagLevel::Critical, "Unable to parse child atoms of \"stsd\"-atom.", context);
1942 }
1943 }
1944
1945 // read stsz atom which holds the sample size table
1946 m_sampleSizes.clear();
1947 m_size = m_sampleCount = 0;
1948 std::uint64_t actualSampleSizeTableSize = m_stszAtom->dataSize();
1949 if (actualSampleSizeTableSize < 12) {
1950 diag.emplace_back(DiagLevel::Critical,
1951 "The stsz atom is truncated. There are no sample sizes present. The size of the track can not be determined.", context);
1952 } else {
1953 actualSampleSizeTableSize -= 12; // subtract size of version and flags
1954 m_istream->seekg(static_cast<streamoff>(m_stszAtom->dataOffset() + 4)); // seek to beg, skip size, name, version and flags
1955 std::uint32_t fieldSize;
1956 std::uint32_t constantSize;
1957 if (m_stszAtom->id() == Mp4AtomIds::CompactSampleSize) {
1958 constantSize = 0;
1959 m_istream->seekg(3, ios_base::cur); // seek reserved bytes
1960 fieldSize = reader.readByte();
1961 m_sampleCount = reader.readUInt32BE();
1962 } else {
1963 constantSize = reader.readUInt32BE();
1964 m_sampleCount = reader.readUInt32BE();
1965 fieldSize = 32;
1966 }
1967 if (constantSize) {
1968 m_sampleSizes.push_back(constantSize);
1969 m_size = constantSize * m_sampleCount;
1970 } else {
1971 auto actualSampleCount = m_sampleCount;
1972 const auto calculatedSampleSizeTableSize
1973 = static_cast<std::uint64_t>(std::ceil((0.125 * fieldSize) * static_cast<double>(m_sampleCount)));
1974 if (calculatedSampleSizeTableSize < actualSampleSizeTableSize) {
1975 diag.emplace_back(
1976 DiagLevel::Critical, "The stsz atom stores more entries as denoted. The additional entries will be ignored.", context);
1977 } else if (calculatedSampleSizeTableSize > actualSampleSizeTableSize) {
1978 diag.emplace_back(DiagLevel::Critical, "The stsz atom is truncated. It stores less entries as denoted.", context);
1979 actualSampleCount = static_cast<std::uint64_t>(floor(static_cast<double>(actualSampleSizeTableSize) / (0.125 * fieldSize)));
1980 }
1981 m_sampleSizes.reserve(actualSampleCount);
1982 std::uint32_t i = 1;
1983 switch (fieldSize) {
1984 case 4:
1985 for (; i <= actualSampleCount; i += 2) {
1986 std::uint8_t val = reader.readByte();
1987 m_sampleSizes.push_back(val >> 4);
1988 m_sampleSizes.push_back(val & 0xF0);
1989 m_size += (val >> 4) + (val & 0xF0);
1990 }
1991 if (i <= actualSampleCount + 1) {
1992 m_sampleSizes.push_back(reader.readByte() >> 4);
1993 m_size += m_sampleSizes.back();
1994 }
1995 break;
1996 case 8:
1997 for (; i <= actualSampleCount; ++i) {
1998 m_sampleSizes.push_back(reader.readByte());
1999 m_size += m_sampleSizes.back();
2000 }
2001 break;
2002 case 16:
2003 for (; i <= actualSampleCount; ++i) {
2004 m_sampleSizes.push_back(reader.readUInt16BE());
2005 m_size += m_sampleSizes.back();
2006 }
2007 break;
2008 case 32:
2009 for (; i <= actualSampleCount; ++i) {
2010 m_sampleSizes.push_back(reader.readUInt32BE());
2011 m_size += m_sampleSizes.back();
2012 }
2013 break;
2014 default:
2015 diag.emplace_back(DiagLevel::Critical,
2016 "The fieldsize used to store the sample sizes is not supported. The sample count and size of the track can not be determined.",
2017 context);
2018 }
2019 }
2020 }
2021
2022 // no sample sizes found, search for trun atoms
2023 std::uint64_t totalDuration = 0;
2024 for (Mp4Atom *moofAtom = m_trakAtom->container().firstElement()->siblingByIdIncludingThis(MovieFragment, diag); moofAtom;
2025 moofAtom = moofAtom->siblingById(MovieFragment, diag)) {
2026 moofAtom->parse(diag);
2027 for (Mp4Atom *trafAtom = moofAtom->childById(TrackFragment, diag); trafAtom; trafAtom = trafAtom->siblingById(TrackFragment, diag)) {
2028 trafAtom->parse(diag);
2029 for (Mp4Atom *tfhdAtom = trafAtom->childById(TrackFragmentHeader, diag); tfhdAtom;
2030 tfhdAtom = tfhdAtom->siblingById(TrackFragmentHeader, diag)) {
2031 tfhdAtom->parse(diag);
2032 std::uint32_t calculatedDataSize = 0;
2033 if (tfhdAtom->dataSize() < calculatedDataSize) {
2034 diag.emplace_back(DiagLevel::Critical, "tfhd atom is truncated.", context);
2035 } else {
2036 m_istream->seekg(static_cast<streamoff>(tfhdAtom->dataOffset() + 1));
2037 std::uint32_t tfhdFlags = reader.readUInt24BE();
2038 if (m_id == reader.readUInt32BE()) { // check track ID
2039 if (tfhdFlags & 0x000001) { // base-data-offset present
2040 calculatedDataSize += 8;
2041 }
2042 if (tfhdFlags & 0x000002) { // sample-description-index present
2043 calculatedDataSize += 4;
2044 }
2045 if (tfhdFlags & 0x000008) { // default-sample-duration present
2046 calculatedDataSize += 4;
2047 }
2048 if (tfhdFlags & 0x000010) { // default-sample-size present
2049 calculatedDataSize += 4;
2050 }
2051 if (tfhdFlags & 0x000020) { // default-sample-flags present
2052 calculatedDataSize += 4;
2053 }
2054 //uint64 baseDataOffset = moofAtom->startOffset();
2055 //uint32 defaultSampleDescriptionIndex = 0;
2056 std::uint32_t defaultSampleDuration = 0;
2057 std::uint32_t defaultSampleSize = 0;
2058 //uint32 defaultSampleFlags = 0;
2059 if (tfhdAtom->dataSize() < calculatedDataSize) {
2060 diag.emplace_back(DiagLevel::Critical, "tfhd atom is truncated (presence of fields denoted).", context);
2061 } else {
2062 if (tfhdFlags & 0x000001) { // base-data-offset present
2063 //baseDataOffset = reader.readUInt64();
2064 m_istream->seekg(8, ios_base::cur);
2065 }
2066 if (tfhdFlags & 0x000002) { // sample-description-index present
2067 //defaultSampleDescriptionIndex = reader.readUInt32();
2068 m_istream->seekg(4, ios_base::cur);
2069 }
2070 if (tfhdFlags & 0x000008) { // default-sample-duration present
2071 defaultSampleDuration = reader.readUInt32BE();
2072 //m_istream->seekg(4, ios_base::cur);
2073 }
2074 if (tfhdFlags & 0x000010) { // default-sample-size present
2075 defaultSampleSize = reader.readUInt32BE();
2076 }
2077 if (tfhdFlags & 0x000020) { // default-sample-flags present
2078 //defaultSampleFlags = reader.readUInt32BE();
2079 m_istream->seekg(4, ios_base::cur);
2080 }
2081 }
2082 for (Mp4Atom *trunAtom = trafAtom->childById(TrackFragmentRun, diag); trunAtom;
2083 trunAtom = trunAtom->siblingById(TrackFragmentRun, diag)) {
2084 std::uint32_t trunCalculatedDataSize = 8;
2085 if (trunAtom->dataSize() < trunCalculatedDataSize) {
2086 diag.emplace_back(DiagLevel::Critical, "trun atom is truncated.", context);
2087 } else {
2088 m_istream->seekg(static_cast<streamoff>(trunAtom->dataOffset() + 1));
2089 std::uint32_t trunFlags = reader.readUInt24BE();
2090 std::uint32_t sampleCount = reader.readUInt32BE();
2092 if (trunFlags & 0x000001) { // data offset present
2093 trunCalculatedDataSize += 4;
2094 }
2095 if (trunFlags & 0x000004) { // first-sample-flags present
2096 trunCalculatedDataSize += 4;
2097 }
2098 std::uint32_t entrySize = 0;
2099 if (trunFlags & 0x000100) { // sample-duration present
2100 entrySize += 4;
2101 }
2102 if (trunFlags & 0x000200) { // sample-size present
2103 entrySize += 4;
2104 }
2105 if (trunFlags & 0x000400) { // sample-flags present
2106 entrySize += 4;
2107 }
2108 if (trunFlags & 0x000800) { // sample-composition-time-offsets present
2109 entrySize += 4;
2110 }
2111 trunCalculatedDataSize += entrySize * sampleCount;
2112 if (trunAtom->dataSize() < trunCalculatedDataSize) {
2113 diag.emplace_back(DiagLevel::Critical, "trun atom is truncated (presence of fields denoted).", context);
2114 } else {
2115 if (trunFlags & 0x000001) { // data offset present
2116 m_istream->seekg(4, ios_base::cur);
2117 //int32 dataOffset = reader.readInt32();
2118 }
2119 if (trunFlags & 0x000004) { // first-sample-flags present
2120 m_istream->seekg(4, ios_base::cur);
2121 }
2122 for (std::uint32_t i = 0; i < sampleCount; ++i) {
2123 if (trunFlags & 0x000100) { // sample-duration present
2124 totalDuration += reader.readUInt32BE();
2125 } else {
2126 totalDuration += defaultSampleDuration;
2127 }
2128 if (trunFlags & 0x000200) { // sample-size present
2129 m_sampleSizes.push_back(reader.readUInt32BE());
2130 m_size += m_sampleSizes.back();
2131 } else {
2132 m_size += defaultSampleSize;
2133 }
2134 if (trunFlags & 0x000400) { // sample-flags present
2135 m_istream->seekg(4, ios_base::cur);
2136 }
2137 if (trunFlags & 0x000800) { // sample-composition-time-offsets present
2138 m_istream->seekg(4, ios_base::cur);
2139 }
2140 }
2141 }
2142 }
2143 }
2144 if (m_sampleSizes.empty() && defaultSampleSize) {
2145 m_sampleSizes.push_back(defaultSampleSize);
2146 }
2147 }
2148 }
2149 }
2150 }
2151 }
2152
2153 // set duration from "trun-information" if the duration has not been determined yet
2154 if (m_duration.isNull() && totalDuration) {
2155 std::uint32_t timeScale = m_timeScale;
2156 if (!timeScale) {
2157 timeScale = trakAtom().container().timeScale();
2158 }
2159 if (timeScale) {
2160 m_duration = TimeSpan::fromSeconds(static_cast<double>(totalDuration) / static_cast<double>(timeScale));
2161 }
2162 }
2163
2164 // calculate average bitrate
2166 m_bitrate = (static_cast<double>(m_size) * 0.0078125) / m_duration.totalSeconds();
2167 }
2168
2169 // read stsc atom (only number of entries)
2170 m_istream->seekg(static_cast<streamoff>(m_stscAtom->dataOffset() + 4));
2171 m_sampleToChunkEntryCount = reader.readUInt32BE();
2172}
2173
2174} // namespace TagParser
The AbortableProgressFeedback class provides feedback about an ongoing operation via callbacks.
The AbstractTrack class parses and stores technical information about video, audio and other kinds of...
std::uint64_t size() const
Returns the size in bytes if known; otherwise returns 0.
std::uint32_t timeScale() const
Returns the time scale if known; otherwise returns 0.
std::uint8_t m_extensionChannelConfig
std::string_view m_chromaFormat
std::uint64_t startOffset() const
Returns the start offset of the track in the associated stream.
std::uint16_t m_bitsPerSample
std::istream & inputStream()
Returns the associated input stream.
bool isEnabled() const
Returns true if the track is marked as enabled; otherwise returns false.
std::uint64_t sampleCount() const
Returns the number of samples/frames if known; otherwise returns 0.
CppUtilities::BinaryReader & reader()
Returns a binary reader for the associated stream.
CppUtilities::TimeSpan m_duration
CppUtilities::BinaryReader m_reader
TrackFlags flags() const
Returns flags (various boolean properties) of this track.
CppUtilities::DateTime m_modificationTime
CppUtilities::BinaryWriter m_writer
bool isHeaderValid() const
Returns an indication whether the track header is valid.
std::ostream & outputStream()
Returns the associated output stream.
std::uint32_t m_extensionSamplingFrequency
CppUtilities::DateTime m_creationTime
std::uint32_t m_samplingFrequency
CppUtilities::BinaryWriter & writer()
Returns a binary writer for the associated stream.
The Diagnostics class is a container for DiagMessage.
The class inherits from std::exception and serves as base class for exceptions thrown by the elements...
std::uint64_t startOffset() const
Returns the start offset in the related stream.
void discardBuffer()
Discards buffered data.
const std::unique_ptr< char[]> & buffer()
Returns buffered data.
std::uint32_t headerSize() const
Returns the header size of the element in byte.
const IdentifierType & id() const
Returns the element ID.
ImplementationType * childById(const IdentifierType &id, Diagnostics &diag)
Returns the first child with the specified id.
ImplementationType * nextSibling()
Returns the next sibling of the element.
ImplementationType * denoteFirstChild(std::uint32_t offset)
Denotes the first child to start at the specified offset (relative to the start offset of this descriptor).
ImplementationType * firstChild()
Returns the first child of the element.
DataSizeType dataSize() const
Returns the data size of the element in byte.
void parse(Diagnostics &diag)
Parses the header information of the element which is read from the related stream at the start offset.
std::uint64_t dataOffset() const
Returns the data offset of the element in the related stream.
ContainerType & container()
Returns the related container.
CppUtilities::BinaryReader & reader()
Returns the related BinaryReader.
void makeBuffer()
Buffers the element (header and data).
ImplementationType * siblingById(const IdentifierType &id, Diagnostics &diag)
Returns the first sibling with the specified id.
The exception that is thrown when the data to be parsed or to be made seems invalid and therefore can...
GeneralMediaFormat general
The Mp4Atom class helps to parse MP4 files.
static constexpr void addHeaderSize(std::uint64_t &dataSize)
Adds the header size to the specified data size.
Definition mp4atom.h:81
static void seekBackAndWriteAtomSize(std::ostream &stream, const std::ostream::pos_type &startOffset, Diagnostics &diag)
This function helps to write the atom size after writing an atom to a stream.
Definition mp4atom.cpp:133
static void makeHeader(std::uint64_t size, std::uint32_t id, CppUtilities::BinaryWriter &writer)
Writes an MP4 atom header to the specified stream.
Definition mp4atom.cpp:171
static const CppUtilities::DateTime epoch
Dates within MP4 tracks are expressed as the number of seconds since this date.
Implementation of TagParser::AbstractTrack for the MP4 container.
static std::unique_ptr< Mpeg4VideoSpecificConfig > parseVideoSpecificConfig(CppUtilities::BinaryReader &reader, std::uint64_t startOffset, std::uint64_t size, Diagnostics &diag)
Parses the video specific configuration for the track.
Definition mp4track.cpp:871
static std::unique_ptr< Mpeg4ElementaryStreamInfo > parseMpeg4ElementaryStreamInfo(CppUtilities::BinaryReader &reader, Mp4Atom *esDescAtom, Diagnostics &diag)
Reads the MPEG-4 elementary stream descriptor for the track.
Definition mp4track.cpp:614
std::uint32_t chunkCount() const
Returns the number of chunks denoted by the stco/co64 atom.
Definition mp4track.h:254
std::vector< std::tuple< std::uint32_t, std::uint32_t, std::uint32_t > > readSampleToChunkTable(Diagnostics &diag)
Reads the sample to chunk table.
Definition mp4track.cpp:515
static void addInfo(const AvcConfiguration &avcConfig, AbstractTrack &track)
Adds the information from the specified avcConfig to the specified track.
std::vector< std::uint64_t > readChunkSizes(TagParser::Diagnostics &diag)
Reads the chunk sizes from the stsz (sample sizes) and stsc (samples per chunk) atoms.
Definition mp4track.cpp:564
void updateChunkOffsets(const std::vector< std::int64_t > &oldMdatOffsets, const std::vector< std::int64_t > &newMdatOffsets)
Updates the chunk offsets of the track.
Definition mp4track.cpp:947
void internalParseHeader(Diagnostics &diag, AbortableProgressFeedback &progress) override
This method is internally called to parse header information.
void makeSampleTable(Diagnostics &diag)
Makes the sample table (stbl atom) for the track.
std::uint64_t requiredSize(Diagnostics &diag) const
Returns the number of bytes written when calling makeTrack().
TrackType type() const override
Returns the type of the track if known; otherwise returns TrackType::Unspecified.
Definition mp4track.cpp:187
std::uint32_t sampleToChunkEntryCount() const
Returns the number of "sample to chunk" entries within the stsc atom.
Definition mp4track.h:262
void makeMedia(Diagnostics &diag)
Makes the media information (mdia atom) for the track.
std::uint64_t chunkOffsetAtomSize(Diagnostics &diag) const
Returns the size of the stco/co64 atom for this track based on the parsed/assigned chunkOffsetSize() ...
std::vector< std::uint64_t > readChunkOffsets(bool parseFragments, Diagnostics &diag)
Reads the chunk offsets from the stco atom and fragments if parseFragments is true.
Definition mp4track.cpp:202
unsigned int chunkOffsetSize() const
Returns the size of a single chunk offset denotation within the stco/co64 atom.
Definition mp4track.h:237
~Mp4Track() override
Destroys the track.
Definition mp4track.cpp:183
void makeTrackHeader(Diagnostics &diag)
Makes the track header (tkhd atom) for the track.
void bufferTrackAtoms(Diagnostics &diag)
Buffers all atoms required by the makeTrack() method.
void makeMediaInfo(Diagnostics &diag)
Makes the media information (minf atom) for the track.
Mp4Atom & trakAtom()
Returns the trak atom for the current instance.
Definition mp4track.h:212
void makeTrack(Diagnostics &diag)
Makes the track entry ("trak"-atom) for the track.
static std::unique_ptr< Mpeg4AudioSpecificConfig > parseAudioSpecificConfig(std::istream &stream, std::uint64_t startOffset, std::uint64_t size, Diagnostics &diag)
Parses the audio specific configuration for the track.
Definition mp4track.cpp:706
void updateChunkOffset(std::uint32_t chunkIndex, std::uint64_t offset)
Updates a particular chunk offset.
The Mpeg4Descriptor class helps to parse MPEG-4 descriptors.
static void addInfo(const MpegAudioFrame &frame, AbstractTrack &track)
Adds the information from the specified frame to the specified track.
The MpegAudioFrame class is used to parse MPEG audio frames.
void parseHeader(CppUtilities::BinaryReader &reader, Diagnostics &diag)
Parses the header read using the specified reader.
This exception is thrown when an operation is invoked that has not been implemented yet.
void setWidth(std::uint32_t value)
Sets the width.
Definition size.h:76
void setHeight(std::uint32_t value)
Sets the height.
Definition size.h:84
The exception that is thrown when the data to be parsed is truncated and therefore can not be parsed ...
TAG_PARSER_EXPORT MediaFormat fourccToMediaFormat(std::uint32_t fourccId)
Definition mp4ids.cpp:51
TAG_PARSER_EXPORT MediaFormat idToMediaFormat(std::uint8_t mpeg4AudioObjectId, bool sbrPresent=false, bool psPresent=false)
Definition mp4ids.cpp:373
TAG_PARSER_EXPORT MediaFormat streamObjectTypeFormat(std::uint8_t streamObjectTypeId)
Returns the TagParser::MediaFormat denoted by the specified MPEG-4 stream ID.
Definition mp4ids.cpp:221
Contains all classes and functions of the TagParser library.
Definition aaccodebook.h:10
std::uint32_t mpeg4SamplingFrequencyTable[13]
Definition mp4ids.cpp:429
TrackType
The TrackType enum specifies the underlying file type of a track and the concrete class of the track ...
The Av1Configuration struct provides a parser for AV1 configuration found in ISOBMFF files.
The AvcConfiguration struct provides a parser for AVC configuration.
std::vector< SpsInfo > spsInfos
static const LocaleDetail & getEmpty()
Returns an empty LocaleDetail.
The Mp4Timings struct holds timing values found in multiple MP4 atoms.
Definition mp4track.cpp:34
std::uint64_t mdhdModificationTime
Definition mp4track.cpp:36
constexpr std::uint8_t requiredTkhdVersion() const
Definition mp4track.cpp:74
std::uint64_t tkhdModificationTime
Definition mp4track.cpp:36
std::uint64_t tkhdCreationTime
Definition mp4track.cpp:35
std::uint64_t tkhdDuration
Definition mp4track.cpp:37
std::uint64_t mdhdDuration
Definition mp4track.cpp:37
std::uint64_t mdhdCreationTime
Definition mp4track.cpp:35
constexpr std::uint8_t requiredMdhdVersion() const
Definition mp4track.cpp:82
The SpsInfo struct holds the sequence parameter set.
AspectRatio pixelAspectRatio
Definition avcinfo.h:87
std::uint8_t profileIndication
Definition avcinfo.h:74
std::uint8_t levelIndication
Definition avcinfo.h:76
ugolomb chromaFormatIndication
Definition avcinfo.h:77
The TrackHeaderInfo struct holds information about the present track header (tkhd atom) and informati...
Definition mp4track.cpp:48