Tag Parser 12.4.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
aacframe.h
Go to the documentation of this file.
1#ifndef TAG_PARSER_AACFRAME_H
2#define TAG_PARSER_AACFRAME_H
3
4// NOTE: The AAC parser is still WIP. It does not work yet and its API/ABI may change even in patch releases.
5
6#include "../global.h"
7
8#include <c++utilities/io/bitreader.h>
9
10#include <cstdint>
11#include <memory>
12
13namespace TagParser {
14
16
17class AdtsFrame;
18
// Compile-time limits and sentinel values for the AAC structures declared in this header.
// Every constant is declared with `constexpr auto` and initialized from an integer literal.
19constexpr auto aacMaxChannels = 64; // sizes AacDrcInfo::excludeMask/additionalExcludedChannels and AacFrameElementParser::m_elementId
20constexpr auto aacMaxSyntaxElements = 48; // sizes the per-element arrays of AacFrameElementParser
21constexpr auto aacMaxWindowGroups = 8; // first dimension of AacIcsInfo::midSideCodingUsed
22constexpr auto aacMaxSfb = 51; // sizes AacPredictorInfo::predictionUsed and the second dimension of AacIcsInfo::midSideCodingUsed
23constexpr auto aacMaxLtpSfb = 40; // sizes AacLtpInfo::longUsed
24constexpr auto aacMaxltpSfbS = 8; // NOTE(review): not referenced in this header; the lowercase "ltp" differs from aacMaxLtpSfb, but renaming would change the public API
25constexpr auto aacInvalidSbrElement = 0xFF; // default argument of AacFrameElementParser::parseFillElement()
26constexpr auto aacNoTimeSlots960 = 15; // not referenced in this header
27constexpr auto aacNoTimeSlots = 16; // not referenced in this header
28constexpr auto aacSbrRate = 2; // not referenced in this header
29constexpr auto aacSbrM = 49; // not referenced in this header
30constexpr auto aacSbrMaxLe = 5; // sizes several AacSbrInfo arrays (te, f, e, bsInvfMode, bsInvfModePrev)
31constexpr auto aacSbrMaxNtsrhfg = 40; // only referenced by a commented-out member of AacSbrInfo
32
// Pointer to an array of rows holding two std::int8_t values each; this is the table type taken by
// AacFrameElementParser::sbrHuffmanDec().
33using SbrHuffTab = const std::int8_t (*)[2];
34
// Syntax element type IDs, wrapped in a namespace so the unscoped enumerators do not leak into TagParser.
// No explicit values are given, so the enumerators are numbered 0 (SingleChannelElement) through 7 (EndOfFrame).
// NOTE(review): presumably these match the element IDs read from an AAC raw data block - confirm against the parser implementation.
35namespace AacSyntaxElementTypes {
36enum KnownTypes : std::uint8_t {
37 SingleChannelElement,
38 ChannelPairElement,
39 ChannelCouplingElement,
40 LowFrequencyElement,
41 DataStreamElement,
42 ProgramConfigElement,
43 FillElement,
44 EndOfFrame
45};
46}
47
// Window sequence kinds, implicitly numbered 0 (OnlyLongSequence) through 3 (LongStopSequence).
// NOTE(review): presumably the values stored in AacIcsInfo::windowSequence - confirm against the parser implementation.
48namespace AacIcsSequenceTypes {
49enum KnownTypes : std::uint8_t { OnlyLongSequence, LongStartSequence, EightShortSequence, LongStopSequence };
50}
51
// Named constants with explicit values. Note that the enumerators are not listed in numeric order:
// QuadLen (4) and PairLen (2) are declared between the "Hcb" entries.
// NOTE(review): the "Hcb" entries look like Huffman codebook numbers and the "Len" entries like tuple lengths - confirm before relying on this.
52namespace AacScaleFactorTypes {
53enum KnownTypes : std::uint8_t {
54 ZeroHcb = 0,
55 FirstPairHcb = 5,
56 EscHcb = 11,
57 QuadLen = 4,
58 PairLen = 2,
59 NoiseHcb = 13,
60 IntensityHcb2 = 14,
61 IntensityHcb = 15
62};
63}
64
// Extension type IDs with explicit values; the range is not contiguous (0, 1, 2, then 11 through 14).
// NOTE(review): presumably the extension types found in a fill element - confirm against parseFillElement().
65namespace AacExtensionTypes {
66enum KnownTypes : std::uint8_t { Fill = 0, FillData = 1, DataElement = 2, DynamicRange = 11, SacData = 12, SbrData = 13, SbrDataCrc = 14 };
67}
68
// Frame class IDs, implicitly numbered 0 (FixFix) through 3 (VarVar).
// NOTE(review): presumably the values stored in AacSbrInfo::bsFrameClass - confirm against parseSbrGrid().
69namespace BsFrameClasses {
70enum BsFrameClass : std::uint8_t { FixFix, FixVar, VarFix, VarVar };
71}
72
// SBR extension IDs with explicit values (0 and 2; no enumerator is defined for 1).
// NOTE(review): presumably matched against the extensionId argument of parseSbrExtension() - confirm in the implementation.
73namespace AacSbrExtensionIds {
74enum KnownIds : std::uint8_t { DrmParametricStereo = 0, Ps = 2 };
75}
76
// Plain data record that AacFrameElementParser::parseLtpInfo() receives by mutable reference.
// AacIcsInfo holds two instances of it (ltp1 and ltp2).
// The constructor is only declared here; what it initializes is defined elsewhere.
// NOTE(review): "Ltp" presumably stands for long-term prediction - confirm.
77struct TAG_PARSER_EXPORT AacLtpInfo {
78 AacLtpInfo();
79 std::uint8_t lastBand;
80 std::uint8_t dataPresent;
81 std::uint16_t lag;
82 std::uint8_t lagUpdate;
83 std::uint8_t coef;
84 std::uint8_t longUsed[aacMaxLtpSfb]; // aacMaxLtpSfb (40) entries
85 std::uint8_t shortUsed[8]; // the three "short" arrays all have 8 entries
86 std::uint8_t shortLagPresent[8];
87 std::uint8_t shortLag[8];
88};
89
// Plain data record embedded in AacIcsInfo as the member "predictor".
// The constructor is only declared here; what it initializes is defined elsewhere.
90struct TAG_PARSER_EXPORT AacPredictorInfo {
91 AacPredictorInfo();
92 std::uint8_t maxSfb;
93 std::uint8_t reset;
94 std::uint8_t resetGroupNumber;
95 std::uint8_t predictionUsed[aacMaxSfb]; // aacMaxSfb (51) entries
96};
97
// Plain data record embedded in AacIcsInfo as the member "pulse".
// The constructor is only declared here; what it initializes is defined elsewhere.
98struct TAG_PARSER_EXPORT AacPulseInfo {
99 AacPulseInfo();
100 std::uint8_t count;
101 std::uint8_t startSfb;
102 std::uint8_t offset[4]; // offset and amp both have 4 entries
103 std::uint8_t amp[4];
104};
105
// Plain data record embedded in AacIcsInfo as the member "tns".
// Every array has a first dimension of 8; the per-filter arrays add a second dimension of 4,
// and coef adds a third dimension of 32.
// The constructor is only declared here; what it initializes is defined elsewhere.
// NOTE(review): the dimensions presumably mean [window][filter][coefficient] - confirm against parseTnsData().
106struct TAG_PARSER_EXPORT AacTnsInfo {
107 AacTnsInfo();
108 std::uint8_t filt[8];
109 std::uint8_t coefRes[8];
110 std::uint8_t length[8][4];
111 std::uint8_t order[8][4];
112 std::uint8_t direction[8][4];
113 std::uint8_t coefCompress[8][4];
114 std::uint8_t coef[8][4][32];
115};
116
// Plain data record embedded in AacIcsInfo as the member "ssr".
// The constructor is only declared here; what it initializes is defined elsewhere.
// NOTE(review): presumably filled by parseGainControlData() - confirm in the implementation.
117struct TAG_PARSER_EXPORT AacSsrInfo {
118 AacSsrInfo();
119 std::uint8_t maxBand;
120 std::uint8_t adjustNum[4][8];
121 std::uint8_t alevcode[4][8][8]; // same leading dimensions as adjustNum, plus a third dimension of 8
122 std::uint8_t aloccode[4][8][8];
123};
124
// Plain data record held by AacFrameElementParser as the member m_drc.
// The constructor is only declared here; what it initializes is defined elsewhere.
// NOTE(review): presumably filled by parseDynamicRange() and parseExcludedChannels() - confirm in the implementation.
125struct TAG_PARSER_EXPORT AacDrcInfo {
126 AacDrcInfo();
127 std::uint8_t present;
128 std::uint8_t bandCount;
129 std::uint8_t pceInstanceTag;
130 std::uint8_t excludedChannelsPresent;
131 std::uint8_t bandTop[17]; // the three per-band arrays (bandTop, dynamicRangeSign, dynamicRangeControl) have 17 entries each
132 std::uint8_t progRefLevel;
133 std::uint8_t dynamicRangeSign[17];
134 std::uint8_t dynamicRangeControl[17];
135 std::uint8_t excludeMask[aacMaxChannels]; // aacMaxChannels (64) entries
136 std::uint8_t additionalExcludedChannels[aacMaxChannels];
137};
138
// Data record referenced by AacSbrInfo::ps and taken by AacFrameElementParser::parsePsData().
// The field list is incomplete (see the TODO at the end) and is currently identical to AacDrmPsInfo.
// The constructor is only declared here; what it initializes is defined elsewhere.
139struct TAG_PARSER_EXPORT AacPsInfo {
140 AacPsInfo();
141 std::uint8_t headerRead;
142 std::uint8_t use34HybridBands;
143 std::uint8_t enableIID; // Inter-channel Intensity Difference
144 std::uint8_t iidMode;
145 std::uint8_t iidParCount;
146 std::uint8_t iidopdParCount;
147 // TODO
148};
149
// Data record referenced by AacSbrInfo::drmPs and taken by AacFrameElementParser::parseDrmPsData().
// The field list is incomplete (see the TODO at the end) and is currently identical to AacPsInfo.
// The constructor is only declared here; what it initializes is defined elsewhere.
150struct TAG_PARSER_EXPORT AacDrmPsInfo {
151 AacDrmPsInfo();
152 std::uint8_t headerRead;
153 std::uint8_t use34HybridBands;
154 std::uint8_t enableIID; // Inter-channel Intensity Difference
155 std::uint8_t iidMode;
156 std::uint8_t iidParCount;
157 std::uint8_t iidopdParCount;
158 // TODO
159};
160
// State record for one SBR element. Instances are shared via std::shared_ptr: AacIcsInfo::sbr and
// AacFrameElementParser::m_sbrElements hold them, and AacFrameElementParser::makeSbrInfo() returns one.
// The constructor is only declared here; how its arguments map to the fields is defined elsewhere.
// NOTE(review): arrays with a leading dimension of 2 are presumably indexed by channel (the parseSbr*()
// methods take a "channel" argument) - confirm in the implementation.
// NOTE(review): the commented-out members use types (real_t, qmfa_info, qmfs_info, qmf_t) that are not
// declared in this header; presumably decoder-only state that was not ported - confirm before removing.
161struct TAG_PARSER_EXPORT AacSbrInfo {
162 AacSbrInfo(std::uint8_t sbrElementType, std::uint16_t samplingFrequency, std::uint16_t frameLength, bool isDrm);
163
164 std::uint8_t aacElementId;
165 std::uint16_t samplingFrequency;
166
167 std::uint32_t maxAacLine;
168
169 std::uint8_t rate;
170 std::uint8_t justSeeked;
171 std::uint8_t ret;
172
173 std::uint8_t ampRes[2];
174
175 std::uint8_t k0;
176 std::uint8_t kx;
177 std::uint8_t m;
178 std::uint8_t nMaster;
179 std::uint8_t nHigh;
180 std::uint8_t nLow;
181 std::uint8_t nq;
182 std::uint8_t nl[4];
183 std::uint8_t n[2];
184
185 std::uint8_t fMaster[64];
186 std::uint8_t fTableRes[2][64];
187 std::uint8_t fTableNoise[64];
188 std::uint8_t fTableLim[4][64];
189 std::uint8_t fGroup[5][64];
190 std::uint8_t ng[5];
191
192 std::uint8_t tableMapKToG[64];
193
194 std::uint8_t absBordLead[2];
195 std::uint8_t absBordTrail[2];
196 std::uint8_t relLeadCount[2];
197 std::uint8_t relTrailCount[2];
198
199 std::uint8_t le[2];
200 std::uint8_t lePrev[2];
201 std::uint8_t lq[2];
202
203 std::uint8_t te[2][aacSbrMaxLe + 1]; // aacSbrMaxLe + 1 = 6 entries per row
204 std::uint8_t tq[2][3];
205 std::uint8_t f[2][aacSbrMaxLe + 1];
206 std::uint8_t fPrev[2];
207
208 //real_t *gTempPrev[2][5];
209 //real_t *qTempPrev[2][5];
210 //sbyte gqRingbufIndex[2];
211
212 std::int16_t e[2][64][aacSbrMaxLe];
213 std::int16_t ePrev[2][64];
214 //real_t eOrig[2][64][aacSbrMaxLe];
215 //real_t eCurr[2][64][aacSbrMaxLe];
216 std::int32_t q[2][64][2];
217 //real_t qDiv[2][64][2];
218 //real_t qDiv2[2][64][2];
219 std::int32_t qPrev[2][64];
220
221 std::int8_t la[2];
222 std::int8_t laPrev[2];
223
224 std::uint8_t bsInvfMode[2][aacSbrMaxLe];
225 std::uint8_t bsInvfModePrev[2][aacSbrMaxLe];
226 //real_t bwArray[2][64];
227 //real_t bwArrayPrev[2][64];
228
229 std::uint8_t noPatches;
230 std::uint8_t patchNoSubbands[64];
231 std::uint8_t patchStartSubband[64];
232
233 std::uint8_t bsAddHarmonic[2][64];
234 std::uint8_t bsAddHarmonicPrev[2][64];
235
236 std::uint16_t indexNoisePrev[2];
237 std::uint8_t psiIsPrev[2];
238
239 std::uint8_t bsStartFreqPrev;
240 std::uint8_t bsStopFreqPrev;
241 std::uint8_t bsXoverBandPrev;
242 std::uint8_t bsFreqScalePrev;
243 std::uint8_t bsAlterScalePrev;
244 std::uint8_t bsNoiseBandsPrev;
245
246 std::int8_t prevEnvIsShort[2];
247
248 std::int8_t kxPrev; // signed, unlike kx above which is std::uint8_t
249 std::uint8_t bsco;
250 std::uint8_t bscoPrev;
251 std::uint8_t mPrev;
252 std::uint16_t frameLength;
253
254 std::uint8_t reset;
255 std::uint32_t frame;
256 std::uint32_t headerCount;
257
258 std::uint8_t idAac;
259 //qmfa_info *qmfa[2];
260 //qmfs_info *qmfs[2];
261
262 //qmf_t Xsbr[2][aacSbrMaxNtsrhfg][64];
263
264 std::uint8_t isDrmSbr;
265 std::shared_ptr<AacDrmPsInfo> drmPs;
266
267 std::uint8_t timeSlotsRateCount;
268 std::uint8_t timeSlotsCount;
269 std::uint8_t tHfGen;
270 std::uint8_t tHfAdj;
271
272 std::shared_ptr<AacPsInfo> ps;
273 std::uint8_t psUsed;
274 std::uint8_t psResetFlag;
275
276 std::uint8_t bsHeaderFlag;
277 std::uint8_t bsCrcFlag;
278 std::uint16_t bsSbrCrcBits;
279 std::uint8_t bsProtocolVersion;
280 std::uint8_t bsAmpRes;
281 std::uint8_t bsStartFreq;
282 std::uint8_t bsStopFreq;
283 std::uint8_t bsXoverBand;
284 std::uint8_t bsFreqScale;
285 std::uint8_t bsAlterScale;
286 std::uint8_t bsNoiseBands;
287 std::uint8_t bsLimiterBands;
288 std::uint8_t bsLimiterGains;
289 std::uint8_t bsInterpolFreq;
290 std::uint8_t bsSmoothingMode;
291 std::uint8_t bsSamplerateMode;
292 std::uint8_t bsAddHarmonicFlag[2];
293 std::uint8_t bsAddHarmonicFlagPrev[2];
294 std::uint8_t bsExtendedData;
295 std::uint8_t bsExtensionId;
296 std::uint8_t bsExtensionData;
297 std::uint8_t bsCoupling;
298 std::uint8_t bsFrameClass[2];
299 std::uint8_t bsRelBord[2][9];
300 std::uint8_t bsRelBord0[2][9];
301 std::uint8_t bsRelBord1[2][9];
302 std::uint8_t bsPointer[2];
303 std::uint8_t bsAbsBord0[2];
304 std::uint8_t bsAbsBord1[2];
305 std::uint8_t bsRelCount0[2];
306 std::uint8_t bsRelCount1[2];
307 std::uint8_t bsDfEnv[2][9];
308 std::uint8_t bsDfNoise[2][3];
309};
310
// Plain data record held by AacFrameElementParser as the member m_pce.
// All per-element arrays have 16 entries.
// The constructor is only declared here; what it initializes is defined elsewhere.
// NOTE(review): presumably filled by parseProgramConfigElement() - confirm in the implementation.
311struct TAG_PARSER_EXPORT AacProgramConfig {
312 AacProgramConfig();
313 std::uint8_t elementInstanceTag;
314 std::uint8_t objectType;
315 std::uint8_t samplingFrequencyIndex;
316 std::uint8_t frontChannelElementCount;
317 std::uint8_t sideChannelElementCount;
318 std::uint8_t backChannelElementCount;
319 std::uint8_t lfeChannelElementCount;
320 std::uint8_t assocDataElementCount;
321 std::uint8_t validCcElementCount;
322 std::uint8_t monoMixdownPresent;
323 std::uint8_t monoMixdownElementNumber;
324 std::uint8_t stereoMixdownPresent;
325 std::uint8_t stereoMixdownElementNumber;
326 std::uint8_t matrixMixdownIdxPresent;
327 std::uint8_t pseudoSurroundEnable;
328 std::uint8_t matrixMixdownIdx;
329 std::uint8_t frontElementIsCpe[16];
330 std::uint8_t frontElementTagSelect[16];
331 std::uint8_t sideElementIsCpe[16];
332 std::uint8_t sideElementTagSelect[16];
333 std::uint8_t backElementIsCpe[16];
334 std::uint8_t backElementTagSelect[16];
335 std::uint8_t lfeElementTagSelect[16];
336 std::uint8_t assocDataElementTagSelect[16];
337 std::uint8_t ccElementIsIndSw[16];
338 std::uint8_t validCcElementTagSelect[16];
339 std::uint8_t channels;
340 std::uint8_t commentFieldBytes; // std::uint8_t, so it can hold at most 255
341 std::uint8_t commentFieldData[257]; // 257 entries, which is more than commentFieldBytes can express
342 std::uint8_t frontChannelCount;
343 std::uint8_t sideChannelCount;
344 std::uint8_t backChannelCount;
345 std::uint8_t lfeChannelCount;
346 std::uint8_t sceChannel[16];
347 std::uint8_t cpeChannel[16];
348};
349
// Per-channel-stream state record. AacFrameElementParser holds two instances (m_ics1 and m_ics2) and
// passes them by mutable reference to most of its private parse*/decode* methods.
// The record aggregates the smaller info records by value (pulse, tns, predictor, ltp1, ltp2, ssr)
// and refers to SBR state through a shared pointer.
// The constructor is only declared here; what it initializes is defined elsewhere.
350struct TAG_PARSER_EXPORT AacIcsInfo {
351 AacIcsInfo();
352
353 std::uint8_t maxSfb;
354
355 std::uint8_t swbCount;
356 std::uint8_t windowGroupCount;
357 std::uint8_t windowCount;
358 std::uint8_t windowSequence;
359 std::uint8_t windowGroupLengths[8];
360 std::uint8_t windowShape;
361 std::uint8_t scaleFactorGrouping;
362 std::uint16_t sectionSfbOffset[8][15 * 8]; // the section arrays are all [8][120]
363 std::uint16_t swbOffset[52];
364 std::uint16_t maxSwbOffset;
365
366 std::uint8_t sectionCb[8][15 * 8];
367 std::uint16_t sectionStart[8][15 * 8];
368 std::uint16_t sectionEnd[8][15 * 8];
369 std::uint8_t sfbCb[8][15 * 8];
370 std::uint8_t sectionsPerGroup[8];
371
372 std::uint8_t globalGain;
373 std::uint16_t scaleFactors[8][51]; // literal bounds equal aacMaxWindowGroups (8) and aacMaxSfb (51)
374
375 std::uint8_t midSideCodingMaskPresent;
376 std::uint8_t midSideCodingUsed[aacMaxWindowGroups][aacMaxSfb];
377
378 std::uint8_t noiseUsed;
379 std::uint8_t isUsed;
380
381 std::uint8_t pulseDataPresent;
382 std::uint8_t tnsDataPresent;
383 std::uint8_t gainControlPresent;
384 std::uint8_t predictorDataPresent;
385
386 AacPulseInfo pulse;
387 AacTnsInfo tns;
388 AacPredictorInfo predictor;
389 AacLtpInfo ltp1;
390 AacLtpInfo ltp2;
391 AacSsrInfo ssr;
392 std::shared_ptr<AacSbrInfo> sbr;
393
394 // error resilience
395 std::uint16_t reorderedSpectralDataLength;
396 std::uint8_t longestCodewordLength;
397 std::uint8_t sfConcealment;
398 std::uint8_t revGlobalGain;
399 std::uint16_t rvlcSfLength;
400 std::uint16_t dpcmNoiseNrg;
401 std::uint8_t sfEscapesPresent;
402 std::uint8_t rvlcEscapesLength;
403 std::uint16_t dpcmNoiseLastPos;
404};
405
// Parser for the elements of an AAC frame. Per the note at the top of this header the AAC parser is
// work in progress: it does not work yet and its API/ABI may change even in patch releases.
//
// The public interface is the constructor, which takes the setup information, and two parse() overloads
// that take the frame data either as an in-memory buffer or as a stream, plus the data size.
// All other methods are private helpers that are only declared here, except the constructor, sbrLog2()
// and huffmanCodebook(), which are defined inline further down in this header.
//
// NOTE(review): std::istream and std::size_t are used below although this header does not include
// <iosfwd>/<istream> or <cstddef> itself; presumably they arrive transitively via bitreader.h - confirm.
406class TAG_PARSER_EXPORT AacFrameElementParser {
407public:
408 AacFrameElementParser(std::uint8_t audioObjectId, std::uint8_t samplingFrequencyIndex, std::uint8_t extensionSamplingFrequencyIndex,
409 std::uint8_t channelConfig, std::uint16_t frameLength = 1024);
410
411 void parse(const AdtsFrame &adtsFrame, std::unique_ptr<char[]> &data, std::size_t dataSize);
412 void parse(const AdtsFrame &adtsFrame, std::istream &stream, std::size_t dataSize);
413
414private:
415 void parseLtpInfo(const AacIcsInfo &ics, AacLtpInfo &ltp);
416 void parseIcsInfo(AacIcsInfo &ics);
417 void parseSectionData(AacIcsInfo &ics);
418 void decodeScaleFactorData(AacIcsInfo &ics);
419 void decodeRvlcScaleFactorData(AacIcsInfo &ics);
420 void parseScaleFactorData(AacIcsInfo &ics);
421 void parsePulseData(AacIcsInfo &ics);
422 void parseTnsData(AacIcsInfo &ics);
423 void parseGainControlData(AacIcsInfo &ics);
424 void parseSpectralData(AacIcsInfo &ics, std::int16_t *specData);
425 void parseSideInfo(AacIcsInfo &ics, bool scaleFlag);
426 std::uint8_t parseExcludedChannels();
427 std::uint8_t parseDynamicRange();
428 static std::int8_t sbrLog2(const std::int8_t val);
429 std::int16_t sbrHuffmanDec(SbrHuffTab table);
430 void parseSbrGrid(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
431 void parseSbrDtdf(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
432 void parseInvfMode(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
433 void parseSbrEnvelope(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
434 void parseSbrNoise(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
435 void parseSbrSinusoidalCoding(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
436 std::uint16_t parseSbrExtension(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t extensionId, std::uint8_t bitsLeft);
437 std::uint16_t parsePsData(std::shared_ptr<AacPsInfo> &ps, std::uint8_t &header);
438 std::uint16_t parseDrmPsData(std::shared_ptr<AacDrmPsInfo> &drmPs);
439 void parseSbrSingleChannelElement(std::shared_ptr<AacSbrInfo> &sbr);
440 void parseSbrChannelPairElement(std::shared_ptr<AacSbrInfo> &sbr);
441 std::shared_ptr<AacSbrInfo> makeSbrInfo(std::uint8_t sbrElement, bool isDrm = false);
442 void parseSbrExtensionData(std::uint8_t sbrElement, std::uint16_t count, bool crcFlag);
443 std::uint8_t parseHuffmanScaleFactor();
444 void parseHuffmanSpectralData(std::uint8_t cb, std::int16_t *sp);
445 void huffmanSignBits(std::int16_t *sp, std::uint8_t len);
446 void huffman2StepQuad(std::uint8_t cb, std::int16_t *sp);
447 void huffmanBinaryQuadSign(std::uint8_t cb, std::int16_t *sp);
448 void huffmanBinaryPair(std::uint8_t cb, std::int16_t *sp);
449 void huffman2StepPair(std::uint8_t cb, std::int16_t *sp);
450 void huffmanBinaryPairSign(std::uint8_t cb, std::int16_t *sp);
451 void huffman2StepPairSign(std::uint8_t cb, std::int16_t *sp);
452 std::int16_t huffmanGetEscape(std::int16_t sp);
453 constexpr static std::int16_t huffmanCodebook(std::uint8_t i);
454 static void vcb11CheckLav(std::uint8_t cb, std::int16_t *sp);
455 void calculateWindowGroupingInfo(AacIcsInfo &ics);
456 void parseIndividualChannelStream(AacIcsInfo &ics, std::int16_t *specData, bool scaleFlag = false);
457 void parseSingleChannelElement();
458 void parseChannelPairElement();
459 void parseCouplingChannelElement();
460 void parseLowFrequencyElement();
461 void parseDataStreamElement();
462 void parseProgramConfigElement();
463 void parseFillElement(std::uint8_t sbrElement = aacInvalidSbrElement);
464 void parseRawDataBlock();
465
466 // these fields contain setup information
467 CppUtilities::BitReader m_reader;
468 std::uint8_t m_mpeg4AudioObjectId;
469 std::uint8_t m_mpeg4SamplingFrequencyIndex;
470 std::uint8_t m_mpeg4ExtensionSamplingFrequencyIndex;
471 std::uint8_t m_mpeg4ChannelConfig;
472 std::uint16_t m_frameLength;
473 std::uint8_t m_aacSectionDataResilienceFlag;
474 std::uint8_t m_aacScalefactorDataResilienceFlag;
475 std::uint8_t m_aacSpectralDataResilienceFlag;
476 // these fields will be parsed
477 std::uint8_t m_elementId[aacMaxChannels]; // aacMaxChannels (64) entries
478 std::uint8_t m_channelCount;
479 std::uint8_t m_elementCount;
480 std::uint8_t m_elementChannelCount[aacMaxSyntaxElements]; // the per-element arrays have aacMaxSyntaxElements (48) entries
481 //std::uint8_t m_channel;
482 //std::int16_t m_pairedChannel;
483 std::uint8_t m_elementInstanceTag[aacMaxSyntaxElements];
484 std::uint8_t m_commonWindow;
485 AacIcsInfo m_ics1;
486 AacIcsInfo m_ics2;
487 AacDrcInfo m_drc;
488 AacProgramConfig m_pce;
489 std::uint8_t m_sbrPresentFlag;
490 //std::uint8_t m_forceUpSampling;
491 //std::uint8_t m_downSampledSbr;
492 std::shared_ptr<AacSbrInfo> m_sbrElements[aacMaxSyntaxElements];
493 std::uint8_t m_psUsed[aacMaxSyntaxElements];
494 std::uint8_t m_psUsedGlobal;
495 std::uint8_t m_psResetFlag;
496};
497
501inline AacFrameElementParser::AacFrameElementParser(std::uint8_t audioObjectId, std::uint8_t samplingFrequencyIndex,
502 std::uint8_t extensionSamplingFrequencyIndex, std::uint8_t channelConfig, std::uint16_t frameLength)
503 : m_reader(nullptr, nullptr)
504 , m_mpeg4AudioObjectId(audioObjectId)
505 , m_mpeg4SamplingFrequencyIndex(samplingFrequencyIndex)
506 , m_mpeg4ExtensionSamplingFrequencyIndex(extensionSamplingFrequencyIndex)
507 , m_mpeg4ChannelConfig(channelConfig)
508 , m_frameLength(frameLength)
509 , m_aacSpectralDataResilienceFlag(0)
510 , m_elementId{ 0 }
511 , m_channelCount(0)
512 , m_elementCount(0)
513 , m_elementChannelCount{ 0 }
514 , m_elementInstanceTag{ 0 }
515 , m_commonWindow(0)
516 ,
517 //m_channel(0),
518 //m_pairedChannel(0),
519 m_sbrPresentFlag(0)
520 ,
521 //m_forceUpSampling(0),
522 //m_downSampledSbr(0),
523 m_sbrElements{ 0 }
524 , m_psUsed{ 0 }
525 , m_psUsedGlobal(0)
526 , m_psResetFlag(0)
527{
528}
529
530inline std::int8_t AacFrameElementParser::sbrLog2(const std::int8_t val)
531{
532 static const std::int8_t log2tab[] = { 0, 0, 1, 2, 2, 3, 3, 3, 3, 4 };
533 return (val < 10 && val >= 0) ? log2tab[val] : 0;
534}
535
536constexpr std::int16_t AacFrameElementParser::huffmanCodebook(std::uint8_t i)
537{
538 return static_cast<std::int16_t>(i ? (16428320 & 0xFFFF) : ((16428320 >> 16) & 0xFFFF));
539}
540
542
543} // namespace TagParser
544
545#endif // TAG_PARSER_AACFRAME_H
#define TAG_PARSER_EXPORT
Marks the symbol to be exported by the tagparser library.
Definition global.h:14
Contains all classes and functions of the TagParser library.
Definition aaccodebook.h:10