Tag Parser 12.4.0
C++ library for reading and writing MP4 (iTunes), ID3, Vorbis, Opus, FLAC and Matroska tags
Loading...
Searching...
No Matches
aacframe.h
Go to the documentation of this file.
1#ifndef TAG_PARSER_AACFRAME_H
2#define TAG_PARSER_AACFRAME_H
3
4// NOTE: The AAC parser is still WIP. It does not work yet and its API/ABI may change even in patch releases.
5
6#include "../global.h"
7
8#include <c++utilities/io/bitreader.h>
9
10#include <cstdint>
11#include <memory>
12
13namespace TagParser {
14
16
// Forward declaration; this header only uses AdtsFrame by const reference (see AacFrameElementParser::parse()).
17class AdtsFrame;
18
// Size limits and magic values used by the AAC structures below.
// All of them are declared with `auto` and an integer literal, so their type is int.
// Used as bound of AacDrcInfo::excludeMask/additionalExcludedChannels and AacFrameElementParser::m_elementId.
19constexpr auto aacMaxChannels = 64;
// Used as bound of the per-element arrays of AacFrameElementParser (e.g. m_sbrElements, m_psUsed).
20constexpr auto aacMaxSyntaxElements = 48;
// First dimension of AacIcsInfo::midSideCodingUsed.
21constexpr auto aacMaxWindowGroups = 8;
// Bound of AacPredictorInfo::predictionUsed and second dimension of AacIcsInfo::midSideCodingUsed.
22constexpr auto aacMaxSfb = 51;
// Bound of AacLtpInfo::longUsed.
23constexpr auto aacMaxLtpSfb = 40;
// Not referenced within this header.
24constexpr auto aacMaxltpSfbS = 8;
// Default argument of AacFrameElementParser::parseFillElement(); presumably means "no SBR element" - TODO confirm.
25constexpr auto aacInvalidSbrElement = 0xFF;
// The following four constants are not referenced within this header.
26constexpr auto aacNoTimeSlots960 = 15;
27constexpr auto aacNoTimeSlots = 16;
28constexpr auto aacSbrRate = 2;
29constexpr auto aacSbrM = 49;
// Used to size several arrays of AacSbrInfo (partly as aacSbrMaxLe + 1).
30constexpr auto aacSbrMaxLe = 5;
// Only referenced by a commented-out member of AacSbrInfo.
31constexpr auto aacSbrMaxNtsrhfg = 40;
32
// Pointer to (the first of) a sequence of two-element arrays of const std::int8_t;
// this is the table type accepted by AacFrameElementParser::sbrHuffmanDec().
33using SbrHuffTab = const std::int8_t (*)[2];
34
/*!
 * \brief Encapsulates the identifiers of the known AAC syntax elements.
 * \remarks The enumerator names correspond to the parse...Element() methods of
 *          AacFrameElementParser; the values are consecutive, starting at zero.
 */
namespace AacSyntaxElementTypes {
enum KnownTypes : std::uint8_t {
    SingleChannelElement = 0,
    ChannelPairElement = 1,
    ChannelCouplingElement = 2,
    LowFrequencyElement = 3,
    DataStreamElement = 4,
    ProgramConfigElement = 5,
    FillElement = 6,
    EndOfFrame = 7
};
} // namespace AacSyntaxElementTypes
47
/*!
 * \brief Encapsulates the known window sequence types.
 * \remarks Presumably the values stored in AacIcsInfo::windowSequence - TODO confirm.
 */
namespace AacIcsSequenceTypes {
enum KnownTypes : std::uint8_t {
    OnlyLongSequence = 0,
    LongStartSequence = 1,
    EightShortSequence = 2,
    LongStopSequence = 3
};
} // namespace AacIcsSequenceTypes
51
/*!
 * \brief Encapsulates constants related to scale factor / Huffman codebook handling.
 * \remarks The enumerators are listed in ascending order of their value. PairLen and
 *          QuadLen look like lengths rather than codebook numbers - TODO confirm.
 */
namespace AacScaleFactorTypes {
enum KnownTypes : std::uint8_t {
    ZeroHcb = 0,
    PairLen = 2,
    QuadLen = 4,
    FirstPairHcb = 5,
    EscHcb = 11,
    NoiseHcb = 13,
    IntensityHcb2 = 14,
    IntensityHcb = 15
};
} // namespace AacScaleFactorTypes
64
/*!
 * \brief Encapsulates the known extension payload types.
 * \remarks Presumably the types evaluated when parsing a fill element - TODO confirm.
 */
namespace AacExtensionTypes {
enum KnownTypes : std::uint8_t {
    Fill = 0,
    FillData = 1,
    DataElement = 2,
    DynamicRange = 11,
    SacData = 12,
    SbrData = 13,
    SbrDataCrc = 14
};
} // namespace AacExtensionTypes
68
/*!
 * \brief Encapsulates the known frame classes.
 * \remarks Presumably the values stored in AacSbrInfo::bsFrameClass - TODO confirm.
 */
namespace BsFrameClasses {
enum BsFrameClass : std::uint8_t {
    FixFix = 0,
    FixVar = 1,
    VarFix = 2,
    VarVar = 3
};
} // namespace BsFrameClasses
72
/*!
 * \brief Encapsulates the known SBR extension IDs.
 * \remarks Presumably the IDs passed to AacFrameElementParser::parseSbrExtension() - TODO confirm.
 */
namespace AacSbrExtensionIds {
enum KnownIds : std::uint8_t {
    DrmParametricStereo = 0,
    Ps = 2
};
} // namespace AacSbrExtensionIds
76
/*!
 * \brief Plain data holder for the "LTP" values of an individual channel stream.
 * \remarks AacIcsInfo embeds two instances (ltp1, ltp2) and AacFrameElementParser::parseLtpInfo()
 *          receives one by reference. "LTP" presumably means long-term prediction - TODO confirm.
 */
77struct TAG_PARSER_EXPORT AacLtpInfo {
// The constructor is only declared here; its definition is not part of this header.
78 AacLtpInfo();
79 std::uint8_t lastBand;
80 std::uint8_t dataPresent;
81 std::uint16_t lag;
82 std::uint8_t lagUpdate;
83 std::uint8_t coef;
// One entry per band, bounded by aacMaxLtpSfb.
84 std::uint8_t longUsed[aacMaxLtpSfb];
// The three arrays below have 8 entries each; presumably one per short window - TODO confirm.
85 std::uint8_t shortUsed[8];
86 std::uint8_t shortLagPresent[8];
87 std::uint8_t shortLag[8];
88};
89
/*!
 * \brief Plain data holder for predictor values of an individual channel stream.
 * \remarks Embedded in AacIcsInfo as member "predictor".
 */
90struct TAG_PARSER_EXPORT AacPredictorInfo {
// The constructor is only declared here; its definition is not part of this header.
91 AacPredictorInfo();
92 std::uint8_t maxSfb;
93 std::uint8_t reset;
94 std::uint8_t resetGroupNumber;
// One entry per band, bounded by aacMaxSfb.
95 std::uint8_t predictionUsed[aacMaxSfb];
96};
97
/*!
 * \brief Plain data holder for pulse data of an individual channel stream.
 * \remarks Embedded in AacIcsInfo as member "pulse"; presumably filled by
 *          AacFrameElementParser::parsePulseData() - TODO confirm.
 */
98struct TAG_PARSER_EXPORT AacPulseInfo {
// The constructor is only declared here; its definition is not part of this header.
99 AacPulseInfo();
100 std::uint8_t count;
101 std::uint8_t startSfb;
// Both arrays provide room for 4 entries.
102 std::uint8_t offset[4];
103 std::uint8_t amp[4];
104};
105
/*!
 * \brief Plain data holder for "TNS" data of an individual channel stream.
 * \remarks Embedded in AacIcsInfo as member "tns"; presumably filled by
 *          AacFrameElementParser::parseTnsData() - TODO confirm.
 */
106struct TAG_PARSER_EXPORT AacTnsInfo {
// The constructor is only declared here; its definition is not part of this header.
107 AacTnsInfo();
// Arrays with a single dimension of 8; presumably indexed by window - TODO confirm.
108 std::uint8_t filt[8];
109 std::uint8_t coefRes[8];
// Arrays with the dimensions [8][4]; presumably [window][filter] - TODO confirm.
110 std::uint8_t length[8][4];
111 std::uint8_t order[8][4];
112 std::uint8_t direction[8][4];
113 std::uint8_t coefCompress[8][4];
// Same leading dimensions as above with room for 32 coefficients each.
114 std::uint8_t coef[8][4][32];
115};
116
/*!
 * \brief Plain data holder for "SSR" data of an individual channel stream.
 * \remarks Embedded in AacIcsInfo as member "ssr"; presumably filled by
 *          AacFrameElementParser::parseGainControlData() - TODO confirm.
 */
117struct TAG_PARSER_EXPORT AacSsrInfo {
// The constructor is only declared here; its definition is not part of this header.
118 AacSsrInfo();
119 std::uint8_t maxBand;
// All arrays share the leading dimensions [4][8]; the code arrays add a third dimension of 8.
120 std::uint8_t adjustNum[4][8];
121 std::uint8_t alevcode[4][8][8];
122 std::uint8_t aloccode[4][8][8];
123};
124
/*!
 * \brief Plain data holder for "DRC" information.
 * \remarks AacFrameElementParser keeps one instance (m_drc); presumably filled by
 *          parseDynamicRange() and parseExcludedChannels() - TODO confirm.
 */
125struct TAG_PARSER_EXPORT AacDrcInfo {
// The constructor is only declared here; its definition is not part of this header.
126 AacDrcInfo();
127 std::uint8_t present;
128 std::uint8_t bandCount;
129 std::uint8_t pceInstanceTag;
130 std::uint8_t excludedChannelsPresent;
// The per-band arrays (bandTop, dynamicRangeSign, dynamicRangeControl) provide room for 17 entries.
131 std::uint8_t bandTop[17];
132 std::uint8_t progRefLevel;
133 std::uint8_t dynamicRangeSign[17];
134 std::uint8_t dynamicRangeControl[17];
// The per-channel arrays are bounded by aacMaxChannels.
135 std::uint8_t excludeMask[aacMaxChannels];
136 std::uint8_t additionalExcludedChannels[aacMaxChannels];
137};
138
/*!
 * \brief Plain data holder for "PS" information of an SBR element.
 * \remarks Referenced by AacSbrInfo::ps and passed to AacFrameElementParser::parsePsData().
 *          The structure is incomplete (see the TODO at its end). It currently declares the
 *          same members as AacDrmPsInfo.
 */
139struct TAG_PARSER_EXPORT AacPsInfo {
// The constructor is only declared here; its definition is not part of this header.
140 AacPsInfo();
141 std::uint8_t headerRead;
142 std::uint8_t use34HybridBands;
143 std::uint8_t enableIID; // Inter-channel Intensity Difference
144 std::uint8_t iidMode;
145 std::uint8_t iidParCount;
146 std::uint8_t iidopdParCount;
147 // TODO
148};
149
/*!
 * \brief Plain data holder for the DRM variant of the "PS" information of an SBR element.
 * \remarks Referenced by AacSbrInfo::drmPs and passed to AacFrameElementParser::parseDrmPsData().
 *          The structure is incomplete (see the TODO at its end). It currently declares the
 *          same members as AacPsInfo.
 */
150struct TAG_PARSER_EXPORT AacDrmPsInfo {
// The constructor is only declared here; its definition is not part of this header.
151 AacDrmPsInfo();
152 std::uint8_t headerRead;
153 std::uint8_t use34HybridBands;
154 std::uint8_t enableIID; // Inter-channel Intensity Difference
155 std::uint8_t iidMode;
156 std::uint8_t iidParCount;
157 std::uint8_t iidopdParCount;
158 // TODO
159};
160
/*!
 * \brief Plain data holder for the state of one SBR element.
 * \remarks
 * - Instances are handled via std::shared_ptr (see AacIcsInfo::sbr, AacFrameElementParser::m_sbrElements
 *   and AacFrameElementParser::makeSbrInfo()).
 * - Many arrays have a leading dimension of 2; the parseSbr...() methods of AacFrameElementParser take a
 *   "channel" argument, so this is presumably the channel index - TODO confirm.
 * - Members which are commented out (real_t, qmf_t, qmfa_info, qmfs_info) are not part of the structure.
 */
161struct TAG_PARSER_EXPORT AacSbrInfo {
// The constructor is only declared here; its definition is not part of this header.
162 AacSbrInfo(std::uint8_t sbrElementType, std::uint16_t samplingFrequency, std::uint16_t frameLength, bool isDrm);
163
164 std::uint8_t aacElementId;
165 std::uint16_t samplingFrequency;
166
167 std::uint32_t maxAacLine;
168
169 std::uint8_t rate;
170 std::uint8_t justSeeked;
171 std::uint8_t ret;
172
173 std::uint8_t ampRes[2];
174
175 std::uint8_t k0;
176 std::uint8_t kx;
177 std::uint8_t m;
178 std::uint8_t nMaster;
179 std::uint8_t nHigh;
180 std::uint8_t nLow;
181 std::uint8_t nq;
182 std::uint8_t nl[4];
183 std::uint8_t n[2];
184
// Tables with room for 64 entries per row.
185 std::uint8_t fMaster[64];
186 std::uint8_t fTableRes[2][64];
187 std::uint8_t fTableNoise[64];
188 std::uint8_t fTableLim[4][64];
189 std::uint8_t fGroup[5][64];
190 std::uint8_t ng[5];
191
192 std::uint8_t tableMapKToG[64];
193
194 std::uint8_t absBordLead[2];
195 std::uint8_t absBordTrail[2];
196 std::uint8_t relLeadCount[2];
197 std::uint8_t relTrailCount[2];
198
199 std::uint8_t le[2];
200 std::uint8_t lePrev[2];
201 std::uint8_t lq[2];
202
// te and f provide one more entry (aacSbrMaxLe + 1) than the arrays sized by aacSbrMaxLe below.
203 std::uint8_t te[2][aacSbrMaxLe + 1];
204 std::uint8_t tq[2][3];
205 std::uint8_t f[2][aacSbrMaxLe + 1];
206 std::uint8_t fPrev[2];
207
208 //real_t *gTempPrev[2][5];
209 //real_t *qTempPrev[2][5];
210 //sbyte gqRingbufIndex[2];
211
212 std::int16_t e[2][64][aacSbrMaxLe];
213 std::int16_t ePrev[2][64];
214 //real_t eOrig[2][64][aacSbrMaxLe];
215 //real_t eCurr[2][64][aacSbrMaxLe];
216 std::int32_t q[2][64][2];
217 //real_t qDiv[2][64][2];
218 //real_t qDiv2[2][64][2];
219 std::int32_t qPrev[2][64];
220
// Note that la and laPrev are signed.
221 std::int8_t la[2];
222 std::int8_t laPrev[2];
223
224 std::uint8_t bsInvfMode[2][aacSbrMaxLe];
225 std::uint8_t bsInvfModePrev[2][aacSbrMaxLe];
226 //real_t bwArray[2][64];
227 //real_t bwArrayPrev[2][64];
228
229 std::uint8_t noPatches;
230 std::uint8_t patchNoSubbands[64];
231 std::uint8_t patchStartSubband[64];
232
233 std::uint8_t bsAddHarmonic[2][64];
234 std::uint8_t bsAddHarmonicPrev[2][64];
235
236 std::uint16_t indexNoisePrev[2];
237 std::uint8_t psiIsPrev[2];
238
// Members with the suffix "Prev" have a counterpart without suffix further below;
// presumably they keep the values of the previous frame - TODO confirm.
239 std::uint8_t bsStartFreqPrev;
240 std::uint8_t bsStopFreqPrev;
241 std::uint8_t bsXoverBandPrev;
242 std::uint8_t bsFreqScalePrev;
243 std::uint8_t bsAlterScalePrev;
244 std::uint8_t bsNoiseBandsPrev;
245
246 std::int8_t prevEnvIsShort[2];
247
// Note that kxPrev is signed whereas kx is unsigned.
248 std::int8_t kxPrev;
249 std::uint8_t bsco;
250 std::uint8_t bscoPrev;
251 std::uint8_t mPrev;
252 std::uint16_t frameLength;
253
254 std::uint8_t reset;
255 std::uint32_t frame;
256 std::uint32_t headerCount;
257
258 std::uint8_t idAac;
259 //qmfa_info *qmfa[2];
260 //qmfs_info *qmfs[2];
261
262 //qmf_t Xsbr[2][aacSbrMaxNtsrhfg][64];
263
264 std::uint8_t isDrmSbr;
// Shared ownership of the DRM "PS" data.
265 std::shared_ptr<AacDrmPsInfo> drmPs;
266
267 std::uint8_t timeSlotsRateCount;
268 std::uint8_t timeSlotsCount;
269 std::uint8_t tHfGen;
270 std::uint8_t tHfAdj;
271
// Shared ownership of the "PS" data.
272 std::shared_ptr<AacPsInfo> ps;
273 std::uint8_t psUsed;
274 std::uint8_t psResetFlag;
275
// Members with the prefix "bs"; presumably values read directly from the bitstream - TODO confirm.
276 std::uint8_t bsHeaderFlag;
277 std::uint8_t bsCrcFlag;
278 std::uint16_t bsSbrCrcBits;
279 std::uint8_t bsProtocolVersion;
280 std::uint8_t bsAmpRes;
281 std::uint8_t bsStartFreq;
282 std::uint8_t bsStopFreq;
283 std::uint8_t bsXoverBand;
284 std::uint8_t bsFreqScale;
285 std::uint8_t bsAlterScale;
286 std::uint8_t bsNoiseBands;
287 std::uint8_t bsLimiterBands;
288 std::uint8_t bsLimiterGains;
289 std::uint8_t bsInterpolFreq;
290 std::uint8_t bsSmoothingMode;
291 std::uint8_t bsSamplerateMode;
292 std::uint8_t bsAddHarmonicFlag[2];
293 std::uint8_t bsAddHarmonicFlagPrev[2];
294 std::uint8_t bsExtendedData;
295 std::uint8_t bsExtensionId;
296 std::uint8_t bsExtensionData;
297 std::uint8_t bsCoupling;
298 std::uint8_t bsFrameClass[2];
299 std::uint8_t bsRelBord[2][9];
300 std::uint8_t bsRelBord0[2][9];
301 std::uint8_t bsRelBord1[2][9];
302 std::uint8_t bsPointer[2];
303 std::uint8_t bsAbsBord0[2];
304 std::uint8_t bsAbsBord1[2];
305 std::uint8_t bsRelCount0[2];
306 std::uint8_t bsRelCount1[2];
307 std::uint8_t bsDfEnv[2][9];
308 std::uint8_t bsDfNoise[2][3];
309};
310
/*!
 * \brief Plain data holder for a program config element.
 * \remarks AacFrameElementParser keeps one instance (m_pce); presumably filled by
 *          parseProgramConfigElement() - TODO confirm.
 */
311struct TAG_PARSER_EXPORT AacProgramConfig {
// The constructor is only declared here; its definition is not part of this header.
312 AacProgramConfig();
313 std::uint8_t elementInstanceTag;
314 std::uint8_t objectType;
315 std::uint8_t samplingFrequencyIndex;
// Element counts; presumably the number of used entries in the 16-entry arrays below - TODO confirm.
316 std::uint8_t frontChannelElementCount;
317 std::uint8_t sideChannelElementCount;
318 std::uint8_t backChannelElementCount;
319 std::uint8_t lfeChannelElementCount;
320 std::uint8_t assocDataElementCount;
321 std::uint8_t validCcElementCount;
322 std::uint8_t monoMixdownPresent;
323 std::uint8_t monoMixdownElementNumber;
324 std::uint8_t stereoMixdownPresent;
325 std::uint8_t stereoMixdownElementNumber;
326 std::uint8_t matrixMixdownIdxPresent;
327 std::uint8_t pseudoSurroundEnable;
328 std::uint8_t matrixMixdownIdx;
// Per-element arrays with room for 16 entries each.
329 std::uint8_t frontElementIsCpe[16];
330 std::uint8_t frontElementTagSelect[16];
331 std::uint8_t sideElementIsCpe[16];
332 std::uint8_t sideElementTagSelect[16];
333 std::uint8_t backElementIsCpe[16];
334 std::uint8_t backElementTagSelect[16];
335 std::uint8_t lfeElementTagSelect[16];
336 std::uint8_t assocDataElementTagSelect[16];
337 std::uint8_t ccElementIsIndSw[16];
338 std::uint8_t validCcElementTagSelect[16];
339 std::uint8_t channels;
// commentFieldBytes is an 8-bit value (max. 255) and the buffer holds 257 bytes.
340 std::uint8_t commentFieldBytes;
341 std::uint8_t commentFieldData[257];
342 std::uint8_t frontChannelCount;
343 std::uint8_t sideChannelCount;
344 std::uint8_t backChannelCount;
345 std::uint8_t lfeChannelCount;
346 std::uint8_t sceChannel[16];
347 std::uint8_t cpeChannel[16];
348};
349
/*!
 * \brief Plain data holder for the information of an individual channel stream ("ICS").
 * \remarks AacFrameElementParser keeps two instances (m_ics1, m_ics2) and passes them by reference
 *          to most of its private parse...() methods.
 */
350struct TAG_PARSER_EXPORT AacIcsInfo {
// The constructor is only declared here; its definition is not part of this header.
351 AacIcsInfo();
352
353 std::uint8_t maxSfb;
354
355 std::uint8_t swbCount;
356 std::uint8_t windowGroupCount;
357 std::uint8_t windowCount;
// Presumably one of AacIcsSequenceTypes::KnownTypes - TODO confirm.
358 std::uint8_t windowSequence;
359 std::uint8_t windowGroupLengths[8];
360 std::uint8_t windowShape;
361 std::uint8_t scaleFactorGrouping;
// The section arrays share the dimensions [8][15 * 8], i.e. 8 rows of 120 entries.
362 std::uint16_t sectionSfbOffset[8][15 * 8];
363 std::uint16_t swbOffset[52];
364 std::uint16_t maxSwbOffset;
365
366 std::uint8_t sectionCb[8][15 * 8];
367 std::uint16_t sectionStart[8][15 * 8];
368 std::uint16_t sectionEnd[8][15 * 8];
369 std::uint8_t sfbCb[8][15 * 8];
370 std::uint8_t sectionsPerGroup[8];
371
372 std::uint8_t globalGain;
// Uses the literals 8 and 51, which equal aacMaxWindowGroups and aacMaxSfb.
373 std::uint16_t scaleFactors[8][51];
374
375 std::uint8_t midSideCodingMaskPresent;
376 std::uint8_t midSideCodingUsed[aacMaxWindowGroups][aacMaxSfb];
377
378 std::uint8_t noiseUsed;
379 std::uint8_t isUsed;
380
// Presence flags; presumably they tell whether the corresponding sub-structure below is populated - TODO confirm.
381 std::uint8_t pulseDataPresent;
382 std::uint8_t tnsDataPresent;
383 std::uint8_t gainControlPresent;
384 std::uint8_t predictorDataPresent;
385
// Sub-structures stored by value; only the SBR information is held via shared pointer.
386 AacPulseInfo pulse;
387 AacTnsInfo tns;
388 AacPredictorInfo predictor;
389 AacLtpInfo ltp1;
390 AacLtpInfo ltp2;
391 AacSsrInfo ssr;
392 std::shared_ptr<AacSbrInfo> sbr;
393
394 // error resilience
395 std::uint16_t reorderedSpectralDataLength;
396 std::uint8_t longestCodewordLength;
397 std::uint8_t sfConcealment;
398 std::uint8_t revGlobalGain;
399 std::uint16_t rvlcSfLength;
400 std::uint16_t dpcmNoiseNrg;
401 std::uint8_t sfEscapesPresent;
402 std::uint8_t rvlcEscapesLength;
403 std::uint16_t dpcmNoiseLastPos;
404};
405
/*!
 * \brief The AacFrameElementParser class parses the elements of an AAC frame.
 * \remarks
 * - According to the note at the top of this file the AAC parser is still WIP and does not work yet.
 * - Only the constructor, sbrLog2() and huffmanCodebook() are defined in this header; all other
 *   methods are merely declared here.
 */
406class TAG_PARSER_EXPORT AacFrameElementParser {
407public:
// Takes the setup information; frameLength defaults to 1024.
408 AacFrameElementParser(std::uint8_t audioObjectId, std::uint8_t samplingFrequencyIndex, std::uint8_t extensionSamplingFrequencyIndex,
409 std::uint8_t channelConfig, std::uint16_t frameLength = 1024);
410
// Two entry points: one takes the data as a buffer, the other reads it from a stream.
// Both receive the ADTS frame and the data size.
411 void parse(const AdtsFrame &adtsFrame, std::unique_ptr<char[]> &data, std::size_t dataSize);
412 void parse(const AdtsFrame &adtsFrame, std::istream &stream, std::size_t dataSize);
413
414private:
// Helpers operating on an individual channel stream.
415 void parseLtpInfo(const AacIcsInfo &ics, AacLtpInfo &ltp);
416 void parseIcsInfo(AacIcsInfo &ics);
417 void parseSectionData(AacIcsInfo &ics);
418 void decodeScaleFactorData(AacIcsInfo &ics);
419 void decodeRvlcScaleFactorData(AacIcsInfo &ics);
420 void parseScaleFactorData(AacIcsInfo &ics);
421 void parsePulseData(AacIcsInfo &ics);
422 void parseTnsData(AacIcsInfo &ics);
423 void parseGainControlData(AacIcsInfo &ics);
424 void parseSpectralData(AacIcsInfo &ics, std::int16_t *specData);
425 void parseSideInfo(AacIcsInfo &ics, bool scaleFlag);
426 std::uint8_t parseExcludedChannels();
427 std::uint8_t parseDynamicRange();
// SBR related helpers; most of them receive the SBR info and a channel number.
428 static std::int8_t sbrLog2(const std::int8_t val);
429 std::int16_t sbrHuffmanDec(SbrHuffTab table);
430 void parseSbrGrid(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
431 void parseSbrDtdf(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
432 void parseInvfMode(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
433 void parseSbrEnvelope(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
434 void parseSbrNoise(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
435 void parseSbrSinusoidalCoding(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t channel);
436 std::uint16_t parseSbrExtension(std::shared_ptr<AacSbrInfo> &sbr, std::uint8_t extensionId, std::uint8_t bitsLeft);
437 std::uint16_t parsePsData(std::shared_ptr<AacPsInfo> &ps, std::uint8_t &header);
438 std::uint16_t parseDrmPsData(std::shared_ptr<AacDrmPsInfo> &drmPs);
439 void parseSbrSingleChannelElement(std::shared_ptr<AacSbrInfo> &sbr);
440 void parseSbrChannelPairElement(std::shared_ptr<AacSbrInfo> &sbr);
441 std::shared_ptr<AacSbrInfo> makeSbrInfo(std::uint8_t sbrElement, bool isDrm = false);
442 void parseSbrExtensionData(std::uint8_t sbrElement, std::uint16_t count, bool crcFlag);
// Huffman helpers; "cb" is presumably the codebook number and "sp" the output buffer - TODO confirm.
443 std::uint8_t parseHuffmanScaleFactor();
444 void parseHuffmanSpectralData(std::uint8_t cb, std::int16_t *sp);
445 void huffmanSignBits(std::int16_t *sp, std::uint8_t len);
446 void huffman2StepQuad(std::uint8_t cb, std::int16_t *sp);
447 void huffmanBinaryQuadSign(std::uint8_t cb, std::int16_t *sp);
448 void huffmanBinaryPair(std::uint8_t cb, std::int16_t *sp);
449 void huffman2StepPair(std::uint8_t cb, std::int16_t *sp);
450 void huffmanBinaryPairSign(std::uint8_t cb, std::int16_t *sp);
451 void huffman2StepPairSign(std::uint8_t cb, std::int16_t *sp);
452 std::int16_t huffmanGetEscape(std::int16_t sp);
453 constexpr static std::int16_t huffmanCodebook(std::uint8_t i);
454 static void vcb11CheckLav(std::uint8_t cb, std::int16_t *sp);
// Helpers for the syntax elements (compare AacSyntaxElementTypes).
455 void calculateWindowGroupingInfo(AacIcsInfo &ics);
456 void parseIndividualChannelStream(AacIcsInfo &ics, std::int16_t *specData, bool scaleFlag = false);
457 void parseSingleChannelElement();
458 void parseChannelPairElement();
459 void parseCouplingChannelElement();
460 void parseLowFrequencyElement();
461 void parseDataStreamElement();
462 void parseProgramConfigElement();
463 void parseFillElement(std::uint8_t sbrElement = aacInvalidSbrElement);
464 void parseRawDataBlock();
465
466 // these fields contain setup information
467 CppUtilities::BitReader m_reader;
468 std::uint8_t m_mpeg4AudioObjectId;
469 std::uint8_t m_mpeg4SamplingFrequencyIndex;
470 std::uint8_t m_mpeg4ExtensionSamplingFrequencyIndex;
471 std::uint8_t m_mpeg4ChannelConfig;
472 std::uint16_t m_frameLength;
473 std::uint8_t m_aacSectionDataResilienceFlag;
474 std::uint8_t m_aacScalefactorDataResilienceFlag;
475 std::uint8_t m_aacSpectralDataResilienceFlag;
476 // these fields will be parsed
// Note: m_elementId is bounded by aacMaxChannels whereas the other per-element arrays
// are bounded by aacMaxSyntaxElements.
477 std::uint8_t m_elementId[aacMaxChannels];
478 std::uint8_t m_channelCount;
479 std::uint8_t m_elementCount;
480 std::uint8_t m_elementChannelCount[aacMaxSyntaxElements];
481 //std::uint8_t m_channel;
482 //std::int16_t m_pairedChannel;
483 std::uint8_t m_elementInstanceTag[aacMaxSyntaxElements];
484 std::uint8_t m_commonWindow;
485 AacIcsInfo m_ics1;
486 AacIcsInfo m_ics2;
487 AacDrcInfo m_drc;
488 AacProgramConfig m_pce;
489 std::uint8_t m_sbrPresentFlag;
490 //std::uint8_t m_forceUpSampling;
491 //std::uint8_t m_downSampledSbr;
492 std::shared_ptr<AacSbrInfo> m_sbrElements[aacMaxSyntaxElements];
493 std::uint8_t m_psUsed[aacMaxSyntaxElements];
494 std::uint8_t m_psUsedGlobal;
495 std::uint8_t m_psResetFlag;
496};
497
501inline AacFrameElementParser::AacFrameElementParser(std::uint8_t audioObjectId, std::uint8_t samplingFrequencyIndex,
502 std::uint8_t extensionSamplingFrequencyIndex, std::uint8_t channelConfig, std::uint16_t frameLength)
503 : m_reader(nullptr, nullptr)
504 , m_mpeg4AudioObjectId(audioObjectId)
505 , m_mpeg4SamplingFrequencyIndex(samplingFrequencyIndex)
506 , m_mpeg4ExtensionSamplingFrequencyIndex(extensionSamplingFrequencyIndex)
507 , m_mpeg4ChannelConfig(channelConfig)
508 , m_frameLength(frameLength)
509 , m_aacSpectralDataResilienceFlag(0)
510 , m_elementId{ 0 }
511 , m_channelCount(0)
512 , m_elementCount(0)
513 , m_elementChannelCount{ 0 }
514 , m_elementInstanceTag{ 0 }
515 , m_commonWindow(0)
516 ,
517 //m_channel(0),
518 //m_pairedChannel(0),
519 m_sbrPresentFlag(0)
520 ,
521 //m_forceUpSampling(0),
522 //m_downSampledSbr(0),
523 m_sbrElements{ 0 }
524 , m_psUsed{ 0 }
525 , m_psUsedGlobal(0)
526 , m_psResetFlag(0)
527{
528}
529
530inline std::int8_t AacFrameElementParser::sbrLog2(const std::int8_t val)
531{
532 static const std::int8_t log2tab[] = { 0, 0, 1, 2, 2, 3, 3, 3, 3, 4 };
533 return (val < 10 && val >= 0) ? log2tab[val] : 0;
534}
535
536constexpr std::int16_t AacFrameElementParser::huffmanCodebook(std::uint8_t i)
537{
538 return static_cast<std::int16_t>(i ? (16428320 & 0xFFFF) : ((16428320 >> 16) & 0xFFFF));
539}
540
542
543} // namespace TagParser
544
545#endif // TAG_PARSER_AACFRAME_H
#define TAG_PARSER_EXPORT
Marks the symbol to be exported by the tagparser library.
Definition global.h:14
Contains all classes and functions of the Tag Parser library.
Definition aaccodebook.h:10