#if defined(OJPH_ARCH_I386) || defined(OJPH_ARCH_X86_64)
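// The cleanup segment carries three interleaved bitstreams: MagSgn grows
// forward from its start, MEL grows forward from offset lcup - scup, and
// VLC grows backward from its end. The readers below buffer and unstuff
// each of them. mel_read() refills a 64-bit buffer with up to 32 fresh MEL
// bits, removing the bit stuffed after each 0xFF byte; when the segment is
// exhausted, 0xFF bytes are fed in so decoding can continue harmlessly.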
ui32 val = 0xFFFFFFFF;        // feed in 0xFF if the buffer is exhausted
if (melp->size > 4) {         // more than 4 bytes remain in the segment
  val = *(ui32*)melp->data;   // read 32 bits (little endian)
  melp->data += 4;            // advance the pointer
  melp->size -= 4;
}
else if (melp->size > 0)
{ // 4 bytes or fewer
  int i = 0;
  while (melp->size > 1) {
    ui32 v = *melp->data++;     // read one byte at a time
    ui32 m = ~(0xFFu << i);     // mask for the byte's location
    val = (val & m) | (v << i); // put the byte in its location
    --melp->size;
    i += 8;
  }
  // size == 1; the last byte is special because the MEL and VLC
  // segments can overlap
  ui32 v = *melp->data++;
  v |= 0xF;
  ui32 m = ~(0xFFu << i);
  val = (val & m) | (v << i);
  --melp->size;
}

// unstuff the read bytes before adding them to the buffer
int bits = 32 - melp->unstuff; // 1 less if the last byte needs unstuffing

ui32 t = val & 0xFF;
bool unstuff = ((val & 0xFF) == 0xFF); // true if this byte needs unstuffing
bits -= unstuff;
t = t << (8 - unstuff);        // make room for the next byte

t |= (val >> 8) & 0xFF;
unstuff = (((val >> 8) & 0xFF) == 0xFF);
bits -= unstuff;
t = t << (8 - unstuff);

t |= (val >> 16) & 0xFF;
unstuff = (((val >> 16) & 0xFF) == 0xFF);
bits -= unstuff;
t = t << (8 - unstuff);

t |= (val >> 24) & 0xFF;
melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);

// push the result all the way up, so we read from the MSB
melp->tmp |= ((ui64)t) << (64 - bits - melp->bits);
melp->bits += bits;
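// mel_decode(): turns buffered MEL bits into runs. In state k, a 1 bit
// stands for a maximal run of 2^mel_exp[k] zero events and moves k up; a
// 0 bit is followed by mel_exp[k] literal bits giving a shorter run and
// moves k down. Each run occupies 7 bits of "runs"; its LSB tells whether
// the run was terminated by a one event.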
// MEL exponents as a function of the decoder state k
static const int mel_exp[13] = {
  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
};

while (melp->bits >= 6 && melp->num_runs < 8)
{
  int eval = mel_exp[melp->k]; // bits associated with the current state
  int run = 0;
  if (melp->tmp & (1ull << 63)) // the next bit to decode sits at the MSB
  { // 1: a maximal run of zero events
    run = (1 << eval) - 1;
    melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12; // increment k, max 12
    melp->tmp <<= 1; // consume one bit
    melp->bits -= 1;
    run = run << 1;  // LSB 0: not terminated by a one event
  }
  else
  { // 0: eval literal bits carry the run length
    run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
    melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0; // decrement k, min 0
    melp->tmp <<= eval + 1; // consume eval + 1 bits (at most 6)
    melp->bits -= eval + 1;
    run = (run << 1) + 1; // LSB 1: terminated by a one event
  }
  eval = melp->num_runs * 7;           // 7 bits per stored run
  melp->runs &= ~((ui64)0x3F << eval); // 6 bits are sufficient
  melp->runs |= ((ui64)run) << eval;   // store the run
  melp->num_runs++;
}
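// mel_init(): points the reader at the MEL segment and pre-reads a few
// bytes so that later 32-bit reads in mel_read() are 4-byte aligned.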
melp->data = bbuf + lcup - scup; // the start of the MEL segment
melp->unstuff = false;
melp->size = scup - 1;           // the size of MEL+VLC-1

// read 1 to 4 bytes so the read address becomes a multiple of 4
int num = 4 - (int)(intptr_t(melp->data) & 0x3);
for (int i = 0; i < num; ++i) { // similar to mel_read
  assert(melp->unstuff == false || melp->data[0] <= 0x8F);
  ui64 d = (melp->size > 0) ? *melp->data : 0xFF; // 0xFF if consumed
  if (melp->size == 1) d |= 0xF; // this is MEL+VLC-1; see the standard
  melp->data += melp->size-- > 0; // advance only if the end is not reached
  int d_bits = 8 - melp->unstuff; // 1 less if unstuffing is needed
  melp->tmp = (melp->tmp << d_bits) | d;
  melp->bits += d_bits;
  melp->unstuff = ((d & 0xFF) == 0xFF); // next byte needs unstuffing?
}
melp->tmp <<= (64 - melp->bits); // push up so the first bit is the MSB
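// mel_get_run(): pops one run from the runs buffer, invoking mel_decode()
// when no decoded runs remain. The cleanup pass consumes these runs for
// zero-context quads and for the u_q extension decision.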
if (melp->num_runs == 0) // no stored runs; decode more from the segment
  mel_decode(melp);
int t = melp->runs & 0x7F; // retrieve one run
melp->runs >>= 7;          // remove it
melp->num_runs--;
return t;
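// rev_read(): refills the VLC reader, which consumes bytes at decreasing
// addresses. A bit is removed (unstuffed) when a byte whose 7 LSBs are all
// ones follows, in stream order, a byte greater than 0x8F.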
ui32 val = 0;
if (vlcp->size > 3) // process 4 bytes at a time
{
  val = *(ui32*)(vlcp->data - 3); // read 32 bits
  vlcp->data -= 4;                // move the pointer backward
  vlcp->size -= 4;
}
else if (vlcp->size > 0)
{ // 3 bytes or fewer
  int i = 24;
  while (vlcp->size > 0) {
    ui32 v = *vlcp->data--; // read one byte at a time
    val |= (v << i);        // put the byte in its location
    --vlcp->size;
    i -= 8;
  }
}

// accumulate in tmp, unstuffing as we go
ui32 tmp = val >> 24; // start with the MSB byte
ui32 bits;

// test: the previous byte is > 0x8F and this byte's 7 LSBs are 0x7F
bits = 8 - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
bool unstuff = (val >> 24) > 0x8F; // for the next byte

tmp |= ((val >> 16) & 0xFF) << bits;
bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
unstuff = ((val >> 16) & 0xFF) > 0x8F;

tmp |= ((val >> 8) & 0xFF) << bits;
bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
unstuff = ((val >> 8) & 0xFF) > 0x8F;

tmp |= (val & 0xFF) << bits;
bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
unstuff = (val & 0xFF) > 0x8F;

vlcp->tmp |= (ui64)tmp << vlcp->bits; // append to the 64-bit buffer
vlcp->bits += bits;
vlcp->unstuff = unstuff; // for the next read
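// rev_init(): the VLC segment ends at lcup - 1, where the last byte and
// the low nibble of the one before it hold scup, so reading starts at
// lcup - 2 with only that byte's upper 4 bits; a few more bytes are then
// read to bring the address to a multiple of 4.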
vlcp->data = data + lcup - 2; // the last byte holds only 4 VLC bits
vlcp->size = scup - 2;

ui32 d = *vlcp->data--;  // read the half byte
vlcp->tmp = d >> 4;
vlcp->bits = 4 - ((vlcp->tmp & 7) == 7); // possible stuffing; see standard
vlcp->unstuff = (d | 0xF) > 0x8F;        // useful for the next byte

// read 1 to 4 bytes to bring the read address to a multiple of 4
int num = 1 + (int)(intptr_t(vlcp->data) & 0x3);
int tnum = num < vlcp->size ? num : vlcp->size;
for (int i = 0; i < tnum; ++i) {
  ui64 d = *vlcp->data--;
  // remove a bit if the previous byte was > 0x8F and this one is 0x7F
  ui32 d_bits = 8 - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
  vlcp->tmp |= d << vlcp->bits;
  vlcp->bits += d_bits;
  vlcp->unstuff = d > 0x8F; // for the next byte
}
  // rev_fetch(): return the bottom 32 bits of tmp
  return (ui32)vlcp->tmp;

// rev_advance(): consume num_bits from the head of the buffer
assert(num_bits <= vlcp->bits); // cannot consume more than is available
vlcp->tmp >>= num_bits;
vlcp->bits -= num_bits;
return (ui32)vlcp->tmp;
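// The MagRef segment is read backward as well; rev_read_mrp() and
// rev_init_mrp() mirror the VLC reader, starting from the segment's last
// byte at lcup + len2 - 1 and feeding in zeros once it is exhausted.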
ui32 val = 0;
if (mrp->size > 3) // process 4 bytes at a time
{
  val = *(ui32*)(mrp->data - 3); // read 32 bits
  mrp->data -= 4;
  mrp->size -= 4;
}
else if (mrp->size > 0)
{ // 3 bytes or fewer
  int i = 24;
  while (mrp->size > 0) {
    ui32 v = *mrp->data--; // read one byte at a time
    val |= (v << i);
    --mrp->size;
    i -= 8;
  }
}

// unstuff and accumulate, as in rev_read()
ui32 bits, tmp = val >> 24;
bits = 8 - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
bool unstuff = (val >> 24) > 0x8F;

tmp |= ((val >> 16) & 0xFF) << bits;
bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
unstuff = ((val >> 16) & 0xFF) > 0x8F;

tmp |= ((val >> 8) & 0xFF) << bits;
bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
unstuff = ((val >> 8) & 0xFF) > 0x8F;

tmp |= (val & 0xFF) << bits;
bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
unstuff = (val & 0xFF) > 0x8F;

mrp->tmp |= (ui64)tmp << mrp->bits; // append to the 64-bit buffer
mrp->bits += bits;
mrp->unstuff = unstuff; // for the next read
// rev_init_mrp(): position at the end of the MagRef segment
mrp->data = data + lcup + len2 - 1;

// read 1 to 4 bytes to bring the read address to a multiple of 4
int num = 1 + (int)(intptr_t(mrp->data) & 0x3);
for (int i = 0; i < num; ++i) {
  ui64 d = (mrp->size-- > 0) ? *mrp->data-- : 0; // 0 if no more data
  ui32 d_bits = 8 - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
  mrp->tmp |= d << mrp->bits;
  mrp->bits += d_bits;
  mrp->unstuff = d > 0x8F; // for the next byte
}
  // rev_fetch_mrp(): return the bottom 32 bits of tmp
  return (ui32)mrp->tmp;

// rev_advance_mrp(): consume num_bits from the head of the buffer
assert(num_bits <= mrp->bits);
mrp->tmp >>= num_bits;
mrp->bits -= num_bits;
return (ui32)mrp->tmp;
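// Forward-growing bitstream (MagSgn and SigProp). frwd_read() refills
// 128 bits at a time: a 16-byte load is validated against the remaining
// size, exhausted byte lanes are filled with the template value X, 0xFF
// bytes are located with a movemask so the bit following each can be
// unstuffed, and the result is merged into the tmp buffer at bit
// position msp->bits.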
struct frwd_struct_ssse3 {
  const ui8* data; // pointer into the bitstream
  ui8 tmp[48];     // temporary buffer of read data
  ui32 bits;       // number of bits stored in tmp
  ui32 unstuff;    // 1 if a bit needs to be unstuffed from the next byte
  int size;        // bytes remaining in the segment
};
assert(msp->bits <= 128);

__m128i offset, val, validity, all_xff;
val = _mm_loadu_si128((__m128i*)msp->data);
int bytes = msp->size >= 16 ? 16 : msp->size;
validity = _mm_set1_epi8((char)bytes);
msp->data += bytes;
msp->size -= bytes;
int bits = 128;
offset = _mm_set_epi64x(0x0F0E0D0C0B0A0908, 0x0706050403020100);
validity = _mm_cmpgt_epi8(validity, offset); // 0xFF for valid byte lanes
all_xff = _mm_set1_epi8(-1);
if (X == 0xFF) // the compiler should remove the unused branch
{
  __m128i t = _mm_xor_si128(validity, all_xff); // complement
  val = _mm_or_si128(t, val);  // fill exhausted bytes with 0xFF
}
else if (X == 0)
  val = _mm_and_si128(validity, val); // fill exhausted bytes with 0
else
  assert(0);

// locate 0xFF bytes; the bit following each one is a stuffing bit
__m128i ff_bytes;
ff_bytes = _mm_cmpeq_epi8(val, all_xff);
ff_bytes = _mm_and_si128(ff_bytes, validity);
ui32 flags = (ui32)_mm_movemask_epi8(ff_bytes);
flags <<= 1;                     // unstuff the byte after each 0xFF
ui32 next_unstuff = flags >> 16; // carried into the next 16 bytes
flags |= msp->unstuff;
flags &= 0xFFFF;
while (flags)
{ // unstuffing happens on average once every 256 bytes, so this scalar
  // loop is not a performance concern
  --bits; // one stuffing bit is consumed

  ui32 loc = 31 - count_leading_zeros(flags);
  flags ^= 1u << loc;

  __m128i m, t, c;
  t = _mm_set1_epi8((char)loc);
  m = _mm_cmpgt_epi8(offset, t); // bytes above the stuffed position

  t = _mm_and_si128(m, val); // keep bytes at locations larger than loc
  c = _mm_srli_epi64(t, 1);  // shift both 64-bit lanes right by one bit
  t = _mm_srli_si128(t, 8);  // the upper lane's LSB must cross ...
  t = _mm_slli_epi64(t, 63); // ... into bit 63 of the lower lane
  t = _mm_or_si128(t, c);    // t = the masked region shifted right 1 bit
  val = _mm_or_si128(t, _mm_andnot_si128(m, val));
}

// combine with whatever is already in tmp
assert(msp->bits >= 0 && msp->bits <= 128);
int cur_bytes = msp->bits >> 3;
int cur_bits = msp->bits & 7;
__m128i b1, b2;
b1 = _mm_sll_epi64(val, _mm_set1_epi64x(cur_bits));
b2 = _mm_slli_si128(val, 8);
b2 = _mm_srl_epi64(b2, _mm_set1_epi64x(64 - cur_bits));
b1 = _mm_or_si128(b1, b2);
b2 = _mm_loadu_si128((__m128i*)(msp->tmp + cur_bytes));
b2 = _mm_or_si128(b1, b2);
_mm_storeu_si128((__m128i*)(msp->tmp + cur_bytes), b2);

int consumed_bits = bits < 128 - cur_bits ? bits : 128 - cur_bits;
cur_bytes = (msp->bits + (ui32)consumed_bits + 7) >> 3; // round up
int upper = _mm_extract_epi16(val, 7);
upper >>= consumed_bits - 128 + 16; // may be all zeros
msp->tmp[cur_bytes] = (ui8)upper;   // copy the spill-over byte

msp->bits += (ui32)bits;
msp->unstuff = next_unstuff;
assert(msp->unstuff == 0 || msp->unstuff == 1);
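// frwd_init(): clears the 48-byte buffer and primes it with the first
// 128 bits of the segment.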
template<int X>
static inline void frwd_init(frwd_struct_ssse3 *msp, const ui8* data, int size)
{
  msp->data = data;
  _mm_storeu_si128((__m128i *)msp->tmp, _mm_setzero_si128());
  _mm_storeu_si128((__m128i *)msp->tmp + 1, _mm_setzero_si128());
  _mm_storeu_si128((__m128i *)msp->tmp + 2, _mm_setzero_si128());
  msp->bits = 0;
  msp->unstuff = 0;
  msp->size = size;
  frwd_read<X>(msp); // read the first 128 bits
}
// frwd_advance(): consume num_bits from the head of the tmp buffer
assert(num_bits > 0 && num_bits <= msp->bits && num_bits < 128);
msp->bits -= num_bits;

// skip whole 64-bit words, then shift by the remaining amount
__m128i *p = (__m128i*)(msp->tmp + ((num_bits >> 3) & 0x18));
num_bits &= 0x3F;

__m128i v0, v1, c0, c1, t;
v0 = _mm_loadu_si128(p);
v1 = _mm_loadu_si128(p + 1);

// shift the 256-bit quantity (v1:v0) right by num_bits
c0 = _mm_srl_epi64(v0, _mm_set1_epi64x(num_bits));
t = _mm_srli_si128(v0, 8);
t = _mm_sll_epi64(t, _mm_set1_epi64x(64 - num_bits));
c0 = _mm_or_si128(c0, t);
t = _mm_slli_si128(v1, 8);
t = _mm_sll_epi64(t, _mm_set1_epi64x(64 - num_bits));
c0 = _mm_or_si128(c0, t);

_mm_storeu_si128((__m128i*)msp->tmp, c0);

c1 = _mm_srl_epi64(v1, _mm_set1_epi64x(num_bits));
t = _mm_srli_si128(v1, 8);
t = _mm_sll_epi64(t, _mm_set1_epi64x(64 - num_bits));
c1 = _mm_or_si128(c1, t);

_mm_storeu_si128((__m128i*)msp->tmp + 1, c1);
// frwd_fetch(): return the head of the buffer, refilling it first
if (msp->bits <= 128)
{
  frwd_read<X>(msp);    // unstuffing may deliver fewer than 128 bits
  if (msp->bits <= 128) // top up if needed
    frwd_read<X>(msp);
}
__m128i t = _mm_loadu_si128((__m128i*)msp->tmp);
return t;
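// decode_one_quad32(): reconstructs the four samples of one quad from the
// MagSgn stream using 32-bit lanes. Per sample, the bit count is
// m_n = U_q - e_k; a prefix sum over m_n locates each sample's first bit
// inside the fetched 128-bit window, two gathered bytes per position are
// aligned with a multiply, and magnitude, sign (bit 31), and the exponent
// bound vn are then assembled without branches.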
template<int N>
static inline __m128i decode_one_quad32(const __m128i inf_u_q, __m128i U_q,
                                        frwd_struct_ssse3* magsgn,
                                        ui32 p, __m128i& vn)
{
  __m128i w0, insig, flags, row;

  row = _mm_setzero_si128();
  w0 = _mm_shuffle_epi32(inf_u_q, _MM_SHUFFLE(N, N, N, N));
  // significance flags of the quad's four samples
  flags = _mm_and_si128(w0, _mm_set_epi32(0x8880, 0x4440, 0x2220, 0x1110));
  insig = _mm_cmpeq_epi32(flags, _mm_setzero_si128());
  if (_mm_movemask_epi8(insig) != 0xFFFF) // not all samples insignificant
  {
    U_q = _mm_shuffle_epi32(U_q, _MM_SHUFFLE(N, N, N, N));
    flags = _mm_mullo_epi16(flags, _mm_set_epi16(1,1,2,2,4,4,8,8));
    // flags now aligns e_k at 0x8000, e_1 at 0x800, and rho at 0x80

    // m_n = U_q - e_k for significant samples, 0 otherwise
    __m128i m_n;
    w0 = _mm_srli_epi32(flags, 15); // e_k
    m_n = _mm_sub_epi32(U_q, w0);
    m_n = _mm_andnot_si128(insig, m_n);

    // prefix sums locate each sample's first bit in the MagSgn window
    __m128i inc_sum = m_n; // inclusive scan
    inc_sum = _mm_add_epi32(inc_sum, _mm_bslli_si128(inc_sum, 4));
    inc_sum = _mm_add_epi32(inc_sum, _mm_bslli_si128(inc_sum, 8));
    int total_mn = _mm_extract_epi16(inc_sum, 6);
    __m128i ex_sum = _mm_bslli_si128(inc_sum, 4); // exclusive scan

    // fetch up to 128 bits and consume what this quad needs
    __m128i ms_vec = frwd_fetch<0xFF>(magsgn);
    frwd_advance(magsgn, (ui32)total_mn);

    // gather two bytes per sample at its starting byte index, then use
    // a multiply to shift away the starting bit index within the byte
    __m128i byte_idx = _mm_srli_epi32(ex_sum, 3);
    __m128i bit_idx = _mm_and_si128(ex_sum, _mm_set1_epi32(7));
    byte_idx = _mm_shuffle_epi8(byte_idx,
      _mm_set_epi32(0x0C0C0C0C, 0x08080808, 0x04040404, 0x00000000));
    byte_idx = _mm_add_epi32(byte_idx, _mm_set1_epi32(0x03020100));
    __m128i d0 = _mm_shuffle_epi8(ms_vec, byte_idx);
    byte_idx = _mm_add_epi32(byte_idx, _mm_set1_epi32(0x01010101));
    __m128i d1 = _mm_shuffle_epi8(ms_vec, byte_idx);

    bit_idx = _mm_or_si128(bit_idx, _mm_slli_epi32(bit_idx, 16));
    __m128i bit_shift = _mm_shuffle_epi8(
      _mm_set_epi8(1, 3, 7, 15, 31, 63, 127, -1,
                   1, 3, 7, 15, 31, 63, 127, -1), bit_idx);
    bit_shift = _mm_add_epi16(bit_shift, _mm_set1_epi16(0x0101));
    d0 = _mm_mullo_epi16(d0, bit_shift);
    d0 = _mm_srli_epi16(d0, 8); // keep the aligned low bytes
    d1 = _mm_mullo_epi16(d1, bit_shift);
    d1 = _mm_and_si128(d1, _mm_set1_epi32((si32)0xFF00FF00));
    d0 = _mm_or_si128(d0, d1);  // up to 32 aligned bits per sample

    // mask = ((2 - e_k) << (U_q - 1)) - 1 keeps m_n bits per sample
    __m128i shift;
    __m128i ones = _mm_set1_epi32(1);
    __m128i twos = _mm_set1_epi32(2);
    __m128i U_q_m1 = _mm_sub_epi32(U_q, ones);
    U_q_m1 = _mm_and_si128(U_q_m1, _mm_set_epi32(0,0,0,0x1F));
    w0 = _mm_sub_epi32(twos, w0); // 2 - e_k
    shift = _mm_sll_epi32(w0, U_q_m1);
    ms_vec = _mm_and_si128(d0, _mm_sub_epi32(shift, ones));

    // insert e_1 as an MSB, the sign, the implicit 1; scale by 2^(p-1)
    w0 = _mm_and_si128(flags, _mm_set1_epi32(0x800)); // e_1
    w0 = _mm_cmpeq_epi32(w0, _mm_setzero_si128());
    w0 = _mm_andnot_si128(w0, shift); // put shift where e_1 is set
    ms_vec = _mm_or_si128(ms_vec, w0);
    w0 = _mm_slli_epi32(ms_vec, 31);  // the sign bit
    ms_vec = _mm_or_si128(ms_vec, ones);
    __m128i tvn = ms_vec;
    ms_vec = _mm_add_epi32(ms_vec, twos);
    ms_vec = _mm_slli_epi32(ms_vec, (si32)p - 1);
    ms_vec = _mm_or_si128(ms_vec, w0); // attach the sign
    row = _mm_andnot_si128(insig, ms_vec); // only significant samples

    // accumulate exponent bounds (vn) for the quad row below
    ms_vec = _mm_andnot_si128(insig, tvn);
    if (N == 0)
      tvn = _mm_shuffle_epi8(ms_vec,
        _mm_set_epi32(-1, -1, 0x0F0E0D0C, 0x07060504));
    else
      tvn = _mm_shuffle_epi8(ms_vec,
        _mm_set_epi32(-1, 0x0F0E0D0C, 0x07060504, -1));
    vn = _mm_or_si128(vn, tvn);
  }
  return row;
}
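// decode_two_quad16(): the same reconstruction packed into 16-bit lanes,
// handling two quads (eight samples) at once; used on the path where all
// magnitudes fit in 16 bits.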
static inline __m128i decode_two_quad16(const __m128i inf_u_q, __m128i U_q,
                                        frwd_struct_ssse3* magsgn,
                                        ui32 p, __m128i& vn)
{
  __m128i w0, insig, flags, row;

  row = _mm_setzero_si128();
  // replicate the two quads' info across the 16-bit lanes
  w0 = _mm_shuffle_epi8(inf_u_q,
    _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504,
                  0x0100, 0x0100, 0x0100, 0x0100));
  flags = _mm_and_si128(w0,
    _mm_set_epi16((si16)0x8880, 0x4440, 0x2220, 0x1110,
                  (si16)0x8880, 0x4440, 0x2220, 0x1110));
  insig = _mm_cmpeq_epi16(flags, _mm_setzero_si128());
  if (_mm_movemask_epi8(insig) != 0xFFFF) // not all samples insignificant
  {
    U_q = _mm_shuffle_epi8(U_q,
      _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504,
                    0x0100, 0x0100, 0x0100, 0x0100));
    flags = _mm_mullo_epi16(flags, _mm_set_epi16(1,2,4,8,1,2,4,8));
    // flags now aligns e_k at 0x8000, e_1 at 0x800, and rho at 0x80

    // m_n = U_q - e_k for significant samples, 0 otherwise
    __m128i m_n;
    w0 = _mm_srli_epi16(flags, 15); // e_k
    m_n = _mm_sub_epi16(U_q, w0);
    m_n = _mm_andnot_si128(insig, m_n);

    // prefix sums locate each sample's first bit in the MagSgn window
    __m128i inc_sum = m_n; // inclusive scan
    inc_sum = _mm_add_epi16(inc_sum, _mm_bslli_si128(inc_sum, 2));
    inc_sum = _mm_add_epi16(inc_sum, _mm_bslli_si128(inc_sum, 4));
    inc_sum = _mm_add_epi16(inc_sum, _mm_bslli_si128(inc_sum, 8));
    int total_mn = _mm_extract_epi16(inc_sum, 7);
    __m128i ex_sum = _mm_bslli_si128(inc_sum, 2); // exclusive scan

    // fetch up to 128 bits and consume what the two quads need
    __m128i ms_vec = frwd_fetch<0xFF>(magsgn);
    frwd_advance(magsgn, (ui32)total_mn);

    // gather two bytes per sample, then shift with a multiply
    __m128i byte_idx = _mm_srli_epi16(ex_sum, 3);
    __m128i bit_idx = _mm_and_si128(ex_sum, _mm_set1_epi16(7));
    byte_idx = _mm_shuffle_epi8(byte_idx,
      _mm_set_epi16(0x0E0E, 0x0C0C, 0x0A0A, 0x0808,
                    0x0606, 0x0404, 0x0202, 0x0000));
    byte_idx = _mm_add_epi16(byte_idx, _mm_set1_epi16(0x0100));
    __m128i d0 = _mm_shuffle_epi8(ms_vec, byte_idx);
    byte_idx = _mm_add_epi16(byte_idx, _mm_set1_epi16(0x0101));
    __m128i d1 = _mm_shuffle_epi8(ms_vec, byte_idx);

    __m128i bit_shift = _mm_shuffle_epi8(
      _mm_set_epi8(1, 3, 7, 15, 31, 63, 127, -1,
                   1, 3, 7, 15, 31, 63, 127, -1), bit_idx);
    bit_shift = _mm_add_epi16(bit_shift, _mm_set1_epi16(0x0101));
    d0 = _mm_mullo_epi16(d0, bit_shift);
    d0 = _mm_srli_epi16(d0, 8); // keep the aligned low bytes
    d1 = _mm_mullo_epi16(d1, bit_shift);
    d1 = _mm_and_si128(d1, _mm_set1_epi16((si16)0xFF00));
    d0 = _mm_or_si128(d0, d1);  // up to 16 aligned bits per sample

    // mask = ((2 - e_k) << (U_q - 1)) - 1 keeps m_n bits per sample;
    // the two quads can have different U_q, so shift them separately
    __m128i shift, t0, t1, Uq0, Uq1;
    __m128i ones = _mm_set1_epi16(1);
    __m128i twos = _mm_set1_epi16(2);
    __m128i U_q_m1 = _mm_sub_epi32(U_q, ones);
    Uq0 = _mm_and_si128(U_q_m1, _mm_set_epi32(0,0,0,0x1F));
    Uq1 = _mm_bsrli_si128(U_q_m1, 14);
    w0 = _mm_sub_epi16(twos, w0); // 2 - e_k
    t0 = _mm_and_si128(w0, _mm_set_epi64x(0, -1)); // first quad
    t1 = _mm_and_si128(w0, _mm_set_epi64x(-1, 0)); // second quad
    t0 = _mm_sll_epi16(t0, Uq0);
    t1 = _mm_sll_epi16(t1, Uq1);
    shift = _mm_or_si128(t0, t1);
    ms_vec = _mm_and_si128(d0, _mm_sub_epi16(shift, ones));

    // insert e_1 as an MSB, the sign, the implicit 1; scale by 2^(p-1)
    w0 = _mm_and_si128(flags, _mm_set1_epi16(0x800)); // e_1
    w0 = _mm_cmpeq_epi16(w0, _mm_setzero_si128());
    w0 = _mm_andnot_si128(w0, shift); // put shift where e_1 is set
    ms_vec = _mm_or_si128(ms_vec, w0);
    w0 = _mm_slli_epi16(ms_vec, 15);  // the sign bit
    ms_vec = _mm_or_si128(ms_vec, ones);
    __m128i tvn = ms_vec;
    ms_vec = _mm_add_epi16(ms_vec, twos);
    ms_vec = _mm_slli_epi16(ms_vec, (si32)p - 1);
    ms_vec = _mm_or_si128(ms_vec, w0); // attach the sign
    row = _mm_andnot_si128(insig, ms_vec); // only significant samples

    // accumulate exponent bounds (vn) for the quad row below
    ms_vec = _mm_andnot_si128(insig, tvn);
    w0 = _mm_shuffle_epi8(ms_vec,
      _mm_set_epi16(-1, -1, -1, -1, -1, -1, 0x0706, 0x0302));
    vn = _mm_or_si128(vn, w0);
    w0 = _mm_shuffle_epi8(ms_vec,
      _mm_set_epi16(-1, -1, -1, -1, -1, 0x0F0E, 0x0B0A, -1));
    vn = _mm_or_si128(vn, w0);
  }
  return row;
}
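// ojph_decode_codeblock_ssse3(): the block decoder entry point. It
// validates pass counts and precision, extracts lcup and scup, decodes
// the VLC/UVLC information of all quad pairs into scratch, then runs the
// cleanup, SigProp, and MagRef passes.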
bool ojph_decode_codeblock_ssse3(ui8* coded_data, ui32* decoded_data,
                                 ui32 missing_msbs, ui32 num_passes,
                                 ui32 lengths1, ui32 lengths2,
                                 ui32 width, ui32 height, ui32 stride,
                                 bool stripe_causal)
{
  static bool insufficient_precision = false;
  static bool modify_code = false;
  static bool truncate_spp_mrp = false;

  if (num_passes > 1 && lengths2 == 0)
  {
    OJPH_WARN(0x00010001, "A malformed codeblock that has more than "
                          "one coding pass, but zero length for "
                          "2nd and potential 3rd pass.");
    num_passes = 1;
  }

  if (num_passes > 3)
  {
    OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; "
                          "This codeblock has %d passes.", num_passes);
    return false;
  }

  if (missing_msbs > 30) // p < 0
  {
    if (insufficient_precision == false)
    {
      insufficient_precision = true;
      OJPH_WARN(0x00010003, "32 bits are not enough to decode this "
                            "codeblock. This message will not be "
                            "displayed again.");
    }
    return false;
  }
  else if (missing_msbs == 30) // p == 0
  {
    if (modify_code == false) {
      modify_code = true;
      OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup "
                            "pass. The code can be modified to support "
                            "this case. This message will not be "
                            "displayed again.");
    }
    return false;
  }
  else if (missing_msbs == 29) // p == 1: only the cleanup pass fits
  {
    if (num_passes > 1) {
      num_passes = 1;
      if (truncate_spp_mrp == false) {
        truncate_spp_mrp = true;
        OJPH_WARN(0x00010005, "Not enough precision to decode the SigProp "
                              "nor MagRef passes; both will be skipped. "
                              "This message will not be displayed "
                              "again.");
      }
    }
  }
  ui32 p = 30 - missing_msbs; // the least significant cleanup bitplane

  if (lengths1 < 2) // sanity check on the cleanup segment length
  {
    OJPH_WARN(0x00010006, "Wrong codeblock length.");
    return false;
  }

  // extract the cleanup length (lcup) and the length of its suffix
  // holding the MEL and VLC data (scup)
  int lcup, scup;
  lcup = (int)lengths1;
  scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
  if (scup < 2 || scup > lcup || scup > 4079) // something is wrong
    return false;
// scratch interleaves, per row of quad pairs, the VLC-decoded quad
// information with the decoded significance
ui16 scratch[8 * 513] = {0}; // 8+ kB

ui32 sstr = ((width + 2u) + 7u) & ~7u; // scratch stride, multiple of 8

assert((stride & 0x3) == 0);
ui32 mmsbp2 = missing_msbs + 2;
dec_mel_st mel;
mel_init(&mel, coded_data, lcup, scup);
rev_struct vlc;
rev_init(&vlc, coded_data, lcup, scup);
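// First row of quad pairs: quad significance patterns come from vlc_tbl0
// entries selected by the context c_q and the next 7 VLC bits, with the
// MEL decoder deciding whether a zero-context quad is coded at all; the
// UVLC code then yields u_q for each quad of the pair.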
for (ui32 x = 0; x < width; sp += 4)
{
  // ... (quad 0: get a MEL run and a vlc_tbl0 entry) ...
  t0 = (run == -1) ? t0 : 0;
  // ...
  c_q = ((t0 & 0x10U) << 3) | ((t0 & 0xE0U) << 2); // context for quad 1
  // ...
  t1 = vlc_tbl0[c_q + (vlc_val & 0x7F)];
  // ...
  if (c_q == 0 && x < width)
  {
    // ... (zero context: consult the MEL decoder again) ...
    t1 = (run == -1) ? t1 : 0;
    // ...
  }
  t1 = x < width ? t1 : 0;
  // ...
  c_q = ((t1 & 0x10U) << 3) | ((t1 & 0xE0U) << 2); // context for next pair
  // ...
  // UVLC decoding of u_q for the two quads
  ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
  if (uvlc_mode == 0xc0) // both quads have u_q
  {
    run = mel_get_run(&mel); // a MEL event selects the u_q extension
    uvlc_mode += (run == -1) ? 0x40 : 0;
  }
  // ...
  ui32 len = uvlc_entry & 0xF;           // combined suffix length
  ui32 tmp = vlc_val & ((1 << len) - 1); // suffix bits for both quads
  // ...
  len = uvlc_entry & 0x7;                // quad 0 suffix length
  // ...
  ui16 u_q = (ui16)(1 + (uvlc_entry & 7) + (tmp & ~(0xFFU << len))); // quad 0
  // ...
  u_q = (ui16)(1 + (uvlc_entry >> 3) + (tmp >> len));                // quad 1
// non-initial rows of quad pairs: the context now also includes the
// quad row above, and vlc_tbl1/uvlc_tbl1 are used
for (ui32 y = 2; y < height; y += 2)
{
  // ...
  ui16 *sp = scratch + (y >> 1) * sstr;
  // ...
  for (ui32 x = 0; x < width; sp += 4)
  {
    // context from the row above (quad 0)
    c_q |= ((sp[0 - (si32)sstr] & 0xA0U) << 2);
    c_q |= ((sp[2 - (si32)sstr] & 0x20U) << 4);
    // ... (get a MEL run and a vlc_tbl1 entry) ...
    t0 = (run == -1) ? t0 : 0;
    // ...
    c_q = ((t0 & 0x40U) << 2) | ((t0 & 0x80U) << 1);
    c_q |= sp[0 - (si32)sstr] & 0x80;
    c_q |= ((sp[2 - (si32)sstr] & 0xA0U) << 2);
    c_q |= ((sp[4 - (si32)sstr] & 0x20U) << 4);
    // ...
    t1 = vlc_tbl1[c_q + (vlc_val & 0x7F)];
    // ...
    if (c_q == 0 && x < width)
    {
      // ... (zero context: consult the MEL decoder again) ...
      t1 = (run == -1) ? t1 : 0;
      // ...
    }
    t1 = x < width ? t1 : 0;
    // ...
    c_q = ((t1 & 0x40U) << 2) | ((t1 & 0x80U) << 1);
    c_q |= sp[2 - (si32)sstr] & 0x80;
    // ...
    // UVLC decoding; kappa comes from the neighborhood, so no "1 +" here
    ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
    // ...
    ui32 len = uvlc_entry & 0xF;           // combined suffix length
    ui32 tmp = vlc_val & ((1 << len) - 1); // suffix bits for both quads
    // ...
    len = uvlc_entry & 0x7;                // quad 0 suffix length
    // ...
    ui16 u_q = (ui16)((uvlc_entry & 7) + (tmp & ~(0xFFU << len))); // quad 0
    // ...
    u_q = (ui16)((uvlc_entry >> 3) + (tmp >> len));                // quad 1
const int v_n_size = 512 + 8;
ui32 v_n_scratch[2 * v_n_size] = {0}; // 4+ kB

frwd_struct_ssse3 magsgn;
frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);

{ // first quad row
  ui16 *sp = scratch;
  ui32 *vp = v_n_scratch;
  ui32 *dp = decoded_data;

  for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
  {
    __m128i w0, w1;
    __m128i inf_u_q, U_q;
    // determine U_q
    inf_u_q = _mm_loadu_si128((__m128i*)sp);
    U_q = _mm_srli_epi32(inf_u_q, 16);

    w0 = _mm_cmpgt_epi32(U_q, _mm_set1_epi32((int)mmsbp2));
    int i = _mm_movemask_epi8(w0);
    if (i & 0xFF) // U_q exceeds the available bitplanes
      return false;

    __m128i vn = _mm_set1_epi32(2);
    __m128i row0 = decode_one_quad32<0>(inf_u_q, U_q, &magsgn, p, vn);
    __m128i row1 = decode_one_quad32<1>(inf_u_q, U_q, &magsgn, p, vn);
    w0 = _mm_loadu_si128((__m128i*)vp);
    w0 = _mm_and_si128(w0, _mm_set_epi32(0,0,0,-1)); // keep the left value
    w0 = _mm_or_si128(w0, vn);
    _mm_storeu_si128((__m128i*)vp, w0);

    // transpose the interleaved rows and store them
    w0 = _mm_unpacklo_epi32(row0, row1);
    w1 = _mm_unpackhi_epi32(row0, row1);
    row0 = _mm_unpacklo_epi32(w0, w1);
    row1 = _mm_unpackhi_epi32(w0, w1);
    _mm_store_si128((__m128i*)dp, row0);
    _mm_store_si128((__m128i*)(dp + stride), row1);
  }
}

for (ui32 y = 2; y < height; y += 2)
{
  { // compute the exponents (E^max) of the row above from its vn values
    ui32 *vp = v_n_scratch;
    const __m128i lut_lo = _mm_set_epi8(
      4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 31);
    const __m128i lut_hi = _mm_set_epi8(
      0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 31);
    const __m128i nibble_mask = _mm_set1_epi8(0x0F);
    const __m128i byte_offset8 = _mm_set1_epi16(8);
    const __m128i byte_offset16 = _mm_set1_epi16(16);
    const __m128i cc = _mm_set1_epi32(31);
    for (ui32 x = 0; x <= width; x += 8, vp += 4)
    {
      __m128i v, t; // leading-zero count via a nibble LUT
      v = _mm_loadu_si128((__m128i*)vp);

      t = _mm_and_si128(nibble_mask, v);
      v = _mm_and_si128(_mm_srli_epi16(v, 4), nibble_mask);
      t = _mm_shuffle_epi8(lut_lo, t);
      v = _mm_shuffle_epi8(lut_hi, v);
      v = _mm_min_epu8(v, t);   // clz within each byte

      t = _mm_srli_epi16(v, 8);
      v = _mm_or_si128(v, byte_offset8);
      v = _mm_min_epu8(v, t);   // clz within each 16 bits

      t = _mm_srli_epi32(v, 16);
      v = _mm_or_si128(v, byte_offset16);
      v = _mm_min_epu8(v, t);   // clz within each 32 bits

      v = _mm_sub_epi16(cc, v); // 31 - clz = the MSB position
      _mm_storeu_si128((__m128i*)(vp + v_n_size), v);
    }
  }

  ui32 *vp = v_n_scratch;
  ui16 *sp = scratch + (y >> 1) * sstr;
  ui32 *dp = decoded_data + y * stride;

  for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
  {
    __m128i w0, w1;
    __m128i inf_u_q, U_q;
    // U_q = u_q + kappa, where kappa = max(1, E^max) when the quad above
    // had more than one significant sample
    __m128i gamma, emax, kappa, u_q;

    inf_u_q = _mm_loadu_si128((__m128i*)sp);
    gamma = _mm_and_si128(inf_u_q, _mm_set1_epi32(0xF0));
    w0 = _mm_sub_epi32(gamma, _mm_set1_epi32(1));
    gamma = _mm_and_si128(gamma, w0); // != 0 iff more than 1 sig. sample
    gamma = _mm_cmpeq_epi32(gamma, _mm_setzero_si128());

    emax = _mm_loadu_si128((__m128i*)(vp + v_n_size));
    w0 = _mm_bsrli_si128(emax, 4);  // the neighbor on the right
    emax = _mm_max_epi16(w0, emax);
    emax = _mm_andnot_si128(gamma, emax);

    kappa = _mm_set1_epi32(1);
    kappa = _mm_max_epi16(emax, kappa);

    u_q = _mm_srli_epi32(inf_u_q, 16);
    U_q = _mm_add_epi32(u_q, kappa);

    w0 = _mm_cmpgt_epi32(U_q, _mm_set1_epi32((int)mmsbp2));
    int i = _mm_movemask_epi8(w0);
    if (i & 0xFF)
      return false;

    __m128i vn = _mm_set1_epi32(2);
    __m128i row0 = decode_one_quad32<0>(inf_u_q, U_q, &magsgn, p, vn);
    __m128i row1 = decode_one_quad32<1>(inf_u_q, U_q, &magsgn, p, vn);
    w0 = _mm_loadu_si128((__m128i*)vp);
    w0 = _mm_and_si128(w0, _mm_set_epi32(0,0,0,-1));
    w0 = _mm_or_si128(w0, vn);
    _mm_storeu_si128((__m128i*)vp, w0);

    w0 = _mm_unpacklo_epi32(row0, row1);
    w1 = _mm_unpackhi_epi32(row0, row1);
    row0 = _mm_unpacklo_epi32(w0, w1);
    row1 = _mm_unpackhi_epi32(w0, w1);
    _mm_store_si128((__m128i*)dp, row0);
    _mm_store_si128((__m128i*)(dp + stride), row1);
  }
}
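// Cleanup pass, 16-bit sample path: the same structure with 16-bit
// exponent tracking; decode_two_quad16() handles two quads per iteration,
// and the decoded 16-bit samples are expanded into the high words of the
// 32-bit output rows.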
const int v_n_size = 512 + 8;
ui16 v_n_scratch[2 * v_n_size] = {0}; // 2+ kB

frwd_struct_ssse3 magsgn;
frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);

{ // first quad row
  ui16 *sp = scratch;
  ui16 *vp = v_n_scratch;
  ui32 *dp = decoded_data;

  for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
  {
    __m128i w0, w1;
    __m128i inf_u_q, U_q;
    // determine U_q
    inf_u_q = _mm_loadu_si128((__m128i*)sp);
    U_q = _mm_srli_epi32(inf_u_q, 16);

    w0 = _mm_cmpgt_epi32(U_q, _mm_set1_epi32((int)mmsbp2));
    int i = _mm_movemask_epi8(w0);
    if (i & 0xFF)
      return false;

    __m128i vn = _mm_set1_epi16(2);
    __m128i row = decode_two_quad16(inf_u_q, U_q, &magsgn, p, vn);
    w0 = _mm_loadu_si128((__m128i*)vp);
    w0 = _mm_and_si128(w0, _mm_set_epi16(0,0,0,0,0,0,0,-1));
    w0 = _mm_or_si128(w0, vn);
    _mm_storeu_si128((__m128i*)vp, w0);

    // move the 16-bit samples into the high words of two 32-bit rows
    w0 = _mm_shuffle_epi8(row,
      _mm_set_epi16(0x0D0C, -1, 0x0908, -1,
                    0x0504, -1, 0x0100, -1));
    _mm_store_si128((__m128i*)dp, w0);
    w1 = _mm_shuffle_epi8(row,
      _mm_set_epi16(0x0F0E, -1, 0x0B0A, -1,
                    0x0706, -1, 0x0302, -1));
    _mm_store_si128((__m128i*)(dp + stride), w1);
  }
}

for (ui32 y = 2; y < height; y += 2)
{
  { // compute the exponents (E^max) of the row above from its vn values
    ui16 *vp = v_n_scratch;
    const __m128i lut_lo = _mm_set_epi8(
      4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 15);
    const __m128i lut_hi = _mm_set_epi8(
      0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 15);
    const __m128i nibble_mask = _mm_set1_epi8(0x0F);
    const __m128i byte_offset8 = _mm_set1_epi16(8);
    const __m128i cc = _mm_set1_epi16(15);
    for (ui32 x = 0; x <= width; x += 16, vp += 8)
    {
      __m128i v, t; // leading-zero count via a nibble LUT
      v = _mm_loadu_si128((__m128i*)vp);

      t = _mm_and_si128(nibble_mask, v);
      v = _mm_and_si128(_mm_srli_epi16(v, 4), nibble_mask);
      t = _mm_shuffle_epi8(lut_lo, t);
      v = _mm_shuffle_epi8(lut_hi, v);
      v = _mm_min_epu8(v, t);   // clz within each byte

      t = _mm_srli_epi16(v, 8);
      v = _mm_or_si128(v, byte_offset8);
      v = _mm_min_epu8(v, t);   // clz within each 16 bits

      v = _mm_sub_epi16(cc, v); // 15 - clz = the MSB position
      _mm_storeu_si128((__m128i*)(vp + v_n_size), v);
    }
  }

  ui16 *vp = v_n_scratch;
  ui16 *sp = scratch + (y >> 1) * sstr;
  ui32 *dp = decoded_data + y * stride;

  for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
  {
    __m128i w0, w1;
    __m128i inf_u_q, U_q;
    // U_q = u_q + kappa, where kappa = max(1, E^max) when the quad above
    // had more than one significant sample
    __m128i gamma, emax, kappa, u_q;

    inf_u_q = _mm_loadu_si128((__m128i*)sp);
    gamma = _mm_and_si128(inf_u_q, _mm_set1_epi32(0xF0));
    w0 = _mm_sub_epi32(gamma, _mm_set1_epi32(1));
    gamma = _mm_and_si128(gamma, w0); // != 0 iff more than 1 sig. sample
    gamma = _mm_cmpeq_epi32(gamma, _mm_setzero_si128());

    emax = _mm_loadu_si128((__m128i*)(vp + v_n_size));
    w0 = _mm_bsrli_si128(emax, 2);  // the neighbor on the right
    emax = _mm_max_epi16(w0, emax);
    emax = _mm_shuffle_epi8(emax,   // expand to 32-bit lanes
      _mm_set_epi16(-1, 0x0706, -1, 0x0504,
                    -1, 0x0302, -1, 0x0100));
    emax = _mm_andnot_si128(gamma, emax);

    kappa = _mm_set1_epi32(1);
    kappa = _mm_max_epi16(emax, kappa);

    u_q = _mm_srli_epi32(inf_u_q, 16);
    U_q = _mm_add_epi32(u_q, kappa);

    w0 = _mm_cmpgt_epi32(U_q, _mm_set1_epi32((int)mmsbp2));
    int i = _mm_movemask_epi8(w0);
    if (i & 0xFF)
      return false;

    __m128i vn = _mm_set1_epi16(2);
    __m128i row = decode_two_quad16(inf_u_q, U_q, &magsgn, p, vn);
    w0 = _mm_loadu_si128((__m128i*)vp);
    w0 = _mm_and_si128(w0, _mm_set_epi16(0,0,0,0,0,0,0,-1));
    w0 = _mm_or_si128(w0, vn);
    _mm_storeu_si128((__m128i*)vp, w0);

    w0 = _mm_shuffle_epi8(row,
      _mm_set_epi16(0x0D0C, -1, 0x0908, -1,
                    0x0504, -1, 0x0100, -1));
    _mm_store_si128((__m128i*)dp, w0);
    w1 = _mm_shuffle_epi8(row,
      _mm_set_epi16(0x0F0E, -1, 0x0B0A, -1,
                    0x0706, -1, 0x0302, -1));
    _mm_store_si128((__m128i*)(dp + stride), w1);
  }
}
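// Between the cleanup and refinement passes, the per-quad significance is
// repacked into sigma: one bit per sample, gathered four columns at a time
// into ui16 words, with one row of words per 4-sample-high stripe.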
ui16* const sigma = scratch;

ui32 mstr = (width + 3u) >> 2;   // quads per row
mstr = ((mstr + 2u) + 7u) & ~7u; // pad to multiples of 8

const __m128i mask_3 = _mm_set1_epi32(0x30);
const __m128i mask_C = _mm_set1_epi32(0xC0);
const __m128i shuffle_mask = _mm_set_epi32(-1, -1, -1, 0x0C080400);
ui32 y;
for (y = 0; y < height; y += 4)
{
  ui16* sp = scratch + (y >> 1) * sstr;
  ui16* dp = sigma + (y >> 2) * mstr;
  for (ui32 x = 0; x < width; x += 8, sp += 8, dp += 2)
  {
    __m128i s0, s1, u3, uC, t0, t1;

    s0 = _mm_loadu_si128((__m128i*)(sp));
    u3 = _mm_and_si128(s0, mask_3);
    u3 = _mm_srli_epi32(u3, 4);
    uC = _mm_and_si128(s0, mask_C);
    uC = _mm_srli_epi32(uC, 2);
    t0 = _mm_or_si128(u3, uC); // the upper quad row's significance

    s1 = _mm_loadu_si128((__m128i*)(sp + sstr));
    u3 = _mm_and_si128(s1, mask_3);
    u3 = _mm_srli_epi32(u3, 2);
    uC = _mm_and_si128(s1, mask_C);
    t1 = _mm_or_si128(u3, uC); // the lower quad row's significance

    __m128i r = _mm_or_si128(t0, t1);
    r = _mm_shuffle_epi8(r, shuffle_mask); // gather one byte per lane

    dp[0] = (ui16)_mm_extract_epi16(r, 0);
    dp[1] = (ui16)_mm_extract_epi16(r, 1);
  }
}
{ // clear an extra row of sigma
  ui16* dp = sigma + (y >> 2) * mstr;
  __m128i zero = _mm_setzero_si128();
  for (ui32 x = 0; x < width; x += 32, dp += 8)
    _mm_storeu_si128((__m128i*)dp, zero);
}
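// SigProp pass: for each stripe of up to 4 rows, the neighborhood of the
// significant samples forms the candidate set; one stream bit per
// candidate decides significance (newly significant samples enlarge the
// set), a sign bit follows for each new sample, and new samples are set
// to 3 << (p - 2) with the decoded sign.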
ui16 prev_row_sig[256 + 8] = {0};

frwd_struct_ssse3 sigprop;
frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);

for (ui32 y = 0; y < height; y += 4)
{
  ui32 pattern = 0xFFFFu; // masks the stripe rows inside the block
  if (height - y < 4) {
    pattern = 0x7777u;
    if (height - y < 3) {
      pattern = 0x3333u;
      if (height - y < 2)
        pattern = 0x1111u;
    }
  }

  ui16 *prev_sig = prev_row_sig;
  ui16 *cur_sig = sigma + (y >> 2) * mstr;
  ui32 *dpp = decoded_data + y * stride;
  for (ui32 x = 0; x < width; x += 4, dpp += 4, ++cur_sig, ++prev_sig)
  {
    // ... (s = stripe columns beyond the block width) ...
    pattern = pattern >> (s * 4); // trim columns outside the block

    // collect the neighborhood of the significant samples
    ui32 ps = *(ui32*)prev_sig;         // the stripe above
    ui32 ns = *(ui32*)(cur_sig + mstr); // the stripe below
    ui32 u = (ps & 0x88888888) >> 3;    // the row above this stripe
    if (!stripe_causal)
      u |= (ns & 0x11111111) << 3;      // the row below this stripe

    ui32 cs = *(ui32*)cur_sig;
    ui32 mbr = cs | u;
    mbr |= (cs & 0x77777777) << 1;      // vertical neighbors
    mbr |= (cs & 0xEEEEEEEE) >> 1;
    // ... (horizontal neighbors; new_sig = the candidate samples) ...

    if (new_sig)
    {
      // read one bit per candidate, column by column; a 1 bit makes the
      // sample significant and adds its neighbors as candidates
      __m128i cwd_vec = frwd_fetch<0>(&sigprop);
      ui32 cwd = (ui32)_mm_extract_epi16(cwd_vec, 0);
      ui32 cnt = 0;
      ui32 col_mask = 0xFu;
      ui32 inv_sig = ~cs & pattern;
      for (int i = 0; i < 16; i += 4, col_mask <<= 4)
      {
        if ((col_mask & new_sig) == 0) // no candidate in this column
          continue;

        ui32 sample_mask = 0x1111u & col_mask; // first row
        if (new_sig & sample_mask)
        {
          new_sig &= ~sample_mask;
          if (cwd & 1)
          {
            ui32 t = 0x33u << i; // neighbors in this and the next column
            new_sig |= t & inv_sig;
          }
          cwd >>= 1; ++cnt;
        }

        sample_mask += sample_mask; // second row
        if (new_sig & sample_mask)
        {
          new_sig &= ~sample_mask;
          if (cwd & 1)
          {
            ui32 t = 0x76u << i;
            new_sig |= t & inv_sig;
          }
          cwd >>= 1; ++cnt;
        }

        sample_mask += sample_mask; // third row
        if (new_sig & sample_mask)
        {
          new_sig &= ~sample_mask;
          if (cwd & 1)
          {
            ui32 t = 0xECu << i;
            new_sig |= t & inv_sig;
          }
          cwd >>= 1; ++cnt;
        }

        sample_mask += sample_mask; // fourth row
        if (new_sig & sample_mask)
        {
          new_sig &= ~sample_mask;
          if (cwd & 1)
          {
            ui32 t = 0xC8u << i;
            new_sig |= t & inv_sig;
          }
          cwd >>= 1; ++cnt;
        }
      }
      // the sign bits follow the significance bits
      cwd |= (ui32)_mm_extract_epi16(cwd_vec, 1) << (16 - cnt);

      if (new_sig)
      {
        // expand new_sig to a byte per sample: 0xFF where new
        __m128i new_sig_vec = _mm_set1_epi16((si16)new_sig);
        new_sig_vec = _mm_shuffle_epi8(new_sig_vec,
          _mm_set_epi8(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0));
        new_sig_vec = _mm_and_si128(new_sig_vec,
          _mm_set1_epi64x((si64)0x8040201008040201));
        new_sig_vec = _mm_cmpeq_epi8(new_sig_vec,
          _mm_set1_epi64x((si64)0x8040201008040201));

        // a prefix sum gives each new sample the index of its sign bit
        __m128i inc_sum = new_sig_vec;
        inc_sum = _mm_abs_epi8(inc_sum);
        inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 1));
        inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 2));
        inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 4));
        inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 8));
        cnt += (ui32)_mm_extract_epi16(inc_sum, 7) >> 8;
        __m128i ex_sum = _mm_bslli_si128(inc_sum, 1);

        // expand cwd the same way: a byte per bit
        __m128i cwd_vec = _mm_set1_epi16((si16)cwd);
        cwd_vec = _mm_shuffle_epi8(cwd_vec,
          _mm_set_epi8(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0));
        cwd_vec = _mm_and_si128(cwd_vec,
          _mm_set1_epi64x((si64)0x8040201008040201));
        cwd_vec = _mm_cmpeq_epi8(cwd_vec,
          _mm_set1_epi64x((si64)0x8040201008040201));
        cwd_vec = _mm_abs_epi8(cwd_vec);

        // gather each new sample's sign bit
        __m128i v = _mm_shuffle_epi8(cwd_vec, ex_sum);

        __m128i m =
          _mm_set_epi8(-1,-1,-1,12,-1,-1,-1,8,-1,-1,-1,4,-1,-1,-1,0);
        __m128i val = _mm_set1_epi32(3 << (p - 2)); // the new sample value

        ui32 *dp = dpp;
        for (int c = 0; c < 4; ++c) { // one row of the stripe at a time
          __m128i s0, s0_ns, s0_val;

          s0 = _mm_load_si128((__m128i*)dp);

          s0_ns = _mm_shuffle_epi8(new_sig_vec, m);
          s0_ns = _mm_cmpeq_epi32(s0_ns, _mm_set1_epi32(0xFF));

          s0_val = _mm_shuffle_epi8(v, m);
          s0_val = _mm_slli_epi32(s0_val, 31); // the sign bit
          s0_val = _mm_or_si128(s0_val, val);
          s0_val = _mm_and_si128(s0_val, s0_ns);

          s0 = _mm_or_si128(s0, s0_val);

          _mm_store_si128((__m128i*)dp, s0);

          dp += stride;
          m = _mm_add_epi32(m, _mm_set1_epi32(1)); // the next row's bytes
        }
      }
      frwd_advance(&sigprop, cnt);
    }

    // remember this stripe's new significance for the next stripe row
    *prev_sig = (ui16)(new_sig);

    // spread the new significance vertically before moving right
    ui32 t = new_sig;
    new_sig |= (t & 0x7777) << 1;
    new_sig |= (t & 0xEEEE) >> 1;
    // ...
  }
}
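// MagRef pass: every sample that was significant after the cleanup pass
// consumes one bit from the backward-growing MagRef stream; the bit is
// mapped to a correction of 1 or 3 at bit position p - 2 and applied
// with an xor.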
rev_struct magref;
rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);

for (ui32 y = 0; y < height; y += 4)
{
  ui16 *cur_sig = sigma + (y >> 2) * mstr; // significance of this stripe
  ui32 *dpp = decoded_data + y * stride;
  for (ui32 i = 0; i < width; i += 4, dpp += 4)
  {
    ui16 sig = *cur_sig++;
    if (sig) // any significant samples in this stripe column group?
    {
      ui32 total_bits;
      // expand sig to a byte per sample: 1 where significant
      __m128i sig_vec = _mm_set1_epi16((si16)sig);
      sig_vec = _mm_shuffle_epi8(sig_vec,
        _mm_set_epi8(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0));
      sig_vec = _mm_and_si128(sig_vec,
        _mm_set1_epi64x((si64)0x8040201008040201));
      sig_vec = _mm_cmpeq_epi8(sig_vec,
        _mm_set1_epi64x((si64)0x8040201008040201));
      sig_vec = _mm_abs_epi8(sig_vec);

      // a prefix sum assigns each significant sample its MagRef bit
      __m128i inc_sum = sig_vec;
      inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 1));
      inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 2));
      inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 4));
      inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 8));
      total_bits = _mm_extract_epi16(inc_sum, 7) >> 8;
      __m128i ex_sum = _mm_bslli_si128(inc_sum, 1);

      // fetch the bits; map a set bit to 1 and a clear bit to 3
      ui32 cwd = rev_fetch_mrp(&magref);
      __m128i cwd_vec = _mm_set1_epi16((si16)cwd);
      cwd_vec = _mm_shuffle_epi8(cwd_vec,
        _mm_set_epi8(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0));
      cwd_vec = _mm_and_si128(cwd_vec,
        _mm_set1_epi64x((si64)0x8040201008040201));
      cwd_vec = _mm_cmpeq_epi8(cwd_vec,
        _mm_set1_epi64x((si64)0x8040201008040201));
      cwd_vec = _mm_add_epi8(cwd_vec, _mm_set1_epi8(1));
      cwd_vec = _mm_add_epi8(cwd_vec, cwd_vec);
      cwd_vec = _mm_or_si128(cwd_vec, _mm_set1_epi8(1));

      __m128i m =
        _mm_set_epi8(-1,-1,-1,12,-1,-1,-1,8,-1,-1,-1,4,-1,-1,-1,0);
      ui32 *dp = dpp;
      for (int c = 0; c < 4; ++c) { // one row of the stripe at a time
        __m128i s0, s0_sig, s0_idx, s0_val;

        s0 = _mm_load_si128((__m128i*)dp);

        s0_sig = _mm_shuffle_epi8(sig_vec, m);
        s0_sig = _mm_cmpeq_epi8(s0_sig, _mm_setzero_si128());

        s0_idx = _mm_shuffle_epi8(ex_sum, m);       // per-sample bit index
        s0_val = _mm_shuffle_epi8(cwd_vec, s0_idx); // its 1 or 3 correction

        s0_val = _mm_andnot_si128(s0_sig, s0_val);

        s0_val = _mm_slli_epi32(s0_val, (si32)p - 2);
        s0 = _mm_xor_si128(s0, s0_val); // apply the refinement

        _mm_store_si128((__m128i*)dp, s0);

        dp += stride;
        m = _mm_add_epi32(m, _mm_set1_epi32(1));
      }
      rev_advance_mrp(&magref, total_bits);
    }
  }
}
ui16 uvlc_tbl0[256+64]
uvlc_tbl0 contains decoding information for initial row of quads
ui16 uvlc_tbl1[256]
uvlc_tbl1 contains decoding information for non-initial row of quads
ui16 vlc_tbl1[1024]
vlc_tbl1 contains decoding information for non-initial row of quads
ui16 vlc_tbl0[1024]
vlc_tbl0 contains decoding information for initial row of quads
static ui32 rev_fetch(rev_struct *vlcp)
Retrieves 32 bits from the head of a rev_struct structure.
static void rev_init_mrp(rev_struct *mrp, ui8 *data, int lcup, int len2)
Initializes a rev_struct for the MRP segment, and reads a number of bytes such that the next 32 bits read are from an address that is a multiple of 4 (i.e., aligned).
static void mel_read(dec_mel_st *melp)
Reads and unstuffs the MEL bitstream.
bool ojph_decode_codeblock_ssse3(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
static void frwd_advance(frwd_struct_ssse3 *msp, ui32 num_bits)
Consumes num_bits bits from the bitstream of a frwd_struct_ssse3.
static void rev_read_mrp(rev_struct *mrp)
Reads and unstuffs from rev_struct.
static ui32 rev_fetch_mrp(rev_struct *mrp)
Retrieves 32 bits from the head of a rev_struct structure.
static void frwd_read(frwd_struct_ssse3 *msp)
Reads and unstuffs bytes from the forward-growing bitstream.
static void rev_read(rev_struct *vlcp)
Reads and unstuffs data from a backward-growing segment.
static int mel_get_run(dec_mel_st *melp)
Retrieves one run from dec_mel_st; if no runs are stored, the MEL segment is decoded.
static void rev_init(rev_struct *vlcp, ui8 *data, int lcup, int scup)
Initializes the rev_struct structure and reads a few bytes to move the read address to a multiple of 4.
static void mel_init(dec_mel_st *melp, ui8 *bbuf, int lcup, int scup)
Initializes a dec_mel_st structure for MEL decoding and reads some bytes in order to get the read address to a multiple of 4.
static ui32 rev_advance(rev_struct *vlcp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static __m128i decode_one_quad32(const __m128i inf_u_q, __m128i U_q, frwd_struct_ssse3 *magsgn, ui32 p, __m128i &vn)
Decodes one quad, using 32-bit data.
static __m128i frwd_fetch(frwd_struct_ssse3 *msp)
Fetches the head of the frwd_struct_ssse3 bitstream (up to 128 bits).
static ui32 rev_advance_mrp(rev_struct *mrp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static __m128i decode_two_quad16(const __m128i inf_u_q, __m128i U_q, frwd_struct_ssse3 *magsgn, ui32 p, __m128i &vn)
Decodes two consecutive quads (one octet), using 16-bit data.
static void frwd_init(frwd_struct_ssse3 *msp, const ui8 *data, int size)
Initializes a frwd_struct_ssse3 structure and reads some bytes.
static void mel_decode(dec_mel_st *melp)
Decodes unstuffed MEL segment bits stored in tmp to runs.
static ui32 count_leading_zeros(ui32 val)
Counts the number of leading zero bits in val.
struct dec_mel_st
MEL state structure for reading and decoding the MEL bitstream.
bool unstuff
true if the next bit needs to be unstuffed
int num_runs
number of decoded runs left in runs (maximum 8)
int size
number of bytes in MEL code
ui8 * data
the address of data (or bitstream)
int k
state of MEL decoder
int bits
number of bits stored in tmp
ui64 tmp
temporary buffer for read data
ui64 runs
runs of decoded MEL codewords (7 bits/run)
struct rev_struct
A structure for reading and unstuffing a segment that grows backward, such as VLC and MRP.
ui32 bits
number of bits stored in tmp
int size
number of bytes left
ui8 * data
pointer to where to read data
ui64 tmp
temporary buffer of read data
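A minimal calling sketch (an assumed driver, not part of the library; the block size, pass count, and segment lengths are illustrative and would normally come from the packet headers). The decoder writes the output with aligned 128-bit stores and asserts (stride & 0x3) == 0, so the output buffer must be 16-byte aligned and the stride a multiple of 4:

// Hypothetical example; ojph_decode_codeblock_ssse3 is declared above.
static bool decode_one_block(ui8 *coded_data, ui32 lengths1, ui32 lengths2,
                             ui32 missing_msbs, ui32 num_passes)
{
  const ui32 width = 64, height = 64, stride = 64;
  // decoded samples are sign-magnitude: sign in bit 31, magnitude below
  alignas(16) static ui32 decoded[64 * 64];
  return ojph_decode_codeblock_ssse3(coded_data, decoded, missing_msbs,
                                     num_passes, lengths1, lengths2,
                                     width, height, stride,
                                     false /* stripe_causal */);
}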