OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_colour_avx.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_colour_avx.cpp
34// Author: Aous Naman
35// Date: 11 October 2019
36//***************************************************************************/
37
38#include "ojph_arch.h"
39#if defined(OJPH_ARCH_I386) || defined(OJPH_ARCH_X86_64)
40
41#include <cmath>
42
43#include "ojph_defs.h"
44#include "ojph_colour.h"
45#include "ojph_colour_local.h"
46
47#include <immintrin.h>
48
49namespace ojph {
50 namespace local {
51
53 void avx_ict_forward(const float *r, const float *g, const float *b,
54 float *y, float *cb, float *cr, ui32 repeat)
55 {
56 __m256 alpha_rf = _mm256_set1_ps(CT_CNST::ALPHA_RF);
57 __m256 alpha_gf = _mm256_set1_ps(CT_CNST::ALPHA_GF);
58 __m256 alpha_bf = _mm256_set1_ps(CT_CNST::ALPHA_BF);
59 __m256 beta_cbf = _mm256_set1_ps(CT_CNST::BETA_CbF);
60 __m256 beta_crf = _mm256_set1_ps(CT_CNST::BETA_CrF);
61 for (int i = (repeat + 7) >> 3; i > 0; --i)
62 {
63 __m256 mr = _mm256_load_ps(r);
64 __m256 mb = _mm256_load_ps(b);
65 __m256 my = _mm256_mul_ps(alpha_rf, mr);
66 my = _mm256_add_ps(my, _mm256_mul_ps(alpha_gf, _mm256_load_ps(g)));
67 my = _mm256_add_ps(my, _mm256_mul_ps(alpha_bf, mb));
68 _mm256_store_ps(y, my);
69 _mm256_store_ps(cb, _mm256_mul_ps(beta_cbf, _mm256_sub_ps(mb, my)));
70 _mm256_store_ps(cr, _mm256_mul_ps(beta_crf, _mm256_sub_ps(mr, my)));
71
72 r += 8; g += 8; b += 8;
73 y += 8; cb += 8; cr += 8;
74 }
75 }
76
78 void avx_ict_backward(const float *y, const float *cb, const float *cr,
79 float *r, float *g, float *b, ui32 repeat)
80 {
81 __m256 gamma_cr2g = _mm256_set1_ps(CT_CNST::GAMMA_CR2G);
82 __m256 gamma_cb2g = _mm256_set1_ps(CT_CNST::GAMMA_CB2G);
83 __m256 gamma_cr2r = _mm256_set1_ps(CT_CNST::GAMMA_CR2R);
84 __m256 gamma_cb2b = _mm256_set1_ps(CT_CNST::GAMMA_CB2B);
85 for (int i = (repeat + 7) >> 3; i > 0; --i)
86 {
87 __m256 my = _mm256_load_ps(y);
88 __m256 mcr = _mm256_load_ps(cr);
89 __m256 mcb = _mm256_load_ps(cb);
90 __m256 mg = _mm256_sub_ps(my, _mm256_mul_ps(gamma_cr2g, mcr));
91 _mm256_store_ps(g, _mm256_sub_ps(mg, _mm256_mul_ps(gamma_cb2g, mcb)));
92 _mm256_store_ps(r, _mm256_add_ps(my, _mm256_mul_ps(gamma_cr2r, mcr)));
93 _mm256_store_ps(b, _mm256_add_ps(my, _mm256_mul_ps(gamma_cb2b, mcb)));
94
95 y += 8; cb += 8; cr += 8;
96 r += 8; g += 8; b += 8;
97 }
98 }
99
100 }
101}
102
103#endif
void avx_ict_forward(const float *r, const float *g, const float *b, float *y, float *cb, float *cr, ui32 repeat)
void avx_ict_backward(const float *y, const float *cb, const float *cr, float *r, float *g, float *b, ui32 repeat)
uint32_t ui32
Definition ojph_defs.h:54
static const float GAMMA_CR2R
static const float BETA_CbF
static const float GAMMA_CB2B
static const float ALPHA_RF
static const float GAMMA_CB2G
static const float GAMMA_CR2G
static const float ALPHA_BF
static const float BETA_CrF
static const float ALPHA_GF