| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtGui module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | ** packaging of this file. Please review the following information to |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | ** |
| 25 | ** GNU General Public License Usage |
| 26 | ** Alternatively, this file may be used under the terms of the GNU |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General |
| 28 | ** Public license version 3 or any later version approved by the KDE Free |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | ** included in the packaging of this file. Please review the following |
| 32 | ** information to ensure the GNU General Public License requirements will |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | ** |
| 36 | ** $QT_END_LICENSE$ |
| 37 | ** |
| 38 | ****************************************************************************/ |
| 39 | |
| 40 | #ifndef QCOLORTRCLUT_P_H |
| 41 | #define QCOLORTRCLUT_P_H |
| 42 | |
| 43 | // |
| 44 | // W A R N I N G |
| 45 | // ------------- |
| 46 | // |
| 47 | // This file is not part of the Qt API. It exists purely as an |
| 48 | // implementation detail. This header file may change from version to |
| 49 | // version without notice, or even be removed. |
| 50 | // |
| 51 | // We mean it. |
| 52 | // |
| 53 | |
| 54 | #include <QtGui/private/qtguiglobal_p.h> |
| 55 | #include <QtCore/qsharedpointer.h> |
| 56 | #include <QtGui/qrgb.h> |
| 57 | #include <QtGui/qrgba64.h> |
| 58 | |
| 59 | #include <cmath> |
| 60 | |
| 61 | #if defined(__SSE2__) |
| 62 | #include <emmintrin.h> |
| 63 | #elif defined(__ARM_NEON__) || defined(__ARM_NEON) |
| 64 | #include <arm_neon.h> |
| 65 | #endif |
| 66 | |
| 67 | QT_BEGIN_NAMESPACE |
| 68 | |
| 69 | class QColorTransferFunction; |
| 70 | class QColorTransferTable; |
| 71 | |
| 72 | class Q_GUI_EXPORT QColorTrcLut : public QEnableSharedFromThis<QColorTrcLut> |
| 73 | { |
| 74 | public: |
| 75 | static QColorTrcLut *fromGamma(qreal gamma); |
| 76 | static QColorTrcLut *fromTransferFunction(const QColorTransferFunction &transfn); |
| 77 | static QColorTrcLut *fromTransferTable(const QColorTransferTable &transTable); |
| 78 | |
| 79 | // The following methods all convert opaque or unpremultiplied colors: |
| 80 | |
| 81 | QRgba64 toLinear64(QRgb rgb32) const |
| 82 | { |
| 83 | #if defined(__SSE2__) |
| 84 | __m128i v = _mm_cvtsi32_si128(a: rgb32); |
| 85 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
| 86 | const __m128i vidx = _mm_slli_epi16(a: v, count: 4); |
| 87 | const int ridx = _mm_extract_epi16(vidx, 2); |
| 88 | const int gidx = _mm_extract_epi16(vidx, 1); |
| 89 | const int bidx = _mm_extract_epi16(vidx, 0); |
| 90 | v = _mm_slli_epi16(a: v, count: 8); // a * 256 |
| 91 | v = _mm_insert_epi16(v, m_toLinear[ridx], 0); |
| 92 | v = _mm_insert_epi16(v, m_toLinear[gidx], 1); |
| 93 | v = _mm_insert_epi16(v, m_toLinear[bidx], 2); |
| 94 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
| 95 | QRgba64 rgba64; |
| 96 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
| 97 | return rgba64; |
| 98 | #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
| 99 | uint8x8_t v8 = vreinterpret_u8_u32(vmov_n_u32(rgb32)); |
| 100 | uint16x4_t v16 = vget_low_u16(vmovl_u8(v8)); |
| 101 | const uint16x4_t vidx = vshl_n_u16(v16, 4); |
| 102 | const int ridx = vget_lane_u16(vidx, 2); |
| 103 | const int gidx = vget_lane_u16(vidx, 1); |
| 104 | const int bidx = vget_lane_u16(vidx, 0); |
| 105 | v16 = vshl_n_u16(v16, 8); // a * 256 |
| 106 | v16 = vset_lane_u16(m_toLinear[ridx], v16, 0); |
| 107 | v16 = vset_lane_u16(m_toLinear[gidx], v16, 1); |
| 108 | v16 = vset_lane_u16(m_toLinear[bidx], v16, 2); |
| 109 | v16 = vadd_u16(v16, vshr_n_u16(v16, 8)); |
| 110 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v16), 0)); |
| 111 | #else |
| 112 | uint r = m_toLinear[qRed(rgb32) << 4]; |
| 113 | uint g = m_toLinear[qGreen(rgb32) << 4]; |
| 114 | uint b = m_toLinear[qBlue(rgb32) << 4]; |
| 115 | r = r + (r >> 8); |
| 116 | g = g + (g >> 8); |
| 117 | b = b + (b >> 8); |
| 118 | return QRgba64::fromRgba64(r, g, b, qAlpha(rgb32) * 257); |
| 119 | #endif |
| 120 | } |
| 121 | |
| 122 | QRgb toLinear(QRgb rgb32) const |
| 123 | { |
| 124 | return convertWithTable(rgb32, table: m_toLinear); |
| 125 | } |
| 126 | |
| 127 | QRgba64 toLinear(QRgba64 rgb64) const |
| 128 | { |
| 129 | return convertWithTable(rgb64, table: m_toLinear); |
| 130 | } |
| 131 | |
| 132 | float u8ToLinearF32(int c) const |
| 133 | { |
| 134 | ushort v = m_toLinear[c << 4]; |
| 135 | return v * (1.0f / (255*256)); |
| 136 | } |
| 137 | |
| 138 | float u16ToLinearF32(int c) const |
| 139 | { |
| 140 | c -= (c >> 8); |
| 141 | ushort v = m_toLinear[c >> 4]; |
| 142 | return v * (1.0f / (255*256)); |
| 143 | } |
| 144 | |
| 145 | float toLinear(float f) const |
| 146 | { |
| 147 | ushort v = m_toLinear[(int)(f * (255 * 16) + 0.5f)]; |
| 148 | return v * (1.0f / (255*256)); |
| 149 | } |
| 150 | |
| 151 | QRgb fromLinear64(QRgba64 rgb64) const |
| 152 | { |
| 153 | #if defined(__SSE2__) |
| 154 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
| 155 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
| 156 | const __m128i vidx = _mm_srli_epi16(a: v, count: 4); |
| 157 | const int ridx = _mm_extract_epi16(vidx, 0); |
| 158 | const int gidx = _mm_extract_epi16(vidx, 1); |
| 159 | const int bidx = _mm_extract_epi16(vidx, 2); |
| 160 | v = _mm_insert_epi16(v, m_fromLinear[ridx], 2); |
| 161 | v = _mm_insert_epi16(v, m_fromLinear[gidx], 1); |
| 162 | v = _mm_insert_epi16(v, m_fromLinear[bidx], 0); |
| 163 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
| 164 | v = _mm_srli_epi16(a: v, count: 8); |
| 165 | v = _mm_packus_epi16(a: v, b: v); |
| 166 | return _mm_cvtsi128_si32(a: v); |
| 167 | #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
| 168 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
| 169 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
| 170 | const uint16x4_t vidx = vshr_n_u16(v, 4); |
| 171 | const int ridx = vget_lane_u16(vidx, 0); |
| 172 | const int gidx = vget_lane_u16(vidx, 1); |
| 173 | const int bidx = vget_lane_u16(vidx, 2); |
| 174 | v = vset_lane_u16(m_fromLinear[ridx], v, 2); |
| 175 | v = vset_lane_u16(m_fromLinear[gidx], v, 1); |
| 176 | v = vset_lane_u16(m_fromLinear[bidx], v, 0); |
| 177 | uint8x8_t v8 = vrshrn_n_u16(vcombine_u16(v, v), 8); |
| 178 | return vget_lane_u32(vreinterpret_u32_u8(v8), 0); |
| 179 | #else |
| 180 | uint a = rgb64.alpha(); |
| 181 | uint r = rgb64.red(); |
| 182 | uint g = rgb64.green(); |
| 183 | uint b = rgb64.blue(); |
| 184 | a = a - (a >> 8); |
| 185 | r = r - (r >> 8); |
| 186 | g = g - (g >> 8); |
| 187 | b = b - (b >> 8); |
| 188 | a = (a + 0x80) >> 8; |
| 189 | r = (m_fromLinear[r >> 4] + 0x80) >> 8; |
| 190 | g = (m_fromLinear[g >> 4] + 0x80) >> 8; |
| 191 | b = (m_fromLinear[b >> 4] + 0x80) >> 8; |
| 192 | return (a << 24) | (r << 16) | (g << 8) | b; |
| 193 | #endif |
| 194 | } |
| 195 | |
| 196 | QRgb fromLinear(QRgb rgb32) const |
| 197 | { |
| 198 | return convertWithTable(rgb32, table: m_fromLinear); |
| 199 | } |
| 200 | |
| 201 | QRgba64 fromLinear(QRgba64 rgb64) const |
| 202 | { |
| 203 | return convertWithTable(rgb64, table: m_fromLinear); |
| 204 | } |
| 205 | |
| 206 | int u8FromLinearF32(float f) const |
| 207 | { |
| 208 | ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)]; |
| 209 | return (v + 0x80) >> 8; |
| 210 | } |
| 211 | int u16FromLinearF32(float f) const |
| 212 | { |
| 213 | ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)]; |
| 214 | return v + (v >> 8); |
| 215 | } |
| 216 | float fromLinear(float f) const |
| 217 | { |
| 218 | ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)]; |
| 219 | return v * (1.0f / (255*256)); |
| 220 | } |
| 221 | |
| 222 | // We translate to 0-65280 (255*256) instead to 0-65535 to make simple |
| 223 | // shifting an accurate conversion. |
| 224 | // We translate from 0-4080 (255*16) for the same speed up, and to keep |
| 225 | // the tables small enough to fit in most inner caches. |
| 226 | ushort m_toLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280] |
| 227 | ushort m_fromLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280] |
| 228 | |
| 229 | private: |
| 230 | QColorTrcLut() { } |
| 231 | |
| 232 | Q_ALWAYS_INLINE static QRgb convertWithTable(QRgb rgb32, const ushort *table) |
| 233 | { |
| 234 | const int r = (table[qRed(rgb: rgb32) << 4] + 0x80) >> 8; |
| 235 | const int g = (table[qGreen(rgb: rgb32) << 4] + 0x80) >> 8; |
| 236 | const int b = (table[qBlue(rgb: rgb32) << 4] + 0x80) >> 8; |
| 237 | return (rgb32 & 0xff000000) | (r << 16) | (g << 8) | b; |
| 238 | } |
| 239 | Q_ALWAYS_INLINE static QRgba64 convertWithTable(QRgba64 rgb64, const ushort *table) |
| 240 | { |
| 241 | #if defined(__SSE2__) |
| 242 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
| 243 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
| 244 | const __m128i vidx = _mm_srli_epi16(a: v, count: 4); |
| 245 | const int ridx = _mm_extract_epi16(vidx, 2); |
| 246 | const int gidx = _mm_extract_epi16(vidx, 1); |
| 247 | const int bidx = _mm_extract_epi16(vidx, 0); |
| 248 | v = _mm_insert_epi16(v, table[ridx], 2); |
| 249 | v = _mm_insert_epi16(v, table[gidx], 1); |
| 250 | v = _mm_insert_epi16(v, table[bidx], 0); |
| 251 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
| 252 | QRgba64 rgba64; |
| 253 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
| 254 | return rgba64; |
| 255 | #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
| 256 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
| 257 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
| 258 | const uint16x4_t vidx = vshr_n_u16(v, 4); |
| 259 | const int ridx = vget_lane_u16(vidx, 2); |
| 260 | const int gidx = vget_lane_u16(vidx, 1); |
| 261 | const int bidx = vget_lane_u16(vidx, 0); |
| 262 | v = vset_lane_u16(table[ridx], v, 2); |
| 263 | v = vset_lane_u16(table[gidx], v, 1); |
| 264 | v = vset_lane_u16(table[bidx], v, 0); |
| 265 | v = vadd_u16(v, vshr_n_u16(v, 8)); |
| 266 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v), 0)); |
| 267 | #else |
| 268 | ushort r = rgb64.red(); |
| 269 | ushort g = rgb64.green(); |
| 270 | ushort b = rgb64.blue(); |
| 271 | r = r - (r >> 8); |
| 272 | g = g - (g >> 8); |
| 273 | b = b - (b >> 8); |
| 274 | r = table[r >> 4]; |
| 275 | g = table[g >> 4]; |
| 276 | b = table[b >> 4]; |
| 277 | r = r + (r >> 8); |
| 278 | g = g + (g >> 8); |
| 279 | b = b + (b >> 8); |
| 280 | return QRgba64::fromRgba64(r, g, b, rgb64.alpha()); |
| 281 | #endif |
| 282 | } |
| 283 | }; |
| 284 | |
| 285 | QT_END_NAMESPACE |
| 286 | |
| 287 | #endif // QCOLORTRCLUT_P_H |
| 288 | |