| 1 | // Copyright (C) 2016 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #ifndef QCOLORTRCLUT_P_H |
| 5 | #define QCOLORTRCLUT_P_H |
| 6 | |
| 7 | // |
| 8 | // W A R N I N G |
| 9 | // ------------- |
| 10 | // |
| 11 | // This file is not part of the Qt API. It exists purely as an |
| 12 | // implementation detail. This header file may change from version to |
| 13 | // version without notice, or even be removed. |
| 14 | // |
| 15 | // We mean it. |
| 16 | // |
| 17 | |
| 18 | #include <QtGui/private/qtguiglobal_p.h> |
| 19 | #include <QtGui/qrgb.h> |
| 20 | #include <QtGui/qrgba64.h> |
| 21 | #include <QtCore/private/qsimd_p.h> |
| 22 | |
| 23 | #include <cmath> |
| 24 | #include <memory> |
| 25 | |
| 26 | #if defined(__SSE2__) |
| 27 | #include <emmintrin.h> |
| 28 | #elif defined(__ARM_NEON__) |
| 29 | #include <arm_neon.h> |
| 30 | #endif |
| 31 | |
| 32 | QT_BEGIN_NAMESPACE |
| 33 | |
| 34 | class QColorTransferGenericFunction; |
| 35 | class QColorTransferFunction; |
| 36 | class QColorTransferTable; |
| 37 | class QColorTrc; |
| 38 | |
| 39 | class Q_GUI_EXPORT QColorTrcLut |
| 40 | { |
| 41 | public: |
| 42 | static constexpr uint32_t ShiftUp = 4; // Amount to shift up from 1->255 |
| 43 | static constexpr uint32_t ShiftDown = (8 - ShiftUp); // Amount to shift down from 1->65280 |
| 44 | static constexpr qsizetype Resolution = (1 << ShiftUp) * 255; // Number of entries in table |
| 45 | |
| 46 | enum Direction { |
| 47 | ToLinear = 1, |
| 48 | FromLinear = 2, |
| 49 | BiLinear = ToLinear | FromLinear |
| 50 | }; |
| 51 | |
| 52 | static std::shared_ptr<QColorTrcLut> fromGamma(float gamma, Direction dir = BiLinear); |
| 53 | static std::shared_ptr<QColorTrcLut> fromTrc(const QColorTrc &trc, Direction dir = BiLinear); |
| 54 | void setFromGamma(float gamma, Direction dir = BiLinear); |
| 55 | void setFromTransferFunction(const QColorTransferFunction &transFn, Direction dir = BiLinear); |
| 56 | void setFromTransferTable(const QColorTransferTable &transTable, Direction dir = BiLinear); |
| 57 | void setFromTransferGenericFunction(const QColorTransferGenericFunction &transfn, Direction dir); |
| 58 | void setFromTrc(const QColorTrc &trc, Direction dir); |
| 59 | |
| 60 | // The following methods all convert opaque or unpremultiplied colors: |
| 61 | |
| 62 | QRgba64 toLinear64(QRgb rgb32) const |
| 63 | { |
| 64 | #if defined(__SSE2__) |
| 65 | __m128i v = _mm_cvtsi32_si128(a: rgb32); |
| 66 | v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128()); |
| 67 | const __m128i vidx = _mm_slli_epi16(a: v, count: ShiftUp); |
| 68 | const int ridx = _mm_extract_epi16(vidx, 2); |
| 69 | const int gidx = _mm_extract_epi16(vidx, 1); |
| 70 | const int bidx = _mm_extract_epi16(vidx, 0); |
| 71 | v = _mm_slli_epi16(a: v, count: 8); // a * 256 |
| 72 | v = _mm_insert_epi16(v, m_toLinear[ridx], 0); |
| 73 | v = _mm_insert_epi16(v, m_toLinear[gidx], 1); |
| 74 | v = _mm_insert_epi16(v, m_toLinear[bidx], 2); |
| 75 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
| 76 | QRgba64 rgba64; |
| 77 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
| 78 | return rgba64; |
| 79 | #elif defined(__ARM_NEON__) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
| 80 | uint8x8_t v8 = vreinterpret_u8_u32(vmov_n_u32(rgb32)); |
| 81 | uint16x4_t v16 = vget_low_u16(vmovl_u8(v8)); |
| 82 | const uint16x4_t vidx = vshl_n_u16(v16, ShiftUp); |
| 83 | const int ridx = vget_lane_u16(vidx, 2); |
| 84 | const int gidx = vget_lane_u16(vidx, 1); |
| 85 | const int bidx = vget_lane_u16(vidx, 0); |
| 86 | v16 = vshl_n_u16(v16, 8); // a * 256 |
| 87 | v16 = vset_lane_u16(m_toLinear[ridx], v16, 0); |
| 88 | v16 = vset_lane_u16(m_toLinear[gidx], v16, 1); |
| 89 | v16 = vset_lane_u16(m_toLinear[bidx], v16, 2); |
| 90 | v16 = vadd_u16(v16, vshr_n_u16(v16, 8)); |
| 91 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v16), 0)); |
| 92 | #else |
| 93 | uint r = m_toLinear[qRed(rgb32) << ShiftUp]; |
| 94 | uint g = m_toLinear[qGreen(rgb32) << ShiftUp]; |
| 95 | uint b = m_toLinear[qBlue(rgb32) << ShiftUp]; |
| 96 | r = r + (r >> 8); |
| 97 | g = g + (g >> 8); |
| 98 | b = b + (b >> 8); |
| 99 | return QRgba64::fromRgba64(r, g, b, qAlpha(rgb32) * 257); |
| 100 | #endif |
| 101 | } |
| 102 | QRgba64 toLinear64(QRgba64) const = delete; |
| 103 | |
| 104 | QRgb toLinear(QRgb rgb32) const |
| 105 | { |
| 106 | return convertWithTable(rgb32, table: m_toLinear.get()); |
| 107 | } |
| 108 | |
| 109 | QRgba64 toLinear(QRgba64 rgb64) const |
| 110 | { |
| 111 | return convertWithTable(rgb64, table: m_toLinear.get()); |
| 112 | } |
| 113 | |
| 114 | float u8ToLinearF32(int c) const |
| 115 | { |
| 116 | ushort v = m_toLinear[c << ShiftUp]; |
| 117 | return v * (1.0f / (255*256)); |
| 118 | } |
| 119 | |
| 120 | float u16ToLinearF32(int c) const |
| 121 | { |
| 122 | c -= (c >> 8); |
| 123 | ushort v = m_toLinear[c >> ShiftDown]; |
| 124 | return v * (1.0f / (255*256)); |
| 125 | } |
| 126 | |
| 127 | float toLinear(float f) const |
| 128 | { |
| 129 | ushort v = m_toLinear[(int)(f * Resolution + 0.5f)]; |
| 130 | return v * (1.0f / (255*256)); |
| 131 | } |
| 132 | |
| 133 | QRgb fromLinear64(QRgba64 rgb64) const |
| 134 | { |
| 135 | #if defined(__SSE2__) |
| 136 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
| 137 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
| 138 | const __m128i vidx = _mm_srli_epi16(a: v, count: ShiftDown); |
| 139 | const int ridx = _mm_extract_epi16(vidx, 0); |
| 140 | const int gidx = _mm_extract_epi16(vidx, 1); |
| 141 | const int bidx = _mm_extract_epi16(vidx, 2); |
| 142 | v = _mm_insert_epi16(v, m_fromLinear[ridx], 2); |
| 143 | v = _mm_insert_epi16(v, m_fromLinear[gidx], 1); |
| 144 | v = _mm_insert_epi16(v, m_fromLinear[bidx], 0); |
| 145 | v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80)); |
| 146 | v = _mm_srli_epi16(a: v, count: 8); |
| 147 | v = _mm_packus_epi16(a: v, b: v); |
| 148 | return _mm_cvtsi128_si32(a: v); |
| 149 | #elif defined(__ARM_NEON__) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
| 150 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
| 151 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
| 152 | const uint16x4_t vidx = vshr_n_u16(v, ShiftDown); |
| 153 | const int ridx = vget_lane_u16(vidx, 0); |
| 154 | const int gidx = vget_lane_u16(vidx, 1); |
| 155 | const int bidx = vget_lane_u16(vidx, 2); |
| 156 | v = vset_lane_u16(m_fromLinear[ridx], v, 2); |
| 157 | v = vset_lane_u16(m_fromLinear[gidx], v, 1); |
| 158 | v = vset_lane_u16(m_fromLinear[bidx], v, 0); |
| 159 | uint8x8_t v8 = vrshrn_n_u16(vcombine_u16(v, v), 8); |
| 160 | return vget_lane_u32(vreinterpret_u32_u8(v8), 0); |
| 161 | #else |
| 162 | uint a = rgb64.alpha(); |
| 163 | uint r = rgb64.red(); |
| 164 | uint g = rgb64.green(); |
| 165 | uint b = rgb64.blue(); |
| 166 | a = a - (a >> 8); |
| 167 | r = r - (r >> 8); |
| 168 | g = g - (g >> 8); |
| 169 | b = b - (b >> 8); |
| 170 | a = (a + 0x80) >> 8; |
| 171 | r = (m_fromLinear[r >> ShiftDown] + 0x80) >> 8; |
| 172 | g = (m_fromLinear[g >> ShiftDown] + 0x80) >> 8; |
| 173 | b = (m_fromLinear[b >> ShiftDown] + 0x80) >> 8; |
| 174 | return (a << 24) | (r << 16) | (g << 8) | b; |
| 175 | #endif |
| 176 | } |
| 177 | |
| 178 | QRgb fromLinear(QRgb rgb32) const |
| 179 | { |
| 180 | return convertWithTable(rgb32, table: m_fromLinear.get()); |
| 181 | } |
| 182 | |
| 183 | QRgba64 fromLinear(QRgba64 rgb64) const |
| 184 | { |
| 185 | return convertWithTable(rgb64, table: m_fromLinear.get()); |
| 186 | } |
| 187 | |
| 188 | int u8FromLinearF32(float f) const |
| 189 | { |
| 190 | ushort v = m_fromLinear[(int)(f * Resolution + 0.5f)]; |
| 191 | return (v + 0x80) >> 8; |
| 192 | } |
| 193 | int u16FromLinearF32(float f) const |
| 194 | { |
| 195 | ushort v = m_fromLinear[(int)(f * Resolution + 0.5f)]; |
| 196 | return v + (v >> 8); |
| 197 | } |
| 198 | float fromLinear(float f) const |
| 199 | { |
| 200 | ushort v = m_fromLinear[(int)(f * Resolution + 0.5f)]; |
| 201 | return v * (1.0f / (255*256)); |
| 202 | } |
| 203 | |
| 204 | // We translate to 0-65280 (255*256) instead to 0-65535 to make simple |
| 205 | // shifting an accurate conversion. |
| 206 | // We translate from 0->Resolution (4080 = 255*16) for the same speed up, |
| 207 | // and to keep the tables small enough to fit in most inner caches. |
| 208 | std::unique_ptr<ushort[]> m_toLinear; // [0->Resolution] -> [0-65280] |
| 209 | std::unique_ptr<ushort[]> m_fromLinear; // [0->Resolution] -> [0-65280] |
| 210 | ushort m_unclampedToLinear = Resolution; |
| 211 | |
| 212 | private: |
| 213 | QColorTrcLut() = default; |
| 214 | |
| 215 | static std::shared_ptr<QColorTrcLut> create(); |
| 216 | |
| 217 | Q_ALWAYS_INLINE static QRgb convertWithTable(QRgb rgb32, const ushort *table) |
| 218 | { |
| 219 | const int r = (table[qRed(rgb: rgb32) << ShiftUp] + 0x80) >> 8; |
| 220 | const int g = (table[qGreen(rgb: rgb32) << ShiftUp] + 0x80) >> 8; |
| 221 | const int b = (table[qBlue(rgb: rgb32) << ShiftUp] + 0x80) >> 8; |
| 222 | return (rgb32 & 0xff000000) | (r << 16) | (g << 8) | b; |
| 223 | } |
| 224 | Q_ALWAYS_INLINE static QRgba64 convertWithTable(QRgba64 rgb64, const ushort *table) |
| 225 | { |
| 226 | #if defined(__SSE2__) |
| 227 | __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64)); |
| 228 | v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
| 229 | const __m128i vidx = _mm_srli_epi16(a: v, count: ShiftDown); |
| 230 | const int ridx = _mm_extract_epi16(vidx, 2); |
| 231 | const int gidx = _mm_extract_epi16(vidx, 1); |
| 232 | const int bidx = _mm_extract_epi16(vidx, 0); |
| 233 | v = _mm_insert_epi16(v, table[ridx], 2); |
| 234 | v = _mm_insert_epi16(v, table[gidx], 1); |
| 235 | v = _mm_insert_epi16(v, table[bidx], 0); |
| 236 | v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8)); |
| 237 | QRgba64 rgba64; |
| 238 | _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v); |
| 239 | return rgba64; |
| 240 | #elif defined(__ARM_NEON__) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
| 241 | uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64)); |
| 242 | v = vsub_u16(v, vshr_n_u16(v, 8)); |
| 243 | const uint16x4_t vidx = vshr_n_u16(v, ShiftDown); |
| 244 | const int ridx = vget_lane_u16(vidx, 2); |
| 245 | const int gidx = vget_lane_u16(vidx, 1); |
| 246 | const int bidx = vget_lane_u16(vidx, 0); |
| 247 | v = vset_lane_u16(table[ridx], v, 2); |
| 248 | v = vset_lane_u16(table[gidx], v, 1); |
| 249 | v = vset_lane_u16(table[bidx], v, 0); |
| 250 | v = vadd_u16(v, vshr_n_u16(v, 8)); |
| 251 | return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v), 0)); |
| 252 | #else |
| 253 | ushort r = rgb64.red(); |
| 254 | ushort g = rgb64.green(); |
| 255 | ushort b = rgb64.blue(); |
| 256 | r = r - (r >> 8); |
| 257 | g = g - (g >> 8); |
| 258 | b = b - (b >> 8); |
| 259 | r = table[r >> ShiftDown]; |
| 260 | g = table[g >> ShiftDown]; |
| 261 | b = table[b >> ShiftDown]; |
| 262 | r = r + (r >> 8); |
| 263 | g = g + (g >> 8); |
| 264 | b = b + (b >> 8); |
| 265 | return QRgba64::fromRgba64(r, g, b, rgb64.alpha()); |
| 266 | #endif |
| 267 | } |
| 268 | }; |
| 269 | |
| 270 | QT_END_NAMESPACE |
| 271 | |
| 272 | #endif // QCOLORTRCLUT_P_H |
| 273 | |