| 1 | // Copyright (C) 2022 The Qt Company Ltd. |
| 2 | // Copyright (C) 2016 by Southwest Research Institute (R) |
| 3 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 4 | |
| 5 | #ifndef QFLOAT16_H |
| 6 | #define QFLOAT16_H |
| 7 | |
| 8 | #include <QtCore/qcompare.h> |
| 9 | #include <QtCore/qglobal.h> |
| 10 | #include <QtCore/qhashfunctions.h> |
| 11 | #include <QtCore/qmath.h> |
| 12 | #include <QtCore/qnamespace.h> |
| 13 | #include <QtCore/qtconfigmacros.h> |
| 14 | #include <QtCore/qtypes.h> |
| 15 | |
| 16 | #include <limits> |
| 17 | #include <string.h> |
| 18 | #include <type_traits> |
| 19 | |
| 20 | #if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__AVX2__) && !defined(__F16C__) |
| 21 | // All processors that support AVX2 do support F16C too, so we could enable the |
| 22 | // feature unconditionally if __AVX2__ is defined. However, all currently |
| 23 | // supported compilers except Microsoft's are able to define __F16C__ on their |
| 24 | // own when the user enables the feature, so we'll trust them. |
| 25 | # if defined(Q_CC_MSVC) && !defined(Q_CC_CLANG) |
| 26 | # define __F16C__ 1 |
| 27 | # endif |
| 28 | #endif |
| 29 | |
| 30 | #if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) |
| 31 | #include <immintrin.h> |
| 32 | #endif |
| 33 | |
| 34 | QT_BEGIN_NAMESPACE |
| 35 | |
| 36 | #if 0 |
| 37 | #pragma qt_class(QFloat16) |
| 38 | #pragma qt_no_master_include |
| 39 | #endif |
| 40 | |
| 41 | #ifndef QT_NO_DATASTREAM |
| 42 | class QDataStream; |
| 43 | #endif |
| 44 | class QTextStream; |
| 45 | |
| 46 | class qfloat16 |
| 47 | { |
| 48 | struct Wrap |
| 49 | { |
| 50 | // To let our private constructor work, without other code seeing |
| 51 | // ambiguity when constructing from int, double &c. |
| 52 | quint16 b16; |
| 53 | constexpr inline explicit Wrap(int value) : b16(quint16(value)) {} |
| 54 | }; |
| 55 | |
| 56 | template <typename T> |
| 57 | using if_type_is_integral = std::enable_if_t<std::is_integral_v<std::remove_reference_t<T>>, bool>; |
| 58 | |
| 59 | public: |
| 60 | using NativeType = QtPrivate::NativeFloat16Type; |
| 61 | |
| 62 | static constexpr bool IsNative = QFLOAT16_IS_NATIVE; |
| 63 | using NearestFloat = std::conditional_t<IsNative, NativeType, float>; |
| 64 | |
| 65 | constexpr inline qfloat16() noexcept : b16(0) {} |
| 66 | explicit qfloat16(Qt::Initialization) noexcept { } |
| 67 | |
| 68 | #if QFLOAT16_IS_NATIVE |
| 69 | constexpr inline qfloat16(NativeType f) : nf(f) {} |
| 70 | constexpr operator NativeType() const noexcept { return nf; } |
| 71 | #else |
| 72 | inline qfloat16(float f) noexcept; |
| 73 | inline operator float() const noexcept; |
| 74 | #endif |
| 75 | template <typename T, typename = std::enable_if_t<std::is_arithmetic_v<T> && !std::is_same_v<T, NearestFloat>>> |
| 76 | constexpr explicit qfloat16(T value) noexcept : qfloat16(NearestFloat(value)) {} |
| 77 | |
| 78 | // Support for qIs{Inf,NaN,Finite}: |
| 79 | bool isInf() const noexcept { return (b16 & 0x7fff) == 0x7c00; } |
| 80 | bool isNaN() const noexcept { return (b16 & 0x7fff) > 0x7c00; } |
| 81 | bool isFinite() const noexcept { return (b16 & 0x7fff) < 0x7c00; } |
| 82 | Q_CORE_EXPORT int fpClassify() const noexcept; |
| 83 | // Can't specialize std::copysign() for qfloat16 |
| 84 | qfloat16 copySign(qfloat16 sign) const noexcept |
| 85 | { return qfloat16(Wrap((sign.b16 & 0x8000) | (b16 & 0x7fff))); } |
| 86 | // Support for std::numeric_limits<qfloat16> |
| 87 | |
| 88 | #ifdef __STDCPP_FLOAT16_T__ |
| 89 | private: |
| 90 | using Bounds = std::numeric_limits<NativeType>; |
| 91 | public: |
| 92 | static constexpr qfloat16 _limit_epsilon() noexcept { return Bounds::epsilon(); } |
| 93 | static constexpr qfloat16 _limit_min() noexcept { return Bounds::min(); } |
| 94 | static constexpr qfloat16 _limit_denorm_min() noexcept { return Bounds::denorm_min(); } |
| 95 | static constexpr qfloat16 _limit_max() noexcept { return Bounds::max(); } |
| 96 | static constexpr qfloat16 _limit_lowest() noexcept { return Bounds::lowest(); } |
| 97 | static constexpr qfloat16 _limit_infinity() noexcept { return Bounds::infinity(); } |
| 98 | static constexpr qfloat16 _limit_quiet_NaN() noexcept { return Bounds::quiet_NaN(); } |
| 99 | #if QT_CONFIG(signaling_nan) |
| 100 | static constexpr qfloat16 _limit_signaling_NaN() noexcept { return Bounds::signaling_NaN(); } |
| 101 | #endif |
| 102 | #else |
| 103 | static constexpr qfloat16 _limit_epsilon() noexcept { return qfloat16(Wrap(0x1400)); } |
| 104 | static constexpr qfloat16 _limit_min() noexcept { return qfloat16(Wrap(0x400)); } |
| 105 | static constexpr qfloat16 _limit_denorm_min() noexcept { return qfloat16(Wrap(1)); } |
| 106 | static constexpr qfloat16 _limit_max() noexcept { return qfloat16(Wrap(0x7bff)); } |
| 107 | static constexpr qfloat16 _limit_lowest() noexcept { return qfloat16(Wrap(0xfbff)); } |
| 108 | static constexpr qfloat16 _limit_infinity() noexcept { return qfloat16(Wrap(0x7c00)); } |
| 109 | static constexpr qfloat16 _limit_quiet_NaN() noexcept { return qfloat16(Wrap(0x7e00)); } |
| 110 | #if QT_CONFIG(signaling_nan) |
| 111 | static constexpr qfloat16 _limit_signaling_NaN() noexcept { return qfloat16(Wrap(0x7d00)); } |
| 112 | #endif |
| 113 | #endif |
| 114 | inline constexpr bool isNormal() const noexcept |
| 115 | { return (b16 & 0x7c00) && (b16 & 0x7c00) != 0x7c00; } |
| 116 | private: |
| 117 | // ABI note: Qt 6's qfloat16 began with just a quint16 member so it ended |
| 118 | // up passed in general purpose registers in any function call taking |
| 119 | // qfloat16 by value (it has trivial copy constructors). This means the |
| 120 | // integer member in the anonymous union below must remain until a |
| 121 | // binary-incompatible version of Qt. If you remove it, on platforms using |
| 122 | // the System V ABI for C, the native type is passed in FP registers. |
| 123 | union { |
| 124 | quint16 b16; |
| 125 | #if QFLOAT16_IS_NATIVE |
| 126 | NativeType nf; |
| 127 | #endif |
| 128 | }; |
| 129 | constexpr inline explicit qfloat16(Wrap nibble) noexcept : |
| 130 | #if QFLOAT16_IS_NATIVE && defined(__cpp_lib_bit_cast) |
| 131 | nf(std::bit_cast<NativeType>(nibble.b16)) |
| 132 | #else |
| 133 | b16(nibble.b16) |
| 134 | #endif |
| 135 | {} |
| 136 | |
| 137 | Q_CORE_EXPORT static const quint32 mantissatable[]; |
| 138 | Q_CORE_EXPORT static const quint32 exponenttable[]; |
| 139 | Q_CORE_EXPORT static const quint32 offsettable[]; |
| 140 | Q_CORE_EXPORT static const quint16 basetable[]; |
| 141 | Q_CORE_EXPORT static const quint16 shifttable[]; |
| 142 | Q_CORE_EXPORT static const quint32 roundtable[]; |
| 143 | |
| 144 | friend bool qIsNull(qfloat16 f) noexcept; |
| 145 | |
| 146 | friend inline qfloat16 operator-(qfloat16 a) noexcept |
| 147 | { |
| 148 | qfloat16 f; |
| 149 | f.b16 = a.b16 ^ quint16(0x8000); |
| 150 | return f; |
| 151 | } |
| 152 | |
| 153 | friend inline qfloat16 operator+(qfloat16 a, qfloat16 b) noexcept { return qfloat16(static_cast<NearestFloat>(a) + static_cast<NearestFloat>(b)); } |
| 154 | friend inline qfloat16 operator-(qfloat16 a, qfloat16 b) noexcept { return qfloat16(static_cast<NearestFloat>(a) - static_cast<NearestFloat>(b)); } |
| 155 | friend inline qfloat16 operator*(qfloat16 a, qfloat16 b) noexcept { return qfloat16(static_cast<NearestFloat>(a) * static_cast<NearestFloat>(b)); } |
| 156 | friend inline qfloat16 operator/(qfloat16 a, qfloat16 b) noexcept { return qfloat16(static_cast<NearestFloat>(a) / static_cast<NearestFloat>(b)); } |
| 157 | |
| 158 | friend size_t qHash(qfloat16 key, size_t seed = 0) noexcept |
| 159 | { return qHash(key: float(key), seed); } // 6.4 algorithm, so keep using it; ### Qt 7: fix QTBUG-116077 |
| 160 | |
| 161 | QT_WARNING_PUSH |
| 162 | QT_WARNING_DISABLE_GCC("-Wfloat-conversion" ) |
| 163 | |
| 164 | #define QF16_MAKE_ARITH_OP_FP(FP, OP) \ |
| 165 | friend inline FP operator OP(qfloat16 lhs, FP rhs) noexcept { return static_cast<FP>(lhs) OP rhs; } \ |
| 166 | friend inline FP operator OP(FP lhs, qfloat16 rhs) noexcept { return lhs OP static_cast<FP>(rhs); } |
| 167 | #define QF16_MAKE_ARITH_OP_EQ_FP(FP, OP_EQ, OP) \ |
| 168 | friend inline qfloat16& operator OP_EQ(qfloat16& lhs, FP rhs) noexcept \ |
| 169 | { lhs = qfloat16(NearestFloat(static_cast<FP>(lhs) OP rhs)); return lhs; } |
| 170 | #define QF16_MAKE_ARITH_OP(FP) \ |
| 171 | QF16_MAKE_ARITH_OP_FP(FP, +) \ |
| 172 | QF16_MAKE_ARITH_OP_FP(FP, -) \ |
| 173 | QF16_MAKE_ARITH_OP_FP(FP, *) \ |
| 174 | QF16_MAKE_ARITH_OP_FP(FP, /) \ |
| 175 | QF16_MAKE_ARITH_OP_EQ_FP(FP, +=, +) \ |
| 176 | QF16_MAKE_ARITH_OP_EQ_FP(FP, -=, -) \ |
| 177 | QF16_MAKE_ARITH_OP_EQ_FP(FP, *=, *) \ |
| 178 | QF16_MAKE_ARITH_OP_EQ_FP(FP, /=, /) |
| 179 | |
| 180 | QF16_MAKE_ARITH_OP(long double) |
| 181 | QF16_MAKE_ARITH_OP(double) |
| 182 | QF16_MAKE_ARITH_OP(float) |
| 183 | #if QFLOAT16_IS_NATIVE |
| 184 | QF16_MAKE_ARITH_OP(NativeType) |
| 185 | #endif |
| 186 | #undef QF16_MAKE_ARITH_OP |
| 187 | #undef QF16_MAKE_ARITH_OP_FP |
| 188 | |
| 189 | #define QF16_MAKE_ARITH_OP_INT(OP) \ |
| 190 | friend inline double operator OP(qfloat16 lhs, int rhs) noexcept { return static_cast<double>(lhs) OP rhs; } \ |
| 191 | friend inline double operator OP(int lhs, qfloat16 rhs) noexcept { return lhs OP static_cast<double>(rhs); } |
| 192 | |
| 193 | QF16_MAKE_ARITH_OP_INT(+) |
| 194 | QF16_MAKE_ARITH_OP_INT(-) |
| 195 | QF16_MAKE_ARITH_OP_INT(*) |
| 196 | QF16_MAKE_ARITH_OP_INT(/) |
| 197 | #undef QF16_MAKE_ARITH_OP_INT |
| 198 | |
| 199 | QT_WARNING_DISABLE_FLOAT_COMPARE |
| 200 | |
| 201 | #if QFLOAT16_IS_NATIVE |
| 202 | # define QF16_CONSTEXPR constexpr |
| 203 | # define QF16_PARTIALLY_ORDERED Q_DECLARE_PARTIALLY_ORDERED_LITERAL_TYPE |
| 204 | #else |
| 205 | # define QF16_CONSTEXPR |
| 206 | # define QF16_PARTIALLY_ORDERED Q_DECLARE_PARTIALLY_ORDERED |
| 207 | #endif |
| 208 | |
| 209 | friend QF16_CONSTEXPR bool comparesEqual(const qfloat16 &lhs, const qfloat16 &rhs) noexcept |
| 210 | { return static_cast<NearestFloat>(lhs) == static_cast<NearestFloat>(rhs); } |
| 211 | friend QF16_CONSTEXPR |
| 212 | Qt::partial_ordering compareThreeWay(const qfloat16 &lhs, const qfloat16 &rhs) noexcept |
| 213 | { return Qt::compareThreeWay(lhs: static_cast<NearestFloat>(lhs), rhs: static_cast<NearestFloat>(rhs)); } |
| 214 | QF16_PARTIALLY_ORDERED(qfloat16) |
| 215 | |
| 216 | #define QF16_MAKE_ORDER_OP_FP(FP) \ |
| 217 | friend QF16_CONSTEXPR bool comparesEqual(const qfloat16 &lhs, FP rhs) noexcept \ |
| 218 | { return static_cast<FP>(lhs) == rhs; } \ |
| 219 | friend QF16_CONSTEXPR \ |
| 220 | Qt::partial_ordering compareThreeWay(const qfloat16 &lhs, FP rhs) noexcept \ |
| 221 | { return Qt::compareThreeWay(static_cast<FP>(lhs), rhs); } \ |
| 222 | QF16_PARTIALLY_ORDERED(qfloat16, FP) |
| 223 | |
| 224 | QF16_MAKE_ORDER_OP_FP(long double) |
| 225 | QF16_MAKE_ORDER_OP_FP(double) |
| 226 | QF16_MAKE_ORDER_OP_FP(float) |
| 227 | #if QFLOAT16_IS_NATIVE |
| 228 | QF16_MAKE_ORDER_OP_FP(qfloat16::NativeType) |
| 229 | #endif |
| 230 | #undef QF16_MAKE_ORDER_OP_FP |
| 231 | |
| 232 | template <typename T, if_type_is_integral<T> = true> |
| 233 | friend QF16_CONSTEXPR bool comparesEqual(const qfloat16 &lhs, T rhs) noexcept |
| 234 | { return static_cast<NearestFloat>(lhs) == static_cast<NearestFloat>(rhs); } |
| 235 | template <typename T, if_type_is_integral<T> = true> |
| 236 | friend QF16_CONSTEXPR Qt::partial_ordering compareThreeWay(const qfloat16 &lhs, T rhs) noexcept |
| 237 | { return Qt::compareThreeWay(lhs: static_cast<NearestFloat>(lhs), rhs: static_cast<NearestFloat>(rhs)); } |
| 238 | |
| 239 | QF16_PARTIALLY_ORDERED(qfloat16, qint8) |
| 240 | QF16_PARTIALLY_ORDERED(qfloat16, quint8) |
| 241 | QF16_PARTIALLY_ORDERED(qfloat16, qint16) |
| 242 | QF16_PARTIALLY_ORDERED(qfloat16, quint16) |
| 243 | QF16_PARTIALLY_ORDERED(qfloat16, qint32) |
| 244 | QF16_PARTIALLY_ORDERED(qfloat16, quint32) |
| 245 | QF16_PARTIALLY_ORDERED(qfloat16, long) |
| 246 | QF16_PARTIALLY_ORDERED(qfloat16, unsigned long) |
| 247 | QF16_PARTIALLY_ORDERED(qfloat16, qint64) |
| 248 | QF16_PARTIALLY_ORDERED(qfloat16, quint64) |
| 249 | #ifdef QT_SUPPORTS_INT128 |
| 250 | QF16_PARTIALLY_ORDERED(qfloat16, qint128) |
| 251 | QF16_PARTIALLY_ORDERED(qfloat16, quint128) |
| 252 | #endif |
| 253 | |
| 254 | #undef QF16_PARTIALLY_ORDERED |
| 255 | #undef QF16_CONSTEXPR |
| 256 | |
| 257 | QT_WARNING_POP |
| 258 | |
| 259 | #ifndef QT_NO_DATASTREAM |
| 260 | friend Q_CORE_EXPORT QDataStream &operator<<(QDataStream &ds, qfloat16 f); |
| 261 | friend Q_CORE_EXPORT QDataStream &operator>>(QDataStream &ds, qfloat16 &f); |
| 262 | #endif |
| 263 | friend Q_CORE_EXPORT QTextStream &operator<<(QTextStream &ts, qfloat16 f); |
| 264 | friend Q_CORE_EXPORT QTextStream &operator>>(QTextStream &ts, qfloat16 &f); |
| 265 | }; |
| 266 | |
| 267 | Q_DECLARE_TYPEINFO(qfloat16, Q_PRIMITIVE_TYPE); |
| 268 | |
| 269 | Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *, const float *, qsizetype length) noexcept; |
| 270 | Q_CORE_EXPORT void qFloatFromFloat16(float *, const qfloat16 *, qsizetype length) noexcept; |
| 271 | |
| 272 | // Complement qnumeric.h: |
| 273 | [[nodiscard]] inline bool qIsInf(qfloat16 f) noexcept { return f.isInf(); } |
| 274 | [[nodiscard]] inline bool qIsNaN(qfloat16 f) noexcept { return f.isNaN(); } |
| 275 | [[nodiscard]] inline bool qIsFinite(qfloat16 f) noexcept { return f.isFinite(); } |
| 276 | [[nodiscard]] inline int qFpClassify(qfloat16 f) noexcept { return f.fpClassify(); } |
| 277 | // [[nodiscard]] quint32 qFloatDistance(qfloat16 a, qfloat16 b); |
| 278 | |
| 279 | [[nodiscard]] inline qfloat16 qSqrt(qfloat16 f) |
| 280 | { |
| 281 | #if defined(__cpp_lib_extended_float) && defined(__STDCPP_FLOAT16_T__) && 0 |
| 282 | // https://wg21.link/p1467 - disabled until tested |
| 283 | using namespace std; |
| 284 | return sqrt(f); |
| 285 | #elif QFLOAT16_IS_NATIVE && defined(__HAVE_FLOAT16) && __HAVE_FLOAT16 |
| 286 | // This C library (glibc) has sqrtf16(). |
| 287 | return sqrtf16(f); |
| 288 | #else |
| 289 | bool mathUpdatesErrno = true; |
| 290 | # if defined(__NO_MATH_ERRNO__) || defined(_M_FP_FAST) |
| 291 | mathUpdatesErrno = false; |
| 292 | # elif defined(math_errhandling) |
| 293 | mathUpdatesErrno = (math_errhandling & MATH_ERRNO); |
| 294 | # endif |
| 295 | |
| 296 | // We don't need to set errno to EDOM if (f >= 0 && f != -0 && !isnan(f)) |
| 297 | // (or if we don't care about errno in the first place). We can merge the |
| 298 | // NaN check with by negating and inverting: !(0 > f), and leaving zero to |
| 299 | // sqrtf(). |
| 300 | if (!mathUpdatesErrno || !(0 > f)) { |
| 301 | # if defined(__AVX512FP16__) |
| 302 | __m128h v = _mm_set_sh(f); |
| 303 | v = _mm_sqrt_sh(v, v); |
| 304 | return _mm_cvtsh_h(v); |
| 305 | # endif |
| 306 | } |
| 307 | |
| 308 | // WG14's N2601 does not provide a way to tell which types an |
| 309 | // implementation supports, so we assume it doesn't and fall back to FP32 |
| 310 | float f32 = float(f); |
| 311 | f32 = sqrtf(x: f32); |
| 312 | return qfloat16::NearestFloat(f32); |
| 313 | #endif |
| 314 | } |
| 315 | |
| 316 | // The remainder of these utility functions complement qglobal.h |
| 317 | [[nodiscard]] inline int qRound(qfloat16 d) noexcept |
| 318 | { return qRound(f: static_cast<float>(d)); } |
| 319 | |
| 320 | [[nodiscard]] inline qint64 qRound64(qfloat16 d) noexcept |
| 321 | { return qRound64(f: static_cast<float>(d)); } |
| 322 | |
| 323 | [[nodiscard]] inline bool qFuzzyCompare(qfloat16 p1, qfloat16 p2) noexcept |
| 324 | { |
| 325 | qfloat16::NearestFloat f1 = static_cast<qfloat16::NearestFloat>(p1); |
| 326 | qfloat16::NearestFloat f2 = static_cast<qfloat16::NearestFloat>(p2); |
| 327 | // The significand precision for IEEE754 half precision is |
| 328 | // 11 bits (10 explicitly stored), or approximately 3 decimal |
| 329 | // digits. In selecting the fuzzy comparison factor of 102.5f |
| 330 | // (that is, (2^10+1)/10) below, we effectively select a |
| 331 | // window of about 1 (least significant) decimal digit about |
| 332 | // which the two operands can vary and still return true. |
| 333 | return (qAbs(t: f1 - f2) * 102.5f <= qMin(a: qAbs(t: f1), b: qAbs(t: f2))); |
| 334 | } |
| 335 | |
| 336 | /*! |
| 337 | \internal |
| 338 | */ |
| 339 | [[nodiscard]] inline bool qFuzzyIsNull(qfloat16 f) noexcept |
| 340 | { |
| 341 | return qAbs(t: f) < 0.00976f; // 1/102.5 to 3 significant digits; see qFuzzyCompare() |
| 342 | } |
| 343 | |
| 344 | [[nodiscard]] inline bool qIsNull(qfloat16 f) noexcept |
| 345 | { |
| 346 | return (f.b16 & static_cast<quint16>(0x7fff)) == 0; |
| 347 | } |
| 348 | |
| 349 | inline int qIntCast(qfloat16 f) noexcept |
| 350 | { return int(static_cast<qfloat16::NearestFloat>(f)); } |
| 351 | |
| 352 | #if !defined(Q_QDOC) && !QFLOAT16_IS_NATIVE |
| 353 | QT_WARNING_PUSH |
| 354 | QT_WARNING_DISABLE_CLANG("-Wc99-extensions" ) |
| 355 | QT_WARNING_DISABLE_GCC("-Wold-style-cast" ) |
| 356 | inline qfloat16::qfloat16(float f) noexcept |
| 357 | { |
| 358 | #if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) |
| 359 | __m128 packsingle = _mm_set_ss(f); |
| 360 | __m128i packhalf = _mm_cvtps_ph(packsingle, 0); |
| 361 | b16 = _mm_extract_epi16(packhalf, 0); |
| 362 | #elif defined (__ARM_FP16_FORMAT_IEEE) |
| 363 | __fp16 f16 = __fp16(f); |
| 364 | memcpy(&b16, &f16, sizeof(quint16)); |
| 365 | #else |
| 366 | quint32 u; |
| 367 | memcpy(dest: &u, src: &f, n: sizeof(quint32)); |
| 368 | const quint32 signAndExp = u >> 23; |
| 369 | const quint16 base = basetable[signAndExp]; |
| 370 | const quint16 shift = shifttable[signAndExp]; |
| 371 | const quint32 round = roundtable[signAndExp]; |
| 372 | quint32 mantissa = (u & 0x007fffff); |
| 373 | if ((signAndExp & 0xff) == 0xff) { |
| 374 | if (mantissa) // keep nan from truncating to inf |
| 375 | mantissa = qMax(a: 1U << shift, b: mantissa); |
| 376 | } else { |
| 377 | // Round half to even. First round up by adding one in the most |
| 378 | // significant bit we'll be discarding: |
| 379 | mantissa += round; |
| 380 | // If the last bit we'll be keeping is now set, but all later bits are |
| 381 | // clear, we were at half and shouldn't have rounded up; decrement will |
| 382 | // clear this last kept bit. Any later set bit hides the decrement. |
| 383 | if (mantissa & (1 << shift)) |
| 384 | --mantissa; |
| 385 | } |
| 386 | |
| 387 | // We use add as the mantissa may overflow causing |
| 388 | // the exp part to shift exactly one value. |
| 389 | b16 = quint16(base + (mantissa >> shift)); |
| 390 | #endif |
| 391 | } |
| 392 | QT_WARNING_POP |
| 393 | |
| 394 | inline qfloat16::operator float() const noexcept |
| 395 | { |
| 396 | #if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) |
| 397 | __m128i packhalf = _mm_cvtsi32_si128(b16); |
| 398 | __m128 packsingle = _mm_cvtph_ps(packhalf); |
| 399 | return _mm_cvtss_f32(packsingle); |
| 400 | #elif defined (__ARM_FP16_FORMAT_IEEE) |
| 401 | __fp16 f16; |
| 402 | memcpy(&f16, &b16, sizeof(quint16)); |
| 403 | return float(f16); |
| 404 | #else |
| 405 | quint32 u = mantissatable[offsettable[b16 >> 10] + (b16 & 0x3ff)] |
| 406 | + exponenttable[b16 >> 10]; |
| 407 | float f; |
| 408 | memcpy(dest: &f, src: &u, n: sizeof(quint32)); |
| 409 | return f; |
| 410 | #endif |
| 411 | } |
| 412 | #endif // Q_QDOC and non-native |
| 413 | |
| 414 | /* |
| 415 | qHypot compatibility; see ../kernel/qmath.h |
| 416 | */ |
| 417 | namespace QtPrivate { |
| 418 | template <> struct QHypotType<qfloat16, qfloat16> |
| 419 | { |
| 420 | using type = qfloat16; |
| 421 | }; |
| 422 | template <typename R> struct QHypotType<R, qfloat16> |
| 423 | { |
| 424 | using type = std::conditional_t<std::is_floating_point_v<R>, R, double>; |
| 425 | }; |
| 426 | template <typename R> struct QHypotType<qfloat16, R> : QHypotType<R, qfloat16> |
| 427 | { |
| 428 | }; |
| 429 | } |
| 430 | |
| 431 | // Avoid passing qfloat16 to std::hypot(), while ensuring return types |
| 432 | // consistent with the above: |
| 433 | inline auto qHypot(qfloat16 x, qfloat16 y) |
| 434 | { |
| 435 | #if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) || QFLOAT16_IS_NATIVE |
| 436 | return QtPrivate::QHypotHelper<qfloat16>(x).add(y).result(); |
| 437 | #else |
| 438 | return qfloat16(qHypot(x: float(x), y: float(y))); |
| 439 | #endif |
| 440 | } |
| 441 | |
| 442 | // in ../kernel/qmath.h |
| 443 | template<typename F, typename ...Fs> auto qHypot(F first, Fs... rest); |
| 444 | |
| 445 | template <typename T> typename QtPrivate::QHypotType<T, qfloat16>::type |
| 446 | qHypot(T x, qfloat16 y) |
| 447 | { |
| 448 | if constexpr (std::is_floating_point_v<T>) |
| 449 | return qHypot(x, float(y)); |
| 450 | else |
| 451 | return qHypot(x: qfloat16(x), y); |
| 452 | } |
| 453 | template <typename T> auto qHypot(qfloat16 x, T y) |
| 454 | { |
| 455 | return qHypot(y, x); |
| 456 | } |
| 457 | |
| 458 | #if defined(__cpp_lib_hypot) && __cpp_lib_hypot >= 201603L // Expected to be true |
| 459 | // If any are not qfloat16, convert each qfloat16 to float: |
| 460 | /* (The following splits the some-but-not-all-qfloat16 cases up, using |
| 461 | (X|Y|Z)&~(X&Y&Z) = X ? ~(Y&Z) : Y|Z = X&~(Y&Z) | ~X&Y | ~X&~Y&Z, |
| 462 | into non-overlapping cases, to avoid ambiguity.) */ |
| 463 | template <typename Ty, typename Tz, |
| 464 | typename std::enable_if< |
| 465 | // Ty, Tz aren't both qfloat16: |
| 466 | !(std::is_same_v<qfloat16, Ty> && std::is_same_v<qfloat16, Tz>), int>::type = 0> |
| 467 | auto qHypot(qfloat16 x, Ty y, Tz z) { return qHypot(qfloat16::NearestFloat(x), y, z); } |
| 468 | template <typename Tx, typename Tz, |
| 469 | typename std::enable_if< |
| 470 | // Tx isn't qfloat16: |
| 471 | !std::is_same_v<qfloat16, Tx>, int>::type = 0> |
| 472 | auto qHypot(Tx x, qfloat16 y, Tz z) { return qHypot(x, qfloat16::NearestFloat(y), z); } |
| 473 | template <typename Tx, typename Ty, |
| 474 | typename std::enable_if< |
| 475 | // Neither Tx nor Ty is qfloat16: |
| 476 | !std::is_same_v<qfloat16, Tx> && !std::is_same_v<qfloat16, Ty>, int>::type = 0> |
| 477 | auto qHypot(Tx x, Ty y, qfloat16 z) { return qHypot(x, y, qfloat16::NearestFloat(z)); } |
| 478 | |
| 479 | // If all are qfloat16, stay with qfloat16 (albeit via float, if no native support): |
| 480 | inline auto qHypot(qfloat16 x, qfloat16 y, qfloat16 z) |
| 481 | { |
| 482 | #if (defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__)) || QFLOAT16_IS_NATIVE |
| 483 | return QtPrivate::QHypotHelper<qfloat16>(x).add(y).add(z).result(); |
| 484 | #else |
| 485 | return qfloat16(qHypot(x: float(x), y: float(y), z: float(z))); |
| 486 | #endif |
| 487 | } |
| 488 | #endif // 3-arg std::hypot() is available |
| 489 | |
| 490 | QT_END_NAMESPACE |
| 491 | |
| 492 | namespace std { |
| 493 | template<> |
| 494 | class numeric_limits<QT_PREPEND_NAMESPACE(qfloat16)> : public numeric_limits<float> |
| 495 | { |
| 496 | public: |
| 497 | /* |
| 498 | Treat quint16 b16 as if it were: |
| 499 | uint S: 1; // b16 >> 15 (sign); can be set for zero |
| 500 | uint E: 5; // (b16 >> 10) & 0x1f (offset exponent) |
| 501 | uint M: 10; // b16 & 0x3ff (adjusted mantissa) |
| 502 | |
| 503 | for E == 0: magnitude is M / 2.^{24} |
| 504 | for 0 < E < 31: magnitude is (1. + M / 2.^{10}) * 2.^{E - 15) |
| 505 | for E == 31: not finite |
| 506 | */ |
| 507 | static constexpr int digits = 11; |
| 508 | static constexpr int min_exponent = -13; |
| 509 | static constexpr int max_exponent = 16; |
| 510 | |
| 511 | static constexpr int digits10 = 3; |
| 512 | static constexpr int max_digits10 = 5; |
| 513 | static constexpr int min_exponent10 = -4; |
| 514 | static constexpr int max_exponent10 = 4; |
| 515 | |
| 516 | static constexpr QT_PREPEND_NAMESPACE(qfloat16) epsilon() |
| 517 | { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_epsilon(); } |
| 518 | static constexpr QT_PREPEND_NAMESPACE(qfloat16) (min)() |
| 519 | { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_min(); } |
| 520 | static constexpr QT_PREPEND_NAMESPACE(qfloat16) denorm_min() |
| 521 | { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_denorm_min(); } |
| 522 | static constexpr QT_PREPEND_NAMESPACE(qfloat16) (max)() |
| 523 | { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_max(); } |
| 524 | static constexpr QT_PREPEND_NAMESPACE(qfloat16) lowest() |
| 525 | { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_lowest(); } |
| 526 | static constexpr QT_PREPEND_NAMESPACE(qfloat16) infinity() |
| 527 | { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_infinity(); } |
| 528 | static constexpr QT_PREPEND_NAMESPACE(qfloat16) quiet_NaN() |
| 529 | { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_quiet_NaN(); } |
| 530 | #if QT_CONFIG(signaling_nan) |
| 531 | static constexpr QT_PREPEND_NAMESPACE(qfloat16) signaling_NaN() |
| 532 | { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_signaling_NaN(); } |
| 533 | #else |
| 534 | static constexpr bool has_signaling_NaN = false; |
| 535 | #endif |
| 536 | }; |
| 537 | |
| 538 | template<> class numeric_limits<const QT_PREPEND_NAMESPACE(qfloat16)> |
| 539 | : public numeric_limits<QT_PREPEND_NAMESPACE(qfloat16)> {}; |
| 540 | template<> class numeric_limits<volatile QT_PREPEND_NAMESPACE(qfloat16)> |
| 541 | : public numeric_limits<QT_PREPEND_NAMESPACE(qfloat16)> {}; |
| 542 | template<> class numeric_limits<const volatile QT_PREPEND_NAMESPACE(qfloat16)> |
| 543 | : public numeric_limits<QT_PREPEND_NAMESPACE(qfloat16)> {}; |
| 544 | |
| 545 | // Adding overloads to std isn't allowed, so we can't extend this to support |
| 546 | // for fpclassify(), isnormal() &c. (which, furthermore, are macros on MinGW). |
| 547 | } // namespace std |
| 548 | |
| 549 | #endif // QFLOAT16_H |
| 550 | |