| 1 | /* |
| 2 | * Copyright (C) 2006 George Staikos <[email protected]> |
| 3 | * Copyright (C) 2006 Alexey Proskuryakov <[email protected]> |
| 4 | * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
| 5 | * |
| 6 | * This library is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Library General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * This library is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Library General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Library General Public License |
| 17 | * along with this library; see the file COPYING.LIB. If not, write to |
| 18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 19 | * Boston, MA 02110-1301, USA. |
| 20 | * |
| 21 | */ |
| 22 | |
| 23 | #ifndef WTF_UNICODE_QT4_H |
| 24 | #define WTF_UNICODE_QT4_H |
| 25 | |
| 26 | #include <QtCore/qchar.h> |
| 27 | #include <QtCore/qstring.h> |
| 28 | |
| 29 | #include <config.h> |
| 30 | |
| 31 | #include <stdint.h> |
| 32 | |
| 33 | // ugly hack to make UChar compatible with JSChar in API/JSStringRef.h |
| 34 | #if defined(Q_OS_WIN) || COMPILER(WINSCW) || COMPILER(RVCT) |
| 35 | typedef wchar_t UChar; |
| 36 | #else |
| 37 | typedef uint16_t UChar; |
| 38 | #endif |
| 39 | typedef int32_t UChar32; |
| 40 | |
// A handful of defines taken from ICU.

// Non-zero when c lies in 0xD800..0xDBFF (first half of a surrogate pair).
#define U16_IS_LEAD(c) (((c) & 0xfffffc00) == 0xd800)
// Non-zero when c lies in 0xDC00..0xDFFF (second half of a surrogate pair).
#define U16_IS_TRAIL(c) (((c) & 0xfffffc00) == 0xdc00)
// Constant subtracted by U16_GET_SUPPLEMENTARY after shifting and adding the
// two surrogate values.
#define U16_SURROGATE_OFFSET ((0xd800 << 10UL) + 0xdc00 - 0x10000)
// Combines a lead and a trail surrogate into one UChar32 value.
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead) << 10UL) + (UChar32)(trail) - U16_SURROGATE_OFFSET)

// Lead / trail surrogate, as UChar, for a value at or above 0x10000.
#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0)
#define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00)

// Non-zero when c lies anywhere in 0xD800..0xDFFF.
#define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800)
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// Only tests bit 0x400, so it is meaningful only for a c that is already
// known to be a surrogate.
#define U16_IS_SURROGATE_LEAD(c) (((c) & 0x400) == 0)

// Reads the unit at s[i] into c and post-increments i. If that unit is a lead
// surrogate, i is still below length and the next unit is a trail surrogate,
// the trail is consumed as well and c becomes the combined value. An unpaired
// surrogate is stored in c unchanged.
#define U16_NEXT(s, i, length, c) { \
    (c) = (s)[(i)++]; \
    if (U16_IS_LEAD(c)) { \
        uint16_t __trail; \
        if ((i) < (length) && U16_IS_TRAIL(__trail = (s)[(i)])) { \
            ++(i); \
            (c) = U16_GET_SUPPLEMENTARY((c), __trail); \
        } \
    } \
}

// Mirror image of U16_NEXT: pre-decrements i, reads s[i] into c and, when
// that unit is a trail surrogate preceded (not before start) by a lead
// surrogate, steps back once more and stores the combined value in c.
#define U16_PREV(s, start, i, c) { \
    (c) = (s)[--(i)]; \
    if (U16_IS_TRAIL(c)) { \
        uint16_t __lead; \
        if ((i) > (start) && U16_IS_LEAD(__lead = (s)[(i) - 1])) { \
            --(i); \
            (c) = U16_GET_SUPPLEMENTARY(__lead, (c)); \
        } \
    } \
}

// 32-bit value with only bit number x set.
#define U_MASK(x) ((uint32_t)1 << (x))
| 80 | |
| 81 | namespace WTF { |
| 82 | namespace Unicode { |
| 83 | |
| 84 | QT_USE_NAMESPACE |
| 85 | |
| 86 | enum Direction { |
| 87 | LeftToRight = QChar::DirL, |
| 88 | RightToLeft = QChar::DirR, |
| 89 | EuropeanNumber = QChar::DirEN, |
| 90 | EuropeanNumberSeparator = QChar::DirES, |
| 91 | EuropeanNumberTerminator = QChar::DirET, |
| 92 | ArabicNumber = QChar::DirAN, |
| 93 | CommonNumberSeparator = QChar::DirCS, |
| 94 | BlockSeparator = QChar::DirB, |
| 95 | SegmentSeparator = QChar::DirS, |
| 96 | WhiteSpaceNeutral = QChar::DirWS, |
| 97 | OtherNeutral = QChar::DirON, |
| 98 | LeftToRightEmbedding = QChar::DirLRE, |
| 99 | LeftToRightOverride = QChar::DirLRO, |
| 100 | RightToLeftArabic = QChar::DirAL, |
| 101 | RightToLeftEmbedding = QChar::DirRLE, |
| 102 | RightToLeftOverride = QChar::DirRLO, |
| 103 | PopDirectionalFormat = QChar::DirPDF, |
| 104 | NonSpacingMark = QChar::DirNSM, |
| 105 | BoundaryNeutral = QChar::DirBN |
| 106 | }; |
| 107 | |
| 108 | enum DecompositionType { |
| 109 | DecompositionNone = QChar::NoDecomposition, |
| 110 | DecompositionCanonical = QChar::Canonical, |
| 111 | DecompositionCompat = QChar::Compat, |
| 112 | DecompositionCircle = QChar::Circle, |
| 113 | DecompositionFinal = QChar::Final, |
| 114 | DecompositionFont = QChar::Font, |
| 115 | DecompositionFraction = QChar::Fraction, |
| 116 | DecompositionInitial = QChar::Initial, |
| 117 | DecompositionIsolated = QChar::Isolated, |
| 118 | DecompositionMedial = QChar::Medial, |
| 119 | DecompositionNarrow = QChar::Narrow, |
| 120 | DecompositionNoBreak = QChar::NoBreak, |
| 121 | DecompositionSmall = QChar::Small, |
| 122 | DecompositionSquare = QChar::Square, |
| 123 | DecompositionSub = QChar::Sub, |
| 124 | DecompositionSuper = QChar::Super, |
| 125 | DecompositionVertical = QChar::Vertical, |
| 126 | DecompositionWide = QChar::Wide |
| 127 | }; |
| 128 | |
| 129 | enum CharCategory { |
| 130 | Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing), |
| 131 | Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining), |
| 132 | Mark_Enclosing = U_MASK(QChar::Mark_Enclosing), |
| 133 | Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit), |
| 134 | Number_Letter = U_MASK(QChar::Number_Letter), |
| 135 | Number_Other = U_MASK(QChar::Number_Other), |
| 136 | Separator_Space = U_MASK(QChar::Separator_Space), |
| 137 | Separator_Line = U_MASK(QChar::Separator_Line), |
| 138 | Separator_Paragraph = U_MASK(QChar::Separator_Paragraph), |
| 139 | Other_Control = U_MASK(QChar::Other_Control), |
| 140 | Other_Format = U_MASK(QChar::Other_Format), |
| 141 | Other_Surrogate = U_MASK(QChar::Other_Surrogate), |
| 142 | Other_PrivateUse = U_MASK(QChar::Other_PrivateUse), |
| 143 | Other_NotAssigned = U_MASK(QChar::Other_NotAssigned), |
| 144 | Letter_Uppercase = U_MASK(QChar::Letter_Uppercase), |
| 145 | Letter_Lowercase = U_MASK(QChar::Letter_Lowercase), |
| 146 | Letter_Titlecase = U_MASK(QChar::Letter_Titlecase), |
| 147 | Letter_Modifier = U_MASK(QChar::Letter_Modifier), |
| 148 | Letter_Other = U_MASK(QChar::Letter_Other), |
| 149 | Punctuation_Connector = U_MASK(QChar::Punctuation_Connector), |
| 150 | Punctuation_Dash = U_MASK(QChar::Punctuation_Dash), |
| 151 | Punctuation_Open = U_MASK(QChar::Punctuation_Open), |
| 152 | Punctuation_Close = U_MASK(QChar::Punctuation_Close), |
| 153 | Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote), |
| 154 | Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote), |
| 155 | Punctuation_Other = U_MASK(QChar::Punctuation_Other), |
| 156 | Symbol_Math = U_MASK(QChar::Symbol_Math), |
| 157 | Symbol_Currency = U_MASK(QChar::Symbol_Currency), |
| 158 | Symbol_Modifier = U_MASK(QChar::Symbol_Modifier), |
| 159 | Symbol_Other = U_MASK(QChar::Symbol_Other) |
| 160 | }; |
| 161 | |
| 162 | |
| 163 | // FIXME: handle surrogates correctly in all methods |
| 164 | |
| 165 | inline UChar32 toLower(UChar32 ch) |
| 166 | { |
| 167 | return QChar::toLower(ucs4: ch); |
| 168 | } |
| 169 | |
| 170 | inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
| 171 | { |
| 172 | QString s = QString::fromRawData(reinterpret_cast<const QChar *>(src), size: srcLength); |
| 173 | |
| 174 | s = s.toLower(); |
| 175 | |
| 176 | *error = resultLength < s.size(); |
| 177 | |
| 178 | if (!*error && result) { |
| 179 | const ushort *p = reinterpret_cast<const ushort *>(s.constData()); |
| 180 | ushort *pp = reinterpret_cast<ushort *>(result); |
| 181 | memcpy(dest: pp, src: p, n: s.size() * sizeof(ushort)); |
| 182 | |
| 183 | if (resultLength > s.size()) |
| 184 | pp[s.size()] = 0; |
| 185 | } |
| 186 | |
| 187 | return s.size(); |
| 188 | } |
| 189 | |
| 190 | inline UChar32 toUpper(UChar32 ch) |
| 191 | { |
| 192 | return QChar::toUpper(ucs4: ch); |
| 193 | } |
| 194 | |
| 195 | inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
| 196 | { |
| 197 | QString s = QString::fromRawData(reinterpret_cast<const QChar *>(src), size: srcLength); |
| 198 | |
| 199 | s = s.toUpper(); |
| 200 | |
| 201 | *error = resultLength < s.size(); |
| 202 | |
| 203 | if (!*error && result) { |
| 204 | const ushort *p = reinterpret_cast<const ushort *>(s.constData()); |
| 205 | ushort *pp = reinterpret_cast<ushort *>(result); |
| 206 | memcpy(dest: pp, src: p, n: s.size() * sizeof(ushort)); |
| 207 | |
| 208 | if (resultLength > s.size()) |
| 209 | pp[s.size()] = 0; |
| 210 | } |
| 211 | |
| 212 | return s.size(); |
| 213 | } |
| 214 | |
| 215 | inline UChar32 toTitleCase(UChar32 c) |
| 216 | { |
| 217 | return QChar::toTitleCase(ucs4: c); |
| 218 | } |
| 219 | |
| 220 | inline UChar32 foldCase(UChar32 c) |
| 221 | { |
| 222 | return QChar::toCaseFolded(ucs4: c); |
| 223 | } |
| 224 | |
| 225 | inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
| 226 | { |
| 227 | QString s = QString::fromRawData(reinterpret_cast<const QChar *>(src), size: srcLength); |
| 228 | |
| 229 | s = s.toCaseFolded(); |
| 230 | |
| 231 | *error = resultLength < s.size(); |
| 232 | |
| 233 | if (!*error && result) { |
| 234 | const ushort *p = reinterpret_cast<const ushort *>(s.constData()); |
| 235 | ushort *pp = reinterpret_cast<ushort *>(result); |
| 236 | memcpy(dest: pp, src: p, n: s.size() * sizeof(ushort)); |
| 237 | |
| 238 | if (resultLength > s.size()) |
| 239 | pp[s.size()] = 0; |
| 240 | } |
| 241 | |
| 242 | return s.size(); |
| 243 | } |
| 244 | |
| 245 | inline bool isArabicChar(UChar32 c) |
| 246 | { |
| 247 | return c >= 0x0600 && c <= 0x06FF; |
| 248 | } |
| 249 | |
| 250 | inline bool isPrintableChar(UChar32 c) |
| 251 | { |
| 252 | return QChar::isPrint(ucs4: c); |
| 253 | } |
| 254 | |
| 255 | inline bool isSeparatorSpace(UChar32 c) |
| 256 | { |
| 257 | return QChar::category(ucs4: c) == QChar::Separator_Space; |
| 258 | } |
| 259 | |
| 260 | inline bool isPunct(UChar32 c) |
| 261 | { |
| 262 | return QChar::isPunct(ucs4: c); |
| 263 | } |
| 264 | |
| 265 | inline bool isLower(UChar32 c) |
| 266 | { |
| 267 | return QChar::isLower(ucs4: c); |
| 268 | } |
| 269 | |
| 270 | inline bool hasLineBreakingPropertyComplexContext(UChar32) |
| 271 | { |
| 272 | // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context). |
| 273 | return false; |
| 274 | } |
| 275 | |
| 276 | inline UChar32 mirroredChar(UChar32 c) |
| 277 | { |
| 278 | return QChar::mirroredChar(ucs4: c); |
| 279 | } |
| 280 | |
| 281 | inline uint8_t combiningClass(UChar32 c) |
| 282 | { |
| 283 | return QChar::combiningClass(ucs4: c); |
| 284 | } |
| 285 | |
| 286 | inline DecompositionType decompositionType(UChar32 c) |
| 287 | { |
| 288 | return (DecompositionType)QChar::decompositionTag(ucs4: c); |
| 289 | } |
| 290 | |
| 291 | inline int umemcasecmp(const UChar* a, const UChar* b, int len) |
| 292 | { |
| 293 | // handle surrogates correctly |
| 294 | for (int i = 0; i < len; ++i) { |
| 295 | uint c1 = QChar::toCaseFolded(ucs4: ushort(a[i])); |
| 296 | uint c2 = QChar::toCaseFolded(ucs4: ushort(b[i])); |
| 297 | if (c1 != c2) |
| 298 | return c1 - c2; |
| 299 | } |
| 300 | return 0; |
| 301 | } |
| 302 | |
| 303 | inline Direction direction(UChar32 c) |
| 304 | { |
| 305 | return (Direction)QChar::direction(ucs4: c); |
| 306 | } |
| 307 | |
| 308 | inline CharCategory category(UChar32 c) |
| 309 | { |
| 310 | return (CharCategory) U_MASK(QChar::category(c)); |
| 311 | } |
| 312 | |
| 313 | } } |
| 314 | |
| 315 | #endif // WTF_UNICODE_QT4_H |
| 316 | |