LibreOffice
LibreOffice 24.8 SDK C/C++ API Reference
Loading...
Searching...
No Matches
character.hxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20/*
21 * This file is part of LibreOffice published API.
22 */
23
24#ifndef INCLUDED_RTL_CHARACTER_HXX
25#define INCLUDED_RTL_CHARACTER_HXX
26
27#include "sal/config.h"
28
29#include <cassert>
30#include <cstddef>
31
32#include "sal/types.h"
33
34#if defined LIBO_INTERNAL_ONLY
35#include <type_traits>
36#endif
37
38namespace rtl
39{
48inline SAL_CONSTEXPR bool isUnicodeCodePoint(sal_uInt32 code) { return code <= 0x10FFFF; }
49
58inline SAL_CONSTEXPR bool isAscii(sal_uInt32 code)
59{
60 assert(isUnicodeCodePoint(code));
61 return code <= 0x7F;
62}
63
64#if defined LIBO_INTERNAL_ONLY
65bool isAscii(char) = delete;
66bool isAscii(signed char) = delete;
67template <typename T>
68inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
69isAscii(T code)
70{
71 return isAscii(sal_uInt32(code));
72}
73#endif
74
84inline SAL_CONSTEXPR bool isAsciiLowerCase(sal_uInt32 code)
85{
86 assert(isUnicodeCodePoint(code));
87 return code >= 'a' && code <= 'z';
88}
89
90#if defined LIBO_INTERNAL_ONLY
91bool isAsciiLowerCase(char) = delete;
92bool isAsciiLowerCase(signed char) = delete;
93template <typename T>
94inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
95isAsciiLowerCase(T code)
96{
97 return isAsciiLowerCase(sal_uInt32(code));
98}
99#endif
100
110inline SAL_CONSTEXPR bool isAsciiUpperCase(sal_uInt32 code)
111{
112 assert(isUnicodeCodePoint(code));
113 return code >= 'A' && code <= 'Z';
114}
115
116#if defined LIBO_INTERNAL_ONLY
117bool isAsciiUpperCase(char) = delete;
118bool isAsciiUpperCase(signed char) = delete;
119template <typename T>
120inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
121isAsciiUpperCase(T code)
122{
123 return isAsciiUpperCase(sal_uInt32(code));
124}
125#endif
126
136inline SAL_CONSTEXPR bool isAsciiAlpha(sal_uInt32 code)
137{
138 assert(isUnicodeCodePoint(code));
139 return isAsciiLowerCase(code) || isAsciiUpperCase(code);
140}
141
142#if defined LIBO_INTERNAL_ONLY
143bool isAsciiAlpha(char) = delete;
144bool isAsciiAlpha(signed char) = delete;
145template <typename T>
146inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
147isAsciiAlpha(T code)
148{
149 return isAsciiAlpha(sal_uInt32(code));
150}
151#endif
152
162inline SAL_CONSTEXPR bool isAsciiDigit(sal_uInt32 code)
163{
164 assert(isUnicodeCodePoint(code));
165 return code >= '0' && code <= '9';
166}
167
168#if defined LIBO_INTERNAL_ONLY
169bool isAsciiDigit(char) = delete;
170bool isAsciiDigit(signed char) = delete;
171template <typename T>
172inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
173isAsciiDigit(T code)
174{
175 return isAsciiDigit(sal_uInt32(code));
176}
177#endif
178
188inline SAL_CONSTEXPR bool isAsciiAlphanumeric(sal_uInt32 code)
189{
190 assert(isUnicodeCodePoint(code));
191 return isAsciiDigit(code) || isAsciiAlpha(code);
192}
193
194#if defined LIBO_INTERNAL_ONLY
195bool isAsciiAlphanumeric(char) = delete;
196bool isAsciiAlphanumeric(signed char) = delete;
197template <typename T>
198inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
200{
201 return isAsciiAlphanumeric(sal_uInt32(code));
202}
203#endif
204
214inline SAL_CONSTEXPR bool isAsciiCanonicHexDigit(sal_uInt32 code)
215{
216 assert(isUnicodeCodePoint(code));
217 return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
218}
219
220#if defined LIBO_INTERNAL_ONLY
221bool isAsciiCanonicHexDigit(char) = delete;
222bool isAsciiCanonicHexDigit(signed char) = delete;
223template <typename T>
224inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
226{
227 return isAsciiCanonicHexDigit(sal_uInt32(code));
228}
229#endif
230
240inline SAL_CONSTEXPR bool isAsciiHexDigit(sal_uInt32 code)
241{
242 assert(isUnicodeCodePoint(code));
243 return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
244}
245
246#if defined LIBO_INTERNAL_ONLY
247bool isAsciiHexDigit(char) = delete;
248bool isAsciiHexDigit(signed char) = delete;
249template <typename T>
250inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
251isAsciiHexDigit(T code)
252{
253 return isAsciiHexDigit(sal_uInt32(code));
254}
255#endif
256
265inline SAL_CONSTEXPR bool isAsciiOctalDigit(sal_uInt32 code)
266{
267 assert(isUnicodeCodePoint(code));
268 return code >= '0' && code <= '7';
269}
270
271#if defined LIBO_INTERNAL_ONLY
272bool isAsciiOctalDigit(char) = delete;
273bool isAsciiOctalDigit(signed char) = delete;
274template <typename T>
275inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
276isAsciiOctalDigit(T code)
277{
278 return isAsciiOctalDigit(sal_uInt32(code));
279}
280#endif
281
291inline SAL_CONSTEXPR bool isAsciiWhiteSpace(sal_uInt32 code)
292{
293 assert(isUnicodeCodePoint(code));
294 return code == ' ' || code == '\f' || code == '\n' || code == '\r' || code == '\t'
295 || code == '\v';
296}
297
298#if defined LIBO_INTERNAL_ONLY
299bool isAsciiWhiteSpace(char) = delete;
300bool isAsciiWhiteSpace(signed char) = delete;
301template <typename T>
302inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
303isAsciiWhiteSpace(T code)
304{
305 return isAsciiWhiteSpace(sal_uInt32(code));
306}
307#endif
308
317inline SAL_CONSTEXPR sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
318{
319 assert(isUnicodeCodePoint(code));
320 return isAsciiLowerCase(code) ? code - 32 : code;
321}
322
323#if defined LIBO_INTERNAL_ONLY
324sal_uInt32 toAsciiUpperCase(char) = delete;
325sal_uInt32 toAsciiUpperCase(signed char) = delete;
326template <typename T>
327inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32),
328 sal_uInt32>
329toAsciiUpperCase(T code)
330{
331 return toAsciiUpperCase(sal_uInt32(code));
332}
333#endif
334
343inline SAL_CONSTEXPR sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
344{
345 assert(isUnicodeCodePoint(code));
346 return isAsciiUpperCase(code) ? code + 32 : code;
347}
348
349#if defined LIBO_INTERNAL_ONLY
350sal_uInt32 toAsciiLowerCase(char) = delete;
351sal_uInt32 toAsciiLowerCase(signed char) = delete;
352template <typename T>
353inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32),
354 sal_uInt32>
355toAsciiLowerCase(T code)
356{
357 return toAsciiLowerCase(sal_uInt32(code));
358}
359#endif
360
373inline SAL_CONSTEXPR sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
374{
375 assert(isUnicodeCodePoint(code1));
376 assert(isUnicodeCodePoint(code2));
377 return static_cast<sal_Int32>(toAsciiLowerCase(code1))
378 - static_cast<sal_Int32>(toAsciiLowerCase(code2));
379}
380
382namespace detail
383{
384sal_uInt32 const surrogatesHighFirst = 0xD800;
385sal_uInt32 const surrogatesHighLast = 0xDBFF;
386sal_uInt32 const surrogatesLowFirst = 0xDC00;
387sal_uInt32 const surrogatesLowLast = 0xDFFF;
388}
390
399inline SAL_CONSTEXPR bool isSurrogate(sal_uInt32 code)
400{
401 assert(isUnicodeCodePoint(code));
402 return code >= detail::surrogatesHighFirst && code <= detail::surrogatesLowLast;
403}
404
413inline SAL_CONSTEXPR bool isHighSurrogate(sal_uInt32 code)
414{
415 assert(isUnicodeCodePoint(code));
416 return code >= detail::surrogatesHighFirst && code <= detail::surrogatesHighLast;
417}
418
427inline SAL_CONSTEXPR bool isLowSurrogate(sal_uInt32 code)
428{
429 assert(isUnicodeCodePoint(code));
430 return code >= detail::surrogatesLowFirst && code <= detail::surrogatesLowLast;
431}
432
442{
443 assert(isUnicodeCodePoint(code));
444 assert(code >= 0x10000);
445 return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
446}
447
457{
458 assert(isUnicodeCodePoint(code));
459 assert(code >= 0x10000);
460 return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
461}
462
473inline SAL_CONSTEXPR sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
474{
475 assert(isHighSurrogate(high));
476 assert(isLowSurrogate(low));
477 return ((high - detail::surrogatesHighFirst) << 10) + (low - detail::surrogatesLowFirst)
478 + 0x10000;
479}
480
493inline SAL_CONSTEXPR std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode* output)
494{
495 assert(isUnicodeCodePoint(code));
496 assert(output != NULL);
497 if (code < 0x10000)
498 {
499 output[0] = code;
500 return 1;
501 }
502 else
503 {
504 output[0] = getHighSurrogate(code);
505 output[1] = getLowSurrogate(code);
506 return 2;
507 }
508}
509
518inline SAL_CONSTEXPR bool isUnicodeScalarValue(sal_uInt32 code)
519{
520 return isUnicodeCodePoint(code) && !isSurrogate(code);
521}
522}
523
524#endif
525
526/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define SAL_CONSTEXPR
C++11 "constexpr" feature.
Definition types.h:422
sal_uInt16 sal_Unicode
Definition types.h:123
Definition bootstrap.hxx:34
SAL_CONSTEXPR bool isUnicodeScalarValue(sal_uInt32 code)
Check for Unicode scalar value.
Definition character.hxx:518
SAL_CONSTEXPR bool isAsciiHexDigit(sal_uInt32 code)
Check for ASCII hexadecimal digit character.
Definition character.hxx:240
SAL_CONSTEXPR sal_Unicode getLowSurrogate(sal_uInt32 code)
Get low surrogate half of a non-BMP Unicode code point.
Definition character.hxx:456
SAL_CONSTEXPR bool isLowSurrogate(sal_uInt32 code)
Check for low surrogate.
Definition character.hxx:427
SAL_CONSTEXPR bool isUnicodeCodePoint(sal_uInt32 code)
Check for Unicode code point.
Definition character.hxx:48
SAL_CONSTEXPR bool isAsciiAlpha(sal_uInt32 code)
Check for ASCII alphabetic character.
Definition character.hxx:136
SAL_CONSTEXPR bool isAsciiOctalDigit(sal_uInt32 code)
Check for ASCII octal digit character.
Definition character.hxx:265
SAL_CONSTEXPR bool isAsciiLowerCase(sal_uInt32 code)
Check for ASCII lower case character.
Definition character.hxx:84
SAL_CONSTEXPR bool isAsciiWhiteSpace(sal_uInt32 code)
Check for ASCII white space character.
Definition character.hxx:291
SAL_CONSTEXPR bool isAsciiCanonicHexDigit(sal_uInt32 code)
Check for ASCII canonic hexadecimal digit character.
Definition character.hxx:214
SAL_CONSTEXPR std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode *output)
Split a Unicode code point into UTF-16 code units.
Definition character.hxx:493
SAL_CONSTEXPR sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
Compare two characters ignoring ASCII case.
Definition character.hxx:373
SAL_CONSTEXPR bool isAscii(sal_uInt32 code)
Check for ASCII character.
Definition character.hxx:58
SAL_CONSTEXPR sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
Combine surrogates to form a code point.
Definition character.hxx:473
SAL_CONSTEXPR sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
Convert a character, if ASCII, to upper case.
Definition character.hxx:317
SAL_CONSTEXPR bool isAsciiAlphanumeric(sal_uInt32 code)
Check for ASCII alphanumeric character.
Definition character.hxx:188
SAL_CONSTEXPR bool isAsciiUpperCase(sal_uInt32 code)
Check for ASCII upper case character.
Definition character.hxx:110
SAL_CONSTEXPR bool isAsciiDigit(sal_uInt32 code)
Check for ASCII digit character.
Definition character.hxx:162
SAL_CONSTEXPR bool isSurrogate(sal_uInt32 code)
Check for surrogate.
Definition character.hxx:399
SAL_CONSTEXPR bool isHighSurrogate(sal_uInt32 code)
Check for high surrogate.
Definition character.hxx:413
SAL_CONSTEXPR sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
Convert a character, if ASCII, to lower case.
Definition character.hxx:343
SAL_CONSTEXPR sal_Unicode getHighSurrogate(sal_uInt32 code)
Get high surrogate half of a non-BMP Unicode code point.
Definition character.hxx:441