Doxygen
Toggle main menu visibility
Loading...
Searching...
No Matches
utf8.h
Go to the documentation of this file.
1
/******************************************************************************
2
*
3
* Copyright (C) 1997-2021 by Dimitri van Heesch.
4
*
5
* Permission to use, copy, modify, and distribute this software and its
6
* documentation under the terms of the GNU General Public License is hereby
7
* granted. No representations are made about the suitability of this software
8
* for any purpose. It is provided "as is" without express or implied warranty.
9
* See the GNU General Public License for more details.
10
*
11
* Documents produced by Doxygen are derivative works derived from the
12
* input used in their production; they are not affected by this license.
13
*
14
*/
15
16
#ifndef UTF8_H
17
#define UTF8_H
18
19
#include <cstdint>
20
#include <string>
21
22
class
TextStream
;
23
24
/** @file
25
* @brief Various UTF8 related helper functions.
26
*
27
* See https://en.wikipedia.org/wiki/UTF-8 for details on UTF8 encoding.
28
*/
29
30
31
/** Converts the input string into a lower case version, also taking into account
32
* non-ASCII characters that have a lower case variant.
33
*/
34
std::string
convertUTF8ToLower
(
const
std::string &input);
35
36
/** Converts the input string into an upper case version, also taking into account
37
* non-ASCII characters that have an upper case variant.
38
*/
39
std::string
convertUTF8ToUpper
(
const
std::string &input);
40
41
/** Returns the UTF8 character found at byte position pos in the input string.
42
* The resulting string can be a multi byte sequence.
43
*/
44
std::string
getUTF8CharAt
(
const
std::string &input,
size_t
pos);
45
46
/** Returns the 32-bit Unicode value of the character at byte position pos in
47
* the UTF8 encoded input.
48
*/
49
uint32_t
getUnicodeForUTF8CharAt
(
const
std::string &input,
size_t
pos);
50
51
/** Returns the number of bytes making up a single UTF8 character given the first byte
52
* in the sequence.
53
*/
54
uint8_t
getUTF8CharNumBytes
(
char
firstByte);
55
56
/** Writes the UTF8 character pointed to by s to stream t and returns a pointer
57
* to the next character.
58
*/
59
const
char
*
writeUTF8Char
(
TextStream
&t,
const
char
*s);
60
61
/** Returns true iff the last character in input is a multibyte character. */
62
bool
lastUTF8CharIsMultibyte
(
const
std::string &input);
63
64
/** Returns true iff the input string at byte position pos holds an upper case character. */
65
bool
isUTF8CharUpperCase
(
const
std::string &input,
size_t
pos);
66
67
/** Check if the first character pointed at by input is a non-breakable whitespace character.
68
* Returns the byte size of the character if there is a match or 0 if not.
69
*/
70
int
isUTF8NonBreakableSpace
(
const
char
*input);
71
72
/** Check if the given Unicode character represents a punctuation character */
73
bool
isUTF8PunctuationCharacter
(uint32_t unicode);
74
75
#endif
TextStream
Text streaming class that buffers data.
Definition
textstream.h:36
convertUTF8ToUpper
std::string convertUTF8ToUpper(const std::string &input)
Converts the input string into an upper case version, also taking into account non-ASCII characters th...
Definition
utf8.cpp:192
isUTF8CharUpperCase
bool isUTF8CharUpperCase(const std::string &input, size_t pos)
Returns true iff the input string at byte position pos holds an upper case character.
Definition
utf8.cpp:218
isUTF8PunctuationCharacter
bool isUTF8PunctuationCharacter(uint32_t unicode)
Check if the given Unicode character represents a punctuation character.
Definition
utf8.cpp:234
getUnicodeForUTF8CharAt
uint32_t getUnicodeForUTF8CharAt(const std::string &input, size_t pos)
Returns the 32-bit Unicode value of the character at byte position pos in the UTF8 encoded input.
Definition
utf8.cpp:135
lastUTF8CharIsMultibyte
bool lastUTF8CharIsMultibyte(const std::string &input)
Returns true iff the last character in input is a multibyte character.
Definition
utf8.cpp:212
isUTF8NonBreakableSpace
int isUTF8NonBreakableSpace(const char *input)
Check if the first character pointed at by input is a non-breakable whitespace character.
Definition
utf8.cpp:228
convertUTF8ToLower
std::string convertUTF8ToLower(const std::string &input)
Converts the input string into a lower case version, also taking into account non-ASCII characters th...
Definition
utf8.cpp:187
getUTF8CharAt
std::string getUTF8CharAt(const std::string &input, size_t pos)
Returns the UTF8 character found at byte position pos in the input string.
Definition
utf8.cpp:127
getUTF8CharNumBytes
uint8_t getUTF8CharNumBytes(char firstByte)
Returns the number of bytes making up a single UTF8 character given the first byte in the sequence.
Definition
utf8.cpp:23
writeUTF8Char
const char * writeUTF8Char(TextStream &t, const char *s)
Writes the UTF8 character pointed to by s to stream t and returns a pointer to the next character.
Definition
utf8.cpp:197
src
utf8.h
Generated by
1.17.0