ICU4C
#include <parseerr.h>
#include <ptypes.h>
#include <putil.h>
#include <stringoptions.h>
#include <ubrk.h>
#include <uchar.h>
#include <ucol.h>
#include <ucpmap.h>
#include <udisplaycontext.h>
#include <uenum.h>
#include <uldnames.h>
#include <uloc.h>
#include <ulocdata.h>
#include <umachine.h>
#include <unorm2.h>
#include <urep.h>
#include <uscript.h>
#include <ustring.h>
#include <utext.h>
#include <utf.h>
#include <utf16.h>
#include <utf8.h>
#include <utrans.h>
#include <utypes.h>
#include <uversion.h>
Summary
Typedefs |
|
---|---|
OldUChar
|
OldUChar uint16_t
Default ICU 58 definition of UChar. |
UBidiPairedBracketType
|
typedef Bidi Paired Bracket Type constants. |
UBlockCode
|
typedef enum UBlockCode
|
UBool
|
typedef int8_t
The ICU boolean type, a signed-byte integer. |
UBreakIteratorType
|
typedef enum UBreakIteratorType
The possible types of text boundaries. |
UCPMap
|
typedef struct UCPMap
Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values. |
UCPMapValueFilter(const void *context, uint32_t value)
|
typedef uint32_t U_CALLCONV
Callback function type: Modifies a map value. |
UChar
|
UChar char16_t
The base type for UTF-16 code units and pointers. |
UChar32
|
typedef int32_t
Define UChar32 as a type for single Unicode code points. |
UCharCategory
|
typedef enum UCharCategory
Data for enumerated Unicode general category types. |
UCharDirection
|
typedef enum UCharDirection
This specifies the language directional property of a character set. |
UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
|
typedef Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type"). |
UCharNameChoice
|
typedef enum UCharNameChoice
Selector constants for u_charName(). |
UColAttribute
|
typedef enum UColAttribute
Attributes that collation service understands. |
UColAttributeValue
|
typedef enum UColAttributeValue
Enum containing attribute values for controlling collation behavior. |
UColBoundMode
|
typedef enum UColBoundMode
Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum. |
UColReorderCode
|
typedef enum UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes. |
UCollationResult
|
typedef enum UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method. |
UCollationStrength
|
typedef Base letter represents a primary difference. |
UCollator
|
typedef struct UCollator
structure representing a collator object instance |
UDate
|
typedef double
Date and Time data type. |
UDecompositionType
|
typedef enum UDecompositionType
Decomposition Type constants. |
UDisplayContext
|
typedef enum UDisplayContext
|
UDisplayContextType
|
typedef enum UDisplayContextType
|
UEastAsianWidth
|
typedef enum UEastAsianWidth
East Asian Width constants. |
UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
|
typedef Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name. |
UEnumeration
|
typedef struct UEnumeration
structure representing an enumeration object instance |
UErrorCode
|
typedef enum UErrorCode
Standard ICU4C error code type, a substitute for exceptions. |
UGraphemeClusterBreak
|
typedef Grapheme Cluster Break constants. |
UHangulSyllableType
|
typedef enum UHangulSyllableType
Hangul Syllable Type constants. |
UIdentifierStatus
|
typedef enum UIdentifierStatus
Identifier Status constants. |
UIdentifierType
|
typedef enum UIdentifierType
Identifier Type constants. |
UIndicPositionalCategory
|
typedef Indic Positional Category constants. |
UIndicSyllabicCategory
|
typedef Indic Syllabic Category constants. |
UJoiningGroup
|
typedef enum UJoiningGroup
Joining Group constants. |
UJoiningType
|
typedef enum UJoiningType
Joining Type constants. |
ULineBreak
|
typedef enum ULineBreak
Line Break constants. |
ULineBreakTag
|
typedef enum ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus(). |
ULocAvailableType
|
typedef enum ULocAvailableType
Types for uloc_getAvailableByType and uloc_countAvailableByType. |
ULocaleData
|
typedef struct ULocaleData
A locale data object. |
ULocaleDisplayNames
|
typedef struct ULocaleDisplayNames
C typedef for struct ULocaleDisplayNames. |
UNormalizationCheckResult
|
typedef Result values for normalization quick check functions. |
UNormalizer2
|
typedef struct UNormalizer2
C typedef for struct UNormalizer2. |
UNumericType
|
typedef enum UNumericType
Numeric Type constants. |
UParseError
|
typedef struct UParseError
A UParseError struct is used to return detailed information about parsing errors. |
UProperty
|
typedef enum UProperty
Selection constants for Unicode properties. |
UPropertyNameChoice
|
typedef enum UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName(). |
UReplaceable
|
typedef void *
An opaque replaceable text object. |
UReplaceableCallbacks
|
typedef struct UReplaceableCallbacks
A set of function pointers that transliterators use to manipulate a UReplaceable. |
UScriptCode
|
typedef enum UScriptCode
Constants for ISO 15924 script codes. |
UScriptUsage
|
typedef enum UScriptUsage
Script usage constants. |
USentenceBreak
|
typedef enum USentenceBreak
Sentence Break constants. |
USentenceBreakTag
|
typedef enum USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus(). |
UText
|
typedef struct UText
C typedef for struct UText. |
UTransDirection
|
typedef enum UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator. |
UTransPosition
|
typedef struct UTransPosition
Position structure for utrans_transIncremental() incremental transliteration. |
UTransliterator
|
typedef void *
An opaque transliterator for use in C. |
UVersionInfo[U_MAX_VERSION_LENGTH]
|
typedef uint8_t
The binary form of a version on ICU APIs is an array of 4 uint8_t. |
UVerticalOrientation
|
typedef enum UVerticalOrientation
Vertical Orientation constants. |
UWordBreak
|
typedef enum UWordBreak
Enum constants for the word break tags returned by getRuleStatus(). |
UWordBreakValues
|
typedef enum UWordBreakValues
Word Break constants. |
Variables |
|
---|---|
context
|
U_CDECL_BEGIN typedef void *
|
Functions |
|
---|---|
UChar(U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context)
|
U_CDECL_BEGIN typedef
Callback function for u_unescapeAt() that returns a character of the source text given an offset and a context pointer.
|
u_charAge(UChar32 c, UVersionInfo versionArray)
|
U_CAPI void U_EXPORT2
Get the "age" of the code point.
|
u_charDigitValue(UChar32 c)
|
U_CAPI int32_t U_EXPORT2
Returns the decimal digit value of a decimal digit character.
|
u_charDirection(UChar32 c)
|
U_CAPI UCharDirection U_EXPORT2
Returns the bidirectional category value for the code point, which is used in the Unicode bidirectional algorithm (UAX #9 http://www.unicode.org/reports/tr9/).
|
u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
|
Find a Unicode character by its name and return its code point value.
|
u_charMirror(UChar32 c)
|
Maps the specified character to a "mirror-image" character.
|
u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Retrieve the name of a Unicode character.
|
u_charType(UChar32 c)
|
U_CAPI int8_t U_EXPORT2
Returns the general category value for the code point.
|
u_countChar32(const UChar *s, int32_t length)
|
U_CAPI int32_t U_EXPORT2
Count Unicode code points in the length UChar code units of the string.
|
u_digit(UChar32 ch, int8_t radix)
|
U_CAPI int32_t U_EXPORT2
Returns the decimal digit value of the code point in the specified radix.
|
u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
|
U_CAPI void U_EXPORT2
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.
|
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
|
U_CAPI void U_EXPORT2
Enumerate efficiently all code points with their Unicode general categories.
|
u_errorName(UErrorCode code)
|
U_CAPI const char *U_EXPORT2
Return a string for a UErrorCode value.
|
u_foldCase(UChar32 c, uint32_t options)
|
The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
|
u_forDigit(int32_t digit, int8_t radix)
|
Determines the character representation for a specific digit in the specified radix.
|
u_getBidiPairedBracket(UChar32 c)
|
Maps the specified character to its paired bracket character.
|
u_getCombiningClass(UChar32 c)
|
U_CAPI uint8_t U_EXPORT2
Returns the combining class of the code point as specified in UnicodeData.txt.
|
u_getIntPropertyMaxValue(UProperty which)
|
U_CAPI int32_t U_EXPORT2
Get the maximum value for an enumerated/integer/binary Unicode property.
|
u_getIntPropertyMinValue(UProperty which)
|
U_CAPI int32_t U_EXPORT2
Get the minimum value for an enumerated/integer/binary Unicode property.
|
u_getIntPropertyValue(UChar32 c, UProperty which)
|
U_CAPI int32_t U_EXPORT2
Get the property value for an enumerated or integer Unicode property for a code point.
|
u_getNumericValue(UChar32 c)
|
U_CAPI double U_EXPORT2
Get the numeric value for a Unicode code point as defined in the Unicode Character Database.
|
u_getPropertyEnum(const char *alias)
|
Return the UProperty enum for a given property name, as specified in the Unicode database file PropertyAliases.txt.
|
u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
|
U_CAPI const char *U_EXPORT2
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases.txt.
|
u_getPropertyValueEnum(UProperty property, const char *alias)
|
U_CAPI int32_t U_EXPORT2
Return the property value integer for a given value name, as specified in the Unicode database file PropertyValueAliases.txt.
|
u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
|
U_CAPI const char *U_EXPORT2
Return the Unicode name for a given property value, as given in the Unicode database file PropertyValueAliases.txt.
|
u_getUnicodeVersion(UVersionInfo versionArray)
|
U_CAPI void U_EXPORT2
Gets the Unicode version information.
|
u_getVersion(UVersionInfo versionArray)
|
U_CAPI void U_EXPORT2
Gets the ICU release version.
|
u_hasBinaryProperty(UChar32 c, UProperty which)
|
Check a binary Unicode property for a code point.
|
u_isIDIgnorable(UChar32 c)
|
Determines if the specified character should be regarded as an ignorable character in an identifier, according to Java.
|
u_isIDPart(UChar32 c)
|
Determines if the specified character is permissible as a non-initial character of an identifier according to UAX #31 Unicode Identifier and Pattern Syntax.
|
u_isIDStart(UChar32 c)
|
Determines if the specified character is permissible as the first character in an identifier according to UAX #31 Unicode Identifier and Pattern Syntax.
|
u_isISOControl(UChar32 c)
|
Determines whether the specified code point is an ISO control code.
|
u_isJavaIDPart(UChar32 c)
|
Determines if the specified character is permissible in a Java identifier.
|
u_isJavaIDStart(UChar32 c)
|
Determines if the specified character is permissible as the first character in a Java identifier.
|
u_isJavaSpaceChar(UChar32 c)
|
Determine if the specified code point is a space character according to Java.
|
u_isMirrored(UChar32 c)
|
Determines whether the code point has the Bidi_Mirrored property.
|
u_isUAlphabetic(UChar32 c)
|
Check if a code point has the Alphabetic Unicode property.
|
u_isULowercase(UChar32 c)
|
Check if a code point has the Lowercase Unicode property.
|
u_isUUppercase(UChar32 c)
|
Check if a code point has the Uppercase Unicode property.
|
u_isUWhiteSpace(UChar32 c)
|
Check if a code point has the White_Space Unicode property.
|
u_isWhitespace(UChar32 c)
|
Determines if the specified code point is a whitespace character according to Java/ICU.
|
u_isalnum(UChar32 c)
|
Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.
|
u_isalpha(UChar32 c)
|
Determines whether the specified code point is a letter character.
|
u_isbase(UChar32 c)
|
Non-standard: Determines whether the specified code point is a base character.
|
u_isblank(UChar32 c)
|
Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.
|
u_iscntrl(UChar32 c)
|
Determines whether the specified code point is a control character (as defined by this function).
|
u_isdefined(UChar32 c)
|
Determines whether the specified code point is "defined", which usually means that it is assigned a character.
|
u_isdigit(UChar32 c)
|
Determines whether the specified code point is a digit character according to Java.
|
u_isgraph(UChar32 c)
|
Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
|
u_islower(UChar32 c)
|
Determines whether the specified code point has the general category "Ll" (lowercase letter).
|
u_isprint(UChar32 c)
|
Determines whether the specified code point is a printable character.
|
u_ispunct(UChar32 c)
|
Determines whether the specified code point is a punctuation character.
|
u_isspace(UChar32 c)
|
Determines if the specified character is a space character or not.
|
u_istitle(UChar32 c)
|
Determines whether the specified code point is a titlecase letter.
|
u_isupper(UChar32 c)
|
Determines whether the specified code point has the general category "Lu" (uppercase letter).
|
u_isxdigit(UChar32 c)
|
Determines whether the specified code point is a hexadecimal digit.
|
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options)
|
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
|
u_memchr(const UChar *s, UChar c, int32_t count)
|
Find the first occurrence of a BMP code point in a string.
|
u_memchr32(const UChar *s, UChar32 c, int32_t count)
|
Find the first occurrence of a code point in a string.
|
u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count)
|
U_CAPI int32_t U_EXPORT2
Compare the first count UChars of each buffer. |
u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
|
u_memcpy(UChar *dest, const UChar *src, int32_t count)
|
Synonym for memcpy(), but with UChars only.
|
u_memmove(UChar *dest, const UChar *src, int32_t count)
|
Synonym for memmove(), but with UChars only.
|
u_memrchr(const UChar *s, UChar c, int32_t count)
|
Find the last occurrence of a BMP code point in a string.
|
u_memrchr32(const UChar *s, UChar32 c, int32_t count)
|
Find the last occurrence of a code point in a string.
|
u_memset(UChar *dest, UChar c, int32_t count)
|
Initialize count characters of dest to c. |
u_strCaseCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
|
u_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings (binary order).
|
u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
|
Find the first occurrence of a substring in a string.
|
u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
|
Find the last occurrence of a substring in a string.
|
u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Case-folds the characters in a string.
|
u_strFromUTF32(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)
|
Convert a UTF-32 string to UTF-16.
|
u_strFromUTF32WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
|
Convert a UTF-32 string to UTF-16.
|
u_strFromUTF8(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
|
Convert a UTF-8 string to UTF-16.
|
u_strFromUTF8Lenient(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
|
Convert a UTF-8 string to UTF-16.
|
u_strFromUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
|
Convert a UTF-8 string to UTF-16.
|
u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number)
|
Check if the string contains more Unicode code points than a certain number.
|
u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Lowercase the characters in a string.
|
u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Titlecase a string.
|
u_strToUTF32(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
|
Convert a UTF-16 string to UTF-32.
|
u_strToUTF32WithSub(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
|
Convert a UTF-16 string to UTF-32.
|
u_strToUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
|
U_CAPI char *U_EXPORT2
Convert a UTF-16 string to UTF-8.
|
u_strToUTF8WithSub(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
|
U_CAPI char *U_EXPORT2
Convert a UTF-16 string to UTF-8.
|
u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
|
U_CDECL_END U_CAPI int32_t U_EXPORT2
Uppercase the characters in a string.
|
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options)
|
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
|
u_strcat(UChar *dst, const UChar *src)
|
Concatenate two ustrings.
|
u_strchr(const UChar *s, UChar c)
|
Find the first occurrence of a BMP code point in a string.
|
u_strchr32(const UChar *s, UChar32 c)
|
Find the first occurrence of a code point in a string.
|
u_strcmp(const UChar *s1, const UChar *s2)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings for bitwise equality (code unit order).
|
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
|
u_strcpy(UChar *dst, const UChar *src)
|
Copy a ustring.
|
u_strcspn(const UChar *string, const UChar *matchSet)
|
U_CAPI int32_t U_EXPORT2
Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in matchSet. |
u_strlen(const UChar *s)
|
U_CAPI int32_t U_EXPORT2
Determine the length of an array of UChar.
|
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options)
|
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
|
u_strncat(UChar *dst, const UChar *src, int32_t n)
|
Concatenate two ustrings.
|
u_strncmp(const UChar *ucs1, const UChar *ucs2, int32_t n)
|
U_CAPI int32_t U_EXPORT2
Compare two ustrings for bitwise equality.
|
u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n)
|
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
|
u_strncpy(UChar *dst, const UChar *src, int32_t n)
|
Copy a ustring.
|
u_strpbrk(const UChar *string, const UChar *matchSet)
|
Locates the first occurrence in the string string of any of the characters in the string matchSet. |
u_strrchr(const UChar *s, UChar c)
|
Find the last occurrence of a BMP code point in a string.
|
u_strrchr32(const UChar *s, UChar32 c)
|
Find the last occurrence of a code point in a string.
|
u_strrstr(const UChar *s, const UChar *substring)
|
Find the last occurrence of a substring in a string.
|
u_strspn(const UChar *string, const UChar *matchSet)
|
U_CAPI int32_t U_EXPORT2
Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in matchSet. |
u_strstr(const UChar *s, const UChar *substring)
|
Find the first occurrence of a substring in a string.
|
u_strtok_r(UChar *src, const UChar *delim, UChar **saveState)
|
The string tokenizer API allows an application to break a string into tokens.
|
u_tolower(UChar32 c)
|
The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
|
u_totitle(UChar32 c)
|
The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
|
u_toupper(UChar32 c)
|
The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
|
u_versionToString(const UVersionInfo versionArray, char *versionString)
|
U_CAPI void U_EXPORT2
Write a string with dotted-decimal version information according to the input UVersionInfo.
|
ubrk_clone(const UBreakIterator *bi, UErrorCode *status)
|
U_CAPI UBreakIterator *U_EXPORT2
Thread safe cloning operation.
|
ubrk_close(UBreakIterator *bi)
|
U_CAPI void U_EXPORT2
Close a UBreakIterator.
|
ubrk_countAvailable(void)
|
U_CAPI int32_t U_EXPORT2
Determine how many locales have text breaking information available.
|
ubrk_current(const UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Determine the most recently-returned text boundary.
|
ubrk_first(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Set the iterator position to zero, the start of the text being scanned.
|
ubrk_following(UBreakIterator *bi, int32_t offset)
|
U_CAPI int32_t U_EXPORT2
Advance the iterator to the first boundary following the specified offset.
|
ubrk_getAvailable(int32_t index)
|
U_CAPI const char *U_EXPORT2
Get a locale for which text breaking information is available.
|
ubrk_getRuleStatus(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Return the status from the break rule that determined the most recently returned break position.
|
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Get the statuses from the break rules that determined the most recently returned break position.
|
ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
|
Returns true if the specified position is a boundary position.
|
ubrk_last(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Set the iterator position to the index immediately beyond the last character in the text being scanned.
|
ubrk_next(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Advance the iterator to the boundary following the current boundary.
|
ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
|
U_CAPI UBreakIterator *U_EXPORT2
Open a new UBreakIterator for locating text boundaries for a specified locale.
|
ubrk_preceding(UBreakIterator *bi, int32_t offset)
|
U_CAPI int32_t U_EXPORT2
Set the iterator position to the first boundary preceding the specified offset.
|
ubrk_previous(UBreakIterator *bi)
|
U_CAPI int32_t U_EXPORT2
Set the iterator position to the boundary preceding the current boundary.
|
ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Sets an existing iterator to point to a new piece of text.
|
ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Sets an existing iterator to point to a new piece of text.
|
ucol_clone(const UCollator *coll, UErrorCode *status)
|
Thread safe cloning operation.
|
ucol_close(UCollator *coll)
|
U_CAPI void U_EXPORT2
Close a UCollator.
|
ucol_countAvailable(void)
|
U_CAPI int32_t U_EXPORT2
Determine how many locales have collation rules available.
|
ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)
|
U_CAPI UColAttributeValue U_EXPORT2
Universal attribute getter.
|
ucol_getAvailable(int32_t localeIndex)
|
U_CAPI const char *U_EXPORT2
Get a locale for which collation rules are available.
|
ucol_getDisplayName(const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Get the display name for a UCollator.
|
ucol_getEquivalentReorderCodes(int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Retrieves the reorder codes that are grouped with the given reorder code.
|
ucol_getFunctionalEquivalent(char *result, int32_t resultCapacity, const char *keyword, const char *locale, UBool *isAvailable, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Return the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.
|
ucol_getKeywordValues(const char *keyword, UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Given a keyword, create a string enumeration of all values for that keyword that are currently in use.
|
ucol_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Given a key and a locale, returns an array of string values in a preferred order that would make a difference.
|
ucol_getKeywords(UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Create a string enumerator of all possible keywords that are relevant to collation.
|
ucol_getMaxVariable(const UCollator *coll)
|
U_CAPI UColReorderCode U_EXPORT2
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
|
ucol_getReorderCodes(const UCollator *coll, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Retrieves the reordering codes for this collator.
|
ucol_getSortKey(const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength)
|
U_CAPI int32_t U_EXPORT2
Get a sort key for a string from a UCollator.
|
ucol_getStrength(const UCollator *coll)
|
U_CAPI UCollationStrength U_EXPORT2
Get the collation strength used in a UCollator.
|
ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, const uint8_t *src2, int32_t src2Length, uint8_t *dest, int32_t destCapacity)
|
U_CAPI int32_t U_EXPORT2
Merges two sort keys.
|
ucol_open(const char *loc, UErrorCode *status)
|
Open a UCollator for comparing strings.
|
ucol_openAvailableLocales(UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Create a string enumerator of all locales for which a valid collator may be opened.
|
ucol_openRules(const UChar *rules, int32_t rulesLength, UColAttributeValue normalizationMode, UCollationStrength strength, UParseError *parseError, UErrorCode *status)
|
Produce a UCollator instance according to the rules supplied.
|
ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Universal attribute setter.
|
ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode)
|
U_CAPI void U_EXPORT2
Sets the variable top to the top of the specified reordering group.
|
ucol_setReorderCodes(UCollator *coll, const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode *pErrorCode)
|
U_CAPI void U_EXPORT2
Sets the reordering codes for this collator.
|
ucol_setStrength(UCollator *coll, UCollationStrength strength)
|
U_CAPI void U_EXPORT2
Set the collation strength used in a UCollator.
|
ucol_strcoll(const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength)
|
U_CAPI UCollationResult U_EXPORT2
Compare two strings.
|
ucol_strcollUTF8(const UCollator *coll, const char *source, int32_t sourceLength, const char *target, int32_t targetLength, UErrorCode *status)
|
U_CAPI UCollationResult U_EXPORT2
Compare two strings in UTF-8.
|
uenum_close(UEnumeration *en)
|
U_CAPI void U_EXPORT2
Disposes of resources in use by the iterator.
|
uenum_count(UEnumeration *en, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Returns the number of elements that the iterator traverses.
|
uenum_next(UEnumeration *en, int32_t *resultLength, UErrorCode *status)
|
U_CAPI const char *U_EXPORT2
Returns the next element in the iterator's list.
|
uenum_openCharStringsEnumeration(const char *const strings[], int32_t count, UErrorCode *ec)
|
U_CAPI UEnumeration *U_EXPORT2
Given an array of const char* strings (invariant chars only), return a UEnumeration.
|
uenum_openUCharStringsEnumeration(const UChar *const strings[], int32_t count, UErrorCode *ec)
|
U_CAPI UEnumeration *U_EXPORT2
Given an array of const UChar* strings, return a UEnumeration.
|
uenum_reset(UEnumeration *en, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Resets the iterator to the current list of service IDs.
|
uenum_unext(UEnumeration *en, int32_t *resultLength, UErrorCode *status)
|
Returns the next element in the iterator's list.
|
uldn_close(ULocaleDisplayNames *ldn)
|
U_CAPI void U_EXPORT2
Closes a ULocaleDisplayNames instance obtained from uldn_open().
|
uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type, UErrorCode *pErrorCode)
|
U_CAPI UDisplayContext U_EXPORT2
Returns the UDisplayContext value for the specified UDisplayContextType.
|
uldn_getDialectHandling(const ULocaleDisplayNames *ldn)
|
U_CAPI UDialectHandling U_EXPORT2
Returns the dialect handling used in the display names.
|
uldn_getLocale(const ULocaleDisplayNames *ldn)
|
U_CAPI const char *U_EXPORT2
Returns the locale used to determine the display names.
|
uldn_keyDisplayName(const ULocaleDisplayNames *ldn, const char *key, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided locale key.
|
uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, const char *key, const char *value, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided value (used with the provided key).
|
uldn_languageDisplayName(const ULocaleDisplayNames *ldn, const char *lang, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided language code.
|
uldn_localeDisplayName(const ULocaleDisplayNames *ldn, const char *locale, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided locale.
|
uldn_open(const char *locale, UDialectHandling dialectHandling, UErrorCode *pErrorCode)
|
U_CAPI ULocaleDisplayNames *U_EXPORT2
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided dialectHandling.
|
uldn_openForContext(const char *locale, UDisplayContext *contexts, int32_t length, UErrorCode *pErrorCode)
|
U_CAPI ULocaleDisplayNames *U_EXPORT2
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided UDisplayContext settings.
|
uldn_regionDisplayName(const ULocaleDisplayNames *ldn, const char *region, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided region code.
|
uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, UScriptCode scriptCode, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided script code.
|
uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, const char *script, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided script.
|
uldn_variantDisplayName(const ULocaleDisplayNames *ldn, const char *variant, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided variant.
|
uloc_acceptLanguage(char *result, int32_t resultAvailable, UAcceptResult *outResult, const char **acceptList, int32_t acceptListCount, UEnumeration *availableLocales, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Based on a list of available locales, determine an acceptable locale for the user.
|
uloc_addLikelySubtags(const char *localeID, char *maximizedLocaleID, int32_t maximizedLocaleIDCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Add the likely subtags for a provided locale ID, per the Likely Subtags algorithm described in the CLDR technical report UTS #35 (http://www.unicode.org/reports/tr35/#Likely_Subtags).
|
uloc_canonicalize(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale.
|
uloc_countAvailable(void)
|
U_CAPI int32_t U_EXPORT2
Gets the size of the list of all available locales.
|
uloc_forLanguageTag(const char *langtag, char *localeID, int32_t localeIDCapacity, int32_t *parsedLength, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Returns a locale ID for the specified BCP47 language tag string.
|
uloc_getAvailable(int32_t n)
|
U_CAPI const char *U_EXPORT2
Gets the specified locale from a list of available locales.
|
uloc_getBaseName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale, like uloc_getName(), but without keywords.
|
uloc_getCharacterOrientation(const char *localeId, UErrorCode *status)
|
U_CAPI ULayoutType U_EXPORT2
Get the layout character orientation for the specified locale.
|
uloc_getCountry(const char *localeID, char *country, int32_t countryCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the country code for the specified locale.
|
uloc_getDefault(void)
|
U_CAPI const char *U_EXPORT2
Gets ICU's default locale.
|
uloc_getDisplayCountry(const char *locale, const char *displayLocale, UChar *country, int32_t countryCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the country name suitable for display for the specified locale.
|
uloc_getDisplayKeyword(const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the keyword name suitable for display for the specified locale.
|
uloc_getDisplayKeywordValue(const char *locale, const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the value of the keyword suitable for display for the specified locale.
|
uloc_getDisplayLanguage(const char *locale, const char *displayLocale, UChar *language, int32_t languageCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the language name suitable for display for the specified locale.
|
uloc_getDisplayName(const char *localeID, const char *inLocaleID, UChar *result, int32_t maxResultSize, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the full name suitable for display for the specified locale.
|
uloc_getDisplayScript(const char *locale, const char *displayLocale, UChar *script, int32_t scriptCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the script name suitable for display for the specified locale.
|
uloc_getDisplayVariant(const char *locale, const char *displayLocale, UChar *variant, int32_t variantCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Gets the variant name suitable for display for the specified locale.
|
uloc_getISO3Country(const char *localeID)
|
U_CAPI const char *U_EXPORT2
Gets the ISO country code for the specified locale.
|
uloc_getISO3Language(const char *localeID)
|
U_CAPI const char *U_EXPORT2
Gets the ISO language code for the specified locale.
|
uloc_getISOCountries(void)
|
U_CAPI const char *const *U_EXPORT2
Gets a list of all available 2-letter country codes defined in ISO 3166.
|
uloc_getISOLanguages(void)
|
U_CAPI const char *const *U_EXPORT2
Gets a list of all available 2-letter language codes defined in ISO 639, plus additional 3-letter codes determined to be useful for locale generation as defined by Unicode CLDR.
|
uloc_getKeywordValue(const char *localeID, const char *keywordName, char *buffer, int32_t bufferCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Get the value for a keyword.
|
uloc_getLanguage(const char *localeID, char *language, int32_t languageCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the language code for the specified locale.
|
uloc_getLineOrientation(const char *localeId, UErrorCode *status)
|
U_CAPI ULayoutType U_EXPORT2
Get the layout line orientation for the specified locale.
|
uloc_getName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale.
|
uloc_getScript(const char *localeID, char *script, int32_t scriptCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the script code for the specified locale.
|
uloc_getVariant(const char *localeID, char *variant, int32_t variantCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the variant code for the specified locale.
|
uloc_isRightToLeft(const char *locale)
|
Returns whether the locale's script is written right-to-left.
|
uloc_minimizeSubtags(const char *localeID, char *minimizedLocaleID, int32_t minimizedLocaleIDCapacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Minimize the subtags for a provided locale ID, per the Likely Subtags algorithm described in the CLDR technical report UTS #35 (http://www.unicode.org/reports/tr35/#Likely_Subtags).
|
uloc_openKeywords(const char *localeID, UErrorCode *status)
|
U_CAPI UEnumeration *U_EXPORT2
Gets an enumeration of keywords for the specified locale.
|
uloc_setKeywordValue(const char *keywordName, const char *keywordValue, char *buffer, int32_t bufferCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Sets or removes the value of the specified keyword.
|
uloc_toLanguageTag(const char *localeID, char *langtag, int32_t langtagCapacity, UBool strict, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Returns a well-formed language tag for this locale ID.
|
uloc_toLegacyKey(const char *keyword)
|
U_CAPI const char *U_EXPORT2
Converts the specified keyword (BCP 47 Unicode locale extension key, or legacy key) to the legacy key.
|
uloc_toLegacyType(const char *keyword, const char *value)
|
U_CAPI const char *U_EXPORT2
Converts the specified keyword value (BCP 47 Unicode locale extension type, or legacy type or type alias) to the canonical legacy type.
|
uloc_toUnicodeLocaleKey(const char *keyword)
|
U_CAPI const char *U_EXPORT2
Converts the specified keyword (legacy key, or BCP 47 Unicode locale extension key) to the equivalent BCP 47 Unicode locale extension key.
|
uloc_toUnicodeLocaleType(const char *keyword, const char *value)
|
U_CAPI const char *U_EXPORT2
Converts the specified keyword value (legacy type, or BCP 47 Unicode locale extension type) to the well-formed BCP 47 Unicode locale extension type for the specified keyword (category).
|
ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Return the current CLDR version used by the library.
|
unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.
|
unorm2_close(UNormalizer2 *norm2)
|
U_CAPI void U_EXPORT2
Closes a UNormalizer2 instance from unorm2_openFiltered().
|
unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)
|
Performs pairwise composition of a & b and returns the composite if there is one.
|
unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)
|
U_CAPI uint8_t U_EXPORT2
Gets the combining class of c.
|
unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Gets the decomposition mapping of c.
|
unorm2_getNFCInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFC normalization.
|
unorm2_getNFDInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFD normalization.
|
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization which is equivalent to applying the NFKC_Casefold mappings and then NFC.
|
unorm2_getNFKCInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKC normalization.
|
unorm2_getNFKDInstance(UErrorCode *pErrorCode)
|
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKD normalization.
|
unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Gets the raw decomposition mapping of c.
|
unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)
|
Tests if the character always has a normalization boundary after it, regardless of context.
|
unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)
|
Tests if the character always has a normalization boundary before it, regardless of context.
|
unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)
|
Tests if the character is normalization-inert.
|
unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
|
Tests if the string is normalized.
|
unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.
|
unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.
|
unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
|
U_CAPI UNormalizationCheckResult U_EXPORT2
Tests if the string is normalized.
|
unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Returns the end of the normalized substring of the input string.
|
uscript_breaksBetweenLetters(UScriptCode script)
|
Returns true if the script allows line breaks between letters (excluding hyphenation).
|
uscript_getCode(const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err)
|
U_CAPI int32_t U_EXPORT2
Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
|
uscript_getName(UScriptCode scriptCode)
|
U_CAPI const char *U_EXPORT2
Returns the long Unicode script name, if there is one.
|
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
|
U_CAPI int32_t U_EXPORT2
Writes the script sample character string.
|
uscript_getScript(UChar32 codepoint, UErrorCode *err)
|
U_CAPI UScriptCode U_EXPORT2
Gets the script code associated with the given codepoint.
|
uscript_getScriptExtensions(UChar32 c, UScriptCode *scripts, int32_t capacity, UErrorCode *errorCode)
|
U_CAPI int32_t U_EXPORT2
Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.
|
uscript_getShortName(UScriptCode scriptCode)
|
U_CAPI const char *U_EXPORT2
Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.
|
uscript_getUsage(UScriptCode script)
|
U_CAPI UScriptUsage U_EXPORT2
Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
|
uscript_hasScript(UChar32 c, UScriptCode sc)
|
Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.
|
uscript_isCased(UScriptCode script)
|
Returns true if in modern (or most recent) usage of the script case distinctions are customary.
|
uscript_isRightToLeft(UScriptCode script)
|
Returns true if the script is written right-to-left.
|
utext_char32At(UText *ut, int64_t nativeIndex)
|
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.
|
utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
|
Clone a UText.
|
utext_close(UText *ut)
|
Close function for UText instances.
|
utext_current32(UText *ut)
|
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached the end of the input text.
|
utext_equals(const UText *a, const UText *b)
|
Compare two UText objects for equality.
|
utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Extract text from a UText into a UChar buffer.
|
utext_getNativeIndex(const UText *ut)
|
U_CAPI int64_t U_EXPORT2
Get the current iterator position, which can range from 0 to the length of the text.
|
utext_getPreviousNativeIndex(UText *ut)
|
U_CAPI int64_t U_EXPORT2
Get the native index of the character preceding the current position.
|
utext_moveIndex32(UText *ut, int32_t delta)
|
Move the iterator position by delta code points.
|
utext_nativeLength(UText *ut)
|
U_CAPI int64_t U_EXPORT2
Get the length of the text.
|
utext_next32(UText *ut)
|
Get the code point at the current iteration position of the UText, and advance the position to the first index following the character.
|
utext_next32From(UText *ut, int64_t nativeIndex)
|
Set the iteration index and return the code point at that index.
|
utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
|
Open a read-only UText for a UChar * string.
|
utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
|
Open a read-only UText implementation for UTF-8 strings.
|
utext_previous32(UText *ut)
|
Move the iterator position to the character (code point) whose index precedes the current position, and return that character.
|
utext_previous32From(UText *ut, int64_t nativeIndex)
|
Set the iteration index, and return the code point preceding the one specified by the initial index.
|
utext_setNativeIndex(UText *ut, int64_t nativeIndex)
|
U_CAPI void U_EXPORT2
Set the current iteration position to the nearest code point boundary at or preceding the specified index.
|
utrans_clone(const UTransliterator *trans, UErrorCode *status)
|
U_CAPI UTransliterator *U_EXPORT2
Create a copy of a transliterator.
|
utrans_close(UTransliterator *trans)
|
U_CAPI void U_EXPORT2
Close a transliterator.
|
utrans_openIDs(UErrorCode *pErrorCode)
|
U_CAPI UEnumeration *U_EXPORT2
Return a UEnumeration for the available transliterators.
|
utrans_openInverse(const UTransliterator *trans, UErrorCode *status)
|
U_CAPI UTransliterator *U_EXPORT2
Open an inverse of an existing transliterator.
|
utrans_openU(const UChar *id, int32_t idLength, UTransDirection dir, const UChar *rules, int32_t rulesLength, UParseError *parseError, UErrorCode *pErrorCode)
|
U_CAPI UTransliterator *U_EXPORT2
Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.
|
utrans_setFilter(UTransliterator *trans, const UChar *filterPattern, int32_t filterPatternLen, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Set the filter used by a transliterator.
|
utrans_toRules(const UTransliterator *trans, UBool escapeUnprintable, UChar *result, int32_t resultLength, UErrorCode *status)
|
U_CAPI int32_t U_EXPORT2
Create a rule string that can be passed to utrans_openU to recreate this transliterator.
|
utrans_trans(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, int32_t start, int32_t *limit, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Transliterate a segment of a UReplaceable string.
|
utrans_transIncremental(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, UTransPosition *pos, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously.
|
utrans_transIncrementalUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, UTransPosition *pos, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously.
|
utrans_transUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, int32_t start, int32_t *limit, UErrorCode *status)
|
U_CAPI void U_EXPORT2
Transliterate a segment of a UChar* string.
|
Structs |
|
---|---|
UParseError |
A UParseError struct is used to return detailed information about parsing errors. |
UReplaceableCallbacks |
A set of function pointers that transliterators use to manipulate a UReplaceable. |
UTransPosition |
Position structure for utrans_transIncremental() incremental transliteration. |
Enumerations
Anonymous Enum 117
Anonymous Enum 117
The capacity of the context strings in UParseError.
Properties | |
---|---|
U_PARSE_CONTEXT_LEN
|
UAcceptResult
UAcceptResult
Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.
See also: uloc_acceptLanguageFromHTTP See also: uloc_acceptLanguage
UBidiPairedBracketType
UBidiPairedBracketType
Bidi Paired Bracket Type constants.
See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE
Properties | |
---|---|
U_BPT_CLOSE
|
Close paired bracket. |
U_BPT_COUNT
|
One more than the highest normal UBidiPairedBracketType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_BPT_NONE
|
Not a paired bracket. |
U_BPT_OPEN
|
Open paired bracket. |
UBlockCode
UBlockCode
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
Properties | |
---|---|
UBLOCK_ADLAM
|
|
UBLOCK_AEGEAN_NUMBERS
|
|
UBLOCK_AHOM
|
|
UBLOCK_ALCHEMICAL_SYMBOLS
|
|
UBLOCK_ALPHABETIC_PRESENTATION_FORMS
|
|
UBLOCK_ANATOLIAN_HIEROGLYPHS
|
|
UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION
|
|
UBLOCK_ANCIENT_GREEK_NUMBERS
|
|
UBLOCK_ANCIENT_SYMBOLS
|
|
UBLOCK_ARABIC
|
|
UBLOCK_ARABIC_EXTENDED_A
|
|
UBLOCK_ARABIC_EXTENDED_B
|
|
UBLOCK_ARABIC_EXTENDED_C
|
|
UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS
|
|
UBLOCK_ARABIC_PRESENTATION_FORMS_A
|
|
UBLOCK_ARABIC_PRESENTATION_FORMS_B
|
|
UBLOCK_ARABIC_SUPPLEMENT
|
|
UBLOCK_ARMENIAN
|
|
UBLOCK_ARROWS
|
|
UBLOCK_AVESTAN
|
|
UBLOCK_BALINESE
|
|
UBLOCK_BAMUM
|
|
UBLOCK_BAMUM_SUPPLEMENT
|
|
UBLOCK_BASIC_LATIN
|
|
UBLOCK_BASSA_VAH
|
|
UBLOCK_BATAK
|
|
UBLOCK_BENGALI
|
|
UBLOCK_BHAIKSUKI
|
|
UBLOCK_BLOCK_ELEMENTS
|
|
UBLOCK_BOPOMOFO
|
|
UBLOCK_BOPOMOFO_EXTENDED
|
|
UBLOCK_BOX_DRAWING
|
|
UBLOCK_BRAHMI
|
|
UBLOCK_BRAILLE_PATTERNS
|
|
UBLOCK_BUGINESE
|
|
UBLOCK_BUHID
|
|
UBLOCK_BYZANTINE_MUSICAL_SYMBOLS
|
|
UBLOCK_CARIAN
|
|
UBLOCK_CAUCASIAN_ALBANIAN
|
|
UBLOCK_CHAKMA
|
|
UBLOCK_CHAM
|
|
UBLOCK_CHEROKEE
|
|
UBLOCK_CHEROKEE_SUPPLEMENT
|
|
UBLOCK_CHESS_SYMBOLS
|
|
UBLOCK_CHORASMIAN
|
|
UBLOCK_CJK_COMPATIBILITY
|
|
UBLOCK_CJK_COMPATIBILITY_FORMS
|
|
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
|
|
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
|
|
UBLOCK_CJK_RADICALS_SUPPLEMENT
|
|
UBLOCK_CJK_STROKES
|
|
UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H
|
|
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I
|
|
UBLOCK_COMBINING_DIACRITICAL_MARKS
|
|
UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED
|
|
UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT
|
|
UBLOCK_COMBINING_HALF_MARKS
|
|
UBLOCK_COMBINING_MARKS_FOR_SYMBOLS
|
Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". |
UBLOCK_COMMON_INDIC_NUMBER_FORMS
|
|
UBLOCK_CONTROL_PICTURES
|
|
UBLOCK_COPTIC
|
|
UBLOCK_COPTIC_EPACT_NUMBERS
|
|
UBLOCK_COUNT
|
One more than the highest normal UBlockCode value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UBLOCK_COUNTING_ROD_NUMERALS
|
|
UBLOCK_CUNEIFORM
|
|
UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION
|
|
UBLOCK_CURRENCY_SYMBOLS
|
|
UBLOCK_CYPRIOT_SYLLABARY
|
|
UBLOCK_CYPRO_MINOAN
|
|
UBLOCK_CYRILLIC
|
|
UBLOCK_CYRILLIC_EXTENDED_A
|
|
UBLOCK_CYRILLIC_EXTENDED_B
|
|
UBLOCK_CYRILLIC_EXTENDED_C
|
|
UBLOCK_CYRILLIC_EXTENDED_D
|
|
UBLOCK_CYRILLIC_SUPPLEMENT
|
|
UBLOCK_CYRILLIC_SUPPLEMENTARY
|
Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". |
UBLOCK_DESERET
|
|
UBLOCK_DEVANAGARI
|
|
UBLOCK_DEVANAGARI_EXTENDED
|
|
UBLOCK_DEVANAGARI_EXTENDED_A
|
|
UBLOCK_DINGBATS
|
|
UBLOCK_DIVES_AKURU
|
|
UBLOCK_DOGRA
|
|
UBLOCK_DOMINO_TILES
|
|
UBLOCK_DUPLOYAN
|
|
UBLOCK_EARLY_DYNASTIC_CUNEIFORM
|
|
UBLOCK_EGYPTIAN_HIEROGLYPHS
|
|
UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS
|
|
UBLOCK_ELBASAN
|
|
UBLOCK_ELYMAIC
|
|
UBLOCK_EMOTICONS
|
|
UBLOCK_ENCLOSED_ALPHANUMERICS
|
|
UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT
|
|
UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS
|
|
UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT
|
|
UBLOCK_ETHIOPIC
|
|
UBLOCK_ETHIOPIC_EXTENDED
|
|
UBLOCK_ETHIOPIC_EXTENDED_A
|
|
UBLOCK_ETHIOPIC_EXTENDED_B
|
|
UBLOCK_ETHIOPIC_SUPPLEMENT
|
|
UBLOCK_GENERAL_PUNCTUATION
|
|
UBLOCK_GEOMETRIC_SHAPES
|
|
UBLOCK_GEOMETRIC_SHAPES_EXTENDED
|
|
UBLOCK_GEORGIAN
|
|
UBLOCK_GEORGIAN_EXTENDED
|
|
UBLOCK_GEORGIAN_SUPPLEMENT
|
|
UBLOCK_GLAGOLITIC
|
|
UBLOCK_GLAGOLITIC_SUPPLEMENT
|
|
UBLOCK_GOTHIC
|
|
UBLOCK_GRANTHA
|
|
UBLOCK_GREEK
|
Unicode 3.2 renames this block to "Greek and Coptic". |
UBLOCK_GREEK_EXTENDED
|
|
UBLOCK_GUJARATI
|
|
UBLOCK_GUNJALA_GONDI
|
|
UBLOCK_GURMUKHI
|
|
UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
|
|
UBLOCK_HANGUL_COMPATIBILITY_JAMO
|
|
UBLOCK_HANGUL_JAMO
|
|
UBLOCK_HANGUL_JAMO_EXTENDED_A
|
|
UBLOCK_HANGUL_JAMO_EXTENDED_B
|
|
UBLOCK_HANGUL_SYLLABLES
|
|
UBLOCK_HANIFI_ROHINGYA
|
|
UBLOCK_HANUNOO
|
|
UBLOCK_HATRAN
|
|
UBLOCK_HEBREW
|
|
UBLOCK_HIGH_PRIVATE_USE_SURROGATES
|
|
UBLOCK_HIGH_SURROGATES
|
|
UBLOCK_HIRAGANA
|
|
UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS
|
|
UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION
|
|
UBLOCK_IMPERIAL_ARAMAIC
|
|
UBLOCK_INDIC_SIYAQ_NUMBERS
|
|
UBLOCK_INSCRIPTIONAL_PAHLAVI
|
|
UBLOCK_INSCRIPTIONAL_PARTHIAN
|
|
UBLOCK_INVALID_CODE
|
|
UBLOCK_IPA_EXTENSIONS
|
|
UBLOCK_JAVANESE
|
|
UBLOCK_KAITHI
|
|
UBLOCK_KAKTOVIK_NUMERALS
|
|
UBLOCK_KANA_EXTENDED_A
|
|
UBLOCK_KANA_EXTENDED_B
|
|
UBLOCK_KANA_SUPPLEMENT
|
|
UBLOCK_KANBUN
|
|
UBLOCK_KANGXI_RADICALS
|
|
UBLOCK_KANNADA
|
|
UBLOCK_KATAKANA
|
|
UBLOCK_KATAKANA_PHONETIC_EXTENSIONS
|
|
UBLOCK_KAWI
|
|
UBLOCK_KAYAH_LI
|
|
UBLOCK_KHAROSHTHI
|
|
UBLOCK_KHITAN_SMALL_SCRIPT
|
|
UBLOCK_KHMER
|
|
UBLOCK_KHMER_SYMBOLS
|
|
UBLOCK_KHOJKI
|
|
UBLOCK_KHUDAWADI
|
|
UBLOCK_LAO
|
|
UBLOCK_LATIN_1_SUPPLEMENT
|
|
UBLOCK_LATIN_EXTENDED_A
|
|
UBLOCK_LATIN_EXTENDED_ADDITIONAL
|
|
UBLOCK_LATIN_EXTENDED_B
|
|
UBLOCK_LATIN_EXTENDED_C
|
|
UBLOCK_LATIN_EXTENDED_D
|
|
UBLOCK_LATIN_EXTENDED_E
|
|
UBLOCK_LATIN_EXTENDED_F
|
|
UBLOCK_LATIN_EXTENDED_G
|
|
UBLOCK_LEPCHA
|
|
UBLOCK_LETTERLIKE_SYMBOLS
|
|
UBLOCK_LIMBU
|
|
UBLOCK_LINEAR_A
|
|
UBLOCK_LINEAR_B_IDEOGRAMS
|
|
UBLOCK_LINEAR_B_SYLLABARY
|
|
UBLOCK_LISU
|
|
UBLOCK_LISU_SUPPLEMENT
|
|
UBLOCK_LOW_SURROGATES
|
|
UBLOCK_LYCIAN
|
|
UBLOCK_LYDIAN
|
|
UBLOCK_MAHAJANI
|
|
UBLOCK_MAHJONG_TILES
|
|
UBLOCK_MAKASAR
|
|
UBLOCK_MALAYALAM
|
|
UBLOCK_MANDAIC
|
|
UBLOCK_MANICHAEAN
|
|
UBLOCK_MARCHEN
|
|
UBLOCK_MASARAM_GONDI
|
|
UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS
|
|
UBLOCK_MATHEMATICAL_OPERATORS
|
|
UBLOCK_MAYAN_NUMERALS
|
|
UBLOCK_MEDEFAIDRIN
|
|
UBLOCK_MEETEI_MAYEK
|
|
UBLOCK_MEETEI_MAYEK_EXTENSIONS
|
|
UBLOCK_MENDE_KIKAKUI
|
|
UBLOCK_MEROITIC_CURSIVE
|
|
UBLOCK_MEROITIC_HIEROGLYPHS
|
|
UBLOCK_MIAO
|
|
UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
|
|
UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
|
|
UBLOCK_MISCELLANEOUS_SYMBOLS
|
|
UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS
|
|
UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS
|
|
UBLOCK_MISCELLANEOUS_TECHNICAL
|
|
UBLOCK_MODI
|
|
UBLOCK_MODIFIER_TONE_LETTERS
|
|
UBLOCK_MONGOLIAN
|
|
UBLOCK_MONGOLIAN_SUPPLEMENT
|
|
UBLOCK_MRO
|
|
UBLOCK_MULTANI
|
|
UBLOCK_MUSICAL_SYMBOLS
|
|
UBLOCK_MYANMAR
|
|
UBLOCK_MYANMAR_EXTENDED_A
|
|
UBLOCK_MYANMAR_EXTENDED_B
|
|
UBLOCK_NABATAEAN
|
|
UBLOCK_NAG_MUNDARI
|
|
UBLOCK_NANDINAGARI
|
|
UBLOCK_NEWA
|
|
UBLOCK_NEW_TAI_LUE
|
|
UBLOCK_NKO
|
|
UBLOCK_NO_BLOCK
|
New No_Block value in Unicode 4. |
UBLOCK_NUMBER_FORMS
|
|
UBLOCK_NUSHU
|
|
UBLOCK_NYIAKENG_PUACHUE_HMONG
|
|
UBLOCK_OGHAM
|
|
UBLOCK_OLD_HUNGARIAN
|
|
UBLOCK_OLD_ITALIC
|
|
UBLOCK_OLD_NORTH_ARABIAN
|
|
UBLOCK_OLD_PERMIC
|
|
UBLOCK_OLD_PERSIAN
|
|
UBLOCK_OLD_SOGDIAN
|
|
UBLOCK_OLD_SOUTH_ARABIAN
|
|
UBLOCK_OLD_TURKIC
|
|
UBLOCK_OLD_UYGHUR
|
|
UBLOCK_OL_CHIKI
|
|
UBLOCK_OPTICAL_CHARACTER_RECOGNITION
|
|
UBLOCK_ORIYA
|
|
UBLOCK_ORNAMENTAL_DINGBATS
|
|
UBLOCK_OSAGE
|
|
UBLOCK_OSMANYA
|
|
UBLOCK_OTTOMAN_SIYAQ_NUMBERS
|
|
UBLOCK_PAHAWH_HMONG
|
|
UBLOCK_PALMYRENE
|
|
UBLOCK_PAU_CIN_HAU
|
|
UBLOCK_PHAGS_PA
|
|
UBLOCK_PHAISTOS_DISC
|
|
UBLOCK_PHOENICIAN
|
|
UBLOCK_PHONETIC_EXTENSIONS
|
|
UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT
|
|
UBLOCK_PLAYING_CARDS
|
|
UBLOCK_PRIVATE_USE
|
Same as UBLOCK_PRIVATE_USE_AREA. Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs. |
UBLOCK_PRIVATE_USE_AREA
|
Same as UBLOCK_PRIVATE_USE. Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs. |
UBLOCK_PSALTER_PAHLAVI
|
|
UBLOCK_REJANG
|
|
UBLOCK_RUMI_NUMERAL_SYMBOLS
|
|
UBLOCK_RUNIC
|
|
UBLOCK_SAMARITAN
|
|
UBLOCK_SAURASHTRA
|
|
UBLOCK_SHARADA
|
|
UBLOCK_SHAVIAN
|
|
UBLOCK_SHORTHAND_FORMAT_CONTROLS
|
|
UBLOCK_SIDDHAM
|
|
UBLOCK_SINHALA
|
|
UBLOCK_SINHALA_ARCHAIC_NUMBERS
|
|
UBLOCK_SMALL_FORM_VARIANTS
|
|
UBLOCK_SMALL_KANA_EXTENSION
|
|
UBLOCK_SOGDIAN
|
|
UBLOCK_SORA_SOMPENG
|
|
UBLOCK_SOYOMBO
|
|
UBLOCK_SPACING_MODIFIER_LETTERS
|
|
UBLOCK_SPECIALS
|
|
UBLOCK_SUNDANESE
|
|
UBLOCK_SUNDANESE_SUPPLEMENT
|
|
UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS
|
|
UBLOCK_SUPPLEMENTAL_ARROWS_A
|
|
UBLOCK_SUPPLEMENTAL_ARROWS_B
|
|
UBLOCK_SUPPLEMENTAL_ARROWS_C
|
|
UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS
|
|
UBLOCK_SUPPLEMENTAL_PUNCTUATION
|
|
UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS
|
|
UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A
|
|
UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B
|
|
UBLOCK_SUTTON_SIGNWRITING
|
|
UBLOCK_SYLOTI_NAGRI
|
|
UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A
|
|
UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING
|
|
UBLOCK_SYRIAC
|
|
UBLOCK_SYRIAC_SUPPLEMENT
|
|
UBLOCK_TAGALOG
|
|
UBLOCK_TAGBANWA
|
|
UBLOCK_TAGS
|
|
UBLOCK_TAI_LE
|
|
UBLOCK_TAI_THAM
|
|
UBLOCK_TAI_VIET
|
|
UBLOCK_TAI_XUAN_JING_SYMBOLS
|
|
UBLOCK_TAKRI
|
|
UBLOCK_TAMIL
|
|
UBLOCK_TAMIL_SUPPLEMENT
|
|
UBLOCK_TANGSA
|
|
UBLOCK_TANGUT
|
|
UBLOCK_TANGUT_COMPONENTS
|
|
UBLOCK_TANGUT_SUPPLEMENT
|
|
UBLOCK_TELUGU
|
|
UBLOCK_THAANA
|
|
UBLOCK_THAI
|
|
UBLOCK_TIBETAN
|
|
UBLOCK_TIFINAGH
|
|
UBLOCK_TIRHUTA
|
|
UBLOCK_TOTO
|
|
UBLOCK_TRANSPORT_AND_MAP_SYMBOLS
|
|
UBLOCK_UGARITIC
|
|
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
|
|
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED
|
|
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A
|
|
UBLOCK_VAI
|
|
UBLOCK_VARIATION_SELECTORS
|
|
UBLOCK_VARIATION_SELECTORS_SUPPLEMENT
|
|
UBLOCK_VEDIC_EXTENSIONS
|
|
UBLOCK_VERTICAL_FORMS
|
|
UBLOCK_VITHKUQI
|
|
UBLOCK_WANCHO
|
|
UBLOCK_WARANG_CITI
|
|
UBLOCK_YEZIDI
|
|
UBLOCK_YIJING_HEXAGRAM_SYMBOLS
|
|
UBLOCK_YI_RADICALS
|
|
UBLOCK_YI_SYLLABLES
|
|
UBLOCK_ZANABAZAR_SQUARE
|
|
UBLOCK_ZNAMENNY_MUSICAL_NOTATION
|
UBreakIteratorType
UBreakIteratorType
The possible types of text boundaries.
Properties | |
---|---|
UBRK_CHARACTER
|
Character breaks. |
UBRK_COUNT
|
One more than the highest normal UBreakIteratorType value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UBRK_LINE
|
Line breaks. |
UBRK_SENTENCE
|
Sentence breaks. |
UBRK_TITLE
|
Title Case breaks. The iterator created using this type locates title boundaries as described for Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, please use Word Boundary iterator. Deprecated. ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later. |
UBRK_WORD
|
Word breaks. |
UCPMapRangeOption
UCPMapRangeOption
Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates. Most users should use UCPMAP_RANGE_NORMAL.
See also: ucpmap_getRange See also: ucptrie_getRange See also: umutablecptrie_getRange
Properties | |
---|---|
UCPMAP_RANGE_FIXED_ALL_SURROGATES
|
ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that all surrogates (U+D800..U+DFFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter. The surrogateValue is not transformed via filter(). See U_IS_SURROGATE(c). Most users should use UCPMAP_RANGE_NORMAL instead. This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the lead surrogate code points. |
UCPMAP_RANGE_FIXED_LEAD_SURROGATES
|
ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that lead surrogates (U+D800..U+DBFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter. The surrogateValue is not transformed via filter(). See U_IS_LEAD(c). Most users should use UCPMAP_RANGE_NORMAL instead. This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the lead surrogate code points. |
UCPMAP_RANGE_NORMAL
|
ucpmap_getRange() enumerates all same-value ranges as stored in the map. Most users should use this option. |
UCharCategory
UCharCategory
Data for enumerated Unicode general category types.
See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
Properties | |
---|---|
U_CHAR_CATEGORY_COUNT
|
One higher than the last enum UCharCategory constant. This numeric value is stable (will not change), see http://www.unicode.org/policies/stability_policy.html#Property_Value |
U_COMBINING_SPACING_MARK
|
Mc. |
U_CONNECTOR_PUNCTUATION
|
Pc. |
U_CONTROL_CHAR
|
Cc. |
U_CURRENCY_SYMBOL
|
Sc. |
U_DASH_PUNCTUATION
|
Pd. |
U_DECIMAL_DIGIT_NUMBER
|
Nd. |
U_ENCLOSING_MARK
|
Me. |
U_END_PUNCTUATION
|
Pe. |
U_FINAL_PUNCTUATION
|
Pf. |
U_FORMAT_CHAR
|
Cf. |
U_GENERAL_OTHER_TYPES
|
Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) |
U_INITIAL_PUNCTUATION
|
Pi. |
U_LETTER_NUMBER
|
Nl. |
U_LINE_SEPARATOR
|
Zl. |
U_LOWERCASE_LETTER
|
Ll. |
U_MATH_SYMBOL
|
Sm. |
U_MODIFIER_LETTER
|
Lm. |
U_MODIFIER_SYMBOL
|
Sk. |
U_NON_SPACING_MARK
|
Mn. |
U_OTHER_LETTER
|
Lo. |
U_OTHER_NUMBER
|
No. |
U_OTHER_PUNCTUATION
|
Po. |
U_OTHER_SYMBOL
|
So. |
U_PARAGRAPH_SEPARATOR
|
Zp. |
U_PRIVATE_USE_CHAR
|
Co. |
U_SPACE_SEPARATOR
|
Zs. |
U_START_PUNCTUATION
|
Ps. |
U_SURROGATE
|
Cs. |
U_TITLECASE_LETTER
|
Lt. |
U_UNASSIGNED
|
Non-category for unassigned and non-character code points. |
U_UPPERCASE_LETTER
|
Lu. |
UCharDirection
UCharDirection
This specifies the language directional property of a character set.
Properties | |
---|---|
U_ARABIC_NUMBER
|
AN. |
U_BLOCK_SEPARATOR
|
B. |
U_BOUNDARY_NEUTRAL
|
BN. |
U_CHAR_DIRECTION_COUNT
|
One more than the highest UCharDirection value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_COMMON_NUMBER_SEPARATOR
|
CS. |
U_DIR_NON_SPACING_MARK
|
NSM. |
U_EUROPEAN_NUMBER
|
EN. |
U_EUROPEAN_NUMBER_SEPARATOR
|
ES. |
U_EUROPEAN_NUMBER_TERMINATOR
|
ET. |
U_FIRST_STRONG_ISOLATE
|
FSI. |
U_LEFT_TO_RIGHT
|
L. |
U_LEFT_TO_RIGHT_EMBEDDING
|
LRE. |
U_LEFT_TO_RIGHT_ISOLATE
|
LRI. |
U_LEFT_TO_RIGHT_OVERRIDE
|
LRO. |
U_OTHER_NEUTRAL
|
ON. |
U_POP_DIRECTIONAL_FORMAT
|
PDF. |
U_POP_DIRECTIONAL_ISOLATE
|
PDI. |
U_RIGHT_TO_LEFT
|
R. |
U_RIGHT_TO_LEFT_ARABIC
|
AL. |
U_RIGHT_TO_LEFT_EMBEDDING
|
RLE. |
U_RIGHT_TO_LEFT_ISOLATE
|
RLI. |
U_RIGHT_TO_LEFT_OVERRIDE
|
RLO. |
U_SEGMENT_SEPARATOR
|
S. |
U_WHITE_SPACE_NEUTRAL
|
WS. |
UCharNameChoice
UCharNameChoice
Selector constants for u_charName().
u_charName() returns the "modern" name of a Unicode character; or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646; or an "extended" name that gives each Unicode code point a unique name.
See also: u_charName
Properties | |
---|---|
U_CHAR_NAME_ALIAS
|
Corrected name from NameAliases.txt. |
U_CHAR_NAME_CHOICE_COUNT
|
One more than the highest normal UCharNameChoice value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_EXTENDED_CHAR_NAME
|
Standard or synthetic character name. |
U_UNICODE_10_CHAR_NAME
|
The Unicode_1_Name property value which is of little practical value. Beginning with ICU 49, ICU APIs return an empty string for this name choice. Deprecated. ICU 49 |
U_UNICODE_CHAR_NAME
|
Unicode character name (Name property). |
UColAttribute
UColAttribute
Attributes that collation service understands.
All the attributes can take UCOL_DEFAULT value, as well as the values specific to each one.
Properties | |
---|---|
UCOL_ALTERNATE_HANDLING
|
Attribute for handling variable elements. Acceptable values are UCOL_NON_IGNORABLE which treats all the codepoints with non-ignorable primary weights in the same way, and UCOL_SHIFTED which causes codepoints with primary weights that are equal or below the variable top value to be ignored on primary level and moved to the quaternary level. The default setting in a Collator object depends on the locale data loaded from the resources. For most locales, the default is UCOL_NON_IGNORABLE, but for others, such as "th", the default could be UCOL_SHIFTED. |
UCOL_ATTRIBUTE_COUNT
|
One more than the highest normal UColAttribute value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCOL_CASE_FIRST
|
Controls the ordering of upper and lower case letters. Acceptable values are UCOL_OFF, which orders upper and lower case letters in accordance with their tertiary weights, UCOL_UPPER_FIRST which forces upper case letters to sort before lower case letters, and UCOL_LOWER_FIRST which does the opposite. The default setting in a Collator object depends on the locale data loaded from the resources. For most locales, the default is UCOL_OFF, but for others, such as "da" or "mt", the default could be UCOL_UPPER_FIRST. |
UCOL_CASE_LEVEL
|
Controls whether an extra case level (positioned before the third level) is generated or not. Acceptable values are UCOL_OFF, when case level is not generated, and UCOL_ON which causes the case level to be generated. Contents of the case level are affected by the value of UCOL_CASE_FIRST attribute. A simple way to ignore accent differences in a string is to set the strength to UCOL_PRIMARY and enable case level. The default setting in a Collator object depends on the locale data loaded from the resources. |
UCOL_DECOMPOSITION_MODE
|
An alias for UCOL_NORMALIZATION_MODE attribute. |
UCOL_FRENCH_COLLATION
|
Attribute for direction of secondary weights - used in Canadian French. Acceptable values are UCOL_ON, which results in secondary weights being considered backwards and UCOL_OFF which treats secondary weights in the order they appear. |
UCOL_HIRAGANA_QUATERNARY_MODE
|
When turned on, this attribute positions Hiragana before all non-ignorables on quaternary level. This is a sneaky way to produce JIS sort order. This attribute was an implementation detail of the CLDR Japanese tailoring. Since ICU 50, this attribute is not settable any more via API functions. Since CLDR 25/ICU 53, explicit quaternary relations are used to achieve the same Japanese sort order. Deprecated. ICU 50 Implementation detail, cannot be set via API, was removed from implementation. |
UCOL_NORMALIZATION_MODE
|
Controls whether the normalization check and necessary normalizations are performed. When set to UCOL_OFF no normalization check is performed. The correctness of the result is guaranteed only if the input data is in so-called FCD form (see users manual for more info). When set to UCOL_ON, an incremental check is performed to see whether the input data is in the FCD form. If the data is not in the FCD form, incremental NFD normalization is performed. The default setting in a Collator object depends on the locale data loaded from the resources. For many locales, the default is UCOL_OFF, but for others, such as "hi", "vi", or "bn", the default could be UCOL_ON. |
UCOL_NUMERIC_COLLATION
|
When turned on, this attribute makes substrings of digits sort according to their numeric values. This is a way to get '100' to sort AFTER '2'. Note that the longest digit substring that can be treated as a single unit is 254 digits (not counting leading zeros). If a digit substring is longer than that, the digits beyond the limit will be treated as a separate digit substring. A "digit" in this sense is a code point with General_Category=Nd, which does not include circled numbers, roman numerals, etc. Only a contiguous digit substring is considered, that is, non-negative integers without separators. There is no support for plus/minus signs, decimals, exponents, etc. |
UCOL_STRENGTH
|
The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength for most locales (except Japanese) is tertiary. Quaternary strength is useful when combined with shifted setting for alternate handling attribute and for JIS X 4061 collation, when it is used to distinguish between Katakana and Hiragana. Otherwise, quaternary level is affected only by the number of non-ignorable code points in the string. Identical strength is rarely useful, as it amounts to codepoints of the NFD form of the string. |
UColAttributeValue
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Here are all the allowable values. Not every attribute can take every value. The only universal value is UCOL_DEFAULT, which resets the attribute value to the predefined value for that locale.
Properties | |
---|---|
UCOL_ATTRIBUTE_VALUE_COUNT
|
One more than the highest normal UColAttributeValue value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCOL_CE_STRENGTH_LIMIT
|
|
UCOL_DEFAULT
|
accepted by most attributes |
UCOL_DEFAULT_STRENGTH
|
Default collation strength. |
UCOL_IDENTICAL
|
Identical collation strength. |
UCOL_LOWER_FIRST
|
Valid for UCOL_CASE_FIRST - lower case sorts before upper case. |
UCOL_NON_IGNORABLE
|
Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable |
UCOL_OFF
|
Turn the feature off - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE. |
UCOL_ON
|
Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE. |
UCOL_PRIMARY
|
Primary collation strength. |
UCOL_QUATERNARY
|
Quaternary collation strength. |
UCOL_SECONDARY
|
Secondary collation strength. |
UCOL_SHIFTED
|
Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted |
UCOL_STRENGTH_LIMIT
|
|
UCOL_TERTIARY
|
Tertiary collation strength. |
UCOL_UPPER_FIRST
|
upper case sorts before lower case |
UColBoundMode
UColBoundMode
Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.
Underlying code depends on them having these numbers.
Properties | |
---|---|
UCOL_BOUND_LOWER
|
lower bound |
UCOL_BOUND_UPPER
|
upper bound that will match strings of exact size |
UCOL_BOUND_UPPER_LONG
|
upper bound that will match all the strings that have the same initial substring as the given string |
UCOL_BOUND_VALUE_COUNT
|
One more than the highest normal UColBoundMode value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UColReorderCode
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes.
These reordering codes are to be used in conjunction with the script codes. See also: ucol_getReorderCodes See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode
Properties | |
---|---|
UCOL_REORDER_CODE_CURRENCY
|
Characters with the currency property. This is equivalent to the rule value "currency". |
UCOL_REORDER_CODE_DEFAULT
|
A special reordering code that is used to specify the default reordering codes for a locale. |
UCOL_REORDER_CODE_DIGIT
|
Characters with the digit property. This is equivalent to the rule value "digit". |
UCOL_REORDER_CODE_FIRST
|
The first entry in the enumeration of reordering groups. This is intended for use in range checking and enumeration of the reorder codes. |
UCOL_REORDER_CODE_LIMIT
|
One more than the highest normal UColReorderCode value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCOL_REORDER_CODE_NONE
|
A special reordering code that is used to specify no reordering codes. |
UCOL_REORDER_CODE_OTHERS
|
A special reordering code that is used to specify all other codes used for reordering except for the codes listed as UColReorderCode values and those listed explicitly in a reordering. |
UCOL_REORDER_CODE_PUNCTUATION
|
Characters with the punctuation property. This is equivalent to the rule value "punct". |
UCOL_REORDER_CODE_SPACE
|
Characters with the space property. This is equivalent to the rule value "space". |
UCOL_REORDER_CODE_SYMBOL
|
Characters with the symbol property. This is equivalent to the rule value "symbol". |
UCollationResult
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.
UCOL_EQUAL is returned if source string is compared to be equal to target string in the ucol_strcoll() method. UCOL_GREATER is returned if source string is compared to be greater than target string in the ucol_strcoll() method. See also: ucol_strcoll(). Possible values for a comparison result.
Properties | |
---|---|
UCOL_EQUAL
|
string a == string b |
UCOL_GREATER
|
string a > string b |
UCOL_LESS
|
string a < string b |
UDecompositionType
UDecompositionType
Decomposition Type constants.
See also: UCHAR_DECOMPOSITION_TYPE
Properties | |
---|---|
U_DT_CANONICAL
|
|
U_DT_CIRCLE
|
|
U_DT_COMPAT
|
|
U_DT_COUNT
|
One more than the highest normal UDecompositionType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_DT_FINAL
|
|
U_DT_FONT
|
|
U_DT_FRACTION
|
|
U_DT_INITIAL
|
|
U_DT_ISOLATED
|
|
U_DT_MEDIAL
|
|
U_DT_NARROW
|
|
U_DT_NOBREAK
|
|
U_DT_NONE
|
|
U_DT_SMALL
|
|
U_DT_SQUARE
|
|
U_DT_SUB
|
|
U_DT_SUPER
|
|
U_DT_VERTICAL
|
|
U_DT_WIDE
|
UDialectHandling
UDialectHandling
UDisplayContext
UDisplayContext
Display context settings.
Note, the specific numeric values are internal and may change.
UDisplayContextType
UDisplayContextType
Display context types, for getting values of a particular setting.
Note, the specific numeric values are internal and may change.
UEastAsianWidth
UEastAsianWidth
East Asian Width constants.
See also: UCHAR_EAST_ASIAN_WIDTH See also: u_getIntPropertyValue
Properties | |
---|---|
U_EA_AMBIGUOUS
|
|
U_EA_COUNT
|
One more than the highest normal UEastAsianWidth value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_EA_FULLWIDTH
|
|
U_EA_HALFWIDTH
|
|
U_EA_NARROW
|
|
U_EA_NEUTRAL
|
|
U_EA_WIDE
|
UErrorCode
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Initialize the UErrorCode with U_ZERO_ERROR, and check for success or failure using U_SUCCESS() or U_FAILURE():
UErrorCode errorCode = U_ZERO_ERROR; /* call ICU API that needs an error code parameter. */ if (U_FAILURE(errorCode)) { /* An error occurred. Handle it here. */ }
C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a suitable subclass.
For more information, see: https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes
Note: By convention, ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode first test:
if (U_FAILURE(errorCode)) { return immediately; }
so that in a chain of such functions the first one that sets an error code causes the following ones to not perform any operations.
Properties | |
---|---|
U_AMBIGUOUS_ALIAS_WARNING
|
This converter alias can go to different converter implementations. |
U_ARGUMENT_TYPE_MISMATCH
|
Argument name and argument index mismatch in MessageFormat functions. |
U_BAD_VARIABLE_DEFINITION
|
Missing '$' or duplicate variable name. |
U_BRK_ASSIGN_ERROR
|
Syntax error in RBBI rule assignment statement. |
U_BRK_ERROR_LIMIT
|
One more than the highest normal BreakIterator error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_BRK_ERROR_START
|
Start of codes indicating Break Iterator failures. |
U_BRK_HEX_DIGITS_EXPECTED
|
Hex digits expected as part of an escaped char in a rule. |
U_BRK_INIT_ERROR
|
Initialization failure. Probable missing ICU Data. |
U_BRK_INTERNAL_ERROR
|
An internal error (bug) was detected. |
U_BRK_MALFORMED_RULE_TAG
|
The {nnn} tag on a rule is malformed. |
U_BRK_MISMATCHED_PAREN
|
Mis-matched parentheses in an RBBI rule. |
U_BRK_NEW_LINE_IN_QUOTED_STRING
|
Missing closing quote in an RBBI rule. |
U_BRK_RULE_EMPTY_SET
|
Rule contains an empty Unicode Set. |
U_BRK_RULE_SYNTAX
|
Syntax error in RBBI rule. |
U_BRK_SEMICOLON_EXPECTED
|
Missing ';' at the end of an RBBI rule. |
U_BRK_UNCLOSED_SET
|
UnicodeSet writing an RBBI rule missing a closing ']'. |
U_BRK_UNDEFINED_VARIABLE
|
Use of an undefined $Variable in an RBBI rule. |
U_BRK_UNRECOGNIZED_OPTION
|
!!option in RBBI rules not recognized. |
U_BRK_VARIABLE_REDFINITION
|
RBBI rule $Variable redefined. |
U_BUFFER_OVERFLOW_ERROR
|
A result would not fit in the supplied buffer. |
U_CE_NOT_FOUND_ERROR
|
Currently used only while setting variable top, but can be used generally. |
U_COLLATOR_VERSION_MISMATCH
|
Collator version is not compatible with the base version. |
U_DECIMAL_NUMBER_SYNTAX_ERROR
|
Decimal number syntax error. |
U_DEFAULT_KEYWORD_MISSING
|
Missing DEFAULT rule in plural rules. |
U_DIFFERENT_UCA_VERSION
|
ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function |
U_DUPLICATE_KEYWORD
|
Duplicate keyword in PluralFormat. |
U_ENUM_OUT_OF_SYNC_ERROR
|
UEnumeration out of sync with underlying collection. |
U_ERROR_LIMIT
|
One more than the highest normal error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_ERROR_WARNING_LIMIT
|
One more than the highest normal UErrorCode warning value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_ERROR_WARNING_START
|
Start of information results (semantically successful) |
U_FILE_ACCESS_ERROR
|
The requested file cannot be found. |
U_FMT_PARSE_ERROR_LIMIT
|
One more than the highest normal formatting API error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_FMT_PARSE_ERROR_START
|
Start of format library errors. |
U_FORMAT_INEXACT_ERROR
|
Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY. |
U_IDNA_ACE_PREFIX_ERROR
|
|
U_IDNA_CHECK_BIDI_ERROR
|
|
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR
|
|
U_IDNA_ERROR_LIMIT
|
One more than the highest normal IDNA error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_IDNA_ERROR_START
|
|
U_IDNA_LABEL_TOO_LONG_ERROR
|
|
U_IDNA_PROHIBITED_ERROR
|
|
U_IDNA_STD3_ASCII_RULES_ERROR
|
|
U_IDNA_UNASSIGNED_ERROR
|
|
U_IDNA_VERIFICATION_ERROR
|
|
U_IDNA_ZERO_LENGTH_LABEL_ERROR
|
|
U_ILLEGAL_ARGUMENT_ERROR
|
Start of codes indicating failure. |
U_ILLEGAL_CHARACTER
|
A special character is outside its allowed context. |
U_ILLEGAL_CHAR_FOUND
|
Character conversion: Illegal input sequence/combination of input units. |
U_ILLEGAL_CHAR_IN_SEGMENT
|
UNUSED as of ICU 2.4. |
U_ILLEGAL_ESCAPE_SEQUENCE
|
ISO-2022 illegal escape sequence. |
U_ILLEGAL_PAD_POSITION
|
Pad symbol misplaced in number pattern. |
U_INDEX_OUTOFBOUNDS_ERROR
|
Trying to access the index that is out of bounds. |
U_INPUT_TOO_LONG_ERROR
|
The input is impractically long for an operation. It is rejected because it may lead to problems such as excessive processing time, stack depth, or heap memory requirements. |
U_INTERNAL_PROGRAM_ERROR
|
Indicates a bug in the library code. |
U_INTERNAL_TRANSLITERATOR_ERROR
|
Internal transliterator system error. |
U_INVALID_CHAR_FOUND
|
Character conversion: Unmappable input sequence. In other APIs: Invalid character. |
U_INVALID_FORMAT_ERROR
|
Data format is not what is expected. |
U_INVALID_FUNCTION
|
A "&fn()" rule specifies an unknown transliterator. |
U_INVALID_ID
|
A "::id" rule specifies an unknown transliterator. |
U_INVALID_PROPERTY_PATTERN
|
UNUSED as of ICU 2.4. |
U_INVALID_RBT_SYNTAX
|
A "::id" rule was passed to the RuleBasedTransliterator parser. |
U_INVALID_STATE_ERROR
|
Requested operation can not be completed with ICU in its current state. |
U_INVALID_TABLE_FILE
|
Conversion table file not found. |
U_INVALID_TABLE_FORMAT
|
Conversion table file found, but corrupted. |
U_INVARIANT_CONVERSION_ERROR
|
Unable to convert a UChar* string to char* with the invariant converter. |
U_MALFORMED_EXPONENTIAL_PATTERN
|
Grouping symbol in exponent pattern. |
U_MALFORMED_PRAGMA
|
A 'use' pragma is invalid. |
U_MALFORMED_RULE
|
Elements of a rule are misplaced. |
U_MALFORMED_SET
|
A UnicodeSet pattern is invalid. |
U_MALFORMED_SYMBOL_REFERENCE
|
UNUSED as of ICU 2.4. |
U_MALFORMED_UNICODE_ESCAPE
|
A Unicode escape pattern is invalid. |
U_MALFORMED_VARIABLE_DEFINITION
|
A variable definition is invalid. |
U_MALFORMED_VARIABLE_REFERENCE
|
A variable reference is invalid. |
U_MEMORY_ALLOCATION_ERROR
|
Memory allocation error. |
U_MESSAGE_PARSE_ERROR
|
Unable to parse a message (message format) |
U_MF_DUPLICATE_DECLARATION_ERROR
|
The same variable is declared in more than one .local or .input declaration. Deprecated. This API is for technology preview only. |
U_MF_DUPLICATE_OPTION_NAME_ERROR
|
In an annotation, the same option name appears more than once. This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only. |
U_MF_FORMATTING_ERROR
|
Covers all runtime errors: for example, an internally inconsistent set of options. Deprecated. This API is for technology preview only. |
U_MF_MISSING_SELECTOR_ANNOTATION_ERROR
|
A selector expression evaluates to an unannotated operand. Deprecated. This API is for technology preview only. |
U_MF_NONEXHAUSTIVE_PATTERN_ERROR
|
In a match-construct, the variants do not cover all possible values. This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only. |
U_MF_OPERAND_MISMATCH_ERROR
|
An operand provided to a function does not have the required form for that function. This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only. |
U_MF_SELECTOR_ERROR
|
A selector function is applied to an operand of the wrong type. This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only. |
U_MF_SYNTAX_ERROR
|
Includes all syntax errors. This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only. |
U_MF_UNKNOWN_FUNCTION_ERROR
|
An annotation refers to a function not defined by the standard or custom function registry. This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only. |
U_MF_UNRESOLVED_VARIABLE_ERROR
|
A variable is referred to but not bound by any definition. This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only. |
U_MF_UNSUPPORTED_EXPRESSION_ERROR
|
A message includes syntax reserved for future standardization or private implementation use. Deprecated. This API is for technology preview only. |
U_MF_UNSUPPORTED_STATEMENT_ERROR
|
A message includes a reserved statement. Deprecated. This API is for technology preview only. |
U_MF_VARIANT_KEY_MISMATCH_ERROR
|
In a match-construct, one or more variants had a different number of keys from the number of selectors. This API is for internal use only. ICU 75 technology preview Deprecated. This API is for technology preview only. |
U_MISMATCHED_SEGMENT_DELIMITERS
|
UNUSED as of ICU 2.4. |
U_MISPLACED_ANCHOR_START
|
A start anchor appears at an illegal position. |
U_MISPLACED_COMPOUND_FILTER
|
A compound filter is in an invalid location. |
U_MISPLACED_CURSOR_OFFSET
|
A cursor offset occurs at an illegal position. |
U_MISPLACED_QUANTIFIER
|
A quantifier appears after a segment close delimiter. |
U_MISSING_OPERATOR
|
A rule contains no operator. |
U_MISSING_RESOURCE_ERROR
|
The requested resource cannot be found. |
U_MISSING_SEGMENT_CLOSE
|
UNUSED as of ICU 2.4. |
U_MULTIPLE_ANTE_CONTEXTS
|
More than one ante context. |
U_MULTIPLE_COMPOUND_FILTERS
|
More than one compound filter. |
U_MULTIPLE_CURSORS
|
More than one cursor. |
U_MULTIPLE_DECIMAL_SEPARATORS
|
More than one decimal separator in number pattern. |
U_MULTIPLE_DECIMAL_SEPERATORS
|
Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS |
U_MULTIPLE_EXPONENTIAL_SYMBOLS
|
More than one exponent symbol in number pattern. |
U_MULTIPLE_PAD_SPECIFIERS
|
More than one pad symbol in number pattern. |
U_MULTIPLE_PERCENT_SYMBOLS
|
More than one percent symbol in number pattern. |
U_MULTIPLE_PERMILL_SYMBOLS
|
More than one permill symbol in number pattern. |
U_MULTIPLE_POST_CONTEXTS
|
More than one post context. |
U_NO_SPACE_AVAILABLE
|
No space available for in-buffer expansion for Arabic shaping. |
U_NO_WRITE_PERMISSION
|
Attempt to modify read-only or constant data. |
U_NUMBER_ARG_OUTOFBOUNDS_ERROR
|
The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. |
U_NUMBER_SKELETON_SYNTAX_ERROR
|
The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. |
U_PARSE_ERROR
|
Equivalent to Java ParseException. |
U_PARSE_ERROR_LIMIT
|
One more than the highest normal Transliterator error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_PARSE_ERROR_START
|
Start of Transliterator errors. |
U_PATTERN_SYNTAX_ERROR
|
Syntax error in format pattern. |
U_PLUGIN_CHANGED_LEVEL_WARNING
|
A plugin caused a level change. May not be an error, but later plugins may not load. |
U_PLUGIN_DIDNT_SET_LEVEL
|
The plugin didn't call uplug_setPlugLevel in response to a QUERY. |
U_PLUGIN_ERROR_LIMIT
|
One more than the highest normal plug-in error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_PLUGIN_ERROR_START
|
Start of codes indicating plugin failures. |
U_PLUGIN_TOO_HIGH
|
The plugin's level is too high to be loaded right now. |
U_PRIMARY_TOO_LONG_ERROR
|
User tried to set variable top to a primary that is longer than two bytes. |
U_REGEX_BAD_ESCAPE_SEQUENCE
|
Unrecognized backslash escape sequence in pattern. |
U_REGEX_BAD_INTERVAL
|
Error in {min,max} interval. |
U_REGEX_ERROR_LIMIT
|
One more than the highest normal regular expression error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_REGEX_ERROR_START
|
Start of codes indicating Regexp failures. |
U_REGEX_INTERNAL_ERROR
|
An internal error (bug) was detected. |
U_REGEX_INVALID_BACK_REF
|
Back-reference to a non-existent capture group. |
U_REGEX_INVALID_CAPTURE_GROUP_NAME
|
Invalid capture group name. |
U_REGEX_INVALID_FLAG
|
Invalid value for match mode flags. |
U_REGEX_INVALID_RANGE
|
In a character range [x-y], x is greater than y. |
U_REGEX_INVALID_STATE
|
RegexMatcher in invalid state for requested operation. |
U_REGEX_LOOK_BEHIND_LIMIT
|
Look-Behind pattern matches must have a bounded maximum length. |
U_REGEX_MAX_LT_MIN
|
In {min,max}, max is less than min. |
U_REGEX_MISMATCHED_PAREN
|
Incorrectly nested parentheses in regexp pattern. |
U_REGEX_MISSING_CLOSE_BRACKET
|
Missing closing bracket on a bracket expression. |
U_REGEX_NUMBER_TOO_BIG
|
Decimal number is too large. |
U_REGEX_OCTAL_TOO_BIG
|
Octal character constants must be <= 0377. Deprecated. ICU 54. This error cannot occur. |
U_REGEX_PATTERN_TOO_BIG
|
Pattern exceeds limits on size or complexity. |
U_REGEX_PROPERTY_SYNTAX
|
Incorrect Unicode property. |
U_REGEX_RULE_SYNTAX
|
Syntax error in regexp pattern. |
U_REGEX_SET_CONTAINS_STRING
|
Regexps cannot have UnicodeSets containing strings. |
U_REGEX_STACK_OVERFLOW
|
Regular expression backtrack stack overflow. |
U_REGEX_STOPPED_BY_CALLER
|
Matching operation aborted by user callback fn. |
U_REGEX_TIME_OUT
|
Maximum allowed match time exceeded. |
U_REGEX_UNIMPLEMENTED
|
Use of regexp feature that is not yet implemented. |
U_RESOURCE_TYPE_MISMATCH
|
an operation is requested over a resource that does not support it |
U_RULE_MASK_ERROR
|
A rule is hidden by an earlier more general rule. |
U_SAFECLONE_ALLOCATED_WARNING
|
A SafeClone operation required allocating memory (informational only) |
U_SORT_KEY_TOO_SHORT_WARNING
|
Number of levels requested in getBound is higher than the number of levels in the sort key. |
U_STANDARD_ERROR_LIMIT
|
One more than the highest standard error code. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_STATE_OLD_WARNING
|
ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading |
U_STATE_TOO_OLD_ERROR
|
ICU cannot construct a service from this state, as it is no longer supported. |
U_STRINGPREP_CHECK_BIDI_ERROR
|
|
U_STRINGPREP_PROHIBITED_ERROR
|
|
U_STRINGPREP_UNASSIGNED_ERROR
|
|
U_STRING_NOT_TERMINATED_WARNING
|
An output string could not be NUL-terminated because output length==destCapacity. |
U_TOO_MANY_ALIASES_ERROR
|
There are too many aliases in the path to the requested resource. It is very possible that a circular alias definition has occurred |
U_TRAILING_BACKSLASH
|
A dangling backslash. |
U_TRUNCATED_CHAR_FOUND
|
Character conversion: Incomplete input sequence. |
U_UNCLOSED_SEGMENT
|
A closing ')' is missing. |
U_UNDEFINED_KEYWORD
|
Undefined Plural keyword. |
U_UNDEFINED_SEGMENT_REFERENCE
|
A segment reference does not correspond to a defined segment. |
U_UNDEFINED_VARIABLE
|
A variable reference does not correspond to a defined variable. |
U_UNEXPECTED_TOKEN
|
Syntax error in format pattern. |
U_UNMATCHED_BRACES
|
Braces do not match in message pattern. |
U_UNQUOTED_SPECIAL
|
A special character was not quoted or escaped. |
U_UNSUPPORTED_ATTRIBUTE
|
UNUSED as of ICU 2.4. |
U_UNSUPPORTED_ERROR
|
Requested operation not supported in current context. |
U_UNSUPPORTED_ESCAPE_SEQUENCE
|
ISO-2022 unsupported escape sequence. |
U_UNSUPPORTED_PROPERTY
|
UNUSED as of ICU 2.4. |
U_UNTERMINATED_QUOTE
|
A closing single quote is missing. |
U_USELESS_COLLATOR_ERROR
|
Collator is options only and no base is specified. |
U_USING_DEFAULT_WARNING
|
A resource bundle lookup returned a result from the root locale (not an error) |
U_USING_FALLBACK_WARNING
|
A resource bundle lookup returned a fallback result (not an error) |
U_VARIABLE_RANGE_EXHAUSTED
|
Too many stand-ins generated for the given variable range. |
U_VARIABLE_RANGE_OVERLAP
|
The variable range overlaps characters used in rules. |
U_ZERO_ERROR
|
No error, no warning. |
UGraphemeClusterBreak
UGraphemeClusterBreak
Grapheme Cluster Break constants.
See also: UCHAR_GRAPHEME_CLUSTER_BREAK
Properties | |
---|---|
U_GCB_CONTROL
|
|
U_GCB_COUNT
|
One more than the highest normal UGraphemeClusterBreak value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_GCB_CR
|
|
U_GCB_EXTEND
|
|
U_GCB_E_BASE
|
|
U_GCB_E_BASE_GAZ
|
|
U_GCB_E_MODIFIER
|
|
U_GCB_GLUE_AFTER_ZWJ
|
|
U_GCB_L
|
|
U_GCB_LF
|
|
U_GCB_LV
|
|
U_GCB_LVT
|
|
U_GCB_OTHER
|
|
U_GCB_PREPEND
|
|
U_GCB_REGIONAL_INDICATOR
|
|
U_GCB_SPACING_MARK
|
|
U_GCB_T
|
|
U_GCB_V
|
|
U_GCB_ZWJ
|
UHangulSyllableType
UHangulSyllableType
Hangul Syllable Type constants.
See also: UCHAR_HANGUL_SYLLABLE_TYPE
Properties | |
---|---|
U_HST_COUNT
|
One more than the highest normal UHangulSyllableType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_HST_LEADING_JAMO
|
|
U_HST_LVT_SYLLABLE
|
|
U_HST_LV_SYLLABLE
|
|
U_HST_NOT_APPLICABLE
|
|
U_HST_TRAILING_JAMO
|
|
U_HST_VOWEL_JAMO
|
UIdentifierStatus
UIdentifierStatus
Identifier Status constants.
See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
See also: UCHAR_IDENTIFIER_STATUS
Properties | |
---|---|
U_ID_STATUS_ALLOWED
|
|
U_ID_STATUS_RESTRICTED
|
UIdentifierType
UIdentifierType
Identifier Type constants.
See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
See also: UCHAR_IDENTIFIER_TYPE
UIndicPositionalCategory
UIndicPositionalCategory
Indic Positional Category constants.
See also: UCHAR_INDIC_POSITIONAL_CATEGORY
UIndicSyllabicCategory
UIndicSyllabicCategory
Indic Syllabic Category constants.
See also: UCHAR_INDIC_SYLLABIC_CATEGORY
UJoiningGroup
UJoiningGroup
Joining Group constants.
See also: UCHAR_JOINING_GROUP
Properties | |
---|---|
U_JG_AFRICAN_FEH
|
|
U_JG_AFRICAN_NOON
|
|
U_JG_AFRICAN_QAF
|
|
U_JG_AIN
|
|
U_JG_ALAPH
|
|
U_JG_ALEF
|
|
U_JG_BEH
|
|
U_JG_BETH
|
|
U_JG_BURUSHASKI_YEH_BARREE
|
|
U_JG_COUNT
|
One more than the highest normal UJoiningGroup value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_JG_DAL
|
|
U_JG_DALATH_RISH
|
|
U_JG_E
|
|
U_JG_FARSI_YEH
|
|
U_JG_FE
|
|
U_JG_FEH
|
|
U_JG_FINAL_SEMKATH
|
|
U_JG_GAF
|
|
U_JG_GAMAL
|
|
U_JG_HAH
|
|
U_JG_HAMZA_ON_HEH_GOAL
|
|
U_JG_HANIFI_ROHINGYA_KINNA_YA
|
|
U_JG_HANIFI_ROHINGYA_PA
|
|
U_JG_HE
|
|
U_JG_HEH
|
|
U_JG_HEH_GOAL
|
|
U_JG_HETH
|
|
U_JG_KAF
|
|
U_JG_KAPH
|
|
U_JG_KHAPH
|
|
U_JG_KNOTTED_HEH
|
|
U_JG_LAM
|
|
U_JG_LAMADH
|
|
U_JG_MALAYALAM_BHA
|
|
U_JG_MALAYALAM_JA
|
|
U_JG_MALAYALAM_LLA
|
|
U_JG_MALAYALAM_LLLA
|
|
U_JG_MALAYALAM_NGA
|
|
U_JG_MALAYALAM_NNA
|
|
U_JG_MALAYALAM_NNNA
|
|
U_JG_MALAYALAM_NYA
|
|
U_JG_MALAYALAM_RA
|
|
U_JG_MALAYALAM_SSA
|
|
U_JG_MALAYALAM_TTA
|
|
U_JG_MANICHAEAN_ALEPH
|
|
U_JG_MANICHAEAN_AYIN
|
|
U_JG_MANICHAEAN_BETH
|
|
U_JG_MANICHAEAN_DALETH
|
|
U_JG_MANICHAEAN_DHAMEDH
|
|
U_JG_MANICHAEAN_FIVE
|
|
U_JG_MANICHAEAN_GIMEL
|
|
U_JG_MANICHAEAN_HETH
|
|
U_JG_MANICHAEAN_HUNDRED
|
|
U_JG_MANICHAEAN_KAPH
|
|
U_JG_MANICHAEAN_LAMEDH
|
|
U_JG_MANICHAEAN_MEM
|
|
U_JG_MANICHAEAN_NUN
|
|
U_JG_MANICHAEAN_ONE
|
|
U_JG_MANICHAEAN_PE
|
|
U_JG_MANICHAEAN_QOPH
|
|
U_JG_MANICHAEAN_RESH
|
|
U_JG_MANICHAEAN_SADHE
|
|
U_JG_MANICHAEAN_SAMEKH
|
|
U_JG_MANICHAEAN_TAW
|
|
U_JG_MANICHAEAN_TEN
|
|
U_JG_MANICHAEAN_TETH
|
|
U_JG_MANICHAEAN_THAMEDH
|
|
U_JG_MANICHAEAN_TWENTY
|
|
U_JG_MANICHAEAN_WAW
|
|
U_JG_MANICHAEAN_YODH
|
|
U_JG_MANICHAEAN_ZAYIN
|
|
U_JG_MEEM
|
|
U_JG_MIM
|
|
U_JG_NOON
|
|
U_JG_NO_JOINING_GROUP
|
|
U_JG_NUN
|
|
U_JG_NYA
|
|
U_JG_PE
|
|
U_JG_QAF
|
|
U_JG_QAPH
|
|
U_JG_REH
|
|
U_JG_REVERSED_PE
|
|
U_JG_ROHINGYA_YEH
|
|
U_JG_SAD
|
|
U_JG_SADHE
|
|
U_JG_SEEN
|
|
U_JG_SEMKATH
|
|
U_JG_SHIN
|
|
U_JG_STRAIGHT_WAW
|
|
U_JG_SWASH_KAF
|
|
U_JG_SYRIAC_WAW
|
|
U_JG_TAH
|
|
U_JG_TAW
|
|
U_JG_TEH_MARBUTA
|
|
U_JG_TEH_MARBUTA_GOAL
|
|
U_JG_TETH
|
|
U_JG_THIN_YEH
|
|
U_JG_VERTICAL_TAIL
|
|
U_JG_WAW
|
|
U_JG_YEH
|
|
U_JG_YEH_BARREE
|
|
U_JG_YEH_WITH_TAIL
|
|
U_JG_YUDH
|
|
U_JG_YUDH_HE
|
|
U_JG_ZAIN
|
|
U_JG_ZHAIN
|
UJoiningType
UJoiningType
Joining Type constants.
See also: UCHAR_JOINING_TYPE
Properties | |
---|---|
U_JT_COUNT
|
One more than the highest normal UJoiningType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_JT_DUAL_JOINING
|
|
U_JT_JOIN_CAUSING
|
|
U_JT_LEFT_JOINING
|
|
U_JT_NON_JOINING
|
|
U_JT_RIGHT_JOINING
|
|
U_JT_TRANSPARENT
|
ULayoutType
ULayoutType
ULineBreak
ULineBreak
Line Break constants.
See also: UCHAR_LINE_BREAK
Properties | |
---|---|
U_LB_AKSARA
|
|
U_LB_AKSARA_PREBASE
|
|
U_LB_AKSARA_START
|
|
U_LB_ALPHABETIC
|
|
U_LB_AMBIGUOUS
|
|
U_LB_BREAK_AFTER
|
|
U_LB_BREAK_BEFORE
|
|
U_LB_BREAK_BOTH
|
|
U_LB_BREAK_SYMBOLS
|
|
U_LB_CARRIAGE_RETURN
|
|
U_LB_CLOSE_PARENTHESIS
|
|
U_LB_CLOSE_PUNCTUATION
|
|
U_LB_COMBINING_MARK
|
|
U_LB_COMPLEX_CONTEXT
|
|
U_LB_CONDITIONAL_JAPANESE_STARTER
|
|
U_LB_CONTINGENT_BREAK
|
|
U_LB_COUNT
|
One more than the highest normal ULineBreak value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_LB_EXCLAMATION
|
|
U_LB_E_BASE
|
|
U_LB_E_MODIFIER
|
|
U_LB_GLUE
|
|
U_LB_H2
|
|
U_LB_H3
|
|
U_LB_HEBREW_LETTER
|
|
U_LB_HYPHEN
|
|
U_LB_IDEOGRAPHIC
|
|
U_LB_INFIX_NUMERIC
|
|
U_LB_INSEPARABLE
|
Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0. |
U_LB_INSEPERABLE
|
|
U_LB_JL
|
|
U_LB_JT
|
|
U_LB_JV
|
|
U_LB_LINE_FEED
|
|
U_LB_MANDATORY_BREAK
|
|
U_LB_NEXT_LINE
|
|
U_LB_NONSTARTER
|
|
U_LB_NUMERIC
|
|
U_LB_OPEN_PUNCTUATION
|
|
U_LB_POSTFIX_NUMERIC
|
|
U_LB_PREFIX_NUMERIC
|
|
U_LB_QUOTATION
|
|
U_LB_REGIONAL_INDICATOR
|
|
U_LB_SPACE
|
|
U_LB_SURROGATE
|
|
U_LB_UNKNOWN
|
|
U_LB_VIRAMA
|
|
U_LB_VIRAMA_FINAL
|
|
U_LB_WORD_JOINER
|
|
U_LB_ZWJ
|
|
U_LB_ZWSPACE
|
ULineBreakTag
ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
A range of values is defined for each category of word, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.
The numeric values of all of these constants are stable (will not change).
ULocAvailableType
ULocAvailableType
Types for uloc_getAvailableByType and uloc_countAvailableByType.
ULocDataLocaleType
ULocDataLocaleType
Constants for *_getLocale(). Allows the user to select whether she wants information on the requested, valid, or actual locale.
For example, a collator for "en_US_CALIFORNIA" was requested. In the current state of ICU (2.0), the requested locale is "en_US_CALIFORNIA", the valid locale is "en_US" (most specific locale supported by ICU) and the actual locale is "root" (the collation data comes unmodified from the UCA). The locale is considered supported by ICU if there is a core ICU bundle for that locale (although it may be empty).
Properties | |
---|---|
ULOC_ACTUAL_LOCALE
|
This is the locale the data actually comes from. |
ULOC_DATA_LOCALE_TYPE_LIMIT
|
One more than the highest normal ULocDataLocaleType value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
ULOC_REQUESTED_LOCALE
|
This is the requested locale. Deprecated. ICU 2.8 |
ULOC_VALID_LOCALE
|
This is the most specific locale supported by ICU. |
UNormalization2Mode
UNormalization2Mode
Constants for normalization modes.
For details about standard Unicode normalization forms and about the algorithms which are also used with custom mapping tables see http://www.unicode.org/unicode/reports/tr15/
Properties | |
---|---|
UNORM2_COMPOSE
|
Decomposition followed by composition. Same as standard NFC when using an "nfc" instance. Same as standard NFKC when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/ |
UNORM2_COMPOSE_CONTIGUOUS
|
Compose only contiguously. Also known as "FCC" or "Fast C Contiguous". The result will often but not always be in NFC. The result will conform to FCD which is useful for processing. Not a standard Unicode normalization form. For details see http://www.unicode.org/notes/tn5/#FCC |
UNORM2_DECOMPOSE
|
Map, and reorder canonically. Same as standard NFD when using an "nfc" instance. Same as standard NFKD when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/ |
UNORM2_FCD
|
"Fast C or D" form. If a string is in this form, then further decomposition without reordering would yield the same form as DECOMPOSE. Text in "Fast C or D" form can be processed efficiently with data tables that are "canonically closed", that is, that provide equivalent data for equivalent text, without having to be fully normalized. Not a standard Unicode normalization form. Not a unique form: Different FCD strings can be canonically equivalent. For details see http://www.unicode.org/notes/tn5/#FCD |
UNormalizationCheckResult
UNormalizationCheckResult
Result values for normalization quick check functions.
For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
UNumericType
UNumericType
Numeric Type constants.
See also: UCHAR_NUMERIC_TYPE
Properties | |
---|---|
U_NT_COUNT
|
One more than the highest normal UNumericType value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_NT_DECIMAL
|
|
U_NT_DIGIT
|
|
U_NT_NONE
|
|
U_NT_NUMERIC
|
UProperty
UProperty
Selection constants for Unicode properties.
These constants are used in functions like u_hasBinaryProperty to select one of the Unicode properties.
The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
For details about the properties see UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
Important: If ICU is built with UCD files from a Unicode version earlier than, e.g., 3.2, then properties marked with "new in Unicode 3.2" are not available or not fully available. Check u_getUnicodeVersion to be sure.
See also: u_hasBinaryProperty See also: u_getIntPropertyValue See also: u_getUnicodeVersion
Properties | |
---|---|
UCHAR_AGE
|
String property Age. Corresponds to u_charAge. |
UCHAR_ALPHABETIC
|
Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic |
UCHAR_ASCII_HEX_DIGIT
|
Binary property ASCII_Hex_Digit. 0-9 A-F a-f |
UCHAR_BASIC_EMOJI
|
Binary property of strings Basic_Emoji. |
UCHAR_BIDI_CLASS
|
Enumerated property Bidi_Class. Same as u_charDirection, returns UCharDirection values. |
UCHAR_BIDI_CONTROL
|
Binary property Bidi_Control. Format controls which have specific functions in the Bidi Algorithm. |
UCHAR_BIDI_MIRRORED
|
Binary property Bidi_Mirrored. Characters that may change display in RTL text. Same as u_isMirrored. See Bidi Algorithm, UTR 9. |
UCHAR_BIDI_MIRRORING_GLYPH
|
String property Bidi_Mirroring_Glyph. Corresponds to u_charMirror. |
UCHAR_BIDI_PAIRED_BRACKET
|
String property Bidi_Paired_Bracket (new in Unicode 6.3). Corresponds to u_getBidiPairedBracket. |
UCHAR_BIDI_PAIRED_BRACKET_TYPE
|
Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). Used in UAX #9: Unicode Bidirectional Algorithm (http://www.unicode.org/reports/tr9/) Returns UBidiPairedBracketType values. |
UCHAR_BINARY_LIMIT
|
One more than the last constant for binary Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_BINARY_START
|
First constant for binary Unicode properties. |
UCHAR_BLOCK
|
Enumerated property Block. Same as ublock_getCode, returns UBlockCode values. |
UCHAR_CANONICAL_COMBINING_CLASS
|
Enumerated property Canonical_Combining_Class. Same as u_getCombiningClass, returns 8-bit numeric values. |
UCHAR_CASED
|
Binary property Cased. For Lowercase, Uppercase and Titlecase characters. |
UCHAR_CASE_FOLDING
|
String property Case_Folding. Corresponds to u_strFoldCase in ustring.h. |
UCHAR_CASE_IGNORABLE
|
Binary property Case_Ignorable. Used in context-sensitive case mappings. |
UCHAR_CASE_SENSITIVE
|
Binary property Case_Sensitive. Either the source of a case mapping or in the target of a case mapping. Not the same as the general category Cased_Letter. |
UCHAR_CHANGES_WHEN_CASEFOLDED
|
Binary property Changes_When_Casefolded. |
UCHAR_CHANGES_WHEN_CASEMAPPED
|
Binary property Changes_When_Casemapped. |
UCHAR_CHANGES_WHEN_LOWERCASED
|
Binary property Changes_When_Lowercased. |
UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
|
Binary property Changes_When_NFKC_Casefolded. |
UCHAR_CHANGES_WHEN_TITLECASED
|
Binary property Changes_When_Titlecased. |
UCHAR_CHANGES_WHEN_UPPERCASED
|
Binary property Changes_When_Uppercased. |
UCHAR_DASH
|
Binary property Dash. Variations of dashes. |
UCHAR_DECOMPOSITION_TYPE
|
Enumerated property Decomposition_Type. Returns UDecompositionType values. |
UCHAR_DEFAULT_IGNORABLE_CODE_POINT
|
Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). Ignorable in most processing. <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) |
UCHAR_DEPRECATED
|
Binary property Deprecated (new in Unicode 3.2). The usage of deprecated characters is strongly discouraged. |
UCHAR_DIACRITIC
|
Binary property Diacritic. Characters that linguistically modify the meaning of another character to which they apply. |
UCHAR_DOUBLE_LIMIT
|
One more than the last constant for double Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_DOUBLE_START
|
First constant for double Unicode properties. |
UCHAR_EAST_ASIAN_WIDTH
|
Enumerated property East_Asian_Width. See http://www.unicode.org/reports/tr11/ Returns UEastAsianWidth values. |
UCHAR_EMOJI
|
Binary property Emoji. |
UCHAR_EMOJI_COMPONENT
|
Binary property Emoji_Component. |
UCHAR_EMOJI_KEYCAP_SEQUENCE
|
Binary property of strings Emoji_Keycap_Sequence. |
UCHAR_EMOJI_MODIFIER
|
Binary property Emoji_Modifier. |
UCHAR_EMOJI_MODIFIER_BASE
|
Binary property Emoji_Modifier_Base. |
UCHAR_EMOJI_PRESENTATION
|
Binary property Emoji_Presentation. |
UCHAR_EXTENDED_PICTOGRAPHIC
|
Binary property Extended_Pictographic. |
UCHAR_EXTENDER
|
Binary property Extender. Extend the value or shape of a preceding alphabetic character, e.g., length and iteration marks. |
UCHAR_FULL_COMPOSITION_EXCLUSION
|
Binary property Full_Composition_Exclusion. CompositionExclusions.txt+Singleton Decompositions+ Non-Starter Decompositions. |
UCHAR_GENERAL_CATEGORY
|
Enumerated property General_Category. Same as u_charType, returns UCharCategory values. |
UCHAR_GENERAL_CATEGORY_MASK
|
Bitmask property General_Category_Mask. This is the General_Category property returned as a bit mask. When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), returns bit masks for UCharCategory values where exactly one bit is set. When used with u_getPropertyValueName() and u_getPropertyValueEnum(), a multi-bit mask is used for sets of categories like "Letters". Mask values should be cast to uint32_t. |
UCHAR_GRAPHEME_BASE
|
Binary property Grapheme_Base (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ |
UCHAR_GRAPHEME_CLUSTER_BREAK
|
Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/) Returns UGraphemeClusterBreak values. |
UCHAR_GRAPHEME_EXTEND
|
Binary property Grapheme_Extend (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ |
UCHAR_GRAPHEME_LINK
|
Binary property Grapheme_Link (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. |
UCHAR_HANGUL_SYLLABLE_TYPE
|
Enumerated property Hangul_Syllable_Type, new in Unicode 4. Returns UHangulSyllableType values. |
UCHAR_HEX_DIGIT
|
Binary property Hex_Digit. Characters commonly used for hexadecimal numbers. |
UCHAR_HYPHEN
|
Binary property Hyphen. Dashes used to mark connections between pieces of words, plus the Katakana middle dot. |
UCHAR_IDENTIFIER_STATUS
|
Enumerated property Identifier_Status. Used for UTS #39 General Security Profile for Identifiers (https://www.unicode.org/reports/tr39/#General_Security_Profile). |
UCHAR_IDENTIFIER_TYPE
|
Miscellaneous property Identifier_Type. Used for UTS #39 General Security Profile for Identifiers (https://www.unicode.org/reports/tr39/#General_Security_Profile). Corresponds to u_hasIDType() and u_getIDTypes(). Each code point maps to a set of UIdentifierType values. See also: u_hasIDType. See also: u_getIDTypes. |
UCHAR_IDEOGRAPHIC
|
Binary property Ideographic. CJKV ideographs. |
UCHAR_IDS_BINARY_OPERATOR
|
Binary property IDS_Binary_Operator (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences. |
UCHAR_IDS_TRINARY_OPERATOR
|
Binary property IDS_Trinary_Operator (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences. |
UCHAR_IDS_UNARY_OPERATOR
|
Binary property IDS_Unary_Operator. For programmatic determination of Ideographic Description Sequences. |
UCHAR_ID_COMPAT_MATH_CONTINUE
|
Binary property ID_Compat_Math_Continue. Used in mathematical identifier profile in UAX #31. |
UCHAR_ID_COMPAT_MATH_START
|
Binary property ID_Compat_Math_Start. Used in mathematical identifier profile in UAX #31. |
UCHAR_ID_CONTINUE
|
Binary property ID_Continue. Characters that can continue an identifier. DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." ID_Start+Mn+Mc+Nd+Pc |
UCHAR_ID_START
|
Binary property ID_Start. Characters that can start an identifier. Lu+Ll+Lt+Lm+Lo+Nl |
UCHAR_INDIC_POSITIONAL_CATEGORY
|
Enumerated property Indic_Positional_Category. New in Unicode 6.0 as provisional property Indic_Matra_Category; renamed and changed to informative in Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt |
UCHAR_INDIC_SYLLABIC_CATEGORY
|
Enumerated property Indic_Syllabic_Category. New in Unicode 6.0 as provisional; informative since Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt |
UCHAR_INT_LIMIT
|
One more than the last constant for enumerated/integer Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_INT_START
|
First constant for enumerated/integer Unicode properties. |
UCHAR_INVALID_CODE
|
Represents a nonexistent or invalid property or property value. |
UCHAR_ISO_COMMENT
|
Deprecated string property ISO_Comment. Corresponds to u_getISOComment. Deprecated. ICU 49 |
UCHAR_JOINING_GROUP
|
Enumerated property Joining_Group. Returns UJoiningGroup values. |
UCHAR_JOINING_TYPE
|
Enumerated property Joining_Type. Returns UJoiningType values. |
UCHAR_JOIN_CONTROL
|
Binary property Join_Control. Format controls for cursive joining and ligation. |
UCHAR_LEAD_CANONICAL_COMBINING_CLASS
|
Enumerated property Lead_Canonical_Combining_Class. ICU-specific property for the ccc of the first code point of the decomposition, or lccc(c)=ccc(NFD(c)[0]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. |
UCHAR_LINE_BREAK
|
Enumerated property Line_Break. Returns ULineBreak values. |
UCHAR_LOGICAL_ORDER_EXCEPTION
|
Binary property Logical_Order_Exception (new in Unicode 3.2). Characters that do not use logical order and require special handling in most processing. |
UCHAR_LOWERCASE
|
Binary property Lowercase. Same as u_isULowercase, different from u_islower. Ll+Other_Lowercase |
UCHAR_LOWERCASE_MAPPING
|
String property Lowercase_Mapping. Corresponds to u_strToLower in ustring.h. |
UCHAR_MASK_LIMIT
|
One more than the last constant for bit-mask Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_MASK_START
|
First constant for bit-mask Unicode properties. |
UCHAR_MATH
|
Binary property Math. Sm+Other_Math |
UCHAR_NAME
|
String property Name. Corresponds to u_charName. |
UCHAR_NFC_INERT
|
Binary property NFC_Inert. ICU-specific property for characters that are inert under NFC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method. |
UCHAR_NFC_QUICK_CHECK
|
Enumerated property NFC_Quick_Check. Returns UNormalizationCheckResult values. |
UCHAR_NFD_INERT
|
Binary property NFD_Inert. ICU-specific property for characters that are inert under NFD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method. |
UCHAR_NFD_QUICK_CHECK
|
Enumerated property NFD_Quick_Check. Returns UNormalizationCheckResult values. |
UCHAR_NFKC_INERT
|
Binary property NFKC_Inert. ICU-specific property for characters that are inert under NFKC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method. |
UCHAR_NFKC_QUICK_CHECK
|
Enumerated property NFKC_Quick_Check. Returns UNormalizationCheckResult values. |
UCHAR_NFKD_INERT
|
Binary property NFKD_Inert. ICU-specific property for characters that are inert under NFKD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method. |
UCHAR_NFKD_QUICK_CHECK
|
Enumerated property NFKD_Quick_Check. Returns UNormalizationCheckResult values. |
UCHAR_NONCHARACTER_CODE_POINT
|
Binary property Noncharacter_Code_Point. Code points that are explicitly defined as illegal for the encoding of characters. |
UCHAR_NUMERIC_TYPE
|
Enumerated property Numeric_Type. Returns UNumericType values. |
UCHAR_NUMERIC_VALUE
|
Double property Numeric_Value. Corresponds to u_getNumericValue. |
UCHAR_OTHER_PROPERTY_LIMIT
|
One more than the last constant for Unicode properties with unusual value types. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_OTHER_PROPERTY_START
|
First constant for Unicode properties with unusual value types. |
UCHAR_PATTERN_SYNTAX
|
Binary property Pattern_Syntax (new in Unicode 4.1). See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/) |
UCHAR_PATTERN_WHITE_SPACE
|
Binary property Pattern_White_Space (new in Unicode 4.1). See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/) |
UCHAR_POSIX_ALNUM
|
Binary property alnum (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_POSIX_BLANK
|
Binary property blank (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_POSIX_GRAPH
|
Binary property graph (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_POSIX_PRINT
|
Binary property print (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_POSIX_XDIGIT
|
Binary property xdigit (a C/POSIX character class). Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation. |
UCHAR_PREPENDED_CONCATENATION_MARK
|
Binary property Prepended_Concatenation_Mark. |
UCHAR_QUOTATION_MARK
|
Binary property Quotation_Mark. |
UCHAR_RADICAL
|
Binary property Radical (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences. |
UCHAR_REGIONAL_INDICATOR
|
Binary property Regional_Indicator. |
UCHAR_RGI_EMOJI
|
Binary property of strings RGI_Emoji. |
UCHAR_RGI_EMOJI_FLAG_SEQUENCE
|
Binary property of strings RGI_Emoji_Flag_Sequence. |
UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE
|
Binary property of strings RGI_Emoji_Modifier_Sequence. |
UCHAR_RGI_EMOJI_TAG_SEQUENCE
|
Binary property of strings RGI_Emoji_Tag_Sequence. |
UCHAR_RGI_EMOJI_ZWJ_SEQUENCE
|
Binary property of strings RGI_Emoji_ZWJ_Sequence. |
UCHAR_SCRIPT
|
Enumerated property Script. Same as uscript_getScript, returns UScriptCode values. |
UCHAR_SCRIPT_EXTENSIONS
|
Miscellaneous property Script_Extensions (new in Unicode 6.0). Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/. Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. |
UCHAR_SEGMENT_STARTER
|
Binary property Segment_Starter. ICU-specific property for characters that are starters in terms of Unicode normalization and combining character sequences. They have ccc=0 and do not occur in non-initial position of the canonical decomposition of any character (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). ICU uses this property for segmenting a string for generating a set of canonically equivalent strings, e.g. for canonical closure while processing collation tailoring rules. |
UCHAR_SENTENCE_BREAK
|
Enumerated property Sentence_Break (new in Unicode 4.1). Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/) Returns USentenceBreak values. |
UCHAR_SIMPLE_CASE_FOLDING
|
String property Simple_Case_Folding. Corresponds to u_foldCase. |
UCHAR_SIMPLE_LOWERCASE_MAPPING
|
String property Simple_Lowercase_Mapping. Corresponds to u_tolower. |
UCHAR_SIMPLE_TITLECASE_MAPPING
|
String property Simple_Titlecase_Mapping. Corresponds to u_totitle. |
UCHAR_SIMPLE_UPPERCASE_MAPPING
|
String property Simple_Uppercase_Mapping. Corresponds to u_toupper. |
UCHAR_SOFT_DOTTED
|
Binary property Soft_Dotted (new in Unicode 3.2). Characters with a "soft dot", like i or j. An accent placed on these characters causes the dot to disappear. |
UCHAR_STRING_LIMIT
|
One more than the last constant for string Unicode properties. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
UCHAR_STRING_START
|
First constant for string Unicode properties. |
UCHAR_S_TERM
|
Binary property STerm (new in Unicode 4.0.1). Sentence Terminal. Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/) |
UCHAR_TERMINAL_PUNCTUATION
|
Binary property Terminal_Punctuation. Punctuation characters that generally mark the end of textual units. |
UCHAR_TITLECASE_MAPPING
|
String property Titlecase_Mapping. Corresponds to u_strToTitle in ustring.h. |
UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
|
Enumerated property Trail_Canonical_Combining_Class. ICU-specific property for the ccc of the last code point of the decomposition, or tccc(c)=ccc(NFD(c)[last]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. |
UCHAR_UNICODE_1_NAME
|
String property Unicode_1_Name. This property is of little practical value. Beginning with ICU 49, ICU APIs return an empty string for this property. Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). Deprecated. ICU 49 |
UCHAR_UNIFIED_IDEOGRAPH
|
Binary property Unified_Ideograph (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences. |
UCHAR_UPPERCASE
|
Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. Lu+Other_Uppercase |
UCHAR_UPPERCASE_MAPPING
|
String property Uppercase_Mapping. Corresponds to u_strToUpper in ustring.h. |
UCHAR_VARIATION_SELECTOR
|
Binary property Variation_Selector (new in Unicode 4.0.1). Indicates all those characters that qualify as Variation Selectors. For details on the behavior of these characters, see StandardizedVariants.html and 15.6 Variation Selectors. |
UCHAR_VERTICAL_ORIENTATION
|
Enumerated property Vertical_Orientation. Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). New as a UCD property in Unicode 10.0. |
UCHAR_WHITE_SPACE
|
Binary property White_Space. Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. Space characters+TAB+CR+LF-ZWSP-ZWNBSP |
UCHAR_WORD_BREAK
|
Enumerated property Word_Break (new in Unicode 4.1). Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/) Returns UWordBreakValues values. |
UCHAR_XID_CONTINUE
|
Binary property XID_Continue. ID_Continue modified to allow closure under normalization forms NFKC and NFKD. |
UCHAR_XID_START
|
Binary property XID_Start. ID_Start modified to allow closure under normalization forms NFKC and NFKD. |
UPropertyNameChoice
UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
These selectors are used to choose which name is returned for a given property or value. All properties and values have a long name. Most have a short name, but some do not. Unicode allows for additional names, beyond the long and short name, which would be indicated by U_LONG_PROPERTY_NAME + i, where i=1, 2,...
See also: u_getPropertyName() See also: u_getPropertyValueName()
Properties | |
---|---|
U_LONG_PROPERTY_NAME
|
|
U_PROPERTY_NAME_CHOICE_COUNT
|
One more than the highest normal UPropertyNameChoice value. Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_SHORT_PROPERTY_NAME
|
UScriptCode
UScriptCode
Constants for ISO 15924 script codes.
The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.
Properties | |
---|---|
USCRIPT_ADLAM
|
|
USCRIPT_AFAKA
|
|
USCRIPT_AHOM
|
|
USCRIPT_ANATOLIAN_HIEROGLYPHS
|
|
USCRIPT_ARABIC
|
|
USCRIPT_ARABIC_NASTALIQ
|
|
USCRIPT_ARMENIAN
|
|
USCRIPT_AVESTAN
|
|
USCRIPT_BALINESE
|
|
USCRIPT_BAMUM
|
|
USCRIPT_BASSA_VAH
|
|
USCRIPT_BATAK
|
|
USCRIPT_BENGALI
|
|
USCRIPT_BHAIKSUKI
|
|
USCRIPT_BLISSYMBOLS
|
|
USCRIPT_BOOK_PAHLAVI
|
|
USCRIPT_BOPOMOFO
|
|
USCRIPT_BRAHMI
|
|
USCRIPT_BRAILLE
|
|
USCRIPT_BUGINESE
|
|
USCRIPT_BUHID
|
|
USCRIPT_CANADIAN_ABORIGINAL
|
Canadian_Aboriginal script. |
USCRIPT_CARIAN
|
|
USCRIPT_CAUCASIAN_ALBANIAN
|
|
USCRIPT_CHAKMA
|
|
USCRIPT_CHAM
|
|
USCRIPT_CHEROKEE
|
|
USCRIPT_CHORASMIAN
|
|
USCRIPT_CIRTH
|
|
USCRIPT_CODE_LIMIT
|
One more than the highest normal UScriptCode value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
USCRIPT_COMMON
|
|
USCRIPT_COPTIC
|
|
USCRIPT_CUNEIFORM
|
|
USCRIPT_CYPRIOT
|
|
USCRIPT_CYPRO_MINOAN
|
|
USCRIPT_CYRILLIC
|
|
USCRIPT_DEMOTIC_EGYPTIAN
|
|
USCRIPT_DESERET
|
|
USCRIPT_DEVANAGARI
|
|
USCRIPT_DIVES_AKURU
|
|
USCRIPT_DOGRA
|
|
USCRIPT_DUPLOYAN
|
|
USCRIPT_DUPLOYAN_SHORTAND
|
Deprecated. ICU 54 Typo, use USCRIPT_DUPLOYAN |
USCRIPT_EASTERN_SYRIAC
|
|
USCRIPT_EGYPTIAN_HIEROGLYPHS
|
|
USCRIPT_ELBASAN
|
|
USCRIPT_ELYMAIC
|
|
USCRIPT_ESTRANGELO_SYRIAC
|
|
USCRIPT_ETHIOPIC
|
|
USCRIPT_GEORGIAN
|
|
USCRIPT_GLAGOLITIC
|
|
USCRIPT_GOTHIC
|
|
USCRIPT_GRANTHA
|
|
USCRIPT_GREEK
|
|
USCRIPT_GUJARATI
|
|
USCRIPT_GUNJALA_GONDI
|
|
USCRIPT_GURMUKHI
|
|
USCRIPT_HAN
|
|
USCRIPT_HANGUL
|
|
USCRIPT_HANIFI_ROHINGYA
|
|
USCRIPT_HANUNOO
|
|
USCRIPT_HAN_WITH_BOPOMOFO
|
|
USCRIPT_HARAPPAN_INDUS
|
|
USCRIPT_HATRAN
|
|
USCRIPT_HEBREW
|
|
USCRIPT_HIERATIC_EGYPTIAN
|
|
USCRIPT_HIRAGANA
|
|
USCRIPT_IMPERIAL_ARAMAIC
|
|
USCRIPT_INHERITED
|
|
USCRIPT_INSCRIPTIONAL_PAHLAVI
|
|
USCRIPT_INSCRIPTIONAL_PARTHIAN
|
|
USCRIPT_INVALID_CODE
|
|
USCRIPT_JAMO
|
|
USCRIPT_JAPANESE
|
|
USCRIPT_JAVANESE
|
|
USCRIPT_JURCHEN
|
|
USCRIPT_KAITHI
|
|
USCRIPT_KANNADA
|
|
USCRIPT_KATAKANA
|
|
USCRIPT_KATAKANA_OR_HIRAGANA
|
New script code in Unicode 4.0.1. |
USCRIPT_KAWI
|
|
USCRIPT_KAYAH_LI
|
|
USCRIPT_KHAROSHTHI
|
|
USCRIPT_KHITAN_SMALL_SCRIPT
|
|
USCRIPT_KHMER
|
|
USCRIPT_KHOJKI
|
|
USCRIPT_KHUDAWADI
|
|
USCRIPT_KHUTSURI
|
|
USCRIPT_KOREAN
|
|
USCRIPT_KPELLE
|
|
USCRIPT_LANNA
|
|
USCRIPT_LAO
|
|
USCRIPT_LATIN
|
|
USCRIPT_LATIN_FRAKTUR
|
|
USCRIPT_LATIN_GAELIC
|
|
USCRIPT_LEPCHA
|
|
USCRIPT_LIMBU
|
|
USCRIPT_LINEAR_A
|
|
USCRIPT_LINEAR_B
|
|
USCRIPT_LISU
|
|
USCRIPT_LOMA
|
|
USCRIPT_LYCIAN
|
|
USCRIPT_LYDIAN
|
|
USCRIPT_MAHAJANI
|
|
USCRIPT_MAKASAR
|
|
USCRIPT_MALAYALAM
|
|
USCRIPT_MANDAEAN
|
|
USCRIPT_MANDAIC
|
|
USCRIPT_MANICHAEAN
|
|
USCRIPT_MARCHEN
|
|
USCRIPT_MASARAM_GONDI
|
|
USCRIPT_MATHEMATICAL_NOTATION
|
|
USCRIPT_MAYAN_HIEROGLYPHS
|
|
USCRIPT_MEDEFAIDRIN
|
|
USCRIPT_MEITEI_MAYEK
|
|
USCRIPT_MENDE
|
Mende Kikakui. |
USCRIPT_MEROITIC
|
|
USCRIPT_MEROITIC_CURSIVE
|
|
USCRIPT_MEROITIC_HIEROGLYPHS
|
|
USCRIPT_MIAO
|
|
USCRIPT_MODI
|
|
USCRIPT_MONGOLIAN
|
|
USCRIPT_MOON
|
|
USCRIPT_MRO
|
|
USCRIPT_MULTANI
|
|
USCRIPT_MYANMAR
|
|
USCRIPT_NABATAEAN
|
|
USCRIPT_NAG_MUNDARI
|
|
USCRIPT_NAKHI_GEBA
|
|
USCRIPT_NANDINAGARI
|
|
USCRIPT_NEWA
|
|
USCRIPT_NEW_TAI_LUE
|
|
USCRIPT_NKO
|
|
USCRIPT_NUSHU
|
|
USCRIPT_NYIAKENG_PUACHUE_HMONG
|
|
USCRIPT_OGHAM
|
|
USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC
|
|
USCRIPT_OLD_HUNGARIAN
|
|
USCRIPT_OLD_ITALIC
|
|
USCRIPT_OLD_NORTH_ARABIAN
|
|
USCRIPT_OLD_PERMIC
|
|
USCRIPT_OLD_PERSIAN
|
|
USCRIPT_OLD_SOGDIAN
|
|
USCRIPT_OLD_SOUTH_ARABIAN
|
|
USCRIPT_OLD_UYGHUR
|
|
USCRIPT_OL_CHIKI
|
|
USCRIPT_ORIYA
|
|
USCRIPT_ORKHON
|
|
USCRIPT_OSAGE
|
|
USCRIPT_OSMANYA
|
|
USCRIPT_PAHAWH_HMONG
|
|
USCRIPT_PALMYRENE
|
|
USCRIPT_PAU_CIN_HAU
|
|
USCRIPT_PHAGS_PA
|
|
USCRIPT_PHOENICIAN
|
|
USCRIPT_PHONETIC_POLLARD
|
|
USCRIPT_PSALTER_PAHLAVI
|
|
USCRIPT_REJANG
|
|
USCRIPT_RONGORONGO
|
|
USCRIPT_RUNIC
|
|
USCRIPT_SAMARITAN
|
|
USCRIPT_SARATI
|
|
USCRIPT_SAURASHTRA
|
|
USCRIPT_SHARADA
|
|
USCRIPT_SHAVIAN
|
|
USCRIPT_SIDDHAM
|
|
USCRIPT_SIGN_WRITING
|
Sutton SignWriting. |
USCRIPT_SIMPLIFIED_HAN
|
|
USCRIPT_SINDHI
|
|
USCRIPT_SINHALA
|
|
USCRIPT_SOGDIAN
|
|
USCRIPT_SORA_SOMPENG
|
|
USCRIPT_SOYOMBO
|
|
USCRIPT_SUNDANESE
|
|
USCRIPT_SYLOTI_NAGRI
|
|
USCRIPT_SYMBOLS
|
|
USCRIPT_SYMBOLS_EMOJI
|
|
USCRIPT_SYRIAC
|
|
USCRIPT_TAGALOG
|
|
USCRIPT_TAGBANWA
|
|
USCRIPT_TAI_LE
|
|
USCRIPT_TAI_VIET
|
|
USCRIPT_TAKRI
|
|
USCRIPT_TAMIL
|
|
USCRIPT_TANGSA
|
|
USCRIPT_TANGUT
|
|
USCRIPT_TELUGU
|
|
USCRIPT_TENGWAR
|
|
USCRIPT_THAANA
|
|
USCRIPT_THAI
|
|
USCRIPT_TIBETAN
|
|
USCRIPT_TIFINAGH
|
|
USCRIPT_TIRHUTA
|
|
USCRIPT_TOTO
|
|
USCRIPT_TRADITIONAL_HAN
|
|
USCRIPT_UCAS
|
Canadian_Aboriginal script (alias). |
USCRIPT_UGARITIC
|
|
USCRIPT_UNKNOWN
|
|
USCRIPT_UNWRITTEN_LANGUAGES
|
|
USCRIPT_VAI
|
|
USCRIPT_VISIBLE_SPEECH
|
|
USCRIPT_VITHKUQI
|
|
USCRIPT_WANCHO
|
|
USCRIPT_WARANG_CITI
|
|
USCRIPT_WESTERN_SYRIAC
|
|
USCRIPT_WOLEAI
|
|
USCRIPT_YEZIDI
|
|
USCRIPT_YI
|
|
USCRIPT_ZANABAZAR_SQUARE
|
UScriptUsage
UScriptUsage
Script usage constants.
See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
USentenceBreak
USentenceBreak
Sentence Break constants.
See also: UCHAR_SENTENCE_BREAK
Properties | |
---|---|
U_SB_ATERM
|
|
U_SB_CLOSE
|
|
U_SB_COUNT
|
One more than the highest normal USentenceBreak value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_SB_CR
|
|
U_SB_EXTEND
|
|
U_SB_FORMAT
|
|
U_SB_LF
|
|
U_SB_LOWER
|
|
U_SB_NUMERIC
|
|
U_SB_OLETTER
|
|
U_SB_OTHER
|
|
U_SB_SCONTINUE
|
|
U_SB_SEP
|
|
U_SB_SP
|
|
U_SB_STERM
|
|
U_SB_UPPER
|
USentenceBreakTag
USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
A range of values is defined for each category of sentence, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.
The numeric values of all of these constants are stable (will not change).
UTransDirection
UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
Specified when a transliterator is opened. An "A-B" transliterator transliterates A to B when operating in the forward direction, and B to A when operating in the reverse direction.
UVerticalOrientation
UVerticalOrientation
Vertical Orientation constants.
See also: UCHAR_VERTICAL_ORIENTATION
Properties | |
---|---|
U_VO_ROTATED
|
|
U_VO_TRANSFORMED_ROTATED
|
|
U_VO_TRANSFORMED_UPRIGHT
|
|
U_VO_UPRIGHT
|
UWordBreak
UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
A range of values is defined for each category of word, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.
The numeric values of all of these constants are stable (will not change).
UWordBreakValues
UWordBreakValues
Word Break constants.
(UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
See also: UCHAR_WORD_BREAK
Properties | |
---|---|
U_WB_ALETTER
|
|
U_WB_COUNT
|
One more than the highest normal UWordBreakValues value. The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK). Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420. |
U_WB_CR
|
|
U_WB_DOUBLE_QUOTE
|
|
U_WB_EXTEND
|
|
U_WB_EXTENDNUMLET
|
|
U_WB_E_BASE
|
|
U_WB_E_BASE_GAZ
|
|
U_WB_E_MODIFIER
|
|
U_WB_FORMAT
|
|
U_WB_GLUE_AFTER_ZWJ
|
|
U_WB_HEBREW_LETTER
|
|
U_WB_KATAKANA
|
|
U_WB_LF
|
|
U_WB_MIDLETTER
|
|
U_WB_MIDNUM
|
|
U_WB_MIDNUMLET
|
|
U_WB_NEWLINE
|
|
U_WB_NUMERIC
|
|
U_WB_OTHER
|
|
U_WB_REGIONAL_INDICATOR
|
|
U_WB_SINGLE_QUOTE
|
|
U_WB_WSEGSPACE
|
|
U_WB_ZWJ
|
Typedefs
OldUChar
uint16_t OldUChar
Default ICU 58 definition of UChar.
A base type for UTF-16 code units and pointers. Unsigned 16-bit integer.
Define OldUChar to be wchar_t if that is 16 bits wide. If wchar_t is not 16 bits wide, then define OldUChar to be uint16_t.
This makes the definition of OldUChar platform-dependent but allows direct string type compatibility with platforms with 16-bit wchar_t types.
This is how UChar was defined in ICU 58, for transition convenience. Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. The current UChar responds to UCHAR_TYPE but OldUChar does not.
UBidiPairedBracketType
enum UBidiPairedBracketType UBidiPairedBracketType
Bidi Paired Bracket Type constants.
See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE
UBool
int8_t UBool
The ICU boolean type, a signed-byte integer.
ICU-specific for historical reasons: The C and C++ standards used to not define type bool. Also provides a fixed type definition, as opposed to type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
UBreakIteratorType
enum UBreakIteratorType UBreakIteratorType
The possible types of text boundaries.
UCPMap
struct UCPMap UCPMap
Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
See also: UCPTrie See also: UMutableCPTrie
UCPMapValueFilter
uint32_t U_CALLCONV UCPMapValueFilter(const void *context, uint32_t value)
Callback function type: Modifies a map value.
Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). The modified value will be returned by the getRange function.
Can be used to ignore some of the value bits, make a filter for one of several values, return a value index computed from the map value, etc.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
the modified value
|
UChar
char16_t UChar
The base type for UTF-16 code units and pointers.
Unsigned 16-bit integer. Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
UChar is configurable by defining the macro UCHAR_TYPE on the preprocessor or compiler command line: -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.) This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
The default is UChar=char16_t.
C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
In C, char16_t is a simple typedef of uint_least16_t. ICU requires uint_least16_t=uint16_t for data memory mapping. On macOS, char16_t is not available because the uchar.h standard header is missing.
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
UChar32 is a signed 32-bit integer (same as int32_t).
The Unicode code point range is 0..0x10ffff. All other values (negative or >=0x110000) are illegal as Unicode code points. They may be used as sentinel values to indicate "done", "error" or similar non-code point conditions.
Before ICU 2.4 (Jitterbug 2146), UChar32 was defined to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) or else to be uint32_t. That is, the definition of UChar32 was platform-dependent.
See also: U_SENTINEL
UCharCategory
enum UCharCategory UCharCategory
Data for enumerated Unicode general category types.
See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
UCharDirection
enum UCharDirection UCharDirection
This specifies the language directional property of a character set.
UCharEnumTypeRange
UBool U_CALLCONV UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type"). The callback function can stop the enumeration by returning false.
Details
Parameters
context
start
limit
type
Returns
UCharNameChoice
enum UCharNameChoice UCharNameChoice
Selector constants for u_charName().
u_charName() returns the "modern" name of a Unicode character; or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646; or an "extended" name that gives each Unicode code point a unique name.
See also: u_charName
UColAttribute
enum UColAttribute UColAttribute
Attributes that collation service understands.
All the attributes can take UCOL_DEFAULT value, as well as the values specific to each one.
UColAttributeValue
enum UColAttributeValue UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Here are all the allowable values. Not every attribute can take every value. The only universal value is UCOL_DEFAULT, which resets the attribute value to the predefined value for that locale.
UColBoundMode
enum UColBoundMode UColBoundMode
Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.
Underlying code depends on them having these numbers.
UColReorderCode
enum UColReorderCode UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes.
These reordering codes are to be used in conjunction with the script codes. See also: ucol_getReorderCodes See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode
UCollationResult
enum UCollationResult UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.
UCOL_EQUAL is returned if source string is compared to be equal to target string in the ucol_strcoll() method. UCOL_GREATER is returned if source string is compared to be greater than target string in the ucol_strcoll() method. See also: ucol_strcoll() Possible values for a comparison result.
UCollationStrength
UColAttributeValue UCollationStrength
Base letter represents a primary difference.
Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary differences. Use this to set the strength of a Collator object. Example of primary difference, "abc" < "abd"
Diacritical differences on the same base letter represent a secondary difference. Set comparison level to UCOL_SECONDARY to ignore tertiary differences. Use this to set the strength of a Collator object. Example of secondary difference, "ä" >> "a".
Uppercase and lowercase versions of the same character represents a tertiary difference. Set comparison level to UCOL_TERTIARY to include all comparison differences. Use this to set the strength of a Collator object. Example of tertiary difference, "abc" <<< "ABC".
Two characters are considered "identical" when they have the same unicode spellings. UCOL_IDENTICAL. For example, "ä" == "ä".
UCollationStrength is also used to determine the strength of sort keys generated from UCollator objects. These values can now be found in the UColAttributeValue enum.
UDate
double UDate
Date and Time data type.
This is a primitive data type that holds the date and time as the number of milliseconds since 1970-jan-01, 00:00 UTC. UTC leap seconds are ignored.
UDecompositionType
enum UDecompositionType UDecompositionType
Decomposition Type constants.
See also: UCHAR_DECOMPOSITION_TYPE
UEastAsianWidth
enum UEastAsianWidth UEastAsianWidth
East Asian Width constants.
See also: UCHAR_EAST_ASIAN_WIDTH See also: u_getIntPropertyValue
UEnumCharNamesFn
UBool U_CALLCONV UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name.
If such a function returns false, then the enumeration is stopped.
See also: UCharNameChoice See also: u_enumCharNames
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
true if the enumeration should continue, false to stop it.
|
UErrorCode
enum UErrorCode UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Initialize the UErrorCode with U_ZERO_ERROR, and check for success or failure using U_SUCCESS() or U_FAILURE():
```c
UErrorCode errorCode = U_ZERO_ERROR;
// call ICU API that needs an error code parameter.
if (U_FAILURE(errorCode)) {
    // An error occurred. Handle it here.
}
```
C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a suitable subclass.
For more information, see: https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes
Note: By convention, ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode first test:
if (U_FAILURE(errorCode)) { return immediately; }
so that in a chain of such functions the first one that sets an error code causes the following ones to not perform any operations.
UGraphemeClusterBreak
enum UGraphemeClusterBreak UGraphemeClusterBreak
Grapheme Cluster Break constants.
See also: UCHAR_GRAPHEME_CLUSTER_BREAK
UHangulSyllableType
enum UHangulSyllableType UHangulSyllableType
Hangul Syllable Type constants.
See also: UCHAR_HANGUL_SYLLABLE_TYPE
UIdentifierStatus
enum UIdentifierStatus UIdentifierStatus
Identifier Status constants.
See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
See also: UCHAR_IDENTIFIER_STATUS
UIdentifierType
enum UIdentifierType UIdentifierType
Identifier Type constants.
See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
See also: UCHAR_IDENTIFIER_TYPE
UIndicPositionalCategory
enum UIndicPositionalCategory UIndicPositionalCategory
Indic Positional Category constants.
See also: UCHAR_INDIC_POSITIONAL_CATEGORY
UIndicSyllabicCategory
enum UIndicSyllabicCategory UIndicSyllabicCategory
Indic Syllabic Category constants.
See also: UCHAR_INDIC_SYLLABIC_CATEGORY
UJoiningGroup
enum UJoiningGroup UJoiningGroup
Joining Group constants.
See also: UCHAR_JOINING_GROUP
ULineBreakTag
enum ULineBreakTag ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
A range of values is defined for each category of word, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.
The numeric values of all of these constants are stable (will not change).
ULocAvailableType
enum ULocAvailableType ULocAvailableType
Types for uloc_getAvailableByType and uloc_countAvailableByType.
ULocaleDisplayNames
struct ULocaleDisplayNames ULocaleDisplayNames
C typedef for struct ULocaleDisplayNames.
UNormalizationCheckResult
enum UNormalizationCheckResult UNormalizationCheckResult
Result values for normalization quick check functions.
For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
UParseError
struct UParseError UParseError
A UParseError struct is used to return detailed information about parsing errors.
It is used by ICU parsing engines that parse long rules, patterns, or programs, where the text being parsed is long enough that more information than a UErrorCode is needed to localize the error.
The line, offset, and context fields are optional; parsing engines may choose not to use them.
The preContext and postContext strings include some part of the context surrounding the error. If the source text is "let for=7" and "for" is the error (e.g., because it is a reserved word), then some examples of what a parser might produce are the following:
| preContext | postContext | Description |
|---|---|---|
| `""` | `""` | The parser does not support context |
| `"let "` | `"=7"` | Pre- and post-context only |
| `"let "` | `"for=7"` | Pre- and post-context and error text |
| `""` | `"for"` | Error text only |
Examples of engines which use UParseError (or may use it in the future) are Transliterator, RuleBasedBreakIterator, and RegexPattern.
UProperty
enum UProperty UProperty
Selection constants for Unicode properties.
These constants are used in functions like u_hasBinaryProperty to select one of the Unicode properties.
The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
For details about the properties see UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, then properties marked with "new in Unicode 3.2" are not or not fully available. Check u_getUnicodeVersion to be sure.
See also: u_hasBinaryProperty See also: u_getIntPropertyValue See also: u_getUnicodeVersion
UPropertyNameChoice
enum UPropertyNameChoice UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
These selectors are used to choose which name is returned for a given property or value. All properties and values have a long name. Most have a short name, but some do not. Unicode allows for additional names, beyond the long and short name, which would be indicated by U_LONG_PROPERTY_NAME + i, where i=1, 2,...
See also: u_getPropertyName() See also: u_getPropertyValueName()
UReplaceable
void * UReplaceable
An opaque replaceable text object.
This will be manipulated only through the caller-supplied UReplaceableFunctor struct. Related to the C++ class Replaceable. This is currently only used in the Transliterator C API, see utrans.h .
UReplaceableCallbacks
struct UReplaceableCallbacks UReplaceableCallbacks
A set of function pointers that transliterators use to manipulate a UReplaceable.
The caller should supply the required functions to manipulate their text appropriately. Related to the C++ class Replaceable.
UScriptCode
enum UScriptCode UScriptCode
Constants for ISO 15924 script codes.
The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.
UScriptUsage
enum UScriptUsage UScriptUsage
Script usage constants.
See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
USentenceBreak
enum USentenceBreak USentenceBreak
Sentence Break constants.
See also: UCHAR_SENTENCE_BREAK
USentenceBreakTag
enum USentenceBreakTag USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
A range of values is defined for each category of sentence, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.
The numeric values of all of these constants are stable (will not change).
UTransDirection
enum UTransDirection UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
Specified when a transliterator is opened. An "A-B" transliterator transliterates A to B when operating in the forward direction, and B to A when operating in the reverse direction.
UTransPosition
struct UTransPosition UTransPosition
Position structure for utrans_transIncremental() incremental transliteration.
This structure defines two substrings of the text being transliterated. The first region, [contextStart, contextLimit), defines what characters the transliterator will read as context. The second region, [start, limit), defines what characters will actually be transliterated. The second region should be a subset of the first.
After a transliteration operation, some of the indices in this structure will be modified. See the field descriptions for details.
contextStart <= start <= limit <= contextLimit
Note: All index values in this structure must be at code point boundaries. That is, none of them may occur between two code units of a surrogate pair. If any index does split a surrogate pair, results are unspecified.
UTransliterator
void * UTransliterator
An opaque transliterator for use in C.
Open with utrans_openxxx() and close with utrans_close() when done. Equivalent to the C++ class Transliterator and its subclasses. See also: Transliterator
UVersionInfo
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).
UVerticalOrientation
enum UVerticalOrientation UVerticalOrientation
Vertical Orientation constants.
See also: UCHAR_VERTICAL_ORIENTATION
UWordBreak
enum UWordBreak UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
A range of values is defined for each category of word, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.
The numeric values of all of these constants are stable (will not change).
UWordBreakValues
enum UWordBreakValues UWordBreakValues
Word Break constants.
(UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
See also: UCHAR_WORD_BREAK
Variables
context
U_CDECL_BEGIN typedef void * context
Functions
UChar
U_CDECL_BEGIN typedef UChar( U_CALLCONV *UNESCAPE_CHAR_AT )(int32_t offset, void *context)
Callback function for u_unescapeAt() that returns a character of the source text given an offset and a context pointer.
The context pointer will be whatever is passed into u_unescapeAt().
See also: u_unescapeAt
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
the character represented by the escape sequence at offset
|
u_charAge
U_CAPI void U_EXPORT2 u_charAge( UChar32 c, UVersionInfo versionArray )
Get the "age" of the code point.
The "age" is the Unicode version when the code point was first designated (as a non-character or for Private Use) or assigned a character. This can be useful to avoid emitting code points to receiving processes that do not accept newer characters. The data is from the UCD file DerivedAge.txt.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
u_charDigitValue
U_CAPI int32_t U_EXPORT2 u_charDigitValue( UChar32 c )
Returns the decimal digit value of a decimal digit character.
Such characters have the general category "Nd" (decimal digit numbers) and a Numeric_Type of Decimal.
Unlike ICU releases before 2.6, no digit values are returned for any Han characters because Han number characters are often used with a special Chinese-style number format (with characters for powers of 10 in between) instead of in decimal-positional notation. Unicode 4 explicitly assigns Han number characters the Numeric_Type Numeric instead of Decimal. See Jitterbug 1483 for more details.
Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() for complete numeric Unicode properties.
See also: u_getNumericValue
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the decimal digit value of c, or -1 if c is not a decimal digit character
|
u_charDirection
U_CAPI UCharDirection U_EXPORT2 u_charDirection( UChar32 c )
Returns the bidirectional category value for the code point, which is used in the Unicode bidirectional algorithm (UAX #9 http://www.unicode.org/reports/tr9/).
Note that some unassigned code points have bidi values of R or AL because they are in blocks that are reserved for Right-To-Left scripts.
Same as java.lang.Character.getDirectionality()
See also: UCharDirection
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the bidirectional category (UCharDirection) value
|
u_charFromName
U_CAPI UChar32 U_EXPORT2 u_charFromName( UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode )
Find a Unicode character by its name and return its code point value.
The name is matched exactly and completely. If the name does not correspond to a code point, pErrorCode is set to `U_INVALID_CHAR_FOUND`. A Unicode 1.0 name is matched only if it differs from the modern name. Unicode names are all uppercase. Extended names are lowercase followed by an uppercase hexadecimal number, and within angle brackets.
See also: UCharNameChoice See also: u_charName See also: u_enumCharNames
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
The Unicode value of the code point with the given name, or an undefined value if there is no such code point.
|
u_charMirror
U_CAPI UChar32 U_EXPORT2 u_charMirror( UChar32 c )
Maps the specified character to a "mirror-image" character.
For characters with the Bidi_Mirrored property, implementations sometimes need a "poor man's" mapping to another Unicode character (code point) such that the default glyph may serve as the mirror-image of the default glyph of the specified character. This is useful for text conversion to and from codepages with visual order, and for displays without glyph selection capabilities.
See also: UCHAR_BIDI_MIRRORED See also: u_isMirrored
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
another Unicode code point that may serve as a mirror-image substitute, or c itself if there is no such mapping or c does not have the Bidi_Mirrored property
|
u_charName
U_CAPI int32_t U_EXPORT2 u_charName( UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode )
Retrieve the name of a Unicode character.
Depending on `nameChoice`, the character name written into the buffer is the "modern" name or the name that was defined in Unicode version 1.0. The name contains only "invariant" characters like A-Z, 0-9, space, and '-'. Unicode 1.0 names are only retrieved if they are different from the modern names and if the data file contains the data for them. gennames may or may not be called with a command line option to include 1.0 names in unames.dat.
See also: UCharNameChoice See also: u_charFromName See also: u_enumCharNames
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
The length of the name, or 0 if there is no name for this character. If the bufferLength is less than or equal to the length, then the buffer contains the truncated name and the returned length indicates the full length of the name. The length does not include the zero-termination.
|
u_charType
U_CAPI int8_t U_EXPORT2 u_charType( UChar32 c )
Returns the general category value for the code point.
Same as java.lang.Character.getType().
See also: UCharCategory
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the general category (UCharCategory) value
|
u_countChar32
U_CAPI int32_t U_EXPORT2 u_countChar32( const UChar *s, int32_t length )
Count Unicode code points in the length UChar code units of the string.
A code point may occupy either one or two UChar code units. Counting code points involves reading all code units.
This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
The number of code points in the specified code units.
|
u_digit
U_CAPI int32_t U_EXPORT2 u_digit( UChar32 ch, int8_t radix )
Returns the decimal digit value of the code point in the specified radix.
If the radix is not in the range `2<=radix<=36` or if the value of `ch` is not a valid digit in the specified radix, `-1` is returned. A character is a valid digit if at least one of the following is true:
- The character has a decimal digit value. Such characters have the general category "Nd" (decimal digit numbers) and a Numeric_Type of Decimal. In this case the value is the character's decimal digit value.
- The character is one of the uppercase Latin letters `'A'` through `'Z'`. In this case the value is `ch-'A'+10`.
- The character is one of the lowercase Latin letters `'a'` through `'z'`. In this case the value is `ch-'a'+10`.
- Latin letters from both the ASCII range (0061..007A, 0041..005A) as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) are recognized.
Same as java.lang.Character.digit().
See also: UCHAR_NUMERIC_TYPE See also: u_forDigit See also: u_charDigitValue See also: u_isdigit
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
the numeric value represented by the character in the specified radix, or -1 if there is no value or if the value exceeds the radix.
|
u_enumCharNames
U_CAPI void U_EXPORT2 u_enumCharNames( UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode )
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.
For Unicode 1.0 names, only those are enumerated that differ from the modern names.
See also: UCharNameChoice See also: UEnumCharNamesFn See also: u_charName See also: u_charFromName
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
u_enumCharTypes
U_CAPI void U_EXPORT2 u_enumCharTypes( UCharEnumTypeRange *enumRange, const void *context )
Enumerate efficiently all code points with their Unicode general categories.
This is useful for building data structures (e.g., UnicodeSet's), for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
For each contiguous range of code points with a given general category ("character type"), the UCharEnumTypeRange function is called. Adjacent ranges have different types. The Unicode Standard guarantees that the numeric value of the type is 0..31.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
u_errorName
U_CAPI const char *U_EXPORT2 u_errorName( UErrorCode code )
Return a string for a UErrorCode value.
The string will be the same as the name of the error code constant in the UErrorCode enum above.
u_foldCase
U_CAPI UChar32 U_EXPORT2 u_foldCase( UChar32 c, uint32_t options )
The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see ustring.h and the UnicodeString class. See also the User Guide chapter on C/POSIX migration: https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
the Simple_Case_Folding of the code point, if any; otherwise the code point itself.
|
u_forDigit
U_CAPI UChar32 U_EXPORT2 u_forDigit( int32_t digit, int8_t radix )
Determines the character representation for a specific digit in the specified radix.
If the value of `radix` is not a valid radix, or the value of `digit` is not a valid digit in the specified radix, the null character (`U+0000`) is returned.
The `radix` argument is valid if it is greater than or equal to 2 and less than or equal to 36. The `digit` argument is valid if `0 <= digit < radix`.
If the digit is less than 10, then `'0' + digit` is returned. Otherwise, the value `'a' + digit - 10` is returned.
Same as java.lang.Character.forDigit().
See also: u_digit See also: u_charDigitValue See also: u_isdigit
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
the `char` representation of the specified digit in the specified radix.
|
u_getBidiPairedBracket
U_CAPI UChar32 U_EXPORT2 u_getBidiPairedBracket( UChar32 c )
Maps the specified character to its paired bracket character.
For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). Otherwise c itself is returned. See http://www.unicode.org/reports/tr9/
See also: UCHAR_BIDI_PAIRED_BRACKET See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE See also: u_charMirror
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the paired bracket code point, or c itself if there is no such mapping (Bidi_Paired_Bracket_Type=None)
|
u_getCombiningClass
U_CAPI uint8_t U_EXPORT2 u_getCombiningClass( UChar32 c )
Returns the combining class of the code point as specified in UnicodeData.txt.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the combining class of the character
|
u_getIntPropertyMaxValue
U_CAPI int32_t U_EXPORT2 u_getIntPropertyMaxValue( UProperty which )
Get the maximum value for an enumerated/integer/binary Unicode property.
Can be used together with u_getIntPropertyMinValue to allocate arrays of UnicodeSet or similar.
Examples for min/max values (for Unicode 3.2):
- UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
- UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
- UCHAR_IDEOGRAPHIC: 0/1 (false/true)
For undefined UProperty constant values, min/max values will be 0/-1.
See also: UProperty See also: u_hasBinaryProperty See also: u_getUnicodeVersion See also: u_getIntPropertyMinValue See also: u_getIntPropertyValue
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
Maximum value returned by u_getIntPropertyValue for a Unicode property. <=0 if the property selector is out of range.
|
u_getIntPropertyMinValue
U_CAPI int32_t U_EXPORT2 u_getIntPropertyMinValue( UProperty which )
Get the minimum value for an enumerated/integer/binary Unicode property.
Can be used together with u_getIntPropertyMaxValue to allocate arrays of UnicodeSet or similar.
See also: UProperty See also: u_hasBinaryProperty See also: u_getUnicodeVersion See also: u_getIntPropertyMaxValue See also: u_getIntPropertyValue
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
Minimum value returned by u_getIntPropertyValue for a Unicode property. 0 if the property selector is out of range.
|
u_getIntPropertyValue
U_CAPI int32_t U_EXPORT2 u_getIntPropertyValue( UChar32 c, UProperty which )
Get the property value for an enumerated or integer Unicode property for a code point.
Also returns binary and mask property values.
Unicode, especially in version 3.2, defines many more properties than the original set in UnicodeData.txt.
The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see http://www.unicode.org/ . For names of Unicode properties see the UCD file PropertyAliases.txt.
Sample usage: UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
See also: UProperty See also: u_hasBinaryProperty See also: u_getIntPropertyMinValue See also: u_getIntPropertyMaxValue See also: u_getIntPropertyMap See also: u_getUnicodeVersion
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
Numeric value that is directly the property value or, for enumerated properties, corresponds to the numeric value of the enumerated constant of the respective property value enumeration type (cast to enum type if necessary). Returns 0 or 1 (for false/true) for binary Unicode properties. Returns a bit-mask for mask properties. Returns 0 if 'which' is out of bounds or if the Unicode version does not have data for the property at all, or not for this code point.
|
u_getNumericValue
U_CAPI double U_EXPORT2 u_getNumericValue( UChar32 c )
Get the numeric value for a Unicode code point as defined in the Unicode Character Database.
A "double" return type is necessary because some numeric values are fractions, negative, or too large for int32_t.
For characters without any numeric values in the Unicode Character Database, this function will return U_NO_NUMERIC_VALUE. Note: This is different from the Unicode Standard which specifies NaN as the default value. (NaN is not available on all platforms.)
Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() also supports negative values, large values, and fractions, while Java's getNumericValue() returns values 10..35 for ASCII letters.
See also: U_NO_NUMERIC_VALUE
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
|
u_getPropertyEnum
U_CAPI UProperty U_EXPORT2 u_getPropertyEnum( const char *alias )
Return the UProperty enum for a given property name, as specified in the Unicode database file PropertyAliases.txt.
Short, long, and any other variants are recognized.
In addition, this function maps the synthetic names "gcm" / "General_Category_Mask" to the property UCHAR_GENERAL_CATEGORY_MASK. These names are not in PropertyAliases.txt.
See also: UProperty
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
a UProperty enum, or UCHAR_INVALID_CODE if the given name does not match any property.
|
u_getPropertyName
U_CAPI const char *U_EXPORT2 u_getPropertyName( UProperty property, UPropertyNameChoice nameChoice )
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases.txt.
In addition, this function maps the property UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / "General_Category_Mask". These names are not in PropertyAliases.txt.
See also: UProperty See also: UPropertyNameChoice
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
a pointer to the name, or NULL if either the property or the nameChoice is out of range. If a given nameChoice returns NULL, then all larger values of nameChoice will return NULL, with one exception: if NULL is returned for U_SHORT_PROPERTY_NAME, then U_LONG_PROPERTY_NAME (and higher) may still return a non-NULL value. The returned pointer is valid until u_cleanup() is called.
|
u_getPropertyValueEnum
U_CAPI int32_t U_EXPORT2 u_getPropertyValueEnum( UProperty property, const char *alias )
Return the property value integer for a given value name, as specified in the Unicode database file PropertyValueAliases.txt.
Short, long, and any other variants are recognized.
Note: Some of the names in PropertyValueAliases.txt will only be recognized with UCHAR_GENERAL_CATEGORY_MASK, not UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
See also: UProperty
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
a value integer or UCHAR_INVALID_CODE if the given name does not match any value of the given property, or if the property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values are not values of UCharCategory, but rather mask values produced by U_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented.
|
u_getPropertyValueName
U_CAPI const char *U_EXPORT2 u_getPropertyValueName( UProperty property, int32_t value, UPropertyNameChoice nameChoice )
Return the Unicode name for a given property value, as given in the Unicode database file PropertyValueAliases.txt.
Note: Some of the names in PropertyValueAliases.txt can only be retrieved using UCHAR_GENERAL_CATEGORY_MASK, not UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
See also: UProperty See also: UPropertyNameChoice
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
a pointer to the name, or NULL if either the property or the nameChoice is out of range. If a given nameChoice returns NULL, then all larger values of nameChoice will return NULL, with one exception: if NULL is returned for U_SHORT_PROPERTY_NAME, then U_LONG_PROPERTY_NAME (and higher) may still return a non-NULL value. The returned pointer is valid until u_cleanup() is called.
|
u_getUnicodeVersion
U_CAPI void U_EXPORT2 u_getUnicodeVersion( UVersionInfo versionArray )
Gets the Unicode version information.
The version array is filled in with the version information for the Unicode standard that is currently used by ICU. For example, Unicode version 3.1.1 is represented as an array with the values { 3, 1, 1, 0 }.
Details | |||
---|---|---|---|
Parameters |
|
u_getVersion
U_CAPI void U_EXPORT2 u_getVersion( UVersionInfo versionArray )
Gets the ICU release version.
The version array stores the version information for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. Definition of this function lives in putil.c
Details | |||
---|---|---|---|
Parameters |
|
u_hasBinaryProperty
U_CAPI UBool U_EXPORT2 u_hasBinaryProperty( UChar32 c, UProperty which )
Check a binary Unicode property for a code point.
Unicode, especially in version 3.2, defines many more properties than the original set in UnicodeData.txt.
The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see http://www.unicode.org/ucd/ . For names of Unicode properties see the UCD file PropertyAliases.txt.
Important: If ICU is built with UCD files from Unicode versions below 3.2, then properties marked with "new in Unicode 3.2" are not or not fully available.
See also: UProperty See also: u_getBinaryPropertySet See also: u_getIntPropertyValue See also: u_getUnicodeVersion
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
true or false according to the binary Unicode property value for c. Also false if 'which' is out of bounds or if the Unicode version does not have data for the property at all.
|
u_isIDIgnorable
U_CAPI UBool U_EXPORT2 u_isIDIgnorable( UChar32 c )
Determines if the specified character should be regarded as an ignorable character in an identifier, according to Java.
True for characters with general category "Cf" (format controls) as well as non-whitespace ISO controls (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).
Same as java.lang.Character.isIdentifierIgnorable().
Note that Unicode only recommends ignoring Cf (format controls).
See also: UCHAR_DEFAULT_IGNORABLE_CODE_POINT See also: u_isIDStart See also: u_isIDPart
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is ignorable in identifiers according to Java
|
u_isIDPart
U_CAPI UBool U_EXPORT2 u_isIDPart( UChar32 c )
Determines if the specified character is permissible as a non-initial character of an identifier according to UAX #31 Unicode Identifier and Pattern Syntax.
Same as Unicode ID_Continue (UCHAR_ID_CONTINUE).
See also: UCHAR_ID_CONTINUE See also: u_isIDStart See also: u_isIDIgnorable
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point may occur as a non-initial character of an identifier
|
u_isIDStart
U_CAPI UBool U_EXPORT2 u_isIDStart( UChar32 c )
Determines if the specified character is permissible as the first character in an identifier according to UAX #31 Unicode Identifier and Pattern Syntax.
Same as Unicode ID_Start (UCHAR_ID_START).
See also: UCHAR_ID_START See also: u_isalpha See also: u_isIDPart
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point may start an identifier
|
u_isISOControl
U_CAPI UBool U_EXPORT2 u_isISOControl( UChar32 c )
Determines whether the specified code point is an ISO control code.
True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
Same as java.lang.Character.isISOControl().
See also: u_iscntrl
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is an ISO control code
|
u_isJavaIDPart
U_CAPI UBool U_EXPORT2 u_isJavaIDPart( UChar32 c )
Determines if the specified character is permissible in a Java identifier.
In addition to u_isIDPart(c), true for characters with general category "Sc" (currency symbols).
Same as java.lang.Character.isJavaIdentifierPart().
See also: u_isIDIgnorable See also: u_isJavaIDStart See also: u_isalpha See also: u_isdigit See also: u_isIDPart
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point may occur in a Java identifier
|
u_isJavaIDStart
U_CAPI UBool U_EXPORT2 u_isJavaIDStart( UChar32 c )
Determines if the specified character is permissible as the first character in a Java identifier.
In addition to u_isIDStart(c), true for characters with general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
Same as java.lang.Character.isJavaIdentifierStart().
See also: u_isJavaIDPart See also: u_isalpha See also: u_isIDStart
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point may start a Java identifier
|
u_isJavaSpaceChar
U_CAPI UBool U_EXPORT2 u_isJavaSpaceChar( UChar32 c )
Determine if the specified code point is a space character according to Java.
True for characters with general categories "Z" (separators), which does not include control codes (e.g., TAB or Line Feed).
Same as java.lang.Character.isSpaceChar().
Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.
See also: u_isspace See also: u_isWhitespace See also: u_isUWhiteSpace
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a space character according to Character.isSpaceChar()
|
u_isMirrored
U_CAPI UBool U_EXPORT2 u_isMirrored( UChar32 c )
Determines whether the code point has the Bidi_Mirrored property.
This property is set for characters that are commonly used in Right-To-Left contexts and need to be displayed with a "mirrored" glyph.
Same as java.lang.Character.isMirrored(). Same as UCHAR_BIDI_MIRRORED
See also: UCHAR_BIDI_MIRRORED
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the character has the Bidi_Mirrored property
|
u_isUAlphabetic
U_CAPI UBool U_EXPORT2 u_isUAlphabetic( UChar32 c )
Check if a code point has the Alphabetic Unicode property.
Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). This is different from u_isalpha!
See also: UCHAR_ALPHABETIC See also: u_isalpha See also: u_hasBinaryProperty
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point has the Alphabetic Unicode property, false otherwise
|
u_isULowercase
U_CAPI UBool U_EXPORT2 u_isULowercase( UChar32 c )
Check if a code point has the Lowercase Unicode property.
Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). This is different from u_islower!
See also: UCHAR_LOWERCASE See also: u_islower See also: u_hasBinaryProperty
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point has the Lowercase Unicode property, false otherwise
|
u_isUUppercase
U_CAPI UBool U_EXPORT2 u_isUUppercase( UChar32 c )
Check if a code point has the Uppercase Unicode property.
Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). This is different from u_isupper!
See also: UCHAR_UPPERCASE See also: u_isupper See also: u_hasBinaryProperty
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point has the Uppercase Unicode property, false otherwise
|
u_isUWhiteSpace
U_CAPI UBool U_EXPORT2 u_isUWhiteSpace( UChar32 c )
Check if a code point has the White_Space Unicode property.
Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). This is different from both u_isspace and u_isWhitespace!
Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.
See also: UCHAR_WHITE_SPACE See also: u_isWhitespace See also: u_isspace See also: u_isJavaSpaceChar See also: u_hasBinaryProperty
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point has the White_Space Unicode property, false otherwise.
|
u_isWhitespace
U_CAPI UBool U_EXPORT2 u_isWhitespace( UChar32 c )
Determines if the specified code point is a whitespace character according to Java/ICU.
A character is considered to be a Java whitespace character if and only if it satisfies one of the following criteria:
- It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
- It is U+0009 HORIZONTAL TABULATION.
- It is U+000A LINE FEED.
- It is U+000B VERTICAL TABULATION.
- It is U+000C FORM FEED.
- It is U+000D CARRIAGE RETURN.
- It is U+001C FILE SEPARATOR.
- It is U+001D GROUP SEPARATOR.
- It is U+001E RECORD SEPARATOR.
- It is U+001F UNIT SEPARATOR.
This API tries to sync with the semantics of Java's java.lang.Character.isWhitespace(), but it may not return the exact same results because of the Unicode version difference.
Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. See http://www.unicode.org/versions/Unicode4.0.1/
Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.
See also: u_isspace See also: u_isJavaSpaceChar See also: u_isUWhiteSpace
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a whitespace character according to Java/ICU
|
u_isalnum
U_CAPI UBool U_EXPORT2 u_isalnum( UChar32 c )
Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.
True for characters with general categories "L" (letters) and "Nd" (decimal digit numbers).
Same as java.lang.Character.isLetterOrDigit().
In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is an alphanumeric character according to Character.isLetterOrDigit()
|
u_isalpha
U_CAPI UBool U_EXPORT2 u_isalpha( UChar32 c )
Determines whether the specified code point is a letter character.
True for general categories "L" (letters).
Same as java.lang.Character.isLetter().
In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
See also: u_isdigit See also: u_isalnum
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a letter character
|
u_isbase
U_CAPI UBool U_EXPORT2 u_isbase( UChar32 c )
Non-standard: Determines whether the specified code point is a base character.
True for general categories "L" (letters), "N" (numbers), "Mc" (spacing combining marks), and "Me" (enclosing marks).
Note that this is different from the Unicode Standard definition in chapter 3.6, conformance clause D51 “Base character”, which defines base characters as the code points with general categories Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs).
See also: u_isalpha See also: u_isdigit
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a base character according to this function
|
u_isblank
U_CAPI UBool U_EXPORT2 u_isblank( UChar32 c )
Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.
The following are equivalent definitions:
true for Unicode White_Space characters except for "vertical space controls" where "vertical space controls" are the following characters: U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
same as
true for U+0009 (TAB) and characters with general category "Zs" (space separators).
Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.
This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a "blank"
|
u_iscntrl
U_CAPI UBool U_EXPORT2 u_iscntrl( UChar32 c )
Determines whether the specified code point is a control character (as defined by this function).
A control character is one of the following:
- ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
- U_CONTROL_CHAR (Cc)
- U_FORMAT_CHAR (Cf)
- U_LINE_SEPARATOR (Zl)
- U_PARAGRAPH_SEPARATOR (Zp)
This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
See also: UCHAR_DEFAULT_IGNORABLE_CODE_POINT See also: u_isprint
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a control character
|
u_isdefined
U_CAPI UBool U_EXPORT2 u_isdefined( UChar32 c )
Determines whether the specified code point is "defined", which usually means that it is assigned a character.
True for general categories other than "Cn" (other, not assigned), i.e., true for all code points mentioned in UnicodeData.txt.
Note that non-character code points (e.g., U+FDD0) are not "defined" (they are Cn), but surrogate code points are "defined" (Cs).
Same as java.lang.Character.isDefined().
See also: u_isdigit See also: u_isalpha See also: u_isalnum See also: u_isupper See also: u_islower See also: u_istitle
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is assigned a character
|
u_isdigit
U_CAPI UBool U_EXPORT2 u_isdigit( UChar32 c )
Determines whether the specified code point is a digit character according to Java.
True for characters with general category "Nd" (decimal digit numbers). Beginning with Unicode 4, this is the same as testing for the Numeric_Type of Decimal.
Same as java.lang.Character.isDigit().
In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a digit character according to Character.isDigit()
|
u_isgraph
U_CAPI UBool U_EXPORT2 u_isgraph( UChar32 c )
Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
true for all characters except those with general categories "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), "Cn" (unassigned), and "Z" (separators).
This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a "graphic" character
|
u_islower
U_CAPI UBool U_EXPORT2 u_islower( UChar32 c )
Determines whether the specified code point has the general category "Ll" (lowercase letter).
Same as java.lang.Character.isLowerCase().
This misses some characters that are also lowercase but have a different general category value. In order to include those, use UCHAR_LOWERCASE.
In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
See also: UCHAR_LOWERCASE See also: u_isupper See also: u_istitle
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is an Ll lowercase letter
|
u_isprint
U_CAPI UBool U_EXPORT2 u_isprint( UChar32 c )
Determines whether the specified code point is a printable character.
True for general categories other than "C" (controls).
This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
See also: UCHAR_DEFAULT_IGNORABLE_CODE_POINT See also: u_iscntrl
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a printable character
|
u_ispunct
U_CAPI UBool U_EXPORT2 u_ispunct( UChar32 c )
Determines whether the specified code point is a punctuation character.
True for characters with general categories "P" (punctuation).
This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a punctuation character
|
u_isspace
U_CAPI UBool U_EXPORT2 u_isspace( UChar32 c )
Determines if the specified character is a space character or not.
Note: There are several ICU whitespace functions; please see the uchar.h file documentation for a detailed comparison.
This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
See also: u_isJavaSpaceChar See also: u_isWhitespace See also: u_isUWhiteSpace
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the character is a space character; false otherwise.
|
u_istitle
U_CAPI UBool U_EXPORT2 u_istitle( UChar32 c )
Determines whether the specified code point is a titlecase letter.
True for general category "Lt" (titlecase letter).
Same as java.lang.Character.isTitleCase().
See also: u_isupper See also: u_islower See also: u_totitle
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is an Lt titlecase letter
|
u_isupper
U_CAPI UBool U_EXPORT2 u_isupper( UChar32 c )
Determines whether the specified code point has the general category "Lu" (uppercase letter).
Same as java.lang.Character.isUpperCase().
This misses some characters that are also uppercase but have a different general category value. In order to include those, use UCHAR_UPPERCASE.
In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
See also: UCHAR_UPPERCASE See also: u_islower See also: u_istitle See also: u_tolower
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is an Lu uppercase letter
|
u_isxdigit
U_CAPI UBool U_EXPORT2 u_isxdigit( UChar32 c )
Determines whether the specified code point is a hexadecimal digit.
This is equivalent to u_digit(c, 16)>=0. True for characters with general category "Nd" (decimal digit numbers) as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. (That is, for letters with code points 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
In order to narrow the definition of hexadecimal digits to only ASCII characters, use (c<=0x7f && u_isxdigit(c)).
This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the code point is a hexadecimal digit
|
u_memcasecmp
U_CAPI int32_t U_EXPORT2 u_memcasecmp( const UChar *s1, const UChar *s2, int32_t length, uint32_t options )
Compare two strings case-insensitively using full case folding.
This is equivalent to u_strcmp(u_strFoldCase(s1, length, options), u_strFoldCase(s2, length, options)).
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
A negative, zero, or positive integer indicating the comparison result.
|
u_memchr
U_CAPI UChar *U_EXPORT2 u_memchr( const UChar *s, UChar c, int32_t count )
Find the first occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to the first occurrence of c in s, or NULL if c is not in s. |
See also: u_strchr See also: u_memchr32 See also: u_strFindFirst
u_memchr32
U_CAPI UChar *U_EXPORT2 u_memchr32( const UChar *s, UChar32 c, int32_t count )
Find the first occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to the first occurrence of c in s, or NULL if c is not in s. |
See also: u_strchr32 See also: u_memchr See also: u_strFindFirst
u_memcmp
U_CAPI int32_t U_EXPORT2 u_memcmp( const UChar *buf1, const UChar *buf2, int32_t count )
Compare the first count UChars of each buffer.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
When buf1 < buf2, a negative number is returned. When buf1 == buf2, 0 is returned. When buf1 > buf2, a positive number is returned.
|
u_memcmpCodePointOrder
U_CAPI int32_t U_EXPORT2 u_memcmpCodePointOrder( const UChar *s1, const UChar *s2, int32_t count )
Compare two Unicode strings in code point order.
This is different in UTF-16 from u_memcmp() if supplementary characters are present. For details, see u_strCompare().
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order
|
u_memcpy
U_CAPI UChar *U_EXPORT2 u_memcpy( UChar *dest, const UChar *src, int32_t count )
Synonym for memcpy(), but with UChars only.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to dest. |
u_memmove
U_CAPI UChar *U_EXPORT2 u_memmove( UChar *dest, const UChar *src, int32_t count )
Synonym for memmove(), but with UChars only.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to dest. |
u_memrchr
U_CAPI UChar *U_EXPORT2 u_memrchr( const UChar *s, UChar c, int32_t count )
Find the last occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to the last occurrence of c in s, or NULL if c is not in s. |
See also: u_strrchr See also: u_memrchr32 See also: u_strFindLast
u_memrchr32
U_CAPI UChar *U_EXPORT2 u_memrchr32( const UChar *s, UChar32 c, int32_t count )
Find the last occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to the last occurrence of c in s, or NULL if c is not in s. |
See also: u_strrchr32 See also: u_memrchr See also: u_strFindLast
u_memset
U_CAPI UChar *U_EXPORT2 u_memset( UChar *dest, UChar c, int32_t count )
Initialize count characters of dest to c.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to dest. |
u_strCaseCompare
U_CAPI int32_t U_EXPORT2 u_strCaseCompare( const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode )
Compare two strings case-insensitively using full case folding.
This is equivalent to u_strCompare(u_strFoldCase(s1, options), u_strFoldCase(s2, options), (options&U_COMPARE_CODE_POINT_ORDER)!=0).
The comparison can be done in UTF-16 code unit order or in code point order. They differ only when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.
This function works with strings of different explicitly specified lengths unlike the ANSI C-like u_strcmp() and u_memcmp() etc. NUL-terminated strings are possible with length arguments of -1.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
<0 or 0 or >0 as usual for string comparisons
|
u_strCompare
U_CAPI int32_t U_EXPORT2 u_strCompare( const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder )
Compare two Unicode strings (binary order).
The comparison can be done in code unit order or in code point order. They differ only in UTF-16 when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.
This function works with strings of different explicitly specified lengths unlike the ANSI C-like u_strcmp() and u_memcmp() etc. NUL-terminated strings are possible with length arguments of -1.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
<0 or 0 or >0 as usual for string comparisons
|
u_strFindFirst
U_CAPIUChar *U_EXPORT2 u_strFindFirst( const UChar *s, int32_t length, const UChar *substring, int32_t subLength )
Find the first occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
A pointer to the first occurrence of
substring in s , or s itself if the substring is empty, or NULL if substring is not in s . |
See also: u_strstr See also: u_strFindLast
u_strFindLast
U_CAPIUChar *U_EXPORT2 u_strFindLast( const UChar *s, int32_t length, const UChar *substring, int32_t subLength )
Find the last occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
A pointer to the last occurrence of
substring in s , or s itself if the substring is empty, or NULL if substring is not in s . |
See also: u_strstr See also: u_strFindFirst
u_strFoldCase
U_CAPI int32_t U_EXPORT2 u_strFoldCase( UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode )
Case-folds the characters in a string.
Case-folding is locale-independent and not context-sensitive, but there is an option for whether to include or exclude mappings for dotted I and dotless i that are marked with 'T' in CaseFolding.txt.
The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.
|
u_strFromUTF32
U_CAPIUChar *U_EXPORT2 u_strFromUTF32( UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode )
Convert a UTF-32 string to UTF-16.
If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
See also: u_strFromUTF32WithSub See also: u_strToUTF32
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
The pointer to destination buffer.
|
u_strFromUTF32WithSub
U_CAPIUChar *U_EXPORT2 u_strFromUTF32WithSub( UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode )
Convert a UTF-32 string to UTF-16.
Same as u_strFromUTF32() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().
See also: u_strFromUTF32 See also: u_strToUTF32WithSub
Details | |||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||||||
Returns |
The pointer to destination buffer.
|
u_strFromUTF8
U_CAPIUChar *U_EXPORT2 u_strFromUTF8( UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode )
Convert a UTF-8 string to UTF-16.
If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
The pointer to destination buffer.
|
u_strFromUTF8Lenient
U_CAPIUChar *U_EXPORT2 u_strFromUTF8Lenient( UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode )
Convert a UTF-8 string to UTF-16.
Same as u_strFromUTF8() except that this function is designed to be very fast, which it achieves by being lenient about malformed UTF-8 sequences. This function is intended for use in environments where UTF-8 text is expected to be well-formed.
Its semantics are:
- Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
- The function will not read beyond the input string, nor write beyond the destCapacity.
- Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not be well-formed UTF-16. The function will resynchronize to valid code point boundaries within a small number of code points after an illegal sequence.
- Non-shortest forms are not detected and will result in "spoofing" output.
For further performance improvement, if srcLength is given (>=0), then it must be destCapacity>=srcLength.
There is no inverse u_strToUTF8Lenient() function because there is practically no performance gain from not checking that a UTF-16 string is well-formed.
See also: u_strFromUTF8 See also: u_strFromUTF8WithSub See also: u_strToUTF8WithSub
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
The pointer to destination buffer.
|
u_strFromUTF8WithSub
U_CAPIUChar *U_EXPORT2 u_strFromUTF8WithSub( UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode )
Convert a UTF-8 string to UTF-16.
Same as u_strFromUTF8() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
See also: u_strFromUTF8 See also: u_strFromUTF8Lenient See also: u_strToUTF8WithSub
Details | |||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||||||
Returns |
The pointer to destination buffer.
|
u_strHasMoreChar32Than
U_CAPIUBool U_EXPORT2 u_strHasMoreChar32Than( const UChar *s, int32_t length, int32_t number )
Check if the string contains more Unicode code points than a certain number.
This is more efficient than counting all code points in the entire string and comparing that number with a threshold. This function may not need to scan the string at all if the length is known (not -1 for NUL-termination) and falls within a certain range, and never needs to count more than 'number+1' code points. Logically equivalent to (u_countChar32(s, length)>number). A Unicode code point may occupy either one or two UChar code units.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
Boolean value for whether the string contains more Unicode code points than 'number'. Same as (u_countChar32(s, length)>number).
|
u_strToLower
U_CAPI int32_t U_EXPORT2 u_strToLower( UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode )
Lowercase the characters in a string.
Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.
|
u_strToTitle
U_CAPI int32_t U_EXPORT2 u_strToTitle( UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode )
Titlecase a string.
Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others.
The titlecase break iterator can be provided to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. The standard titlecase iterator for the root locale implements the algorithm of Unicode TR 21.
This function uses only the setText(), first() and next() methods of the provided break iterator.
The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
Details | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||||
Returns |
The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.
|
u_strToUTF32
U_CAPIUChar32 *U_EXPORT2 u_strToUTF32( UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode )
Convert a UTF-16 string to UTF-32.
If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
See also: u_strToUTF32WithSub See also: u_strFromUTF32
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
The pointer to destination buffer.
|
u_strToUTF32WithSub
U_CAPIUChar32 *U_EXPORT2 u_strToUTF32WithSub( UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode )
Convert a UTF-16 string to UTF-32.
Same as u_strToUTF32() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().
See also: u_strToUTF32 See also: u_strFromUTF32WithSub
Details | |||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||||||
Returns |
The pointer to destination buffer.
|
u_strToUTF8
U_CAPI char *U_EXPORT2 u_strToUTF8( char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode )
Convert a UTF-16 string to UTF-8.
If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
The pointer to destination buffer.
|
u_strToUTF8WithSub
U_CAPI char *U_EXPORT2 u_strToUTF8WithSub( char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode )
Convert a UTF-16 string to UTF-8.
Same as u_strToUTF8() except for the additional subchar which is output for illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
See also: u_strToUTF8 See also: u_strFromUTF8WithSub
Details | |||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||||||
Returns |
The pointer to destination buffer.
|
u_strToUpper
U_CAPI int32_t U_EXPORT2 u_strToUpper( UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode )
Uppercase the characters in a string.
Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
The length of the result string. It may be greater than destCapacity. In that case, only some of the result was written to the destination buffer.
|
u_strcasecmp
U_CAPI int32_t U_EXPORT2 u_strcasecmp( const UChar *s1, const UChar *s2, uint32_t options )
Compare two strings case-insensitively using full case folding.
This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A negative, zero, or positive integer indicating the comparison result.
|
u_strcat
U_CAPIUChar *U_EXPORT2 u_strcat( UChar *dst, const UChar *src )
Concatenate two ustrings.
Appends a copy of src, including the null terminator, to dst. The initial copied character from src overwrites the null terminator in dst.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to
dst . |
u_strchr
U_CAPIUChar *U_EXPORT2 u_strchr( const UChar *s, UChar c )
Find the first occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to the first occurrence of
c in s or NULL if c is not in s . |
See also: u_strchr32 See also: u_memchr See also: u_strstr See also: u_strFindFirst
u_strchr32
U_CAPIUChar *U_EXPORT2 u_strchr32( const UChar *s, UChar32 c )
Find the first occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to the first occurrence of
c in s or NULL if c is not in s . |
See also: u_strchr See also: u_memchr32 See also: u_strstr See also: u_strFindFirst
u_strcmp
U_CAPI int32_t U_EXPORT2 u_strcmp( const UChar *s1, const UChar *s2 )
Compare two Unicode strings for bitwise equality (code unit order).
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
0 if
s1 and s2 are bitwise equal; a negative value if s1 is bitwise less than s2 ; a positive value if s1 is bitwise greater than s2 . |
u_strcmpCodePointOrder
U_CAPI int32_t U_EXPORT2 u_strcmpCodePointOrder( const UChar *s1, const UChar *s2 )
Compare two Unicode strings in code point order.
See u_strCompare for details.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order
|
u_strcpy
U_CAPIUChar *U_EXPORT2 u_strcpy( UChar *dst, const UChar *src )
Copy a ustring.
Adds a null terminator.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to
dst . |
u_strcspn
U_CAPI int32_t U_EXPORT2 u_strcspn( const UChar *string, const UChar *matchSet )
Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in matchSet.
Works just like C's strcspn but with Unicode.
See also: u_strspn
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
The number of initial characters in
string that do not occur in matchSet . |
u_strlen
U_CAPI int32_t U_EXPORT2 u_strlen( const UChar *s )
Determine the length of an array of UChar.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
The number of UChars in
s , minus the terminator. |
u_strncasecmp
U_CAPI int32_t U_EXPORT2 u_strncasecmp( const UChar *s1, const UChar *s2, int32_t n, uint32_t options )
Compare two strings case-insensitively using full case folding.
This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), u_strFoldCase(s2, at most n, options)).
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
A negative, zero, or positive integer indicating the comparison result.
|
u_strncat
U_CAPIUChar *U_EXPORT2 u_strncat( UChar *dst, const UChar *src, int32_t n )
Concatenate two ustrings.
Appends at most n characters from src to dst. Adds a terminating NUL. If src is too long, then only n-1 characters will be copied before the terminating NUL. If n<=0 then dst is not modified.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to
dst . |
u_strncmp
U_CAPI int32_t U_EXPORT2 u_strncmp( const UChar *ucs1, const UChar *ucs2, int32_t n )
Compare two ustrings for bitwise equality.
Compares at most n characters.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
0 if
ucs1 and ucs2 are bitwise equal; a negative value if ucs1 is bitwise less than ucs2 ; a positive value if ucs1 is bitwise greater than ucs2 . |
u_strncmpCodePointOrder
U_CAPI int32_t U_EXPORT2 u_strncmpCodePointOrder( const UChar *s1, const UChar *s2, int32_t n )
Compare two Unicode strings in code point order.
This is different in UTF-16 from u_strncmp() if supplementary characters are present. For details, see u_strCompare().
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order
|
u_strncpy
U_CAPIUChar *U_EXPORT2 u_strncpy( UChar *dst, const UChar *src, int32_t n )
Copy a ustring.
Copies at most n characters. The result will be null terminated if the length of src is less than n.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to
dst . |
u_strpbrk
U_CAPIUChar *U_EXPORT2 u_strpbrk( const UChar *string, const UChar *matchSet )
Locates the first occurrence in the string string of any of the characters in the string matchSet.
Works just like C's strpbrk but with Unicode.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to the character in
string that matches one of the characters in matchSet , or NULL if no such character is found. |
u_strrchr
U_CAPIUChar *U_EXPORT2 u_strrchr( const UChar *s, UChar c )
Find the last occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to the last occurrence of
c in s or NULL if c is not in s . |
See also: u_strrchr32 See also: u_memrchr See also: u_strrstr See also: u_strFindLast
u_strrchr32
U_CAPIUChar *U_EXPORT2 u_strrchr32( const UChar *s, UChar32 c )
Find the last occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NUL character is found at the string terminator.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to the last occurrence of
c in s or NULL if c is not in s . |
See also: u_strrchr See also: u_memrchr32 See also: u_strrstr See also: u_strFindLast
u_strrstr
U_CAPIUChar *U_EXPORT2 u_strrstr( const UChar *s, const UChar *substring )
Find the last occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to the last occurrence of
substring in s , or s itself if the substring is empty, or NULL if substring is not in s . |
See also: u_strstr See also: u_strFindFirst See also: u_strFindLast
u_strspn
U_CAPI int32_t U_EXPORT2 u_strspn( const UChar *string, const UChar *matchSet )
Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in matchSet.
Works just like C's strspn but with Unicode.
See also: u_strcspn
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
The number of initial characters in
string that do occur in matchSet . |
u_strstr
U_CAPIUChar *U_EXPORT2 u_strstr( const UChar *s, const UChar *substring )
Find the first occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to the first occurrence of
substring in s , or s itself if the substring is empty, or NULL if substring is not in s . |
See also: u_strrstr See also: u_strFindFirst See also: u_strFindLast
u_strtok_r
U_CAPIUChar *U_EXPORT2 u_strtok_r( UChar *src, const UChar *delim, UChar **saveState )
The string tokenizer API allows an application to break a string into tokens.
Unlike strtok(), the save state (the current pointer within the original string) is maintained in saveState. In the first call, the argument src is a pointer to the string. In subsequent calls to return successive tokens of that string, src must be specified as NULL. The value saveState is set by this function to maintain the function's position within the string, and on each subsequent call you must give this argument the same variable. This function does handle surrogate pairs. This function is similar to the strtok_r() function from the POSIX Threads Extension (1003.1c-1995).
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
A pointer to the next token found in src, or NULL when there are no more tokens.
|
u_tolower
U_CAPIUChar32 U_EXPORT2 u_tolower( UChar32 c )
The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
Same as java.lang.Character.toLowerCase().
This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see ustring.h and the UnicodeString class. See also the User Guide chapter on C/POSIX migration: https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the Simple_Lowercase_Mapping of the code point, if any; otherwise the code point itself.
|
u_totitle
U_CAPIUChar32 U_EXPORT2 u_totitle( UChar32 c )
The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
Same as java.lang.Character.toTitleCase().
This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see ustring.h and the UnicodeString class. See also the User Guide chapter on C/POSIX migration: https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the Simple_Titlecase_Mapping of the code point, if any; otherwise the code point itself.
|
u_toupper
U_CAPIUChar32 U_EXPORT2 u_toupper( UChar32 c )
The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
Same as java.lang.Character.toUpperCase().
This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see ustring.h and the UnicodeString class. See also the User Guide chapter on C/POSIX migration: https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the Simple_Uppercase_Mapping of the code point, if any; otherwise the code point itself.
|
u_versionToString
U_CAPI void U_EXPORT2 u_versionToString( const UVersionInfo versionArray, char *versionString )
Write a string with dotted-decimal version information according to the input UVersionInfo.
Definition of this function lives in putil.c
Details | |||||
---|---|---|---|---|---|
Parameters |
|
ubrk_clone
U_CAPIUBreakIterator *U_EXPORT2 ubrk_clone( const UBreakIterator *bi, UErrorCode *status )
Thread safe cloning operation.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
pointer to the new clone
|
ubrk_close
U_CAPI void U_EXPORT2 ubrk_close( UBreakIterator *bi )
Close a UBreakIterator.
Once closed, a UBreakIterator may no longer be used.
Details | |||
---|---|---|---|
Parameters |
|
ubrk_countAvailable
U_CAPI int32_t U_EXPORT2 ubrk_countAvailable( void )
Determine how many locales have text breaking information available.
This function is most useful for determining the loop ending condition for calls to ubrk_getAvailable. See also: ubrk_getAvailable
Details | |
---|---|
Returns |
The number of locales for which text breaking information is available.
|
ubrk_current
U_CAPI int32_t U_EXPORT2 ubrk_current( const UBreakIterator *bi )
Determine the most recently-returned text boundary.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
ubrk_first
U_CAPI int32_t U_EXPORT2 ubrk_first( UBreakIterator *bi )
Set the iterator position to zero, the start of the text being scanned.
See also: ubrk_last
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
The new iterator position (zero).
|
ubrk_following
U_CAPI int32_t U_EXPORT2 ubrk_following( UBreakIterator *bi, int32_t offset )
Advance the iterator to the first boundary following the specified offset.
The value returned is always greater than offset, or UBRK_DONE. See also: ubrk_preceding
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
The text boundary following offset, or UBRK_DONE.
|
ubrk_getAvailable
U_CAPI const char *U_EXPORT2 ubrk_getAvailable( int32_t index )
Get a locale for which text breaking information is available.
A UBreakIterator in a locale returned by this function will perform the correct text breaking for the locale. See also: ubrk_countAvailable
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
A locale for which text breaking information is available, or 0 if none.
|
ubrk_getRuleStatus
U_CAPI int32_t U_EXPORT2 ubrk_getRuleStatus( UBreakIterator *bi )
Return the status from the break rule that determined the most recently returned break position.
The values appear in the rule source within brackets, {123}, for example. For rules that do not specify a status, a default value of 0 is returned.
For word break iterators, the possible values are defined in enum UWordBreak.
ubrk_getRuleStatusVec
U_CAPI int32_t U_EXPORT2 ubrk_getRuleStatusVec( UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status )
Get the statuses from the break rules that determined the most recently returned break position.
The values appear in the rule source within brackets, {123}, for example. The default status value for rules that do not explicitly provide one is zero.
For word break iterators, the possible values are defined in enum UWordBreak.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
The number of rule status values from rules that determined the most recent boundary returned by the break iterator.
|
ubrk_isBoundary
U_CAPIUBool U_EXPORT2 ubrk_isBoundary( UBreakIterator *bi, int32_t offset )
Returns true if the specified position is a boundary position.
As a side effect, leaves the iterator pointing to the first boundary position at or after "offset".
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
True if "offset" is a boundary position.
|
ubrk_last
U_CAPI int32_t U_EXPORT2 ubrk_last( UBreakIterator *bi )
Set the iterator position to the index immediately beyond the last character in the text being scanned.
This is not the same as the last character. See also: ubrk_first
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
The character offset immediately beyond the last character in the text being scanned.
|
ubrk_next
U_CAPI int32_t U_EXPORT2 ubrk_next( UBreakIterator *bi )
Advance the iterator to the boundary following the current boundary.
See also: ubrk_previous
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
The character index of the next text boundary, or UBRK_DONE if all text boundaries have been returned.
|
ubrk_open
U_CAPIUBreakIterator *U_EXPORT2 ubrk_open( UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status )
Open a new UBreakIterator for locating text boundaries for a specified locale.
A UBreakIterator may be used for detecting character, line, word, and sentence breaks in text. See also: ubrk_openRules
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
A UBreakIterator for the specified locale.
|
ubrk_preceding
U_CAPI int32_t U_EXPORT2 ubrk_preceding( UBreakIterator *bi, int32_t offset )
Set the iterator position to the first boundary preceding the specified offset.
The new position is always smaller than offset, or UBRK_DONE. See also: ubrk_following
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
The text boundary preceding offset, or UBRK_DONE.
|
ubrk_previous
U_CAPI int32_t U_EXPORT2 ubrk_previous( UBreakIterator *bi )
Set the iterator position to the boundary preceding the current boundary.
See also: ubrk_next
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
The character index of the preceding text boundary, or UBRK_DONE if all text boundaries have been returned.
|
ubrk_setText
U_CAPI void U_EXPORT2 ubrk_setText( UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status )
Sets an existing iterator to point to a new piece of text.
The break iterator retains a pointer to the supplied text. The caller must not modify or delete the text while the BreakIterator retains the reference.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
ubrk_setUText
U_CAPI void U_EXPORT2 ubrk_setUText( UBreakIterator *bi, UText *text, UErrorCode *status )
Sets an existing iterator to point to a new piece of text.
All index positions returned by break iterator functions are native indices from the UText. For example, when breaking UTF-8 encoded text, the break positions returned by ubrk_next, ubrk_previous, etc. will be UTF-8 string indices, not UTF-16 positions.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
ucol_clone
U_CAPI UCollator *U_EXPORT2 ucol_clone( const UCollator *coll, UErrorCode *status )
Thread safe cloning operation.
The result is a clone of a given collator. See also: ucol_open See also: ucol_openRules See also: ucol_close
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
pointer to the new clone
|
ucol_close
U_CAPI void U_EXPORT2 ucol_close( UCollator *coll )
Close a UCollator.
Once closed, a UCollator should not be used. Every open collator should be closed. Otherwise, a memory leak will result. See also: ucol_open See also: ucol_openRules See also: ucol_clone
Details | |||
---|---|---|---|
Parameters |
|
ucol_countAvailable
U_CAPI int32_t U_EXPORT2 ucol_countAvailable( void )
Determine how many locales have collation rules available.
This function is most useful for determining the loop ending condition for calls to ucol_getAvailable. See also: ucol_getAvailable
Details | |
---|---|
Returns |
The number of locales for which collation rules are available.
|
ucol_getAttribute
U_CAPI UColAttributeValue U_EXPORT2 ucol_getAttribute( const UCollator *coll, UColAttribute attr, UErrorCode *status )
Universal attribute getter.
See also: UColAttribute See also: UColAttributeValue See also: ucol_setAttribute
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Parameters |
|
||||
Returns |
attribute value
|
ucol_getAvailable
U_CAPI const char *U_EXPORT2 ucol_getAvailable( int32_t localeIndex )
Get a locale for which collation rules are available.
A UCollator in a locale returned by this function will perform the correct collation for the locale. See also: ucol_countAvailable
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
A locale for which collation rules are available, or 0 if none.
|
ucol_getDisplayName
U_CAPI int32_t U_EXPORT2 ucol_getDisplayName( const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status )
Get the display name for a UCollator.
The display name is suitable for presentation to a user.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
The total buffer size needed; if greater than resultLength, the output was truncated.
|
ucol_getEquivalentReorderCodes
U_CAPI int32_t U_EXPORT2 ucol_getEquivalentReorderCodes( int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode )
Retrieves the reorder codes that are grouped with the given reorder code.
Some reorder codes will be grouped and must reorder together. Beginning with ICU 55, scripts only reorder together if they are primary-equal, for example Hiragana and Katakana.
See also: ucol_setReorderCodes See also: ucol_getReorderCodes See also: UScriptCode See also: UColReorderCode
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
The number of reordering codes written to the dest array.
|
ucol_getFunctionalEquivalent
U_CAPI int32_t U_EXPORT2 ucol_getFunctionalEquivalent( char *result, int32_t resultCapacity, const char *keyword, const char *locale, UBool *isAvailable, UErrorCode *status )
Return the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.
If two different input locale + keyword combinations produce the same result locale, then collators instantiated for these two different input locales will behave equivalently. The converse is not always true; two collators may in fact be equivalent, but return different results, due to internal details. The return result has no other meaning than that stated above, and implies nothing as to the relationship between the two locales. This is intended for use by applications who wish to cache collators, or otherwise reuse collators when possible. The functional equivalent may change over time. For more information, please see the Locales and Services section of the ICU User Guide.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
the actual buffer size needed for the locale. If greater than resultCapacity, the returned full name will be truncated and an error code will be returned.
|
ucol_getKeywordValues
U_CAPI UEnumeration *U_EXPORT2 ucol_getKeywordValues( const char *keyword, UErrorCode *status )
Given a keyword, create a string enumeration of all values for that keyword that are currently in use.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
a string enumeration over collation keyword values, or NULL upon error. The caller is responsible for closing the result.
|
ucol_getKeywordValuesForLocale
U_CAPI UEnumeration *U_EXPORT2 ucol_getKeywordValuesForLocale( const char *key, const char *locale, UBool commonlyUsed, UErrorCode *status )
Given a key and a locale, returns an array of string values in a preferred order that would make a difference.
These are all and only those values where the open (creation) of the service with the locale formed from the input locale plus input keyword and that value has different behavior than creation with the input locale alone.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
a string enumeration over keyword values for the given key and the locale.
|
ucol_getKeywords
U_CAPI UEnumeration *U_EXPORT2 ucol_getKeywords( UErrorCode *status )
Create a string enumerator of all possible keywords that are relevant to collation.
At this point, the only recognized keyword for this service is "collation".
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
a string enumeration over locale strings. The caller is responsible for closing the result.
|
ucol_getMaxVariable
U_CAPI UColReorderCode U_EXPORT2 ucol_getMaxVariable( const UCollator *coll )
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
See also: ucol_setMaxVariable
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the maximum variable reordering group.
|
ucol_getReorderCodes
U_CAPI int32_t U_EXPORT2 ucol_getReorderCodes( const UCollator *coll, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode )
Retrieves the reordering codes for this collator.
These reordering codes are a combination of UScript codes and UColReorderCode entries. See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode See also: UColReorderCode
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
The number of reordering codes written to the dest array.
|
ucol_getSortKey
U_CAPI int32_t U_EXPORT2 ucol_getSortKey( const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength )
Get a sort key for a string from a UCollator.
Sort keys may be compared using strcmp.
Note that sort keys are often less efficient than simply doing comparison. For more details, see the ICU User Guide.
Like ICU functions that write to an output buffer, the buffer contents are undefined if the buffer capacity (resultLength parameter) is too small. Unlike ICU functions that write a string to an output buffer, the terminating zero byte is counted in the sort key length. See also: ucol_keyHashCode
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
The size needed to fully store the sort key. If there was an internal error generating the sort key, a zero value is returned.
|
ucol_getStrength
U_CAPI UCollationStrength U_EXPORT2 ucol_getStrength( const UCollator *coll )
Get the collation strength used in a UCollator.
The strength influences how strings are compared. See also: ucol_setStrength
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
|
ucol_mergeSortkeys
U_CAPI int32_t U_EXPORT2 ucol_mergeSortkeys( const uint8_t *src1, int32_t src1Length, const uint8_t *src2, int32_t src2Length, uint8_t *dest, int32_t destCapacity )
Merges two sort keys.
The levels are merged with their corresponding counterparts (primaries with primaries, secondaries with secondaries etc.). Between the values from the same level a separator is inserted.
This is useful, for example, for combining sort keys from first and last names to sort such pairs. See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
The recommended way to achieve "merged" sorting is by concatenating strings with U+FFFE between them. The concatenation has the same sort order as the merged sort keys, but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2). Using strings with U+FFFE may yield shorter sort keys.
For details about Sort Key Features see https://unicode-org.github.io/icu/userguide/collation/api#sort-key-features
It is possible to merge multiple sort keys by consecutively merging another one with the intermediate result.
The length of the merge result is the sum of the lengths of the input sort keys.
Example (uncompressed):
191B1D 01 050505 01 910505 00 1F2123 01 050505 01 910505 00
191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00
If the destination buffer is not big enough, then its contents are undefined. If any of the source lengths are zero or any of the source pointers are NULL/undefined, the result is of size zero.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
the length of the merged sort key, src1Length+src2Length; can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), in which case the contents of dest are undefined
|
ucol_open
U_CAPI UCollator *U_EXPORT2 ucol_open( const char *loc, UErrorCode *status )
Open a UCollator for comparing strings.
For some languages, multiple collation types are available; for example, "de@collation=phonebook". Starting with ICU 54, collation attributes can be specified via locale keywords as well, in the old locale extension syntax ("el@colCaseFirst=upper") or in language tag syntax ("el-u-kf-upper"). See User Guide: Collation API.
The UCollator pointer is used in all the calls to the Collation service. When finished, the collator must be disposed of by calling ucol_close. See also: ucol_openRules See also: ucol_clone See also: ucol_close
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
A pointer to a UCollator, or 0 if an error occurred.
|
ucol_openAvailableLocales
U_CAPI UEnumeration *U_EXPORT2 ucol_openAvailableLocales( UErrorCode *status )
Create a string enumerator of all locales for which a valid collator may be opened.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
a string enumeration over locale strings. The caller is responsible for closing the result.
|
ucol_openRules
U_CAPI UCollator *U_EXPORT2 ucol_openRules( const UChar *rules, int32_t rulesLength, UColAttributeValue normalizationMode, UCollationStrength strength, UParseError *parseError, UErrorCode *status )
Produce a UCollator instance according to the rules supplied.
The rules are used to change the default ordering, defined in the UCA in a process called tailoring. The resulting UCollator pointer can be used in the same way as the one obtained by ucol_open. See also: ucol_open See also: ucol_clone See also: ucol_close
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
A pointer to a UCollator. It is not guaranteed that NULL be returned in case of error - please use status argument to check for errors.
|
ucol_setAttribute
U_CAPI void U_EXPORT2 ucol_setAttribute( UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status )
Universal attribute setter.
See also: UColAttribute See also: UColAttributeValue See also: ucol_getAttribute
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
ucol_setMaxVariable
U_CAPI void U_EXPORT2 ucol_setMaxVariable( UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode )
Sets the variable top to the top of the specified reordering group.
The variable top determines the highest-sorting character which is affected by UCOL_ALTERNATE_HANDLING. If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. See also: ucol_getMaxVariable
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
ucol_setReorderCodes
U_CAPI void U_EXPORT2 ucol_setReorderCodes( UCollator *coll, const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode *pErrorCode )
Sets the reordering codes for this collator.
Collation reordering allows scripts and some other groups of characters to be moved relative to each other. This reordering is done on top of the DUCET/CLDR standard collation order. Reordering can specify groups to be placed at the start and/or the end of the collation order. These groups are specified using UScript codes and UColReorderCode entries.
By default, reordering codes specified for the start of the order are placed in the order given after several special non-script blocks. These special groups of characters are space, punctuation, symbol, currency, and digit. These special groups are represented with UColReorderCode entries. Script groups can be intermingled with these special non-script groups if those special groups are explicitly specified in the reordering.
The special code OTHERS stands for any script that is not explicitly mentioned in the list of reordering codes given. Anything that is after OTHERS will go at the very end of the reordering in the order given.
The special reorder code DEFAULT will reset the reordering for this collator to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that was specified when this collator was created from resource data or from rules. The DEFAULT code must be the sole code supplied when it is used. If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
The special reorder code NONE will remove any reordering for this collator. The result of setting no reordering will be to have the DUCET/CLDR ordering used. The NONE code must be the sole code supplied when it is used.
See also: ucol_getReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode See also: UColReorderCode
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
ucol_setStrength
U_CAPI void U_EXPORT2 ucol_setStrength( UCollator *coll, UCollationStrength strength )
Set the collation strength used in a UCollator.
The strength influences how strings are compared. See also: ucol_getStrength
Details | |||||
---|---|---|---|---|---|
Parameters |
|
ucol_strcoll
U_CAPI UCollationResult U_EXPORT2 ucol_strcoll( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength )
Compare two strings.
The strings will be compared using the options already specified. See also: ucol_greater See also: ucol_greaterOrEqual See also: ucol_equal
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS
|
ucol_strcollUTF8
U_CAPI UCollationResult U_EXPORT2 ucol_strcollUTF8( const UCollator *coll, const char *source, int32_t sourceLength, const char *target, int32_t targetLength, UErrorCode *status )
Compare two strings in UTF-8.
The strings will be compared using the options already specified. Note: When the input string contains a malformed UTF-8 byte sequence, this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). See also: ucol_greater See also: ucol_greaterOrEqual See also: ucol_equal
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS
|
uenum_close
U_CAPI void U_EXPORT2 uenum_close( UEnumeration *en )
Disposes of resources in use by the iterator.
If en is NULL, does nothing. After this call, any char* or UChar* pointer returned by uenum_unext() or uenum_next() is invalid.
Details | |||
---|---|---|---|
Parameters |
|
uenum_count
U_CAPI int32_t U_EXPORT2 uenum_count( UEnumeration *en, UErrorCode *status )
Returns the number of elements that the iterator traverses.
If the iterator is out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR. This is a convenience function. It can end up being very expensive as all the items might have to be pre-fetched (depending on the type of data being traversed). Use with caution and only when necessary.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
number of elements in the iterator
|
uenum_next
U_CAPI const char *U_EXPORT2 uenum_next( UEnumeration *en, int32_t *resultLength, UErrorCode *status )
Returns the next element in the iterator's list.
If there are no more elements, returns NULL. If the iterator is out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. If the native service string is a UChar* string, it is converted to char* with the invariant converter. The result is terminated by (char)0. If the conversion fails (because a character cannot be converted) then status is set to U_INVARIANT_CONVERSION_ERROR and the return value is undefined (but non-NULL).
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
a pointer to the string. The string will be zero-terminated. The return pointer is owned by this iterator and must not be deleted by the caller. The pointer is valid until the next call to any uenum_... method, including uenum_next() or uenum_unext(). When all strings have been traversed, returns NULL.
|
uenum_openCharStringsEnumeration
U_CAPI UEnumeration *U_EXPORT2 uenum_openCharStringsEnumeration( const char *const strings[], int32_t count, UErrorCode *ec )
Given an array of const char* strings (invariant chars only), return a UEnumeration.
String pointers from 0..count-1 must not be null. Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
See also: uenum_close
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
the new UEnumeration object. Caller is responsible for calling uenum_close to free memory
|
uenum_openUCharStringsEnumeration
U_CAPI UEnumeration *U_EXPORT2 uenum_openUCharStringsEnumeration( const UChar *const strings[], int32_t count, UErrorCode *ec )
Given an array of const UChar* strings, return a UEnumeration.
String pointers from 0..count-1 must not be null. Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
See also: uenum_close
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
|
uenum_reset
U_CAPI void U_EXPORT2 uenum_reset( UEnumeration *en, UErrorCode *status )
Resets the iterator to the current list of service IDs.
This re-establishes sync with the service and rewinds the iterator to start at the first element.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
uenum_unext
U_CAPI const UChar *U_EXPORT2 uenum_unext( UEnumeration *en, int32_t *resultLength, UErrorCode *status )
Returns the next element in the iterator's list.
If there are no more elements, returns NULL. If the iterator is out-of-sync with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. If the native service string is a char* string, it is converted to UChar* with the invariant converter. The result is terminated by (UChar)0.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
a pointer to the string. The string will be zero-terminated. The return pointer is owned by this iterator and must not be deleted by the caller. The pointer is valid until the next call to any uenum_... method, including uenum_next() or uenum_unext(). When all strings have been traversed, returns NULL.
|
uldn_close
U_CAPI void U_EXPORT2 uldn_close( ULocaleDisplayNames *ldn )
Closes a ULocaleDisplayNames instance obtained from uldn_open().
Details | |||
---|---|---|---|
Parameters |
|
uldn_getContext
U_CAPI UDisplayContext U_EXPORT2 uldn_getContext( const ULocaleDisplayNames *ldn, UDisplayContextType type, UErrorCode *pErrorCode )
Returns the UDisplayContext value for the specified UDisplayContextType.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
the UDisplayContextValue for the specified type.
|
uldn_getDialectHandling
U_CAPI UDialectHandling U_EXPORT2 uldn_getDialectHandling( const ULocaleDisplayNames *ldn )
Returns the dialect handling used in the display names.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the dialect handling enum
|
uldn_getLocale
U_CAPI const char *U_EXPORT2 uldn_getLocale( const ULocaleDisplayNames *ldn )
Returns the locale used to determine the display names.
This is not necessarily the same locale passed to uldn_open.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the display locale
|
uldn_keyDisplayName
U_CAPI int32_t U_EXPORT2 uldn_keyDisplayName( const ULocaleDisplayNames *ldn, const char *key, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode )
Returns the display name of the provided locale key.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.
|
uldn_keyValueDisplayName
U_CAPI int32_t U_EXPORT2 uldn_keyValueDisplayName( const ULocaleDisplayNames *ldn, const char *key, const char *value, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode )
Returns the display name of the provided value (used with the provided key).
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.
|
uldn_languageDisplayName
U_CAPI int32_t U_EXPORT2 uldn_languageDisplayName( const ULocaleDisplayNames *ldn, const char *lang, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode )
Returns the display name of the provided language code.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.
|
uldn_localeDisplayName
U_CAPI int32_t U_EXPORT2 uldn_localeDisplayName( const ULocaleDisplayNames *ldn, const char *locale, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode )
Returns the display name of the provided locale.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.
|
uldn_open
U_CAPI ULocaleDisplayNames *U_EXPORT2 uldn_open( const char *locale, UDialectHandling dialectHandling, UErrorCode *pErrorCode )
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided dialectHandling.
The usual value for dialectHandling is ULOC_STANDARD_NAMES.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Parameters |
|
||||
Returns |
a ULocaleDisplayNames instance
|
uldn_openForContext
U_CAPI ULocaleDisplayNames *U_EXPORT2 uldn_openForContext( const char *locale, UDisplayContext *contexts, int32_t length, UErrorCode *pErrorCode )
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided UDisplayContext settings.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
a ULocaleDisplayNames instance
|
uldn_regionDisplayName
U_CAPI int32_t U_EXPORT2 uldn_regionDisplayName( const ULocaleDisplayNames *ldn, const char *region, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode )
Returns the display name of the provided region code.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.
|
uldn_scriptCodeDisplayName
U_CAPI int32_t U_EXPORT2 uldn_scriptCodeDisplayName( const ULocaleDisplayNames *ldn, UScriptCode scriptCode, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode )
Returns the display name of the provided script code.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.
|
uldn_scriptDisplayName
U_CAPI int32_t U_EXPORT2 uldn_scriptDisplayName( const ULocaleDisplayNames *ldn, const char *script, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode )
Returns the display name of the provided script.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.
|
uldn_variantDisplayName
U_CAPI int32_t U_EXPORT2 uldn_variantDisplayName( const ULocaleDisplayNames *ldn, const char *variant, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode )
Returns the display name of the provided variant.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the display name. If it's greater than maxResultSize, the returned name will be truncated.
|
uloc_acceptLanguage
U_CAPI int32_t U_EXPORT2 uloc_acceptLanguage( char *result, int32_t resultAvailable, UAcceptResult *outResult, const char **acceptList, int32_t acceptListCount, UEnumeration *availableLocales, UErrorCode *status )
Based on a list of available locales, determine an acceptable locale for the user.
This is a thin wrapper over C++ class LocaleMatcher.
Details | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||||
Returns |
length needed for the locale.
|
uloc_addLikelySubtags
U_CAPI int32_t U_EXPORT2 uloc_addLikelySubtags( const char *localeID, char *maximizedLocaleID, int32_t maximizedLocaleIDCapacity, UErrorCode *err )
Add the likely subtags for a provided locale ID, per the algorithm described in the following CLDR technical report:
http://www.unicode.org/reports/tr35/#Likely_Subtags
If localeID is already in the maximal form, or there is no data available for maximization, it will be copied to the output buffer. For example, "sh" cannot be maximized, since there is no reasonable maximization.
Examples:
"und_Zzzz" maximizes to "en_Latn_US"
"en" maximizes to "en_Latn_US"
"de" maximizes to "de_Latn_DE"
"sr" maximizes to "sr_Cyrl_RS"
"zh_Hani" maximizes to "zh_Hani_CN"
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
The actual buffer size needed for the maximized locale. If it's greater than maximizedLocaleIDCapacity, the returned ID will be truncated. On error, the return value is -1.
|
uloc_canonicalize
U_CAPI int32_t U_EXPORT2 uloc_canonicalize( const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err )
Gets the full name for the specified locale.
Note: This has the effect of 'canonicalizing' the string to a certain extent. Upper and lower case are set as needed, and if the components were in 'POSIX' format they are changed to ICU format. It does NOT map aliased names in any way. See the top of this header file.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
the actual buffer size needed for the full name. If it's greater than nameCapacity, the returned full name will be truncated.
|
uloc_countAvailable
U_CAPI int32_t U_EXPORT2 uloc_countAvailable( void )
Gets the size of the list of all available locales.
Details | |
---|---|
Returns |
the size of the locale list
|
uloc_forLanguageTag
U_CAPI int32_t U_EXPORT2 uloc_forLanguageTag( const char *langtag, char *localeID, int32_t localeIDCapacity, int32_t *parsedLength, UErrorCode *err )
Returns a locale ID for the specified BCP47 language tag string.
If the specified language tag contains any ill-formed subtags, the first such subtag and all following subtags are ignored.
This implements the 'Language-Tag' production of BCP 47, and so supports legacy language tags (marked as “Type: grandfathered” in BCP 47) (regular and irregular) as well as private use language tags.
Private use tags are represented as 'x-whatever', and legacy tags are converted to their canonical replacements where they exist.
Note that a few legacy tags have no modern replacement; these will be converted using the fallback described in the first paragraph, so some information might be lost.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the length of the locale ID.
|
uloc_getAvailable
U_CAPI const char *U_EXPORT2 uloc_getAvailable( int32_t n )
Gets the specified locale from a list of available locales.
This method corresponds to uloc_openAvailableByType called with the ULOC_AVAILABLE_DEFAULT type argument.
The return value is a pointer to an item of a locale name array. Both this array and the pointers it contains are owned by ICU and should not be deleted or written through by the caller. The locale name is terminated by a null pointer.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
a specified locale name of all available locales
|
uloc_getBaseName
U_CAPI int32_t U_EXPORT2 uloc_getBaseName( const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err )
Gets the full name for the specified locale, like uloc_getName(), but without keywords.
Note: This has the effect of 'canonicalizing' the string to a certain extent. Upper and lower case are set as needed, and if the components were in 'POSIX' format they are changed to ICU format. It does NOT map aliased names in any way. See the top of this header file.
This API strips off the keyword part, so "de_DE@collation=phonebook" will become "de_DE". This API supports preflighting.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
the actual buffer size needed for the full name. If it's greater than nameCapacity, the returned full name will be truncated.
|
uloc_getCharacterOrientation
U_CAPI ULayoutType U_EXPORT2 uloc_getCharacterOrientation( const char *localeId, UErrorCode *status )
Get the layout character orientation for the specified locale.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
an enum indicating the layout orientation for characters.
|
uloc_getCountry
U_CAPI int32_t U_EXPORT2 uloc_getCountry( const char *localeID, char *country, int32_t countryCapacity, UErrorCode *err )
Gets the country code for the specified locale.
This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
the actual buffer size needed for the country code. If it's greater than countryCapacity, the returned country code will be truncated.
|
uloc_getDefault
U_CAPI const char *U_EXPORT2 uloc_getDefault( void )
Gets ICU's default locale.
The returned string is a snapshot in time, and will remain valid and unchanged even when uloc_setDefault() is called. The returned storage is owned by ICU, and must not be altered or deleted by the caller. On Android, uloc_setDefault() is not visible because the default Locale in ICU4C, ICU4J and java.util.Locale are synchronized. To set a default locale, call java.util.Locale::setDefault in java or by reverse JNI.
Details | |
---|---|
Returns |
the ICU default locale
|
uloc_getDisplayCountry
U_CAPI int32_t U_EXPORT2 uloc_getDisplayCountry( const char *locale, const char *displayLocale, UChar *country, int32_t countryCapacity, UErrorCode *status )
Gets the country name suitable for display for the specified locale.
Warning: this is for the region part of a valid locale ID; it cannot just be the region code (like "FR"). To get the display name for a region alone, or for other options, use ULocaleDisplayNames instead.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the displayable country code. If it's greater than countryCapacity, the returned displayable country code will be truncated.
|
uloc_getDisplayKeyword
U_CAPI int32_t U_EXPORT2 uloc_getDisplayKeyword( const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status )
Gets the keyword name suitable for display for the specified locale.
For example, for the locale string de_DE@collation=PHONEBOOK, this API gets the display string for the keyword collation. Usage: UErrorCode status = U_ZERO_ERROR; const char* keyword = NULL; int32_t keywordLen = 0; int32_t keywordCount = 0; UChar displayKeyword[256]; int32_t displayKeywordLen = 0; UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status); for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){ if(U_FAILURE(status)){ ...something went wrong so handle the error... break; } /* uenum_next returns a NUL-terminated string */ keyword = uenum_next(keywordEnum, &keywordLen, &status); displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256, &status); ... do something interesting ..... } uenum_close(keywordEnum);
See also: uloc_openKeywords
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the displayable keyword name.
|
uloc_getDisplayKeywordValue
U_CAPI int32_t U_EXPORT2 uloc_getDisplayKeywordValue( const char *locale, const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status )
Gets the value of the keyword suitable for display for the specified locale.
E.g: for the locale string de_DE@collation=PHONEBOOK, this API gets the display string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
the actual buffer size needed for the displayable keyword value.
|
uloc_getDisplayLanguage
U_CAPI int32_t U_EXPORT2 uloc_getDisplayLanguage( const char *locale, const char *displayLocale, UChar *language, int32_t languageCapacity, UErrorCode *status )
Gets the language name suitable for display for the specified locale.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the displayable language code. If it's greater than languageCapacity, the returned language code will be truncated.
|
uloc_getDisplayName
U_CAPI int32_t U_EXPORT2 uloc_getDisplayName( const char *localeID, const char *inLocaleID, UChar *result, int32_t maxResultSize, UErrorCode *err )
Gets the full name suitable for display for the specified locale.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the displayable name. If it's greater than maxResultSize, the returned displayable name will be truncated.
|
uloc_getDisplayScript
U_CAPI int32_t U_EXPORT2 uloc_getDisplayScript( const char *locale, const char *displayLocale, UChar *script, int32_t scriptCapacity, UErrorCode *status )
Gets the script name suitable for display for the specified locale.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the displayable script code. If it's greater than scriptCapacity, the returned displayable script code will be truncated.
|
uloc_getDisplayVariant
U_CAPI int32_t U_EXPORT2 uloc_getDisplayVariant( const char *locale, const char *displayLocale, UChar *variant, int32_t variantCapacity, UErrorCode *status )
Gets the variant name suitable for display for the specified locale.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the actual buffer size needed for the displayable variant code. If it's greater than variantCapacity, the returned displayable variant code will be truncated.
|
uloc_getISO3Country
U_CAPI const char *U_EXPORT2 uloc_getISO3Country( const char *localeID )
Gets the ISO country code for the specified locale.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the ISO country code for localeID
|
uloc_getISO3Language
U_CAPI const char *U_EXPORT2 uloc_getISO3Language( const char *localeID )
Gets the ISO language code for the specified locale.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the ISO language code for localeID
|
uloc_getISOCountries
U_CAPI const char *const *U_EXPORT2 uloc_getISOCountries( void )
Gets a list of all available 2-letter country codes defined in ISO 3166.
This is a pointer to an array of pointers to arrays of char. All of these pointers are owned by ICU; do not delete them, and do not write through them. The array is terminated with a null pointer.
Details | |
---|---|
Returns |
a list of all available country codes
|
uloc_getISOLanguages
U_CAPI const char *const *U_EXPORT2 uloc_getISOLanguages( void )
Gets a list of all available 2-letter language codes defined in ISO 639, plus additional 3-letter codes determined to be useful for locale generation as defined by Unicode CLDR.
This is a pointer to an array of pointers to arrays of char. All of these pointers are owned by ICU; do not delete them, and do not write through them. The array is terminated with a null pointer.
Details | |
---|---|
Returns |
a list of all available language codes
|
uloc_getKeywordValue
U_CAPI int32_t U_EXPORT2 uloc_getKeywordValue( const char *localeID, const char *keywordName, char *buffer, int32_t bufferCapacity, UErrorCode *status )
Get the value for a keyword.
Locale name does not need to be normalized.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the length of keyword value
|
uloc_getLanguage
U_CAPI int32_t U_EXPORT2 uloc_getLanguage( const char *localeID, char *language, int32_t languageCapacity, UErrorCode *err )
Gets the language code for the specified locale.
This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
the actual buffer size needed for the language code. If it's greater than languageCapacity, the returned language code will be truncated.
|
uloc_getLineOrientation
U_CAPI ULayoutType U_EXPORT2 uloc_getLineOrientation( const char *localeId, UErrorCode *status )
Get the layout line orientation for the specified locale.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
an enum indicating the layout orientation for lines.
|
uloc_getName
U_CAPI int32_t U_EXPORT2 uloc_getName( const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err )
Gets the full name for the specified locale.
This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.
Note: This has the effect of 'canonicalizing' the ICU locale ID to a certain extent. Upper and lower case are set as needed. It does NOT map aliased names in any way. See the top of this header file. This API supports preflighting.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
the actual buffer size needed for the full name. If it's greater than nameCapacity, the returned full name will be truncated.
|
uloc_getScript
U_CAPI int32_t U_EXPORT2 uloc_getScript( const char *localeID, char *script, int32_t scriptCapacity, UErrorCode *err )
Gets the script code for the specified locale.
This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
the actual buffer size needed for the script code. If it's greater than scriptCapacity, the returned script code will be truncated.
|
uloc_getVariant
U_CAPI int32_t U_EXPORT2 uloc_getVariant( const char *localeID, char *variant, int32_t variantCapacity, UErrorCode *err )
Gets the variant code for the specified locale.
This function may return with a failure error code for certain kinds of inputs but does not fully check for well-formed locale IDs / language tags.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
the actual buffer size needed for the variant code. If it's greater than variantCapacity, the returned variant code will be truncated.
|
uloc_isRightToLeft
U_CAPI UBool U_EXPORT2 uloc_isRightToLeft( const char *locale )
Returns whether the locale's script is written right-to-left.
If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags(). If no likely script is known, then false is returned.
A script is right-to-left according to the CLDR script metadata which corresponds to whether the script's letters have Bidi_Class=R or AL.
Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl".
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the locale's script is written right-to-left
|
uloc_minimizeSubtags
U_CAPI int32_t U_EXPORT2 uloc_minimizeSubtags( const char *localeID, char *minimizedLocaleID, int32_t minimizedLocaleIDCapacity, UErrorCode *err )
Minimize the subtags for a provided locale ID, per the algorithm described in the following CLDR technical report:
http://www.unicode.org/reports/tr35/#Likely_Subtags
If localeID is already in the minimal form, or there is no data available for minimization, it will be copied to the output buffer. Since the minimization algorithm relies on proper maximization, see the comments for uloc_addLikelySubtags for reasons why there might not be any data.
Examples:
"en_Latn_US" minimizes to "en"
"de_Latn_DE" minimizes to "de"
"sr_Cyrl_RS" minimizes to "sr"
"zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the script, and minimizing to "zh" would imply "zh_Hans_CN".)
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
The actual buffer size needed for the minimized locale. If it's greater than minimizedLocaleIDCapacity, the returned ID will be truncated. On error, the return value is -1.
|
uloc_openKeywords
U_CAPI UEnumeration *U_EXPORT2 uloc_openKeywords( const char *localeID, UErrorCode *status )
Gets an enumeration of keywords for the specified locale.
Enumeration must get disposed of by the client using uenum_close function.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
enumeration of keywords or NULL if there are no keywords.
|
uloc_setKeywordValue
U_CAPI int32_t U_EXPORT2 uloc_setKeywordValue( const char *keywordName, const char *keywordValue, char *buffer, int32_t bufferCapacity, UErrorCode *status )
Sets or removes the value of the specified keyword.
For removing all keywords, use uloc_getBaseName().
NOTE: Unlike almost every other ICU function which takes a buffer, this function will NOT truncate the output text, and will not update the buffer with unterminated text setting a status of U_STRING_NOT_TERMINATED_WARNING. If a BUFFER_OVERFLOW_ERROR is received, it means a terminated version of the updated locale ID would not fit in the buffer, and the original buffer is untouched. This is done to prevent incorrect or possibly even malformed locales from being generated and used.
See also: uloc_getKeywordValue
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the length needed for the buffer
|
uloc_toLanguageTag
U_CAPI int32_t U_EXPORT2 uloc_toLanguageTag( const char *localeID, char *langtag, int32_t langtagCapacity, UBool strict, UErrorCode *err )
Returns a well-formed language tag for this locale ID.
Note: When strict is false, any locale fields which do not satisfy the BCP47 syntax requirement will be omitted from the result. When strict is true, this function sets err to U_ILLEGAL_ARGUMENT_ERROR if any locale fields do not satisfy the BCP47 syntax requirement.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
The length of the BCP47 language tag.
|
uloc_toLegacyKey
U_CAPI const char *U_EXPORT2 uloc_toLegacyKey( const char *keyword )
Converts the specified keyword (BCP 47 Unicode locale extension key, or legacy key) to the legacy key.
For example, legacy key "collation" is returned for the input BCP 47 Unicode locale extension key "co".
See also: uloc_toUnicodeLocaleKey
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the well-formed legacy key, or NULL if the specified keyword cannot be mapped to a well-formed legacy key.
|
uloc_toLegacyType
U_CAPI const char *U_EXPORT2 uloc_toLegacyType( const char *keyword, const char *value )
Converts the specified keyword value (BCP 47 Unicode locale extension type, or legacy type or type alias) to the canonical legacy type.
For example, the legacy type "phonebook" is returned for the input BCP 47 Unicode locale extension type "phonebk" with the keyword "collation" (or "co").
When the specified keyword is not recognized, but the specified value satisfies the syntax of legacy key, or when the specified keyword allows 'variable' type and the specified value satisfies the syntax, then the pointer to the input type value itself will be returned. For example, uloc_toLegacyType("Foo", "Bar") returns "Bar", and uloc_toLegacyType("vt", "00A4") returns "00A4".
See also: uloc_toUnicodeLocaleType
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
the well-formed legacy type, or NULL if the specified keyword value cannot be mapped to a well-formed legacy type.
|
uloc_toUnicodeLocaleKey
U_CAPI const char *U_EXPORT2 uloc_toUnicodeLocaleKey( const char *keyword )
Converts the specified keyword (legacy key, or BCP 47 Unicode locale extension key) to the equivalent BCP 47 Unicode locale extension key.
For example, BCP 47 Unicode locale extension key "co" is returned for the input keyword "collation".
When the specified keyword is unknown, but satisfies the BCP syntax, then the pointer to the input keyword itself will be returned. For example, uloc_toUnicodeLocaleKey("ZZ") returns "ZZ".
See also: uloc_toLegacyKey
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the well-formed BCP 47 Unicode locale extension key, or NULL if the specified locale keyword cannot be mapped to a well-formed BCP 47 Unicode locale extension key.
|
uloc_toUnicodeLocaleType
U_CAPI const char *U_EXPORT2 uloc_toUnicodeLocaleType( const char *keyword, const char *value )
Converts the specified keyword value (legacy type, or BCP 47 Unicode locale extension type) to the well-formed BCP 47 Unicode locale extension type for the specified keyword (category).
For example, BCP 47 Unicode locale extension type "phonebk" is returned for the input keyword value "phonebook", with the keyword "collation" (or "co").
When the specified keyword is not recognized, but the specified value satisfies the syntax of the BCP 47 Unicode locale extension type, or when the specified keyword allows 'variable' type and the specified value satisfies the syntax, then the pointer to the input type value itself will be returned. For example, uloc_toUnicodeLocaleType("Foo", "Bar") returns "Bar", and uloc_toUnicodeLocaleType("variableTop", "00A4") returns "00A4".
See also: uloc_toLegacyType
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
the well-formed BCP47 Unicode locale extension type, or NULL if the locale keyword value cannot be mapped to a well-formed BCP 47 Unicode locale extension type.
|
ulocdata_getCLDRVersion
U_CAPI void U_EXPORT2 ulocdata_getCLDRVersion( UVersionInfo versionArray, UErrorCode *status )
Return the current CLDR version used by the library.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
unorm2_append
U_CAPI int32_t U_EXPORT2 unorm2_append( const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode )
Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.
The result is normalized if both the strings were normalized. The first and second strings must be different buffers.
Details | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||||
Returns |
the length of the first string after the second string has been appended
|
unorm2_close
U_CAPI void U_EXPORT2 unorm2_close( UNormalizer2 *norm2 )
Closes a UNormalizer2 instance from unorm2_openFiltered().
Do not close instances from unorm2_getInstance()!
Details | |||
---|---|---|---|
Parameters |
|
unorm2_composePair
U_CAPI UChar32 U_EXPORT2 unorm2_composePair( const UNormalizer2 *norm2, UChar32 a, UChar32 b )
Performs pairwise composition of a & b and returns the composite if there is one.
Returns a composite code point c only if c has a two-way mapping to a+b. In standard Unicode normalization, this means that c has a canonical decomposition to a+b and c does not have the Full_Composition_Exclusion property.
This function is independent of the mode of the UNormalizer2.
Details | |||||||
---|---|---|---|---|---|---|---|
Parameters |
|
||||||
Returns |
The non-negative composite code point if there is one; otherwise a negative value.
|
unorm2_getCombiningClass
U_CAPI uint8_t U_EXPORT2 unorm2_getCombiningClass( const UNormalizer2 *norm2, UChar32 c )
Gets the combining class of c.
The default implementation returns 0 but all standard implementations return the Unicode Canonical_Combining_Class value.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
c's combining class
|
unorm2_getDecomposition
U_CAPI int32_t U_EXPORT2 unorm2_getDecomposition( const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode )
Gets the decomposition mapping of c.
Roughly equivalent to normalizing the String form of c on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function returns a negative value and does not write a string if c does not have a decomposition mapping in this instance's data. This function is independent of the mode of the UNormalizer2.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the non-negative length of c's decomposition, if there is one; otherwise a negative value
|
unorm2_getNFCInstance
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFCInstance( UErrorCode *pErrorCode )
Returns a UNormalizer2 instance for Unicode NFC normalization.
Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the requested Normalizer2, if successful
|
unorm2_getNFDInstance
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFDInstance( UErrorCode *pErrorCode )
Returns a UNormalizer2 instance for Unicode NFD normalization.
Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the requested Normalizer2, if successful
|
unorm2_getNFKCCasefoldInstance
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFKCCasefoldInstance( UErrorCode *pErrorCode )
Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization which is equivalent to applying the NFKC_Casefold mappings and then NFC.
See https://www.unicode.org/reports/tr44/#NFKC_Casefold
Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the requested Normalizer2, if successful
|
unorm2_getNFKCInstance
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFKCInstance( UErrorCode *pErrorCode )
Returns a UNormalizer2 instance for Unicode NFKC normalization.
Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the requested Normalizer2, if successful
|
unorm2_getNFKDInstance
U_CAPI const UNormalizer2 *U_EXPORT2 unorm2_getNFKDInstance( UErrorCode *pErrorCode )
Returns a UNormalizer2 instance for Unicode NFKD normalization.
Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). Returns an unmodifiable singleton instance. Do not delete it.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the requested Normalizer2, if successful
|
unorm2_getRawDecomposition
U_CAPI int32_t U_EXPORT2 unorm2_getRawDecomposition( const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode )
Gets the raw decomposition mapping of c.
This is similar to the unorm2_getDecomposition() function but returns the raw decomposition mapping as specified in UnicodeData.txt or (for custom data) in the mapping files processed by the gennorm2 tool. By contrast, unorm2_getDecomposition() returns the processed, recursively-decomposed version of this mapping.
When used on a standard NFKC Normalizer2 instance, unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
When used on a standard NFC Normalizer2 instance, it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); in this case, the result contains either one or two code points (=1..4 UChars).
This function is independent of the mode of the UNormalizer2.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
|
unorm2_hasBoundaryAfter
U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryAfter( const UNormalizer2 *norm2, UChar32 c )
Tests if the character always has a normalization boundary after it, regardless of context.
For details see the Normalizer2 base class documentation.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
true if c has a normalization boundary after it
|
unorm2_hasBoundaryBefore
U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryBefore( const UNormalizer2 *norm2, UChar32 c )
Tests if the character always has a normalization boundary before it, regardless of context.
For details see the Normalizer2 base class documentation.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
true if c has a normalization boundary before it
|
unorm2_isInert
U_CAPI UBool U_EXPORT2 unorm2_isInert( const UNormalizer2 *norm2, UChar32 c )
Tests if the character is normalization-inert.
For details see the Normalizer2 base class documentation.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
true if c is normalization-inert
|
unorm2_isNormalized
U_CAPI UBool U_EXPORT2 unorm2_isNormalized( const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode )
Tests if the string is normalized.
Internally, in cases where the quickCheck() method would return "maybe" (which is only possible for the two COMPOSE modes) this method resolves to "yes" or "no" to provide a definitive result, at the cost of doing more work in those cases.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
true if s is normalized
|
unorm2_normalize
U_CAPI int32_t U_EXPORT2 unorm2_normalize( const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode )
Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.
The source and destination strings must be different buffers.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
the length of the destination string
|
unorm2_normalizeSecondAndAppend
U_CAPI int32_t U_EXPORT2 unorm2_normalizeSecondAndAppend( const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode )
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.
The result is normalized if the first string was normalized. The first and second strings must be different buffers.
Details | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||||
Returns |
the length of the first string after the normalized second string has been appended
|
unorm2_quickCheck
U_CAPI UNormalizationCheckResult U_EXPORT2 unorm2_quickCheck( const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode )
Tests if the string is normalized.
For the two COMPOSE modes, the result could be "maybe" in cases that would take a little more work to resolve definitively. Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster combination of quick check + normalization, to avoid re-checking the "yes" prefix.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
UNormalizationCheckResult
|
unorm2_spanQuickCheckYes
U_CAPI int32_t U_EXPORT2 unorm2_spanQuickCheckYes( const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode )
Returns the end of the normalized substring of the input string.
In other words, with end=spanQuickCheckYes(s, ec); the substring UnicodeString(s, 0, end) will pass the quick check with a "yes" result.
The returned end index is usually one or more characters before the "no" or "maybe" character: The end index is at a normalization boundary. (See the class documentation for more about normalization boundaries.)
When the goal is a normalized string and most input strings are expected to be normalized already, then call this method, and if it returns a prefix shorter than the input string, copy that prefix and use normalizeSecondAndAppend() for the remainder.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
"yes" span end index
|
uscript_breaksBetweenLetters
U_CAPI UBool U_EXPORT2 uscript_breaksBetweenLetters( UScriptCode script )
Returns true if the script allows line breaks between letters (excluding hyphenation).
Such a script typically requires dictionary-based line breaking. For example, Hani and Thai.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the script allows line breaks between letters
|
uscript_getCode
U_CAPI int32_t U_EXPORT2 uscript_getCode( const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err )
Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". Fills in USCRIPT_LATIN given "en" OR "en_US". If the required capacity is greater than the capacity of the destination buffer, then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
Note: To search by short or long script alias only, use u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does a fast lookup with no access of the locale data.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
The number of script codes filled in the buffer passed in
|
uscript_getName
U_CAPI const char *U_EXPORT2 uscript_getName( UScriptCode scriptCode )
Returns the long Unicode script name, if there is one.
Otherwise returns the 4-letter ISO 15924 script code. Returns "Malayalam" given USCRIPT_MALAYALAM.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
long script name as given in PropertyValueAliases.txt, or the 4-letter code, or NULL if scriptCode is invalid
|
uscript_getSampleString
U_CAPI int32_t U_EXPORT2 uscript_getSampleString( UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode )
Writes the script sample character string.
This string normally consists of one code point but might be longer. The string is empty if the script is not encoded.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
the string length, even if U_BUFFER_OVERFLOW_ERROR
|
uscript_getScript
U_CAPI UScriptCode U_EXPORT2 uscript_getScript( UChar32 codepoint, UErrorCode *err )
Gets the script code associated with the given codepoint.
Returns USCRIPT_MALAYALAM given 0x0D02.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
The UScriptCode, or 0 if codepoint is invalid
|
uscript_getScriptExtensions
U_CAPI int32_t U_EXPORT2 uscript_getScriptExtensions( UChar32 c, UScriptCode *scripts, int32_t capacity, UErrorCode *errorCode )
Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.
- If c does have Script_Extensions, then the Script property value (normally Common or Inherited) is not included.
- If c does not have Script_Extensions, then the one Script code is written to the output array.
- If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. In other words, if the return value is 1, then the output array contains exactly c's single Script code. If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
If there are more than capacity script codes to be written, then U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. (Usual ICU buffer handling behavior.)
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
number of script codes in c's Script_Extensions, or 1 for the single Script value, written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
|
uscript_getShortName
U_CAPI const char *U_EXPORT2 uscript_getShortName( UScriptCode scriptCode )
Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.
Returns "Mlym" given USCRIPT_MALAYALAM.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
short script name (4-letter code), or NULL if scriptCode is invalid
|
uscript_getUsage
U_CAPI UScriptUsage U_EXPORT2 uscript_getUsage( UScriptCode script )
Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
See also: UScriptUsage
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
script usage
|
uscript_hasScript
U_CAPI UBool U_EXPORT2 uscript_hasScript( UChar32 c, UScriptCode sc )
Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.
Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
true if sc is in Script_Extensions(c)
|
uscript_isCased
U_CAPI UBool U_EXPORT2 uscript_isCased( UScriptCode script )
Returns true if, in modern (or most recent) usage of the script, case distinctions are customary.
For example, Latn and Cyrl.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the script is cased
|
uscript_isRightToLeft
U_CAPI UBool U_EXPORT2 uscript_isRightToLeft( UScriptCode script )
Returns true if the script is written right-to-left.
For example, Arab and Hebr.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
true if the script is right-to-left
|
utext_char32At
U_CAPI UChar32 U_EXPORT2 utext_char32At( UText *ut, int64_t nativeIndex )
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.
If the specified index points to the interior of a multi-unit character - one of the trail bytes of a UTF-8 sequence, for example - the complete code point will be returned.
The iteration position will be set to the start of the returned code point.
This function is roughly equivalent to the sequence utext_setNativeIndex(index); utext_current32(); (There is a subtle difference if the index is out of bounds by being less than zero - utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() will return the char at zero. utext_char32At(negative index), on the other hand, will return the U_SENTINEL value of -1.)
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
the code point at the specified index.
|
utext_clone
U_CAPI UText *U_EXPORT2 utext_clone( UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status )
Clone a UText.
This is much like opening a UText where the source text is itself another UText.
A deep clone will copy both the UText data structures and the underlying text. The original and cloned UText will operate completely independently; modifications made to the text in one will not affect the other. Text providers are not required to support deep clones. The user of clone() must check the status return and be prepared to handle failures.
The standard UText implementations for UTF8, UChar *, UnicodeString and Replaceable all support deep cloning.
The UText returned from a deep clone will be writable, assuming that the text provider is able to support writing, even if the source UText had been made non-writable by means of utext_freeze().
A shallow clone replicates only the UText data structures; it does not make a copy of the underlying text. Shallow clones can be used as an efficient way to have multiple iterators active in a single text string that is not being modified.
A shallow clone operation will not fail, barring truly exceptional conditions such as memory allocation failures.
Shallow UText clones should be avoided if the UText functions that modify the text are expected to be used, either on the original or the cloned UText. Any such modifications can cause unpredictable behavior. Read Only shallow clones provide some protection against errors of this type by disabling text modification via the cloned UText.
A shallow clone made with the readOnly parameter == false will preserve the utext_isWritable() state of the source object. Note, however, that write operations must be avoided while more than one UText exists that refers to the same underlying text.
A UText and its clone may be safely concurrently accessed by separate threads. This is true for read access only with shallow clones, and for both read and write access with deep clones. It is the responsibility of the Text Provider to ensure that this thread safety constraint is met.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
The newly created clone, or NULL if the clone operation failed.
|
utext_close
U_CAPI UText *U_EXPORT2 utext_close( UText *ut )
Close function for UText instances.
Cleans up, releases any resources being held by an open UText.
If the UText was originally allocated by one of the utext_open functions, the storage associated with the utext will also be freed. If the UText storage originated with the application, as it would with a local or static instance, the storage will not be deleted.
An open UText can be reset to refer to a new string by using one of the utext_open() functions without first closing the UText.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
NULL if the UText struct was deleted by the close. If the UText struct was originally provided by the caller to the open function, it is returned by this function, and may be safely used again in a subsequent utext_open.
|
utext_current32
U_CAPI UChar32 U_EXPORT2 utext_current32( UText *ut )
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached the end of the input text.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the Unicode code point at the current iterator position.
|
utext_equals
U_CAPI UBool U_EXPORT2 utext_equals( const UText *a, const UText *b )
Compare two UText objects for equality.
UTexts are equal if they are iterating over the same text, and have the same iteration position within the text. If either or both of the parameters are NULL, the comparison is false.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
true if the two UTexts are equal.
|
utext_extract
U_CAPI int32_t U_EXPORT2 utext_extract( UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status )
Extract text from a UText into a UChar buffer.
The range of text to be extracted is specified in the native indices of the UText provider. These may not necessarily be UTF-16 indices.
The size (number of 16 bit UChars) of the data to be extracted is returned. The full number of UChars is returned, even when the extracted text is truncated because the specified buffer size is too small.
The extracted string will (if you are a user) / must (if you are a text provider) be NUL-terminated if there is sufficient space in the destination buffer. This terminating NUL is not included in the returned length.
The iteration index is left at the position following the last extracted character.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||
Returns |
Number of UChars in the data to be extracted. Does not include a trailing NUL.
|
utext_getNativeIndex
U_CAPI int64_t U_EXPORT2 utext_getNativeIndex( const UText *ut )
Get the current iterator position, which can range from 0 to the length of the text.
The position is a native index into the input text, in whatever format it may have (possibly UTF-8 for example), and may not always be the same as the corresponding UChar (UTF-16) index. The returned position will always be aligned to a code point boundary.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the current index position, in the native units of the text provider.
|
utext_getPreviousNativeIndex
U_CAPI int64_t U_EXPORT2 utext_getPreviousNativeIndex( UText *ut )
Get the native index of the character preceding the current position.
If the iteration position is already at the start of the text, zero is returned. The value returned is the same as that obtained from the following sequence, but without the side effect of changing the iteration position.
UText *ut = whatever; ... utext_previous32(ut); utext_getNativeIndex(ut);
This function is most useful during forwards iteration, where it will get the native index of the character most recently returned from utext_next().
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the native index of the character preceding the current index position, or zero if the current position is at the start of the text.
|
utext_moveIndex32
U_CAPI UBool U_EXPORT2 utext_moveIndex32( UText *ut, int32_t delta )
Move the iterator position by delta code points.
The number of code points is a signed number; a negative delta will move the iterator backwards, towards the start of the text.
The index is moved by delta code points forward or backward, but no further backward than to 0 and no further forward than to utext_nativeLength(). The resulting index value will be between 0 and length, inclusive.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
true if the position could be moved the requested number of positions while staying within the range [0 - text length].
|
utext_nativeLength
U_CAPI int64_t U_EXPORT2 utext_nativeLength( UText *ut )
Get the length of the text.
Depending on the characteristics of the underlying text representation, this may be expensive. See also: utext_isLengthExpensive()
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the length of the text, expressed in native units.
|
utext_next32
U_CAPI UChar32 U_EXPORT2 utext_next32( UText *ut )
Get the code point at the current iteration position of the UText, and advance the position to the first index following the character.
If the position is at the end of the text (the index following the last character, which is also the length of the text), return U_SENTINEL (-1) and do not advance the index.
This is a post-increment operation.
An inline macro version of this function, UTEXT_NEXT32(), is available for performance critical use.
See also: UTEXT_NEXT32
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the Unicode code point at the iteration position.
|
utext_next32From
U_CAPI UChar32 U_EXPORT2 utext_next32From( UText *ut, int64_t nativeIndex )
Set the iteration index and return the code point at that index.
Leave the iteration index at the start of the following code point.
This function is the most efficient and convenient way to begin a forward iteration. The results are identical to those from the sequence
utext_setNativeIndex(); utext_next32();
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
Code point which starts at or before index, or U_SENTINEL (-1) if it is out of bounds.
|
utext_openUChars
U_CAPI UText *U_EXPORT2 utext_openUChars( UText *ut, const UChar *s, int64_t length, UErrorCode *status )
Open a read-only UText for a UChar * string.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
A pointer to the UText. If a pre-allocated UText was provided, it will always be used and returned.
|
utext_openUTF8
U_CAPI UText *U_EXPORT2 utext_openUTF8( UText *ut, const char *s, int64_t length, UErrorCode *status )
Open a read-only UText implementation for UTF-8 strings.
Any invalid UTF-8 in the input will be handled in this way: a sequence of bytes that has the form of a truncated, but otherwise valid, UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. Any other illegal bytes will each be replaced by a \uFFFD.
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||
Returns |
A pointer to the UText. If a pre-allocated UText was provided, it will always be used and returned.
|
utext_previous32
U_CAPI UChar32 U_EXPORT2 utext_previous32( UText *ut )
Move the iterator position to the character (code point) whose index precedes the current position, and return that character.
This is a pre-decrement operation.
If the initial position is at the start of the text (index of 0) return U_SENTINEL (-1), and leave the position unchanged.
An inline macro version of this function, UTEXT_PREVIOUS32(), is available for performance critical use.
See also: UTEXT_PREVIOUS32
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
the previous UChar32 code point, or U_SENTINEL (-1) if the iteration has reached the start of the text.
|
utext_previous32From
U_CAPI UChar32 U_EXPORT2 utext_previous32From( UText *ut, int64_t nativeIndex )
Set the iteration index, and return the code point preceding the one specified by the initial index.
Leave the iteration position at the start of the returned code point.
This function is the most efficient and convenient way to begin a backwards iteration.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
Code point preceding the one at the initial index, or U_SENTINEL (-1) if it is out of bounds.
|
utext_setNativeIndex
U_CAPI void U_EXPORT2 utext_setNativeIndex( UText *ut, int64_t nativeIndex )
Set the current iteration position to the nearest code point boundary at or preceding the specified index.
The index is in the native units of the original input text. If the index is out of range, it will be pinned to be within the range of the input text.
It will usually be more efficient to begin an iteration using the functions utext_next32From() or utext_previous32From() rather than utext_setNativeIndex().
Moving the index position to an adjacent character is best done with utext_next32(), utext_previous32() or utext_moveIndex32(). Attempting to do direct arithmetic on the index position is complicated by the fact that the size (in native units) of a character depends on the underlying representation of the character (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not easily knowable.
Details | |||||
---|---|---|---|---|---|
Parameters |
|
utrans_clone
U_CAPI UTransliterator *U_EXPORT2 utrans_clone( const UTransliterator *trans, UErrorCode *status )
Create a copy of a transliterator.
Any non-NULL result from this function should later be closed with utrans_close().
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
a transliterator pointer that may be passed to other utrans_xxx() functions, or NULL if the clone call fails.
|
utrans_close
U_CAPI void U_EXPORT2 utrans_close( UTransliterator *trans )
Close a transliterator.
Any non-NULL pointer returned by utrans_openXxx() or utrans_clone() should eventually be closed.
Details | |||
---|---|---|---|
Parameters |
|
utrans_openIDs
U_CAPI UEnumeration *U_EXPORT2 utrans_openIDs( UErrorCode *pErrorCode )
Return a UEnumeration for the available transliterators.
Details | |||
---|---|---|---|
Parameters |
|
||
Returns |
UEnumeration for the available transliterators. Close with uenum_close().
|
utrans_openInverse
U_CAPI UTransliterator *U_EXPORT2 utrans_openInverse( const UTransliterator *trans, UErrorCode *status )
Open an inverse of an existing transliterator.
For this to work, the inverse must be registered with the system. For example, if the Transliterator "A-B" is opened, and then its inverse is opened, the result is the Transliterator "B-A", if such a transliterator is registered with the system. Otherwise the result is NULL and a failing UErrorCode is set. Any non-NULL result from this function should later be closed with utrans_close().
Details | |||||
---|---|---|---|---|---|
Parameters |
|
||||
Returns |
a pointer to a newly-opened transliterator that is the inverse of trans, or NULL if the open call fails.
|
utrans_openU
U_CAPI UTransliterator *U_EXPORT2 utrans_openU( const UChar *id, int32_t idLength, UTransDirection dir, const UChar *rules, int32_t rulesLength, UParseError *parseError, UErrorCode *pErrorCode )
Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.
Any non-NULL result from this function should later be closed with utrans_close().
Details | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||||||
Returns |
a transliterator pointer that may be passed to other utrans_xxx() functions, or NULL if the open call fails.
|
utrans_setFilter
U_CAPI void U_EXPORT2 utrans_setFilter( UTransliterator *trans, const UChar *filterPattern, int32_t filterPatternLen, UErrorCode *status )
Set the filter used by a transliterator.
A filter can be used to make the transliterator pass certain characters through untouched. The filter is expressed using a UnicodeSet pattern. If the filterPattern is NULL or the empty string, then the transliterator will be reset to use no filter.
See also: UnicodeSet
Details | |||||||||
---|---|---|---|---|---|---|---|---|---|
Parameters |
|
utrans_toRules
U_CAPI int32_t U_EXPORT2 utrans_toRules( const UTransliterator *trans, UBool escapeUnprintable, UChar *result, int32_t resultLength, UErrorCode *status )
Create a rule string that can be passed to utrans_openU to recreate this transliterator.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
||||||||||
Returns |
int32_t The length of the rule string (may be greater than resultLength, in which case an error is returned).
|
utrans_trans
U_CAPI void U_EXPORT2 utrans_trans( const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, int32_t start, int32_t *limit, UErrorCode *status )
Transliterate a segment of a UReplaceable string.
The string is passed in as a UReplaceable pointer rep and a UReplaceableCallbacks function pointer struct repFunc. Functions in the repFunc struct will be called in order to modify the rep string.
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
utrans_transIncremental
U_CAPI void U_EXPORT2 utrans_transIncremental( const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, UTransPosition *pos, UErrorCode *status )
Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously.
This method is typically called after new text has been inserted, e.g. as a result of a keyboard event. The transliterator will try to transliterate characters of rep between index.cursor and index.limit. Characters before index.cursor will not be changed.
Upon return, values in index will be updated. index.start will be advanced to the first character that future calls to this method will read. index.cursor and index.limit will be adjusted to delimit the range of text that future calls to this method may change.
Typical usage of this method begins with an initial call with index.start and index.limit set to indicate the portion of text to be transliterated, and index.cursor == index.start. Thereafter, index can be used without modification in future calls, provided that all changes to text are made via this method.
This method assumes that future calls may be made that will insert new text into the buffer. As a result, it only performs unambiguous transliterations. After the last call to this method, there may be untransliterated text that is waiting for more input to resolve an ambiguity. In order to perform these pending transliterations, clients should call utrans_trans() with a start of index.start and a limit of index.end after the last call to this method has been made.
Details | |||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
utrans_transIncrementalUChars
U_CAPI void U_EXPORT2 utrans_transIncrementalUChars( const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, UTransPosition *pos, UErrorCode *status )
Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously.
See utrans_transIncremental(). The string is passed in as a UChar* buffer. The string is modified in place. If the result is longer than textCapacity, it is truncated. The actual length of the result is returned in *textLength, if textLength is non-NULL. *textLength may be greater than textCapacity, but only textCapacity UChars will be written to *text, including the zero terminator. See utrans_transIncremental() for usage details.
See also: utrans_transIncremental
Details | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|
utrans_transUChars
U_CAPI void U_EXPORT2 utrans_transUChars( const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, int32_t start, int32_t *limit, UErrorCode *status )
Transliterate a segment of a UChar* string.
The string is passed in as a UChar* buffer. The string is modified in place. If the result is longer than textCapacity, it is truncated. The actual length of the result is returned in *textLength, if textLength is non-NULL. *textLength may be greater than textCapacity, but only textCapacity UChars will be written to *text, including the zero terminator.
Details | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Parameters |
|