Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2014 SGI. * All rights reserved. */ #ifndef UTF8NORM_H #define UTF8NORM_H #include <linux/types.h> #include <linux/export.h> #include <linux/string.h> #include <linux/module.h> /* Encoding a unicode version number as a single unsigned int. */ #define UNICODE_MAJ_SHIFT (16) #define UNICODE_MIN_SHIFT (8) #define UNICODE_AGE(MAJ, MIN, REV) \ (((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \ ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \ ((unsigned int)(REV))) /* Highest unicode version supported by the data tables. */ extern int utf8version_is_supported(u8 maj, u8 min, u8 rev); extern int utf8version_latest(void); /* * Look for the correct const struct utf8data for a unicode version. * Returns NULL if the version requested is too new. * * Two normalization forms are supported: nfdi and nfdicf. * * nfdi: * - Apply unicode normalization form NFD. * - Remove any Default_Ignorable_Code_Point. * * nfdicf: * - Apply unicode normalization form NFD. * - Remove any Default_Ignorable_Code_Point. * - Apply a full casefold (C + F). */ extern const struct utf8data *utf8nfdi(unsigned int maxage); extern const struct utf8data *utf8nfdicf(unsigned int maxage); /* * Determine the maximum age of any unicode character in the string. * Returns 0 if only unassigned code points are present. * Returns -1 if the input is not valid UTF-8. */ extern int utf8agemax(const struct utf8data *data, const char *s); extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len); /* * Determine the minimum age of any unicode character in the string. * Returns 0 if any unassigned code points are present. * Returns -1 if the input is not valid UTF-8. */ extern int utf8agemin(const struct utf8data *data, const char *s); extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len); /* * Determine the length of the normalized from of the string, * excluding any terminating NULL byte. * Returns 0 if only ignorable code points are present. * Returns -1 if the input is not valid UTF-8. */ extern ssize_t utf8len(const struct utf8data *data, const char *s); extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len); /* Needed in struct utf8cursor below. */ #define UTF8HANGULLEAF (12) /* * Cursor structure used by the normalizer. */ struct utf8cursor { const struct utf8data *data; const char *s; const char *p; const char *ss; const char *sp; unsigned int len; unsigned int slen; short int ccc; short int nccc; unsigned char hangul[UTF8HANGULLEAF]; }; /* * Initialize a utf8cursor to normalize a string. * Returns 0 on success. * Returns -1 on failure. */ extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data, const char *s); extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data, const char *s, size_t len); /* * Get the next byte in the normalization. * Returns a value > 0 && < 256 on success. * Returns 0 when the end of the normalization is reached. * Returns -1 if the string being normalized is not valid UTF-8. */ extern int utf8byte(struct utf8cursor *u8c); #endif /* UTF8NORM_H */ |