diff options
| author | Albert Cervin <albert@acervin.com> | 2022-11-02 22:20:04 +0100 |
|---|---|---|
| committer | Albert Cervin <albert@acervin.com> | 2022-11-16 23:33:49 +0100 |
| commit | 2f4cb88d5c60f725323739300bb49dfa8923e7d5 (patch) | |
| tree | 6ec22c2be92eff05f18e5919e747faab56e555ad /src/utf8.h | |
| download | dged-2f4cb88d5c60f725323739300bb49dfa8923e7d5.tar.gz dged-2f4cb88d5c60f725323739300bb49dfa8923e7d5.tar.xz dged-2f4cb88d5c60f725323739300bb49dfa8923e7d5.zip | |
🎉 And so it begins
Diffstat (limited to 'src/utf8.h')
| -rw-r--r-- | src/utf8.h | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/src/utf8.h b/src/utf8.h new file mode 100644 index 0000000..901b1af --- /dev/null +++ b/src/utf8.h @@ -0,0 +1,17 @@ +#include <stdbool.h> +#include <stdint.h> + +/*! + * \brief Return the number of chars the utf-8 sequence pointed at by `bytes` of + * length `nbytes`, represents + */ +uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes); + +/* Return the number of bytes used to make up the next `nchars` characters */ +uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nchars); + +/* true if `byte` is a unicode byte sequence start byte */ +bool utf8_byte_is_unicode_start(uint8_t byte); +bool utf8_byte_is_unicode_continuation(uint8_t byte); +bool utf8_byte_is_ascii(uint8_t byte); +bool utf8_byte_is_unicode(uint8_t byte); |
