From 2f4cb88d5c60f725323739300bb49dfa8923e7d5 Mon Sep 17 00:00:00 2001 From: Albert Cervin Date: Wed, 2 Nov 2022 22:20:04 +0100 Subject: =?UTF-8?q?=F0=9F=8E=89=20And=20so=20it=20begins?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utf8.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/utf8.h (limited to 'src/utf8.h') diff --git a/src/utf8.h b/src/utf8.h new file mode 100644 index 0000000..901b1af --- /dev/null +++ b/src/utf8.h @@ -0,0 +1,17 @@ +#include +#include + +/*! + * \brief Return the number of chars the utf-8 sequence pointed at by `bytes` of + * length `nbytes`, represents + */ +uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes); + +/* Return the number of bytes used to make up the next `nchars` characters */ +uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nchars); + +/* true if `byte` is a unicode byte sequence start byte */ +bool utf8_byte_is_unicode_start(uint8_t byte); +bool utf8_byte_is_unicode_continuation(uint8_t byte); +bool utf8_byte_is_ascii(uint8_t byte); +bool utf8_byte_is_unicode(uint8_t byte); -- cgit v1.2.3