summaryrefslogtreecommitdiff
path: root/src/utf8.h
diff options
context:
space:
mode:
authorAlbert Cervin <albert@acervin.com>2022-11-02 22:20:04 +0100
committerAlbert Cervin <albert@acervin.com>2022-11-16 23:33:49 +0100
commit2f4cb88d5c60f725323739300bb49dfa8923e7d5 (patch)
tree6ec22c2be92eff05f18e5919e747faab56e555ad /src/utf8.h
downloaddged-2f4cb88d5c60f725323739300bb49dfa8923e7d5.tar.gz
dged-2f4cb88d5c60f725323739300bb49dfa8923e7d5.tar.xz
dged-2f4cb88d5c60f725323739300bb49dfa8923e7d5.zip
🎉 And so it begins
Diffstat (limited to 'src/utf8.h')
-rw-r--r--src/utf8.h17
1 files changed, 17 insertions, 0 deletions
diff --git a/src/utf8.h b/src/utf8.h
new file mode 100644
index 0000000..901b1af
--- /dev/null
+++ b/src/utf8.h
@@ -0,0 +1,17 @@
+#include <stdbool.h>
+#include <stdint.h>
+
+/*!
+ * \brief Return the number of characters represented by the UTF-8 sequence
+ * pointed at by `bytes`, which is `nbytes` bytes long.
+ */
+uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes);
+
+/* Return the number of bytes used to make up the next `nchars` characters */
+uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nchars);
+
+/* true if `byte` is the start byte of a unicode byte sequence */
+bool utf8_byte_is_unicode_start(uint8_t byte);
+bool utf8_byte_is_unicode_continuation(uint8_t byte);
+bool utf8_byte_is_ascii(uint8_t byte);
+bool utf8_byte_is_unicode(uint8_t byte);