summaryrefslogtreecommitdiff
path: root/src/dged/utf8.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/dged/utf8.h')
-rw-r--r--src/dged/utf8.h28
1 files changed, 23 insertions, 5 deletions
diff --git a/src/dged/utf8.h b/src/dged/utf8.h
index 04aa242..22ce22d 100644
--- a/src/dged/utf8.h
+++ b/src/dged/utf8.h
@@ -1,19 +1,37 @@
+#ifndef _UTF8_H /* include guard for this header */
+#define _UTF8_H /* NOTE(review): names starting with '_' + uppercase letter are reserved (C11 7.1.3); consider DGED_UTF8_H */
+
#include <stdbool.h>
#include <stdint.h>
+struct codepoint { /* Code point record; utf8_next_codepoint() returns a pointer to this type. */
+ uint32_t codepoint; /* NOTE(review): presumably the decoded Unicode value -- confirm in utf8.c */
+ uint32_t nbytes; /* NOTE(review): presumably the number of bytes in its UTF-8 encoding -- confirm */
+};
+
+struct utf8_codepoint_iterator { /* Iterator state: returned by create_utf8_codepoint_iterator(), passed to utf8_next_codepoint(). */
+ uint8_t *data; /* NOTE(review): presumably the byte buffer being iterated; ownership not shown here -- confirm */
+ uint64_t nbytes; /* NOTE(review): presumably the length of `data` in bytes -- confirm */
+ uint64_t offset; /* NOTE(review): presumably the current byte position within `data` -- confirm */
+ struct codepoint current; /* NOTE(review): presumably the most recently produced code point -- confirm */
+};
+
+struct utf8_codepoint_iterator
+create_utf8_codepoint_iterator(uint8_t *data, uint64_t len,
+ uint64_t initial_offset); /* Returns the iterator by value. NOTE(review): presumably covers `len` bytes of `data`, starting at byte `initial_offset` -- confirm */
+struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter); /* NOTE(review): presumably advances `iter` and returns NULL once the data is exhausted -- confirm in utf8.c */
+
/*!
 * \brief Return the number of characters represented by the UTF-8 sequence
 * of length `nbytes` pointed at by `bytes`.
 */
uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes);
-/* Return the number of bytes used to make up the next `nchars` characters */
-uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars);
+uint32_t unicode_visual_char_width(const struct codepoint *codepoint); /* NOTE(review): presumably the number of display columns `codepoint` occupies -- confirm in utf8.c */
-/* true if `byte` is a unicode byte sequence start byte */
bool utf8_byte_is_unicode_start(uint8_t byte); /* true if `byte` is a unicode byte sequence start byte */
bool utf8_byte_is_unicode_continuation(uint8_t byte); /* NOTE(review): presumably true for a continuation byte of a multi-byte sequence -- confirm */
-bool utf8_byte_is_ascii(uint8_t byte);
bool utf8_byte_is_unicode(uint8_t byte); /* NOTE(review): presumably true for any non-ASCII byte (start or continuation) -- confirm */
+bool utf8_byte_is_ascii(uint8_t byte); /* NOTE(review): presumably true for a single-byte (7-bit) character -- confirm */
-uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len);
+#endif