1 files changed, 23 insertions, 5 deletions
diff --git a/src/dged/utf8.h b/src/dged/utf8.h
index 04aa242..22ce22d 100644
--- a/src/dged/utf8.h
+++ b/src/dged/utf8.h
@@ -1,19 +1,37 @@
+#ifndef DGED_UTF8_H /* include guard; avoids the reserved _[A-Z]* name space */
+#define DGED_UTF8_H
+
 #include <stdbool.h>
 #include <stdint.h>
 
+struct codepoint { /* element type handed out by the codepoint iterator */
+  uint32_t codepoint; /* presumably the decoded code point value - confirm */
+  uint32_t nbytes; /* presumably bytes its encoding occupies - TODO confirm */
+};
+
+struct utf8_codepoint_iterator { /* iteration state, passed around by value */
+  uint8_t *data; /* NOTE(review): ownership/lifetime not visible here */
+  uint64_t nbytes; /* presumably length of `data` in bytes - TODO confirm */
+  uint64_t offset; /* presumably current byte position in `data` - confirm */
+  struct codepoint current; /* presumably the slot next() points into */
+};
+
+struct utf8_codepoint_iterator /* the iterator is returned by value */
+create_utf8_codepoint_iterator(uint8_t *data, uint64_t len, /* presumably bytes */
+                               uint64_t initial_offset); /* presumably a byte offset - confirm */
+struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter); /* NOTE(review): end-of-data result (NULL?) is undocumented - confirm */
+
 /*!
  * \brief Return the number of chars the utf-8 sequence pointed at by `bytes` of
  * length `nbytes`, represents
  */
 uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes);
 
-/* Return the number of bytes used to make up the next `nchars` characters */
-uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars);
+uint32_t unicode_visual_char_width(const struct codepoint *codepoint); /* presumably width in display columns - TODO confirm */
 
-/* true if `byte` is a unicode byte sequence start byte */
 bool utf8_byte_is_unicode_start(uint8_t byte);
 bool utf8_byte_is_unicode_continuation(uint8_t byte);
-bool utf8_byte_is_ascii(uint8_t byte);
 bool utf8_byte_is_unicode(uint8_t byte);
+bool utf8_byte_is_ascii(uint8_t byte); /* presumably true when byte < 0x80 - confirm in utf8.c */
 
-uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len);
+#endif