diff options
Diffstat (limited to 'src/dged/utf8.h')
| -rw-r--r-- | src/dged/utf8.h | 28 |
1 files changed, 23 insertions, 5 deletions
diff --git a/src/dged/utf8.h b/src/dged/utf8.h index 04aa242..22ce22d 100644 --- a/src/dged/utf8.h +++ b/src/dged/utf8.h @@ -1,19 +1,37 @@ +#ifndef _UTF8_H +#define _UTF8_H + #include <stdbool.h> #include <stdint.h> +struct codepoint { + uint32_t codepoint; + uint32_t nbytes; +}; + +struct utf8_codepoint_iterator { + uint8_t *data; + uint64_t nbytes; + uint64_t offset; + struct codepoint current; +}; + +struct utf8_codepoint_iterator +create_utf8_codepoint_iterator(uint8_t *data, uint64_t len, + uint64_t initial_offset); +struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter); + /*! * \brief Return the number of chars the utf-8 sequence pointed at by `bytes` of * length `nbytes`, represents */ uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes); -/* Return the number of bytes used to make up the next `nchars` characters */ -uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars); +uint32_t unicode_visual_char_width(const struct codepoint *codepoint); -/* true if `byte` is a unicode byte sequence start byte */ bool utf8_byte_is_unicode_start(uint8_t byte); bool utf8_byte_is_unicode_continuation(uint8_t byte); -bool utf8_byte_is_ascii(uint8_t byte); bool utf8_byte_is_unicode(uint8_t byte); +bool utf8_byte_is_ascii(uint8_t byte); -uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len); +#endif |
