blob: 22ce22d329d65a5ff0e273b13fbb932ef59db853 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
#ifndef _UTF8_H
#define _UTF8_H
#include <stdbool.h>
#include <stdint.h>
/*!
 * \brief A code point value paired with a byte count.
 *
 * NOTE(review): only the declaration is visible in this header; the field
 * meanings below are inferred from the names and should be confirmed against
 * the implementation.
 */
struct codepoint {
/* Code point value (presumably the decoded Unicode scalar -- confirm). */
uint32_t codepoint;
/* Byte count (presumably the length of the encoded UTF-8 form -- confirm). */
uint32_t nbytes;
};
/*!
 * \brief Iteration state used by utf8_next_codepoint().
 *
 * Holds a pointer to a byte buffer, two 64-bit counters and an embedded
 * `struct codepoint`.
 *
 * NOTE(review): the per-field descriptions are inferred from the names; this
 * header does not show how they are used. Confirm against the implementation.
 */
struct utf8_codepoint_iterator {
/* Byte buffer being iterated; the struct does not indicate ownership. */
uint8_t *data;
/* Presumably the total length of `data` in bytes -- confirm. */
uint64_t nbytes;
/* Presumably the current byte position within `data` -- confirm. */
uint64_t offset;
/* Presumably the most recently produced code point -- confirm. */
struct codepoint current;
};
/*!
 * \brief Create a `struct utf8_codepoint_iterator`, returned by value.
 *
 * \param data           byte buffer to iterate over
 * \param len            length in bytes (presumably of `data` -- confirm)
 * \param initial_offset starting offset (presumably a byte offset into
 *                       `data` -- confirm)
 *
 * NOTE(review): whether `initial_offset` is validated against `len`, or must
 * fall on a sequence boundary, is not visible in this header.
 */
struct utf8_codepoint_iterator
create_utf8_codepoint_iterator(uint8_t *data, uint64_t len,
uint64_t initial_offset);
/*!
 * \brief Advance `iter` and return a pointer to a `struct codepoint`.
 *
 * NOTE(review): this header does not show what is returned at end of input or
 * on a malformed sequence (a NULL return is a plausible convention), nor
 * whether the returned pointer refers to `iter->current`. Confirm against the
 * implementation before relying on either.
 */
struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter);
/*!
 * \brief Return the number of characters represented by the UTF-8 sequence
 * of length `nbytes` pointed at by `bytes`.
 *
 * \param bytes  start of the UTF-8 sequence
 * \param nbytes length of the sequence in bytes
 * \return the character count
 */
uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes);
/*!
 * \brief Return a width value for the given code point.
 *
 * NOTE(review): judging by the name this is the visual (display) width,
 * presumably in terminal columns; the unit and the handling of zero-width or
 * wide characters are not visible in this header -- confirm.
 */
uint32_t unicode_visual_char_width(const struct codepoint *codepoint);
/*!
 * \brief Classify a single byte as the start of a multi-byte sequence.
 *
 * NOTE(review): presumably true for UTF-8 lead bytes (bit pattern 11xxxxxx);
 * the exact test is in the implementation -- confirm.
 */
bool utf8_byte_is_unicode_start(uint8_t byte);
/*!
 * \brief Classify a single byte as a continuation byte.
 *
 * NOTE(review): presumably true for UTF-8 continuation bytes (bit pattern
 * 10xxxxxx); the exact test is in the implementation -- confirm.
 */
bool utf8_byte_is_unicode_continuation(uint8_t byte);
/*!
 * \brief Classify a single byte as belonging to a non-ASCII sequence.
 *
 * NOTE(review): presumably true for any byte that is either a start or a
 * continuation byte (i.e. high bit set); confirm against the implementation.
 */
bool utf8_byte_is_unicode(uint8_t byte);
/*!
 * \brief Classify a single byte as ASCII.
 *
 * NOTE(review): presumably true for bytes with the high bit clear
 * (0x00-0x7F); the exact test is in the implementation -- confirm.
 */
bool utf8_byte_is_ascii(uint8_t byte);
#endif
|