summary refs log tree commit diff
path: root/src/dged
diff options
context:
space:
mode:
Diffstat (limited to 'src/dged')
-rw-r--r-- src/dged/buffer.c 473
-rw-r--r-- src/dged/buffer.h 44
-rw-r--r-- src/dged/buffer_view.c 40
-rw-r--r-- src/dged/display.c 33
-rw-r--r-- src/dged/display.h 2
-rw-r--r-- src/dged/keyboard.c 32
-rw-r--r-- src/dged/syntax.c 68
-rw-r--r-- src/dged/text.c 233
-rw-r--r-- src/dged/text.h 54
-rw-r--r-- src/dged/utf8.c 152
-rw-r--r-- src/dged/utf8.h 28
11 files changed, 620 insertions, 539 deletions
diff --git a/src/dged/buffer.c b/src/dged/buffer.c
index 6051f69..1062a47 100644
--- a/src/dged/buffer.c
+++ b/src/dged/buffer.c
@@ -157,6 +157,42 @@ void buffer_static_teardown() {
}
}
+/**
+ * Look up the tab width that applies to a buffer.
+ *
+ * The "tab-width" setting of the buffer's language is consulted first. If
+ * the language has no such setting, the global "editor.tab-width" setting
+ * is used instead. When the setting that was found is missing or is not a
+ * number, the width defaults to 4.
+ *
+ * @param [in] buffer The buffer to get the tab width for.
+ * @returns The tab width in columns.
+ */
+static uint32_t get_tab_width(struct buffer *buffer) {
+  struct setting *setting = lang_setting(&buffer->lang, "tab-width");
+  if (setting == NULL) {
+    setting = settings_get("editor.tab-width");
+  }
+
+  // nothing usable configured: fall back to the built-in default
+  if (setting == NULL || setting->value.type != Setting_Number) {
+    return 4;
+  }
+
+  return setting->value.number_value;
+}
+
+/**
+ * Decide whether indentation in a buffer uses tab characters.
+ *
+ * The "use-tabs" setting of the buffer's language is consulted first. If
+ * the language has no such setting, the global "editor.use-tabs" setting
+ * is used instead. When the setting that was found is missing or is not a
+ * boolean, the answer is false.
+ *
+ * @param [in] buffer The buffer to check.
+ * @returns True if tabs should be used for indentation.
+ */
+static bool use_tabs(struct buffer *buffer) {
+  struct setting *setting = lang_setting(&buffer->lang, "use-tabs");
+  if (setting == NULL) {
+    setting = settings_get("editor.use-tabs");
+  }
+
+  // nothing usable configured: indent with spaces
+  if (setting == NULL || setting->value.type != Setting_Bool) {
+    return false;
+  }
+
+  return setting->value.bool_value;
+}
+
+/**
+ * Number of columns a codepoint occupies when shown in a buffer.
+ *
+ * @param [in] codepoint The codepoint to measure.
+ * @param [in] tab_width The width to report for a tab character.
+ * @returns @p tab_width for a tab, otherwise whatever
+ * unicode_visual_char_width reports for the codepoint.
+ */
+static uint32_t visual_char_width(struct codepoint *codepoint,
+                                  uint32_t tab_width) {
+  if (codepoint->codepoint != '\t') {
+    return unicode_visual_char_width(codepoint);
+  }
+
+  return tab_width;
+}
+
static struct buffer create_internal(const char *name, char *filename) {
struct buffer b = (struct buffer){
.filename = filename,
@@ -185,7 +221,7 @@ static struct buffer create_internal(const char *name, char *filename) {
static void strip_final_newline(struct buffer *b) {
uint32_t nlines = text_num_lines(b->text);
- if (nlines > 0 && text_line_length(b->text, nlines - 1) == 0) {
+ if (nlines > 0 && buffer_line_length(b, nlines - 1) == 0) {
text_delete(b->text, nlines - 1, 0, nlines - 1, 1);
}
}
@@ -207,7 +243,7 @@ static void buffer_read_from_file(struct buffer *b) {
int bytes = fread(buff, 1, 4096, file);
if (bytes > 0) {
uint32_t ignore;
- text_append(b->text, buff, bytes, &ignore, &ignore);
+ text_append(b->text, buff, bytes, &ignore);
} else if (bytes == 0) {
break; // EOF
} else {
@@ -239,70 +275,66 @@ static void write_line(struct text_chunk *chunk, void *userdata) {
fputc('\n', file);
}
-static bool is_word_break(uint8_t c) {
+static bool is_word_break(const struct codepoint *codepoint) {
+ uint32_t c = codepoint->codepoint;
return c == ' ' || c == '.' || c == '(' || c == ')' || c == '[' || c == ']' ||
- c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':';
+ c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':' ||
+ c == '"';
}
-static bool is_word_char(uint8_t c) { return !is_word_break(c); }
-
-struct match_result {
- struct location at;
- bool found;
-};
-
-static struct match_result find_next_in_line(struct buffer *buffer,
- struct location start,
- bool (*predicate)(uint8_t c)) {
- struct text_chunk line = text_get_line(buffer->text, start.line);
- bool found = false;
+static bool is_word_char(const struct codepoint *c) {
+ return !is_word_break(c);
+}
- if (line.nbytes == 0) {
+static struct match_result
+find_next_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c)) {
+ if (text_line_size(buffer->text, start.line) == 0) {
return (struct match_result){.at = start, .found = false};
}
- uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col);
- while (bytei < line.nbytes) {
- if (predicate(line.text[bytei])) {
+ bool found = false;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, start.line);
+ uint32_t coli = 0, tab_width = get_tab_width(buffer);
+ struct codepoint *codepoint;
+ while ((codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ if (coli >= start.col && predicate(codepoint)) {
found = true;
break;
}
- ++bytei;
+
+ coli += visual_char_width(codepoint, tab_width);
}
- uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei);
return (struct match_result){
- .at = (struct location){.line = start.line, .col = target_col},
- .found = found};
+ .at = (struct location){.line = start.line, .col = coli}, .found = found};
}
-static struct match_result find_prev_in_line(struct buffer *buffer,
- struct location start,
- bool (*predicate)(uint8_t c)) {
- struct text_chunk line = text_get_line(buffer->text, start.line);
- bool found = false;
+static struct match_result
+find_prev_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c)) {
- if (line.nbytes == 0) {
+ if (text_line_size(buffer->text, start.line) == 0) {
return (struct match_result){.at = start, .found = false};
}
- uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col);
- while (bytei > 0) {
- if (predicate(line.text[bytei])) {
+ bool found = false;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, start.line);
+ uint32_t coli = 0, tab_width = get_tab_width(buffer), found_at;
+ struct codepoint *codepoint;
+ while (coli < start.col && (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ if (predicate(codepoint)) {
found = true;
- break;
+ found_at = coli;
}
- --bytei;
- }
-
- // first byte on line can also be a match
- if (predicate(line.text[bytei])) {
- found = true;
+ coli += visual_char_width(codepoint, tab_width);
}
- uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei);
return (struct match_result){
- .at = (struct location){.line = start.line, .col = target_col},
+ .at =
+ (struct location){.line = start.line, .col = found ? found_at : coli},
.found = found};
}
@@ -315,13 +347,52 @@ static struct text_chunk *copy_region(struct buffer *buffer,
free(curr->text);
}
+ struct location begin_bytes =
+ buffer_location_to_byte_coords(buffer, region.begin);
+ struct location end_bytes =
+ buffer_location_to_byte_coords(buffer, region.end);
+
struct text_chunk txt =
- text_get_region(buffer->text, region.begin.line, region.begin.col,
- region.end.line, region.end.col);
+ text_get_region(buffer->text, begin_bytes.line, begin_bytes.col,
+ end_bytes.line, end_bytes.col);
*curr = txt;
return curr;
}
+/**
+ * Insert one level of indentation into a buffer.
+ *
+ * With @p use_tabs set, a single tab character is inserted. Otherwise
+ * @p tab_width spaces are inserted, capped at 16.
+ *
+ * @param [in] buffer The buffer to insert into.
+ * @param [in] at The location to insert at.
+ * @param [in] tab_width Number of spaces that make up one indentation level.
+ * @param [in] use_tabs Insert a tab character instead of spaces.
+ * @returns The location returned by buffer_add for the insertion.
+ */
+static struct location do_indent(struct buffer *buffer, struct location at,
+                                 uint32_t tab_width, bool use_tabs) {
+  if (use_tabs) {
+    return buffer_add(buffer, at, (uint8_t *)"\t", 1);
+  }
+
+  // Sixteen spaces of backing storage. The cap is derived from the literal
+  // itself so the number of bytes handed to buffer_add can never exceed
+  // what is actually stored here.
+  static const char spaces[] = "                ";
+  const uint32_t max_spaces = (uint32_t)(sizeof(spaces) - 1);
+
+  return buffer_add(buffer, at, (uint8_t *)spaces,
+                    tab_width > max_spaces ? max_spaces : tab_width);
+}
+
+/**
+ * Convert byte coordinates (line, byte column) to a byte offset counted
+ * from the start of the buffer.
+ *
+ * Every line before the target line contributes its size in bytes plus one
+ * byte for its newline. On the target line the byte column is added; if the
+ * column lies beyond the line's size, the line's size plus one is added
+ * instead. A line index past the last line is treated as the last line for
+ * that final step.
+ *
+ * @param [in] buffer The buffer to calculate the offset in.
+ * @param [in] bytecoords Location where col is a byte offset into the line.
+ * @returns The global byte offset, or 0 for a buffer without lines.
+ */
+static uint64_t to_global_offset(struct buffer *buffer,
+                                 struct location bytecoords) {
+  uint32_t nlines = buffer_num_lines(buffer);
+  if (nlines == 0) {
+    return 0;
+  }
+
+  uint32_t line = bytecoords.line;
+  uint32_t col = bytecoords.col;
+
+  // Accumulate in 64 bits: the result is returned as uint64_t, and a 32-bit
+  // running sum would silently wrap for buffers larger than 4 GiB.
+  uint64_t byteoff = 0;
+  for (uint32_t l = 0; l < line && l < nlines; ++l) {
+    // +1 for newline
+    byteoff += (uint64_t)text_line_size(buffer->text, l) + 1;
+  }
+
+  // handle last line
+  uint32_t l = line < nlines ? line : nlines - 1;
+  uint32_t nbytes = text_line_size(buffer->text, l);
+  byteoff += col <= nbytes ? (uint64_t)col : (uint64_t)nbytes + 1;
+
+  return byteoff;
+}
+
/* --------------------- buffer methods -------------------- */
struct buffer buffer_create(const char *name) {
@@ -452,18 +523,29 @@ struct location buffer_add(struct buffer *buffer, struct location at,
struct location initial = at;
struct location final = at;
- uint32_t lines_added, cols_added;
- text_insert_at(buffer->text, initial.line, initial.col, text, nbytes,
- &lines_added, &cols_added);
+ struct location at_bytes = buffer_location_to_byte_coords(buffer, at);
+
+ uint32_t lines_added;
+ text_insert_at(buffer->text, at_bytes.line, at_bytes.col, text, nbytes,
+ &lines_added);
// move to after inserted text
if (lines_added > 0) {
final = buffer_clamp(buffer, (int64_t)at.line + lines_added, 0);
} else {
+ uint32_t cols_added = 0, tab_width = get_tab_width(buffer);
+ struct utf8_codepoint_iterator iter =
+ create_utf8_codepoint_iterator(text, nbytes, 0);
+ struct codepoint *codepoint;
+ while ((codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ cols_added += visual_char_width(codepoint, tab_width);
+ }
final =
buffer_clamp(buffer, (int64_t)at.line, (int64_t)at.col + cols_added);
}
+ struct location final_bytes = buffer_location_to_byte_coords(buffer, final);
+
undo_push_add(
&buffer->undo,
(struct undo_add){.begin = {.row = initial.line, .col = initial.col},
@@ -474,11 +556,17 @@ struct location buffer_add(struct buffer *buffer, struct location at,
(struct undo_boundary){.save_point = false});
}
- uint32_t begin_idx = text_global_idx(buffer->text, initial.line, initial.col);
- uint32_t end_idx = text_global_idx(buffer->text, final.line, final.col);
+ uint32_t begin_idx = to_global_offset(buffer, at_bytes);
+ uint32_t end_idx = to_global_offset(buffer, final_bytes);
VEC_FOR_EACH(&buffer->hooks->insert_hooks, struct insert_hook * h) {
- h->callback(buffer, region_new(initial, final), begin_idx, end_idx,
+ h->callback(buffer,
+ (struct edit_location){
+ .coordinates = region_new(initial, final),
+ .bytes = region_new(at_bytes, final_bytes),
+ .global_byte_begin = begin_idx,
+ .global_byte_end = end_idx,
+ },
h->userdata);
}
@@ -488,15 +576,16 @@ struct location buffer_add(struct buffer *buffer, struct location at,
struct location buffer_set_text(struct buffer *buffer, uint8_t *text,
uint32_t nbytes) {
- uint32_t lines, cols;
+ uint32_t lines_added;
text_clear(buffer->text);
- text_append(buffer->text, text, nbytes, &lines, &cols);
+ text_append(buffer->text, text, nbytes, &lines_added);
// if last line is empty, remove it
strip_final_newline(buffer);
- return buffer_clamp(buffer, lines, cols);
+ return buffer_clamp(buffer, lines_added,
+ buffer_line_length(buffer, lines_added));
}
void buffer_clear(struct buffer *buffer) { text_clear(buffer->text); }
@@ -524,9 +613,18 @@ struct location buffer_previous_char(struct buffer *buffer,
}
--dot.line;
- dot.col = buffer_num_chars(buffer, dot.line);
+ dot.col = buffer_line_length(buffer, dot.line);
} else {
- --dot.col;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, dot.line);
+ struct codepoint *codepoint;
+ uint32_t coli = 0, tab_width = get_tab_width(buffer), last_width = 0;
+ while (coli < dot.col && (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ last_width = visual_char_width(codepoint, tab_width);
+ coli += last_width;
+ }
+
+ dot.col = coli - last_width;
}
return dot;
@@ -571,14 +669,14 @@ struct location buffer_previous_line(struct buffer *buffer,
}
--dot.line;
- uint32_t nchars = buffer_num_chars(buffer, dot.line);
+ uint32_t nchars = buffer_line_length(buffer, dot.line);
uint32_t new_col = dot.col > nchars ? nchars : dot.col;
return dot;
}
struct location buffer_next_char(struct buffer *buffer, struct location dot) {
- if (dot.col == buffer_num_chars(buffer, dot.line)) {
+ if (dot.col == buffer_line_length(buffer, dot.line)) {
uint32_t lastline = buffer->lazy_row_add ? buffer_num_lines(buffer)
: buffer_num_lines(buffer) - 1;
if (dot.line == lastline) {
@@ -588,7 +686,16 @@ struct location buffer_next_char(struct buffer *buffer, struct location dot) {
dot.col = 0;
++dot.line;
} else {
- ++dot.col;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, dot.line);
+ struct codepoint *codepoint;
+ uint32_t coli = 0;
+ while (coli <= dot.col &&
+ (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ coli += visual_char_width(codepoint, get_tab_width(buffer));
+ }
+
+ dot.col = coli;
}
return dot;
@@ -635,7 +742,7 @@ struct location buffer_next_line(struct buffer *buffer, struct location dot) {
++dot.line;
uint32_t new_col = dot.col;
- uint32_t nchars = buffer_num_chars(buffer, dot.line);
+ uint32_t nchars = buffer_line_length(buffer, dot.line);
new_col = new_col > nchars ? nchars : new_col;
return dot;
@@ -664,8 +771,8 @@ struct location buffer_clamp(struct buffer *buffer, int64_t line, int64_t col) {
// clamp col
if (col < 0) {
col = 0;
- } else if (col > buffer_num_chars(buffer, line)) {
- col = buffer_num_chars(buffer, line);
+ } else if (col > buffer_line_length(buffer, line)) {
+ col = buffer_line_length(buffer, line);
}
location.col = col;
@@ -681,7 +788,7 @@ struct location buffer_end(struct buffer *buffer) {
return (struct location){.line = nlines, .col = 0};
} else {
return (struct location){.line = nlines - 1,
- .col = buffer_num_chars(buffer, nlines - 1)};
+ .col = buffer_line_length(buffer, nlines - 1)};
}
}
@@ -689,55 +796,22 @@ uint32_t buffer_num_lines(struct buffer *buffer) {
return text_num_lines(buffer->text);
}
-uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line) {
- if (line >= buffer_num_lines(buffer)) {
- return 0;
+uint32_t buffer_line_length(struct buffer *buffer, uint32_t line) {
+ uint32_t tab_size = get_tab_width(buffer), len = 0;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, line);
+ struct codepoint *codepoint;
+ while ((codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ len += visual_char_width(codepoint, tab_size);
}
- return text_line_length(buffer->text, line);
+ return len;
}
struct location buffer_newline(struct buffer *buffer, struct location at) {
return buffer_add(buffer, at, (uint8_t *)"\n", 1);
}
-static uint32_t get_tab_width(struct buffer *buffer) {
- struct setting *tw = lang_setting(&buffer->lang, "tab-width");
- if (tw == NULL) {
- tw = settings_get("editor.tab-width");
- }
-
- uint32_t tab_width = 4;
- if (tw != NULL && tw->value.type == Setting_Number) {
- tab_width = tw->value.number_value;
- }
- return tab_width;
-}
-
-static bool use_tabs(struct buffer *buffer) {
- struct setting *ut = lang_setting(&buffer->lang, "use-tabs");
- if (ut == NULL) {
- ut = settings_get("editor.use-tabs");
- }
-
- bool use_tabs = false;
- if (ut != NULL && ut->value.type == Setting_Bool) {
- use_tabs = ut->value.bool_value;
- }
-
- return use_tabs;
-}
-
-static struct location do_indent(struct buffer *buffer, struct location at,
- uint32_t tab_width, bool use_tabs) {
- if (use_tabs) {
- return buffer_add(buffer, at, (uint8_t *)"\t", 1);
- } else {
- return buffer_add(buffer, at, (uint8_t *)" ",
- tab_width > 16 ? 16 : tab_width);
- }
-}
-
struct location buffer_indent(struct buffer *buffer, struct location at) {
return do_indent(buffer, at, get_tab_width(buffer), use_tabs(buffer));
}
@@ -778,16 +852,13 @@ struct location buffer_undo(struct buffer *buffer, struct location dot) {
case Undo_Add: {
struct undo_add *add = &rec->add;
- pos =
- buffer_delete(buffer, (struct region){.begin =
- (struct location){
- .line = add->begin.row,
- .col = add->begin.col,
- },
- .end = (struct location){
- .line = add->end.row,
- .col = add->end.col,
- }});
+ pos = buffer_delete(buffer,
+ (struct region){
+ .begin = (struct location){.line = add->begin.row,
+ .col = add->begin.col},
+ .end = (struct location){.line = add->end.row,
+ .col = add->end.col},
+ });
break;
}
@@ -888,9 +959,14 @@ struct location buffer_delete(struct buffer *buffer, struct region region) {
return region.begin;
}
+ struct location begin_bytes =
+ buffer_location_to_byte_coords(buffer, region.begin);
+ struct location end_bytes =
+ buffer_location_to_byte_coords(buffer, region.end);
+
struct text_chunk txt =
- text_get_region(buffer->text, region.begin.line, region.begin.col,
- region.end.line, region.end.col);
+ text_get_region(buffer->text, begin_bytes.line, begin_bytes.col,
+ end_bytes.line, end_bytes.col);
undo_push_boundary(&buffer->undo,
(struct undo_boundary){.save_point = false});
@@ -903,17 +979,22 @@ struct location buffer_delete(struct buffer *buffer, struct region region) {
undo_push_boundary(&buffer->undo,
(struct undo_boundary){.save_point = false});
- uint32_t begin_idx =
- text_global_idx(buffer->text, region.begin.line, region.begin.col);
- uint32_t end_idx =
- text_global_idx(buffer->text, region.end.line, region.end.col);
+ uint64_t begin_idx = to_global_offset(buffer, begin_bytes);
+ uint64_t end_idx = to_global_offset(buffer, end_bytes);
- text_delete(buffer->text, region.begin.line, region.begin.col,
- region.end.line, region.end.col);
+ text_delete(buffer->text, begin_bytes.line, begin_bytes.col, end_bytes.line,
+ end_bytes.col);
buffer->modified = true;
VEC_FOR_EACH(&buffer->hooks->delete_hooks, struct delete_hook * h) {
- h->callback(buffer, region, begin_idx, end_idx, h->userdata);
+ h->callback(buffer,
+ (struct edit_location){
+ .coordinates = region,
+ .bytes = region_new(begin_bytes, end_bytes),
+ .global_byte_begin = begin_idx,
+ .global_byte_end = end_idx,
+ },
+ h->userdata);
}
return region.begin;
@@ -1035,27 +1116,6 @@ struct cmdbuf {
struct buffer *buffer;
};
-static uint32_t visual_char_width(uint8_t *byte, uint32_t maxlen) {
- if (*byte == '\t') {
- return 4;
- } else {
- return utf8_visual_char_width(byte, maxlen);
- }
-}
-
-uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col,
- uint32_t end_col) {
- uint32_t start_byte = utf8_nbytes(txt, len, start_col);
- uint32_t end_byte = utf8_nbytes(txt, len, end_col);
-
- uint32_t width = 0;
- for (uint32_t bytei = start_byte; bytei < end_byte; ++bytei) {
- width += visual_char_width(&txt[bytei], len - bytei);
- }
-
- return width;
-}
-
static void apply_properties(struct command_list *cmds,
struct text_property *properties[],
uint32_t nproperties) {
@@ -1097,65 +1157,67 @@ void render_line(struct text_chunk *line, void *userdata) {
command_list_set_show_whitespace(cmdbuf->cmds, cmdbuf->show_ws);
// calculate scroll offsets
- uint32_t scroll_bytes =
- utf8_nbytes(line->text, line->nbytes, cmdbuf->origin.col);
- uint32_t text_nbytes_scroll =
- scroll_bytes > line->nbytes ? 0 : line->nbytes - scroll_bytes;
- uint8_t *text = line->text + scroll_bytes;
-
- uint32_t visual_col_start = 0;
- uint32_t cur_visual_col = 0;
- uint32_t start_byte = 0, text_nbytes = 0;
struct text_property *properties[32] = {0};
uint64_t prev_properties_hash = 0;
- for (uint32_t cur_byte = start_byte, coli = 0;
- cur_byte < text_nbytes_scroll && cur_visual_col < cmdbuf->width &&
- coli < line->nchars - cmdbuf->origin.col;
- ++coli) {
+ uint32_t tab_width = get_tab_width(cmdbuf->buffer);
+
+ // handle scroll column offset
+ uint32_t coli = 0, bytei = 0;
+ struct utf8_codepoint_iterator iter = text_chunk_codepoint_iterator(line);
+ struct codepoint *codepoint;
+ while (coli < cmdbuf->origin.col &&
+ (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ coli += visual_char_width(codepoint, tab_width);
+ bytei += codepoint->nbytes;
+ }
- uint32_t bytes_remaining = text_nbytes_scroll - cur_byte;
- uint32_t char_nbytes = utf8_nbytes(text + cur_byte, bytes_remaining, 1);
- uint32_t char_vwidth = visual_char_width(text + cur_byte, bytes_remaining);
+ // coli is the visual column [0..width-1]
+ coli = 0;
+ uint32_t drawn_bytei = bytei;
+ uint32_t drawn_coli = coli;
+ while (coli < cmdbuf->width &&
+ (codepoint = utf8_next_codepoint(&iter)) != NULL) {
// calculate character properties
uint32_t nproperties = 0;
- text_get_properties(
- cmdbuf->buffer->text,
- (struct location){.line = line->line, .col = coli + cmdbuf->origin.col},
- properties, 32, &nproperties);
+ text_get_properties(cmdbuf->buffer->text, line->line, bytei, properties, 32,
+ &nproperties);
// if we have any new or lost props, flush text up until now, reset
// and re-apply current properties
uint64_t new_properties_hash = properties_hash(properties, nproperties);
if (new_properties_hash != prev_properties_hash) {
- command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line,
- text + start_byte, cur_byte - start_byte);
+ command_list_draw_text(cmdbuf->cmds, drawn_coli, visual_line,
+ line->text + drawn_bytei, bytei - drawn_bytei);
command_list_reset_color(cmdbuf->cmds);
- visual_col_start = cur_visual_col;
- start_byte = cur_byte;
+ drawn_coli = coli;
+ drawn_bytei = bytei;
// apply new properties
apply_properties(cmdbuf->cmds, properties, nproperties);
}
prev_properties_hash = new_properties_hash;
- cur_byte += char_nbytes;
- text_nbytes += char_nbytes;
- cur_visual_col += char_vwidth;
+ bytei += codepoint->nbytes;
+ coli += visual_char_width(codepoint, tab_width);
}
// flush remaining
- command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line,
- text + start_byte, text_nbytes - start_byte);
+ command_list_draw_text(cmdbuf->cmds, drawn_coli, visual_line,
+ line->text + drawn_bytei, bytei - drawn_bytei);
+
+ drawn_coli = coli;
+ drawn_bytei = bytei;
command_list_reset_color(cmdbuf->cmds);
command_list_set_show_whitespace(cmdbuf->cmds, false);
- if (cur_visual_col < cmdbuf->width) {
- command_list_draw_repeated(cmdbuf->cmds, cur_visual_col, visual_line, ' ',
- cmdbuf->width - cur_visual_col);
+ // TODO: considering the whole screen is cleared, is this really needed?
+ if (drawn_coli < cmdbuf->width) {
+ command_list_draw_repeated(cmdbuf->cmds, drawn_coli, visual_line, ' ',
+ cmdbuf->width - drawn_coli);
}
}
@@ -1200,19 +1262,19 @@ void buffer_render(struct buffer *buffer, struct buffer_render_params *params) {
void buffer_add_text_property(struct buffer *buffer, struct location start,
struct location end,
struct text_property property) {
- text_add_property(
- buffer->text, (struct location){.line = start.line, .col = start.col},
- (struct location){.line = end.line, .col = end.col}, property);
+ struct location bytestart = buffer_location_to_byte_coords(buffer, start);
+ struct location byteend = buffer_location_to_byte_coords(buffer, end);
+ text_add_property(buffer->text, bytestart.line, bytestart.col, byteend.line,
+ byteend.col, property);
}
void buffer_get_text_properties(struct buffer *buffer, struct location location,
struct text_property **properties,
uint32_t max_nproperties,
uint32_t *nproperties) {
- text_get_properties(
- buffer->text,
- (struct location){.line = location.line, .col = location.col}, properties,
- max_nproperties, nproperties);
+ struct location bytecoords = buffer_location_to_byte_coords(buffer, location);
+ text_get_properties(buffer->text, bytecoords.line, bytecoords.col, properties,
+ max_nproperties, nproperties);
}
void buffer_clear_text_properties(struct buffer *buffer) {
@@ -1244,9 +1306,12 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line,
(struct location){.line = end + 1, .col = 0});
struct s8 *lines = (struct s8 *)malloc(sizeof(struct s8) * ntosort);
- struct text_chunk txt =
- text_get_region(buffer->text, region.begin.line, region.begin.col,
- region.end.line, region.end.col);
+
+ struct location bytebeg =
+ buffer_location_to_byte_coords(buffer, region.begin);
+ struct location byteend = buffer_location_to_byte_coords(buffer, region.end);
+ struct text_chunk txt = text_get_region(
+ buffer->text, bytebeg.line, bytebeg.col, byteend.line, byteend.col);
uint32_t line_start = 0;
uint32_t curr_line = 0;
@@ -1278,3 +1343,41 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line,
free(txt.text);
}
}
+
+/**
+ * Translate a location given in visual columns into byte coordinates.
+ *
+ * @param [in] buffer The buffer whose line is inspected.
+ * @param [in] coords Location as line and visual column.
+ * @returns A location on the same line where col is a byte offset into
+ * that line.
+ */
+struct location buffer_location_to_byte_coords(struct buffer *buffer,
+                                               struct location coords) {
+  struct utf8_codepoint_iterator iter =
+      text_line_codepoint_iterator(buffer->text, coords.line);
+  uint32_t tab_width = get_tab_width(buffer);
+  uint32_t nbytes = 0;
+  uint32_t reached_col = 0;
+  struct codepoint *last = NULL;
+
+  /* Deliberately step through the target column itself, not just up to it,
+   * so that zero-width codepoints are accounted for in the byte offset.
+   */
+  for (;;) {
+    if (reached_col > coords.col) {
+      break;
+    }
+
+    last = utf8_next_codepoint(&iter);
+    if (last == NULL) {
+      break;
+    }
+
+    nbytes += last->nbytes;
+    reached_col += visual_char_width(last, tab_width);
+  }
+
+  /* The codepoint that carried the walk past the target column lies after
+   * the requested position, so its bytes are taken off again. If the line
+   * ended first there is nothing to take off.
+   */
+  if (last != NULL) {
+    nbytes -= last->nbytes;
+  }
+
+  return (struct location){.line = coords.line, .col = nbytes};
+}
+
+/**
+ * Public entry point for the backwards in-line search.
+ *
+ * Forwards all arguments unchanged to find_prev_in_line and hands back its
+ * result.
+ */
+struct match_result
+buffer_find_prev_in_line(struct buffer *buffer, struct location start,
+                         bool (*predicate)(const struct codepoint *c)) {
+  struct match_result result = find_prev_in_line(buffer, start, predicate);
+  return result;
+}
+
+/**
+ * Public entry point for the forwards in-line search.
+ *
+ * Forwards all arguments unchanged to find_next_in_line and hands back its
+ * result.
+ */
+struct match_result
+buffer_find_next_in_line(struct buffer *buffer, struct location start,
+                         bool (*predicate)(const struct codepoint *c)) {
+  struct match_result result = find_next_in_line(buffer, start, predicate);
+  return result;
+}
diff --git a/src/dged/buffer.h b/src/dged/buffer.h
index cd5bd95..c9fe2ca 100644
--- a/src/dged/buffer.h
+++ b/src/dged/buffer.h
@@ -295,13 +295,13 @@ struct location buffer_end(struct buffer *buffer);
uint32_t buffer_num_lines(struct buffer *buffer);
/**
- * Get the number of chars in a given line in buffer.
+ * Get the line length in number of column positions.
*
* @param [in] buffer The buffer to use.
- * @param [in] line The line to get number of chars for.
- * @returns The number of chars in @ref line.
+ * @param [in] line The line to get number of columns for.
+ * @returns The number of column positions in the current line.
*/
-uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line);
+uint32_t buffer_line_length(struct buffer *buffer, uint32_t line);
/**
* Insert a newline in the buffer.
@@ -555,6 +555,13 @@ uint32_t buffer_add_reload_hook(struct buffer *buffer, reload_hook_cb callback,
void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id,
remove_hook_cb callback);
+/**
+ * Describes where an edit (insertion or deletion) took place, in every
+ * coordinate system handed to the insert and delete hooks.
+ */
+struct edit_location {
+ /** The edited region in buffer coordinates (line and column). */
+ struct region coordinates;
+ /** The edited region in byte coordinates (line and byte offset in line). */
+ struct region bytes;
+ /** Byte offset from the start of the buffer to the beginning of the edit. */
+ uint64_t global_byte_begin;
+ /** Byte offset from the start of the buffer to the end of the edit. */
+ uint64_t global_byte_end;
+};
+
/**
* Buffer insert hook callback function.
*
@@ -565,9 +572,8 @@ void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id,
* @param end_idx The global byte offset to the end of where text was inserted.
* @param userdata The userdata as sent in to @ref buffer_add_insert_hook.
*/
-typedef void (*insert_hook_cb)(struct buffer *buffer, struct region inserted,
- uint32_t begin_idx, uint32_t end_idx,
- void *userdata);
+typedef void (*insert_hook_cb)(struct buffer *buffer,
+ struct edit_location inserted, void *userdata);
/**
* Add an insert hook, called when text is inserted into the @p buffer.
@@ -600,9 +606,8 @@ void buffer_remove_insert_hook(struct buffer *buffer, uint32_t hook_id,
* @param end_idx The global byte offset to the end of the removed text.
* @param userdata The userdata as sent in to @ref buffer_add_delete_hook.
*/
-typedef void (*delete_hook_cb)(struct buffer *buffer, struct region removed,
- uint32_t begin_idx, uint32_t end_idx,
- void *userdata);
+typedef void (*delete_hook_cb)(struct buffer *buffer,
+ struct edit_location removed, void *userdata);
/**
* Add a delete hook, called when text is removed from the @p buffer.
@@ -724,10 +729,6 @@ void buffer_update(struct buffer *buffer, struct buffer_update_params *params);
*/
void buffer_render(struct buffer *buffer, struct buffer_render_params *params);
-// TODO: move this to where it makes sense
-uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col,
- uint32_t end_col);
-
/**
* Sort lines in a buffer alphabetically.
*
@@ -738,4 +739,19 @@ uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col,
void buffer_sort_lines(struct buffer *buffer, uint32_t start_line,
uint32_t end_line);
+/**
+ * Convert a location in column coordinates to byte coordinates.
+ *
+ * @param [in] buffer The buffer the location refers to.
+ * @param [in] coords The location, as line and visual column.
+ * @returns A location on the same line where col is the byte offset into
+ * that line.
+ */
+struct location buffer_location_to_byte_coords(struct buffer *buffer,
+ struct location coords);
+
+/**
+ * Result of searching a line for a codepoint that satisfies a predicate.
+ */
+struct match_result {
+ /** Location of the match; only a match position when found is true. */
+ struct location at;
+ /** True if a codepoint satisfying the predicate was found. */
+ bool found;
+};
+
+/**
+ * Search backwards in a line for a codepoint satisfying a predicate.
+ *
+ * Only codepoints located before the column of @p start are considered and
+ * the match closest to @p start is the one reported.
+ *
+ * @param [in] buffer The buffer to search in.
+ * @param [in] start The line to search and the column to search back from.
+ * @param [in] predicate Called for each candidate codepoint; returns true
+ * for a match.
+ * @returns Whether a match was found and, if so, the location of it.
+ */
+struct match_result
+buffer_find_prev_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c));
+/**
+ * Search forwards in a line for a codepoint satisfying a predicate.
+ *
+ * Only codepoints located at or after the column of @p start are
+ * considered and the first match is the one reported.
+ *
+ * @param [in] buffer The buffer to search in.
+ * @param [in] start The line to search and the column to search from.
+ * @param [in] predicate Called for each candidate codepoint; returns true
+ * for a match.
+ * @returns Whether a match was found and, if so, the location of it.
+ */
+struct match_result
+buffer_find_next_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c));
+
#endif
diff --git a/src/dged/buffer_view.c b/src/dged/buffer_view.c
index 4e67d78..f3dd2b9 100644
--- a/src/dged/buffer_view.c
+++ b/src/dged/buffer_view.c
@@ -128,7 +128,7 @@ void buffer_view_backward_nlines(struct buffer_view *view, uint32_t nlines) {
}
void buffer_view_goto_end_of_line(struct buffer_view *view) {
- view->dot.col = buffer_num_chars(view->buffer, view->dot.line);
+ view->dot.col = buffer_line_length(view->buffer, view->dot.line);
}
void buffer_view_goto_beginning_of_line(struct buffer_view *view) {
@@ -224,15 +224,22 @@ void buffer_view_delete_word(struct buffer_view *view) {
}
void buffer_view_kill_line(struct buffer_view *view) {
- uint32_t nchars =
- buffer_num_chars(view->buffer, view->dot.line) - view->dot.col;
- if (nchars == 0) {
- nchars = 1;
+ uint32_t ncols =
+ buffer_line_length(view->buffer, view->dot.line) - view->dot.col;
+
+ uint32_t line = view->dot.line;
+ uint32_t col = view->dot.col + ncols;
+
+ // kill the newline if we are at the end of the line
+ if (ncols == 0) {
+ struct location loc = buffer_next_char(view->buffer, view->dot);
+ line = loc.line;
+ col = loc.col;
}
struct region reg = region_new(view->dot, (struct location){
- .line = view->dot.line,
- .col = view->dot.col + nchars,
+ .line = line,
+ .col = col,
});
buffer_cut(view->buffer, reg);
@@ -241,7 +248,8 @@ void buffer_view_kill_line(struct buffer_view *view) {
void buffer_view_sort_lines(struct buffer_view *view) {
struct region reg = region_new(view->dot, view->mark);
if (view->mark_set && region_has_size(reg)) {
- if (reg.end.line > 0 && buffer_num_chars(view->buffer, reg.end.line) == 0) {
+ if (reg.end.line > 0 &&
+ buffer_line_length(view->buffer, reg.end.line) == 0) {
reg.end.line -= 1;
}
@@ -271,21 +279,7 @@ struct location buffer_view_dot_to_relative(struct buffer_view *view) {
}
struct location buffer_view_dot_to_visual(struct buffer_view *view) {
- // calculate visual column index for dot column
- struct text_chunk c = buffer_line(view->buffer, view->dot.line);
- uint32_t width = visual_string_width(c.text, c.nbytes, 0, view->dot.col);
- if (view->scroll.col > 0) {
- width -= visual_string_width(c.text, c.nbytes, 0, view->scroll.col);
- }
-
- struct location l = buffer_view_dot_to_relative(view);
- l.col = width + view->fringe_width;
-
- if (c.allocated) {
- free(c.text);
- }
-
- return l;
+ return buffer_view_dot_to_relative(view);
}
void buffer_view_undo(struct buffer_view *view) {
diff --git a/src/dged/display.c b/src/dged/display.c
index bc604f0..ea3f459 100644
--- a/src/dged/display.c
+++ b/src/dged/display.c
@@ -60,7 +60,7 @@ struct push_fmt_cmd {
struct repeat_cmd {
uint32_t col;
uint32_t row;
- int32_t c;
+ uint32_t c;
uint32_t nrepeat;
};
@@ -135,21 +135,7 @@ void display_destroy(struct display *display) {
uint32_t display_width(struct display *display) { return display->width; }
uint32_t display_height(struct display *display) { return display->height; }
-void putch(uint8_t c) {
- // TODO: move this to buffer rendering
- if (c < ' ') {
- fprintf(stdout, "^%c", c + 0x40);
- } else if (c == 0x7f) {
- fprintf(stdout, "^?");
- } else if (utf8_byte_is_unicode_start(c) ||
- utf8_byte_is_unicode_continuation(c)) {
- putc(c, stdout);
- } else if (c >= ' ' && c < 0x7f) {
- putc(c, stdout);
- } else {
- fprintf(stdout, "|0x%02x|", c);
- }
-}
+void putch(uint8_t c) { putc(c, stdout); }
static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) {
if (fmt_stack == NULL || fmt_stack_len == 0) {
@@ -164,6 +150,7 @@ static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) {
void putch_ws(uint8_t c, bool show_whitespace, uint8_t *fmt_stack,
uint32_t fmt_stack_len) {
+ // TODO: tab width needs to be sent here
if (show_whitespace && c == '\t') {
fputs("\x1b[90m → \x1b[39m", stdout);
apply_fmt(fmt_stack, fmt_stack_len);
@@ -295,7 +282,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col,
}
void command_list_draw_repeated(struct command_list *list, uint32_t col,
- uint32_t row, int32_t c, uint32_t nrepeat) {
+ uint32_t row, uint32_t c, uint32_t nrepeat) {
struct repeat_cmd *cmd = add_command(list, RenderCommand_Repeat)->repeat;
cmd->col = col;
cmd->row = row;
@@ -401,10 +388,14 @@ void display_render(struct display *display,
display_move_cursor(display, repeat_cmd->row + cl->yoffset,
repeat_cmd->col + cl->xoffset);
apply_fmt(fmt_stack, fmt_stack_len);
- uint32_t nbytes = utf8_nbytes((uint8_t *)&repeat_cmd->c, 4, 1);
- for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) {
- putbytes((uint8_t *)&repeat_cmd->c, nbytes, show_whitespace_state,
- fmt_stack, fmt_stack_len);
+ struct utf8_codepoint_iterator iter =
+ create_utf8_codepoint_iterator((uint8_t *)&repeat_cmd->c, 4, 0);
+ struct codepoint *codepoint = utf8_next_codepoint(&iter);
+ if (codepoint != NULL) {
+ for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) {
+ putbytes((uint8_t *)&repeat_cmd->c, codepoint->nbytes,
+ show_whitespace_state, fmt_stack, fmt_stack_len);
+ }
}
break;
}
diff --git a/src/dged/display.h b/src/dged/display.h
index 0fda30d..f9c7ef8 100644
--- a/src/dged/display.h
+++ b/src/dged/display.h
@@ -238,7 +238,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col,
* @param nrepeat Number of times to repeat byte.
*/
void command_list_draw_repeated(struct command_list *list, uint32_t col,
- uint32_t row, int32_t c, uint32_t nrepeat);
+ uint32_t row, uint32_t c, uint32_t nrepeat);
void command_list_draw_command_list(struct command_list *list,
struct command_list *to_draw);
diff --git a/src/dged/keyboard.c b/src/dged/keyboard.c
index 26eb308..04565e0 100644
--- a/src/dged/keyboard.c
+++ b/src/dged/keyboard.c
@@ -78,20 +78,24 @@ void parse_keys(uint8_t *bytes, uint32_t nbytes, struct key *out_keys,
} else if (utf8_byte_is_unicode_continuation(b)) {
// do nothing for these
} else { // ascii char or unicode start byte (self-inserting)
- uint32_t nb = utf8_byte_is_unicode_start(b)
- ? utf8_nbytes(bytes + bytei, nbytes - bytei, 1)
- : 1;
-
- // "compress" number of keys if previous key was also a
- // "simple" key
- if (prev_kp != NULL && prev_kp->mod == None) {
- prev_kp->end += nb;
- } else {
- kp->mod = None;
- kp->key = b;
- kp->start = bytei;
- kp->end = bytei + nb;
- ++nkps;
+ // TODO: do this better
+ struct utf8_codepoint_iterator iter =
+ create_utf8_codepoint_iterator(bytes + bytei, nbytes - bytei, 0);
+ struct codepoint *codepoint = utf8_next_codepoint(&iter);
+ if (codepoint != NULL) {
+ uint32_t nb = codepoint->nbytes;
+
+ // "compress" number of keys if previous key was also a
+ // "simple" key
+ if (prev_kp != NULL && prev_kp->mod == None) {
+ prev_kp->end += nb;
+ } else {
+ kp->mod = None;
+ kp->key = b;
+ kp->start = bytei;
+ kp->end = bytei + nb;
+ ++nkps;
+ }
}
}
}
diff --git a/src/dged/syntax.c b/src/dged/syntax.c
index 8d0fd1a..569dc70 100644
--- a/src/dged/syntax.c
+++ b/src/dged/syntax.c
@@ -342,7 +342,8 @@ static void update_parser(struct buffer *buffer, void *userdata,
: origin.line + height;
ts_query_cursor_set_point_range(
cursor, (TSPoint){.row = origin.line, .column = origin.col},
- (TSPoint){.row = end_line, .column = buffer_num_chars(buffer, end_line)});
+ (TSPoint){.row = end_line,
+ .column = buffer_line_length(buffer, end_line)});
ts_query_cursor_exec(cursor, h->query, ts_tree_root_node(h->tree));
TSQueryMatch match;
@@ -406,47 +407,39 @@ static void update_parser(struct buffer *buffer, void *userdata,
continue;
}
- buffer_add_text_property(
- buffer,
- (struct location){.line = start.row,
- .col = text_byteindex_to_col(
- buffer->text, start.row, start.column)},
- (struct location){.line = end.row,
- .col = text_byteindex_to_col(buffer->text, end.row,
- end.column - 1)},
- (struct text_property){
- .type = TextProperty_Colors,
- .colors =
- (struct text_property_colors){
- .set_fg = true,
- .fg = color,
- },
- });
+ text_add_property(buffer->text, start.row, start.column, end.row,
+ end.column > 0 ? end.column - 1 : 0,
+ (struct text_property){
+ .type = TextProperty_Colors,
+ .colors =
+ (struct text_property_colors){
+ .set_fg = true,
+ .fg = color,
+ },
+ });
}
}
ts_query_cursor_delete(cursor);
}
-static void text_removed(struct buffer *buffer, struct region removed,
- uint32_t begin_idx, uint32_t end_idx, void *userdata) {
+static void text_removed(struct buffer *buffer, struct edit_location removed,
+ void *userdata) {
struct highlight *h = (struct highlight *)userdata;
- TSPoint begin = {.row = removed.begin.line,
- .column = text_col_to_byteindex(
- buffer->text, removed.begin.line, removed.begin.col)};
+ TSPoint begin = {.row = removed.bytes.begin.line,
+ .column = removed.bytes.begin.col};
TSPoint new_end = begin;
- TSPoint old_end = {.row = removed.end.line,
- .column = text_col_to_byteindex(
- buffer->text, removed.end.line, removed.end.col)};
+ TSPoint old_end = {.row = removed.bytes.end.line,
+ .column = removed.bytes.end.col};
TSInputEdit edit = {
.start_point = begin,
.old_end_point = old_end,
.new_end_point = new_end,
- .start_byte = begin_idx,
- .old_end_byte = end_idx,
- .new_end_byte = begin_idx,
+ .start_byte = removed.global_byte_begin,
+ .old_end_byte = removed.global_byte_end,
+ .new_end_byte = removed.global_byte_begin,
};
ts_tree_edit(h->tree, &edit);
@@ -479,27 +472,24 @@ static void buffer_reloaded(struct buffer *buffer, void *userdata) {
}
}
-static void text_inserted(struct buffer *buffer, struct region inserted,
- uint32_t begin_idx, uint32_t end_idx,
+static void text_inserted(struct buffer *buffer, struct edit_location inserted,
void *userdata) {
struct timer *text_inserted = timer_start("syntax.txt-inserted");
struct highlight *h = (struct highlight *)userdata;
- TSPoint begin = {.row = inserted.begin.line,
- .column = text_col_to_byteindex(
- buffer->text, inserted.begin.line, inserted.begin.col)};
+ TSPoint begin = {.row = inserted.bytes.begin.line,
+ .column = inserted.bytes.begin.col};
TSPoint old_end = begin;
- TSPoint new_end = {.row = inserted.end.line,
- .column = text_col_to_byteindex(
- buffer->text, inserted.end.line, inserted.end.col)};
+ TSPoint new_end = {.row = inserted.bytes.end.line,
+ .column = inserted.bytes.end.col};
TSInputEdit edit = {
.start_point = begin,
.old_end_point = old_end,
.new_end_point = new_end,
- .start_byte = begin_idx,
- .old_end_byte = begin_idx,
- .new_end_byte = end_idx,
+ .start_byte = inserted.global_byte_begin,
+ .old_end_byte = inserted.global_byte_begin,
+ .new_end_byte = inserted.global_byte_end,
};
ts_tree_edit(h->tree, &edit);
diff --git a/src/dged/text.c b/src/dged/text.c
index 3d1078f..18ab04f 100644
--- a/src/dged/text.c
+++ b/src/dged/text.c
@@ -18,7 +18,6 @@ struct line {
uint8_t *data;
uint8_t flags;
uint32_t nbytes;
- uint32_t nchars;
};
struct text_property_entry {
@@ -54,11 +53,9 @@ void text_destroy(struct text *text) {
text->lines[li].data = NULL;
text->lines[li].flags = 0;
text->lines[li].nbytes = 0;
- text->lines[li].nchars = 0;
}
free(text->lines);
-
free(text);
}
@@ -68,68 +65,25 @@ void text_clear(struct text *text) {
text->lines[li].data = NULL;
text->lines[li].flags = 0;
text->lines[li].nbytes = 0;
- text->lines[li].nchars = 0;
}
text->nlines = 0;
text_clear_properties(text);
}
-// given `char_idx` as a character index, return the byte index
-uint32_t charidx_to_byteidx(struct line *line, uint32_t char_idx) {
- if (line->nchars == 0) {
- return 0;
- }
-
- if (char_idx > line->nchars) {
- return line->nbytes - 1;
- }
-
- return utf8_nbytes(line->data, line->nbytes, char_idx);
-}
-
-uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col) {
- return charidx_to_byteidx(&text->lines[line], col);
-}
-
-// given `byte_idx` as a byte index, return the character index
-uint32_t byteidx_to_charidx(struct line *line, uint32_t byte_idx) {
- if (byte_idx > line->nbytes) {
- return line->nchars;
+struct utf8_codepoint_iterator
+text_line_codepoint_iterator(const struct text *text, uint32_t lineidx) {
+ if (lineidx >= text_num_lines(text)) {
+ return create_utf8_codepoint_iterator(NULL, 0, 0);
}
- return utf8_nchars(line->data, byte_idx);
+ return create_utf8_codepoint_iterator(text->lines[lineidx].data,
+ text->lines[lineidx].nbytes, 0);
}
-uint32_t text_byteindex_to_col(struct text *text, uint32_t line,
- uint32_t byteindex) {
- return byteidx_to_charidx(&text->lines[line], byteindex);
-}
-
-uint32_t text_global_idx(struct text *text, uint32_t line, uint32_t col) {
- uint32_t byteoff = 0;
- uint32_t nlines = text_num_lines(text);
-
- if (nlines == 0) {
- return 0;
- }
-
- for (uint32_t l = 0; l < line && l < nlines; ++l) {
- // +1 for newline
- byteoff += text_line_size(text, l) + 1;
- }
-
- uint32_t l = line < nlines ? line : nlines - 1;
- uint32_t nchars = text_line_length(text, l);
- uint32_t c = col < nchars ? col : nchars;
- byteoff += text_col_to_byteindex(text, l, c);
-
- if (col > nchars) {
- // account for newline
- ++byteoff;
- }
-
- return byteoff;
+struct utf8_codepoint_iterator
+text_chunk_codepoint_iterator(const struct text_chunk *chunk) {
+ return create_utf8_codepoint_iterator(chunk->text, chunk->nbytes, 0);
}
void append_empty_lines(struct text *text, uint32_t numlines) {
@@ -145,17 +99,10 @@ void append_empty_lines(struct text *text, uint32_t numlines) {
struct line *nline = &text->lines[text->nlines];
nline->data = NULL;
nline->nbytes = 0;
- nline->nchars = 0;
nline->flags = 0;
++text->nlines;
}
-
- if (text->nlines > text->capacity) {
- printf("text->nlines: %d, text->capacity: %d\n", text->nlines,
- text->capacity);
- raise(SIGTRAP);
- }
}
void ensure_line(struct text *text, uint32_t line) {
@@ -166,8 +113,8 @@ void ensure_line(struct text *text, uint32_t line) {
// It is assumed that `data` does not contain any \n, that is handled by
// higher-level functions
-void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data,
- uint32_t len, uint32_t nchars) {
+static void insert_at(struct text *text, uint32_t line, uint32_t offset,
+ uint8_t *data, uint32_t len) {
if (len == 0) {
return;
@@ -178,11 +125,10 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data,
struct line *l = &text->lines[line];
l->nbytes += len;
- l->nchars += nchars;
l->flags = LineChanged;
l->data = realloc(l->data, l->nbytes);
- uint32_t bytei = charidx_to_byteidx(l, col);
+ uint32_t bytei = offset;
// move following bytes out of the way
if (bytei + len < l->nbytes) {
@@ -194,15 +140,7 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data,
memcpy(l->data + bytei, data, len);
}
-uint32_t text_line_length(struct text *text, uint32_t lineidx) {
- if (lineidx >= text_num_lines(text)) {
- return 0;
- }
-
- return text->lines[lineidx].nchars;
-}
-
-uint32_t text_line_size(struct text *text, uint32_t lineidx) {
+uint32_t text_line_size(const struct text *text, uint32_t lineidx) {
if (lineidx >= text_num_lines(text)) {
return 0;
}
@@ -210,20 +148,19 @@ uint32_t text_line_size(struct text *text, uint32_t lineidx) {
return text->lines[lineidx].nbytes;
}
-uint32_t text_num_lines(struct text *text) { return text->nlines; }
+uint32_t text_num_lines(const struct text *text) { return text->nlines; }
+
+static void split_line(struct text *text, uint32_t offset, uint32_t lineidx,
+ uint32_t newlineidx) {
+ struct line *line = &text->lines[lineidx];
+ struct line *next = &text->lines[newlineidx];
-void split_line(uint32_t col, struct line *line, struct line *next) {
uint8_t *data = line->data;
uint32_t nbytes = line->nbytes;
- uint32_t nchars = line->nchars;
-
- uint32_t chari = col;
- uint32_t bytei = charidx_to_byteidx(line, chari);
+ uint32_t bytei = offset;
line->nbytes = bytei;
- line->nchars = chari;
next->nbytes = nbytes - bytei;
- next->nchars = nchars - chari;
line->flags = next->flags = line->flags;
next->data = NULL;
@@ -260,7 +197,7 @@ void shift_lines(struct text *text, uint32_t start, int32_t direction) {
memmove(dest, src, nlines * sizeof(struct line));
}
-void new_line_at(struct text *text, uint32_t line, uint32_t col) {
+void new_line_at(struct text *text, uint32_t line, uint32_t offset) {
ensure_line(text, line);
uint32_t newline = line + 1;
@@ -274,7 +211,7 @@ void new_line_at(struct text *text, uint32_t line, uint32_t col) {
}
// split line if needed
- split_line(col, &text->lines[line], &text->lines[newline]);
+ split_line(text, offset, line, newline);
}
void delete_line(struct text *text, uint32_t line) {
@@ -294,29 +231,25 @@ void delete_line(struct text *text, uint32_t line) {
--text->nlines;
text->lines[text->nlines].data = NULL;
text->lines[text->nlines].nbytes = 0;
- text->lines[text->nlines].nchars = 0;
}
-void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col,
- uint8_t *bytes, uint32_t nbytes,
- uint32_t *lines_added, uint32_t *cols_added) {
+static void text_insert_at_inner(struct text *text, uint32_t line,
+ uint32_t offset, uint8_t *bytes,
+ uint32_t nbytes, uint32_t *lines_added) {
uint32_t linelen = 0, start_line = line;
- *cols_added = 0;
for (uint32_t bytei = 0; bytei < nbytes; ++bytei) {
uint8_t byte = bytes[bytei];
if (byte == '\n') {
uint8_t *line_data = bytes + (bytei - linelen);
- uint32_t nchars = utf8_nchars(line_data, linelen);
+ insert_at(text, line, offset, line_data, linelen);
- insert_at(text, line, col, line_data, linelen, nchars);
-
- col += nchars;
- new_line_at(text, line, col);
+ offset += linelen;
+ new_line_at(text, line, offset);
++line;
linelen = 0;
- col = 0;
+ offset = 0;
} else {
++linelen;
}
@@ -325,30 +258,26 @@ void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col,
// handle remaining
if (linelen > 0) {
uint8_t *line_data = bytes + (nbytes - linelen);
- uint32_t nchars = utf8_nchars(line_data, linelen);
- insert_at(text, line, col, line_data, linelen, nchars);
- *cols_added = nchars;
+ insert_at(text, line, offset, line_data, linelen);
}
*lines_added = line - start_line;
}
void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes,
- uint32_t *lines_added, uint32_t *cols_added) {
+ uint32_t *lines_added) {
uint32_t line = text->nlines > 0 ? text->nlines - 1 : 0;
- uint32_t col = text_line_length(text, line);
-
- text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added);
+ uint32_t offset = text_line_size(text, line);
+ text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added);
}
-void text_insert_at(struct text *text, uint32_t line, uint32_t col,
- uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added,
- uint32_t *cols_added) {
- text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added);
+void text_insert_at(struct text *text, uint32_t line, uint32_t offset,
+ uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added) {
+ text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added);
}
-void text_delete(struct text *text, uint32_t start_line, uint32_t start_col,
- uint32_t end_line, uint32_t end_col) {
+void text_delete(struct text *text, uint32_t start_line, uint32_t start_offset,
+ uint32_t end_line, uint32_t end_offset) {
if (text->nlines == 0) {
return;
@@ -362,45 +291,44 @@ void text_delete(struct text *text, uint32_t start_line, uint32_t start_col,
if (end_line > maxline) {
end_line = maxline;
- end_col = text->lines[end_line].nchars;
+ end_offset = text_line_size(text, end_line);
}
struct line *firstline = &text->lines[start_line];
struct line *lastline = &text->lines[end_line];
// clamp column
- if (start_col > firstline->nchars) {
- start_col = firstline->nchars > 0 ? firstline->nchars - 1 : 0;
+ uint32_t firstline_len = text_line_size(text, start_line);
+ if (start_offset > firstline_len) {
+ start_offset = firstline_len > 0 ? firstline_len - 1 : 0;
}
// handle deletion of newlines
- if (end_col > lastline->nchars) {
+ uint32_t lastline_len = text_line_size(text, end_line);
+ if (end_offset > lastline_len) {
if (end_line + 1 < text->nlines) {
- end_col = 0;
+ end_offset = 0;
++end_line;
lastline = &text->lines[end_line];
} else {
- end_col = lastline->nchars;
+ end_offset = lastline_len;
}
}
- uint32_t bytei = utf8_nbytes(lastline->data, lastline->nbytes, end_col);
+ uint32_t srcbytei = end_offset;
+ uint32_t dstbytei = start_offset;
+ uint32_t ncopy = lastline->nbytes - srcbytei;
if (lastline == firstline) {
// in this case we can "overwrite"
- uint32_t dstbytei =
- utf8_nbytes(firstline->data, firstline->nbytes, start_col);
- memmove(firstline->data + dstbytei, lastline->data + bytei,
- lastline->nbytes - bytei);
+ memmove(firstline->data + dstbytei, lastline->data + srcbytei, ncopy);
} else {
// otherwise we actually have to copy from the last line
- insert_at(text, start_line, start_col, lastline->data + bytei,
- lastline->nbytes - bytei, lastline->nchars - end_col);
+ insert_at(text, start_line, start_offset, lastline->data + srcbytei, ncopy);
}
- firstline->nchars = start_col + (lastline->nchars - end_col);
- firstline->nbytes =
- utf8_nbytes(firstline->data, firstline->nbytes, start_col) +
- (lastline->nbytes - bytei);
+ // new byte count is whatever we had before (left of dstbytei)
+ // plus what we copied
+ firstline->nbytes = dstbytei + ncopy;
// delete full lines, backwards to not shift old, crappy data upwards
for (uint32_t linei = end_line >= text->nlines ? end_line - 1 : end_line;
@@ -429,7 +357,6 @@ void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines,
.allocated = false,
.text = src_line->data,
.nbytes = src_line->nbytes,
- .nchars = src_line->nchars,
.line = li,
};
callback(&line, userdata);
@@ -441,8 +368,8 @@ struct text_chunk text_get_line(struct text *text, uint32_t line) {
return (struct text_chunk){
.text = src_line->data,
.nbytes = src_line->nbytes,
- .nchars = src_line->nchars,
.line = line,
+ .allocated = false,
};
}
@@ -453,33 +380,34 @@ struct copy_cmd {
};
struct text_chunk text_get_region(struct text *text, uint32_t start_line,
- uint32_t start_col, uint32_t end_line,
- uint32_t end_col) {
- if (start_line == end_line && start_col == end_col) {
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset) {
+ if (start_line == end_line && start_offset == end_offset) {
return (struct text_chunk){0};
}
struct line *first_line = &text->lines[start_line];
struct line *last_line = &text->lines[end_line];
+ uint32_t first_line_len = first_line->nbytes;
+ uint32_t last_line_len = last_line->nbytes;
- if (start_col > first_line->nchars) {
+ if (start_offset > first_line_len) {
return (struct text_chunk){0};
}
// handle copying of newlines
- if (end_col > last_line->nchars) {
+ if (end_offset > last_line_len) {
++end_line;
- end_col = 0;
+ end_offset = 0;
last_line = &text->lines[end_line];
}
uint32_t nlines = end_line - start_line + 1;
struct copy_cmd *copy_cmds = calloc(nlines, sizeof(struct copy_cmd));
- uint32_t total_chars = 0, total_bytes = 0;
+ uint32_t total_bytes = 0;
for (uint32_t line = start_line; line <= end_line; ++line) {
struct line *l = &text->lines[line];
- total_chars += l->nchars;
total_bytes += l->nbytes;
struct copy_cmd *cmd = &copy_cmds[line - start_line];
@@ -490,19 +418,14 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line,
// correct first line
struct copy_cmd *cmd_first = &copy_cmds[0];
- uint32_t byteoff =
- utf8_nbytes(first_line->data, first_line->nbytes, start_col);
- cmd_first->byteoffset += byteoff;
- cmd_first->nbytes -= byteoff;
- total_bytes -= byteoff;
- total_chars -= start_col;
+ cmd_first->byteoffset += start_offset;
+ cmd_first->nbytes -= start_offset;
+ total_bytes -= start_offset;
// correct last line
struct copy_cmd *cmd_last = &copy_cmds[nlines - 1];
- uint32_t byteindex = utf8_nbytes(last_line->data, last_line->nbytes, end_col);
- cmd_last->nbytes -= (last_line->nbytes - byteindex);
- total_bytes -= (last_line->nbytes - byteindex);
- total_chars -= (last_line->nchars - end_col);
+ cmd_last->nbytes -= (last_line->nbytes - end_offset);
+ total_bytes -= (last_line->nbytes - end_offset);
uint8_t *data = (uint8_t *)malloc(
total_bytes + /* nr of newline chars */ (end_line - start_line));
@@ -518,7 +441,6 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line,
data[curr] = '\n';
++curr;
++total_bytes;
- ++total_chars;
}
}
@@ -527,28 +449,25 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line,
.text = data,
.line = 0,
.nbytes = total_bytes,
- .nchars = total_chars,
.allocated = true,
};
}
-bool text_line_contains_unicode(struct text *text, uint32_t line) {
- return text->lines[line].nbytes != text->lines[line].nchars;
-}
-
-void text_add_property(struct text *text, struct location start,
- struct location end, struct text_property property) {
+void text_add_property(struct text *text, uint32_t start_line,
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset, struct text_property property) {
struct text_property_entry entry = {
- .start = start,
- .end = end,
+ .start = (struct location){.line = start_line, .col = start_offset},
+ .end = (struct location){.line = end_line, .col = end_offset},
.property = property,
};
VEC_PUSH(&text->properties, entry);
}
-void text_get_properties(struct text *text, struct location location,
+void text_get_properties(struct text *text, uint32_t line, uint32_t offset,
struct text_property **properties,
uint32_t max_nproperties, uint32_t *nproperties) {
+ struct location location = {.line = line, .col = offset};
uint32_t nres = 0;
VEC_FOR_EACH(&text->properties, struct text_property_entry * prop) {
if (location_is_between(location, prop->start, prop->end)) {
diff --git a/src/dged/text.h b/src/dged/text.h
index 8b49ef4..28bd325 100644
--- a/src/dged/text.h
+++ b/src/dged/text.h
@@ -6,9 +6,16 @@
#include <stdint.h>
#include "location.h"
+#include "utf8.h"
struct text;
-struct render_command;
+
+struct text_chunk {
+ uint8_t *text;
+ uint32_t nbytes;
+ uint32_t line;
+ bool allocated;
+};
struct text *text_create(uint32_t initial_capacity);
void text_destroy(struct text *text);
@@ -18,31 +25,21 @@ void text_destroy(struct text *text);
*/
void text_clear(struct text *text);
-void text_insert_at(struct text *text, uint32_t line, uint32_t col,
- uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added,
- uint32_t *cols_added);
+void text_insert_at(struct text *text, uint32_t line, uint32_t offset,
+ uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added);
void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes,
- uint32_t *lines_added, uint32_t *cols_added);
+ uint32_t *lines_added);
-void text_delete(struct text *text, uint32_t start_line, uint32_t start_col,
- uint32_t end_line, uint32_t end_col);
+void text_delete(struct text *text, uint32_t start_line, uint32_t start_offset,
+ uint32_t end_line, uint32_t end_offset);
-uint32_t text_num_lines(struct text *text);
-uint32_t text_line_length(struct text *text, uint32_t lineidx);
-uint32_t text_line_size(struct text *text, uint32_t lineidx);
-uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col);
-uint32_t text_byteindex_to_col(struct text *text, uint32_t line,
- uint32_t byteindex);
-uint32_t text_global_idx(struct text *text, uint32_t line, uint32_t col);
-
-struct text_chunk {
- uint8_t *text;
- uint32_t nbytes;
- uint32_t nchars;
- uint32_t line;
- bool allocated;
-};
+uint32_t text_num_lines(const struct text *text);
+uint32_t text_line_size(const struct text *text, uint32_t lineidx);
+struct utf8_codepoint_iterator
+text_line_codepoint_iterator(const struct text *text, uint32_t lineidx);
+struct utf8_codepoint_iterator
+text_chunk_codepoint_iterator(const struct text_chunk *chunk);
typedef void (*chunk_cb)(struct text_chunk *chunk, void *userdata);
void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines,
@@ -52,10 +49,8 @@ void text_for_each_chunk(struct text *text, chunk_cb callback, void *userdata);
struct text_chunk text_get_line(struct text *text, uint32_t line);
struct text_chunk text_get_region(struct text *text, uint32_t start_line,
- uint32_t start_col, uint32_t end_line,
- uint32_t end_col);
-
-bool text_line_contains_unicode(struct text *text, uint32_t line);
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset);
enum text_property_type {
TextProperty_Colors,
@@ -77,10 +72,11 @@ struct text_property {
};
};
-void text_add_property(struct text *text, struct location start,
- struct location end, struct text_property property);
+void text_add_property(struct text *text, uint32_t start_line,
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset, struct text_property property);
-void text_get_properties(struct text *text, struct location location,
+void text_get_properties(struct text *text, uint32_t line, uint32_t offset,
struct text_property **properties,
uint32_t max_nproperties, uint32_t *nproperties);
diff --git a/src/dged/utf8.c b/src/dged/utf8.c
index 52de2da..ede4fb1 100644
--- a/src/dged/utf8.c
+++ b/src/dged/utf8.c
@@ -1,5 +1,6 @@
#include "utf8.h"
+#include <assert.h>
#include <stdio.h>
#include <wchar.h>
@@ -10,76 +11,125 @@ bool utf8_byte_is_unicode_continuation(uint8_t byte) {
bool utf8_byte_is_unicode(uint8_t byte) { return (byte & 0x80) != 0x0; }
bool utf8_byte_is_ascii(uint8_t byte) { return !utf8_byte_is_unicode(byte); }
-uint32_t utf8_nbytes_in_char(uint8_t byte) {
- // length of char is the number of leading ones
- // flip it and count number of leading zeros
- uint8_t invb = ~byte;
- return __builtin_clz((uint32_t)invb) - 24;
+enum utf8_state {
+ Utf8_Accept = 0,
+ Utf8_Reject = 1,
+};
+
+// clang-format off
+static const uint8_t utf8d[] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+ 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+ 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+ 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+ 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+ 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+ 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+// clang-format on
+
+/*
+ * UTF-8 decoding algorithm (DFA-based) from
+ * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+ */
+static enum utf8_state decode(enum utf8_state *state, uint32_t *codep,
+ uint32_t byte) {
+ uint32_t type = utf8d[byte];
+
+ *codep = (*state != Utf8_Accept) ? (byte & 0x3fu) | (*codep << 6)
+ : (0xff >> type) & (byte);
+
+ *state = utf8d[256 + *state * 16 + type];
+ return *state;
+}
+
+static struct codepoint next_utf8_codepoint(uint8_t *bytes, uint64_t nbytes) {
+ uint32_t codepoint = 0;
+ enum utf8_state state = Utf8_Accept;
+ uint32_t bi = 0;
+ while (bi < nbytes) {
+ enum utf8_state res = decode(&state, &codepoint, bytes[bi]);
+ ++bi;
+
+ if (res == Utf8_Accept || res == Utf8_Reject) {
+ break;
+ }
+ }
+
+ if (state == Utf8_Reject) {
+ codepoint = 0xfffd;
+ }
+
+ return (struct codepoint){.codepoint = codepoint, .nbytes = bi};
}
-// TODO: grapheme clusters, this returns the number of unicode code points
+struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter) {
+ if (iter->offset >= iter->nbytes) {
+ return NULL;
+ }
+
+ iter->current = next_utf8_codepoint(iter->data + iter->offset,
+ iter->nbytes - iter->offset);
+ iter->offset += iter->current.nbytes;
+ return &iter->current;
+}
+
+struct utf8_codepoint_iterator
+create_utf8_codepoint_iterator(uint8_t *data, uint64_t len,
+ uint64_t initial_offset) {
+ return (struct utf8_codepoint_iterator){
+ .data = data,
+ .nbytes = len,
+ .offset = initial_offset,
+ };
+}
+
+/* TODO: grapheme clusters and other classification, this
+ * returns the number of unicode code points
+ */
uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes) {
+ uint32_t bi = 0;
uint32_t nchars = 0;
- uint32_t expected = 0;
- for (uint32_t bi = 0; bi < nbytes; ++bi) {
- uint8_t byte = bytes[bi];
- if (utf8_byte_is_unicode(byte)) {
- if (utf8_byte_is_unicode_start(byte)) {
- expected = utf8_nbytes_in_char(byte) - 1;
- } else { // continuation byte
- --expected;
- if (expected == 0) {
- ++nchars;
- }
- }
- } else { // ascii
- ++nchars;
- }
+ while (bi < nbytes) {
+ struct codepoint codepoint = next_utf8_codepoint(bytes + bi, nbytes - bi);
+ ++nchars;
+ bi += codepoint.nbytes;
}
+
return nchars;
}
-// TODO: grapheme clusters, this uses the number of unicode code points
+/* TODO: grapheme clusters and other classification, this
+ * returns the number of bytes occupied by `nchars` unicode code points
+ */
uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars) {
-
uint32_t bi = 0;
uint32_t chars = 0;
uint32_t expected = 0;
while (chars < nchars && bi < nbytes) {
- uint8_t byte = bytes[bi];
- if (utf8_byte_is_unicode(byte)) {
- if (utf8_byte_is_unicode_start(byte)) {
- expected = utf8_nbytes_in_char(byte) - 1;
- } else { // continuation char
- --expected;
- if (expected == 0) {
- ++chars;
- }
- }
- } else { // ascii
- ++chars;
- }
-
- ++bi;
+ struct codepoint codepoint = next_utf8_codepoint(bytes + bi, nbytes - bi);
+ bi += codepoint.nbytes;
+ ++chars;
}
+ // TODO: reject invalid?
return bi;
}
-uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len) {
- if (utf8_byte_is_unicode_start(*bytes)) {
- wchar_t wc;
- size_t nbytes = 0;
- if ((nbytes = mbrtowc(&wc, (char *)bytes, len, NULL)) > 0) {
- size_t w = wcwidth(wc);
- return w > 0 ? w : 2;
- } else {
- return 1;
- }
- } else if (utf8_byte_is_unicode_continuation(*bytes)) {
- return 0;
+uint32_t unicode_visual_char_width(const struct codepoint *codepoint) {
+ if (codepoint->nbytes > 0) {
+ // TODO: use unicode classification instead
+ size_t w = wcwidth(codepoint->codepoint);
+ return w >= 0 ? w : 2;
} else {
- return 1;
+ return 0;
}
}
diff --git a/src/dged/utf8.h b/src/dged/utf8.h
index 04aa242..22ce22d 100644
--- a/src/dged/utf8.h
+++ b/src/dged/utf8.h
@@ -1,19 +1,37 @@
+#ifndef _UTF8_H
+#define _UTF8_H
+
#include <stdbool.h>
#include <stdint.h>
+struct codepoint {
+ uint32_t codepoint;
+ uint32_t nbytes;
+};
+
+struct utf8_codepoint_iterator {
+ uint8_t *data;
+ uint64_t nbytes;
+ uint64_t offset;
+ struct codepoint current;
+};
+
+struct utf8_codepoint_iterator
+create_utf8_codepoint_iterator(uint8_t *data, uint64_t len,
+ uint64_t initial_offset);
+struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter);
+
/*!
* \brief Return the number of chars the utf-8 sequence pointed at by `bytes` of
* length `nbytes`, represents
*/
uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes);
-/* Return the number of bytes used to make up the next `nchars` characters */
-uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars);
+uint32_t unicode_visual_char_width(const struct codepoint *codepoint);
-/* true if `byte` is a unicode byte sequence start byte */
bool utf8_byte_is_unicode_start(uint8_t byte);
bool utf8_byte_is_unicode_continuation(uint8_t byte);
-bool utf8_byte_is_ascii(uint8_t byte);
bool utf8_byte_is_unicode(uint8_t byte);
+bool utf8_byte_is_ascii(uint8_t byte);
-uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len);
+#endif