1 files changed, 288 insertions, 185 deletions
diff --git a/src/dged/buffer.c b/src/dged/buffer.c
index 6051f69..1062a47 100644
--- a/src/dged/buffer.c
+++ b/src/dged/buffer.c
@@ -157,6 +157,42 @@ void buffer_static_teardown() {
   }
 }
 
+static uint32_t get_tab_width(struct buffer *buffer) {
+  struct setting *tw = lang_setting(&buffer->lang, "tab-width");
+  if (tw == NULL) {
+    tw = settings_get("editor.tab-width");
+  }
+
+  uint32_t tab_width = 4;
+  if (tw != NULL && tw->value.type == Setting_Number) {
+    tab_width = tw->value.number_value;
+  }
+  return tab_width;
+}
+
+static bool use_tabs(struct buffer *buffer) {
+  struct setting *ut = lang_setting(&buffer->lang, "use-tabs");
+  if (ut == NULL) {
+    ut = settings_get("editor.use-tabs");
+  }
+
+  bool use_tabs = false;
+  if (ut != NULL && ut->value.type == Setting_Bool) {
+    use_tabs = ut->value.bool_value;
+  }
+
+  return use_tabs;
+}
+
+static uint32_t visual_char_width(struct codepoint *codepoint,
+                                  uint32_t tab_width) {
+  if (codepoint->codepoint == '\t') {
+    return tab_width;
+  } else {
+    return unicode_visual_char_width(codepoint);
+  }
+}
+
 static struct buffer create_internal(const char *name, char *filename) {
   struct buffer b = (struct buffer){
       .filename = filename,
@@ -185,7 +221,7 @@ static struct buffer create_internal(const char *name, char *filename) {
 
 static void strip_final_newline(struct buffer *b) {
   uint32_t nlines = text_num_lines(b->text);
-  if (nlines > 0 && text_line_length(b->text, nlines - 1) == 0) {
+  if (nlines > 0 && buffer_line_length(b, nlines - 1) == 0) {
     text_delete(b->text, nlines - 1, 0, nlines - 1, 1);
   }
 }
@@ -207,7 +243,7 @@ static void buffer_read_from_file(struct buffer *b) {
       int bytes = fread(buff, 1, 4096, file);
       if (bytes > 0) {
         uint32_t ignore;
-        text_append(b->text, buff, bytes, &ignore, &ignore);
+        text_append(b->text, buff, bytes, &ignore);
       } else if (bytes == 0) {
         break; // EOF
       } else {
@@ -239,70 +275,66 @@ static void write_line(struct text_chunk *chunk, void *userdata) {
   fputc('\n', file);
 }
 
-static bool is_word_break(uint8_t c) {
+static bool is_word_break(const struct codepoint *codepoint) {
+  uint32_t c = codepoint->codepoint;
   return c == ' ' || c == '.' || c == '(' || c == ')' || c == '[' || c == ']' ||
-         c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':';
+         c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':' ||
+         c == '"';
 }
 
-static bool is_word_char(uint8_t c) { return !is_word_break(c); }
-
-struct match_result {
-  struct location at;
-  bool found;
-};
-
-static struct match_result find_next_in_line(struct buffer *buffer,
-                                             struct location start,
-                                             bool (*predicate)(uint8_t c)) {
-  struct text_chunk line = text_get_line(buffer->text, start.line);
-  bool found = false;
+static bool is_word_char(const struct codepoint *c) {
+  return !is_word_break(c);
+}
 
-  if (line.nbytes == 0) {
+static struct match_result
+find_next_in_line(struct buffer *buffer, struct location start,
+                  bool (*predicate)(const struct codepoint *c)) {
+  if (text_line_size(buffer->text, start.line) == 0) {
     return (struct match_result){.at = start, .found = false};
   }
 
-  uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col);
-  while (bytei < line.nbytes) {
-    if (predicate(line.text[bytei])) {
+  bool found = false;
+  struct utf8_codepoint_iterator iter =
+      text_line_codepoint_iterator(buffer->text, start.line);
+  uint32_t coli = 0, tab_width = get_tab_width(buffer);
+  struct codepoint *codepoint;
+  while ((codepoint = utf8_next_codepoint(&iter)) != NULL) {
+    if (coli >= start.col && predicate(codepoint)) {
       found = true;
       break;
     }
-    ++bytei;
+
+    coli += visual_char_width(codepoint, tab_width);
   }
 
-  uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei);
   return (struct match_result){
-      .at = (struct location){.line = start.line, .col = target_col},
-      .found = found};
+      .at = (struct location){.line = start.line, .col = coli}, .found = found};
 }
 
-static struct match_result find_prev_in_line(struct buffer *buffer,
-                                             struct location start,
-                                             bool (*predicate)(uint8_t c)) {
-  struct text_chunk line = text_get_line(buffer->text, start.line);
-  bool found = false;
+static struct match_result
+find_prev_in_line(struct buffer *buffer, struct location start,
+                  bool (*predicate)(const struct codepoint *c)) {
 
-  if (line.nbytes == 0) {
+  if (text_line_size(buffer->text, start.line) == 0) {
     return (struct match_result){.at = start, .found = false};
   }
 
-  uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col);
-  while (bytei > 0) {
-    if (predicate(line.text[bytei])) {
+  bool found = false;
+  struct utf8_codepoint_iterator iter =
+      text_line_codepoint_iterator(buffer->text, start.line);
+  uint32_t coli = 0, tab_width = get_tab_width(buffer), found_at;
+  struct codepoint *codepoint;
+  while (coli < start.col && (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+    if (predicate(codepoint)) {
       found = true;
-      break;
+      found_at = coli;
     }
-    --bytei;
-  }
-
-  // first byte on line can also be a match
-  if (predicate(line.text[bytei])) {
-    found = true;
+    coli += visual_char_width(codepoint, tab_width);
   }
 
-  uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei);
   return (struct match_result){
-      .at = (struct location){.line = start.line, .col = target_col},
+      .at =
+          (struct location){.line = start.line, .col = found ? found_at : coli},
       .found = found};
 }
 
@@ -315,13 +347,52 @@ static struct text_chunk *copy_region(struct buffer *buffer,
     free(curr->text);
   }
 
+  struct location begin_bytes =
+      buffer_location_to_byte_coords(buffer, region.begin);
+  struct location end_bytes =
+      buffer_location_to_byte_coords(buffer, region.end);
+
   struct text_chunk txt =
-      text_get_region(buffer->text, region.begin.line, region.begin.col,
-                      region.end.line, region.end.col);
+      text_get_region(buffer->text, begin_bytes.line, begin_bytes.col,
+                      end_bytes.line, end_bytes.col);
   *curr = txt;
   return curr;
 }
 
+static struct location do_indent(struct buffer *buffer, struct location at,
+                                 uint32_t tab_width, bool use_tabs) {
+  if (use_tabs) {
+    return buffer_add(buffer, at, (uint8_t *)"\t", 1);
+  } else {
+    return buffer_add(buffer, at, (uint8_t *)"                ",
+                      tab_width > 16 ? 16 : tab_width);
+  }
+}
+
+static uint64_t to_global_offset(struct buffer *buffer,
+                                 struct location bytecoords) {
+  uint32_t line = bytecoords.line;
+  uint32_t col = bytecoords.col;
+  uint32_t byteoff = 0;
+  uint32_t nlines = buffer_num_lines(buffer);
+
+  if (nlines == 0) {
+    return 0;
+  }
+
+  for (uint32_t l = 0; l < line && l < nlines; ++l) {
+    // +1 for newline
+    byteoff += text_line_size(buffer->text, l) + 1;
+  }
+
+  // handle last line
+  uint32_t l = line < nlines ? line : nlines - 1;
+  uint32_t nbytes = text_line_size(buffer->text, l);
+  byteoff += col <= nbytes ? col : nbytes + 1;
+
+  return byteoff;
+}
+
 /* --------------------- buffer methods -------------------- */
 
 struct buffer buffer_create(const char *name) {
@@ -452,18 +523,29 @@ struct location buffer_add(struct buffer *buffer, struct location at,
   struct location initial = at;
   struct location final = at;
 
-  uint32_t lines_added, cols_added;
-  text_insert_at(buffer->text, initial.line, initial.col, text, nbytes,
-                 &lines_added, &cols_added);
+  struct location at_bytes = buffer_location_to_byte_coords(buffer, at);
+
+  uint32_t lines_added;
+  text_insert_at(buffer->text, at_bytes.line, at_bytes.col, text, nbytes,
+                 &lines_added);
 
   // move to after inserted text
   if (lines_added > 0) {
     final = buffer_clamp(buffer, (int64_t)at.line + lines_added, 0);
   } else {
+    uint32_t cols_added = 0, tab_width = get_tab_width(buffer);
+    struct utf8_codepoint_iterator iter =
+        create_utf8_codepoint_iterator(text, nbytes, 0);
+    struct codepoint *codepoint;
+    while ((codepoint = utf8_next_codepoint(&iter)) != NULL) {
+      cols_added += visual_char_width(codepoint, tab_width);
+    }
     final =
         buffer_clamp(buffer, (int64_t)at.line, (int64_t)at.col + cols_added);
   }
 
+  struct location final_bytes = buffer_location_to_byte_coords(buffer, final);
+
   undo_push_add(
       &buffer->undo,
       (struct undo_add){.begin = {.row = initial.line, .col = initial.col},
@@ -474,11 +556,17 @@ struct location buffer_add(struct buffer *buffer, struct location at,
                        (struct undo_boundary){.save_point = false});
   }
 
-  uint32_t begin_idx = text_global_idx(buffer->text, initial.line, initial.col);
-  uint32_t end_idx = text_global_idx(buffer->text, final.line, final.col);
+  uint32_t begin_idx = to_global_offset(buffer, at_bytes);
+  uint32_t end_idx = to_global_offset(buffer, final_bytes);
 
   VEC_FOR_EACH(&buffer->hooks->insert_hooks, struct insert_hook * h) {
-    h->callback(buffer, region_new(initial, final), begin_idx, end_idx,
+    h->callback(buffer,
+                (struct edit_location){
+                    .coordinates = region_new(initial, final),
+                    .bytes = region_new(at_bytes, final_bytes),
+                    .global_byte_begin = begin_idx,
+                    .global_byte_end = end_idx,
+                },
                 h->userdata);
   }
 
@@ -488,15 +576,16 @@ struct location buffer_add(struct buffer *buffer, struct location at,
 
 struct location buffer_set_text(struct buffer *buffer, uint8_t *text,
                                 uint32_t nbytes) {
-  uint32_t lines, cols;
+  uint32_t lines_added;
 
   text_clear(buffer->text);
-  text_append(buffer->text, text, nbytes, &lines, &cols);
+  text_append(buffer->text, text, nbytes, &lines_added);
 
   // if last line is empty, remove it
   strip_final_newline(buffer);
 
-  return buffer_clamp(buffer, lines, cols);
+  return buffer_clamp(buffer, lines_added,
+                      buffer_line_length(buffer, lines_added));
 }
 
 void buffer_clear(struct buffer *buffer) { text_clear(buffer->text); }
@@ -524,9 +613,18 @@ struct location buffer_previous_char(struct buffer *buffer,
     }
 
     --dot.line;
-    dot.col = buffer_num_chars(buffer, dot.line);
+    dot.col = buffer_line_length(buffer, dot.line);
   } else {
-    --dot.col;
+    struct utf8_codepoint_iterator iter =
+        text_line_codepoint_iterator(buffer->text, dot.line);
+    struct codepoint *codepoint;
+    uint32_t coli = 0, tab_width = get_tab_width(buffer), last_width = 0;
+    while (coli < dot.col && (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+      last_width = visual_char_width(codepoint, tab_width);
+      coli += last_width;
+    }
+
+    dot.col = coli - last_width;
   }
 
   return dot;
@@ -571,14 +669,14 @@ struct location buffer_previous_line(struct buffer *buffer,
   }
 
   --dot.line;
-  uint32_t nchars = buffer_num_chars(buffer, dot.line);
+  uint32_t nchars = buffer_line_length(buffer, dot.line);
   uint32_t new_col = dot.col > nchars ? nchars : dot.col;
 
   return dot;
 }
 
 struct location buffer_next_char(struct buffer *buffer, struct location dot) {
-  if (dot.col == buffer_num_chars(buffer, dot.line)) {
+  if (dot.col == buffer_line_length(buffer, dot.line)) {
     uint32_t lastline = buffer->lazy_row_add ? buffer_num_lines(buffer)
                                              : buffer_num_lines(buffer) - 1;
     if (dot.line == lastline) {
@@ -588,7 +686,16 @@ struct location buffer_next_char(struct buffer *buffer, struct location dot) {
     dot.col = 0;
     ++dot.line;
   } else {
-    ++dot.col;
+    struct utf8_codepoint_iterator iter =
+        text_line_codepoint_iterator(buffer->text, dot.line);
+    struct codepoint *codepoint;
+    uint32_t coli = 0;
+    while (coli <= dot.col &&
+           (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+      coli += visual_char_width(codepoint, get_tab_width(buffer));
+    }
+
+    dot.col = coli;
   }
 
   return dot;
@@ -635,7 +742,7 @@ struct location buffer_next_line(struct buffer *buffer, struct location dot) {
 
   ++dot.line;
   uint32_t new_col = dot.col;
-  uint32_t nchars = buffer_num_chars(buffer, dot.line);
+  uint32_t nchars = buffer_line_length(buffer, dot.line);
   new_col = new_col > nchars ? nchars : new_col;
 
   return dot;
@@ -664,8 +771,8 @@ struct location buffer_clamp(struct buffer *buffer, int64_t line, int64_t col) {
   // clamp col
   if (col < 0) {
     col = 0;
-  } else if (col > buffer_num_chars(buffer, line)) {
-    col = buffer_num_chars(buffer, line);
+  } else if (col > buffer_line_length(buffer, line)) {
+    col = buffer_line_length(buffer, line);
   }
 
   location.col = col;
@@ -681,7 +788,7 @@ struct location buffer_end(struct buffer *buffer) {
     return (struct location){.line = nlines, .col = 0};
   } else {
     return (struct location){.line = nlines - 1,
-                             .col = buffer_num_chars(buffer, nlines - 1)};
+                             .col = buffer_line_length(buffer, nlines - 1)};
   }
 }
 
@@ -689,55 +796,22 @@ uint32_t buffer_num_lines(struct buffer *buffer) {
   return text_num_lines(buffer->text);
 }
 
-uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line) {
-  if (line >= buffer_num_lines(buffer)) {
-    return 0;
+uint32_t buffer_line_length(struct buffer *buffer, uint32_t line) {
+  uint32_t tab_size = get_tab_width(buffer), len = 0;
+  struct utf8_codepoint_iterator iter =
+      text_line_codepoint_iterator(buffer->text, line);
+  struct codepoint *codepoint;
+  while ((codepoint = utf8_next_codepoint(&iter)) != NULL) {
+    len += visual_char_width(codepoint, tab_size);
   }
 
-  return text_line_length(buffer->text, line);
+  return len;
 }
 
 struct location buffer_newline(struct buffer *buffer, struct location at) {
   return buffer_add(buffer, at, (uint8_t *)"\n", 1);
 }
 
-static uint32_t get_tab_width(struct buffer *buffer) {
-  struct setting *tw = lang_setting(&buffer->lang, "tab-width");
-  if (tw == NULL) {
-    tw = settings_get("editor.tab-width");
-  }
-
-  uint32_t tab_width = 4;
-  if (tw != NULL && tw->value.type == Setting_Number) {
-    tab_width = tw->value.number_value;
-  }
-  return tab_width;
-}
-
-static bool use_tabs(struct buffer *buffer) {
-  struct setting *ut = lang_setting(&buffer->lang, "use-tabs");
-  if (ut == NULL) {
-    ut = settings_get("editor.use-tabs");
-  }
-
-  bool use_tabs = false;
-  if (ut != NULL && ut->value.type == Setting_Bool) {
-    use_tabs = ut->value.bool_value;
-  }
-
-  return use_tabs;
-}
-
-static struct location do_indent(struct buffer *buffer, struct location at,
-                                 uint32_t tab_width, bool use_tabs) {
-  if (use_tabs) {
-    return buffer_add(buffer, at, (uint8_t *)"\t", 1);
-  } else {
-    return buffer_add(buffer, at, (uint8_t *)"                ",
-                      tab_width > 16 ? 16 : tab_width);
-  }
-}
-
 struct location buffer_indent(struct buffer *buffer, struct location at) {
   return do_indent(buffer, at, get_tab_width(buffer), use_tabs(buffer));
 }
@@ -778,16 +852,13 @@ struct location buffer_undo(struct buffer *buffer, struct location dot) {
     case Undo_Add: {
       struct undo_add *add = &rec->add;
 
-      pos =
-          buffer_delete(buffer, (struct region){.begin =
-                                                    (struct location){
-                                                        .line = add->begin.row,
-                                                        .col = add->begin.col,
-                                                    },
-                                                .end = (struct location){
-                                                    .line = add->end.row,
-                                                    .col = add->end.col,
-                                                }});
+      pos = buffer_delete(buffer,
+                          (struct region){
+                              .begin = (struct location){.line = add->begin.row,
+                                                         .col = add->begin.col},
+                              .end = (struct location){.line = add->end.row,
+                                                       .col = add->end.col},
+                          });
 
       break;
     }
@@ -888,9 +959,14 @@ struct location buffer_delete(struct buffer *buffer, struct region region) {
     return region.begin;
   }
 
+  struct location begin_bytes =
+      buffer_location_to_byte_coords(buffer, region.begin);
+  struct location end_bytes =
+      buffer_location_to_byte_coords(buffer, region.end);
+
   struct text_chunk txt =
-      text_get_region(buffer->text, region.begin.line, region.begin.col,
-                      region.end.line, region.end.col);
+      text_get_region(buffer->text, begin_bytes.line, begin_bytes.col,
+                      end_bytes.line, end_bytes.col);
 
   undo_push_boundary(&buffer->undo,
                      (struct undo_boundary){.save_point = false});
@@ -903,17 +979,22 @@ struct location buffer_delete(struct buffer *buffer, struct region region) {
   undo_push_boundary(&buffer->undo,
                      (struct undo_boundary){.save_point = false});
 
-  uint32_t begin_idx =
-      text_global_idx(buffer->text, region.begin.line, region.begin.col);
-  uint32_t end_idx =
-      text_global_idx(buffer->text, region.end.line, region.end.col);
+  uint64_t begin_idx = to_global_offset(buffer, begin_bytes);
+  uint64_t end_idx = to_global_offset(buffer, end_bytes);
 
-  text_delete(buffer->text, region.begin.line, region.begin.col,
-              region.end.line, region.end.col);
+  text_delete(buffer->text, begin_bytes.line, begin_bytes.col, end_bytes.line,
+              end_bytes.col);
   buffer->modified = true;
 
   VEC_FOR_EACH(&buffer->hooks->delete_hooks, struct delete_hook * h) {
-    h->callback(buffer, region, begin_idx, end_idx, h->userdata);
+    h->callback(buffer,
+                (struct edit_location){
+                    .coordinates = region,
+                    .bytes = region_new(begin_bytes, end_bytes),
+                    .global_byte_begin = begin_idx,
+                    .global_byte_end = end_idx,
+                },
+                h->userdata);
   }
 
   return region.begin;
@@ -1035,27 +1116,6 @@ struct cmdbuf {
   struct buffer *buffer;
 };
 
-static uint32_t visual_char_width(uint8_t *byte, uint32_t maxlen) {
-  if (*byte == '\t') {
-    return 4;
-  } else {
-    return utf8_visual_char_width(byte, maxlen);
-  }
-}
-
-uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col,
-                             uint32_t end_col) {
-  uint32_t start_byte = utf8_nbytes(txt, len, start_col);
-  uint32_t end_byte = utf8_nbytes(txt, len, end_col);
-
-  uint32_t width = 0;
-  for (uint32_t bytei = start_byte; bytei < end_byte; ++bytei) {
-    width += visual_char_width(&txt[bytei], len - bytei);
-  }
-
-  return width;
-}
-
 static void apply_properties(struct command_list *cmds,
                              struct text_property *properties[],
                              uint32_t nproperties) {
@@ -1097,65 +1157,67 @@ void render_line(struct text_chunk *line, void *userdata) {
   command_list_set_show_whitespace(cmdbuf->cmds, cmdbuf->show_ws);
 
   // calculate scroll offsets
-  uint32_t scroll_bytes =
-      utf8_nbytes(line->text, line->nbytes, cmdbuf->origin.col);
-  uint32_t text_nbytes_scroll =
-      scroll_bytes > line->nbytes ? 0 : line->nbytes - scroll_bytes;
-  uint8_t *text = line->text + scroll_bytes;
-
-  uint32_t visual_col_start = 0;
-  uint32_t cur_visual_col = 0;
-  uint32_t start_byte = 0, text_nbytes = 0;
   struct text_property *properties[32] = {0};
   uint64_t prev_properties_hash = 0;
 
-  for (uint32_t cur_byte = start_byte, coli = 0;
-       cur_byte < text_nbytes_scroll && cur_visual_col < cmdbuf->width &&
-       coli < line->nchars - cmdbuf->origin.col;
-       ++coli) {
+  uint32_t tab_width = get_tab_width(cmdbuf->buffer);
+
+  // handle scroll column offset
+  uint32_t coli = 0, bytei = 0;
+  struct utf8_codepoint_iterator iter = text_chunk_codepoint_iterator(line);
+  struct codepoint *codepoint;
+  while (coli < cmdbuf->origin.col &&
+         (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+    coli += visual_char_width(codepoint, tab_width);
+    bytei += codepoint->nbytes;
+  }
 
-    uint32_t bytes_remaining = text_nbytes_scroll - cur_byte;
-    uint32_t char_nbytes = utf8_nbytes(text + cur_byte, bytes_remaining, 1);
-    uint32_t char_vwidth = visual_char_width(text + cur_byte, bytes_remaining);
+  // coli is the visual column [0..width-1]
+  coli = 0;
+  uint32_t drawn_bytei = bytei;
+  uint32_t drawn_coli = coli;
 
+  while (coli < cmdbuf->width &&
+         (codepoint = utf8_next_codepoint(&iter)) != NULL) {
     // calculate character properties
     uint32_t nproperties = 0;
-    text_get_properties(
-        cmdbuf->buffer->text,
-        (struct location){.line = line->line, .col = coli + cmdbuf->origin.col},
-        properties, 32, &nproperties);
+    text_get_properties(cmdbuf->buffer->text, line->line, bytei, properties, 32,
+                        &nproperties);
 
     // if we have any new or lost props, flush text up until now, reset
     // and re-apply current properties
     uint64_t new_properties_hash = properties_hash(properties, nproperties);
     if (new_properties_hash != prev_properties_hash) {
-      command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line,
-                             text + start_byte, cur_byte - start_byte);
+      command_list_draw_text(cmdbuf->cmds, drawn_coli, visual_line,
+                             line->text + drawn_bytei, bytei - drawn_bytei);
       command_list_reset_color(cmdbuf->cmds);
 
-      visual_col_start = cur_visual_col;
-      start_byte = cur_byte;
+      drawn_coli = coli;
+      drawn_bytei = bytei;
 
       // apply new properties
       apply_properties(cmdbuf->cmds, properties, nproperties);
     }
 
     prev_properties_hash = new_properties_hash;
-    cur_byte += char_nbytes;
-    text_nbytes += char_nbytes;
-    cur_visual_col += char_vwidth;
+    bytei += codepoint->nbytes;
+    coli += visual_char_width(codepoint, tab_width);
   }
 
   // flush remaining
-  command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line,
-                         text + start_byte, text_nbytes - start_byte);
+  command_list_draw_text(cmdbuf->cmds, drawn_coli, visual_line,
+                         line->text + drawn_bytei, bytei - drawn_bytei);
+
+  drawn_coli = coli;
+  drawn_bytei = bytei;
 
   command_list_reset_color(cmdbuf->cmds);
   command_list_set_show_whitespace(cmdbuf->cmds, false);
 
-  if (cur_visual_col < cmdbuf->width) {
-    command_list_draw_repeated(cmdbuf->cmds, cur_visual_col, visual_line, ' ',
-                               cmdbuf->width - cur_visual_col);
+  // TODO: considering the whole screen is cleared, is this really needed?
+  if (drawn_coli < cmdbuf->width) {
+    command_list_draw_repeated(cmdbuf->cmds, drawn_coli, visual_line, ' ',
+                               cmdbuf->width - drawn_coli);
   }
 }
 
@@ -1200,19 +1262,19 @@ void buffer_render(struct buffer *buffer, struct buffer_render_params *params) {
 void buffer_add_text_property(struct buffer *buffer, struct location start,
                               struct location end,
                               struct text_property property) {
-  text_add_property(
-      buffer->text, (struct location){.line = start.line, .col = start.col},
-      (struct location){.line = end.line, .col = end.col}, property);
+  struct location bytestart = buffer_location_to_byte_coords(buffer, start);
+  struct location byteend = buffer_location_to_byte_coords(buffer, end);
+  text_add_property(buffer->text, bytestart.line, bytestart.col, byteend.line,
+                    byteend.col, property);
 }
 
 void buffer_get_text_properties(struct buffer *buffer, struct location location,
                                 struct text_property **properties,
                                 uint32_t max_nproperties,
                                 uint32_t *nproperties) {
-  text_get_properties(
-      buffer->text,
-      (struct location){.line = location.line, .col = location.col}, properties,
-      max_nproperties, nproperties);
+  struct location bytecoords = buffer_location_to_byte_coords(buffer, location);
+  text_get_properties(buffer->text, bytecoords.line, bytecoords.col, properties,
+                      max_nproperties, nproperties);
 }
 
 void buffer_clear_text_properties(struct buffer *buffer) {
@@ -1244,9 +1306,12 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line,
                  (struct location){.line = end + 1, .col = 0});
 
   struct s8 *lines = (struct s8 *)malloc(sizeof(struct s8) * ntosort);
-  struct text_chunk txt =
-      text_get_region(buffer->text, region.begin.line, region.begin.col,
-                      region.end.line, region.end.col);
+
+  struct location bytebeg =
+      buffer_location_to_byte_coords(buffer, region.begin);
+  struct location byteend = buffer_location_to_byte_coords(buffer, region.end);
+  struct text_chunk txt = text_get_region(
+      buffer->text, bytebeg.line, bytebeg.col, byteend.line, byteend.col);
 
   uint32_t line_start = 0;
   uint32_t curr_line = 0;
@@ -1278,3 +1343,41 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line,
     free(txt.text);
   }
 }
+
+struct location buffer_location_to_byte_coords(struct buffer *buffer,
+                                               struct location coords) {
+  struct utf8_codepoint_iterator iter =
+      text_line_codepoint_iterator(buffer->text, coords.line);
+  uint32_t byteoffset = 0, col = 0, tab_width = get_tab_width(buffer);
+  struct codepoint *codepoint;
+
+  /* Let this walk up to (and including the target column) to
+   * make sure we account for zero-width characters when calculating the
+   * byte offset.
+   */
+  while (col <= coords.col &&
+         (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+    byteoffset += codepoint->nbytes;
+    col += visual_char_width(codepoint, tab_width);
+  }
+
+  /* Remove the byte-width of the last char again since it gives us the
+   * position right before it while still taking zero-width codepoints
+   * into account.
+   */
+  return (struct location){.line = coords.line,
+                           .col = byteoffset -
+                                  (codepoint != NULL ? codepoint->nbytes : 0)};
+}
+
+struct match_result
+buffer_find_prev_in_line(struct buffer *buffer, struct location start,
+                         bool (*predicate)(const struct codepoint *c)) {
+  return find_prev_in_line(buffer, start, predicate);
+}
+
+struct match_result
+buffer_find_next_in_line(struct buffer *buffer, struct location start,
+                         bool (*predicate)(const struct codepoint *c)) {
+  return find_next_in_line(buffer, start, predicate);
+}