From adb65be595a207d6674b14885e3b82397e1ae59f Mon Sep 17 00:00:00 2001
From: Albert Cervin
Date: Wed, 5 Nov 2025 23:17:51 +0100
Subject: Add check for printable chars in unicode

---
 src/dged/utf8.c | 9 +++++++++
 src/dged/utf8.h | 1 +
 2 files changed, 10 insertions(+)

diff --git a/src/dged/utf8.c b/src/dged/utf8.c
index 01dcdbd..b47f5fc 100644
--- a/src/dged/utf8.c
+++ b/src/dged/utf8.c
@@ -123,7 +123,16 @@ uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars) {
   return bi;
 }
 
+bool unicode_is_printable(const struct codepoint *codepoint) {
+  return ((codepoint->codepoint >= 0x20 && codepoint->codepoint <= 0x7E) ||
+          codepoint->codepoint >= 0xA0);
+}
+
 uint32_t unicode_visual_char_width(const struct codepoint *codepoint) {
+  if (!unicode_is_printable(codepoint)) {
+    return 1;
+  }
+
   if (codepoint->nbytes > 0) {
     // TODO: use unicode classification instead
     int w = wcwidth(codepoint->codepoint);
diff --git a/src/dged/utf8.h b/src/dged/utf8.h
index 22ce22d..150fe02 100644
--- a/src/dged/utf8.h
+++ b/src/dged/utf8.h
@@ -28,6 +28,7 @@ struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter);
 uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes);
 
 uint32_t unicode_visual_char_width(const struct codepoint *codepoint);
+bool unicode_is_printable(const struct codepoint *codepoint);
 
 bool utf8_byte_is_unicode_start(uint8_t byte);
 bool utf8_byte_is_unicode_continuation(uint8_t byte);
-- 
cgit v1.2.3