diff options
| author | Albert Cervin <albert@acervin.com> | 2023-05-18 23:51:51 +0200 |
|---|---|---|
| committer | Albert Cervin <albert@acervin.com> | 2023-05-24 22:18:12 +0200 |
| commit | 4f3b576db6b01c8c88076985478e2a7fa37be340 (patch) | |
| tree | 9723ed39a19872fd52f2867613e78e02de3cf79b /src/dged/settings-parse.c | |
| parent | a4d17ddb8e7d23ccca13132f4d88cfc5f5730b76 (diff) | |
| download | dged-4f3b576db6b01c8c88076985478e2a7fa37be340.tar.gz dged-4f3b576db6b01c8c88076985478e2a7fa37be340.tar.xz dged-4f3b576db6b01c8c88076985478e2a7fa37be340.zip | |
TOML settings parsing
Currently a very simplistic parser that does not support all TOML
datatypes. Supported are:
- Tables
- Strings (incl multiline)
- Integers
- Inline Tables
- Booleans
- Comments
Diffstat (limited to 'src/dged/settings-parse.c')
| -rw-r--r-- | src/dged/settings-parse.c | 277 |
1 files changed, 277 insertions, 0 deletions
diff --git a/src/dged/settings-parse.c b/src/dged/settings-parse.c new file mode 100644 index 0000000..6c1c482 --- /dev/null +++ b/src/dged/settings-parse.c @@ -0,0 +1,277 @@ +#include "settings-parse.h" + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "utf8.h" + +enum byte_class { + Byte_Alphanumeric, + Byte_Symbol, +}; + +struct parser parser_create(struct reader reader) { + struct parser state = { + .row = 0, + .col = 0, + .reader = reader, + }; + + VEC_INIT(&state.buffer, 32); + + return state; +} + +void parser_destroy(struct parser *parser) { VEC_DESTROY(&parser->buffer); } + +static enum byte_class classify(uint8_t byte) { + if ((byte >= 'a' && byte <= 'z') || (byte >= 'A' && byte <= 'Z') || + (byte >= '0' && byte <= '9') || byte == '_' || byte == '-' || + utf8_byte_is_unicode(byte)) { + return Byte_Alphanumeric; + } + + return Byte_Symbol; +} + +static void trim_parse_buffer_whitespace(void **data, uint32_t *len) { + uint8_t *d = (uint8_t *)*data; + uint32_t new_len = *len; + + // beginning + while (d[0] == ' ' || d[0] == '\t') { + --new_len; + ++d; + } + + // end + while (d[new_len - 1] == ' ' || d[new_len - 1] == '\t') { + --new_len; + } + + *data = d; + *len = new_len; +} + +static bool read_data_with_initial(struct parser *state, uint8_t *initial_byte, + uint8_t end, void **data_out, + uint32_t *len_out) { + uint8_t byte; + VEC_CLEAR(&state->buffer); + if (initial_byte != NULL) { + VEC_PUSH(&state->buffer, *initial_byte); + } + + while (state->reader.getbytes(1, &byte, state->reader.userdata) > 0 && + byte != end) { + ++state->col; + VEC_PUSH(&state->buffer, byte); + } + + *data_out = VEC_FRONT(&state->buffer); + *len_out = VEC_SIZE(&state->buffer); + + trim_parse_buffer_whitespace(data_out, len_out); + + return byte == end; +} + +static bool read_data(struct parser *state, uint8_t end, void **data_out, + uint32_t *len_out) { + return read_data_with_initial(state, NULL, end, data_out, 
len_out); +} + +static bool discard(struct parser *state, uint8_t end) { + uint8_t byte; + while (state->reader.getbytes(1, &byte, state->reader.userdata) > 0 && + byte != end) { + ++state->col; + } + + return byte == end; +} + +static void errtoken(struct token *token_out, const char *fmt, ...) { + static char errmsgbuf[256] = {0}; + va_list args; + va_start(args, fmt); + size_t written = vsnprintf(errmsgbuf, 256, fmt, args); + va_end(args); + + token_out->type = Token_Error; + token_out->data = errmsgbuf; + token_out->len = written; +} + +bool parser_next_token(struct parser *state, struct token *token_out) { + uint8_t byte; + static bool parse_value = false; + static int64_t int_value = 0; + static bool bool_value = false; + + memset(token_out, 0, sizeof(struct token)); + + while (state->reader.getbytes(1, &byte, state->reader.userdata) > 0) { + switch (classify(byte)) { + case Byte_Alphanumeric: // unquoted key / value + if (!parse_value) { + token_out->type = Token_Key; + token_out->row = state->row; + token_out->col = state->col; + + if (!read_data_with_initial(state, &byte, '=', &token_out->data, + &token_out->len)) { + errtoken(token_out, "Unexpected EOF while looking for end of key"); + return true; + } + + parse_value = true; + } else { + parse_value = false; + token_out->row = state->row; + token_out->col = state->col; + + if (byte >= '0' && byte <= '9') { + token_out->type = Token_IntValue; + void *data; + uint32_t len; + read_data_with_initial(state, &byte, '\n', &data, &len); + + char *s = calloc(len + 1, 1); + strncpy(s, (char *)data, len); + + errno = 0; + int_value = strtol(s, NULL, 0); + free(s); + if (errno != 0) { + errtoken(token_out, "Invalid integer value %.*s: %s", len, + (char *)data, strerror(errno)); + return true; + } + + token_out->data = &int_value; + token_out->len = 0; + } else if (byte == 't' || byte == 'f') { + token_out->type = Token_BoolValue; + void *data = NULL; + uint32_t len = 0; + read_data_with_initial(state, &byte, '\n', 
&data, &len); + + if (strncmp((char *)data, "true", len) == 0) { + bool_value = true; + token_out->data = &bool_value; + token_out->len = 0; + } else if (strncmp((char *)data, "false", len) == 0) { + bool_value = false; + token_out->data = &bool_value; + token_out->len = 0; + } else { + errtoken(token_out, "Invalid bool value: %.*s", len, (char *)data); + } + } + } + + return true; + + case Byte_Symbol: + switch (byte) { + case '#': // comment + token_out->type = Token_Comment; + token_out->row = state->row; + token_out->col = state->col; + if (!read_data(state, '\n', &token_out->data, &token_out->len)) { + errtoken(token_out, + "Unexpected EOF while looking for end of comment line"); + return true; + } + + uint8_t *data = (uint8_t *)token_out->data; + if (data[token_out->len - 1] == '\r') { + --token_out->len; + } + + state->col = 0; + ++state->row; + + return true; + + case '{': // inline table + parse_value = false; + token_out->type = Token_InlineTable; + token_out->row = state->row; + token_out->col = state->col; + return true; + break; + + case '}': // end inline table + parse_value = false; + break; + + case '[': // table open + token_out->type = Token_Table; + token_out->row = state->row; + token_out->col = state->col; + if (!read_data(state, ']', &token_out->data, &token_out->len)) { + errtoken(token_out, "Unexpected EOF while looking for matching ']'"); + return true; + } + + ++state->col; + return true; + + case '"': // quoted key or string value + bool multiline = false; + if (parse_value) { + token_out->type = Token_StringValue; + } else { + token_out->type = Token_Key; + } + token_out->row = state->row; + token_out->col = state->col; + + // check for multiline + uint32_t numquotes = 1; + while (state->reader.getbytes(1, &byte, state->reader.userdata) > 0 && + byte == '"') { + ++numquotes; + } + + if (numquotes == 3) { + multiline = true; + } + + if (!read_data_with_initial(state, &byte, '"', &token_out->data, + &token_out->len)) { + 
errtoken(token_out, "Unexpected EOF while looking for matching '\"'"); + parse_value = false; + return true; + } + + if (!parse_value) { + discard(state, '='); + } + + if (multiline) { + discard(state, '"'); + discard(state, '"'); + } + + ++state->col; + parse_value = false; + return true; + + case '\n': + case '\r': + state->col = 0; + ++state->row; + break; + } + break; + } + } + + return false; +} |
