parsing prefix version (binary operators) of msph

This commit is contained in:
Ellen Arvidsson 2025-04-22 21:08:03 +03:00
parent 7e5f080282
commit 17be15d7b5
12 changed files with 617 additions and 92 deletions

View file

@ -8,7 +8,6 @@
#include "msph/err.h"
#include "msph/token.h"
struct msph_matcher {
size_t off;
size_t matchlen;
@ -98,6 +97,8 @@ static int tok_match(struct msph_ctx *, struct msph_token_src *,
struct msph_matcher *);
static int tok_commit(struct msph_ctx *, struct msph_token_src *,
struct msph_matcher *, struct msph_token *);
static void tok_update_pos(struct msph_ctx *, struct msph_token_src *,
struct msph_matcher *m);
static int char_at(struct msph_ctx *, struct msph_token_src *, size_t,
char *);
static int fromcbuf_charcpy(char *, const char *, size_t, size_t, size_t);
@ -116,8 +117,9 @@ void msph_ctx_init(struct msph_ctx *ctx)
}
struct msph_token_stream *
msph_token_stream_file(struct msph_ctx *ctx, FILE *f)
msph_token_stream_file(struct msph_ctx *ctx, const char *name, FILE *f)
{
size_t res;
struct msph_token_stream *ret;
if (ctx == NULL || f == NULL) {
@ -132,7 +134,13 @@ msph_token_stream_file(struct msph_ctx *ctx, FILE *f)
}
ret->ctx = ctx;
if ((res = strlcpy(ret->name, name, BUF_LEN(ret->name)))
>= BUF_LEN(ret->name)) {
MSPH_ERR(ctx, MSPH_ERR_TOOLONG);
goto err;
}
ret->src.type = MSPH_TOKEN_SRC_FILE;
ret->src.pos = (struct msph_text_pos) { .line = 1, .col = 1 };
ret->src.inner.file.f = f;
ret->src.inner.file.pos = 0;
ret->src.inner.file.end = 0;
@ -149,8 +157,10 @@ err:
}
struct msph_token_stream *
msph_token_stream_frombuf(struct msph_ctx *ctx, const char *buf, size_t len)
msph_token_stream_frombuf(struct msph_ctx *ctx, const char *name,
const char *buf, size_t len)
{
size_t res;
struct msph_token_stream *ret;
if ((ret = calloc(1, sizeof(struct msph_token_stream))) == NULL) {
@ -159,12 +169,23 @@ msph_token_stream_frombuf(struct msph_ctx *ctx, const char *buf, size_t len)
}
ret->ctx = ctx;
if ((res = strlcpy(ret->name, name, BUF_LEN(ret->name)))
>= BUF_LEN(ret->name)) {
MSPH_ERR(ctx, MSPH_ERR_TOOLONG);
goto err;
}
ret->src.type = MSPH_TOKEN_SRC_STR;
ret->src.pos = (struct msph_text_pos) { .line = 1, .col = 1 };
ret->src.inner.str.s = buf;
ret->src.inner.str.len = strnlen(buf, len);
ret->src.inner.str.pos = 0;
return (ret);
err:
free(ret);
return (NULL);
}
ssize_t
@ -244,7 +265,9 @@ msph_token_stream_close(struct msph_token_stream *s)
return (ret);
}
/* -1 on error or num tokens read */
/* Read at most n tokens from s into ptr.
* Return -1 on error, or the number of tokens read.
*/
ssize_t
msph_token_stream_read(struct msph_token *ptr, size_t n,
struct msph_token_stream *s)
@ -279,6 +302,7 @@ read_single_tok(struct msph_token *ptr, struct msph_token_stream *s)
/* Skipping whitespace */
if (tok_match(ctx, src, &wspace) == -1)
return (-1);
SPHO_DEBUG_PRINT("wspace.matchlen=%zu\n", wspace.matchlen);
if (wspace.matchlen > 0 &&
tok_commit(ctx, src, &wspace, NULL) == -1)
return (-1);
@ -329,42 +353,34 @@ msph_token_stream_eof(struct msph_token_stream *s)
}
struct msph_token *
msph_token_create(struct msph_ctx *ctx, int type, union msph_token_data *data)
msph_token_copy(struct msph_ctx *ctx, struct msph_token *token)
{
size_t i;
struct msph_token *tok;
struct msph_token *copy;
struct msph_token_info *info;
info = NULL;
for (i = 0; token_info[i].type != TOK_END; i++) {
if (token_info[i].type == type) {
if (token_info[i].type == token->type) {
info = &token_info[i];
break;
}
}
if (info == NULL) {
MSPH_ERR_INFO(ctx, MSPH_ERR_TOKEN_INVAL, type);
MSPH_ERR_INFO(ctx, MSPH_ERR_TOKEN_INVAL, token->type);
return (NULL);
}
if ((tok = malloc(sizeof(*tok))) == NULL) {
if ((copy = malloc(sizeof(*copy))) == NULL) {
MSPH_ERR(ctx, MSPH_ERR_SYS);
return (NULL);
}
tok->type = type;
memcpy(copy, token, sizeof(*copy));
switch (type) {
case TOK_IDENT:
memcpy(&tok->data, data, sizeof(*data));
break;
default:
break;
}
return (tok);
return (copy);
}
static ssize_t
@ -404,8 +420,6 @@ src_file_fill_buf(struct msph_ctx *ctx, struct msph_token_src_file *file)
} while (file->end != file->pos);
SPHO_DEBUG_PRINT("src_file_fill_buf: read %zd\n", ret);
return (ret);
}
@ -414,20 +428,16 @@ static int
file_char_at(struct msph_ctx *ctx, struct msph_token_src *src, size_t i,
char *out)
{
int ret;
ssize_t fill;
struct msph_token_src_file *file;
SPHO_PRECOND(src != NULL);
SPHO_PRECOND(src->type == MSPH_TOKEN_SRC_FILE);
ret = 0;
file = &src->inner.file;
fill = 0;
do {
SPHO_DEBUG_PRINT("want to read %zu, valid range (%zu, %zu)\n",
(file->pos + i) % BUF_LEN(file->buf), file->pos, file->end);
/* simplest case */
if (file->pos + i < file->end) {
*out = file->buf[file->pos + i];
@ -445,9 +455,9 @@ file_char_at(struct msph_ctx *ctx, struct msph_token_src *src, size_t i,
if ((fill = src_file_fill_buf(ctx, file)) == -1)
return (-1);
} while (fill > 0 && ret++);
} while (fill > 0);
return (ret);
return (0);
}
static int
@ -474,7 +484,34 @@ char_at(struct msph_ctx *ctx, struct msph_token_src *src, size_t i, char *out)
break;
}
SPHO_DEBUG_PRINT("char_at: ret=%d, *out=%c\n", ret, *out);
#ifdef SPHO_ENABLE_DEBUG_PRINT
if (isspace(*out)) {
const char *charrep;
switch (*out) {
case '\n':
charrep = "\\n";
break;
case '\t':
charrep = "\\t";
break;
case '\r':
charrep = "\\r";
break;
case '\v':
charrep = "\\v";
break;
case '\f':
charrep = "\\f";
break;
default:
charrep = "WOOOOOOOOOOPS";
break;
}
SPHO_DEBUG_PRINT("char_at: ret=%d, *out=%s\n", ret, charrep);
} else {
SPHO_DEBUG_PRINT("char_at: ret=%d, *out=%c\n", ret, *out);
}
#endif
return (ret);
}
@ -563,6 +600,10 @@ tok_match(struct msph_ctx *ctx, struct msph_token_src *src,
MATCH_CHAR(')');
case TOK_COLON:
MATCH_CHAR(':');
case TOK_DOT:
MATCH_CHAR('.');
case TOK_COMMA:
MATCH_CHAR(',');
case TOK_EQUALS:
MATCH_CHAR('=');
case TOK_AMP:
@ -626,12 +667,16 @@ tok_commit(struct msph_ctx *ctx, struct msph_token_src *src,
struct msph_matcher *m, struct msph_token *ptr)
{
size_t pos_old;
struct msph_text_pos tok_pos;
struct msph_token_src_str *str;
struct msph_token_src_file *file;
SPHO_PRECOND(ctx != NULL && m != NULL);
SPHO_PRECOND(m->matchlen != 0);
tok_pos = src->pos;
tok_update_pos(ctx, src, m);
switch (src->type) {
case MSPH_TOKEN_SRC_FILE:
file = &src->inner.file;
@ -646,6 +691,7 @@ tok_commit(struct msph_ctx *ctx, struct msph_token_src *src,
return (0);
ptr->type = m->type;
ptr->pos = tok_pos;
if (! TOK_HAS_DATA(ptr->type))
return (0);
@ -674,6 +720,7 @@ tok_commit(struct msph_ctx *ctx, struct msph_token_src *src,
return (0);
ptr->type = m->type;
ptr->pos = tok_pos;
if (! TOK_HAS_DATA(ptr->type))
return (0);
@ -692,7 +739,34 @@ tok_commit(struct msph_ctx *ctx, struct msph_token_src *src,
}
}
/*
 * Advance the running text position of a token source over a match.
 *
 * Reads the m->matchlen characters at offsets 0..matchlen-1 from src via
 * char_at() and updates src->pos for each one:
 *   '\t'  column advances by MSPH_TAB_WIDTH
 *   '\n'  line is incremented and column resets to 1
 *   '\r'  position is left unchanged
 *   other column advances by 1
 *
 * ctx is only forwarded to char_at(). Nothing is returned; if a character
 * cannot be read, the position is left at the last successfully processed
 * character.
 */
static void
tok_update_pos(struct msph_ctx *ctx, struct msph_token_src *src,
    struct msph_matcher *m)
{
	int res;
	char c;
	size_t i;

	for (i = 0; i < m->matchlen; i++) {
		/*
		 * Give c a defined value before handing it out: char_at()'s
		 * debug printing inspects *out regardless of its return code.
		 */
		c = '\0';
		res = char_at(ctx, src, i, &c);
		SPHO_ASSERT(res == 1);

		/*
		 * NOTE(review): SPHO_ASSERT may be compiled out in some
		 * builds (its definition is not visible here). Never update
		 * the position from a character that was not actually read.
		 */
		if (res != 1)
			return;

		switch (c) {
		case '\t':
			src->pos.col += MSPH_TAB_WIDTH;
			break;
		case '\n':
			src->pos.line++;
			src->pos.col = 1;
			break;
		case '\r':
			/* carriage return does not move the position */
			break;
		default:
			src->pos.col++;
			break;
		}
	}
}
static const char *
tok_base_str(struct msph_token *tok)