prefix notation type parser

This commit is contained in:
Ellen Arvidsson 2025-04-19 20:35:42 +03:00
parent 9b24c8a496
commit 9ac779c1cf
14 changed files with 1217 additions and 151 deletions

View file

@ -16,7 +16,7 @@ struct msph_matcher {
const int type;
};
struct msph_matcher token_matcher[] = {
struct msph_matcher token_matchers[] = {
{ 0, 0, -1 },
{ 0, 0, TOK_LBRACE },
{ 0, 0, TOK_RBRACE },
@ -26,6 +26,8 @@ struct msph_matcher token_matcher[] = {
{ 0, 0, TOK_RPAREN },
{ 0, 0, TOK_COLON },
{ 0, 0, TOK_EQUALS },
{ 0, 0, TOK_COMMA },
{ 0, 0, TOK_DOT },
{ 0, 0, TOK_AMP },
{ 0, 0, TOK_PIPE },
@ -36,7 +38,7 @@ struct msph_matcher token_matcher[] = {
{ 0, 0, TOK_KW_TYPE },
{ 0, 0, TOK_KW_NOMINAL },
{ 0, 0, TOK_KW_MEMBER },
{ 0, 0, TOK_KW_CHECK },
{ 0, 0, TOK_KW_ASSERT },
{ 0, 0, TOK_KW_BOX },
{ 0, 0, TOK_KW_FORALL },
@ -62,6 +64,8 @@ struct msph_token_info {
TOK_INFO(TOK_RPAREN, ")"),
TOK_INFO(TOK_COLON, ":"),
TOK_INFO(TOK_EQUALS, "="),
TOK_INFO(TOK_COMMA, ","),
TOK_INFO(TOK_DOT, "."),
TOK_INFO(TOK_AMP, "&"),
TOK_INFO(TOK_PIPE, "|"),
@ -72,7 +76,7 @@ struct msph_token_info {
TOK_INFO(TOK_KW_TYPE, "type"),
TOK_INFO(TOK_KW_NOMINAL, "nominal"),
TOK_INFO(TOK_KW_MEMBER, "member"),
TOK_INFO(TOK_KW_CHECK, "check"),
TOK_INFO(TOK_KW_ASSERT, "assert"),
TOK_INFO(TOK_KW_BOX, "box"),
TOK_INFO(TOK_KW_FORALL, "forall"),
@ -80,7 +84,6 @@ struct msph_token_info {
TOK_INFO(TOK_CONST_FALSE, "False"),
TOK_INFO(TOK_IDENT, NULL),
TOK_INFO(TOK_WSPACE, NULL),
{ TOK_END , NULL, NULL }
#undef TOK_INFO
};
@ -113,12 +116,11 @@ void msph_ctx_init(struct msph_ctx *ctx)
}
struct msph_token_stream *
msph_token_stream_file(struct msph_ctx *ctx, FILE *f, const char *name)
msph_token_stream_file(struct msph_ctx *ctx, FILE *f)
{
size_t res;
struct msph_token_stream *ret;
if (ctx == NULL || f == NULL || name == NULL) {
if (ctx == NULL || f == NULL) {
MSPH_ERR(ctx, MSPH_ERR_INVAL);
return (NULL);
}
@ -135,13 +137,6 @@ msph_token_stream_file(struct msph_ctx *ctx, FILE *f, const char *name)
ret->src.inner.file.pos = 0;
ret->src.inner.file.end = 0;
res = strlcpy(ret->src.inner.file.name, name,
sizeof(ret->src.inner.file.name));
if (res >= sizeof(ret->src.inner.file.name)) {
MSPH_ERR(ctx, MSPH_ERR_TOOLONG);
goto err;
}
return (ret);
err:
if (fclose(f) == EOF)
@ -166,18 +161,24 @@ msph_token_stream_frombuf(struct msph_ctx *ctx, const char *buf, size_t len)
ret->ctx = ctx;
ret->src.type = MSPH_TOKEN_SRC_STR;
ret->src.inner.str.s = buf;
ret->src.inner.str.len = len;
ret->src.inner.str.len = strnlen(buf, len);
ret->src.inner.str.pos = 0;
return (ret);
}
ssize_t
msph_token_str(char *buf, size_t len, struct msph_token *tok)
msph_token_str(char *buf, size_t len,
struct msph_token *tok)
{
ssize_t ret;
const char *base;
ret = (ssize_t)snprintf(buf, len, "%s", tok_base_str(tok));
base = tok_base_str(tok);
if (base == NULL) {
return (-1);
}
ret = (ssize_t)snprintf(buf, len, "%s", base);
if (ret < 0 || ret >= (ssize_t)len)
return (ret);
@ -187,7 +188,7 @@ msph_token_str(char *buf, size_t len, struct msph_token *tok)
switch (tok->type) {
case TOK_IDENT:
ret += (ssize_t)snprintf(buf, len, "(%s)", tok->d.s.buf);
ret += (ssize_t)snprintf(buf, len, "(%s)", tok->data.str);
break;
default:
break;
@ -196,7 +197,7 @@ msph_token_str(char *buf, size_t len, struct msph_token *tok)
return (ret);
}
#define MSPH_TOKEN_PRINT_BUF_LEN 2 * MSPH_TOKEN_BUF_LEN
/*
 * Size of the scratch buffer used when printing a token: twice the
 * identifier length. Parenthesized so the macro expands as a single value
 * inside larger expressions.
 */
#define MSPH_TOKEN_PRINT_BUF_LEN (2 * MSPH_IDENT_LEN)
int
msph_token_stream_print(struct msph_token_stream *s, FILE *out)
{
@ -204,10 +205,13 @@ msph_token_stream_print(struct msph_token_stream *s, FILE *out)
struct msph_token tok;
char tokstr[MSPH_TOKEN_PRINT_BUF_LEN];
while ((ret = msph_token_stream_read_tok( &tok, 1, s)) > 0) {
while ((ret = msph_token_stream_read(&tok, 1, s)) > 0) {
ret = msph_token_str(tokstr, BUF_LEN(tokstr), &tok);
if (ret < 0)
continue;
if (ret < 0) {
MSPH_ERR_INFO(s->ctx, MSPH_ERR_TOKEN_INVAL, tok.type);
break;
}
if ((size_t)ret < BUF_LEN(tokstr))
fprintf(out, "%s\n", tokstr);
@ -240,9 +244,9 @@ msph_token_stream_close(struct msph_token_stream *s)
return (ret);
}
/* -1 or num tokens read */
/* Returns the number of tokens read, or -1 on error. */
ssize_t
msph_token_stream_read_tok(struct msph_token *ptr, size_t n,
msph_token_stream_read(struct msph_token *ptr, size_t n,
struct msph_token_stream *s)
{
size_t ret;
@ -280,27 +284,88 @@ read_single_tok(struct msph_token *ptr, struct msph_token_stream *s)
return (-1);
max_m = 0;
for (m = 1; token_matcher[m].type != TOK_END; m++) {
res = tok_match(ctx, src, &token_matcher[m]);
for (m = 1; token_matchers[m].type != TOK_END; m++) {
res = tok_match(ctx, src, &token_matchers[m]);
if (res == -1)
return (-1);
if (res == 0 &&
token_matcher[m].matchlen > token_matcher[max_m].matchlen) {
if (res == 0 && token_matchers[m].matchlen >
token_matchers[max_m].matchlen) {
max_m = m;
}
}
if (max_m == 0)
return (0);
if (max_m == 0) {
if (msph_token_stream_eof(s))
return (0);
MSPH_ERR(s->ctx, MSPH_ERR_TOKEN_NOMATCH);
return (-1);
}
if (tok_commit(ctx, src, &token_matcher[max_m], ptr) == -1)
if (tok_commit(ctx, src, &token_matchers[max_m], ptr) == -1)
return (-1);
return (1);
}
/*
 * Report whether the token stream has no more input to offer.
 *
 * File source: nonzero when the buffer positions pos and end coincide and
 * the underlying FILE has its end-of-file indicator set.
 * String source: nonzero when pos has reached len.
 * Any other source type records MSPH_ERR_INVAL on the stream's context and
 * returns -1 (note that -1 is also nonzero).
 */
int
msph_token_stream_eof(struct msph_token_stream *s)
{
	struct msph_token_src_file *fsrc;
	struct msph_token_src_str *ssrc;

	if (s->src.type == MSPH_TOKEN_SRC_FILE) {
		fsrc = &s->src.inner.file;
		/* Buffered data still pending: not at end yet. */
		if (fsrc->pos != fsrc->end)
			return (0);
		return (feof(fsrc->f) != 0);
	}

	if (s->src.type == MSPH_TOKEN_SRC_STR) {
		ssrc = &s->src.inner.str;
		return (ssrc->pos == ssrc->len);
	}

	MSPH_ERR(s->ctx, MSPH_ERR_INVAL);
	return (-1);
}
struct msph_token *
msph_token_create(struct msph_ctx *ctx, int type, union msph_token_data *data)
{
size_t i;
struct msph_token *tok;
struct msph_token_info *info;
info = NULL;
for (i = 0; token_info[i].type != TOK_END; i++) {
if (token_info[i].type == type) {
info = &token_info[i];
break;
}
}
if (info == NULL) {
MSPH_ERR_INFO(ctx, MSPH_ERR_TOKEN_INVAL, type);
return (NULL);
}
if ((tok = malloc(sizeof(*tok))) == NULL) {
MSPH_ERR(ctx, MSPH_ERR_SYS);
return (NULL);
}
tok->type = type;
switch (type) {
case TOK_IDENT:
memcpy(&tok->data, data, sizeof(*data));
break;
default:
break;
}
return (tok);
}
static ssize_t
src_file_fill_buf(struct msph_ctx *ctx, struct msph_token_src_file *file)
@ -316,7 +381,7 @@ src_file_fill_buf(struct msph_ctx *ctx, struct msph_token_src_file *file)
maxread = BUF_LEN(file->buf) - file->end;
if (maxread == 0) {
MSPH_ERR(ctx, MSPH_ERR_TOOLONG);
MSPH_ERR(ctx, MSPH_ERR_TOKEN_TOOLONG);
return (-1);
}
@ -516,8 +581,8 @@ tok_match(struct msph_ctx *ctx, struct msph_token_src *src,
MATCH_STR("nominal");
case TOK_KW_MEMBER:
MATCH_STR("member");
case TOK_KW_CHECK:
MATCH_STR("check");
case TOK_KW_ASSERT:
MATCH_STR("assert");
case TOK_KW_BOX:
MATCH_STR("box");
case TOK_KW_FORALL:
@ -574,8 +639,8 @@ tok_commit(struct msph_ctx *ctx, struct msph_token_src *src,
file->pos += m->matchlen;
file->pos %= BUF_LEN(file->buf);
SPHO_ASSERT(file->pos < BUF_LEN(file->buf) ||
file->pos < pos_old);
SPHO_ASSERT(file->pos <= file->end ||
(file->pos < pos_old && file->pos < BUF_LEN(file->buf)));
if (ptr == NULL)
return (0);
@ -584,18 +649,18 @@ tok_commit(struct msph_ctx *ctx, struct msph_token_src *src,
if (! TOK_HAS_DATA(ptr->type))
return (0);
if (m->matchlen >= sizeof(ptr->d.s.buf)) {
MSPH_ERR(ctx, MSPH_ERR_TOOLONG);
if (m->matchlen >= sizeof(ptr->data.str)) {
MSPH_ERR(ctx, MSPH_ERR_TOKEN_TOOLONG);
return (-1);
}
if (fromcbuf_charcpy(ptr->d.s.buf, file->buf, sizeof(file->buf),
pos_old, m->matchlen) == -1) {
MSPH_ERR(ctx, MSPH_ERR_TOOLONG);
if (fromcbuf_charcpy(ptr->data.str, file->buf,
sizeof(file->buf), pos_old, m->matchlen) == -1) {
MSPH_ERR(ctx, MSPH_ERR_TOKEN_TOOLONG);
return (-1);
}
ptr->d.s.buf[m->matchlen] = '\0';
ptr->data.str[m->matchlen] = '\0';
return (0);
case MSPH_TOKEN_SRC_STR:
@ -612,14 +677,14 @@ tok_commit(struct msph_ctx *ctx, struct msph_token_src *src,
if (! TOK_HAS_DATA(ptr->type))
return (0);
if (m->matchlen >= sizeof(ptr->d.s.buf)) {
MSPH_ERR(ctx, MSPH_ERR_TOOLONG);
if (m->matchlen >= sizeof(ptr->data.str)) {
MSPH_ERR(ctx, MSPH_ERR_TOKEN_TOOLONG);
return (-1);
}
memcpy(ptr->d.s.buf, str->s, m->matchlen *
memcpy(ptr->data.str, str->s, m->matchlen *
sizeof(str->s[0]));
ptr->d.s.buf[m->matchlen] = '\0';
ptr->data.str[m->matchlen] = '\0';
return (0);
default: