#include #include #include #include #include #include "msph/err.h" #include "msph/token.h" struct msph_matcher { size_t off; size_t matchlen; const int type; }; struct msph_matcher token_matchers[] = { { 0, 0, -1 }, { 0, 0, TOK_LBRACE }, { 0, 0, TOK_RBRACE }, { 0, 0, TOK_LBRAK }, { 0, 0, TOK_RBRAK }, { 0, 0, TOK_LPAREN }, { 0, 0, TOK_RPAREN }, { 0, 0, TOK_COLON }, { 0, 0, TOK_EQUALS }, { 0, 0, TOK_COMMA }, { 0, 0, TOK_DOT }, { 0, 0, TOK_AMP }, { 0, 0, TOK_PIPE }, { 0, 0, TOK_IMPL }, { 0, 0, TOK_RARROW }, { 0, 0, TOK_SUB }, { 0, 0, TOK_KW_TYPE }, { 0, 0, TOK_KW_NOMINAL }, { 0, 0, TOK_KW_MEMBER }, { 0, 0, TOK_KW_ASSERT }, { 0, 0, TOK_KW_BOX }, { 0, 0, TOK_KW_FORALL }, { 0, 0, TOK_CONST_TRUE }, { 0, 0, TOK_CONST_FALSE }, { 0, 0, TOK_IDENT }, { 0, 0, TOK_END } }; struct msph_matcher wspace = { 0, 0, TOK_WSPACE }; struct msph_token_info { const int type; const char *dbg_str; const char *str; } token_info[] = { #define TOK_INFO(tok, s) { tok , #tok, s } TOK_INFO(TOK_LBRACE, "{"), TOK_INFO(TOK_RBRACE, "}"), TOK_INFO(TOK_LBRAK, "["), TOK_INFO(TOK_RBRAK, "]"), TOK_INFO(TOK_LPAREN, "("), TOK_INFO(TOK_RPAREN, ")"), TOK_INFO(TOK_COLON, ":"), TOK_INFO(TOK_EQUALS, "="), TOK_INFO(TOK_COMMA, ","), TOK_INFO(TOK_DOT, "."), TOK_INFO(TOK_AMP, "&"), TOK_INFO(TOK_PIPE, "|"), TOK_INFO(TOK_IMPL, "=>"), TOK_INFO(TOK_RARROW, "->"), TOK_INFO(TOK_SUB, "<:"), TOK_INFO(TOK_KW_TYPE, "type"), TOK_INFO(TOK_KW_NOMINAL, "nominal"), TOK_INFO(TOK_KW_MEMBER, "member"), TOK_INFO(TOK_KW_ASSERT, "assert"), TOK_INFO(TOK_KW_BOX, "box"), TOK_INFO(TOK_KW_FORALL, "forall"), TOK_INFO(TOK_CONST_TRUE, "True"), TOK_INFO(TOK_CONST_FALSE, "False"), TOK_INFO(TOK_IDENT, NULL), { TOK_END , NULL, NULL } #undef TOK_INFO }; #define BUF_LEN(b) ((sizeof(b) / sizeof((b)[0]))) static ssize_t src_file_fill_buf(struct msph_ctx *, struct msph_token_src_file *); static int tok_match(struct msph_ctx *, struct msph_token_src *, struct msph_matcher *); static int tok_commit(struct msph_ctx *, struct msph_token_src *, struct msph_matcher *, struct msph_token *); static void tok_update_pos(struct msph_ctx *, struct msph_token_src *, struct msph_matcher *m); static int char_at(struct msph_ctx *, struct msph_token_src *, size_t, char *); static int fromcbuf_charcpy(char *, const char *, size_t, size_t, size_t); static int file_char_at(struct msph_ctx *, struct msph_token_src *, size_t, char *out); static int read_single_tok(struct msph_token *, struct msph_token_stream *); static const char *tok_base_str(struct msph_token *); struct msph_token_stream * msph_token_stream_file(struct msph_ctx *ctx, const char *name, FILE *f) { size_t res; struct msph_token_stream *ret; if (ctx == NULL || f == NULL) { MSPH_ERR(ctx, MSPH_ERR_INVAL); return (NULL); } if ((ret = calloc(1, sizeof(struct msph_token_stream))) == NULL) { MSPH_ERR(ctx, MSPH_ERR_SYS); goto err; } ret->ctx = ctx; if ((res = strlcpy(ret->name, name, BUF_LEN(ret->name))) >= BUF_LEN(ret->name)) { MSPH_ERR(ctx, MSPH_ERR_TOOLONG); goto err; } ret->src.type = MSPH_TOKEN_SRC_FILE; ret->src.pos = (struct msph_text_pos) { .line = 1, .col = 1 }; ret->src.inner.file.f = f; ret->src.inner.file.pos = 0; ret->src.inner.file.end = 0; return (ret); err: if (fclose(f) == EOF) abort(); if (ret != NULL) free(ret); return (NULL); } struct msph_token_stream * msph_token_stream_frombuf(struct msph_ctx *ctx, const char *name, const char *buf, size_t len) { size_t res; struct msph_token_stream *ret; if ((ret = calloc(1, sizeof(struct msph_token_stream))) == NULL) { MSPH_ERR(ctx, MSPH_ERR_SYS); return (NULL); } ret->ctx = ctx; if ((res = strlcpy(ret->name, name, BUF_LEN(ret->name))) >= BUF_LEN(ret->name)) { MSPH_ERR(ctx, MSPH_ERR_TOOLONG); goto err; } ret->src.type = MSPH_TOKEN_SRC_STR; ret->src.pos = (struct msph_text_pos) { .line = 1, .col = 1 }; ret->src.inner.str.s = buf; ret->src.inner.str.len = strnlen(buf, len); ret->src.inner.str.pos = 0; return (ret); err: free(ret); return (NULL); } ssize_t msph_token_str(char *buf, size_t len, struct msph_token *tok) { ssize_t ret; const char *base; base = tok_base_str(tok); if (base == NULL) { return (-1); } ret = (ssize_t)snprintf(buf, len, "%s", base); if (ret < 0 || ret >= (ssize_t)len) return (ret); len -= (size_t)ret; buf += ret; switch (tok->type) { case TOK_IDENT: ret += (ssize_t)snprintf(buf, len, "(%s)", tok->data.str); break; default: break; } return (ret); } #define MSPH_TOKEN_PRINT_BUF_LEN 2 * MSPH_IDENT_LEN int msph_token_stream_print(struct msph_token_stream *s, FILE *out) { ssize_t ret; struct msph_token tok; char tokstr[MSPH_TOKEN_PRINT_BUF_LEN]; while ((ret = msph_token_stream_read(&tok, 1, s)) > 0) { ret = msph_token_str(tokstr, BUF_LEN(tokstr), &tok); if (ret < 0) { MSPH_ERR_INFO(s->ctx, MSPH_ERR_TOKEN_INVAL, tok.type); break; } if ((size_t)ret < BUF_LEN(tokstr)) fprintf(out, "%s\n", tokstr); else fprintf(out, "%s...(trunkated)", tokstr); } return ((int)ret); } int msph_token_stream_close(struct msph_token_stream *s) { int ret; ret = -1; switch (s->src.type) { case MSPH_TOKEN_SRC_FILE: ret = fclose(s->src.inner.file.f); break; case MSPH_TOKEN_SRC_STR: ret = 0; break; default: break; } return (ret); } /* read at most n tokens from s into p. * return -1 on error, or num tokens read */ ssize_t msph_token_stream_read(struct msph_token *ptr, size_t n, struct msph_token_stream *s) { size_t ret; int res; ret = 0; res = -1; while (ret < n && (res = read_single_tok(&ptr[ret], s)) != 0) { if (res == -1) return (-1); ret++; } return ((ssize_t)ret); } /* 1: matched token, 0: failed to match token, -1: error */ static int read_single_tok(struct msph_token *ptr, struct msph_token_stream *s) { int res; size_t m; size_t max_m; struct msph_ctx *ctx; struct msph_token_src *src; ctx = s->ctx; src = &s->src; /* Skipping whitespace */ if (tok_match(ctx, src, &wspace) == -1) return (-1); SPHO_DEBUG_PRINT("wspace.matchlen=%zu\n", wspace.matchlen); if (wspace.matchlen > 0 && tok_commit(ctx, src, &wspace, NULL) == -1) return (-1); max_m = 0; for (m = 1; token_matchers[m].type != TOK_END; m++) { res = tok_match(ctx, src, &token_matchers[m]); if (res == -1) return (-1); if (res == 0 && token_matchers[m].matchlen > token_matchers[max_m].matchlen) { max_m = m; } } if (max_m == 0) { if (msph_token_stream_eof(s)) return (0); MSPH_ERR(s->ctx, MSPH_ERR_TOKEN_NOMATCH); return (-1); } if (tok_commit(ctx, src, &token_matchers[max_m], ptr) == -1) return (-1); return (1); } int msph_token_stream_eof(struct msph_token_stream *s) { struct msph_token_src_file *file; struct msph_token_src_str *str; switch (s->src.type) { case MSPH_TOKEN_SRC_FILE: file = &s->src.inner.file; return (file->pos == file->end && feof(file->f)); case MSPH_TOKEN_SRC_STR: str = &s->src.inner.str; return (str->pos == str->len); default: MSPH_ERR(s->ctx, MSPH_ERR_INVAL); return (-1); } } struct msph_token * msph_token_copy(struct msph_ctx *ctx, struct msph_token *token) { size_t i; struct msph_token *copy; struct msph_token_info *info; info = NULL; for (i = 0; token_info[i].type != TOK_END; i++) { if (token_info[i].type == token->type) { info = &token_info[i]; break; } } if (info == NULL) { MSPH_ERR_INFO(ctx, MSPH_ERR_TOKEN_INVAL, token->type); return (NULL); } if ((copy = malloc(sizeof(*copy))) == NULL) { MSPH_ERR(ctx, MSPH_ERR_SYS); return (NULL); } memcpy(copy, token, sizeof(*copy)); return (copy); } static ssize_t src_file_fill_buf(struct msph_ctx *ctx, struct msph_token_src_file *file) { ssize_t ret; size_t nread, maxread; ret = nread = maxread = 0; do { if (file->end < file->pos) maxread = file->pos - file->end; else maxread = BUF_LEN(file->buf) - file->end; if (maxread == 0) { MSPH_ERR(ctx, MSPH_ERR_TOKEN_TOOLONG); return (-1); } nread = fread(&file->buf[file->end], sizeof(file->buf[0]), maxread, file->f); ret += nread; file->end = (file->end + nread) % BUF_LEN(file->buf); SPHO_DEBUG_PRINT("src_file_fill_buf: valid range (%zu, %zu)\n", file->pos, file->end); if (nread < maxread) { if (ferror(file->f)) { MSPH_ERR(ctx, MSPH_ERR_SYS); return (-1); } break; } } while (file->end != file->pos); return (ret); } /* reads a single char from the circular buffer in src */ static int file_char_at(struct msph_ctx *ctx, struct msph_token_src *src, size_t i, char *out) { ssize_t fill; struct msph_token_src_file *file; SPHO_PRECOND(src != NULL); SPHO_PRECOND(src->type == MSPH_TOKEN_SRC_FILE); file = &src->inner.file; fill = 0; do { /* simplest case */ if (file->pos + i < file->end) { *out = file->buf[file->pos + i]; return (1); } /* wrap around */ if (file->end < file->pos && ((file->pos + i) % BUF_LEN(file->buf)) < file->end) { *out = file->buf[(file->pos + i) % BUF_LEN(file->buf)]; return (1); } if (feof(file->f)) return (0); if ((fill = src_file_fill_buf(ctx, file)) == -1) return (-1); } while (fill > 0); return (0); } static int char_at(struct msph_ctx *ctx, struct msph_token_src *src, size_t i, char *out) { int ret; struct msph_token_src_str *str; ret = -1; switch (src->type) { case MSPH_TOKEN_SRC_FILE: ret = file_char_at(ctx, src, i, out); break; case MSPH_TOKEN_SRC_STR: str = &src->inner.str; if (str->pos + i < str->len) { *out = str->s[str->pos + i]; ret = 1; } else { ret = 0; } break; default: break; } #ifdef SPHO_ENABLE_DEBUG_PRINT if (isspace(*out)) { const char *charrep; switch (*out) { case '\n': charrep = "\\n"; break; case '\t': charrep = "\\t"; break; case '\r': charrep = "\\r"; break; case '\v': charrep = "\\v"; break; case '\f': charrep = "\\f"; break; default: charrep = "WOOOOOOOOOOPS"; break; } SPHO_DEBUG_PRINT("char_at: ret=%d, *out=%s\n", ret, charrep); } else { SPHO_DEBUG_PRINT("char_at: ret=%d, *out=%c\n", ret, *out); } #endif return (ret); } static int fromcbuf_charcpy(char *dst, const char *src, size_t src_len, size_t src_pos, size_t ncpy) { size_t cpy1, cpy2; if (src_len < ncpy) { return (-1); } cpy1 = (src_pos + ncpy < src_len) ? ncpy : src_len - src_pos; cpy2 = ncpy - cpy1; SPHO_DEBUG_PRINT("fromcbuf_charcpy: cpy1=%zu cpy2=%zu\n", cpy1, cpy2); memcpy(dst, &src[src_pos], cpy1 * sizeof(src[0])); if (! cpy2) return (0); memcpy(dst, &src[0], cpy2 * sizeof(src[0])); return (0); } static int tok_match(struct msph_ctx *ctx, struct msph_token_src *src, struct msph_matcher *m) { int res; char chr; const char *match_str; size_t off, len; SPHO_PRECOND(m != NULL && src != NULL); m->matchlen = 0; #define MATCH_CHAR(c) \ do { \ SPHO_DEBUG_PRINT("tok_match: '%c'\n", c); \ if ((res = char_at(ctx, src, 0, &chr)) == -1) \ return (-1); \ else if (res == 0) \ return (0); \ SPHO_DEBUG_PRINT("tok_match: char_at(0)='%c'\n", c); \ \ if (chr == (c)) { \ m->matchlen = 1; \ } \ SPHO_DEBUG_PRINT("tok_match: matchlen=%zu\n", m->matchlen); \ return (0); \ } while (0) #define MATCH_STR(str) \ do { \ match_str = str; \ len = strlen(match_str); \ for (off = 0; off < len; off++) { \ if ((res = char_at(ctx, src, off, &chr)) == -1) \ return (-1); \ else if (res == 0) \ return (0); \ \ if (chr != match_str[off]) \ break; \ } \ if (off == len) \ m->matchlen = len; \ return (0); \ } while (0) switch (m->type) { case TOK_LBRACE: MATCH_CHAR('{'); case TOK_RBRACE: MATCH_CHAR('}'); case TOK_LBRAK: MATCH_CHAR('['); case TOK_RBRAK: MATCH_CHAR(']'); case TOK_LPAREN: MATCH_CHAR('('); case TOK_RPAREN: MATCH_CHAR(')'); case TOK_COLON: MATCH_CHAR(':'); case TOK_DOT: MATCH_CHAR('.'); case TOK_COMMA: MATCH_CHAR(','); case TOK_EQUALS: MATCH_CHAR('='); case TOK_AMP: MATCH_CHAR('&'); case TOK_PIPE: MATCH_CHAR('|'); case TOK_IMPL: MATCH_STR("=>"); case TOK_RARROW: MATCH_STR("->"); case TOK_SUB: MATCH_STR("<:"); case TOK_KW_TYPE: MATCH_STR("type"); case TOK_KW_NOMINAL: MATCH_STR("nominal"); case TOK_KW_MEMBER: MATCH_STR("member"); case TOK_KW_ASSERT: MATCH_STR("assert"); case TOK_KW_BOX: MATCH_STR("box"); case TOK_KW_FORALL: MATCH_STR("forall"); case TOK_CONST_TRUE: MATCH_STR("True"); case TOK_CONST_FALSE: MATCH_STR("False"); case TOK_IDENT: off = 0; while ((res = char_at(ctx, src, off++, &chr)) == 1) { if (! isalnum(chr)) break; m->matchlen++; } if (res == -1) return (-1); return (0); case TOK_WSPACE: off = 0; while((res = char_at(ctx, src, off++, &chr)) == 1) { if (! isspace(chr)) break; m->matchlen++; } if (res == -1) return (-1); return (0); default: SPHO_ASSERT(0); return (-1); break; } #undef MATCH_CHAR #undef MATCH_STR } #define TOK_HAS_DATA(type) (type == TOK_IDENT) static int tok_commit(struct msph_ctx *ctx, struct msph_token_src *src, struct msph_matcher *m, struct msph_token *ptr) { size_t pos_old; struct msph_text_pos tok_pos; struct msph_token_src_str *str; struct msph_token_src_file *file; SPHO_PRECOND(ctx != NULL && m != NULL); SPHO_PRECOND(m->matchlen != 0); tok_pos = src->pos; tok_update_pos(ctx, src, m); switch (src->type) { case MSPH_TOKEN_SRC_FILE: file = &src->inner.file; pos_old = file->pos; file->pos += m->matchlen; file->pos %= BUF_LEN(file->buf); SPHO_ASSERT(file->pos <= file->end || (file->pos < pos_old && file->pos < BUF_LEN(file->buf))); if (ptr == NULL) return (0); ptr->type = m->type; ptr->pos = tok_pos; if (! TOK_HAS_DATA(ptr->type)) return (0); if (m->matchlen >= sizeof(ptr->data.str)) { MSPH_ERR(ctx, MSPH_ERR_TOKEN_TOOLONG); return (-1); } if (fromcbuf_charcpy(ptr->data.str, file->buf, sizeof(file->buf), pos_old, m->matchlen) == -1) { MSPH_ERR(ctx, MSPH_ERR_TOKEN_TOOLONG); return (-1); } ptr->data.str[m->matchlen] = '\0'; return (0); case MSPH_TOKEN_SRC_STR: str = &src->inner.str; pos_old = str->pos; str->pos += m->matchlen; SPHO_ASSERT(str->pos <= str->len); if (ptr == NULL) return (0); ptr->type = m->type; ptr->pos = tok_pos; if (! TOK_HAS_DATA(ptr->type)) return (0); if (m->matchlen >= sizeof(ptr->data.str)) { MSPH_ERR(ctx, MSPH_ERR_TOKEN_TOOLONG); return (-1); } memcpy(ptr->data.str, str->s, m->matchlen * sizeof(str->s[0])); ptr->data.str[m->matchlen] = '\0'; return (0); default: return (-1); } } static void tok_update_pos(struct msph_ctx *ctx, struct msph_token_src *src, struct msph_matcher *m) { int res; char c; size_t i; for (i = 0; i < m->matchlen; i++) { res = char_at(ctx, src, i, &c); SPHO_ASSERT(res == 1); switch (c) { case '\t': src->pos.col += MSPH_TAB_WIDTH; break; case '\n': src->pos.line++; src->pos.col = 1; break; case '\r': break; default: src->pos.col++; break; } } } static const char * tok_base_str(struct msph_token *tok) { size_t i; for (i = 0; token_info[i].type != TOK_END; i++) { if (token_info[i].type == tok->type) return (token_info[i].dbg_str); } return (NULL); }