diff --git a/CMakeLists.txt b/CMakeLists.txt index f3c8263..34e7f44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,4 +55,5 @@ set(MSPH_HEADER ) add_executable(msph ${MSPH_HEADER} ${MSPH_SRC}) -target_include_directories(spho PRIVATE ${INCLUDE_DIR}) +target_include_directories(msph PRIVATE ${INCLUDE_DIR}) +target_link_libraries(devcheck spho) diff --git a/src/msph/msph_token.c b/src/msph/msph_token.c index 08c21aa..f69c8b6 100644 --- a/src/msph/msph_token.c +++ b/src/msph/msph_token.c @@ -3,9 +3,11 @@ #include #include #include +#include #include "spho/err.h" +#include "msph_token.h" #define MSPH_ERR_SYS 0x0001 @@ -14,6 +16,31 @@ #define MSPH_ERR(ctx, e) SPHO_ERR(ctx, e) #define MSPH_TOKS_ERR(toks, e) MSPH_ERR((toks)->ctx, e) +struct msph_token_matcher { + size_t off; + size_t matchlen; + + const int type; +} msph_matcher[] = { + { 0, 0, TOK_START }, + { 0, 0, TOK_IDENT }, + { 0, 0, TOK_END } +}; + +static ssize_t src_file_fill_buf(struct msph_ctx *, + struct msph_token_src_file *); +static int tok_match(struct msph_ctx *, struct msph_token_src *, + struct msph_token_matcher *); +static int tok_commit(struct msph_ctx *, struct msph_token_src *, + struct msph_token_matcher *, struct msph_token *); +static int char_at(struct msph_ctx *, struct msph_token_src *, size_t, + char *); +static int fromcbuf_charcpy(char *, const char *, size_t, size_t, size_t); +static int file_char_at(struct msph_ctx *, struct msph_token_src *, size_t, + char *out); +static int read_single_tok(struct msph_token *, struct msph_token_stream *); + + struct msph_token_stream * msph_token_stream_fopen(struct msph_ctx *ctx, const char *path) { @@ -34,13 +61,12 @@ msph_token_stream_fopen(struct msph_ctx *ctx, const char *path) ret->ctx = ctx; ret->src.type = MSPH_TOKEN_SRC_FILE; ret->src.inner.file.f = f; - ret->src.inner.file.eof = 0; - ret->src.inner.file.pos = ret->src.file.buf; - ret->src.inner.file.end = ret->src.file.buf; - ret->src.inner.file.read_ptr = ret->src.file.buf; + ret->src.inner.file.pos = 0; + ret->src.inner.file.end = 0; - res = strlcpy(ret->src.file.name, path, sizeof(ret->src.file.name)); - if (res >= sizeof(ret->src.file.name)) { + res = strlcpy(ret->src.inner.file.name, path, + sizeof(ret->src.inner.file.name)); + if (res >= sizeof(ret->src.inner.file.name)) { MSPH_ERR(ctx, MSPH_ERR_TOOLONG); goto err; } @@ -67,10 +93,10 @@ msph_token_stream_frombuf(struct msph_ctx *ctx, const char *buf, size_t len) } ret->ctx = ctx; - ret->type = MSPH_TOKEN_SRC_STR; - ret->src.str.s = buf; - ret->src.str.len = len; - ret->src.str.pos = 0; + ret->src.type = MSPH_TOKEN_SRC_STR; + ret->src.inner.str.s = buf; + ret->src.inner.str.len = len; + ret->src.inner.str.pos = 0; return (ret); } @@ -83,9 +109,9 @@ msph_token_stream_close(struct msph_token_stream *s) ret = -1; - switch (s->type) { + switch (s->src.type) { case MSPH_TOKEN_SRC_FILE: - ret = fclose(s->src.file.f); + ret = fclose(s->src.inner.file.f); break; case MSPH_TOKEN_SRC_STR: ret = 0; @@ -97,7 +123,6 @@ msph_token_stream_close(struct msph_token_stream *s) return (ret); } - /* -1 or num tokens read */ ssize_t msph_token_stream_read_tok(struct msph_token *ptr, size_t n, @@ -120,102 +145,6 @@ msph_token_stream_read_tok(struct msph_token *ptr, size_t n, return (ret); } -struct msph_token_matcher { - size_t pos_off; - size_t matchlen; - - const int tok_type; -} msph_matcher[] = { - { 0, 0, TOK_START }, - { 0, 0, TOK_IDENT }, - { 0, 0, TOK_END } -}; - -#define BUF_LEN(b) (sizeof(b) / sizeof(b[0])) -static int -file_char_at(struct msph_ctx *ctx, struct msph_token_src *src, size_t i, - char *out) -{ - size_t nread; - size_t maxread; - struct msph_token_src_file *file; - - ret = -1; - file = &src->inner.file; - - if (file->pos + i < file->end) { - *out = file->buf[file->pos + i]; - return (0); - } - if (file->end < file->pos && - ((file->pos + i) % BUF_LEN(file->buf)) < file->end) { - *out = file->buf[(file->pos + i) % BUF_LEN(file->buf)]; - return (0); - } - - if (file->eof) { - return (-1); - } - - if (file->end < file->pos) - maxread = file->pos - file->end; - else - maxread = BUF_LEN(file->buf) - file->end; - - maxread = file->end < file->pos ? file->pos - file->end : - BUF_LEN(file->buf) - file->end; - - if (maxread == 0) { - MSPH_ERR(ctx, MSPH_ERR_TOOLONG); - return (-1); - } - - ret = fread(&file->buf[file->end], sizeof(file->buf[0]), maxread, - file->f); - - if (ret == 0) { - if (ferror(file->f)) { - MSPH_ERR(ctx, MSPH_ERR_SYS); - return (-1); - } - file->eof = 1; - return (-1); - } -} - -static int -char_at(struct msph_token_src *src, size_t i, char *out) -{ - int ret; - - switch (src->type) { - case MSPH_TOKEN_SRC_FILE: - ret = file_char_at(s, i, out); - break; - case MSPH_TOKEN_SRC_STR: - ret = str_char_at(s, i, out); - break; - default: - break; - } - - return (ret); -} - -static int -tok_match(struct msph_token_matcher *m, struct msph_token_stream *s) -{ -} - -static void -tok_commit(struct msph_token *ptr, struct msph_token_stream *s, - struct msph_matcher *m) -{ - SPHO_PRECOND(p != NULL && m != NULL); - SPHO_PRECOND(m->matchlen != 0); - -} - /* 1: success, 0: failed match, -1: error */ static int read_single_tok(struct msph_token *ptr, struct msph_token_stream *s) @@ -223,10 +152,15 @@ read_single_tok(struct msph_token *ptr, struct msph_token_stream *s) int res; size_t m; size_t max_m; + struct msph_ctx *ctx; + struct msph_token_src *src; + + ctx = s->ctx; + src = &s->src; max_m = 0; for (m = 0; msph_matcher[m].type != TOK_END; m++) { - res = tok_match(&msph_matcher[m], s); + res = tok_match(ctx, src, &msph_matcher[m]); if (res == -1) return (-1); @@ -240,8 +174,306 @@ read_single_tok(struct msph_token *ptr, struct msph_token_stream *s) if (max_m == 0) return (0); - tok_commit(ptr, &msph_matcher[max_m]); + if (tok_commit(ctx, src, &msph_matcher[max_m], ptr) == -1) + return (-1); return (1); } + +#define BUF_LEN(b) (sizeof(b) / sizeof((b)[0])) + +static ssize_t +src_file_fill_buf(struct msph_ctx *ctx, struct msph_token_src_file *file) +{ + ssize_t ret; + size_t nread, maxread; + + ret = nread = maxread = 0; + do { + if (file->end < file->pos) + maxread = file->pos - file->end; + else + maxread = BUF_LEN(file->buf) - file->end; + + if (maxread == 0) { + MSPH_ERR(ctx, MSPH_ERR_TOOLONG); + return (-1); + } + + nread = fread(&file->buf[file->end], sizeof(file->buf[0]), + maxread, file->f); + + ret += nread; + file->end = (file->end + nread) % BUF_LEN(file->buf); + + if (nread < maxread) { + if (ferror(file->f)) { + MSPH_ERR(ctx, MSPH_ERR_SYS); + return (-1); + } + break; + } + + } while (file->end != file->pos); + + return (ret); +} + +/* reads a single char from the circular buffer in src */ +static int +file_char_at(struct msph_ctx *ctx, struct msph_token_src *src, size_t i, + char *out) +{ + int ret; + ssize_t fill; + struct msph_token_src_file *file; + + SPHO_PRECOND(s != NULL); + SPHO_PRECOND(s->src.type == MSPH_TOKEN_SRC_FILE); + + ret = 0; + file = &src->inner.file; + + do { + /* simplest case */ + if (file->pos + i < file->end) { + *out = file->buf[file->pos + i]; + return (1); + } + /* wrap around */ + if (file->end < file->pos && + ((file->pos + i) % BUF_LEN(file->buf)) < file->end) { + *out = file->buf[(file->pos + i) % BUF_LEN(file->buf)]; + return (1); + } + + if (feof(file->f)) + return (0); + if (src_file_fill_buf(ctx, file) == -1) + return (-1); + } while (ret++); + + return (-1); +} + +static int +char_at(struct msph_ctx *ctx, struct msph_token_src *src, size_t i, char *out) +{ + int ret; + struct msph_token_src_str *str; + + switch (src->type) { + case MSPH_TOKEN_SRC_FILE: + return (file_char_at(ctx, src, i, out)); + case MSPH_TOKEN_SRC_STR: + str = &src->inner.str; + if (str->pos + i < str->len) { + *out = str->s[str->pos + i]; + return (1); + } + return (0); + default: + break; + } + + return (ret); +} + +static int +fromcbuf_charcpy(char *dst, const char *src, size_t src_len, size_t src_pos, + size_t ncpy) +{ + size_t cpy1, cpy2; + if (src_len < ncpy) { + return (-1); + } + + cpy1 = src_pos + ncpy < src_len ? ncpy : src_len - src_pos; + cpy2 = ncpy - cpy1; + + memcpy(dst, &src[src_pos], cpy1 * sizeof(src[0])); + + if (! cpy2) + return (0); + + memcpy(dst, &src[0], cpy2 * sizeof(src[0])); + + return (0); +} + +static int +tok_match(struct msph_ctx *ctx, struct msph_token_src *src, + struct msph_token_matcher *m) +{ + int res; + int more; + char chr; + const char *match_str; + size_t off, len; + SPHO_PRECOND(m != NULL && s != NULL); + + m->matchlen = 0; + +#define MATCH_CHAR(c) \ + do { \ + if ((res = char_at(ctx, src, 0, &chr)) == -1) \ + return (-1); \ + else if (res == 0) \ + return (0); \ +\ + if (chr == (c)) { \ + m->matchlen = 1; \ + } \ + return (0); \ + } while (0) + +#define MATCH_STR(str) \ + do { \ + match_str = str; \ + len = strlen(match_str); \ + for (off = 0; off < len; off++) { \ + if ((res = char_at(ctx, src, off, &chr)) == -1) \ + return (-1); \ + else if (res == 0) \ + return (0); \ +\ + if (chr != match_str[off]) \ + break; \ + } \ + if (off == len) \ + m->matchlen = len; \ + return (0); \ + } while (0) + + switch (m->type) { + case TOK_LBRACE: + MATCH_CHAR('{'); + case TOK_RBRACE: + MATCH_CHAR('}'); + case TOK_LBRAK: + MATCH_CHAR('['); + case TOK_RBRAK: + MATCH_CHAR(']'); + case TOK_LPAREN: + MATCH_CHAR('('); + case TOK_RPAREN: + MATCH_CHAR(')'); + case TOK_COLON: + MATCH_CHAR(':'); + case TOK_EQUALS: + MATCH_CHAR('='); + case TOK_AMP: + MATCH_CHAR('&'); + case TOK_PIPE: + MATCH_CHAR('|'); + case TOK_RARROW: + MATCH_STR("=>"); + case TOK_SUB: + MATCH_STR("<:"); + case TOK_KW_TYPE: + MATCH_STR("type"); + case TOK_KW_NOMINAL: + MATCH_STR("nominal"); + case TOK_KW_MEMBER: + MATCH_STR("member"); + case TOK_KW_CHECK: + MATCH_STR("check"); + case TOK_KW_BOX: + MATCH_STR("box"); + case TOK_KW_FORALL: + MATCH_STR("forall"); + case TOK_CONST_TRUE: + MATCH_STR("True"); + case TOK_CONST_FALSE: + MATCH_STR("False"); + case TOK_IDENT: + off = 0; + while ((res = char_at(ctx, src, off++, &chr)) == 1) { + if (! isalnum(chr)) + break; + m->matchlen++; + } + if (res == -1) + return (-1); + return (0); + default: + SPHO_ASSERT(0); + return (-1); + break; + } +#undef MATCH_CHAR +#undef MATCH_STR +} + +#define TOK_HAS_DATA(type) (type == TOK_IDENT) +static int +tok_commit(struct msph_ctx *ctx, struct msph_token_src *src, + struct msph_token_matcher *m, struct msph_token *ptr) +{ + size_t pos_old; + struct msph_token_src_str *str; + struct msph_token_src_file *file; + + SPHO_PRECOND(p != NULL && m != NULL); + SPHO_PRECOND(m->matchlen != 0); + + switch (src->type) { + case MSPH_TOKEN_SRC_FILE: + file = &src->inner.file; + pos_old = file->pos; + + file->pos += m->matchlen; + file->pos %= BUF_LEN(file->buf); + SPHO_ASSERT(file->pos < BUF_LEN(file->buf) || + file->pos < pos_old); + + ptr->type = m->type; + if (! TOK_HAS_DATA(ptr->type)) + return (0); + + if (m->matchlen >= sizeof(ptr->d.s.buf)) { + MSPH_ERR(ctx, MSPH_ERR_TOOLONG); + return (-1); + } + + if (fromcbuf_charcpy(ptr->d.s.buf, file->buf, sizeof(file->buf), + pos_old, m->matchlen) == -1) { + MSPH_ERR(ctx, MSPH_ERR_TOOLONG); + return (-1); + } + + ptr->d.s.buf[m->matchlen] = '\0'; + return (0); + + case MSPH_TOKEN_SRC_STR: + str = &src->inner.str; + pos_old = str->pos; + + str->pos += m->matchlen; + SPHO_ASSERT(str->pos <= str->len); + + ptr->type = m->type; + if (! TOK_HAS_DATA(ptr->type)) + return (0); + + if (m->matchlen >= sizeof(ptr->d.s.buf)) { + MSPH_ERR(ctx, MSPH_ERR_TOOLONG); + return (-1); + } + + memcpy(ptr->d.s.buf, str->s, m->matchlen * + sizeof(str->s[0])); + ptr->d.s.buf[m->matchlen] = '\0'; + + return (0); + default: + return (-1); + } +} + + + + + + diff --git a/src/msph/msph_token.h b/src/msph/msph_token.h index 1e7a996..7207052 100644 --- a/src/msph/msph_token.h +++ b/src/msph/msph_token.h @@ -92,13 +92,13 @@ enum msph_tok_type { TOK_RBRAK, // ] TOK_LPAREN, // ( TOK_RPAREN, // ) - TOK_OP_COLON, // : - TOK_OP_EQUALS, // = + TOK_COLON, // : + TOK_EQUALS, // = - TOK_OP_AMP, // & - TOK_OP_PIPE, // | - TOK_OP_RARROW, // => - TOK_OP_SUB, // <: + TOK_AMP, // & + TOK_PIPE, // | + TOK_RARROW, // => + TOK_SUB, // <: TOK_KW_TYPE, // type TOK_KW_NOMINAL, // nominal @@ -135,12 +135,13 @@ struct msph_token { struct msph_token_src_file { FILE *f; - int eof; - size_t pos; // TODO rename bufpos - size_t end; // TODO rename bufend - size_t read_pos; + + /* circular buffer for reading */ + size_t pos; + size_t end; char buf[MSPH_FILE_BUF_LEN]; + /* file path */ char name[MSPH_PATH_LEN]; }; @@ -157,7 +158,7 @@ union msph_token_src_data { }; struct msph_token_src { - int type + int type; union msph_token_src_data inner; }; @@ -174,6 +175,8 @@ struct msph_token_stream *msph_token_stream_frombuf(struct msph_ctx *, int msph_token_stream_close(struct msph_token_stream*); -struct msph_token *msph_token_source_pop(struct msph_token_stream *); +ssize_t msph_token_stream_read_tok(struct msph_token *, size_t, + struct msph_token_stream *); + #endif /* _MSPH_EXPR_H */