git clone https://orangeshoelaces.net/git/tttm.git
Author: Vasily Kolobkov on 05/13/2016
Committer: Vasily Kolobkov on 05/13/2016
Parse input online
Shove larger lines to a memory mapped cache file.
errors.h | 15 +
laxsrc.c | 101 +
laxsrc.h | 10 +
parser.c | 778 +++++---
parser.h | 14 +-
pshades.c | 72 +-
6 files changed, 712 insertions(+), 278 deletions(-)
diff --git a/errors.h b/errors.h
new file mode 100644
index 0000000..03a3233
--- /dev/null
+++ b/errors.h
@@ -0,0 +1,15 @@
+enum { /* error codes shared by the input source, the cache and the parser */
+ TE_OK = 0, /* success; kept at 0 because callers test results with `if (e)' */
+
+ TE_EOF, /* source ended before the requested amount was read */
+ TE_IO, /* poll() or read() on the source failed */
+ TE_TIMEOUT, /* poll() on the source timed out */
+ TE_BUFOFLOW, /* putback chunk does not fit the source's buffer */
+
+ TE_PARSE, /* input not recognized */
+ TE_CACHEIO, /* lseek(), fstat() or pwrite() on the cache file failed */
+ TE_CACHEOFLOW, /* cache offset arithmetic overflowed */
+ TE_PTOFLOW, /* no room left in the parse-node array */
+ TE_VM, /* mmap() or munmap() of the cache failed */
+ TE_XLSTR, /* string longer than UINT32_MAX bytes */
+};
diff --git a/laxsrc.c b/laxsrc.c
new file mode 100644
index 0000000..f680844
--- /dev/null
+++ b/laxsrc.c
@@ -0,0 +1,101 @@
+#include <poll.h>
+#include <stddef.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "errors.h"
+#include "laxsrc.h"
+
+#define LEN(x) (sizeof(x) / sizeof((x)[0])) /* element count of an array object (not of a pointer) */
+#define MIN(a, b) ((a) <= (b) ? (a) : (b)) /* evaluates an argument twice: keep side effects out */
+
+static int timeout = 10000; /* handed to poll() as its timeout, i.e. milliseconds per wait */
+
+/*
+ * Put a source into its initial state and bind it to descriptor fd:
+ * no putback bytes pending, end of input not yet seen.
+ */
+void
+laxsrc_init(struct laxsrc *s, int fd)
+{
+	memset(s, 0, sizeof *s);
+	s->fd = fd;
+}
+
+/*
+ * Read between lo and hi bytes from the source into dst.
+ *
+ * Bytes handed back earlier with laxsrc_putback() are delivered first.
+ * After that the descriptor is polled and read until at least lo bytes
+ * have been gathered; every poll() waits at most `timeout' milliseconds.
+ *
+ * Returns the number of bytes stored in dst with *e set to TE_OK, or
+ * returns 0 with *e set to
+ *   TE_EOF      the source ended before lo bytes were gathered,
+ *   TE_IO       poll()/read() failed or the descriptor is in error,
+ *   TE_TIMEOUT  nothing became readable within the timeout.
+ * Reaching end of input after lo bytes were gathered is not an error;
+ * it is remembered in s->eof and no further reads are attempted.
+ */
+int
+laxsrc_read(struct laxsrc *s, char *dst, size_t lo, size_t hi, int *e)
+{
+	size_t boff;
+	size_t len;
+	struct pollfd pd;
+	int ready;
+	ssize_t n;
+
+	*e = TE_OK;
+	len = 0;
+	pd.fd = s->fd;
+	pd.events = POLLIN | POLLHUP;
+
+	if (s->buflen > 0) {
+		/* pending putback bytes occupy the tail of s->buf */
+		boff = LEN(s->buf) - s->buflen;
+		len = MIN(s->buflen, hi);
+		memcpy(dst, s->buf + boff, len);
+		s->buflen -= len;
+	}
+
+	if (s->eof) goto eof;
+
+	while (len < lo) {
+		ready = poll(&pd, 1, timeout);
+
+		if (ready == -1) goto eio;
+		if (ready == 0) goto eto;
+
+		/*
+		 * poll() also reports POLLERR/POLLNVAL, which are never
+		 * masked by `events'.  Without this check such a wake-up
+		 * would skip the read and spin in this loop forever.
+		 */
+		if (!(pd.revents & (POLLIN | POLLHUP)))
+			goto eio;
+
+		n = read(pd.fd, dst + len, hi - len);
+		if (n == -1)
+			goto eio;
+		if (n == 0)
+			goto eof;
+		len += (size_t)n;
+	}
+
+ exit:
+	return len;
+ eio:
+	*e = TE_IO;
+	len = 0;
+	goto exit;
+ eto:
+	*e = TE_TIMEOUT;
+	len = 0;
+	goto exit;
+ eof:
+	if (len < lo) {
+		*e = TE_EOF;
+		len = 0;
+	} else {
+		*e = TE_OK;
+	}
+	s->eof = 1;
+	goto exit;
+}
+
+/*
+ * Hand len bytes at chunk back to the source, so that the next
+ * laxsrc_read() delivers them before touching the descriptor.
+ *
+ * Chunks are stacked downwards from the end of s->buf: a chunk put back
+ * later is read before chunks put back earlier.  A caller returning
+ * several pieces of input must therefore push the last piece first.
+ *
+ * Returns TE_OK, or TE_BUFOFLOW when the chunk does not fit into the
+ * space still free; nothing is stored in that case.
+ */
+int
+laxsrc_putback(struct laxsrc *s, char *chunk, size_t len)
+{
+	size_t boff;
+
+	/* compare against the free space: s->buflen + len could wrap */
+	if (len > LEN(s->buf) - s->buflen)
+		return TE_BUFOFLOW;
+
+	s->buflen += len;
+	boff = LEN(s->buf) - s->buflen;
+	memcpy(s->buf + boff, chunk, len);
+	return TE_OK;
+}
diff --git a/laxsrc.h b/laxsrc.h
new file mode 100644
index 0000000..b76eaac
--- /dev/null
+++ b/laxsrc.h
@@ -0,0 +1,10 @@
+struct laxsrc { /* input source over a file descriptor, with room to put bytes back */
+ int fd; /* descriptor that laxsrc_read() polls and reads */
+ int eof; /* set to 1 once laxsrc_read() has seen read() return 0 */
+ char buf[1024]; /* putback storage; pending bytes occupy the last buflen bytes */
+ size_t buflen; /* number of putback bytes still pending */
+};
+
+void laxsrc_init(struct laxsrc *, int); /* (source, fd) */
+int laxsrc_read(struct laxsrc *, char *, size_t, size_t, int *); /* (source, dst, lo, hi, error out) */
+int laxsrc_putback(struct laxsrc *, char *, size_t); /* (source, chunk, len) */
diff --git a/parser.c b/parser.c
index 0208efc..6f7ba02 100644
--- a/parser.c
+++ b/parser.c
@@ -8,10 +8,21 @@
#include <stdlib.h>
#include <string.h>
#include <strings.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include "errors.h"
+#include "laxsrc.h"
#include "parser.h"
-#define LEN(a) (sizeof(a) / sizeof(a)[0])
+#define LEN(x) (sizeof(x) / sizeof((x)[0]))
+#define MAX(a, b) ((a) >= (b) ? (a) : (b))
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+#define ASCII_CTL \
+ "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
static struct literal {
int val;
@@ -112,51 +123,59 @@ static struct literal {
{ IL_VIDEO, "VIDEO", 5 },
};
-static const char astr_specials[] = ""
- "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- " \"%()*\\{\x7f";
+static const char astr_specials[] = ASCII_CTL " \"%()*\\{\x7f";
-static const char atom_specials[] = ""
- "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- " \"%()*\\]{\x7f";
+static const char atom_specials[] = ASCII_CTL " \"%()*\\]{\x7f";
-static const char tag_specials[] = ""
- "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- " \"%()*+\\{\x7f";
+static const char tag_specials[] = ASCII_CTL " \"%()*+\\{\x7f";
+
+static const size_t wndcap = 1024;
struct parcur { /* parse position: where in the input, and where in the node array */
- const char *tok;
+ size_t off; /* input offset of the next unconsumed byte */
union parnode *pt; /* next parse node to be filled */
};
struct parctx { /* state of one par_readln() run */
struct parcur cur; /* current parse position */
- const char *tok;
- const char *tokend;
- size_t toklen;
+ int e; /* outcome of the latest step: TE_OK or a TE_* error code */
+ char *wnd; /* window: the slice of input that is addressable right now */
+ size_t wndlee; /* number of valid bytes in the window */
+ size_t wndoff; /* input offset of wnd[0] */
union parnode *pt; /* caller's node array */
union parnode *ptend; /* one past its last element */
size_t ptlen; /* its element count */
- int e;
- size_t oblth;
+ char *buf; /* caller's line buffer; the window points into it while storage is SS_MEM */
+ size_t buflen; /* size of buf */
+ int cache; /* descriptor of the cache file */
+ off_t corig; /* cache file offset at which this run's input starts */
+ struct laxsrc *in; /* source the input is read from */
+ size_t strlen; /* number of input bytes read from the source so far */
+ enum { SS_MEM, SS_CACHE } strstor; /* whether the input lives in buf or in the cache file */
};
typedef int parfn(struct parctx *);
-int par_respln(const char *, size_t, union parnode *, size_t, size_t);
+int par_readln(struct laxsrc *, int, char *, size_t, union parnode *, size_t);
static int cmpchr(const void *, const void *);
static int contains(const char *, size_t, char);
+static void par_init(struct parctx *, struct laxsrc *, int,
+ char *, size_t, union parnode *, size_t);
+static int par_procure(struct parctx *, size_t, size_t);
+static int par_movewnd(struct parctx *, size_t);
+static int par_primecache(struct parctx *);
+static int par_prycache(struct parctx *, size_t);
+static int par_readnup(struct parctx *, size_t);
+static int par_peek(struct parctx *, size_t, char *);
+static int par_backup(struct parctx *);
+
static int p_chk(struct parctx *, struct parcur *);
static int p_insint(struct parctx *, int, size_t);
static int p_inslit(struct parctx *, int);
static int p_insnum(struct parctx *, uint32_t);
-static int p_insoblstr(struct parctx *, uint32_t);
-static int p_insstr(struct parctx *, int, const char *, uint32_t);
+static int p_insstr(struct parctx *, int, size_t, uint32_t);
static int p_beg(struct parctx *, struct parcur *, int);
static int p_end(struct parctx *, struct parcur *);
static int p_rwd(struct parctx *, struct parcur *);
@@ -297,14 +316,16 @@ static int p_opar(struct parctx *);
static int p_sp(struct parctx *);
int
-par_respln(const char *tok, size_t toklen, union parnode *pt,
- size_t ptlen, size_t oblth)
+par_readln(struct laxsrc *in, int cache, char *buf, size_t buflen,
+ union parnode *pt, size_t ptlen)
{ /* run the p_respln production over input read from `in', filling pt[0..ptlen); returns TE_OK or a TE_* error code */
- struct parctx p = { { tok, pt}, tok, tok + toklen, toklen,
- pt, pt + ptlen, ptlen, 0, oblth };
+ int e;
+ struct parctx p;
+ par_init(&p, in, cache, buf, buflen, pt, ptlen);
p_respln(&p); /* the outcome is left in p.e */
- return p.e;
+ e = par_backup(&p); /* give bytes read beyond the parse position back to the source; NOTE(review): a cache window mapped by par_movewnd() is not unmapped here */
+ return e ? e : p.e; /* a putback failure takes precedence over the parse status */
}
int
@@ -319,6 +340,213 @@ contains(const char *s, size_t slen, char c)
return bsearch(&c, s, slen, sizeof(char), cmpchr) != 0;
}
+/*
+ * Prepare a parse context: input comes from `in', is kept in buf
+ * (buflen bytes) for as long as it fits and in the file `cache'
+ * afterwards; parse nodes go to pt[0..ptlen).  The window starts out
+ * at the beginning of buf with no valid bytes in it.
+ */
+void
+par_init(struct parctx *p, struct laxsrc *in, int cache,
+    char *buf, size_t buflen, union parnode *pt, size_t ptlen)
+{
+	memset(p, 0, sizeof *p);
+
+	/* where the input comes from and where it is kept */
+	p->in = in;
+	p->cache = cache;
+	p->buf = buf;
+	p->buflen = buflen;
+	p->wnd = buf;
+	p->strstor = SS_MEM;
+
+	/* where the parse tree goes */
+	p->pt = pt;
+	p->ptlen = ptlen;
+	p->ptend = pt + ptlen;
+	p->cur.pt = pt;
+}
+
+/*
+ Prepare window for accessing string symbols with indexes [off..off + len).
+ When reading from source to fulfill the request, no less than the needed
+ amount and anything over that that fits the window is transferred.
+
+ Returns TE_OK on success, otherwise the error reported by par_movewnd()
+ or par_readnup().
+
+ precond: len <= wndcap
+*/
+int
+par_procure(struct parctx *p, size_t off, size_t len)
+{
+ int e;
+ size_t tend;
+ int wndmiss, readbound;
+
+ e = TE_OK;
+ tend = off + len; /* one past the last requested index */
+
+ if (off >= p->wndoff &&
+ off + len <= p->wndoff + p->wndlee)
+ goto exit; /* the range already lies within the valid part of the window */
+
+ wndmiss = off < p->wndoff || tend > p->wndoff + wndcap; /* the window cannot cover the range from where it is now */
+ if (wndmiss && (e = par_movewnd(p, off)))
+ goto exit;
+
+ readbound = tend > p->strlen; /* the range reaches past the bytes read so far */
+ if (readbound)
+ e = par_readnup(p, tend - p->strlen);
+
+ exit:
+ return e;
+}
+
+/*
+ * Reposition the window so that it starts at input offset off.
+ *
+ * While the input is kept in the caller's buffer and a whole window
+ * fits there, the window simply points into the buffer.  Otherwise the
+ * input is moved to the cache file (first time only), the previous
+ * mapping is released, the file is grown as needed and the window
+ * becomes a wndcap-byte mapping of the cache at corig + off.
+ *
+ * Returns TE_OK, TE_VM when mapping or unmapping fails, or the error
+ * reported by par_primecache() / par_prycache().
+ */
+int
+par_movewnd(struct parctx *p, size_t off)
+{
+	int e;
+	char *ci;
+
+	e = TE_OK;
+	if (p->strstor == SS_MEM && off + wndcap <= p->buflen) {
+		p->wnd = p->buf + off;
+		goto finwnd;
+	}
+
+	if (p->strstor == SS_MEM) {
+		if ((e = par_primecache(p)))
+			goto exit;
+	} else {
+		if (munmap(p->wnd, wndcap) == -1)
+			goto evm;
+	}
+	if ((e = par_prycache(p, off + wndcap)))
+		goto exit;
+
+	/*
+	 * Input is read straight into the window (see par_readnup), so the
+	 * mapping must be shared: stores to a MAP_PRIVATE mapping are never
+	 * carried through to the file and would be lost once the window
+	 * moves on.
+	 * NOTE(review): mmap() requires a page-aligned file offset, which
+	 * corig + off is not guaranteed to be -- confirm.
+	 */
+	ci = mmap(0, wndcap, PROT_READ | PROT_WRITE,
+	    MAP_SHARED, p->cache, p->corig + off);
+	if (ci == MAP_FAILED)
+		goto evm;
+
+	p->wnd = ci;
+
+ finwnd:
+	/* NOTE(review): p->strlen - off wraps when off > p->strlen -- confirm callers never ask for that */
+	p->wndlee = MIN(wndcap, p->strlen - off);
+	p->wndoff = off;
+ exit:
+	return e;
+ evm:
+	e = TE_VM;
+	goto exit;
+}
+
+/*
+ * Move the input's storage from the caller's buffer to the cache file.
+ *
+ * The cache's current file position becomes the origin of this input
+ * (corig), the file is grown to hold buflen bytes from there, and the
+ * buffer is copied in through a temporary mapping.
+ *
+ * Returns TE_OK, TE_CACHEIO when the file position cannot be obtained,
+ * TE_VM when mapping or unmapping fails, or the error reported by
+ * par_prycache().
+ */
+int
+par_primecache(struct parctx *p)
+{
+	int e;
+	off_t off;
+	void *ci;
+
+	e = TE_OK;
+	if ((off = lseek(p->cache, 0, SEEK_CUR)) == -1)
+		goto eio;
+
+	p->corig = off;
+	if ((e = par_prycache(p, p->buflen)))
+		goto exit;
+
+	/*
+	 * The copy has to land in the file itself.  Stores to a MAP_PRIVATE
+	 * mapping are never carried through to the file, so the buffered
+	 * bytes would be gone after munmap(); map it shared instead.
+	 * NOTE(review): mmap() requires a page-aligned file offset --
+	 * confirm that corig always is.
+	 */
+	ci = mmap(0, p->buflen, PROT_WRITE, MAP_SHARED, p->cache, p->corig);
+	if (ci == MAP_FAILED)
+		goto evm;
+
+	memcpy(ci, p->buf, p->buflen);
+	if (munmap(ci, p->buflen) == -1)
+		goto evm;
+
+	p->strstor = SS_CACHE;
+ exit:
+	return e;
+ eio:
+	e = TE_CACHEIO;
+	goto exit;
+ evm:
+	e = TE_VM;
+	goto exit;
+}
+
+/*
+ * Make sure the cache file reaches at least len bytes past corig,
+ * growing it by writing a single zero byte at the last position
+ * when it is shorter.
+ *
+ * Returns TE_OK, TE_CACHEIO when fstat() or pwrite() fails, or
+ * TE_CACHEOFLOW when corig + len overflows.
+ */
+int
+par_prycache(struct parctx *p, size_t len)
+{
+	struct stat cs;
+	size_t flen;
+
+	if (fstat(p->cache, &cs) == -1)
+		return TE_CACHEIO;
+
+	flen = len + p->corig;
+	if (flen < MAX(len, p->corig) || flen > INT64_MAX)
+		return TE_CACHEOFLOW;
+
+	/* long enough already */
+	if (!(cs.st_size < flen))
+		return TE_OK;
+
+	if (pwrite(p->cache, "", 1, flen - 1) != 1)
+		return TE_CACHEIO;
+
+	return TE_OK;
+}
+
+/*
+ * Read at least n more input bytes, and at most as many as the window
+ * still has room for, appending them to the window's valid part.
+ * On success the window's and the input's lengths grow by the amount
+ * read.  Returns the error code set by laxsrc_read().
+ */
+int
+par_readnup(struct parctx *p, size_t n)
+{
+	size_t room, got;
+	int e;
+
+	room = wndcap - p->wndlee;
+	got = laxsrc_read(p->in, p->wnd + p->wndlee, n, room, &e);
+	if (e != TE_OK || got == 0)
+		return e;
+
+	p->wndlee += got;
+	p->strlen += got;
+	return e;
+}
+
+/*
+ * Fetch the input byte at offset off into *c without consuming it.
+ * Returns TE_OK, or the error from par_procure(), in which case *c is
+ * left untouched.
+ */
+int
+par_peek(struct parctx *p, size_t off, char *c)
+{
+	int e;
+
+	e = par_procure(p, off, 1);
+	if (e == TE_OK)
+		*c = p->wnd[off - p->wndoff];
+	return e;
+}
+
+/*
+ * Give every byte that was read but not consumed -- input offsets
+ * [cur.off, strlen) -- back to the source.
+ *
+ * The bytes are pushed one wndcap-aligned chunk at a time, last chunk
+ * first, because laxsrc_putback() stacks chunks: what is pushed last is
+ * read first.  The chunk holding cur.off is pushed from cur.off on.
+ *
+ * Returns TE_OK, or the first error from par_movewnd() or
+ * laxsrc_putback().
+ */
+int
+par_backup(struct parctx *p)
+{
+	int e;
+	size_t end;	/* one past the last byte not yet put back */
+	size_t beg;	/* start of the chunk that holds byte end - 1 */
+	size_t wndlag;
+
+	e = TE_OK;
+
+	/*
+	 * Walk downwards on `end' rather than on a chunk offset: the loop
+	 * ends without ever stepping below zero (cur.off may be 0), copes
+	 * with an empty input, and still visits a chunk that starts before
+	 * cur.off.
+	 */
+	for (end = p->strlen; end > p->cur.off; end = beg + wndlag) {
+		beg = (end - 1) / wndcap * wndcap;
+		if ((e = par_movewnd(p, beg)))
+			break;
+
+		wndlag = MAX(beg, p->cur.off) - p->wndoff;
+		e = laxsrc_putback(p->in, p->wnd + wndlag, p->wndlee - wndlag);
+		if (e)
+			break;
+	}
+	return e;
+}
+
/*
Combinators and misc meta critters
*/
@@ -337,13 +565,13 @@ p_insint(struct parctx *p, int prod, size_t len)
cur = p->cur.pt;
if (cur == p->ptend) {
- p->e = PE_NOTENNODES;
+ p->e = TE_PTOFLOW;
return 0;
}
cur->inter.type = PN_INTER;
cur->inter.prod = prod;
cur->inter.len = len;
- p->e = PE_OK;
+ p->e = TE_OK;
p->cur.pt++;
return 1;
}
@@ -355,12 +583,12 @@ p_inslit(struct parctx *p, int val)
cur = p->cur.pt;
if (cur == p->ptend) {
- p->e = PE_NOTENNODES;
+ p->e = TE_PTOFLOW;
return 0;
}
cur->lit.type = PN_LIT;
cur->lit.val = val;
- p->e = PE_OK;
+ p->e = TE_OK;
p->cur.pt++;
return 1;
}
@@ -372,48 +600,30 @@ p_insnum(struct parctx *p, uint32_t val)
cur = p->cur.pt;
if (cur == p->ptend) {
- p->e = PE_NOTENNODES;
+ p->e = TE_PTOFLOW;
return 0;
}
cur->lit.type = PN_NUM;
cur->lit.val = val;
- p->e = PE_OK;
+ p->e = TE_OK;
p->cur.pt++;
return 1;
}
int
-p_insoblstr(struct parctx *p, uint32_t len)
+p_insstr(struct parctx *p, int type, size_t off, uint32_t len)
{ /* append a string node of the given type covering input bytes [off, off + len); returns 1, or 0 with p->e = TE_PTOFLOW when the node array is full */
union parnode *cur;
cur = p->cur.pt;
if (cur == p->ptend) {
- p->e = PE_NOTENNODES;
- return 0;
- }
- cur->str.type = PN_OBLSTR;
- cur->str.len = len;
- cur->str.iob = 0;
- p->e = PE_OK;
- p->cur.pt++;
- return 1;
-}
-
-int
-p_insstr(struct parctx *p, int type, const char *tok, uint32_t len)
-{
- union parnode *cur;
-
- cur = p->cur.pt;
- if (cur == p->ptend) {
- p->e = PE_NOTENNODES;
+ p->e = TE_PTOFLOW;
return 0;
}
cur->str.type = type;
+ cur->str.off = off; /* the node records an input offset, not a pointer */
cur->str.len = len;
- cur->str.tok = tok;
- p->e = PE_OK;
+ p->e = TE_OK;
p->cur.pt++;
return 1;
}
@@ -428,7 +638,7 @@ int
p_end(struct parctx *p, struct parcur *beg)
{
beg->pt->inter.len = p->cur.pt - beg->pt - 1;
- p->e = PE_OK;
+ p->e = TE_OK;
return 1;
}
@@ -437,7 +647,7 @@ p_rwd(struct parctx *p, struct parcur *cp)
{
p->cur = *cp;
if (!p->e)
- p->e = PE_PARSE;
+ p->e = TE_PARSE;
return 0;
}
@@ -445,8 +655,8 @@ p_rwd(struct parctx *p, struct parcur *cp)
int
p_opt(struct parctx *p)
{ /* forgive a plain parse failure (turning it into success); any other error stands.  Returns 1 when p->e ends up TE_OK */
- if (p->e == PE_PARSE)
- p->e = PE_OK;
+ if (p->e == TE_PARSE)
+ p->e = TE_OK;
return !p->e;
}
@@ -467,7 +677,7 @@ p_listsep(struct parctx *p, parfn *prod, int sep)
{
int res;
- for(res = prod(p); res && p_2xcombo(p, sep, prod);)
+ for (res = prod(p); res && p_2xcombo(p, sep, prod);)
;
return res && p_opt(p);
}
@@ -487,29 +697,50 @@ p_rep(struct parctx *p, parfn *prod)
int
p_repchr(struct parctx *p, const char *except, size_t elen, int strtype)
{ /* take the longest non-empty run of bytes that are not in except[0..elen) and add it as a string node of type strtype; returns 1 on success, 0 with p->e set otherwise */
- const char *t;
+ size_t pr; /* input offset of the byte under examination */
+ char *prp; /* that byte's address inside the window */
+ size_t wndlag; /* pr relative to the start of the window */
+ size_t leeway; /* input offset one past the window's valid bytes */
size_t len;
- t = p->cur.tok;
- while (t != p->tokend && !contains(except, elen, *t))
- t++;
+ pr = p->cur.off;
+ while ((p->e = par_procure(p, pr, 1)) == TE_OK) { /* make byte pr addressable, reading more input when needed */
+ wndlag = pr - p->wndoff;
+ leeway = p->wndoff + p->wndlee;
- len = t - p->cur.tok;
- if (len == 0) {
- p->e = PE_PARSE;
- goto exit;
- } else if (len > UINT32_MAX) {
- p->e = PE_STRTOOBIG;
- goto exit;
+ for (prp = p->wnd + wndlag; pr < leeway; pr++, prp++) { /* scan all the window holds before asking for more */
+ if (contains(except, elen, *prp))
+ goto stop;
+ }
}
- if (!p_insstr(p, strtype, p->cur.tok, (uint32_t)len))
+ stop:
+ if (p->e == TE_EOF) /* running out of input merely ends the run */
+ p->e = TE_OK;
+
+ if (p->e != TE_OK)
+ goto exit;
+
+ len = pr - p->cur.off;
+ if (len == 0) /* an empty run is a mismatch */
+ goto eparse;
+
+ if (len > UINT32_MAX) /* a node's length field is 32 bits wide */
+ goto exl;
+
+ if (!p_insstr(p, strtype, p->cur.off, (uint32_t)len))
goto exit;
- p->cur.tok = t;
- p->e = PE_OK;
+ p->cur.off = pr; /* consume the run */
+ p->e = TE_OK;
exit:
- return !p->e;
+ return p->e == TE_OK;
+ eparse:
+ p->e = TE_PARSE;
+ goto exit;
+ exl:
+ p->e = TE_XLSTR;
+ goto exit;
}
int
@@ -608,7 +839,9 @@ p_badcsopt(struct parctx *p)
int
p_base64(struct parctx *p)
{
- /* you won't get far w/o b64 */
+ /* base64 is not parsed yet: always fail with a parse error */
+ p->e = TE_PARSE;
+ return 0;
}
int
@@ -850,30 +1083,56 @@ p_dayfix(struct parctx *p)
+/*
+ * Parse a run of decimal digits into a PN_NUM node.
+ *
+ * count is the exact number of digits wanted; SIZE_MAX means "one or
+ * more, as many as there are" (that is how p_num() calls it).
+ *
+ * Returns 1 on success.  Returns 0 with p->e = TE_PARSE when there is
+ * no digit, when a fixed-width run is cut short or when the value does
+ * not fit in 32 bits; other errors come from par_procure()/p_insnum().
+ */
int
p_dig(struct parctx *p, size_t count)
{
+	size_t pr;
+	char *prp;
+	size_t wndlag;
+	size_t leeway;
+	size_t len;
uint32_t n, prev;
- const char *t;
-
- prev = n = 0;
- t = p->cur.tok;
-
- while (t != p->tokend && count && *t >= '0' && *t <= '9') {
- prev = n;
- n = UINT32_MAX & (10 * n + (*t - 48));
- if (prev > n) {
- p->e = PE_PARSE;
- goto exit;
+	size_t d;
+
+	pr = p->cur.off;
+	n = prev = 0;
+	/* d is the countdown step: 0 leaves an unbounded count untouched */
+	d = count == SIZE_MAX ? 0 : 1;
+
+	while ((p->e = par_procure(p, pr, 1)) == TE_OK) {
+		wndlag = pr - p->wndoff;
+		leeway = p->wndoff + p->wndlee;
+
+		for (prp = p->wnd + wndlag; pr < leeway; pr++, prp++) {
+			/* a byte is a non-digit when it lies below '0' OR above '9' */
+			if (count == 0 || *prp < '0' || *prp > '9')
+				goto stop;
+			prev = n;
+			/* check before multiplying: a wrapped product may still exceed prev */
+			if (prev > (UINT32_MAX - (uint32_t)(*prp - '0')) / 10) {
+				p->e = TE_PARSE;
+				goto stop;
+			}
+			n = 10 * prev + (uint32_t)(*prp - '0');
+			count -= d;
}
- t++;
- count--;
- }
- if (t == p->cur.tok || count != 0) {
- p->e = PE_PARSE;
- } else if (p_insnum(p, n)) {
- p->cur.tok = t;
- p->e = PE_OK;
}
+ stop:
+	if (p->e == TE_EOF)
+		p->e = TE_OK;
+
+	if (p->e != TE_OK)
+		goto exit;
+
+	len = pr - p->cur.off;
+	/* no digit at all, or fewer digits than a fixed width demands */
+	if (len == 0 || (d != 0 && count != 0))
+		goto eparse;
+
+	if (!p_insnum(p, n))
+		goto exit;
+
+	p->cur.off = pr;
+	p->e = TE_OK;
+
exit:
- return !p->e;
+	return p->e == TE_OK;
+ eparse:
+	p->e = TE_PARSE;
+	goto exit;
}
int
@@ -1020,21 +1279,51 @@ p_gencode(struct parctx *p)
+/*
+ * Take the longest non-empty run of 7-bit bytes other than NUL, ']',
+ * CR and LF and add it as a PN_STR node.
+ *
+ * Returns 1 on success.  Returns 0 with p->e = TE_PARSE for an empty
+ * run, TE_XLSTR for a run longer than UINT32_MAX, or with the error
+ * from par_procure()/p_insstr().
+ */
int
p_genctext(struct parctx *p)
{
- int res;
- const char *t;
+	size_t pr;
+	char *prp;
+	size_t wndlag;
+	size_t leeway;
+	size_t len;
+
+	pr = p->cur.off;
+	while ((p->e = par_procure(p, pr, 1)) == TE_OK) {
+		wndlag = pr - p->wndoff;
+		leeway = p->wndoff + p->wndlee;
- t = p->cur.tok;
+		for (prp = p->wnd + wndlag; pr < leeway; pr++, prp++) {
+			/*
+			 * Test the high bit on an unsigned value: where plain
+			 * char is signed, bytes >= 0x80 are negative and
+			 * never compare greater than '\x7f'.
+			 */
+			if (*prp == 0 || (unsigned char)*prp > 0x7f ||
+			    *prp == ']' || *prp == '\r' || *prp == '\n')
+				goto stop;
+		}
+	}
+ stop:
+	if (p->e == TE_EOF)
+		p->e = TE_OK;
+
+	if (p->e != TE_OK)
+		goto exit;
+
+	len = pr - p->cur.off;
+	if (len == 0)
+		goto eparse;
- while (t != p->tokend && *t >= '\1' && *t <= '\x7f' &&
- *t != ']' && *t != '\r' && *t != '\n')
- t++;
+	if (len > UINT32_MAX)
+		goto exl;
- res = t > p->cur.tok;
- if (res)
- p->cur.tok = t;
+	if (!p_insstr(p, PN_STR, p->cur.off, (uint32_t)len))
+		goto exit;
- p->e = !res;
- return res;
+	p->cur.off = pr;
+	p->e = TE_OK;
+
+ exit:
+	return p->e == TE_OK;
+ eparse:
+	p->e = TE_PARSE;
+	goto exit;
+ exl:
+	p->e = TE_XLSTR;
+	goto exit;
}
int
@@ -1055,53 +1344,62 @@ p_hdrlist(struct parctx *p)
+/*
+ * Match the literal with index val, ignoring case, at the current
+ * position and add a PN_LIT node for it.
+ *
+ * Returns 1 on success.  Returns 0 with p->e = TE_PARSE when the input
+ * differs from the literal or ends before the literal is complete, or
+ * with the error from par_procure()/p_inslit().
+ */
int
p_lit(struct parctx *p, int val)
{
- size_t left;
struct literal *lit;
+	size_t wndlag;
- left = p->tokend - p->cur.tok;
lit = literals + val;
- if (left < lit->slen) {
- p->e = PE_PARSE;
+	if ((p->e = par_procure(p, p->cur.off, lit->slen))) {
+		/*
+		 * Too little input left to hold the literal is a mismatch,
+		 * as it was before; left as TE_EOF it would not be
+		 * forgiven by p_opt() and would abort the whole parse.
+		 */
+		if (p->e == TE_EOF)
+			p->e = TE_PARSE;
goto exit;
}
- if (strncasecmp(p->cur.tok, lit->srep, lit->slen) == 0) {
+
+	wndlag = p->cur.off - p->wndoff;
+	if (strncasecmp(p->wnd + wndlag, lit->srep, lit->slen) == 0) {
if (p_inslit(p, val)) {
- p->cur.tok += lit->slen;
- p->e = PE_OK;
+			p->cur.off += lit->slen;
+			p->e = TE_OK;
}
} else {
- p->e = PE_PARSE;
+		p->e = TE_PARSE;
}
exit:
- return !p->e;
+	return p->e == TE_OK;
}
int
p_lstr(struct parctx *p)
{
- int res;
size_t len;
struct parcur b, n;
- res = p_chk(p, &b) && p_lit(p, IL_OBRACE) &&
+ p_chk(p, &b) && p_lit(p, IL_OBRACE) &&
p_chk(p, &n) && p_num(p) && p_lit(p, IL_CBRACE) &&
p_lit(p, IL_EOL) || p_rwd(p, &b);
- if (res) {
- len = n.pt->num.val;
- if (len > UINT32_MAX) {
- p->e = PE_STRTOOBIG;
- res = 0;
- } else if (len < p->oblth) {
- res = p_insstr(p, PN_LSTR, p->cur.tok, (uint32_t)len);
- if (res)
- p->cur.tok += len;
- } else {
- res = p_insoblstr(p, (uint32_t)len);
- }
- }
- return res;
+ if (p->e != TE_OK)
+ goto exit;
+
+ len = n.pt->num.val;
+ if (len > UINT32_MAX)
+ goto exl;
+
+ if (len > 0 && (p->e = par_procure(p, p->cur.off + len - 1, 1)))
+ goto eproc;
+
+ if (!p_insstr(p, PN_LSTR, p->cur.off, (uint32_t)len))
+ goto exit;
+
+ p->cur.off += len;
+ p->e = TE_OK;
+
+ exit:
+ return p->e == TE_OK;
+ eproc:
+ if (p->e == TE_EOF)
+ p->e = TE_PARSE;
+ goto exit;
+ exl:
+ p->e = TE_XLSTR;
+ goto exit;
}
int
@@ -1379,45 +1677,31 @@ p_nstr(struct parctx *p)
int
p_num(struct parctx *p)
{
- uint32_t n, prev;
- const char *t;
-
- prev = n = 0;
- t = p->cur.tok;
-
- while (t != p->tokend && *t >= '0' && *t <= '9') {
- prev = n;
- n = UINT32_MAX & (10 * n + (*t - 48));
- if (prev > n) {
- p->e = PE_PARSE;
- goto exit;
- }
- t++;
- }
- if (t == p->cur.tok) {
- p->e = PE_PARSE;
- } else if (p_insnum(p, n)) {
- p->cur.tok = t;
- p->e = PE_OK;
- }
- exit:
- return !p->e;
+ return p_dig(p, SIZE_MAX); /* SIZE_MAX is p_dig()'s marker for "no fixed digit count" */
}
int
p_nznum(struct parctx *p)
{
- const char *t;
-
- t = p->cur.tok;
+ size_t wndlag;
+ const char *prp;
- if (t == p->tokend || *t < '1' || *t > '9') {
- p->e = PE_PARSE;
+ if ((p->e = par_procure(p, p->cur.off, 1)))
goto exit;
- }
+
+ wndlag = p->cur.off - p->wndoff;
+ prp = p->wnd + wndlag;
+
+ if (*prp < '1' || *prp > '9')
+ goto eparse;
+
p_num(p);
+
exit:
- return !p->e;
+ return p->e == TE_OK;
+ eparse:
+ p->e = TE_PARSE;
+ goto exit;
}
int
@@ -1440,83 +1724,98 @@ p_permflcode(struct parctx *p)
+/*
+ * Parse one quoted-string character and add it as a PN_QSTR node:
+ * either a single 7-bit byte other than NUL, CR, LF, '"' and '\', or
+ * one of the two-byte escapes \" and \\.
+ *
+ * Returns 1 on success.  Returns 0 with p->e = TE_PARSE when the input
+ * does not match or ends too early, or with the error from
+ * par_peek()/p_insstr().
+ */
int
p_qchar(struct parctx *p)
{
- const char *t;
+	char x, y;
int len;
- size_t left;
- t = p->cur.tok;
- left = p->tokend - p->cur.tok;
-
- if (t == p->tokend) {
- p->e = PE_PARSE;
+	if ((p->e = par_peek(p, p->cur.off, &x))) {
+		/* no input left is a mismatch, not a failure of the parse */
+		if (p->e == TE_EOF)
+			p->e = TE_PARSE;
goto exit;
}
- if (*t >= '\1' && *t <= '\x7f' && *t != '\r' &&
- *t != '\n' && *t != '"' && *t != '\\') {
+	if (x >= '\1' && x <= '\x7f' && x != '\r' &&
+	    x != '\n' && x != '"' && x != '\\') {
len = 1;
- } else if (left >= 2 &&
- (strncmp(t, "\\\"", 2) == 0 || strncmp(t, "\\\\", 2) == 0)) {
- len = 2;
- } else {
- p->e = PE_PARSE;
+		goto ins;
+	}
+	/*
+	 * Only an escape needs its second byte.  Peeking ahead for anything
+	 * else could wait for, or fail on, input the parse does not need.
+	 */
+	if (x != '\\')
+		goto eparse;
+	if ((p->e = par_peek(p, p->cur.off + 1, &y))) {
+		if (p->e == TE_EOF)
+			p->e = TE_PARSE;
goto exit;
+	}
+	if (y == '"' || y == '\\') {
+		len = 2;
+		goto ins;
}
+	goto eparse;
- if (!p_insstr(p, PN_QSTR, p->cur.tok, len)) {
- p->e = PE_PARSE;
+ ins:
+	if (!p_insstr(p, PN_QSTR, p->cur.off, (uint32_t)len))
goto exit;
- }
- p->cur.tok += len;
- p->e = PE_OK;
+
+	p->cur.off += len;
+	p->e = TE_OK;
exit:
- return !p->e;
+	return p->e == TE_OK;
+ eparse:
+	p->e = TE_PARSE;
+	goto exit;
}
int
p_qchars(struct parctx *p)
{
- const char *t;
+ size_t pr;
+ char *prp;
+ size_t wndlag;
+ size_t leeway;
size_t len;
int esc;
- t = p->cur.tok;
+ pr = p->cur.off;
esc = 0;
- while (t != p->tokend) {
- if (esc) {
- if (*t == '"' || *t == '\\') {
- t++;
- esc = 0;
- } else {
- p->e = PE_PARSE;
- goto exit;
+
+ while ((p->e = par_procure(p, pr, 1)) == TE_OK) {
+ wndlag = pr - p->wndoff;
+ leeway = p->wndoff + p->wndlee;
+
+ for (prp = p->wnd + wndlag; pr < leeway; pr++, prp++) {
+ if (esc) {
+ if (*prp == '"' || *prp == '\\') {
+ esc = 0;
+ } else {
+ goto eparse;
+ }
+ } else if (*prp == '\\') {
+ esc = 1;
+ } else if (*prp == 0 || *prp > '\x7f' ||
+ *prp == '\r' || *prp == '\n' || *prp == '"') {
+ goto stop;
}
- } else if (*t == '\\') {
- t++;
- esc = 1;
- } else if (*t >= '\1' && *t <= '\x7f' && *t != '\r' &&
- *t != '\n' && *t != '"' && *t != '\\') {
- t++;
- } else {
- p->e = PE_PARSE;
- goto exit;
}
}
- len = t - p->cur.tok;
- if (len == 0) {
- p->e = PE_PARSE;
- goto exit;
- } else if (len > UINT32_MAX) {
- p->e = PE_STRTOOBIG;
+ stop:
+ if (p->e == TE_EOF)
+ p->e = TE_OK;
+
+ if (p->e != TE_OK)
goto exit;
- } else if (!p_insstr(p, PN_QSTR, p->cur.tok, (uint32_t)len))
+
+ len = pr - p->cur.off;
+ if (len == 0)
+ goto eparse;
+
+ if (len > UINT32_MAX)
+ goto exl;
+
+ if (!p_insstr(p, PN_QSTR, p->cur.off, (uint32_t)len))
goto exit;
- p->cur.tok = t;
- p->e = PE_OK;
+ p->cur.off = pr;
+ p->e = TE_OK;
exit:
- return !p->e;
+ return p->e == TE_OK;
+ eparse:
+ p->e = TE_PARSE;
+ goto exit;
+ exl:
+ p->e = TE_XLSTR;
+ goto exit;
}
int
@@ -1672,30 +1971,51 @@ p_taggedresp(struct parctx *p)
+/*
+ * Take the longest non-empty run of 7-bit bytes other than NUL, CR and
+ * LF and add it as a PN_STR node.
+ *
+ * Returns 1 on success.  Returns 0 with p->e = TE_PARSE for an empty
+ * run, TE_XLSTR for a run longer than UINT32_MAX, or with the error
+ * from par_procure()/p_insstr().
+ */
int
p_text(struct parctx *p)
{
- const char *t;
+	size_t pr;
+	char *prp;
+	size_t wndlag;
+	size_t leeway;
size_t len;
- t = p->cur.tok;
- while (t != p->tokend && *t >= '\1' && *t <= '\x7f' &&
- *t != '\r' && *t != '\n')
- t++;
+	pr = p->cur.off;
+	while ((p->e = par_procure(p, pr, 1)) == TE_OK) {
+		wndlag = pr - p->wndoff;
+		leeway = p->wndoff + p->wndlee;
- len = t - p->cur.tok;
- if (len == 0) {
- p->e = PE_PARSE;
- goto exit;
- } else if (len > UINT32_MAX) {
- p->e = PE_STRTOOBIG;
- goto exit;
+		for (prp = p->wnd + wndlag; pr < leeway; pr++, prp++) {
+			/*
+			 * Test the high bit on an unsigned value: where plain
+			 * char is signed, bytes >= 0x80 are negative and
+			 * never compare greater than '\x7f'.
+			 */
+			if (*prp == 0 || (unsigned char)*prp > 0x7f ||
+			    *prp == '\r' || *prp == '\n')
+				goto stop;
+		}
}
- if (!p_insstr(p, PN_STR, p->cur.tok, (uint32_t)len))
+ stop:
+	if (p->e == TE_EOF)
+		p->e = TE_OK;
+
+	if (p->e != TE_OK)
goto exit;
- p->cur.tok = t;
- p->e = PE_OK;
+	len = pr - p->cur.off;
+	if (len == 0)
+		goto eparse;
+
+	if (len > UINT32_MAX)
+		goto exl;
+
+	if (!p_insstr(p, PN_STR, p->cur.off, (uint32_t)len))
+		goto exit;
+
+	p->cur.off = pr;
+	p->e = TE_OK;
exit:
- return !p->e;
+	return p->e == TE_OK;
+ eparse:
+	p->e = TE_PARSE;
+	goto exit;
+ exl:
+	p->e = TE_XLSTR;
+	goto exit;
}
int
diff --git a/parser.h b/parser.h
index edd74ac..ace2893 100644
--- a/parser.h
+++ b/parser.h
@@ -106,9 +106,7 @@ enum {
IP_TAGGEDRESP,
};
-enum { PN_INTER, PN_LIT, PN_NUM, PN_STR, PN_QSTR, PN_LSTR, PN_OBLSTR };
-
-enum { PE_OK, PE_PARSE, PE_NOTENNODES, PE_STRTOOBIG };
+enum { PN_INTER, PN_LIT, PN_NUM, PN_STR, PN_QSTR, PN_LSTR };
union parnode {
int type;
@@ -128,12 +126,10 @@ union parnode {
struct {
int type;
uint32_t len;
- union {
- const char *tok;
- size_t iob;
- };
+ size_t off;
} str;
};
-int par_respln(const char *tok, size_t toklen, union parnode *pt,
- size_t ptlen, size_t oblth);
+struct laxsrc;
+
+int par_readln(struct laxsrc *, int, char *, size_t, union parnode *, size_t);
diff --git a/pshades.c b/pshades.c
index 40873e2..d301775 100644
--- a/pshades.c
+++ b/pshades.c
@@ -1,17 +1,28 @@
#include <err.h>
#include <errno.h>
+#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <unistd.h>
+#include "errors.h"
+#include "laxsrc.h"
#include "parser.h"
#define LEN(a) (sizeof(a) / sizeof(a)[0])
-char *perr[] = {
- [PE_PARSE] = "input not recognized",
- [PE_NOTENNODES] = "cannot fit parse tree within supplied space",
- [PE_STRTOOBIG] = "stalled upon a string longer than 2^32 - 1",
+char *errors[] = { /* message for each TE_* code, indexed by the code; TE_OK has no entry */
+ [TE_EOF] = "unexpected eof",
+ [TE_IO] = "source i/o",
+ [TE_TIMEOUT] = "source read timed out",
+ [TE_BUFOFLOW] = "source back buffer overflown",
+ [TE_PARSE] = "input not recognized",
+ [TE_CACHEIO] = "cache i/o",
+ [TE_CACHEOFLOW] = "cache overflown",
+ [TE_PTOFLOW] = "cannot fit parse tree within supplied space",
+ [TE_VM] = "cache vm mapping",
+ [TE_XLSTR] = "stalled upon a string longer than 2^32 - 1",
+
};
char *prod[] = {
@@ -43,41 +54,26 @@ char *literals[] = {
"UIDVAL", "UNSEEN", "VIDEO"
};
-void ptprint(union parnode *, size_t);
+void ptprint(union parnode *, char *, size_t);
union parnode *ptnext(union parnode *);
+/*
+ * Parse input from standard input and print the resulting parse tree.
+ * Input that outgrows the line buffer spills into the file "cache" in
+ * the current directory.  Exits with status 1 and a message when the
+ * cache cannot be opened or the input cannot be parsed.
+ */
int
main(int argc, char **argv)
{
- size_t l, inlen;
- ssize_t n;
- char input[1024], *p;
- union parnode pt[1024];
int e;
+	struct laxsrc in;
+	int cache;
+	char ln[1024];
+	union parnode pt[1024];
- p = input;
- l = LEN(input);
-
- while (l > 0 && (n = read(STDIN_FILENO, p, l))) {
- if (n < 0) {
- if (errno == EINTR) continue;
- else break;
- }
- p += n;
- l -= n;
- }
-
- if (n < 0)
- err(1, "error reading input");
- else if (l == 0 && n > 0)
- errx(1, "input is too big (%zu is max accepted)", LEN(input));
-
- inlen = p - input;
+	laxsrc_init(&in, STDIN_FILENO);
+	/* O_CREAT requires the mode argument; without it the new file's permissions are indeterminate */
+	if ((cache = open("cache", O_RDWR | O_CREAT, 0600)) == -1)
+		err(1, "can't open cache");
- if ((e = par_respln(input, inlen, pt, LEN(pt), 1024)))
- errx(1, "error parsing input: %s", perr[e]);
+	if ((e = par_readln(&in, cache, ln, LEN(ln), pt, LEN(pt))))
+		errx(1, "error parsing input: %s", errors[e]);
- ptprint(pt, 0);
+	/* NOTE(review): string nodes hold input offsets that ptprint() adds to ln -- confirm this holds once input has moved to the cache */
+	ptprint(pt, ln, 0);
return 0;
}
@@ -91,10 +87,10 @@ ptnext(union parnode *n)
}
void
-ptprint(union parnode *n, size_t depth)
+ptprint(union parnode *n, char *ln, size_t depth)
{
static char *prefix = " >";
- size_t nend;
+ union parnode *nend;
printf("%.*s", (int)depth, prefix);
@@ -102,26 +98,22 @@ ptprint(union parnode *n, size_t depth)
case PN_INTER:
printf("%s\n", prod[n->inter.prod]);
for (nend = n + n->inter.len + 1, n++; n < nend; n = ptnext(n))
- ptprint(n, depth + 1);
+ ptprint(n, ln, depth + 1);
break;
case PN_LIT:
printf("%s\n", literals[n->lit.val]);
break;
case PN_NUM:
- /* acting a tad careless here... */
printf("%zu\n", (size_t)n->num.val);
break;
case PN_STR:
- printf("%.*s\n", (int)n->str.len, n->str.tok);
+ printf("%.*s\n", (int)n->str.len, ln + n->str.off);
break;
case PN_QSTR:
- printf("\"%.*s\"\n", (int)n->str.len, n->str.tok);
+ printf("\"%.*s\"\n", (int)n->str.len, ln + n->str.off);
break;
case PN_LSTR:
- printf("%.*s\n", (int)n->str.len, n->str.tok);
- break;
- case PN_OBLSTR:
- printf("OBLSTR #%zu\n", n->str.iob);
+ printf("%.*s\n", (int)n->str.len, ln + n->str.off);
break;
default:
errx(1, "cannot grok parse node of type %d\n", n->type);