diff options
Diffstat (limited to 'lua/lstrlib.c')
-rw-r--r-- | lua/lstrlib.c | 860 |
1 files changed, 697 insertions, 163 deletions
diff --git a/lua/lstrlib.c b/lua/lstrlib.c index 9261fd2..fe30e34 100644 --- a/lua/lstrlib.c +++ b/lua/lstrlib.c @@ -1,19 +1,23 @@ /* -** $Id: lstrlib.c,v 1.178.1.1 2013/04/12 18:48:47 roberto Exp $ +** $Id: lstrlib.c,v 1.239 2015/11/25 16:28:17 roberto Exp $ ** Standard library for string operations and pattern-matching ** See Copyright Notice in lua.h */ +#define lstrlib_c +#define LUA_LIB + +#include "lprefix.h" + #include <ctype.h> +#include <float.h> +#include <limits.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#define lstrlib_c -#define LUA_LIB - #include "lua.h" #include "lauxlib.h" @@ -29,10 +33,21 @@ #endif -/* macro to `unsign' a character */ +/* macro to 'unsign' a character */ #define uchar(c) ((unsigned char)(c)) +/* +** Some sizes are better limited to fit in 'int', but must also fit in +** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.) +*/ +#define MAX_SIZET ((size_t)(~(size_t)0)) + +#define MAXSIZE \ + (sizeof(size_t) < sizeof(int) ? MAX_SIZET : (size_t)(INT_MAX)) + + + static int str_len (lua_State *L) { size_t l; @@ -43,22 +58,22 @@ static int str_len (lua_State *L) { /* translate a relative string position: negative means back from end */ -static size_t posrelat (ptrdiff_t pos, size_t len) { - if (pos >= 0) return (size_t)pos; +static lua_Integer posrelat (lua_Integer pos, size_t len) { + if (pos >= 0) return pos; else if (0u - (size_t)pos > len) return 0; - else return len - ((size_t)-pos) + 1; + else return (lua_Integer)len + pos + 1; } static int str_sub (lua_State *L) { size_t l; const char *s = luaL_checklstring(L, 1, &l); - size_t start = posrelat(luaL_checkinteger(L, 2), l); - size_t end = posrelat(luaL_optinteger(L, 3, -1), l); + lua_Integer start = posrelat(luaL_checkinteger(L, 2), l); + lua_Integer end = posrelat(luaL_optinteger(L, 3, -1), l); if (start < 1) start = 1; - if (end > l) end = l; + if (end > (lua_Integer)l) end = l; if (start <= end) - lua_pushlstring(L, s + start - 1, end - start + 1); + lua_pushlstring(L, s + start - 1, (size_t)(end - start) + 1); else lua_pushliteral(L, ""); return 1; } @@ -102,25 +117,23 @@ static int str_upper (lua_State *L) { } -/* reasonable limit to avoid arithmetic overflow */ -#define MAXSIZE ((~(size_t)0) >> 1) - static int str_rep (lua_State *L) { size_t l, lsep; const char *s = luaL_checklstring(L, 1, &l); - int n = luaL_checkint(L, 2); + lua_Integer n = luaL_checkinteger(L, 2); const char *sep = luaL_optlstring(L, 3, "", &lsep); if (n <= 0) lua_pushliteral(L, ""); - else if (l + lsep < l || l + lsep >= MAXSIZE / n) /* may overflow? */ + else if (l + lsep < l || l + lsep > MAXSIZE / n) /* may overflow? */ return luaL_error(L, "resulting string too large"); else { - size_t totallen = n * l + (n - 1) * lsep; + size_t totallen = (size_t)n * l + (size_t)(n - 1) * lsep; luaL_Buffer b; char *p = luaL_buffinitsize(L, &b, totallen); while (n-- > 1) { /* first n-1 copies (followed by separator) */ memcpy(p, s, l * sizeof(char)); p += l; - if (lsep > 0) { /* avoid empty 'memcpy' (may be expensive) */ - memcpy(p, sep, lsep * sizeof(char)); p += lsep; + if (lsep > 0) { /* empty 'memcpy' is not that cheap */ + memcpy(p, sep, lsep * sizeof(char)); + p += lsep; } } memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */ @@ -133,15 +146,15 @@ static int str_rep (lua_State *L) { static int str_byte (lua_State *L) { size_t l; const char *s = luaL_checklstring(L, 1, &l); - size_t posi = posrelat(luaL_optinteger(L, 2, 1), l); - size_t pose = posrelat(luaL_optinteger(L, 3, posi), l); + lua_Integer posi = posrelat(luaL_optinteger(L, 2, 1), l); + lua_Integer pose = posrelat(luaL_optinteger(L, 3, posi), l); int n, i; if (posi < 1) posi = 1; - if (pose > l) pose = l; + if (pose > (lua_Integer)l) pose = l; if (posi > pose) return 0; /* empty interval; return no values */ - n = (int)(pose - posi + 1); - if (posi + n <= pose) /* (size_t -> int) overflow? */ + if (pose - posi >= INT_MAX) /* arithmetic overflow? */ return luaL_error(L, "string slice too long"); + n = (int)(pose - posi) + 1; luaL_checkstack(L, n, "string slice too long"); for (i=0; i<n; i++) lua_pushinteger(L, uchar(s[posi+i-1])); @@ -155,7 +168,7 @@ static int str_char (lua_State *L) { luaL_Buffer b; char *p = luaL_buffinitsize(L, &b, n); for (i=1; i<=n; i++) { - int c = luaL_checkint(L, i); + lua_Integer c = luaL_checkinteger(L, i); luaL_argcheck(L, uchar(c) == c, i, "value out of range"); p[i - 1] = uchar(c); } @@ -164,19 +177,20 @@ static int str_char (lua_State *L) { } -static int writer (lua_State *L, const void* b, size_t size, void* B) { +static int writer (lua_State *L, const void *b, size_t size, void *B) { (void)L; - luaL_addlstring((luaL_Buffer*) B, (const char *)b, size); + luaL_addlstring((luaL_Buffer *) B, (const char *)b, size); return 0; } static int str_dump (lua_State *L) { luaL_Buffer b; + int strip = lua_toboolean(L, 2); luaL_checktype(L, 1, LUA_TFUNCTION); lua_settop(L, 1); luaL_buffinit(L,&b); - if (lua_dump(L, writer, &b) != 0) + if (lua_dump(L, writer, &b, strip) != 0) return luaL_error(L, "unable to dump given function"); luaL_pushresult(&b); return 1; @@ -196,11 +210,12 @@ static int str_dump (lua_State *L) { typedef struct MatchState { - int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ const char *src_init; /* init of source string */ const char *src_end; /* end ('\0') of source string */ const char *p_end; /* end ('\0') of pattern */ lua_State *L; + size_t nrep; /* limit to avoid non-linear complexity */ + int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ int level; /* total number of captures (finished or unfinished) */ struct { const char *init; @@ -219,6 +234,17 @@ static const char *match (MatchState *ms, const char *s, const char *p); #endif +/* +** parameters to control the maximum number of operators handled in +** a match (to avoid non-linear complexity). The maximum will be: +** (subject length) * A_REPS + B_REPS +*/ +#if !defined(A_REPS) +#define A_REPS 4 +#define B_REPS 100000 +#endif + + #define L_ESC '%' #define SPECIALS "^$*+?.([%-" @@ -243,16 +269,16 @@ static const char *classend (MatchState *ms, const char *p) { switch (*p++) { case L_ESC: { if (p == ms->p_end) - luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")"); + luaL_error(ms->L, "malformed pattern (ends with '%%')"); return p+1; } case '[': { if (*p == '^') p++; - do { /* look for a `]' */ + do { /* look for a ']' */ if (p == ms->p_end) - luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")"); + luaL_error(ms->L, "malformed pattern (missing ']')"); if (*(p++) == L_ESC && p < ms->p_end) - p++; /* skip escapes (e.g. `%]') */ + p++; /* skip escapes (e.g. '%]') */ } while (*p != ']'); return p+1; } @@ -287,7 +313,7 @@ static int matchbracketclass (int c, const char *p, const char *ec) { int sig = 1; if (*(p+1) == '^') { sig = 0; - p++; /* skip the `^' */ + p++; /* skip the '^' */ } while (++p < ec) { if (*p == L_ESC) { @@ -325,8 +351,7 @@ static int singlematch (MatchState *ms, const char *s, const char *p, static const char *matchbalance (MatchState *ms, const char *s, const char *p) { if (p >= ms->p_end - 1) - luaL_error(ms->L, "malformed pattern " - "(missing arguments to " LUA_QL("%%b") ")"); + luaL_error(ms->L, "malformed pattern (missing arguments to '%%b')"); if (*s != *p) return NULL; else { int b = *p; @@ -425,7 +450,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) { break; } case '$': { - if ((p + 1) != ms->p_end) /* is the `$' the last char in pattern? */ + if ((p + 1) != ms->p_end) /* is the '$' the last char in pattern? */ goto dflt; /* no; go to default */ s = (s == ms->src_end) ? s : NULL; /* check end of string */ break; @@ -443,8 +468,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) { const char *ep; char previous; p += 2; if (*p != '[') - luaL_error(ms->L, "missing " LUA_QL("[") " after " - LUA_QL("%%f") " in pattern"); + luaL_error(ms->L, "missing '[' after '%%f' in pattern"); ep = classend(ms, p); /* points to what is next */ previous = (s == ms->src_init) ? '\0' : *(s - 1); if (!matchbracketclass(uchar(previous), p, ep - 1) && @@ -478,6 +502,8 @@ static const char *match (MatchState *ms, const char *s, const char *p) { s = NULL; /* fail */ } else { /* matched once */ + if (ms->nrep-- == 0) + luaL_error(ms->L, "pattern too complex"); switch (*ep) { /* handle optional suffix */ case '?': { /* optional */ const char *res; @@ -490,7 +516,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) { } case '+': /* 1 or more repetitions */ s++; /* 1 match already done */ - /* go through */ + /* FALLTHROUGH */ case '*': /* 0 or more repetitions */ s = max_expand(ms, s, p, ep); break; @@ -514,16 +540,16 @@ static const char *match (MatchState *ms, const char *s, const char *p) { static const char *lmemfind (const char *s1, size_t l1, const char *s2, size_t l2) { if (l2 == 0) return s1; /* empty strings are everywhere */ - else if (l2 > l1) return NULL; /* avoids a negative `l1' */ + else if (l2 > l1) return NULL; /* avoids a negative 'l1' */ else { - const char *init; /* to search for a `*s2' inside `s1' */ - l2--; /* 1st char will be checked by `memchr' */ - l1 = l1-l2; /* `s2' cannot be found after that */ + const char *init; /* to search for a '*s2' inside 's1' */ + l2--; /* 1st char will be checked by 'memchr' */ + l1 = l1-l2; /* 's2' cannot be found after that */ while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { init++; /* 1st char is already checked */ if (memcmp(init, s2+1, l2) == 0) return init-1; - else { /* correct `l1' and `s1' to try again */ + else { /* correct 'l1' and 's1' to try again */ l1 -= init-s1; s1 = init; } @@ -539,13 +565,13 @@ static void push_onecapture (MatchState *ms, int i, const char *s, if (i == 0) /* ms->level == 0, too */ lua_pushlstring(ms->L, s, e - s); /* add whole match */ else - luaL_error(ms->L, "invalid capture index"); + luaL_error(ms->L, "invalid capture index %%%d", i + 1); } else { ptrdiff_t l = ms->capture[i].len; if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture"); if (l == CAP_POSITION) - lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1); + lua_pushinteger(ms->L, (ms->capture[i].init - ms->src_init) + 1); else lua_pushlstring(ms->L, ms->capture[i].init, l); } @@ -574,23 +600,43 @@ static int nospecials (const char *p, size_t l) { } +static void prepstate (MatchState *ms, lua_State *L, + const char *s, size_t ls, const char *p, size_t lp) { + ms->L = L; + ms->matchdepth = MAXCCALLS; + ms->src_init = s; + ms->src_end = s + ls; + ms->p_end = p + lp; + if (ls < (MAX_SIZET - B_REPS) / A_REPS) + ms->nrep = A_REPS * ls + B_REPS; + else /* overflow (very long subject) */ + ms->nrep = MAX_SIZET; /* no limit */ +} + + +static void reprepstate (MatchState *ms) { + ms->level = 0; + lua_assert(ms->matchdepth == MAXCCALLS); +} + + static int str_find_aux (lua_State *L, int find) { size_t ls, lp; const char *s = luaL_checklstring(L, 1, &ls); const char *p = luaL_checklstring(L, 2, &lp); - size_t init = posrelat(luaL_optinteger(L, 3, 1), ls); + lua_Integer init = posrelat(luaL_optinteger(L, 3, 1), ls); if (init < 1) init = 1; - else if (init > ls + 1) { /* start after string's end? */ + else if (init > (lua_Integer)ls + 1) { /* start after string's end? */ lua_pushnil(L); /* cannot find anything */ return 1; } /* explicit request or no special characters? */ if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { /* do a plain search */ - const char *s2 = lmemfind(s + init - 1, ls - init + 1, p, lp); + const char *s2 = lmemfind(s + init - 1, ls - (size_t)init + 1, p, lp); if (s2) { - lua_pushinteger(L, s2 - s + 1); - lua_pushinteger(L, s2 - s + lp); + lua_pushinteger(L, (s2 - s) + 1); + lua_pushinteger(L, (s2 - s) + lp); return 2; } } @@ -601,18 +647,13 @@ static int str_find_aux (lua_State *L, int find) { if (anchor) { p++; lp--; /* skip anchor character */ } - ms.L = L; - ms.matchdepth = MAXCCALLS; - ms.src_init = s; - ms.src_end = s + ls; - ms.p_end = p + lp; + prepstate(&ms, L, s, ls, p, lp); do { const char *res; - ms.level = 0; - lua_assert(ms.matchdepth == MAXCCALLS); + reprepstate(&ms); if ((res=match(&ms, s1, p)) != NULL) { if (find) { - lua_pushinteger(L, s1 - s + 1); /* start */ + lua_pushinteger(L, (s1 - s) + 1); /* start */ lua_pushinteger(L, res - s); /* end */ return push_captures(&ms, NULL, 0) + 2; } @@ -636,29 +677,26 @@ static int str_match (lua_State *L) { } +/* state for 'gmatch' */ +typedef struct GMatchState { + const char *src; /* current position */ + const char *p; /* pattern */ + MatchState ms; /* match state */ +} GMatchState; + + static int gmatch_aux (lua_State *L) { - MatchState ms; - size_t ls, lp; - const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); - const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); + GMatchState *gm = (GMatchState *)lua_touserdata(L, lua_upvalueindex(3)); const char *src; - ms.L = L; - ms.matchdepth = MAXCCALLS; - ms.src_init = s; - ms.src_end = s+ls; - ms.p_end = p + lp; - for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); - src <= ms.src_end; - src++) { + for (src = gm->src; src <= gm->ms.src_end; src++) { const char *e; - ms.level = 0; - lua_assert(ms.matchdepth == MAXCCALLS); - if ((e = match(&ms, src, p)) != NULL) { - lua_Integer newstart = e-s; - if (e == src) newstart++; /* empty match? go at least one position */ - lua_pushinteger(L, newstart); - lua_replace(L, lua_upvalueindex(3)); - return push_captures(&ms, src, e); + reprepstate(&gm->ms); + if ((e = match(&gm->ms, src, gm->p)) != NULL) { + if (e == src) /* empty match? */ + gm->src =src + 1; /* go at least one position */ + else + gm->src = e; + return push_captures(&gm->ms, src, e); } } return 0; /* not found */ @@ -666,10 +704,14 @@ static int gmatch_aux (lua_State *L) { static int gmatch (lua_State *L) { - luaL_checkstring(L, 1); - luaL_checkstring(L, 2); - lua_settop(L, 2); - lua_pushinteger(L, 0); + size_t ls, lp; + const char *s = luaL_checklstring(L, 1, &ls); + const char *p = luaL_checklstring(L, 2, &lp); + GMatchState *gm; + lua_settop(L, 2); /* keep them on closure to avoid being collected */ + gm = (GMatchState *)lua_newuserdata(L, sizeof(GMatchState)); + prepstate(&gm->ms, L, s, ls, p, lp); + gm->src = s; gm->p = p; lua_pushcclosure(L, gmatch_aux, 3); return 1; } @@ -678,7 +720,8 @@ static int gmatch (lua_State *L) { static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, const char *e) { size_t l, i; - const char *news = lua_tolstring(ms->L, 3, &l); + lua_State *L = ms->L; + const char *news = lua_tolstring(L, 3, &l); for (i = 0; i < l; i++) { if (news[i] != L_ESC) luaL_addchar(b, news[i]); @@ -686,14 +729,15 @@ static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, i++; /* skip ESC */ if (!isdigit(uchar(news[i]))) { if (news[i] != L_ESC) - luaL_error(ms->L, "invalid use of " LUA_QL("%c") - " in replacement string", L_ESC); + luaL_error(L, "invalid use of '%c' in replacement string", L_ESC); luaL_addchar(b, news[i]); } else if (news[i] == '0') luaL_addlstring(b, s, e - s); else { push_onecapture(ms, news[i] - '1', s, e); + luaL_tolstring(L, -1, NULL); /* if number, convert it to string */ + lua_remove(L, -2); /* remove original value */ luaL_addvalue(b); /* add capture to accumulated result */ } } @@ -737,9 +781,9 @@ static int str_gsub (lua_State *L) { const char *src = luaL_checklstring(L, 1, &srcl); const char *p = luaL_checklstring(L, 2, &lp); int tr = lua_type(L, 3); - size_t max_s = luaL_optinteger(L, 4, srcl+1); + lua_Integer max_s = luaL_optinteger(L, 4, srcl + 1); int anchor = (*p == '^'); - size_t n = 0; + lua_Integer n = 0; MatchState ms; luaL_Buffer b; luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || @@ -749,17 +793,11 @@ static int str_gsub (lua_State *L) { if (anchor) { p++; lp--; /* skip anchor character */ } - ms.L = L; - ms.matchdepth = MAXCCALLS; - ms.src_init = src; - ms.src_end = src+srcl; - ms.p_end = p + lp; + prepstate(&ms, L, src, srcl, p, lp); while (n < max_s) { const char *e; - ms.level = 0; - lua_assert(ms.matchdepth == MAXCCALLS); - e = match(&ms, src, p); - if (e) { + reprepstate(&ms); + if ((e = match(&ms, src, p)) != NULL) { n++; add_value(&ms, &b, src, e, tr); } @@ -786,48 +824,102 @@ static int str_gsub (lua_State *L) { ** ======================================================= */ +#if !defined(lua_number2strx) /* { */ + /* -** LUA_INTFRMLEN is the length modifier for integer conversions in -** 'string.format'; LUA_INTFRM_T is the integer type corresponding to -** the previous length +** Hexadecimal floating-point formatter */ -#if !defined(LUA_INTFRMLEN) /* { */ -#if defined(LUA_USE_LONGLONG) -#define LUA_INTFRMLEN "ll" -#define LUA_INTFRM_T long long +#include <locale.h> +#include <math.h> -#else +#define SIZELENMOD (sizeof(LUA_NUMBER_FRMLEN)/sizeof(char)) -#define LUA_INTFRMLEN "l" -#define LUA_INTFRM_T long -#endif -#endif /* } */ +/* +** Number of bits that goes into the first digit. It can be any value +** between 1 and 4; the following definition tries to align the number +** to nibble boundaries by making what is left after that first digit a +** multiple of 4. +*/ +#define L_NBFD ((l_mathlim(MANT_DIG) - 1)%4 + 1) /* -** LUA_FLTFRMLEN is the length modifier for float conversions in -** 'string.format'; LUA_FLTFRM_T is the float type corresponding to -** the previous length +** Add integer part of 'x' to buffer and return new 'x' */ -#if !defined(LUA_FLTFRMLEN) +static lua_Number adddigit (char *buff, int n, lua_Number x) { + lua_Number dd = l_mathop(floor)(x); /* get integer part from 'x' */ + int d = (int)dd; + buff[n] = (d < 10 ? d + '0' : d - 10 + 'a'); /* add to buffer */ + return x - dd; /* return what is left */ +} -#define LUA_FLTFRMLEN "" -#define LUA_FLTFRM_T double -#endif +static int num2straux (char *buff, int sz, lua_Number x) { + if (x != x || x == HUGE_VAL || x == -HUGE_VAL) /* inf or NaN? */ + return l_sprintf(buff, sz, LUA_NUMBER_FMT, x); /* equal to '%g' */ + else if (x == 0) { /* can be -0... */ + /* create "0" or "-0" followed by exponent */ + return l_sprintf(buff, sz, LUA_NUMBER_FMT "x0p+0", x); + } + else { + int e; + lua_Number m = l_mathop(frexp)(x, &e); /* 'x' fraction and exponent */ + int n = 0; /* character count */ + if (m < 0) { /* is number negative? */ + buff[n++] = '-'; /* add signal */ + m = -m; /* make it positive */ + } + buff[n++] = '0'; buff[n++] = 'x'; /* add "0x" */ + m = adddigit(buff, n++, m * (1 << L_NBFD)); /* add first digit */ + e -= L_NBFD; /* this digit goes before the radix point */ + if (m > 0) { /* more digits? */ + buff[n++] = lua_getlocaledecpoint(); /* add radix point */ + do { /* add as many digits as needed */ + m = adddigit(buff, n++, m * 16); + } while (m > 0); + } + n += l_sprintf(buff + n, sz - n, "p%+d", e); /* add exponent */ + lua_assert(n < sz); + return n; + } +} + + +static int lua_number2strx (lua_State *L, char *buff, int sz, + const char *fmt, lua_Number x) { + int n = num2straux(buff, sz, x); + if (fmt[SIZELENMOD] == 'A') { + int i; + for (i = 0; i < n; i++) + buff[i] = toupper(uchar(buff[i])); + } + else if (fmt[SIZELENMOD] != 'a') + luaL_error(L, "modifiers for format '%%a'/'%%A' not implemented"); + return n; +} + +#endif /* } */ + + +/* +** Maximum size of each formatted item. This maximum size is produced +** by format('%.99f', -maxfloat), and is equal to 99 + 3 ('-', '.', +** and '\0') + number of decimal digits to represent maxfloat (which +** is maximum exponent + 1). (99+3+1 then rounded to 120 for "extra +** expenses", such as locale-dependent stuff) +*/ +#define MAX_ITEM (120 + l_mathlim(MAX_10_EXP)) -/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ -#define MAX_ITEM 512 /* valid flags in a format specification */ #define FLAGS "-+ #0" + /* -** maximum size of each format specification (such as '%-099.99d') -** (+10 accounts for %99.99x plus margin of error) +** maximum size of each format specification (such as "%-099.99d") */ -#define MAX_FORMAT (sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10) +#define MAX_FORMAT 32 static void addquoted (lua_State *L, luaL_Buffer *b, int arg) { @@ -842,9 +934,9 @@ static void addquoted (lua_State *L, luaL_Buffer *b, int arg) { else if (*s == '\0' || iscntrl(uchar(*s))) { char buff[10]; if (!isdigit(uchar(*(s+1)))) - sprintf(buff, "\\%d", (int)uchar(*s)); + l_sprintf(buff, sizeof(buff), "\\%d", (int)uchar(*s)); else - sprintf(buff, "\\%03d", (int)uchar(*s)); + l_sprintf(buff, sizeof(buff), "\\%03d", (int)uchar(*s)); luaL_addstring(b, buff); } else @@ -869,8 +961,8 @@ static const char *scanformat (lua_State *L, const char *strfrmt, char *form) { if (isdigit(uchar(*p))) luaL_error(L, "invalid format (width or precision too long)"); *(form++) = '%'; - memcpy(form, strfrmt, (p - strfrmt + 1) * sizeof(char)); - form += p - strfrmt + 1; + memcpy(form, strfrmt, ((p - strfrmt) + 1) * sizeof(char)); + form += (p - strfrmt) + 1; *form = '\0'; return p; } @@ -903,7 +995,7 @@ static int str_format (lua_State *L) { else if (*++strfrmt == L_ESC) luaL_addchar(&b, *strfrmt++); /* %% */ else { /* format item */ - char form[MAX_FORMAT]; /* to store the format (`%...') */ + char form[MAX_FORMAT]; /* to store the format ('%...') */ char *buff = luaL_prepbuffsize(&b, MAX_ITEM); /* to put formatted item */ int nb = 0; /* number of bytes in added item */ if (++arg > top) @@ -911,36 +1003,25 @@ static int str_format (lua_State *L) { strfrmt = scanformat(L, strfrmt, form); switch (*strfrmt++) { case 'c': { - nb = sprintf(buff, form, luaL_checkint(L, arg)); - break; - } - case 'd': case 'i': { - lua_Number n = luaL_checknumber(L, arg); - LUA_INTFRM_T ni = (LUA_INTFRM_T)n; - lua_Number diff = n - (lua_Number)ni; - luaL_argcheck(L, -1 < diff && diff < 1, arg, - "not a number in proper range"); - addlenmod(form, LUA_INTFRMLEN); - nb = sprintf(buff, form, ni); + nb = l_sprintf(buff, MAX_ITEM, form, (int)luaL_checkinteger(L, arg)); break; } + case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': { - lua_Number n = luaL_checknumber(L, arg); - unsigned LUA_INTFRM_T ni = (unsigned LUA_INTFRM_T)n; - lua_Number diff = n - (lua_Number)ni; - luaL_argcheck(L, -1 < diff && diff < 1, arg, - "not a non-negative number in proper range"); - addlenmod(form, LUA_INTFRMLEN); - nb = sprintf(buff, form, ni); + lua_Integer n = luaL_checkinteger(L, arg); + addlenmod(form, LUA_INTEGER_FRMLEN); + nb = l_sprintf(buff, MAX_ITEM, form, n); break; } - case 'e': case 'E': case 'f': -#if defined(LUA_USE_AFORMAT) case 'a': case 'A': -#endif + addlenmod(form, LUA_NUMBER_FRMLEN); + nb = lua_number2strx(L, buff, MAX_ITEM, form, + luaL_checknumber(L, arg)); + break; + case 'e': case 'E': case 'f': case 'g': case 'G': { - addlenmod(form, LUA_FLTFRMLEN); - nb = sprintf(buff, form, (LUA_FLTFRM_T)luaL_checknumber(L, arg)); + addlenmod(form, LUA_NUMBER_FRMLEN); + nb = l_sprintf(buff, MAX_ITEM, form, luaL_checknumber(L, arg)); break; } case 'q': { @@ -950,23 +1031,27 @@ static int str_format (lua_State *L) { case 's': { size_t l; const char *s = luaL_tolstring(L, arg, &l); - if (!strchr(form, '.') && l >= 100) { - /* no precision and string is too long to be formatted; - keep original string */ - luaL_addvalue(&b); - break; - } + if (form[2] == '\0') /* no modifiers? */ + luaL_addvalue(&b); /* keep entire string */ else { - nb = sprintf(buff, form, s); - lua_pop(L, 1); /* remove result from 'luaL_tolstring' */ - break; + luaL_argcheck(L, l == strlen(s), arg, "string contains zeros"); + if (!strchr(form, '.') && l >= 100) { + /* no precision and string is too long to be formatted */ + luaL_addvalue(&b); /* keep entire string */ + } + else { /* format the string into 'buff' */ + nb = l_sprintf(buff, MAX_ITEM, form, s); + lua_pop(L, 1); /* remove result from 'luaL_tolstring' */ + } } + break; } - default: { /* also treat cases `pnLlh' */ - return luaL_error(L, "invalid option " LUA_QL("%%%c") " to " - LUA_QL("format"), *(strfrmt - 1)); + default: { /* also treat cases 'pnLlh' */ + return luaL_error(L, "invalid option '%%%c' to 'format'", + *(strfrmt - 1)); } } + lua_assert(nb < MAX_ITEM); luaL_addsize(&b, nb); } } @@ -977,6 +1062,452 @@ static int str_format (lua_State *L) { /* }====================================================== */ +/* +** {====================================================== +** PACK/UNPACK +** ======================================================= +*/ + + +/* value used for padding */ +#if !defined(LUA_PACKPADBYTE) +#define LUA_PACKPADBYTE 0x00 +#endif + +/* maximum size for the binary representation of an integer */ +#define MAXINTSIZE 16 + +/* number of bits in a character */ +#define NB CHAR_BIT + +/* mask for one character (NB 1's) */ +#define MC ((1 << NB) - 1) + +/* size of a lua_Integer */ +#define SZINT ((int)sizeof(lua_Integer)) + + +/* dummy union to get native endianness */ +static const union { + int dummy; + char little; /* true iff machine is little endian */ +} nativeendian = {1}; + + +/* dummy structure to get native alignment requirements */ +struct cD { + char c; + union { double d; void *p; lua_Integer i; lua_Number n; } u; +}; + +#define MAXALIGN (offsetof(struct cD, u)) + + +/* +** Union for serializing floats +*/ +typedef union Ftypes { + float f; + double d; + lua_Number n; + char buff[5 * sizeof(lua_Number)]; /* enough for any float type */ +} Ftypes; + + +/* +** information to pack/unpack stuff +*/ +typedef struct Header { + lua_State *L; + int islittle; + int maxalign; +} Header; + + +/* +** options for pack/unpack +*/ +typedef enum KOption { + Kint, /* signed integers */ + Kuint, /* unsigned integers */ + Kfloat, /* floating-point numbers */ + Kchar, /* fixed-length strings */ + Kstring, /* strings with prefixed length */ + Kzstr, /* zero-terminated strings */ + Kpadding, /* padding */ + Kpaddalign, /* padding for alignment */ + Knop /* no-op (configuration or spaces) */ +} KOption; + + +/* +** Read an integer numeral from string 'fmt' or return 'df' if +** there is no numeral +*/ +static int digit (int c) { return '0' <= c && c <= '9'; } + +static int getnum (const char **fmt, int df) { + if (!digit(**fmt)) /* no number? */ + return df; /* return default value */ + else { + int a = 0; + do { + a = a*10 + (*((*fmt)++) - '0'); + } while (digit(**fmt) && a <= ((int)MAXSIZE - 9)/10); + return a; + } +} + + +/* +** Read an integer numeral and raises an error if it is larger +** than the maximum size for integers. +*/ +static int getnumlimit (Header *h, const char **fmt, int df) { + int sz = getnum(fmt, df); + if (sz > MAXINTSIZE || sz <= 0) + luaL_error(h->L, "integral size (%d) out of limits [1,%d]", + sz, MAXINTSIZE); + return sz; +} + + +/* +** Initialize Header +*/ +static void initheader (lua_State *L, Header *h) { + h->L = L; + h->islittle = nativeendian.little; + h->maxalign = 1; +} + + +/* +** Read and classify next option. 'size' is filled with option's size. +*/ +static KOption getoption (Header *h, const char **fmt, int *size) { + int opt = *((*fmt)++); + *size = 0; /* default */ + switch (opt) { + case 'b': *size = sizeof(char); return Kint; + case 'B': *size = sizeof(char); return Kuint; + case 'h': *size = sizeof(short); return Kint; + case 'H': *size = sizeof(short); return Kuint; + case 'l': *size = sizeof(long); return Kint; + case 'L': *size = sizeof(long); return Kuint; + case 'j': *size = sizeof(lua_Integer); return Kint; + case 'J': *size = sizeof(lua_Integer); return Kuint; + case 'T': *size = sizeof(size_t); return Kuint; + case 'f': *size = sizeof(float); return Kfloat; + case 'd': *size = sizeof(double); return Kfloat; + case 'n': *size = sizeof(lua_Number); return Kfloat; + case 'i': *size = getnumlimit(h, fmt, sizeof(int)); return Kint; + case 'I': *size = getnumlimit(h, fmt, sizeof(int)); return Kuint; + case 's': *size = getnumlimit(h, fmt, sizeof(size_t)); return Kstring; + case 'c': + *size = getnum(fmt, -1); + if (*size == -1) + luaL_error(h->L, "missing size for format option 'c'"); + return Kchar; + case 'z': return Kzstr; + case 'x': *size = 1; return Kpadding; + case 'X': return Kpaddalign; + case ' ': break; + case '<': h->islittle = 1; break; + case '>': h->islittle = 0; break; + case '=': h->islittle = nativeendian.little; break; + case '!': h->maxalign = getnumlimit(h, fmt, MAXALIGN); break; + default: luaL_error(h->L, "invalid format option '%c'", opt); + } + return Knop; +} + + +/* +** Read, classify, and fill other details about the next option. +** 'psize' is filled with option's size, 'notoalign' with its +** alignment requirements. +** Local variable 'size' gets the size to be aligned. (Kpadal option +** always gets its full alignment, other options are limited by +** the maximum alignment ('maxalign'). Kchar option needs no alignment +** despite its size. +*/ +static KOption getdetails (Header *h, size_t totalsize, + const char **fmt, int *psize, int *ntoalign) { + KOption opt = getoption(h, fmt, psize); + int align = *psize; /* usually, alignment follows size */ + if (opt == Kpaddalign) { /* 'X' gets alignment from following option */ + if (**fmt == '\0' || getoption(h, fmt, &align) == Kchar || align == 0) + luaL_argerror(h->L, 1, "invalid next option for option 'X'"); + } + if (align <= 1 || opt == Kchar) /* need no alignment? */ + *ntoalign = 0; + else { + if (align > h->maxalign) /* enforce maximum alignment */ + align = h->maxalign; + if ((align & (align - 1)) != 0) /* is 'align' not a power of 2? */ + luaL_argerror(h->L, 1, "format asks for alignment not power of 2"); + *ntoalign = (align - (int)(totalsize & (align - 1))) & (align - 1); + } + return opt; +} + + +/* +** Pack integer 'n' with 'size' bytes and 'islittle' endianness. +** The final 'if' handles the case when 'size' is larger than +** the size of a Lua integer, correcting the extra sign-extension +** bytes if necessary (by default they would be zeros). +*/ +static void packint (luaL_Buffer *b, lua_Unsigned n, + int islittle, int size, int neg) { + char *buff = luaL_prepbuffsize(b, size); + int i; + buff[islittle ? 0 : size - 1] = (char)(n & MC); /* first byte */ + for (i = 1; i < size; i++) { + n >>= NB; + buff[islittle ? i : size - 1 - i] = (char)(n & MC); + } + if (neg && size > SZINT) { /* negative number need sign extension? */ + for (i = SZINT; i < size; i++) /* correct extra bytes */ + buff[islittle ? i : size - 1 - i] = (char)MC; + } + luaL_addsize(b, size); /* add result to buffer */ +} + + +/* +** Copy 'size' bytes from 'src' to 'dest', correcting endianness if +** given 'islittle' is different from native endianness. +*/ +static void copywithendian (volatile char *dest, volatile const char *src, + int size, int islittle) { + if (islittle == nativeendian.little) { + while (size-- != 0) + *(dest++) = *(src++); + } + else { + dest += size - 1; + while (size-- != 0) + *(dest--) = *(src++); + } +} + + +static int str_pack (lua_State *L) { + luaL_Buffer b; + Header h; + const char *fmt = luaL_checkstring(L, 1); /* format string */ + int arg = 1; /* current argument to pack */ + size_t totalsize = 0; /* accumulate total size of result */ + initheader(L, &h); + lua_pushnil(L); /* mark to separate arguments from string buffer */ + luaL_buffinit(L, &b); + while (*fmt != '\0') { + int size, ntoalign; + KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign); + totalsize += ntoalign + size; + while (ntoalign-- > 0) + luaL_addchar(&b, LUA_PACKPADBYTE); /* fill alignment */ + arg++; + switch (opt) { + case Kint: { /* signed integers */ + lua_Integer n = luaL_checkinteger(L, arg); + if (size < SZINT) { /* need overflow check? */ + lua_Integer lim = (lua_Integer)1 << ((size * NB) - 1); + luaL_argcheck(L, -lim <= n && n < lim, arg, "integer overflow"); + } + packint(&b, (lua_Unsigned)n, h.islittle, size, (n < 0)); + break; + } + case Kuint: { /* unsigned integers */ + lua_Integer n = luaL_checkinteger(L, arg); + if (size < SZINT) /* need overflow check? */ + luaL_argcheck(L, (lua_Unsigned)n < ((lua_Unsigned)1 << (size * NB)), + arg, "unsigned overflow"); + packint(&b, (lua_Unsigned)n, h.islittle, size, 0); + break; + } + case Kfloat: { /* floating-point options */ + volatile Ftypes u; + char *buff = luaL_prepbuffsize(&b, size); + lua_Number n = luaL_checknumber(L, arg); /* get argument */ + if (size == sizeof(u.f)) u.f = (float)n; /* copy it into 'u' */ + else if (size == sizeof(u.d)) u.d = (double)n; + else u.n = n; + /* move 'u' to final result, correcting endianness if needed */ + copywithendian(buff, u.buff, size, h.islittle); + luaL_addsize(&b, size); + break; + } + case Kchar: { /* fixed-size string */ + size_t len; + const char *s = luaL_checklstring(L, arg, &len); + if ((size_t)size <= len) /* string larger than (or equal to) needed? */ + luaL_addlstring(&b, s, size); /* truncate string to asked size */ + else { /* string smaller than needed */ + luaL_addlstring(&b, s, len); /* add it all */ + while (len++ < (size_t)size) /* pad extra space */ + luaL_addchar(&b, LUA_PACKPADBYTE); + } + break; + } + case Kstring: { /* strings with length count */ + size_t len; + const char *s = luaL_checklstring(L, arg, &len); + luaL_argcheck(L, size >= (int)sizeof(size_t) || + len < ((size_t)1 << (size * NB)), + arg, "string length does not fit in given size"); + packint(&b, (lua_Unsigned)len, h.islittle, size, 0); /* pack length */ + luaL_addlstring(&b, s, len); + totalsize += len; + break; + } + case Kzstr: { /* zero-terminated string */ + size_t len; + const char *s = luaL_checklstring(L, arg, &len); + luaL_argcheck(L, strlen(s) == len, arg, "string contains zeros"); + luaL_addlstring(&b, s, len); + luaL_addchar(&b, '\0'); /* add zero at the end */ + totalsize += len + 1; + break; + } + case Kpadding: luaL_addchar(&b, LUA_PACKPADBYTE); /* FALLTHROUGH */ + case Kpaddalign: case Knop: + arg--; /* undo increment */ + break; + } + } + luaL_pushresult(&b); + return 1; +} + + +static int str_packsize (lua_State *L) { + Header h; + const char *fmt = luaL_checkstring(L, 1); /* format string */ + size_t totalsize = 0; /* accumulate total size of result */ + initheader(L, &h); + while (*fmt != '\0') { + int size, ntoalign; + KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign); + size += ntoalign; /* total space used by option */ + luaL_argcheck(L, totalsize <= MAXSIZE - size, 1, + "format result too large"); + totalsize += size; + switch (opt) { + case Kstring: /* strings with length count */ + case Kzstr: /* zero-terminated string */ + luaL_argerror(L, 1, "variable-length format"); + /* call never return, but to avoid warnings: *//* FALLTHROUGH */ + default: break; + } + } + lua_pushinteger(L, (lua_Integer)totalsize); + return 1; +} + + +/* +** Unpack an integer with 'size' bytes and 'islittle' endianness. +** If size is smaller than the size of a Lua integer and integer +** is signed, must do sign extension (propagating the sign to the +** higher bits); if size is larger than the size of a Lua integer, +** it must check the unread bytes to see whether they do not cause an +** overflow. +*/ +static lua_Integer unpackint (lua_State *L, const char *str, + int islittle, int size, int issigned) { + lua_Unsigned res = 0; + int i; + int limit = (size <= SZINT) ? size : SZINT; + for (i = limit - 1; i >= 0; i--) { + res <<= NB; + res |= (lua_Unsigned)(unsigned char)str[islittle ? i : size - 1 - i]; + } + if (size < SZINT) { /* real size smaller than lua_Integer? */ + if (issigned) { /* needs sign extension? */ + lua_Unsigned mask = (lua_Unsigned)1 << (size*NB - 1); + res = ((res ^ mask) - mask); /* do sign extension */ + } + } + else if (size > SZINT) { /* must check unread bytes */ + int mask = (!issigned || (lua_Integer)res >= 0) ? 0 : MC; + for (i = limit; i < size; i++) { + if ((unsigned char)str[islittle ? i : size - 1 - i] != mask) + luaL_error(L, "%d-byte integer does not fit into Lua Integer", size); + } + } + return (lua_Integer)res; +} + + +static int str_unpack (lua_State *L) { + Header h; + const char *fmt = luaL_checkstring(L, 1); + size_t ld; + const char *data = luaL_checklstring(L, 2, &ld); + size_t pos = (size_t)posrelat(luaL_optinteger(L, 3, 1), ld) - 1; + int n = 0; /* number of results */ + luaL_argcheck(L, pos <= ld, 3, "initial position out of string"); + initheader(L, &h); + while (*fmt != '\0') { + int size, ntoalign; + KOption opt = getdetails(&h, pos, &fmt, &size, &ntoalign); + if ((size_t)ntoalign + size > ~pos || pos + ntoalign + size > ld) + luaL_argerror(L, 2, "data string too short"); + pos += ntoalign; /* skip alignment */ + /* stack space for item + next position */ + luaL_checkstack(L, 2, "too many results"); + n++; + switch (opt) { + case Kint: + case Kuint: { + lua_Integer res = unpackint(L, data + pos, h.islittle, size, + (opt == Kint)); + lua_pushinteger(L, res); + break; + } + case Kfloat: { + volatile Ftypes u; + lua_Number num; + copywithendian(u.buff, data + pos, size, h.islittle); + if (size == sizeof(u.f)) num = (lua_Number)u.f; + else if (size == sizeof(u.d)) num = (lua_Number)u.d; + else num = u.n; + lua_pushnumber(L, num); + break; + } + case Kchar: { + lua_pushlstring(L, data + pos, size); + break; + } + case Kstring: { + size_t len = (size_t)unpackint(L, data + pos, h.islittle, size, 0); + luaL_argcheck(L, pos + len + size <= ld, 2, "data string too short"); + lua_pushlstring(L, data + pos + size, len); + pos += len; /* skip string */ + break; + } + case Kzstr: { + size_t len = (int)strlen(data + pos); + lua_pushlstring(L, data + pos, len); + pos += len + 1; /* skip string plus final '\0' */ + break; + } + case Kpaddalign: case Kpadding: case Knop: + n--; /* undo increment */ + break; + } + pos += size; + } + lua_pushinteger(L, pos + 1); /* next position */ + return n + 1; +} + +/* }====================================================== */ + + static const luaL_Reg strlib[] = { {"byte", str_byte}, {"char", str_char}, @@ -992,6 +1523,9 @@ static const luaL_Reg strlib[] = { {"reverse", str_reverse}, {"sub", str_sub}, {"upper", str_upper}, + {"pack", str_pack}, + {"packsize", str_packsize}, + {"unpack", str_unpack}, {NULL, NULL} }; |