diff --git a/app/lua53/lapi.c b/app/lua53/lapi.c index 4619b14a..1b468b95 100644 --- a/app/lua53/lapi.c +++ b/app/lua53/lapi.c @@ -393,7 +393,7 @@ LUA_API const char *lua_tolstring (lua_State *L, int idx, size_t *len) { LUA_API size_t lua_rawlen (lua_State *L, int idx) { StkId o = index2addr(L, idx); switch (ttype(o)) { - case LUA_TSHRSTR: return getshrlen(tsvalue(o)); + case LUA_TSHRSTR: return getstrshrlen(tsvalue(o)); case LUA_TLNGSTR: return tsvalue(o)->u.lnglen; case LUA_TUSERDATA: return uvalue(o)->len; case LUA_TTBLRAM: return luaH_getn(hvalue(o)); diff --git a/app/lua53/ldump.c b/app/lua53/ldump.c index 90bbdfaa..578824fa 100644 --- a/app/lua53/ldump.c +++ b/app/lua53/ldump.c @@ -125,7 +125,8 @@ static void DumpString (const TString *s, DumpState *D) { if (s == NULL) { DumpByte(LUAU_TSSTRING + 0, D); } else { - lu_byte tt = (gettt(s) == LUA_TSHRSTR) ? LUAU_TSSTRING : LUAU_TLSTRING; + lu_byte tt = (gettt((struct GCObject *)s) == LUA_TSHRSTR) \ + ? LUAU_TSSTRING : LUAU_TLSTRING; size_t l = tsslen(s); const char *str = getstr(s); #ifdef LUA_USE_HOST @@ -314,7 +315,7 @@ static void addTS (TString *ts, DumpState *D) { return; if (ttisnil(luaH_getstr(D->stringIndex, ts))) { TValue k, v, *slot; - gettt(ts)<=LUA_TSHRSTR ? D->sTScnt++ : D->lTScnt++; + gettt((struct GCObject *)ts)<=LUA_TSHRSTR ? 
D->sTScnt++ : D->lTScnt++; setsvalue(L, &k, ts); setivalue(&v, D->sTScnt + D->lTScnt); slot = luaH_set(L, D->stringIndex, &k); diff --git a/app/lua53/lgc.h b/app/lua53/lgc.h index aa372527..c75c4bb8 100644 --- a/app/lua53/lgc.h +++ b/app/lua53/lgc.h @@ -85,23 +85,23 @@ #define WHITEBITS bit2mask(WHITE0BIT, WHITE1BIT) -#define iswhite(x) testbits(getmarked(x), WHITEBITS) -#define isblack(x) testbit(getmarked(x), BLACKBIT) +#define iswhite(x) testbits(getmarked((struct GCObject *)x), WHITEBITS) +#define isblack(x) testbit(getmarked((struct GCObject *)x), BLACKBIT) #define isgray(x) /* neither white nor black */ \ - (!testbits(getmarked(x), WHITEBITS | bitmask(BLACKBIT))) + (!testbits(getmarked((struct GCObject *)x), WHITEBITS | bitmask(BLACKBIT))) #define tofinalize(x) testbit(getmarked(x), FINALIZEDBIT) #define otherwhite(g) ((g)->currentwhite ^ WHITEBITS) #define isdeadm(ow,m) (!(((m) ^ WHITEBITS) & (ow))) -#define isdead(g,v) isdeadm(otherwhite(g), getmarked(v)) +#define isdead(g,v) isdeadm(otherwhite(g), getmarked((struct GCObject *)v)) #define changewhite(x) ((x)->marked ^= WHITEBITS) #define gray2black(x) l_setbit((x)->marked, BLACKBIT) #define luaC_white(g) cast(lu_byte, (g)->currentwhite & WHITEBITS) -#define isLFSobj(x) testbit(getmarked(x), LFSBIT) +#define isLFSobj(x) testbit(getmarked((struct GCObject *)x), LFSBIT) #define setLFSbit(x) l_setbit((x)->marked, LFSBIT) /* ** Does one step of collection when debt becomes positive. 'pre'/'pos' diff --git a/app/lua53/lobject.h b/app/lua53/lobject.h index 5441f81a..083fd2c9 100644 --- a/app/lua53/lobject.h +++ b/app/lua53/lobject.h @@ -70,27 +70,6 @@ /* mark a tag as collectable */ #define ctb(t) ((t) | BIT_ISCOLLECTABLE) -/* -** Byte field access macro. On ESP targets this causes the compiler to emit -** a l32i + extui instruction pair instead of a single l8ui avoiding a call -** the S/W unaligned exception handler. 
This is used to force aligned access -** to commonly accessed fields in Flash-based record structures. It is not -** needed for RAM-only structures. -** -** wo is the offset of aligned word in bytes 0,4,8,.. -** bo is the field within the word in bits 0..31 -*/ -#ifdef LUA_USE_ESP -#define GET_BYTE_FN(name,t,wo,bo) \ -static inline lu_int32 get ## name(const void *o) { \ - lu_int32 res; /* extract named field */ \ - asm ("l32i %0, %1, " #wo "; extui %0, %0, " #bo ", 8;" : "=r"(res) : "r"(o) : );\ - return res; } -#else -#define GET_BYTE_FN(name,t,wo,bo) \ -static inline lu_byte get ## name(const void *o) { return (cast(const t *,o))->name; } -#endif - /* ** Common type for all collectable objects */ @@ -110,8 +89,8 @@ typedef struct GCObject GCObject; struct GCObject { CommonHeader; }; -GET_BYTE_FN(tt,GCObject,4,0) -GET_BYTE_FN(marked,GCObject,4,8) +LUA_LOAD_BYTE_FN(gettt, struct GCObject, tt); +LUA_LOAD_BYTE_FN(getmarked, struct GCObject, marked); /* @@ -258,7 +237,7 @@ typedef struct lua_TValue { #define setsvalue(L,obj,x) \ { TValue *io = (obj); TString *x_ = (x); \ - val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt(x_))); \ + val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt((struct GCObject *)x_))); \ checkliveness(L,io); } #define setuvalue(L,obj,x) \ @@ -283,7 +262,7 @@ typedef struct lua_TValue { #define sethvalue(L,obj,x) \ { TValue *io = (obj); Table *x_ = (x); \ - val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt(x_))); \ + val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt((struct GCObject *)x_))); \ checkliveness(L,io); } #define setdeadvalue(obj) settt_(obj, LUA_TDEADKEY) @@ -344,8 +323,8 @@ typedef struct TString { struct TString *hnext; /* linked list for hash table */ } u; } TString; -GET_BYTE_FN(extra,TString,4,16) -GET_BYTE_FN(shrlen,TString,4,24) +LUA_LOAD_BYTE_FN(getstrextra, TString, extra) +LUA_LOAD_BYTE_FN(getstrshrlen, TString, shrlen) /* @@ -369,7 +348,8 @@ typedef union UTString { #define svalue(o) getstr(tsvalue(o)) /* get string length from 
'TString *s' */ -#define tsslen(s) (gettt(s) == LUA_TSHRSTR ? getshrlen(s) : (s)->u.lnglen) +#define tsslen(s) \ + (gettt((struct GCObject *)s) == LUA_TSHRSTR ? getstrshrlen(s) : (s)->u.lnglen) /* get string length from 'TValue *o' */ #define vslen(o) tsslen(tsvalue(o)) @@ -463,9 +443,9 @@ typedef struct Proto { GCObject *gclist; } Proto; -GET_BYTE_FN(numparams,Proto,4,16) -GET_BYTE_FN(is_vararg,Proto,4,24) -GET_BYTE_FN(maxstacksize,Proto,8,0) +LUA_LOAD_BYTE_FN(getnumparams,Proto,numparams) +LUA_LOAD_BYTE_FN(getis_vararg,Proto,is_vararg) +LUA_LOAD_BYTE_FN(getmaxstacksize,Proto,maxstacksize) /* @@ -555,8 +535,8 @@ typedef struct Table { GCObject *gclist; } Table; -GET_BYTE_FN(flags,Table,4,16) -GET_BYTE_FN(lsizenode,Table,4,24) +LUA_LOAD_BYTE_FN(gettblflags,Table,flags) +LUA_LOAD_BYTE_FN(gettbllsizenode,Table,lsizenode) typedef const struct ROTable_entry { diff --git a/app/lua53/lstring.c b/app/lua53/lstring.c index b9500670..b5e7f3ff 100644 --- a/app/lua53/lstring.c +++ b/app/lua53/lstring.c @@ -38,7 +38,8 @@ */ int luaS_eqlngstr (TString *a, TString *b) { size_t len = a->u.lnglen; - lua_assert(gettt(a) == LUA_TLNGSTR && gettt(b) == LUA_TLNGSTR); + lua_assert(gettt((struct GCObject *)a) == LUA_TLNGSTR + && gettt((struct GCObject *)b) == LUA_TLNGSTR); return (a == b) || /* same instance or... */ ((len == b->u.lnglen) && /* equal length and ... */ (memcmp(getstr(a), getstr(b), len) == 0)); /* equal contents */ @@ -55,8 +56,8 @@ unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) { unsigned int luaS_hashlongstr (TString *ts) { - lua_assert(ts->tt == LUA_TLNGSTR); - if (getextra(ts) == 0) { /* no hash? */ + lua_assert(gettt((struct GCObject *)ts) == LUA_TLNGSTR); + if (getstrextra(ts) == 0) { /* no hash? 
*/ ts->hash = luaS_hash(getstr(ts), ts->u.lnglen, ts->hash); ts->extra = 1; /* now it has its hash */ } @@ -162,7 +163,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) { TString **list = &g->strt.hash[lmod(h, g->strt.size)]; lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */ for (ts = *list; ts != NULL; ts = ts->u.hnext) { - if (l == getshrlen(ts) && + if (l == getstrshrlen(ts) && (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) { /* found! */ if (isdead(g, ts)) /* dead (but not collected yet)? */ @@ -178,7 +179,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) { for (ts = g->ROstrt.hash[lmod(h, g->ROstrt.size)]; ts != NULL; ts = ts->u.hnext) { - if (l == getshrlen(ts) && + if (l == getstrshrlen(ts) && memcmp(str, getstr(ts), l * sizeof(char)) == 0) { /* found in ROstrt! */ return ts; diff --git a/app/lua53/lstring.h b/app/lua53/lstring.h index b878b645..87cae080 100644 --- a/app/lua53/lstring.h +++ b/app/lua53/lstring.h @@ -24,13 +24,15 @@ /* ** test whether a string is a reserved word */ -#define isreserved(s) (gettt(s) == LUA_TSHRSTR && getextra(s) > 0) +#define isreserved(s) \ + (gettt((struct GCObject *)s) == LUA_TSHRSTR && getstrextra(s) > 0) /* ** equality for short strings, which are always internalized */ -#define eqshrstr(a,b) check_exp(gettt(a) == LUA_TSHRSTR, (a) == (b)) +#define eqshrstr(a,b) \ + check_exp(gettt((struct GCObject *)a) == LUA_TSHRSTR, (a) == (b)) LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed); diff --git a/app/lua53/ltable.c b/app/lua53/ltable.c index 58041b57..5778ffd7 100644 --- a/app/lua53/ltable.c +++ b/app/lua53/ltable.c @@ -554,7 +554,7 @@ const TValue *luaH_getshortstr (Table *t, TString *key) { if (isrotable(t)) return rotable_findentry((ROTable*) t, key, NULL); n = hashstr(t, key); - lua_assert(gettt(key) == LUA_TSHRSTR); + lua_assert(gettt((struct GCObject *)key) == LUA_TSHRSTR); for (;;) { /* check whether 'key' is 
somewhere in the chain */ const TValue *k = gkey(n); if (ttisshrstring(k) && eqshrstr(tsvalue(k), key)) @@ -592,7 +592,7 @@ static const TValue *getgeneric (Table *t, const TValue *key) { const TValue *luaH_getstr (Table *t, TString *key) { - if (gettt(key) == LUA_TSHRSTR) + if (gettt((struct GCObject *)key) == LUA_TSHRSTR) return luaH_getshortstr(t, key); else { /* for long strings, use generic case */ TValue ko; @@ -736,16 +736,16 @@ int luaH_isdummy (const Table *t) { return isdummy(t); } */ static const TValue* rotable_findentry(ROTable *t, TString *key, unsigned *ppos) { const ROTable_entry *e = cast(const ROTable_entry *, t->entry); - const int tl = getlsizenode(t); + const int tl = gettbllsizenode((struct Table *)t); const char *strkey = getstr(key); const int hash = HASH(t, key); KeyCache *cl = luaE_getcache(hash); int i, j = 1, l; - if (!e || gettt(key) != LUA_TSHRSTR) + if (!e || gettt((struct GCObject *)key) != LUA_TSHRSTR) return luaO_nilobject; - l = getshrlen(key); + l = getstrshrlen(key); /* scan the ROTable key cache and return if hit found */ for (i = 0; i < KEYCACHE_M; i++) { int cl_ndx = cl[i] >> NDX_SHFT; @@ -804,7 +804,7 @@ static const TValue* rotable_findentry(ROTable *t, TString *key, unsigned *ppos) static void rotable_next_helper(lua_State *L, ROTable *t, int pos, TValue *key, TValue *val) { const ROTable_entry *e = cast(const ROTable_entry *, t->entry); - if (pos < getlsizenode(t)) { + if (pos < gettbllsizenode((Table *)t)) { /* Found an entry */ setsvalue(L, key, luaS_new(L, e[pos].key)); setobj2s(L, val, &e[pos].value); @@ -817,7 +817,7 @@ static void rotable_next_helper(lua_State *L, ROTable *t, int pos, /* next (used for iteration) */ static void rotable_next(lua_State *L, ROTable *t, TValue *key, TValue *val) { - unsigned keypos = getlsizenode(t); + unsigned keypos = gettbllsizenode((struct Table *)t); /* Special case: if key is nil, return the first element of the rotable */ if (ttisnil(key)) diff --git a/app/lua53/ltable.h 
b/app/lua53/ltable.h index 0ca0dbfc..c0782f71 100644 --- a/app/lua53/ltable.h +++ b/app/lua53/ltable.h @@ -40,8 +40,8 @@ (gkey(cast(Node *, cast(char *, (v)) - offsetof(Node, i_val)))) /* test Table to determine if it is a RW or RO table */ -#define isrotable(t) (gettt(t)==LUA_TTBLROF) -#define isrwtable(t) (gettt(t)==LUA_TTBLRAM) +#define isrotable(t) (gettt((struct GCObject *)t)==LUA_TTBLROF) +#define isrwtable(t) (gettt((struct GCObject *)t)==LUA_TTBLRAM) LUAI_FUNC const TValue *luaH_getint (Table *t, lua_Integer key); diff --git a/app/lua53/ltm.h b/app/lua53/ltm.h index baa03a61..ffb86acc 100644 --- a/app/lua53/ltm.h +++ b/app/lua53/ltm.h @@ -46,7 +46,7 @@ typedef enum { #define gfasttm(g,et,e) ((et) == NULL ? NULL : \ - (getflags(et) & (1u<<(e))) ? NULL : luaT_gettm(et, e, (g)->tmname[e])) + (gettblflags(et) & (1u<<(e))) ? NULL : luaT_gettm(et, e, (g)->tmname[e])) #define fasttm(l,et,e) gfasttm(G(l), et, e) diff --git a/app/lua53/luaconf.h b/app/lua53/luaconf.h index c4b91d0d..4415a58f 100644 --- a/app/lua53/luaconf.h +++ b/app/lua53/luaconf.h @@ -204,6 +204,39 @@ #define LUA_QL(x) "'" x "'" // No longer used in lua53, but still used #define LUA_QS LUA_QL("%s") // in some of our apllication modules +/* =================================================================== */ + +/* +@@ LUA_LOAD_BYTE_FN is used to define inline functions for reading byte +** fields from object headers. This can be used to speed up architectures +** which must resort to trap-and-emulate for sub-word memory accesses. +*/ + +#ifdef LUA_USE_ESP +/* +** Byte field access macro. On ESP targets this causes the compiler to emit +** a l32i + extui instruction pair instead of a single l8ui avoiding a call +** to the S/W unaligned exception handler. This is used to force aligned access +** to commonly accessed fields in Flash-based record structures. It is not +** needed for RAM-only structures. 
+*/ +#define LUA_LOAD_BYTE_FN(fn, type, field) \ +static inline lu_int32 fn(const type *o) { \ + lu_int32 res; /* extract named field */ \ + asm ("l32i %0, %1, %2;" \ + "extui %0, %0, %3, 8;" \ + : "=r"(res) : "r"(o) \ + , "i"((offsetof(type, field)/4)*4) \ + , "i"((offsetof(type, field)%4)*8) \ + : );\ + return res; } +#endif + +#if !defined(LUA_LOAD_BYTE_FN) +#define LUA_LOAD_BYTE_FN(fn, type, field) \ + static inline lu_byte fn(const type *o) { return o->field; } +#endif + /* ** {================================================================== ** Other NodeMCU configuration. diff --git a/app/lua53/lvm.c b/app/lua53/lvm.c index c186e5e7..ff87aaf1 100644 --- a/app/lua53/lvm.c +++ b/app/lua53/lvm.c @@ -445,7 +445,7 @@ int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) { #define tostring(L,o) \ (ttisstring(o) || (cvt2str(o) && (luaO_tostring(L, o), 1))) -#define isemptystr(o) (ttisshrstring(o) && getshrlen(tsvalue(o)) == 0) +#define isemptystr(o) (ttisshrstring(o) && getstrshrlen(tsvalue(o)) == 0) /* copy strings in stack from top - n up to top - 1 to buffer */ static void copy2buff (StkId top, int n, char *buff) { @@ -516,7 +516,7 @@ void luaV_objlen (lua_State *L, StkId ra, const TValue *rb) { return; } case LUA_TSHRSTR: { - setivalue(ra, getshrlen(tsvalue(rb))); + setivalue(ra, getstrshrlen(tsvalue(rb))); return; } case LUA_TLNGSTR: { diff --git a/docs/lua53.md b/docs/lua53.md index 10886455..cf00bf70 100644 --- a/docs/lua53.md +++ b/docs/lua53.md @@ -142,7 +142,7 @@ Lua53 also reimplements the Lua51 LCD (Lua Compact Debug) patch. This replaces t By default the GCC compiler emits a `l8ui` instruction to access byte fields on the ESP8266 and ESP32 Xtensa processors. This instruction will generate an unaligned fetch exception when this byte field is in Flash memory (as will accessing short fields). 
These exceptions are handled by emulating the instruction in software using an unaligned access handler; this allows execution to continue albeit with the runtime cost of handling the exception in software. We wish to avoid the performance hit of executing this handler for such exceptions. -`lobject.h` now defines a `GET_BYTE_FN(name,t,wo,bo)` macro. In the case of host targets this macro generates the normal field access, but in the case of Xtensa targets uses of this macro define an `static inline` access function for each field. These functions at the default `-O2` optimisation level cause the code generator to emit a pair of `l32i.n` + `extui` instructions replacing the single `l8ui` instruction. This has the cost of an extra instruction execution for accessing RAM data, but also removes the 200+ clock overhead of the software exception handler in the case of flash memory accesses. +`luaconf.h` now defines a `LUA_LOAD_BYTE_FN(fn,type,field)` macro. In the case of host targets this macro generates the normal field access, but in the case of Xtensa targets uses of this macro define a `static inline` access function for each field. These functions at the default `-O2` optimisation level cause the code generator to emit a pair of `l32i.n` + `extui` instructions replacing the single `l8ui` instruction. This has the cost of an extra instruction execution for accessing RAM data, but also removes the 200+ clock overhead of the software exception handler in the case of flash memory accesses. There are 9 byte fields in the `GCObject`,`TString`, `Proto`, `ROTable` structures that can either be statically compiled as `const struct` into library code space or generated by the Lua cross compiler and loaded into the LFS region; the `GET_BYTE_FN` macro is used to create inline access functions for these fields, and read references of the form `(o)->tt` (for example) have been recoded using the access macro form `gettt(o)`. 
There are 44 such changed access references in the source which together represent perhaps 99% of potential sources of this software exception within the Lua VM.