Switch GET_BYTE_FN to LUA_LOAD_BYTE_FN

This is intended to be a more portable abstraction: rather than
hard-coding offsets in the macro invocations, it relies on the existence
of __builtin_offsetof and its availability even within inline asm blocks.

While here, rename some of the functions involved to hopefully improve
clarity.
This commit is contained in:
Nathaniel Wesley Filardo 2021-03-28 16:15:43 +01:00 committed by Nathaniel Wesley Filardo
parent 10e566babc
commit 565bbbfcbb
12 changed files with 78 additions and 61 deletions

View File

@ -393,7 +393,7 @@ LUA_API const char *lua_tolstring (lua_State *L, int idx, size_t *len) {
LUA_API size_t lua_rawlen (lua_State *L, int idx) {
StkId o = index2addr(L, idx);
switch (ttype(o)) {
case LUA_TSHRSTR: return getshrlen(tsvalue(o));
case LUA_TSHRSTR: return getstrshrlen(tsvalue(o));
case LUA_TLNGSTR: return tsvalue(o)->u.lnglen;
case LUA_TUSERDATA: return uvalue(o)->len;
case LUA_TTBLRAM: return luaH_getn(hvalue(o));

View File

@ -125,7 +125,8 @@ static void DumpString (const TString *s, DumpState *D) {
if (s == NULL) {
DumpByte(LUAU_TSSTRING + 0, D);
} else {
lu_byte tt = (gettt(s) == LUA_TSHRSTR) ? LUAU_TSSTRING : LUAU_TLSTRING;
lu_byte tt = (gettt((struct GCObject *)s) == LUA_TSHRSTR) \
? LUAU_TSSTRING : LUAU_TLSTRING;
size_t l = tsslen(s);
const char *str = getstr(s);
#ifdef LUA_USE_HOST
@ -314,7 +315,7 @@ static void addTS (TString *ts, DumpState *D) {
return;
if (ttisnil(luaH_getstr(D->stringIndex, ts))) {
TValue k, v, *slot;
gettt(ts)<=LUA_TSHRSTR ? D->sTScnt++ : D->lTScnt++;
gettt((struct GCObject *)ts)<=LUA_TSHRSTR ? D->sTScnt++ : D->lTScnt++;
setsvalue(L, &k, ts);
setivalue(&v, D->sTScnt + D->lTScnt);
slot = luaH_set(L, D->stringIndex, &k);

View File

@ -85,23 +85,23 @@
#define WHITEBITS bit2mask(WHITE0BIT, WHITE1BIT)
#define iswhite(x) testbits(getmarked(x), WHITEBITS)
#define isblack(x) testbit(getmarked(x), BLACKBIT)
/* Colour tests on the mark byte read through getmarked(). The argument is
** parenthesised so the cast applies to the whole expression, and the cast
** keeps the pointer const because getmarked() only reads. */
#define iswhite(x) testbits(getmarked((const struct GCObject *)(x)), WHITEBITS)
#define isblack(x) testbit(getmarked((const struct GCObject *)(x)), BLACKBIT)
#define isgray(x) /* neither white nor black */ \
(!testbits(getmarked(x), WHITEBITS | bitmask(BLACKBIT)))
(!testbits(getmarked((struct GCObject *)x), WHITEBITS | bitmask(BLACKBIT)))
#define tofinalize(x) testbit(getmarked(x), FINALIZEDBIT)
#define otherwhite(g) ((g)->currentwhite ^ WHITEBITS)
#define isdeadm(ow,m) (!(((m) ^ WHITEBITS) & (ow)))
#define isdead(g,v) isdeadm(otherwhite(g), getmarked(v))
/* 'v' is parenthesised so the cast covers the whole argument expression */
#define isdead(g,v) isdeadm(otherwhite(g), getmarked((const struct GCObject *)(v)))
#define changewhite(x) ((x)->marked ^= WHITEBITS)
#define gray2black(x) l_setbit((x)->marked, BLACKBIT)
#define luaC_white(g) cast(lu_byte, (g)->currentwhite & WHITEBITS)
#define isLFSobj(x) testbit(getmarked(x), LFSBIT)
/* tests LFSBIT in the mark byte; 'x' is parenthesised before the cast */
#define isLFSobj(x) testbit(getmarked((const struct GCObject *)(x)), LFSBIT)
#define setLFSbit(x) l_setbit((x)->marked, LFSBIT)
/*
** Does one step of collection when debt becomes positive. 'pre'/'pos'

View File

@ -70,27 +70,6 @@
/* mark a tag as collectable */
#define ctb(t) ((t) | BIT_ISCOLLECTABLE)
/*
** Byte field access macro. On ESP targets this causes the compiler to emit
** a l32i + extui instruction pair instead of a single l8ui avoiding a call
** the S/W unaligned exception handler. This is used to force aligned access
** to commonly accessed fields in Flash-based record structures. It is not
** needed for RAM-only structures.
**
** wo is the offset of aligned word in bytes 0,4,8,..
** bo is the field within the word in bits 0..31
*/
#ifdef LUA_USE_ESP
#define GET_BYTE_FN(name,t,wo,bo) \
static inline lu_int32 get ## name(const void *o) { \
lu_int32 res; /* extract named field */ \
asm ("l32i %0, %1, " #wo "; extui %0, %0, " #bo ", 8;" : "=r"(res) : "r"(o) : );\
return res; }
#else
#define GET_BYTE_FN(name,t,wo,bo) \
static inline lu_byte get ## name(const void *o) { return (cast(const t *,o))->name; }
#endif
/*
** Common type for all collectable objects
*/
@ -110,8 +89,8 @@ typedef struct GCObject GCObject;
struct GCObject {
CommonHeader;
};
GET_BYTE_FN(tt,GCObject,4,0)
GET_BYTE_FN(marked,GCObject,4,8)
/* Accessors for the 'tt' and 'marked' bytes of a collectable object's
** header. No trailing ';': the macro expands to a complete function
** definition, so a semicolon would be a stray empty declaration. */
LUA_LOAD_BYTE_FN(gettt, struct GCObject, tt)
LUA_LOAD_BYTE_FN(getmarked, struct GCObject, marked)
/*
@ -258,7 +237,7 @@ typedef struct lua_TValue {
#define setsvalue(L,obj,x) \
{ TValue *io = (obj); TString *x_ = (x); \
val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt(x_))); \
val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt((struct GCObject *)x_))); \
checkliveness(L,io); }
#define setuvalue(L,obj,x) \
@ -283,7 +262,7 @@ typedef struct lua_TValue {
#define sethvalue(L,obj,x) \
{ TValue *io = (obj); Table *x_ = (x); \
val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt(x_))); \
val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt((struct GCObject *)x_))); \
checkliveness(L,io); }
#define setdeadvalue(obj) settt_(obj, LUA_TDEADKEY)
@ -344,8 +323,8 @@ typedef struct TString {
struct TString *hnext; /* linked list for hash table */
} u;
} TString;
GET_BYTE_FN(extra,TString,4,16)
GET_BYTE_FN(shrlen,TString,4,24)
LUA_LOAD_BYTE_FN(getstrextra, TString, extra)
LUA_LOAD_BYTE_FN(getstrshrlen, TString, shrlen)
/*
@ -369,7 +348,8 @@ typedef union UTString {
#define svalue(o) getstr(tsvalue(o))
/* get string length from 'TString *s' */
#define tsslen(s) (gettt(s) == LUA_TSHRSTR ? getshrlen(s) : (s)->u.lnglen)
/* short strings keep their length in 'shrlen', long ones in 'u.lnglen';
** note that 's' is evaluated more than once */
#define tsslen(s) \
  (gettt((const struct GCObject *)(s)) == LUA_TSHRSTR \
    ? getstrshrlen(s) : (s)->u.lnglen)
/* get string length from 'TValue *o' */
#define vslen(o) tsslen(tsvalue(o))
@ -463,9 +443,9 @@ typedef struct Proto {
GCObject *gclist;
} Proto;
GET_BYTE_FN(numparams,Proto,4,16)
GET_BYTE_FN(is_vararg,Proto,4,24)
GET_BYTE_FN(maxstacksize,Proto,8,0)
LUA_LOAD_BYTE_FN(getnumparams,Proto,numparams)
LUA_LOAD_BYTE_FN(getis_vararg,Proto,is_vararg)
LUA_LOAD_BYTE_FN(getmaxstacksize,Proto,maxstacksize)
/*
@ -555,8 +535,8 @@ typedef struct Table {
GCObject *gclist;
} Table;
GET_BYTE_FN(flags,Table,4,16)
GET_BYTE_FN(lsizenode,Table,4,24)
LUA_LOAD_BYTE_FN(gettblflags,Table,flags)
LUA_LOAD_BYTE_FN(gettbllsizenode,Table,lsizenode)
typedef const struct ROTable_entry {

View File

@ -38,7 +38,8 @@
*/
int luaS_eqlngstr (TString *a, TString *b) {
size_t len = a->u.lnglen;
lua_assert(gettt(a) == LUA_TLNGSTR && gettt(b) == LUA_TLNGSTR);
lua_assert(gettt((struct GCObject *)a) == LUA_TLNGSTR
&& gettt((struct GCObject *)b) == LUA_TLNGSTR);
return (a == b) || /* same instance or... */
((len == b->u.lnglen) && /* equal length and ... */
(memcmp(getstr(a), getstr(b), len) == 0)); /* equal contents */
@ -55,8 +56,8 @@ unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
unsigned int luaS_hashlongstr (TString *ts) {
lua_assert(ts->tt == LUA_TLNGSTR);
if (getextra(ts) == 0) { /* no hash? */
lua_assert(gettt((struct GCObject *)ts) == LUA_TLNGSTR);
if (getstrextra(ts) == 0) { /* no hash? */
ts->hash = luaS_hash(getstr(ts), ts->u.lnglen, ts->hash);
ts->extra = 1; /* now it has its hash */
}
@ -162,7 +163,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
TString **list = &g->strt.hash[lmod(h, g->strt.size)];
lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */
for (ts = *list; ts != NULL; ts = ts->u.hnext) {
if (l == getshrlen(ts) &&
if (l == getstrshrlen(ts) &&
(memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
/* found! */
if (isdead(g, ts)) /* dead (but not collected yet)? */
@ -178,7 +179,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
for (ts = g->ROstrt.hash[lmod(h, g->ROstrt.size)];
ts != NULL;
ts = ts->u.hnext) {
if (l == getshrlen(ts) &&
if (l == getstrshrlen(ts) &&
memcmp(str, getstr(ts), l * sizeof(char)) == 0) {
/* found in ROstrt! */
return ts;

View File

@ -24,13 +24,15 @@
/*
** test whether a string is a reserved word
*/
#define isreserved(s) (gettt(s) == LUA_TSHRSTR && getextra(s) > 0)
/* true for a short string whose 'extra' byte is non-zero; 's' is
** parenthesised so the cast covers the whole argument expression */
#define isreserved(s) \
  (gettt((const struct GCObject *)(s)) == LUA_TSHRSTR && getstrextra(s) > 0)
/*
** equality for short strings, which are always internalized
*/
#define eqshrstr(a,b) check_exp(gettt(a) == LUA_TSHRSTR, (a) == (b))
/* pointer comparison, with a check that 'a' really is a short string;
** 'a' is parenthesised so the cast covers the whole argument expression */
#define eqshrstr(a,b) \
  check_exp(gettt((const struct GCObject *)(a)) == LUA_TSHRSTR, (a) == (b))
LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed);

View File

@ -554,7 +554,7 @@ const TValue *luaH_getshortstr (Table *t, TString *key) {
if (isrotable(t))
return rotable_findentry((ROTable*) t, key, NULL);
n = hashstr(t, key);
lua_assert(gettt(key) == LUA_TSHRSTR);
lua_assert(gettt((struct GCObject *)key) == LUA_TSHRSTR);
for (;;) { /* check whether 'key' is somewhere in the chain */
const TValue *k = gkey(n);
if (ttisshrstring(k) && eqshrstr(tsvalue(k), key))
@ -592,7 +592,7 @@ static const TValue *getgeneric (Table *t, const TValue *key) {
const TValue *luaH_getstr (Table *t, TString *key) {
if (gettt(key) == LUA_TSHRSTR)
if (gettt((struct GCObject *)key) == LUA_TSHRSTR)
return luaH_getshortstr(t, key);
else { /* for long strings, use generic case */
TValue ko;
@ -736,16 +736,16 @@ int luaH_isdummy (const Table *t) { return isdummy(t); }
*/
static const TValue* rotable_findentry(ROTable *t, TString *key, unsigned *ppos) {
const ROTable_entry *e = cast(const ROTable_entry *, t->entry);
const int tl = getlsizenode(t);
const int tl = gettbllsizenode((struct Table *)t);
const char *strkey = getstr(key);
const int hash = HASH(t, key);
KeyCache *cl = luaE_getcache(hash);
int i, j = 1, l;
if (!e || gettt(key) != LUA_TSHRSTR)
if (!e || gettt((struct GCObject *)key) != LUA_TSHRSTR)
return luaO_nilobject;
l = getshrlen(key);
l = getstrshrlen(key);
/* scan the ROTable key cache and return if hit found */
for (i = 0; i < KEYCACHE_M; i++) {
int cl_ndx = cl[i] >> NDX_SHFT;
@ -804,7 +804,7 @@ static const TValue* rotable_findentry(ROTable *t, TString *key, unsigned *ppos)
static void rotable_next_helper(lua_State *L, ROTable *t, int pos,
TValue *key, TValue *val) {
const ROTable_entry *e = cast(const ROTable_entry *, t->entry);
if (pos < getlsizenode(t)) {
if (pos < gettbllsizenode((Table *)t)) {
/* Found an entry */
setsvalue(L, key, luaS_new(L, e[pos].key));
setobj2s(L, val, &e[pos].value);
@ -817,7 +817,7 @@ static void rotable_next_helper(lua_State *L, ROTable *t, int pos,
/* next (used for iteration) */
static void rotable_next(lua_State *L, ROTable *t, TValue *key, TValue *val) {
unsigned keypos = getlsizenode(t);
unsigned keypos = gettbllsizenode((struct Table *)t);
/* Special case: if key is nil, return the first element of the rotable */
if (ttisnil(key))

View File

@ -40,8 +40,8 @@
(gkey(cast(Node *, cast(char *, (v)) - offsetof(Node, i_val))))
/* test Table to determine if it is a RW or RO table */
#define isrotable(t) (gettt(t)==LUA_TTBLROF)
#define isrwtable(t) (gettt(t)==LUA_TTBLRAM)
/* the table kind is read from the type tag in the object header; 't' is
** parenthesised so the cast covers the whole argument expression */
#define isrotable(t) (gettt((const struct GCObject *)(t))==LUA_TTBLROF)
#define isrwtable(t) (gettt((const struct GCObject *)(t))==LUA_TTBLRAM)
LUAI_FUNC const TValue *luaH_getint (Table *t, lua_Integer key);

View File

@ -46,7 +46,7 @@ typedef enum {
#define gfasttm(g,et,e) ((et) == NULL ? NULL : \
(getflags(et) & (1u<<(e))) ? NULL : luaT_gettm(et, e, (g)->tmname[e]))
(gettblflags(et) & (1u<<(e))) ? NULL : luaT_gettm(et, e, (g)->tmname[e]))
#define fasttm(l,et,e) gfasttm(G(l), et, e)

View File

@ -204,6 +204,39 @@
#define LUA_QL(x) "'" x "'" // No longer used in lua53, but still used
#define LUA_QS LUA_QL("%s") // in some of our application modules
/* =================================================================== */
/*
@@ LUA_LOAD_BYTE_FN is used to define inline accessor functions for
** reading byte-sized fields from object headers. This can be used to
** speed up architectures which must resort to trap-and-emulate for
** sub-word memory accesses.
*/
#ifdef LUA_USE_ESP
/*
** Byte field access macro. On ESP targets this causes the compiler to emit
** a l32i + extui instruction pair instead of a single l8ui, avoiding a call
** to the S/W unaligned exception handler. This is used to force aligned
** access to commonly accessed fields in Flash-based record structures. It
** is not needed for RAM-only structures.
**
** fn     name of the accessor function to define
** type   structure type that contains the field
** field  name of the byte-sized member to read
**
** The aligned word offset and the bit position within that word are both
** derived from offsetof(type, field), so they follow the structure layout.
** The bit position is computed as (byte offset % 4) * 8, i.e. byte 0 is
** taken to be the least significant byte of the loaded word.
** NOTE(review): confirm this byte order for any new target.
**
** The trailing "m" input operand is not referenced by the template. It only
** tells the compiler that the asm reads o->field, so the load is not merged
** with an earlier one or moved across a store to the same field.
*/
#define LUA_LOAD_BYTE_FN(fn, type, field) \
static inline lu_int32 fn(const type *o) { \
lu_int32 res; /* extract named field */ \
asm ("l32i %0, %1, %2;" \
"extui %0, %0, %3, 8;" \
: "=r"(res) : "r"(o) \
, "i"((offsetof(type, field)/4)*4) \
, "i"((offsetof(type, field)%4)*8) \
, "m"(o->field) \
: );\
return res; }
#endif
/*
** Portable fallback, used only when LUA_LOAD_BYTE_FN has not already been
** defined: the generated accessor is a plain member read.
*/
#ifndef LUA_LOAD_BYTE_FN
#define LUA_LOAD_BYTE_FN(fn, type, field) \
  static inline lu_byte fn(const type *o) { \
    return o->field; \
  }
#endif
/*
** {==================================================================
** Other NodeMCU configuration.

View File

@ -445,7 +445,7 @@ int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
#define tostring(L,o) \
(ttisstring(o) || (cvt2str(o) && (luaO_tostring(L, o), 1)))
#define isemptystr(o) (ttisshrstring(o) && getshrlen(tsvalue(o)) == 0)
#define isemptystr(o) (ttisshrstring(o) && getstrshrlen(tsvalue(o)) == 0)
/* copy strings in stack from top - n up to top - 1 to buffer */
static void copy2buff (StkId top, int n, char *buff) {
@ -516,7 +516,7 @@ void luaV_objlen (lua_State *L, StkId ra, const TValue *rb) {
return;
}
case LUA_TSHRSTR: {
setivalue(ra, getshrlen(tsvalue(rb)));
setivalue(ra, getstrshrlen(tsvalue(rb)));
return;
}
case LUA_TLNGSTR: {

View File

@ -142,7 +142,7 @@ Lua53 also reimplements the Lua51 LCD (Lua Compact Debug) patch. This replaces t
By default the GCC compiler emits a `l8ui` instruction to access byte fields on the ESP8266 and ESP32 Xtensa processors. This instruction will generate an unaligned fetch exception when this byte field is in Flash memory (as will accessing short fields). These exceptions are handled by emulating the instruction in software using an unaligned access handler; this allows execution to continue albeit with the runtime cost of handling the exception in software. We wish to avoid the performance hit of executing this handler for such exceptions.
`lobject.h` now defines a `GET_BYTE_FN(name,t,wo,bo)` macro. In the case of host targets this macro generates the normal field access, but in the case of Xtensa targets uses of this macro define an `static inline` access function for each field. These functions at the default `-O2` optimisation level cause the code generator to emit a pair of `l32i.n` + `extui` instructions replacing the single `l8ui` instruction. This has the cost of an extra instruction execution for accessing RAM data, but also removes the 200+ clock overhead of the software exception handler in the case of flash memory accesses.
`luaconf.h` now defines a `LUA_LOAD_BYTE_FN(fn,type,field)` macro. In the case of host targets this macro generates the normal field access, but in the case of Xtensa targets uses of this macro define a `static inline` access function for each field. These functions at the default `-O2` optimisation level cause the code generator to emit a pair of `l32i.n` + `extui` instructions replacing the single `l8ui` instruction. This has the cost of an extra instruction execution for accessing RAM data, but also removes the 200+ clock overhead of the software exception handler in the case of flash memory accesses.
There are 9 byte fields in the `GCObject`, `TString`, `Proto`, `ROTable` structures that can either be statically compiled as `const struct` into library code space or generated by the Lua cross compiler and loaded into the LFS region; the `LUA_LOAD_BYTE_FN` macro is used to create inline access functions for these fields, and read references of the form `(o)->tt` (for example) have been recoded using the accessor form `gettt(o)`. There are 44 such changed access references in the source which together represent perhaps 99% of potential sources of this software exception within the Lua VM.