Merge pull request #14557 from Budibase/fix/sql-use-ctes

SQL - use CTEs to avoid pagination issues with large tables
This commit is contained in:
Michael Drury 2024-09-11 16:05:01 +01:00 committed by GitHub
commit e1d750b987
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 94 additions and 122 deletions

View File

@ -42,7 +42,7 @@ services:
couchdb-service: couchdb-service:
container_name: budi-couchdb3-dev container_name: budi-couchdb3-dev
restart: on-failure restart: on-failure
image: budibase/couchdb:v3.3.3 image: budibase/couchdb:v3.3.3-sqs-v2.1.1
environment: environment:
- COUCHDB_PASSWORD=${COUCH_DB_PASSWORD} - COUCHDB_PASSWORD=${COUCH_DB_PASSWORD}
- COUCHDB_USER=${COUCH_DB_USER} - COUCHDB_USER=${COUCH_DB_USER}

View File

@ -76,7 +76,7 @@
"build:docker:single": "./scripts/build-single-image.sh", "build:docker:single": "./scripts/build-single-image.sh",
"build:docker:single:sqs": "./scripts/build-single-image-sqs.sh", "build:docker:single:sqs": "./scripts/build-single-image-sqs.sh",
"build:docker:dependencies": "docker build -f hosting/dependencies/Dockerfile -t budibase/dependencies:latest ./hosting", "build:docker:dependencies": "docker build -f hosting/dependencies/Dockerfile -t budibase/dependencies:latest ./hosting",
"publish:docker:couch": "docker buildx build --platform linux/arm64,linux/amd64 -f hosting/couchdb/Dockerfile -t budibase/couchdb:latest -t budibase/couchdb:v3.3.3 --push ./hosting/couchdb", "publish:docker:couch": "docker buildx build --platform linux/arm64,linux/amd64 -f hosting/couchdb/Dockerfile -t budibase/couchdb:latest -t budibase/couchdb:v3.3.3 -t budibase/couchdb:v3.3.3-sqs-v2.1.1 --push ./hosting/couchdb",
"publish:docker:dependencies": "docker buildx build --platform linux/arm64,linux/amd64 -f hosting/dependencies/Dockerfile -t budibase/dependencies:latest -t budibase/dependencies:v3.2.1 --push ./hosting", "publish:docker:dependencies": "docker buildx build --platform linux/arm64,linux/amd64 -f hosting/dependencies/Dockerfile -t budibase/dependencies:latest -t budibase/dependencies:v3.2.1 --push ./hosting",
"release:helm": "node scripts/releaseHelmChart", "release:helm": "node scripts/releaseHelmChart",
"env:multi:enable": "lerna run --stream env:multi:enable", "env:multi:enable": "lerna run --stream env:multi:enable",

View File

@ -862,6 +862,21 @@ class InternalBuilder {
return withSchema return withSchema
} }
private buildJsonField(field: string): string {
const parts = field.split(".")
let tableField: string, unaliased: string
if (parts.length > 1) {
const alias = parts.shift()!
unaliased = parts.join(".")
tableField = `${this.quote(alias)}.${this.quote(unaliased)}`
} else {
unaliased = parts.join(".")
tableField = this.quote(unaliased)
}
const separator = this.client === SqlClient.ORACLE ? " VALUE " : ","
return `'${unaliased}'${separator}${tableField}`
}
addJsonRelationships( addJsonRelationships(
query: Knex.QueryBuilder, query: Knex.QueryBuilder,
fromTable: string, fromTable: string,
@ -871,27 +886,6 @@ class InternalBuilder {
const knex = this.knex const knex = this.knex
const { resource, tableAliases: aliases, endpoint } = this.query const { resource, tableAliases: aliases, endpoint } = this.query
const fields = resource?.fields || [] const fields = resource?.fields || []
const jsonField = (field: string) => {
const parts = field.split(".")
let tableField: string, unaliased: string
if (parts.length > 1) {
const alias = parts.shift()!
unaliased = parts.join(".")
tableField = `${this.quote(alias)}.${this.quote(unaliased)}`
} else {
unaliased = parts.join(".")
tableField = this.quote(unaliased)
}
let separator = ","
switch (sqlClient) {
case SqlClient.ORACLE:
separator = " VALUE "
break
case SqlClient.MS_SQL:
separator = ":"
}
return `'${unaliased}'${separator}${tableField}`
}
for (let relationship of relationships) { for (let relationship of relationships) {
const { const {
tableName: toTable, tableName: toTable,
@ -921,7 +915,7 @@ class InternalBuilder {
) )
} }
const fieldList: string = relationshipFields const fieldList: string = relationshipFields
.map(field => jsonField(field)) .map(field => this.buildJsonField(field))
.join(",") .join(",")
// SQL Server uses TOP - which performs a little differently to the normal LIMIT syntax // SQL Server uses TOP - which performs a little differently to the normal LIMIT syntax
// it reduces the result set rather than limiting how much data it filters over // it reduces the result set rather than limiting how much data it filters over
@ -1073,43 +1067,6 @@ class InternalBuilder {
return query return query
} }
addRelationships(
query: Knex.QueryBuilder,
fromTable: string,
relationships: RelationshipsJson[]
): Knex.QueryBuilder {
const tableSets: Record<string, [RelationshipsJson]> = {}
// aggregate into table sets (all the same to tables)
for (let relationship of relationships) {
const keyObj: { toTable: string; throughTable: string | undefined } = {
toTable: relationship.tableName,
throughTable: undefined,
}
if (relationship.through) {
keyObj.throughTable = relationship.through
}
const key = JSON.stringify(keyObj)
if (tableSets[key]) {
tableSets[key].push(relationship)
} else {
tableSets[key] = [relationship]
}
}
for (let [key, relationships] of Object.entries(tableSets)) {
const { toTable, throughTable } = JSON.parse(key)
query = this.addJoin(
query,
{
from: fromTable,
to: toTable,
through: throughTable,
},
relationships
)
}
return query
}
qualifiedKnex(opts?: { alias?: string | boolean }): Knex.QueryBuilder { qualifiedKnex(opts?: { alias?: string | boolean }): Knex.QueryBuilder {
let alias = this.query.tableAliases?.[this.query.endpoint.entityId] let alias = this.query.tableAliases?.[this.query.endpoint.entityId]
if (opts?.alias === false) { if (opts?.alias === false) {
@ -1193,8 +1150,7 @@ class InternalBuilder {
if (!primary) { if (!primary) {
throw new Error("Primary key is required for upsert") throw new Error("Primary key is required for upsert")
} }
const ret = query.insert(parsedBody).onConflict(primary).merge() return query.insert(parsedBody).onConflict(primary).merge()
return ret
} else if ( } else if (
this.client === SqlClient.MS_SQL || this.client === SqlClient.MS_SQL ||
this.client === SqlClient.ORACLE this.client === SqlClient.ORACLE
@ -1253,12 +1209,29 @@ class InternalBuilder {
if (!counting) { if (!counting) {
query = this.addSorting(query) query = this.addSorting(query)
} }
// handle joins
if (relationships) {
query = this.addJsonRelationships(query, tableName, relationships)
}
return this.addFilters(query, filters, { relationship: true }) query = this.addFilters(query, filters, { relationship: true })
// handle relationships with a CTE for all others
if (relationships?.length) {
const mainTable =
this.query.tableAliases?.[this.query.endpoint.entityId] ||
this.query.endpoint.entityId
const cte = this.addSorting(
this.knex
.with("paginated", query)
.select(this.generateSelectStatement())
.from({
[mainTable]: "paginated",
})
)
// add JSON aggregations attached to the CTE
return this.addJsonRelationships(cte, tableName, relationships)
}
// no relationships found - return query
else {
return query
}
} }
update(opts: QueryOptions): Knex.QueryBuilder { update(opts: QueryOptions): Knex.QueryBuilder {

View File

@ -145,35 +145,32 @@ export function basicProcessing({
: typeof value === "string" : typeof value === "string"
? JSON.parse(value) ? JSON.parse(value)
: undefined : undefined
if (array) { if (array && Array.isArray(array)) {
thisRow[col] = array thisRow[col] = array
// make sure all of them have an _id // make sure all of them have an _id
if (Array.isArray(thisRow[col])) { const sortField = relatedTable.primaryDisplay || relatedTable.primary![0]!
const sortField = thisRow[col] = (thisRow[col] as Row[])
relatedTable.primaryDisplay || relatedTable.primary![0]! .map(relatedRow =>
thisRow[col] = (thisRow[col] as Row[]) basicProcessing({
.map(relatedRow => row: relatedRow,
basicProcessing({ table: relatedTable,
row: relatedRow, tables,
table: relatedTable, isLinked: false,
tables, sqs,
isLinked: false,
sqs,
})
)
.sort((a, b) => {
const aField = a?.[sortField],
bField = b?.[sortField]
if (!aField) {
return 1
} else if (!bField) {
return -1
}
return aField.localeCompare
? aField.localeCompare(bField)
: aField - bField
}) })
} )
.sort((a, b) => {
const aField = a?.[sortField],
bField = b?.[sortField]
if (!aField) {
return 1
} else if (!bField) {
return -1
}
return aField.localeCompare
? aField.localeCompare(bField)
: aField - bField
})
} }
} }
return fixJsonTypes(thisRow, table) return fixJsonTypes(thisRow, table)

View File

@ -832,12 +832,13 @@ describe.each(
}, },
}) })
expect(res).toHaveLength(1) expect(res).toHaveLength(1)
expect(res[0]).toEqual( expect(res[0]).toEqual({
expect.objectContaining({ id: 2,
id: 2, name: "two",
name: "two", // the use of table.* introduces the possibility of nulls being returned
}) birthday: null,
) number: null,
})
}) })
// this parameter really only impacts SQL queries // this parameter really only impacts SQL queries

View File

@ -161,16 +161,16 @@ describe("SQL query builder", () => {
it("should add the schema to the LEFT JOIN", () => { it("should add the schema to the LEFT JOIN", () => {
const query = sql._query(generateRelationshipJson({ schema: "production" })) const query = sql._query(generateRelationshipJson({ schema: "production" }))
expect(query).toEqual({ expect(query).toEqual({
bindings: [relationshipLimit, limit], bindings: [limit, relationshipLimit],
sql: `select "brands".*, (select json_agg(json_build_object('product_id',"products"."product_id",'product_name',"products"."product_name",'brand_id',"products"."brand_id")) from (select "products".* from "production"."products" as "products" where "products"."brand_id" = "brands"."brand_id" order by "products"."brand_id" asc limit $1) as "products") as "products" from "production"."brands" order by "test"."id" asc limit $2`, sql: `with "paginated" as (select "brands".* from "production"."brands" order by "test"."id" asc limit $1) select "brands".*, (select json_agg(json_build_object('product_id',"products"."product_id",'product_name',"products"."product_name",'brand_id',"products"."brand_id")) from (select "products".* from "production"."products" as "products" where "products"."brand_id" = "brands"."brand_id" order by "products"."brand_id" asc limit $2) as "products") as "products" from "paginated" as "brands" order by "test"."id" asc`,
}) })
}) })
it("should handle if the schema is not present when doing a LEFT JOIN", () => { it("should handle if the schema is not present when doing a LEFT JOIN", () => {
const query = sql._query(generateRelationshipJson()) const query = sql._query(generateRelationshipJson())
expect(query).toEqual({ expect(query).toEqual({
bindings: [relationshipLimit, limit], bindings: [limit, relationshipLimit],
sql: `select "brands".*, (select json_agg(json_build_object('product_id',"products"."product_id",'product_name',"products"."product_name",'brand_id',"products"."brand_id")) from (select "products".* from "products" as "products" where "products"."brand_id" = "brands"."brand_id" order by "products"."brand_id" asc limit $1) as "products") as "products" from "brands" order by "test"."id" asc limit $2`, sql: `with "paginated" as (select "brands".* from "brands" order by "test"."id" asc limit $1) select "brands".*, (select json_agg(json_build_object('product_id',"products"."product_id",'product_name',"products"."product_name",'brand_id',"products"."brand_id")) from (select "products".* from "products" as "products" where "products"."brand_id" = "brands"."brand_id" order by "products"."brand_id" asc limit $2) as "products") as "products" from "paginated" as "brands" order by "test"."id" asc`,
}) })
}) })
@ -179,8 +179,8 @@ describe("SQL query builder", () => {
generateManyRelationshipJson({ schema: "production" }) generateManyRelationshipJson({ schema: "production" })
) )
expect(query).toEqual({ expect(query).toEqual({
bindings: [relationshipLimit, limit], bindings: [limit, relationshipLimit],
sql: `select "stores".*, (select json_agg(json_build_object('product_id',"products"."product_id",'product_name',"products"."product_name")) from (select "products".* from "production"."products" as "products" inner join "production"."stocks" as "stocks" on "products"."product_id" = "stocks"."product_id" where "stocks"."store_id" = "stores"."store_id" order by "products"."product_id" asc limit $1) as "products") as "products" from "production"."stores" order by "test"."id" asc limit $2`, sql: `with "paginated" as (select "stores".* from "production"."stores" order by "test"."id" asc limit $1) select "stores".*, (select json_agg(json_build_object('product_id',"products"."product_id",'product_name',"products"."product_name")) from (select "products".* from "production"."products" as "products" inner join "production"."stocks" as "stocks" on "products"."product_id" = "stocks"."product_id" where "stocks"."store_id" = "stores"."store_id" order by "products"."product_id" asc limit $2) as "products") as "products" from "paginated" as "stores" order by "test"."id" asc`,
}) })
}) })

View File

@ -60,7 +60,7 @@ describe("Captures of real examples", () => {
queryJson queryJson
) )
expect(query).toEqual({ expect(query).toEqual({
bindings: [relationshipLimit, relationshipLimit, primaryLimit], bindings: [primaryLimit, relationshipLimit, relationshipLimit],
sql: expect.stringContaining( sql: expect.stringContaining(
multiline( multiline(
`select json_agg(json_build_object('executorid',"b"."executorid",'taskname',"b"."taskname",'taskid',"b"."taskid",'completed',"b"."completed",'qaid',"b"."qaid",'executorid',"b"."executorid",'taskname',"b"."taskname",'taskid',"b"."taskid",'completed',"b"."completed",'qaid',"b"."qaid")` `select json_agg(json_build_object('executorid',"b"."executorid",'taskname',"b"."taskname",'taskid',"b"."taskid",'completed',"b"."completed",'qaid',"b"."qaid",'executorid',"b"."executorid",'taskname',"b"."taskname",'taskid',"b"."taskid",'completed',"b"."completed",'qaid',"b"."qaid")`
@ -75,11 +75,11 @@ describe("Captures of real examples", () => {
queryJson queryJson
) )
expect(query).toEqual({ expect(query).toEqual({
bindings: [relationshipLimit, "assembling", primaryLimit], bindings: ["assembling", primaryLimit, relationshipLimit],
sql: expect.stringContaining( sql: expect.stringContaining(
multiline( multiline(
`where exists (select 1 from "tasks" as "b" inner join "products_tasks" as "c" on "b"."taskid" = "c"."taskid" `where exists (select 1 from "tasks" as "b" inner join "products_tasks" as "c" on "b"."taskid" = "c"."taskid" where "c"."productid" = "a"."productid"
where "c"."productid" = "a"."productid" and COALESCE("b"."taskname" = $2, FALSE)` and COALESCE("b"."taskname" = $1, FALSE)`
) )
), ),
}) })
@ -91,12 +91,13 @@ describe("Captures of real examples", () => {
queryJson queryJson
) )
expect(query).toEqual({ expect(query).toEqual({
bindings: [relationshipLimit, primaryLimit], bindings: [primaryLimit, relationshipLimit],
sql: expect.stringContaining( sql: expect.stringContaining(
multiline( multiline(
`select json_agg(json_build_object('executorid',"b"."executorid",'taskname',"b"."taskname",'taskid',"b"."taskid",'completed',"b"."completed",'qaid',"b"."qaid")) `with "paginated" as (select "a".* from "products" as "a" order by "a"."productname" asc nulls first, "a"."productid" asc limit $1)
from (select "b".* from "tasks" as "b" inner join "products_tasks" as "c" on "b"."taskid" = "c"."taskid" select "a".*, (select json_agg(json_build_object('executorid',"b"."executorid",'taskname',"b"."taskname",'taskid',"b"."taskid",'completed',"b"."completed",'qaid',"b"."qaid"))
where "c"."productid" = "a"."productid" order by "b"."taskid" asc limit $1` from (select "b".* from "tasks" as "b" inner join "products_tasks" as "c" on "b"."taskid" = "c"."taskid" where "c"."productid" = "a"."productid" order by "b"."taskid" asc limit $2) as "b") as "tasks"
from "paginated" as "a" order by "a"."productname" asc nulls first, "a"."productid" asc`
) )
), ),
}) })
@ -109,12 +110,12 @@ describe("Captures of real examples", () => {
queryJson queryJson
) )
expect(query).toEqual({ expect(query).toEqual({
bindings: [relationshipLimit, ...filters, relationshipLimit], bindings: [...filters, relationshipLimit, relationshipLimit],
sql: multiline( sql: multiline(
`select "a".*, (select json_agg(json_build_object('productname',"b"."productname",'productid',"b"."productid")) `with "paginated" as (select "a".* from "tasks" as "a" where "a"."taskid" in ($1, $2) order by "a"."taskid" asc limit $3)
select "a".*, (select json_agg(json_build_object('productname',"b"."productname",'productid',"b"."productid"))
from (select "b".* from "products" as "b" inner join "products_tasks" as "c" on "b"."productid" = "c"."productid" from (select "b".* from "products" as "b" inner join "products_tasks" as "c" on "b"."productid" = "c"."productid"
where "c"."taskid" = "a"."taskid" order by "b"."productid" asc limit $1) as "b") as "products" where "c"."taskid" = "a"."taskid" order by "b"."productid" asc limit $4) as "b") as "products" from "paginated" as "a" order by "a"."taskid" asc`
from "tasks" as "a" where "a"."taskid" in ($2, $3) order by "a"."taskid" asc limit $4`
), ),
}) })
}) })
@ -132,18 +133,18 @@ describe("Captures of real examples", () => {
expect(query).toEqual({ expect(query).toEqual({
bindings: [ bindings: [
relationshipLimit,
relationshipLimit,
relationshipLimit,
rangeValue.low, rangeValue.low,
rangeValue.high, rangeValue.high,
equalValue, equalValue,
notEqualsValue, notEqualsValue,
primaryLimit, primaryLimit,
relationshipLimit,
relationshipLimit,
relationshipLimit,
], ],
sql: expect.stringContaining( sql: expect.stringContaining(
multiline( multiline(
`where exists (select 1 from "persons" as "c" where "c"."personid" = "a"."executorid" and "c"."year" between $4 and $5)` `where exists (select 1 from "persons" as "c" where "c"."personid" = "a"."executorid" and "c"."year" between $1 and $2)`
) )
), ),
}) })