This PR changes pagination so that it still kicks in when the 5000 max row limit is hit. This means all rows can eventually be returned, although for very large tables you may hit rate limits (for example, if thousands of rows are related to each row in your table).

mike12345567 2024-07-31 16:21:49 +01:00
parent a646fc5052
commit 1d695be77c
5 changed files with 104 additions and 141 deletions
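For context on what the behaviour change means for API consumers, here is a minimal sketch (not code from this commit) of paging through a table that exceeds the 5000-row cap. The fetchAllRows helper and the searchPage callback are hypothetical; only the rows / hasNextPage / bookmark response shape comes from the search endpoints touched below.

// Hypothetical client-side loop: keep requesting pages until the server
// reports there is no further page. Types are simplified stand-ins for the
// real Budibase ones.
type Row = Record<string, any>

interface PagedSearchResponse {
  rows: Row[]
  hasNextPage?: boolean
  bookmark?: number
}

async function fetchAllRows(
  searchPage: (bookmark?: number) => Promise<PagedSearchResponse>
): Promise<Row[]> {
  const all: Row[] = []
  let bookmark: number | undefined = undefined
  let hasNextPage = true
  while (hasNextPage) {
    const page = await searchPage(bookmark)
    all.push(...page.rows)
    hasNextPage = !!page.hasNextPage
    bookmark = page.bookmark
  }
  return all
}

Each page is one request, which is why very large result sets can still run into rate limits as noted above.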

View File

@@ -1,92 +1,35 @@
SELECT 'CREATE DATABASE main'
WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'main')\gexec
CREATE SCHEMA "test-1";
CREATE TYPE person_job AS ENUM ('qa', 'programmer', 'designer', 'support');
CREATE TABLE Persons (
PersonID SERIAL PRIMARY KEY,
LastName varchar(255),
FirstName varchar(255),
Address varchar(255),
City varchar(255) DEFAULT 'Belfast',
Age INTEGER DEFAULT 20 NOT NULL,
Year INTEGER,
Type person_job
-- Create the first table
CREATE TABLE first_table (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
description TEXT
);
CREATE TABLE Tasks (
TaskID SERIAL PRIMARY KEY,
ExecutorID INT,
QaID INT,
Completed BOOLEAN,
TaskName varchar(255),
CONSTRAINT fkexecutor
FOREIGN KEY(ExecutorID)
REFERENCES Persons(PersonID),
CONSTRAINT fkqa
FOREIGN KEY(QaID)
REFERENCES Persons(PersonID)
);
CREATE TABLE Products (
ProductID SERIAL PRIMARY KEY,
ProductName varchar(255)
);
CREATE TABLE Products_Tasks (
ProductID INT NOT NULL,
TaskID INT NOT NULL,
CONSTRAINT fkProducts
FOREIGN KEY(ProductID)
REFERENCES Products(ProductID),
CONSTRAINT fkTasks
FOREIGN KEY(TaskID)
REFERENCES Tasks(TaskID),
PRIMARY KEY (ProductID, TaskID)
);
CREATE TABLE "test-1".table1 (
id SERIAL PRIMARY KEY,
Name varchar(255)
);
CREATE TABLE CompositeTable (
KeyPartOne varchar(128),
KeyPartTwo varchar(128),
Name varchar(255),
PRIMARY KEY (KeyPartOne, KeyPartTwo)
);
INSERT INTO Persons (FirstName, LastName, Address, City, Type, Year) VALUES ('Mike', 'Hughes', '123 Fake Street', 'Belfast', 'qa', 1999);
INSERT INTO Persons (FirstName, LastName, Address, City, Type, Year) VALUES ('John', 'Smith', '64 Updown Road', 'Dublin', 'programmer', 1996);
INSERT INTO Persons (FirstName, LastName, Address, City, Type, Age, Year) VALUES ('Foo', 'Bar', 'Foo Street', 'Bartown', 'support', 0, 1993);
INSERT INTO Persons (FirstName, LastName, Address, City, Type) VALUES ('Jonny', 'Muffin', 'Muffin Street', 'Cork', 'support');
INSERT INTO Persons (FirstName, LastName, Address, City, Type, Age, Year) VALUES ('Dave', 'Bar', '2 Foo Street', 'Bartown', 'support', 0, 1993);
INSERT INTO Persons (FirstName, LastName, Address, City, Type, Age, Year) VALUES ('James', 'Bar', '3 Foo Street', 'Bartown', 'support', 0, 1993);
INSERT INTO Persons (FirstName, LastName, Address, City, Type, Age, Year) VALUES ('Jenny', 'Bar', '4 Foo Street', 'Bartown', 'support', 0, 1993);
INSERT INTO Persons (FirstName, LastName, Address, City, Type, Age, Year) VALUES ('Grace', 'Bar', '5 Foo Street', 'Bartown', 'support', 0, 1993);
INSERT INTO Persons (FirstName, LastName, Address, City, Type, Age, Year) VALUES ('Sarah', 'Bar', '6 Foo Street', 'Bartown', 'support', 0, 1993);
INSERT INTO Persons (FirstName, LastName, Address, City, Type, Age, Year) VALUES ('Kelly', 'Bar', '7 Foo Street', 'Bartown', 'support', 0, 1993);
-- insert a lot of tasks for testing
WITH RECURSIVE generate_series AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM generate_series WHERE n < 6000
),
random_data AS (
SELECT
n,
(random() * 9 + 1)::int AS ExecutorID,
(random() * 9 + 1)::int AS QaID,
'assembling' AS TaskName,
(random() < 0.5) AS Completed
FROM generate_series
)
INSERT INTO Tasks (ExecutorID, QaID, TaskName, Completed)
SELECT ExecutorID, QaID, TaskName, Completed
FROM random_data;
INSERT INTO Products (ProductName) VALUES ('Computers');
INSERT INTO Products (ProductName) VALUES ('Laptops');
INSERT INTO Products (ProductName) VALUES ('Chairs');
INSERT INTO Products_Tasks (ProductID, TaskID) VALUES (1, 1);
INSERT INTO Products_Tasks (ProductID, TaskID) VALUES (2, 1);
INSERT INTO Products_Tasks (ProductID, TaskID) VALUES (3, 1);
INSERT INTO Products_Tasks (ProductID, TaskID) VALUES (1, 2);
INSERT INTO "test-1".table1 (Name) VALUES ('Test');
INSERT INTO CompositeTable (KeyPartOne, KeyPartTwo, Name) VALUES ('aaa', 'bbb', 'Michael');
INSERT INTO CompositeTable (KeyPartOne, KeyPartTwo, Name) VALUES ('bbb', 'ccc', 'Andrew');
INSERT INTO CompositeTable (KeyPartOne, KeyPartTwo, Name) VALUES ('ddd', '', 'OneKey');
-- Create the second table
CREATE TABLE second_table (
id SERIAL PRIMARY KEY,
first_table_id INT REFERENCES first_table(id),
data TEXT NOT NULL
);
-- Insert 50 rows into the first table
DO
$$
BEGIN
FOR i IN 1..50 LOOP
INSERT INTO first_table (name, description)
VALUES ('Name ' || i, 'Description ' || i);
END LOOP;
END
$$;
-- Insert 10,000 rows into the second table, all related to the first row in the first table
DO
$$
BEGIN
FOR i IN 1..10000 LOOP
INSERT INTO second_table (first_table_id, data)
VALUES (1, 'Data ' || i);
END LOOP;
END
$$;
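As a quick sanity check on this fixture (illustrative only, not part of the commit), something along these lines could confirm the shape that exercises the 5000-row cap: 50 parent rows and 10,000 children all related to first_table row 1. The connection settings are assumed from a local Postgres container pointing at the main database.

// Illustrative check of the fixture shape; connection details are assumed and
// would normally come from the local development environment.
import { Client } from "pg"

async function verifyFixture() {
  const client = new Client({ database: "main" })
  await client.connect()
  const parents = await client.query("SELECT count(*) FROM first_table")
  const children = await client.query(
    "SELECT count(*) FROM second_table WHERE first_table_id = 1"
  )
  // expect "50" and "10000"
  console.log(parents.rows[0].count, children.rows[0].count)
  await client.end()
}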

View File

@@ -66,9 +66,14 @@ export interface RunConfig {
includeSqlRelationships?: IncludeRelationship
}
export type ExternalReadRequestReturnType = {
rows: Row[]
rawResponseSize: number
}
export type ExternalRequestReturnType<T extends Operation> =
T extends Operation.READ
? Row[]
? ExternalReadRequestReturnType
: T extends Operation.COUNT
? number
: { row: Row; table: Table }
@@ -741,9 +746,11 @@ export class ExternalRequest<T extends Operation> {
)
// if reading it'll just be an array of rows, return whole thing
if (operation === Operation.READ) {
return (
Array.isArray(output) ? output : [output]
) as ExternalRequestReturnType<T>
const rows = Array.isArray(output) ? output : [output]
return {
rows,
rawResponseSize: responseRows.length,
} as ExternalRequestReturnType<T>
} else {
return { row: output[0], table } as ExternalRequestReturnType<T>
}
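The reason the READ result now carries both rows and rawResponseSize is that relationship rows get merged during output processing, so the processed row count can end up smaller than what the SQL query actually returned. A rough sketch of how the two values are meant to be used (the interface mirrors the type added here, but the shouldPaginate helper is illustrative):

// Illustrative consumer of the new READ return shape: pagination decisions key
// off the raw response size, while callers keep working with rows.
interface ExternalReadRequestReturnType {
  rows: Record<string, any>[]
  rawResponseSize: number
}

function shouldPaginate(
  response: ExternalReadRequestReturnType,
  limit: number
): boolean {
  // the query asked for limit + 1 rows, so a raw response larger than the
  // limit likely means there is at least one more page, even if relationship
  // merging later collapses response.rows to fewer than `limit` entries
  return response.rawResponseSize > limit
}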

View File

@@ -136,7 +136,7 @@ export async function fetchEnrichedRow(ctx: UserCtx) {
includeSqlRelationships: IncludeRelationship.INCLUDE,
})
const table: Table = tables[tableName]
const row = response[0]
const row = response.rows[0]
// this seems like a lot of work, but basically we need to dig deeper for the enrich
// for a single row, there is probably a better way to do this with some smart multi-layer joins
for (let [fieldName, field] of Object.entries(table.schema)) {

View File

@@ -23,6 +23,7 @@ import pick from "lodash/pick"
import { outputProcessing } from "../../../../utilities/rowProcessor"
import sdk from "../../../"
import { isSearchingByRowID } from "./utils"
import { ExternalReadRequestReturnType } from "../../../../api/controllers/row/ExternalRequest"
function getPaginationAndLimitParameters(
filters: SearchFilters,
@@ -47,7 +48,7 @@
limit: limit + 1,
}
if (bookmark) {
paginateObj.offset = limit * bookmark
paginateObj.offset = bookmark
}
} else if (limit) {
paginateObj = {
@@ -105,37 +106,42 @@
paginate: paginateObj as PaginationJson,
includeSqlRelationships: IncludeRelationship.INCLUDE,
}
const queries: Promise<Row[] | number>[] = []
queries.push(handleRequest(Operation.READ, tableId, parameters))
const queries: [
Promise<ExternalReadRequestReturnType>,
Promise<number> | undefined
] = [handleRequest(Operation.READ, tableId, parameters), undefined]
if (countRows) {
queries.push(handleRequest(Operation.COUNT, tableId, parameters))
queries[1] = handleRequest(Operation.COUNT, tableId, parameters)
}
const responses = await Promise.all(queries)
let rows = responses[0] as Row[]
const totalRows =
responses.length > 1 ? (responses[1] as number) : undefined
let rows = responses[0].rows
const rawResponseSize = responses[0].rawResponseSize
const totalRows = responses.length > 1 ? responses[1] : undefined
let hasNextPage = false
// remove the extra row if it's there
if (paginate && limit && rows.length > limit) {
rows.pop()
hasNextPage = true
}
if (options.fields) {
const fields = [...options.fields, ...PROTECTED_EXTERNAL_COLUMNS]
rows = rows.map((r: any) => pick(r, fields))
}
rows = await outputProcessing<Row[]>(table, rows, {
let processed = await outputProcessing<Row[]>(table, rows, {
preserveLinks: true,
squash: true,
})
let hasNextPage = false
// if the raw rows is greater than the limit then we likely need to paginate
if (paginate && limit && rawResponseSize > limit) {
hasNextPage = true
// processed rows has merged relationships down, this might not be more than limit
if (processed.length > limit) {
processed.pop()
}
}
if (options.fields) {
const fields = [...options.fields, ...PROTECTED_EXTERNAL_COLUMNS]
processed = processed.map((r: any) => pick(r, fields))
}
// need wrapper object for bookmarks etc when paginating
const response: SearchResponse<Row> = { rows, hasNextPage }
const response: SearchResponse<Row> = { rows: processed, hasNextPage }
if (hasNextPage && bookmark != null) {
response.bookmark = bookmark + 1
response.bookmark = processed.length
}
if (totalRows != null) {
response.totalRows = totalRows
@@ -255,24 +261,21 @@ export async function exportRows(
}
export async function fetch(tableId: string): Promise<Row[]> {
const response = await handleRequest<Operation.READ>(
Operation.READ,
tableId,
{
includeSqlRelationships: IncludeRelationship.INCLUDE,
}
)
const response = await handleRequest(Operation.READ, tableId, {
includeSqlRelationships: IncludeRelationship.INCLUDE,
})
const table = await sdk.tables.getTable(tableId)
return await outputProcessing<Row[]>(table, response, {
return await outputProcessing<Row[]>(table, response.rows, {
preserveLinks: true,
squash: true,
})
}
export async function fetchRaw(tableId: string): Promise<Row[]> {
return await handleRequest<Operation.READ>(Operation.READ, tableId, {
const response = await handleRequest(Operation.READ, tableId, {
includeSqlRelationships: IncludeRelationship.INCLUDE,
})
return response.rows
}
export async function fetchView(viewName: string) {
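Two details of the external search change above are worth calling out: the bookmark is now used directly as a row offset (offset = bookmark rather than limit * bookmark), and one extra row is requested so the presence of a further page can be detected. A condensed sketch of that pattern, with buildPaginateObj standing in for the real getPaginationAndLimitParameters logic and PaginationJson simplified to a local type:

// Sketch of the "ask for limit + 1, treat bookmark as an offset" pattern.
interface PaginationJson {
  limit: number
  offset?: number
}

function buildPaginateObj(limit: number, bookmark?: number): PaginationJson {
  const paginateObj: PaginationJson = { limit: limit + 1 }
  if (bookmark) {
    // bookmark is a count of rows already consumed, not a page index
    paginateObj.offset = bookmark
  }
  return paginateObj
}

The extra row only confirms that another page exists; it is popped again before the response is built when the processed results still exceed the limit.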

View File

@@ -45,6 +45,7 @@ import { dataFilters, PROTECTED_INTERNAL_COLUMNS } from "@budibase/shared-core"
import { isSearchingByRowID } from "./utils"
const builder = new sql.Sql(SqlClient.SQL_LITE)
const SQLITE_COLUMN_LIMIT = 2000
const MISSING_COLUMN_REGEX = new RegExp(`no such column: .+`)
const MISSING_TABLE_REGX = new RegExp(`no such table: .+`)
const DUPLICATE_COLUMN_REGEX = new RegExp(`duplicate column name: .+`)
@@ -55,12 +56,14 @@ function buildInternalFieldList(
opts?: { relationships?: RelationshipsJson[] }
) {
let fieldList: string[] = []
const addJunctionFields = (relatedTable: Table, fields: string[]) => {
const getJunctionFields = (relatedTable: Table, fields: string[]) => {
const junctionFields: string[] = []
fields.forEach(field => {
fieldList.push(
junctionFields.push(
`${generateJunctionTableID(table._id!, relatedTable._id!)}.${field}`
)
})
return junctionFields
}
fieldList = fieldList.concat(
PROTECTED_INTERNAL_COLUMNS.map(col => `${table._id}.${col}`)
@@ -70,18 +73,22 @@
if (!opts?.relationships && isRelationship) {
continue
}
if (isRelationship) {
if (!isRelationship) {
fieldList.push(`${table._id}.${mapToUserColumn(col.name)}`)
} else {
const linkCol = col as RelationshipFieldMetadata
const relatedTable = tables.find(table => table._id === linkCol.tableId)
// no relationships provided, don't go more than a layer deep
if (relatedTable) {
fieldList = fieldList.concat(
buildInternalFieldList(relatedTable, tables)
)
addJunctionFields(relatedTable, ["doc1.fieldName", "doc2.fieldName"])
if (!relatedTable) {
continue
}
} else {
fieldList.push(`${table._id}.${mapToUserColumn(col.name)}`)
const relatedFields = buildInternalFieldList(relatedTable, tables).concat(
getJunctionFields(relatedTable, ["doc1.fieldName", "doc2.fieldName"])
)
// break out of the loop if we have reached the max number of columns
if (relatedFields.length + fieldList.length > SQLITE_COLUMN_LIMIT) {
break
}
fieldList = fieldList.concat(relatedFields)
}
}
return [...new Set(fieldList)]
@@ -315,7 +322,7 @@ export async function search(
paginate = true
request.paginate = {
limit: params.limit + 1,
offset: bookmark * params.limit,
offset: bookmark,
}
}
@@ -345,10 +352,13 @@
)
// check for pagination final row
let nextRow: Row | undefined
let nextRow: boolean = false
if (paginate && params.limit && rows.length > params.limit) {
// remove the extra row that confirmed if there is another row to move to
nextRow = processed.pop()
nextRow = true
if (processed.length > params.limit) {
processed.pop()
}
}
// get the rows
@@ -372,7 +382,7 @@
// check for pagination
if (paginate && nextRow) {
response.hasNextPage = true
response.bookmark = bookmark + 1
response.bookmark = processed.length
}
if (paginate && !nextRow) {
response.hasNextPage = false
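The internal (SQLite) search path applies the same ideas: the related-field list is capped so the generated query stays under SQLITE_COLUMN_LIMIT, the bookmark is used as an offset, and the "extra row" is tracked with a boolean instead of holding on to the row itself. A compact sketch of the field-capping part, with collectFields as a generic stand-in for buildInternalFieldList:

// Generic sketch of capping a growing field list at the SQLite column limit,
// in the spirit of the buildInternalFieldList change above.
const SQLITE_COLUMN_LIMIT = 2000

function collectFields(
  baseFields: string[],
  relatedFieldGroups: string[][]
): string[] {
  let fieldList = [...baseFields]
  for (const relatedFields of relatedFieldGroups) {
    // stop adding relationship columns once the column limit would be exceeded
    if (fieldList.length + relatedFields.length > SQLITE_COLUMN_LIMIT) {
      break
    }
    fieldList = fieldList.concat(relatedFields)
  }
  return [...new Set(fieldList)]
}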