Merge pull request #1335 from Budibase/lab-day/lucene

CouchDB 3.0 Searching
This commit is contained in:
Michael Drury 2021-03-26 16:55:30 +00:00 committed by GitHub
commit 91a781025d
16 changed files with 309 additions and 68 deletions

View File

@ -30,7 +30,6 @@ services:
- ./envoy.dev.yaml:/etc/envoy/envoy.yaml - ./envoy.dev.yaml:/etc/envoy/envoy.yaml
ports: ports:
- "${MAIN_PORT}:10000" - "${MAIN_PORT}:10000"
#- "9901:9901"
depends_on: depends_on:
- minio-service - minio-service
- couchdb-service - couchdb-service
@ -38,18 +37,17 @@ services:
couchdb-service: couchdb-service:
container_name: budi-couchdb-dev container_name: budi-couchdb-dev
restart: always restart: always
image: apache/couchdb:3.0 image: ibmcom/couchdb3
environment: environment:
- COUCHDB_PASSWORD=${COUCH_DB_PASSWORD} - COUCHDB_PASSWORD=${COUCH_DB_PASSWORD}
- COUCHDB_USER=${COUCH_DB_USER} - COUCHDB_USER=${COUCH_DB_USER}
ports: ports:
- "${COUCH_DB_PORT}:5984" - "${COUCH_DB_PORT}:5984"
#- "4369:4369"
#- "9100:9100"
volumes: volumes:
- couchdb_data:/opt/couchdb/data - couchdb3_data:/opt/couchdb/data
couch-init: couch-init:
container_name: budi-couchdb-init-dev
image: curlimages/curl image: curlimages/curl
environment: environment:
PUT_CALL: "curl -u ${COUCH_DB_USER}:${COUCH_DB_PASSWORD} -X PUT couchdb-service:5984" PUT_CALL: "curl -u ${COUCH_DB_USER}:${COUCH_DB_PASSWORD} -X PUT couchdb-service:5984"
@ -66,9 +64,8 @@ services:
volumes: volumes:
- redis_data:/data - redis_data:/data
volumes: volumes:
couchdb_data: couchdb3_data:
driver: local driver: local
minio_data: minio_data:
driver: local driver: local

View File

@ -71,7 +71,6 @@ services:
- ./envoy.yaml:/etc/envoy/envoy.yaml - ./envoy.yaml:/etc/envoy/envoy.yaml
ports: ports:
- "${MAIN_PORT}:10000" - "${MAIN_PORT}:10000"
#- "9901:9901"
depends_on: depends_on:
- minio-service - minio-service
- worker-service - worker-service
@ -80,16 +79,14 @@ services:
couchdb-service: couchdb-service:
restart: always restart: always
image: apache/couchdb:3.0 image: ibmcom/couchdb3
environment: environment:
- COUCHDB_PASSWORD=${COUCH_DB_PASSWORD} - COUCHDB_PASSWORD=${COUCH_DB_PASSWORD}
- COUCHDB_USER=${COUCH_DB_USER} - COUCHDB_USER=${COUCH_DB_USER}
ports: ports:
- "${COUCH_DB_PORT}:5984" - "${COUCH_DB_PORT}:5984"
#- "4369:4369"
#- "9100:9100"
volumes: volumes:
- couchdb_data:/opt/couchdb/data - couchdb3_data:/opt/couchdb/data
couch-init: couch-init:
image: curlimages/curl image: curlimages/curl
@ -108,7 +105,7 @@ services:
- redis_data:/data - redis_data:/data
volumes: volumes:
couchdb_data: couchdb3_data:
driver: local driver: local
minio_data: minio_data:
driver: local driver: local

View File

@ -21,14 +21,16 @@ export const fetchTableData = async tableId => {
* Perform a mango query against an internal table * Perform a mango query against an internal table
* @param {String} tableId - id of the table to search * @param {String} tableId - id of the table to search
* @param {Object} search - Mango Compliant search object * @param {Object} search - Mango Compliant search object
* @param {Object} pagination - the pagination controls
*/ */
export const searchTableData = async ({ tableId, search, pagination }) => { export const searchTableData = async ({ tableId, search, pagination }) => {
const rows = await API.post({ const output = await API.post({
url: `/api/${tableId}/rows/search`, url: `/api/${tableId}/rows/search`,
body: { body: {
query: search, query: search,
pagination, pagination,
}, },
}) })
return await enrichRows(rows, tableId) output.rows = await enrichRows(output.rows, tableId)
return output
} }

View File

@ -30,6 +30,17 @@ module.exports = async (url, opts) => {
}, },
404 404
) )
} else if (url.includes("_search")) {
return json({
rows: [
{
doc: {
_id: "test",
},
},
],
bookmark: "test",
})
} }
return fetch(url, opts) return fetch(url, opts)
} }

View File

@ -2,8 +2,11 @@ const CouchDB = require("../../db")
const env = require("../../environment") const env = require("../../environment")
const setBuilderToken = require("../../utilities/builder/setBuilderToken") const setBuilderToken = require("../../utilities/builder/setBuilderToken")
const packageJson = require("../../../package.json") const packageJson = require("../../../package.json")
const { createLinkView } = require("../../db/linkedRows") const {
const { createRoutingView } = require("../../utilities/routing") createLinkView,
createRoutingView,
createAllSearchIndex,
} = require("../../db/views/staticViews")
const { const {
getTemplateStream, getTemplateStream,
createApp, createApp,
@ -92,6 +95,7 @@ async function createInstance(template) {
// add view for linked rows // add view for linked rows
await createLinkView(appId) await createLinkView(appId)
await createRoutingView(appId) await createRoutingView(appId)
await createAllSearchIndex(appId)
// replicate the template data to the instance DB // replicate the template data to the instance DB
// this is currently very hard to test, downloading and importing template files // this is currently very hard to test, downloading and importing template files

View File

@ -17,6 +17,7 @@ const {
const { FieldTypes } = require("../../constants") const { FieldTypes } = require("../../constants")
const { isEqual } = require("lodash") const { isEqual } = require("lodash")
const { cloneDeep } = require("lodash/fp") const { cloneDeep } = require("lodash/fp")
const { QueryBuilder, search } = require("./search/utils")
const TABLE_VIEW_BEGINS_WITH = `all${SEPARATOR}${DocumentTypes.TABLE}${SEPARATOR}` const TABLE_VIEW_BEGINS_WITH = `all${SEPARATOR}${DocumentTypes.TABLE}${SEPARATOR}`
@ -259,39 +260,46 @@ exports.search = async function(ctx) {
const db = new CouchDB(appId) const db = new CouchDB(appId)
const { const {
query, query,
pagination: { pageSize = 10, page }, pagination: { pageSize = 10, bookmark },
} = ctx.request.body } = ctx.request.body
const tableId = ctx.params.tableId
// make all strings a starts with operation rather than pure equality const queryBuilder = new QueryBuilder(appId)
for (const [key, queryVal] of Object.entries(query)) { .setLimit(pageSize)
if (typeof queryVal === "string") { .addTable(tableId)
query[key] = { if (bookmark) {
$gt: queryVal, queryBuilder.setBookmark(bookmark)
$lt: `${queryVal}\uffff`,
}
}
} }
// pure equality for table let searchString
query.tableId = ctx.params.tableId if (ctx.query && ctx.query.raw && ctx.query.raw !== "") {
const response = await db.find({ searchString = queryBuilder.complete(query["RAW"])
selector: query, } else {
limit: pageSize, // make all strings a starts with operation rather than pure equality
skip: pageSize * page, for (const [key, queryVal] of Object.entries(query)) {
}) if (typeof queryVal === "string") {
queryBuilder.addString(key, queryVal)
} else {
queryBuilder.addEqual(key, queryVal)
}
}
searchString = queryBuilder.complete()
}
const rows = response.docs const response = await search(searchString)
// delete passwords from users // delete passwords from users
if (query.tableId === ViewNames.USERS) { if (tableId === ViewNames.USERS) {
for (let row of rows) { for (let row of response.rows) {
delete row.password delete row.password
} }
} }
const table = await db.get(ctx.params.tableId) const table = await db.get(tableId)
ctx.body = {
ctx.body = await outputProcessing(appId, table, rows) rows: await outputProcessing(appId, table, response.rows),
bookmark: response.bookmark,
}
} }
exports.fetchTableRows = async function(ctx) { exports.fetchTableRows = async function(ctx) {

View File

@ -0,0 +1,18 @@
const { QueryBuilder, buildSearchUrl, search } = require("./utils")
exports.rowSearch = async ctx => {
// this can't be done through pouch, have to reach for trusty node-fetch
const appId = ctx.user.appId
const bookmark = ctx.params.bookmark
let url
if (ctx.params.query) {
url = new QueryBuilder(appId, ctx.params.query, bookmark).complete()
} else if (ctx.params.raw) {
url = buildSearchUrl({
appId,
query: ctx.params.raw,
bookmark,
})
}
ctx.body = await search(url)
}

View File

@ -0,0 +1,137 @@
const { SearchIndexes } = require("../../../db/utils")
const { checkSlashesInUrl } = require("../../../utilities")
const env = require("../../../environment")
const fetch = require("node-fetch")
/**
 * Given a set of inputs this will generate the URL which is to be sent to the search proxy in CouchDB.
 * The query and bookmark are URI encoded, so they should be passed in their plain (un-encoded) form.
 * @param {string} appId The ID of the app which we will be searching within.
 * @param {string} query The lucene query string which is to be used for searching.
 * @param {string|null} bookmark If there were more than the limit specified can send the bookmark that was
 * returned with query for next set of search results.
 * @param {number} limit The number of entries to return per query, defaults to 50.
 * @param {boolean} excludeDocs By default full rows are returned, if required this can be disabled.
 * @return {string} The URL which a GET can be performed on to receive results.
 */
function buildSearchUrl({ appId, query, bookmark, excludeDocs, limit = 50 }) {
  let url = `${env.COUCH_DB_URL}/${appId}/_design/database/_search`
  // encode the query so characters such as "&", "#" or "/" in search values
  // cannot terminate the parameter early or be altered by the slash clean up below
  url += `/${SearchIndexes.ROWS}?q=${encodeURIComponent(query)}`
  url += `&limit=${limit}`
  if (!excludeDocs) {
    url += "&include_docs=true"
  }
  if (bookmark) {
    url += `&bookmark=${encodeURIComponent(bookmark)}`
  }
  return checkSlashesInUrl(url)
}
/**
 * Fluent builder for the lucene query string used to search an app.
 * Clauses are collected per type (string prefix, fuzzy, range and equality) and
 * joined together with AND when `complete` is called.
 */
class QueryBuilder {
  /**
   * @param {string} appId The ID of the app which is to be searched.
   * @param {object} [base] Optional initial clauses, may contain any of the
   * `string`, `fuzzy`, `range` and `equal` maps, which replace the empty defaults.
   */
  constructor(appId, base) {
    this.appId = appId
    this.query = {
      string: {},
      fuzzy: {},
      range: {},
      equal: {},
      ...base,
    }
    this.limit = 50
    this.bookmark = null
  }

  /** Sets the maximum number of results to return. */
  setLimit(limit) {
    this.limit = limit
    return this
  }

  /** Sets the bookmark from which the search should continue. */
  setBookmark(bookmark) {
    this.bookmark = bookmark
    return this
  }

  /** Adds a "starts with" clause - `key:partial*`. */
  addString(key, partial) {
    this.query.string[key] = partial
    return this
  }

  /** Adds a fuzzy match clause - `key:fuzzy~`. */
  addFuzzy(key, fuzzy) {
    this.query.fuzzy[key] = fuzzy
    return this
  }

  /** Adds an inclusive range clause - `key:[low TO high]`. */
  addRange(key, low, high) {
    // stored per key, assigning the whole range map would drop the key (and
    // any previously added ranges) and produce clauses for "low" and "high"
    this.query.range[key] = {
      low,
      high,
    }
    return this
  }

  /** Adds an exact match clause - `key:value`. */
  addEqual(key, value) {
    this.query.equal[key] = value
    return this
  }

  /** Restricts the search to a table, this is an equality clause on `tableId`. */
  addTable(tableId) {
    this.query.equal.tableId = tableId
    return this
  }

  /**
   * Builds the final search URL from everything added to the builder.
   * @param {string|null} rawQuery An optional raw lucene query, when clauses have
   * also been added it is bracketed and ANDed onto them rather than replacing them.
   * @return {string} The result of buildSearchUrl for the built query.
   */
  complete(rawQuery = null) {
    const clauses = []
    const build = (structure, queryFn) => {
      for (const [key, value] of Object.entries(structure)) {
        clauses.push(queryFn(key, value))
      }
    }
    // the maps may have been nulled out by the base supplied to the constructor
    if (this.query.string) {
      build(this.query.string, (key, value) => `${key}:${value}*`)
    }
    if (this.query.range) {
      build(
        this.query.range,
        (key, value) => `${key}:[${value.low} TO ${value.high}]`
      )
    }
    if (this.query.fuzzy) {
      build(this.query.fuzzy, (key, value) => `${key}:${value}~`)
    }
    if (this.query.equal) {
      build(this.query.equal, (key, value) => `${key}:${value}`)
    }
    let output = clauses.join(" AND ")
    if (rawQuery) {
      // keep the built clauses (e.g. the table restriction), brackets stop an
      // OR inside the raw query from escaping the restriction
      output = output.length === 0 ? rawQuery : `${output} AND (${rawQuery})`
    }
    return buildSearchUrl({
      appId: this.appId,
      query: output,
      bookmark: this.bookmark,
      limit: this.limit,
    })
  }
}
exports.search = async query => {
const response = await fetch(query, {
method: "GET",
})
const json = await response.json()
let output = {
rows: [],
}
if (json.rows != null && json.rows.length > 0) {
output.rows = json.rows.map(row => row.doc)
}
if (json.bookmark) {
output.bookmark = json.bookmark
}
return output
}
exports.QueryBuilder = QueryBuilder
exports.buildSearchUrl = buildSearchUrl

View File

@ -2,6 +2,7 @@ require("svelte/register")
const send = require("koa-send") const send = require("koa-send")
const { resolve, join } = require("../../../utilities/centralPath") const { resolve, join } = require("../../../utilities/centralPath")
const { checkSlashesInUrl } = require("../../../utilities")
const fetch = require("node-fetch") const fetch = require("node-fetch")
const uuid = require("uuid") const uuid = require("uuid")
const { prepareUpload } = require("../deploy/utils") const { prepareUpload } = require("../deploy/utils")
@ -28,10 +29,7 @@ function objectStoreUrl() {
function internalObjectStoreUrl() { function internalObjectStoreUrl() {
if (env.SELF_HOSTED) { if (env.SELF_HOSTED) {
return (env.MINIO_URL + OBJ_STORE_DIRECTORY).replace( return checkSlashesInUrl(env.MINIO_URL + OBJ_STORE_DIRECTORY)
/(https?:\/\/)|(\/)+/g,
"$1$2"
)
} else { } else {
return BB_CDN return BB_CDN
} }

View File

@ -0,0 +1,8 @@
const Router = require("@koa/router")
const controller = require("../controllers/search")
// router for the search API
const router = Router()
// GET /api/search/rows is handled by the rowSearch function of the search controller
router.get("/api/search/rows", controller.rowSearch)
module.exports = router

View File

@ -2,6 +2,9 @@ const { outputProcessing } = require("../../../utilities/rowProcessor")
const setup = require("./utilities") const setup = require("./utilities")
const { basicRow } = setup.structures const { basicRow } = setup.structures
// mock the fetch for the search system
jest.mock("node-fetch")
describe("/rows", () => { describe("/rows", () => {
let request = setup.getRequest() let request = setup.getRequest()
let config = setup.getConfig() let config = setup.getConfig()
@ -303,25 +306,19 @@ describe("/rows", () => {
describe("search", () => { describe("search", () => {
it("should run a search on the table", async () => { it("should run a search on the table", async () => {
const row = await config.createRow()
// add another row that shouldn't be found
await config.createRow({
...basicRow(),
name: "Other Contact",
})
const res = await request const res = await request
.post(`/api/${table._id}/rows/search`) .post(`/api/${table._id}/rows/search`)
.send({ .send({
query: { query: {
name: "Test", name: "Test",
}, },
pagination: { pageSize: 25, page: 0 } pagination: { pageSize: 25 }
}) })
.set(config.defaultHeaders()) .set(config.defaultHeaders())
.expect('Content-Type', /json/) .expect('Content-Type', /json/)
.expect(200) .expect(200)
expect(res.body.length).toEqual(1) expect(res.body.rows.length).toEqual(1)
expect(res.body[0]._id).toEqual(row._id) expect(res.body.bookmark).toBeDefined()
}) })
}) })

View File

@ -27,7 +27,7 @@ const EventType = {
} }
exports.EventType = EventType exports.EventType = EventType
// re-export utils here for ease of use // re-export search here for ease of use
exports.IncludeDocs = IncludeDocs exports.IncludeDocs = IncludeDocs
exports.getLinkDocuments = getLinkDocuments exports.getLinkDocuments = getLinkDocuments
exports.createLinkView = createLinkView exports.createLinkView = createLinkView

View File

@ -37,11 +37,16 @@ const ViewNames = {
USERS: "ta_users", USERS: "ta_users",
} }
const SearchIndexes = {
ROWS: "rows",
}
exports.StaticDatabases = StaticDatabases exports.StaticDatabases = StaticDatabases
exports.ViewNames = ViewNames exports.ViewNames = ViewNames
exports.DocumentTypes = DocumentTypes exports.DocumentTypes = DocumentTypes
exports.SEPARATOR = SEPARATOR exports.SEPARATOR = SEPARATOR
exports.UNICODE_MAX = UNICODE_MAX exports.UNICODE_MAX = UNICODE_MAX
exports.SearchIndexes = SearchIndexes
exports.getQueryIndex = viewName => { exports.getQueryIndex = viewName => {
return `database/${viewName}` return `database/${viewName}`

View File

@ -1,5 +1,10 @@
const CouchDB = require("../index") const CouchDB = require("../index")
const { DocumentTypes, SEPARATOR, ViewNames } = require("../utils") const {
DocumentTypes,
SEPARATOR,
ViewNames,
SearchIndexes,
} = require("../utils")
const SCREEN_PREFIX = DocumentTypes.SCREEN + SEPARATOR const SCREEN_PREFIX = DocumentTypes.SCREEN + SEPARATOR
/************************************************** /**************************************************
@ -72,3 +77,42 @@ exports.createRoutingView = async appId => {
} }
await db.put(designDoc) await db.put(designDoc)
} }
/**
 * Adds (or replaces) a search index on the "_design/database" design document of an app.
 * Any other search indexes already present on the design document are retained.
 * @param {string} appId The ID of the app whose design document is to be updated.
 * @param {string} indexName The name under which the index is stored.
 * @param {string} fnString The index function, as a string.
 */
async function searchIndex(appId, indexName, fnString) {
  const db = new CouchDB(appId)
  const designDoc = await db.get("_design/database")
  // merge rather than assign, assigning a fresh object would remove every
  // other index which has been stored on the design document
  designDoc.indexes = {
    ...designDoc.indexes,
    [indexName]: {
      index: fnString,
    },
  }
  await db.put(designDoc)
}
exports.createAllSearchIndex = async appId => {
await searchIndex(
appId,
SearchIndexes.ROWS,
function(doc) {
function idx(input, prev) {
for (let key of Object.keys(input)) {
const idxKey = prev != null ? `${prev}.${key}` : key
if (key === "_id" || key === "_rev") {
continue
}
if (typeof input[key] !== "object") {
// eslint-disable-next-line no-undef
index(idxKey, input[key], { store: true })
} else {
idx(input[key], idxKey)
}
}
}
if (doc._id.startsWith("ro_")) {
// eslint-disable-next-line no-undef
index("default", doc._id)
idx(doc)
}
}.toString()
)
}

View File

@ -106,3 +106,7 @@ exports.getAllApps = async () => {
.map(({ value }) => value) .map(({ value }) => value)
} }
} }
exports.checkSlashesInUrl = url => {
return url.replace(/(https?:\/\/)|(\/)+/g, "$1$2")
}

View File

@ -25,10 +25,11 @@
let tableDefinition let tableDefinition
let schema let schema
// pagination let nextBookmark = null
let page = 0 let bookmark = null
let lastBookmark = null
$: fetchData(table, page) $: fetchData(table, bookmark)
// omit empty strings // omit empty strings
$: parsedSearch = Object.keys(search).reduce( $: parsedSearch = Object.keys(search).reduce(
(acc, next) => (acc, next) =>
@ -38,33 +39,43 @@
$: actions = [ $: actions = [
{ {
type: ActionTypes.RefreshDatasource, type: ActionTypes.RefreshDatasource,
callback: () => fetchData(table, page), callback: () => fetchData(table, bookmark),
metadata: { datasource: { type: "table", tableId: table } }, metadata: { datasource: { type: "table", tableId: table } },
}, },
] ]
async function fetchData(table, page) { async function fetchData(table, mark) {
if (table) { if (table) {
const tableDef = await API.fetchTableDefinition(table) const tableDef = await API.fetchTableDefinition(table)
schema = tableDef.schema schema = tableDef.schema
rows = await API.searchTableData({ const output = await API.searchTableData({
tableId: table, tableId: table,
search: parsedSearch, search: parsedSearch,
pagination: { pagination: {
pageSize, pageSize,
page, bookmark: mark,
}, },
}) })
rows = output.rows
nextBookmark = output.bookmark
} }
loaded = true loaded = true
} }
function nextPage() { function nextPage() {
page += 1 lastBookmark = bookmark
bookmark = nextBookmark
} }
function previousPage() { function previousPage() {
page -= 1 nextBookmark = bookmark
if (lastBookmark !== bookmark) {
bookmark = lastBookmark
} else {
// special case for going back to beginning
bookmark = null
lastBookmark = null
}
} }
</script> </script>
@ -99,15 +110,15 @@
secondary secondary
on:click={() => { on:click={() => {
search = {} search = {}
page = 0 bookmark = null
}}> }}>
Reset Reset
</Button> </Button>
<Button <Button
primary primary
on:click={() => { on:click={() => {
page = 0 bookmark = null
fetchData(table, page) fetchData(table, bookmark)
}}> }}>
Search Search
</Button> </Button>
@ -129,10 +140,10 @@
{/if} {/if}
{/if} {/if}
<div class="pagination"> <div class="pagination">
{#if page > 0} {#if lastBookmark != null || bookmark != null}
<Button primary on:click={previousPage}>Back</Button> <Button primary on:click={previousPage}>Back</Button>
{/if} {/if}
{#if rows.length === pageSize} {#if nextBookmark != null && rows.length !== 0}
<Button primary on:click={nextPage}>Next</Button> <Button primary on:click={nextPage}>Next</Button>
{/if} {/if}
</div> </div>