Merge pull request #1335 from Budibase/lab-day/lucene

CouchDB 3.0 Searching
This commit is contained in:
Michael Drury 2021-03-26 16:55:30 +00:00 committed by GitHub
commit 6efea66505
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 309 additions and 68 deletions

View File

@ -30,7 +30,6 @@ services:
- ./envoy.dev.yaml:/etc/envoy/envoy.yaml
ports:
- "${MAIN_PORT}:10000"
#- "9901:9901"
depends_on:
- minio-service
- couchdb-service
@ -38,18 +37,17 @@ services:
couchdb-service:
container_name: budi-couchdb-dev
restart: always
image: apache/couchdb:3.0
image: ibmcom/couchdb3
environment:
- COUCHDB_PASSWORD=${COUCH_DB_PASSWORD}
- COUCHDB_USER=${COUCH_DB_USER}
ports:
- "${COUCH_DB_PORT}:5984"
#- "4369:4369"
#- "9100:9100"
volumes:
- couchdb_data:/opt/couchdb/data
- couchdb3_data:/opt/couchdb/data
couch-init:
container_name: budi-couchdb-init-dev
image: curlimages/curl
environment:
PUT_CALL: "curl -u ${COUCH_DB_USER}:${COUCH_DB_PASSWORD} -X PUT couchdb-service:5984"
@ -66,9 +64,8 @@ services:
volumes:
- redis_data:/data
volumes:
couchdb_data:
couchdb3_data:
driver: local
minio_data:
driver: local

View File

@ -71,7 +71,6 @@ services:
- ./envoy.yaml:/etc/envoy/envoy.yaml
ports:
- "${MAIN_PORT}:10000"
#- "9901:9901"
depends_on:
- minio-service
- worker-service
@ -80,16 +79,14 @@ services:
couchdb-service:
restart: always
image: apache/couchdb:3.0
image: ibmcom/couchdb3
environment:
- COUCHDB_PASSWORD=${COUCH_DB_PASSWORD}
- COUCHDB_USER=${COUCH_DB_USER}
ports:
- "${COUCH_DB_PORT}:5984"
#- "4369:4369"
#- "9100:9100"
volumes:
- couchdb_data:/opt/couchdb/data
- couchdb3_data:/opt/couchdb/data
couch-init:
image: curlimages/curl
@ -108,7 +105,7 @@ services:
- redis_data:/data
volumes:
couchdb_data:
couchdb3_data:
driver: local
minio_data:
driver: local

View File

@ -21,14 +21,16 @@ export const fetchTableData = async tableId => {
* Search the rows of an internal table, one page at a time
* @param {String} tableId - id of the table to search
* @param {Object} search - map of column name to the value to search for
* @param {Object} pagination - the pagination controls (pageSize and bookmark)
*/
export const searchTableData = async ({ tableId, search, pagination }) => {
const rows = await API.post({
const output = await API.post({
url: `/api/${tableId}/rows/search`,
body: {
query: search,
pagination,
},
})
return await enrichRows(rows, tableId)
output.rows = await enrichRows(output.rows, tableId)
return output
}

View File

@ -30,6 +30,17 @@ module.exports = async (url, opts) => {
},
404
)
} else if (url.includes("_search")) {
return json({
rows: [
{
doc: {
_id: "test",
},
},
],
bookmark: "test",
})
}
return fetch(url, opts)
}

View File

@ -2,8 +2,11 @@ const CouchDB = require("../../db")
const env = require("../../environment")
const setBuilderToken = require("../../utilities/builder/setBuilderToken")
const packageJson = require("../../../package.json")
const { createLinkView } = require("../../db/linkedRows")
const { createRoutingView } = require("../../utilities/routing")
const {
createLinkView,
createRoutingView,
createAllSearchIndex,
} = require("../../db/views/staticViews")
const {
getTemplateStream,
createApp,
@ -92,6 +95,7 @@ async function createInstance(template) {
// add view for linked rows
await createLinkView(appId)
await createRoutingView(appId)
await createAllSearchIndex(appId)
// replicate the template data to the instance DB
// this is currently very hard to test, downloading and importing template files

View File

@ -17,6 +17,7 @@ const {
const { FieldTypes } = require("../../constants")
const { isEqual } = require("lodash")
const { cloneDeep } = require("lodash/fp")
const { QueryBuilder, search } = require("./search/utils")
const TABLE_VIEW_BEGINS_WITH = `all${SEPARATOR}${DocumentTypes.TABLE}${SEPARATOR}`
@ -259,39 +260,46 @@ exports.search = async function(ctx) {
const db = new CouchDB(appId)
const {
query,
pagination: { pageSize = 10, page },
pagination: { pageSize = 10, bookmark },
} = ctx.request.body
const tableId = ctx.params.tableId
// make all strings a starts with operation rather than pure equality
for (const [key, queryVal] of Object.entries(query)) {
if (typeof queryVal === "string") {
query[key] = {
$gt: queryVal,
$lt: `${queryVal}\uffff`,
}
}
const queryBuilder = new QueryBuilder(appId)
.setLimit(pageSize)
.addTable(tableId)
if (bookmark) {
queryBuilder.setBookmark(bookmark)
}
// pure equality for table
query.tableId = ctx.params.tableId
const response = await db.find({
selector: query,
limit: pageSize,
skip: pageSize * page,
})
let searchString
if (ctx.query && ctx.query.raw && ctx.query.raw !== "") {
searchString = queryBuilder.complete(query["RAW"])
} else {
// make all strings a starts with operation rather than pure equality
for (const [key, queryVal] of Object.entries(query)) {
if (typeof queryVal === "string") {
queryBuilder.addString(key, queryVal)
} else {
queryBuilder.addEqual(key, queryVal)
}
}
searchString = queryBuilder.complete()
}
const rows = response.docs
const response = await search(searchString)
// delete passwords from users
if (query.tableId === ViewNames.USERS) {
for (let row of rows) {
if (tableId === ViewNames.USERS) {
for (let row of response.rows) {
delete row.password
}
}
const table = await db.get(ctx.params.tableId)
ctx.body = await outputProcessing(appId, table, rows)
const table = await db.get(tableId)
ctx.body = {
rows: await outputProcessing(appId, table, response.rows),
bookmark: response.bookmark,
}
}
exports.fetchTableRows = async function(ctx) {

View File

@ -0,0 +1,18 @@
const { QueryBuilder, buildSearchUrl, search } = require("./utils")
exports.rowSearch = async ctx => {
// this can't be done through pouch, have to reach for trusty node-fetch
const appId = ctx.user.appId
const bookmark = ctx.params.bookmark
let url
if (ctx.params.query) {
url = new QueryBuilder(appId, ctx.params.query, bookmark).complete()
} else if (ctx.params.raw) {
url = buildSearchUrl({
appId,
query: ctx.params.raw,
bookmark,
})
}
ctx.body = await search(url)
}

View File

@ -0,0 +1,137 @@
const { SearchIndexes } = require("../../../db/utils")
const { checkSlashesInUrl } = require("../../../utilities")
const env = require("../../../environment")
const fetch = require("node-fetch")
/**
 * Given a set of inputs this will generate the URL which is to be sent to the search proxy in CouchDB.
 * The query and bookmark are URI encoded, so characters which are meaningful inside a URL
 * (such as "&", "#", "+", "=" or "/") cannot break out of their query string parameter.
 * @param {string} appId The ID of the app which we will be searching within.
 * @param {string} query The lucene query string which is to be used for searching (un-encoded).
 * @param {string|null} bookmark If there were more than the limit specified can send the bookmark that was
 * returned with query for next set of search results.
 * @param {number} limit The number of entries to return per query, defaults to 50.
 * @param {boolean} excludeDocs By default full rows are returned, if required this can be disabled.
 * @return {string} The URL which a GET can be performed on to receive results.
 */
function buildSearchUrl({ appId, query, bookmark, excludeDocs, limit = 50 }) {
  let url = `${env.COUCH_DB_URL}/${appId}/_design/database/_search`
  url += `/${SearchIndexes.ROWS}?q=${encodeURIComponent(query)}`
  url += `&limit=${limit}`
  if (!excludeDocs) {
    url += "&include_docs=true"
  }
  if (bookmark) {
    url += `&bookmark=${encodeURIComponent(bookmark)}`
  }
  // tidies up any doubled slashes, e.g. when COUCH_DB_URL ends with one
  return checkSlashesInUrl(url)
}
/**
 * Fluent builder for the lucene query string used against the rows search
 * index. Clauses are collected per kind (string prefix, fuzzy, range, equal)
 * keyed by field name; `complete` joins them with " AND " and hands the
 * result to `buildSearchUrl`. Every setter returns the builder for chaining.
 */
class QueryBuilder {
  /**
   * @param {string} appId The ID of the app which will be searched.
   * @param {object} [base] Optional pre-populated clauses; any of its
   * `string`, `fuzzy`, `range` or `equal` keys replace the empty defaults.
   */
  constructor(appId, base) {
    this.appId = appId
    this.query = {
      string: {},
      fuzzy: {},
      range: {},
      equal: {},
      ...base,
    }
    this.limit = 50
    this.bookmark = null
  }

  /** @param {number} limit The maximum number of results to ask for. */
  setLimit(limit) {
    this.limit = limit
    return this
  }

  /** @param {string|null} bookmark Continuation token from a previous search. */
  setBookmark(bookmark) {
    this.bookmark = bookmark
    return this
  }

  /** Adds a "starts with" clause: `key:partial*`. */
  addString(key, partial) {
    this.query.string[key] = partial
    return this
  }

  /** Adds a fuzzy clause: `key:fuzzy~`. */
  addFuzzy(key, fuzzy) {
    this.query.fuzzy[key] = fuzzy
    return this
  }

  /** Adds an inclusive range clause: `key:[low TO high]`. */
  addRange(key, low, high) {
    // store per key - replacing the whole range map would lose the field name
    // and any ranges that were added before this one
    this.query.range[key] = {
      low,
      high,
    }
    return this
  }

  /** Adds an exact match clause: `key:value`. */
  addEqual(key, value) {
    this.query.equal[key] = value
    return this
  }

  /** Restricts the search to a table, through an equality clause on `tableId`. */
  addTable(tableId) {
    this.query.equal.tableId = tableId
    return this
  }

  /**
   * Builds the final search URL.
   * @param {string|null} rawQuery Optional raw lucene query; when clauses have
   * also been added it is AND-ed onto them (in brackets) instead of replacing them.
   * @return {string} The URL produced by `buildSearchUrl`.
   */
  complete(rawQuery = null) {
    let output = ""
    function build(structure, queryFn) {
      for (let [key, value] of Object.entries(structure)) {
        if (output.length !== 0) {
          output += " AND "
        }
        output += queryFn(key, value)
      }
    }

    if (this.query.string) {
      build(this.query.string, (key, value) => `${key}:${value}*`)
    }
    if (this.query.range) {
      build(
        this.query.range,
        (key, value) => `${key}:[${value.low} TO ${value.high}]`
      )
    }
    if (this.query.fuzzy) {
      build(this.query.fuzzy, (key, value) => `${key}:${value}~`)
    }
    if (this.query.equal) {
      build(this.query.equal, (key, value) => `${key}:${value}`)
    }
    if (rawQuery) {
      // keep the clauses built so far (e.g. the table filter) - the brackets
      // stop an OR inside the raw query from escaping them
      output = output.length === 0 ? rawQuery : `${output} AND (${rawQuery})`
    }
    return buildSearchUrl({
      appId: this.appId,
      query: output,
      bookmark: this.bookmark,
      limit: this.limit,
    })
  }
}
exports.search = async query => {
const response = await fetch(query, {
method: "GET",
})
const json = await response.json()
let output = {
rows: [],
}
if (json.rows != null && json.rows.length > 0) {
output.rows = json.rows.map(row => row.doc)
}
if (json.bookmark) {
output.bookmark = json.bookmark
}
return output
}
exports.QueryBuilder = QueryBuilder
exports.buildSearchUrl = buildSearchUrl

View File

@ -2,6 +2,7 @@ require("svelte/register")
const send = require("koa-send")
const { resolve, join } = require("../../../utilities/centralPath")
const { checkSlashesInUrl } = require("../../../utilities")
const fetch = require("node-fetch")
const uuid = require("uuid")
const { prepareUpload } = require("../deploy/utils")
@ -28,10 +29,7 @@ function objectStoreUrl() {
function internalObjectStoreUrl() {
if (env.SELF_HOSTED) {
return (env.MINIO_URL + OBJ_STORE_DIRECTORY).replace(
/(https?:\/\/)|(\/)+/g,
"$1$2"
)
return checkSlashesInUrl(env.MINIO_URL + OBJ_STORE_DIRECTORY)
} else {
return BB_CDN
}

View File

@ -0,0 +1,8 @@
// Koa router exposing the search controller
const Router = require("@koa/router")
const controller = require("../controllers/search")
const router = Router()
// GET /api/search/rows is handled by the controller's rowSearch function
router.get("/api/search/rows", controller.rowSearch)
module.exports = router

View File

@ -2,6 +2,9 @@ const { outputProcessing } = require("../../../utilities/rowProcessor")
const setup = require("./utilities")
const { basicRow } = setup.structures
// mock the fetch for the search system
jest.mock("node-fetch")
describe("/rows", () => {
let request = setup.getRequest()
let config = setup.getConfig()
@ -303,25 +306,19 @@ describe("/rows", () => {
describe("search", () => {
it("should run a search on the table", async () => {
const row = await config.createRow()
// add another row that shouldn't be found
await config.createRow({
...basicRow(),
name: "Other Contact",
})
const res = await request
.post(`/api/${table._id}/rows/search`)
.send({
query: {
name: "Test",
},
pagination: { pageSize: 25, page: 0 }
pagination: { pageSize: 25 }
})
.set(config.defaultHeaders())
.expect('Content-Type', /json/)
.expect(200)
expect(res.body.length).toEqual(1)
expect(res.body[0]._id).toEqual(row._id)
expect(res.body.rows.length).toEqual(1)
expect(res.body.bookmark).toBeDefined()
})
})

View File

@ -27,7 +27,7 @@ const EventType = {
}
exports.EventType = EventType
// re-export utils here for ease of use
// re-export link utilities here for ease of use
exports.IncludeDocs = IncludeDocs
exports.getLinkDocuments = getLinkDocuments
exports.createLinkView = createLinkView

View File

@ -37,11 +37,16 @@ const ViewNames = {
USERS: "ta_users",
}
const SearchIndexes = {
ROWS: "rows",
}
exports.StaticDatabases = StaticDatabases
exports.ViewNames = ViewNames
exports.DocumentTypes = DocumentTypes
exports.SEPARATOR = SEPARATOR
exports.UNICODE_MAX = UNICODE_MAX
exports.SearchIndexes = SearchIndexes
exports.getQueryIndex = viewName => {
return `database/${viewName}`

View File

@ -1,5 +1,10 @@
const CouchDB = require("../index")
const { DocumentTypes, SEPARATOR, ViewNames } = require("../utils")
const {
DocumentTypes,
SEPARATOR,
ViewNames,
SearchIndexes,
} = require("../utils")
const SCREEN_PREFIX = DocumentTypes.SCREEN + SEPARATOR
/**************************************************
@ -72,3 +77,42 @@ exports.createRoutingView = async appId => {
}
await db.put(designDoc)
}
/**
 * Adds (or replaces) a single search index on the "_design/database" design
 * document of an app database.
 * @param {string} appId The ID of the app whose database will be updated.
 * @param {string} indexName The name to register the index under.
 * @param {string} fnString The source of the index function, as a string.
 */
async function searchIndex(appId, indexName, fnString) {
  const db = new CouchDB(appId)
  const designDoc = await db.get("_design/database")
  // merge into the existing indexes rather than replacing the whole map, so
  // registering one index can never wipe out another
  designDoc.indexes = {
    ...designDoc.indexes,
    [indexName]: {
      index: fnString,
    },
  }
  await db.put(designDoc)
}
exports.createAllSearchIndex = async appId => {
await searchIndex(
appId,
SearchIndexes.ROWS,
function(doc) {
function idx(input, prev) {
for (let key of Object.keys(input)) {
const idxKey = prev != null ? `${prev}.${key}` : key
if (key === "_id" || key === "_rev") {
continue
}
if (typeof input[key] !== "object") {
// eslint-disable-next-line no-undef
index(idxKey, input[key], { store: true })
} else {
idx(input[key], idxKey)
}
}
}
if (doc._id.startsWith("ro_")) {
// eslint-disable-next-line no-undef
index("default", doc._id)
idx(doc)
}
}.toString()
)
}

View File

@ -106,3 +106,7 @@ exports.getAllApps = async () => {
.map(({ value }) => value)
}
}
exports.checkSlashesInUrl = url => {
return url.replace(/(https?:\/\/)|(\/)+/g, "$1$2")
}

View File

@ -25,10 +25,11 @@
let tableDefinition
let schema
// pagination
let page = 0
let nextBookmark = null
let bookmark = null
let lastBookmark = null
$: fetchData(table, page)
$: fetchData(table, bookmark)
// omit empty strings
$: parsedSearch = Object.keys(search).reduce(
(acc, next) =>
@ -38,33 +39,43 @@
$: actions = [
{
type: ActionTypes.RefreshDatasource,
callback: () => fetchData(table, page),
callback: () => fetchData(table, bookmark),
metadata: { datasource: { type: "table", tableId: table } },
},
]
async function fetchData(table, page) {
async function fetchData(table, mark) {
if (table) {
const tableDef = await API.fetchTableDefinition(table)
schema = tableDef.schema
rows = await API.searchTableData({
const output = await API.searchTableData({
tableId: table,
search: parsedSearch,
pagination: {
pageSize,
page,
bookmark: mark,
},
})
rows = output.rows
nextBookmark = output.bookmark
}
loaded = true
}
function nextPage() {
page += 1
lastBookmark = bookmark
bookmark = nextBookmark
}
function previousPage() {
page -= 1
nextBookmark = bookmark
if (lastBookmark !== bookmark) {
bookmark = lastBookmark
} else {
// special case for going back to beginning
bookmark = null
lastBookmark = null
}
}
</script>
@ -99,15 +110,15 @@
secondary
on:click={() => {
search = {}
page = 0
bookmark = null
}}>
Reset
</Button>
<Button
primary
on:click={() => {
page = 0
fetchData(table, page)
bookmark = null
fetchData(table, bookmark)
}}>
Search
</Button>
@ -129,10 +140,10 @@
{/if}
{/if}
<div class="pagination">
{#if page > 0}
{#if lastBookmark != null || bookmark != null}
<Button primary on:click={previousPage}>Back</Button>
{/if}
{#if rows.length === pageSize}
{#if nextBookmark != null && rows.length !== 0}
<Button primary on:click={nextPage}>Next</Button>
{/if}
</div>