Merge pull request #15829 from Budibase/datadog-bull

Bull queue observability.
This commit is contained in:
Sam Rose 2025-03-27 17:53:23 +01:00 committed by GitHub
commit 591d00942e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 270 additions and 77 deletions

View File

@ -56,6 +56,7 @@
"koa-pino-logger": "4.0.0", "koa-pino-logger": "4.0.0",
"lodash": "4.17.21", "lodash": "4.17.21",
"node-fetch": "2.6.7", "node-fetch": "2.6.7",
"object-sizeof": "2.6.1",
"passport-google-oauth": "2.0.0", "passport-google-oauth": "2.0.0",
"passport-local": "1.0.0", "passport-local": "1.0.0",
"passport-oauth2-refresh": "^2.1.0", "passport-oauth2-refresh": "^2.1.0",

View File

@ -1,6 +1,6 @@
import { AnyDocument, Database, Document, DocumentType } from "@budibase/types" import { AnyDocument, Database, Document, DocumentType } from "@budibase/types"
import { JobQueue, Queue, createQueue } from "../queue" import { BudibaseQueue, JobQueue } from "../queue"
import * as dbUtils from "../db" import * as dbUtils from "../db"
interface ProcessDocMessage { interface ProcessDocMessage {
@ -13,11 +13,11 @@ const PERSIST_MAX_ATTEMPTS = 100
let processor: DocWritethroughProcessor | undefined let processor: DocWritethroughProcessor | undefined
export class DocWritethroughProcessor { export class DocWritethroughProcessor {
private static _queue: Queue private static _queue: BudibaseQueue<ProcessDocMessage>
public static get queue() { public static get queue() {
if (!DocWritethroughProcessor._queue) { if (!DocWritethroughProcessor._queue) {
DocWritethroughProcessor._queue = createQueue<ProcessDocMessage>( DocWritethroughProcessor._queue = new BudibaseQueue<ProcessDocMessage>(
JobQueue.DOC_WRITETHROUGH_QUEUE, JobQueue.DOC_WRITETHROUGH_QUEUE,
{ {
jobOptions: { jobOptions: {

View File

@ -18,7 +18,9 @@ import {
const initialTime = Date.now() const initialTime = Date.now()
async function waitForQueueCompletion() { async function waitForQueueCompletion() {
await utils.queue.processMessages(DocWritethroughProcessor.queue) await utils.queue.processMessages(
DocWritethroughProcessor.queue.getBullQueue()
)
} }
beforeAll(() => utils.queue.useRealQueues()) beforeAll(() => utils.queue.useRealQueues())

View File

@ -1,5 +1,4 @@
import BullQueue from "bull" import { BudibaseQueue, JobQueue } from "../../queue"
import { createQueue, JobQueue } from "../../queue"
import { Event, Identity } from "@budibase/types" import { Event, Identity } from "@budibase/types"
export interface EventPayload { export interface EventPayload {
@ -9,10 +8,19 @@ export interface EventPayload {
timestamp?: string | number timestamp?: string | number
} }
export let asyncEventQueue: BullQueue.Queue export let asyncEventQueue: BudibaseQueue<EventPayload>
export function init() { export function init() {
asyncEventQueue = createQueue<EventPayload>(JobQueue.SYSTEM_EVENT_QUEUE) asyncEventQueue = new BudibaseQueue<EventPayload>(
JobQueue.SYSTEM_EVENT_QUEUE,
{
jobTags: (event: EventPayload) => {
return {
"event.name": event.event,
}
},
}
)
} }
export async function shutdown() { export async function shutdown() {

View File

@ -8,24 +8,30 @@ import {
} from "@budibase/types" } from "@budibase/types"
import { EventProcessor } from "./types" import { EventProcessor } from "./types"
import { getAppId, doInTenant, getTenantId } from "../../context" import { getAppId, doInTenant, getTenantId } from "../../context"
import BullQueue from "bull" import { BudibaseQueue, JobQueue } from "../../queue"
import { createQueue, JobQueue } from "../../queue"
import { isAudited } from "../../utils" import { isAudited } from "../../utils"
import env from "../../environment" import env from "../../environment"
export default class AuditLogsProcessor implements EventProcessor { export default class AuditLogsProcessor implements EventProcessor {
static auditLogsEnabled = false static auditLogsEnabled = false
static auditLogQueue: BullQueue.Queue<AuditLogQueueEvent> static auditLogQueue: BudibaseQueue<AuditLogQueueEvent>
// can't use constructor as need to return promise // can't use constructor as need to return promise
static init(fn: AuditLogFn) { static init(fn: AuditLogFn) {
AuditLogsProcessor.auditLogsEnabled = true AuditLogsProcessor.auditLogsEnabled = true
const writeAuditLogs = fn const writeAuditLogs = fn
AuditLogsProcessor.auditLogQueue = createQueue<AuditLogQueueEvent>( AuditLogsProcessor.auditLogQueue = new BudibaseQueue<AuditLogQueueEvent>(
JobQueue.AUDIT_LOG JobQueue.AUDIT_LOG,
{
jobTags: (event: AuditLogQueueEvent) => {
return {
"event.name": event.event,
}
},
}
) )
return AuditLogsProcessor.auditLogQueue.process(async job => { return AuditLogsProcessor.auditLogQueue.process(async job => {
return doInTenant(job.data.tenantId, async () => { await doInTenant(job.data.tenantId, async () => {
let properties = job.data.properties let properties = job.data.properties
if (properties.audited) { if (properties.audited) {
properties = { properties = {

View File

@ -96,12 +96,17 @@ export class InMemoryQueue<T = any> implements Partial<Queue<T>> {
let resp = func(message) let resp = func(message)
async function retryFunc(fnc: any) { async function retryFunc(fnc: any, attempt = 0) {
try { try {
await fnc await fnc
} catch (e: any) { } catch (e: any) {
await helpers.wait(50) attempt++
await retryFunc(func(message)) if (attempt < 3) {
await helpers.wait(100 * attempt)
await retryFunc(func(message), attempt)
} else {
throw e
}
} }
} }

View File

@ -2,10 +2,12 @@ import env from "../environment"
import { getRedisOptions } from "../redis/utils" import { getRedisOptions } from "../redis/utils"
import { JobQueue } from "./constants" import { JobQueue } from "./constants"
import InMemoryQueue from "./inMemoryQueue" import InMemoryQueue from "./inMemoryQueue"
import BullQueue, { QueueOptions, JobOptions } from "bull" import BullQueue, { Queue, QueueOptions, JobOptions, Job } from "bull"
import { addListeners, StalledFn } from "./listeners" import { addListeners, StalledFn } from "./listeners"
import { Duration } from "../utils" import { Duration } from "../utils"
import * as timers from "../timers" import * as timers from "../timers"
import tracer from "dd-trace"
import sizeof from "object-sizeof"
export type { QueueOptions, Queue, JobOptions } from "bull" export type { QueueOptions, Queue, JobOptions } from "bull"
@ -15,7 +17,7 @@ const QUEUE_LOCK_MS = Duration.fromMinutes(5).toMs()
const QUEUE_LOCK_RENEW_INTERNAL_MS = Duration.fromSeconds(30).toMs() const QUEUE_LOCK_RENEW_INTERNAL_MS = Duration.fromSeconds(30).toMs()
// cleanup the queue every 60 seconds // cleanup the queue every 60 seconds
const CLEANUP_PERIOD_MS = Duration.fromSeconds(60).toMs() const CLEANUP_PERIOD_MS = Duration.fromSeconds(60).toMs()
let QUEUES: BullQueue.Queue[] = [] let QUEUES: Queue[] = []
let cleanupInterval: NodeJS.Timeout let cleanupInterval: NodeJS.Timeout
async function cleanup() { async function cleanup() {
@ -25,49 +27,205 @@ async function cleanup() {
} }
} }
export function createQueue<T>( async function withMetrics<T>(
jobQueue: JobQueue, name: string,
opts: { cb: () => Promise<T>,
tags?: Record<string, string | number>
): Promise<T> {
const start = performance.now()
try {
const result = await cb()
tracer.dogstatsd.increment(`${name}.success`, 1, tags)
return result
} catch (err) {
tracer.dogstatsd.increment(`${name}.error`, 1, tags)
throw err
} finally {
const durationMs = performance.now() - start
tracer.dogstatsd.distribution(`${name}.duration.ms`, durationMs, tags)
tracer.dogstatsd.increment(name, 1, tags)
}
}
function jobOptsTags(opts: JobOptions) {
return {
"job.opts.attempts": opts.attempts,
"job.opts.backoff": opts.backoff,
"job.opts.delay": opts.delay,
"job.opts.jobId": opts.jobId,
"job.opts.lifo": opts.lifo,
"job.opts.preventParsingData": opts.preventParsingData,
"job.opts.priority": opts.priority,
"job.opts.removeOnComplete": opts.removeOnComplete,
"job.opts.removeOnFail": opts.removeOnFail,
"job.opts.repeat": opts.repeat,
"job.opts.stackTraceLimit": opts.stackTraceLimit,
"job.opts.timeout": opts.timeout,
}
}
function jobTags(job: Job) {
return {
"job.id": job.id,
"job.attemptsMade": job.attemptsMade,
"job.timestamp": job.timestamp,
"job.data.sizeBytes": sizeof(job.data),
...jobOptsTags(job.opts || {}),
}
}
export interface BudibaseQueueOpts<T> {
removeStalledCb?: StalledFn removeStalledCb?: StalledFn
maxStalledCount?: number maxStalledCount?: number
jobOptions?: JobOptions jobOptions?: JobOptions
} = {} jobTags?: (job: T) => Record<string, any>
): BullQueue.Queue<T> { }
export class BudibaseQueue<T> {
private queue: Queue<T>
private opts: BudibaseQueueOpts<T>
private jobQueue: JobQueue
constructor(jobQueue: JobQueue, opts: BudibaseQueueOpts<T> = {}) {
this.opts = opts
this.jobQueue = jobQueue
this.queue = this.initQueue()
}
private initQueue() {
const redisOpts = getRedisOptions() const redisOpts = getRedisOptions()
const queueConfig: QueueOptions = { const queueConfig: QueueOptions = {
redis: redisOpts, redis: redisOpts,
settings: { settings: {
maxStalledCount: opts.maxStalledCount ? opts.maxStalledCount : 0, maxStalledCount: this.opts.maxStalledCount
? this.opts.maxStalledCount
: 0,
lockDuration: QUEUE_LOCK_MS, lockDuration: QUEUE_LOCK_MS,
lockRenewTime: QUEUE_LOCK_RENEW_INTERNAL_MS, lockRenewTime: QUEUE_LOCK_RENEW_INTERNAL_MS,
}, },
} }
if (opts.jobOptions) { if (this.opts.jobOptions) {
queueConfig.defaultJobOptions = opts.jobOptions queueConfig.defaultJobOptions = this.opts.jobOptions
} }
let queue: BullQueue.Queue<T> let queue: Queue<T>
if (!env.isTest()) { if (!env.isTest()) {
queue = new BullQueue(jobQueue, queueConfig) queue = new BullQueue(this.jobQueue, queueConfig)
} else if ( } else if (
process.env.BULL_TEST_REDIS_PORT && process.env.BULL_TEST_REDIS_PORT &&
!isNaN(+process.env.BULL_TEST_REDIS_PORT) !isNaN(+process.env.BULL_TEST_REDIS_PORT)
) { ) {
queue = new BullQueue(jobQueue, { queue = new BullQueue(this.jobQueue, {
redis: { host: "localhost", port: +process.env.BULL_TEST_REDIS_PORT }, redis: { host: "localhost", port: +process.env.BULL_TEST_REDIS_PORT },
}) })
} else { } else {
queue = new InMemoryQueue(jobQueue, queueConfig) as any queue = new InMemoryQueue(this.jobQueue, queueConfig) as any
} }
addListeners(queue, jobQueue, opts?.removeStalledCb)
addListeners(queue, this.jobQueue, this.opts.removeStalledCb)
QUEUES.push(queue) QUEUES.push(queue)
if (!cleanupInterval && !env.isTest()) { if (!cleanupInterval && !env.isTest()) {
cleanupInterval = timers.set(cleanup, CLEANUP_PERIOD_MS) cleanupInterval = timers.set(cleanup, CLEANUP_PERIOD_MS)
// fire off an initial cleanup // fire off an initial cleanup
cleanup().catch(err => { cleanup().catch(err => {
console.error(`Unable to cleanup ${jobQueue} initially - ${err}`) console.error(`Unable to cleanup ${this.jobQueue} initially - ${err}`)
}) })
} }
return queue return queue
}
getBullQueue() {
return this.queue
}
process(
concurrency: number,
cb: (job: Job<T>) => Promise<void>
): Promise<void>
process(cb: (job: Job<T>) => Promise<void>): Promise<void>
process(...args: any[]) {
let concurrency: number | undefined = undefined
let cb: (job: Job<T>) => Promise<void>
if (args.length === 2) {
concurrency = args[0]
cb = args[1]
} else {
cb = args[0]
}
const wrappedCb = async (job: Job<T>) => {
await tracer.trace("queue.process", async span => {
// @ts-expect-error monkey patching the parent span id
if (job.data._parentSpanContext) {
// @ts-expect-error monkey patching the parent span id
const parentContext = job.data._parentSpanContext
const parent = {
traceId: parentContext.traceId,
spanId: parentContext.spanId,
toTraceId: () => parentContext.traceId,
toSpanId: () => parentContext.spanId,
toTraceparent: () => "",
}
span.addLink(parent)
}
span.addTags({ "queue.name": this.jobQueue, ...jobTags(job) })
if (this.opts.jobTags) {
span.addTags(this.opts.jobTags(job.data))
}
tracer.dogstatsd.distribution(
"queue.process.sizeBytes",
sizeof(job.data),
this.metricTags()
)
await this.withMetrics("queue.process", () => cb(job))
})
}
if (concurrency) {
return this.queue.process(concurrency, wrappedCb)
} else {
return this.queue.process(wrappedCb)
}
}
async add(data: T, opts?: JobOptions): Promise<Job<T>> {
return await tracer.trace("queue.add", async span => {
span.addTags({
"queue.name": this.jobQueue,
"job.data.sizeBytes": sizeof(data),
...jobOptsTags(opts || {}),
})
if (this.opts.jobTags) {
span.addTags(this.opts.jobTags(data))
}
// @ts-expect-error monkey patching the parent span id
data._parentSpanContext = {
traceId: span.context().toTraceId(),
spanId: span.context().toSpanId(),
}
tracer.dogstatsd.distribution(
"queue.add.sizeBytes",
sizeof(data),
this.metricTags()
)
return await this.withMetrics("queue.add", () =>
this.queue.add(data, opts)
)
})
}
private withMetrics<T>(name: string, cb: () => Promise<T>) {
return withMetrics(name, cb, this.metricTags())
}
private metricTags() {
return { queueName: this.jobQueue }
}
close() {
return this.queue.close()
}
} }
export async function shutdown() { export async function shutdown() {

@ -1 +1 @@
Subproject commit d9d2766261e02b11318ae76ad0d79698e14f89a9 Subproject commit 538deccd0000090447d6cfe7f9716b16fdfefcc2

View File

@ -16,7 +16,7 @@ export const router: Router = new Router()
router.get("/health", async ctx => { router.get("/health", async ctx => {
if (automationsEnabled()) { if (automationsEnabled()) {
if (!(await automationQueue.isReady())) { if (!(await automationQueue.getBullQueue().isReady())) {
ctx.status = 503 ctx.status = 503
return return
} }

View File

@ -13,7 +13,7 @@ export type AppMigrationJob = {
// always create app migration queue - so that events can be pushed and read from it // always create app migration queue - so that events can be pushed and read from it
// across the different api and automation services // across the different api and automation services
const appMigrationQueue = queue.createQueue<AppMigrationJob>( const appMigrationQueue = new queue.BudibaseQueue<AppMigrationJob>(
queue.JobQueue.APP_MIGRATION, queue.JobQueue.APP_MIGRATION,
{ {
jobOptions: { jobOptions: {

View File

@ -7,25 +7,36 @@ import { getAppMigrationQueue } from "../appMigrations/queue"
import { createBullBoard } from "@bull-board/api" import { createBullBoard } from "@bull-board/api"
import { AutomationData } from "@budibase/types" import { AutomationData } from "@budibase/types"
export const automationQueue = queue.createQueue<AutomationData>( export const automationQueue = new queue.BudibaseQueue<AutomationData>(
queue.JobQueue.AUTOMATION, queue.JobQueue.AUTOMATION,
{ removeStalledCb: automation.removeStalled } {
removeStalledCb: automation.removeStalled,
jobTags: (job: AutomationData) => {
return {
"automation.id": job.automation._id,
"automation.name": job.automation.name,
"automation.appId": job.automation.appId,
"automation.createdAt": job.automation.createdAt,
"automation.trigger": job.automation.definition.trigger.stepId,
}
},
}
) )
const PATH_PREFIX = "/bulladmin" const PATH_PREFIX = "/bulladmin"
export async function init() { export async function init() {
// Set up queues for bull board admin // Set up queues for bull board admin
const queues = [new BullAdapter(automationQueue)] const queues = [new BullAdapter(automationQueue.getBullQueue())]
const backupQueue = backups.getBackupQueue() const backupQueue = backups.getBackupQueue()
if (backupQueue) { if (backupQueue) {
queues.push(new BullAdapter(backupQueue)) queues.push(new BullAdapter(backupQueue.getBullQueue()))
} }
const appMigrationQueue = getAppMigrationQueue() const appMigrationQueue = getAppMigrationQueue()
if (appMigrationQueue) { if (appMigrationQueue) {
queues.push(new BullAdapter(appMigrationQueue)) queues.push(new BullAdapter(appMigrationQueue.getBullQueue()))
} }
const serverAdapter = new KoaAdapter() const serverAdapter = new KoaAdapter()

View File

@ -22,7 +22,7 @@ export function afterAll() {
} }
export function getTestQueue(): queue.InMemoryQueue<AutomationData> { export function getTestQueue(): queue.InMemoryQueue<AutomationData> {
return getQueue() as unknown as queue.InMemoryQueue<AutomationData> return getQueue().getBullQueue() as unknown as queue.InMemoryQueue<AutomationData>
} }
export function triggerCron(message: Job<AutomationData>) { export function triggerCron(message: Job<AutomationData>) {
@ -48,7 +48,7 @@ export async function runInProd(fn: any) {
export async function captureAllAutomationRemovals(f: () => Promise<unknown>) { export async function captureAllAutomationRemovals(f: () => Promise<unknown>) {
const messages: Job<AutomationData>[] = [] const messages: Job<AutomationData>[] = []
const queue = getQueue() const queue = getQueue().getBullQueue()
const messageListener = async (message: Job<AutomationData>) => { const messageListener = async (message: Job<AutomationData>) => {
messages.push(message) messages.push(message)
@ -82,7 +82,7 @@ export async function captureAutomationRemovals(
export async function captureAllAutomationMessages(f: () => Promise<unknown>) { export async function captureAllAutomationMessages(f: () => Promise<unknown>) {
const messages: Job<AutomationData>[] = [] const messages: Job<AutomationData>[] = []
const queue = getQueue() const queue = getQueue().getBullQueue()
const messageListener = async (message: Job<AutomationData>) => { const messageListener = async (message: Job<AutomationData>) => {
messages.push(message) messages.push(message)
@ -122,7 +122,7 @@ export async function captureAllAutomationResults(
f: () => Promise<unknown> f: () => Promise<unknown>
): Promise<queue.TestQueueMessage<AutomationData>[]> { ): Promise<queue.TestQueueMessage<AutomationData>[]> {
const runs: queue.TestQueueMessage<AutomationData>[] = [] const runs: queue.TestQueueMessage<AutomationData>[] = []
const queue = getQueue() const queue = getQueue().getBullQueue()
let messagesOutstanding = 0 let messagesOutstanding = 0
const completedListener = async ( const completedListener = async (

View File

@ -107,12 +107,14 @@ export async function updateTestHistory(
// end the repetition and the job itself // end the repetition and the job itself
export async function disableAllCrons(appId: any) { export async function disableAllCrons(appId: any) {
const promises = [] const promises = []
const jobs = await automationQueue.getRepeatableJobs() const jobs = await automationQueue.getBullQueue().getRepeatableJobs()
for (let job of jobs) { for (let job of jobs) {
if (job.key.includes(`${appId}_cron`)) { if (job.key.includes(`${appId}_cron`)) {
promises.push(automationQueue.removeRepeatableByKey(job.key)) promises.push(
automationQueue.getBullQueue().removeRepeatableByKey(job.key)
)
if (job.id) { if (job.id) {
promises.push(automationQueue.removeJobs(job.id)) promises.push(automationQueue.getBullQueue().removeJobs(job.id))
} }
} }
} }
@ -121,10 +123,10 @@ export async function disableAllCrons(appId: any) {
} }
export async function disableCronById(jobId: JobId) { export async function disableCronById(jobId: JobId) {
const jobs = await automationQueue.getRepeatableJobs() const jobs = await automationQueue.getBullQueue().getRepeatableJobs()
for (const job of jobs) { for (const job of jobs) {
if (job.id === jobId) { if (job.id === jobId) {
await automationQueue.removeRepeatableByKey(job.key) await automationQueue.getBullQueue().removeRepeatableByKey(job.key)
} }
} }
console.log(`jobId=${jobId} disabled`) console.log(`jobId=${jobId} disabled`)

View File

@ -190,7 +190,7 @@ describe("oauth2 utils", () => {
await config.doInContext(config.appId, () => getToken(oauthConfig._id)) await config.doInContext(config.appId, () => getToken(oauthConfig._id))
await testUtils.queue.processMessages( await testUtils.queue.processMessages(
cache.docWritethrough.DocWritethroughProcessor.queue cache.docWritethrough.DocWritethroughProcessor.queue.getBullQueue()
) )
const usageLog = await config.doInContext(config.appId, () => const usageLog = await config.doInContext(config.appId, () =>
@ -216,7 +216,7 @@ describe("oauth2 utils", () => {
config.doInContext(config.appId, () => getToken(oauthConfig._id)) config.doInContext(config.appId, () => getToken(oauthConfig._id))
).rejects.toThrow() ).rejects.toThrow()
await testUtils.queue.processMessages( await testUtils.queue.processMessages(
cache.docWritethrough.DocWritethroughProcessor.queue cache.docWritethrough.DocWritethroughProcessor.queue.getBullQueue()
) )
const usageLog = await config.doInContext(config.appId, () => const usageLog = await config.doInContext(config.appId, () =>
@ -247,7 +247,7 @@ describe("oauth2 utils", () => {
getToken(oauthConfig._id) getToken(oauthConfig._id)
) )
await testUtils.queue.processMessages( await testUtils.queue.processMessages(
cache.docWritethrough.DocWritethroughProcessor.queue cache.docWritethrough.DocWritethroughProcessor.queue.getBullQueue()
) )
for (const appId of [config.appId, config.prodAppId]) { for (const appId of [config.appId, config.prodAppId]) {