Improve APM traces around automations.
This commit is contained in:
parent
07a9b881cd
commit
ddf64b9f70
|
@ -123,7 +123,7 @@ export async function doInAutomationContext<T>(params: {
|
||||||
task: () => T
|
task: () => T
|
||||||
}): Promise<T> {
|
}): Promise<T> {
|
||||||
await ensureSnippetContext()
|
await ensureSnippetContext()
|
||||||
return newContext(
|
return await newContext(
|
||||||
{
|
{
|
||||||
tenantId: getTenantIDFromAppID(params.appId),
|
tenantId: getTenantIDFromAppID(params.appId),
|
||||||
appId: params.appId,
|
appId: params.appId,
|
||||||
|
|
|
@ -40,39 +40,35 @@ function loggingArgs(job: AutomationJob) {
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function processEvent(job: AutomationJob) {
|
export async function processEvent(job: AutomationJob) {
|
||||||
return tracer.trace(
|
return tracer.trace("processEvent", async span => {
|
||||||
"processEvent",
|
const appId = job.data.event.appId!
|
||||||
{ resource: "automation" },
|
const automationId = job.data.automation._id!
|
||||||
async span => {
|
|
||||||
const appId = job.data.event.appId!
|
|
||||||
const automationId = job.data.automation._id!
|
|
||||||
|
|
||||||
span?.addTags({
|
span.addTags({
|
||||||
appId,
|
appId,
|
||||||
automationId,
|
automationId,
|
||||||
job: {
|
job: {
|
||||||
id: job.id,
|
id: job.id,
|
||||||
name: job.name,
|
name: job.name,
|
||||||
attemptsMade: job.attemptsMade,
|
attemptsMade: job.attemptsMade,
|
||||||
opts: {
|
attempts: job.opts.attempts,
|
||||||
attempts: job.opts.attempts,
|
priority: job.opts.priority,
|
||||||
priority: job.opts.priority,
|
delay: job.opts.delay,
|
||||||
delay: job.opts.delay,
|
repeat: job.opts.repeat,
|
||||||
repeat: job.opts.repeat,
|
backoff: job.opts.backoff,
|
||||||
backoff: job.opts.backoff,
|
lifo: job.opts.lifo,
|
||||||
lifo: job.opts.lifo,
|
timeout: job.opts.timeout,
|
||||||
timeout: job.opts.timeout,
|
jobId: job.opts.jobId,
|
||||||
jobId: job.opts.jobId,
|
removeOnComplete: job.opts.removeOnComplete,
|
||||||
removeOnComplete: job.opts.removeOnComplete,
|
removeOnFail: job.opts.removeOnFail,
|
||||||
removeOnFail: job.opts.removeOnFail,
|
stackTraceLimit: job.opts.stackTraceLimit,
|
||||||
stackTraceLimit: job.opts.stackTraceLimit,
|
preventParsingData: job.opts.preventParsingData,
|
||||||
preventParsingData: job.opts.preventParsingData,
|
},
|
||||||
},
|
})
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
const task = async () => {
|
const task = async () => {
|
||||||
try {
|
try {
|
||||||
|
return await tracer.trace("task", async () => {
|
||||||
if (isCronTrigger(job.data.automation) && !job.data.event.timestamp) {
|
if (isCronTrigger(job.data.automation) && !job.data.event.timestamp) {
|
||||||
// Requires the timestamp at run time
|
// Requires the timestamp at run time
|
||||||
job.data.event.timestamp = Date.now()
|
job.data.event.timestamp = Date.now()
|
||||||
|
@ -81,25 +77,19 @@ export async function processEvent(job: AutomationJob) {
|
||||||
console.log("automation running", ...loggingArgs(job))
|
console.log("automation running", ...loggingArgs(job))
|
||||||
|
|
||||||
const runFn = () => Runner.run(job)
|
const runFn = () => Runner.run(job)
|
||||||
const result = await quotas.addAutomation(runFn, {
|
const result = await quotas.addAutomation(runFn, { automationId })
|
||||||
automationId,
|
|
||||||
})
|
|
||||||
console.log("automation completed", ...loggingArgs(job))
|
console.log("automation completed", ...loggingArgs(job))
|
||||||
return result
|
return result
|
||||||
} catch (err) {
|
})
|
||||||
span?.addTags({ error: true })
|
} catch (err) {
|
||||||
console.error(
|
span.addTags({ error: true })
|
||||||
`automation was unable to run`,
|
console.error(`automation was unable to run`, err, ...loggingArgs(job))
|
||||||
err,
|
return { err }
|
||||||
...loggingArgs(job)
|
|
||||||
)
|
|
||||||
return { err }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return await context.doInAutomationContext({ appId, automationId, task })
|
|
||||||
}
|
}
|
||||||
)
|
|
||||||
|
return await context.doInAutomationContext({ appId, automationId, task })
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function updateTestHistory(
|
export async function updateTestHistory(
|
||||||
|
|
|
@ -310,87 +310,83 @@ class Orchestrator {
|
||||||
}
|
}
|
||||||
|
|
||||||
async execute(): Promise<AutomationResults> {
|
async execute(): Promise<AutomationResults> {
|
||||||
return tracer.trace(
|
return await tracer.trace("execute", async span => {
|
||||||
"Orchestrator.execute",
|
span.addTags({ appId: this.appId, automationId: this.automation._id })
|
||||||
{ resource: "automation" },
|
|
||||||
async span => {
|
|
||||||
span?.addTags({ appId: this.appId, automationId: this.automation._id })
|
|
||||||
|
|
||||||
const job = cloneDeep(this.job)
|
const job = cloneDeep(this.job)
|
||||||
delete job.data.event.appId
|
delete job.data.event.appId
|
||||||
delete job.data.event.metadata
|
delete job.data.event.metadata
|
||||||
|
|
||||||
if (this.isCron() && !job.data.event.timestamp) {
|
if (this.isCron() && !job.data.event.timestamp) {
|
||||||
job.data.event.timestamp = Date.now()
|
job.data.event.timestamp = Date.now()
|
||||||
}
|
|
||||||
|
|
||||||
const trigger: AutomationTriggerResult = {
|
|
||||||
id: job.data.automation.definition.trigger.id,
|
|
||||||
stepId: job.data.automation.definition.trigger.stepId,
|
|
||||||
inputs: null,
|
|
||||||
outputs: job.data.event,
|
|
||||||
}
|
|
||||||
const result: AutomationResults = { trigger, steps: [trigger] }
|
|
||||||
|
|
||||||
const ctx: AutomationContext = {
|
|
||||||
trigger: trigger.outputs,
|
|
||||||
steps: [trigger.outputs],
|
|
||||||
stepsById: {},
|
|
||||||
stepsByName: {},
|
|
||||||
user: trigger.outputs.user,
|
|
||||||
}
|
|
||||||
await enrichBaseContext(ctx)
|
|
||||||
|
|
||||||
const timeout =
|
|
||||||
this.job.data.event.timeout || env.AUTOMATION_THREAD_TIMEOUT
|
|
||||||
|
|
||||||
try {
|
|
||||||
await helpers.withTimeout(timeout, async () => {
|
|
||||||
const [stepOutputs, executionTime] = await utils.time(() =>
|
|
||||||
this.executeSteps(ctx, job.data.automation.definition.steps)
|
|
||||||
)
|
|
||||||
|
|
||||||
result.steps.push(...stepOutputs)
|
|
||||||
|
|
||||||
console.info(
|
|
||||||
`Automation ID: ${
|
|
||||||
this.automation._id
|
|
||||||
} Execution time: ${executionTime.toMs()} milliseconds`,
|
|
||||||
{
|
|
||||||
_logKey: "automation",
|
|
||||||
executionTime,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
})
|
|
||||||
} catch (e: any) {
|
|
||||||
if (e.errno === "ETIME") {
|
|
||||||
span?.addTags({ timedOut: true })
|
|
||||||
console.warn(`Automation execution timed out after ${timeout}ms`)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let errorCount = 0
|
|
||||||
if (this.isProdApp() && this.isCron() && this.hasErrored(ctx)) {
|
|
||||||
errorCount = (await this.incrementErrorCount()) || 0
|
|
||||||
}
|
|
||||||
|
|
||||||
if (errorCount >= MAX_AUTOMATION_RECURRING_ERRORS) {
|
|
||||||
await this.stopCron("errors", { result })
|
|
||||||
span?.addTags({ shouldStop: true })
|
|
||||||
} else {
|
|
||||||
await this.logResult(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
return result
|
|
||||||
}
|
}
|
||||||
)
|
|
||||||
|
const trigger: AutomationTriggerResult = {
|
||||||
|
id: job.data.automation.definition.trigger.id,
|
||||||
|
stepId: job.data.automation.definition.trigger.stepId,
|
||||||
|
inputs: null,
|
||||||
|
outputs: job.data.event,
|
||||||
|
}
|
||||||
|
const result: AutomationResults = { trigger, steps: [trigger] }
|
||||||
|
|
||||||
|
const ctx: AutomationContext = {
|
||||||
|
trigger: trigger.outputs,
|
||||||
|
steps: [trigger.outputs],
|
||||||
|
stepsById: {},
|
||||||
|
stepsByName: {},
|
||||||
|
user: trigger.outputs.user,
|
||||||
|
}
|
||||||
|
await enrichBaseContext(ctx)
|
||||||
|
|
||||||
|
const timeout =
|
||||||
|
this.job.data.event.timeout || env.AUTOMATION_THREAD_TIMEOUT
|
||||||
|
|
||||||
|
try {
|
||||||
|
await helpers.withTimeout(timeout, async () => {
|
||||||
|
const [stepOutputs, executionTime] = await utils.time(() =>
|
||||||
|
this.executeSteps(ctx, job.data.automation.definition.steps)
|
||||||
|
)
|
||||||
|
|
||||||
|
result.steps.push(...stepOutputs)
|
||||||
|
|
||||||
|
console.info(
|
||||||
|
`Automation ID: ${
|
||||||
|
this.automation._id
|
||||||
|
} Execution time: ${executionTime.toMs()} milliseconds`,
|
||||||
|
{
|
||||||
|
_logKey: "automation",
|
||||||
|
executionTime,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
})
|
||||||
|
} catch (e: any) {
|
||||||
|
if (e.errno === "ETIME") {
|
||||||
|
span?.addTags({ timedOut: true })
|
||||||
|
console.warn(`Automation execution timed out after ${timeout}ms`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let errorCount = 0
|
||||||
|
if (this.isProdApp() && this.isCron() && this.hasErrored(ctx)) {
|
||||||
|
errorCount = (await this.incrementErrorCount()) || 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if (errorCount >= MAX_AUTOMATION_RECURRING_ERRORS) {
|
||||||
|
await this.stopCron("errors", { result })
|
||||||
|
span?.addTags({ shouldStop: true })
|
||||||
|
} else {
|
||||||
|
await this.logResult(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
private async executeSteps(
|
private async executeSteps(
|
||||||
ctx: AutomationContext,
|
ctx: AutomationContext,
|
||||||
steps: AutomationStep[]
|
steps: AutomationStep[]
|
||||||
): Promise<AutomationStepResult[]> {
|
): Promise<AutomationStepResult[]> {
|
||||||
return tracer.trace("Orchestrator.executeSteps", async () => {
|
return await tracer.trace("executeSteps", async () => {
|
||||||
let stepIndex = 0
|
let stepIndex = 0
|
||||||
const results: AutomationStepResult[] = []
|
const results: AutomationStepResult[] = []
|
||||||
|
|
||||||
|
@ -446,74 +442,92 @@ class Orchestrator {
|
||||||
step: LoopStep,
|
step: LoopStep,
|
||||||
stepToLoop: AutomationStep
|
stepToLoop: AutomationStep
|
||||||
): Promise<AutomationStepResult> {
|
): Promise<AutomationStepResult> {
|
||||||
await processObject(step.inputs, prepareContext(ctx))
|
return await tracer.trace("executeLoopStep", async span => {
|
||||||
|
await processObject(step.inputs, prepareContext(ctx))
|
||||||
|
|
||||||
const maxIterations = getLoopMaxIterations(step)
|
const maxIterations = getLoopMaxIterations(step)
|
||||||
const items: Record<string, any>[] = []
|
const items: Record<string, any>[] = []
|
||||||
let iterations = 0
|
let iterations = 0
|
||||||
let iterable: any[] = []
|
let iterable: any[] = []
|
||||||
try {
|
try {
|
||||||
iterable = getLoopIterable(step)
|
iterable = getLoopIterable(step)
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
return stepFailure(stepToLoop, {
|
span.addTags({
|
||||||
status: AutomationStepStatus.INCORRECT_TYPE,
|
status: AutomationStepStatus.INCORRECT_TYPE,
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
for (; iterations < iterable.length; iterations++) {
|
|
||||||
const currentItem = iterable[iterations]
|
|
||||||
|
|
||||||
if (iterations === maxIterations) {
|
|
||||||
return stepFailure(stepToLoop, {
|
|
||||||
status: AutomationStepStatus.MAX_ITERATIONS,
|
|
||||||
iterations,
|
iterations,
|
||||||
})
|
})
|
||||||
}
|
|
||||||
|
|
||||||
if (matchesLoopFailureCondition(step, currentItem)) {
|
|
||||||
return stepFailure(stepToLoop, {
|
return stepFailure(stepToLoop, {
|
||||||
status: AutomationStepStatus.FAILURE_CONDITION,
|
status: AutomationStepStatus.INCORRECT_TYPE,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.loop = { currentItem }
|
for (; iterations < iterable.length; iterations++) {
|
||||||
const result = await this.executeStep(ctx, stepToLoop)
|
const currentItem = iterable[iterations]
|
||||||
items.push(result.outputs)
|
|
||||||
ctx.loop = undefined
|
|
||||||
}
|
|
||||||
|
|
||||||
const status =
|
if (iterations === maxIterations) {
|
||||||
iterations === 0 ? AutomationStatus.NO_CONDITION_MET : undefined
|
span.addTags({
|
||||||
return stepSuccess(stepToLoop, { status, iterations, items })
|
status: AutomationStepStatus.MAX_ITERATIONS,
|
||||||
|
iterations,
|
||||||
|
})
|
||||||
|
return stepFailure(stepToLoop, {
|
||||||
|
status: AutomationStepStatus.MAX_ITERATIONS,
|
||||||
|
iterations,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if (matchesLoopFailureCondition(step, currentItem)) {
|
||||||
|
span.addTags({
|
||||||
|
status: AutomationStepStatus.FAILURE_CONDITION,
|
||||||
|
iterations,
|
||||||
|
})
|
||||||
|
return stepFailure(stepToLoop, {
|
||||||
|
status: AutomationStepStatus.FAILURE_CONDITION,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.loop = { currentItem }
|
||||||
|
const result = await this.executeStep(ctx, stepToLoop)
|
||||||
|
items.push(result.outputs)
|
||||||
|
ctx.loop = undefined
|
||||||
|
}
|
||||||
|
|
||||||
|
const status =
|
||||||
|
iterations === 0 ? AutomationStatus.NO_CONDITION_MET : undefined
|
||||||
|
return stepSuccess(stepToLoop, { status, iterations, items })
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
private async executeBranchStep(
|
private async executeBranchStep(
|
||||||
ctx: AutomationContext,
|
ctx: AutomationContext,
|
||||||
step: BranchStep
|
step: BranchStep
|
||||||
): Promise<AutomationStepResult[]> {
|
): Promise<AutomationStepResult[]> {
|
||||||
const { branches, children } = step.inputs
|
return await tracer.trace("executeBranchStep", async span => {
|
||||||
|
const { branches, children } = step.inputs
|
||||||
|
|
||||||
for (const branch of branches) {
|
for (const branch of branches) {
|
||||||
if (await branchMatches(ctx, branch)) {
|
if (await branchMatches(ctx, branch)) {
|
||||||
return [
|
span.addTags({ branchName: branch.name, branchId: branch.id })
|
||||||
stepSuccess(step, {
|
return [
|
||||||
branchName: branch.name,
|
stepSuccess(step, {
|
||||||
status: `${branch.name} branch taken`,
|
branchName: branch.name,
|
||||||
branchId: `${branch.id}`,
|
status: `${branch.name} branch taken`,
|
||||||
}),
|
branchId: `${branch.id}`,
|
||||||
...(await this.executeSteps(ctx, children?.[branch.id] || [])),
|
}),
|
||||||
]
|
...(await this.executeSteps(ctx, children?.[branch.id] || [])),
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return [stepFailure(step, { status: AutomationStatus.NO_CONDITION_MET })]
|
span.addTags({ status: AutomationStatus.NO_CONDITION_MET })
|
||||||
|
return [stepFailure(step, { status: AutomationStatus.NO_CONDITION_MET })]
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
private async executeStep(
|
private async executeStep(
|
||||||
ctx: AutomationContext,
|
ctx: AutomationContext,
|
||||||
step: Readonly<AutomationStep>
|
step: Readonly<AutomationStep>
|
||||||
): Promise<AutomationStepResult> {
|
): Promise<AutomationStepResult> {
|
||||||
return tracer.trace("Orchestrator.executeStep", async span => {
|
return await tracer.trace(step.stepId, async span => {
|
||||||
span.addTags({
|
span.addTags({
|
||||||
step: {
|
step: {
|
||||||
stepId: step.stepId,
|
stepId: step.stepId,
|
||||||
|
@ -524,6 +538,7 @@ class Orchestrator {
|
||||||
internal: step.internal,
|
internal: step.internal,
|
||||||
deprecated: step.deprecated,
|
deprecated: step.deprecated,
|
||||||
},
|
},
|
||||||
|
inputsKeys: Object.keys(step.inputs),
|
||||||
})
|
})
|
||||||
|
|
||||||
if (this.stopped) {
|
if (this.stopped) {
|
||||||
|
@ -557,6 +572,7 @@ class Orchestrator {
|
||||||
;(outputs as any).status = AutomationStatus.STOPPED
|
;(outputs as any).status = AutomationStatus.STOPPED
|
||||||
}
|
}
|
||||||
|
|
||||||
|
span.addTags({ outputsKeys: Object.keys(outputs) })
|
||||||
return stepSuccess(step, outputs, inputs)
|
return stepSuccess(step, outputs, inputs)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue