Write a custom detector¶
chatbot-auditor ships with seven detectors but the 7-mode framework is not exhaustive. This tutorial walks through writing a new detector from scratch — in this case, one that flags conversations with abnormally long user wait times.
The scenario¶
Your customer service policy says the bot should acknowledge every user message within 5 seconds. In production, some sessions stall for 30+ seconds between "user asks" and "bot responds" — a failure mode not covered by the built-in detectors.
We'll build SlowResponseDetector.
Step 1: Subclass Detector¶
Every detector declares four class attributes plus a detect() method:
# slow_response.py
from __future__ import annotations
from typing import ClassVar
from chatbot_auditor import (
Conversation,
Detection,
Detector,
Evidence,
FailureMode,
Role,
Severity,
)
class SlowResponseDetector(Detector):
    """Flag conversations where the bot was slow to answer a user message.

    A conversation is reported once it contains at least ``min_lag_count``
    bot replies that took longer than ``max_seconds`` to arrive.
    """

    name: ClassVar[str] = "slow_response"
    description: ClassVar[str] = (
        "Flags conversations where the bot took longer than a configurable "
        "threshold to respond to a user message."
    )
    # Reuse an existing failure mode, or add your own FailureMode value upstream.
    failure_mode: ClassVar[FailureMode] = FailureMode.SILENT_CHURN
    requires_llm: ClassVar[bool] = False

    def __init__(self, *, max_seconds: float = 5.0, min_lag_count: int = 1) -> None:
        """Configure the detector.

        Args:
            max_seconds: Longest acceptable delay, in seconds, before a bot
                reply counts as laggy. Must be greater than zero.
            min_lag_count: Minimum number of laggy replies a conversation
                needs before it is reported. Must be at least 1.

        Raises:
            ValueError: If ``max_seconds`` is not positive (including NaN)
                or ``min_lag_count`` is smaller than 1.
        """
        # "not x > 0" (rather than "x <= 0") also rejects NaN, which compares
        # False against everything and would otherwise slip through.
        if not max_seconds > 0:
            raise ValueError("max_seconds must be positive")
        # A minimum of 0 (or less) would make detect() report conversations
        # that contain no laggy replies at all.
        if min_lag_count < 1:
            raise ValueError("min_lag_count must be at least 1")
        self.max_seconds = max_seconds
        self.min_lag_count = min_lag_count

    def detect(self, conversation: Conversation) -> list[Detection]:
        """Return one Detection if enough laggy replies were found, else ``[]``."""
        laggy_indices = self._find_laggy_responses(conversation)
        if len(laggy_indices) < self.min_lag_count:
            return []
        return [self._build_detection(conversation, laggy_indices)]
Step 2: Implement the core logic¶
Keep the business logic in a private method so it's easy to unit test.
def _find_laggy_responses(self, conversation: Conversation) -> list[int]:
    """Return the indices of bot messages that answered a user too slowly.

    A bot message qualifies when the message directly before it was sent by
    the user, both messages carry a timestamp, and the gap between the two
    is strictly greater than ``self.max_seconds``.
    """
    messages = conversation.messages
    slow_indices: list[int] = []
    for index, reply in enumerate(messages):
        # The first message has no predecessor to measure against.
        if index == 0:
            continue
        if reply.role != Role.BOT or reply.timestamp is None:
            continue
        question = messages[index - 1]
        if question.role == Role.USER and question.timestamp is not None:
            elapsed = reply.timestamp - question.timestamp
            if elapsed.total_seconds() > self.max_seconds:
                slow_indices.append(index)
    return slow_indices
Step 3: Build the detection¶
Detections carry structured information so reports and dashboards can render them consistently.
def _build_detection(
    self, conversation: Conversation, laggy_indices: list[int]
) -> Detection:
    """Assemble the single Detection reported for a slow conversation.

    Args:
        conversation: The conversation being reported.
        laggy_indices: Indices into ``conversation.messages`` of the bot
            replies that exceeded the threshold.

    Returns:
        A Detection with one Evidence entry per laggy reply. Severity is
        CRITICAL for three or more laggy replies and MEDIUM otherwise.
    """
    count = len(laggy_indices)
    severity = Severity.CRITICAL if count >= 3 else Severity.MEDIUM
    evidence = [
        Evidence(
            message_index=i,
            quote=_truncate(conversation.messages[i].content, 120),
            note="Bot took longer than threshold to respond",
        )
        for i in laggy_indices
    ]
    return Detection(
        conversation_id=conversation.id,
        detector=self.name,
        failure_mode=self.failure_mode,
        severity=severity,
        confidence=0.9,
        explanation=(
            # "g" keeps fractional thresholds readable (0.5 -> "0.5s").
            # ".0f" would round them (0.5 -> "0s") and misstate the limit.
            # Whole-number thresholds render the same either way (5.0 -> "5s").
            f"Bot took longer than {self.max_seconds:g}s to respond "
            f"{count} time(s)."
        ),
        evidence=evidence,
        recommended_action="alert",
        # Raw numbers so downstream consumers need not parse the explanation.
        metadata={
            "laggy_response_count": count,
            "threshold_seconds": self.max_seconds,
        },
    )
def _truncate(text: str, n: int) -> str:
return text if len(text) <= n else text[: n - 3] + "..."
Step 4: Write tests¶
Good detectors have good tests. Aim for high-confidence positive cases, confirmed non-detections, and edge cases.
# test_slow_response.py
from datetime import UTC, datetime, timedelta
from chatbot_auditor import Conversation, Message, Role
from slow_response import SlowResponseDetector
def _conv(timings: list[tuple[Role, float]]) -> Conversation:
    """Build a test conversation from ``(role, offset_seconds)`` pairs.

    Each pair becomes one message whose timestamp lies the given number of
    seconds after a fixed UTC base time and whose content is ``msg<index>``.
    """
    start = datetime(2026, 4, 17, tzinfo=UTC)
    messages = []
    for position, (sender, offset) in enumerate(timings):
        messages.append(
            Message(
                role=sender,
                content=f"msg{position}",
                timestamp=start + timedelta(seconds=offset),
            )
        )
    return Conversation(id="c1", messages=messages)
def test_slow_response_detected() -> None:
    """A 10-second reply against a 5-second threshold yields one detection."""
    conversation = _conv([(Role.USER, 0), (Role.BOT, 10)])
    results = SlowResponseDetector(max_seconds=5.0).detect(conversation)
    assert len(results) == 1
    first = results[0]
    assert first.metadata["laggy_response_count"] == 1
def test_fast_response_not_detected() -> None:
    """A 2-second reply stays under the 5-second threshold and is not flagged."""
    conversation = _conv([(Role.USER, 0), (Role.BOT, 2)])
    results = SlowResponseDetector(max_seconds=5.0).detect(conversation)
    assert results == []
def test_multiple_laggy_responses_raise_severity() -> None:
    """Three slow replies (15s, 20s and 25s lags) escalate severity to critical."""
    conv = _conv([
        (Role.USER, 0),
        (Role.BOT, 15),
        (Role.USER, 20),
        (Role.BOT, 40),
        (Role.USER, 45),
        (Role.BOT, 70),
    ])
    detections = SlowResponseDetector(max_seconds=5.0).detect(conv)
    # Check the shape first so a regression fails as a readable assertion
    # instead of an IndexError on detections[0].
    assert len(detections) == 1
    # Pin the count that drives the severity, not just the severity itself.
    assert detections[0].metadata["laggy_response_count"] == 3
    assert detections[0].severity.value == "critical"
Step 5: Register it¶
Use the new detector alongside the built-ins via a custom registry:
from chatbot_auditor import audit, default_registry
from slow_response import SlowResponseDetector
# Take the registry returned by default_registry() and add the new detector.
registry = default_registry()
slow_response_detector = SlowResponseDetector(max_seconds=5.0)
registry.register(slow_response_detector)

# NOTE: `conversations` is not defined in this snippet; supply your own
# conversations before running it.
for detection in audit(conversations, detectors=registry):
    print(detection.detector, detection.severity, detection.explanation)
Patterns worth following¶
- Validate configuration in `__init__`. Fail loudly on bad thresholds.
- Return an empty list often. Most conversations aren't failures — don't over-flag.
- Set `confidence` thoughtfully. Values below 0.7 mean "review recommended"; higher means "act on this."
- Populate `evidence`. Dashboards, reports, and human reviewers depend on it.
- Use `metadata` for machine-readable details. Downstream systems can pivot on raw numbers rather than parsing the explanation string.
- Keep detectors independent. Don't have one detector rely on another's output — run them in parallel and let the registry collect.
Related¶
- Detector concepts — the detection architecture
- Detector reference — API docs