Guardrails

Basic Guardrails

from definable.agent import Agent
from definable.agent.guardrail import Guardrails, max_tokens, pii_filter, tool_blocklist

# Built-in guardrails attached at each stage of a run:
# input checks run on the prompt, output checks on the model response,
# and tool checks before any tool call is executed.
input_checks = [max_tokens(500)]                  # cap prompt size
output_checks = [pii_filter()]                    # scrub PII from responses
tool_checks = [tool_blocklist({"delete_all"})]    # forbid a dangerous tool

agent = Agent(
    model="gpt-4o",
    guardrails=Guardrails(
        input=input_checks,
        output=output_checks,
        tool=tool_checks,
    ),
)

Custom Input Guardrail

from definable.agent.guardrail import input_guardrail, GuardrailResult

@input_guardrail
async def no_profanity(text: str, context) -> GuardrailResult:
    """Block input containing any banned word (case-insensitive substring match).

    Returns a blocking result when a banned word appears anywhere in the
    text, otherwise allows the input through unchanged.
    """
    banned = ["badword", "offensive"]
    # Lowercase once, rather than re-lowering the full text for every
    # banned word inside the any() generator.
    lowered = text.lower()
    if any(word in lowered for word in banned):
        return GuardrailResult.block("Profanity detected")
    return GuardrailResult.allow()

# Register the custom guardrail on the input stage only.
guards = Guardrails(input=[no_profanity])
agent = Agent(model="gpt-4o", guardrails=guards)

Custom Output Guardrail

from definable.agent.guardrail import output_guardrail, GuardrailResult

@output_guardrail
async def redact_names(text: str, context) -> GuardrailResult:
    """Redact the name "Alice" from model output before it is returned."""
    if "Alice" not in text:
        # Nothing to redact — pass the output through untouched.
        return GuardrailResult.allow()
    redacted = text.replace("Alice", "[REDACTED]")
    return GuardrailResult.modify(redacted, reason="Names redacted")

Composable Logic

from definable.agent.guardrail import ALL, ANY, NOT, when, max_tokens, block_topics

# ALL: the request passes only if every listed guardrail allows it.
strict = ALL(max_tokens(1000), block_topics(["violence"]))

# ANY: the request passes if at least one listed guardrail allows it.
flexible = ANY(max_tokens(5000), max_tokens(10000))

# when: the wrapped guardrail runs only while `condition` is true —
# here non-admin users get a 500-token cap and "admin" is exempt.
admin_limit = when(
    condition=lambda ctx: ctx.user_id != "admin",
    guardrail=max_tokens(500),
)

Handling Blocks

from definable.agent.run import RunStatus

# With on_block="return_message", a blocked run comes back as a normal
# result whose status is RunStatus.blocked instead of raising an exception.
limits = Guardrails(
    input=[max_tokens(100)],
    on_block="return_message",
)
agent = Agent(model="gpt-4o", guardrails=limits)

output = agent.run("very long message...")
if output.status == RunStatus.blocked:
    print(f"Blocked: {output.content}")