from definable.agent.eval import AgentAsJudgeEval, EvalCase
judge = AgentAsJudgeEval(
judge_model="openai/gpt-4o",
criteria="Is the response helpful, accurate, and concise?",
mode="numeric", # or "binary"
)
result = await judge.arun(agent, EvalCase(
input="Explain quantum computing in one sentence.",
))
print(f"Score: {result.score}/10")
print(f"Reasoning: {result.reasoning}")