"""Evaluation response models for the Assistant API."""
from __future__ import annotations
from typing import Any, Literal
from msgspec import Struct
from pinecone.models._display import HtmlBuilder, safe_display, truncate_text
from pinecone.models.assistant._mixin import StructDictMixin
from pinecone.models.assistant.chat import ChatUsage
# Type of the entailment label carried by each evaluated fact. The three
# literals are the labels named in the EntailmentResult docstring; the
# ``| str`` arm means any other string is also accepted by this alias
# (presumably so labels this client does not yet know about still type-check —
# confirm against the API contract).
EntailmentType = Literal["entailed", "contradicted", "neutral"] | str
class EntailmentResult(StructDictMixin, Struct, kw_only=True):
    """One evaluated fact together with its entailment verdict.

    Attributes:
        fact: Text of the fact that was evaluated.
        entailment: Entailment label for the fact — ``"entailed"``,
            ``"contradicted"``, or ``"neutral"``.
        reasoning: Explanation behind the verdict. Defaults to the empty
            string when the API does not supply one.
    """

    fact: str
    entailment: EntailmentType
    reasoning: str = ""

    @safe_display
    def __repr__(self) -> str:
        """Return a one-line summary: the label and the fact (``max_chars=80``)."""
        short_fact = truncate_text(self.fact, max_chars=80)
        return f"EntailmentResult(entailment={self.entailment!r}, fact={short_fact!r})"

    @safe_display
    def _repr_pretty_(self, p: Any, cycle: bool) -> None:
        """Write a multi-line form of this result through the pretty-printer ``p``.

        Args:
            p: Printer object; only ``text``, ``group`` and ``breakable`` are used.
            cycle: When true, only the ``EntailmentResult(...)`` placeholder
                is written.
        """
        if cycle:
            p.text("EntailmentResult(...)")
            return

        with p.group(2, "EntailmentResult(", ")"):
            entries = [
                f"entailment={self.entailment!r},",
                f"fact={truncate_text(self.fact, max_chars=200)!r},",
            ]
            # Reasoning is optional: an empty string is left out entirely.
            if self.reasoning:
                entries.append(f"reasoning={truncate_text(self.reasoning, max_chars=200)!r},")
            for entry in entries:
                p.breakable()
                p.text(entry)

    @safe_display
    def _repr_html_(self) -> str:
        """Build the HTML form of this result with ``HtmlBuilder``.

        Always emits ``Entailment`` and ``Fact`` rows, plus a ``Reasoning`` row
        when reasoning is non-empty. A ``"contradicted"`` result additionally
        gets a ``Contradiction`` section (``theme="error"``) repeating the fact
        and, if present, the reasoning.
        """
        html = HtmlBuilder("EntailmentResult")
        fact_text = truncate_text(self.fact, max_chars=500)
        # None marks "no reasoning supplied" so both places below agree.
        reasoning_text = truncate_text(self.reasoning, max_chars=500) if self.reasoning else None

        html.row("Entailment", self.entailment)
        html.row("Fact", fact_text)
        if reasoning_text is not None:
            html.row("Reasoning", reasoning_text)

        if self.entailment == "contradicted":
            detail: list[tuple[str, str]] = [("Fact", fact_text)]
            if reasoning_text is not None:
                detail.append(("Reasoning", reasoning_text))
            html.section("Contradiction", detail, theme="error")

        return html.build()
class AlignmentScores(StructDictMixin, Struct, kw_only=True):
    """Aggregate scores produced by an alignment evaluation.

    Attributes:
        correctness: Precision of the generated answer.
        completeness: Recall of the generated answer.
        alignment: Harmonic mean of correctness and completeness.
    """

    correctness: float
    completeness: float
    alignment: float

    @safe_display
    def __repr__(self) -> str:
        """Return a one-line summary with each score formatted to three decimals."""
        body = ", ".join(
            f"{name}={getattr(self, name):.3f}"
            for name in ("correctness", "completeness", "alignment")
        )
        return f"AlignmentScores({body})"

    @safe_display
    def _repr_pretty_(self, p: Any, cycle: bool) -> None:
        """Write one score per line through the pretty-printer ``p``.

        Args:
            p: Printer object; only ``text``, ``group`` and ``breakable`` are used.
            cycle: When true, only the ``AlignmentScores(...)`` placeholder
                is written.
        """
        if cycle:
            p.text("AlignmentScores(...)")
            return

        with p.group(2, "AlignmentScores(", ")"):
            for name in ("correctness", "completeness", "alignment"):
                p.breakable()
                p.text(f"{name}={getattr(self, name):.3f},")

    @safe_display
    def _repr_html_(self) -> str:
        """Build the HTML form: one row per score, three decimals each."""
        html = HtmlBuilder("AlignmentScores")
        labelled_scores = (
            ("Correctness", self.correctness),
            ("Completeness", self.completeness),
            ("Alignment", self.alignment),
        )
        for label, score in labelled_scores:
            html.row(label, f"{score:.3f}")
        return html.build()
class AlignmentResult(StructDictMixin, Struct, kw_only=True):
    """Complete outcome of an alignment evaluation.

    Attributes:
        scores: Aggregate correctness, completeness, and alignment scores.
        facts: Per-fact entailment results, each with its reasoning.
        usage: Token usage statistics for the evaluation request.
    """

    scores: AlignmentScores
    facts: list[EntailmentResult]
    usage: ChatUsage

    @safe_display
    def __repr__(self) -> str:
        """Return a one-line summary: the three scores, the fact count, and usage."""
        scores = self.scores
        fields = [
            f"alignment={scores.alignment:.3f}",
            f"correctness={scores.correctness:.3f}",
            f"completeness={scores.completeness:.3f}",
            f"facts={len(self.facts)}",
            f"usage={self.usage!r}",
        ]
        return "AlignmentResult(" + ", ".join(fields) + ")"

    @safe_display
    def _repr_pretty_(self, p: Any, cycle: bool) -> None:
        """Write a multi-line form of this result through the pretty-printer ``p``.

        The summary fields come first, followed by the ``repr`` of at most the
        first three facts.

        Args:
            p: Printer object; only ``text``, ``group`` and ``breakable`` are used.
            cycle: When true, only the ``AlignmentResult(...)`` placeholder
                is written.
        """
        if cycle:
            p.text("AlignmentResult(...)")
            return

        scores = self.scores
        with p.group(2, "AlignmentResult(", ")"):
            entries = [
                f"alignment={scores.alignment:.3f},",
                f"correctness={scores.correctness:.3f},",
                f"completeness={scores.completeness:.3f},",
                f"facts={len(self.facts)},",
                f"usage={self.usage!r},",
            ]
            # Only a short preview of the facts is printed, without trailing commas.
            entries.extend(repr(fact) for fact in self.facts[:3])
            for entry in entries:
                p.breakable()
                p.text(entry)

    @safe_display
    def _repr_html_(self) -> str:
        """Build the HTML form of this result with ``HtmlBuilder``.

        Emits score, fact-count and usage rows; a ``Facts`` section previewing
        at most the first five facts (when any exist); and a ``Contradictions``
        section (``theme="error"``) listing at most the first five facts whose
        entailment is ``"contradicted"``.
        """
        html = HtmlBuilder("AlignmentResult")
        scores = self.scores
        labelled_scores = (
            ("Correctness", scores.correctness),
            ("Completeness", scores.completeness),
            ("Alignment", scores.alignment),
        )
        for label, score in labelled_scores:
            html.row(label, f"{score:.3f}")
        html.row("Facts", len(self.facts))
        html.row("Usage", repr(self.usage))

        if self.facts:
            preview_rows: list[tuple[str, str]] = [
                (item.entailment, truncate_text(item.fact, 80)) for item in self.facts[:5]
            ]
            html.section("Facts", preview_rows)

        contradicted = [item for item in self.facts if item.entailment == "contradicted"]
        if contradicted:
            # NOTE(review): reasoning is passed through untruncated here, whereas
            # EntailmentResult._repr_html_ truncates it — confirm this is intended.
            error_rows: list[tuple[str, str]] = [
                (truncate_text(item.fact, 80), item.reasoning or "") for item in contradicted[:5]
            ]
            html.section("Contradictions", error_rows, theme="error")

        return html.build()