Files
langchain/tests/unit_tests/evaluation/criteria/test_eval_chain.py
T
William FH a673a51efa [Breaking] Update Evaluation Functionality (#7388)
- Migrate from deprecated langchainplus_sdk to `langsmith` package
- Update the `run_on_dataset()` API to use an eval config
- Update a number of evaluators, as well as the loading logic
- Update docstrings / reference docs
- Update tracer to share single HTTP session
2023-07-13 02:13:06 -07:00

54 lines
1.7 KiB
Python

"""Test the criteria eval chain."""
import pytest
from langchain.evaluation.criteria.eval_chain import (
_SUPPORTED_CRITERIA,
Criteria,
CriteriaEvalChain,
LabeledCriteriaEvalChain,
)
from langchain.evaluation.schema import StringEvaluator
from tests.unit_tests.llms.fake_llm import FakeLLM
def test_resolve_criteria() -> None:
    """Check that built-in criterion names resolve to their stock descriptions.

    For each name, ``CriteriaEvalChain.resolve_criteria`` must return a
    single-entry mapping from that name to the matching
    ``_SUPPORTED_CRITERIA`` value.
    """
    expected_members = (
        ("helpfulness", Criteria.HELPFULNESS),
        ("correctness", Criteria.CORRECTNESS),
    )
    for criterion_name, criterion_member in expected_members:
        resolved = CriteriaEvalChain.resolve_criteria(criterion_name)
        assert resolved == {criterion_name: _SUPPORTED_CRITERIA[criterion_member]}
def test_criteria_eval_chain() -> None:
    """Check the reference warning and the parsed reasoning of the chain.

    A ``CriteriaEvalChain`` is built from a ``FakeLLM`` configured with the
    canned text ``"The meaning of life\\nY"``. Calling ``evaluate_strings``
    with a ``reference`` must emit a ``UserWarning`` whose message is the
    chain's ``_skip_reference_warning``, and the ``"reasoning"`` entry of the
    result must equal ``"The meaning of life"``.
    """
    # Function-scope import keeps this test self-contained.
    import re

    chain = CriteriaEvalChain.from_llm(
        llm=FakeLLM(
            queries={"text": "The meaning of life\nY"}, sequential_responses=True
        ),
        criteria={"my criterion": "my criterion description"},
    )
    # ``pytest.warns(match=...)`` interprets its argument as a regular
    # expression. Escape the message so punctuation in it (".", "(", ...)
    # is compared literally instead of acting as regex syntax.
    expected_warning = re.escape(chain._skip_reference_warning)
    with pytest.warns(UserWarning, match=expected_warning):
        result = chain.evaluate_strings(
            prediction="my prediction", reference="my reference", input="my input"
        )
    assert result["reasoning"] == "The meaning of life"
def test_criteria_eval_chain_missing_reference() -> None:
    """Expect ``ValueError`` when the labeled chain gets no reference.

    A ``LabeledCriteriaEvalChain`` is built from a ``FakeLLM`` and then
    ``evaluate_strings`` is called with only ``prediction`` and ``input``.
    """
    fake_llm = FakeLLM(
        queries={"text": "The meaning of life\nY"},
        sequential_responses=True,
    )
    custom_criteria = {"my criterion": "my criterion description"}
    labeled_chain = LabeledCriteriaEvalChain.from_llm(
        llm=fake_llm,
        criteria=custom_criteria,
    )
    # No ``reference`` keyword is supplied to the call below.
    with pytest.raises(ValueError):
        labeled_chain.evaluate_strings(input="my input", prediction="my prediction")
def test_implements_string_protocol() -> None:
    """Check that ``CriteriaEvalChain`` is a subclass of ``StringEvaluator``."""
    is_string_evaluator = issubclass(CriteriaEvalChain, StringEvaluator)
    assert is_string_evaluator