mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
141 lines
5.0 KiB
Python
141 lines
5.0 KiB
Python
from typing import List
|
|
from loguru import logger
|
|
|
|
import openai
|
|
import instructor
|
|
|
|
|
|
from pydantic import Field, BaseModel, FieldValidationInfo, model_validator
|
|
|
|
|
|
class Fact(BaseModel):
|
|
statement: str = Field(
|
|
..., description="Body of the sentence, as part of a response"
|
|
)
|
|
substring_phrase: List[str] = Field(
|
|
...,
|
|
description="String quote long enough to evaluate the truthfulness of the fact",
|
|
)
|
|
|
|
@model_validator(mode="after")
|
|
def validate_sources(self, info: FieldValidationInfo) -> "Fact":
|
|
"""
|
|
For each substring_phrase, find the span of the substring_phrase in the context.
|
|
If the span is not found, remove the substring_phrase from the list.
|
|
"""
|
|
if info.context is None:
|
|
logger.info("No context found, skipping validation")
|
|
return self
|
|
|
|
# Get the context from the info
|
|
text_chunks = info.context.get("text_chunk", None)
|
|
|
|
# Get the spans of the substring_phrase in the context
|
|
spans = list(self.get_spans(text_chunks))
|
|
logger.info(
|
|
f"Found {len(spans)} span(s) for from {len(self.substring_phrase)} citation(s)."
|
|
)
|
|
# Replace the substring_phrase with the actual substring
|
|
self.substring_phrase = [text_chunks[span[0] : span[1]] for span in spans]
|
|
return self
|
|
|
|
def _get_span(self, quote, context, errs=5):
|
|
import regex
|
|
|
|
minor = quote
|
|
major = context
|
|
|
|
errs_ = 0
|
|
s = regex.search(f"({minor}){{e<={errs_}}}", major)
|
|
while s is None and errs_ <= errs:
|
|
errs_ += 1
|
|
s = regex.search(f"({minor}){{e<={errs_}}}", major)
|
|
|
|
if s is not None:
|
|
yield from s.spans()
|
|
|
|
def get_spans(self, context):
|
|
for quote in self.substring_phrase:
|
|
yield from self._get_span(quote, context)
|
|
|
|
|
|
class QuestionAnswer(instructor.OpenAISchema):
|
|
"""
|
|
Class representing a question and its answer as a list of facts each one should have a soruce.
|
|
each sentence contains a body and a list of sources."""
|
|
|
|
question: str = Field(..., description="Question that was asked")
|
|
answer: List[Fact] = Field(
|
|
...,
|
|
description="Body of the answer, each fact should be its seperate object with a body and a list of sources",
|
|
)
|
|
|
|
@model_validator(mode="after")
|
|
def validate_sources(self) -> "QuestionAnswer":
|
|
"""
|
|
Checks that each fact has some sources, and removes those that do not.
|
|
"""
|
|
logger.info(f"Validating {len(self.answer)} facts")
|
|
self.answer = [fact for fact in self.answer if len(fact.substring_phrase) > 0]
|
|
logger.info(f"Found {len(self.answer)} facts with sources")
|
|
return self
|
|
|
|
|
|
def ask_ai(question: str, context: str) -> QuestionAnswer:
|
|
completion = openai.ChatCompletion.create(
|
|
model="gpt-3.5-turbo-0613",
|
|
temperature=0,
|
|
functions=[QuestionAnswer.openai_schema],
|
|
function_call={"name": QuestionAnswer.openai_schema["name"]},
|
|
messages=[
|
|
{
|
|
"role": "system",
|
|
"content": f"You are a world class algorithm to answer questions with correct and exact citations. ",
|
|
},
|
|
{"role": "user", "content": f"Answer question using the following context"},
|
|
{"role": "user", "content": f"{context}"},
|
|
{"role": "user", "content": f"Question: {question}"},
|
|
{
|
|
"role": "user",
|
|
"content": f"Tips: Make sure to cite your sources, and use the exact words from the context.",
|
|
},
|
|
],
|
|
)
|
|
|
|
# Creating an Answer object from the completion response
|
|
return QuestionAnswer.from_response(
|
|
completion, validation_context={"text_chunk": context}
|
|
)
|
|
|
|
|
|
question = "where did he go to school?"
|
|
context = """
|
|
My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.I went to an arts highschool but in university I studied Computational Mathematics and physics. As part of coop I worked at many companies including Stitchfix, Facebook. I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.
|
|
"""
|
|
|
|
answer = ask_ai(question, context)
|
|
print(answer.model_dump_json(indent=2))
|
|
"""
|
|
2023-09-09 15:48:11.022 | INFO | __main__:validate_sources:35 - Found 1 span(s) for from 1 citation(s).
|
|
2023-09-09 15:48:11.023 | INFO | __main__:validate_sources:35 - Found 1 span(s) for from 1 citation(s).
|
|
2023-09-09 15:48:11.023 | INFO | __main__:validate_sources:78 - Validating 2 facts
|
|
2023-09-09 15:48:11.023 | INFO | __main__:validate_sources:80 - Found 2 facts with sources
|
|
{
|
|
"question": "where did he go to school?",
|
|
"answer": [
|
|
{
|
|
"statement": "Jason Liu went to an arts highschool.",
|
|
"substring_phrase": [
|
|
"arts highschool"
|
|
]
|
|
},
|
|
{
|
|
"statement": "Jason Liu studied Computational Mathematics and physics in university.",
|
|
"substring_phrase": [
|
|
"university"
|
|
]
|
|
}
|
|
]
|
|
}
|
|
"""
|