from typing import List from graphviz import Digraph from pydantic import BaseModel, Field import instructor import openai # Patch openai to use instructor # allows for response_model instructor.patch() class Property(BaseModel): key: str value: str resolved_absolute_value: str class Entity(BaseModel): id: int = Field( ..., description="Unique identifier for the entity, used for deduplication, design a scheme allows multiple entities", ) subquote_string: List[str] = Field( ..., description="Correctly resolved value of the entity, if the entity is a reference to another entity, this should be the id of the referenced entity, include a few more words before and after the value to allow for some context to be used in the resolution", ) entity_title: str properties: List[Property] = Field( ..., description="List of properties of the entity" ) dependencies: List[int] = Field( ..., description="List of entity ids that this entity depends or relies on to resolve it", ) class DocumentExtraction(BaseModel): entities: List[Entity] = Field( ..., description="Body of the answer, each fact should be its seperate object with a body and a list of sources", ) def ask_ai(content) -> DocumentExtraction: resp: DocumentExtraction = openai.ChatCompletion.create( model="gpt-4", response_model=DocumentExtraction, messages=[ { "role": "system", "content": "You are a perfect entity resolution system that extracts facts from the document. Extract and resolve a list of entities from the following document:", }, { "role": "user", "content": content, }, ], ) # type: ignore return resp def generate_html_label(entity: Entity) -> str: rows = [ f"
| {entity.entity_title} |