mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
7d5af2247f
This reverts commit 3ac5c26ca3.
177 lines
6.0 KiB
Python
177 lines
6.0 KiB
Python
"""
|
|
This script parses a string representation of a filesystem structure into a tree-like directory structure.
|
|
|
|
The 'Node' class represents a node in this tree, which can be either a file or a folder. Files cannot have
children, while folders can.
|
|
|
|
The 'DirectoryTree' class contains a single root folder from which all other files/folders can be reached.
The 'parse_tree_to_filesystem' function uses an OpenAI chat model (gpt-3.5-turbo, via function calling) to
convert a string representation of a directory tree into a 'DirectoryTree' object. This object can then be
manipulated programmatically as needed, with methods such as 'print_paths' available for convenience.
|
|
|
|
Please note: Recursive models currently only work if they are wrapped by a non-recursive one. This is why we
pass a 'DirectoryTree' (which contains a single 'Node') as the function call, not a 'Node' directly. This is
due to a limitation in how Pydantic generates schemas for recursive objects, which produces a top-level schema
containing only 'dict_keys(['$ref', 'definitions'])'. Instead of writing a resolver for such references, we
can simply wrap the recursive class in a non-recursive one so the function_call class never has a cyclic
reference.
|
|
|
|
Example usage:
    >>> root = parse_tree_to_filesystem(
    ...     '''
    ...     root
    ...     ├── folder1
    ...     │   ├── file1.txt
    ...     │   └── file2.txt
    ...     └── folder2
    ...         ├── file3.txt
    ...         └── subfolder1
    ...             └── file4.txt
    ...     '''
    ... )
    >>> root.print_paths()
    # Expected output:
    # >>> root                                  NodeType.FOLDER
    # >>> root/folder1                          NodeType.FOLDER
    # >>> root/folder1/file1.txt                NodeType.FILE
    # >>> root/folder1/file2.txt                NodeType.FILE
    # >>> root/folder2                          NodeType.FOLDER
    # >>> root/folder2/file3.txt                NodeType.FILE
    # >>> root/folder2/subfolder1               NodeType.FOLDER
    # >>> root/folder2/subfolder1/file4.txt     NodeType.FILE
|
|
"""
|
|
|
|
import openai
|
|
import enum
|
|
|
|
from pydantic import Field
|
|
from typing import List
|
|
from openai_function_call import OpenAISchema
|
|
from tenacity import retry, stop_after_attempt
|
|
|
|
|
|
class NodeType(str, enum.Enum):
    """Kinds of entries that can appear in the parsed filesystem tree.

    Members are also ``str`` instances (the class mixes in ``str``), so each
    member compares equal to its plain string value.
    """

    # A leaf entry; files are not expected to carry children.
    FILE = "file"
    # A container entry that may hold files and other folders.
    FOLDER = "folder"
|
|
|
|
|
|
class Node(OpenAISchema):
    """
    Class representing a single node in a filesystem. Can be either a file or a folder.
    Note that a file cannot have children, but a folder can.

    Args:
        name (str): The name of the node.
        children (List[Node]): The list of child nodes (if any).
        node_type (NodeType): The type of the node, either a file or a folder.

    Methods:
        print_paths: Prints the path of the node and its children.
    """

    # NOTE(review): the class docstring above is kept verbatim on purpose; it is
    # presumably surfaced in the schema generated for the model, so rewording it
    # would change what is sent to the API -- confirm before editing.

    # The node may be a file as well as a folder, so the description covers both.
    name: str = Field(..., description="Name of the file or folder")
    # Self-referential field: the string annotation is resolved after the class
    # body, and default_factory gives every node its own empty list.
    children: List["Node"] = Field(
        default_factory=list,
        description="List of children nodes, only applicable for folders, files cannot have children",
    )
    # Nodes default to FILE unless the type is given explicitly.
    node_type: NodeType = Field(
        default=NodeType.FILE,
        description="Either a file or folder, use the name to determine which it could be",
    )

    def print_paths(self, parent_path: str = "") -> None:
        """Print this node's path and type, then recurse into folder children.

        Args:
            parent_path: Slash-separated path of the enclosing folder. An empty
                string (the default) marks this node as the top of the tree.

        The path is built the same way for files and folders, so a top-level
        file prints as ``name`` rather than ``/name``. Children are only
        visited for folders; any children attached to a file are ignored.
        """
        path = f"{parent_path}/{self.name}" if parent_path else self.name
        print(path, self.node_type)

        if self.node_type == NodeType.FOLDER:
            # `or []` keeps the traversal safe if children was set to None.
            for child in self.children or []:
                child.print_paths(path)
|
|
|
|
|
|
class DirectoryTree(OpenAISchema):
    """
    Container class representing a directory tree.

    Args:
        root (Node): The root node of the tree.

    Methods:
        print_paths: Prints the paths of the root node and its children.
    """

    # NOTE(review): the class docstring above is kept verbatim on purpose; it is
    # presumably surfaced in the schema generated for the model -- confirm
    # before rewording it.

    # Single entry point of the tree; every other node is reached through it.
    root: Node = Field(..., description="Root folder of the directory tree")

    def print_paths(self) -> None:
        """Print every path in the tree by delegating to the root node."""
        top = self.root
        top.print_paths()
|
|
|
|
|
|
# `Node` refers to itself through the string annotation List["Node"], so the
# forward reference has to be resolved once the class exists. `DirectoryTree`
# is refreshed afterwards because it embeds `Node`.
Node.update_forward_refs()
DirectoryTree.update_forward_refs()
|
|
|
|
|
|
@retry(stop=stop_after_attempt(3))
def parse_tree_to_filesystem(data: str) -> DirectoryTree:
    """
    Turn a textual directory tree into a `DirectoryTree` via an OpenAI chat model.

    The request is sent to `gpt-3.5-turbo-0613` with the `DirectoryTree` schema
    as the only function and with that function forced through `function_call`.
    The response is then parsed back into a `DirectoryTree`.

    The `retry` decorator allows up to three attempts in total.
    NOTE(review): with tenacity's defaults any exception triggers a retry and
    exhaustion surfaces as `tenacity.RetryError` -- confirm against the
    installed tenacity version.

    Args:
        data (str): The string to convert into a filesystem.

    Returns:
        DirectoryTree: The directory tree representing the filesystem.
    """
    schema = DirectoryTree.openai_schema

    system_message = {
        "role": "system",
        "content": "You are a perfect file system parsing algorithm. You are given a string representing a directory tree. You must return the correct filesystem structure.",
    }
    user_message = {
        "role": "user",
        "content": f"Consider the data below:\n{data} and return the correctly labeled filesystem",
    }

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        temperature=0.2,
        functions=[schema],
        # Force the model to answer through the DirectoryTree function.
        function_call={"name": schema["name"]},
        messages=[system_message, user_message],
        max_tokens=1000,
    )
    return DirectoryTree.from_response(response)
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: parse a small hand-written tree and print every path in it.
    # The indentation inside the literal is significant -- it is what places
    # file3.txt and subfolder1 under folder2, and file4.txt under subfolder1.
    root = parse_tree_to_filesystem(
        """
        root
        ├── folder1
        │   ├── file1.txt
        │   └── file2.txt
        └── folder2
            ├── file3.txt
            └── subfolder1
                └── file4.txt
        """
    )
    root.print_paths()
    # Expected output:
    # >>> root                                  NodeType.FOLDER
    # >>> root/folder1                          NodeType.FOLDER
    # >>> root/folder1/file1.txt                NodeType.FILE
    # >>> root/folder1/file2.txt                NodeType.FILE
    # >>> root/folder2                          NodeType.FOLDER
    # >>> root/folder2/file3.txt                NodeType.FILE
    # >>> root/folder2/subfolder1               NodeType.FOLDER
    # >>> root/folder2/subfolder1/file4.txt     NodeType.FILE
|