From 8a7c95e5553ddb185ef078c6561d56aa981575ca Mon Sep 17 00:00:00 2001 From: wewebber-merlin <138414820+wewebber-merlin@users.noreply.github.com> Date: Thu, 6 Jul 2023 05:23:45 +1000 Subject: [PATCH] Retryable exception for empty OpenAI embedding. (#7070) Description: The OpenAI "embeddings" API intermittently falls into a failure state where an embedding is returned as [NaN] (a single-element list), rather than the expected 1536 floats. This patch checks for that state (specifically, for an embedding of length 1) and, if it occurs, raises an openai.error.APIError, which will cause the chunk to be retried. Issue: I have been unable to find an official LangChain issue for this problem, but it is discussed (by another user) at https://stackoverflow.com/questions/76469415/getting-embeddings-of-length-1-from-langchain-openaiembeddings Maintainer: @dev2049 Testing: Since this is an intermittent OpenAI issue, I have not provided a unit or integration test. The provided code has, though, been run successfully over several million tokens. 
--------- Co-authored-by: William Webber Co-authored-by: Harrison Chase --- langchain/embeddings/openai.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/langchain/embeddings/openai.py b/langchain/embeddings/openai.py index 260a0dc81..9194a22e4 100644 --- a/langchain/embeddings/openai.py +++ b/langchain/embeddings/openai.py @@ -86,13 +86,23 @@ def _async_retry_decorator(embeddings: OpenAIEmbeddings) -> Any: return wrap +# https://stackoverflow.com/questions/76469415/getting-embeddings-of-length-1-from-langchain-openaiembeddings +def _check_response(response: dict) -> dict: + if any(len(d["embedding"]) == 1 for d in response["data"]): + import openai + + raise openai.error.APIError("OpenAI API returned an empty embedding") + return response + + def embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any: """Use tenacity to retry the embedding call.""" retry_decorator = _create_retry_decorator(embeddings) @retry_decorator def _embed_with_retry(**kwargs: Any) -> Any: - return embeddings.client.create(**kwargs) + response = embeddings.client.create(**kwargs) + return _check_response(response) return _embed_with_retry(**kwargs) @@ -102,7 +112,8 @@ async def async_embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> @_async_retry_decorator(embeddings) async def _async_embed_with_retry(**kwargs: Any) -> Any: - return await embeddings.client.acreate(**kwargs) + response = await embeddings.client.acreate(**kwargs) + return _check_response(response) return await _async_embed_with_retry(**kwargs)