From 511561122bd261b1ea045ac28bcfb2c1538bf2e9 Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Thu, 15 Mar 2018 03:12:01 -0700 Subject: [PATCH] Cache pipfile parsing On a (390+ line) Pipfile, it takes ~5s to parse the entire thing :O. Pipenv has to parse the pipfile repeatedly and all over the place, so caching the contents speeds things up dramatically (at least in this case). This PR establishes a little cache based upon file location + md5sum of contents for the pipfile (and the hashing is pretty fast here). Given that the cache key is based on the file contents, it should be completely fine to do (only possible issue is if parsed_pipfile gets mutated - which is why I've added a defensive deepcopy). Without the deepcopy, cache hit takes ~0.09ms. With the deepcopy, cache hit takes 1.19ms. If we can confirm no need for deepcopy, it would shave a second or two off really big Pipfiles. --- pipenv/project.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pipenv/project.py b/pipenv/project.py index bdb1b98b..225914b6 100644 --- a/pipenv/project.py +++ b/pipenv/project.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import copy import json import os import re @@ -10,6 +11,7 @@ import hashlib import contoml import delegator import pipfile +import threading import toml from pip9 import ConfigOptionParser @@ -47,6 +49,10 @@ if PIPENV_PIPFILE: PIPENV_PIPFILE = normalize_drive(os.path.abspath(PIPENV_PIPFILE)) +_cache = threading.local() +_cache.pipfile_cache = {} + + class Project(object): """docstring for Project""" @@ -292,6 +298,15 @@ class Project(object): # Open the pipfile, read it into memory. 
with open(self.pipfile_location) as f: contents = f.read() + # this should be pretty fast (ish) and we need this pipfile a lot + cache_key = (self.pipfile_location, hashlib.md5(contents.encode('utf8')).hexdigest()) + if cache_key not in _cache.pipfile_cache: + parsed = self._parse_pipfile(contents) + _cache.pipfile_cache[cache_key] = parsed + # deepcopy likely unnecessary but why not avoid bugs? + return copy.deepcopy(_cache.pipfile_cache[cache_key]) + + def _parse_pipfile(self, contents): # If any outline tables are present... if ('[packages.' in contents) or ('[dev-packages.' in contents): data = toml.loads(contents)