From 511561122bd261b1ea045ac28bcfb2c1538bf2e9 Mon Sep 17 00:00:00 2001
From: Jeff Tratner <jtratner@counsyl.com>
Date: Thu, 15 Mar 2018 03:12:01 -0700
Subject: [PATCH] Cache pipfile parsing

On a (390+ line) Pipfile, it takes ~5s to parse the entire thing :O.
Pipenv has to parse the pipfile repeatedly and all over the place, so
caching the contents speeds things up dramatically (at least in this
case).

This PR establishes a small cache for the parsed Pipfile, keyed on file
location + md5sum of its contents (the hashing is pretty fast here).
Because the cache key is based on the file contents, caching should be
completely fine; the only possible issue is if parsed_pipfile gets
mutated, which is why I've added a defensive deepcopy.

Without the deepcopy, cache hit takes ~0.09ms.
With the deepcopy, cache hit takes 1.19ms.

If we can confirm there is no need for the deepcopy, dropping it would
shave a second or two off really big Pipfiles.
---
 pipenv/project.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pipenv/project.py b/pipenv/project.py
index bdb1b98b..225914b6 100644
--- a/pipenv/project.py
+++ b/pipenv/project.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import copy
 import json
 import os
 import re
@@ -10,6 +11,7 @@ import hashlib
 import contoml
 import delegator
 import pipfile
+import threading
 import toml
 
 from pip9 import ConfigOptionParser
@@ -47,6 +49,10 @@ if PIPENV_PIPFILE:
         PIPENV_PIPFILE = normalize_drive(os.path.abspath(PIPENV_PIPFILE))
 
 
+_cache = threading.local()
+_cache.pipfile_cache = {}
+
+
 class Project(object):
     """docstring for Project"""
 
@@ -292,6 +298,15 @@ class Project(object):
         # Open the pipfile, read it into memory.
         with open(self.pipfile_location) as f:
             contents = f.read()
+        # this should be pretty fast (ish) and we need this pipfile a lot
+        cache_key = (self.pipfile_location, hashlib.md5(contents.encode('utf8')).hexdigest())
+        if cache_key not in _cache.pipfile_cache:
+            parsed = self._parse_pipfile(contents)
+            _cache.pipfile_cache[cache_key] = parsed
+        # deepcopy likely unnecessary but why not avoid bugs?
+        return copy.deepcopy(_cache.pipfile_cache[cache_key])
+
+    def _parse_pipfile(self, contents):
         # If any outline tables are present...
         if ('[packages.' in contents) or ('[dev-packages.' in contents):
             data = toml.loads(contents)