adding performance benchmarks

This commit is contained in:
Samuel Colvin
2017-05-08 15:48:32 +01:00
parent d6edca642a
commit ef60baa65e
7 changed files with 232 additions and 0 deletions
+1
View File
@@ -8,3 +8,4 @@ dist/
test.py
.coverage
htmlcov/
benchmarks/cases.json
+1
View File
@@ -20,6 +20,7 @@ install:
script:
- make lint
- make test
- make benchmark
- ./tests/check_tag.py
after_success:
+5
View File
@@ -3,6 +3,7 @@ install:
pip install -U setuptools pip
pip install -U .
pip install -r tests/requirements.txt
pip install -r benchmarks/requirements.txt
.PHONY: isort
isort:
@@ -26,6 +27,10 @@ testcov:
.PHONY: all
all: testcov lint
.PHONY: benchmark
benchmark:
python benchmarks/run.py
.PHONY: clean
clean:
rm -rf `find . -name __pycache__`
+2
View File
@@ -0,0 +1,2 @@
python-dateutil
trafaret
+131
View File
@@ -0,0 +1,131 @@
import json
import random
import string
from datetime import datetime
from functools import partial
from pathlib import Path
from statistics import mean, stdev
from test_trafaret import TestTrafaret
from test_pydantic import TestPydantic
# Character pools used when building random strings for the generated cases.
# PUNCTUATION holds whitespace (space, tab, newline) plus ASCII '!' through '/'.
PUNCTUATION = ' \t\n!"#$%&\'()*+,-./'
# LETTERS is the ASCII alphabet, lower and upper case.
LETTERS = string.ascii_letters
UNICODE = '\xa0\xad¡¢£¤¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
# Combined sampling corpus: the repetition factors weight a uniform draw
# towards letters (x20) and punctuation (x5) relative to the single copy of
# the UNICODE characters.
ALL = PUNCTUATION * 5 + LETTERS * 20 + UNICODE
# Rebinds the name `random` from the module to a SystemRandom instance, so
# every later `random.<method>(...)` call in this file goes through it.
random = random.SystemRandom()
class GenerateData:
    """Empty placeholder class: the constructor takes no arguments and stores nothing."""

    def __init__(self):
        """Create an instance without setting any attributes."""
def rand_string(min_length, max_length, corpus=ALL):
    """Return a random string whose characters are drawn (with replacement) from *corpus*.

    The length comes from ``random.randrange(min_length, max_length)``, so it is
    at least ``min_length`` and strictly less than ``max_length``.
    """
    length = random.randrange(min_length, max_length)
    picked = random.choices(corpus, k=length)
    return ''.join(picked)
# Sentinel returned in place of a value to signal "leave this field out".
MISSING = object()


def null_missing_v(f, null_chance=0.2, missing_chance=None):
    """Return ``None``, ``MISSING`` or ``f()`` with the requested probabilities.

    A single uniform draw ``r`` in [0, 1) selects the outcome:

    * ``r < null_chance``                                   -> ``None``
    * ``null_chance <= r < null_chance + missing_chance``   -> ``MISSING``
    * otherwise                                             -> ``f()``

    :param f: zero-argument callable producing the real value
    :param null_chance: probability of returning ``None``
    :param missing_chance: probability of returning ``MISSING``; defaults to
        ``null_chance`` when ``None``
    """
    if missing_chance is None:
        missing_chance = null_chance
    # One draw is compared against both thresholds so the two outcomes are
    # mutually exclusive bands of [0, 1); using independent draws would skew
    # the MISSING probability and make ``missing_chance=0`` still yield MISSING.
    r = random.random()
    if r < null_chance:
        return None
    if r < null_chance + missing_chance:
        return MISSING
    return f()
def null_missing_string(*args, **kwargs):
    """Return a random string, ``None`` or ``MISSING``.

    Positional arguments are forwarded to ``rand_string``; keyword arguments
    are forwarded to ``null_missing_v``.
    """
    def make_string():
        return rand_string(*args)

    return null_missing_v(make_string, **kwargs)
def rand_email():
    """Return a random ``local@domain.suffix`` style string.

    With probability 0.2 the local part is drawn from UNICODE instead of
    LETTERS; the domain and suffix are always drawn from LETTERS.
    """
    local_corpus = UNICODE if random.random() < 0.2 else LETTERS
    domain_corpus = LETTERS
    local_part = rand_string(10, 50, corpus=local_corpus)
    domain = rand_string(10, 50, corpus=domain_corpus)
    suffix = rand_string(2, 5, corpus=domain_corpus)
    return f'{local_part}@{domain}.{suffix}'
def null_missing_email():
    """Return a random email string, ``None`` or ``MISSING`` using the default chances."""
    outcome = null_missing_v(rand_email)
    return outcome
def rand_date():
    """Return a random ``Y-M-DTh:m:s`` string built from independent integer draws.

    Components are not zero-padded, and month and day can be 0 (ranges are
    0-11 and 0-31), so the result is not always a valid calendar date.
    """
    pick = random.randrange
    year = pick(1900, 2020)
    month = pick(0, 12)
    day = pick(0, 32)
    hour = pick(0, 24)
    minute = pick(0, 60)
    second = pick(0, 60)
    return f'{year}-{month}-{day}T{hour}:{minute}:{second}'
def remove_missing(d):
    """Return a copy of *d* with every dict entry whose value is ``MISSING`` dropped.

    Dicts and lists are rebuilt recursively; any other value is returned as is.
    """
    if isinstance(d, dict):
        cleaned = {}
        for key, value in d.items():
            if value is MISSING:
                continue
            cleaned[key] = remove_missing(value)
        return cleaned
    if isinstance(d, list):
        return list(map(remove_missing, d))
    return d
def generate_case():
    """Build one random benchmark case as a plain dict.

    Fields produced through the ``null_missing_*`` helpers may be ``None`` or
    absent altogether; absent ones are stripped by ``remove_missing`` before
    the case is returned.
    """
    case = {
        'id': random.randrange(1, 2000),
        'client_name': null_missing_string(10, 280, null_chance=0.05, missing_chance=0.05),
        'sort_index': random.random() * 200,
        # email checks differ with different frameworks
        'client_email': null_missing_email(),
        'client_phone': null_missing_string(5, 15),
        'location': {
            'latitude': random.random() * 180 - 90,
            'longitude': random.random() * 180,
        },
        # TODO negative checks
        'contractor': str(random.randrange(5, 2000)),
        'upstream_http_referrer': null_missing_string(10, 1050),
        'grecaptcha_response': null_missing_string(10, 1050, null_chance=0.05, missing_chance=0.05),
        'last_updated': rand_date(),
        # between 1 and 4 skills, numbered from 0
        'skills': [
            {
                'subject': null_missing_string(5, 20, null_chance=0.01, missing_chance=0),
                'subject_id': index,
                'category': rand_string(5, 20),
                'qual_level': rand_string(5, 20),
                'qual_level_id': random.randrange(2000),
                'qual_level_ranking': random.random() * 20,
            }
            for index in range(random.randrange(1, 5))
        ],
    }
    return remove_missing(case)
# Directory containing this script; the generated cases are cached next to it.
THIS_DIR = Path(__file__).parent.resolve()


def _load_cases(json_path):
    """Return the benchmark cases, generating and caching them at *json_path* if absent."""
    if json_path.exists():
        with json_path.open() as f:
            return json.load(f)

    print('generating test cases...')
    cases = [generate_case() for _ in range(2000)]
    with json_path.open('w') as f:
        json.dump(cases, f, indent=2, sort_keys=True)
    return cases


def main():
    """Run every benchmark class over the cached cases and print timing statistics.

    Each class is timed over 5 runs; a run builds a fresh instance and
    validates the full case list 3 times. One line is printed per run (time
    and success percentage), followed by best/avg/stdev for the class.
    """
    # Local import keeps this block self-contained. perf_counter is monotonic
    # and high resolution, unlike the wall clock, which can jump mid-run.
    from time import perf_counter

    cases = _load_cases(THIS_DIR / 'cases.json')
    tests = [TestTrafaret, TestPydantic]
    for test_class in tests:
        times = []
        p = test_class.package
        for _run in range(5):
            count, pass_count = 0, 0
            start = perf_counter()
            # instance construction is deliberately inside the timed region,
            # as in the original measurement
            test = test_class(False)
            for _repeat in range(3):
                for case in cases:
                    passed, _ = test.validate(case)
                    count += 1
                    pass_count += passed
            elapsed = perf_counter() - start
            # guard against an empty cached case list
            success = pass_count / count * 100 if count else 0.0
            print(f'{p:>20}: time={elapsed:0.2f}s, success={success:0.2f}%')
            times.append(elapsed)
        print(f'{p:>20}: best={min(times):0.2f}s, avg={mean(times):0.2f}s, stdev={stdev(times):0.2f}s')


if __name__ == '__main__':
    main()
+47
View File
@@ -0,0 +1,47 @@
from datetime import datetime
from typing import List
from pydantic import BaseModel, constr, EmailStr
class TestPydantic:
    """Benchmark adapter that validates a case dict with a pydantic model."""

    package = 'pydantic'

    def __init__(self, allow_extra):
        """Build the model class; ``Config.ignore_extra`` is set from *allow_extra*."""

        class Model(BaseModel):
            # ``...`` as the default marks a field with no fallback value
            id: int = ...
            client_name: constr(max_length=255) = ...
            sort_index: float = ...
            client_email: EmailStr = None
            client_phone: constr(max_length=255) = None

            class Location(BaseModel):
                latitude: float = None
                longitude: float = None

            location: Location = None

            contractor: int = None
            upstream_http_referrer: constr(max_length=1023) = None
            grecaptcha_response: constr(min_length=20, max_length=1000) = ...
            last_updated: datetime = None

            class Skill(BaseModel):
                subject: str = ...
                subject_id: int = ...
                category: str = ...
                qual_level: str = ...
                qual_level_id: int = ...
                qual_level_ranking: float = 0

            skills: List[Skill] = []

            class Config:
                ignore_extra = allow_extra

        self.model = Model

    def validate(self, data):
        """Return ``(True, model_instance)`` on success, ``(False, None)`` on ``ValueError``."""
        try:
            instance = self.model(**data)
        except ValueError:
            return False, None
        return True, instance
+45
View File
@@ -0,0 +1,45 @@
from dateutil.parser import parse
import trafaret as t
class TestTrafaret:
    """Benchmark adapter that validates a case dict with a trafaret schema."""

    package = 'trafaret'

    def __init__(self, allow_extra):
        """Build the schema; ``allow_extra('*')`` is called on it when *allow_extra* is truthy."""
        location_schema = t.Dict({
            'latitude': t.Or(t.Float | t.Null),
            'longitude': t.Or(t.Float | t.Null),
        })
        skill_schema = t.Dict({
            'subject': t.String,
            'subject_id': t.Int,
            'category': t.String,
            'qual_level': t.String,
            'qual_level_id': t.Int,
            t.Key('qual_level_ranking', default=0): t.Float,
        })
        self.schema = t.Dict({
            'id': t.Int(),
            'client_name': t.String(max_length=255),
            'sort_index': t.Float,
            t.Key('client_email', optional=True): t.Or(t.Null | t.Email()),
            t.Key('client_phone', optional=True): t.Or(t.Null | t.String(max_length=255)),
            t.Key('location', optional=True): t.Or(t.Null | location_schema),
            t.Key('contractor', optional=True): t.Or(t.Null | t.Int(gt=0)),
            t.Key('upstream_http_referrer', optional=True): t.Or(t.Null | t.String(max_length=1023)),
            t.Key('grecaptcha_response'): t.String(min_length=20, max_length=1000),
            # ``>>`` binds tighter than ``|``: the string is piped to dateutil's parse
            t.Key('last_updated', optional=True): t.Or(t.Null | t.String >> parse),
            t.Key('skills', default=[]): t.List(skill_schema),
        })
        if allow_extra:
            self.schema.allow_extra('*')

    def validate(self, data):
        """Return ``(True, checked_data)`` on success, ``(False, None)`` on ``DataError``/``ValueError``."""
        try:
            checked = self.schema.check(data)
        except (t.DataError, ValueError):
            return False, None
        return True, checked