diff --git a/.gitignore b/.gitignore
index 0b97989..21c1649 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ dist/
 test.py
 .coverage
 htmlcov/
+benchmarks/cases.json
diff --git a/.travis.yml b/.travis.yml
index 74335f4..730ad75 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,6 +20,7 @@ install:
 script:
 - make lint
 - make test
+- make benchmark
 - ./tests/check_tag.py
 
 after_success:
diff --git a/Makefile b/Makefile
index 0aef217..4626200 100644
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@ install:
 	pip install -U setuptools pip
 	pip install -U .
 	pip install -r tests/requirements.txt
+	pip install -r benchmarks/requirements.txt
 
 .PHONY: isort
 isort:
@@ -26,6 +27,10 @@ testcov:
 .PHONY: all
 all: testcov lint
 
+.PHONY: benchmark
+benchmark:
+	python benchmarks/run.py
+
 .PHONY: clean
 clean:
 	rm -rf `find . -name __pycache__`
diff --git a/benchmarks/requirements.txt b/benchmarks/requirements.txt
new file mode 100644
index 0000000..88a47af
--- /dev/null
+++ b/benchmarks/requirements.txt
@@ -0,0 +1,2 @@
+python-dateutil
+trafaret
diff --git a/benchmarks/run.py b/benchmarks/run.py
new file mode 100644
index 0000000..5a5f563
--- /dev/null
+++ b/benchmarks/run.py
@@ -0,0 +1,158 @@
+"""Benchmark trafaret and pydantic validating the same randomly generated cases."""
+import json
+import random
+import string
+from datetime import datetime
+from functools import partial
+from pathlib import Path
+from statistics import mean, stdev
+
+from test_trafaret import TestTrafaret
+from test_pydantic import TestPydantic
+
+PUNCTUATION = ' \t\n!"#$%&\'()*+,-./'
+LETTERS = string.ascii_letters
+UNICODE = '\xa0\xad¡¢£¤¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ALL = PUNCTUATION * 5 + LETTERS * 20 + UNICODE
+# from here on `random` names a SystemRandom instance, not the module
+random = random.SystemRandom()
+
+
+class GenerateData:
+    """Empty placeholder, not referenced anywhere else in this script."""
+
+    def __init__(self):
+        pass
+
+
+def rand_string(min_length, max_length, corpus=ALL):
+    """Return random characters from corpus; the length is in [min_length, max_length)."""
+    return ''.join(random.choices(corpus, k=random.randrange(min_length, max_length)))
+
+
+# sentinel value: remove_missing() drops dict entries holding it
+MISSING = object()
+
+
+def null_missing_v(f, null_chance=0.2, missing_chance=None):
+    """
+    Return None, MISSING or f(), decided by one random draw.
+
+    :param f: zero-argument callable producing the real value
+    :param null_chance: probability of returning None
+    :param missing_chance: probability of returning MISSING, defaults to null_chance
+    """
+    # the same draw is compared against both thresholds so the two chances don't interact
+    r = random.random()
+    if r < null_chance:
+        return None
+    missing_chance = null_chance if missing_chance is None else missing_chance
+    if r < (null_chance + missing_chance):
+        return MISSING
+    return f()
+
+
+def null_missing_string(*args, **kwargs):
+    """Return rand_string(*args), None or MISSING; kwargs are passed to null_missing_v."""
+    f = partial(rand_string, *args)
+    return null_missing_v(f, **kwargs)
+
+
+def rand_email():
+    """Return a random '<local>@<domain>.<tld>' string; roughly 1 in 5 draw the local part from UNICODE."""
+    if random.random() < 0.2:
+        c1, c2 = UNICODE, LETTERS
+    else:
+        c1, c2 = LETTERS, LETTERS
+    return f'{rand_string(10, 50, corpus=c1)}@{rand_string(10, 50, corpus=c2)}.{rand_string(2, 5, corpus=c2)}'
+
+
+def null_missing_email():
+    """Return rand_email(), None or MISSING using null_missing_v's default chances."""
+    return null_missing_v(rand_email)
+
+
+def rand_date():
+    """Return a random date-time string with unpadded numeric fields."""
+    # NOTE(review): month 0, day 0 and days past the end of a month are possible -
+    # presumably intended as failing cases, confirm
+    r = random.randrange
+    return f'{r(1900, 2020)}-{r(0, 12)}-{r(0, 32)}T{r(0, 24)}:{r(0, 60)}:{r(0, 60)}'
+
+
+def remove_missing(d):
+    """Return a copy of d in which dict entries whose value is MISSING are dropped, recursively."""
+    if isinstance(d, dict):
+        return {k: remove_missing(v) for k, v in d.items() if v is not MISSING}
+    elif isinstance(d, list):
+        return [remove_missing(d_) for d_ in d]
+    else:
+        return d
+
+
+def generate_case():
+    """Return one randomly generated case dict with MISSING entries already removed."""
+    return remove_missing(dict(
+        id=random.randrange(1, 2000),
+        client_name=null_missing_string(10, 280, null_chance=0.05, missing_chance=0.05),
+        sort_index=random.random() * 200,
+        client_email=null_missing_email(),  # email checks differ with different frameworks
+        client_phone=null_missing_string(5, 15),
+        location=dict(
+            latitude=random.random() * 180 - 90,
+            longitude=random.random() * 180,
+        ),
+        contractor=str(random.randrange(5, 2000)),  # TODO negative checks
+        upstream_http_referrer=null_missing_string(10, 1050),
+        grecaptcha_response=null_missing_string(10, 1050, null_chance=0.05, missing_chance=0.05),
+        last_updated=rand_date(),
+        skills=[dict(
+            subject=null_missing_string(5, 20, null_chance=0.01, missing_chance=0),
+            subject_id=i,
+            category=rand_string(5, 20),
+            qual_level=rand_string(5, 20),
+            qual_level_id=random.randrange(2000),
+            qual_level_ranking=random.random() * 20
+        ) for i in range(random.randrange(1, 5))]
+    ))
+
+THIS_DIR = Path(__file__).parent.resolve()
+
+
+def main():
+    """
+    Time every validator class over the cases in cases.json, generating that file first if it is missing.
+
+    Each class gets 5 timed runs of 3 passes over all cases; per-run and summary figures are printed.
+    """
+    json_path = THIS_DIR / 'cases.json'
+    if not json_path.exists():
+        print('generating test cases...')
+        cases = [generate_case() for _ in range(2000)]
+        with json_path.open('w') as f:
+            json.dump(cases, f, indent=2, sort_keys=True)
+    else:
+        with json_path.open() as f:
+            cases = json.load(f)
+    tests = [TestTrafaret, TestPydantic]
+    for test_class in tests:
+        times = []
+        p = test_class.package
+        for _ in range(5):
+            count, pass_count = 0, 0
+            start = datetime.now()
+            test = test_class(False)
+            for _ in range(3):
+                for case in cases:
+                    passed, _ = test.validate(case)
+                    count += 1
+                    pass_count += passed
+            time = (datetime.now() - start).total_seconds()
+            success = pass_count / count * 100
+            print(f'{p:>20}: time={time:0.2f}s, success={success:0.2f}%')
+            times.append(time)
+        print(f'{p:>20}: best={min(times):0.2f}s, avg={mean(times):0.2f}s, stdev={stdev(times):0.2f}s')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/benchmarks/test_pydantic.py b/benchmarks/test_pydantic.py
new file mode 100644
index 0000000..76bc8c7
--- /dev/null
+++ b/benchmarks/test_pydantic.py
@@ -0,0 +1,55 @@
+from datetime import datetime
+from typing import List
+
+from pydantic import BaseModel, constr, EmailStr
+
+
+class TestPydantic:
+    """Benchmark adapter: validates a case by instantiating a pydantic model with it."""
+
+    package = 'pydantic'
+
+    def __init__(self, allow_extra):
+        """
+        Define the model used by validate().
+
+        :param allow_extra: value assigned to the model's Config.ignore_extra
+        """
+
+        class Model(BaseModel):
+            id: int = ...
+            client_name: constr(max_length=255) = ...
+            sort_index: float = ...
+            client_email: EmailStr = None
+            client_phone: constr(max_length=255) = None
+
+            class Location(BaseModel):
+                latitude: float = None
+                longitude: float = None
+            location: Location = None
+
+            contractor: int = None
+            upstream_http_referrer: constr(max_length=1023) = None
+            grecaptcha_response: constr(min_length=20, max_length=1000) = ...
+            last_updated: datetime = None
+
+            class Skill(BaseModel):
+                subject: str = ...
+                subject_id: int = ...
+                category: str = ...
+                qual_level: str = ...
+                qual_level_id: int = ...
+                qual_level_ranking: float = 0
+            skills: List[Skill] = []
+
+            class Config:
+                ignore_extra = allow_extra
+
+        self.model = Model
+
+    def validate(self, data):
+        """Return (True, model instance), or (False, None) when building the model raises ValueError."""
+        try:
+            return True, self.model(**data)
+        except ValueError:
+            return False, None
diff --git a/benchmarks/test_trafaret.py b/benchmarks/test_trafaret.py
new file mode 100644
index 0000000..31eac10
--- /dev/null
+++ b/benchmarks/test_trafaret.py
@@ -0,0 +1,52 @@
+from dateutil.parser import parse
+import trafaret as t
+
+
+class TestTrafaret:
+    """Benchmark adapter: validates a case by checking it against a trafaret schema."""
+
+    package = 'trafaret'
+
+    def __init__(self, allow_extra):
+        """
+        Build the schema used by validate().
+
+        :param allow_extra: if truthy, allow_extra('*') is applied to the schema
+        """
+        self.schema = t.Dict({
+            'id': t.Int(),
+            'client_name': t.String(max_length=255),
+            'sort_index': t.Float,
+            t.Key('client_email', optional=True): t.Or(t.Null | t.Email()),
+            t.Key('client_phone', optional=True): t.Or(t.Null | t.String(max_length=255)),
+
+            t.Key('location', optional=True): t.Or(t.Null | t.Dict({
+                'latitude': t.Or(t.Float | t.Null),
+                'longitude': t.Or(t.Float | t.Null),
+            })),
+
+            t.Key('contractor', optional=True): t.Or(t.Null | t.Int(gt=0)),
+            t.Key('upstream_http_referrer', optional=True): t.Or(t.Null | t.String(max_length=1023)),
+            t.Key('grecaptcha_response'): t.String(min_length=20, max_length=1000),
+
+            t.Key('last_updated', optional=True): t.Or(t.Null | t.String >> parse),
+
+            t.Key('skills', default=[]): t.List(t.Dict({
+                'subject': t.String,
+                'subject_id': t.Int,
+                'category': t.String,
+                'qual_level': t.String,
+                'qual_level_id': t.Int,
+                t.Key('qual_level_ranking', default=0): t.Float,
+            })),
+        })
+        if allow_extra:
+            self.schema.allow_extra('*')
+
+    def validate(self, data):
+        """Return (True, checked data), or (False, None) when the check raises DataError or ValueError."""
+        # ValueError is caught too - presumably for dateutil's parse(), which raises it on unparseable input
+        try:
+            return True, self.schema.check(data)
+        except (t.DataError, ValueError):
+            return False, None