From 170446e8c93bce614721e2abc6731663164e9902 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.org>
Date: Fri, 27 Jan 2017 00:24:59 -0500
Subject: [PATCH] init

---
 Pipfile     |  9 +++++++
 README.rst  | 39 +++++++++++++++++++++++++++++++
 ovaltine.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+)
 create mode 100644 Pipfile
 create mode 100644 README.rst
 create mode 100644 ovaltine.py
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..edddb49
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,9 @@
+[[source]]
+url = "https://pypi.python.org/simple"
+verify_ssl = true
+
+[dev-packages]
+requests = "*"
+
+[packages]
+chardet = "*"
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..75d2989
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,39 @@
+Ovaltine: a Secret Encoding Decoder Ring for Python
+===================================================
+
+**Ovaltine** (extracted from Requests) is a simple Python library for
+working with unicode embedded within an indeterminate encoding.
+
+For example, many web servers lie about the encoding of their responses.
+You can use Ovaltine, which in turn uses chardet, to detect the apparent
+encoding and get back as much usable data as possible.
+
+Usage
+-----
+
+Simply get back unicode, no matter what::
+
+    >>> import ovaltine
+
+    >>> ovaltine.drink(b'foo bar')
+    u'foo bar'
+
+Or, for more advanced usage::
+
+    >>> from ovaltine import DecoderRing
+
+    >>> content = requests.get('https://kennethreitz.org/').content
+    >>> r = DecoderRing(content)
+
+    >>> r
+    <DecoderRing len=74773, encoding=None, apparently='ISO-8859-2'>
+    >>> r.apparently
+    'ISO-8859-2'
+    >>> r.text
+    ... # Unicode is shown here.
+
+    # Set the encoding yourself.
+    >>> r.encoding = 'UTF-8'
+    >>> r.text
+    ... # Unicode is shown here.
+
diff --git a/ovaltine.py b/ovaltine.py
new file mode 100644
index 0000000..6faa808
--- /dev/null
+++ b/ovaltine.py
@@ -0,0 +1,67 @@
+import sys
+
+import chardet
+
+# Python 2/3 compatibility
+# ------------------------
+# ``str`` is bound to the text (unicode) type on either major version.
+_ver = sys.version_info
+is_py2 = _ver[0] == 2
+is_py3 = _ver[0] == 3
+
+if is_py3:
+    str = str
+elif is_py2:
+    str = unicode
+
+
+def drink(content, encoding=None):
+    """Decode ``content`` into unicode text using a DecoderRing.
+
+    ``encoding`` is tried first; None leaves the choice to the ring.
+    """
+    decoder = DecoderRing(content)
+    decoder.encoding = encoding
+    return decoder.text
+
+
+class DecoderRing(object):
+    """A secret decoder ring, which decodes secret messages (e.g. bytes
+    of unknown, or simply indeterminate, encoding).
+
+    ``content`` holds the raw bytes; ``encoding`` is the caller's explicit
+    choice (None means "use whatever chardet detects").
+    """
+    def __init__(self, content):
+        super(DecoderRing, self).__init__()
+        self.content = content
+        self.encoding = None
+
+    def __repr__(self):
+        return '<DecoderRing len={0!r}, encoding={1!r}, apparently={2!r}>'.format(
+            len(self.content), self.encoding, self.apparently)
+
+    @property
+    def apparently(self):
+        """Returns the apparent encoding of the content, per chardet."""
+        return chardet.detect(self.content)['encoding']
+
+    @property
+    def text(self):
+        """Returns the unicode representation of the content.
+
+        Undecodable bytes are replaced rather than raising.
+        """
+        # Prefer the explicit encoding; fall back to the auto-detected one.
+        encoding = self.encoding
+        if encoding is None:
+            encoding = self.apparently
+
+        try:
+            return str(self.content, encoding, errors='replace')
+        except (LookupError, TypeError):
+            # LookupError: the encoding name is unknown (e.g. misspelled).
+            # TypeError: encoding is None (nothing given, nothing detected).
+            # Either way, decode blindly with the default encoding. The
+            # keyword must be ``errors``; ``error`` itself raises TypeError.
+            return str(self.content, errors='replace')