mirror of
https://github.com/kennethreitz/requests.git
synced 2026-06-05 22:50:18 +00:00
Merge pull request #4120 from requests/update-3.0
Bring 3.0 up-to-date with master.
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
Summary.
|
||||
|
||||
## Expected Result
|
||||
|
||||
What you expected.
|
||||
|
||||
## Actual Result
|
||||
|
||||
What happened instead.
|
||||
|
||||
## Reproduction Steps
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
```
|
||||
|
||||
## System Information
|
||||
|
||||
$ python -m requests.help
|
||||
|
||||
```
|
||||
<paste here>
|
||||
```
|
||||
|
||||
This command is only available on Requests v2.16.4 and greater. Otherwise,
|
||||
please provide some basic information about your system (Python version,
|
||||
operating system, &c).
|
||||
@@ -5,7 +5,6 @@ nosetests.xml
|
||||
junit-report.xml
|
||||
pylint.txt
|
||||
toy.py
|
||||
tox.ini
|
||||
cover/
|
||||
build/
|
||||
docs/_build
|
||||
|
||||
+14
-4
@@ -7,7 +7,7 @@ python:
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "3.7-dev"
|
||||
# - "3.7-dev"
|
||||
# - "pypy" -- appears to hang
|
||||
# - "pypy3"
|
||||
# command to install dependencies
|
||||
@@ -16,7 +16,17 @@ install: "make"
|
||||
script:
|
||||
- |
|
||||
if [[ "$TRAVIS_PYTHON_VERSION" != "2.6" ]] ; then make test-readme; fi
|
||||
- make coverage
|
||||
after_success:
|
||||
- pipenv run codecov
|
||||
- make ci
|
||||
cache: pip
|
||||
jobs:
|
||||
include:
|
||||
- stage: test
|
||||
script:
|
||||
- |
|
||||
if [[ "$TRAVIS_PYTHON_VERSION" != "2.6" ]] ; then make test-readme; fi
|
||||
- make ci
|
||||
- stage: coverage
|
||||
python: 3.6
|
||||
script: codecov
|
||||
|
||||
|
||||
|
||||
@@ -184,3 +184,5 @@ Patches and Suggestions
|
||||
- Jonathan Vanasco (`@jvanasco <https://github.com/jvanasco>`_)
|
||||
- David Fontenot (`@davidfontenot <https://github.com/davidfontenot>`_)
|
||||
- Shmuel Amar (`@shmuelamar <https://github.com/shmuelamar>`_)
|
||||
- Gary Wu (`@garywu <https://github.com/garywu>`_)
|
||||
- Ryan Pineo (`@ryanpineo <https://github.com/ryanpineo>`_)
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
Be cordial or be on your way.
|
||||
|
||||
https://www.kennethreitz.org/essays/be-cordial-or-be-on-your-way
|
||||
+117
-8
@@ -3,6 +3,115 @@
|
||||
Release History
|
||||
---------------
|
||||
|
||||
dev
|
||||
+++
|
||||
|
||||
**Bugfixes**
|
||||
|
||||
- Resolve installation failure if multiprocessing is not available
|
||||
- Resolve tests crash if multiprocessing is not able to determine the number of CPU cores
|
||||
|
||||
|
||||
2.17.3 (2017-05-29)
|
||||
+++++++++++++++++++
|
||||
|
||||
**Improvements**
|
||||
|
||||
- Improved ``packages`` namespace identity support, for monkeypatching libraries.
|
||||
|
||||
|
||||
2.17.2 (2017-05-29)
|
||||
+++++++++++++++++++
|
||||
|
||||
**Improvements**
|
||||
|
||||
- Improved ``packages`` namespace identity support, for monkeypatching libraries.
|
||||
|
||||
|
||||
2.17.1 (2017-05-29)
|
||||
+++++++++++++++++++
|
||||
|
||||
**Improvements**
|
||||
|
||||
- Improved ``packages`` namespace identity support, for monkeypatching libraries.
|
||||
|
||||
|
||||
2.17.0 (2017-05-29)
|
||||
+++++++++++++++++++
|
||||
|
||||
**Improvements**
|
||||
|
||||
- Removal of the 301 redirect cache. This improves thread-safety.
|
||||
|
||||
|
||||
2.16.5 (2017-05-28)
|
||||
+++++++++++++++++++
|
||||
|
||||
- Improvements to ``$ python -m requests.help``.
|
||||
|
||||
2.16.4 (2017-05-27)
|
||||
+++++++++++++++++++
|
||||
|
||||
- Introduction of the ``$ python -m requests.help`` command, for debugging with maintainers!
|
||||
|
||||
2.16.3 (2017-05-27)
|
||||
+++++++++++++++++++
|
||||
|
||||
- Further restored the ``requests.packages`` namespace for compatibility reasons.
|
||||
|
||||
2.16.2 (2017-05-27)
|
||||
+++++++++++++++++++
|
||||
|
||||
- Further restored the ``requests.packages`` namespace for compatibility reasons.
|
||||
|
||||
No code modification (noted below) should be neccessary any longer.
|
||||
|
||||
2.16.1 (2017-05-27)
|
||||
+++++++++++++++++++
|
||||
|
||||
- Restored the ``requests.packages`` namespace for compatibility reasons.
|
||||
- Bugfix for ``urllib3`` version parsing.
|
||||
|
||||
**Note**: code that was written to import against the ``requests.packages``
|
||||
namespace previously will have to import code that rests at this module-level
|
||||
now.
|
||||
|
||||
For example::
|
||||
|
||||
from requests.packages.urllib3.poolmanager import PoolManager
|
||||
|
||||
Will need to be re-written to be::
|
||||
|
||||
from requests.packages import urllib3
|
||||
urllib3.poolmanager.PoolManager
|
||||
|
||||
Or, even better::
|
||||
|
||||
from urllib3.poolmanager import PoolManager
|
||||
|
||||
2.16.0 (2017-05-26)
|
||||
+++++++++++++++++++
|
||||
|
||||
- Unvendor ALL the things!
|
||||
|
||||
2.15.1 (2017-05-26)
|
||||
+++++++++++++++++++
|
||||
|
||||
- Everyone makes mistakes.
|
||||
|
||||
2.15.0 (2017-05-26)
|
||||
+++++++++++++++++++
|
||||
|
||||
**Improvements**
|
||||
|
||||
- Introduction of the ``Response.next`` property, for getting the next
|
||||
``PreparedResponse`` from a redirect chain (when ``allow_redirects=False``).
|
||||
- Internal refactoring of ``__version__`` module.
|
||||
|
||||
**Bugfixes**
|
||||
|
||||
- Restored once-optional parameter for ``requests.utils.get_environ_proxies()``.
|
||||
|
||||
2.14.2 (2017-05-10)
|
||||
+++++++++++++++++++
|
||||
|
||||
@@ -529,7 +638,7 @@ This is the first release that follows our new release process. For more, see
|
||||
|
||||
- Unicode URL improvements for Python 2.
|
||||
- Re-order JSON param for backwards compat.
|
||||
- Automatically defrag authentication schemes from host/pass URIs. (`#2249 <https://github.com/kennethreitz/requests/issues/2249>`_)
|
||||
- Automatically defrag authentication schemes from host/pass URIs. (`#2249 <https://github.com/requests/requests/issues/2249>`_)
|
||||
|
||||
|
||||
2.4.2 (2014-10-05)
|
||||
@@ -537,19 +646,19 @@ This is the first release that follows our new release process. For more, see
|
||||
|
||||
**Improvements**
|
||||
|
||||
- FINALLY! Add json parameter for uploads! (`#2258 <https://github.com/kennethreitz/requests/pull/2258>`_)
|
||||
- Support for bytestring URLs on Python 3.x (`#2238 <https://github.com/kennethreitz/requests/pull/2238>`_)
|
||||
- FINALLY! Add json parameter for uploads! (`#2258 <https://github.com/requests/requests/pull/2258>`_)
|
||||
- Support for bytestring URLs on Python 3.x (`#2238 <https://github.com/requests/requests/pull/2238>`_)
|
||||
|
||||
**Bugfixes**
|
||||
|
||||
- Avoid getting stuck in a loop (`#2244 <https://github.com/kennethreitz/requests/pull/2244>`_)
|
||||
- Multiple calls to iter* fail with unhelpful error. (`#2240 <https://github.com/kennethreitz/requests/issues/2240>`_, `#2241 <https://github.com/kennethreitz/requests/issues/2241>`_)
|
||||
- Avoid getting stuck in a loop (`#2244 <https://github.com/requests/requests/pull/2244>`_)
|
||||
- Multiple calls to iter* fail with unhelpful error. (`#2240 <https://github.com/requests/requests/issues/2240>`_, `#2241 <https://github.com/requests/requests/issues/2241>`_)
|
||||
|
||||
**Documentation**
|
||||
|
||||
- Correct redirection introduction (`#2245 <https://github.com/kennethreitz/requests/pull/2245/>`_)
|
||||
- Added example of how to send multiple files in one request. (`#2227 <https://github.com/kennethreitz/requests/pull/2227/>`_)
|
||||
- Clarify how to pass a custom set of CAs (`#2248 <https://github.com/kennethreitz/requests/pull/2248/>`_)
|
||||
- Correct redirection introduction (`#2245 <https://github.com/requests/requests/pull/2245/>`_)
|
||||
- Added example of how to send multiple files in one request. (`#2227 <https://github.com/requests/requests/pull/2227/>`_)
|
||||
- Clarify how to pass a custom set of CAs (`#2248 <https://github.com/requests/requests/pull/2248/>`_)
|
||||
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -1,2 +1,2 @@
|
||||
include README.rst LICENSE NOTICE HISTORY.rst requests/cacert.pem
|
||||
include README.rst LICENSE NOTICE HISTORY.rst pytest.ini
|
||||
recursive-include tests *.py
|
||||
|
||||
@@ -1,52 +1,20 @@
|
||||
.PHONY: docs
|
||||
|
||||
init:
|
||||
pip install pipenv
|
||||
pipenv install --dev
|
||||
pipenv run pip install -e .[socks]
|
||||
|
||||
pip install -r requirements.txt
|
||||
test:
|
||||
# This runs all of the tests. To run an individual test, run py.test with
|
||||
# the -k flag, like "py.test -k test_path_is_not_double_encoded"
|
||||
pipenv run py.test tests
|
||||
# This runs all of the tests, on both Python 2 and Python 3.
|
||||
detox
|
||||
ci:
|
||||
py.test -n 8 --boxed --junitxml=report.xml
|
||||
|
||||
test-readme:
|
||||
pipenv run python setup.py check -r -s
|
||||
python setup.py check -r -s
|
||||
|
||||
flake8:
|
||||
flake8 --ignore=E501,F401,E128,E402,E731,F821 requests
|
||||
|
||||
coverage:
|
||||
pipenv run py.test --cov-config .coveragerc --verbose --cov-report term --cov-report xml --cov=requests tests
|
||||
|
||||
certs:
|
||||
curl http://ci.kennethreitz.org/job/ca-bundle/lastSuccessfulBuild/artifact/cacerts.pem -o requests/cacert.pem
|
||||
|
||||
deps: urllib3 chardet idna
|
||||
|
||||
urllib3:
|
||||
git clone -b release https://github.com/shazow/urllib3.git && \
|
||||
rm -fr requests/packages/urllib3 && \
|
||||
cd urllib3 && \
|
||||
git checkout `git describe --abbrev=0 --tags` && \
|
||||
cd .. && \
|
||||
mv urllib3/urllib3 requests/packages/ \
|
||||
&& rm -fr urllib3
|
||||
|
||||
chardet:
|
||||
git clone https://github.com/chardet/chardet.git && \
|
||||
rm -fr requests/packages/chardet && \
|
||||
cd chardet && \
|
||||
git checkout `git describe --abbrev=0 --tags` && \
|
||||
cd .. && \
|
||||
mv chardet/chardet requests/packages/ && \
|
||||
rm -fr chardet
|
||||
|
||||
idna:
|
||||
git clone https://github.com/kjd/idna.git && \
|
||||
rm -fr requests/packages/idna && \
|
||||
cd idna && \
|
||||
git checkout `git describe --abbrev=0 --tags` && \
|
||||
cd .. && \
|
||||
mv idna/idna requests/packages/ && \
|
||||
rm -fr idna
|
||||
py.test --cov-config .coveragerc --verbose --cov-report term --cov-report xml --cov=requests tests
|
||||
|
||||
publish:
|
||||
pip install 'twine>=1.5.0'
|
||||
@@ -56,4 +24,4 @@ publish:
|
||||
|
||||
docs:
|
||||
cd docs && make html
|
||||
@echo "\033[95m\n\nBuild successful! View the docs homepage at docs/_build/html/index.html.\n\033[0m"
|
||||
@echo "\033[95m\n\nBuild successful! View the docs homepage at docs/_build/html/index.html.\n\033[0m"
|
||||
@@ -1,137 +0,0 @@
|
||||
Requests includes some vendorized python libraries to ease installation.
|
||||
|
||||
Urllib3 License
|
||||
===============
|
||||
|
||||
This is the MIT license: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt),
|
||||
Modifications copyright 2012 Kenneth Reitz.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Chardet License
|
||||
===============
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
02110-1301 USA
|
||||
|
||||
|
||||
CA Bundle License
|
||||
=================
|
||||
|
||||
This Source Code Form is subject to the terms of the Mozilla Public
|
||||
License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
|
||||
Idna License
|
||||
============
|
||||
|
||||
Copyright (c) 2013-2016, Kim Davies. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
#. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
#. Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
#. Neither the name of the copyright holder nor the names of the
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
#. THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS "AS IS" AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGE.
|
||||
|
||||
Portions of the codec implementation and unit tests are derived from the
|
||||
Python standard library, which carries the `Python Software Foundation
|
||||
License <https://docs.python.org/2/license.html>`_:
|
||||
|
||||
Copyright (c) 2001-2014 Python Software Foundation; All Rights Reserved
|
||||
|
||||
Portions of the unit tests are derived from the Unicode standard, which
|
||||
is subject to the Unicode, Inc. License Agreement:
|
||||
|
||||
Copyright (c) 1991-2014 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in
|
||||
<http://www.unicode.org/copyright.html>.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that
|
||||
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software,
|
||||
|
||||
(b) this copyright and permission notice appear in associated
|
||||
documentation, and
|
||||
|
||||
(c) there is clear notice in each modified Data File or in the Software
|
||||
as well as in the documentation associated with the Data File(s) or
|
||||
Software that the data or software has been modified.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
[[source]]
|
||||
url = "https://pypi.python.org/simple"
|
||||
verify_ssl = true
|
||||
|
||||
[dev-packages]
|
||||
pytest = ">=2.8.0"
|
||||
codecov = "*"
|
||||
pytest-httpbin = "==0.0.7"
|
||||
pytest-mock = "*"
|
||||
pytest-cov = "*"
|
||||
alabaster = "*"
|
||||
readme_renderer = "*"
|
||||
Sphinx = "<=1.5.5"
|
||||
PySocks = "*"
|
||||
Generated
-116
@@ -1,116 +0,0 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "a26ef57f42fa3da7d70d1c1decb4e6445a6ff66e47c9094ddc9ab8e1f4c0a5b6"
|
||||
},
|
||||
"requires": {},
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://pypi.python.org/simple",
|
||||
"verify_ssl": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"default": {},
|
||||
"develop": {
|
||||
"alabaster": {
|
||||
"version": "==0.7.10"
|
||||
},
|
||||
"appdirs": {
|
||||
"version": "==1.4.3"
|
||||
},
|
||||
"babel": {
|
||||
"version": "==2.4.0"
|
||||
},
|
||||
"click": {
|
||||
"version": "==6.7"
|
||||
},
|
||||
"codecov": {
|
||||
"version": "==2.0.9"
|
||||
},
|
||||
"coverage": {
|
||||
"version": "==4.4.1"
|
||||
},
|
||||
"decorator": {
|
||||
"version": "==4.0.11"
|
||||
},
|
||||
"docutils": {
|
||||
"version": "==0.13.1"
|
||||
},
|
||||
"flask": {
|
||||
"version": "==0.12.2"
|
||||
},
|
||||
"funcsigs": {
|
||||
"version": "==1.0.2"
|
||||
},
|
||||
"httpbin": {
|
||||
"version": "==0.5.0"
|
||||
},
|
||||
"imagesize": {
|
||||
"version": "==0.7.1"
|
||||
},
|
||||
"itsdangerous": {
|
||||
"version": "==0.24"
|
||||
},
|
||||
"jinja2": {
|
||||
"version": "==2.9.6"
|
||||
},
|
||||
"markupsafe": {
|
||||
"version": "==1.0"
|
||||
},
|
||||
"mock": {
|
||||
"version": "==2.0.0"
|
||||
},
|
||||
"packaging": {
|
||||
"version": "==16.8"
|
||||
},
|
||||
"pbr": {
|
||||
"version": "==3.0.0"
|
||||
},
|
||||
"py": {
|
||||
"version": "==1.4.33"
|
||||
},
|
||||
"pygments": {
|
||||
"version": "==2.2.0"
|
||||
},
|
||||
"pyparsing": {
|
||||
"version": "==2.2.0"
|
||||
},
|
||||
"pysocks": {
|
||||
"version": "==1.6.7"
|
||||
},
|
||||
"pytest": {
|
||||
"version": "==3.0.7"
|
||||
},
|
||||
"pytest-cov": {
|
||||
"version": "==2.5.1"
|
||||
},
|
||||
"pytest-httpbin": {
|
||||
"version": "==0.0.7"
|
||||
},
|
||||
"pytest-mock": {
|
||||
"version": "==1.6.0"
|
||||
},
|
||||
"pytz": {
|
||||
"version": "==2017.2"
|
||||
},
|
||||
"requests": {
|
||||
"version": "==2.14.2"
|
||||
},
|
||||
"setuptools": {
|
||||
"version": "==35.0.2"
|
||||
},
|
||||
"six": {
|
||||
"version": "==1.10.0"
|
||||
},
|
||||
"snowballstemmer": {
|
||||
"version": "==1.2.1"
|
||||
},
|
||||
"sphinx": {
|
||||
"version": "==1.5.5"
|
||||
},
|
||||
"werkzeug": {
|
||||
"version": "==0.12.2"
|
||||
}
|
||||
}
|
||||
}
|
||||
+11
-16
@@ -10,16 +10,13 @@ Requests: HTTP for Humans
|
||||
.. image:: https://img.shields.io/pypi/pyversions/requests.svg
|
||||
:target: https://pypi.python.org/pypi/requests
|
||||
|
||||
.. image:: https://travis-ci.org/kennethreitz/requests.svg?branch=master
|
||||
:target: https://travis-ci.org/kennethreitz/requests
|
||||
|
||||
.. image:: https://codecov.io/github/kennethreitz/requests/coverage.svg?branch=master
|
||||
:target: https://codecov.io/github/kennethreitz/requests
|
||||
.. image:: https://codecov.io/github/requests/requests/coverage.svg?branch=master
|
||||
:target: https://codecov.io/github/requests/requests
|
||||
:alt: codecov.io
|
||||
|
||||
.. image:: https://img.shields.io/github/contributors/kennethreitz/requests.svg
|
||||
:target: https://github.com/kennethreitz/requests/graphs/contributors
|
||||
|
||||
|
||||
.. image:: https://img.shields.io/github/contributors/requests/requests.svg
|
||||
:target: https://github.com/requests/requests/graphs/contributors
|
||||
|
||||
.. image:: https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg
|
||||
:target: https://saythanks.io/to/kennethreitz
|
||||
|
||||
@@ -50,15 +47,14 @@ Behold, the power of Requests:
|
||||
|
||||
See `the similar code, sans Requests <https://gist.github.com/973705>`_.
|
||||
|
||||
.. image:: https://raw.githubusercontent.com/kennethreitz/requests/master/docs/_static/requests-logo-small.png
|
||||
.. image:: https://raw.githubusercontent.com/requests/requests/master/docs/_static/requests-logo-small.png
|
||||
:target: http://docs.python-requests.org/
|
||||
|
||||
|
||||
Requests allows you to send *organic, grass-fed* HTTP/1.1 requests, without the
|
||||
need for manual labor. There's no need to manually add query strings to your
|
||||
URLs, or to form-encode your POST data. Keep-alive and HTTP connection pooling
|
||||
are 100% automatic, powered by `urllib3 <https://github.com/shazow/urllib3>`_,
|
||||
which is embedded within Requests.
|
||||
are 100% automatic, thanks to `urllib3 <https://github.com/shazow/urllib3>`_.
|
||||
|
||||
Besides, all the cool kids are doing it. Requests is one of the most
|
||||
downloaded Python packages of all time, pulling in over 11,000,000 downloads
|
||||
@@ -84,7 +80,6 @@ Requests is ready for today's web.
|
||||
- Streaming Downloads
|
||||
- ``.netrc`` Support
|
||||
- Chunked Requests
|
||||
- Thread-safety
|
||||
|
||||
Requests officially supports Python 2.6–2.7 & 3.3–3.7, and runs great on PyPy.
|
||||
|
||||
@@ -114,6 +109,6 @@ How to Contribute
|
||||
#. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
#. Send a pull request and bug the maintainer until it gets merged and published. :) Make sure to add yourself to AUTHORS_.
|
||||
|
||||
.. _`the repository`: http://github.com/kennethreitz/requests
|
||||
.. _AUTHORS: https://github.com/kennethreitz/requests/blob/master/AUTHORS.rst
|
||||
.. _Contributor Friendly: https://github.com/kennethreitz/requests/issues?direction=desc&labels=Contributor+Friendly&page=1&sort=updated&state=open
|
||||
.. _`the repository`: http://github.com/requests/requests
|
||||
.. _AUTHORS: https://github.com/requests/requests/blob/master/AUTHORS.rst
|
||||
.. _Contributor Friendly: https://github.com/requests/requests/issues?direction=desc&labels=Contributor+Friendly&page=1&sort=updated&state=open
|
||||
|
||||
+1
-1
@@ -53,4 +53,4 @@ test_script:
|
||||
- "C:\\MinGW\\bin\\mingw32-make coverage"
|
||||
|
||||
on_success:
|
||||
- "pipenv run codecov -f coverage.xml"
|
||||
- "codecov -f coverage.xml"
|
||||
|
||||
Vendored
+3
-3
@@ -5,7 +5,7 @@
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<iframe src="http://ghbtns.com/github-btn.html?user=kennethreitz&repo=requests&type=watch&count=true&size=large"
|
||||
<iframe src="http://ghbtns.com/github-btn.html?user=requests&repo=requests&type=watch&count=true&size=large"
|
||||
allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
|
||||
</p>
|
||||
|
||||
@@ -58,9 +58,9 @@
|
||||
|
||||
<p></p>
|
||||
|
||||
<li><a href="http://github.com/kennethreitz/requests">Requests @ GitHub</a></li>
|
||||
<li><a href="http://github.com/requests/requests">Requests @ GitHub</a></li>
|
||||
<li><a href="http://pypi.python.org/pypi/requests">Requests @ PyPI</a></li>
|
||||
<li><a href="http://github.com/kennethreitz/requests/issues">Issue Tracker</a></li>
|
||||
<li><a href="http://github.com/requests/requests/issues">Issue Tracker</a></li>
|
||||
<li><a href="http://docs.python-requests.org/en/latest/community/updates/#software-updates">Release History</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
Vendored
+1
-1
@@ -4,7 +4,7 @@
|
||||
</a>
|
||||
</p>
|
||||
<p>
|
||||
<iframe src="http://ghbtns.com/github-btn.html?user=kennethreitz&repo=requests&type=watch&count=true&size=large"
|
||||
<iframe src="http://ghbtns.com/github-btn.html?user=requests&repo=requests&type=watch&count=true&size=large"
|
||||
allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
|
||||
</p>
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ File an Issue
|
||||
|
||||
If you notice some unexpected behaviour in Requests, or want to see support
|
||||
for a new feature,
|
||||
`file an issue on GitHub <https://github.com/kennethreitz/requests/issues>`_.
|
||||
`file an issue on GitHub <https://github.com/requests/requests/issues>`_.
|
||||
|
||||
|
||||
E-mail
|
||||
|
||||
@@ -12,7 +12,7 @@ GitHub
|
||||
------
|
||||
|
||||
The best way to track the development of Requests is through
|
||||
`the GitHub repo <https://github.com/kennethreitz/requests>`_.
|
||||
`the GitHub repo <https://github.com/requests/requests>`_.
|
||||
|
||||
Twitter
|
||||
-------
|
||||
|
||||
+3
-4
@@ -25,7 +25,6 @@ sys.path.insert(0, os.path.abspath('..'))
|
||||
sys.path.insert(0, os.path.abspath('_themes'))
|
||||
|
||||
import requests
|
||||
from requests import __version__
|
||||
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
@@ -67,9 +66,9 @@ author = u'Kenneth Reitz'
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = __version__
|
||||
version = requests.__version__
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = __version__
|
||||
release = requests.__version__
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
@@ -127,7 +126,7 @@ html_theme = 'alabaster'
|
||||
# documentation.
|
||||
html_theme_options = {
|
||||
'show_powered_by': False,
|
||||
'github_user': 'kennethreitz',
|
||||
'github_user': 'requests',
|
||||
'github_repo': 'requests',
|
||||
'github_banner': True,
|
||||
'show_related': False
|
||||
|
||||
@@ -187,7 +187,7 @@ through the `GitHub issues`_, **both open and closed**, to confirm that the bug
|
||||
hasn't been reported before. Duplicate bug reports are a huge drain on the time
|
||||
of other contributors, and should be avoided as much as possible.
|
||||
|
||||
.. _GitHub issues: https://github.com/kennethreitz/requests/issues
|
||||
.. _GitHub issues: https://github.com/requests/requests/issues
|
||||
|
||||
|
||||
Feature Requests
|
||||
|
||||
@@ -7,9 +7,9 @@ Requests is an open but opinionated library, created by an open but opinionated
|
||||
Management Style
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
`Kenneth Reitz <http://kennethreitz.org>`_ is the BDFL. He has final say in any decision related to the Requests project. Kenneth is responsible for the direction and form of the library. In addition to making decisions based on technical merit, he is responsible for making decisions based on the development philosophy of Requests. Only Kenneth may merge code into Requests.
|
||||
`Kenneth Reitz <http://kennethreitz.org>`_ is the BDFL. He has final say in any decision related to the Requests project. Kenneth is responsible for the direction and form of the library, as well as its presentation. In addition to making decisions based on technical merit, he is responsible for making decisions based on the development philosophy of Requests.
|
||||
|
||||
`Ian Cordasco <http://www.coglib.com/~icordasc/>`_ and `Cory Benfield <https://lukasa.co.uk/about/>`_ are the core contributors. They are responsible for triaging bug reports, reviewing pull requests and ensuring that Kenneth is kept up to speed with developments around the library. The day-to-day managing of the project is done by the core contributors. They are responsible for making judgements about whether or not a feature request is likely to be accepted by Kenneth. They do not have the authority to change code or merge code changes, though they may change documentation. Their word is not final.
|
||||
`Ian Cordasco <http://www.coglib.com/~icordasc/>`_ and `Cory Benfield <https://lukasa.co.uk/about/>`_ are the core contributors. They are responsible for triaging bug reports, reviewing pull requests and ensuring that Kenneth is kept up to speed with developments around the library. The day-to-day managing of the project is done by the core contributors. They are responsible for making judgements about whether or not a feature request is likely to be accepted by Kenneth. Their word is, in some ways, more final than Kenneth's.
|
||||
|
||||
Values
|
||||
~~~~~~
|
||||
|
||||
+9
-16
@@ -6,11 +6,11 @@ Requests is under active development, and contributions are more than welcome!
|
||||
#. Check for open issues or open a fresh issue to start a discussion around a bug.
|
||||
There is a Contributor Friendly tag for issues that should be ideal for people who are not very
|
||||
familiar with the codebase yet.
|
||||
#. Fork `the repository <https://github.com/kennethreitz/requests>`_ on GitHub and start making your
|
||||
#. Fork `the repository <https://github.com/requests/requests>`_ on GitHub and start making your
|
||||
changes to a new branch.
|
||||
#. Write a test which shows that the bug was fixed.
|
||||
#. Send a pull request and bug the maintainer until it gets merged and published. :)
|
||||
Make sure to add yourself to `AUTHORS <https://github.com/kennethreitz/requests/blob/master/AUTHORS.rst>`_.
|
||||
Make sure to add yourself to `AUTHORS <https://github.com/requests/requests/blob/master/AUTHORS.rst>`_.
|
||||
|
||||
Feature Freeze
|
||||
--------------
|
||||
@@ -23,10 +23,11 @@ Development Dependencies
|
||||
|
||||
You'll need to install py.test in order to run the Requests' test suite::
|
||||
|
||||
$ pip install pipenv
|
||||
$ pipenv lock
|
||||
$ pipenv install --dev
|
||||
$ pipenv run py.test tests
|
||||
$ venv .venv
|
||||
$ source .venv/bin/activate
|
||||
|
||||
$ make
|
||||
$ python setup.py test
|
||||
============================= test session starts ==============================
|
||||
platform darwin -- Python 3.4.4, pytest-3.0.6, py-1.4.32, pluggy-0.4.0
|
||||
...
|
||||
@@ -41,6 +42,8 @@ You'll need to install py.test in order to run the Requests' test suite::
|
||||
|
||||
============== 442 passed, 1 skipped, 2 xpassed in 46.48 seconds ===============
|
||||
|
||||
You can also run ``$ make tests`` to run against all supported Python versions, using tox/detox.
|
||||
|
||||
Runtime Environments
|
||||
--------------------
|
||||
|
||||
@@ -59,13 +62,3 @@ with the `Requests-Toolbelt`_.
|
||||
|
||||
.. _Requests-Toolbelt: http://toolbelt.readthedocs.io/
|
||||
|
||||
|
||||
Are you crazy?
|
||||
--------------
|
||||
|
||||
- SPDY support would be awesome. No C extensions.
|
||||
|
||||
Downstream Repackaging
|
||||
----------------------
|
||||
|
||||
If you are repackaging Requests, please note that you must also redistribute the ``cacerts.pem`` file in order to get correct SSL functionality.
|
||||
|
||||
+7
-12
@@ -17,11 +17,8 @@ Release v\ |version|. (:ref:`Installation <install>`)
|
||||
.. image:: https://img.shields.io/pypi/pyversions/requests.svg
|
||||
:target: https://pypi.python.org/pypi/requests
|
||||
|
||||
.. image:: https://travis-ci.org/kennethreitz/requests.svg?branch=master
|
||||
:target: https://travis-ci.org/kennethreitz/requests
|
||||
|
||||
.. image:: https://codecov.io/github/kennethreitz/requests/coverage.svg?branch=master
|
||||
:target: https://codecov.io/github/kennethreitz/requests
|
||||
.. image:: https://codecov.io/github/requests/requests/coverage.svg?branch=master
|
||||
:target: https://codecov.io/github/requests/requests
|
||||
:alt: codecov.io
|
||||
|
||||
.. image:: https://img.shields.io/badge/Say%20Thanks!-🦉-1EAEDB.svg
|
||||
@@ -57,15 +54,14 @@ See `similar code, sans Requests <https://gist.github.com/973705>`_.
|
||||
**Requests** allows you to send *organic, grass-fed* HTTP/1.1 requests, without the
|
||||
need for manual labor. There's no need to manually add query strings to your
|
||||
URLs, or to form-encode your POST data. Keep-alive and HTTP connection pooling
|
||||
are 100% automatic, powered by `urllib3 <https://github.com/shazow/urllib3>`_,
|
||||
which is embedded within Requests.
|
||||
are 100% automatic, thanks to `urllib3 <https://github.com/shazow/urllib3>`_.
|
||||
|
||||
User Testimonials
|
||||
-----------------
|
||||
|
||||
Reddit, The NSA, Her Majesty's Government, Amazon, Google, Twilio, Runscope, Mozilla, Heroku,
|
||||
Twitter, Spotify, Microsoft, Amazon, Lyft, BuzzFeed, Reddit, The NSA, Her Majesty's Government, Google, Twilio, Runscope, Mozilla, Heroku,
|
||||
PayPal, NPR, Obama for America, Transifex, Native Instruments, The Washington
|
||||
Post, Twitter, SoundCloud, Kippt, Sony, and Federal U.S.
|
||||
Post, SoundCloud, Kippt, Sony, and Federal U.S.
|
||||
Institutions that prefer to be unnamed claim to use Requests internally.
|
||||
|
||||
**Armin Ronacher**—
|
||||
@@ -73,12 +69,12 @@ Institutions that prefer to be unnamed claim to use Requests internally.
|
||||
right level of abstraction.*
|
||||
|
||||
**Matt DeBoard**—
|
||||
*I'm going to get `@kennethreitz <https://twitter.com/kennethreitz>`_'s Python requests module tattooed
|
||||
*I'm going to get Kenneth Reitz's Python requests module tattooed
|
||||
on my body, somehow. The whole thing.*
|
||||
|
||||
**Daniel Greenfeld**—
|
||||
*Nuked a 1200 LOC spaghetti code library with 10 lines of code thanks to
|
||||
`@kennethreitz <https://twitter.com/kennethreitz>`_'s request library. Today has been AWESOME.*
|
||||
Kenneth Reitz's request library. Today has been AWESOME.*
|
||||
|
||||
**Kenny Meyers**—
|
||||
*Python HTTP: When in doubt, or when not in doubt, use Requests. Beautiful,
|
||||
@@ -107,7 +103,6 @@ Requests is ready for today's web.
|
||||
- Connection Timeouts
|
||||
- Chunked Requests
|
||||
- ``.netrc`` Support
|
||||
- Thread-safety
|
||||
|
||||
Requests officially supports Python 2.6–2.7 & 3.3–3.7, and runs great on PyPy.
|
||||
|
||||
|
||||
@@ -225,7 +225,7 @@ Requests can also ignore verifying the SSL certificate if you set ``verify`` to
|
||||
|
||||
By default, ``verify`` is set to True. Option ``verify`` only applies to host certs.
|
||||
|
||||
Client side certificates
|
||||
Client Side Certificates
|
||||
------------------------
|
||||
|
||||
You can also specify a local cert to use as client side certificate, as a single
|
||||
@@ -280,7 +280,7 @@ immediately. You can override this behaviour and defer downloading the response
|
||||
body until you access the :attr:`Response.content <requests.Response.content>`
|
||||
attribute with the ``stream`` parameter::
|
||||
|
||||
tarball_url = 'https://github.com/kennethreitz/requests/tarball/master'
|
||||
tarball_url = 'https://github.com/requests/requests/tarball/master'
|
||||
r = requests.get(tarball_url, stream=True)
|
||||
|
||||
At this point only the response headers have been downloaded and the connection
|
||||
@@ -642,7 +642,7 @@ from GitHub. Suppose we wanted commit ``a050faf`` on Requests. We would get it
|
||||
like so::
|
||||
|
||||
>>> import requests
|
||||
>>> r = requests.get('https://api.github.com/repos/kennethreitz/requests/git/commits/a050faf084662f3a352dd1a941f2c7c9f886d4ad')
|
||||
>>> r = requests.get('https://api.github.com/repos/requests/requests/git/commits/a050faf084662f3a352dd1a941f2c7c9f886d4ad')
|
||||
|
||||
We should confirm that GitHub responded correctly. If it has, we want to work
|
||||
out what type of content it is. Do this like so::
|
||||
@@ -697,12 +697,12 @@ we should probably avoid making ham-handed POSTS to it. Instead, let's play
|
||||
with the Issues feature of GitHub.
|
||||
|
||||
This documentation was added in response to
|
||||
`Issue #482 <https://github.com/kennethreitz/requests/issues/482>`_. Given that
|
||||
`Issue #482 <https://github.com/requests/requests/issues/482>`_. Given that
|
||||
this issue already exists, we will use it as an example. Let's start by getting it.
|
||||
|
||||
::
|
||||
|
||||
>>> r = requests.get('https://api.github.com/repos/kennethreitz/requests/issues/482')
|
||||
>>> r = requests.get('https://api.github.com/repos/requests/requests/issues/482')
|
||||
>>> r.status_code
|
||||
200
|
||||
|
||||
@@ -745,7 +745,7 @@ is to POST to the thread. Let's do it.
|
||||
::
|
||||
|
||||
>>> body = json.dumps({u"body": u"Sounds great! I'll get right on it!"})
|
||||
>>> url = u"https://api.github.com/repos/kennethreitz/requests/issues/482/comments"
|
||||
>>> url = u"https://api.github.com/repos/requests/requests/issues/482/comments"
|
||||
|
||||
>>> r = requests.post(url=url, data=body)
|
||||
>>> r.status_code
|
||||
@@ -779,7 +779,7 @@ that.
|
||||
5804413
|
||||
|
||||
>>> body = json.dumps({u"body": u"Sounds great! I'll get right on it once I feed my cat."})
|
||||
>>> url = u"https://api.github.com/repos/kennethreitz/requests/issues/comments/5804413"
|
||||
>>> url = u"https://api.github.com/repos/requests/requests/issues/comments/5804413"
|
||||
|
||||
>>> r = requests.patch(url=url, data=body, auth=auth)
|
||||
>>> r.status_code
|
||||
|
||||
+11
-10
@@ -1,14 +1,14 @@
|
||||
.. _install:
|
||||
|
||||
Installation
|
||||
============
|
||||
Installation of Requests
|
||||
========================
|
||||
|
||||
This part of the documentation covers the installation of Requests.
|
||||
The first step to using any software package is getting it properly installed.
|
||||
|
||||
|
||||
Pip Install Requests
|
||||
--------------------
|
||||
$ pip install requests
|
||||
----------------------
|
||||
|
||||
To install Requests, simply run this simple command in your terminal of choice::
|
||||
|
||||
@@ -22,18 +22,19 @@ Get the Source Code
|
||||
-------------------
|
||||
|
||||
Requests is actively developed on GitHub, where the code is
|
||||
`always available <https://github.com/kennethreitz/requests>`_.
|
||||
`always available <https://github.com/requests/requests>`_.
|
||||
|
||||
You can either clone the public repository::
|
||||
|
||||
$ git clone git://github.com/kennethreitz/requests.git
|
||||
$ git clone git://github.com/requests/requests.git
|
||||
|
||||
Or, download the `tarball <https://github.com/kennethreitz/requests/tarball/master>`_::
|
||||
Or, download the `tarball <https://github.com/requests/requests/tarball/master>`_::
|
||||
|
||||
$ curl -OL https://github.com/kennethreitz/requests/tarball/master
|
||||
# optionally, zipball is also available (for Windows users).
|
||||
$ curl -OL https://github.com/requests/requests/tarball/master
|
||||
# optionally, zipball is also available (for Windows users).
|
||||
|
||||
Once you have a copy of the source, you can embed it in your own Python
|
||||
package, or install it into your site-packages easily::
|
||||
|
||||
$ python setup.py install
|
||||
$ cd requests
|
||||
$ pip install .
|
||||
|
||||
@@ -234,7 +234,24 @@ dictionary of data will automatically be form-encoded when the request is made::
|
||||
...
|
||||
}
|
||||
|
||||
There are many times that you want to send data that is not form-encoded. If
|
||||
You can also pass a list of tuples to the ``data`` argument. This is particularly
|
||||
useful when the form has multiple elements that use the same key::
|
||||
|
||||
>>> payload = (('key1', 'value1'), ('key1', 'value2'))
|
||||
>>> r = requests.post('http://httpbin.org/post', data=payload)
|
||||
>>> print(r.text)
|
||||
{
|
||||
...
|
||||
"form": {
|
||||
"key1": [
|
||||
"value1",
|
||||
"value2"
|
||||
]
|
||||
},
|
||||
...
|
||||
}
|
||||
|
||||
There are times that you may want to send data that is not form-encoded. If
|
||||
you pass in a ``string`` instead of a ``dict``, that data will be posted directly.
|
||||
|
||||
For example, the GitHub API v3 accepts JSON-Encoded POST/PATCH data::
|
||||
@@ -489,7 +506,7 @@ Timeouts
|
||||
--------
|
||||
|
||||
You can tell Requests to stop waiting for a response after a given number of
|
||||
seconds with the ``timeout`` parameter. Nearly all production code should use
|
||||
seconds with the ``timeout`` parameter. Nearly all production code should use
|
||||
this parameter in nearly all requests. Failure to do so can cause your program
|
||||
to hang indefinitely::
|
||||
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
[pytest]
|
||||
addopts = -p no:warnings
|
||||
+37
-10
@@ -6,7 +6,7 @@
|
||||
# /
|
||||
|
||||
"""
|
||||
Requests HTTP library
|
||||
Requests HTTP Library
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Requests is an HTTP library, written in Python, for human beings. Basic GET
|
||||
@@ -36,20 +36,42 @@ usage:
|
||||
The other HTTP methods are supported - see `requests.api`. Full documentation
|
||||
is at <http://python-requests.org>.
|
||||
|
||||
:copyright: (c) 2016 by Kenneth Reitz.
|
||||
:copyright: (c) 2017 by Kenneth Reitz.
|
||||
:license: Apache 2.0, see LICENSE for more details.
|
||||
"""
|
||||
|
||||
__title__ = 'requests'
|
||||
__version__ = '3.0.0'
|
||||
__build__ = 0x030000
|
||||
__author__ = 'Kenneth Reitz'
|
||||
__license__ = 'Apache 2.0'
|
||||
__copyright__ = 'Copyright 2016 Kenneth Reitz'
|
||||
# Check urllib3 for compatibility.
|
||||
import urllib3
|
||||
urllib3_version = urllib3.__version__.split('.')
|
||||
# Sometimes, urllib3 only reports its version as 16.1.
|
||||
if len(urllib3_version) == 2:
|
||||
urllib3_version.append('0')
|
||||
major, minor, patch = urllib3_version
|
||||
major, minor, patch = int(major), int(minor), int(patch)
|
||||
# urllib3 >= 1.21.1, < 1.22
|
||||
try:
|
||||
assert major == 1
|
||||
assert minor >= 21
|
||||
assert minor <= 22
|
||||
except AssertionError:
|
||||
raise RuntimeError('Requests dependency \'urllib3\' must be version >= 1.21.1, < 1.22!')
|
||||
|
||||
|
||||
# Check chardet for compatibility.
|
||||
import chardet
|
||||
major, minor, patch = chardet.__version__.split('.')[:3]
|
||||
major, minor, patch = int(major), int(minor), int(patch)
|
||||
# chardet >= 3.0.2, < 3.1.0
|
||||
try:
|
||||
assert major == 3
|
||||
assert minor < 1
|
||||
assert patch >= 2
|
||||
except AssertionError:
|
||||
raise RuntimeError('Requests dependency \'chardet\' must be version >= 3.0.2, < 3.1.0!')
|
||||
|
||||
# Attempt to enable urllib3's SNI support, if possible
|
||||
try:
|
||||
from .packages.urllib3.contrib import pyopenssl
|
||||
from urllib3.contrib import pyopenssl
|
||||
pyopenssl.inject_into_urllib3()
|
||||
except ImportError:
|
||||
pass
|
||||
@@ -57,10 +79,15 @@ except ImportError:
|
||||
import warnings
|
||||
|
||||
# urllib3's DependencyWarnings should be silenced.
|
||||
from .packages.urllib3.exceptions import DependencyWarning
|
||||
from urllib3.exceptions import DependencyWarning
|
||||
warnings.simplefilter('ignore', DependencyWarning)
|
||||
|
||||
from .__version__ import __title__, __description__, __url__, __version__
|
||||
from .__version__ import __build__, __author__, __author_email__, __license__
|
||||
from .__version__ import __copyright__, __cake__
|
||||
|
||||
from . import utils
|
||||
from . import packages
|
||||
from .models import Request, Response, PreparedRequest
|
||||
from .api import request, get, head, post, patch, put, delete, options
|
||||
from .sessions import session, Session
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
# .-. .-. .-. . . .-. .-. .-. .-.
|
||||
# |( |- |.| | | |- `-. | `-.
|
||||
# ' ' `-' `-`.`-' `-' `-' ' `-'
|
||||
|
||||
__title__ = 'requests'
|
||||
__description__ = 'Python HTTP for Humans.'
|
||||
__url__ = 'http://python-requests.org'
|
||||
__version__ = '3.0.0'
|
||||
__build__ = 0x030000
|
||||
__author__ = 'Kenneth Reitz'
|
||||
__author_email__ = 'me@kennethreitz.org'
|
||||
__license__ = 'Apache 2.0'
|
||||
__copyright__ = 'Copyright 2017 Kenneth Reitz'
|
||||
__cake__ = u'\u2728 \U0001f370 \u2728'
|
||||
+18
-17
@@ -15,33 +15,34 @@ try:
|
||||
except ImportError: # threading is an optional module and may not be present.
|
||||
from dummy_threading import RLock
|
||||
|
||||
from urllib3.poolmanager import PoolManager, proxy_from_url
|
||||
from urllib3.response import HTTPResponse
|
||||
from urllib3.util import Timeout as TimeoutSauce
|
||||
from urllib3.util.retry import Retry
|
||||
from urllib3.exceptions import ClosedPoolError
|
||||
from urllib3.exceptions import ConnectTimeoutError
|
||||
from urllib3.exceptions import HTTPError as _HTTPError
|
||||
from urllib3.exceptions import MaxRetryError
|
||||
from urllib3.exceptions import NewConnectionError
|
||||
from urllib3.exceptions import ProxyError as _ProxyError
|
||||
from urllib3.exceptions import ProtocolError
|
||||
from urllib3.exceptions import ReadTimeoutError
|
||||
from urllib3.exceptions import SSLError as _SSLError
|
||||
from urllib3.exceptions import ResponseError
|
||||
|
||||
from .models import Response
|
||||
from .packages.urllib3.poolmanager import PoolManager, proxy_from_url
|
||||
from .packages.urllib3.response import HTTPResponse
|
||||
from .packages.urllib3.util import Timeout as TimeoutSauce
|
||||
from .packages.urllib3.util.retry import Retry
|
||||
from .compat import urlparse, basestring
|
||||
from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers,
|
||||
prepend_scheme_if_needed, get_auth_from_url, urldefragauth,
|
||||
select_proxy)
|
||||
from .structures import CaseInsensitiveDict
|
||||
from .packages.urllib3.exceptions import ClosedPoolError
|
||||
from .packages.urllib3.exceptions import ConnectTimeoutError
|
||||
from .packages.urllib3.exceptions import HTTPError as _HTTPError
|
||||
from .packages.urllib3.exceptions import MaxRetryError
|
||||
from .packages.urllib3.exceptions import NewConnectionError
|
||||
from .packages.urllib3.exceptions import ProxyError as _ProxyError
|
||||
from .packages.urllib3.exceptions import ProtocolError
|
||||
from .packages.urllib3.exceptions import ReadTimeoutError
|
||||
from .packages.urllib3.exceptions import SSLError as _SSLError
|
||||
from .packages.urllib3.exceptions import ResponseError
|
||||
from .cookies import extract_cookies_to_jar
|
||||
from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError,
|
||||
ProxyError, RetryError, InvalidScheme)
|
||||
from .auth import _basic_auth_str
|
||||
|
||||
try:
|
||||
from .packages.urllib3.contrib.socks import SOCKSProxyManager
|
||||
from urllib3.contrib.socks import SOCKSProxyManager
|
||||
except ImportError:
|
||||
def SOCKSProxyManager(*args, **kwargs):
|
||||
raise InvalidScheme("Missing dependencies for SOCKS support.")
|
||||
@@ -176,7 +177,7 @@ class HTTPAdapter(BaseAdapter):
|
||||
:param proxy: The proxy to return a urllib3 ProxyManager for.
|
||||
:param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager.
|
||||
:returns: ProxyManager
|
||||
:rtype: requests.packages.urllib3.ProxyManager
|
||||
:rtype: urllib3.ProxyManager
|
||||
"""
|
||||
if proxy in self.proxy_manager:
|
||||
manager = self.proxy_manager[proxy]
|
||||
@@ -306,7 +307,7 @@ class HTTPAdapter(BaseAdapter):
|
||||
|
||||
:param url: The URL to connect to.
|
||||
:param proxies: (optional) A Requests-style dictionary of proxies used on this request.
|
||||
:rtype: requests.packages.urllib3.ConnectionPool
|
||||
:rtype: urllib3.ConnectionPool
|
||||
"""
|
||||
with self._pool_kw_lock:
|
||||
if url.lower().startswith('https'):
|
||||
|
||||
+1
-1
@@ -141,7 +141,7 @@ def patch(url, data=None, **kwargs):
|
||||
:rtype: requests.Response
|
||||
"""
|
||||
|
||||
return request('patch', url, data=data, **kwargs)
|
||||
return request('patch', url, data=data, **kwargs)
|
||||
|
||||
|
||||
def delete(url, **kwargs):
|
||||
|
||||
+2
-2
@@ -163,7 +163,7 @@ class HTTPDigestAuth(AuthBase):
|
||||
elif qop == 'auth' or 'auth' in qop.split(','):
|
||||
noncebit = "%s:%s:%s:%s:%s" % (
|
||||
nonce, ncvalue, cnonce, 'auth', HA2
|
||||
)
|
||||
)
|
||||
respdig = KD(HA1, noncebit)
|
||||
else:
|
||||
# XXX handle auth-int.
|
||||
@@ -198,7 +198,7 @@ class HTTPDigestAuth(AuthBase):
|
||||
"""
|
||||
|
||||
# If response is not 4xx, do not auth
|
||||
# See https://github.com/kennethreitz/requests/issues/3772
|
||||
# See https://github.com/requests/requests/issues/3772
|
||||
if not 400 <= r.status_code < 500:
|
||||
self._thread_local.num_401_calls = 1
|
||||
return r
|
||||
|
||||
-5660
File diff suppressed because it is too large
Load Diff
+3
-10
@@ -5,21 +5,14 @@
|
||||
requests.certs
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
This module returns the preferred default CA certificate bundle.
|
||||
This module returns the preferred default CA certificate bundle. There is
|
||||
only one — the one from the certifi package.
|
||||
|
||||
If you are packaging Requests, e.g., for a Linux distribution or a managed
|
||||
environment, you can change the definition of where() to return a separately
|
||||
packaged CA bundle.
|
||||
"""
|
||||
import os.path
|
||||
|
||||
try:
|
||||
from certifi import where
|
||||
except ImportError:
|
||||
def where():
|
||||
"""Return the preferred certificate bundle."""
|
||||
# vendored bundle inside Requests
|
||||
return os.path.join(os.path.dirname(__file__), 'cacert.pem')
|
||||
from certifi import where
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(where())
|
||||
|
||||
+3
-2
@@ -8,7 +8,7 @@ This module handles import compatibility issues between Python 2 and
|
||||
Python 3.
|
||||
"""
|
||||
|
||||
from .packages import chardet
|
||||
import chardet
|
||||
|
||||
import sys
|
||||
|
||||
@@ -39,7 +39,8 @@ if is_py2:
|
||||
import cookielib
|
||||
from Cookie import Morsel
|
||||
from StringIO import StringIO
|
||||
from .packages.urllib3.packages.ordered_dict import OrderedDict
|
||||
|
||||
from urllib3.packages.ordered_dict import OrderedDict
|
||||
|
||||
builtin_str = str
|
||||
bytes = str
|
||||
|
||||
+4
-2
@@ -306,8 +306,10 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
|
||||
"""
|
||||
dictionary = {}
|
||||
for cookie in iter(self):
|
||||
if (domain is None or cookie.domain == domain) and (path is None
|
||||
or cookie.path == path):
|
||||
if (
|
||||
(domain is None or cookie.domain == domain) and
|
||||
(path is None or cookie.path == path)
|
||||
):
|
||||
dictionary[cookie.name] = cookie.value
|
||||
return dictionary
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ requests.exceptions
|
||||
|
||||
This module contains the set of Requests' exceptions.
|
||||
"""
|
||||
from .packages.urllib3.exceptions import HTTPError as BaseHTTPError
|
||||
from urllib3.exceptions import HTTPError as BaseHTTPError
|
||||
|
||||
|
||||
class RequestException(IOError):
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
"""Module containing bug report helper(s)."""
|
||||
from __future__ import print_function
|
||||
|
||||
import json
|
||||
import platform
|
||||
import sys
|
||||
import ssl
|
||||
|
||||
import urllib3
|
||||
import chardet
|
||||
|
||||
from . import __version__ as requests_version
|
||||
|
||||
try:
|
||||
from .packages.urllib3.contrib import pyopenssl
|
||||
except ImportError:
|
||||
pyopenssl = None
|
||||
OpenSSL = None
|
||||
cryptography = None
|
||||
else:
|
||||
import OpenSSL
|
||||
import cryptography
|
||||
|
||||
|
||||
def _implementation():
|
||||
"""Return a dict with the Python implementation and verison.
|
||||
|
||||
Provide both the name and the version of the Python implementation
|
||||
currently running. For example, on CPython 2.7.5 it will return
|
||||
{'name': 'CPython', 'version': '2.7.5'}.
|
||||
|
||||
This function works best on CPython and PyPy: in particular, it probably
|
||||
doesn't work for Jython or IronPython. Future investigation should be done
|
||||
to work out the correct shape of the code for those platforms.
|
||||
"""
|
||||
implementation = platform.python_implementation()
|
||||
|
||||
if implementation == 'CPython':
|
||||
implementation_version = platform.python_version()
|
||||
elif implementation == 'PyPy':
|
||||
implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major,
|
||||
sys.pypy_version_info.minor,
|
||||
sys.pypy_version_info.micro)
|
||||
if sys.pypy_version_info.releaselevel != 'final':
|
||||
implementation_version = ''.join([
|
||||
implementation_version, sys.pypy_version_info.releaselevel
|
||||
])
|
||||
elif implementation == 'Jython':
|
||||
implementation_version = platform.python_version() # Complete Guess
|
||||
elif implementation == 'IronPython':
|
||||
implementation_version = platform.python_version() # Complete Guess
|
||||
else:
|
||||
implementation_version = 'Unknown'
|
||||
|
||||
return {'name': implementation, 'version': implementation_version}
|
||||
|
||||
|
||||
def info():
|
||||
"""Generate information for a bug report."""
|
||||
try:
|
||||
platform_info = {
|
||||
'system': platform.system(),
|
||||
'release': platform.release(),
|
||||
}
|
||||
except IOError:
|
||||
platform_info = {
|
||||
'system': 'Unknown',
|
||||
'release': 'Unknown',
|
||||
}
|
||||
|
||||
implementation_info = _implementation()
|
||||
urllib3_info = {'version': urllib3.__version__}
|
||||
chardet_info = {'version': chardet.__version__}
|
||||
|
||||
pyopenssl_info = {
|
||||
'version': None,
|
||||
'openssl_version': '',
|
||||
}
|
||||
if OpenSSL:
|
||||
pyopenssl_info = {
|
||||
'version': OpenSSL.__version__,
|
||||
'openssl_version': '%x' % OpenSSL.SSL.OPENSSL_VERSION_NUMBER,
|
||||
}
|
||||
cryptography_info = {
|
||||
'version': getattr(cryptography, '__version__', ''),
|
||||
}
|
||||
|
||||
return {
|
||||
'platform': platform_info,
|
||||
'implementation': implementation_info,
|
||||
'system_ssl': {
|
||||
'version': '%x' % ssl.OPENSSL_VERSION_NUMBER,
|
||||
},
|
||||
'using_pyopenssl': pyopenssl is not None,
|
||||
'pyOpenSSL': pyopenssl_info,
|
||||
'urllib3': urllib3_info,
|
||||
'chardet': chardet_info,
|
||||
'cryptography': cryptography_info,
|
||||
'requests': {
|
||||
'version': requests_version,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def main():
|
||||
"""Pretty-print the bug information as JSON."""
|
||||
print(json.dumps(info(), sort_keys=True, indent=2))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
+25
-23
@@ -14,9 +14,15 @@ import sys
|
||||
|
||||
# Import encoding now, to avoid implicit import later.
|
||||
# Implicit import within threads may cause LookupError when standard library is in a ZIP,
|
||||
# such as in Embedded Python. See https://github.com/kennethreitz/requests/issues/3578.
|
||||
# such as in Embedded Python. See https://github.com/requests/requests/issues/3578.
|
||||
import encodings.idna
|
||||
|
||||
from urllib3.fields import RequestField
|
||||
from urllib3.filepost import encode_multipart_formdata
|
||||
from urllib3.util import parse_url
|
||||
from urllib3.exceptions import (
|
||||
DecodeError, ReadTimeoutError, ProtocolError, LocationParseError)
|
||||
|
||||
from io import UnsupportedOperation
|
||||
from .hooks import default_hooks
|
||||
from .structures import CaseInsensitiveDict
|
||||
@@ -24,12 +30,6 @@ from .structures import CaseInsensitiveDict
|
||||
import requests
|
||||
from .auth import HTTPBasicAuth
|
||||
from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar
|
||||
from .packages.urllib3.fields import RequestField
|
||||
from .packages.urllib3.filepost import encode_multipart_formdata
|
||||
from .packages.urllib3.util import parse_url
|
||||
from .packages.urllib3.exceptions import (
|
||||
DecodeError, ReadTimeoutError, ProtocolError,
|
||||
LocationParseError, ConnectionError)
|
||||
from .exceptions import (
|
||||
HTTPError, MissingScheme, InvalidURL, ChunkedEncodingError,
|
||||
ContentDecodingError, ConnectionError, StreamConsumedError,
|
||||
@@ -219,8 +219,9 @@ class Request(RequestHooksMixin):
|
||||
<PreparedRequest [GET]>
|
||||
"""
|
||||
|
||||
def __init__(self, method=None, url=None, headers=None, files=None,
|
||||
data=None, params=None, auth=None, cookies=None, hooks=None, json=None):
|
||||
def __init__(self,
|
||||
method=None, url=None, headers=None, files=None, data=None,
|
||||
params=None, auth=None, cookies=None, hooks=None, json=None):
|
||||
|
||||
# Default empty dicts for dict params.
|
||||
data = [] if data is None else data
|
||||
@@ -299,8 +300,9 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
|
||||
#: integer denoting starting position of a readable file-like body.
|
||||
self._body_position = None
|
||||
|
||||
def prepare(self, method=None, url=None, headers=None, files=None,
|
||||
data=None, params=None, auth=None, cookies=None, hooks=None, json=None):
|
||||
def prepare(self,
|
||||
method=None, url=None, headers=None, files=None, data=None,
|
||||
params=None, auth=None, cookies=None, hooks=None, json=None):
|
||||
"""Prepares the entire request with the given parameters."""
|
||||
|
||||
self.prepare_method(method)
|
||||
@@ -335,17 +337,11 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
|
||||
self.method = method
|
||||
if self.method is None:
|
||||
raise ValueError('Request method cannot be "None"')
|
||||
self.method = to_native_string(self.method).upper()
|
||||
self.method = to_native_string(self.method.upper())
|
||||
|
||||
@staticmethod
|
||||
def _get_idna_encoded_host(host):
|
||||
try:
|
||||
from .packages import idna
|
||||
except ImportError:
|
||||
# tolerate the possibility of downstream repackagers unvendoring `requests`
|
||||
# For more information, read: packages/__init__.py
|
||||
import idna
|
||||
sys.modules['requests.packages.idna'] = idna
|
||||
import idna
|
||||
|
||||
try:
|
||||
host = idna.encode(host, uts46=True).decode('utf-8')
|
||||
@@ -359,7 +355,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
|
||||
#: We're unable to blindly call unicode/str functions
|
||||
#: as this will include the bytestring indicator (b'')
|
||||
#: on python 3.x.
|
||||
#: https://github.com/kennethreitz/requests/pull/2238
|
||||
#: https://github.com/requests/requests/pull/2238
|
||||
if isinstance(url, bytes):
|
||||
url = url.decode('utf8')
|
||||
else:
|
||||
@@ -506,7 +502,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
|
||||
|
||||
def prepare_content_length(self, body):
|
||||
"""Prepares Content-Length header.
|
||||
|
||||
|
||||
If the length of the body of the request can be computed, Content-Length
|
||||
is set using ``super_len``. If user has manually set either a
|
||||
Transfer-Encoding or Content-Length header when it should not be set
|
||||
@@ -606,6 +602,7 @@ class Response(object):
|
||||
|
||||
self._content = False
|
||||
self._content_consumed = False
|
||||
self._next = None
|
||||
|
||||
#: Integer Code of responded HTTP Status, e.g. 404 or 200.
|
||||
self.status_code = None
|
||||
@@ -700,12 +697,17 @@ class Response(object):
|
||||
|
||||
@property
|
||||
def is_permanent_redirect(self):
|
||||
"""True if this Response one of the permanent versions of redirect"""
|
||||
"""True if this Response one of the permanent versions of redirect."""
|
||||
return ('location' in self.headers and self.status_code in (codes.moved_permanently, codes.permanent_redirect))
|
||||
|
||||
@property
|
||||
def next(self):
|
||||
"""Returns a PreparedRequest for the next request in a redirect chain, if there is one."""
|
||||
return self._next
|
||||
|
||||
@property
|
||||
def apparent_encoding(self):
|
||||
"""The apparent encoding, provided by the chardet library"""
|
||||
"""The apparent encoding, provided by the chardet library."""
|
||||
return chardet.detect(self.content)['encoding']
|
||||
|
||||
def iter_content(self, chunk_size=1, decode_unicode=False):
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
import sys
|
||||
|
||||
# This code exists for backwards compatibility reasons.
|
||||
# I don't like it either. Just look the other way. :)
|
||||
|
||||
for package in ('urllib3', 'idna', 'chardet'):
|
||||
locals()[package] = __import__(package)
|
||||
# This traversal is apparently necessary such that the identities are
|
||||
# preserved (requests.packages.urllib3.* is urllib3.*)
|
||||
for mod in list(sys.modules):
|
||||
if mod == package or mod.startswith(package + '.'):
|
||||
sys.modules['requests.packages.' + mod] = sys.modules[mod]
|
||||
|
||||
# Kinda cool, though, right?
|
||||
@@ -1,13 +0,0 @@
|
||||
If you are planning to submit a pull request to requests with any changes in
|
||||
this library do not go any further. These are independent libraries which we
|
||||
vendor into requests. Any changes necessary to these libraries must be made in
|
||||
them and submitted as separate pull requests to those libraries.
|
||||
|
||||
urllib3 pull requests go here: https://github.com/shazow/urllib3
|
||||
|
||||
chardet pull requests go here: https://github.com/chardet/chardet
|
||||
|
||||
idna pull requests go here: https://github.com/kjd/idna
|
||||
|
||||
See https://github.com/kennethreitz/requests/pull/1812#issuecomment-30854316
|
||||
for the reasoning behind this.
|
||||
@@ -1,36 +0,0 @@
|
||||
'''
|
||||
Debian and other distributions "unbundle" requests' vendored dependencies, and
|
||||
rewrite all imports to use the global versions of ``urllib3`` and ``chardet``.
|
||||
The problem with this is that not only requests itself imports those
|
||||
dependencies, but third-party code outside of the distros' control too.
|
||||
|
||||
In reaction to these problems, the distro maintainers replaced
|
||||
``requests.packages`` with a magical "stub module" that imports the correct
|
||||
modules. The implementations were varying in quality and all had severe
|
||||
problems. For example, a symlink (or hardlink) that links the correct modules
|
||||
into place introduces problems regarding object identity, since you now have
|
||||
two modules in `sys.modules` with the same API, but different identities::
|
||||
|
||||
requests.packages.urllib3 is not urllib3
|
||||
|
||||
With version ``2.5.2``, requests started to maintain its own stub, so that
|
||||
distro-specific breakage would be reduced to a minimum, even though the whole
|
||||
issue is not requests' fault in the first place. See
|
||||
https://github.com/kennethreitz/requests/pull/2375 for the corresponding pull
|
||||
request.
|
||||
'''
|
||||
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
|
||||
try:
|
||||
from . import urllib3
|
||||
except ImportError:
|
||||
import urllib3
|
||||
sys.modules['%s.urllib3' % __name__] = urllib3
|
||||
|
||||
try:
|
||||
from . import chardet
|
||||
except ImportError:
|
||||
import chardet
|
||||
sys.modules['%s.chardet' % __name__] = chardet
|
||||
@@ -1,39 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
from .compat import PY2, PY3
|
||||
from .universaldetector import UniversalDetector
|
||||
from .version import __version__, VERSION
|
||||
|
||||
|
||||
def detect(byte_str):
|
||||
"""
|
||||
Detect the encoding of the given byte string.
|
||||
|
||||
:param byte_str: The byte sequence to examine.
|
||||
:type byte_str: ``bytes`` or ``bytearray``
|
||||
"""
|
||||
if not isinstance(byte_str, bytearray):
|
||||
if not isinstance(byte_str, bytes):
|
||||
raise TypeError('Expected object of type bytes or bytearray, got: '
|
||||
'{0}'.format(type(byte_str)))
|
||||
else:
|
||||
byte_str = bytearray(byte_str)
|
||||
detector = UniversalDetector()
|
||||
detector.feed(byte_str)
|
||||
return detector.close()
|
||||
@@ -1,386 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Big5 frequency table
|
||||
# by Taiwan's Mandarin Promotion Council
|
||||
# <http://www.edu.tw:81/mandr/>
|
||||
#
|
||||
# 128 --> 0.42261
|
||||
# 256 --> 0.57851
|
||||
# 512 --> 0.74851
|
||||
# 1024 --> 0.89384
|
||||
# 2048 --> 0.97583
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
|
||||
# Random Distribution Ration = 512/(5401-512)=0.105
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
|
||||
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
#Char to FreqOrder table
|
||||
BIG5_TABLE_SIZE = 5376
|
||||
|
||||
BIG5_CHAR_TO_FREQ_ORDER = (
|
||||
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
|
||||
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
|
||||
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
|
||||
63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64
|
||||
3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80
|
||||
4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96
|
||||
5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112
|
||||
630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128
|
||||
179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144
|
||||
995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160
|
||||
2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176
|
||||
1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192
|
||||
3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208
|
||||
706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240
|
||||
3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256
|
||||
2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272
|
||||
437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288
|
||||
3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304
|
||||
1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320
|
||||
5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336
|
||||
266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352
|
||||
5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368
|
||||
1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384
|
||||
32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400
|
||||
188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416
|
||||
3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432
|
||||
3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448
|
||||
324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464
|
||||
2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480
|
||||
2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496
|
||||
314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512
|
||||
287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528
|
||||
3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544
|
||||
1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560
|
||||
1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576
|
||||
1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592
|
||||
2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608
|
||||
265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624
|
||||
4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640
|
||||
1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656
|
||||
5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672
|
||||
2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688
|
||||
383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704
|
||||
98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720
|
||||
523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736
|
||||
710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752
|
||||
5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768
|
||||
379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784
|
||||
1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800
|
||||
585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816
|
||||
690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832
|
||||
5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848
|
||||
1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864
|
||||
544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880
|
||||
3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896
|
||||
4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912
|
||||
3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928
|
||||
279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944
|
||||
610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960
|
||||
1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976
|
||||
4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992
|
||||
3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008
|
||||
3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024
|
||||
2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040
|
||||
5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056
|
||||
3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072
|
||||
5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088
|
||||
1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104
|
||||
2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120
|
||||
1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136
|
||||
78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152
|
||||
1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168
|
||||
4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184
|
||||
3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200
|
||||
534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216
|
||||
165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232
|
||||
626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248
|
||||
2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264
|
||||
5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280
|
||||
1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296
|
||||
2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312
|
||||
1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328
|
||||
1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344
|
||||
5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360
|
||||
5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376
|
||||
5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392
|
||||
3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408
|
||||
4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424
|
||||
4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440
|
||||
2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456
|
||||
5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472
|
||||
3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488
|
||||
598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504
|
||||
5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520
|
||||
5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536
|
||||
1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552
|
||||
2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568
|
||||
3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584
|
||||
4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600
|
||||
5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616
|
||||
3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632
|
||||
4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648
|
||||
1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664
|
||||
1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680
|
||||
4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696
|
||||
1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712
|
||||
240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728
|
||||
1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744
|
||||
1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760
|
||||
3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776
|
||||
619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792
|
||||
5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808
|
||||
2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824
|
||||
1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840
|
||||
1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856
|
||||
5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872
|
||||
829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888
|
||||
4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904
|
||||
375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920
|
||||
2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936
|
||||
444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952
|
||||
1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968
|
||||
1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984
|
||||
730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000
|
||||
4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016
|
||||
4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032
|
||||
1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048
|
||||
3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064
|
||||
5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080
|
||||
5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096
|
||||
1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112
|
||||
2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128
|
||||
1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144
|
||||
3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160
|
||||
2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176
|
||||
3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192
|
||||
2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208
|
||||
4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224
|
||||
4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240
|
||||
3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256
|
||||
97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272
|
||||
3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288
|
||||
424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304
|
||||
3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320
|
||||
4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336
|
||||
3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352
|
||||
1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368
|
||||
5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384
|
||||
199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400
|
||||
5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416
|
||||
1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432
|
||||
391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448
|
||||
4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464
|
||||
4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480
|
||||
397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496
|
||||
2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512
|
||||
2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528
|
||||
3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544
|
||||
1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560
|
||||
4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576
|
||||
2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592
|
||||
1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608
|
||||
1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624
|
||||
2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640
|
||||
3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656
|
||||
1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672
|
||||
5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688
|
||||
1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704
|
||||
4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720
|
||||
1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736
|
||||
135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752
|
||||
1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768
|
||||
4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784
|
||||
4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800
|
||||
2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816
|
||||
1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832
|
||||
4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848
|
||||
660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864
|
||||
5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880
|
||||
2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896
|
||||
3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912
|
||||
4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928
|
||||
790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944
|
||||
5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960
|
||||
5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976
|
||||
1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992
|
||||
4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008
|
||||
4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024
|
||||
2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040
|
||||
3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056
|
||||
3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072
|
||||
2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088
|
||||
1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104
|
||||
4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120
|
||||
3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136
|
||||
3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152
|
||||
2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168
|
||||
4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184
|
||||
5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200
|
||||
3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216
|
||||
2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232
|
||||
3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248
|
||||
1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264
|
||||
2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280
|
||||
3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296
|
||||
4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312
|
||||
2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328
|
||||
2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344
|
||||
5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360
|
||||
1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376
|
||||
2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392
|
||||
1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408
|
||||
3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424
|
||||
4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440
|
||||
2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456
|
||||
3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472
|
||||
3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488
|
||||
2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504
|
||||
4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520
|
||||
2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536
|
||||
3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552
|
||||
4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568
|
||||
5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584
|
||||
3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600
|
||||
194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616
|
||||
1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632
|
||||
4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648
|
||||
1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664
|
||||
4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680
|
||||
5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696
|
||||
510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712
|
||||
5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728
|
||||
5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744
|
||||
2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760
|
||||
3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776
|
||||
2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792
|
||||
2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808
|
||||
681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824
|
||||
1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840
|
||||
4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856
|
||||
3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872
|
||||
3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888
|
||||
838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904
|
||||
2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920
|
||||
625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936
|
||||
2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952
|
||||
4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968
|
||||
1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984
|
||||
4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000
|
||||
1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016
|
||||
3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032
|
||||
574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048
|
||||
3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064
|
||||
5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080
|
||||
5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096
|
||||
3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112
|
||||
3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128
|
||||
1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144
|
||||
2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160
|
||||
5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176
|
||||
1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192
|
||||
1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208
|
||||
3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224
|
||||
919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240
|
||||
1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256
|
||||
4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272
|
||||
5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288
|
||||
2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304
|
||||
3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320
|
||||
516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336
|
||||
1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352
|
||||
2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368
|
||||
2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384
|
||||
5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400
|
||||
5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416
|
||||
5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432
|
||||
2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448
|
||||
2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464
|
||||
1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480
|
||||
4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496
|
||||
3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512
|
||||
3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528
|
||||
4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544
|
||||
4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560
|
||||
2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576
|
||||
2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592
|
||||
5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608
|
||||
4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624
|
||||
5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640
|
||||
4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656
|
||||
502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672
|
||||
121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688
|
||||
1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704
|
||||
3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720
|
||||
4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736
|
||||
1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752
|
||||
5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768
|
||||
2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784
|
||||
2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800
|
||||
3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816
|
||||
5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832
|
||||
1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848
|
||||
3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864
|
||||
5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880
|
||||
1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896
|
||||
5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912
|
||||
2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928
|
||||
3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944
|
||||
2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960
|
||||
3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976
|
||||
3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992
|
||||
3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008
|
||||
4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024
|
||||
803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040
|
||||
2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056
|
||||
4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072
|
||||
3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088
|
||||
5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104
|
||||
1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120
|
||||
5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136
|
||||
425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152
|
||||
1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168
|
||||
479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184
|
||||
4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200
|
||||
1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216
|
||||
4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232
|
||||
1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248
|
||||
433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264
|
||||
3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280
|
||||
4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296
|
||||
5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312
|
||||
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
|
||||
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
|
||||
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
|
||||
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376
|
||||
)
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import Big5DistributionAnalysis
|
||||
from .mbcssm import BIG5_SM_MODEL
|
||||
|
||||
|
||||
class Big5Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(Big5Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
|
||||
self.distribution_analyzer = Big5DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "Big5"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Chinese"
|
||||
@@ -1,233 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
|
||||
GB2312_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
|
||||
JIS_TYPICAL_DISTRIBUTION_RATIO)
|
||||
|
||||
|
||||
class CharDistributionAnalysis(object):
|
||||
ENOUGH_DATA_THRESHOLD = 1024
|
||||
SURE_YES = 0.99
|
||||
SURE_NO = 0.01
|
||||
MINIMUM_DATA_THRESHOLD = 3
|
||||
|
||||
def __init__(self):
|
||||
# Mapping table to get frequency order from char order (get from
|
||||
# GetOrder())
|
||||
self._char_to_freq_order = None
|
||||
self._table_size = None # Size of above table
|
||||
# This is a constant value which varies from language to language,
|
||||
# used in calculating confidence. See
|
||||
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
||||
# for further detail.
|
||||
self.typical_distribution_ratio = None
|
||||
self._done = None
|
||||
self._total_chars = None
|
||||
self._freq_chars = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""reset analyser, clear any state"""
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._done = False
|
||||
self._total_chars = 0 # Total characters encountered
|
||||
# The number of characters whose frequency order is less than 512
|
||||
self._freq_chars = 0
|
||||
|
||||
def feed(self, char, char_len):
|
||||
"""feed a character with known length"""
|
||||
if char_len == 2:
|
||||
# we only care about 2-bytes character in our distribution analysis
|
||||
order = self.get_order(char)
|
||||
else:
|
||||
order = -1
|
||||
if order >= 0:
|
||||
self._total_chars += 1
|
||||
# order is valid
|
||||
if order < self._table_size:
|
||||
if 512 > self._char_to_freq_order[order]:
|
||||
self._freq_chars += 1
|
||||
|
||||
def get_confidence(self):
|
||||
"""return confidence based on existing data"""
|
||||
# if we didn't receive any character in our consideration range,
|
||||
# return negative answer
|
||||
if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD:
|
||||
return self.SURE_NO
|
||||
|
||||
if self._total_chars != self._freq_chars:
|
||||
r = (self._freq_chars / ((self._total_chars - self._freq_chars)
|
||||
* self.typical_distribution_ratio))
|
||||
if r < self.SURE_YES:
|
||||
return r
|
||||
|
||||
# normalize confidence (we don't want to be 100% sure)
|
||||
return self.SURE_YES
|
||||
|
||||
def got_enough_data(self):
|
||||
# It is not necessary to receive all data to draw conclusion.
|
||||
# For charset detection, certain amount of data is enough
|
||||
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# We do not handle characters based on the original encoding string,
|
||||
# but convert this encoding string to a number, here called order.
|
||||
# This allows multiple encodings of a language to share one frequency
|
||||
# table.
|
||||
return -1
|
||||
|
||||
|
||||
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(EUCTWDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCTW_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for euc-TW encoding, we are interested
|
||||
# first byte range: 0xc4 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char = byte_str[0]
|
||||
if first_char >= 0xC4:
|
||||
return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(EUCKRDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCKR_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for euc-KR encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char = byte_str[0]
|
||||
if first_char >= 0xB0:
|
||||
return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(GB2312DistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = GB2312_TABLE_SIZE
|
||||
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for GB2312 encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if (first_char >= 0xB0) and (second_char >= 0xA1):
|
||||
return 94 * (first_char - 0xB0) + second_char - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(Big5DistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = BIG5_TABLE_SIZE
|
||||
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for big5 encoding, we are interested
|
||||
# first byte range: 0xa4 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if first_char >= 0xA4:
|
||||
if second_char >= 0xA1:
|
||||
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
|
||||
else:
|
||||
return 157 * (first_char - 0xA4) + second_char - 0x40
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(SJISDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for sjis encoding, we are interested
|
||||
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if (first_char >= 0x81) and (first_char <= 0x9F):
|
||||
order = 188 * (first_char - 0x81)
|
||||
elif (first_char >= 0xE0) and (first_char <= 0xEF):
|
||||
order = 188 * (first_char - 0xE0 + 31)
|
||||
else:
|
||||
return -1
|
||||
order = order + second_char - 0x40
|
||||
if second_char > 0x7F:
|
||||
order = -1
|
||||
return order
|
||||
|
||||
|
||||
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(EUCJPDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for euc-JP encoding, we are interested
|
||||
# first byte range: 0xa0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
char = byte_str[0]
|
||||
if char >= 0xA0:
|
||||
return 94 * (char - 0xA1) + byte_str[1] - 0xa1
|
||||
else:
|
||||
return -1
|
||||
@@ -1,106 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import ProbingState
|
||||
from .charsetprober import CharSetProber
|
||||
|
||||
|
||||
class CharSetGroupProber(CharSetProber):
|
||||
def __init__(self, lang_filter=None):
|
||||
super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
|
||||
self._active_num = 0
|
||||
self.probers = []
|
||||
self._best_guess_prober = None
|
||||
|
||||
def reset(self):
|
||||
super(CharSetGroupProber, self).reset()
|
||||
self._active_num = 0
|
||||
for prober in self.probers:
|
||||
if prober:
|
||||
prober.reset()
|
||||
prober.active = True
|
||||
self._active_num += 1
|
||||
self._best_guess_prober = None
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._best_guess_prober:
|
||||
return None
|
||||
return self._best_guess_prober.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._best_guess_prober:
|
||||
return None
|
||||
return self._best_guess_prober.language
|
||||
|
||||
def feed(self, byte_str):
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
continue
|
||||
state = prober.feed(byte_str)
|
||||
if not state:
|
||||
continue
|
||||
if state == ProbingState.FOUND_IT:
|
||||
self._best_guess_prober = prober
|
||||
return self.state
|
||||
elif state == ProbingState.NOT_ME:
|
||||
prober.active = False
|
||||
self._active_num -= 1
|
||||
if self._active_num <= 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
return self.state
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
state = self.state
|
||||
if state == ProbingState.FOUND_IT:
|
||||
return 0.99
|
||||
elif state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
best_conf = 0.0
|
||||
self._best_guess_prober = None
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
self.logger.debug('%s not active', prober.charset_name)
|
||||
continue
|
||||
conf = prober.get_confidence()
|
||||
self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
|
||||
if best_conf < conf:
|
||||
best_conf = conf
|
||||
self._best_guess_prober = prober
|
||||
if not self._best_guess_prober:
|
||||
return 0.0
|
||||
return best_conf
|
||||
@@ -1,145 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .enums import ProbingState
|
||||
|
||||
|
||||
class CharSetProber(object):
|
||||
|
||||
SHORTCUT_THRESHOLD = 0.95
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
self._state = None
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
def reset(self):
|
||||
self._state = ProbingState.DETECTING
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return None
|
||||
|
||||
def feed(self, buf):
|
||||
pass
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
return self._state
|
||||
|
||||
def get_confidence(self):
|
||||
return 0.0
|
||||
|
||||
@staticmethod
|
||||
def filter_high_byte_only(buf):
|
||||
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
|
||||
return buf
|
||||
|
||||
@staticmethod
|
||||
def filter_international_words(buf):
|
||||
"""
|
||||
We define three types of bytes:
|
||||
alphabet: english alphabets [a-zA-Z]
|
||||
international: international characters [\x80-\xFF]
|
||||
marker: everything else [^a-zA-Z\x80-\xFF]
|
||||
|
||||
The input buffer can be thought to contain a series of words delimited
|
||||
by markers. This function works to filter all words that contain at
|
||||
least one international character. All contiguous sequences of markers
|
||||
are replaced by a single space ascii character.
|
||||
|
||||
This filter applies to all scripts which do not use English characters.
|
||||
"""
|
||||
filtered = bytearray()
|
||||
|
||||
# This regex expression filters out only words that have at-least one
|
||||
# international character. The word may include one marker character at
|
||||
# the end.
|
||||
words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
|
||||
buf)
|
||||
|
||||
for word in words:
|
||||
filtered.extend(word[:-1])
|
||||
|
||||
# If the last character in the word is a marker, replace it with a
|
||||
# space as markers shouldn't affect our analysis (they are used
|
||||
# similarly across all languages and may thus have similar
|
||||
# frequencies).
|
||||
last_char = word[-1:]
|
||||
if not last_char.isalpha() and last_char < b'\x80':
|
||||
last_char = b' '
|
||||
filtered.extend(last_char)
|
||||
|
||||
return filtered
|
||||
|
||||
@staticmethod
|
||||
def filter_with_english_letters(buf):
|
||||
"""
|
||||
Returns a copy of ``buf`` that retains only the sequences of English
|
||||
alphabet and high byte characters that are not between <> characters.
|
||||
Also retains English alphabet and high byte characters immediately
|
||||
before occurrences of >.
|
||||
|
||||
This filter can be applied to all scripts which contain both English
|
||||
characters and extended ASCII characters, but is currently only used by
|
||||
``Latin1Prober``.
|
||||
"""
|
||||
filtered = bytearray()
|
||||
in_tag = False
|
||||
prev = 0
|
||||
|
||||
for curr in range(len(buf)):
|
||||
# Slice here to get bytes instead of an int with Python 3
|
||||
buf_char = buf[curr:curr + 1]
|
||||
# Check if we're coming out of or entering an HTML tag
|
||||
if buf_char == b'>':
|
||||
in_tag = False
|
||||
elif buf_char == b'<':
|
||||
in_tag = True
|
||||
|
||||
# If current character is not extended-ASCII and not alphabetic...
|
||||
if buf_char < b'\x80' and not buf_char.isalpha():
|
||||
# ...and we're not in a tag
|
||||
if curr > prev and not in_tag:
|
||||
# Keep everything after last non-extended-ASCII,
|
||||
# non-alphabetic character
|
||||
filtered.extend(buf[prev:curr])
|
||||
# Output a space to delimit stretch we kept
|
||||
filtered.extend(b' ')
|
||||
prev = curr + 1
|
||||
|
||||
# If we're not in a tag...
|
||||
if not in_tag:
|
||||
# Keep everything after last non-extended-ASCII, non-alphabetic
|
||||
# character
|
||||
filtered.extend(buf[prev:])
|
||||
|
||||
return filtered
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Script which takes one or more file paths and reports on their detected
|
||||
encodings
|
||||
|
||||
Example::
|
||||
|
||||
% chardetect somefile someotherfile
|
||||
somefile: windows-1252 with confidence 0.5
|
||||
someotherfile: ascii with confidence 1.0
|
||||
|
||||
If no paths are provided, it takes its input from stdin.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from chardet import __version__
|
||||
from chardet.compat import PY2
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
|
||||
|
||||
def description_of(lines, name='stdin'):
|
||||
"""
|
||||
Return a string describing the probable encoding of a file or
|
||||
list of strings.
|
||||
|
||||
:param lines: The lines to get the encoding of.
|
||||
:type lines: Iterable of bytes
|
||||
:param name: Name of file or collection of lines
|
||||
:type name: str
|
||||
"""
|
||||
u = UniversalDetector()
|
||||
for line in lines:
|
||||
line = bytearray(line)
|
||||
u.feed(line)
|
||||
# shortcut out of the loop to save reading further - particularly useful if we read a BOM.
|
||||
if u.done:
|
||||
break
|
||||
u.close()
|
||||
result = u.result
|
||||
if PY2:
|
||||
name = name.decode(sys.getfilesystemencoding(), 'ignore')
|
||||
if result['encoding']:
|
||||
return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
|
||||
result['confidence'])
|
||||
else:
|
||||
return '{0}: no result'.format(name)
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
"""
|
||||
Handles command line arguments and gets things started.
|
||||
|
||||
:param argv: List of arguments, as if specified on the command-line.
|
||||
If None, ``sys.argv[1:]`` is used instead.
|
||||
:type argv: list of str
|
||||
"""
|
||||
# Get command line arguments
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Takes one or more file paths and reports their detected \
|
||||
encodings")
|
||||
parser.add_argument('input',
|
||||
help='File whose encoding we would like to determine. \
|
||||
(default: stdin)',
|
||||
type=argparse.FileType('rb'), nargs='*',
|
||||
default=[sys.stdin if PY2 else sys.stdin.buffer])
|
||||
parser.add_argument('--version', action='version',
|
||||
version='%(prog)s {0}'.format(__version__))
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
for f in args.input:
|
||||
if f.isatty():
|
||||
print("You are running chardetect interactively. Press " +
|
||||
"CTRL-D twice at the start of a blank line to signal the " +
|
||||
"end of your input. If you want help, run chardetect " +
|
||||
"--help\n", file=sys.stderr)
|
||||
print(description_of(f, f.name))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,88 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import logging
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
|
||||
class CodingStateMachine(object):
|
||||
"""
|
||||
A state machine to verify a byte sequence for a particular encoding. For
|
||||
each byte the detector receives, it will feed that byte to every active
|
||||
state machine available, one byte at a time. The state machine changes its
|
||||
state based on its previous state and the byte it receives. There are 3
|
||||
states in a state machine that are of interest to an auto-detector:
|
||||
|
||||
START state: This is the state to start with, or a legal byte sequence
|
||||
(i.e. a valid code point) for character has been identified.
|
||||
|
||||
ME state: This indicates that the state machine identified a byte sequence
|
||||
that is specific to the charset it is designed for and that
|
||||
there is no other possible encoding which can contain this byte
|
||||
sequence. This will to lead to an immediate positive answer for
|
||||
the detector.
|
||||
|
||||
ERROR state: This indicates the state machine identified an illegal byte
|
||||
sequence for that encoding. This will lead to an immediate
|
||||
negative answer for this encoding. Detector will exclude this
|
||||
encoding from consideration from here on.
|
||||
"""
|
||||
def __init__(self, sm):
|
||||
self._model = sm
|
||||
self._curr_byte_pos = 0
|
||||
self._curr_char_len = 0
|
||||
self._curr_state = None
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._curr_state = MachineState.START
|
||||
|
||||
def next_state(self, c):
|
||||
# for each byte we get its class
|
||||
# if it is first byte, we also get byte length
|
||||
byte_class = self._model['class_table'][c]
|
||||
if self._curr_state == MachineState.START:
|
||||
self._curr_byte_pos = 0
|
||||
self._curr_char_len = self._model['char_len_table'][byte_class]
|
||||
# from byte's class and state_table, we get its next state
|
||||
curr_state = (self._curr_state * self._model['class_factor']
|
||||
+ byte_class)
|
||||
self._curr_state = self._model['state_table'][curr_state]
|
||||
self._curr_byte_pos += 1
|
||||
return self._curr_state
|
||||
|
||||
def get_current_charlen(self):
|
||||
return self._curr_char_len
|
||||
|
||||
def get_coding_state_machine(self):
|
||||
return self._model['name']
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return self._model['language']
|
||||
@@ -1,34 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# Contributor(s):
|
||||
# Dan Blanchard
|
||||
# Ian Cordasco
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
PY2 = True
|
||||
PY3 = False
|
||||
base_str = (str, unicode)
|
||||
text_type = unicode
|
||||
else:
|
||||
PY2 = False
|
||||
PY3 = True
|
||||
base_str = (bytes, str)
|
||||
text_type = str
|
||||
@@ -1,49 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .mbcssm import CP949_SM_MODEL
|
||||
|
||||
|
||||
class CP949Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(CP949Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
|
||||
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
|
||||
# not different.
|
||||
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "CP949"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Korean"
|
||||
@@ -1,76 +0,0 @@
|
||||
"""
|
||||
All of the Enums that are used throughout the chardet package.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
|
||||
class InputState(object):
|
||||
"""
|
||||
This enum represents the different states a universal detector can be in.
|
||||
"""
|
||||
PURE_ASCII = 0
|
||||
ESC_ASCII = 1
|
||||
HIGH_BYTE = 2
|
||||
|
||||
|
||||
class LanguageFilter(object):
|
||||
"""
|
||||
This enum represents the different language filters we can apply to a
|
||||
``UniversalDetector``.
|
||||
"""
|
||||
CHINESE_SIMPLIFIED = 0x01
|
||||
CHINESE_TRADITIONAL = 0x02
|
||||
JAPANESE = 0x04
|
||||
KOREAN = 0x08
|
||||
NON_CJK = 0x10
|
||||
ALL = 0x1F
|
||||
CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
|
||||
CJK = CHINESE | JAPANESE | KOREAN
|
||||
|
||||
|
||||
class ProbingState(object):
|
||||
"""
|
||||
This enum represents the different states a prober can be in.
|
||||
"""
|
||||
DETECTING = 0
|
||||
FOUND_IT = 1
|
||||
NOT_ME = 2
|
||||
|
||||
|
||||
class MachineState(object):
|
||||
"""
|
||||
This enum represents the different states a state machine can be in.
|
||||
"""
|
||||
START = 0
|
||||
ERROR = 1
|
||||
ITS_ME = 2
|
||||
|
||||
|
||||
class SequenceLikelihood(object):
|
||||
"""
|
||||
This enum represents the likelihood of a character following the previous one.
|
||||
"""
|
||||
NEGATIVE = 0
|
||||
UNLIKELY = 1
|
||||
LIKELY = 2
|
||||
POSITIVE = 3
|
||||
|
||||
@classmethod
|
||||
def get_num_categories(cls):
|
||||
""":returns: The number of likelihood categories in the enum."""
|
||||
return 4
|
||||
|
||||
|
||||
class CharacterCategory(object):
|
||||
"""
|
||||
This enum represents the different categories language models for
|
||||
``SingleByteCharsetProber`` put characters into.
|
||||
|
||||
Anything less than CONTROL is considered a letter.
|
||||
"""
|
||||
UNDEFINED = 255
|
||||
LINE_BREAK = 254
|
||||
SYMBOL = 253
|
||||
DIGIT = 252
|
||||
CONTROL = 251
|
||||
@@ -1,101 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .enums import LanguageFilter, ProbingState, MachineState
|
||||
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
|
||||
ISO2022KR_SM_MODEL)
|
||||
|
||||
|
||||
class EscCharSetProber(CharSetProber):
|
||||
"""
|
||||
This CharSetProber uses a "code scheme" approach for detecting encodings,
|
||||
whereby easily recognizable escape or shift sequences are relied on to
|
||||
identify these encodings.
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||
self.coding_sm = []
|
||||
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
|
||||
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
|
||||
if self.lang_filter & LanguageFilter.JAPANESE:
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
|
||||
if self.lang_filter & LanguageFilter.KOREAN:
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
|
||||
self.active_sm_count = None
|
||||
self._detected_charset = None
|
||||
self._detected_language = None
|
||||
self._state = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(EscCharSetProber, self).reset()
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm:
|
||||
continue
|
||||
coding_sm.active = True
|
||||
coding_sm.reset()
|
||||
self.active_sm_count = len(self.coding_sm)
|
||||
self._detected_charset = None
|
||||
self._detected_language = None
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self._detected_charset
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return self._detected_language
|
||||
|
||||
def get_confidence(self):
|
||||
if self._detected_charset:
|
||||
return 0.99
|
||||
else:
|
||||
return 0.00
|
||||
|
||||
def feed(self, byte_str):
|
||||
for c in byte_str:
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm or not coding_sm.active:
|
||||
continue
|
||||
coding_state = coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
coding_sm.active = False
|
||||
self.active_sm_count -= 1
|
||||
if self.active_sm_count <= 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
return self.state
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
self._detected_charset = coding_sm.get_coding_state_machine()
|
||||
self._detected_language = coding_sm.language
|
||||
return self.state
|
||||
|
||||
return self.state
|
||||
@@ -1,246 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
HZ_CLS = (
|
||||
1,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,4,0,5,2,0, # 78 - 7f
|
||||
1,1,1,1,1,1,1,1, # 80 - 87
|
||||
1,1,1,1,1,1,1,1, # 88 - 8f
|
||||
1,1,1,1,1,1,1,1, # 90 - 97
|
||||
1,1,1,1,1,1,1,1, # 98 - 9f
|
||||
1,1,1,1,1,1,1,1, # a0 - a7
|
||||
1,1,1,1,1,1,1,1, # a8 - af
|
||||
1,1,1,1,1,1,1,1, # b0 - b7
|
||||
1,1,1,1,1,1,1,1, # b8 - bf
|
||||
1,1,1,1,1,1,1,1, # c0 - c7
|
||||
1,1,1,1,1,1,1,1, # c8 - cf
|
||||
1,1,1,1,1,1,1,1, # d0 - d7
|
||||
1,1,1,1,1,1,1,1, # d8 - df
|
||||
1,1,1,1,1,1,1,1, # e0 - e7
|
||||
1,1,1,1,1,1,1,1, # e8 - ef
|
||||
1,1,1,1,1,1,1,1, # f0 - f7
|
||||
1,1,1,1,1,1,1,1, # f8 - ff
|
||||
)
|
||||
|
||||
HZ_ST = (
|
||||
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
|
||||
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
|
||||
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
|
||||
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
|
||||
)
|
||||
|
||||
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
HZ_SM_MODEL = {'class_table': HZ_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': HZ_ST,
|
||||
'char_len_table': HZ_CHAR_LEN_TABLE,
|
||||
'name': "HZ-GB-2312",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022CN_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,4,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022CN_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
|
||||
)
|
||||
|
||||
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
|
||||
'class_factor': 9,
|
||||
'state_table': ISO2022CN_ST,
|
||||
'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-CN",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022JP_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,2,2, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,7,0,0,0, # 20 - 27
|
||||
3,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
6,0,4,0,8,0,0,0, # 40 - 47
|
||||
0,9,5,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022JP_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
|
||||
)
|
||||
|
||||
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': ISO2022JP_ST,
|
||||
'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-JP",
|
||||
'language': 'Japanese'}
|
||||
|
||||
ISO2022KR_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,3,0,0,0, # 20 - 27
|
||||
0,4,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,5,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022KR_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
|
||||
)
|
||||
|
||||
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': ISO2022KR_ST,
|
||||
'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-KR",
|
||||
'language': 'Korean'}
|
||||
|
||||
|
||||
@@ -1,92 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import ProbingState, MachineState
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCJPDistributionAnalysis
|
||||
from .jpcntx import EUCJPContextAnalysis
|
||||
from .mbcssm import EUCJP_SM_MODEL
|
||||
|
||||
|
||||
class EUCJPProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(EUCJPProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
|
||||
self.distribution_analyzer = EUCJPDistributionAnalysis()
|
||||
self.context_analyzer = EUCJPContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(EUCJPProber, self).reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-JP"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
# PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.context_analyzer.feed(self._last_char, char_len)
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self.context_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.context_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
@@ -1,195 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Sampling from about 20M text materials include literature and computer technology
|
||||
|
||||
# 128 --> 0.79
|
||||
# 256 --> 0.92
|
||||
# 512 --> 0.986
|
||||
# 1024 --> 0.99944
|
||||
# 2048 --> 0.99999
|
||||
#
|
||||
# Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
|
||||
# Random Distribution Ration = 512 / (2350-512) = 0.279.
|
||||
#
|
||||
# Typical Distribution Ratio
|
||||
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
|
||||
|
||||
EUCKR_TABLE_SIZE = 2352
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
EUCKR_CHAR_TO_FREQ_ORDER = (
|
||||
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
|
||||
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
|
||||
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
|
||||
945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739,
|
||||
116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622,
|
||||
708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750,
|
||||
1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856,
|
||||
344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205,
|
||||
709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779,
|
||||
1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19,
|
||||
1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567,
|
||||
1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797,
|
||||
1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802,
|
||||
1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899,
|
||||
885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818,
|
||||
1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409,
|
||||
1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697,
|
||||
1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770,
|
||||
1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723,
|
||||
544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416,
|
||||
1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300,
|
||||
119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083,
|
||||
893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857,
|
||||
1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871,
|
||||
282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420,
|
||||
1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885,
|
||||
127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889,
|
||||
0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893,
|
||||
1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317,
|
||||
1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841,
|
||||
1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910,
|
||||
1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610,
|
||||
269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375,
|
||||
1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939,
|
||||
887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870,
|
||||
217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934,
|
||||
1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888,
|
||||
1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950,
|
||||
1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065,
|
||||
1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002,
|
||||
1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965,
|
||||
1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467,
|
||||
50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285,
|
||||
639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7,
|
||||
103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979,
|
||||
1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985,
|
||||
818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994,
|
||||
1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250,
|
||||
423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824,
|
||||
532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003,
|
||||
2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745,
|
||||
619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61,
|
||||
191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023,
|
||||
2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032,
|
||||
2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912,
|
||||
2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224,
|
||||
719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012,
|
||||
819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050,
|
||||
2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681,
|
||||
499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414,
|
||||
1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068,
|
||||
2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075,
|
||||
1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850,
|
||||
2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606,
|
||||
2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449,
|
||||
1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452,
|
||||
949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112,
|
||||
2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121,
|
||||
2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130,
|
||||
22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274,
|
||||
962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139,
|
||||
2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721,
|
||||
1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298,
|
||||
2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463,
|
||||
2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747,
|
||||
2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285,
|
||||
2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187,
|
||||
2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10,
|
||||
2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350,
|
||||
1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201,
|
||||
2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972,
|
||||
2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219,
|
||||
2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233,
|
||||
2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242,
|
||||
2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247,
|
||||
1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178,
|
||||
1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255,
|
||||
2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259,
|
||||
1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262,
|
||||
2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702,
|
||||
1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273,
|
||||
295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541,
|
||||
2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117,
|
||||
432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187,
|
||||
2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800,
|
||||
808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312,
|
||||
2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229,
|
||||
2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315,
|
||||
501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484,
|
||||
2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170,
|
||||
1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335,
|
||||
425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601,
|
||||
1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395,
|
||||
2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354,
|
||||
1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476,
|
||||
2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035,
|
||||
416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498,
|
||||
2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310,
|
||||
1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389,
|
||||
2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504,
|
||||
1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505,
|
||||
2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145,
|
||||
1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624,
|
||||
593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700,
|
||||
2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221,
|
||||
2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377,
|
||||
644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448,
|
||||
915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485,
|
||||
1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705,
|
||||
1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465,
|
||||
291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471,
|
||||
2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997,
|
||||
2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486,
|
||||
797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494,
|
||||
434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771,
|
||||
585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323,
|
||||
2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491,
|
||||
95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510,
|
||||
161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519,
|
||||
2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532,
|
||||
2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199,
|
||||
704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544,
|
||||
2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247,
|
||||
1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441,
|
||||
249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562,
|
||||
2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362,
|
||||
2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583,
|
||||
2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465,
|
||||
3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431,
|
||||
202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151,
|
||||
974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596,
|
||||
2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406,
|
||||
2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611,
|
||||
2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619,
|
||||
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
|
||||
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
|
||||
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
|
||||
)
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .mbcssm import EUCKR_SM_MODEL
|
||||
|
||||
|
||||
class EUCKRProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(EUCKRProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
|
||||
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-KR"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Korean"
|
||||
@@ -1,387 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# EUCTW frequency table
|
||||
# Converted from big5 work
|
||||
# by Taiwan's Mandarin Promotion Council
|
||||
# <http:#www.edu.tw:81/mandr/>
|
||||
|
||||
# 128 --> 0.42261
|
||||
# 256 --> 0.57851
|
||||
# 512 --> 0.74851
|
||||
# 1024 --> 0.89384
|
||||
# 2048 --> 0.97583
|
||||
#
|
||||
# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98
|
||||
# Random Distribution Ration = 512/(5401-512)=0.105
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
|
||||
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
EUCTW_TABLE_SIZE = 5376
|
||||
|
||||
EUCTW_CHAR_TO_FREQ_ORDER = (
|
||||
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
|
||||
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
|
||||
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
|
||||
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
|
||||
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
|
||||
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
|
||||
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
|
||||
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
|
||||
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
|
||||
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
|
||||
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
|
||||
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
|
||||
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
|
||||
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
|
||||
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
|
||||
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
|
||||
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
|
||||
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
|
||||
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
|
||||
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
|
||||
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
|
||||
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
|
||||
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
|
||||
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
|
||||
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
|
||||
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
|
||||
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
|
||||
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
|
||||
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
|
||||
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
|
||||
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
|
||||
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
|
||||
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
|
||||
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
|
||||
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
|
||||
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
|
||||
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
|
||||
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
|
||||
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
|
||||
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
|
||||
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
|
||||
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
|
||||
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
|
||||
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
|
||||
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
|
||||
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
|
||||
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
|
||||
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
|
||||
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
|
||||
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
|
||||
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
|
||||
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
|
||||
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
|
||||
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
|
||||
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
|
||||
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
|
||||
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
|
||||
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
|
||||
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
|
||||
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
|
||||
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
|
||||
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
|
||||
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
|
||||
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
|
||||
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
|
||||
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
|
||||
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
|
||||
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
|
||||
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
|
||||
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
|
||||
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
|
||||
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
|
||||
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
|
||||
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
|
||||
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
|
||||
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
|
||||
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
|
||||
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
|
||||
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
|
||||
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
|
||||
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
|
||||
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
|
||||
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
|
||||
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
|
||||
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
|
||||
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
|
||||
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
|
||||
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
|
||||
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
|
||||
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
|
||||
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
|
||||
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
|
||||
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
|
||||
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
|
||||
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
|
||||
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
|
||||
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
|
||||
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
|
||||
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
|
||||
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
|
||||
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
|
||||
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
|
||||
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
|
||||
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
|
||||
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
|
||||
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
|
||||
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
|
||||
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
|
||||
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
|
||||
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
|
||||
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
|
||||
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
|
||||
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
|
||||
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
|
||||
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
|
||||
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
|
||||
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
|
||||
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
|
||||
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
|
||||
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
|
||||
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
|
||||
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
|
||||
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
|
||||
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
|
||||
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
|
||||
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
|
||||
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
|
||||
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
|
||||
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
|
||||
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
|
||||
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
|
||||
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
|
||||
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
|
||||
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
|
||||
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
|
||||
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
|
||||
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
|
||||
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
|
||||
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
|
||||
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
|
||||
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
|
||||
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
|
||||
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
|
||||
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
|
||||
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
|
||||
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
|
||||
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
|
||||
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
|
||||
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
|
||||
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
|
||||
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
|
||||
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
|
||||
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
|
||||
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
|
||||
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
|
||||
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
|
||||
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
|
||||
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
|
||||
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
|
||||
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
|
||||
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
|
||||
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
|
||||
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
|
||||
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
|
||||
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
|
||||
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
|
||||
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
|
||||
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
|
||||
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
|
||||
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
|
||||
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
|
||||
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
|
||||
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
|
||||
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
|
||||
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
|
||||
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
|
||||
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
|
||||
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
|
||||
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
|
||||
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
|
||||
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
|
||||
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
|
||||
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
|
||||
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
|
||||
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
|
||||
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
|
||||
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
|
||||
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
|
||||
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
|
||||
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
|
||||
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
|
||||
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
|
||||
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
|
||||
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
|
||||
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
|
||||
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
|
||||
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
|
||||
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
|
||||
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
|
||||
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
|
||||
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
|
||||
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
|
||||
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
|
||||
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
|
||||
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
|
||||
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
|
||||
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
|
||||
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
|
||||
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
|
||||
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
|
||||
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
|
||||
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
|
||||
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
|
||||
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
|
||||
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
|
||||
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
|
||||
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
|
||||
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
|
||||
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
|
||||
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
|
||||
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
|
||||
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
|
||||
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
|
||||
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
|
||||
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
|
||||
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
|
||||
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
|
||||
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
|
||||
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
|
||||
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
|
||||
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
|
||||
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
|
||||
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
|
||||
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
|
||||
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
|
||||
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
|
||||
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
|
||||
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
|
||||
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
|
||||
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
|
||||
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
|
||||
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
|
||||
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
|
||||
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
|
||||
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
|
||||
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
|
||||
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
|
||||
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
|
||||
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
|
||||
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
|
||||
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
|
||||
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
|
||||
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
|
||||
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
|
||||
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
|
||||
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
|
||||
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
|
||||
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
|
||||
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
|
||||
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
|
||||
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
|
||||
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
|
||||
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
|
||||
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
|
||||
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
|
||||
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
|
||||
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
|
||||
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
|
||||
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
|
||||
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
|
||||
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
|
||||
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
|
||||
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
|
||||
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
|
||||
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
|
||||
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
|
||||
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
|
||||
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
|
||||
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
|
||||
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
|
||||
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
|
||||
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
|
||||
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
|
||||
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
|
||||
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
|
||||
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
|
||||
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
|
||||
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
|
||||
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
|
||||
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
|
||||
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
|
||||
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
|
||||
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
|
||||
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
|
||||
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
|
||||
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
|
||||
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
|
||||
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
|
||||
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
|
||||
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
|
||||
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
|
||||
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
|
||||
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
|
||||
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
|
||||
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
|
||||
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
|
||||
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
|
||||
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
|
||||
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
|
||||
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
|
||||
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
|
||||
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
|
||||
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
|
||||
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
|
||||
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
|
||||
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
|
||||
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
|
||||
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
|
||||
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
|
||||
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
|
||||
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
|
||||
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
|
||||
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
|
||||
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
|
||||
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
|
||||
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
|
||||
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
|
||||
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
|
||||
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
|
||||
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
|
||||
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
|
||||
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
|
||||
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
|
||||
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
|
||||
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
|
||||
)
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCTWDistributionAnalysis
|
||||
from .mbcssm import EUCTW_SM_MODEL
|
||||
|
||||
class EUCTWProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(EUCTWProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
|
||||
self.distribution_analyzer = EUCTWDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-TW"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Taiwan"
|
||||
@@ -1,283 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# GB2312 most frequently used character table
|
||||
#
|
||||
# Char to FreqOrder table , from hz6763
|
||||
|
||||
# 512 --> 0.79 -- 0.79
|
||||
# 1024 --> 0.92 -- 0.13
|
||||
# 2048 --> 0.98 -- 0.06
|
||||
# 6768 --> 1.00 -- 0.02
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
|
||||
# Random Distribution Ration = 512 / (3755 - 512) = 0.157
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR
|
||||
|
||||
GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
|
||||
|
||||
GB2312_TABLE_SIZE = 3760
|
||||
|
||||
GB2312_CHAR_TO_FREQ_ORDER = (
|
||||
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
|
||||
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
|
||||
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
|
||||
249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670,
|
||||
1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820,
|
||||
1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585,
|
||||
152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566,
|
||||
1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575,
|
||||
2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853,
|
||||
3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061,
|
||||
544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155,
|
||||
1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406,
|
||||
927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816,
|
||||
2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606,
|
||||
360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023,
|
||||
2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414,
|
||||
1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513,
|
||||
3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052,
|
||||
198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570,
|
||||
1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575,
|
||||
253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250,
|
||||
2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506,
|
||||
1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26,
|
||||
3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835,
|
||||
1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686,
|
||||
2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054,
|
||||
1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894,
|
||||
585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105,
|
||||
3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403,
|
||||
3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694,
|
||||
252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873,
|
||||
3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940,
|
||||
836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121,
|
||||
1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 110,4549,2066, 648,
|
||||
3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992,
|
||||
2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233,
|
||||
1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157,
|
||||
755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807,
|
||||
1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094,
|
||||
4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258,
|
||||
887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478,
|
||||
3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152,
|
||||
3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909,
|
||||
509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272,
|
||||
1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221,
|
||||
2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252,
|
||||
1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301,
|
||||
1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254,
|
||||
389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070,
|
||||
3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461,
|
||||
3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360,
|
||||
4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124,
|
||||
296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535,
|
||||
3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243,
|
||||
1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713,
|
||||
1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071,
|
||||
4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442,
|
||||
215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946,
|
||||
814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257,
|
||||
3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180,
|
||||
1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427,
|
||||
602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781,
|
||||
1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724,
|
||||
2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937,
|
||||
930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943,
|
||||
432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789,
|
||||
396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552,
|
||||
3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246,
|
||||
4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451,
|
||||
3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310,
|
||||
750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860,
|
||||
2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297,
|
||||
2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780,
|
||||
2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745,
|
||||
776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936,
|
||||
2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032,
|
||||
968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657,
|
||||
163,2167, 290,1209,1622,3378, 550, 634,2508,2510, 695,2634,2384,2512,1476,1414,
|
||||
220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976,
|
||||
3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436,
|
||||
2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254,
|
||||
2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536,
|
||||
1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238,
|
||||
18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059,
|
||||
2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741,
|
||||
90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447,
|
||||
286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601,
|
||||
1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269,
|
||||
1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894,
|
||||
915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173,
|
||||
681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994,
|
||||
1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956,
|
||||
2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437,
|
||||
3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154,
|
||||
2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240,
|
||||
2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143,
|
||||
2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634,
|
||||
3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472,
|
||||
1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541,
|
||||
1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143,
|
||||
2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312,
|
||||
1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414,
|
||||
3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754,
|
||||
1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424,
|
||||
1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302,
|
||||
3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739,
|
||||
795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004,
|
||||
2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484,
|
||||
1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739,
|
||||
4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535,
|
||||
1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641,
|
||||
1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307,
|
||||
3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573,
|
||||
1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533,
|
||||
47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965,
|
||||
504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99,
|
||||
1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280,
|
||||
160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505,
|
||||
1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012,
|
||||
1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039,
|
||||
744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 892,2481,1623,4077, 982,
|
||||
3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530,
|
||||
4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392,
|
||||
3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656,
|
||||
2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220,
|
||||
2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766,
|
||||
1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535,
|
||||
3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728,
|
||||
2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338,
|
||||
1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627,
|
||||
1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885,
|
||||
125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411,
|
||||
2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671,
|
||||
2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162,
|
||||
3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774,
|
||||
4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524,
|
||||
3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346,
|
||||
180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040,
|
||||
3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188,
|
||||
2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280,
|
||||
1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131,
|
||||
259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947,
|
||||
774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970,
|
||||
3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814,
|
||||
4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557,
|
||||
2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997,
|
||||
1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972,
|
||||
1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369,
|
||||
766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376,
|
||||
1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480,
|
||||
3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610,
|
||||
955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128,
|
||||
642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769,
|
||||
1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207,
|
||||
57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392,
|
||||
1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623,
|
||||
193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782,
|
||||
2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650,
|
||||
158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478,
|
||||
2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773,
|
||||
2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007,
|
||||
1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323,
|
||||
1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598,
|
||||
2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961,
|
||||
819,1541, 142,2284, 44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302,
|
||||
1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409,
|
||||
1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683,
|
||||
2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191,
|
||||
2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616,
|
||||
3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302,
|
||||
1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774,
|
||||
4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147,
|
||||
571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731,
|
||||
845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464,
|
||||
3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377,
|
||||
1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315,
|
||||
470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557,
|
||||
3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903,
|
||||
1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060,
|
||||
4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261,
|
||||
1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092,
|
||||
2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810,
|
||||
1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708,
|
||||
498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658,
|
||||
1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871,
|
||||
3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503,
|
||||
448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229,
|
||||
2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112,
|
||||
136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504,
|
||||
1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389,
|
||||
1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27,
|
||||
1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542,
|
||||
3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861,
|
||||
2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845,
|
||||
3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700,
|
||||
3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469,
|
||||
3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582,
|
||||
996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999,
|
||||
2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274,
|
||||
786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020,
|
||||
2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601,
|
||||
12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628,
|
||||
1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31,
|
||||
475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668,
|
||||
233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778,
|
||||
1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169,
|
||||
3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667,
|
||||
3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 63,2076, 314,1881,
|
||||
1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276,
|
||||
1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320,
|
||||
3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751,
|
||||
2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432,
|
||||
2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772,
|
||||
1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843,
|
||||
3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116,
|
||||
451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904,
|
||||
4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652,
|
||||
1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664,
|
||||
2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770,
|
||||
3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283,
|
||||
3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626,
|
||||
1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713,
|
||||
768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333,
|
||||
391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062,
|
||||
2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555,
|
||||
931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014,
|
||||
1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510,
|
||||
386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015,
|
||||
1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459,
|
||||
1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390,
|
||||
1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238,
|
||||
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232,
|
||||
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624,
|
||||
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
|
||||
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512
|
||||
)
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import GB2312DistributionAnalysis
|
||||
from .mbcssm import GB2312_SM_MODEL
|
||||
|
||||
class GB2312Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(GB2312Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
|
||||
self.distribution_analyzer = GB2312DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "GB2312"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Chinese"
|
||||
@@ -1,292 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Shy Shalom
|
||||
# Portions created by the Initial Developer are Copyright (C) 2005
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
# This prober doesn't actually recognize a language or a charset.
|
||||
# It is a helper prober for the use of the Hebrew model probers
|
||||
|
||||
### General ideas of the Hebrew charset recognition ###
|
||||
#
|
||||
# Four main charsets exist in Hebrew:
|
||||
# "ISO-8859-8" - Visual Hebrew
|
||||
# "windows-1255" - Logical Hebrew
|
||||
# "ISO-8859-8-I" - Logical Hebrew
|
||||
# "x-mac-hebrew" - ?? Logical Hebrew ??
|
||||
#
|
||||
# Both "ISO" charsets use a completely identical set of code points, whereas
|
||||
# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
|
||||
# these code points. windows-1255 defines additional characters in the range
|
||||
# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
|
||||
# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
|
||||
# x-mac-hebrew defines similar additional code points but with a different
|
||||
# mapping.
|
||||
#
|
||||
# As far as an average Hebrew text with no diacritics is concerned, all four
|
||||
# charsets are identical with respect to code points. Meaning that for the
|
||||
# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
|
||||
# (including final letters).
|
||||
#
|
||||
# The dominant difference between these charsets is their directionality.
|
||||
# "Visual" directionality means that the text is ordered as if the renderer is
|
||||
# not aware of a BIDI rendering algorithm. The renderer sees the text and
|
||||
# draws it from left to right. The text itself when ordered naturally is read
|
||||
# backwards. A buffer of Visual Hebrew generally looks like so:
|
||||
# "[last word of first line spelled backwards] [whole line ordered backwards
|
||||
# and spelled backwards] [first word of first line spelled backwards]
|
||||
# [end of line] [last word of second line] ... etc' "
|
||||
# adding punctuation marks, numbers and English text to visual text is
|
||||
# naturally also "visual" and from left to right.
|
||||
#
|
||||
# "Logical" directionality means the text is ordered "naturally" according to
|
||||
# the order it is read. It is the responsibility of the renderer to display
|
||||
# the text from right to left. A BIDI algorithm is used to place general
|
||||
# punctuation marks, numbers and English text in the text.
|
||||
#
|
||||
# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
|
||||
# what little evidence I could find, it seems that its general directionality
|
||||
# is Logical.
|
||||
#
|
||||
# To sum up all of the above, the Hebrew probing mechanism knows about two
|
||||
# charsets:
|
||||
# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
|
||||
# backwards while line order is natural. For charset recognition purposes
|
||||
# the line order is unimportant (In fact, for this implementation, even
|
||||
# word order is unimportant).
|
||||
# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
|
||||
#
|
||||
# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
|
||||
# specifically identified.
|
||||
# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
|
||||
# that contain special punctuation marks or diacritics is displayed with
|
||||
# some unconverted characters showing as question marks. This problem might
|
||||
# be corrected using another model prober for x-mac-hebrew. Due to the fact
|
||||
# that x-mac-hebrew texts are so rare, writing another model prober isn't
|
||||
# worth the effort and performance hit.
|
||||
#
|
||||
#### The Prober ####
|
||||
#
|
||||
# The prober is divided between two SBCharSetProbers and a HebrewProber,
|
||||
# all of which are managed, created, fed data, inquired and deleted by the
|
||||
# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
|
||||
# fact some kind of Hebrew, Logical or Visual. The final decision about which
|
||||
# one is it is made by the HebrewProber by combining final-letter scores
|
||||
# with the scores of the two SBCharSetProbers to produce a final answer.
|
||||
#
|
||||
# The SBCSGroupProber is responsible for stripping the original text of HTML
|
||||
# tags, English characters, numbers, low-ASCII punctuation characters, spaces
|
||||
# and new lines. It reduces any sequence of such characters to a single space.
|
||||
# The buffer fed to each prober in the SBCS group prober is pure text in
|
||||
# high-ASCII.
|
||||
# The two SBCharSetProbers (model probers) share the same language model:
|
||||
# Win1255Model.
|
||||
# The first SBCharSetProber uses the model normally as any other
|
||||
# SBCharSetProber does, to recognize windows-1255, upon which this model was
|
||||
# built. The second SBCharSetProber is told to make the pair-of-letter
|
||||
# lookup in the language model backwards. This in practice exactly simulates
|
||||
# a visual Hebrew model using the windows-1255 logical Hebrew model.
|
||||
#
|
||||
# The HebrewProber is not using any language model. All it does is look for
|
||||
# final-letter evidence suggesting the text is either logical Hebrew or visual
|
||||
# Hebrew. Disjointed from the model probers, the results of the HebrewProber
|
||||
# alone are meaningless. HebrewProber always returns 0.00 as confidence
|
||||
# since it never identifies a charset by itself. Instead, the pointer to the
|
||||
# HebrewProber is passed to the model probers as a helper "Name Prober".
|
||||
# When the Group prober receives a positive identification from any prober,
|
||||
# it asks for the name of the charset identified. If the prober queried is a
|
||||
# Hebrew model prober, the model prober forwards the call to the
|
||||
# HebrewProber to make the final decision. In the HebrewProber, the
|
||||
# decision is made according to the final-letters scores maintained and Both
|
||||
# model probers scores. The answer is returned in the form of the name of the
|
||||
# charset identified, either "windows-1255" or "ISO-8859-8".
|
||||
|
||||
class HebrewProber(CharSetProber):
|
||||
# windows-1255 / ISO-8859-8 code points of interest
|
||||
FINAL_KAF = 0xea
|
||||
NORMAL_KAF = 0xeb
|
||||
FINAL_MEM = 0xed
|
||||
NORMAL_MEM = 0xee
|
||||
FINAL_NUN = 0xef
|
||||
NORMAL_NUN = 0xf0
|
||||
FINAL_PE = 0xf3
|
||||
NORMAL_PE = 0xf4
|
||||
FINAL_TSADI = 0xf5
|
||||
NORMAL_TSADI = 0xf6
|
||||
|
||||
# Minimum Visual vs Logical final letter score difference.
|
||||
# If the difference is below this, don't rely solely on the final letter score
|
||||
# distance.
|
||||
MIN_FINAL_CHAR_DISTANCE = 5
|
||||
|
||||
# Minimum Visual vs Logical model score difference.
|
||||
# If the difference is below this, don't rely at all on the model score
|
||||
# distance.
|
||||
MIN_MODEL_DISTANCE = 0.01
|
||||
|
||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||
|
||||
def __init__(self):
|
||||
super(HebrewProber, self).__init__()
|
||||
self._final_char_logical_score = None
|
||||
self._final_char_visual_score = None
|
||||
self._prev = None
|
||||
self._before_prev = None
|
||||
self._logical_prober = None
|
||||
self._visual_prober = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._final_char_logical_score = 0
|
||||
self._final_char_visual_score = 0
|
||||
# The two last characters seen in the previous buffer,
|
||||
# mPrev and mBeforePrev are initialized to space in order to simulate
|
||||
# a word delimiter at the beginning of the data
|
||||
self._prev = ' '
|
||||
self._before_prev = ' '
|
||||
# These probers are owned by the group prober.
|
||||
|
||||
def set_model_probers(self, logicalProber, visualProber):
|
||||
self._logical_prober = logicalProber
|
||||
self._visual_prober = visualProber
|
||||
|
||||
def is_final(self, c):
|
||||
return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
|
||||
self.FINAL_PE, self.FINAL_TSADI]
|
||||
|
||||
def is_non_final(self, c):
|
||||
# The normal Tsadi is not a good Non-Final letter due to words like
|
||||
# 'lechotet' (to chat) containing an apostrophe after the tsadi. This
|
||||
# apostrophe is converted to a space in FilterWithoutEnglishLetters
|
||||
# causing the Non-Final tsadi to appear at an end of a word even
|
||||
# though this is not the case in the original text.
|
||||
# The letters Pe and Kaf rarely display a related behavior of not being
|
||||
# a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
|
||||
# for example legally end with a Non-Final Pe or Kaf. However, the
|
||||
# benefit of these letters as Non-Final letters outweighs the damage
|
||||
# since these words are quite rare.
|
||||
return c in [self.NORMAL_KAF, self.NORMAL_MEM,
|
||||
self.NORMAL_NUN, self.NORMAL_PE]
|
||||
|
||||
def feed(self, byte_str):
|
||||
# Final letter analysis for logical-visual decision.
|
||||
# Look for evidence that the received buffer is either logical Hebrew
|
||||
# or visual Hebrew.
|
||||
# The following cases are checked:
|
||||
# 1) A word longer than 1 letter, ending with a final letter. This is
|
||||
# an indication that the text is laid out "naturally" since the
|
||||
# final letter really appears at the end. +1 for logical score.
|
||||
# 2) A word longer than 1 letter, ending with a Non-Final letter. In
|
||||
# normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
|
||||
# should not end with the Non-Final form of that letter. Exceptions
|
||||
# to this rule are mentioned above in isNonFinal(). This is an
|
||||
# indication that the text is laid out backwards. +1 for visual
|
||||
# score
|
||||
# 3) A word longer than 1 letter, starting with a final letter. Final
|
||||
# letters should not appear at the beginning of a word. This is an
|
||||
# indication that the text is laid out backwards. +1 for visual
|
||||
# score.
|
||||
#
|
||||
# The visual score and logical score are accumulated throughout the
|
||||
# text and are finally checked against each other in GetCharSetName().
|
||||
# No checking for final letters in the middle of words is done since
|
||||
# that case is not an indication for either Logical or Visual text.
|
||||
#
|
||||
# We automatically filter out all 7-bit characters (replace them with
|
||||
# spaces) so the word boundary detection works properly. [MAP]
|
||||
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
# Both model probers say it's not them. No reason to continue.
|
||||
return ProbingState.NOT_ME
|
||||
|
||||
byte_str = self.filter_high_byte_only(byte_str)
|
||||
|
||||
for cur in byte_str:
|
||||
if cur == ' ':
|
||||
# We stand on a space - a word just ended
|
||||
if self._before_prev != ' ':
|
||||
# next-to-last char was not a space so self._prev is not a
|
||||
# 1 letter word
|
||||
if self.is_final(self._prev):
|
||||
# case (1) [-2:not space][-1:final letter][cur:space]
|
||||
self._final_char_logical_score += 1
|
||||
elif self.is_non_final(self._prev):
|
||||
# case (2) [-2:not space][-1:Non-Final letter][
|
||||
# cur:space]
|
||||
self._final_char_visual_score += 1
|
||||
else:
|
||||
# Not standing on a space
|
||||
if ((self._before_prev == ' ') and
|
||||
(self.is_final(self._prev)) and (cur != ' ')):
|
||||
# case (3) [-2:space][-1:final letter][cur:not space]
|
||||
self._final_char_visual_score += 1
|
||||
self._before_prev = self._prev
|
||||
self._prev = cur
|
||||
|
||||
# Forever detecting, till the end or until both model probers return
|
||||
# ProbingState.NOT_ME (handled above)
|
||||
return ProbingState.DETECTING
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
# Make the decision: is it Logical or Visual?
|
||||
# If the final letter score distance is dominant enough, rely on it.
|
||||
finalsub = self._final_char_logical_score - self._final_char_visual_score
|
||||
if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# It's not dominant enough, try to rely on the model scores instead.
|
||||
modelsub = (self._logical_prober.get_confidence()
|
||||
- self._visual_prober.get_confidence())
|
||||
if modelsub > self.MIN_MODEL_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if modelsub < -self.MIN_MODEL_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# Still no good, back to final letter distance, maybe it'll save the
|
||||
# day.
|
||||
if finalsub < 0.0:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# (finalsub > 0 - Logical) or (don't know what to do) default to
|
||||
# Logical.
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return 'Hebrew'
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
# Remain active as long as any of the model probers are active.
|
||||
if (self._logical_prober.state == ProbingState.NOT_ME) and \
|
||||
(self._visual_prober.state == ProbingState.NOT_ME):
|
||||
return ProbingState.NOT_ME
|
||||
return ProbingState.DETECTING
|
||||
@@ -1,325 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Sampling from about 20M text materials include literature and computer technology
|
||||
#
|
||||
# Japanese frequency table, applied to both S-JIS and EUC-JP
|
||||
# They are sorted in order.
|
||||
|
||||
# 128 --> 0.77094
|
||||
# 256 --> 0.85710
|
||||
# 512 --> 0.92635
|
||||
# 1024 --> 0.97130
|
||||
# 2048 --> 0.99431
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
|
||||
# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191
|
||||
#
|
||||
# Typical Distribution Ratio, 25% of IDR
|
||||
|
||||
JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
JIS_TABLE_SIZE = 4368
|
||||
|
||||
JIS_CHAR_TO_FREQ_ORDER = (
|
||||
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
|
||||
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
|
||||
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
|
||||
2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64
|
||||
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80
|
||||
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96
|
||||
1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112
|
||||
5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128
|
||||
5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144
|
||||
5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160
|
||||
5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176
|
||||
5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192
|
||||
5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208
|
||||
1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224
|
||||
1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240
|
||||
1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256
|
||||
2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272
|
||||
3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288
|
||||
3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304
|
||||
4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320
|
||||
12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336
|
||||
1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352
|
||||
109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368
|
||||
5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384
|
||||
271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400
|
||||
32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416
|
||||
43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432
|
||||
280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448
|
||||
54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464
|
||||
5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480
|
||||
5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496
|
||||
5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512
|
||||
4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528
|
||||
5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544
|
||||
5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560
|
||||
5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576
|
||||
5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592
|
||||
5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608
|
||||
5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624
|
||||
5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640
|
||||
5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656
|
||||
5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672
|
||||
3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688
|
||||
5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704
|
||||
5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720
|
||||
5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736
|
||||
5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752
|
||||
5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768
|
||||
5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784
|
||||
5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800
|
||||
5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816
|
||||
5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832
|
||||
5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848
|
||||
5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864
|
||||
5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880
|
||||
5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896
|
||||
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912
|
||||
5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928
|
||||
5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944
|
||||
5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960
|
||||
5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976
|
||||
5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992
|
||||
5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008
|
||||
5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024
|
||||
5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040
|
||||
5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056
|
||||
5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072
|
||||
5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088
|
||||
5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104
|
||||
5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120
|
||||
5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136
|
||||
5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152
|
||||
5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168
|
||||
5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184
|
||||
5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200
|
||||
5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216
|
||||
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232
|
||||
5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248
|
||||
5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264
|
||||
5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280
|
||||
5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296
|
||||
6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312
|
||||
6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328
|
||||
6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344
|
||||
6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360
|
||||
6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376
|
||||
6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392
|
||||
6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408
|
||||
6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424
|
||||
4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440
|
||||
854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456
|
||||
665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472
|
||||
1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488
|
||||
1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504
|
||||
896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520
|
||||
3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536
|
||||
3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552
|
||||
804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568
|
||||
3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584
|
||||
3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600
|
||||
586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616
|
||||
2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632
|
||||
277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648
|
||||
3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664
|
||||
1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680
|
||||
380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696
|
||||
1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712
|
||||
850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728
|
||||
2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744
|
||||
2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760
|
||||
2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776
|
||||
2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792
|
||||
1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808
|
||||
1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824
|
||||
1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840
|
||||
1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856
|
||||
2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 1872
|
||||
1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888
|
||||
2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904
|
||||
1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920
|
||||
1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936
|
||||
1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952
|
||||
1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968
|
||||
1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984
|
||||
1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000
|
||||
606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016
|
||||
684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032
|
||||
1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048
|
||||
2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064
|
||||
2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080
|
||||
2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096
|
||||
3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112
|
||||
3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128
|
||||
884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144
|
||||
3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160
|
||||
1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176
|
||||
861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192
|
||||
2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208
|
||||
1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224
|
||||
576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240
|
||||
3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256
|
||||
4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272
|
||||
2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288
|
||||
1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304
|
||||
2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320
|
||||
1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336
|
||||
385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352
|
||||
178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368
|
||||
1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384
|
||||
2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400
|
||||
2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416
|
||||
2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432
|
||||
3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448
|
||||
1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464
|
||||
2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480
|
||||
359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496
|
||||
837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, # 2512
|
||||
855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528
|
||||
1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544
|
||||
2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560
|
||||
633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576
|
||||
1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592
|
||||
1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608
|
||||
353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624
|
||||
1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640
|
||||
1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656
|
||||
1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672
|
||||
764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688
|
||||
2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704
|
||||
278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720
|
||||
2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736
|
||||
3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752
|
||||
2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768
|
||||
1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784
|
||||
6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800
|
||||
1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816
|
||||
2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832
|
||||
1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848
|
||||
470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864
|
||||
72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880
|
||||
3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896
|
||||
3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912
|
||||
1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928
|
||||
1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944
|
||||
1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960
|
||||
1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976
|
||||
123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992
|
||||
913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008
|
||||
2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024
|
||||
900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040
|
||||
3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056
|
||||
2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072
|
||||
423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088
|
||||
1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104
|
||||
2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120
|
||||
220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136
|
||||
1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, # 3152
|
||||
745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168
|
||||
4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184
|
||||
2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200
|
||||
1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216
|
||||
666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232
|
||||
1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248
|
||||
2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264
|
||||
376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280
|
||||
6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296
|
||||
1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312
|
||||
1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328
|
||||
2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344
|
||||
3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360
|
||||
914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376
|
||||
3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392
|
||||
1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408
|
||||
674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424
|
||||
1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440
|
||||
199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456
|
||||
3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472
|
||||
370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488
|
||||
2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504
|
||||
414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520
|
||||
4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536
|
||||
2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552
|
||||
1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568
|
||||
1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584
|
||||
1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600
|
||||
166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616
|
||||
1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632
|
||||
3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648
|
||||
1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664
|
||||
3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680
|
||||
264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696
|
||||
543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712
|
||||
983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728
|
||||
2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744
|
||||
1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760
|
||||
867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776
|
||||
1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, # 3792
|
||||
894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808
|
||||
1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824
|
||||
530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840
|
||||
839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856
|
||||
480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872
|
||||
1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888
|
||||
1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904
|
||||
2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920
|
||||
4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936
|
||||
227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952
|
||||
1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968
|
||||
328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984
|
||||
1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000
|
||||
3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016
|
||||
1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032
|
||||
2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048
|
||||
2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064
|
||||
1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080
|
||||
1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096
|
||||
2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112
|
||||
455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128
|
||||
2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144
|
||||
1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160
|
||||
1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176
|
||||
1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192
|
||||
1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208
|
||||
3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224
|
||||
2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240
|
||||
2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256
|
||||
575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272
|
||||
3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288
|
||||
3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304
|
||||
1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320
|
||||
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336
|
||||
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
|
||||
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
|
||||
)
|
||||
|
||||
|
||||
@@ -1,233 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
||||
jp2CharContext = (
|
||||
(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
|
||||
(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
|
||||
(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
|
||||
(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
|
||||
(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
|
||||
(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
|
||||
(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
|
||||
(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
|
||||
(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
|
||||
(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
|
||||
(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
|
||||
(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
|
||||
(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
|
||||
(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
|
||||
(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
|
||||
(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
|
||||
(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
|
||||
(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
|
||||
(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
|
||||
(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
|
||||
(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
|
||||
(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
|
||||
(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
|
||||
(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
|
||||
(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
|
||||
(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
|
||||
(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
|
||||
(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
|
||||
(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
|
||||
(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
|
||||
(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
|
||||
(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
|
||||
(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
|
||||
(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
|
||||
(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
|
||||
(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
|
||||
(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
|
||||
(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
|
||||
(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
|
||||
(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
|
||||
(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
|
||||
(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
|
||||
(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
|
||||
(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
|
||||
(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
|
||||
(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
|
||||
(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
|
||||
(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
|
||||
(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
|
||||
(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
|
||||
(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
|
||||
(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
|
||||
(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
|
||||
(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
|
||||
(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
|
||||
(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
|
||||
(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
|
||||
(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
|
||||
(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
|
||||
(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
|
||||
(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
|
||||
(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
|
||||
(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
|
||||
(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
|
||||
(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
|
||||
(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
|
||||
(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
|
||||
(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
|
||||
(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
|
||||
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
|
||||
)
|
||||
|
||||
class JapaneseContextAnalysis(object):
|
||||
NUM_OF_CATEGORY = 6
|
||||
DONT_KNOW = -1
|
||||
ENOUGH_REL_THRESHOLD = 100
|
||||
MAX_REL_THRESHOLD = 1000
|
||||
MINIMUM_DATA_THRESHOLD = 4
|
||||
|
||||
def __init__(self):
|
||||
self._total_rel = None
|
||||
self._rel_sample = None
|
||||
self._need_to_skip_char_num = None
|
||||
self._last_char_order = None
|
||||
self._done = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._total_rel = 0 # total sequence received
|
||||
# category counters, each integer counts sequence in its category
|
||||
self._rel_sample = [0] * self.NUM_OF_CATEGORY
|
||||
# if last byte in current buffer is not the last byte of a character,
|
||||
# we need to know how many bytes to skip in next buffer
|
||||
self._need_to_skip_char_num = 0
|
||||
self._last_char_order = -1 # The order of previous char
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._done = False
|
||||
|
||||
def feed(self, byte_str, num_bytes):
|
||||
if self._done:
|
||||
return
|
||||
|
||||
# The buffer we got is byte oriented, and a character may span in more than one
|
||||
# buffers. In case the last one or two byte in last buffer is not
|
||||
# complete, we record how many byte needed to complete that character
|
||||
# and skip these bytes here. We can choose to record those bytes as
|
||||
# well and analyse the character once it is complete, but since a
|
||||
# character will not make much difference, by simply skipping
|
||||
# this character will simply our logic and improve performance.
|
||||
i = self._need_to_skip_char_num
|
||||
while i < num_bytes:
|
||||
order, char_len = self.get_order(byte_str[i:i + 2])
|
||||
i += char_len
|
||||
if i > num_bytes:
|
||||
self._need_to_skip_char_num = i - num_bytes
|
||||
self._last_char_order = -1
|
||||
else:
|
||||
if (order != -1) and (self._last_char_order != -1):
|
||||
self._total_rel += 1
|
||||
if self._total_rel > self.MAX_REL_THRESHOLD:
|
||||
self._done = True
|
||||
break
|
||||
self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
|
||||
self._last_char_order = order
|
||||
|
||||
def got_enough_data(self):
|
||||
return self._total_rel > self.ENOUGH_REL_THRESHOLD
|
||||
|
||||
def get_confidence(self):
|
||||
# This is just one way to calculate confidence. It works well for me.
|
||||
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
|
||||
return (self._total_rel - self._rel_sample[0]) / self._total_rel
|
||||
else:
|
||||
return self.DONT_KNOW
|
||||
|
||||
def get_order(self, byte_str):
|
||||
return -1, 1
|
||||
|
||||
class SJISContextAnalysis(JapaneseContextAnalysis):
|
||||
def __init__(self):
|
||||
super(SJISContextAnalysis, self).__init__()
|
||||
self._charset_name = "SHIFT_JIS"
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self._charset_name
|
||||
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = byte_str[0]
|
||||
if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
|
||||
char_len = 2
|
||||
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
|
||||
self._charset_name = "CP932"
|
||||
else:
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
|
||||
return second_char - 0x9F, char_len
|
||||
|
||||
return -1, char_len
|
||||
|
||||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = byte_str[0]
|
||||
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
|
||||
char_len = 2
|
||||
elif first_char == 0x8F:
|
||||
char_len = 3
|
||||
else:
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
|
||||
return second_char - 0xA1, char_len
|
||||
|
||||
return -1, char_len
|
||||
|
||||
|
||||
@@ -1,228 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
# this table is modified base on win1251BulgarianCharToOrderMap, so
|
||||
# only number <64 is sure valid
|
||||
|
||||
Latin5_BulgarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
|
||||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
|
||||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
|
||||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
|
||||
194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80
|
||||
210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90
|
||||
81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0
|
||||
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0
|
||||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0
|
||||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0
|
||||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0
|
||||
62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0
|
||||
)
|
||||
|
||||
win1251BulgarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
|
||||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
|
||||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
|
||||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
|
||||
206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80
|
||||
221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90
|
||||
88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0
|
||||
73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0
|
||||
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0
|
||||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0
|
||||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0
|
||||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 96.9392%
|
||||
# first 1024 sequences:3.0618%
|
||||
# rest sequences: 0.2992%
|
||||
# negative sequences: 0.0020%
|
||||
BulgarianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
|
||||
3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
|
||||
0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,
|
||||
0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,
|
||||
0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,
|
||||
2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,
|
||||
3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,
|
||||
1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,
|
||||
3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,
|
||||
1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,
|
||||
2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,
|
||||
2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,
|
||||
3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,
|
||||
1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,
|
||||
2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,
|
||||
2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
|
||||
3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,
|
||||
1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,
|
||||
2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,
|
||||
2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,
|
||||
2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,
|
||||
1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,
|
||||
2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,
|
||||
1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,
|
||||
3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,
|
||||
1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,
|
||||
3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,
|
||||
1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,
|
||||
2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,
|
||||
1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,
|
||||
2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,
|
||||
1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,
|
||||
2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,
|
||||
1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
|
||||
1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,
|
||||
1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,
|
||||
2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,
|
||||
1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
|
||||
2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,
|
||||
1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,
|
||||
0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,
|
||||
1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,
|
||||
1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,
|
||||
0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,
|
||||
0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,
|
||||
1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,
|
||||
1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
|
||||
1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
)
|
||||
|
||||
Latin5BulgarianModel = {
|
||||
'char_to_order_map': Latin5_BulgarianCharToOrderMap,
|
||||
'precedence_matrix': BulgarianLangModel,
|
||||
'typical_positive_ratio': 0.969392,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "ISO-8859-5",
|
||||
'language': 'Bulgairan',
|
||||
}
|
||||
|
||||
Win1251BulgarianModel = {
|
||||
'char_to_order_map': win1251BulgarianCharToOrderMap,
|
||||
'precedence_matrix': BulgarianLangModel,
|
||||
'typical_positive_ratio': 0.969392,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1251",
|
||||
'language': 'Bulgarian',
|
||||
}
|
||||
@@ -1,333 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# KOI8-R language model
|
||||
# Character Mapping Table:
|
||||
KOI8R_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90
|
||||
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0
|
||||
238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0
|
||||
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0
|
||||
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0
|
||||
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0
|
||||
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
|
||||
)
|
||||
|
||||
win1251_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
)
|
||||
|
||||
latin5_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
||||
)
|
||||
|
||||
macCyrillic_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
|
||||
)
|
||||
|
||||
IBM855_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
|
||||
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
|
||||
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
|
||||
220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,
|
||||
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
|
||||
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
|
||||
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
|
||||
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
|
||||
)
|
||||
|
||||
IBM866_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 97.6601%
|
||||
# first 1024 sequences: 2.3389%
|
||||
# rest sequences: 0.1237%
|
||||
# negative sequences: 0.0009%
|
||||
RussianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
|
||||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
|
||||
0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
|
||||
0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
|
||||
1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
|
||||
1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
|
||||
2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
|
||||
1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
|
||||
3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
|
||||
1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
|
||||
2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
|
||||
1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
|
||||
1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
|
||||
1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
|
||||
2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
|
||||
1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
|
||||
3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
|
||||
1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
|
||||
2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
|
||||
1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
|
||||
2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
|
||||
1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
|
||||
1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
|
||||
1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
|
||||
3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
|
||||
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
|
||||
3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
|
||||
1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
|
||||
1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
|
||||
0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
|
||||
1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
|
||||
1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
|
||||
0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
|
||||
1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
|
||||
2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
|
||||
1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
|
||||
1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
|
||||
2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
|
||||
1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
|
||||
0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
|
||||
2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
|
||||
1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
|
||||
1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
|
||||
0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
|
||||
0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
|
||||
0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
|
||||
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
|
||||
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
|
||||
0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
|
||||
0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
||||
)
|
||||
|
||||
Koi8rModel = {
|
||||
'char_to_order_map': KOI8R_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "KOI8-R",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Win1251CyrillicModel = {
|
||||
'char_to_order_map': win1251_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1251",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Latin5CyrillicModel = {
|
||||
'char_to_order_map': latin5_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "ISO-8859-5",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
MacCyrillicModel = {
|
||||
'char_to_order_map': macCyrillic_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "MacCyrillic",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Ibm866Model = {
|
||||
'char_to_order_map': IBM866_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "IBM866",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Ibm855Model = {
|
||||
'char_to_order_map': IBM855_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "IBM855",
|
||||
'language': 'Russian',
|
||||
}
|
||||
@@ -1,225 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin7_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
|
||||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
|
||||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
|
||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
|
||||
253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
|
||||
253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0
|
||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
|
||||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
|
||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
|
||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
||||
)
|
||||
|
||||
win1253_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
|
||||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
|
||||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
|
||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
|
||||
253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
|
||||
253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0
|
||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
|
||||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
|
||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
|
||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 98.2851%
|
||||
# first 1024 sequences:1.7001%
|
||||
# rest sequences: 0.0359%
|
||||
# negative sequences: 0.0148%
|
||||
GreekLangModel = (
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
|
||||
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
|
||||
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
|
||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
|
||||
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
|
||||
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
|
||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
|
||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
|
||||
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
|
||||
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
|
||||
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
|
||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
|
||||
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
|
||||
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
|
||||
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
|
||||
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
|
||||
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
|
||||
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
|
||||
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
|
||||
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
|
||||
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
|
||||
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
|
||||
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
|
||||
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
|
||||
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
|
||||
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
|
||||
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
|
||||
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
|
||||
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
|
||||
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
|
||||
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
|
||||
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
|
||||
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
|
||||
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
|
||||
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
|
||||
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
|
||||
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin7GreekModel = {
|
||||
'char_to_order_map': Latin7_char_to_order_map,
|
||||
'precedence_matrix': GreekLangModel,
|
||||
'typical_positive_ratio': 0.982851,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "ISO-8859-7",
|
||||
'language': 'Greek',
|
||||
}
|
||||
|
||||
Win1253GreekModel = {
|
||||
'char_to_order_map': win1253_char_to_order_map,
|
||||
'precedence_matrix': GreekLangModel,
|
||||
'typical_positive_ratio': 0.982851,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1253",
|
||||
'language': 'Greek',
|
||||
}
|
||||
@@ -1,200 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Simon Montagu
|
||||
# Portions created by the Initial Developer are Copyright (C) 2005
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Shoshannah Forbes - original C code (?)
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Windows-1255 language model
|
||||
# Character Mapping Table:
|
||||
WIN1255_CHAR_TO_ORDER_MAP = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40
|
||||
78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50
|
||||
253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60
|
||||
66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70
|
||||
124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
|
||||
215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
|
||||
34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
|
||||
106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
|
||||
30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
|
||||
238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
|
||||
9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
|
||||
12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 98.4004%
|
||||
# first 1024 sequences: 1.5981%
|
||||
# rest sequences: 0.087%
|
||||
# negative sequences: 0.0015%
|
||||
HEBREW_LANG_MODEL = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
|
||||
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
|
||||
1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
|
||||
1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
|
||||
1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
|
||||
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
|
||||
1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
|
||||
0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
|
||||
0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
|
||||
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
|
||||
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
|
||||
0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
|
||||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
|
||||
0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
|
||||
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
|
||||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
|
||||
0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
|
||||
0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
|
||||
0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
|
||||
0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
|
||||
0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
|
||||
0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
|
||||
0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
|
||||
1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
|
||||
0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
|
||||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
|
||||
0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
|
||||
0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
|
||||
0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
|
||||
2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
|
||||
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
|
||||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
|
||||
0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
|
||||
1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
|
||||
0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
|
||||
2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
|
||||
1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
|
||||
2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
|
||||
2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
|
||||
0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
|
||||
0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
|
||||
)
|
||||
|
||||
Win1255HebrewModel = {
|
||||
'char_to_order_map': WIN1255_CHAR_TO_ORDER_MAP,
|
||||
'precedence_matrix': HEBREW_LANG_MODEL,
|
||||
'typical_positive_ratio': 0.984004,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1255",
|
||||
'language': 'Hebrew',
|
||||
}
|
||||
@@ -1,225 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin2_HungarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
|
||||
46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
|
||||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
|
||||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
|
||||
159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
|
||||
175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
|
||||
191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
|
||||
79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
|
||||
221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
|
||||
232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
|
||||
82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
|
||||
245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
|
||||
)
|
||||
|
||||
win1250HungarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
|
||||
46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
|
||||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
|
||||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
|
||||
161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
|
||||
177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
|
||||
191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
|
||||
81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
|
||||
221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
|
||||
232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
|
||||
84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
|
||||
245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 94.7368%
|
||||
# first 1024 sequences:5.2623%
|
||||
# rest sequences: 0.8894%
|
||||
# negative sequences: 0.0009%
|
||||
HungarianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
|
||||
3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
|
||||
3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
|
||||
0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
|
||||
0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
|
||||
1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
|
||||
1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
|
||||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
|
||||
3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
|
||||
2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
|
||||
2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
|
||||
2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
|
||||
2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
|
||||
2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
|
||||
3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
|
||||
2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
|
||||
2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
|
||||
2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
|
||||
1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
|
||||
1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
|
||||
3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
|
||||
1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
|
||||
1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
|
||||
2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
|
||||
2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
|
||||
2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
|
||||
3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
|
||||
2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
|
||||
1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
|
||||
1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
|
||||
2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
|
||||
1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
|
||||
1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
|
||||
2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
|
||||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
|
||||
1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
|
||||
2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
|
||||
2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
|
||||
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
|
||||
1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
|
||||
1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
|
||||
1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
|
||||
0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
|
||||
2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
|
||||
2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
|
||||
1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
|
||||
2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
|
||||
1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
|
||||
1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
|
||||
2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
|
||||
2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
|
||||
2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
|
||||
1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
|
||||
2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
|
||||
0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin2HungarianModel = {
|
||||
'char_to_order_map': Latin2_HungarianCharToOrderMap,
|
||||
'precedence_matrix': HungarianLangModel,
|
||||
'typical_positive_ratio': 0.947368,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "ISO-8859-2",
|
||||
'language': 'Hungarian',
|
||||
}
|
||||
|
||||
Win1250HungarianModel = {
|
||||
'char_to_order_map': win1250HungarianCharToOrderMap,
|
||||
'precedence_matrix': HungarianLangModel,
|
||||
'typical_positive_ratio': 0.947368,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "windows-1250",
|
||||
'language': 'Hungarian',
|
||||
}
|
||||
@@ -1,199 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# The following result for thai was collected from a limited sample (1M).
|
||||
|
||||
# Character Mapping Table:
|
||||
TIS620CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40
|
||||
188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50
|
||||
253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60
|
||||
96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70
|
||||
209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
|
||||
223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
|
||||
236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
|
||||
49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
|
||||
45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
|
||||
22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
|
||||
11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
|
||||
68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 92.6386%
|
||||
# first 1024 sequences:7.3177%
|
||||
# rest sequences: 1.0230%
|
||||
# negative sequences: 0.0436%
|
||||
ThaiLangModel = (
|
||||
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
|
||||
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
|
||||
3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
|
||||
0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
||||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
|
||||
3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
||||
3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
|
||||
3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
|
||||
3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
|
||||
3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
|
||||
2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
|
||||
3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
|
||||
0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
|
||||
0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
|
||||
1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
|
||||
3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
|
||||
1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
|
||||
0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
|
||||
0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
|
||||
2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
|
||||
0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
|
||||
3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
||||
3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
|
||||
2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||
3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
|
||||
2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
|
||||
3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
|
||||
3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
|
||||
3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
|
||||
3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
|
||||
1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
|
||||
0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
|
||||
0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
|
||||
3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
|
||||
3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
|
||||
1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
|
||||
3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
|
||||
3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
|
||||
0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
|
||||
0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
|
||||
1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
|
||||
1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
|
||||
3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
|
||||
0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
|
||||
3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
|
||||
0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
|
||||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
|
||||
0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
|
||||
0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
|
||||
0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
|
||||
0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
|
||||
0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
|
||||
0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
|
||||
2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
|
||||
0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
|
||||
3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
|
||||
1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
|
||||
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
TIS620ThaiModel = {
|
||||
'char_to_order_map': TIS620CharToOrderMap,
|
||||
'precedence_matrix': ThaiLangModel,
|
||||
'typical_positive_ratio': 0.926386,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "TIS-620",
|
||||
'language': 'Thai',
|
||||
}
|
||||
@@ -1,193 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Özgür Baskın - Turkish Language Model
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin5_TurkishCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42,
|
||||
48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255,
|
||||
255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15,
|
||||
26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255,
|
||||
180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165,
|
||||
164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106,
|
||||
150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136,
|
||||
94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125,
|
||||
124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119,
|
||||
68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86,
|
||||
89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96,
|
||||
30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107,
|
||||
)
|
||||
|
||||
TurkishLangModel = (
|
||||
3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3,
|
||||
3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3,
|
||||
3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1,
|
||||
3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3,
|
||||
3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2,
|
||||
2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,
|
||||
3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,
|
||||
1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2,
|
||||
3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1,
|
||||
3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2,
|
||||
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1,
|
||||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2,
|
||||
2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,
|
||||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2,
|
||||
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||
3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3,
|
||||
0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,
|
||||
3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1,
|
||||
0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,
|
||||
3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1,
|
||||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3,
|
||||
2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3,
|
||||
2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||
3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0,
|
||||
0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0,
|
||||
1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2,
|
||||
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1,
|
||||
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,
|
||||
3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0,
|
||||
0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0,
|
||||
3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1,
|
||||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1,
|
||||
1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,
|
||||
1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3,
|
||||
2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1,
|
||||
2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,
|
||||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||
3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0,
|
||||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1,
|
||||
1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2,
|
||||
0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1,
|
||||
3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1,
|
||||
0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,
|
||||
3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,
|
||||
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2,
|
||||
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1,
|
||||
0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0,
|
||||
0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0,
|
||||
0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0,
|
||||
3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0,
|
||||
0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0,
|
||||
0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0,
|
||||
3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0,
|
||||
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1,
|
||||
3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,
|
||||
0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1,
|
||||
0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,
|
||||
3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||
0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0,
|
||||
0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0,
|
||||
3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0,
|
||||
0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0,
|
||||
0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0,
|
||||
0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
|
||||
3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0,
|
||||
0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||
0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0,
|
||||
0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0,
|
||||
0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0,
|
||||
1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0,
|
||||
0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1,
|
||||
0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0,
|
||||
3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0,
|
||||
0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,
|
||||
2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0,
|
||||
0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,
|
||||
0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin5TurkishModel = {
|
||||
'char_to_order_map': Latin5_TurkishCharToOrderMap,
|
||||
'precedence_matrix': TurkishLangModel,
|
||||
'typical_positive_ratio': 0.970290,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "ISO-8859-9",
|
||||
'language': 'Turkish',
|
||||
}
|
||||
@@ -1,145 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
FREQ_CAT_NUM = 4
|
||||
|
||||
UDF = 0 # undefined
|
||||
OTH = 1 # other
|
||||
ASC = 2 # ascii capital letter
|
||||
ASS = 3 # ascii small letter
|
||||
ACV = 4 # accent capital vowel
|
||||
ACO = 5 # accent capital other
|
||||
ASV = 6 # accent small vowel
|
||||
ASO = 7 # accent small other
|
||||
CLASS_NUM = 8 # total classes
|
||||
|
||||
Latin1_CharToClass = (
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
|
||||
OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
|
||||
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
|
||||
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
|
||||
ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
|
||||
OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
|
||||
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
|
||||
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
|
||||
ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
|
||||
OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
|
||||
OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
|
||||
UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
|
||||
OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
|
||||
ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
|
||||
ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
|
||||
ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
|
||||
ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
|
||||
ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
|
||||
ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
|
||||
ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
|
||||
ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
|
||||
)
|
||||
|
||||
# 0 : illegal
|
||||
# 1 : very unlikely
|
||||
# 2 : normal
|
||||
# 3 : very likely
|
||||
Latin1ClassModel = (
|
||||
# UDF OTH ASC ASS ACV ACO ASV ASO
|
||||
0, 0, 0, 0, 0, 0, 0, 0, # UDF
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # OTH
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # ASC
|
||||
0, 3, 3, 3, 1, 1, 3, 3, # ASS
|
||||
0, 3, 3, 3, 1, 2, 1, 2, # ACV
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # ACO
|
||||
0, 3, 1, 3, 1, 1, 1, 3, # ASV
|
||||
0, 3, 1, 3, 1, 1, 3, 3, # ASO
|
||||
)
|
||||
|
||||
|
||||
class Latin1Prober(CharSetProber):
|
||||
def __init__(self):
|
||||
super(Latin1Prober, self).__init__()
|
||||
self._last_char_class = None
|
||||
self._freq_counter = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._last_char_class = OTH
|
||||
self._freq_counter = [0] * FREQ_CAT_NUM
|
||||
CharSetProber.reset(self)
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "ISO-8859-1"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str):
|
||||
byte_str = self.filter_with_english_letters(byte_str)
|
||||
for c in byte_str:
|
||||
char_class = Latin1_CharToClass[c]
|
||||
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
|
||||
+ char_class]
|
||||
if freq == 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
self._freq_counter[freq] += 1
|
||||
self._last_char_class = char_class
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
|
||||
total = sum(self._freq_counter)
|
||||
if total < 0.01:
|
||||
confidence = 0.0
|
||||
else:
|
||||
confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
|
||||
/ total)
|
||||
if confidence < 0.0:
|
||||
confidence = 0.0
|
||||
# lower the confidence of latin1 so that other more accurate
|
||||
# detector can take priority.
|
||||
confidence = confidence * 0.73
|
||||
return confidence
|
||||
@@ -1,91 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Proofpoint, Inc.
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class MultiByteCharSetProber(CharSetProber):
|
||||
"""
|
||||
MultiByteCharSetProber
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||
self.distribution_analyzer = None
|
||||
self.coding_sm = None
|
||||
self._last_char = [0, 0]
|
||||
|
||||
def reset(self):
|
||||
super(MultiByteCharSetProber, self).reset()
|
||||
if self.coding_sm:
|
||||
self.coding_sm.reset()
|
||||
if self.distribution_analyzer:
|
||||
self.distribution_analyzer.reset()
|
||||
self._last_char = [0, 0]
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.distribution_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
return self.distribution_analyzer.get_confidence()
|
||||
@@ -1,54 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Proofpoint, Inc.
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .utf8prober import UTF8Prober
|
||||
from .sjisprober import SJISProber
|
||||
from .eucjpprober import EUCJPProber
|
||||
from .gb2312prober import GB2312Prober
|
||||
from .euckrprober import EUCKRProber
|
||||
from .cp949prober import CP949Prober
|
||||
from .big5prober import Big5Prober
|
||||
from .euctwprober import EUCTWProber
|
||||
|
||||
|
||||
class MBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self, lang_filter=None):
|
||||
super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
|
||||
self.probers = [
|
||||
UTF8Prober(),
|
||||
SJISProber(),
|
||||
EUCJPProber(),
|
||||
GB2312Prober(),
|
||||
EUCKRProber(),
|
||||
CP949Prober(),
|
||||
Big5Prober(),
|
||||
EUCTWProber()
|
||||
]
|
||||
self.reset()
|
||||
@@ -1,572 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
# BIG5
|
||||
|
||||
BIG5_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,1, # 78 - 7f
|
||||
4,4,4,4,4,4,4,4, # 80 - 87
|
||||
4,4,4,4,4,4,4,4, # 88 - 8f
|
||||
4,4,4,4,4,4,4,4, # 90 - 97
|
||||
4,4,4,4,4,4,4,4, # 98 - 9f
|
||||
4,3,3,3,3,3,3,3, # a0 - a7
|
||||
3,3,3,3,3,3,3,3, # a8 - af
|
||||
3,3,3,3,3,3,3,3, # b0 - b7
|
||||
3,3,3,3,3,3,3,3, # b8 - bf
|
||||
3,3,3,3,3,3,3,3, # c0 - c7
|
||||
3,3,3,3,3,3,3,3, # c8 - cf
|
||||
3,3,3,3,3,3,3,3, # d0 - d7
|
||||
3,3,3,3,3,3,3,3, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
BIG5_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
|
||||
)
|
||||
|
||||
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
|
||||
|
||||
BIG5_SM_MODEL = {'class_table': BIG5_CLS,
|
||||
'class_factor': 5,
|
||||
'state_table': BIG5_ST,
|
||||
'char_len_table': BIG5_CHAR_LEN_TABLE,
|
||||
'name': 'Big5'}
|
||||
|
||||
# CP949
|
||||
|
||||
CP949_CLS = (
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
|
||||
1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
|
||||
4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
|
||||
1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
|
||||
5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
|
||||
0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
|
||||
6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
|
||||
6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
|
||||
7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
|
||||
7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
|
||||
)
|
||||
|
||||
CP949_ST = (
|
||||
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
|
||||
)
|
||||
|
||||
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
||||
|
||||
CP949_SM_MODEL = {'class_table': CP949_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': CP949_ST,
|
||||
'char_len_table': CP949_CHAR_LEN_TABLE,
|
||||
'name': 'CP949'}
|
||||
|
||||
# EUC-JP
|
||||
|
||||
EUCJP_CLS = (
|
||||
4,4,4,4,4,4,4,4, # 00 - 07
|
||||
4,4,4,4,4,4,5,5, # 08 - 0f
|
||||
4,4,4,4,4,4,4,4, # 10 - 17
|
||||
4,4,4,5,4,4,4,4, # 18 - 1f
|
||||
4,4,4,4,4,4,4,4, # 20 - 27
|
||||
4,4,4,4,4,4,4,4, # 28 - 2f
|
||||
4,4,4,4,4,4,4,4, # 30 - 37
|
||||
4,4,4,4,4,4,4,4, # 38 - 3f
|
||||
4,4,4,4,4,4,4,4, # 40 - 47
|
||||
4,4,4,4,4,4,4,4, # 48 - 4f
|
||||
4,4,4,4,4,4,4,4, # 50 - 57
|
||||
4,4,4,4,4,4,4,4, # 58 - 5f
|
||||
4,4,4,4,4,4,4,4, # 60 - 67
|
||||
4,4,4,4,4,4,4,4, # 68 - 6f
|
||||
4,4,4,4,4,4,4,4, # 70 - 77
|
||||
4,4,4,4,4,4,4,4, # 78 - 7f
|
||||
5,5,5,5,5,5,5,5, # 80 - 87
|
||||
5,5,5,5,5,5,1,3, # 88 - 8f
|
||||
5,5,5,5,5,5,5,5, # 90 - 97
|
||||
5,5,5,5,5,5,5,5, # 98 - 9f
|
||||
5,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,0,5 # f8 - ff
|
||||
)
|
||||
|
||||
EUCJP_ST = (
|
||||
3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
|
||||
3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
|
||||
)
|
||||
|
||||
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
|
||||
|
||||
EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': EUCJP_ST,
|
||||
'char_len_table': EUCJP_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-JP'}
|
||||
|
||||
# EUC-KR
|
||||
|
||||
EUCKR_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
1,1,1,1,1,1,1,1, # 40 - 47
|
||||
1,1,1,1,1,1,1,1, # 48 - 4f
|
||||
1,1,1,1,1,1,1,1, # 50 - 57
|
||||
1,1,1,1,1,1,1,1, # 58 - 5f
|
||||
1,1,1,1,1,1,1,1, # 60 - 67
|
||||
1,1,1,1,1,1,1,1, # 68 - 6f
|
||||
1,1,1,1,1,1,1,1, # 70 - 77
|
||||
1,1,1,1,1,1,1,1, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,3,3,3, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,3,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCKR_ST = (
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
|
||||
)
|
||||
|
||||
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
|
||||
|
||||
EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
|
||||
'class_factor': 4,
|
||||
'state_table': EUCKR_ST,
|
||||
'char_len_table': EUCKR_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-KR'}
|
||||
|
||||
# EUC-TW
|
||||
|
||||
EUCTW_CLS = (
|
||||
2,2,2,2,2,2,2,2, # 00 - 07
|
||||
2,2,2,2,2,2,0,0, # 08 - 0f
|
||||
2,2,2,2,2,2,2,2, # 10 - 17
|
||||
2,2,2,0,2,2,2,2, # 18 - 1f
|
||||
2,2,2,2,2,2,2,2, # 20 - 27
|
||||
2,2,2,2,2,2,2,2, # 28 - 2f
|
||||
2,2,2,2,2,2,2,2, # 30 - 37
|
||||
2,2,2,2,2,2,2,2, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,2, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,6,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,3,4,4,4,4,4,4, # a0 - a7
|
||||
5,5,1,1,1,1,1,1, # a8 - af
|
||||
1,1,1,1,1,1,1,1, # b0 - b7
|
||||
1,1,1,1,1,1,1,1, # b8 - bf
|
||||
1,1,3,1,3,3,3,3, # c0 - c7
|
||||
3,3,3,3,3,3,3,3, # c8 - cf
|
||||
3,3,3,3,3,3,3,3, # d0 - d7
|
||||
3,3,3,3,3,3,3,3, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCTW_ST = (
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
|
||||
MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
|
||||
MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
|
||||
|
||||
EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': EUCTW_ST,
|
||||
'char_len_table': EUCTW_CHAR_LEN_TABLE,
|
||||
'name': 'x-euc-tw'}
|
||||
|
||||
# GB2312
|
||||
|
||||
GB2312_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
3,3,3,3,3,3,3,3, # 30 - 37
|
||||
3,3,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,4, # 78 - 7f
|
||||
5,6,6,6,6,6,6,6, # 80 - 87
|
||||
6,6,6,6,6,6,6,6, # 88 - 8f
|
||||
6,6,6,6,6,6,6,6, # 90 - 97
|
||||
6,6,6,6,6,6,6,6, # 98 - 9f
|
||||
6,6,6,6,6,6,6,6, # a0 - a7
|
||||
6,6,6,6,6,6,6,6, # a8 - af
|
||||
6,6,6,6,6,6,6,6, # b0 - b7
|
||||
6,6,6,6,6,6,6,6, # b8 - bf
|
||||
6,6,6,6,6,6,6,6, # c0 - c7
|
||||
6,6,6,6,6,6,6,6, # c8 - cf
|
||||
6,6,6,6,6,6,6,6, # d0 - d7
|
||||
6,6,6,6,6,6,6,6, # d8 - df
|
||||
6,6,6,6,6,6,6,6, # e0 - e7
|
||||
6,6,6,6,6,6,6,6, # e8 - ef
|
||||
6,6,6,6,6,6,6,6, # f0 - f7
|
||||
6,6,6,6,6,6,6,0 # f8 - ff
|
||||
)
|
||||
|
||||
GB2312_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
|
||||
4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
# To be accurate, the length of class 6 can be either 2 or 4.
|
||||
# But it is not necessary to discriminate between the two since
|
||||
# it is used for frequency analysis only, and we are validating
|
||||
# each code range there as well. So it is safe to set it to be
|
||||
# 2 here.
|
||||
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
|
||||
|
||||
GB2312_SM_MODEL = {'class_table': GB2312_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': GB2312_ST,
|
||||
'char_len_table': GB2312_CHAR_LEN_TABLE,
|
||||
'name': 'GB2312'}
|
||||
|
||||
# Shift_JIS
|
||||
|
||||
SJIS_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,1, # 78 - 7f
|
||||
3,3,3,3,3,2,2,3, # 80 - 87
|
||||
3,3,3,3,3,3,3,3, # 88 - 8f
|
||||
3,3,3,3,3,3,3,3, # 90 - 97
|
||||
3,3,3,3,3,3,3,3, # 98 - 9f
|
||||
#0xa0 is illegal in sjis encoding, but some pages does
|
||||
#contain such byte. We need to be more error forgiven.
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,4,4,4, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,0,0,0) # f8 - ff
|
||||
|
||||
|
||||
SJIS_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
|
||||
)
|
||||
|
||||
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
|
||||
|
||||
SJIS_SM_MODEL = {'class_table': SJIS_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': SJIS_ST,
|
||||
'char_len_table': SJIS_CHAR_LEN_TABLE,
|
||||
'name': 'Shift_JIS'}
|
||||
|
||||
# UCS2-BE
|
||||
|
||||
UCS2BE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,3,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,3,3,3,3,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,0,0,0,0,0,0,0, # a0 - a7
|
||||
0,0,0,0,0,0,0,0, # a8 - af
|
||||
0,0,0,0,0,0,0,0, # b0 - b7
|
||||
0,0,0,0,0,0,0,0, # b8 - bf
|
||||
0,0,0,0,0,0,0,0, # c0 - c7
|
||||
0,0,0,0,0,0,0,0, # c8 - cf
|
||||
0,0,0,0,0,0,0,0, # d0 - d7
|
||||
0,0,0,0,0,0,0,0, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2BE_ST = (
|
||||
5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
|
||||
6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
|
||||
5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
|
||||
6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
|
||||
|
||||
UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2BE_ST,
|
||||
'char_len_table': UCS2BE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16BE'}
|
||||
|
||||
# UCS2-LE
|
||||
|
||||
UCS2LE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,3,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,3,3,3,3,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,0,0,0,0,0,0,0, # a0 - a7
|
||||
0,0,0,0,0,0,0,0, # a8 - af
|
||||
0,0,0,0,0,0,0,0, # b0 - b7
|
||||
0,0,0,0,0,0,0,0, # b8 - bf
|
||||
0,0,0,0,0,0,0,0, # c0 - c7
|
||||
0,0,0,0,0,0,0,0, # c8 - cf
|
||||
0,0,0,0,0,0,0,0, # d0 - d7
|
||||
0,0,0,0,0,0,0,0, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2LE_ST = (
|
||||
6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
|
||||
7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
|
||||
5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
|
||||
|
||||
UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2LE_ST,
|
||||
'char_len_table': UCS2LE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16LE'}
|
||||
|
||||
# UTF-8
|
||||
|
||||
UTF8_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
1,1,1,1,1,1,1,1, # 40 - 47
|
||||
1,1,1,1,1,1,1,1, # 48 - 4f
|
||||
1,1,1,1,1,1,1,1, # 50 - 57
|
||||
1,1,1,1,1,1,1,1, # 58 - 5f
|
||||
1,1,1,1,1,1,1,1, # 60 - 67
|
||||
1,1,1,1,1,1,1,1, # 68 - 6f
|
||||
1,1,1,1,1,1,1,1, # 70 - 77
|
||||
1,1,1,1,1,1,1,1, # 78 - 7f
|
||||
2,2,2,2,3,3,3,3, # 80 - 87
|
||||
4,4,4,4,4,4,4,4, # 88 - 8f
|
||||
4,4,4,4,4,4,4,4, # 90 - 97
|
||||
4,4,4,4,4,4,4,4, # 98 - 9f
|
||||
5,5,5,5,5,5,5,5, # a0 - a7
|
||||
5,5,5,5,5,5,5,5, # a8 - af
|
||||
5,5,5,5,5,5,5,5, # b0 - b7
|
||||
5,5,5,5,5,5,5,5, # b8 - bf
|
||||
0,0,6,6,6,6,6,6, # c0 - c7
|
||||
6,6,6,6,6,6,6,6, # c8 - cf
|
||||
6,6,6,6,6,6,6,6, # d0 - d7
|
||||
6,6,6,6,6,6,6,6, # d8 - df
|
||||
7,8,8,8,8,8,8,8, # e0 - e7
|
||||
8,8,8,8,8,9,8,8, # e8 - ef
|
||||
10,11,11,11,11,11,11,11, # f0 - f7
|
||||
12,13,13,13,14,15,0,0 # f8 - ff
|
||||
)
|
||||
|
||||
UTF8_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
|
||||
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
|
||||
MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
|
||||
MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
|
||||
MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
|
||||
)
|
||||
|
||||
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
|
||||
|
||||
UTF8_SM_MODEL = {'class_table': UTF8_CLS,
|
||||
'class_factor': 16,
|
||||
'state_table': UTF8_ST,
|
||||
'char_len_table': UTF8_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-8'}
|
||||
@@ -1,132 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
|
||||
|
||||
|
||||
class SingleByteCharSetProber(CharSetProber):
|
||||
SAMPLE_SIZE = 64
|
||||
SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2
|
||||
POSITIVE_SHORTCUT_THRESHOLD = 0.95
|
||||
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
|
||||
|
||||
def __init__(self, model, reversed=False, name_prober=None):
|
||||
super(SingleByteCharSetProber, self).__init__()
|
||||
self._model = model
|
||||
# TRUE if we need to reverse every pair in the model lookup
|
||||
self._reversed = reversed
|
||||
# Optional auxiliary prober for name decision
|
||||
self._name_prober = name_prober
|
||||
self._last_order = None
|
||||
self._seq_counters = None
|
||||
self._total_seqs = None
|
||||
self._total_char = None
|
||||
self._freq_char = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(SingleByteCharSetProber, self).reset()
|
||||
# char order of last character
|
||||
self._last_order = 255
|
||||
self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
|
||||
self._total_seqs = 0
|
||||
self._total_char = 0
|
||||
# characters that fall in our sampling range
|
||||
self._freq_char = 0
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
if self._name_prober:
|
||||
return self._name_prober.charset_name
|
||||
else:
|
||||
return self._model['charset_name']
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
if self._name_prober:
|
||||
return self._name_prober.language
|
||||
else:
|
||||
return self._model.get('language')
|
||||
|
||||
def feed(self, byte_str):
|
||||
if not self._model['keep_english_letter']:
|
||||
byte_str = self.filter_international_words(byte_str)
|
||||
if not byte_str:
|
||||
return self.state
|
||||
char_to_order_map = self._model['char_to_order_map']
|
||||
for i, c in enumerate(byte_str):
|
||||
# XXX: Order is in range 1-64, so one would think we want 0-63 here,
|
||||
# but that leads to 27 more test failures than before.
|
||||
order = char_to_order_map[c]
|
||||
# XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
|
||||
# CharacterCategory.SYMBOL is actually 253, so we use CONTROL
|
||||
# to make it closer to the original intent. The only difference
|
||||
# is whether or not we count digits and control characters for
|
||||
# _total_char purposes.
|
||||
if order < CharacterCategory.CONTROL:
|
||||
self._total_char += 1
|
||||
if order < self.SAMPLE_SIZE:
|
||||
self._freq_char += 1
|
||||
if self._last_order < self.SAMPLE_SIZE:
|
||||
self._total_seqs += 1
|
||||
if not self._reversed:
|
||||
i = (self._last_order * self.SAMPLE_SIZE) + order
|
||||
model = self._model['precedence_matrix'][i]
|
||||
else: # reverse the order of the letters in the lookup
|
||||
i = (order * self.SAMPLE_SIZE) + self._last_order
|
||||
model = self._model['precedence_matrix'][i]
|
||||
self._seq_counters[model] += 1
|
||||
self._last_order = order
|
||||
|
||||
charset_name = self._model['charset_name']
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
|
||||
confidence = self.get_confidence()
|
||||
if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
|
||||
self.logger.debug('%s confidence = %s, we have a winner',
|
||||
charset_name, confidence)
|
||||
self._state = ProbingState.FOUND_IT
|
||||
elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
|
||||
self.logger.debug('%s confidence = %s, below negative '
|
||||
'shortcut threshhold %s', charset_name,
|
||||
confidence,
|
||||
self.NEGATIVE_SHORTCUT_THRESHOLD)
|
||||
self._state = ProbingState.NOT_ME
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
r = 0.01
|
||||
if self._total_seqs > 0:
|
||||
r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
|
||||
self._total_seqs / self._model['typical_positive_ratio'])
|
||||
r = r * self._freq_char / self._total_char
|
||||
if r >= 1.0:
|
||||
r = 0.99
|
||||
return r
|
||||
@@ -1,73 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .sbcharsetprober import SingleByteCharSetProber
|
||||
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
|
||||
Latin5CyrillicModel, MacCyrillicModel,
|
||||
Ibm866Model, Ibm855Model)
|
||||
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
|
||||
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
|
||||
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
||||
from .langthaimodel import TIS620ThaiModel
|
||||
from .langhebrewmodel import Win1255HebrewModel
|
||||
from .hebrewprober import HebrewProber
|
||||
from .langturkishmodel import Latin5TurkishModel
|
||||
|
||||
|
||||
class SBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self):
|
||||
super(SBCSGroupProber, self).__init__()
|
||||
self.probers = [
|
||||
SingleByteCharSetProber(Win1251CyrillicModel),
|
||||
SingleByteCharSetProber(Koi8rModel),
|
||||
SingleByteCharSetProber(Latin5CyrillicModel),
|
||||
SingleByteCharSetProber(MacCyrillicModel),
|
||||
SingleByteCharSetProber(Ibm866Model),
|
||||
SingleByteCharSetProber(Ibm855Model),
|
||||
SingleByteCharSetProber(Latin7GreekModel),
|
||||
SingleByteCharSetProber(Win1253GreekModel),
|
||||
SingleByteCharSetProber(Latin5BulgarianModel),
|
||||
SingleByteCharSetProber(Win1251BulgarianModel),
|
||||
# TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
|
||||
# after we retrain model.
|
||||
# SingleByteCharSetProber(Latin2HungarianModel),
|
||||
# SingleByteCharSetProber(Win1250HungarianModel),
|
||||
SingleByteCharSetProber(TIS620ThaiModel),
|
||||
SingleByteCharSetProber(Latin5TurkishModel),
|
||||
]
|
||||
hebrew_prober = HebrewProber()
|
||||
logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
|
||||
False, hebrew_prober)
|
||||
visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
|
||||
hebrew_prober)
|
||||
hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
|
||||
self.probers.extend([hebrew_prober, logical_hebrew_prober,
|
||||
visual_hebrew_prober])
|
||||
|
||||
self.reset()
|
||||
@@ -1,92 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import SJISDistributionAnalysis
|
||||
from .jpcntx import SJISContextAnalysis
|
||||
from .mbcssm import SJIS_SM_MODEL
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class SJISProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(SJISProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
|
||||
self.distribution_analyzer = SJISDistributionAnalysis()
|
||||
self.context_analyzer = SJISContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(SJISProber, self).reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self.context_analyzer.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.context_analyzer.feed(self._last_char[2 - char_len:],
|
||||
char_len)
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
|
||||
- char_len], char_len)
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.context_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
@@ -1,278 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
"""
|
||||
Module containing the UniversalDetector detector class, which is the primary
|
||||
class a user of ``chardet`` should use.
|
||||
|
||||
:author: Mark Pilgrim (initial port to Python)
|
||||
:author: Shy Shalom (original C code)
|
||||
:author: Dan Blanchard (major refactoring for 3.0)
|
||||
:author: Ian Cordasco
|
||||
"""
|
||||
|
||||
|
||||
import codecs
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .enums import InputState, LanguageFilter, ProbingState
|
||||
from .escprober import EscCharSetProber
|
||||
from .latin1prober import Latin1Prober
|
||||
from .mbcsgroupprober import MBCSGroupProber
|
||||
from .sbcsgroupprober import SBCSGroupProber
|
||||
|
||||
|
||||
class UniversalDetector(object):
|
||||
"""
|
||||
The ``UniversalDetector`` class underlies the ``chardet.detect`` function
|
||||
and coordinates all of the different charset probers.
|
||||
|
||||
To get a ``dict`` containing an encoding and its confidence, you can simply
|
||||
run:
|
||||
|
||||
.. code::
|
||||
|
||||
u = UniversalDetector()
|
||||
u.feed(some_bytes)
|
||||
u.close()
|
||||
detected = u.result
|
||||
|
||||
"""
|
||||
|
||||
MINIMUM_THRESHOLD = 0.20
|
||||
HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
|
||||
ESC_DETECTOR = re.compile(b'(\033|~{)')
|
||||
WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
|
||||
ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
|
||||
'iso-8859-2': 'Windows-1250',
|
||||
'iso-8859-5': 'Windows-1251',
|
||||
'iso-8859-6': 'Windows-1256',
|
||||
'iso-8859-7': 'Windows-1253',
|
||||
'iso-8859-8': 'Windows-1255',
|
||||
'iso-8859-9': 'Windows-1254',
|
||||
'iso-8859-13': 'Windows-1257'}
|
||||
|
||||
def __init__(self, lang_filter=LanguageFilter.ALL):
|
||||
self._esc_charset_prober = None
|
||||
self._charset_probers = []
|
||||
self.result = None
|
||||
self.done = None
|
||||
self._got_data = None
|
||||
self._input_state = None
|
||||
self._last_char = None
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._has_win_bytes = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Reset the UniversalDetector and all of its probers back to their
|
||||
initial states. This is called by ``__init__``, so you only need to
|
||||
call this directly in between analyses of different documents.
|
||||
"""
|
||||
self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
|
||||
self.done = False
|
||||
self._got_data = False
|
||||
self._has_win_bytes = False
|
||||
self._input_state = InputState.PURE_ASCII
|
||||
self._last_char = b''
|
||||
if self._esc_charset_prober:
|
||||
self._esc_charset_prober.reset()
|
||||
for prober in self._charset_probers:
|
||||
prober.reset()
|
||||
|
||||
def feed(self, byte_str):
|
||||
"""
|
||||
Takes a chunk of a document and feeds it through all of the relevant
|
||||
charset probers.
|
||||
|
||||
After calling ``feed``, you can check the value of the ``done``
|
||||
attribute to see if you need to continue feeding the
|
||||
``UniversalDetector`` more data, or if it has made a prediction
|
||||
(in the ``result`` attribute).
|
||||
|
||||
.. note::
|
||||
You should always call ``close`` when you're done feeding in your
|
||||
document if ``done`` is not already ``True``.
|
||||
"""
|
||||
if self.done:
|
||||
return
|
||||
|
||||
if not len(byte_str):
|
||||
return
|
||||
|
||||
if not isinstance(byte_str, bytearray):
|
||||
byte_str = bytearray(byte_str)
|
||||
|
||||
# First check for known BOMs, since these are guaranteed to be correct
|
||||
if not self._got_data:
|
||||
# If the data starts with BOM, we know it is UTF
|
||||
if byte_str.startswith(codecs.BOM_UTF8):
|
||||
# EF BB BF UTF-8 with BOM
|
||||
self.result = {'encoding': "UTF-8-SIG",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith((codecs.BOM_UTF32_LE,
|
||||
codecs.BOM_UTF32_BE)):
|
||||
# FF FE 00 00 UTF-32, little-endian BOM
|
||||
# 00 00 FE FF UTF-32, big-endian BOM
|
||||
self.result = {'encoding': "UTF-32",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
|
||||
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
|
||||
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
|
||||
# FF FE UTF-16, little endian BOM
|
||||
# FE FF UTF-16, big endian BOM
|
||||
self.result = {'encoding': "UTF-16",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
|
||||
self._got_data = True
|
||||
if self.result['encoding'] is not None:
|
||||
self.done = True
|
||||
return
|
||||
|
||||
# If none of those matched and we've only see ASCII so far, check
|
||||
# for high bytes and escape sequences
|
||||
if self._input_state == InputState.PURE_ASCII:
|
||||
if self.HIGH_BYTE_DETECTOR.search(byte_str):
|
||||
self._input_state = InputState.HIGH_BYTE
|
||||
elif self._input_state == InputState.PURE_ASCII and \
|
||||
self.ESC_DETECTOR.search(self._last_char + byte_str):
|
||||
self._input_state = InputState.ESC_ASCII
|
||||
|
||||
self._last_char = byte_str[-1:]
|
||||
|
||||
# If we've seen escape sequences, use the EscCharSetProber, which
|
||||
# uses a simple state machine to check for known escape sequences in
|
||||
# HZ and ISO-2022 encodings, since those are the only encodings that
|
||||
# use such sequences.
|
||||
if self._input_state == InputState.ESC_ASCII:
|
||||
if not self._esc_charset_prober:
|
||||
self._esc_charset_prober = EscCharSetProber(self.lang_filter)
|
||||
if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||
self.result = {'encoding':
|
||||
self._esc_charset_prober.charset_name,
|
||||
'confidence':
|
||||
self._esc_charset_prober.get_confidence(),
|
||||
'language':
|
||||
self._esc_charset_prober.language}
|
||||
self.done = True
|
||||
# If we've seen high bytes (i.e., those with values greater than 127),
|
||||
# we need to do more complicated checks using all our multi-byte and
|
||||
# single-byte probers that are left. The single-byte probers
|
||||
# use character bigram distributions to determine the encoding, whereas
|
||||
# the multi-byte probers use a combination of character unigram and
|
||||
# bigram distributions.
|
||||
elif self._input_state == InputState.HIGH_BYTE:
|
||||
if not self._charset_probers:
|
||||
self._charset_probers = [MBCSGroupProber(self.lang_filter)]
|
||||
# If we're checking non-CJK encodings, use single-byte prober
|
||||
if self.lang_filter & LanguageFilter.NON_CJK:
|
||||
self._charset_probers.append(SBCSGroupProber())
|
||||
self._charset_probers.append(Latin1Prober())
|
||||
for prober in self._charset_probers:
|
||||
if prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||
self.result = {'encoding': prober.charset_name,
|
||||
'confidence': prober.get_confidence(),
|
||||
'language': prober.language}
|
||||
self.done = True
|
||||
break
|
||||
if self.WIN_BYTE_DETECTOR.search(byte_str):
|
||||
self._has_win_bytes = True
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Stop analyzing the current document and come up with a final
|
||||
prediction.
|
||||
|
||||
:returns: The ``result`` attribute, a ``dict`` with the keys
|
||||
`encoding`, `confidence`, and `language`.
|
||||
"""
|
||||
# Don't bother with checks if we're already done
|
||||
if self.done:
|
||||
return self.result
|
||||
self.done = True
|
||||
|
||||
if not self._got_data:
|
||||
self.logger.debug('no data received!')
|
||||
|
||||
# Default to ASCII if it is all we've seen so far
|
||||
elif self._input_state == InputState.PURE_ASCII:
|
||||
self.result = {'encoding': 'ascii',
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
|
||||
# If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
|
||||
elif self._input_state == InputState.HIGH_BYTE:
|
||||
prober_confidence = None
|
||||
max_prober_confidence = 0.0
|
||||
max_prober = None
|
||||
for prober in self._charset_probers:
|
||||
if not prober:
|
||||
continue
|
||||
prober_confidence = prober.get_confidence()
|
||||
if prober_confidence > max_prober_confidence:
|
||||
max_prober_confidence = prober_confidence
|
||||
max_prober = prober
|
||||
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
|
||||
charset_name = max_prober.charset_name
|
||||
lower_charset_name = max_prober.charset_name.lower()
|
||||
confidence = max_prober.get_confidence()
|
||||
# Use Windows encoding name instead of ISO-8859 if we saw any
|
||||
# extra Windows-specific bytes
|
||||
if lower_charset_name.startswith('iso-8859'):
|
||||
if self._has_win_bytes:
|
||||
charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
|
||||
charset_name)
|
||||
self.result = {'encoding': charset_name,
|
||||
'confidence': confidence,
|
||||
'language': max_prober.language}
|
||||
|
||||
# Log all prober confidences if none met MINIMUM_THRESHOLD
|
||||
if self.logger.getEffectiveLevel() == logging.DEBUG:
|
||||
if self.result['encoding'] is None:
|
||||
self.logger.debug('no probers hit minimum threshold')
|
||||
for prober in self._charset_probers[0].probers:
|
||||
if not prober:
|
||||
continue
|
||||
self.logger.debug('%s %s confidence = %s',
|
||||
prober.charset_name,
|
||||
prober.language,
|
||||
prober.get_confidence())
|
||||
return self.result
|
||||
@@ -1,82 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcssm import UTF8_SM_MODEL
|
||||
|
||||
|
||||
|
||||
class UTF8Prober(CharSetProber):
|
||||
ONE_CHAR_PROB = 0.5
|
||||
|
||||
def __init__(self):
|
||||
super(UTF8Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
|
||||
self._num_mb_chars = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(UTF8Prober, self).reset()
|
||||
self.coding_sm.reset()
|
||||
self._num_mb_chars = 0
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "utf-8"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str):
|
||||
for c in byte_str:
|
||||
coding_state = self.coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
if self.coding_sm.get_current_charlen() >= 2:
|
||||
self._num_mb_chars += 1
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if self.get_confidence() > self.SHORTCUT_THRESHOLD:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
unlike = 0.99
|
||||
if self._num_mb_chars < 6:
|
||||
unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
|
||||
return 1.0 - unlike
|
||||
else:
|
||||
return unlike
|
||||
@@ -1,9 +0,0 @@
|
||||
"""
|
||||
This module exists only to simplify retrieving the version number of chardet
|
||||
from within setup.py and from chardet subpackages.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
__version__ = "3.0.2"
|
||||
VERSION = __version__.split('.')
|
||||
@@ -1 +0,0 @@
|
||||
from .core import *
|
||||
@@ -1,118 +0,0 @@
|
||||
from .core import encode, decode, alabel, ulabel, IDNAError
|
||||
import codecs
|
||||
import re
|
||||
|
||||
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self, data, errors='strict'):
|
||||
|
||||
if errors != 'strict':
|
||||
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
|
||||
|
||||
if not data:
|
||||
return "", 0
|
||||
|
||||
return encode(data), len(data)
|
||||
|
||||
def decode(self, data, errors='strict'):
|
||||
|
||||
if errors != 'strict':
|
||||
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
|
||||
|
||||
if not data:
|
||||
return u"", 0
|
||||
|
||||
return decode(data), len(data)
|
||||
|
||||
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
|
||||
def _buffer_encode(self, data, errors, final):
|
||||
if errors != 'strict':
|
||||
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
|
||||
|
||||
if not data:
|
||||
return ("", 0)
|
||||
|
||||
labels = _unicode_dots_re.split(data)
|
||||
trailing_dot = u''
|
||||
if labels:
|
||||
if not labels[-1]:
|
||||
trailing_dot = '.'
|
||||
del labels[-1]
|
||||
elif not final:
|
||||
# Keep potentially unfinished label until the next call
|
||||
del labels[-1]
|
||||
if labels:
|
||||
trailing_dot = '.'
|
||||
|
||||
result = []
|
||||
size = 0
|
||||
for label in labels:
|
||||
result.append(alabel(label))
|
||||
if size:
|
||||
size += 1
|
||||
size += len(label)
|
||||
|
||||
# Join with U+002E
|
||||
result = ".".join(result) + trailing_dot
|
||||
size += len(trailing_dot)
|
||||
return (result, size)
|
||||
|
||||
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
|
||||
def _buffer_decode(self, data, errors, final):
|
||||
if errors != 'strict':
|
||||
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
|
||||
|
||||
if not data:
|
||||
return (u"", 0)
|
||||
|
||||
# IDNA allows decoding to operate on Unicode strings, too.
|
||||
if isinstance(data, unicode):
|
||||
labels = _unicode_dots_re.split(data)
|
||||
else:
|
||||
# Must be ASCII string
|
||||
data = str(data)
|
||||
unicode(data, "ascii")
|
||||
labels = data.split(".")
|
||||
|
||||
trailing_dot = u''
|
||||
if labels:
|
||||
if not labels[-1]:
|
||||
trailing_dot = u'.'
|
||||
del labels[-1]
|
||||
elif not final:
|
||||
# Keep potentially unfinished label until the next call
|
||||
del labels[-1]
|
||||
if labels:
|
||||
trailing_dot = u'.'
|
||||
|
||||
result = []
|
||||
size = 0
|
||||
for label in labels:
|
||||
result.append(ulabel(label))
|
||||
if size:
|
||||
size += 1
|
||||
size += len(label)
|
||||
|
||||
result = u".".join(result) + trailing_dot
|
||||
size += len(trailing_dot)
|
||||
return (result, size)
|
||||
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
|
||||
pass
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
|
||||
pass
|
||||
|
||||
def getregentry():
|
||||
return codecs.CodecInfo(
|
||||
name='idna',
|
||||
encode=Codec().encode,
|
||||
decode=Codec().decode,
|
||||
incrementalencoder=IncrementalEncoder,
|
||||
incrementaldecoder=IncrementalDecoder,
|
||||
streamwriter=StreamWriter,
|
||||
streamreader=StreamReader,
|
||||
)
|
||||
@@ -1,12 +0,0 @@
|
||||
from .core import *
|
||||
from .codec import *
|
||||
|
||||
def ToASCII(label):
|
||||
return encode(label)
|
||||
|
||||
def ToUnicode(label):
|
||||
return decode(label)
|
||||
|
||||
def nameprep(s):
|
||||
raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol")
|
||||
|
||||
@@ -1,387 +0,0 @@
|
||||
from . import idnadata
|
||||
import bisect
|
||||
import unicodedata
|
||||
import re
|
||||
import sys
|
||||
from .intranges import intranges_contain
|
||||
|
||||
_virama_combining_class = 9
|
||||
_alabel_prefix = b'xn--'
|
||||
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
unicode = str
|
||||
unichr = chr
|
||||
|
||||
class IDNAError(UnicodeError):
|
||||
""" Base exception for all IDNA-encoding related problems """
|
||||
pass
|
||||
|
||||
|
||||
class IDNABidiError(IDNAError):
|
||||
""" Exception when bidirectional requirements are not satisfied """
|
||||
pass
|
||||
|
||||
|
||||
class InvalidCodepoint(IDNAError):
|
||||
""" Exception when a disallowed or unallocated codepoint is used """
|
||||
pass
|
||||
|
||||
|
||||
class InvalidCodepointContext(IDNAError):
|
||||
""" Exception when the codepoint is not valid in the context it is used """
|
||||
pass
|
||||
|
||||
|
||||
def _combining_class(cp):
|
||||
return unicodedata.combining(unichr(cp))
|
||||
|
||||
def _is_script(cp, script):
|
||||
return intranges_contain(ord(cp), idnadata.scripts[script])
|
||||
|
||||
def _punycode(s):
|
||||
return s.encode('punycode')
|
||||
|
||||
def _unot(s):
|
||||
return 'U+{0:04X}'.format(s)
|
||||
|
||||
|
||||
def valid_label_length(label):
|
||||
|
||||
if len(label) > 63:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def valid_string_length(label, trailing_dot):
|
||||
|
||||
if len(label) > (254 if trailing_dot else 253):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def check_bidi(label, check_ltr=False):
|
||||
|
||||
# Bidi rules should only be applied if string contains RTL characters
|
||||
bidi_label = False
|
||||
for (idx, cp) in enumerate(label, 1):
|
||||
direction = unicodedata.bidirectional(cp)
|
||||
if direction == '':
|
||||
# String likely comes from a newer version of Unicode
|
||||
raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), idx))
|
||||
if direction in ['R', 'AL', 'AN']:
|
||||
bidi_label = True
|
||||
break
|
||||
if not bidi_label and not check_ltr:
|
||||
return True
|
||||
|
||||
# Bidi rule 1
|
||||
direction = unicodedata.bidirectional(label[0])
|
||||
if direction in ['R', 'AL']:
|
||||
rtl = True
|
||||
elif direction == 'L':
|
||||
rtl = False
|
||||
else:
|
||||
raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))
|
||||
|
||||
valid_ending = False
|
||||
number_type = False
|
||||
for (idx, cp) in enumerate(label, 1):
|
||||
direction = unicodedata.bidirectional(cp)
|
||||
|
||||
if rtl:
|
||||
# Bidi rule 2
|
||||
if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
|
||||
raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(idx))
|
||||
# Bidi rule 3
|
||||
if direction in ['R', 'AL', 'EN', 'AN']:
|
||||
valid_ending = True
|
||||
elif direction != 'NSM':
|
||||
valid_ending = False
|
||||
# Bidi rule 4
|
||||
if direction in ['AN', 'EN']:
|
||||
if not number_type:
|
||||
number_type = direction
|
||||
else:
|
||||
if number_type != direction:
|
||||
raise IDNABidiError('Can not mix numeral types in a right-to-left label')
|
||||
else:
|
||||
# Bidi rule 5
|
||||
if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
|
||||
raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(idx))
|
||||
# Bidi rule 6
|
||||
if direction in ['L', 'EN']:
|
||||
valid_ending = True
|
||||
elif direction != 'NSM':
|
||||
valid_ending = False
|
||||
|
||||
if not valid_ending:
|
||||
raise IDNABidiError('Label ends with illegal codepoint directionality')
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def check_initial_combiner(label):
|
||||
|
||||
if unicodedata.category(label[0])[0] == 'M':
|
||||
raise IDNAError('Label begins with an illegal combining character')
|
||||
return True
|
||||
|
||||
|
||||
def check_hyphen_ok(label):
|
||||
|
||||
if label[2:4] == '--':
|
||||
raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
|
||||
if label[0] == '-' or label[-1] == '-':
|
||||
raise IDNAError('Label must not start or end with a hyphen')
|
||||
return True
|
||||
|
||||
|
||||
def check_nfc(label):
|
||||
|
||||
if unicodedata.normalize('NFC', label) != label:
|
||||
raise IDNAError('Label must be in Normalization Form C')
|
||||
|
||||
|
||||
def valid_contextj(label, pos):
|
||||
|
||||
cp_value = ord(label[pos])
|
||||
|
||||
if cp_value == 0x200c:
|
||||
|
||||
if pos > 0:
|
||||
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
|
||||
return True
|
||||
|
||||
ok = False
|
||||
for i in range(pos-1, -1, -1):
|
||||
joining_type = idnadata.joining_types.get(ord(label[i]))
|
||||
if joining_type == ord('T'):
|
||||
continue
|
||||
if joining_type in [ord('L'), ord('D')]:
|
||||
ok = True
|
||||
break
|
||||
|
||||
if not ok:
|
||||
return False
|
||||
|
||||
ok = False
|
||||
for i in range(pos+1, len(label)):
|
||||
joining_type = idnadata.joining_types.get(ord(label[i]))
|
||||
if joining_type == ord('T'):
|
||||
continue
|
||||
if joining_type in [ord('R'), ord('D')]:
|
||||
ok = True
|
||||
break
|
||||
return ok
|
||||
|
||||
if cp_value == 0x200d:
|
||||
|
||||
if pos > 0:
|
||||
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
|
||||
return True
|
||||
return False
|
||||
|
||||
else:
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def valid_contexto(label, pos, exception=False):
|
||||
|
||||
cp_value = ord(label[pos])
|
||||
|
||||
if cp_value == 0x00b7:
|
||||
if 0 < pos < len(label)-1:
|
||||
if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
|
||||
return True
|
||||
return False
|
||||
|
||||
elif cp_value == 0x0375:
|
||||
if pos < len(label)-1 and len(label) > 1:
|
||||
return _is_script(label[pos + 1], 'Greek')
|
||||
return False
|
||||
|
||||
elif cp_value == 0x05f3 or cp_value == 0x05f4:
|
||||
if pos > 0:
|
||||
return _is_script(label[pos - 1], 'Hebrew')
|
||||
return False
|
||||
|
||||
elif cp_value == 0x30fb:
|
||||
for cp in label:
|
||||
if cp == u'\u30fb':
|
||||
continue
|
||||
if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'):
|
||||
return True
|
||||
return False
|
||||
|
||||
elif 0x660 <= cp_value <= 0x669:
|
||||
for cp in label:
|
||||
if 0x6f0 <= ord(cp) <= 0x06f9:
|
||||
return False
|
||||
return True
|
||||
|
||||
elif 0x6f0 <= cp_value <= 0x6f9:
|
||||
for cp in label:
|
||||
if 0x660 <= ord(cp) <= 0x0669:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def check_label(label):
|
||||
|
||||
if isinstance(label, (bytes, bytearray)):
|
||||
label = label.decode('utf-8')
|
||||
if len(label) == 0:
|
||||
raise IDNAError('Empty Label')
|
||||
|
||||
check_nfc(label)
|
||||
check_hyphen_ok(label)
|
||||
check_initial_combiner(label)
|
||||
|
||||
for (pos, cp) in enumerate(label):
|
||||
cp_value = ord(cp)
|
||||
if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']):
|
||||
continue
|
||||
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']):
|
||||
if not valid_contextj(label, pos):
|
||||
raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
|
||||
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']):
|
||||
if not valid_contexto(label, pos):
|
||||
raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
|
||||
else:
|
||||
raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
|
||||
|
||||
check_bidi(label)
|
||||
|
||||
|
||||
def alabel(label):
|
||||
|
||||
try:
|
||||
label = label.encode('ascii')
|
||||
try:
|
||||
ulabel(label)
|
||||
except IDNAError:
|
||||
raise IDNAError('The label {0} is not a valid A-label'.format(label))
|
||||
if not valid_label_length(label):
|
||||
raise IDNAError('Label too long')
|
||||
return label
|
||||
except UnicodeEncodeError:
|
||||
pass
|
||||
|
||||
if not label:
|
||||
raise IDNAError('No Input')
|
||||
|
||||
label = unicode(label)
|
||||
check_label(label)
|
||||
label = _punycode(label)
|
||||
label = _alabel_prefix + label
|
||||
|
||||
if not valid_label_length(label):
|
||||
raise IDNAError('Label too long')
|
||||
|
||||
return label
|
||||
|
||||
|
||||
def ulabel(label):
|
||||
|
||||
if not isinstance(label, (bytes, bytearray)):
|
||||
try:
|
||||
label = label.encode('ascii')
|
||||
except UnicodeEncodeError:
|
||||
check_label(label)
|
||||
return label
|
||||
|
||||
label = label.lower()
|
||||
if label.startswith(_alabel_prefix):
|
||||
label = label[len(_alabel_prefix):]
|
||||
else:
|
||||
check_label(label)
|
||||
return label.decode('ascii')
|
||||
|
||||
label = label.decode('punycode')
|
||||
check_label(label)
|
||||
return label
|
||||
|
||||
|
||||
def uts46_remap(domain, std3_rules=True, transitional=False):
|
||||
"""Re-map the characters in the string according to UTS46 processing."""
|
||||
from .uts46data import uts46data
|
||||
output = u""
|
||||
try:
|
||||
for pos, char in enumerate(domain):
|
||||
code_point = ord(char)
|
||||
uts46row = uts46data[code_point if code_point < 256 else
|
||||
bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
|
||||
status = uts46row[1]
|
||||
replacement = uts46row[2] if len(uts46row) == 3 else None
|
||||
if (status == "V" or
|
||||
(status == "D" and not transitional) or
|
||||
(status == "3" and std3_rules and replacement is None)):
|
||||
output += char
|
||||
elif replacement is not None and (status == "M" or
|
||||
(status == "3" and std3_rules) or
|
||||
(status == "D" and transitional)):
|
||||
output += replacement
|
||||
elif status != "I":
|
||||
raise IndexError()
|
||||
return unicodedata.normalize("NFC", output)
|
||||
except IndexError:
|
||||
raise InvalidCodepoint(
|
||||
"Codepoint {0} not allowed at position {1} in {2}".format(
|
||||
_unot(code_point), pos + 1, repr(domain)))
|
||||
|
||||
|
||||
def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
|
||||
|
||||
if isinstance(s, (bytes, bytearray)):
|
||||
s = s.decode("ascii")
|
||||
if uts46:
|
||||
s = uts46_remap(s, std3_rules, transitional)
|
||||
trailing_dot = False
|
||||
result = []
|
||||
if strict:
|
||||
labels = s.split('.')
|
||||
else:
|
||||
labels = _unicode_dots_re.split(s)
|
||||
while labels and not labels[0]:
|
||||
del labels[0]
|
||||
if not labels:
|
||||
raise IDNAError('Empty domain')
|
||||
if labels[-1] == '':
|
||||
del labels[-1]
|
||||
trailing_dot = True
|
||||
for label in labels:
|
||||
result.append(alabel(label))
|
||||
if trailing_dot:
|
||||
result.append(b'')
|
||||
s = b'.'.join(result)
|
||||
if not valid_string_length(s, trailing_dot):
|
||||
raise IDNAError('Domain too long')
|
||||
return s
|
||||
|
||||
|
||||
def decode(s, strict=False, uts46=False, std3_rules=False):
|
||||
|
||||
if isinstance(s, (bytes, bytearray)):
|
||||
s = s.decode("ascii")
|
||||
if uts46:
|
||||
s = uts46_remap(s, std3_rules, False)
|
||||
trailing_dot = False
|
||||
result = []
|
||||
if not strict:
|
||||
labels = _unicode_dots_re.split(s)
|
||||
else:
|
||||
labels = s.split(u'.')
|
||||
while labels and not labels[0]:
|
||||
del labels[0]
|
||||
if not labels:
|
||||
raise IDNAError('Empty domain')
|
||||
if not labels[-1]:
|
||||
del labels[-1]
|
||||
trailing_dot = True
|
||||
for label in labels:
|
||||
result.append(ulabel(label))
|
||||
if trailing_dot:
|
||||
result.append(u'')
|
||||
return u'.'.join(result)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,53 +0,0 @@
|
||||
"""
|
||||
Given a list of integers, made up of (hopefully) a small number of long runs
|
||||
of consecutive integers, compute a representation of the form
|
||||
((start1, end1), (start2, end2) ...). Then answer the question "was x present
|
||||
in the original list?" in time O(log(# runs)).
|
||||
"""
|
||||
|
||||
import bisect
|
||||
|
||||
def intranges_from_list(list_):
|
||||
"""Represent a list of integers as a sequence of ranges:
|
||||
((start_0, end_0), (start_1, end_1), ...), such that the original
|
||||
integers are exactly those x such that start_i <= x < end_i for some i.
|
||||
|
||||
Ranges are encoded as single integers (start << 32 | end), not as tuples.
|
||||
"""
|
||||
|
||||
sorted_list = sorted(list_)
|
||||
ranges = []
|
||||
last_write = -1
|
||||
for i in range(len(sorted_list)):
|
||||
if i+1 < len(sorted_list):
|
||||
if sorted_list[i] == sorted_list[i+1]-1:
|
||||
continue
|
||||
current_range = sorted_list[last_write+1:i+1]
|
||||
ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
|
||||
last_write = i
|
||||
|
||||
return tuple(ranges)
|
||||
|
||||
def _encode_range(start, end):
|
||||
return (start << 32) | end
|
||||
|
||||
def _decode_range(r):
|
||||
return (r >> 32), (r & ((1 << 32) - 1))
|
||||
|
||||
|
||||
def intranges_contain(int_, ranges):
|
||||
"""Determine if `int_` falls into one of the ranges in `ranges`."""
|
||||
tuple_ = _encode_range(int_, 0)
|
||||
pos = bisect.bisect_left(ranges, tuple_)
|
||||
# we could be immediately ahead of a tuple (start, end)
|
||||
# with start < int_ <= end
|
||||
if pos > 0:
|
||||
left, right = _decode_range(ranges[pos-1])
|
||||
if left <= int_ < right:
|
||||
return True
|
||||
# or we could be immediately behind a tuple (int_, end)
|
||||
if pos < len(ranges):
|
||||
left, _ = _decode_range(ranges[pos])
|
||||
if left == int_:
|
||||
return True
|
||||
return False
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,97 +0,0 @@
|
||||
"""
|
||||
urllib3 - Thread-safe connection pooling and re-using.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import warnings
|
||||
|
||||
from .connectionpool import (
|
||||
HTTPConnectionPool,
|
||||
HTTPSConnectionPool,
|
||||
connection_from_url
|
||||
)
|
||||
|
||||
from . import exceptions
|
||||
from .filepost import encode_multipart_formdata
|
||||
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
|
||||
from .response import HTTPResponse
|
||||
from .util.request import make_headers
|
||||
from .util.url import get_host
|
||||
from .util.timeout import Timeout
|
||||
from .util.retry import Retry
|
||||
|
||||
|
||||
# Set default logging handler to avoid "No handler found" warnings.
|
||||
import logging
|
||||
try: # Python 2.7+
|
||||
from logging import NullHandler
|
||||
except ImportError:
|
||||
class NullHandler(logging.Handler):
|
||||
def emit(self, record):
|
||||
pass
|
||||
|
||||
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
|
||||
__license__ = 'MIT'
|
||||
__version__ = '1.21.1'
|
||||
|
||||
__all__ = (
|
||||
'HTTPConnectionPool',
|
||||
'HTTPSConnectionPool',
|
||||
'PoolManager',
|
||||
'ProxyManager',
|
||||
'HTTPResponse',
|
||||
'Retry',
|
||||
'Timeout',
|
||||
'add_stderr_logger',
|
||||
'connection_from_url',
|
||||
'disable_warnings',
|
||||
'encode_multipart_formdata',
|
||||
'get_host',
|
||||
'make_headers',
|
||||
'proxy_from_url',
|
||||
)
|
||||
|
||||
logging.getLogger(__name__).addHandler(NullHandler())
|
||||
|
||||
|
||||
def add_stderr_logger(level=logging.DEBUG):
|
||||
"""
|
||||
Helper for quickly adding a StreamHandler to the logger. Useful for
|
||||
debugging.
|
||||
|
||||
Returns the handler after adding it.
|
||||
"""
|
||||
# This method needs to be in this __init__.py to get the __name__ correct
|
||||
# even if urllib3 is vendored within another package.
|
||||
logger = logging.getLogger(__name__)
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(level)
|
||||
logger.debug('Added a stderr logging handler to logger: %s', __name__)
|
||||
return handler
|
||||
|
||||
|
||||
# ... Clean up.
|
||||
del NullHandler
|
||||
|
||||
|
||||
# All warning filters *must* be appended unless you're really certain that they
|
||||
# shouldn't be: otherwise, it's very hard for users to use most Python
|
||||
# mechanisms to silence them.
|
||||
# SecurityWarning's always go off by default.
|
||||
warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
|
||||
# SubjectAltNameWarning's should go off once per host
|
||||
warnings.simplefilter('default', exceptions.SubjectAltNameWarning, append=True)
|
||||
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
|
||||
warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
|
||||
append=True)
|
||||
# SNIMissingWarnings should go off only once.
|
||||
warnings.simplefilter('default', exceptions.SNIMissingWarning, append=True)
|
||||
|
||||
|
||||
def disable_warnings(category=exceptions.HTTPWarning):
|
||||
"""
|
||||
Helper for quickly disabling all urllib3 warnings.
|
||||
"""
|
||||
warnings.simplefilter('ignore', category)
|
||||
@@ -1,314 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from collections import Mapping, MutableMapping
|
||||
try:
|
||||
from threading import RLock
|
||||
except ImportError: # Platform-specific: No threads available
|
||||
class RLock:
|
||||
def __enter__(self):
|
||||
pass
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
pass
|
||||
|
||||
|
||||
try: # Python 2.7+
|
||||
from collections import OrderedDict
|
||||
except ImportError:
|
||||
from .packages.ordered_dict import OrderedDict
|
||||
from .packages.six import iterkeys, itervalues, PY3
|
||||
|
||||
|
||||
__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
|
||||
|
||||
|
||||
_Null = object()
|
||||
|
||||
|
||||
class RecentlyUsedContainer(MutableMapping):
|
||||
"""
|
||||
Provides a thread-safe dict-like container which maintains up to
|
||||
``maxsize`` keys while throwing away the least-recently-used keys beyond
|
||||
``maxsize``.
|
||||
|
||||
:param maxsize:
|
||||
Maximum number of recent elements to retain.
|
||||
|
||||
:param dispose_func:
|
||||
Every time an item is evicted from the container,
|
||||
``dispose_func(value)`` is called. Callback which will get called
|
||||
"""
|
||||
|
||||
ContainerCls = OrderedDict
|
||||
|
||||
def __init__(self, maxsize=10, dispose_func=None):
|
||||
self._maxsize = maxsize
|
||||
self.dispose_func = dispose_func
|
||||
|
||||
self._container = self.ContainerCls()
|
||||
self.lock = RLock()
|
||||
|
||||
def __getitem__(self, key):
|
||||
# Re-insert the item, moving it to the end of the eviction line.
|
||||
with self.lock:
|
||||
item = self._container.pop(key)
|
||||
self._container[key] = item
|
||||
return item
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
evicted_value = _Null
|
||||
with self.lock:
|
||||
# Possibly evict the existing value of 'key'
|
||||
evicted_value = self._container.get(key, _Null)
|
||||
self._container[key] = value
|
||||
|
||||
# If we didn't evict an existing value, we might have to evict the
|
||||
# least recently used item from the beginning of the container.
|
||||
if len(self._container) > self._maxsize:
|
||||
_key, evicted_value = self._container.popitem(last=False)
|
||||
|
||||
if self.dispose_func and evicted_value is not _Null:
|
||||
self.dispose_func(evicted_value)
|
||||
|
||||
def __delitem__(self, key):
|
||||
with self.lock:
|
||||
value = self._container.pop(key)
|
||||
|
||||
if self.dispose_func:
|
||||
self.dispose_func(value)
|
||||
|
||||
def __len__(self):
|
||||
with self.lock:
|
||||
return len(self._container)
|
||||
|
||||
def __iter__(self):
|
||||
raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')
|
||||
|
||||
def clear(self):
|
||||
with self.lock:
|
||||
# Copy pointers to all values, then wipe the mapping
|
||||
values = list(itervalues(self._container))
|
||||
self._container.clear()
|
||||
|
||||
if self.dispose_func:
|
||||
for value in values:
|
||||
self.dispose_func(value)
|
||||
|
||||
def keys(self):
|
||||
with self.lock:
|
||||
return list(iterkeys(self._container))
|
||||
|
||||
|
||||
class HTTPHeaderDict(MutableMapping):
|
||||
"""
|
||||
:param headers:
|
||||
An iterable of field-value pairs. Must not contain multiple field names
|
||||
when compared case-insensitively.
|
||||
|
||||
:param kwargs:
|
||||
Additional field-value pairs to pass in to ``dict.update``.
|
||||
|
||||
A ``dict`` like container for storing HTTP Headers.
|
||||
|
||||
Field names are stored and compared case-insensitively in compliance with
|
||||
RFC 7230. Iteration provides the first case-sensitive key seen for each
|
||||
case-insensitive pair.
|
||||
|
||||
Using ``__setitem__`` syntax overwrites fields that compare equal
|
||||
case-insensitively in order to maintain ``dict``'s api. For fields that
|
||||
compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
|
||||
in a loop.
|
||||
|
||||
If multiple fields that are equal case-insensitively are passed to the
|
||||
constructor or ``.update``, the behavior is undefined and some will be
|
||||
lost.
|
||||
|
||||
>>> headers = HTTPHeaderDict()
|
||||
>>> headers.add('Set-Cookie', 'foo=bar')
|
||||
>>> headers.add('set-cookie', 'baz=quxx')
|
||||
>>> headers['content-length'] = '7'
|
||||
>>> headers['SET-cookie']
|
||||
'foo=bar, baz=quxx'
|
||||
>>> headers['Content-Length']
|
||||
'7'
|
||||
"""
|
||||
|
||||
def __init__(self, headers=None, **kwargs):
|
||||
super(HTTPHeaderDict, self).__init__()
|
||||
self._container = OrderedDict()
|
||||
if headers is not None:
|
||||
if isinstance(headers, HTTPHeaderDict):
|
||||
self._copy_from(headers)
|
||||
else:
|
||||
self.extend(headers)
|
||||
if kwargs:
|
||||
self.extend(kwargs)
|
||||
|
||||
def __setitem__(self, key, val):
|
||||
self._container[key.lower()] = [key, val]
|
||||
return self._container[key.lower()]
|
||||
|
||||
def __getitem__(self, key):
|
||||
val = self._container[key.lower()]
|
||||
return ', '.join(val[1:])
|
||||
|
||||
def __delitem__(self, key):
|
||||
del self._container[key.lower()]
|
||||
|
||||
def __contains__(self, key):
|
||||
return key.lower() in self._container
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, Mapping) and not hasattr(other, 'keys'):
|
||||
return False
|
||||
if not isinstance(other, type(self)):
|
||||
other = type(self)(other)
|
||||
return (dict((k.lower(), v) for k, v in self.itermerged()) ==
|
||||
dict((k.lower(), v) for k, v in other.itermerged()))
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
if not PY3: # Python 2
|
||||
iterkeys = MutableMapping.iterkeys
|
||||
itervalues = MutableMapping.itervalues
|
||||
|
||||
__marker = object()
|
||||
|
||||
def __len__(self):
|
||||
return len(self._container)
|
||||
|
||||
def __iter__(self):
|
||||
# Only provide the originally cased names
|
||||
for vals in self._container.values():
|
||||
yield vals[0]
|
||||
|
||||
def pop(self, key, default=__marker):
|
||||
'''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
|
||||
If key is not found, d is returned if given, otherwise KeyError is raised.
|
||||
'''
|
||||
# Using the MutableMapping function directly fails due to the private marker.
|
||||
# Using ordinary dict.pop would expose the internal structures.
|
||||
# So let's reinvent the wheel.
|
||||
try:
|
||||
value = self[key]
|
||||
except KeyError:
|
||||
if default is self.__marker:
|
||||
raise
|
||||
return default
|
||||
else:
|
||||
del self[key]
|
||||
return value
|
||||
|
||||
def discard(self, key):
|
||||
try:
|
||||
del self[key]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
def add(self, key, val):
|
||||
"""Adds a (name, value) pair, doesn't overwrite the value if it already
|
||||
exists.
|
||||
|
||||
>>> headers = HTTPHeaderDict(foo='bar')
|
||||
>>> headers.add('Foo', 'baz')
|
||||
>>> headers['foo']
|
||||
'bar, baz'
|
||||
"""
|
||||
key_lower = key.lower()
|
||||
new_vals = [key, val]
|
||||
# Keep the common case aka no item present as fast as possible
|
||||
vals = self._container.setdefault(key_lower, new_vals)
|
||||
if new_vals is not vals:
|
||||
vals.append(val)
|
||||
|
||||
def extend(self, *args, **kwargs):
|
||||
"""Generic import function for any type of header-like object.
|
||||
Adapted version of MutableMapping.update in order to insert items
|
||||
with self.add instead of self.__setitem__
|
||||
"""
|
||||
if len(args) > 1:
|
||||
raise TypeError("extend() takes at most 1 positional "
|
||||
"arguments ({0} given)".format(len(args)))
|
||||
other = args[0] if len(args) >= 1 else ()
|
||||
|
||||
if isinstance(other, HTTPHeaderDict):
|
||||
for key, val in other.iteritems():
|
||||
self.add(key, val)
|
||||
elif isinstance(other, Mapping):
|
||||
for key in other:
|
||||
self.add(key, other[key])
|
||||
elif hasattr(other, "keys"):
|
||||
for key in other.keys():
|
||||
self.add(key, other[key])
|
||||
else:
|
||||
for key, value in other:
|
||||
self.add(key, value)
|
||||
|
||||
for key, value in kwargs.items():
|
||||
self.add(key, value)
|
||||
|
||||
def getlist(self, key):
|
||||
"""Returns a list of all the values for the named field. Returns an
|
||||
empty list if the key doesn't exist."""
|
||||
try:
|
||||
vals = self._container[key.lower()]
|
||||
except KeyError:
|
||||
return []
|
||||
else:
|
||||
return vals[1:]
|
||||
|
||||
# Backwards compatibility for httplib
|
||||
getheaders = getlist
|
||||
getallmatchingheaders = getlist
|
||||
iget = getlist
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))
|
||||
|
||||
def _copy_from(self, other):
|
||||
for key in other:
|
||||
val = other.getlist(key)
|
||||
if isinstance(val, list):
|
||||
# Don't need to convert tuples
|
||||
val = list(val)
|
||||
self._container[key.lower()] = [key] + val
|
||||
|
||||
def copy(self):
|
||||
clone = type(self)()
|
||||
clone._copy_from(self)
|
||||
return clone
|
||||
|
||||
def iteritems(self):
|
||||
"""Iterate over all header lines, including duplicate ones."""
|
||||
for key in self:
|
||||
vals = self._container[key.lower()]
|
||||
for val in vals[1:]:
|
||||
yield vals[0], val
|
||||
|
||||
def itermerged(self):
|
||||
"""Iterate over all headers, merging duplicate ones together."""
|
||||
for key in self:
|
||||
val = self._container[key.lower()]
|
||||
yield val[0], ', '.join(val[1:])
|
||||
|
||||
def items(self):
|
||||
return list(self.iteritems())
|
||||
|
||||
@classmethod
|
||||
def from_httplib(cls, message): # Python 2
|
||||
"""Read headers from a Python 2 httplib message object."""
|
||||
# python2.7 does not expose a proper API for exporting multiheaders
|
||||
# efficiently. This function re-reads raw lines from the message
|
||||
# object and extracts the multiheaders properly.
|
||||
headers = []
|
||||
|
||||
for line in message.headers:
|
||||
if line.startswith((' ', '\t')):
|
||||
key, value = headers[-1]
|
||||
headers[-1] = (key, value + '\r\n' + line.rstrip())
|
||||
continue
|
||||
|
||||
key, value = line.split(':', 1)
|
||||
headers.append((key, value.strip()))
|
||||
|
||||
return cls(headers)
|
||||
@@ -1,373 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import socket
|
||||
from socket import error as SocketError, timeout as SocketTimeout
|
||||
import warnings
|
||||
from .packages import six
|
||||
from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection
|
||||
from .packages.six.moves.http_client import HTTPException # noqa: F401
|
||||
|
||||
try: # Compiled with SSL?
|
||||
import ssl
|
||||
BaseSSLError = ssl.SSLError
|
||||
except (ImportError, AttributeError): # Platform-specific: No SSL.
|
||||
ssl = None
|
||||
|
||||
class BaseSSLError(BaseException):
|
||||
pass
|
||||
|
||||
|
||||
try: # Python 3:
|
||||
# Not a no-op, we're adding this to the namespace so it can be imported.
|
||||
ConnectionError = ConnectionError
|
||||
except NameError: # Python 2:
|
||||
class ConnectionError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
from .exceptions import (
|
||||
NewConnectionError,
|
||||
ConnectTimeoutError,
|
||||
SubjectAltNameWarning,
|
||||
SystemTimeWarning,
|
||||
)
|
||||
from .packages.ssl_match_hostname import match_hostname, CertificateError
|
||||
|
||||
from .util.ssl_ import (
|
||||
resolve_cert_reqs,
|
||||
resolve_ssl_version,
|
||||
assert_fingerprint,
|
||||
create_urllib3_context,
|
||||
ssl_wrap_socket
|
||||
)
|
||||
|
||||
|
||||
from .util import connection
|
||||
|
||||
from ._collections import HTTPHeaderDict
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
port_by_scheme = {
|
||||
'http': 80,
|
||||
'https': 443,
|
||||
}
|
||||
|
||||
# When updating RECENT_DATE, move it to
|
||||
# within two years of the current date, and no
|
||||
# earlier than 6 months ago.
|
||||
RECENT_DATE = datetime.date(2016, 1, 1)
|
||||
|
||||
|
||||
class DummyConnection(object):
|
||||
"""Used to detect a failed ConnectionCls import."""
|
||||
pass
|
||||
|
||||
|
||||
class HTTPConnection(_HTTPConnection, object):
|
||||
"""
|
||||
Based on httplib.HTTPConnection but provides an extra constructor
|
||||
backwards-compatibility layer between older and newer Pythons.
|
||||
|
||||
Additional keyword parameters are used to configure attributes of the connection.
|
||||
Accepted parameters include:
|
||||
|
||||
- ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
|
||||
- ``source_address``: Set the source address for the current connection.
|
||||
|
||||
.. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x
|
||||
|
||||
- ``socket_options``: Set specific options on the underlying socket. If not specified, then
|
||||
defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
|
||||
Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.
|
||||
|
||||
For example, if you wish to enable TCP Keep Alive in addition to the defaults,
|
||||
you might pass::
|
||||
|
||||
HTTPConnection.default_socket_options + [
|
||||
(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
|
||||
]
|
||||
|
||||
Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
|
||||
"""
|
||||
|
||||
default_port = port_by_scheme['http']
|
||||
|
||||
#: Disable Nagle's algorithm by default.
|
||||
#: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
|
||||
default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]
|
||||
|
||||
#: Whether this connection verifies the host's certificate.
|
||||
is_verified = False
|
||||
|
||||
def __init__(self, *args, **kw):
|
||||
if six.PY3: # Python 3
|
||||
kw.pop('strict', None)
|
||||
|
||||
# Pre-set source_address in case we have an older Python like 2.6.
|
||||
self.source_address = kw.get('source_address')
|
||||
|
||||
if sys.version_info < (2, 7): # Python 2.6
|
||||
# _HTTPConnection on Python 2.6 will balk at this keyword arg, but
|
||||
# not newer versions. We can still use it when creating a
|
||||
# connection though, so we pop it *after* we have saved it as
|
||||
# self.source_address.
|
||||
kw.pop('source_address', None)
|
||||
|
||||
#: The socket options provided by the user. If no options are
|
||||
#: provided, we use the default options.
|
||||
self.socket_options = kw.pop('socket_options', self.default_socket_options)
|
||||
|
||||
# Superclass also sets self.source_address in Python 2.7+.
|
||||
_HTTPConnection.__init__(self, *args, **kw)
|
||||
|
||||
def _new_conn(self):
|
||||
""" Establish a socket connection and set nodelay settings on it.
|
||||
|
||||
:return: New socket connection.
|
||||
"""
|
||||
extra_kw = {}
|
||||
if self.source_address:
|
||||
extra_kw['source_address'] = self.source_address
|
||||
|
||||
if self.socket_options:
|
||||
extra_kw['socket_options'] = self.socket_options
|
||||
|
||||
try:
|
||||
conn = connection.create_connection(
|
||||
(self.host, self.port), self.timeout, **extra_kw)
|
||||
|
||||
except SocketTimeout as e:
|
||||
raise ConnectTimeoutError(
|
||||
self, "Connection to %s timed out. (connect timeout=%s)" %
|
||||
(self.host, self.timeout))
|
||||
|
||||
except SocketError as e:
|
||||
raise NewConnectionError(
|
||||
self, "Failed to establish a new connection: %s" % e)
|
||||
|
||||
return conn
|
||||
|
||||
def _prepare_conn(self, conn):
|
||||
self.sock = conn
|
||||
# the _tunnel_host attribute was added in python 2.6.3 (via
|
||||
# http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do
|
||||
# not have them.
|
||||
if getattr(self, '_tunnel_host', None):
|
||||
# TODO: Fix tunnel so it doesn't depend on self.sock state.
|
||||
self._tunnel()
|
||||
# Mark this connection as not reusable
|
||||
self.auto_open = 0
|
||||
|
||||
def connect(self):
|
||||
conn = self._new_conn()
|
||||
self._prepare_conn(conn)
|
||||
|
||||
def request_chunked(self, method, url, body=None, headers=None):
|
||||
"""
|
||||
Alternative to the common request method, which sends the
|
||||
body with chunked encoding and not as one block
|
||||
"""
|
||||
headers = HTTPHeaderDict(headers if headers is not None else {})
|
||||
skip_accept_encoding = 'accept-encoding' in headers
|
||||
skip_host = 'host' in headers
|
||||
self.putrequest(
|
||||
method,
|
||||
url,
|
||||
skip_accept_encoding=skip_accept_encoding,
|
||||
skip_host=skip_host
|
||||
)
|
||||
for header, value in headers.items():
|
||||
self.putheader(header, value)
|
||||
if 'transfer-encoding' not in headers:
|
||||
self.putheader('Transfer-Encoding', 'chunked')
|
||||
self.endheaders()
|
||||
|
||||
if body is not None:
|
||||
stringish_types = six.string_types + (six.binary_type,)
|
||||
if isinstance(body, stringish_types):
|
||||
body = (body,)
|
||||
for chunk in body:
|
||||
if not chunk:
|
||||
continue
|
||||
if not isinstance(chunk, six.binary_type):
|
||||
chunk = chunk.encode('utf8')
|
||||
len_str = hex(len(chunk))[2:]
|
||||
self.send(len_str.encode('utf-8'))
|
||||
self.send(b'\r\n')
|
||||
self.send(chunk)
|
||||
self.send(b'\r\n')
|
||||
|
||||
# After the if clause, to always have a closed body
|
||||
self.send(b'0\r\n\r\n')
|
||||
|
||||
|
||||
class HTTPSConnection(HTTPConnection):
|
||||
default_port = port_by_scheme['https']
|
||||
|
||||
ssl_version = None
|
||||
|
||||
def __init__(self, host, port=None, key_file=None, cert_file=None,
|
||||
strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
|
||||
ssl_context=None, **kw):
|
||||
|
||||
HTTPConnection.__init__(self, host, port, strict=strict,
|
||||
timeout=timeout, **kw)
|
||||
|
||||
self.key_file = key_file
|
||||
self.cert_file = cert_file
|
||||
self.ssl_context = ssl_context
|
||||
|
||||
# Required property for Google AppEngine 1.9.0 which otherwise causes
|
||||
# HTTPS requests to go out as HTTP. (See Issue #356)
|
||||
self._protocol = 'https'
|
||||
|
||||
def connect(self):
|
||||
conn = self._new_conn()
|
||||
self._prepare_conn(conn)
|
||||
|
||||
if self.ssl_context is None:
|
||||
self.ssl_context = create_urllib3_context(
|
||||
ssl_version=resolve_ssl_version(None),
|
||||
cert_reqs=resolve_cert_reqs(None),
|
||||
)
|
||||
|
||||
self.sock = ssl_wrap_socket(
|
||||
sock=conn,
|
||||
keyfile=self.key_file,
|
||||
certfile=self.cert_file,
|
||||
ssl_context=self.ssl_context,
|
||||
)
|
||||
|
||||
|
||||
class VerifiedHTTPSConnection(HTTPSConnection):
|
||||
"""
|
||||
Based on httplib.HTTPSConnection but wraps the socket with
|
||||
SSL certification.
|
||||
"""
|
||||
cert_reqs = None
|
||||
ca_certs = None
|
||||
ca_cert_dir = None
|
||||
ssl_version = None
|
||||
assert_fingerprint = None
|
||||
|
||||
def set_cert(self, key_file=None, cert_file=None,
|
||||
cert_reqs=None, ca_certs=None,
|
||||
assert_hostname=None, assert_fingerprint=None,
|
||||
ca_cert_dir=None):
|
||||
"""
|
||||
This method should only be called once, before the connection is used.
|
||||
"""
|
||||
# If cert_reqs is not provided, we can try to guess. If the user gave
|
||||
# us a cert database, we assume they want to use it: otherwise, if
|
||||
# they gave us an SSL Context object we should use whatever is set for
|
||||
# it.
|
||||
if cert_reqs is None:
|
||||
if ca_certs or ca_cert_dir:
|
||||
cert_reqs = 'CERT_REQUIRED'
|
||||
elif self.ssl_context is not None:
|
||||
cert_reqs = self.ssl_context.verify_mode
|
||||
|
||||
self.key_file = key_file
|
||||
self.cert_file = cert_file
|
||||
self.cert_reqs = cert_reqs
|
||||
self.assert_hostname = assert_hostname
|
||||
self.assert_fingerprint = assert_fingerprint
|
||||
self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
|
||||
self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
|
||||
|
||||
def connect(self):
|
||||
# Add certificate verification
|
||||
conn = self._new_conn()
|
||||
|
||||
hostname = self.host
|
||||
if getattr(self, '_tunnel_host', None):
|
||||
# _tunnel_host was added in Python 2.6.3
|
||||
# (See: http://hg.python.org/cpython/rev/0f57b30a152f)
|
||||
|
||||
self.sock = conn
|
||||
# Calls self._set_hostport(), so self.host is
|
||||
# self._tunnel_host below.
|
||||
self._tunnel()
|
||||
# Mark this connection as not reusable
|
||||
self.auto_open = 0
|
||||
|
||||
# Override the host with the one we're requesting data from.
|
||||
hostname = self._tunnel_host
|
||||
|
||||
is_time_off = datetime.date.today() < RECENT_DATE
|
||||
if is_time_off:
|
||||
warnings.warn((
|
||||
'System time is way off (before {0}). This will probably '
|
||||
'lead to SSL verification errors').format(RECENT_DATE),
|
||||
SystemTimeWarning
|
||||
)
|
||||
|
||||
# Wrap socket using verification with the root certs in
|
||||
# trusted_root_certs
|
||||
if self.ssl_context is None:
|
||||
self.ssl_context = create_urllib3_context(
|
||||
ssl_version=resolve_ssl_version(self.ssl_version),
|
||||
cert_reqs=resolve_cert_reqs(self.cert_reqs),
|
||||
)
|
||||
|
||||
context = self.ssl_context
|
||||
context.verify_mode = resolve_cert_reqs(self.cert_reqs)
|
||||
self.sock = ssl_wrap_socket(
|
||||
sock=conn,
|
||||
keyfile=self.key_file,
|
||||
certfile=self.cert_file,
|
||||
ca_certs=self.ca_certs,
|
||||
ca_cert_dir=self.ca_cert_dir,
|
||||
server_hostname=hostname,
|
||||
ssl_context=context)
|
||||
|
||||
if self.assert_fingerprint:
|
||||
assert_fingerprint(self.sock.getpeercert(binary_form=True),
|
||||
self.assert_fingerprint)
|
||||
elif context.verify_mode != ssl.CERT_NONE \
|
||||
and not getattr(context, 'check_hostname', False) \
|
||||
and self.assert_hostname is not False:
|
||||
# While urllib3 attempts to always turn off hostname matching from
|
||||
# the TLS library, this cannot always be done. So we check whether
|
||||
# the TLS Library still thinks it's matching hostnames.
|
||||
cert = self.sock.getpeercert()
|
||||
if not cert.get('subjectAltName', ()):
|
||||
warnings.warn((
|
||||
'Certificate for {0} has no `subjectAltName`, falling back to check for a '
|
||||
'`commonName` for now. This feature is being removed by major browsers and '
|
||||
'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 '
|
||||
'for details.)'.format(hostname)),
|
||||
SubjectAltNameWarning
|
||||
)
|
||||
_match_hostname(cert, self.assert_hostname or hostname)
|
||||
|
||||
self.is_verified = (
|
||||
context.verify_mode == ssl.CERT_REQUIRED or
|
||||
self.assert_fingerprint is not None
|
||||
)
|
||||
|
||||
|
||||
def _match_hostname(cert, asserted_hostname):
|
||||
try:
|
||||
match_hostname(cert, asserted_hostname)
|
||||
except CertificateError as e:
|
||||
log.error(
|
||||
'Certificate did not match expected hostname: %s. '
|
||||
'Certificate: %s', asserted_hostname, cert
|
||||
)
|
||||
# Add cert to exception and reraise so client code can inspect
|
||||
# the cert when catching the exception, if they want to
|
||||
e._peer_cert = cert
|
||||
raise
|
||||
|
||||
|
||||
if ssl:
|
||||
# Make a copy for testing.
|
||||
UnverifiedHTTPSConnection = HTTPSConnection
|
||||
HTTPSConnection = VerifiedHTTPSConnection
|
||||
else:
|
||||
HTTPSConnection = DummyConnection
|
||||
@@ -1,899 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import errno
|
||||
import logging
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
from socket import error as SocketError, timeout as SocketTimeout
|
||||
import socket
|
||||
|
||||
|
||||
from .exceptions import (
|
||||
ClosedPoolError,
|
||||
ProtocolError,
|
||||
EmptyPoolError,
|
||||
HeaderParsingError,
|
||||
HostChangedError,
|
||||
LocationValueError,
|
||||
MaxRetryError,
|
||||
ProxyError,
|
||||
ReadTimeoutError,
|
||||
SSLError,
|
||||
TimeoutError,
|
||||
InsecureRequestWarning,
|
||||
NewConnectionError,
|
||||
)
|
||||
from .packages.ssl_match_hostname import CertificateError
|
||||
from .packages import six
|
||||
from .packages.six.moves import queue
|
||||
from .connection import (
|
||||
port_by_scheme,
|
||||
DummyConnection,
|
||||
HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
|
||||
HTTPException, BaseSSLError,
|
||||
)
|
||||
from .request import RequestMethods
|
||||
from .response import HTTPResponse
|
||||
|
||||
from .util.connection import is_connection_dropped
|
||||
from .util.request import set_file_position
|
||||
from .util.response import assert_header_parsing
|
||||
from .util.retry import Retry
|
||||
from .util.timeout import Timeout
|
||||
from .util.url import get_host, Url
|
||||
|
||||
|
||||
if six.PY2:
|
||||
# Queue is imported for side effects on MS Windows
|
||||
import Queue as _unused_module_Queue # noqa: F401
|
||||
|
||||
xrange = six.moves.xrange
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_Default = object()
|
||||
|
||||
|
||||
# Pool objects
|
||||
class ConnectionPool(object):
|
||||
"""
|
||||
Base class for all connection pools, such as
|
||||
:class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
|
||||
"""
|
||||
|
||||
scheme = None
|
||||
QueueCls = queue.LifoQueue
|
||||
|
||||
def __init__(self, host, port=None):
|
||||
if not host:
|
||||
raise LocationValueError("No host specified.")
|
||||
|
||||
self.host = _ipv6_host(host).lower()
|
||||
self.port = port
|
||||
|
||||
def __str__(self):
|
||||
return '%s(host=%r, port=%r)' % (type(self).__name__,
|
||||
self.host, self.port)
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
# Return False to re-raise any potential exceptions
|
||||
return False
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close all pooled connections and disable the pool.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
|
||||
_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
|
||||
|
||||
|
||||
class HTTPConnectionPool(ConnectionPool, RequestMethods):
|
||||
"""
|
||||
Thread-safe connection pool for one host.
|
||||
|
||||
:param host:
|
||||
Host used for this HTTP Connection (e.g. "localhost"), passed into
|
||||
:class:`httplib.HTTPConnection`.
|
||||
|
||||
:param port:
|
||||
Port used for this HTTP Connection (None is equivalent to 80), passed
|
||||
into :class:`httplib.HTTPConnection`.
|
||||
|
||||
:param strict:
|
||||
Causes BadStatusLine to be raised if the status line can't be parsed
|
||||
as a valid HTTP/1.0 or 1.1 status line, passed into
|
||||
:class:`httplib.HTTPConnection`.
|
||||
|
||||
.. note::
|
||||
Only works in Python 2. This parameter is ignored in Python 3.
|
||||
|
||||
:param timeout:
|
||||
Socket timeout in seconds for each individual connection. This can
|
||||
be a float or integer, which sets the timeout for the HTTP request,
|
||||
or an instance of :class:`urllib3.util.Timeout` which gives you more
|
||||
fine-grained control over request timeouts. After the constructor has
|
||||
been parsed, this is always a `urllib3.util.Timeout` object.
|
||||
|
||||
:param maxsize:
|
||||
Number of connections to save that can be reused. More than 1 is useful
|
||||
in multithreaded situations. If ``block`` is set to False, more
|
||||
connections will be created but they will not be saved once they've
|
||||
been used.
|
||||
|
||||
:param block:
|
||||
If set to True, no more than ``maxsize`` connections will be used at
|
||||
a time. When no free connections are available, the call will block
|
||||
until a connection has been released. This is a useful side effect for
|
||||
particular multithreaded situations where one does not want to use more
|
||||
than maxsize connections per host to prevent flooding.
|
||||
|
||||
:param headers:
|
||||
Headers to include with all requests, unless other headers are given
|
||||
explicitly.
|
||||
|
||||
:param retries:
|
||||
Retry configuration to use by default with requests in this pool.
|
||||
|
||||
:param _proxy:
|
||||
Parsed proxy URL, should not be used directly, instead, see
|
||||
:class:`urllib3.connectionpool.ProxyManager`"
|
||||
|
||||
:param _proxy_headers:
|
||||
A dictionary with proxy headers, should not be used directly,
|
||||
instead, see :class:`urllib3.connectionpool.ProxyManager`"
|
||||
|
||||
:param \\**conn_kw:
|
||||
Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
|
||||
:class:`urllib3.connection.HTTPSConnection` instances.
|
||||
"""
|
||||
|
||||
scheme = 'http'
|
||||
ConnectionCls = HTTPConnection
|
||||
ResponseCls = HTTPResponse
|
||||
|
||||
def __init__(self, host, port=None, strict=False,
|
||||
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
|
||||
headers=None, retries=None,
|
||||
_proxy=None, _proxy_headers=None,
|
||||
**conn_kw):
|
||||
ConnectionPool.__init__(self, host, port)
|
||||
RequestMethods.__init__(self, headers)
|
||||
|
||||
self.strict = strict
|
||||
|
||||
if not isinstance(timeout, Timeout):
|
||||
timeout = Timeout.from_float(timeout)
|
||||
|
||||
if retries is None:
|
||||
retries = Retry.DEFAULT
|
||||
|
||||
self.timeout = timeout
|
||||
self.retries = retries
|
||||
|
||||
self.pool = self.QueueCls(maxsize)
|
||||
self.block = block
|
||||
|
||||
self.proxy = _proxy
|
||||
self.proxy_headers = _proxy_headers or {}
|
||||
|
||||
# Fill the queue up so that doing get() on it will block properly
|
||||
for _ in xrange(maxsize):
|
||||
self.pool.put(None)
|
||||
|
||||
# These are mostly for testing and debugging purposes.
|
||||
self.num_connections = 0
|
||||
self.num_requests = 0
|
||||
self.conn_kw = conn_kw
|
||||
|
||||
if self.proxy:
|
||||
# Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
|
||||
# We cannot know if the user has added default socket options, so we cannot replace the
|
||||
# list.
|
||||
self.conn_kw.setdefault('socket_options', [])
|
||||
|
||||
def _new_conn(self):
|
||||
"""
|
||||
Return a fresh :class:`HTTPConnection`.
|
||||
"""
|
||||
self.num_connections += 1
|
||||
log.debug("Starting new HTTP connection (%d): %s",
|
||||
self.num_connections, self.host)
|
||||
|
||||
conn = self.ConnectionCls(host=self.host, port=self.port,
|
||||
timeout=self.timeout.connect_timeout,
|
||||
strict=self.strict, **self.conn_kw)
|
||||
return conn
|
||||
|
||||
def _get_conn(self, timeout=None):
|
||||
"""
|
||||
Get a connection. Will return a pooled connection if one is available.
|
||||
|
||||
If no connections are available and :prop:`.block` is ``False``, then a
|
||||
fresh connection is returned.
|
||||
|
||||
:param timeout:
|
||||
Seconds to wait before giving up and raising
|
||||
:class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
|
||||
:prop:`.block` is ``True``.
|
||||
"""
|
||||
conn = None
|
||||
try:
|
||||
conn = self.pool.get(block=self.block, timeout=timeout)
|
||||
|
||||
except AttributeError: # self.pool is None
|
||||
raise ClosedPoolError(self, "Pool is closed.")
|
||||
|
||||
except queue.Empty:
|
||||
if self.block:
|
||||
raise EmptyPoolError(self,
|
||||
"Pool reached maximum size and no more "
|
||||
"connections are allowed.")
|
||||
pass # Oh well, we'll create a new connection then
|
||||
|
||||
# If this is a persistent connection, check if it got disconnected
|
||||
if conn and is_connection_dropped(conn):
|
||||
log.debug("Resetting dropped connection: %s", self.host)
|
||||
conn.close()
|
||||
if getattr(conn, 'auto_open', 1) == 0:
|
||||
# This is a proxied connection that has been mutated by
|
||||
# httplib._tunnel() and cannot be reused (since it would
|
||||
# attempt to bypass the proxy)
|
||||
conn = None
|
||||
|
||||
return conn or self._new_conn()
|
||||
|
||||
def _put_conn(self, conn):
|
||||
"""
|
||||
Put a connection back into the pool.
|
||||
|
||||
:param conn:
|
||||
Connection object for the current host and port as returned by
|
||||
:meth:`._new_conn` or :meth:`._get_conn`.
|
||||
|
||||
If the pool is already full, the connection is closed and discarded
|
||||
because we exceeded maxsize. If connections are discarded frequently,
|
||||
then maxsize should be increased.
|
||||
|
||||
If the pool is closed, then the connection will be closed and discarded.
|
||||
"""
|
||||
try:
|
||||
self.pool.put(conn, block=False)
|
||||
return # Everything is dandy, done.
|
||||
except AttributeError:
|
||||
# self.pool is None.
|
||||
pass
|
||||
except queue.Full:
|
||||
# This should never happen if self.block == True
|
||||
log.warning(
|
||||
"Connection pool is full, discarding connection: %s",
|
||||
self.host)
|
||||
|
||||
# Connection never got put back into the pool, close it.
|
||||
if conn:
|
||||
conn.close()
|
||||
|
||||
def _validate_conn(self, conn):
|
||||
"""
|
||||
Called right before a request is made, after the socket is created.
|
||||
"""
|
||||
pass
|
||||
|
||||
def _prepare_proxy(self, conn):
|
||||
# Nothing to do for HTTP connections.
|
||||
pass
|
||||
|
||||
def _get_timeout(self, timeout):
|
||||
""" Helper that always returns a :class:`urllib3.util.Timeout` """
|
||||
if timeout is _Default:
|
||||
return self.timeout.clone()
|
||||
|
||||
if isinstance(timeout, Timeout):
|
||||
return timeout.clone()
|
||||
else:
|
||||
# User passed us an int/float. This is for backwards compatibility,
|
||||
# can be removed later
|
||||
return Timeout.from_float(timeout)
|
||||
|
||||
def _raise_timeout(self, err, url, timeout_value):
|
||||
"""Is the error actually a timeout? Will raise a ReadTimeout or pass"""
|
||||
|
||||
if isinstance(err, SocketTimeout):
|
||||
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
|
||||
|
||||
# See the above comment about EAGAIN in Python 3. In Python 2 we have
|
||||
# to specifically catch it and throw the timeout error
|
||||
if hasattr(err, 'errno') and err.errno in _blocking_errnos:
|
||||
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
|
||||
|
||||
# Catch possible read timeouts thrown as SSL errors. If not the
|
||||
# case, rethrow the original. We need to do this because of:
|
||||
# http://bugs.python.org/issue10272
|
||||
if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6
|
||||
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
|
||||
|
||||
def _make_request(self, conn, method, url, timeout=_Default, chunked=False,
|
||||
**httplib_request_kw):
|
||||
"""
|
||||
Perform a request on a given urllib connection object taken from our
|
||||
pool.
|
||||
|
||||
:param conn:
|
||||
a connection from one of our connection pools
|
||||
|
||||
:param timeout:
|
||||
Socket timeout in seconds for the request. This can be a
|
||||
float or integer, which will set the same timeout value for
|
||||
the socket connect and the socket read, or an instance of
|
||||
:class:`urllib3.util.Timeout`, which gives you more fine-grained
|
||||
control over your timeouts.
|
||||
"""
|
||||
self.num_requests += 1
|
||||
|
||||
timeout_obj = self._get_timeout(timeout)
|
||||
timeout_obj.start_connect()
|
||||
conn.timeout = timeout_obj.connect_timeout
|
||||
|
||||
# Trigger any extra validation we need to do.
|
||||
try:
|
||||
self._validate_conn(conn)
|
||||
except (SocketTimeout, BaseSSLError) as e:
|
||||
# Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
|
||||
self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
|
||||
raise
|
||||
|
||||
# conn.request() calls httplib.*.request, not the method in
|
||||
# urllib3.request. It also calls makefile (recv) on the socket.
|
||||
if chunked:
|
||||
conn.request_chunked(method, url, **httplib_request_kw)
|
||||
else:
|
||||
conn.request(method, url, **httplib_request_kw)
|
||||
|
||||
# Reset the timeout for the recv() on the socket
|
||||
read_timeout = timeout_obj.read_timeout
|
||||
|
||||
# App Engine doesn't have a sock attr
|
||||
if getattr(conn, 'sock', None):
|
||||
# In Python 3 socket.py will catch EAGAIN and return None when you
|
||||
# try and read into the file pointer created by http.client, which
|
||||
# instead raises a BadStatusLine exception. Instead of catching
|
||||
# the exception and assuming all BadStatusLine exceptions are read
|
||||
# timeouts, check for a zero timeout before making the request.
|
||||
if read_timeout == 0:
|
||||
raise ReadTimeoutError(
|
||||
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
|
||||
if read_timeout is Timeout.DEFAULT_TIMEOUT:
|
||||
conn.sock.settimeout(socket.getdefaulttimeout())
|
||||
else: # None or a value
|
||||
conn.sock.settimeout(read_timeout)
|
||||
|
||||
# Receive the response from the server
|
||||
try:
|
||||
try: # Python 2.7, use buffering of HTTP responses
|
||||
httplib_response = conn.getresponse(buffering=True)
|
||||
except TypeError: # Python 2.6 and older, Python 3
|
||||
try:
|
||||
httplib_response = conn.getresponse()
|
||||
except Exception as e:
|
||||
# Remove the TypeError from the exception chain in Python 3;
|
||||
# otherwise it looks like a programming error was the cause.
|
||||
six.raise_from(e, None)
|
||||
except (SocketTimeout, BaseSSLError, SocketError) as e:
|
||||
self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
|
||||
raise
|
||||
|
||||
# AppEngine doesn't have a version attr.
|
||||
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
|
||||
log.debug("%s://%s:%s \"%s %s %s\" %s %s", self.scheme, self.host, self.port,
|
||||
method, url, http_version, httplib_response.status,
|
||||
httplib_response.length)
|
||||
|
||||
try:
|
||||
assert_header_parsing(httplib_response.msg)
|
||||
except HeaderParsingError as hpe: # Platform-specific: Python 3
|
||||
log.warning(
|
||||
'Failed to parse headers (url=%s): %s',
|
||||
self._absolute_url(url), hpe, exc_info=True)
|
||||
|
||||
return httplib_response
|
||||
|
||||
def _absolute_url(self, path):
|
||||
return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close all pooled connections and disable the pool.
|
||||
"""
|
||||
# Disable access to the pool
|
||||
old_pool, self.pool = self.pool, None
|
||||
|
||||
try:
|
||||
while True:
|
||||
conn = old_pool.get(block=False)
|
||||
if conn:
|
||||
conn.close()
|
||||
|
||||
except queue.Empty:
|
||||
pass # Done.
|
||||
|
||||
def is_same_host(self, url):
|
||||
"""
|
||||
Check if the given ``url`` is a member of the same host as this
|
||||
connection pool.
|
||||
"""
|
||||
if url.startswith('/'):
|
||||
return True
|
||||
|
||||
# TODO: Add optional support for socket.gethostbyname checking.
|
||||
scheme, host, port = get_host(url)
|
||||
|
||||
host = _ipv6_host(host).lower()
|
||||
|
||||
# Use explicit default port for comparison when none is given
|
||||
if self.port and not port:
|
||||
port = port_by_scheme.get(scheme)
|
||||
elif not self.port and port == port_by_scheme.get(scheme):
|
||||
port = None
|
||||
|
||||
return (scheme, host, port) == (self.scheme, self.host, self.port)
|
||||
|
||||
def urlopen(self, method, url, body=None, headers=None, retries=None,
|
||||
redirect=True, assert_same_host=True, timeout=_Default,
|
||||
pool_timeout=None, release_conn=None, chunked=False,
|
||||
body_pos=None, **response_kw):
|
||||
"""
|
||||
Get a connection from the pool and perform an HTTP request. This is the
|
||||
lowest level call for making a request, so you'll need to specify all
|
||||
the raw details.
|
||||
|
||||
.. note::
|
||||
|
||||
More commonly, it's appropriate to use a convenience method provided
|
||||
by :class:`.RequestMethods`, such as :meth:`request`.
|
||||
|
||||
.. note::
|
||||
|
||||
`release_conn` will only behave as expected if
|
||||
`preload_content=False` because we want to make
|
||||
`preload_content=False` the default behaviour someday soon without
|
||||
breaking backwards compatibility.
|
||||
|
||||
:param method:
|
||||
HTTP request method (such as GET, POST, PUT, etc.)
|
||||
|
||||
:param body:
|
||||
Data to send in the request body (useful for creating
|
||||
POST requests, see HTTPConnectionPool.post_url for
|
||||
more convenience).
|
||||
|
||||
:param headers:
|
||||
Dictionary of custom headers to send, such as User-Agent,
|
||||
If-None-Match, etc. If None, pool headers are used. If provided,
|
||||
these headers completely replace any pool-specific headers.
|
||||
|
||||
:param retries:
|
||||
Configure the number of retries to allow before raising a
|
||||
:class:`~urllib3.exceptions.MaxRetryError` exception.
|
||||
|
||||
Pass ``None`` to retry until you receive a response. Pass a
|
||||
:class:`~urllib3.util.retry.Retry` object for fine-grained control
|
||||
over different types of retries.
|
||||
Pass an integer number to retry connection errors that many times,
|
||||
but no other types of errors. Pass zero to never retry.
|
||||
|
||||
If ``False``, then retries are disabled and any exception is raised
|
||||
immediately. Also, instead of raising a MaxRetryError on redirects,
|
||||
the redirect response will be returned.
|
||||
|
||||
:type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
|
||||
|
||||
:param redirect:
|
||||
If True, automatically handle redirects (status codes 301, 302,
|
||||
303, 307, 308). Each redirect counts as a retry. Disabling retries
|
||||
will disable redirect, too.
|
||||
|
||||
:param assert_same_host:
|
||||
If ``True``, will make sure that the host of the pool requests is
|
||||
consistent else will raise HostChangedError. When False, you can
|
||||
use the pool on an HTTP proxy and request foreign hosts.
|
||||
|
||||
:param timeout:
|
||||
If specified, overrides the default timeout for this one
|
||||
request. It may be a float (in seconds) or an instance of
|
||||
:class:`urllib3.util.Timeout`.
|
||||
|
||||
:param pool_timeout:
|
||||
If set and the pool is set to block=True, then this method will
|
||||
block for ``pool_timeout`` seconds and raise EmptyPoolError if no
|
||||
connection is available within the time period.
|
||||
|
||||
:param release_conn:
|
||||
If False, then the urlopen call will not release the connection
|
||||
back into the pool once a response is received (but will release if
|
||||
you read the entire contents of the response such as when
|
||||
`preload_content=True`). This is useful if you're not preloading
|
||||
the response's content immediately. You will need to call
|
||||
``r.release_conn()`` on the response ``r`` to return the connection
|
||||
back into the pool. If None, it takes the value of
|
||||
``response_kw.get('preload_content', True)``.
|
||||
|
||||
:param chunked:
|
||||
If True, urllib3 will send the body using chunked transfer
|
||||
encoding. Otherwise, urllib3 will send the body using the standard
|
||||
content-length form. Defaults to False.
|
||||
|
||||
:param int body_pos:
|
||||
Position to seek to in file-like body in the event of a retry or
|
||||
redirect. Typically this won't need to be set because urllib3 will
|
||||
auto-populate the value when needed.
|
||||
|
||||
:param \\**response_kw:
|
||||
Additional parameters are passed to
|
||||
:meth:`urllib3.response.HTTPResponse.from_httplib`
|
||||
"""
|
||||
if headers is None:
|
||||
headers = self.headers
|
||||
|
||||
if not isinstance(retries, Retry):
|
||||
retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
|
||||
|
||||
if release_conn is None:
|
||||
release_conn = response_kw.get('preload_content', True)
|
||||
|
||||
# Check host
|
||||
if assert_same_host and not self.is_same_host(url):
|
||||
raise HostChangedError(self, url, retries)
|
||||
|
||||
conn = None
|
||||
|
||||
# Track whether `conn` needs to be released before
|
||||
# returning/raising/recursing. Update this variable if necessary, and
|
||||
# leave `release_conn` constant throughout the function. That way, if
|
||||
# the function recurses, the original value of `release_conn` will be
|
||||
# passed down into the recursive call, and its value will be respected.
|
||||
#
|
||||
# See issue #651 [1] for details.
|
||||
#
|
||||
# [1] <https://github.com/shazow/urllib3/issues/651>
|
||||
release_this_conn = release_conn
|
||||
|
||||
# Merge the proxy headers. Only do this in HTTP. We have to copy the
|
||||
# headers dict so we can safely change it without those changes being
|
||||
# reflected in anyone else's copy.
|
||||
if self.scheme == 'http':
|
||||
headers = headers.copy()
|
||||
headers.update(self.proxy_headers)
|
||||
|
||||
# Must keep the exception bound to a separate variable or else Python 3
|
||||
# complains about UnboundLocalError.
|
||||
err = None
|
||||
|
||||
# Keep track of whether we cleanly exited the except block. This
|
||||
# ensures we do proper cleanup in finally.
|
||||
clean_exit = False
|
||||
|
||||
# Rewind body position, if needed. Record current position
|
||||
# for future rewinds in the event of a redirect/retry.
|
||||
body_pos = set_file_position(body, body_pos)
|
||||
|
||||
try:
|
||||
# Request a connection from the queue.
|
||||
timeout_obj = self._get_timeout(timeout)
|
||||
conn = self._get_conn(timeout=pool_timeout)
|
||||
|
||||
conn.timeout = timeout_obj.connect_timeout
|
||||
|
||||
is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None)
|
||||
if is_new_proxy_conn:
|
||||
self._prepare_proxy(conn)
|
||||
|
||||
# Make the request on the httplib connection object.
|
||||
httplib_response = self._make_request(conn, method, url,
|
||||
timeout=timeout_obj,
|
||||
body=body, headers=headers,
|
||||
chunked=chunked)
|
||||
|
||||
# If we're going to release the connection in ``finally:``, then
|
||||
# the response doesn't need to know about the connection. Otherwise
|
||||
# it will also try to release it and we'll have a double-release
|
||||
# mess.
|
||||
response_conn = conn if not release_conn else None
|
||||
|
||||
# Pass method to Response for length checking
|
||||
response_kw['request_method'] = method
|
||||
|
||||
# Import httplib's response into our own wrapper object
|
||||
response = self.ResponseCls.from_httplib(httplib_response,
|
||||
pool=self,
|
||||
connection=response_conn,
|
||||
retries=retries,
|
||||
**response_kw)
|
||||
|
||||
# Everything went great!
|
||||
clean_exit = True
|
||||
|
||||
except queue.Empty:
|
||||
# Timed out by queue.
|
||||
raise EmptyPoolError(self, "No pool connections are available.")
|
||||
|
||||
except (BaseSSLError, CertificateError) as e:
|
||||
# Close the connection. If a connection is reused on which there
|
||||
# was a Certificate error, the next request will certainly raise
|
||||
# another Certificate error.
|
||||
clean_exit = False
|
||||
raise SSLError(e)
|
||||
|
||||
except SSLError:
|
||||
# Treat SSLError separately from BaseSSLError to preserve
|
||||
# traceback.
|
||||
clean_exit = False
|
||||
raise
|
||||
|
||||
except (TimeoutError, HTTPException, SocketError, ProtocolError) as e:
|
||||
# Discard the connection for these exceptions. It will be
|
||||
# be replaced during the next _get_conn() call.
|
||||
clean_exit = False
|
||||
|
||||
if isinstance(e, (SocketError, NewConnectionError)) and self.proxy:
|
||||
e = ProxyError('Cannot connect to proxy.', e)
|
||||
elif isinstance(e, (SocketError, HTTPException)):
|
||||
e = ProtocolError('Connection aborted.', e)
|
||||
|
||||
retries = retries.increment(method, url, error=e, _pool=self,
|
||||
_stacktrace=sys.exc_info()[2])
|
||||
retries.sleep()
|
||||
|
||||
# Keep track of the error for the retry warning.
|
||||
err = e
|
||||
|
||||
finally:
|
||||
if not clean_exit:
|
||||
# We hit some kind of exception, handled or otherwise. We need
|
||||
# to throw the connection away unless explicitly told not to.
|
||||
# Close the connection, set the variable to None, and make sure
|
||||
# we put the None back in the pool to avoid leaking it.
|
||||
conn = conn and conn.close()
|
||||
release_this_conn = True
|
||||
|
||||
if release_this_conn:
|
||||
# Put the connection back to be reused. If the connection is
|
||||
# expired then it will be None, which will get replaced with a
|
||||
# fresh connection during _get_conn.
|
||||
self._put_conn(conn)
|
||||
|
||||
if not conn:
|
||||
# Try again
|
||||
log.warning("Retrying (%r) after connection "
|
||||
"broken by '%r': %s", retries, err, url)
|
||||
return self.urlopen(method, url, body, headers, retries,
|
||||
redirect, assert_same_host,
|
||||
timeout=timeout, pool_timeout=pool_timeout,
|
||||
release_conn=release_conn, body_pos=body_pos,
|
||||
**response_kw)
|
||||
|
||||
# Handle redirect?
|
||||
redirect_location = redirect and response.get_redirect_location()
|
||||
if redirect_location:
|
||||
if response.status == 303:
|
||||
method = 'GET'
|
||||
|
||||
try:
|
||||
retries = retries.increment(method, url, response=response, _pool=self)
|
||||
except MaxRetryError:
|
||||
if retries.raise_on_redirect:
|
||||
# Release the connection for this response, since we're not
|
||||
# returning it to be released manually.
|
||||
response.release_conn()
|
||||
raise
|
||||
return response
|
||||
|
||||
retries.sleep_for_retry(response)
|
||||
log.debug("Redirecting %s -> %s", url, redirect_location)
|
||||
return self.urlopen(
|
||||
method, redirect_location, body, headers,
|
||||
retries=retries, redirect=redirect,
|
||||
assert_same_host=assert_same_host,
|
||||
timeout=timeout, pool_timeout=pool_timeout,
|
||||
release_conn=release_conn, body_pos=body_pos,
|
||||
**response_kw)
|
||||
|
||||
# Check if we should retry the HTTP response.
|
||||
has_retry_after = bool(response.getheader('Retry-After'))
|
||||
if retries.is_retry(method, response.status, has_retry_after):
|
||||
try:
|
||||
retries = retries.increment(method, url, response=response, _pool=self)
|
||||
except MaxRetryError:
|
||||
if retries.raise_on_status:
|
||||
# Release the connection for this response, since we're not
|
||||
# returning it to be released manually.
|
||||
response.release_conn()
|
||||
raise
|
||||
return response
|
||||
retries.sleep(response)
|
||||
log.debug("Retry: %s", url)
|
||||
return self.urlopen(
|
||||
method, url, body, headers,
|
||||
retries=retries, redirect=redirect,
|
||||
assert_same_host=assert_same_host,
|
||||
timeout=timeout, pool_timeout=pool_timeout,
|
||||
release_conn=release_conn,
|
||||
body_pos=body_pos, **response_kw)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
class HTTPSConnectionPool(HTTPConnectionPool):
|
||||
"""
|
||||
Same as :class:`.HTTPConnectionPool`, but HTTPS.
|
||||
|
||||
When Python is compiled with the :mod:`ssl` module, then
|
||||
:class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
|
||||
instead of :class:`.HTTPSConnection`.
|
||||
|
||||
:class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
|
||||
``assert_hostname`` and ``host`` in this order to verify connections.
|
||||
If ``assert_hostname`` is False, no verification is done.
|
||||
|
||||
The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``,
|
||||
``ca_cert_dir``, and ``ssl_version`` are only used if :mod:`ssl` is
|
||||
available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade
|
||||
the connection socket into an SSL socket.
|
||||
"""
|
||||
|
||||
scheme = 'https'
|
||||
ConnectionCls = HTTPSConnection
|
||||
|
||||
def __init__(self, host, port=None,
|
||||
strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1,
|
||||
block=False, headers=None, retries=None,
|
||||
_proxy=None, _proxy_headers=None,
|
||||
key_file=None, cert_file=None, cert_reqs=None,
|
||||
ca_certs=None, ssl_version=None,
|
||||
assert_hostname=None, assert_fingerprint=None,
|
||||
ca_cert_dir=None, **conn_kw):
|
||||
|
||||
HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
|
||||
block, headers, retries, _proxy, _proxy_headers,
|
||||
**conn_kw)
|
||||
|
||||
if ca_certs and cert_reqs is None:
|
||||
cert_reqs = 'CERT_REQUIRED'
|
||||
|
||||
self.key_file = key_file
|
||||
self.cert_file = cert_file
|
||||
self.cert_reqs = cert_reqs
|
||||
self.ca_certs = ca_certs
|
||||
self.ca_cert_dir = ca_cert_dir
|
||||
self.ssl_version = ssl_version
|
||||
self.assert_hostname = assert_hostname
|
||||
self.assert_fingerprint = assert_fingerprint
|
||||
|
||||
def _prepare_conn(self, conn):
|
||||
"""
|
||||
Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
|
||||
and establish the tunnel if proxy is used.
|
||||
"""
|
||||
|
||||
if isinstance(conn, VerifiedHTTPSConnection):
|
||||
conn.set_cert(key_file=self.key_file,
|
||||
cert_file=self.cert_file,
|
||||
cert_reqs=self.cert_reqs,
|
||||
ca_certs=self.ca_certs,
|
||||
ca_cert_dir=self.ca_cert_dir,
|
||||
assert_hostname=self.assert_hostname,
|
||||
assert_fingerprint=self.assert_fingerprint)
|
||||
conn.ssl_version = self.ssl_version
|
||||
return conn
|
||||
|
||||
def _prepare_proxy(self, conn):
|
||||
"""
|
||||
Establish tunnel connection early, because otherwise httplib
|
||||
would improperly set Host: header to proxy's IP:port.
|
||||
"""
|
||||
# Python 2.7+
|
||||
try:
|
||||
set_tunnel = conn.set_tunnel
|
||||
except AttributeError: # Platform-specific: Python 2.6
|
||||
set_tunnel = conn._set_tunnel
|
||||
|
||||
if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older
|
||||
set_tunnel(self.host, self.port)
|
||||
else:
|
||||
set_tunnel(self.host, self.port, self.proxy_headers)
|
||||
|
||||
conn.connect()
|
||||
|
||||
def _new_conn(self):
|
||||
"""
|
||||
Return a fresh :class:`httplib.HTTPSConnection`.
|
||||
"""
|
||||
self.num_connections += 1
|
||||
log.debug("Starting new HTTPS connection (%d): %s",
|
||||
self.num_connections, self.host)
|
||||
|
||||
if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
|
||||
raise SSLError("Can't connect to HTTPS URL because the SSL "
|
||||
"module is not available.")
|
||||
|
||||
actual_host = self.host
|
||||
actual_port = self.port
|
||||
if self.proxy is not None:
|
||||
actual_host = self.proxy.host
|
||||
actual_port = self.proxy.port
|
||||
|
||||
conn = self.ConnectionCls(host=actual_host, port=actual_port,
|
||||
timeout=self.timeout.connect_timeout,
|
||||
strict=self.strict, **self.conn_kw)
|
||||
|
||||
return self._prepare_conn(conn)
|
||||
|
||||
def _validate_conn(self, conn):
|
||||
"""
|
||||
Called right before a request is made, after the socket is created.
|
||||
"""
|
||||
super(HTTPSConnectionPool, self)._validate_conn(conn)
|
||||
|
||||
# Force connect early to allow us to validate the connection.
|
||||
if not getattr(conn, 'sock', None): # AppEngine might not have `.sock`
|
||||
conn.connect()
|
||||
|
||||
if not conn.is_verified:
|
||||
warnings.warn((
|
||||
'Unverified HTTPS request is being made. '
|
||||
'Adding certificate verification is strongly advised. See: '
|
||||
'https://urllib3.readthedocs.io/en/latest/advanced-usage.html'
|
||||
'#ssl-warnings'),
|
||||
InsecureRequestWarning)
|
||||
|
||||
|
||||
def connection_from_url(url, **kw):
|
||||
"""
|
||||
Given a url, return an :class:`.ConnectionPool` instance of its host.
|
||||
|
||||
This is a shortcut for not having to parse out the scheme, host, and port
|
||||
of the url before creating an :class:`.ConnectionPool` instance.
|
||||
|
||||
:param url:
|
||||
Absolute URL string that must include the scheme. Port is optional.
|
||||
|
||||
:param \\**kw:
|
||||
Passes additional parameters to the constructor of the appropriate
|
||||
:class:`.ConnectionPool`. Useful for specifying things like
|
||||
timeout, maxsize, headers, etc.
|
||||
|
||||
Example::
|
||||
|
||||
>>> conn = connection_from_url('http://google.com/')
|
||||
>>> r = conn.request('GET', '/')
|
||||
"""
|
||||
scheme, host, port = get_host(url)
|
||||
port = port or port_by_scheme.get(scheme, 80)
|
||||
if scheme == 'https':
|
||||
return HTTPSConnectionPool(host, port=port, **kw)
|
||||
else:
|
||||
return HTTPConnectionPool(host, port=port, **kw)
|
||||
|
||||
|
||||
def _ipv6_host(host):
|
||||
"""
|
||||
Process IPv6 address literals
|
||||
"""
|
||||
|
||||
# httplib doesn't like it when we include brackets in IPv6 addresses
|
||||
# Specifically, if we include brackets but also pass the port then
|
||||
# httplib crazily doubles up the square brackets on the Host header.
|
||||
# Instead, we need to make sure we never pass ``None`` as the port.
|
||||
# However, for backward compatibility reasons we can't actually
|
||||
# *assert* that. See http://bugs.python.org/issue28539
|
||||
#
|
||||
# Also if an IPv6 address literal has a zone identifier, the
|
||||
# percent sign might be URIencoded, convert it back into ASCII
|
||||
if host.startswith('[') and host.endswith(']'):
|
||||
host = host.replace('%25', '%').strip('[]')
|
||||
return host
|
||||
@@ -1,590 +0,0 @@
|
||||
"""
|
||||
This module uses ctypes to bind a whole bunch of functions and constants from
|
||||
SecureTransport. The goal here is to provide the low-level API to
|
||||
SecureTransport. These are essentially the C-level functions and constants, and
|
||||
they're pretty gross to work with.
|
||||
|
||||
This code is a bastardised version of the code found in Will Bond's oscrypto
|
||||
library. An enormous debt is owed to him for blazing this trail for us. For
|
||||
that reason, this code should be considered to be covered both by urllib3's
|
||||
license and by oscrypto's:
|
||||
|
||||
Copyright (c) 2015-2016 Will Bond <will@wbond.net>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import platform
|
||||
from ctypes.util import find_library
|
||||
from ctypes import (
|
||||
c_void_p, c_int32, c_char_p, c_size_t, c_byte, c_uint32, c_ulong, c_long,
|
||||
c_bool
|
||||
)
|
||||
from ctypes import CDLL, POINTER, CFUNCTYPE
|
||||
|
||||
|
||||
security_path = find_library('Security')
|
||||
if not security_path:
|
||||
raise ImportError('The library Security could not be found')
|
||||
|
||||
|
||||
core_foundation_path = find_library('CoreFoundation')
|
||||
if not core_foundation_path:
|
||||
raise ImportError('The library CoreFoundation could not be found')
|
||||
|
||||
|
||||
version = platform.mac_ver()[0]
|
||||
version_info = tuple(map(int, version.split('.')))
|
||||
if version_info < (10, 8):
|
||||
raise OSError(
|
||||
'Only OS X 10.8 and newer are supported, not %s.%s' % (
|
||||
version_info[0], version_info[1]
|
||||
)
|
||||
)
|
||||
|
||||
Security = CDLL(security_path, use_errno=True)
|
||||
CoreFoundation = CDLL(core_foundation_path, use_errno=True)
|
||||
|
||||
Boolean = c_bool
|
||||
CFIndex = c_long
|
||||
CFStringEncoding = c_uint32
|
||||
CFData = c_void_p
|
||||
CFString = c_void_p
|
||||
CFArray = c_void_p
|
||||
CFMutableArray = c_void_p
|
||||
CFDictionary = c_void_p
|
||||
CFError = c_void_p
|
||||
CFType = c_void_p
|
||||
CFTypeID = c_ulong
|
||||
|
||||
CFTypeRef = POINTER(CFType)
|
||||
CFAllocatorRef = c_void_p
|
||||
|
||||
OSStatus = c_int32
|
||||
|
||||
CFDataRef = POINTER(CFData)
|
||||
CFStringRef = POINTER(CFString)
|
||||
CFArrayRef = POINTER(CFArray)
|
||||
CFMutableArrayRef = POINTER(CFMutableArray)
|
||||
CFDictionaryRef = POINTER(CFDictionary)
|
||||
CFArrayCallBacks = c_void_p
|
||||
CFDictionaryKeyCallBacks = c_void_p
|
||||
CFDictionaryValueCallBacks = c_void_p
|
||||
|
||||
SecCertificateRef = POINTER(c_void_p)
|
||||
SecExternalFormat = c_uint32
|
||||
SecExternalItemType = c_uint32
|
||||
SecIdentityRef = POINTER(c_void_p)
|
||||
SecItemImportExportFlags = c_uint32
|
||||
SecItemImportExportKeyParameters = c_void_p
|
||||
SecKeychainRef = POINTER(c_void_p)
|
||||
SSLProtocol = c_uint32
|
||||
SSLCipherSuite = c_uint32
|
||||
SSLContextRef = POINTER(c_void_p)
|
||||
SecTrustRef = POINTER(c_void_p)
|
||||
SSLConnectionRef = c_uint32
|
||||
SecTrustResultType = c_uint32
|
||||
SecTrustOptionFlags = c_uint32
|
||||
SSLProtocolSide = c_uint32
|
||||
SSLConnectionType = c_uint32
|
||||
SSLSessionOption = c_uint32
|
||||
|
||||
|
||||
try:
|
||||
Security.SecItemImport.argtypes = [
|
||||
CFDataRef,
|
||||
CFStringRef,
|
||||
POINTER(SecExternalFormat),
|
||||
POINTER(SecExternalItemType),
|
||||
SecItemImportExportFlags,
|
||||
POINTER(SecItemImportExportKeyParameters),
|
||||
SecKeychainRef,
|
||||
POINTER(CFArrayRef),
|
||||
]
|
||||
Security.SecItemImport.restype = OSStatus
|
||||
|
||||
Security.SecCertificateGetTypeID.argtypes = []
|
||||
Security.SecCertificateGetTypeID.restype = CFTypeID
|
||||
|
||||
Security.SecIdentityGetTypeID.argtypes = []
|
||||
Security.SecIdentityGetTypeID.restype = CFTypeID
|
||||
|
||||
Security.SecKeyGetTypeID.argtypes = []
|
||||
Security.SecKeyGetTypeID.restype = CFTypeID
|
||||
|
||||
Security.SecCertificateCreateWithData.argtypes = [
|
||||
CFAllocatorRef,
|
||||
CFDataRef
|
||||
]
|
||||
Security.SecCertificateCreateWithData.restype = SecCertificateRef
|
||||
|
||||
Security.SecCertificateCopyData.argtypes = [
|
||||
SecCertificateRef
|
||||
]
|
||||
Security.SecCertificateCopyData.restype = CFDataRef
|
||||
|
||||
Security.SecCopyErrorMessageString.argtypes = [
|
||||
OSStatus,
|
||||
c_void_p
|
||||
]
|
||||
Security.SecCopyErrorMessageString.restype = CFStringRef
|
||||
|
||||
Security.SecIdentityCreateWithCertificate.argtypes = [
|
||||
CFTypeRef,
|
||||
SecCertificateRef,
|
||||
POINTER(SecIdentityRef)
|
||||
]
|
||||
Security.SecIdentityCreateWithCertificate.restype = OSStatus
|
||||
|
||||
Security.SecKeychainCreate.argtypes = [
|
||||
c_char_p,
|
||||
c_uint32,
|
||||
c_void_p,
|
||||
Boolean,
|
||||
c_void_p,
|
||||
POINTER(SecKeychainRef)
|
||||
]
|
||||
Security.SecKeychainCreate.restype = OSStatus
|
||||
|
||||
Security.SecKeychainDelete.argtypes = [
|
||||
SecKeychainRef
|
||||
]
|
||||
Security.SecKeychainDelete.restype = OSStatus
|
||||
|
||||
Security.SecPKCS12Import.argtypes = [
|
||||
CFDataRef,
|
||||
CFDictionaryRef,
|
||||
POINTER(CFArrayRef)
|
||||
]
|
||||
Security.SecPKCS12Import.restype = OSStatus
|
||||
|
||||
SSLReadFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, c_void_p, POINTER(c_size_t))
|
||||
SSLWriteFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, POINTER(c_byte), POINTER(c_size_t))
|
||||
|
||||
Security.SSLSetIOFuncs.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLReadFunc,
|
||||
SSLWriteFunc
|
||||
]
|
||||
Security.SSLSetIOFuncs.restype = OSStatus
|
||||
|
||||
Security.SSLSetPeerID.argtypes = [
|
||||
SSLContextRef,
|
||||
c_char_p,
|
||||
c_size_t
|
||||
]
|
||||
Security.SSLSetPeerID.restype = OSStatus
|
||||
|
||||
Security.SSLSetCertificate.argtypes = [
|
||||
SSLContextRef,
|
||||
CFArrayRef
|
||||
]
|
||||
Security.SSLSetCertificate.restype = OSStatus
|
||||
|
||||
Security.SSLSetCertificateAuthorities.argtypes = [
|
||||
SSLContextRef,
|
||||
CFTypeRef,
|
||||
Boolean
|
||||
]
|
||||
Security.SSLSetCertificateAuthorities.restype = OSStatus
|
||||
|
||||
Security.SSLSetConnection.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLConnectionRef
|
||||
]
|
||||
Security.SSLSetConnection.restype = OSStatus
|
||||
|
||||
Security.SSLSetPeerDomainName.argtypes = [
|
||||
SSLContextRef,
|
||||
c_char_p,
|
||||
c_size_t
|
||||
]
|
||||
Security.SSLSetPeerDomainName.restype = OSStatus
|
||||
|
||||
Security.SSLHandshake.argtypes = [
|
||||
SSLContextRef
|
||||
]
|
||||
Security.SSLHandshake.restype = OSStatus
|
||||
|
||||
Security.SSLRead.argtypes = [
|
||||
SSLContextRef,
|
||||
c_char_p,
|
||||
c_size_t,
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLRead.restype = OSStatus
|
||||
|
||||
Security.SSLWrite.argtypes = [
|
||||
SSLContextRef,
|
||||
c_char_p,
|
||||
c_size_t,
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLWrite.restype = OSStatus
|
||||
|
||||
Security.SSLClose.argtypes = [
|
||||
SSLContextRef
|
||||
]
|
||||
Security.SSLClose.restype = OSStatus
|
||||
|
||||
Security.SSLGetNumberSupportedCiphers.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLGetNumberSupportedCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLGetSupportedCiphers.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLCipherSuite),
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLGetSupportedCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLSetEnabledCiphers.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLCipherSuite),
|
||||
c_size_t
|
||||
]
|
||||
Security.SSLSetEnabledCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLGetNumberEnabledCiphers.argtype = [
|
||||
SSLContextRef,
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLGetNumberEnabledCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLGetEnabledCiphers.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLCipherSuite),
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLGetEnabledCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLGetNegotiatedCipher.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLCipherSuite)
|
||||
]
|
||||
Security.SSLGetNegotiatedCipher.restype = OSStatus
|
||||
|
||||
Security.SSLGetNegotiatedProtocolVersion.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLProtocol)
|
||||
]
|
||||
Security.SSLGetNegotiatedProtocolVersion.restype = OSStatus
|
||||
|
||||
Security.SSLCopyPeerTrust.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SecTrustRef)
|
||||
]
|
||||
Security.SSLCopyPeerTrust.restype = OSStatus
|
||||
|
||||
Security.SecTrustSetAnchorCertificates.argtypes = [
|
||||
SecTrustRef,
|
||||
CFArrayRef
|
||||
]
|
||||
Security.SecTrustSetAnchorCertificates.restype = OSStatus
|
||||
|
||||
Security.SecTrustSetAnchorCertificatesOnly.argstypes = [
|
||||
SecTrustRef,
|
||||
Boolean
|
||||
]
|
||||
Security.SecTrustSetAnchorCertificatesOnly.restype = OSStatus
|
||||
|
||||
Security.SecTrustEvaluate.argtypes = [
|
||||
SecTrustRef,
|
||||
POINTER(SecTrustResultType)
|
||||
]
|
||||
Security.SecTrustEvaluate.restype = OSStatus
|
||||
|
||||
Security.SecTrustGetCertificateCount.argtypes = [
|
||||
SecTrustRef
|
||||
]
|
||||
Security.SecTrustGetCertificateCount.restype = CFIndex
|
||||
|
||||
Security.SecTrustGetCertificateAtIndex.argtypes = [
|
||||
SecTrustRef,
|
||||
CFIndex
|
||||
]
|
||||
Security.SecTrustGetCertificateAtIndex.restype = SecCertificateRef
|
||||
|
||||
Security.SSLCreateContext.argtypes = [
|
||||
CFAllocatorRef,
|
||||
SSLProtocolSide,
|
||||
SSLConnectionType
|
||||
]
|
||||
Security.SSLCreateContext.restype = SSLContextRef
|
||||
|
||||
Security.SSLSetSessionOption.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLSessionOption,
|
||||
Boolean
|
||||
]
|
||||
Security.SSLSetSessionOption.restype = OSStatus
|
||||
|
||||
Security.SSLSetProtocolVersionMin.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLProtocol
|
||||
]
|
||||
Security.SSLSetProtocolVersionMin.restype = OSStatus
|
||||
|
||||
Security.SSLSetProtocolVersionMax.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLProtocol
|
||||
]
|
||||
Security.SSLSetProtocolVersionMax.restype = OSStatus
|
||||
|
||||
Security.SecCopyErrorMessageString.argtypes = [
|
||||
OSStatus,
|
||||
c_void_p
|
||||
]
|
||||
Security.SecCopyErrorMessageString.restype = CFStringRef
|
||||
|
||||
Security.SSLReadFunc = SSLReadFunc
|
||||
Security.SSLWriteFunc = SSLWriteFunc
|
||||
Security.SSLContextRef = SSLContextRef
|
||||
Security.SSLProtocol = SSLProtocol
|
||||
Security.SSLCipherSuite = SSLCipherSuite
|
||||
Security.SecIdentityRef = SecIdentityRef
|
||||
Security.SecKeychainRef = SecKeychainRef
|
||||
Security.SecTrustRef = SecTrustRef
|
||||
Security.SecTrustResultType = SecTrustResultType
|
||||
Security.SecExternalFormat = SecExternalFormat
|
||||
Security.OSStatus = OSStatus
|
||||
|
||||
Security.kSecImportExportPassphrase = CFStringRef.in_dll(
|
||||
Security, 'kSecImportExportPassphrase'
|
||||
)
|
||||
Security.kSecImportItemIdentity = CFStringRef.in_dll(
|
||||
Security, 'kSecImportItemIdentity'
|
||||
)
|
||||
|
||||
# CoreFoundation time!
|
||||
CoreFoundation.CFRetain.argtypes = [
|
||||
CFTypeRef
|
||||
]
|
||||
CoreFoundation.CFRetain.restype = CFTypeRef
|
||||
|
||||
CoreFoundation.CFRelease.argtypes = [
|
||||
CFTypeRef
|
||||
]
|
||||
CoreFoundation.CFRelease.restype = None
|
||||
|
||||
CoreFoundation.CFGetTypeID.argtypes = [
|
||||
CFTypeRef
|
||||
]
|
||||
CoreFoundation.CFGetTypeID.restype = CFTypeID
|
||||
|
||||
CoreFoundation.CFStringCreateWithCString.argtypes = [
|
||||
CFAllocatorRef,
|
||||
c_char_p,
|
||||
CFStringEncoding
|
||||
]
|
||||
CoreFoundation.CFStringCreateWithCString.restype = CFStringRef
|
||||
|
||||
CoreFoundation.CFStringGetCStringPtr.argtypes = [
|
||||
CFStringRef,
|
||||
CFStringEncoding
|
||||
]
|
||||
CoreFoundation.CFStringGetCStringPtr.restype = c_char_p
|
||||
|
||||
CoreFoundation.CFStringGetCString.argtypes = [
|
||||
CFStringRef,
|
||||
c_char_p,
|
||||
CFIndex,
|
||||
CFStringEncoding
|
||||
]
|
||||
CoreFoundation.CFStringGetCString.restype = c_bool
|
||||
|
||||
CoreFoundation.CFDataCreate.argtypes = [
|
||||
CFAllocatorRef,
|
||||
c_char_p,
|
||||
CFIndex
|
||||
]
|
||||
CoreFoundation.CFDataCreate.restype = CFDataRef
|
||||
|
||||
CoreFoundation.CFDataGetLength.argtypes = [
|
||||
CFDataRef
|
||||
]
|
||||
CoreFoundation.CFDataGetLength.restype = CFIndex
|
||||
|
||||
CoreFoundation.CFDataGetBytePtr.argtypes = [
|
||||
CFDataRef
|
||||
]
|
||||
CoreFoundation.CFDataGetBytePtr.restype = c_void_p
|
||||
|
||||
CoreFoundation.CFDictionaryCreate.argtypes = [
|
||||
CFAllocatorRef,
|
||||
POINTER(CFTypeRef),
|
||||
POINTER(CFTypeRef),
|
||||
CFIndex,
|
||||
CFDictionaryKeyCallBacks,
|
||||
CFDictionaryValueCallBacks
|
||||
]
|
||||
CoreFoundation.CFDictionaryCreate.restype = CFDictionaryRef
|
||||
|
||||
CoreFoundation.CFDictionaryGetValue.argtypes = [
|
||||
CFDictionaryRef,
|
||||
CFTypeRef
|
||||
]
|
||||
CoreFoundation.CFDictionaryGetValue.restype = CFTypeRef
|
||||
|
||||
CoreFoundation.CFArrayCreate.argtypes = [
|
||||
CFAllocatorRef,
|
||||
POINTER(CFTypeRef),
|
||||
CFIndex,
|
||||
CFArrayCallBacks,
|
||||
]
|
||||
CoreFoundation.CFArrayCreate.restype = CFArrayRef
|
||||
|
||||
CoreFoundation.CFArrayCreateMutable.argtypes = [
|
||||
CFAllocatorRef,
|
||||
CFIndex,
|
||||
CFArrayCallBacks
|
||||
]
|
||||
CoreFoundation.CFArrayCreateMutable.restype = CFMutableArrayRef
|
||||
|
||||
CoreFoundation.CFArrayAppendValue.argtypes = [
|
||||
CFMutableArrayRef,
|
||||
c_void_p
|
||||
]
|
||||
CoreFoundation.CFArrayAppendValue.restype = None
|
||||
|
||||
CoreFoundation.CFArrayGetCount.argtypes = [
|
||||
CFArrayRef
|
||||
]
|
||||
CoreFoundation.CFArrayGetCount.restype = CFIndex
|
||||
|
||||
CoreFoundation.CFArrayGetValueAtIndex.argtypes = [
|
||||
CFArrayRef,
|
||||
CFIndex
|
||||
]
|
||||
CoreFoundation.CFArrayGetValueAtIndex.restype = c_void_p
|
||||
|
||||
CoreFoundation.kCFAllocatorDefault = CFAllocatorRef.in_dll(
|
||||
CoreFoundation, 'kCFAllocatorDefault'
|
||||
)
|
||||
CoreFoundation.kCFTypeArrayCallBacks = c_void_p.in_dll(CoreFoundation, 'kCFTypeArrayCallBacks')
|
||||
CoreFoundation.kCFTypeDictionaryKeyCallBacks = c_void_p.in_dll(
|
||||
CoreFoundation, 'kCFTypeDictionaryKeyCallBacks'
|
||||
)
|
||||
CoreFoundation.kCFTypeDictionaryValueCallBacks = c_void_p.in_dll(
|
||||
CoreFoundation, 'kCFTypeDictionaryValueCallBacks'
|
||||
)
|
||||
|
||||
CoreFoundation.CFTypeRef = CFTypeRef
|
||||
CoreFoundation.CFArrayRef = CFArrayRef
|
||||
CoreFoundation.CFStringRef = CFStringRef
|
||||
CoreFoundation.CFDictionaryRef = CFDictionaryRef
|
||||
|
||||
except (AttributeError):
|
||||
raise ImportError('Error initializing ctypes')
|
||||
|
||||
|
||||
class CFConst(object):
|
||||
"""
|
||||
A class object that acts as essentially a namespace for CoreFoundation
|
||||
constants.
|
||||
"""
|
||||
kCFStringEncodingUTF8 = CFStringEncoding(0x08000100)
|
||||
|
||||
|
||||
class SecurityConst(object):
|
||||
"""
|
||||
A class object that acts as essentially a namespace for Security constants.
|
||||
"""
|
||||
kSSLSessionOptionBreakOnServerAuth = 0
|
||||
|
||||
kSSLProtocol2 = 1
|
||||
kSSLProtocol3 = 2
|
||||
kTLSProtocol1 = 4
|
||||
kTLSProtocol11 = 7
|
||||
kTLSProtocol12 = 8
|
||||
|
||||
kSSLClientSide = 1
|
||||
kSSLStreamType = 0
|
||||
|
||||
kSecFormatPEMSequence = 10
|
||||
|
||||
kSecTrustResultInvalid = 0
|
||||
kSecTrustResultProceed = 1
|
||||
# This gap is present on purpose: this was kSecTrustResultConfirm, which
|
||||
# is deprecated.
|
||||
kSecTrustResultDeny = 3
|
||||
kSecTrustResultUnspecified = 4
|
||||
kSecTrustResultRecoverableTrustFailure = 5
|
||||
kSecTrustResultFatalTrustFailure = 6
|
||||
kSecTrustResultOtherError = 7
|
||||
|
||||
errSSLProtocol = -9800
|
||||
errSSLWouldBlock = -9803
|
||||
errSSLClosedGraceful = -9805
|
||||
errSSLClosedNoNotify = -9816
|
||||
errSSLClosedAbort = -9806
|
||||
|
||||
errSSLXCertChainInvalid = -9807
|
||||
errSSLCrypto = -9809
|
||||
errSSLInternal = -9810
|
||||
errSSLCertExpired = -9814
|
||||
errSSLCertNotYetValid = -9815
|
||||
errSSLUnknownRootCert = -9812
|
||||
errSSLNoRootCert = -9813
|
||||
errSSLHostNameMismatch = -9843
|
||||
errSSLPeerHandshakeFail = -9824
|
||||
errSSLPeerUserCancelled = -9839
|
||||
errSSLWeakPeerEphemeralDHKey = -9850
|
||||
errSSLServerAuthCompleted = -9841
|
||||
errSSLRecordOverflow = -9847
|
||||
|
||||
errSecVerifyFailed = -67808
|
||||
errSecNoTrustSettings = -25263
|
||||
errSecItemNotFound = -25300
|
||||
errSecInvalidTrustSettings = -25262
|
||||
|
||||
# Cipher suites. We only pick the ones our default cipher string allows.
|
||||
TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C
|
||||
TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030
|
||||
TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B
|
||||
TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F
|
||||
TLS_DHE_DSS_WITH_AES_256_GCM_SHA384 = 0x00A3
|
||||
TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F
|
||||
TLS_DHE_DSS_WITH_AES_128_GCM_SHA256 = 0x00A2
|
||||
TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E
|
||||
TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024
|
||||
TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028
|
||||
TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A
|
||||
TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014
|
||||
TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B
|
||||
TLS_DHE_DSS_WITH_AES_256_CBC_SHA256 = 0x006A
|
||||
TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039
|
||||
TLS_DHE_DSS_WITH_AES_256_CBC_SHA = 0x0038
|
||||
TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023
|
||||
TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027
|
||||
TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009
|
||||
TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013
|
||||
TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067
|
||||
TLS_DHE_DSS_WITH_AES_128_CBC_SHA256 = 0x0040
|
||||
TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033
|
||||
TLS_DHE_DSS_WITH_AES_128_CBC_SHA = 0x0032
|
||||
TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D
|
||||
TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C
|
||||
TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D
|
||||
TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C
|
||||
TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035
|
||||
TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F
|
||||
@@ -1,343 +0,0 @@
|
||||
"""
|
||||
Low-level helpers for the SecureTransport bindings.
|
||||
|
||||
These are Python functions that are not directly related to the high-level APIs
|
||||
but are necessary to get them to work. They include a whole bunch of low-level
|
||||
CoreFoundation messing about and memory management. The concerns in this module
|
||||
are almost entirely about trying to avoid memory leaks and providing
|
||||
appropriate and useful assistance to the higher-level code.
|
||||
"""
|
||||
import base64
|
||||
import ctypes
|
||||
import itertools
|
||||
import re
|
||||
import os
|
||||
import ssl
|
||||
import tempfile
|
||||
|
||||
from .bindings import Security, CoreFoundation, CFConst
|
||||
|
||||
|
||||
# This regular expression is used to grab PEM data out of a PEM bundle.
|
||||
_PEM_CERTS_RE = re.compile(
|
||||
b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----", re.DOTALL
|
||||
)
|
||||
|
||||
|
||||
def _cf_data_from_bytes(bytestring):
|
||||
"""
|
||||
Given a bytestring, create a CFData object from it. This CFData object must
|
||||
be CFReleased by the caller.
|
||||
"""
|
||||
return CoreFoundation.CFDataCreate(
|
||||
CoreFoundation.kCFAllocatorDefault, bytestring, len(bytestring)
|
||||
)
|
||||
|
||||
|
||||
def _cf_dictionary_from_tuples(tuples):
|
||||
"""
|
||||
Given a list of Python tuples, create an associated CFDictionary.
|
||||
"""
|
||||
dictionary_size = len(tuples)
|
||||
|
||||
# We need to get the dictionary keys and values out in the same order.
|
||||
keys = (t[0] for t in tuples)
|
||||
values = (t[1] for t in tuples)
|
||||
cf_keys = (CoreFoundation.CFTypeRef * dictionary_size)(*keys)
|
||||
cf_values = (CoreFoundation.CFTypeRef * dictionary_size)(*values)
|
||||
|
||||
return CoreFoundation.CFDictionaryCreate(
|
||||
CoreFoundation.kCFAllocatorDefault,
|
||||
cf_keys,
|
||||
cf_values,
|
||||
dictionary_size,
|
||||
CoreFoundation.kCFTypeDictionaryKeyCallBacks,
|
||||
CoreFoundation.kCFTypeDictionaryValueCallBacks,
|
||||
)
|
||||
|
||||
|
||||
def _cf_string_to_unicode(value):
|
||||
"""
|
||||
Creates a Unicode string from a CFString object. Used entirely for error
|
||||
reporting.
|
||||
|
||||
Yes, it annoys me quite a lot that this function is this complex.
|
||||
"""
|
||||
value_as_void_p = ctypes.cast(value, ctypes.POINTER(ctypes.c_void_p))
|
||||
|
||||
string = CoreFoundation.CFStringGetCStringPtr(
|
||||
value_as_void_p,
|
||||
CFConst.kCFStringEncodingUTF8
|
||||
)
|
||||
if string is None:
|
||||
buffer = ctypes.create_string_buffer(1024)
|
||||
result = CoreFoundation.CFStringGetCString(
|
||||
value_as_void_p,
|
||||
buffer,
|
||||
1024,
|
||||
CFConst.kCFStringEncodingUTF8
|
||||
)
|
||||
if not result:
|
||||
raise OSError('Error copying C string from CFStringRef')
|
||||
string = buffer.value
|
||||
if string is not None:
|
||||
string = string.decode('utf-8')
|
||||
return string
|
||||
|
||||
|
||||
def _assert_no_error(error, exception_class=None):
|
||||
"""
|
||||
Checks the return code and throws an exception if there is an error to
|
||||
report
|
||||
"""
|
||||
if error == 0:
|
||||
return
|
||||
|
||||
cf_error_string = Security.SecCopyErrorMessageString(error, None)
|
||||
output = _cf_string_to_unicode(cf_error_string)
|
||||
CoreFoundation.CFRelease(cf_error_string)
|
||||
|
||||
if output is None or output == u'':
|
||||
output = u'OSStatus %s' % error
|
||||
|
||||
if exception_class is None:
|
||||
exception_class = ssl.SSLError
|
||||
|
||||
raise exception_class(output)
|
||||
|
||||
|
||||
def _cert_array_from_pem(pem_bundle):
|
||||
"""
|
||||
Given a bundle of certs in PEM format, turns them into a CFArray of certs
|
||||
that can be used to validate a cert chain.
|
||||
"""
|
||||
der_certs = [
|
||||
base64.b64decode(match.group(1))
|
||||
for match in _PEM_CERTS_RE.finditer(pem_bundle)
|
||||
]
|
||||
if not der_certs:
|
||||
raise ssl.SSLError("No root certificates specified")
|
||||
|
||||
cert_array = CoreFoundation.CFArrayCreateMutable(
|
||||
CoreFoundation.kCFAllocatorDefault,
|
||||
0,
|
||||
ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks)
|
||||
)
|
||||
if not cert_array:
|
||||
raise ssl.SSLError("Unable to allocate memory!")
|
||||
|
||||
try:
|
||||
for der_bytes in der_certs:
|
||||
certdata = _cf_data_from_bytes(der_bytes)
|
||||
if not certdata:
|
||||
raise ssl.SSLError("Unable to allocate memory!")
|
||||
cert = Security.SecCertificateCreateWithData(
|
||||
CoreFoundation.kCFAllocatorDefault, certdata
|
||||
)
|
||||
CoreFoundation.CFRelease(certdata)
|
||||
if not cert:
|
||||
raise ssl.SSLError("Unable to build cert object!")
|
||||
|
||||
CoreFoundation.CFArrayAppendValue(cert_array, cert)
|
||||
CoreFoundation.CFRelease(cert)
|
||||
except Exception:
|
||||
# We need to free the array before the exception bubbles further.
|
||||
# We only want to do that if an error occurs: otherwise, the caller
|
||||
# should free.
|
||||
CoreFoundation.CFRelease(cert_array)
|
||||
|
||||
return cert_array
|
||||
|
||||
|
||||
def _is_cert(item):
|
||||
"""
|
||||
Returns True if a given CFTypeRef is a certificate.
|
||||
"""
|
||||
expected = Security.SecCertificateGetTypeID()
|
||||
return CoreFoundation.CFGetTypeID(item) == expected
|
||||
|
||||
|
||||
def _is_identity(item):
|
||||
"""
|
||||
Returns True if a given CFTypeRef is an identity.
|
||||
"""
|
||||
expected = Security.SecIdentityGetTypeID()
|
||||
return CoreFoundation.CFGetTypeID(item) == expected
|
||||
|
||||
|
||||
def _temporary_keychain():
|
||||
"""
|
||||
This function creates a temporary Mac keychain that we can use to work with
|
||||
credentials. This keychain uses a one-time password and a temporary file to
|
||||
store the data. We expect to have one keychain per socket. The returned
|
||||
SecKeychainRef must be freed by the caller, including calling
|
||||
SecKeychainDelete.
|
||||
|
||||
Returns a tuple of the SecKeychainRef and the path to the temporary
|
||||
directory that contains it.
|
||||
"""
|
||||
# Unfortunately, SecKeychainCreate requires a path to a keychain. This
|
||||
# means we cannot use mkstemp to use a generic temporary file. Instead,
|
||||
# we're going to create a temporary directory and a filename to use there.
|
||||
# This filename will be 8 random bytes expanded into base64. We also need
|
||||
# some random bytes to password-protect the keychain we're creating, so we
|
||||
# ask for 40 random bytes.
|
||||
random_bytes = os.urandom(40)
|
||||
filename = base64.b64encode(random_bytes[:8]).decode('utf-8')
|
||||
password = base64.b64encode(random_bytes[8:]) # Must be valid UTF-8
|
||||
tempdirectory = tempfile.mkdtemp()
|
||||
|
||||
keychain_path = os.path.join(tempdirectory, filename).encode('utf-8')
|
||||
|
||||
# We now want to create the keychain itself.
|
||||
keychain = Security.SecKeychainRef()
|
||||
status = Security.SecKeychainCreate(
|
||||
keychain_path,
|
||||
len(password),
|
||||
password,
|
||||
False,
|
||||
None,
|
||||
ctypes.byref(keychain)
|
||||
)
|
||||
_assert_no_error(status)
|
||||
|
||||
# Having created the keychain, we want to pass it off to the caller.
|
||||
return keychain, tempdirectory
|
||||
|
||||
|
||||
def _load_items_from_file(keychain, path):
|
||||
"""
|
||||
Given a single file, loads all the trust objects from it into arrays and
|
||||
the keychain.
|
||||
Returns a tuple of lists: the first list is a list of identities, the
|
||||
second a list of certs.
|
||||
"""
|
||||
certificates = []
|
||||
identities = []
|
||||
result_array = None
|
||||
|
||||
with open(path, 'rb') as f:
|
||||
raw_filedata = f.read()
|
||||
|
||||
try:
|
||||
filedata = CoreFoundation.CFDataCreate(
|
||||
CoreFoundation.kCFAllocatorDefault,
|
||||
raw_filedata,
|
||||
len(raw_filedata)
|
||||
)
|
||||
result_array = CoreFoundation.CFArrayRef()
|
||||
result = Security.SecItemImport(
|
||||
filedata, # cert data
|
||||
None, # Filename, leaving it out for now
|
||||
None, # What the type of the file is, we don't care
|
||||
None, # what's in the file, we don't care
|
||||
0, # import flags
|
||||
None, # key params, can include passphrase in the future
|
||||
keychain, # The keychain to insert into
|
||||
ctypes.byref(result_array) # Results
|
||||
)
|
||||
_assert_no_error(result)
|
||||
|
||||
# A CFArray is not very useful to us as an intermediary
|
||||
# representation, so we are going to extract the objects we want
|
||||
# and then free the array. We don't need to keep hold of keys: the
|
||||
# keychain already has them!
|
||||
result_count = CoreFoundation.CFArrayGetCount(result_array)
|
||||
for index in range(result_count):
|
||||
item = CoreFoundation.CFArrayGetValueAtIndex(
|
||||
result_array, index
|
||||
)
|
||||
item = ctypes.cast(item, CoreFoundation.CFTypeRef)
|
||||
|
||||
if _is_cert(item):
|
||||
CoreFoundation.CFRetain(item)
|
||||
certificates.append(item)
|
||||
elif _is_identity(item):
|
||||
CoreFoundation.CFRetain(item)
|
||||
identities.append(item)
|
||||
finally:
|
||||
if result_array:
|
||||
CoreFoundation.CFRelease(result_array)
|
||||
|
||||
CoreFoundation.CFRelease(filedata)
|
||||
|
||||
return (identities, certificates)
|
||||
|
||||
|
||||
def _load_client_cert_chain(keychain, *paths):
|
||||
"""
|
||||
Load certificates and maybe keys from a number of files. Has the end goal
|
||||
of returning a CFArray containing one SecIdentityRef, and then zero or more
|
||||
SecCertificateRef objects, suitable for use as a client certificate trust
|
||||
chain.
|
||||
"""
|
||||
# Ok, the strategy.
|
||||
#
|
||||
# This relies on knowing that macOS will not give you a SecIdentityRef
|
||||
# unless you have imported a key into a keychain. This is a somewhat
|
||||
# artificial limitation of macOS (for example, it doesn't necessarily
|
||||
# affect iOS), but there is nothing inside Security.framework that lets you
|
||||
# get a SecIdentityRef without having a key in a keychain.
|
||||
#
|
||||
# So the policy here is we take all the files and iterate them in order.
|
||||
# Each one will use SecItemImport to have one or more objects loaded from
|
||||
# it. We will also point at a keychain that macOS can use to work with the
|
||||
# private key.
|
||||
#
|
||||
# Once we have all the objects, we'll check what we actually have. If we
|
||||
# already have a SecIdentityRef in hand, fab: we'll use that. Otherwise,
|
||||
# we'll take the first certificate (which we assume to be our leaf) and
|
||||
# ask the keychain to give us a SecIdentityRef with that cert's associated
|
||||
# key.
|
||||
#
|
||||
# We'll then return a CFArray containing the trust chain: one
|
||||
# SecIdentityRef and then zero-or-more SecCertificateRef objects. The
|
||||
# responsibility for freeing this CFArray will be with the caller. This
|
||||
# CFArray must remain alive for the entire connection, so in practice it
|
||||
# will be stored with a single SSLSocket, along with the reference to the
|
||||
# keychain.
|
||||
certificates = []
|
||||
identities = []
|
||||
|
||||
# Filter out bad paths.
|
||||
paths = (path for path in paths if path)
|
||||
|
||||
try:
|
||||
for file_path in paths:
|
||||
new_identities, new_certs = _load_items_from_file(
|
||||
keychain, file_path
|
||||
)
|
||||
identities.extend(new_identities)
|
||||
certificates.extend(new_certs)
|
||||
|
||||
# Ok, we have everything. The question is: do we have an identity? If
|
||||
# not, we want to grab one from the first cert we have.
|
||||
if not identities:
|
||||
new_identity = Security.SecIdentityRef()
|
||||
status = Security.SecIdentityCreateWithCertificate(
|
||||
keychain,
|
||||
certificates[0],
|
||||
ctypes.byref(new_identity)
|
||||
)
|
||||
_assert_no_error(status)
|
||||
identities.append(new_identity)
|
||||
|
||||
# We now want to release the original certificate, as we no longer
|
||||
# need it.
|
||||
CoreFoundation.CFRelease(certificates.pop(0))
|
||||
|
||||
# We now need to build a new CFArray that holds the trust chain.
|
||||
trust_chain = CoreFoundation.CFArrayCreateMutable(
|
||||
CoreFoundation.kCFAllocatorDefault,
|
||||
0,
|
||||
ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
|
||||
)
|
||||
for item in itertools.chain(identities, certificates):
|
||||
# ArrayAppendValue does a CFRetain on the item. That's fine,
|
||||
# because the finally block will release our other refs to them.
|
||||
CoreFoundation.CFArrayAppendValue(trust_chain, item)
|
||||
|
||||
return trust_chain
|
||||
finally:
|
||||
for obj in itertools.chain(identities, certificates):
|
||||
CoreFoundation.CFRelease(obj)
|
||||
@@ -1,296 +0,0 @@
|
||||
"""
|
||||
This module provides a pool manager that uses Google App Engine's
|
||||
`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
|
||||
|
||||
Example usage::
|
||||
|
||||
from urllib3 import PoolManager
|
||||
from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
|
||||
|
||||
if is_appengine_sandbox():
|
||||
# AppEngineManager uses AppEngine's URLFetch API behind the scenes
|
||||
http = AppEngineManager()
|
||||
else:
|
||||
# PoolManager uses a socket-level API behind the scenes
|
||||
http = PoolManager()
|
||||
|
||||
r = http.request('GET', 'https://google.com/')
|
||||
|
||||
There are `limitations <https://cloud.google.com/appengine/docs/python/\
|
||||
urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
|
||||
the best choice for your application. There are three options for using
|
||||
urllib3 on Google App Engine:
|
||||
|
||||
1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
|
||||
cost-effective in many circumstances as long as your usage is within the
|
||||
limitations.
|
||||
2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
|
||||
Sockets also have `limitations and restrictions
|
||||
<https://cloud.google.com/appengine/docs/python/sockets/\
|
||||
#limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
|
||||
To use sockets, be sure to specify the following in your ``app.yaml``::
|
||||
|
||||
env_variables:
|
||||
GAE_USE_SOCKETS_HTTPLIB : 'true'
|
||||
|
||||
3. If you are using `App Engine Flexible
|
||||
<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
|
||||
:class:`PoolManager` without any configuration or special environment variables.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
import os
|
||||
import warnings
|
||||
from ..packages.six.moves.urllib.parse import urljoin
|
||||
|
||||
from ..exceptions import (
|
||||
HTTPError,
|
||||
HTTPWarning,
|
||||
MaxRetryError,
|
||||
ProtocolError,
|
||||
TimeoutError,
|
||||
SSLError
|
||||
)
|
||||
|
||||
from ..packages.six import BytesIO
|
||||
from ..request import RequestMethods
|
||||
from ..response import HTTPResponse
|
||||
from ..util.timeout import Timeout
|
||||
from ..util.retry import Retry
|
||||
|
||||
try:
|
||||
from google.appengine.api import urlfetch
|
||||
except ImportError:
|
||||
urlfetch = None
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AppEnginePlatformWarning(HTTPWarning):
|
||||
pass
|
||||
|
||||
|
||||
class AppEnginePlatformError(HTTPError):
|
||||
pass
|
||||
|
||||
|
||||
class AppEngineManager(RequestMethods):
|
||||
"""
|
||||
Connection manager for Google App Engine sandbox applications.
|
||||
|
||||
This manager uses the URLFetch service directly instead of using the
|
||||
emulated httplib, and is subject to URLFetch limitations as described in
|
||||
the App Engine documentation `here
|
||||
<https://cloud.google.com/appengine/docs/python/urlfetch>`_.
|
||||
|
||||
Notably it will raise an :class:`AppEnginePlatformError` if:
|
||||
* URLFetch is not available.
|
||||
* If you attempt to use this on App Engine Flexible, as full socket
|
||||
support is available.
|
||||
* If a request size is more than 10 megabytes.
|
||||
* If a response size is more than 32 megabtyes.
|
||||
* If you use an unsupported request method such as OPTIONS.
|
||||
|
||||
Beyond those cases, it will raise normal urllib3 errors.
|
||||
"""
|
||||
|
||||
def __init__(self, headers=None, retries=None, validate_certificate=True,
|
||||
urlfetch_retries=True):
|
||||
if not urlfetch:
|
||||
raise AppEnginePlatformError(
|
||||
"URLFetch is not available in this environment.")
|
||||
|
||||
if is_prod_appengine_mvms():
|
||||
raise AppEnginePlatformError(
|
||||
"Use normal urllib3.PoolManager instead of AppEngineManager"
|
||||
"on Managed VMs, as using URLFetch is not necessary in "
|
||||
"this environment.")
|
||||
|
||||
warnings.warn(
|
||||
"urllib3 is using URLFetch on Google App Engine sandbox instead "
|
||||
"of sockets. To use sockets directly instead of URLFetch see "
|
||||
"https://urllib3.readthedocs.io/en/latest/reference/urllib3.contrib.html.",
|
||||
AppEnginePlatformWarning)
|
||||
|
||||
RequestMethods.__init__(self, headers)
|
||||
self.validate_certificate = validate_certificate
|
||||
self.urlfetch_retries = urlfetch_retries
|
||||
|
||||
self.retries = retries or Retry.DEFAULT
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
# Return False to re-raise any potential exceptions
|
||||
return False
|
||||
|
||||
def urlopen(self, method, url, body=None, headers=None,
|
||||
retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT,
|
||||
**response_kw):
|
||||
|
||||
retries = self._get_retries(retries, redirect)
|
||||
|
||||
try:
|
||||
follow_redirects = (
|
||||
redirect and
|
||||
retries.redirect != 0 and
|
||||
retries.total)
|
||||
response = urlfetch.fetch(
|
||||
url,
|
||||
payload=body,
|
||||
method=method,
|
||||
headers=headers or {},
|
||||
allow_truncated=False,
|
||||
follow_redirects=self.urlfetch_retries and follow_redirects,
|
||||
deadline=self._get_absolute_timeout(timeout),
|
||||
validate_certificate=self.validate_certificate,
|
||||
)
|
||||
except urlfetch.DeadlineExceededError as e:
|
||||
raise TimeoutError(self, e)
|
||||
|
||||
except urlfetch.InvalidURLError as e:
|
||||
if 'too large' in str(e):
|
||||
raise AppEnginePlatformError(
|
||||
"URLFetch request too large, URLFetch only "
|
||||
"supports requests up to 10mb in size.", e)
|
||||
raise ProtocolError(e)
|
||||
|
||||
except urlfetch.DownloadError as e:
|
||||
if 'Too many redirects' in str(e):
|
||||
raise MaxRetryError(self, url, reason=e)
|
||||
raise ProtocolError(e)
|
||||
|
||||
except urlfetch.ResponseTooLargeError as e:
|
||||
raise AppEnginePlatformError(
|
||||
"URLFetch response too large, URLFetch only supports"
|
||||
"responses up to 32mb in size.", e)
|
||||
|
||||
except urlfetch.SSLCertificateError as e:
|
||||
raise SSLError(e)
|
||||
|
||||
except urlfetch.InvalidMethodError as e:
|
||||
raise AppEnginePlatformError(
|
||||
"URLFetch does not support method: %s" % method, e)
|
||||
|
||||
http_response = self._urlfetch_response_to_http_response(
|
||||
response, retries=retries, **response_kw)
|
||||
|
||||
# Handle redirect?
|
||||
redirect_location = redirect and http_response.get_redirect_location()
|
||||
if redirect_location:
|
||||
# Check for redirect response
|
||||
if (self.urlfetch_retries and retries.raise_on_redirect):
|
||||
raise MaxRetryError(self, url, "too many redirects")
|
||||
else:
|
||||
if http_response.status == 303:
|
||||
method = 'GET'
|
||||
|
||||
try:
|
||||
retries = retries.increment(method, url, response=http_response, _pool=self)
|
||||
except MaxRetryError:
|
||||
if retries.raise_on_redirect:
|
||||
raise MaxRetryError(self, url, "too many redirects")
|
||||
return http_response
|
||||
|
||||
retries.sleep_for_retry(http_response)
|
||||
log.debug("Redirecting %s -> %s", url, redirect_location)
|
||||
redirect_url = urljoin(url, redirect_location)
|
||||
return self.urlopen(
|
||||
method, redirect_url, body, headers,
|
||||
retries=retries, redirect=redirect,
|
||||
timeout=timeout, **response_kw)
|
||||
|
||||
# Check if we should retry the HTTP response.
|
||||
has_retry_after = bool(http_response.getheader('Retry-After'))
|
||||
if retries.is_retry(method, http_response.status, has_retry_after):
|
||||
retries = retries.increment(
|
||||
method, url, response=http_response, _pool=self)
|
||||
log.debug("Retry: %s", url)
|
||||
retries.sleep(http_response)
|
||||
return self.urlopen(
|
||||
method, url,
|
||||
body=body, headers=headers,
|
||||
retries=retries, redirect=redirect,
|
||||
timeout=timeout, **response_kw)
|
||||
|
||||
return http_response
|
||||
|
||||
def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
|
||||
|
||||
if is_prod_appengine():
|
||||
# Production GAE handles deflate encoding automatically, but does
|
||||
# not remove the encoding header.
|
||||
content_encoding = urlfetch_resp.headers.get('content-encoding')
|
||||
|
||||
if content_encoding == 'deflate':
|
||||
del urlfetch_resp.headers['content-encoding']
|
||||
|
||||
transfer_encoding = urlfetch_resp.headers.get('transfer-encoding')
|
||||
# We have a full response's content,
|
||||
# so let's make sure we don't report ourselves as chunked data.
|
||||
if transfer_encoding == 'chunked':
|
||||
encodings = transfer_encoding.split(",")
|
||||
encodings.remove('chunked')
|
||||
urlfetch_resp.headers['transfer-encoding'] = ','.join(encodings)
|
||||
|
||||
return HTTPResponse(
|
||||
# In order for decoding to work, we must present the content as
|
||||
# a file-like object.
|
||||
body=BytesIO(urlfetch_resp.content),
|
||||
headers=urlfetch_resp.headers,
|
||||
status=urlfetch_resp.status_code,
|
||||
**response_kw
|
||||
)
|
||||
|
||||
def _get_absolute_timeout(self, timeout):
|
||||
if timeout is Timeout.DEFAULT_TIMEOUT:
|
||||
return None # Defer to URLFetch's default.
|
||||
if isinstance(timeout, Timeout):
|
||||
if timeout._read is not None or timeout._connect is not None:
|
||||
warnings.warn(
|
||||
"URLFetch does not support granular timeout settings, "
|
||||
"reverting to total or default URLFetch timeout.",
|
||||
AppEnginePlatformWarning)
|
||||
return timeout.total
|
||||
return timeout
|
||||
|
||||
def _get_retries(self, retries, redirect):
|
||||
if not isinstance(retries, Retry):
|
||||
retries = Retry.from_int(
|
||||
retries, redirect=redirect, default=self.retries)
|
||||
|
||||
if retries.connect or retries.read or retries.redirect:
|
||||
warnings.warn(
|
||||
"URLFetch only supports total retries and does not "
|
||||
"recognize connect, read, or redirect retry parameters.",
|
||||
AppEnginePlatformWarning)
|
||||
|
||||
return retries
|
||||
|
||||
|
||||
def is_appengine():
|
||||
return (is_local_appengine() or
|
||||
is_prod_appengine() or
|
||||
is_prod_appengine_mvms())
|
||||
|
||||
|
||||
def is_appengine_sandbox():
|
||||
return is_appengine() and not is_prod_appengine_mvms()
|
||||
|
||||
|
||||
def is_local_appengine():
|
||||
return ('APPENGINE_RUNTIME' in os.environ and
|
||||
'Development/' in os.environ['SERVER_SOFTWARE'])
|
||||
|
||||
|
||||
def is_prod_appengine():
|
||||
return ('APPENGINE_RUNTIME' in os.environ and
|
||||
'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and
|
||||
not is_prod_appengine_mvms())
|
||||
|
||||
|
||||
def is_prod_appengine_mvms():
|
||||
return os.environ.get('GAE_VM', False) == 'true'
|
||||
@@ -1,112 +0,0 @@
|
||||
"""
|
||||
NTLM authenticating pool, contributed by erikcederstran
|
||||
|
||||
Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
from logging import getLogger
|
||||
from ntlm import ntlm
|
||||
|
||||
from .. import HTTPSConnectionPool
|
||||
from ..packages.six.moves.http_client import HTTPSConnection
|
||||
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
||||
|
||||
class NTLMConnectionPool(HTTPSConnectionPool):
|
||||
"""
|
||||
Implements an NTLM authentication version of an urllib3 connection pool
|
||||
"""
|
||||
|
||||
scheme = 'https'
|
||||
|
||||
def __init__(self, user, pw, authurl, *args, **kwargs):
|
||||
"""
|
||||
authurl is a random URL on the server that is protected by NTLM.
|
||||
user is the Windows user, probably in the DOMAIN\\username format.
|
||||
pw is the password for the user.
|
||||
"""
|
||||
super(NTLMConnectionPool, self).__init__(*args, **kwargs)
|
||||
self.authurl = authurl
|
||||
self.rawuser = user
|
||||
user_parts = user.split('\\', 1)
|
||||
self.domain = user_parts[0].upper()
|
||||
self.user = user_parts[1]
|
||||
self.pw = pw
|
||||
|
||||
def _new_conn(self):
|
||||
# Performs the NTLM handshake that secures the connection. The socket
|
||||
# must be kept open while requests are performed.
|
||||
self.num_connections += 1
|
||||
log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s',
|
||||
self.num_connections, self.host, self.authurl)
|
||||
|
||||
headers = {}
|
||||
headers['Connection'] = 'Keep-Alive'
|
||||
req_header = 'Authorization'
|
||||
resp_header = 'www-authenticate'
|
||||
|
||||
conn = HTTPSConnection(host=self.host, port=self.port)
|
||||
|
||||
# Send negotiation message
|
||||
headers[req_header] = (
|
||||
'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
|
||||
log.debug('Request headers: %s', headers)
|
||||
conn.request('GET', self.authurl, None, headers)
|
||||
res = conn.getresponse()
|
||||
reshdr = dict(res.getheaders())
|
||||
log.debug('Response status: %s %s', res.status, res.reason)
|
||||
log.debug('Response headers: %s', reshdr)
|
||||
log.debug('Response data: %s [...]', res.read(100))
|
||||
|
||||
# Remove the reference to the socket, so that it can not be closed by
|
||||
# the response object (we want to keep the socket open)
|
||||
res.fp = None
|
||||
|
||||
# Server should respond with a challenge message
|
||||
auth_header_values = reshdr[resp_header].split(', ')
|
||||
auth_header_value = None
|
||||
for s in auth_header_values:
|
||||
if s[:5] == 'NTLM ':
|
||||
auth_header_value = s[5:]
|
||||
if auth_header_value is None:
|
||||
raise Exception('Unexpected %s response header: %s' %
|
||||
(resp_header, reshdr[resp_header]))
|
||||
|
||||
# Send authentication message
|
||||
ServerChallenge, NegotiateFlags = \
|
||||
ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
|
||||
auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
|
||||
self.user,
|
||||
self.domain,
|
||||
self.pw,
|
||||
NegotiateFlags)
|
||||
headers[req_header] = 'NTLM %s' % auth_msg
|
||||
log.debug('Request headers: %s', headers)
|
||||
conn.request('GET', self.authurl, None, headers)
|
||||
res = conn.getresponse()
|
||||
log.debug('Response status: %s %s', res.status, res.reason)
|
||||
log.debug('Response headers: %s', dict(res.getheaders()))
|
||||
log.debug('Response data: %s [...]', res.read()[:100])
|
||||
if res.status != 200:
|
||||
if res.status == 401:
|
||||
raise Exception('Server rejected request: wrong '
|
||||
'username or password')
|
||||
raise Exception('Wrong server response: %s %s' %
|
||||
(res.status, res.reason))
|
||||
|
||||
res.fp = None
|
||||
log.debug('Connection established')
|
||||
return conn
|
||||
|
||||
def urlopen(self, method, url, body=None, headers=None, retries=3,
|
||||
redirect=True, assert_same_host=True):
|
||||
if headers is None:
|
||||
headers = {}
|
||||
headers['Connection'] = 'Keep-Alive'
|
||||
return super(NTLMConnectionPool, self).urlopen(method, url, body,
|
||||
headers, retries,
|
||||
redirect,
|
||||
assert_same_host)
|
||||
@@ -1,452 +0,0 @@
|
||||
"""
|
||||
SSL with SNI_-support for Python 2. Follow these instructions if you would
|
||||
like to verify SSL certificates in Python 2. Note, the default libraries do
|
||||
*not* do certificate checking; you need to do additional work to validate
|
||||
certificates yourself.
|
||||
|
||||
This needs the following packages installed:
|
||||
|
||||
* pyOpenSSL (tested with 16.0.0)
|
||||
* cryptography (minimum 1.3.4, from pyopenssl)
|
||||
* idna (minimum 2.0, from cryptography)
|
||||
|
||||
However, pyopenssl depends on cryptography, which depends on idna, so while we
|
||||
use all three directly here we end up having relatively few packages required.
|
||||
|
||||
You can install them with the following command:
|
||||
|
||||
pip install pyopenssl cryptography idna
|
||||
|
||||
To activate certificate checking, call
|
||||
:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code
|
||||
before you begin making HTTP requests. This can be done in a ``sitecustomize``
|
||||
module, or at any other time before your application begins using ``urllib3``,
|
||||
like this::
|
||||
|
||||
try:
|
||||
import urllib3.contrib.pyopenssl
|
||||
urllib3.contrib.pyopenssl.inject_into_urllib3()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
Now you can use :mod:`urllib3` as you normally would, and it will support SNI
|
||||
when the required modules are installed.
|
||||
|
||||
Activating this module also has the positive side effect of disabling SSL/TLS
|
||||
compression in Python 2 (see `CRIME attack`_).
|
||||
|
||||
If you want to configure the default list of supported cipher suites, you can
|
||||
set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable.
|
||||
|
||||
.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
|
||||
.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import OpenSSL.SSL
|
||||
from cryptography import x509
|
||||
from cryptography.hazmat.backends.openssl import backend as openssl_backend
|
||||
from cryptography.hazmat.backends.openssl.x509 import _Certificate
|
||||
|
||||
from socket import timeout, error as SocketError
|
||||
from io import BytesIO
|
||||
|
||||
try: # Platform-specific: Python 2
|
||||
from socket import _fileobject
|
||||
except ImportError: # Platform-specific: Python 3
|
||||
_fileobject = None
|
||||
from ..packages.backports.makefile import backport_makefile
|
||||
|
||||
import logging
|
||||
import ssl
|
||||
import six
|
||||
import sys
|
||||
|
||||
from .. import util
|
||||
|
||||
__all__ = ['inject_into_urllib3', 'extract_from_urllib3']
|
||||
|
||||
# SNI always works.
|
||||
HAS_SNI = True
|
||||
|
||||
# Map from urllib3 to PyOpenSSL compatible parameter-values.
|
||||
_openssl_versions = {
|
||||
ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD,
|
||||
ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD,
|
||||
}
|
||||
|
||||
if hasattr(ssl, 'PROTOCOL_TLSv1_1') and hasattr(OpenSSL.SSL, 'TLSv1_1_METHOD'):
|
||||
_openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD
|
||||
|
||||
if hasattr(ssl, 'PROTOCOL_TLSv1_2') and hasattr(OpenSSL.SSL, 'TLSv1_2_METHOD'):
|
||||
_openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD
|
||||
|
||||
try:
|
||||
_openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD})
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
_stdlib_to_openssl_verify = {
|
||||
ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE,
|
||||
ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER,
|
||||
ssl.CERT_REQUIRED:
|
||||
OpenSSL.SSL.VERIFY_PEER + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
|
||||
}
|
||||
_openssl_to_stdlib_verify = dict(
|
||||
(v, k) for k, v in _stdlib_to_openssl_verify.items()
|
||||
)
|
||||
|
||||
# OpenSSL will only write 16K at a time
|
||||
SSL_WRITE_BLOCKSIZE = 16384
|
||||
|
||||
orig_util_HAS_SNI = util.HAS_SNI
|
||||
orig_util_SSLContext = util.ssl_.SSLContext
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def inject_into_urllib3():
|
||||
'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.'
|
||||
|
||||
_validate_dependencies_met()
|
||||
|
||||
util.ssl_.SSLContext = PyOpenSSLContext
|
||||
util.HAS_SNI = HAS_SNI
|
||||
util.ssl_.HAS_SNI = HAS_SNI
|
||||
util.IS_PYOPENSSL = True
|
||||
util.ssl_.IS_PYOPENSSL = True
|
||||
|
||||
|
||||
def extract_from_urllib3():
|
||||
'Undo monkey-patching by :func:`inject_into_urllib3`.'
|
||||
|
||||
util.ssl_.SSLContext = orig_util_SSLContext
|
||||
util.HAS_SNI = orig_util_HAS_SNI
|
||||
util.ssl_.HAS_SNI = orig_util_HAS_SNI
|
||||
util.IS_PYOPENSSL = False
|
||||
util.ssl_.IS_PYOPENSSL = False
|
||||
|
||||
|
||||
def _validate_dependencies_met():
|
||||
"""
|
||||
Verifies that PyOpenSSL's package-level dependencies have been met.
|
||||
Throws `ImportError` if they are not met.
|
||||
"""
|
||||
# Method added in `cryptography==1.1`; not available in older versions
|
||||
from cryptography.x509.extensions import Extensions
|
||||
if getattr(Extensions, "get_extension_for_class", None) is None:
|
||||
raise ImportError("'cryptography' module missing required functionality. "
|
||||
"Try upgrading to v1.3.4 or newer.")
|
||||
|
||||
# pyOpenSSL 0.14 and above use cryptography for OpenSSL bindings. The _x509
|
||||
# attribute is only present on those versions.
|
||||
from OpenSSL.crypto import X509
|
||||
x509 = X509()
|
||||
if getattr(x509, "_x509", None) is None:
|
||||
raise ImportError("'pyOpenSSL' module missing required functionality. "
|
||||
"Try upgrading to v0.14 or newer.")
|
||||
|
||||
|
||||
def _dnsname_to_stdlib(name):
|
||||
"""
|
||||
Converts a dNSName SubjectAlternativeName field to the form used by the
|
||||
standard library on the given Python version.
|
||||
|
||||
Cryptography produces a dNSName as a unicode string that was idna-decoded
|
||||
from ASCII bytes. We need to idna-encode that string to get it back, and
|
||||
then on Python 3 we also need to convert to unicode via UTF-8 (the stdlib
|
||||
uses PyUnicode_FromStringAndSize on it, which decodes via UTF-8).
|
||||
"""
|
||||
def idna_encode(name):
|
||||
"""
|
||||
Borrowed wholesale from the Python Cryptography Project. It turns out
|
||||
that we can't just safely call `idna.encode`: it can explode for
|
||||
wildcard names. This avoids that problem.
|
||||
"""
|
||||
import idna
|
||||
|
||||
for prefix in [u'*.', u'.']:
|
||||
if name.startswith(prefix):
|
||||
name = name[len(prefix):]
|
||||
return prefix.encode('ascii') + idna.encode(name)
|
||||
return idna.encode(name)
|
||||
|
||||
name = idna_encode(name)
|
||||
if sys.version_info >= (3, 0):
|
||||
name = name.decode('utf-8')
|
||||
return name
|
||||
|
||||
|
||||
def get_subj_alt_name(peer_cert):
|
||||
"""
|
||||
Given an PyOpenSSL certificate, provides all the subject alternative names.
|
||||
"""
|
||||
# Pass the cert to cryptography, which has much better APIs for this.
|
||||
# This is technically using private APIs, but should work across all
|
||||
# relevant versions until PyOpenSSL gets something proper for this.
|
||||
cert = _Certificate(openssl_backend, peer_cert._x509)
|
||||
|
||||
# We want to find the SAN extension. Ask Cryptography to locate it (it's
|
||||
# faster than looping in Python)
|
||||
try:
|
||||
ext = cert.extensions.get_extension_for_class(
|
||||
x509.SubjectAlternativeName
|
||||
).value
|
||||
except x509.ExtensionNotFound:
|
||||
# No such extension, return the empty list.
|
||||
return []
|
||||
except (x509.DuplicateExtension, x509.UnsupportedExtension,
|
||||
x509.UnsupportedGeneralNameType, UnicodeError) as e:
|
||||
# A problem has been found with the quality of the certificate. Assume
|
||||
# no SAN field is present.
|
||||
log.warning(
|
||||
"A problem was encountered with the certificate that prevented "
|
||||
"urllib3 from finding the SubjectAlternativeName field. This can "
|
||||
"affect certificate validation. The error was %s",
|
||||
e,
|
||||
)
|
||||
return []
|
||||
|
||||
# We want to return dNSName and iPAddress fields. We need to cast the IPs
|
||||
# back to strings because the match_hostname function wants them as
|
||||
# strings.
|
||||
# Sadly the DNS names need to be idna encoded and then, on Python 3, UTF-8
|
||||
# decoded. This is pretty frustrating, but that's what the standard library
|
||||
# does with certificates, and so we need to attempt to do the same.
|
||||
names = [
|
||||
('DNS', _dnsname_to_stdlib(name))
|
||||
for name in ext.get_values_for_type(x509.DNSName)
|
||||
]
|
||||
names.extend(
|
||||
('IP Address', str(name))
|
||||
for name in ext.get_values_for_type(x509.IPAddress)
|
||||
)
|
||||
|
||||
return names
|
||||
|
||||
|
||||
class WrappedSocket(object):
|
||||
'''API-compatibility wrapper for Python OpenSSL's Connection-class.
|
||||
|
||||
Note: _makefile_refs, _drop() and _reuse() are needed for the garbage
|
||||
collector of pypy.
|
||||
'''
|
||||
|
||||
def __init__(self, connection, socket, suppress_ragged_eofs=True):
|
||||
self.connection = connection
|
||||
self.socket = socket
|
||||
self.suppress_ragged_eofs = suppress_ragged_eofs
|
||||
self._makefile_refs = 0
|
||||
self._closed = False
|
||||
|
||||
def fileno(self):
|
||||
return self.socket.fileno()
|
||||
|
||||
# Copy-pasted from Python 3.5 source code
|
||||
def _decref_socketios(self):
|
||||
if self._makefile_refs > 0:
|
||||
self._makefile_refs -= 1
|
||||
if self._closed:
|
||||
self.close()
|
||||
|
||||
def recv(self, *args, **kwargs):
|
||||
try:
|
||||
data = self.connection.recv(*args, **kwargs)
|
||||
except OpenSSL.SSL.SysCallError as e:
|
||||
if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'):
|
||||
return b''
|
||||
else:
|
||||
raise SocketError(str(e))
|
||||
except OpenSSL.SSL.ZeroReturnError as e:
|
||||
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
|
||||
return b''
|
||||
else:
|
||||
raise
|
||||
except OpenSSL.SSL.WantReadError:
|
||||
rd = util.wait_for_read(self.socket, self.socket.gettimeout())
|
||||
if not rd:
|
||||
raise timeout('The read operation timed out')
|
||||
else:
|
||||
return self.recv(*args, **kwargs)
|
||||
else:
|
||||
return data
|
||||
|
||||
def recv_into(self, *args, **kwargs):
|
||||
try:
|
||||
return self.connection.recv_into(*args, **kwargs)
|
||||
except OpenSSL.SSL.SysCallError as e:
|
||||
if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'):
|
||||
return 0
|
||||
else:
|
||||
raise SocketError(str(e))
|
||||
except OpenSSL.SSL.ZeroReturnError as e:
|
||||
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
|
||||
return 0
|
||||
else:
|
||||
raise
|
||||
except OpenSSL.SSL.WantReadError:
|
||||
rd = util.wait_for_read(self.socket, self.socket.gettimeout())
|
||||
if not rd:
|
||||
raise timeout('The read operation timed out')
|
||||
else:
|
||||
return self.recv_into(*args, **kwargs)
|
||||
|
||||
def settimeout(self, timeout):
|
||||
return self.socket.settimeout(timeout)
|
||||
|
||||
def _send_until_done(self, data):
|
||||
while True:
|
||||
try:
|
||||
return self.connection.send(data)
|
||||
except OpenSSL.SSL.WantWriteError:
|
||||
wr = util.wait_for_write(self.socket, self.socket.gettimeout())
|
||||
if not wr:
|
||||
raise timeout()
|
||||
continue
|
||||
except OpenSSL.SSL.SysCallError as e:
|
||||
raise SocketError(str(e))
|
||||
|
||||
def sendall(self, data):
|
||||
total_sent = 0
|
||||
while total_sent < len(data):
|
||||
sent = self._send_until_done(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE])
|
||||
total_sent += sent
|
||||
|
||||
def shutdown(self):
|
||||
# FIXME rethrow compatible exceptions should we ever use this
|
||||
self.connection.shutdown()
|
||||
|
||||
def close(self):
|
||||
if self._makefile_refs < 1:
|
||||
try:
|
||||
self._closed = True
|
||||
return self.connection.close()
|
||||
except OpenSSL.SSL.Error:
|
||||
return
|
||||
else:
|
||||
self._makefile_refs -= 1
|
||||
|
||||
def getpeercert(self, binary_form=False):
|
||||
x509 = self.connection.get_peer_certificate()
|
||||
|
||||
if not x509:
|
||||
return x509
|
||||
|
||||
if binary_form:
|
||||
return OpenSSL.crypto.dump_certificate(
|
||||
OpenSSL.crypto.FILETYPE_ASN1,
|
||||
x509)
|
||||
|
||||
return {
|
||||
'subject': (
|
||||
(('commonName', x509.get_subject().CN),),
|
||||
),
|
||||
'subjectAltName': get_subj_alt_name(x509)
|
||||
}
|
||||
|
||||
def _reuse(self):
|
||||
self._makefile_refs += 1
|
||||
|
||||
def _drop(self):
|
||||
if self._makefile_refs < 1:
|
||||
self.close()
|
||||
else:
|
||||
self._makefile_refs -= 1
|
||||
|
||||
|
||||
if _fileobject: # Platform-specific: Python 2
|
||||
def makefile(self, mode, bufsize=-1):
|
||||
self._makefile_refs += 1
|
||||
return _fileobject(self, mode, bufsize, close=True)
|
||||
else: # Platform-specific: Python 3
|
||||
makefile = backport_makefile
|
||||
|
||||
WrappedSocket.makefile = makefile
|
||||
|
||||
|
||||
class PyOpenSSLContext(object):
|
||||
"""
|
||||
I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible
|
||||
for translating the interface of the standard library ``SSLContext`` object
|
||||
to calls into PyOpenSSL.
|
||||
"""
|
||||
def __init__(self, protocol):
|
||||
self.protocol = _openssl_versions[protocol]
|
||||
self._ctx = OpenSSL.SSL.Context(self.protocol)
|
||||
self._options = 0
|
||||
self.check_hostname = False
|
||||
|
||||
@property
|
||||
def options(self):
|
||||
return self._options
|
||||
|
||||
@options.setter
|
||||
def options(self, value):
|
||||
self._options = value
|
||||
self._ctx.set_options(value)
|
||||
|
||||
@property
|
||||
def verify_mode(self):
|
||||
return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()]
|
||||
|
||||
@verify_mode.setter
|
||||
def verify_mode(self, value):
|
||||
self._ctx.set_verify(
|
||||
_stdlib_to_openssl_verify[value],
|
||||
_verify_callback
|
||||
)
|
||||
|
||||
def set_default_verify_paths(self):
|
||||
self._ctx.set_default_verify_paths()
|
||||
|
||||
def set_ciphers(self, ciphers):
|
||||
if isinstance(ciphers, six.text_type):
|
||||
ciphers = ciphers.encode('utf-8')
|
||||
self._ctx.set_cipher_list(ciphers)
|
||||
|
||||
def load_verify_locations(self, cafile=None, capath=None, cadata=None):
|
||||
if cafile is not None:
|
||||
cafile = cafile.encode('utf-8')
|
||||
if capath is not None:
|
||||
capath = capath.encode('utf-8')
|
||||
self._ctx.load_verify_locations(cafile, capath)
|
||||
if cadata is not None:
|
||||
self._ctx.load_verify_locations(BytesIO(cadata))
|
||||
|
||||
def load_cert_chain(self, certfile, keyfile=None, password=None):
|
||||
self._ctx.use_certificate_file(certfile)
|
||||
if password is not None:
|
||||
self._ctx.set_passwd_cb(lambda max_length, prompt_twice, userdata: password)
|
||||
self._ctx.use_privatekey_file(keyfile or certfile)
|
||||
|
||||
def wrap_socket(self, sock, server_side=False,
|
||||
do_handshake_on_connect=True, suppress_ragged_eofs=True,
|
||||
server_hostname=None):
|
||||
cnx = OpenSSL.SSL.Connection(self._ctx, sock)
|
||||
|
||||
if isinstance(server_hostname, six.text_type): # Platform-specific: Python 3
|
||||
server_hostname = server_hostname.encode('utf-8')
|
||||
|
||||
if server_hostname is not None:
|
||||
cnx.set_tlsext_host_name(server_hostname)
|
||||
|
||||
cnx.set_connect_state()
|
||||
|
||||
while True:
|
||||
try:
|
||||
cnx.do_handshake()
|
||||
except OpenSSL.SSL.WantReadError:
|
||||
rd = util.wait_for_read(sock, sock.gettimeout())
|
||||
if not rd:
|
||||
raise timeout('select timed out')
|
||||
continue
|
||||
except OpenSSL.SSL.Error as e:
|
||||
raise ssl.SSLError('bad handshake: %r' % e)
|
||||
break
|
||||
|
||||
return WrappedSocket(cnx, sock)
|
||||
|
||||
|
||||
def _verify_callback(cnx, x509, err_no, err_depth, return_code):
|
||||
return err_no == 0
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user